@open-insight/eval 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/exec-w0BrRdRA.mjs +1103 -0
- package/dist/index-CjLj-Fvc.d.mts +863 -0
- package/dist/index.d.mts +35 -0
- package/dist/index.mjs +105 -0
- package/dist/internal.d.mts +6 -0
- package/dist/internal.mjs +6 -0
- package/dist/rolldown-runtime-D7D4PA-g.mjs +13 -0
- package/package.json +52 -0
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { $ as fromStream, A as all, B as InitError$1, C as all$2, D as each$1, E as all$1, F as build$2, G as BenchmarkError, H as build, I as init$2, J as Loader, K as InitError, L as withAgentProvider, M as reduce, O as reduce$1, P as Harness, Q as fromIterable, R as withSandboxProvider, S as withTrajReduce, T as reduce$2, U as init, W as withTasks, X as fromArray, Y as Tasks, Z as fromAsyncIterable, _ as withTask, a as build$3, at as withContext, b as withTraj, c as withHarness, ct as withResources, d as index_d_exports$2, dt as Grader, et as withGitRepo, f as Metrics, ft as Map, g as withBenchmark, gt as bash, h as withBenchReduce, ht as index_d_exports, i as Executor, it as init$1, j as each, k as Metric, l as withMetrics, lt as withSnapshot, m as withBenchEach, mt as ResultSchema, n as run, nt as Task, o as init$4, ot as withGrader, p as init$3, pt as Result, q as index_d_exports$1, r as runPromise, rt as build$1, s as withBenchmark$1, st as withPrompt, tt as fromDir, u as withTrailCount, ut as Exec, v as withTaskEach, w as each$2, x as withTrajEach, y as withTaskReduce, z as HarnessError } from "./index-CjLj-Fvc.mjs";
|
|
2
|
+
import { Effect } from "effect";
|
|
3
|
+
import { Prompt } from "effect/unstable/ai";
|
|
4
|
+
export * from "@open-insight/core";
|
|
5
|
+
|
|
6
|
+
//#region src/benchmark/export.d.ts
|
|
7
|
+
declare namespace export_d_exports {
|
|
8
|
+
export { BenchmarkError, InitError, build, init, withTasks };
|
|
9
|
+
}
|
|
10
|
+
declare namespace export_d_exports$9 {
|
|
11
|
+
export { Exec, Grader, index_d_exports as Internal, Map, Result, ResultSchema, bash };
|
|
12
|
+
}
|
|
13
|
+
declare namespace export_d_exports$5 {
|
|
14
|
+
export { export_d_exports$9 as Grade, Grader, index_d_exports$1 as Internal, Loader, Task, Tasks, build$1 as build, fromArray, fromAsyncIterable, fromDir, fromIterable, fromStream, init$1 as init, withContext, withGitRepo, withGrader, withPrompt, withResources, withSnapshot };
|
|
15
|
+
}
|
|
16
|
+
declare namespace export_d_exports$2 {
|
|
17
|
+
export { Harness, HarnessError, InitError$1 as InitError, build$2 as build, init$2 as init, withAgentProvider, withSandboxProvider };
|
|
18
|
+
}
|
|
19
|
+
declare namespace export_d_exports$8 {
|
|
20
|
+
export { Metric, all, each, reduce };
|
|
21
|
+
}
|
|
22
|
+
declare namespace export_d_exports$7 {
|
|
23
|
+
export { all$1 as all, each$1 as each, reduce$1 as reduce };
|
|
24
|
+
}
|
|
25
|
+
declare namespace export_d_exports$6 {
|
|
26
|
+
export { all$2 as all, each$2 as each, reduce$2 as reduce };
|
|
27
|
+
}
|
|
28
|
+
declare namespace export_d_exports$4 {
|
|
29
|
+
export { export_d_exports$6 as Bench, index_d_exports$2 as Internal, Metrics, export_d_exports$7 as Task, export_d_exports$8 as Traj, init$3 as init, withBenchEach, withBenchReduce, withBenchmark, withTask, withTaskEach, withTaskReduce, withTraj, withTrajEach, withTrajReduce };
|
|
30
|
+
}
|
|
31
|
+
declare namespace export_d_exports$1 {
|
|
32
|
+
export { Executor, build$3 as build, init$4 as init, run, runPromise, withBenchmark$1 as withBenchmark, withHarness, withMetrics, withTrailCount };
|
|
33
|
+
}
|
|
34
|
+
//#endregion
|
|
35
|
+
/**
 * Type-side counterpart of the `Matrix` export. The runtime bundle builds
 * `Matrix` from an empty export map, so the namespace has no members; it is
 * declared here so the re-export below resolves instead of referencing an
 * undeclared name.
 */
declare namespace export_d_exports$3 {}
export { export_d_exports as Benchmark, Effect, export_d_exports$1 as Exec, export_d_exports$2 as Harness, export_d_exports$3 as Matrix, export_d_exports$4 as Metric, Prompt, export_d_exports$5 as Task };
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import { t as __exportAll } from "./rolldown-runtime-D7D4PA-g.mjs";
|
|
2
|
+
import { $ as withSnapshot, B as fromIterable, C as reduce, D as all$2, E as reduce$1, F as HarnessError, G as grade_exports, H as withGitRepo, I as InitError$1, J as init$1, K as bash, L as task_exports, M as init$2, N as withAgentProvider, O as each$2, P as withSandboxProvider, Q as withResources, R as fromArray, S as each, T as each$1, U as fromDir, V as fromStream, W as ResultSchema, X as withGrader, Y as withContext, Z as withPrompt, _ as withTaskReduce, a as init$4, at as InitError, b as withTrajReduce, c as withMetrics, d as init$3, f as withBenchEach, g as withTaskEach, h as withTask, i as build$3, it as BenchmarkError, j as build$2, k as reduce$2, l as withTrailCount, m as withBenchmark, n as run, nt as init, o as withBenchmark$1, p as withBenchReduce, q as build$1, r as runPromise, rt as withTasks, s as withHarness, tt as build, u as metric_exports, v as withTraj, w as all$1, x as all, y as withTrajEach, z as fromAsyncIterable } from "./exec-w0BrRdRA.mjs";
|
|
3
|
+
import { Effect } from "effect";
|
|
4
|
+
import { Prompt } from "effect/unstable/ai";
|
|
5
|
+
export * from "@open-insight/core";
|
|
6
|
+
//#region src/benchmark/export.ts
|
|
7
|
+
var export_exports = /* @__PURE__ */ __exportAll({
|
|
8
|
+
BenchmarkError: () => BenchmarkError,
|
|
9
|
+
InitError: () => InitError,
|
|
10
|
+
build: () => build,
|
|
11
|
+
init: () => init,
|
|
12
|
+
withTasks: () => withTasks
|
|
13
|
+
});
|
|
14
|
+
//#endregion
|
|
15
|
+
//#region src/task/grade/export.ts
|
|
16
|
+
var export_exports$9 = /* @__PURE__ */ __exportAll({
|
|
17
|
+
Internal: () => grade_exports,
|
|
18
|
+
ResultSchema: () => ResultSchema,
|
|
19
|
+
bash: () => bash
|
|
20
|
+
});
|
|
21
|
+
//#endregion
|
|
22
|
+
//#region src/task/export.ts
|
|
23
|
+
var export_exports$5 = /* @__PURE__ */ __exportAll({
|
|
24
|
+
Grade: () => export_exports$9,
|
|
25
|
+
Internal: () => task_exports,
|
|
26
|
+
build: () => build$1,
|
|
27
|
+
fromArray: () => fromArray,
|
|
28
|
+
fromAsyncIterable: () => fromAsyncIterable,
|
|
29
|
+
fromDir: () => fromDir,
|
|
30
|
+
fromIterable: () => fromIterable,
|
|
31
|
+
fromStream: () => fromStream,
|
|
32
|
+
init: () => init$1,
|
|
33
|
+
withContext: () => withContext,
|
|
34
|
+
withGitRepo: () => withGitRepo,
|
|
35
|
+
withGrader: () => withGrader,
|
|
36
|
+
withPrompt: () => withPrompt,
|
|
37
|
+
withResources: () => withResources,
|
|
38
|
+
withSnapshot: () => withSnapshot
|
|
39
|
+
});
|
|
40
|
+
//#endregion
|
|
41
|
+
//#region src/harness/export.ts
|
|
42
|
+
var export_exports$2 = /* @__PURE__ */ __exportAll({
|
|
43
|
+
HarnessError: () => HarnessError,
|
|
44
|
+
InitError: () => InitError$1,
|
|
45
|
+
build: () => build$2,
|
|
46
|
+
init: () => init$2,
|
|
47
|
+
withAgentProvider: () => withAgentProvider,
|
|
48
|
+
withSandboxProvider: () => withSandboxProvider
|
|
49
|
+
});
|
|
50
|
+
//#endregion
|
|
51
|
+
//#region src/metric/traj/export.ts
|
|
52
|
+
var export_exports$8 = /* @__PURE__ */ __exportAll({
|
|
53
|
+
all: () => all,
|
|
54
|
+
each: () => each,
|
|
55
|
+
reduce: () => reduce
|
|
56
|
+
});
|
|
57
|
+
//#endregion
|
|
58
|
+
//#region src/metric/task/export.ts
|
|
59
|
+
var export_exports$7 = /* @__PURE__ */ __exportAll({
|
|
60
|
+
all: () => all$1,
|
|
61
|
+
each: () => each$1,
|
|
62
|
+
reduce: () => reduce$1
|
|
63
|
+
});
|
|
64
|
+
//#endregion
|
|
65
|
+
//#region src/metric/bench/export.ts
|
|
66
|
+
var export_exports$6 = /* @__PURE__ */ __exportAll({
|
|
67
|
+
all: () => all$2,
|
|
68
|
+
each: () => each$2,
|
|
69
|
+
reduce: () => reduce$2
|
|
70
|
+
});
|
|
71
|
+
//#endregion
|
|
72
|
+
//#region src/metric/export.ts
|
|
73
|
+
var export_exports$4 = /* @__PURE__ */ __exportAll({
|
|
74
|
+
Bench: () => export_exports$6,
|
|
75
|
+
Internal: () => metric_exports,
|
|
76
|
+
Task: () => export_exports$7,
|
|
77
|
+
Traj: () => export_exports$8,
|
|
78
|
+
init: () => init$3,
|
|
79
|
+
withBenchEach: () => withBenchEach,
|
|
80
|
+
withBenchReduce: () => withBenchReduce,
|
|
81
|
+
withBenchmark: () => withBenchmark,
|
|
82
|
+
withTask: () => withTask,
|
|
83
|
+
withTaskEach: () => withTaskEach,
|
|
84
|
+
withTaskReduce: () => withTaskReduce,
|
|
85
|
+
withTraj: () => withTraj,
|
|
86
|
+
withTrajEach: () => withTrajEach,
|
|
87
|
+
withTrajReduce: () => withTrajReduce
|
|
88
|
+
});
|
|
89
|
+
//#endregion
|
|
90
|
+
//#region src/exec/export.ts
|
|
91
|
+
var export_exports$1 = /* @__PURE__ */ __exportAll({
|
|
92
|
+
build: () => build$3,
|
|
93
|
+
init: () => init$4,
|
|
94
|
+
run: () => run,
|
|
95
|
+
runPromise: () => runPromise,
|
|
96
|
+
withBenchmark: () => withBenchmark$1,
|
|
97
|
+
withHarness: () => withHarness,
|
|
98
|
+
withMetrics: () => withMetrics,
|
|
99
|
+
withTrailCount: () => withTrailCount
|
|
100
|
+
});
|
|
101
|
+
//#endregion
|
|
102
|
+
//#region src/matrix/export.ts
|
|
103
|
+
var export_exports$3 = /* @__PURE__ */ __exportAll({});
|
|
104
|
+
//#endregion
|
|
105
|
+
export { export_exports as Benchmark, Effect, export_exports$1 as Exec, export_exports$2 as Harness, export_exports$3 as Matrix, export_exports$4 as Metric, Prompt, export_exports$5 as Task };
|
|
package/dist/internal.d.mts
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { N as index_d_exports$2, V as index_d_exports, d as index_d_exports$4, q as index_d_exports$5, t as index_d_exports$1 } from "./index-CjLj-Fvc.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/matrix/index.d.ts
|
|
4
|
+
|
|
5
|
+
//#endregion
|
|
6
|
+
/**
 * Declaration for the `Matrix` internal namespace. It is not among the names
 * imported from the shared declaration chunk, so it is declared locally
 * (with no members) to keep the re-export below from referencing an
 * undeclared name.
 */
declare namespace index_d_exports$3 {}
export { index_d_exports as Benchmark, index_d_exports$1 as Exec, index_d_exports$2 as Harness, index_d_exports$3 as Matrix, index_d_exports$4 as Metric, index_d_exports$5 as Task };
|
|
package/dist/internal.mjs
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { t as __exportAll } from "./rolldown-runtime-D7D4PA-g.mjs";
|
|
2
|
+
import { A as harness_exports, L as task_exports, et as benchmark_exports, t as exec_exports, u as metric_exports } from "./exec-w0BrRdRA.mjs";
|
|
3
|
+
//#region src/matrix/index.ts
|
|
4
|
+
var matrix_exports = /* @__PURE__ */ __exportAll({});
|
|
5
|
+
//#endregion
|
|
6
|
+
export { benchmark_exports as Benchmark, exec_exports as Exec, harness_exports as Harness, matrix_exports as Matrix, metric_exports as Metric, task_exports as Task };
|
|
package/dist/rolldown-runtime-D7D4PA-g.mjs
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
//#region \0rolldown/runtime.js
|
|
2
|
+
var __defProp = Object.defineProperty;
/**
 * Build a namespace-style object from a map of export getters.
 *
 * Each own enumerable key of `all` becomes an enumerable accessor property on
 * the result whose getter is `all[name]`, so the value is re-read on every
 * access rather than captured once.
 *
 * @param {Record<string, () => unknown>} all - Export name -> zero-argument
 *   getter. `null`/`undefined` yields an object with no exports.
 * @param {boolean} [no_symbols] - When truthy, the result is NOT tagged with
 *   `Symbol.toStringTag = "Module"`.
 * @returns {object} A fresh object exposing one getter per export.
 */
var __exportAll = (all, no_symbols) => {
	const target = {};
	// Own keys only: `for...in` would also visit inherited enumerable keys,
	// whose values are not getters and would make defineProperty throw.
	const names = all == null ? [] : Object.keys(all);
	for (const name of names) {
		__defProp(target, name, {
			get: all[name],
			enumerable: true
		});
	}
	if (!no_symbols) {
		__defProp(target, Symbol.toStringTag, { value: "Module" });
	}
	return target;
};
|
|
12
|
+
//#endregion
|
|
13
|
+
export { __exportAll as t };
|
package/package.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@open-insight/eval",
|
|
3
|
+
"version": "0.0.0",
|
|
4
|
+
"description": "Evaluator framework for Open Insight — benchmarks, task grading, harnesses.",
|
|
5
|
+
"homepage": "https://github.com/open-insight/open-insight#readme",
|
|
6
|
+
"bugs": {
|
|
7
|
+
"url": "https://github.com/open-insight/open-insight/issues"
|
|
8
|
+
},
|
|
9
|
+
"license": "MIT",
|
|
10
|
+
"repository": {
|
|
11
|
+
"type": "git",
|
|
12
|
+
"url": "git+https://github.com/open-insight/open-insight.git",
|
|
13
|
+
"directory": "packages/eval"
|
|
14
|
+
},
|
|
15
|
+
"files": [
|
|
16
|
+
"dist"
|
|
17
|
+
],
|
|
18
|
+
"type": "module",
|
|
19
|
+
"exports": {
|
|
20
|
+
".": "./dist/index.mjs",
|
|
21
|
+
"./internal": "./dist/internal.mjs",
|
|
22
|
+
"./package.json": "./package.json"
|
|
23
|
+
},
|
|
24
|
+
"publishConfig": {
|
|
25
|
+
"access": "public"
|
|
26
|
+
},
|
|
27
|
+
"dependencies": {
|
|
28
|
+
"@effect/platform-node": "4.0.0-beta.91",
|
|
29
|
+
"@effect/vitest": "4.0.0-beta.91",
|
|
30
|
+
"ai": "^7.0.0",
|
|
31
|
+
"effect": "4.0.0-beta.90",
|
|
32
|
+
"immer": "^10.1.1",
|
|
33
|
+
"picomatch": "^4.0.4",
|
|
34
|
+
"@open-insight/core": "0.0.0"
|
|
35
|
+
},
|
|
36
|
+
"devDependencies": {
|
|
37
|
+
"@types/node": "^25.6.2",
|
|
38
|
+
"@types/picomatch": "^4.0.3",
|
|
39
|
+
"@typescript/native-preview": "7.0.0-dev.20260629.1",
|
|
40
|
+
"bumpp": "^11.1.0",
|
|
41
|
+
"typescript": "^5",
|
|
42
|
+
"vite-plus": "latest",
|
|
43
|
+
"vitest": "4.1.9",
|
|
44
|
+
"@open-insight/utils": "0.0.0"
|
|
45
|
+
},
|
|
46
|
+
"scripts": {
|
|
47
|
+
"build": "vp pack",
|
|
48
|
+
"dev": "vp pack --watch",
|
|
49
|
+
"test": "vp test",
|
|
50
|
+
"check": "vp check"
|
|
51
|
+
}
|
|
52
|
+
}
|