@jean.gnc/harness-kit 0.12.7 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -0
- package/dist/cli.js +13 -1
- package/dist/cli.js.map +1 -1
- package/dist/eval/capture.d.ts +23 -0
- package/dist/eval/capture.d.ts.map +1 -0
- package/dist/eval/capture.js +79 -0
- package/dist/eval/capture.js.map +1 -0
- package/dist/eval/cases.d.ts +10 -2
- package/dist/eval/cases.d.ts.map +1 -1
- package/dist/eval/cases.js +9 -3
- package/dist/eval/cases.js.map +1 -1
- package/dist/eval/detect.d.ts +1 -0
- package/dist/eval/detect.d.ts.map +1 -1
- package/dist/eval/detect.js +1 -1
- package/dist/eval/detect.js.map +1 -1
- package/dist/eval/grade-deterministic.d.ts +9 -0
- package/dist/eval/grade-deterministic.d.ts.map +1 -0
- package/dist/eval/grade-deterministic.js +87 -0
- package/dist/eval/grade-deterministic.js.map +1 -0
- package/dist/eval/grade-judge.d.ts +12 -0
- package/dist/eval/grade-judge.d.ts.map +1 -0
- package/dist/eval/grade-judge.js +14 -0
- package/dist/eval/grade-judge.js.map +1 -0
- package/dist/eval/grade.d.ts +5 -0
- package/dist/eval/grade.d.ts.map +1 -0
- package/dist/eval/grade.js +25 -0
- package/dist/eval/grade.js.map +1 -0
- package/dist/eval/index.d.ts +4 -0
- package/dist/eval/index.d.ts.map +1 -1
- package/dist/eval/index.js +27 -5
- package/dist/eval/index.js.map +1 -1
- package/dist/eval/judge.d.ts +26 -0
- package/dist/eval/judge.d.ts.map +1 -0
- package/dist/eval/judge.js +55 -0
- package/dist/eval/judge.js.map +1 -0
- package/dist/eval/report.d.ts +5 -1
- package/dist/eval/report.d.ts.map +1 -1
- package/dist/eval/report.js +66 -13
- package/dist/eval/report.js.map +1 -1
- package/dist/eval/runner.d.ts +13 -5
- package/dist/eval/runner.d.ts.map +1 -1
- package/dist/eval/runner.js +105 -31
- package/dist/eval/runner.js.map +1 -1
- package/dist/eval/schema.d.ts +644 -29
- package/dist/eval/schema.d.ts.map +1 -1
- package/dist/eval/schema.js +57 -6
- package/dist/eval/schema.js.map +1 -1
- package/dist/eval/score.d.ts +8 -0
- package/dist/eval/score.d.ts.map +1 -1
- package/dist/eval/score.js +17 -0
- package/dist/eval/score.js.map +1 -1
- package/dist/skill/includes.d.ts +4 -0
- package/dist/skill/includes.d.ts.map +1 -1
- package/dist/skill/includes.js +38 -32
- package/dist/skill/includes.js.map +1 -1
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -211,10 +211,53 @@ harness lint # lint compiled markdown under dist/
|
|
|
211
211
|
harness check # validate plugin references against local + installed sources
|
|
212
212
|
harness install # link configs + register plugins per declared vendor (--mode=local|remote)
|
|
213
213
|
harness uninstall # remove installed plugins per declared vendor
|
|
214
|
+
harness eval # run routing + solving evals against the installed harness
|
|
214
215
|
```
|
|
215
216
|
|
|
216
217
|
→ Full flag reference, bundled lint rules, and `package.json` integration: [docs/cli.md](./docs/cli.md).
|
|
217
218
|
|
|
219
|
+
## Evals
|
|
220
|
+
|
|
221
|
+
`harness eval` runs YAML cases in two tiers. A case file declares its `tier`, and the two
|
|
222
|
+
tiers carry mutually exclusive keys.
|
|
223
|
+
|
|
224
|
+
**Routing** — *did the right skill fire?* The session is killed on the first `Skill` tool_use;
|
|
225
|
+
the detector scores against an `expect` clause (`first` / `anyOf` / `path` / `noSkill`).
|
|
226
|
+
|
|
227
|
+
**Solving** — *did the agent produce the right thing?* The session runs to completion; the
|
|
228
|
+
final output, tool trajectory, and written files are graded by **deterministic assertions**
|
|
229
|
+
(all must pass) plus an optional **LLM-judge rubric** (one isolated call per dimension).
|
|
230
|
+
|
|
231
|
+
```yaml
|
|
232
|
+
suite: docs
|
|
233
|
+
tier: solving
|
|
234
|
+
cases:
|
|
235
|
+
- id: writes-a-readme
|
|
236
|
+
prompt: "Create a README.md describing this project."
|
|
237
|
+
expectSkill: dev-tools:typescript # optional; validated against installed skills
|
|
238
|
+
assert:
|
|
239
|
+
- { kind: wroteFile, path: README.md, contentMatches: "## " }
|
|
240
|
+
- { kind: usedTool, tool: Write }
|
|
241
|
+
- { kind: didNotUseTool, tool: Bash }
|
|
242
|
+
- { kind: outputMatches, pattern: "README", regex: false }
|
|
243
|
+
rubric:
|
|
244
|
+
combine: { combine: fraction, threshold: 0.5 } # or { combine: all }
|
|
245
|
+
dimensions:
|
|
246
|
+
- { dimension: clarity, criterion: "The README explains what the project does." }
|
|
247
|
+
- { dimension: structure, criterion: "The README has clear sections." }
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
A solving run passes when every assertion passes **and** the rubric meets its combine rule;
|
|
251
|
+
across `runs`, the case passes when the pass rate meets `threshold` (default `runs` is 1).
|
|
252
|
+
|
|
253
|
+
Routing needs no API key. Solving cases that declare a `rubric` use `claude` for the judge —
|
|
254
|
+
set `ANTHROPIC_API_KEY` (the run errors clearly if absent). The judge model is separate from
|
|
255
|
+
the session model:
|
|
256
|
+
|
|
257
|
+
```sh
|
|
258
|
+
harness eval --tier solving --model claude-opus-4-8 --judge-model claude-sonnet-4-5
|
|
259
|
+
```
|
|
260
|
+
|
|
218
261
|
## Programmatic API
|
|
219
262
|
|
|
220
263
|
Everything the CLI does is also a typed module API. See [docs/api.md](./docs/api.md).
|
package/dist/cli.js
CHANGED
|
@@ -197,7 +197,7 @@ function parsePositiveInt(value, flag) {
|
|
|
197
197
|
const evalCmd = defineCommand({
|
|
198
198
|
meta: {
|
|
199
199
|
name: "eval",
|
|
200
|
-
description: "Run
|
|
200
|
+
description: "Run routing (did the right skill fire?) and solving (graded behavior) evals",
|
|
201
201
|
},
|
|
202
202
|
args: {
|
|
203
203
|
cases: {
|
|
@@ -219,6 +219,14 @@ const evalCmd = defineCommand({
|
|
|
219
219
|
type: "string",
|
|
220
220
|
description: "model for claude -p (default: user's configured model)",
|
|
221
221
|
},
|
|
222
|
+
"judge-model": {
|
|
223
|
+
type: "string",
|
|
224
|
+
description: "model for the solving-tier LLM judge (default: claude-sonnet-4-5)",
|
|
225
|
+
},
|
|
226
|
+
"solving-timeout": {
|
|
227
|
+
type: "string",
|
|
228
|
+
description: "per-case timeout in seconds for solving sessions (default: 300)",
|
|
229
|
+
},
|
|
222
230
|
json: { type: "string", description: "write machine-readable results to this path" },
|
|
223
231
|
},
|
|
224
232
|
run: async ({ args }) => {
|
|
@@ -231,6 +239,10 @@ const evalCmd = defineCommand({
|
|
|
231
239
|
...(args.tier !== undefined && { tier: parseTier(args.tier) }),
|
|
232
240
|
...(args.runs !== undefined && { runs: parsePositiveInt(args.runs, "runs") }),
|
|
233
241
|
...(args.model !== undefined && { model: args.model }),
|
|
242
|
+
...(args["judge-model"] !== undefined && { judgeModel: args["judge-model"] }),
|
|
243
|
+
...(args["solving-timeout"] !== undefined && {
|
|
244
|
+
solvingTimeoutMs: parsePositiveInt(args["solving-timeout"], "solving-timeout") * 1000,
|
|
245
|
+
}),
|
|
234
246
|
});
|
|
235
247
|
if (!result.ok) {
|
|
236
248
|
for (const e of result.error)
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,OAAO,CAAC;AAC/C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EACL,WAAW,EACX,KAAK,GAIN,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AACjE,OAAO,EAAE,KAAK,EAAa,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAE7E,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAEnE,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,iBAAiB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC3E,MAAM,GAAG,GAAG,iBAAiB,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;AAE/E,SAAS,WAAW,CAAC,KAAa;IAChC,OAAQ,WAAiC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,IAAI,WAAW,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACrC,MAAM,IAAI,KAAK,CAAC,uBAAuB,KAAK,aAAa,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACrF,CAAC;AAED,MAAM,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,kCAAkC,EAAE;IAC1E,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE;QACrE,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE;QACtE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,sCAAsC,EAAE;QAC3F,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;KACjF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,CAAC;YACZ,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EAAE,IAAI,CAAC,GAAG
;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,WAAW,GAAG;IAClB,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW,EAAE;IACrE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,sCAAsC,EAAE;IAC3F,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;IAChF,SAAS,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,6BAA6B,EAAE;CAClF,CAAC;AAEX,MAAM,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,qDAAqD,EAAE;IAC7F,IAAI,EAAE;QACJ,GAAG,WAAW;QACd,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,gCAAgC,EAAE;KAC1F;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,IAAI,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,OAAO,CAAC;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,OAAO;YACP,IAAI;YACJ,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACxB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,aAAa,CAAC;IACjC,IAAI,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,8CAA8C,EAAE;IACxF,IAAI,EAAE,WAAW;IACjB,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,SAAS,CAAC;YACd,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,OAAO;YACP,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACxB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,2EAA2E;KACzF;IACD,IAAI,EAAE;QACJ,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,4BAA4B,EAAE;QACjF,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,kBAAkB,EAAE;QAChF,OAAO,EAAE;YACP,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE,mCAAmC,cAAc,EAAE;iBAC7D,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;iBAClB,IAAI,CAAC,IAAI,CAAC,GAAG;SACjB;QACD,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KA
AK,EAAE,WAAW,EAAE,sBAAsB,EAAE;KACjF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO;aACzB,KAAK,CAAC,GAAG,CAAC;aACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QACrF,cAAc,CAAC,OAAO,EAAE,cAAc,EAAE,CAAC,CAAC;QAC1C,MAAM,WAAW,CAAC;YAChB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,OAAO;YACP,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,QAAQ,GAAG,aAAa,CAAC;IAC7B,IAAI,EAAE;QACJ,IAAI,EAAE,OAAO;QACb,WAAW,EAAE,8DAA8D;KAC5E;IACD,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE;QACrE,IAAI,EAAE;YACJ,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,2CAA2C;SACzD;QACD,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,2BAA2B,EAAE;KACtF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7F,MAAM,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;YAC9E,OAAO,CAAC,GAAG,CACT,WAAW,KAAK,kBAAkB,MAAM,CAAC,cAAc,CAAC,MAAM,aAAa,SAAS,GAAG,CACxF,CAAC;YACF,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,YAAY,eAAe,CAAC,CAAC;YAC3D,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClD,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;QAChC,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/C,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,QAAQ
,CAAC,MAAM,WAAW,CAAC,CAAC;QACpD,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClE,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,CAAC;QACD,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,aAAa,CAAC,CAAC;YACxD,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;CACF,CAAC,CAAC;AAEH,SAAS,eAAe,CAAC,CAAqB;IAC5C,OAAO,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,OAAO,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,OAAO,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,aAAa,CAAC,CAAgB;IACrC,OAAO,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,cAAc,CAAC,CAAC,OAAO,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,qEAAqE;KACnF;IACD,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE;QACtE,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,2BAA2B,EAAE;KACtF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QACtE,IAAI,MAAM,CAAC,UAAU,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC;CACF,CAAC,CAAC;AAEH,SAAS,SAAS,CAAC,KAAa;IAC9B,IAAK,KAA2B,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAa,CAAC;IACvE,MAAM,IAAI,KAAK,CAAC,iBAAiB,KAAK,aAAa,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACzE,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa,EAAE,IAAY;IACnD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7B,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,KAAK,IAAI,qCAAqC,KAAK,GAAG,CAAC,CAAC;IAC1E,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,uDAAuD;KACrE;IACD,IAAI,EAAE;QACJ,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,OA
AO,EAAE,eAAe;YACxB,WAAW,EAAE,8BAA8B;SAC5C;QACD,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,0CAA0C,EAAE;QAC9F,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gCAAgC,EAAE;QACxE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gCAAgC,EAAE;QACvE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,uBAAuB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE;QACjF,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,4CAA4C,EAAE;QACnF,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,GAAG;YACZ,WAAW,EAAE,+EAA+E;SAC7F;QACD,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,wDAAwD;SACtE;QACD,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6CAA6C,EAAE;KACrF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC;YAC3B,QAAQ,EAAE,IAAI,CAAC,KAAK;YACpB,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,WAAW,EAAE,gBAAgB,CAAC,IAAI,CAAC,WAAW,EAAE,aAAa,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;YACtD,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC;YACrD,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,gBAAgB,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC;YAC7E,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;SACvD,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;YACf,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;YACvE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACzC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,MAAM,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACrD,MAAM,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC;QAC1D,CAAC;QACD,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,IAAI,GAAG,aAAa,CAAC;IACzB,IAAI,EAAE;QACJ,IAAI,EAAE,SAA
S;QACf,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,WAAW,EACT,yFAAyF;KAC5F;IACD,WAAW,EAAE;QACX,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,UAAU;QACnB,IAAI,EAAE,OAAO;QACb,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,UAAU;QACnB,IAAI,EAAE,OAAO;QACb,SAAS,EAAE,YAAY;KACxB;CACF,CAAC,CAAC;AAEH,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,OAAO,CAAC;AAC/C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EACL,WAAW,EACX,KAAK,GAIN,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AACjE,OAAO,EAAE,KAAK,EAAa,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAE7E,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAEnE,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,iBAAiB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC3E,MAAM,GAAG,GAAG,iBAAiB,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;AAE/E,SAAS,WAAW,CAAC,KAAa;IAChC,OAAQ,WAAiC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,IAAI,WAAW,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACrC,MAAM,IAAI,KAAK,CAAC,uBAAuB,KAAK,aAAa,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACrF,CAAC;AAED,MAAM,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,kCAAkC,EAAE;IAC1E,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE;QACrE,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE;QACtE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,sCAAsC,EAAE;QAC3F,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;KACjF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,CAAC;YACZ,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EAAE,IAAI,CAAC,GAAG
;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,WAAW,GAAG;IAClB,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW,EAAE;IACrE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,sCAAsC,EAAE;IAC3F,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;IAChF,SAAS,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,6BAA6B,EAAE;CAClF,CAAC;AAEX,MAAM,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,qDAAqD,EAAE;IAC7F,IAAI,EAAE;QACJ,GAAG,WAAW;QACd,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,gCAAgC,EAAE;KAC1F;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,IAAI,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,OAAO,CAAC;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,OAAO;YACP,IAAI;YACJ,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACxB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,aAAa,CAAC;IACjC,IAAI,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,8CAA8C,EAAE;IACxF,IAAI,EAAE,WAAW;IACjB,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,SAAS,CAAC;YACd,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,OAAO;YACP,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACxB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,2EAA2E;KACzF;IACD,IAAI,EAAE;QACJ,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,4BAA4B,EAAE;QACjF,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,kBAAkB,EAAE;QAChF,OAAO,EAAE;YACP,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE,mCAAmC,cAAc,EAAE;iBAC7D,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;iBAClB,IAAI,CAAC,IAAI,CAAC,GAAG;SACjB;QACD,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KA
AK,EAAE,WAAW,EAAE,sBAAsB,EAAE;KACjF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO;aACzB,KAAK,CAAC,GAAG,CAAC;aACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QACrF,cAAc,CAAC,OAAO,EAAE,cAAc,EAAE,CAAC,CAAC;QAC1C,MAAM,WAAW,CAAC;YAChB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,OAAO;YACP,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,QAAQ,GAAG,aAAa,CAAC;IAC7B,IAAI,EAAE;QACJ,IAAI,EAAE,OAAO;QACb,WAAW,EAAE,8DAA8D;KAC5E;IACD,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE;QACrE,IAAI,EAAE;YACJ,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,2CAA2C;SACzD;QACD,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,2BAA2B,EAAE;KACtF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7F,MAAM,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;YAC9E,OAAO,CAAC,GAAG,CACT,WAAW,KAAK,kBAAkB,MAAM,CAAC,cAAc,CAAC,MAAM,aAAa,SAAS,GAAG,CACxF,CAAC;YACF,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,YAAY,eAAe,CAAC,CAAC;YAC3D,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClD,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;QAChC,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/C,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,QAAQ
,CAAC,MAAM,WAAW,CAAC,CAAC;QACpD,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClE,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,CAAC;QACD,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,aAAa,CAAC,CAAC;YACxD,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;CACF,CAAC,CAAC;AAEH,SAAS,eAAe,CAAC,CAAqB;IAC5C,OAAO,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,OAAO,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,OAAO,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,aAAa,CAAC,CAAgB;IACrC,OAAO,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,cAAc,CAAC,CAAC,OAAO,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,qEAAqE;KACnF;IACD,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE;QACtE,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,2BAA2B,EAAE;KACtF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QACtE,IAAI,MAAM,CAAC,UAAU,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC;CACF,CAAC,CAAC;AAEH,SAAS,SAAS,CAAC,KAAa;IAC9B,IAAK,KAA2B,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAa,CAAC;IACvE,MAAM,IAAI,KAAK,CAAC,iBAAiB,KAAK,aAAa,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACzE,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa,EAAE,IAAY;IACnD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7B,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,KAAK,IAAI,qCAAqC,KAAK,GAAG,CAAC,CAAC;IAC1E,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,6EAA6E;KAC3F;IACD,IAAI,EAAE;QACJ,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,OA
AO,EAAE,eAAe;YACxB,WAAW,EAAE,8BAA8B;SAC5C;QACD,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,0CAA0C,EAAE;QAC9F,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gCAAgC,EAAE;QACxE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gCAAgC,EAAE;QACvE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,uBAAuB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE;QACjF,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,4CAA4C,EAAE;QACnF,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,GAAG;YACZ,WAAW,EAAE,+EAA+E;SAC7F;QACD,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,wDAAwD;SACtE;QACD,aAAa,EAAE;YACb,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,mEAAmE;SACjF;QACD,iBAAiB,EAAE;YACjB,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,iEAAiE;SAC/E;QACD,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6CAA6C,EAAE;KACrF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC;YAC3B,QAAQ,EAAE,IAAI,CAAC,KAAK;YACpB,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,WAAW,EAAE,gBAAgB,CAAC,IAAI,CAAC,WAAW,EAAE,aAAa,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;YACtD,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC;YACrD,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,gBAAgB,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC;YAC7E,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;YACtD,GAAG,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,SAAS,IAAI,EAAE,UAAU,EAAE,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC;YAC7E,GAAG,CAAC,IAAI,CAAC,iBAAiB,CAAC,KAAK,SAAS,IAAI;gBAC3C,gBAAgB,EAAE,gBAAgB,CAAC,IAAI,CAAC,iBAAiB,CAAC,EAAE,iBAAiB,CAAC,GAAG,IAAI;aACtF,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;YACf,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;YACvE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACzC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC
;YACd,MAAM,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACrD,MAAM,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC;QAC1D,CAAC;QACD,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,IAAI,GAAG,aAAa,CAAC;IACzB,IAAI,EAAE;QACJ,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,WAAW,EACT,yFAAyF;KAC5F;IACD,WAAW,EAAE;QACX,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,UAAU;QACnB,IAAI,EAAE,OAAO;QACb,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,UAAU;QACnB,IAAI,EAAE,OAAO;QACb,SAAS,EAAE,YAAY;KACxB;CACF,CAAC,CAAC;AAEH,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Why a capture ended. The captor reports "result" whenever a `result` event
 * was seen; otherwise it echoes the reason passed to `Captor.result()`.
 */
export type CaptureExitReason = "result" | "timeout" | "stream-end";
|
|
2
|
+
/** One `tool_use` block recorded from an assistant event, in the order it was seen. */
export interface ToolCall {
|
|
3
|
+
/** Tool name taken from the block's `name` field. */
readonly name: string;
|
|
4
|
+
/** The block's `input` field, stored as-is without validation. */
readonly input: unknown;
|
|
5
|
+
}
|
|
6
|
+
/** A file write derived from the input of a write-type tool call. */
export interface WrittenFile {
|
|
7
|
+
/** The tool input's `file_path`. */
readonly path: string;
|
|
8
|
+
/** The tool input's `content`, else `new_string`; "" when neither is a string. */
readonly content: string;
|
|
9
|
+
}
|
|
10
|
+
/** Snapshot of everything a captor collected from one session's event lines. */
export interface SolvingCapture {
|
|
11
|
+
/** Text of the `result` event when non-empty, otherwise the last assistant text block seen. */
readonly outputText: string;
|
|
12
|
+
/** Every recorded tool call, in arrival order. */
readonly trajectory: readonly ToolCall[];
|
|
13
|
+
/** The subset of tool calls that were recognised as file writes. */
readonly writes: readonly WrittenFile[];
|
|
14
|
+
/** Why the capture ended. */
readonly exitReason: CaptureExitReason;
|
|
15
|
+
}
|
|
16
|
+
/** Incremental collector fed one output line at a time (not exported; obtain one via `createCaptor`). */
interface Captor {
|
|
17
|
+
/** Feed one line of output; blank, non-JSON, and post-result lines are ignored. */
readonly push: (line: string) => void;
|
|
18
|
+
/** True once a `result` event has been consumed. */
readonly done: boolean;
|
|
19
|
+
/** Build the capture; `reason` is used only when no `result` event was seen. */
readonly result: (reason: CaptureExitReason) => SolvingCapture;
|
|
20
|
+
}
|
|
21
|
+
/** Creates a fresh, empty captor with its own trajectory, writes, and output state. */
export declare function createCaptor(): Captor;
|
|
22
|
+
export {};
|
|
23
|
+
//# sourceMappingURL=capture.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"capture.d.ts","sourceRoot":"","sources":["../../src/eval/capture.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,iBAAiB,GAAG,QAAQ,GAAG,SAAS,GAAG,YAAY,CAAC;AAEpE,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,SAAS,QAAQ,EAAE,CAAC;IACzC,QAAQ,CAAC,MAAM,EAAE,SAAS,WAAW,EAAE,CAAC;IACxC,QAAQ,CAAC,UAAU,EAAE,iBAAiB,CAAC;CACxC;AAED,UAAU,MAAM;IACd,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACtC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,iBAAiB,KAAK,cAAc,CAAC;CAChE;AAID,wBAAgB,YAAY,IAAI,MAAM,CA8DrC"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { asRecord } from "./detect.js";
|
|
2
|
+
// Tool names whose calls are treated as file writes when recording a capture.
const WRITE_TOOLS = new Set(["Write", "Edit"]);
|
|
3
|
+
/**
 * Creates a stateful captor that consumes a session's output one line at a
 * time (each line parsed as one JSON event) and accumulates the tool
 * trajectory, file writes, and final output text.
 *
 * @returns an object exposing `push(line)`, a `done` getter, and `result(reason)`.
 */
export function createCaptor() {
|
|
4
|
+
// Every tool_use block seen, in arrival order.
const trajectory = [];
|
|
5
|
+
// Subset of tool calls that writtenFileOf() recognised as file writes.
const writes = [];
|
|
6
|
+
// Most recent assistant text block; used as the output when no result text exists.
let lastAssistantText = "";
|
|
7
|
+
// null until a "result" event arrives; "" when that event carries no string result.
let resultText = null;
|
|
8
|
+
// Latched to true by the first "result" event; later lines are dropped.
let finished = false;
|
|
9
|
+
// Records one tool_use block; blocks without a string `name` are ignored.
function recordToolUse(block) {
|
|
10
|
+
const name = block["name"];
|
|
11
|
+
if (typeof name !== "string")
|
|
12
|
+
return;
|
|
13
|
+
trajectory.push({ name, input: block["input"] });
|
|
14
|
+
const write = writtenFileOf(name, block["input"]);
|
|
15
|
+
if (write)
|
|
16
|
+
writes.push(write);
|
|
17
|
+
}
|
|
18
|
+
// Remembers the text of a text block, overwriting any earlier one (only the last is kept).
function recordText(block) {
|
|
19
|
+
const text = block["text"];
|
|
20
|
+
if (typeof text === "string")
|
|
21
|
+
lastAssistantText = text;
|
|
22
|
+
}
|
|
23
|
+
return {
|
|
24
|
+
// True once a "result" event has been consumed.
get done() {
|
|
25
|
+
return finished;
|
|
26
|
+
},
|
|
27
|
+
// Consumes one line of output. Blank lines, lines that are not valid JSON,
// and anything arriving after the result event are silently ignored.
push(line) {
|
|
28
|
+
if (finished)
|
|
29
|
+
return;
|
|
30
|
+
const trimmed = line.trim();
|
|
31
|
+
if (!trimmed)
|
|
32
|
+
return;
|
|
33
|
+
let parsed;
|
|
34
|
+
try {
|
|
35
|
+
parsed = JSON.parse(trimmed);
|
|
36
|
+
}
|
|
37
|
+
catch {
|
|
38
|
+
// Deliberate best-effort: a malformed line is skipped rather than aborting the capture.
return;
|
|
39
|
+
}
|
|
40
|
+
// NOTE(review): asRecord comes from ./detect.js; presumably it yields an
// empty record for non-object input so the lookups below are safe — confirm.
const event = asRecord(parsed);
|
|
41
|
+
if (event["type"] === "assistant") {
|
|
42
|
+
const content = asRecord(event["message"])["content"];
|
|
43
|
+
// A non-array `content` is treated as having no blocks.
for (const item of Array.isArray(content) ? content : []) {
|
|
44
|
+
const block = asRecord(item);
|
|
45
|
+
if (block["type"] === "tool_use")
|
|
46
|
+
recordToolUse(block);
|
|
47
|
+
else if (block["type"] === "text")
|
|
48
|
+
recordText(block);
|
|
49
|
+
}
|
|
50
|
+
return;
|
|
51
|
+
}
|
|
52
|
+
if (event["type"] === "result") {
|
|
53
|
+
const text = event["result"];
|
|
54
|
+
// A non-string result is stored as "" so resultText is still non-null,
// which is what marks the capture as having ended with a result.
resultText = typeof text === "string" ? text : "";
|
|
55
|
+
finished = true;
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
// Builds the capture. `reason` is reported as the exit reason only when no
// result event was seen. The trajectory and writes arrays are returned by
// reference, not copied.
result(reason) {
|
|
59
|
+
// Prefer the result event's text; fall back to the last assistant text when it is empty or absent.
const outputText = resultText && resultText.length > 0 ? resultText : lastAssistantText;
|
|
60
|
+
return {
|
|
61
|
+
outputText,
|
|
62
|
+
trajectory,
|
|
63
|
+
writes,
|
|
64
|
+
exitReason: resultText !== null ? "result" : reason,
|
|
65
|
+
};
|
|
66
|
+
},
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
/**
 * Extracts the file written by a tool call, if the tool is a write tool.
 *
 * @param name  tool name; only names in WRITE_TOOLS are considered.
 * @param input tool input; `file_path` must be a string.
 * @returns `{ path, content }`, where content is `content` or else
 *          `new_string` when that is a string and "" otherwise; `null` when
 *          the tool is not a write tool or has no string `file_path`.
 */
function writtenFileOf(name, input) {
    if (WRITE_TOOLS.has(name)) {
        const fields = asRecord(input);
        const filePath = fields["file_path"];
        if (typeof filePath === "string") {
            const body = fields["content"] ?? fields["new_string"];
            return { path: filePath, content: typeof body === "string" ? body : "" };
        }
    }
    return null;
}
|
|
79
|
+
//# sourceMappingURL=capture.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"capture.js","sourceRoot":"","sources":["../../src/eval/capture.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AA2BvC,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAE/C,MAAM,UAAU,YAAY;IAC1B,MAAM,UAAU,GAAe,EAAE,CAAC;IAClC,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,IAAI,iBAAiB,GAAG,EAAE,CAAC;IAC3B,IAAI,UAAU,GAAkB,IAAI,CAAC;IACrC,IAAI,QAAQ,GAAG,KAAK,CAAC;IAErB,SAAS,aAAa,CAAC,KAA8B;QACnD,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3B,IAAI,OAAO,IAAI,KAAK,QAAQ;YAAE,OAAO;QACrC,UAAU,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACjD,MAAM,KAAK,GAAG,aAAa,CAAC,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;QAClD,IAAI,KAAK;YAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAChC,CAAC;IAED,SAAS,UAAU,CAAC,KAA8B;QAChD,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3B,IAAI,OAAO,IAAI,KAAK,QAAQ;YAAE,iBAAiB,GAAG,IAAI,CAAC;IACzD,CAAC;IAED,OAAO;QACL,IAAI,IAAI;YACN,OAAO,QAAQ,CAAC;QAClB,CAAC;QACD,IAAI,CAAC,IAAY;YACf,IAAI,QAAQ;gBAAE,OAAO;YACrB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO;gBAAE,OAAO;YACrB,IAAI,MAAe,CAAC;YACpB,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/B,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;YACD,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;YAE/B,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,WAAW,EAAE,CAAC;gBAClC,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;gBACtD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;oBACzD,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;oBAC7B,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,UAAU;wBAAE,aAAa,CAAC,KAAK,CAAC,CAAC;yBAClD,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,MAAM;wBAAE,UAAU,CAAC,KAAK,CAAC,CAAC;gBACvD,CAAC;gBACD,OAAO;YACT,CAAC;YAED,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,QAAQ,EAAE,CAAC;gBAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC,CAAC;gBAC7B,UAAU,GAAG,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;gBAClD,QAAQ,GAAG,IAAI,CAAC;YAClB,CAAC;QACH,CAAC;QACD,MAAM,CAAC,MAAyB;YAC9B,MAAM,UAAU,GAAG,UAAU,IAAI,UAAU,CAAC,MAAM,GA
AG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,iBAAiB,CAAC;YACxF,OAAO;gBACL,UAAU;gBACV,UAAU;gBACV,MAAM;gBACN,UAAU,EAAE,UAAU,KAAK,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM;aACpD,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,IAAY,EAAE,KAAc;IACjD,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACxC,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC/B,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC;IACjC,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC1C,MAAM,OAAO,GAAG,MAAM,CAAC,SAAS,CAAC,IAAI,MAAM,CAAC,YAAY,CAAC,CAAC;IAC1D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;AACvE,CAAC"}
|
package/dist/eval/cases.d.ts
CHANGED
|
@@ -1,14 +1,22 @@
|
|
|
1
1
|
import { type Result } from "../result.js";
|
|
2
|
-
import { type
|
|
3
|
-
|
|
2
|
+
import { type RoutingCase, type SolvingCase, type Tier } from "./schema.js";
|
|
3
|
+
interface CaseLocation {
|
|
4
4
|
readonly suite: string;
|
|
5
5
|
readonly tier: Tier;
|
|
6
6
|
readonly file: string;
|
|
7
7
|
}
|
|
8
|
+
export type LoadedRoutingCase = RoutingCase & CaseLocation & {
|
|
9
|
+
readonly tier: "routing";
|
|
10
|
+
};
|
|
11
|
+
export type LoadedSolvingCase = SolvingCase & CaseLocation & {
|
|
12
|
+
readonly tier: "solving";
|
|
13
|
+
};
|
|
14
|
+
export type LoadedCase = LoadedRoutingCase | LoadedSolvingCase;
|
|
8
15
|
export interface CaseLoadError {
|
|
9
16
|
readonly file: string;
|
|
10
17
|
readonly message: string;
|
|
11
18
|
}
|
|
12
19
|
export declare function loadCases(casesDir: string): Promise<Result<LoadedCase[], CaseLoadError[]>>;
|
|
13
20
|
export declare function unresolvedSkills(cases: readonly LoadedCase[], installedIds: ReadonlySet<string>): CaseLoadError[];
|
|
21
|
+
export {};
|
|
14
22
|
//# sourceMappingURL=cases.d.ts.map
|
package/dist/eval/cases.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cases.d.ts","sourceRoot":"","sources":["../../src/eval/cases.ts"],"names":[],"mappings":"AAMA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,cAAc,CAAC;AACpD,OAAO,
|
|
1
|
+
{"version":3,"file":"cases.d.ts","sourceRoot":"","sources":["../../src/eval/cases.ts"],"names":[],"mappings":"AAMA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,cAAc,CAAC;AACpD,OAAO,EAKL,KAAK,WAAW,EAChB,KAAK,WAAW,EAChB,KAAK,IAAI,EACV,MAAM,aAAa,CAAC;AAErB,UAAU,YAAY;IACpB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,MAAM,iBAAiB,GAAG,WAAW,GAAG,YAAY,GAAG;IAAE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAA;CAAE,CAAC;AAC1F,MAAM,MAAM,iBAAiB,GAAG,WAAW,GAAG,YAAY,GAAG;IAAE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAA;CAAE,CAAC;AAC1F,MAAM,MAAM,UAAU,GAAG,iBAAiB,GAAG,iBAAiB,CAAC;AAE/D,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAsB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,aAAa,EAAE,CAAC,CAAC,CAwBhG;AAQD,wBAAgB,gBAAgB,CAC9B,KAAK,EAAE,SAAS,UAAU,EAAE,EAC5B,YAAY,EAAE,WAAW,CAAC,MAAM,CAAC,GAChC,aAAa,EAAE,CAajB"}
|
package/dist/eval/cases.js
CHANGED
|
@@ -3,7 +3,7 @@ import { join } from "node:path";
|
|
|
3
3
|
import yaml from "js-yaml";
|
|
4
4
|
import { formatZodIssues } from "../errors/zod.js";
|
|
5
5
|
import { err, ok } from "../result.js";
|
|
6
|
-
import { CaseFileSchema,
|
|
6
|
+
import { CaseFileSchema, caseExpectedSkills, } from "./schema.js";
|
|
7
7
|
export async function loadCases(casesDir) {
|
|
8
8
|
const files = await collectYamlFiles(casesDir);
|
|
9
9
|
const loaded = [];
|
|
@@ -15,21 +15,27 @@ export async function loadCases(casesDir) {
|
|
|
15
15
|
errors.push(parsed.error);
|
|
16
16
|
continue;
|
|
17
17
|
}
|
|
18
|
+
const { suite } = parsed.value;
|
|
18
19
|
for (const evalCase of parsed.value.cases) {
|
|
19
20
|
if (seenIds.has(evalCase.id)) {
|
|
20
21
|
errors.push({ file, message: `duplicate case id "${evalCase.id}"` });
|
|
21
22
|
continue;
|
|
22
23
|
}
|
|
23
24
|
seenIds.add(evalCase.id);
|
|
24
|
-
loaded.push(
|
|
25
|
+
loaded.push(locate(evalCase, suite, file));
|
|
25
26
|
}
|
|
26
27
|
}
|
|
27
28
|
return errors.length > 0 ? err(errors) : ok(loaded);
|
|
28
29
|
}
|
|
30
|
+
/**
 * Attaches suite, file and tier to a parsed case.
 * A case with an `expect` property is tagged "routing"; any other case is
 * tagged "solving".
 */
function locate(evalCase, suite, file) {
    const tier = "expect" in evalCase ? "routing" : "solving";
    return { ...evalCase, suite, file, tier };
}
|
|
29
35
|
export function unresolvedSkills(cases, installedIds) {
|
|
30
36
|
const errors = [];
|
|
31
37
|
for (const evalCase of cases) {
|
|
32
|
-
for (const id of
|
|
38
|
+
for (const id of caseExpectedSkills(evalCase)) {
|
|
33
39
|
if (!installedIds.has(id)) {
|
|
34
40
|
errors.push({
|
|
35
41
|
file: evalCase.file,
|
package/dist/eval/cases.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cases.js","sourceRoot":"","sources":["../../src/eval/cases.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,IAAI,MAAM,SAAS,CAAC;AAE3B,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,cAAc,CAAC;AACpD,OAAO,EACL,cAAc,EACd,
|
|
1
|
+
{"version":3,"file":"cases.js","sourceRoot":"","sources":["../../src/eval/cases.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,IAAI,MAAM,SAAS,CAAC;AAE3B,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,cAAc,CAAC;AACpD,OAAO,EACL,cAAc,EACd,kBAAkB,GAMnB,MAAM,aAAa,CAAC;AAiBrB,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB;IAC9C,MAAM,KAAK,GAAG,MAAM,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAiB,EAAE,CAAC;IAChC,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC1B,SAAS;QACX,CAAC;QACD,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC,KAAK,CAAC;QAC/B,KAAK,MAAM,QAAQ,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YAC1C,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC7B,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,sBAAsB,QAAQ,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC;gBACrE,SAAS;YACX,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;AACtD,CAAC;AAED,SAAS,MAAM,CAAC,QAAkB,EAAE,KAAa,EAAE,IAAY;IAC7D,OAAO,QAAQ,IAAI,QAAQ;QACzB,CAAC,CAAC,EAAE,GAAG,QAAQ,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE;QAC/C,CAAC,CAAC,EAAE,GAAG,QAAQ,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;AACpD,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,KAA4B,EAC5B,YAAiC;IAEjC,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;QAC7B,KAAK,MAAM,EAAE,IAAI,kBAAkB,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC9C,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC1B,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,QAAQ,CAAC,IAAI;oBACnB,OAAO,EAAE,SAAS,QAAQ,CAAC,EAAE,oBAAoB,EAAE,2BAA2B;iBAC/E,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB
,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,IAAY;IACnC,IAAI,GAAW,CAAC;IAChB,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACrC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,qBAAsB,KAAe,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IACjF,CAAC;IACD,IAAI,GAAY,CAAC;IACjB,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACvB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,iBAAkB,KAAe,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IAC7E,CAAC;IACD,MAAM,MAAM,GAAG,cAAc,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IAC7C,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,eAAe,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1E,CAAC;IACD,OAAO,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;AACzB,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAW;IACzC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,OAAO,CAAC;IACZ,IAAI,CAAC;QACH,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC;aAAM,IAAI,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC;YAC3F,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;AACtB,CAAC"}
|
package/dist/eval/detect.d.ts
CHANGED
|
@@ -9,6 +9,7 @@ interface Detector {
|
|
|
9
9
|
readonly done: boolean;
|
|
10
10
|
readonly result: (reason: ExitReason) => DetectionResult;
|
|
11
11
|
}
|
|
12
|
+
export declare function asRecord(value: unknown): Record<string, unknown>;
|
|
12
13
|
export declare function createDetector(stopAfter?: number): Detector;
|
|
13
14
|
export {};
|
|
14
15
|
//# sourceMappingURL=detect.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"detect.d.ts","sourceRoot":"","sources":["../../src/eval/detect.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,UAAU,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,GAAG,YAAY,CAAC;AAEzE,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;CACjC;AAED,UAAU,QAAQ;IAChB,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACtC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,UAAU,KAAK,eAAe,CAAC;CAC1D;
|
|
1
|
+
{"version":3,"file":"detect.d.ts","sourceRoot":"","sources":["../../src/eval/detect.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,UAAU,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,GAAG,YAAY,CAAC;AAEzE,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;CACjC;AAED,UAAU,QAAQ;IAChB,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACtC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,UAAU,KAAK,eAAe,CAAC;CAC1D;AAED,wBAAgB,QAAQ,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAEhE;AAoBD,wBAAgB,cAAc,CAAC,SAAS,SAAI,GAAG,QAAQ,CA6EtD"}
|
package/dist/eval/detect.js
CHANGED
package/dist/eval/detect.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"detect.js","sourceRoot":"","sources":["../../src/eval/detect.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,WAAW,CAAC;AAgBlC,
|
|
1
|
+
{"version":3,"file":"detect.js","sourceRoot":"","sources":["../../src/eval/detect.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,WAAW,CAAC;AAgBlC,MAAM,UAAU,QAAQ,CAAC,KAAc;IACrC,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAC,CAAC,CAAE,KAAiC,CAAC,CAAC,CAAC,EAAE,CAAC;AAC/F,CAAC;AAED,SAAS,cAAc,CAAC,KAA8B;IACpD,OAAO,KAAK,CAAC,MAAM,CAAC,KAAK,UAAU,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,OAAO,CAAC;AACnE,CAAC;AAED,SAAS,SAAS,CAAC,KAAc;IAC/B,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,SAAS,CAAC,CAAC;IACnD,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;AACvE,CAAC;AAED,SAAS,aAAa,CAAC,MAAc;IACnC,IAAI,CAAC;QACH,OAAO,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IACvC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,SAAS,GAAG,CAAC;IAC1C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,gBAAgB,GAAkB,IAAI,CAAC;IAC3C,IAAI,QAAQ,GAAG,KAAK,CAAC;IAErB,SAAS,MAAM,CAAC,EAAiB;QAC/B,IAAI,EAAE,KAAK,IAAI;YAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnC,IAAI,QAAQ,CAAC,MAAM,IAAI,SAAS;YAAE,QAAQ,GAAG,IAAI,CAAC;QAClD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,SAAS,YAAY;QACnB,IAAI,gBAAgB,KAAK,IAAI;YAAE,OAAO;QACtC,MAAM,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC,CAAC;QACxC,gBAAgB,GAAG,IAAI,CAAC;IAC1B,CAAC;IAED,OAAO;QACL,IAAI,IAAI;YACN,OAAO,QAAQ,CAAC;QAClB,CAAC;QACD,IAAI,CAAC,IAAY;YACf,IAAI,QAAQ;gBAAE,OAAO;YACrB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO;gBAAE,OAAO;YACrB,IAAI,MAAe,CAAC;YACpB,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/B,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;YACD,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;YAE3B,IAAI,IAAI,KAAK,cAAc,EAAE,CAAC;gBAC5B,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;gBACpC,MAAM,MAAM,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC;gBAE1B,IAAI,MAAM,KAAK,qBAAqB,EAAE,CAAC;oBACrC,IAAI,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC;wBAAE,gBAAgB,GAAG,EAAE,CAAC;gBAC3E,CAAC;qBAAM,I
AAI,MAAM,KAAK,qBAAqB,IAAI,gBAAgB,KAAK,IAAI,EAAE,CAAC;oBACzE,MAAM,KAAK,GAAG,QAAQ,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;oBACpC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,kBAAkB,EAAE,CAAC;wBACzC,MAAM,OAAO,GAAG,KAAK,CAAC,cAAc,CAAC,CAAC;wBACtC,IAAI,OAAO,OAAO,KAAK,QAAQ;4BAAE,gBAAgB,IAAI,OAAO,CAAC;wBAC7D,IAAI,aAAa,CAAC,gBAAgB,CAAC,KAAK,IAAI;4BAAE,YAAY,EAAE,CAAC;oBAC/D,CAAC;gBACH,CAAC;qBAAM,IAAI,MAAM,KAAK,oBAAoB,EAAE,CAAC;oBAC3C,YAAY,EAAE,CAAC;gBACjB,CAAC;qBAAM,IAAI,MAAM,KAAK,cAAc,EAAE,CAAC;oBACrC,YAAY,EAAE,CAAC;oBACf,QAAQ,GAAG,IAAI,CAAC;gBAClB,CAAC;gBACD,OAAO;YACT,CAAC;YAED,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;gBACzB,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;gBACtD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;oBACzD,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;oBAC7B,IAAI,cAAc,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;wBAAE,OAAO;gBACzE,CAAC;gBACD,OAAO;YACT,CAAC;YAED,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACtB,QAAQ,GAAG,IAAI,CAAC;YAClB,CAAC;QACH,CAAC;QACD,MAAM,CAAC,MAAkB;YACvB,OAAO;gBACL,QAAQ;gBACR,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI;gBAC/B,UAAU,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;aACnD,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { SolvingCapture } from "./capture.js";
|
|
2
|
+
import type { Assertion } from "./schema.js";
|
|
3
|
+
export interface AssertionResult {
|
|
4
|
+
readonly assertion: Assertion;
|
|
5
|
+
readonly pass: boolean;
|
|
6
|
+
readonly evidence: string;
|
|
7
|
+
}
|
|
8
|
+
export declare function gradeAssertions(assertions: readonly Assertion[], capture: SolvingCapture): AssertionResult[];
|
|
9
|
+
//# sourceMappingURL=grade-deterministic.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"grade-deterministic.d.ts","sourceRoot":"","sources":["../../src/eval/grade-deterministic.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAEnD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE7C,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,SAAS,EAAE,SAAS,CAAC;IAC9B,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;CAC3B;AAED,wBAAgB,eAAe,CAC7B,UAAU,EAAE,SAAS,SAAS,EAAE,EAChC,OAAO,EAAE,cAAc,GACtB,eAAe,EAAE,CAEnB"}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { err, ok } from "../result.js";
|
|
2
|
+
import { asRecord } from "./detect.js";
|
|
3
|
+
/**
 * Grades every assertion against a single capture.
 *
 * @returns one result per assertion, in the same order as `assertions`.
 */
export function gradeAssertions(assertions, capture) {
    const results = [];
    for (const assertion of assertions) {
        results.push(grade(assertion, capture));
    }
    return results;
}
|
|
6
|
+
/**
 * Grades one assertion against a capture, dispatching on `assertion.kind`.
 *
 * Handles "outputMatches", "outputExcludes", "usedTool", "didNotUseTool" and
 * "wroteFile"; any other kind yields `undefined`.
 */
function grade(assertion, capture) {
    const { kind } = assertion;
    if (kind === "outputMatches") {
        return matchOutput(assertion, capture.outputText, {
            whenPresent: () => pass(assertion, `matched "${assertion.pattern}"`),
            whenAbsent: () => fail(assertion, `no match for "${assertion.pattern}"`),
        });
    }
    if (kind === "outputExcludes") {
        return matchOutput(assertion, capture.outputText, {
            whenPresent: (hit) => fail(assertion, `found forbidden "${hit}"`),
            whenAbsent: () => pass(assertion, `absent: "${assertion.pattern}"`),
        });
    }
    if (kind === "usedTool") {
        const used = tools(capture);
        if (used.includes(assertion.tool)) {
            return pass(assertion, `used ${assertion.tool}`);
        }
        return fail(assertion, `tools used: [${used.join(", ")}]`);
    }
    if (kind === "didNotUseTool") {
        if (tools(capture).includes(assertion.tool)) {
            return fail(assertion, `unexpectedly used ${assertion.tool}`);
        }
        return pass(assertion, `did not use ${assertion.tool}`);
    }
    if (kind === "wroteFile") {
        return gradeWroteFile(assertion, capture);
    }
    return undefined;
}
|
|
30
|
+
/**
 * Runs the assertion's pattern against `text` and delegates to `outcome`.
 *
 * @param outcome `whenPresent(hit)` is called with the matched text,
 *                `whenAbsent()` when nothing matched.
 * @returns the outcome's result, or a failure carrying the matcher error when
 *          the pattern could not be compiled.
 */
function matchOutput(assertion, text, outcome) {
    const compiled = compileMatcher(assertion.pattern, assertion.regex);
    if (compiled.ok) {
        const found = compiled.value(text);
        if (found !== null) {
            return outcome.whenPresent(found);
        }
        return outcome.whenAbsent();
    }
    return fail(assertion, compiled.error);
}
|
|
37
|
+
/**
 * Grades a "wroteFile" assertion.
 *
 * Fails when no write tool call targeted `assertion.path`. When
 * `assertion.contentMatches` is undefined the write alone is enough to pass.
 * Otherwise the content of every recorded write to that path is checked and
 * the assertion passes if any of them matches. Checking all writes, not just
 * the first, means a file that was written and later edited or rewritten is
 * not judged against stale content.
 *
 * @returns a pass/fail result with human-readable evidence; an invalid regex
 *          yields a failure carrying the compile error.
 */
function gradeWroteFile(assertion, capture) {
    if (!wroteTo(capture, assertion.path)) {
        return fail(assertion, `${assertion.path} not written (wrote: [${writtenPaths(capture).join(", ")}])`);
    }
    if (assertion.contentMatches === undefined) {
        return pass(assertion, `wrote ${assertion.path}`);
    }
    const matcher = compileMatcher(assertion.contentMatches, assertion.regex);
    if (!matcher.ok) {
        return fail(assertion, matcher.error);
    }
    const recorded = capture.writes
        .filter((write) => write.path === assertion.path)
        .map((write) => write.content);
    // With no recorded content for the path, match against "" as before.
    const candidates = recorded.length > 0 ? recorded : [""];
    return candidates.some((content) => matcher.value(content) !== null)
        ? pass(assertion, `${assertion.path} content matched`)
        : fail(assertion, `${assertion.path} content did not match "${assertion.contentMatches}"`);
}
|
|
52
|
+
/**
 * Builds a matcher function for a pattern.
 *
 * @param pattern literal substring, or regular-expression source when `regex`
 *                is truthy.
 * @param regex   whether `pattern` should be compiled as a regular expression.
 * @returns `ok(fn)` where `fn(text)` yields the matched text or `null`, or
 *          `err(message)` when the regular expression fails to compile.
 */
function compileMatcher(pattern, regex) {
    if (regex) {
        let expression;
        try {
            expression = new RegExp(pattern);
        }
        catch (cause) {
            return err(`invalid regex "${pattern}": ${cause.message}`);
        }
        return ok((text) => {
            const match = expression.exec(text);
            return match?.[0] ?? null;
        });
    }
    // Literal mode: the "hit" is the pattern itself.
    return ok((text) => (text.includes(pattern) ? pattern : null));
}
|
|
65
|
+
/** Reports whether any write tool call in the capture targeted `path`. */
function wroteTo(capture, path) {
    return writtenPaths(capture).some((written) => written === path);
}
|
|
68
|
+
/**
 * Lists the `file_path` of every write tool call in the trajectory, in call
 * order. Calls without a string `file_path` contribute nothing.
 */
function writtenPaths(capture) {
    const paths = [];
    for (const call of capture.trajectory) {
        if (isWriteTool(call.name)) {
            paths.push(...writePathOf(call.input));
        }
    }
    return paths;
}
|
|
71
|
+
/** Returns true when the tool name is exactly "Write" or "Edit". */
function isWriteTool(name) {
    return ["Write", "Edit"].includes(name);
}
|
|
74
|
+
/**
 * Returns the tool input's `file_path` wrapped in an array, or an empty array
 * when it is not a string.
 */
function writePathOf(input) {
    const { file_path: filePath } = asRecord(input);
    if (typeof filePath !== "string") {
        return [];
    }
    return [filePath];
}
|
|
78
|
+
/** Returns the names of all tool calls in the trajectory, in call order. */
function tools(capture) {
    const names = [];
    for (const call of capture.trajectory) {
        names.push(call.name);
    }
    return names;
}
|
|
81
|
+
/** Builds a passing result for `assertion` with the given evidence text. */
function pass(assertion, evidence) {
    const outcome = { assertion, pass: true, evidence };
    return outcome;
}
|
|
84
|
+
/** Builds a failing result for `assertion` with the given evidence text. */
function fail(assertion, evidence) {
    const outcome = { assertion, pass: false, evidence };
    return outcome;
}
|
|
87
|
+
//# sourceMappingURL=grade-deterministic.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"grade-deterministic.js","sourceRoot":"","sources":["../../src/eval/grade-deterministic.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,cAAc,CAAC;AAEpD,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AASvC,MAAM,UAAU,eAAe,CAC7B,UAAgC,EAChC,OAAuB;IAEvB,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,KAAK,CAAC,SAAoB,EAAE,OAAuB;IAC1D,QAAQ,SAAS,CAAC,IAAI,EAAE,CAAC;QACvB,KAAK,eAAe;YAClB,OAAO,WAAW,CAAC,SAAS,EAAE,OAAO,CAAC,UAAU,EAAE;gBAChD,WAAW,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,YAAY,SAAS,CAAC,OAAO,GAAG,CAAC;gBACpE,UAAU,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,iBAAiB,SAAS,CAAC,OAAO,GAAG,CAAC;aACzE,CAAC,CAAC;QAEL,KAAK,gBAAgB;YACnB,OAAO,WAAW,CAAC,SAAS,EAAE,OAAO,CAAC,UAAU,EAAE;gBAChD,WAAW,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,oBAAoB,GAAG,GAAG,CAAC;gBACjE,UAAU,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,YAAY,SAAS,CAAC,OAAO,GAAG,CAAC;aACpE,CAAC,CAAC;QAEL,KAAK,UAAU;YACb,OAAO,KAAK,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC;gBAC5C,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,SAAS,CAAC,IAAI,EAAE,CAAC;gBAC3C,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,gBAAgB,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEpE,KAAK,eAAe;YAClB,OAAO,KAAK,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC;gBAC5C,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,qBAAqB,SAAS,CAAC,IAAI,EAAE,CAAC;gBACxD,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,eAAe,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;QAEvD,KAAK,WAAW;YACd,OAAO,cAAc,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IAC9C,CAAC;AACH,CAAC;AAOD,SAAS,WAAW,CAClB,SAA2E,EAC3E,IAAY,EACZ,OAAqB;IAErB,MAAM,OAAO,GAAG,cAAc,CAAC,SAAS,CAAC,OAAO,EAAE,SAAS,CAAC,KAAK,CAAC,CAAC;IACnE,IAAI,CAAC,OAAO,CAAC,EAAE;QAAE,OAAO,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;IACvD,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAChC,OAAO,GAAG,KAAK,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;AACxE,CAAC;AAED,SAAS,cAAc,CACrB,SAAoD,EACpD,OAAuB;IAEvB,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC
;QACtC,OAAO,IAAI,CACT,SAAS,EACT,GAAG,SAAS,CAAC,IAAI,yBAAyB,YAAY,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAC/E,CAAC;IACJ,CAAC;IACD,IAAI,SAAS,CAAC,cAAc,KAAK,SAAS,EAAE,CAAC;QAC3C,OAAO,IAAI,CAAC,SAAS,EAAE,SAAS,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IACpD,CAAC;IACD,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,IAAI,CAAC,EAAE,OAAO,IAAI,EAAE,CAAC;IACrF,MAAM,OAAO,GAAG,cAAc,CAAC,SAAS,CAAC,cAAc,EAAE,SAAS,CAAC,KAAK,CAAC,CAAC;IAC1E,IAAI,CAAC,OAAO,CAAC,EAAE;QAAE,OAAO,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;IACvD,OAAO,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,IAAI;QACpC,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,CAAC,IAAI,kBAAkB,CAAC;QACtD,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,CAAC,IAAI,2BAA2B,SAAS,CAAC,cAAc,GAAG,CAAC,CAAC;AAC/F,CAAC;AAID,SAAS,cAAc,CAAC,OAAe,EAAE,KAAc;IACrD,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,EAAE,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACjE,CAAC;IACD,IAAI,QAAgB,CAAC;IACrB,IAAI,CAAC;QACH,QAAQ,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;IACjC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,GAAG,CAAC,kBAAkB,OAAO,MAAO,KAAe,CAAC,OAAO,EAAE,CAAC,CAAC;IACxE,CAAC;IACD,OAAO,EAAE,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;AACxD,CAAC;AAED,SAAS,OAAO,CAAC,OAAuB,EAAE,IAAY;IACpD,OAAO,YAAY,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AAC9C,CAAC;AAED,SAAS,YAAY,CAAC,OAAuB;IAC3C,OAAO,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CACzC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CACtD,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,OAAO,IAAI,KAAK,OAAO,IAAI,IAAI,KAAK,MAAM,CAAC;AAC7C,CAAC;AAED,SAAS,WAAW,CAAC,KAAc;IACjC,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC;IAC1C,OAAO,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AAChD,CAAC;AAED,SAAS,KAAK,CAAC,OAAuB;IACpC,OAAO,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,
IAAI,CAAC,CAAC;AACrD,CAAC;AAED,SAAS,IAAI,CAAC,SAAoB,EAAE,QAAgB;IAClD,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;AAC7C,CAAC;AAED,SAAS,IAAI,CAAC,SAAoB,EAAE,QAAgB;IAClD,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AAC9C,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { Judge, Verdict } from "./judge.js";
|
|
2
|
+
import type { Rubric } from "./schema.js";
|
|
3
|
+
export interface DimensionResult {
|
|
4
|
+
readonly dimension: string;
|
|
5
|
+
readonly verdict: Verdict;
|
|
6
|
+
}
|
|
7
|
+
export interface RubricResult {
|
|
8
|
+
readonly dimensions: readonly DimensionResult[];
|
|
9
|
+
readonly pass: boolean;
|
|
10
|
+
}
|
|
11
|
+
export declare function gradeRubric(rubric: Rubric, output: string, judge: Judge): Promise<RubricResult>;
|
|
12
|
+
//# sourceMappingURL=grade-judge.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"grade-judge.d.ts","sourceRoot":"","sources":["../../src/eval/grade-judge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AACjD,OAAO,KAAK,EAAe,MAAM,EAAE,MAAM,aAAa,CAAC;AAEvD,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC;CAC3B;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,UAAU,EAAE,SAAS,eAAe,EAAE,CAAC;IAChD,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;CACxB;AAED,wBAAsB,WAAW,CAC/B,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,GACX,OAAO,CAAC,YAAY,CAAC,CAQvB"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Asks the judge for a verdict on every rubric dimension and combines them.
 *
 * All dimensions are submitted to the judge concurrently.
 *
 * @param rubric object with `dimensions` (each `{ dimension, criterion }`)
 *               and a `combine` rule.
 * @param output the text being judged.
 * @param judge  async function receiving `{ dimension, criterion, output }`.
 * @returns `{ dimensions, pass }` where `dimensions` pairs each dimension
 *          with its verdict.
 */
export async function gradeRubric(rubric, output, judge) {
    const judgeDimension = async (entry) => {
        const { dimension, criterion } = entry;
        const verdict = await judge({ dimension, criterion, output });
        return { dimension, verdict };
    };
    const dimensions = await Promise.all(rubric.dimensions.map(judgeDimension));
    const pass = combine(rubric.combine, dimensions);
    return { dimensions, pass };
}
|
|
8
|
+
/**
 * Reduces per-dimension verdicts to a single pass/fail.
 *
 * When `rule.combine` is "all", every dimension must have `verdict.pass`
 * strictly equal to true (an empty list passes). Otherwise the fraction of
 * passing dimensions must reach `rule.threshold`, and an empty list fails.
 */
function combine(rule, dimensions) {
    const total = dimensions.length;
    let passed = 0;
    for (const entry of dimensions) {
        if (entry.verdict.pass === true) {
            passed += 1;
        }
    }
    if (rule.combine === "all") {
        return passed === total;
    }
    if (total === 0) {
        return false;
    }
    return passed / total >= rule.threshold;
}
|
|
14
|
+
//# sourceMappingURL=grade-judge.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"grade-judge.js","sourceRoot":"","sources":["../../src/eval/grade-judge.ts"],"names":[],"mappings":"AAaA,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,MAAc,EACd,MAAc,EACd,KAAY;IAEZ,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,GAAG,CAClC,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC;QACzD,SAAS;QACT,OAAO,EAAE,MAAM,KAAK,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC;KACvD,CAAC,CAAC,CACJ,CAAC;IACF,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,EAAE,CAAC;AACnE,CAAC;AAED,SAAS,OAAO,CAAC,IAAiB,EAAE,UAAsC;IACxE,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC,MAAM,CAAC;IACxE,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK;QAAE,OAAO,MAAM,KAAK,UAAU,CAAC,MAAM,CAAC;IAChE,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,GAAG,UAAU,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,CAAC;AAC/E,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { Judge } from "./judge.js";
|
|
2
|
+
import type { CaseReport } from "./report.js";
|
|
3
|
+
import type { CaseResult } from "./runner.js";
|
|
4
|
+
export declare function gradeResults(results: readonly CaseResult[], judge: Judge | undefined): Promise<CaseReport[]>;
|
|
5
|
+
//# sourceMappingURL=grade.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"grade.d.ts","sourceRoot":"","sources":["../../src/eval/grade.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACxC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAI9C,wBAAsB,YAAY,CAChC,OAAO,EAAE,SAAS,UAAU,EAAE,EAC9B,KAAK,EAAE,KAAK,GAAG,SAAS,GACvB,OAAO,CAAC,UAAU,EAAE,CAAC,CAEvB"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { gradeAssertions } from "./grade-deterministic.js";
|
|
2
|
+
import { gradeRubric } from "./grade-judge.js";
|
|
3
|
+
import { scoreCase, scoreSolving } from "./score.js";
|
|
4
|
+
/**
 * Grades every case result concurrently.
 *
 * @param results case results to grade.
 * @param judge   judge forwarded to each grading call; may be undefined.
 * @returns one report per result, in the same order.
 */
export async function gradeResults(results, judge) {
    const pending = results.map((result) => gradeOne(result, judge));
    return Promise.all(pending);
}
|
|
7
|
+
/**
 * Grades a single case result.
 *
 * A result whose `tier` is "routing" is scored directly from its runs. Any
 * other result has each capture graded concurrently, and the report carries
 * the per-run grades under `solving.perRun`.
 */
async function gradeOne(result, judge) {
    const { evalCase } = result;
    if (result.tier !== "routing") {
        const grading = result.captures.map((capture) => gradeRun(evalCase.assert, evalCase.rubric, capture, judge));
        const perRun = await Promise.all(grading);
        const score = scoreSolving(perRun, evalCase.threshold);
        return { evalCase, score, solving: { perRun } };
    }
    const score = scoreCase(evalCase.expect, result.runs, evalCase.threshold);
    return { evalCase, score };
}
|
|
20
|
+
/**
 * Grades one capture: the assertions always, the rubric only when both a
 * rubric and a judge are provided.
 *
 * @returns `{ assertions, rubric }`, where `rubric` is `null` if the rubric
 *          was not graded.
 */
async function gradeRun(assertions, rubric, capture, judge) {
    const graded = gradeAssertions(assertions, capture);
    let rubricResult = null;
    if (rubric && judge) {
        rubricResult = await gradeRubric(rubric, capture.outputText, judge);
    }
    return { assertions: graded, rubric: rubricResult };
}
|
|
25
|
+
//# sourceMappingURL=grade.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"grade.js","sourceRoot":"","sources":["../../src/eval/grade.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAC3D,OAAO,EAAE,WAAW,EAAqB,MAAM,kBAAkB,CAAC;AAIlE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAyB,MAAM,YAAY,CAAC;AAG5E,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,OAA8B,EAC9B,KAAwB;IAExB,OAAO,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC;AAC7E,CAAC;AAED,KAAK,UAAU,QAAQ,CAAC,MAAkB,EAAE,KAAwB;IAClE,IAAI,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QAC9B,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,GAAG,MAAM,CAAC;QAClC,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;IACnF,CAAC;IAED,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,MAAM,CAAC;IACtC,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9B,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAC5F,CAAC;IACF,OAAO;QACL,QAAQ;QACR,KAAK,EAAE,YAAY,CAAC,MAAM,EAAE,QAAQ,CAAC,SAAS,CAAC;QAC/C,OAAO,EAAE,EAAE,MAAM,EAAE;KACpB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,UAAgC,EAChC,MAA0B,EAC1B,OAAuB,EACvB,KAAwB;IAExB,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;IACpD,MAAM,YAAY,GAChB,MAAM,IAAI,KAAK,CAAC,CAAC,CAAC,MAAM,WAAW,CAAC,MAAM,EAAE,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAChF,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC;AACtD,CAAC"}
|
package/dist/eval/index.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { type Result } from "../result.js";
|
|
2
2
|
import { type CaseLoadError, type LoadedCase } from "./cases.js";
|
|
3
|
+
import { type Judge } from "./judge.js";
|
|
3
4
|
import { type EvalReport } from "./report.js";
|
|
4
5
|
import { type RunnerOptions } from "./runner.js";
|
|
5
6
|
export interface EvalOptions {
|
|
@@ -11,6 +12,9 @@ export interface EvalOptions {
|
|
|
11
12
|
readonly runs?: number;
|
|
12
13
|
readonly concurrency?: number;
|
|
13
14
|
readonly model?: string;
|
|
15
|
+
readonly judgeModel?: string;
|
|
16
|
+
readonly solvingTimeoutMs?: number;
|
|
17
|
+
readonly judge?: Judge;
|
|
14
18
|
readonly onRun?: RunnerOptions["onRun"];
|
|
15
19
|
}
|
|
16
20
|
export declare function runEval(options: EvalOptions): Promise<Result<EvalReport, CaseLoadError[]>>;
|
package/dist/eval/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,cAAc,CAAC;AACpD,OAAO,EAA+B,KAAK,aAAa,EAAE,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,cAAc,CAAC;AACpD,OAAO,EAA+B,KAAK,aAAa,EAAE,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC;AAE9F,OAAO,EAAwB,KAAK,KAAK,EAAE,MAAM,YAAY,CAAC;AAC9D,OAAO,EAAe,KAAK,UAAU,EAAE,MAAM,aAAa,CAAC;AAC3D,OAAO,EAAY,KAAK,aAAa,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IACnC,QAAQ,CAAC,KAAK,CAAC,EAAE,KAAK,CAAC;IACvB,QAAQ,CAAC,KAAK,CAAC,EAAE,aAAa,CAAC,OAAO,CAAC,CAAC;CACzC;AAED,wBAAsB,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,aAAa,EAAE,CAAC,CAAC,CA6BhG;AAkCD,YAAY,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACpD,YAAY,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC"}
|