@jean.gnc/harness-kit 0.12.7 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +43 -0
  2. package/dist/cli.js +13 -1
  3. package/dist/cli.js.map +1 -1
  4. package/dist/eval/capture.d.ts +23 -0
  5. package/dist/eval/capture.d.ts.map +1 -0
  6. package/dist/eval/capture.js +79 -0
  7. package/dist/eval/capture.js.map +1 -0
  8. package/dist/eval/cases.d.ts +10 -2
  9. package/dist/eval/cases.d.ts.map +1 -1
  10. package/dist/eval/cases.js +9 -3
  11. package/dist/eval/cases.js.map +1 -1
  12. package/dist/eval/detect.d.ts +1 -0
  13. package/dist/eval/detect.d.ts.map +1 -1
  14. package/dist/eval/detect.js +1 -1
  15. package/dist/eval/detect.js.map +1 -1
  16. package/dist/eval/grade-deterministic.d.ts +9 -0
  17. package/dist/eval/grade-deterministic.d.ts.map +1 -0
  18. package/dist/eval/grade-deterministic.js +87 -0
  19. package/dist/eval/grade-deterministic.js.map +1 -0
  20. package/dist/eval/grade-judge.d.ts +12 -0
  21. package/dist/eval/grade-judge.d.ts.map +1 -0
  22. package/dist/eval/grade-judge.js +14 -0
  23. package/dist/eval/grade-judge.js.map +1 -0
  24. package/dist/eval/grade.d.ts +5 -0
  25. package/dist/eval/grade.d.ts.map +1 -0
  26. package/dist/eval/grade.js +25 -0
  27. package/dist/eval/grade.js.map +1 -0
  28. package/dist/eval/index.d.ts +4 -0
  29. package/dist/eval/index.d.ts.map +1 -1
  30. package/dist/eval/index.js +27 -5
  31. package/dist/eval/index.js.map +1 -1
  32. package/dist/eval/judge.d.ts +26 -0
  33. package/dist/eval/judge.d.ts.map +1 -0
  34. package/dist/eval/judge.js +55 -0
  35. package/dist/eval/judge.js.map +1 -0
  36. package/dist/eval/report.d.ts +5 -1
  37. package/dist/eval/report.d.ts.map +1 -1
  38. package/dist/eval/report.js +66 -13
  39. package/dist/eval/report.js.map +1 -1
  40. package/dist/eval/runner.d.ts +13 -5
  41. package/dist/eval/runner.d.ts.map +1 -1
  42. package/dist/eval/runner.js +105 -31
  43. package/dist/eval/runner.js.map +1 -1
  44. package/dist/eval/schema.d.ts +644 -29
  45. package/dist/eval/schema.d.ts.map +1 -1
  46. package/dist/eval/schema.js +57 -6
  47. package/dist/eval/schema.js.map +1 -1
  48. package/dist/eval/score.d.ts +8 -0
  49. package/dist/eval/score.d.ts.map +1 -1
  50. package/dist/eval/score.js +17 -0
  51. package/dist/eval/score.js.map +1 -1
  52. package/dist/skill/includes.d.ts +4 -0
  53. package/dist/skill/includes.d.ts.map +1 -1
  54. package/dist/skill/includes.js +38 -32
  55. package/dist/skill/includes.js.map +1 -1
  56. package/package.json +2 -1
package/README.md CHANGED
@@ -211,10 +211,53 @@ harness lint # lint compiled markdown under dist/
211
211
  harness check # validate plugin references against local + installed sources
212
212
  harness install # link configs + register plugins per declared vendor (--mode=local|remote)
213
213
  harness uninstall # remove installed plugins per declared vendor
214
+ harness eval # run routing + solving evals against the installed harness
214
215
  ```
215
216
 
216
217
  → Full flag reference, bundled lint rules, and `package.json` integration: [docs/cli.md](./docs/cli.md).
217
218
 
219
+ ## Evals
220
+
221
+ `harness eval` runs YAML cases in two tiers. A case file declares its `tier`, and the two
222
+ tiers carry mutually exclusive keys.
223
+
224
+ **Routing** — *did the right skill fire?* The session is killed on the first `Skill` tool_use;
225
+ the detector scores against an `expect` clause (`first` / `anyOf` / `path` / `noSkill`).
226
+
227
+ **Solving** — *did the agent produce the right thing?* The session runs to completion; the
228
+ final output, tool trajectory, and written files are graded by **deterministic assertions**
229
+ (all must pass) plus an optional **LLM-judge rubric** (one isolated call per dimension).
230
+
231
+ ```yaml
232
+ suite: docs
233
+ tier: solving
234
+ cases:
235
+ - id: writes-a-readme
236
+ prompt: "Create a README.md describing this project."
237
+ expectSkill: dev-tools:typescript # optional; validated against installed skills
238
+ assert:
239
+ - { kind: wroteFile, path: README.md, contentMatches: "## " }
240
+ - { kind: usedTool, tool: Write }
241
+ - { kind: didNotUseTool, tool: Bash }
242
+ - { kind: outputMatches, pattern: "README", regex: false }
243
+ rubric:
244
+ combine: { combine: fraction, threshold: 0.5 } # or { combine: all }
245
+ dimensions:
246
+ - { dimension: clarity, criterion: "The README explains what the project does." }
247
+ - { dimension: structure, criterion: "The README has clear sections." }
248
+ ```
249
+
250
+ A solving run passes when every assertion passes **and** the rubric meets its combine rule;
251
+ across `runs`, the case passes when the pass rate meets `threshold` (default `runs` is 1).
252
+
253
+ Routing needs no API key. Solving cases that declare a `rubric` use `claude` for the judge —
254
+ set `ANTHROPIC_API_KEY` (the run fails with a clear error if it is not set). The judge model is separate from
255
+ the session model:
256
+
257
+ ```sh
258
+ harness eval --tier solving --model claude-opus-4-8 --judge-model claude-sonnet-4-5
259
+ ```
260
+
218
261
  ## Programmatic API
219
262
 
220
263
  Everything the CLI does is also a typed module API. See [docs/api.md](./docs/api.md).
package/dist/cli.js CHANGED
@@ -197,7 +197,7 @@ function parsePositiveInt(value, flag) {
197
197
  const evalCmd = defineCommand({
198
198
  meta: {
199
199
  name: "eval",
200
- description: "Run skill-routing evals against the installed harness",
200
+ description: "Run routing (did the right skill fire?) and solving (graded behavior) evals",
201
201
  },
202
202
  args: {
203
203
  cases: {
@@ -219,6 +219,14 @@ const evalCmd = defineCommand({
219
219
  type: "string",
220
220
  description: "model for claude -p (default: user's configured model)",
221
221
  },
222
+ "judge-model": {
223
+ type: "string",
224
+ description: "model for the solving-tier LLM judge (default: claude-sonnet-4-5)",
225
+ },
226
+ "solving-timeout": {
227
+ type: "string",
228
+ description: "per-case timeout in seconds for solving sessions (default: 300)",
229
+ },
222
230
  json: { type: "string", description: "write machine-readable results to this path" },
223
231
  },
224
232
  run: async ({ args }) => {
@@ -231,6 +239,10 @@ const evalCmd = defineCommand({
231
239
  ...(args.tier !== undefined && { tier: parseTier(args.tier) }),
232
240
  ...(args.runs !== undefined && { runs: parsePositiveInt(args.runs, "runs") }),
233
241
  ...(args.model !== undefined && { model: args.model }),
242
+ ...(args["judge-model"] !== undefined && { judgeModel: args["judge-model"] }),
243
+ ...(args["solving-timeout"] !== undefined && {
244
+ solvingTimeoutMs: parsePositiveInt(args["solving-timeout"], "solving-timeout") * 1000,
245
+ }),
234
246
  });
235
247
  if (!result.ok) {
236
248
  for (const e of result.error)
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,OAAO,CAAC;AAC/C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EACL,WAAW,EACX,KAAK,GAIN,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AACjE,OAAO,EAAE,KAAK,EAAa,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAE7E,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAEnE,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,iBAAiB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC3E,MAAM,GAAG,GAAG,iBAAiB,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;AAE/E,SAAS,WAAW,CAAC,KAAa;IAChC,OAAQ,WAAiC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,IAAI,WAAW,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACrC,MAAM,IAAI,KAAK,CAAC,uBAAuB,KAAK,aAAa,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACrF,CAAC;AAED,MAAM,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,kCAAkC,EAAE;IAC1E,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE;QACrE,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE;QACtE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,sCAAsC,EAAE;QAC3F,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;KACjF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,CAAC;YACZ,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EAAE,IAAI,CAAC,GA
AG;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,WAAW,GAAG;IAClB,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW,EAAE;IACrE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,sCAAsC,EAAE;IAC3F,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;IAChF,SAAS,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,6BAA6B,EAAE;CAClF,CAAC;AAEX,MAAM,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,qDAAqD,EAAE;IAC7F,IAAI,EAAE;QACJ,GAAG,WAAW;QACd,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,gCAAgC,EAAE;KAC1F;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,IAAI,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,OAAO,CAAC;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,OAAO;YACP,IAAI;YACJ,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACxB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,aAAa,CAAC;IACjC,IAAI,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,8CAA8C,EAAE;IACxF,IAAI,EAAE,WAAW;IACjB,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,SAAS,CAAC;YACd,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,OAAO;YACP,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACxB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,2EAA2E;KACzF;IACD,IAAI,EAAE;QACJ,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,4BAA4B,EAAE;QACjF,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,kBAAkB,EAAE;QAChF,OAAO,EAAE;YACP,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE,mCAAmC,cAAc,EAAE;iBAC7D,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;iBAClB,IAAI,CAAC,IAAI,CAAC,GAAG;SACjB;QACD,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,
KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;KACjF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO;aACzB,KAAK,CAAC,GAAG,CAAC;aACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QACrF,cAAc,CAAC,OAAO,EAAE,cAAc,EAAE,CAAC,CAAC;QAC1C,MAAM,WAAW,CAAC;YAChB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,OAAO;YACP,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,QAAQ,GAAG,aAAa,CAAC;IAC7B,IAAI,EAAE;QACJ,IAAI,EAAE,OAAO;QACb,WAAW,EAAE,8DAA8D;KAC5E;IACD,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE;QACrE,IAAI,EAAE;YACJ,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,2CAA2C;SACzD;QACD,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,2BAA2B,EAAE;KACtF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7F,MAAM,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;YAC9E,OAAO,CAAC,GAAG,CACT,WAAW,KAAK,kBAAkB,MAAM,CAAC,cAAc,CAAC,MAAM,aAAa,SAAS,GAAG,CACxF,CAAC;YACF,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,YAAY,eAAe,CAAC,CAAC;YAC3D,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClD,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;QAChC,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/C,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,QA
AQ,CAAC,MAAM,WAAW,CAAC,CAAC;QACpD,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClE,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,CAAC;QACD,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,aAAa,CAAC,CAAC;YACxD,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;CACF,CAAC,CAAC;AAEH,SAAS,eAAe,CAAC,CAAqB;IAC5C,OAAO,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,OAAO,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,OAAO,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,aAAa,CAAC,CAAgB;IACrC,OAAO,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,cAAc,CAAC,CAAC,OAAO,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,qEAAqE;KACnF;IACD,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE;QACtE,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,2BAA2B,EAAE;KACtF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QACtE,IAAI,MAAM,CAAC,UAAU,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC;CACF,CAAC,CAAC;AAEH,SAAS,SAAS,CAAC,KAAa;IAC9B,IAAK,KAA2B,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAa,CAAC;IACvE,MAAM,IAAI,KAAK,CAAC,iBAAiB,KAAK,aAAa,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACzE,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa,EAAE,IAAY;IACnD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7B,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,KAAK,IAAI,qCAAqC,KAAK,GAAG,CAAC,CAAC;IAC1E,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,uDAAuD;KACrE;IACD,IAAI,EAAE;QACJ,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,
OAAO,EAAE,eAAe;YACxB,WAAW,EAAE,8BAA8B;SAC5C;QACD,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,0CAA0C,EAAE;QAC9F,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gCAAgC,EAAE;QACxE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gCAAgC,EAAE;QACvE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,uBAAuB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE;QACjF,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,4CAA4C,EAAE;QACnF,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,GAAG;YACZ,WAAW,EAAE,+EAA+E;SAC7F;QACD,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,wDAAwD;SACtE;QACD,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6CAA6C,EAAE;KACrF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC;YAC3B,QAAQ,EAAE,IAAI,CAAC,KAAK;YACpB,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,WAAW,EAAE,gBAAgB,CAAC,IAAI,CAAC,WAAW,EAAE,aAAa,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;YACtD,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC;YACrD,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,gBAAgB,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC;YAC7E,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;SACvD,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;YACf,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;YACvE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACzC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,MAAM,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACrD,MAAM,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC;QAC1D,CAAC;QACD,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,IAAI,GAAG,aAAa,CAAC;IACzB,IAAI,EAAE;QACJ,IAAI,EAAE,S
AAS;QACf,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,WAAW,EACT,yFAAyF;KAC5F;IACD,WAAW,EAAE;QACX,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,UAAU;QACnB,IAAI,EAAE,OAAO;QACb,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,UAAU;QACnB,IAAI,EAAE,OAAO;QACb,SAAS,EAAE,YAAY;KACxB;CACF,CAAC,CAAC;AAEH,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC"}
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,OAAO,CAAC;AAC/C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EACL,WAAW,EACX,KAAK,GAIN,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AACjE,OAAO,EAAE,KAAK,EAAa,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAE7E,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAEnE,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,iBAAiB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC3E,MAAM,GAAG,GAAG,iBAAiB,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;AAE/E,SAAS,WAAW,CAAC,KAAa;IAChC,OAAQ,WAAiC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,IAAI,WAAW,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACrC,MAAM,IAAI,KAAK,CAAC,uBAAuB,KAAK,aAAa,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACrF,CAAC;AAED,MAAM,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,kCAAkC,EAAE;IAC1E,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE;QACrE,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE;QACtE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,sCAAsC,EAAE;QAC3F,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;KACjF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,CAAC;YACZ,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EAAE,IAAI,CAAC,GA
AG;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,WAAW,GAAG;IAClB,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW,EAAE;IACrE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,sCAAsC,EAAE;IAC3F,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;IAChF,SAAS,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,6BAA6B,EAAE;CAClF,CAAC;AAEX,MAAM,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,qDAAqD,EAAE;IAC7F,IAAI,EAAE;QACJ,GAAG,WAAW;QACd,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,gCAAgC,EAAE;KAC1F;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,IAAI,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,OAAO,CAAC;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,OAAO;YACP,IAAI;YACJ,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACxB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,aAAa,CAAC;IACjC,IAAI,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,8CAA8C,EAAE;IACxF,IAAI,EAAE,WAAW;IACjB,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,SAAS,CAAC;YACd,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,OAAO;YACP,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACxB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,2EAA2E;KACzF;IACD,IAAI,EAAE;QACJ,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,4BAA4B,EAAE;QACjF,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,kBAAkB,EAAE;QAChF,OAAO,EAAE;YACP,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE,mCAAmC,cAAc,EAAE;iBAC7D,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;iBAClB,IAAI,CAAC,IAAI,CAAC,GAAG;SACjB;QACD,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,
KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;KACjF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO;aACzB,KAAK,CAAC,GAAG,CAAC;aACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QACrF,cAAc,CAAC,OAAO,EAAE,cAAc,EAAE,CAAC,CAAC;QAC1C,MAAM,WAAW,CAAC;YAChB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,OAAO;YACP,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,QAAQ,GAAG,aAAa,CAAC;IAC7B,IAAI,EAAE;QACJ,IAAI,EAAE,OAAO;QACb,WAAW,EAAE,8DAA8D;KAC5E;IACD,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE;QACrE,IAAI,EAAE;YACJ,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,2CAA2C;SACzD;QACD,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,2BAA2B,EAAE;KACtF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7F,MAAM,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;YAC9E,OAAO,CAAC,GAAG,CACT,WAAW,KAAK,kBAAkB,MAAM,CAAC,cAAc,CAAC,MAAM,aAAa,SAAS,GAAG,CACxF,CAAC;YACF,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,YAAY,eAAe,CAAC,CAAC;YAC3D,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClD,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;QAChC,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/C,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,QA
AQ,CAAC,MAAM,WAAW,CAAC,CAAC;QACpD,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClE,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,CAAC;QACD,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,aAAa,CAAC,CAAC;YACxD,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;CACF,CAAC,CAAC;AAEH,SAAS,eAAe,CAAC,CAAqB;IAC5C,OAAO,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,OAAO,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,OAAO,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,aAAa,CAAC,CAAgB;IACrC,OAAO,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,cAAc,CAAC,CAAC,OAAO,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,qEAAqE;KACnF;IACD,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE;QACtE,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,2BAA2B,EAAE;KACtF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QACtE,IAAI,MAAM,CAAC,UAAU,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC;CACF,CAAC,CAAC;AAEH,SAAS,SAAS,CAAC,KAAa;IAC9B,IAAK,KAA2B,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAa,CAAC;IACvE,MAAM,IAAI,KAAK,CAAC,iBAAiB,KAAK,aAAa,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACzE,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa,EAAE,IAAY;IACnD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7B,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,KAAK,IAAI,qCAAqC,KAAK,GAAG,CAAC,CAAC;IAC1E,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,6EAA6E;KAC3F;IACD,IAAI,EAAE;QACJ,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,
OAAO,EAAE,eAAe;YACxB,WAAW,EAAE,8BAA8B;SAC5C;QACD,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,0CAA0C,EAAE;QAC9F,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gCAAgC,EAAE;QACxE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gCAAgC,EAAE;QACvE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,uBAAuB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE;QACjF,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,4CAA4C,EAAE;QACnF,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,GAAG;YACZ,WAAW,EAAE,+EAA+E;SAC7F;QACD,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,wDAAwD;SACtE;QACD,aAAa,EAAE;YACb,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,mEAAmE;SACjF;QACD,iBAAiB,EAAE;YACjB,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,iEAAiE;SAC/E;QACD,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6CAA6C,EAAE;KACrF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC;YAC3B,QAAQ,EAAE,IAAI,CAAC,KAAK;YACpB,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,WAAW,EAAE,gBAAgB,CAAC,IAAI,CAAC,WAAW,EAAE,aAAa,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;YACtD,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC;YACrD,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,gBAAgB,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC;YAC7E,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;YACtD,GAAG,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,SAAS,IAAI,EAAE,UAAU,EAAE,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC;YAC7E,GAAG,CAAC,IAAI,CAAC,iBAAiB,CAAC,KAAK,SAAS,IAAI;gBAC3C,gBAAgB,EAAE,gBAAgB,CAAC,IAAI,CAAC,iBAAiB,CAAC,EAAE,iBAAiB,CAAC,GAAG,IAAI;aACtF,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;YACf,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;YACvE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACzC,IAAI,IAAI,CAAC,IAAI,EAAE,CA
AC;YACd,MAAM,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACrD,MAAM,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC;QAC1D,CAAC;QACD,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,IAAI,GAAG,aAAa,CAAC;IACzB,IAAI,EAAE;QACJ,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,WAAW,EACT,yFAAyF;KAC5F;IACD,WAAW,EAAE;QACX,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,UAAU;QACnB,IAAI,EAAE,OAAO;QACb,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,UAAU;QACnB,IAAI,EAAE,OAAO;QACb,SAAS,EAAE,YAAY;KACxB;CACF,CAAC,CAAC;AAEH,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC"}
@@ -0,0 +1,23 @@
1
+ export type CaptureExitReason = "result" | "timeout" | "stream-end";
2
+ export interface ToolCall {
3
+ readonly name: string;
4
+ readonly input: unknown;
5
+ }
6
+ export interface WrittenFile {
7
+ readonly path: string;
8
+ readonly content: string;
9
+ }
10
+ export interface SolvingCapture {
11
+ readonly outputText: string;
12
+ readonly trajectory: readonly ToolCall[];
13
+ readonly writes: readonly WrittenFile[];
14
+ readonly exitReason: CaptureExitReason;
15
+ }
16
+ interface Captor {
17
+ readonly push: (line: string) => void;
18
+ readonly done: boolean;
19
+ readonly result: (reason: CaptureExitReason) => SolvingCapture;
20
+ }
21
+ export declare function createCaptor(): Captor;
22
+ export {};
23
+ //# sourceMappingURL=capture.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"capture.d.ts","sourceRoot":"","sources":["../../src/eval/capture.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,iBAAiB,GAAG,QAAQ,GAAG,SAAS,GAAG,YAAY,CAAC;AAEpE,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,SAAS,QAAQ,EAAE,CAAC;IACzC,QAAQ,CAAC,MAAM,EAAE,SAAS,WAAW,EAAE,CAAC;IACxC,QAAQ,CAAC,UAAU,EAAE,iBAAiB,CAAC;CACxC;AAED,UAAU,MAAM;IACd,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACtC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,iBAAiB,KAAK,cAAc,CAAC;CAChE;AAID,wBAAgB,YAAY,IAAI,MAAM,CA8DrC"}
@@ -0,0 +1,79 @@
1
+ import { asRecord } from "./detect.js";
2
+ const WRITE_TOOLS = new Set(["Write", "Edit"]);
3
/**
 * Creates a captor that accumulates the observable outcome of a session from
 * its line-delimited JSON event stream.
 *
 * The returned object exposes:
 *  - `done`   (getter) true once a `"result"` event has been consumed;
 *  - `push`   feeds one raw line of the stream; blank lines, lines that are not
 *             valid JSON, and any line pushed after the result event are ignored;
 *  - `result` builds the capture: `outputText`, `trajectory` (every `tool_use`
 *             block with a string name), `writes` (whatever `writtenFileOf`
 *             recognises), and `exitReason`.
 *
 * NOTE(review): `asRecord` comes from ./detect.js and is not visible here; this
 * code only relies on its return value being safely indexable by string keys.
 *
 * @returns {{ readonly done: boolean, push: (line: string) => void, result: (reason: string) => object }}
 */
export function createCaptor() {
    const toolCalls = [];
    const fileWrites = [];
    let latestText = "";
    // null means "no result event seen yet"; an empty string is a seen-but-empty result.
    let finalText = null;
    let sawResult = false;

    // Folds a single content block of an assistant message into the running state.
    const absorbBlock = (block) => {
        switch (block["type"]) {
            case "tool_use": {
                const toolName = block["name"];
                if (typeof toolName !== "string") {
                    break;
                }
                toolCalls.push({ name: toolName, input: block["input"] });
                const written = writtenFileOf(toolName, block["input"]);
                if (written) {
                    fileWrites.push(written);
                }
                break;
            }
            case "text": {
                const value = block["text"];
                // Later text blocks overwrite earlier ones: only the most recent is kept.
                if (typeof value === "string") {
                    latestText = value;
                }
                break;
            }
            default:
                break;
        }
    };

    return {
        get done() {
            return sawResult;
        },
        push(line) {
            if (sawResult) {
                return;
            }
            const payload = line.trim();
            if (payload === "") {
                return;
            }
            let decoded;
            try {
                decoded = JSON.parse(payload);
            }
            catch {
                // Non-JSON lines in the stream are skipped on purpose.
                return;
            }
            const event = asRecord(decoded);
            const kind = event["type"];
            if (kind === "assistant") {
                const content = asRecord(event["message"])["content"];
                if (Array.isArray(content)) {
                    for (const item of content) {
                        absorbBlock(asRecord(item));
                    }
                }
                return;
            }
            if (kind === "result") {
                const text = event["result"];
                finalText = typeof text === "string" ? text : "";
                sawResult = true;
            }
        },
        result(reason) {
            // Prefer the result event's text; fall back to the last assistant text
            // when there was no result event or its text was empty.
            const hasFinalText = finalText !== null && finalText.length > 0;
            return {
                outputText: hasFinalText ? finalText : latestText,
                trajectory: toolCalls,
                writes: fileWrites,
                // A consumed result event always wins over the caller-supplied reason.
                exitReason: finalText === null ? reason : "result",
            };
        },
    };
}
69
/**
 * Derives a written-file record from a tool call, if the tool is one of WRITE_TOOLS.
 *
 * @param {string} name - tool name taken from the `tool_use` block.
 * @param {unknown} input - the tool's input payload.
 * @returns {{ path: string, content: string } | null} `null` when the tool is not
 *   in WRITE_TOOLS or the input has no string `file_path`; otherwise the path with
 *   the input's `content` (or, when that is null/undefined, `new_string`), using
 *   "" when the chosen value is not a string.
 */
function writtenFileOf(name, input) {
    if (WRITE_TOOLS.has(name)) {
        const fields = asRecord(input);
        const target = fields["file_path"];
        if (typeof target === "string") {
            const body = fields["content"] ?? fields["new_string"];
            const text = typeof body === "string" ? body : "";
            return { path: target, content: text };
        }
    }
    return null;
}
79
+ //# sourceMappingURL=capture.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"capture.js","sourceRoot":"","sources":["../../src/eval/capture.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AA2BvC,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAE/C,MAAM,UAAU,YAAY;IAC1B,MAAM,UAAU,GAAe,EAAE,CAAC;IAClC,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,IAAI,iBAAiB,GAAG,EAAE,CAAC;IAC3B,IAAI,UAAU,GAAkB,IAAI,CAAC;IACrC,IAAI,QAAQ,GAAG,KAAK,CAAC;IAErB,SAAS,aAAa,CAAC,KAA8B;QACnD,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3B,IAAI,OAAO,IAAI,KAAK,QAAQ;YAAE,OAAO;QACrC,UAAU,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACjD,MAAM,KAAK,GAAG,aAAa,CAAC,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;QAClD,IAAI,KAAK;YAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAChC,CAAC;IAED,SAAS,UAAU,CAAC,KAA8B;QAChD,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3B,IAAI,OAAO,IAAI,KAAK,QAAQ;YAAE,iBAAiB,GAAG,IAAI,CAAC;IACzD,CAAC;IAED,OAAO;QACL,IAAI,IAAI;YACN,OAAO,QAAQ,CAAC;QAClB,CAAC;QACD,IAAI,CAAC,IAAY;YACf,IAAI,QAAQ;gBAAE,OAAO;YACrB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO;gBAAE,OAAO;YACrB,IAAI,MAAe,CAAC;YACpB,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/B,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;YACD,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;YAE/B,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,WAAW,EAAE,CAAC;gBAClC,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;gBACtD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;oBACzD,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;oBAC7B,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,UAAU;wBAAE,aAAa,CAAC,KAAK,CAAC,CAAC;yBAClD,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,MAAM;wBAAE,UAAU,CAAC,KAAK,CAAC,CAAC;gBACvD,CAAC;gBACD,OAAO;YACT,CAAC;YAED,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,QAAQ,EAAE,CAAC;gBAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC,CAAC;gBAC7B,UAAU,GAAG,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;gBAClD,QAAQ,GAAG,IAAI,CAAC;YAClB,CAAC;QACH,CAAC;QACD,MAAM,CAAC,MAAyB;YAC9B,MAAM,UAAU,GAAG,UAAU,IAAI,UAAU,CAAC,MAAM,
GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,iBAAiB,CAAC;YACxF,OAAO;gBACL,UAAU;gBACV,UAAU;gBACV,MAAM;gBACN,UAAU,EAAE,UAAU,KAAK,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM;aACpD,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,IAAY,EAAE,KAAc;IACjD,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACxC,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC/B,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC;IACjC,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC1C,MAAM,OAAO,GAAG,MAAM,CAAC,SAAS,CAAC,IAAI,MAAM,CAAC,YAAY,CAAC,CAAC;IAC1D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;AACvE,CAAC"}
@@ -1,14 +1,22 @@
1
1
  import { type Result } from "../result.js";
2
- import { type EvalCase, type Tier } from "./schema.js";
3
- export interface LoadedCase extends EvalCase {
2
+ import { type RoutingCase, type SolvingCase, type Tier } from "./schema.js";
3
+ interface CaseLocation {
4
4
  readonly suite: string;
5
5
  readonly tier: Tier;
6
6
  readonly file: string;
7
7
  }
8
+ export type LoadedRoutingCase = RoutingCase & CaseLocation & {
9
+ readonly tier: "routing";
10
+ };
11
+ export type LoadedSolvingCase = SolvingCase & CaseLocation & {
12
+ readonly tier: "solving";
13
+ };
14
+ export type LoadedCase = LoadedRoutingCase | LoadedSolvingCase;
8
15
  export interface CaseLoadError {
9
16
  readonly file: string;
10
17
  readonly message: string;
11
18
  }
12
19
  export declare function loadCases(casesDir: string): Promise<Result<LoadedCase[], CaseLoadError[]>>;
13
20
  export declare function unresolvedSkills(cases: readonly LoadedCase[], installedIds: ReadonlySet<string>): CaseLoadError[];
21
+ export {};
14
22
  //# sourceMappingURL=cases.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"cases.d.ts","sourceRoot":"","sources":["../../src/eval/cases.ts"],"names":[],"mappings":"AAMA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,cAAc,CAAC;AACpD,OAAO,EAIL,KAAK,QAAQ,EACb,KAAK,IAAI,EACV,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,UAAW,SAAQ,QAAQ;IAC1C,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAsB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,aAAa,EAAE,CAAC,CAAC,CAuBhG;AAED,wBAAgB,gBAAgB,CAC9B,KAAK,EAAE,SAAS,UAAU,EAAE,EAC5B,YAAY,EAAE,WAAW,CAAC,MAAM,CAAC,GAChC,aAAa,EAAE,CAajB"}
1
+ {"version":3,"file":"cases.d.ts","sourceRoot":"","sources":["../../src/eval/cases.ts"],"names":[],"mappings":"AAMA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,cAAc,CAAC;AACpD,OAAO,EAKL,KAAK,WAAW,EAChB,KAAK,WAAW,EAChB,KAAK,IAAI,EACV,MAAM,aAAa,CAAC;AAErB,UAAU,YAAY;IACpB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,MAAM,iBAAiB,GAAG,WAAW,GAAG,YAAY,GAAG;IAAE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAA;CAAE,CAAC;AAC1F,MAAM,MAAM,iBAAiB,GAAG,WAAW,GAAG,YAAY,GAAG;IAAE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAA;CAAE,CAAC;AAC1F,MAAM,MAAM,UAAU,GAAG,iBAAiB,GAAG,iBAAiB,CAAC;AAE/D,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAsB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,aAAa,EAAE,CAAC,CAAC,CAwBhG;AAQD,wBAAgB,gBAAgB,CAC9B,KAAK,EAAE,SAAS,UAAU,EAAE,EAC5B,YAAY,EAAE,WAAW,CAAC,MAAM,CAAC,GAChC,aAAa,EAAE,CAajB"}
@@ -3,7 +3,7 @@ import { join } from "node:path";
3
3
  import yaml from "js-yaml";
4
4
  import { formatZodIssues } from "../errors/zod.js";
5
5
  import { err, ok } from "../result.js";
6
- import { CaseFileSchema, expectedSkills, } from "./schema.js";
6
+ import { CaseFileSchema, caseExpectedSkills, } from "./schema.js";
7
7
  export async function loadCases(casesDir) {
8
8
  const files = await collectYamlFiles(casesDir);
9
9
  const loaded = [];
@@ -15,21 +15,27 @@ export async function loadCases(casesDir) {
15
15
  errors.push(parsed.error);
16
16
  continue;
17
17
  }
18
+ const { suite } = parsed.value;
18
19
  for (const evalCase of parsed.value.cases) {
19
20
  if (seenIds.has(evalCase.id)) {
20
21
  errors.push({ file, message: `duplicate case id "${evalCase.id}"` });
21
22
  continue;
22
23
  }
23
24
  seenIds.add(evalCase.id);
24
- loaded.push({ ...evalCase, suite: parsed.value.suite, tier: parsed.value.tier, file });
25
+ loaded.push(locate(evalCase, suite, file));
25
26
  }
26
27
  }
27
28
  return errors.length > 0 ? err(errors) : ok(loaded);
28
29
  }
30
+ function locate(evalCase, suite, file) {
31
+ return "expect" in evalCase
32
+ ? { ...evalCase, suite, file, tier: "routing" }
33
+ : { ...evalCase, suite, file, tier: "solving" };
34
+ }
29
35
  export function unresolvedSkills(cases, installedIds) {
30
36
  const errors = [];
31
37
  for (const evalCase of cases) {
32
- for (const id of expectedSkills(evalCase.expect)) {
38
+ for (const id of caseExpectedSkills(evalCase)) {
33
39
  if (!installedIds.has(id)) {
34
40
  errors.push({
35
41
  file: evalCase.file,
@@ -1 +1 @@
1
- {"version":3,"file":"cases.js","sourceRoot":"","sources":["../../src/eval/cases.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,IAAI,MAAM,SAAS,CAAC;AAE3B,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,cAAc,CAAC;AACpD,OAAO,EACL,cAAc,EACd,cAAc,GAIf,MAAM,aAAa,CAAC;AAarB,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB;IAC9C,MAAM,KAAK,GAAG,MAAM,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAiB,EAAE,CAAC;IAChC,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC1B,SAAS;QACX,CAAC;QACD,KAAK,MAAM,QAAQ,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YAC1C,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC7B,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,sBAAsB,QAAQ,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC;gBACrE,SAAS;YACX,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,QAAQ,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;QACzF,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,KAA4B,EAC5B,YAAiC;IAEjC,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;QAC7B,KAAK,MAAM,EAAE,IAAI,cAAc,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACjD,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC1B,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,QAAQ,CAAC,IAAI;oBACnB,OAAO,EAAE,SAAS,QAAQ,CAAC,EAAE,oBAAoB,EAAE,2BAA2B;iBAC/E,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,IAAY;IACnC,IAAI,GAAW,CAAC;IAChB,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACrC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,qBAAsB
,KAAe,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IACjF,CAAC;IACD,IAAI,GAAY,CAAC;IACjB,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACvB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,iBAAkB,KAAe,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IAC7E,CAAC;IACD,MAAM,MAAM,GAAG,cAAc,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IAC7C,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,eAAe,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1E,CAAC;IACD,OAAO,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;AACzB,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAW;IACzC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,OAAO,CAAC;IACZ,IAAI,CAAC;QACH,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC;aAAM,IAAI,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC;YAC3F,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;AACtB,CAAC"}
1
+ {"version":3,"file":"cases.js","sourceRoot":"","sources":["../../src/eval/cases.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,IAAI,MAAM,SAAS,CAAC;AAE3B,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,cAAc,CAAC;AACpD,OAAO,EACL,cAAc,EACd,kBAAkB,GAMnB,MAAM,aAAa,CAAC;AAiBrB,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB;IAC9C,MAAM,KAAK,GAAG,MAAM,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAiB,EAAE,CAAC;IAChC,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC1B,SAAS;QACX,CAAC;QACD,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC,KAAK,CAAC;QAC/B,KAAK,MAAM,QAAQ,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YAC1C,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC7B,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,sBAAsB,QAAQ,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC;gBACrE,SAAS;YACX,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;AACtD,CAAC;AAED,SAAS,MAAM,CAAC,QAAkB,EAAE,KAAa,EAAE,IAAY;IAC7D,OAAO,QAAQ,IAAI,QAAQ;QACzB,CAAC,CAAC,EAAE,GAAG,QAAQ,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE;QAC/C,CAAC,CAAC,EAAE,GAAG,QAAQ,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;AACpD,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,KAA4B,EAC5B,YAAiC;IAEjC,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;QAC7B,KAAK,MAAM,EAAE,IAAI,kBAAkB,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC9C,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC1B,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,QAAQ,CAAC,IAAI;oBACnB,OAAO,EAAE,SAAS,QAAQ,CAAC,EAAE,oBAAoB,EAAE,2BAA2B;iBAC/E,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAC
hB,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,IAAY;IACnC,IAAI,GAAW,CAAC;IAChB,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACrC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,qBAAsB,KAAe,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IACjF,CAAC;IACD,IAAI,GAAY,CAAC;IACjB,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACvB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,iBAAkB,KAAe,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IAC7E,CAAC;IACD,MAAM,MAAM,GAAG,cAAc,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IAC7C,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,eAAe,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1E,CAAC;IACD,OAAO,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;AACzB,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAW;IACzC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,OAAO,CAAC;IACZ,IAAI,CAAC;QACH,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC;aAAM,IAAI,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC;YAC3F,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;AACtB,CAAC"}
@@ -9,6 +9,7 @@ interface Detector {
9
9
  readonly done: boolean;
10
10
  readonly result: (reason: ExitReason) => DetectionResult;
11
11
  }
12
+ export declare function asRecord(value: unknown): Record<string, unknown>;
12
13
  export declare function createDetector(stopAfter?: number): Detector;
13
14
  export {};
14
15
  //# sourceMappingURL=detect.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"detect.d.ts","sourceRoot":"","sources":["../../src/eval/detect.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,UAAU,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,GAAG,YAAY,CAAC;AAEzE,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;CACjC;AAED,UAAU,QAAQ;IAChB,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACtC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,UAAU,KAAK,eAAe,CAAC;CAC1D;AAwBD,wBAAgB,cAAc,CAAC,SAAS,SAAI,GAAG,QAAQ,CA6EtD"}
1
+ {"version":3,"file":"detect.d.ts","sourceRoot":"","sources":["../../src/eval/detect.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,UAAU,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,GAAG,YAAY,CAAC;AAEzE,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;CACjC;AAED,UAAU,QAAQ;IAChB,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACtC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,UAAU,KAAK,eAAe,CAAC;CAC1D;AAED,wBAAgB,QAAQ,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAEhE;AAoBD,wBAAgB,cAAc,CAAC,SAAS,SAAI,GAAG,QAAQ,CA6EtD"}
@@ -1,5 +1,5 @@
1
1
  import { FQ_ID } from "../ids.js";
2
- function asRecord(value) {
2
+ export function asRecord(value) {
3
3
  return typeof value === "object" && value !== null ? value : {};
4
4
  }
5
5
  function isSkillToolUse(block) {
@@ -1 +1 @@
1
- {"version":3,"file":"detect.js","sourceRoot":"","sources":["../../src/eval/detect.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,WAAW,CAAC;AAgBlC,SAAS,QAAQ,CAAC,KAAc;IAC9B,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAC,CAAC,CAAE,KAAiC,CAAC,CAAC,CAAC,EAAE,CAAC;AAC/F,CAAC;AAED,SAAS,cAAc,CAAC,KAA8B;IACpD,OAAO,KAAK,CAAC,MAAM,CAAC,KAAK,UAAU,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,OAAO,CAAC;AACnE,CAAC;AAED,SAAS,SAAS,CAAC,KAAc;IAC/B,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,SAAS,CAAC,CAAC;IACnD,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;AACvE,CAAC;AAED,SAAS,aAAa,CAAC,MAAc;IACnC,IAAI,CAAC;QACH,OAAO,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IACvC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,SAAS,GAAG,CAAC;IAC1C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,gBAAgB,GAAkB,IAAI,CAAC;IAC3C,IAAI,QAAQ,GAAG,KAAK,CAAC;IAErB,SAAS,MAAM,CAAC,EAAiB;QAC/B,IAAI,EAAE,KAAK,IAAI;YAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnC,IAAI,QAAQ,CAAC,MAAM,IAAI,SAAS;YAAE,QAAQ,GAAG,IAAI,CAAC;QAClD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,SAAS,YAAY;QACnB,IAAI,gBAAgB,KAAK,IAAI;YAAE,OAAO;QACtC,MAAM,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC,CAAC;QACxC,gBAAgB,GAAG,IAAI,CAAC;IAC1B,CAAC;IAED,OAAO;QACL,IAAI,IAAI;YACN,OAAO,QAAQ,CAAC;QAClB,CAAC;QACD,IAAI,CAAC,IAAY;YACf,IAAI,QAAQ;gBAAE,OAAO;YACrB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO;gBAAE,OAAO;YACrB,IAAI,MAAe,CAAC;YACpB,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/B,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;YACD,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;YAE3B,IAAI,IAAI,KAAK,cAAc,EAAE,CAAC;gBAC5B,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;gBACpC,MAAM,MAAM,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC;gBAE1B,IAAI,MAAM,KAAK,qBAAqB,EAAE,CAAC;oBACrC,IAAI,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC;wBAAE,gBAAgB,GAAG,EAAE,CAAC;gBAC3E,CAAC;qBAAM,IAAI
,MAAM,KAAK,qBAAqB,IAAI,gBAAgB,KAAK,IAAI,EAAE,CAAC;oBACzE,MAAM,KAAK,GAAG,QAAQ,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;oBACpC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,kBAAkB,EAAE,CAAC;wBACzC,MAAM,OAAO,GAAG,KAAK,CAAC,cAAc,CAAC,CAAC;wBACtC,IAAI,OAAO,OAAO,KAAK,QAAQ;4BAAE,gBAAgB,IAAI,OAAO,CAAC;wBAC7D,IAAI,aAAa,CAAC,gBAAgB,CAAC,KAAK,IAAI;4BAAE,YAAY,EAAE,CAAC;oBAC/D,CAAC;gBACH,CAAC;qBAAM,IAAI,MAAM,KAAK,oBAAoB,EAAE,CAAC;oBAC3C,YAAY,EAAE,CAAC;gBACjB,CAAC;qBAAM,IAAI,MAAM,KAAK,cAAc,EAAE,CAAC;oBACrC,YAAY,EAAE,CAAC;oBACf,QAAQ,GAAG,IAAI,CAAC;gBAClB,CAAC;gBACD,OAAO;YACT,CAAC;YAED,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;gBACzB,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;gBACtD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;oBACzD,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;oBAC7B,IAAI,cAAc,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;wBAAE,OAAO;gBACzE,CAAC;gBACD,OAAO;YACT,CAAC;YAED,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACtB,QAAQ,GAAG,IAAI,CAAC;YAClB,CAAC;QACH,CAAC;QACD,MAAM,CAAC,MAAkB;YACvB,OAAO;gBACL,QAAQ;gBACR,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI;gBAC/B,UAAU,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;aACnD,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"detect.js","sourceRoot":"","sources":["../../src/eval/detect.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,WAAW,CAAC;AAgBlC,MAAM,UAAU,QAAQ,CAAC,KAAc;IACrC,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAC,CAAC,CAAE,KAAiC,CAAC,CAAC,CAAC,EAAE,CAAC;AAC/F,CAAC;AAED,SAAS,cAAc,CAAC,KAA8B;IACpD,OAAO,KAAK,CAAC,MAAM,CAAC,KAAK,UAAU,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,OAAO,CAAC;AACnE,CAAC;AAED,SAAS,SAAS,CAAC,KAAc;IAC/B,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,SAAS,CAAC,CAAC;IACnD,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;AACvE,CAAC;AAED,SAAS,aAAa,CAAC,MAAc;IACnC,IAAI,CAAC;QACH,OAAO,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IACvC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,SAAS,GAAG,CAAC;IAC1C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,gBAAgB,GAAkB,IAAI,CAAC;IAC3C,IAAI,QAAQ,GAAG,KAAK,CAAC;IAErB,SAAS,MAAM,CAAC,EAAiB;QAC/B,IAAI,EAAE,KAAK,IAAI;YAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnC,IAAI,QAAQ,CAAC,MAAM,IAAI,SAAS;YAAE,QAAQ,GAAG,IAAI,CAAC;QAClD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,SAAS,YAAY;QACnB,IAAI,gBAAgB,KAAK,IAAI;YAAE,OAAO;QACtC,MAAM,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC,CAAC;QACxC,gBAAgB,GAAG,IAAI,CAAC;IAC1B,CAAC;IAED,OAAO;QACL,IAAI,IAAI;YACN,OAAO,QAAQ,CAAC;QAClB,CAAC;QACD,IAAI,CAAC,IAAY;YACf,IAAI,QAAQ;gBAAE,OAAO;YACrB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO;gBAAE,OAAO;YACrB,IAAI,MAAe,CAAC;YACpB,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/B,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;YACD,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;YAE3B,IAAI,IAAI,KAAK,cAAc,EAAE,CAAC;gBAC5B,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;gBACpC,MAAM,MAAM,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC;gBAE1B,IAAI,MAAM,KAAK,qBAAqB,EAAE,CAAC;oBACrC,IAAI,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC;wBAAE,gBAAgB,GAAG,EAAE,CAAC;gBAC3E,CAAC;qBAAM
,IAAI,MAAM,KAAK,qBAAqB,IAAI,gBAAgB,KAAK,IAAI,EAAE,CAAC;oBACzE,MAAM,KAAK,GAAG,QAAQ,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;oBACpC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,kBAAkB,EAAE,CAAC;wBACzC,MAAM,OAAO,GAAG,KAAK,CAAC,cAAc,CAAC,CAAC;wBACtC,IAAI,OAAO,OAAO,KAAK,QAAQ;4BAAE,gBAAgB,IAAI,OAAO,CAAC;wBAC7D,IAAI,aAAa,CAAC,gBAAgB,CAAC,KAAK,IAAI;4BAAE,YAAY,EAAE,CAAC;oBAC/D,CAAC;gBACH,CAAC;qBAAM,IAAI,MAAM,KAAK,oBAAoB,EAAE,CAAC;oBAC3C,YAAY,EAAE,CAAC;gBACjB,CAAC;qBAAM,IAAI,MAAM,KAAK,cAAc,EAAE,CAAC;oBACrC,YAAY,EAAE,CAAC;oBACf,QAAQ,GAAG,IAAI,CAAC;gBAClB,CAAC;gBACD,OAAO;YACT,CAAC;YAED,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;gBACzB,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;gBACtD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;oBACzD,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;oBAC7B,IAAI,cAAc,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;wBAAE,OAAO;gBACzE,CAAC;gBACD,OAAO;YACT,CAAC;YAED,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACtB,QAAQ,GAAG,IAAI,CAAC;YAClB,CAAC;QACH,CAAC;QACD,MAAM,CAAC,MAAkB;YACvB,OAAO;gBACL,QAAQ;gBACR,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI;gBAC/B,UAAU,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;aACnD,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -0,0 +1,9 @@
1
+ import type { SolvingCapture } from "./capture.js";
2
+ import type { Assertion } from "./schema.js";
3
+ export interface AssertionResult {
4
+ readonly assertion: Assertion;
5
+ readonly pass: boolean;
6
+ readonly evidence: string;
7
+ }
8
+ export declare function gradeAssertions(assertions: readonly Assertion[], capture: SolvingCapture): AssertionResult[];
9
+ //# sourceMappingURL=grade-deterministic.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"grade-deterministic.d.ts","sourceRoot":"","sources":["../../src/eval/grade-deterministic.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAEnD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE7C,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,SAAS,EAAE,SAAS,CAAC;IAC9B,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;CAC3B;AAED,wBAAgB,eAAe,CAC7B,UAAU,EAAE,SAAS,SAAS,EAAE,EAChC,OAAO,EAAE,cAAc,GACtB,eAAe,EAAE,CAEnB"}
@@ -0,0 +1,87 @@
1
+ import { err, ok } from "../result.js";
2
+ import { asRecord } from "./detect.js";
3
+ export function gradeAssertions(assertions, capture) {
4
+ return assertions.map((assertion) => grade(assertion, capture));
5
+ }
6
+ function grade(assertion, capture) {
7
+ switch (assertion.kind) {
8
+ case "outputMatches":
9
+ return matchOutput(assertion, capture.outputText, {
10
+ whenPresent: () => pass(assertion, `matched "${assertion.pattern}"`),
11
+ whenAbsent: () => fail(assertion, `no match for "${assertion.pattern}"`),
12
+ });
13
+ case "outputExcludes":
14
+ return matchOutput(assertion, capture.outputText, {
15
+ whenPresent: (hit) => fail(assertion, `found forbidden "${hit}"`),
16
+ whenAbsent: () => pass(assertion, `absent: "${assertion.pattern}"`),
17
+ });
18
+ case "usedTool":
19
+ return tools(capture).includes(assertion.tool)
20
+ ? pass(assertion, `used ${assertion.tool}`)
21
+ : fail(assertion, `tools used: [${tools(capture).join(", ")}]`);
22
+ case "didNotUseTool":
23
+ return tools(capture).includes(assertion.tool)
24
+ ? fail(assertion, `unexpectedly used ${assertion.tool}`)
25
+ : pass(assertion, `did not use ${assertion.tool}`);
26
+ case "wroteFile":
27
+ return gradeWroteFile(assertion, capture);
28
+ }
29
+ }
30
+ function matchOutput(assertion, text, outcome) {
31
+ const matcher = compileMatcher(assertion.pattern, assertion.regex);
32
+ if (!matcher.ok)
33
+ return fail(assertion, matcher.error);
34
+ const hit = matcher.value(text);
35
+ return hit === null ? outcome.whenAbsent() : outcome.whenPresent(hit);
36
+ }
37
+ function gradeWroteFile(assertion, capture) {
38
+ if (!wroteTo(capture, assertion.path)) {
39
+ return fail(assertion, `${assertion.path} not written (wrote: [${writtenPaths(capture).join(", ")}])`);
40
+ }
41
+ if (assertion.contentMatches === undefined) {
42
+ return pass(assertion, `wrote ${assertion.path}`);
43
+ }
44
+ const content = capture.writes.find((w) => w.path === assertion.path)?.content ?? "";
45
+ const matcher = compileMatcher(assertion.contentMatches, assertion.regex);
46
+ if (!matcher.ok)
47
+ return fail(assertion, matcher.error);
48
+ return matcher.value(content) !== null
49
+ ? pass(assertion, `${assertion.path} content matched`)
50
+ : fail(assertion, `${assertion.path} content did not match "${assertion.contentMatches}"`);
51
+ }
52
+ function compileMatcher(pattern, regex) {
53
+ if (!regex) {
54
+ return ok((text) => (text.includes(pattern) ? pattern : null));
55
+ }
56
+ let compiled;
57
+ try {
58
+ compiled = new RegExp(pattern);
59
+ }
60
+ catch (cause) {
61
+ return err(`invalid regex "${pattern}": ${cause.message}`);
62
+ }
63
+ return ok((text) => compiled.exec(text)?.[0] ?? null);
64
+ }
65
+ function wroteTo(capture, path) {
66
+ return writtenPaths(capture).includes(path);
67
+ }
68
+ function writtenPaths(capture) {
69
+ return capture.trajectory.flatMap((call) => isWriteTool(call.name) ? writePathOf(call.input) : []);
70
+ }
71
+ function isWriteTool(name) {
72
+ return name === "Write" || name === "Edit";
73
+ }
74
+ function writePathOf(input) {
75
+ const path = asRecord(input)["file_path"];
76
+ return typeof path === "string" ? [path] : [];
77
+ }
78
+ function tools(capture) {
79
+ return capture.trajectory.map((call) => call.name);
80
+ }
81
+ function pass(assertion, evidence) {
82
+ return { assertion, pass: true, evidence };
83
+ }
84
+ function fail(assertion, evidence) {
85
+ return { assertion, pass: false, evidence };
86
+ }
87
+ //# sourceMappingURL=grade-deterministic.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"grade-deterministic.js","sourceRoot":"","sources":["../../src/eval/grade-deterministic.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,cAAc,CAAC;AAEpD,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AASvC,MAAM,UAAU,eAAe,CAC7B,UAAgC,EAChC,OAAuB;IAEvB,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,KAAK,CAAC,SAAoB,EAAE,OAAuB;IAC1D,QAAQ,SAAS,CAAC,IAAI,EAAE,CAAC;QACvB,KAAK,eAAe;YAClB,OAAO,WAAW,CAAC,SAAS,EAAE,OAAO,CAAC,UAAU,EAAE;gBAChD,WAAW,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,YAAY,SAAS,CAAC,OAAO,GAAG,CAAC;gBACpE,UAAU,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,iBAAiB,SAAS,CAAC,OAAO,GAAG,CAAC;aACzE,CAAC,CAAC;QAEL,KAAK,gBAAgB;YACnB,OAAO,WAAW,CAAC,SAAS,EAAE,OAAO,CAAC,UAAU,EAAE;gBAChD,WAAW,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,oBAAoB,GAAG,GAAG,CAAC;gBACjE,UAAU,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,YAAY,SAAS,CAAC,OAAO,GAAG,CAAC;aACpE,CAAC,CAAC;QAEL,KAAK,UAAU;YACb,OAAO,KAAK,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC;gBAC5C,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,SAAS,CAAC,IAAI,EAAE,CAAC;gBAC3C,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,gBAAgB,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEpE,KAAK,eAAe;YAClB,OAAO,KAAK,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC;gBAC5C,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,qBAAqB,SAAS,CAAC,IAAI,EAAE,CAAC;gBACxD,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,eAAe,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;QAEvD,KAAK,WAAW;YACd,OAAO,cAAc,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IAC9C,CAAC;AACH,CAAC;AAOD,SAAS,WAAW,CAClB,SAA2E,EAC3E,IAAY,EACZ,OAAqB;IAErB,MAAM,OAAO,GAAG,cAAc,CAAC,SAAS,CAAC,OAAO,EAAE,SAAS,CAAC,KAAK,CAAC,CAAC;IACnE,IAAI,CAAC,OAAO,CAAC,EAAE;QAAE,OAAO,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;IACvD,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAChC,OAAO,GAAG,KAAK,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;AACxE,CAAC;AAED,SAAS,cAAc,CACrB,SAAoD,EACpD,OAAuB;IAEvB,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,SAAS,CAAC,IAAI,CAAC,EAAE,CA
AC;QACtC,OAAO,IAAI,CACT,SAAS,EACT,GAAG,SAAS,CAAC,IAAI,yBAAyB,YAAY,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAC/E,CAAC;IACJ,CAAC;IACD,IAAI,SAAS,CAAC,cAAc,KAAK,SAAS,EAAE,CAAC;QAC3C,OAAO,IAAI,CAAC,SAAS,EAAE,SAAS,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IACpD,CAAC;IACD,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,IAAI,CAAC,EAAE,OAAO,IAAI,EAAE,CAAC;IACrF,MAAM,OAAO,GAAG,cAAc,CAAC,SAAS,CAAC,cAAc,EAAE,SAAS,CAAC,KAAK,CAAC,CAAC;IAC1E,IAAI,CAAC,OAAO,CAAC,EAAE;QAAE,OAAO,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;IACvD,OAAO,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,IAAI;QACpC,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,CAAC,IAAI,kBAAkB,CAAC;QACtD,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,CAAC,IAAI,2BAA2B,SAAS,CAAC,cAAc,GAAG,CAAC,CAAC;AAC/F,CAAC;AAID,SAAS,cAAc,CAAC,OAAe,EAAE,KAAc;IACrD,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,EAAE,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACjE,CAAC;IACD,IAAI,QAAgB,CAAC;IACrB,IAAI,CAAC;QACH,QAAQ,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;IACjC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,GAAG,CAAC,kBAAkB,OAAO,MAAO,KAAe,CAAC,OAAO,EAAE,CAAC,CAAC;IACxE,CAAC;IACD,OAAO,EAAE,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;AACxD,CAAC;AAED,SAAS,OAAO,CAAC,OAAuB,EAAE,IAAY;IACpD,OAAO,YAAY,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AAC9C,CAAC;AAED,SAAS,YAAY,CAAC,OAAuB;IAC3C,OAAO,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CACzC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CACtD,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,OAAO,IAAI,KAAK,OAAO,IAAI,IAAI,KAAK,MAAM,CAAC;AAC7C,CAAC;AAED,SAAS,WAAW,CAAC,KAAc;IACjC,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC;IAC1C,OAAO,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AAChD,CAAC;AAED,SAAS,KAAK,CAAC,OAAuB;IACpC,OAAO,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAA
C,IAAI,CAAC,CAAC;AACrD,CAAC;AAED,SAAS,IAAI,CAAC,SAAoB,EAAE,QAAgB;IAClD,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;AAC7C,CAAC;AAED,SAAS,IAAI,CAAC,SAAoB,EAAE,QAAgB;IAClD,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AAC9C,CAAC"}
@@ -0,0 +1,12 @@
1
+ import type { Judge, Verdict } from "./judge.js";
2
+ import type { Rubric } from "./schema.js";
3
+ export interface DimensionResult {
4
+ readonly dimension: string;
5
+ readonly verdict: Verdict;
6
+ }
7
+ export interface RubricResult {
8
+ readonly dimensions: readonly DimensionResult[];
9
+ readonly pass: boolean;
10
+ }
11
+ export declare function gradeRubric(rubric: Rubric, output: string, judge: Judge): Promise<RubricResult>;
12
+ //# sourceMappingURL=grade-judge.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"grade-judge.d.ts","sourceRoot":"","sources":["../../src/eval/grade-judge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AACjD,OAAO,KAAK,EAAe,MAAM,EAAE,MAAM,aAAa,CAAC;AAEvD,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC;CAC3B;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,UAAU,EAAE,SAAS,eAAe,EAAE,CAAC;IAChD,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;CACxB;AAED,wBAAsB,WAAW,CAC/B,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,GACX,OAAO,CAAC,YAAY,CAAC,CAQvB"}
@@ -0,0 +1,14 @@
1
+ export async function gradeRubric(rubric, output, judge) {
2
+ const dimensions = await Promise.all(rubric.dimensions.map(async ({ dimension, criterion }) => ({
3
+ dimension,
4
+ verdict: await judge({ dimension, criterion, output }),
5
+ })));
6
+ return { dimensions, pass: combine(rubric.combine, dimensions) };
7
+ }
8
+ function combine(rule, dimensions) {
9
+ const passed = dimensions.filter((d) => d.verdict.pass === true).length;
10
+ if (rule.combine === "all")
11
+ return passed === dimensions.length;
12
+ return dimensions.length > 0 && passed / dimensions.length >= rule.threshold;
13
+ }
14
+ //# sourceMappingURL=grade-judge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"grade-judge.js","sourceRoot":"","sources":["../../src/eval/grade-judge.ts"],"names":[],"mappings":"AAaA,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,MAAc,EACd,MAAc,EACd,KAAY;IAEZ,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,GAAG,CAClC,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC;QACzD,SAAS;QACT,OAAO,EAAE,MAAM,KAAK,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC;KACvD,CAAC,CAAC,CACJ,CAAC;IACF,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,EAAE,CAAC;AACnE,CAAC;AAED,SAAS,OAAO,CAAC,IAAiB,EAAE,UAAsC;IACxE,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC,MAAM,CAAC;IACxE,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK;QAAE,OAAO,MAAM,KAAK,UAAU,CAAC,MAAM,CAAC;IAChE,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,GAAG,UAAU,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,CAAC;AAC/E,CAAC"}
@@ -0,0 +1,5 @@
1
+ import type { Judge } from "./judge.js";
2
+ import type { CaseReport } from "./report.js";
3
+ import type { CaseResult } from "./runner.js";
4
+ export declare function gradeResults(results: readonly CaseResult[], judge: Judge | undefined): Promise<CaseReport[]>;
5
+ //# sourceMappingURL=grade.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"grade.d.ts","sourceRoot":"","sources":["../../src/eval/grade.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACxC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAI9C,wBAAsB,YAAY,CAChC,OAAO,EAAE,SAAS,UAAU,EAAE,EAC9B,KAAK,EAAE,KAAK,GAAG,SAAS,GACvB,OAAO,CAAC,UAAU,EAAE,CAAC,CAEvB"}
@@ -0,0 +1,25 @@
1
+ import { gradeAssertions } from "./grade-deterministic.js";
2
+ import { gradeRubric } from "./grade-judge.js";
3
+ import { scoreCase, scoreSolving } from "./score.js";
4
+ export async function gradeResults(results, judge) { // Grades every entry of `results` via gradeOne; resolves to one graded value per entry, in input order.
5
+ return Promise.all(results.map(async (result) => gradeOne(result, judge))); // all gradeOne calls are started before any is awaited; a single rejection rejects the whole call
6
+ }
7
+ async function gradeOne(result, judge) { // Builds the report object for one case result, branching on result.tier.
8
+ if (result.tier === "routing") { // routing tier: scored from `runs` alone; `judge` is not used on this path
9
+ const { evalCase, runs } = result;
10
+ return { evalCase, score: scoreCase(evalCase.expect, runs, evalCase.threshold) };
11
+ }
12
+ const { evalCase, captures } = result; // every other tier is read as carrying `captures`
13
+ const perRun = await Promise.all(captures.map(async (capture) => gradeRun(evalCase.assert, evalCase.rubric, capture, judge))); // one gradeRun result per capture, in capture order
14
+ return {
15
+ evalCase,
16
+ score: scoreSolving(perRun, evalCase.threshold),
17
+ solving: { perRun }, // the per-run grading detail is returned alongside the score
18
+ };
19
+ }
20
+ async function gradeRun(assertions, rubric, capture, judge) { // Grades a single capture; resolves to { assertions, rubric }.
21
+ const graded = gradeAssertions(assertions, capture); // assertion grading always runs and is synchronous here
22
+ const rubricResult = rubric && judge ? await gradeRubric(rubric, capture.outputText, judge) : null; // rubric grading runs only when both `rubric` and `judge` are truthy; otherwise the rubric result is null
23
+ return { assertions: graded, rubric: rubricResult };
24
+ }
25
+ //# sourceMappingURL=grade.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"grade.js","sourceRoot":"","sources":["../../src/eval/grade.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAC3D,OAAO,EAAE,WAAW,EAAqB,MAAM,kBAAkB,CAAC;AAIlE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAyB,MAAM,YAAY,CAAC;AAG5E,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,OAA8B,EAC9B,KAAwB;IAExB,OAAO,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC;AAC7E,CAAC;AAED,KAAK,UAAU,QAAQ,CAAC,MAAkB,EAAE,KAAwB;IAClE,IAAI,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QAC9B,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,GAAG,MAAM,CAAC;QAClC,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;IACnF,CAAC;IAED,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,MAAM,CAAC;IACtC,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9B,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAC5F,CAAC;IACF,OAAO;QACL,QAAQ;QACR,KAAK,EAAE,YAAY,CAAC,MAAM,EAAE,QAAQ,CAAC,SAAS,CAAC;QAC/C,OAAO,EAAE,EAAE,MAAM,EAAE;KACpB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,UAAgC,EAChC,MAA0B,EAC1B,OAAuB,EACvB,KAAwB;IAExB,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;IACpD,MAAM,YAAY,GAChB,MAAM,IAAI,KAAK,CAAC,CAAC,CAAC,MAAM,WAAW,CAAC,MAAM,EAAE,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAChF,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC;AACtD,CAAC"}
@@ -1,5 +1,6 @@
1
1
  import { type Result } from "../result.js";
2
2
  import { type CaseLoadError, type LoadedCase } from "./cases.js";
3
+ import { type Judge } from "./judge.js";
3
4
  import { type EvalReport } from "./report.js";
4
5
  import { type RunnerOptions } from "./runner.js";
5
6
  export interface EvalOptions {
@@ -11,6 +12,9 @@ export interface EvalOptions {
11
12
  readonly runs?: number;
12
13
  readonly concurrency?: number;
13
14
  readonly model?: string;
15
+ readonly judgeModel?: string;
16
+ readonly solvingTimeoutMs?: number;
17
+ readonly judge?: Judge;
14
18
  readonly onRun?: RunnerOptions["onRun"];
15
19
  }
16
20
  export declare function runEval(options: EvalOptions): Promise<Result<EvalReport, CaseLoadError[]>>; // Takes EvalOptions; resolves to a Result parameterized by EvalReport and CaseLoadError[].
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,cAAc,CAAC;AACpD,OAAO,EAA+B,KAAK,aAAa,EAAE,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC;AAC9F,OAAO,EAAgC,KAAK,UAAU,EAAE,MAAM,aAAa,CAAC;AAC5E,OAAO,EAAY,KAAK,aAAa,EAAE,MAAM,aAAa,CAAC;AAG3D,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,KAAK,CAAC,EAAE,aAAa,CAAC,OAAO,CAAC,CAAC;CACzC;AAED,wBAAsB,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,aAAa,EAAE,CAAC,CAAC,CA6BhG;AAWD,YAAY,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACpD,YAAY,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,cAAc,CAAC;AACpD,OAAO,EAA+B,KAAK,aAAa,EAAE,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC;AAE9F,OAAO,EAAwB,KAAK,KAAK,EAAE,MAAM,YAAY,CAAC;AAC9D,OAAO,EAAe,KAAK,UAAU,EAAE,MAAM,aAAa,CAAC;AAC3D,OAAO,EAAY,KAAK,aAAa,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IACnC,QAAQ,CAAC,KAAK,CAAC,EAAE,KAAK,CAAC;IACvB,QAAQ,CAAC,KAAK,CAAC,EAAE,aAAa,CAAC,OAAO,CAAC,CAAC;CACzC;AAED,wBAAsB,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,aAAa,EAAE,CAAC,CAAC,CA6BhG;AAkCD,YAAY,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACpD,YAAY,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC"}