@jean.gnc/harness-kit 0.11.2 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,13 +7,13 @@ export declare const AgentSchema: z.ZodObject<{
7
7
  }, "strict", z.ZodTypeAny, {
8
8
  name: string;
9
9
  description: string;
10
- tools?: string | string[] | undefined;
11
10
  model?: string | undefined;
11
+ tools?: string | string[] | undefined;
12
12
  }, {
13
13
  name: string;
14
14
  description: string;
15
- tools?: string | string[] | undefined;
16
15
  model?: string | undefined;
16
+ tools?: string | string[] | undefined;
17
17
  }>;
18
18
  export type Agent = z.infer<typeof AgentSchema>;
19
19
  export declare function defineAgent(agent: Agent): Agent;
package/dist/cli.js CHANGED
@@ -3,8 +3,12 @@ import { readFileSync } from "node:fs";
3
3
  import { fileURLToPath } from "node:url";
4
4
  import { defineCommand, runMain } from "citty";
5
5
  import { z } from "zod";
6
+ import { mkdir, writeFile } from "node:fs/promises";
7
+ import { dirname } from "node:path";
6
8
  import { compile } from "./compile.js";
7
9
  import { CHECK_MODES, check } from "./check/index.js";
10
+ import { formatConsole, runEval, toJson } from "./eval/index.js";
11
+ import { TIERS } from "./eval/schema.js";
8
12
  import { initHarness } from "./init/index.js";
9
13
  import { install, uninstall } from "./install/index.js";
10
14
  import { parseInstallMode } from "./install/mode.js";
@@ -166,6 +170,70 @@ const lintCmd = defineCommand({
166
170
  process.exit(1);
167
171
  },
168
172
  });
/**
 * Validates the value of the `--tier` flag.
 *
 * @param value - raw flag value supplied on the command line
 * @returns `value` unchanged when it is one of the entries in `TIERS`
 * @throws Error naming the rejected value and listing every valid tier
 */
function parseTier(value) {
    // Guard clause: reject first so the happy path is the final statement.
    if (!TIERS.includes(value)) {
        throw new Error(`Unknown tier "${value}". Valid: ${TIERS.join(", ")}`);
    }
    return value;
}
/**
 * Parses a CLI flag value as a strictly positive, base-10 integer.
 *
 * Only plain digit strings are accepted (surrounding whitespace is ignored).
 * Inputs that a bare `Number()` conversion would silently coerce — hex
 * ("0x10"), exponent notation ("1e3"), `true`, the empty string — are
 * rejected, as are values too large to be represented exactly.
 *
 * @param value - raw flag value as received from the argument parser
 * @param flag - flag name without the leading dashes; used in the error message
 * @returns the parsed integer, always >= 1
 * @throws Error when `value` is not a positive decimal integer
 */
function parsePositiveInt(value, flag) {
    const text = String(value).trim();
    // Gate on the textual form first so Number() never sees hex/exponent syntax.
    const parsed = /^\d+$/.test(text) ? Number(text) : Number.NaN;
    // isSafeInteger (rather than isInteger) also rejects digit strings beyond 2^53 - 1,
    // which Number() would round to a different value.
    if (!Number.isSafeInteger(parsed) || parsed < 1) {
        throw new Error(`--${flag} must be a positive integer, got "${value}"`);
    }
    return parsed;
}
/**
 * `eval` subcommand: runs the eval cases through `runEval`, prints the
 * console report, optionally writes a JSON report, and exits with status 1
 * when cases fail to load or any case fails.
 */
const evalCmd = defineCommand({
    meta: {
        name: "eval",
        description: "Run skill-routing evals against the installed harness",
    },
    args: {
        cases: {
            type: "string",
            default: "./evals/cases",
            description: "directory of eval case files",
        },
        cwd: {
            type: "string",
            default: ".",
            description: "working directory for claude -p sessions",
        },
        suite: {
            type: "string",
            description: "only run cases from this suite",
        },
        case: {
            type: "string",
            description: "only run the case with this id",
        },
        tier: {
            type: "string",
            description: `only run this tier: ${TIERS.join(" | ")}`,
        },
        runs: {
            type: "string",
            description: "runs per case (overrides per-case default)",
        },
        concurrency: {
            type: "string",
            default: "1",
            description: "max concurrent sessions (parallel sessions interfere with routing; default 1)",
        },
        model: {
            type: "string",
            description: "model for claude -p (default: user's configured model)",
        },
        json: {
            type: "string",
            description: "write machine-readable results to this path",
        },
    },
    async run({ args }) {
        // Validate flags in a fixed order (concurrency, tier, runs) so the first
        // reported error is stable.
        const concurrency = parsePositiveInt(args.concurrency, "concurrency");
        // Optional filters are only forwarded when the flag was actually given.
        const overrides = {
            ...(args.suite === undefined ? {} : { suite: args.suite }),
            ...(args.case === undefined ? {} : { caseId: args.case }),
            ...(args.tier === undefined ? {} : { tier: parseTier(args.tier) }),
            ...(args.runs === undefined ? {} : { runs: parsePositiveInt(args.runs, "runs") }),
            ...(args.model === undefined ? {} : { model: args.model }),
        };
        const outcome = await runEval({
            casesDir: args.cases,
            cwd: args.cwd,
            concurrency,
            ...overrides,
        });

        if (!outcome.ok) {
            // Load/validation errors: one "file: message" line each on stderr.
            for (const problem of outcome.error) {
                console.error(`${problem.file}: ${problem.message}`);
            }
            process.exit(1);
        }

        const report = outcome.value;
        console.log(formatConsole(report));

        const jsonPath = args.json;
        if (jsonPath) {
            // Create missing parent directories before writing the report.
            await mkdir(dirname(jsonPath), { recursive: true });
            await writeFile(jsonPath, toJson(report) + "\n");
        }

        // The JSON report (if requested) is written before a failing exit.
        if (report.failed > 0) {
            process.exit(1);
        }
    },
});
169
237
  const main = defineCommand({
170
238
  meta: {
171
239
  name: "harness",
@@ -175,6 +243,7 @@ const main = defineCommand({
175
243
  subCommands: {
176
244
  check: checkCmd,
177
245
  compile: compileCmd,
246
+ eval: evalCmd,
178
247
  init: initCmd,
179
248
  install: installCmd,
180
249
  lint: lintCmd,
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,OAAO,CAAC;AAC/C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,WAAW,EAAE,KAAK,EAA2C,MAAM,kBAAkB,CAAC;AAC/F,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAE7E,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAEnE,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,iBAAiB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC3E,MAAM,GAAG,GAAG,iBAAiB,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;AAE/E,SAAS,WAAW,CAAC,KAAa;IAChC,OAAQ,WAAiC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,IAAI,WAAW,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACrC,MAAM,IAAI,KAAK,CAAC,uBAAuB,KAAK,aAAa,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACrF,CAAC;AAED,MAAM,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,kCAAkC,EAAE;IAC1E,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE;QACrE,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE;QACtE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,sCAAsC,EAAE;QAC3F,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;KACjF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,CAAC;YACZ,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,WAAW,GAAG;IAClB,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,WA
AW,EAAE;IACrE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,sCAAsC,EAAE;IAC3F,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;IAChF,SAAS,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,6BAA6B,EAAE;CAClF,CAAC;AAEX,MAAM,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,qDAAqD,EAAE;IAC7F,IAAI,EAAE;QACJ,GAAG,WAAW;QACd,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,gCAAgC,EAAE;KAC1F;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,IAAI,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,OAAO,CAAC;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,OAAO;YACP,IAAI;YACJ,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACxB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,aAAa,CAAC;IACjC,IAAI,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,8CAA8C,EAAE;IACxF,IAAI,EAAE,WAAW;IACjB,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,SAAS,CAAC;YACd,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,OAAO;YACP,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACxB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,2EAA2E;KACzF;IACD,IAAI,EAAE;QACJ,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,4BAA4B,EAAE;QACjF,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,kBAAkB,EAAE;QAChF,OAAO,EAAE;YACP,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE,mCAAmC,cAAc,EAAE;iBAC7D,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;iBAClB,IAAI,CAAC,IAAI,CAAC,GAAG;SACjB;QACD,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;KACjF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO;aACzB,KAAK,CAAC,GAAG,CAAC;aACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC
,IAAI,EAAE,CAAC;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QACrF,cAAc,CAAC,OAAO,EAAE,cAAc,EAAE,CAAC,CAAC;QAC1C,MAAM,WAAW,CAAC;YAChB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,OAAO;YACP,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,QAAQ,GAAG,aAAa,CAAC;IAC7B,IAAI,EAAE;QACJ,IAAI,EAAE,OAAO;QACb,WAAW,EAAE,8DAA8D;KAC5E;IACD,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE;QACrE,IAAI,EAAE;YACJ,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,2CAA2C;SACzD;QACD,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,2BAA2B,EAAE;KACtF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7F,MAAM,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;YAC9E,OAAO,CAAC,GAAG,CACT,WAAW,KAAK,kBAAkB,MAAM,CAAC,cAAc,CAAC,MAAM,aAAa,SAAS,GAAG,CACxF,CAAC;YACF,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,YAAY,eAAe,CAAC,CAAC;YAC3D,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACpD,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,CAAC;QACD,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,aAAa,CAAC,CAAC;YACxD,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;CACF,CAAC,CAAC;AAEH,SAAS,eAAe,CAAC,CAAqB;IAC5C,OAAO,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC
,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,OAAO,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,OAAO,EAAE,CAAC;AAC1E,CAAC;AAED,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,qEAAqE;KACnF;IACD,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE;QACtE,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,2BAA2B,EAAE;KACtF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QACtE,IAAI,MAAM,CAAC,UAAU,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,IAAI,GAAG,aAAa,CAAC;IACzB,IAAI,EAAE;QACJ,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,WAAW,EACT,yFAAyF;KAC5F;IACD,WAAW,EAAE;QACX,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,UAAU;QACnB,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,UAAU;QACnB,IAAI,EAAE,OAAO;QACb,SAAS,EAAE,YAAY;KACxB;CACF,CAAC,CAAC;AAEH,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC"}
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,OAAO,CAAC;AAC/C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,WAAW,EAAE,KAAK,EAA2C,MAAM,kBAAkB,CAAC;AAC/F,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AACjE,OAAO,EAAE,KAAK,EAAa,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAE7E,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAEnE,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,iBAAiB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC3E,MAAM,GAAG,GAAG,iBAAiB,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;AAE/E,SAAS,WAAW,CAAC,KAAa;IAChC,OAAQ,WAAiC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,IAAI,WAAW,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACrC,MAAM,IAAI,KAAK,CAAC,uBAAuB,KAAK,aAAa,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACrF,CAAC;AAED,MAAM,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,kCAAkC,EAAE;IAC1E,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE;QACrE,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE;QACtE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,sCAAsC,EAAE;QAC3F,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;KACjF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,CAAC;YACZ,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EAAE,IAAI,CAAC,G
AAG;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,WAAW,GAAG;IAClB,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW,EAAE;IACrE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,sCAAsC,EAAE;IAC3F,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;IAChF,SAAS,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,6BAA6B,EAAE;CAClF,CAAC;AAEX,MAAM,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,qDAAqD,EAAE;IAC7F,IAAI,EAAE;QACJ,GAAG,WAAW;QACd,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,gCAAgC,EAAE;KAC1F;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,IAAI,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,OAAO,CAAC;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,OAAO;YACP,IAAI;YACJ,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACxB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,aAAa,CAAC;IACjC,IAAI,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,8CAA8C,EAAE;IACxF,IAAI,EAAE,WAAW;IACjB,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,SAAS,CAAC;YACd,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,OAAO;YACP,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACxB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,2EAA2E;KACzF;IACD,IAAI,EAAE;QACJ,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,4BAA4B,EAAE;QACjF,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,kBAAkB,EAAE;QAChF,OAAO,EAAE;YACP,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE,mCAAmC,cAAc,EAAE;iBAC7D,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;iBAClB,IAAI,CAAC,IAAI,CAAC,GAAG;SACjB;QACD,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE
,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE;KACjF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO;aACzB,KAAK,CAAC,GAAG,CAAC;aACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QACrF,cAAc,CAAC,OAAO,EAAE,cAAc,EAAE,CAAC,CAAC;QAC1C,MAAM,WAAW,CAAC;YAChB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,OAAO;YACP,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;IACL,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,QAAQ,GAAG,aAAa,CAAC;IAC7B,IAAI,EAAE;QACJ,IAAI,EAAE,OAAO;QACb,WAAW,EAAE,8DAA8D;KAC5E;IACD,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE;QACrE,IAAI,EAAE;YACJ,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,2CAA2C;SACzD;QACD,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,2BAA2B,EAAE;KACtF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7F,MAAM,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;YAC9E,OAAO,CAAC,GAAG,CACT,WAAW,KAAK,kBAAkB,MAAM,CAAC,cAAc,CAAC,MAAM,aAAa,SAAS,GAAG,CACxF,CAAC;YACF,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,YAAY,eAAe,CAAC,CAAC;YAC3D,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACpD,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,CAAC;QACD,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAChB,OAAO,CAAC,GAAG,CAA
C,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,aAAa,CAAC,CAAC;YACxD,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;CACF,CAAC,CAAC;AAEH,SAAS,eAAe,CAAC,CAAqB;IAC5C,OAAO,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,OAAO,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,OAAO,EAAE,CAAC;AAC1E,CAAC;AAED,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,qEAAqE;KACnF;IACD,IAAI,EAAE;QACJ,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE;QACtE,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,2BAA2B,EAAE;KACtF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QACtE,IAAI,MAAM,CAAC,UAAU,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC;CACF,CAAC,CAAC;AAEH,SAAS,SAAS,CAAC,KAAa;IAC9B,IAAK,KAA2B,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAa,CAAC;IACvE,MAAM,IAAI,KAAK,CAAC,iBAAiB,KAAK,aAAa,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACzE,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa,EAAE,IAAY;IACnD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7B,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,KAAK,IAAI,qCAAqC,KAAK,GAAG,CAAC,CAAC;IAC1E,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,OAAO,GAAG,aAAa,CAAC;IAC5B,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,uDAAuD;KACrE;IACD,IAAI,EAAE;QACJ,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,eAAe;YACxB,WAAW,EAAE,8BAA8B;SAC5C;QACD,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,0CAA0C,EAAE;QAC9F,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gCAAgC,EAAE;QACxE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gCAAgC,EAAE;QACvE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,uBAAuB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE;QACjF,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,4CAA4C,EAAE;QACnF,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,GAAG;YACZ,WAAW,EAAE,+EAA+E;SAC7F;QACD,KAAK,EAAE;YACL,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,wDAAwD;SACtE;QACD,IAA
I,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6CAA6C,EAAE;KACrF;IACD,GAAG,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QACtB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC;YAC3B,QAAQ,EAAE,IAAI,CAAC,KAAK;YACpB,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,WAAW,EAAE,gBAAgB,CAAC,IAAI,CAAC,WAAW,EAAE,aAAa,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;YACtD,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC;YACrD,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,gBAAgB,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC;YAC7E,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;SACvD,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;YACf,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,KAAK;gBAAE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;YACvE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACzC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,MAAM,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACrD,MAAM,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC;QAC1D,CAAC;QACD,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,IAAI,GAAG,aAAa,CAAC;IACzB,IAAI,EAAE;QACJ,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,WAAW,EACT,yFAAyF;KAC5F;IACD,WAAW,EAAE;QACX,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,UAAU;QACnB,IAAI,EAAE,OAAO;QACb,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,UAAU;QACnB,IAAI,EAAE,OAAO;QACb,SAAS,EAAE,YAAY;KACxB;CACF,CAAC,CAAC;AAEH,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC"}
@@ -0,0 +1,14 @@
1
+ import { type Result } from "../result.js";
2
+ import { type EvalCase, type Tier } from "./schema.js";
/**
 * An eval case together with its provenance: the suite and tier it was
 * loaded under and the file it was read from.
 */
export interface LoadedCase extends EvalCase {
    /** Suite the case belongs to. */
    readonly suite: string;
    /** Tier the case belongs to. */
    readonly tier: Tier;
    /** Path of the case file this case was loaded from. */
    readonly file: string;
}
/** A problem found while loading or checking eval cases, tied to a file. */
export interface CaseLoadError {
    /** Path of the file the problem relates to. */
    readonly file: string;
    /** Human-readable description of the problem. */
    readonly message: string;
}
/**
 * Loads the eval cases found under a directory.
 *
 * @param casesDir - directory to read case files from
 * @returns a `Result` holding either the loaded cases or the list of load errors
 */
export declare function loadCases(
    casesDir: string,
): Promise<Result<LoadedCase[], CaseLoadError[]>>;
/**
 * Checks loaded cases against a set of installed ids.
 *
 * @param cases - cases to check
 * @param installedIds - ids considered installed
 * @returns one error per problem found; empty when there are none
 */
export declare function unresolvedSkills(
    cases: readonly LoadedCase[],
    installedIds: ReadonlySet<string>,
): CaseLoadError[];
14
+ //# sourceMappingURL=cases.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cases.d.ts","sourceRoot":"","sources":["../../src/eval/cases.ts"],"names":[],"mappings":"AAMA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,cAAc,CAAC;AACpD,OAAO,EAIL,KAAK,QAAQ,EACb,KAAK,IAAI,EACV,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,UAAW,SAAQ,QAAQ;IAC1C,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAsB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,aAAa,EAAE,CAAC,CAAC,CAuBhG;AAED,wBAAgB,gBAAgB,CAC9B,KAAK,EAAE,SAAS,UAAU,EAAE,EAC5B,YAAY,EAAE,WAAW,CAAC,MAAM,CAAC,GAChC,aAAa,EAAE,CAajB"}
@@ -0,0 +1,84 @@
1
+ import { readdir, readFile } from "node:fs/promises";
2
+ import { join } from "node:path";
3
+ import yaml from "js-yaml";
4
+ import { formatZodIssues } from "../errors/zod.js";
5
+ import { err, ok } from "../result.js";
6
+ import { CaseFileSchema, expectedSkills, } from "./schema.js";
/**
 * Reads every YAML case file under `casesDir` and flattens the cases into one list.
 *
 * Each case is tagged with the `suite` and `tier` of the file that declared it
 * and with that file's path. Case ids must be unique across all files; a
 * repeated id is reported as an error against the file containing the repeat.
 *
 * @param casesDir - directory searched (recursively) for `.yaml` / `.yml` files
 * @returns `ok(cases)` when every file loaded cleanly, otherwise `err(errors)`
 *          with all problems collected across all files
 */
export async function loadCases(casesDir) {
    const problems = [];
    const cases = [];
    const knownIds = new Set();

    const paths = await collectYamlFiles(casesDir);
    for (const path of paths) {
        const outcome = await parseFile(path);
        if (!outcome.ok) {
            // Keep going so every broken file is reported in one pass.
            problems.push(outcome.error);
            continue;
        }
        const { suite, tier, cases: declared } = outcome.value;
        for (const evalCase of declared) {
            const { id } = evalCase;
            if (knownIds.has(id)) {
                problems.push({ file: path, message: `duplicate case id "${id}"` });
            }
            else {
                knownIds.add(id);
                cases.push({ ...evalCase, suite, tier, file: path });
            }
        }
    }

    if (problems.length > 0) {
        return err(problems);
    }
    return ok(cases);
}
/**
 * Reports every skill that a case expects but that is not installed.
 *
 * @param cases - loaded cases to check
 * @param installedIds - ids of the installed skills
 * @returns one error per (case, missing skill) pair, in case order; empty when
 *          every expected skill is installed
 */
export function unresolvedSkills(cases, installedIds) {
    return cases.flatMap((evalCase) => [...expectedSkills(evalCase.expect)]
        .filter((skillId) => !installedIds.has(skillId))
        .map((skillId) => ({
            file: evalCase.file,
            message: `case "${evalCase.id}" expects skill "${skillId}", which is not installed`,
        })));
}
/**
 * Reads one case file, parses it as YAML and validates it against `CaseFileSchema`.
 *
 * @param file - path of the case file
 * @returns `ok(data)` with the validated document, or `err({ file, message })`
 *          describing the first stage that failed (read, YAML parse, validation)
 */
async function parseFile(file) {
    // Every failure is reported against the same file path.
    const fail = (message) => err({ file, message });

    let text;
    try {
        text = await readFile(file, "utf8");
    }
    catch (cause) {
        return fail(`cannot read file: ${cause.message}`);
    }

    let document;
    try {
        document = yaml.load(text);
    }
    catch (cause) {
        return fail(`invalid YAML: ${cause.message}`);
    }

    const validation = CaseFileSchema.safeParse(document);
    return validation.success
        ? ok(validation.data)
        : fail(formatZodIssues(validation.error).join("; "));
}
/**
 * Recursively lists the `.yaml` / `.yml` files under `dir`.
 *
 * @param dir - directory to search
 * @returns the matching file paths (each joined onto `dir`), sorted with the
 *          default string ordering; an empty list when `dir` cannot be read
 */
async function collectYamlFiles(dir) {
    let entries;
    try {
        entries = await readdir(dir, { withFileTypes: true });
    }
    catch {
        // An unreadable or missing directory simply contributes no files.
        return [];
    }

    const found = [];
    for (const entry of entries) {
        const path = join(dir, entry.name);
        if (entry.isDirectory()) {
            const nested = await collectYamlFiles(path);
            found.push(...nested);
            continue;
        }
        const isYaml = entry.isFile() && [".yaml", ".yml"].some((ext) => entry.name.endsWith(ext));
        if (isYaml) {
            found.push(path);
        }
    }
    return found.sort();
}
84
+ //# sourceMappingURL=cases.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cases.js","sourceRoot":"","sources":["../../src/eval/cases.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,IAAI,MAAM,SAAS,CAAC;AAE3B,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,cAAc,CAAC;AACpD,OAAO,EACL,cAAc,EACd,cAAc,GAIf,MAAM,aAAa,CAAC;AAarB,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB;IAC9C,MAAM,KAAK,GAAG,MAAM,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAiB,EAAE,CAAC;IAChC,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC1B,SAAS;QACX,CAAC;QACD,KAAK,MAAM,QAAQ,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YAC1C,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC7B,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,sBAAsB,QAAQ,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC;gBACrE,SAAS;YACX,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,QAAQ,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;QACzF,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,KAA4B,EAC5B,YAAiC;IAEjC,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;QAC7B,KAAK,MAAM,EAAE,IAAI,cAAc,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACjD,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC1B,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,QAAQ,CAAC,IAAI;oBACnB,OAAO,EAAE,SAAS,QAAQ,CAAC,EAAE,oBAAoB,EAAE,2BAA2B;iBAC/E,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,IAAY;IACnC,IAAI,GAAW,CAAC;IAChB,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACrC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,qBAAsB
,KAAe,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IACjF,CAAC;IACD,IAAI,GAAY,CAAC;IACjB,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACvB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,iBAAkB,KAAe,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IAC7E,CAAC;IACD,MAAM,MAAM,GAAG,cAAc,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IAC7C,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,eAAe,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1E,CAAC;IACD,OAAO,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;AACzB,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAW;IACzC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,OAAO,CAAC;IACZ,IAAI,CAAC;QACH,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC;aAAM,IAAI,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC;YAC3F,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;AACtB,CAAC"}
@@ -0,0 +1,14 @@
/** Why skill detection for a session ended. */
export type ExitReason =
    | "skill"
    | "no-skill"
    | "timeout"
    | "stream-end";
/** Outcome of watching one session for skill invocations. */
export interface DetectionResult {
    /** Skill ids that were observed, in the order they were seen. */
    readonly observed: readonly string[];
    /** The first observed skill id, or `null` when none was observed. */
    readonly firstSkill: string | null;
    /** Why detection ended. */
    readonly exitReason: ExitReason;
}
/** Incremental detector that is fed session output one line at a time. */
interface Detector {
    /** Feeds one line of output to the detector. */
    readonly push: (line: string) => void;
    /** `true` once the detector needs no further input. */
    readonly done: boolean;
    /** Builds the detection result, given the reason the caller stopped. */
    readonly result: (reason: ExitReason) => DetectionResult;
}
/**
 * Creates a new skill detector.
 *
 * @param stopAfter - optional count after which the detector stops
 * @returns a fresh detector
 */
export declare function createDetector(
    stopAfter?: number,
): Detector;
13
+ export {};
14
+ //# sourceMappingURL=detect.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"detect.d.ts","sourceRoot":"","sources":["../../src/eval/detect.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,UAAU,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,GAAG,YAAY,CAAC;AAEzE,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;CACjC;AAED,UAAU,QAAQ;IAChB,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACtC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,UAAU,KAAK,eAAe,CAAC;CAC1D;AAwBD,wBAAgB,cAAc,CAAC,SAAS,SAAI,GAAG,QAAQ,CA6EtD"}
@@ -0,0 +1,105 @@
1
+ import { FQ_ID } from "../ids.js";
/**
 * Treats an arbitrary value as a property bag.
 *
 * @param value - any value, typically the result of `JSON.parse`
 * @returns `value` itself when it is a non-null object (arrays included),
 *          otherwise a fresh empty object
 */
function asRecord(value) {
    if (value === null || typeof value !== "object") {
        return {};
    }
    return value;
}
/**
 * Tells whether a content block is a `tool_use` block for the tool named "Skill".
 *
 * @param block - content block as a property bag
 * @returns `true` only when `type` is "tool_use" and `name` is "Skill"
 */
function isSkillToolUse(block) {
    if (block["type"] !== "tool_use") {
        return false;
    }
    return block["name"] === "Skill";
}
/**
 * Extracts the skill id from a Skill tool input.
 *
 * The id is read from the `skill` property, falling back to `command` when
 * `skill` is null or undefined.
 *
 * @param input - tool input; non-objects are treated as empty
 * @returns the id when it is a string matching `FQ_ID`, otherwise `null`
 */
function skillIdOf(input) {
    const fields = asRecord(input);
    const candidate = fields["skill"] ?? fields["command"];
    if (typeof candidate !== "string") {
        return null;
    }
    return FQ_ID.test(candidate) ? candidate : null;
}
/**
 * Extracts a skill id from the JSON text of a Skill tool input.
 *
 * @param buffer - JSON text, possibly incomplete
 * @returns the skill id, or `null` when the text does not parse or holds no valid id
 */
function skillIdInJson(buffer) {
    let id = null;
    try {
        id = skillIdOf(JSON.parse(buffer));
    }
    catch {
        // Text that is not (yet) valid JSON yields no id.
    }
    return id;
}
/**
 * Creates a detector that watches a line-delimited JSON event stream for
 * Skill tool invocations.
 *
 * Three top-level event types are handled:
 * - "stream_event": the partial input JSON of a Skill `tool_use` block is
 *   accumulated across deltas and resolved as soon as it yields a valid id,
 *   or when the block or message stops;
 * - "assistant": every Skill `tool_use` block in the message content is recorded;
 * - "result": marks the detector as done.
 *
 * @param stopAfter - number of observed skills after which the detector is done (default 1)
 * @returns an object with `push(line)`, a `done` getter and `result(reason)`
 */
export function createDetector(stopAfter = 1) {
    const seen = [];
    // Partial input JSON of the Skill tool_use block currently streaming; null when none.
    let skillInputJson = null;
    let isDone = false;

    // Records an id (when there is one) and reports whether detection is finished.
    const note = (id) => {
        if (id !== null) {
            seen.push(id);
        }
        if (seen.length >= stopAfter) {
            isDone = true;
        }
        return isDone;
    };

    // Resolves whatever input JSON has accumulated and clears the buffer.
    const settlePending = () => {
        if (skillInputJson !== null) {
            note(skillIdInJson(skillInputJson));
            skillInputJson = null;
        }
    };

    // Handles the inner event of a "stream_event" envelope.
    const onStreamEvent = (inner) => {
        switch (inner["type"]) {
            case "content_block_start":
                if (isSkillToolUse(asRecord(inner["content_block"]))) {
                    skillInputJson = "";
                }
                break;
            case "content_block_delta": {
                // Deltas only matter while a Skill block is open.
                if (skillInputJson === null) {
                    break;
                }
                const delta = asRecord(inner["delta"]);
                if (delta["type"] !== "input_json_delta") {
                    break;
                }
                const chunk = delta["partial_json"];
                if (typeof chunk === "string") {
                    skillInputJson += chunk;
                }
                // Resolve early once the buffered text already yields a valid id.
                if (skillIdInJson(skillInputJson) !== null) {
                    settlePending();
                }
                break;
            }
            case "content_block_stop":
                settlePending();
                break;
            case "message_stop":
                settlePending();
                isDone = true;
                break;
            default:
                break;
        }
    };

    // Handles a complete "assistant" message.
    const onAssistantMessage = (message) => {
        const content = asRecord(message)["content"];
        if (!Array.isArray(content)) {
            return;
        }
        for (const item of content) {
            const block = asRecord(item);
            if (isSkillToolUse(block) && note(skillIdOf(block["input"]))) {
                return;
            }
        }
    };

    return {
        get done() {
            return isDone;
        },
        push(line) {
            if (isDone) {
                return;
            }
            const text = line.trim();
            if (text === "") {
                return;
            }
            let payload;
            try {
                payload = JSON.parse(text);
            }
            catch {
                // Lines that are not JSON are ignored.
                return;
            }
            const event = asRecord(payload);
            switch (event["type"]) {
                case "stream_event":
                    onStreamEvent(asRecord(event["event"]));
                    break;
                case "assistant":
                    onAssistantMessage(event["message"]);
                    break;
                case "result":
                    isDone = true;
                    break;
                default:
                    break;
            }
        },
        result(reason) {
            // An observed skill takes precedence over the caller-supplied reason.
            const exitReason = seen.length > 0 ? "skill" : reason;
            return {
                observed: seen,
                firstSkill: seen[0] ?? null,
                exitReason,
            };
        },
    };
}
105
+ //# sourceMappingURL=detect.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"detect.js","sourceRoot":"","sources":["../../src/eval/detect.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,WAAW,CAAC;AAgBlC,SAAS,QAAQ,CAAC,KAAc;IAC9B,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAC,CAAC,CAAE,KAAiC,CAAC,CAAC,CAAC,EAAE,CAAC;AAC/F,CAAC;AAED,SAAS,cAAc,CAAC,KAA8B;IACpD,OAAO,KAAK,CAAC,MAAM,CAAC,KAAK,UAAU,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,OAAO,CAAC;AACnE,CAAC;AAED,SAAS,SAAS,CAAC,KAAc;IAC/B,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,SAAS,CAAC,CAAC;IACnD,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;AACvE,CAAC;AAED,SAAS,aAAa,CAAC,MAAc;IACnC,IAAI,CAAC;QACH,OAAO,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IACvC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,SAAS,GAAG,CAAC;IAC1C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,gBAAgB,GAAkB,IAAI,CAAC;IAC3C,IAAI,QAAQ,GAAG,KAAK,CAAC;IAErB,SAAS,MAAM,CAAC,EAAiB;QAC/B,IAAI,EAAE,KAAK,IAAI;YAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnC,IAAI,QAAQ,CAAC,MAAM,IAAI,SAAS;YAAE,QAAQ,GAAG,IAAI,CAAC;QAClD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,SAAS,YAAY;QACnB,IAAI,gBAAgB,KAAK,IAAI;YAAE,OAAO;QACtC,MAAM,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC,CAAC;QACxC,gBAAgB,GAAG,IAAI,CAAC;IAC1B,CAAC;IAED,OAAO;QACL,IAAI,IAAI;YACN,OAAO,QAAQ,CAAC;QAClB,CAAC;QACD,IAAI,CAAC,IAAY;YACf,IAAI,QAAQ;gBAAE,OAAO;YACrB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO;gBAAE,OAAO;YACrB,IAAI,MAAe,CAAC;YACpB,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/B,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;YACD,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;YAE3B,IAAI,IAAI,KAAK,cAAc,EAAE,CAAC;gBAC5B,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;gBACpC,MAAM,MAAM,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC;gBAE1B,IAAI,MAAM,KAAK,qBAAqB,EAAE,CAAC;oBACrC,IAAI,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC;wBAAE,gBAAgB,GAAG,EAAE,CAAC;gBAC3E,CAAC;qBAAM,IAAI
,MAAM,KAAK,qBAAqB,IAAI,gBAAgB,KAAK,IAAI,EAAE,CAAC;oBACzE,MAAM,KAAK,GAAG,QAAQ,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;oBACpC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,kBAAkB,EAAE,CAAC;wBACzC,MAAM,OAAO,GAAG,KAAK,CAAC,cAAc,CAAC,CAAC;wBACtC,IAAI,OAAO,OAAO,KAAK,QAAQ;4BAAE,gBAAgB,IAAI,OAAO,CAAC;wBAC7D,IAAI,aAAa,CAAC,gBAAgB,CAAC,KAAK,IAAI;4BAAE,YAAY,EAAE,CAAC;oBAC/D,CAAC;gBACH,CAAC;qBAAM,IAAI,MAAM,KAAK,oBAAoB,EAAE,CAAC;oBAC3C,YAAY,EAAE,CAAC;gBACjB,CAAC;qBAAM,IAAI,MAAM,KAAK,cAAc,EAAE,CAAC;oBACrC,YAAY,EAAE,CAAC;oBACf,QAAQ,GAAG,IAAI,CAAC;gBAClB,CAAC;gBACD,OAAO;YACT,CAAC;YAED,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;gBACzB,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;gBACtD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;oBACzD,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;oBAC7B,IAAI,cAAc,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;wBAAE,OAAO;gBACzE,CAAC;gBACD,OAAO;YACT,CAAC;YAED,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACtB,QAAQ,GAAG,IAAI,CAAC;YAClB,CAAC;QACH,CAAC;QACD,MAAM,CAAC,MAAkB;YACvB,OAAO;gBACL,QAAQ;gBACR,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI;gBAC/B,UAAU,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;aACnD,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -0,0 +1,20 @@
1
+ import { type Result } from "../result.js";
2
+ import { type CaseLoadError, type LoadedCase } from "./cases.js";
3
+ import { type EvalReport } from "./report.js";
4
+ import { type RunnerOptions } from "./runner.js";
5
+ export interface EvalOptions { // Options accepted by runEval.
6
+ readonly casesDir: string; // Directory the eval cases are loaded from; also reported as the `file` of the error when no case matches the filters.
7
+ readonly cwd: string; // Working directory forwarded to the runner.
8
+ readonly suite?: string; // When set, only cases whose suite equals this value are kept.
9
+ readonly caseId?: string; // When set, only the case with this id is kept.
10
+ readonly tier?: LoadedCase["tier"]; // When set, only cases of this tier are kept.
11
+ readonly runs?: number; // Forwarded to the runner only when defined.
12
+ readonly concurrency?: number; // Forwarded to the runner only when defined.
13
+ readonly model?: string; // Forwarded to the runner only when defined.
14
+ readonly onRun?: RunnerOptions["onRun"]; // Forwarded to the runner only when defined.
15
+ }
16
+ export declare function runEval(options: EvalOptions): Promise<Result<EvalReport, CaseLoadError[]>>; // Loads, filters, runs and scores eval cases; resolves to an error list when loading fails, no case matches the filters, or unresolvedSkills reports problems.
17
+ export type { EvalReport, CaseReport } from "./report.js";
18
+ export { formatConsole, toJson } from "./report.js";
19
+ export type { LoadedCase, CaseLoadError } from "./cases.js";
20
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,cAAc,CAAC;AACpD,OAAO,EAA+B,KAAK,aAAa,EAAE,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC;AAC9F,OAAO,EAAgC,KAAK,UAAU,EAAE,MAAM,aAAa,CAAC;AAC5E,OAAO,EAAY,KAAK,aAAa,EAAE,MAAM,aAAa,CAAC;AAG3D,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,KAAK,CAAC,EAAE,aAAa,CAAC,OAAO,CAAC,CAAC;CACzC;AAED,wBAAsB,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,aAAa,EAAE,CAAC,CAAC,CA6BhG;AAWD,YAAY,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACpD,YAAY,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC"}
@@ -0,0 +1,46 @@
1
+ import { defaultSources, discoverInstalled, indexInstalled } from "../installed.js";
2
+ import { err, ok } from "../result.js";
3
+ import { loadCases, unresolvedSkills } from "./cases.js";
4
+ import { buildReport } from "./report.js";
5
+ import { runCases } from "./runner.js";
6
+ import { scoreCase } from "./score.js";
7
/**
 * Runs an evaluation end to end: load the case files, keep the cases that
 * match the caller's filters, check the selected cases against the installed
 * skills, execute them and score every case into a report.
 *
 * @param options See EvalOptions: cases directory, working directory,
 *   optional filters (`suite`, `caseId`, `tier`) and optional runner settings.
 * @returns `ok(report)` on success, or `err([...])` when loading fails, when
 *   no case matches the filters, or when `unresolvedSkills` reports problems.
 */
export async function runEval(options) {
    const { casesDir, cwd, runs, concurrency, model, onRun } = options;

    const loadResult = await loadCases(casesDir);
    if (!loadResult.ok) {
        return err(loadResult.error);
    }

    const chosen = select(loadResult.value, options);
    if (chosen.length === 0) {
        return err([{ file: casesDir, message: "no cases matched the given filters" }]);
    }

    // Validate the selected cases against the set of installed skill ids
    // before spending any time running them.
    const discovered = await discoverInstalled(defaultSources());
    const knownSkillIds = new Set(indexInstalled(discovered).skills.keys());
    const missing = unresolvedSkills(chosen, knownSkillIds);
    if (missing.length > 0) {
        return err(missing);
    }

    // Optional settings are only present on the runner options when the caller
    // supplied them, so the runner's own defaults apply otherwise.
    const runnerOptions = {
        cwd,
        ...(runs === undefined ? {} : { runs }),
        ...(concurrency === undefined ? {} : { concurrency }),
        ...(model === undefined ? {} : { model }),
        ...(onRun === undefined ? {} : { onRun }),
    };

    const outcomes = await runCases(chosen, runnerOptions);
    const reports = [];
    for (const { evalCase, runs: caseRuns } of outcomes) {
        reports.push({
            evalCase,
            score: scoreCase(evalCase.expect, caseRuns, evalCase.threshold),
        });
    }
    return ok(buildReport(reports));
}
34
+ function select(cases, options) {
35
+ return cases.filter((evalCase) => {
36
+ if (options.caseId && evalCase.id !== options.caseId)
37
+ return false;
38
+ if (options.suite && evalCase.suite !== options.suite)
39
+ return false;
40
+ if (options.tier && evalCase.tier !== options.tier)
41
+ return false;
42
+ return true;
43
+ });
44
+ }
45
+ export { formatConsole, toJson } from "./report.js";
46
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACpF,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,cAAc,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,gBAAgB,EAAuC,MAAM,YAAY,CAAC;AAC9F,OAAO,EAAE,WAAW,EAAoC,MAAM,aAAa,CAAC;AAC5E,OAAO,EAAE,QAAQ,EAAsB,MAAM,aAAa,CAAC;AAC3D,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAcvC,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,OAAoB;IAChD,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IACjD,IAAI,CAAC,MAAM,CAAC,EAAE;QAAE,OAAO,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAEzC,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAC/C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,QAAQ,EAAE,OAAO,EAAE,oCAAoC,EAAE,CAAC,CAAC,CAAC;IAC1F,CAAC;IAED,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,iBAAiB,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC;IAC5E,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;IACtD,MAAM,UAAU,GAAG,gBAAgB,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAC5D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,GAAG,CAAC,UAAU,CAAC,CAAC;IAElD,MAAM,aAAa,GAAkB;QACnC,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,GAAG,CAAC,OAAO,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;QACzD,GAAG,CAAC,OAAO,CAAC,WAAW,KAAK,SAAS,IAAI,EAAE,WAAW,EAAE,OAAO,CAAC,WAAW,EAAE,CAAC;QAC9E,GAAG,CAAC,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC;QAC5D,GAAG,CAAC,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC;KAC7D,CAAC;IACF,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;IAExD,MAAM,OAAO,GAAiB,OAAO,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;QACjE,QAAQ;QACR,KAAK,EAAE,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,QAAQ,CAAC,SAAS,CAAC;KAC5D,CAAC,CAAC,CAAC;IAEJ,OAAO,EAAE,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;AAClC,CAAC;AAED,SAAS,MAAM,CAAC,KAA4B,EAAE,OAAoB;IAChE,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,EAAE;QAC/B,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,CAAC,EAAE,KAAK,OAAO,CAAC,MAAM;YAAE,OAAO,KAAK,CAAC;QACnE,IAAI,OAAO,C
AAC,KAAK,IAAI,QAAQ,CAAC,KAAK,KAAK,OAAO,CAAC,KAAK;YAAE,OAAO,KAAK,CAAC;QACpE,IAAI,OAAO,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,KAAK,OAAO,CAAC,IAAI;YAAE,OAAO,KAAK,CAAC;QACjE,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAGD,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC"}
@@ -0,0 +1,15 @@
1
+ import type { LoadedCase } from "./cases.js";
2
+ import type { CaseScore } from "./score.js";
3
+ export interface CaseReport { // One evaluated case paired with its score.
4
+ readonly evalCase: LoadedCase; // The case that was run.
5
+ readonly score: CaseScore; // The score computed for that case.
6
+ }
7
+ export interface EvalReport { // Aggregate report over all evaluated cases.
8
+ readonly cases: readonly CaseReport[]; // Per-case reports, as passed to buildReport.
9
+ readonly passed: number; // Number of cases whose score passed.
10
+ readonly failed: number; // Number of remaining cases (total minus passed).
11
+ }
12
+ export declare function buildReport(cases: readonly CaseReport[]): EvalReport; // Counts passing cases and derives the failed count.
13
+ export declare function formatConsole(report: EvalReport): string; // Renders the report as newline-joined text grouped by tier/suite, ending with a summary line.
14
+ export declare function toJson(report: EvalReport): string; // Serializes a summary plus per-case details as 2-space-indented JSON.
15
+ //# sourceMappingURL=report.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/eval/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAG5C,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,QAAQ,EAAE,UAAU,CAAC;IAC9B,QAAQ,CAAC,KAAK,EAAE,SAAS,CAAC;CAC3B;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,KAAK,EAAE,SAAS,UAAU,EAAE,CAAC;IACtC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;CACzB;AAED,wBAAgB,WAAW,CAAC,KAAK,EAAE,SAAS,UAAU,EAAE,GAAG,UAAU,CAGpE;AAED,wBAAgB,aAAa,CAAC,MAAM,EAAE,UAAU,GAAG,MAAM,CAuBxD;AAED,wBAAgB,MAAM,CAAC,MAAM,EAAE,UAAU,GAAG,MAAM,CAyBjD"}
@@ -0,0 +1,81 @@
1
/**
 * Builds the aggregate report for a list of scored cases.
 *
 * @param cases Case reports; each exposes `score.pass`.
 * @returns The same `cases` reference together with the number of passing
 *   cases and the number of remaining (failed) cases.
 */
export function buildReport(cases) {
    let passed = 0;
    for (const entry of cases) {
        if (entry.score.pass) {
            passed += 1;
        }
    }
    return { cases, passed, failed: cases.length - passed };
}
5
/**
 * Renders a report as plain text for the console.
 *
 * Cases are grouped under a `tier/suite` heading. Each case gets one line with
 * PASS/FAIL, its id, the matched/runs tally and the expectation; failing cases
 * additionally list the observed histogram, the (truncated) prompt and the
 * note when one is set. A summary line with the pass percentage closes the
 * output.
 *
 * @param report Report with `cases`, `passed` and `failed`.
 * @returns The lines joined with "\n".
 */
export function formatConsole(report) {
    const out = [];
    const byGroup = groupBy(report.cases, (entry) => `${entry.evalCase.tier}/${entry.evalCase.suite}`);

    byGroup.forEach((entries, heading) => {
        out.push(heading);
        entries.forEach(({ evalCase, score }) => {
            const verdict = score.pass ? "PASS" : "FAIL";
            const tally = `${score.matched}/${score.runs}`;
            out.push(` ${verdict} ${evalCase.id} ${tally} → ${describeExpectation(evalCase.expect)}`);
            if (score.pass) {
                return;
            }
            // Extra diagnostics are only printed for failing cases.
            out.push(` got: ${formatHistogram(score.histogram)}`);
            out.push(` prompt: ${truncate(evalCase.prompt)}`);
            if (evalCase.note) {
                out.push(` note: ${evalCase.note}`);
            }
        });
        // Blank line between groups.
        out.push("");
    });

    const total = report.passed + report.failed;
    let pct = 0;
    if (total !== 0) {
        pct = Math.round((report.passed / total) * 100);
    }
    out.push(`Summary: ${report.passed}/${total} cases passed (${pct}%).`);
    return out.join("\n");
}
28
/**
 * Serializes a report as pretty-printed JSON (2-space indent).
 *
 * The output has a `summary` (total, passed, failed) and one entry per case
 * combining fields of the case (id, suite, tier, prompt, expect) with fields
 * of its score; the score's histogram is converted to a plain object.
 *
 * @param report Report with `cases`, `passed` and `failed`.
 * @returns The JSON text.
 */
export function toJson(report) {
    const { passed, failed } = report;
    const summary = { total: passed + failed, passed, failed };

    const cases = report.cases.map((entry) => {
        const { id, suite, tier, prompt, expect } = entry.evalCase;
        const { pass, matched, runs, triggerRate, threshold, histogram } = entry.score;
        return {
            id,
            suite,
            tier,
            prompt,
            expect,
            pass,
            matched,
            runs,
            triggerRate,
            threshold,
            histogram: Object.fromEntries(histogram),
        };
    });

    return JSON.stringify({ summary, cases }, null, 2);
}
50
/**
 * Produces the short human-readable form of an expectation.
 * Keys are checked in this order: `noSkill`, `first`, `anyOf`; anything else
 * is treated as a `path` expectation.
 *
 * @param expectation Expectation object.
 * @returns "(no skill)", the `first` id, "one of [a, b]", or "a → b".
 */
function describeExpectation(expectation) {
    const has = (key) => key in expectation;
    if (has("noSkill")) {
        return "(no skill)";
    }
    if (has("first")) {
        return expectation.first;
    }
    return has("anyOf")
        ? `one of [${expectation.anyOf.join(", ")}]`
        : expectation.path.join(" → ");
}
59
/**
 * Formats a histogram as "id ×count" pairs separated by ", ", highest count
 * first. Entries with equal counts keep the map's iteration order.
 *
 * @param histogram Map from id to count.
 * @returns The formatted text ("" for an empty map).
 */
function formatHistogram(histogram) {
    const ranked = [...histogram.entries()];
    ranked.sort((left, right) => right[1] - left[1]);
    const parts = [];
    for (const [id, count] of ranked) {
        parts.push(`${id} ×${count}`);
    }
    return parts.join(", ");
}
65
/**
 * Collapses all whitespace runs in `text` to single spaces, trims it, and
 * shortens it to at most `max` UTF-16 code units, ending with "…" when
 * anything was cut.
 *
 * Two edge cases are handled explicitly:
 * - the cut never lands in the middle of a surrogate pair (which would leave
 *   a lone, unprintable surrogate in front of the ellipsis);
 * - a `max` below 1 yields just "…" instead of a negative slice end, which
 *   would keep almost the entire string.
 *
 * @param text Text to shorten.
 * @param max Maximum length of the result, ellipsis included (default 80).
 * @returns The single-line, possibly shortened text.
 */
function truncate(text, max = 80) {
    const single = text.replace(/\s+/g, " ").trim();
    if (single.length <= max) {
        return single;
    }
    // Reserve one code unit for the ellipsis; never let the end go negative.
    let head = single.slice(0, Math.max(0, max - 1));
    // Drop a trailing high surrogate whose low half fell beyond the cut.
    const last = head.charCodeAt(head.length - 1);
    if (last >= 0xd800 && last <= 0xdbff) {
        head = head.slice(0, -1);
    }
    return `${head}…`;
}
69
+ function groupBy(items, key) {
70
+ const groups = new Map();
71
+ for (const item of items) {
72
+ const k = key(item);
73
+ const existing = groups.get(k);
74
+ if (existing)
75
+ existing.push(item);
76
+ else
77
+ groups.set(k, [item]);
78
+ }
79
+ return groups;
80
+ }
81
+ //# sourceMappingURL=report.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"report.js","sourceRoot":"","sources":["../../src/eval/report.ts"],"names":[],"mappings":"AAeA,MAAM,UAAU,WAAW,CAAC,KAA4B;IACtD,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;IACxD,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,MAAM,EAAE,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAAkB;IAC9C,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;IAEtF,KAAK,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACtC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClB,KAAK,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,IAAI,OAAO,EAAE,CAAC;YAC1C,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;YACzC,MAAM,KAAK,GAAG,GAAG,KAAK,CAAC,OAAO,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;YAC/C,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,QAAQ,CAAC,EAAE,KAAK,KAAK,OAAO,mBAAmB,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YAC5F,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;gBAChB,KAAK,CAAC,IAAI,CAAC,kBAAkB,eAAe,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;gBACjE,KAAK,CAAC,IAAI,CAAC,qBAAqB,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;gBAC7D,IAAI,QAAQ,CAAC,IAAI;oBAAE,KAAK,CAAC,IAAI,CAAC,mBAAmB,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;YACpE,CAAC;QACH,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;IAC5C,MAAM,GAAG,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;IACxE,KAAK,CAAC,IAAI,CAAC,YAAY,MAAM,CAAC,MAAM,IAAI,KAAK,kBAAkB,GAAG,KAAK,CAAC,CAAC;IACzE,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,MAAM,CAAC,MAAkB;IACvC,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,OAAO,EAAE;YACP,KAAK,EAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM;YACpC,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB;QACD,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;YAChD,EAAE
,EAAE,QAAQ,CAAC,EAAE;YACf,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,WAAW,EAAE,KAAK,CAAC,WAAW;YAC9B,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,SAAS,EAAE,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,SAAS,CAAC;SAC/C,CAAC,CAAC;KACJ,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED,SAAS,mBAAmB,CAAC,WAAwB;IACnD,IAAI,SAAS,IAAI,WAAW;QAAE,OAAO,YAAY,CAAC;IAClD,IAAI,OAAO,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC,KAAK,CAAC;IACrD,IAAI,OAAO,IAAI,WAAW;QAAE,OAAO,WAAW,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;IAC9E,OAAO,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACtC,CAAC;AAED,SAAS,eAAe,CAAC,SAAsC;IAC7D,OAAO,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC;SAC5B,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;SAC3B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,GAAG,EAAE,KAAK,KAAK,EAAE,CAAC;SACvC,IAAI,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC;AAED,SAAS,QAAQ,CAAC,IAAY,EAAE,GAAG,GAAG,EAAE;IACtC,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAChD,OAAO,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;AACvE,CAAC;AAED,SAAS,OAAO,CAAI,KAAmB,EAAE,GAAwB;IAC/D,MAAM,MAAM,GAAG,IAAI,GAAG,EAAe,CAAC;IACtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;QACpB,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,QAAQ;YAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;;YAC7B,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,17 @@
1
+ import { type DetectionResult } from "./detect.js";
2
+ import type { LoadedCase } from "./cases.js";
3
+ export interface RunnerOptions { // Settings for runCases.
4
+ readonly cwd: string; // Base working directory; a case's own cwd is resolved against it.
5
+ readonly runs?: number; // Runs per case when the case does not set its own `runs`.
6
+ readonly concurrency?: number; // Maximum number of runs in flight at once.
7
+ readonly timeoutMs?: number; // Per-run deadline in milliseconds.
8
+ readonly model?: string; // Passed to the CLI as `--model` when set.
9
+ readonly claudeBin?: string; // Executable to spawn.
10
+ readonly onRun?: (caseId: string, result: DetectionResult) => void; // Invoked after each individual run with the case id and its result.
11
+ }
12
+ export interface CaseRuns { // All run results collected for one case.
13
+ readonly evalCase: LoadedCase; // The case that was executed.
14
+ readonly runs: readonly DetectionResult[]; // One detection result per completed run.
15
+ }
16
+ export declare function runCases(cases: readonly LoadedCase[], options: RunnerOptions): Promise<CaseRuns[]>; // Executes every case the configured number of times and resolves to one CaseRuns entry per input case, in input order.
17
+ //# sourceMappingURL=runner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAIA,OAAO,EAAkB,KAAK,eAAe,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAM7C,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,eAAe,KAAK,IAAI,CAAC;CACpE;AAED,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,QAAQ,EAAE,UAAU,CAAC;IAC9B,QAAQ,CAAC,IAAI,EAAE,SAAS,eAAe,EAAE,CAAC;CAC3C;AAED,wBAAsB,QAAQ,CAC5B,KAAK,EAAE,SAAS,UAAU,EAAE,EAC5B,OAAO,EAAE,aAAa,GACrB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAgBrB"}
@@ -0,0 +1,89 @@
1
+ import { spawn } from "node:child_process";
2
+ import { isAbsolute, resolve } from "node:path";
3
+ import { createInterface } from "node:readline";
4
+ import { createDetector } from "./detect.js";
5
+ const DEFAULT_RUNS = 5;
6
+ const DEFAULT_CONCURRENCY = 1;
7
+ const DEFAULT_TIMEOUT_MS = 60_000;
8
/**
 * Executes every case the requested number of times with bounded concurrency.
 *
 * The run count of a case is its own `runs`, else `options.runs`, else
 * DEFAULT_RUNS. Concurrency is `options.concurrency` (else
 * DEFAULT_CONCURRENCY), raised to at least 1. After each run the result is
 * stored for its case and `options.onRun` (when provided) is called with the
 * case id and the result.
 *
 * @param cases Cases to execute.
 * @param options Runner options.
 * @returns One `{ evalCase, runs }` entry per input case, in input order.
 */
export async function runCases(cases, options) {
    const collected = new Map();
    const queue = [];
    for (const evalCase of cases) {
        collected.set(evalCase, []);
        const repeat = evalCase.runs ?? options.runs ?? DEFAULT_RUNS;
        // One queue slot per requested run of this case.
        queue.push(...Array.from({ length: repeat }, () => evalCase));
    }

    const limit = Math.max(1, options.concurrency ?? DEFAULT_CONCURRENCY);
    await forEachLimit(queue, limit, async (evalCase) => {
        const outcome = await runOnce(evalCase, options);
        collected.get(evalCase)?.push(outcome);
        options.onRun?.(evalCase.id, outcome);
    });

    return cases.map((evalCase) => {
        const runs = collected.get(evalCase) ?? [];
        return { evalCase, runs };
    });
}
23
/**
 * Number of skill invocations the detector has to observe for a case:
 * the length of the expected path for `path` expectations, otherwise 1.
 *
 * @param evalCase Case whose `expect` is inspected.
 * @returns The number of skills to collect.
 */
function skillsToCollect(evalCase) {
    const { expect } = evalCase;
    if ("path" in expect) {
        return expect.path.length;
    }
    return 1;
}
26
/**
 * Performs a single run of a case: spawns the CLI with the case prompt in
 * stream-json mode, feeds its stdout lines to a detector and kills the
 * process as soon as the detector is done or the deadline expires.
 *
 * @param evalCase Case to run; its `cwd` (when set) is resolved against
 *   `options.cwd`.
 * @param options Runner options (`cwd`, `model`, `claudeBin`, `timeoutMs`).
 * @returns The detector's result; the fallback exit reason passed to it is
 *   "timeout" when the deadline fired, otherwise "no-skill".
 * @throws Error "failed to spawn claude: …" when the child emits `error`.
 */
async function runOnce(evalCase, options) {
    const workingDir = evalCase.cwd ? resolveCwd(options.cwd, evalCase.cwd) : options.cwd;

    const cliArgs = [
        "-p",
        evalCase.prompt,
        "--output-format",
        "stream-json",
        "--verbose",
        "--include-partial-messages",
    ];
    if (options.model) {
        cliArgs.push("--model", options.model);
    }

    // The child gets a copy of the current environment without CLAUDECODE.
    const childEnv = Object.assign({}, process.env);
    delete childEnv["CLAUDECODE"];

    const detector = createDetector(skillsToCollect(evalCase));
    const child = spawn(options.claudeBin ?? "claude", cliArgs, {
        cwd: workingDir,
        env: childEnv,
        stdio: ["ignore", "pipe", "ignore"],
    });
    const killChild = () => child.kill("SIGKILL");

    // Never resolves; it only rejects when the child reports an `error` event.
    const spawnFailure = new Promise((_resolve, reject) => {
        child.on("error", (cause) => {
            reject(new Error(`failed to spawn claude: ${cause.message}`));
        });
    });

    let timedOut = false;
    const timer = setTimeout(() => {
        timedOut = true;
        killChild();
    }, options.timeoutMs ?? DEFAULT_TIMEOUT_MS);

    try {
        await Promise.race([drain(child.stdout, killChild, detector), spawnFailure]);
    }
    finally {
        clearTimeout(timer);
        // Make sure the process does not outlive the run.
        if (child.exitCode === null) {
            killChild();
        }
    }

    return detector.result(timedOut ? "timeout" : "no-skill");
}
64
+ async function drain(stdout, stop, detector) {
65
+ const lines = createInterface({ input: stdout });
66
+ for await (const line of lines) {
67
+ detector.push(line);
68
+ if (detector.done) {
69
+ stop();
70
+ break;
71
+ }
72
+ }
73
+ }
74
/**
 * Determines the working directory for a case.
 *
 * @param base Directory that relative case directories are resolved against.
 * @param caseCwd Directory configured on the case.
 * @returns `caseCwd` unchanged when it is absolute, otherwise `caseCwd`
 *   resolved against `base`.
 */
function resolveCwd(base, caseCwd) {
    if (isAbsolute(caseCwd)) {
        return caseCwd;
    }
    return resolve(base, caseCwd);
}
77
+ async function forEachLimit(items, limit, worker) {
78
+ let cursor = 0;
79
+ const runners = Array.from({ length: Math.min(limit, items.length) }, async () => {
80
+ while (cursor < items.length) {
81
+ const item = items[cursor];
82
+ cursor += 1;
83
+ if (item !== undefined)
84
+ await worker(item);
85
+ }
86
+ });
87
+ await Promise.all(runners);
88
+ }
89
+ //# sourceMappingURL=runner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runner.js","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAEhD,OAAO,EAAE,cAAc,EAAwB,MAAM,aAAa,CAAC;AAGnE,MAAM,YAAY,GAAG,CAAC,CAAC;AACvB,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAC9B,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAiBlC,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,KAA4B,EAC5B,OAAsB;IAEtB,MAAM,MAAM,GAAG,IAAI,GAAG,EAAiC,CAAC;IACxD,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QACtC,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACzB,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,IAAI,OAAO,CAAC,IAAI,IAAI,YAAY,CAAC;QAC5D,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,WAAW,IAAI,mBAAmB,CAAC,CAAC;IAC5E,MAAM,YAAY,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE;QACvD,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAChD,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACnC,OAAO,CAAC,KAAK,EAAE,CAAC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC;AACnF,CAAC;AAED,SAAS,eAAe,CAAC,QAAoB;IAC3C,OAAO,MAAM,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;AACrE,CAAC;AAED,KAAK,UAAU,OAAO,CAAC,QAAoB,EAAE,OAAsB;IACjE,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;IAC/E,MAAM,IAAI,GAAG;QACX,IAAI;QACJ,QAAQ,CAAC,MAAM;QACf,iBAAiB;QACjB,aAAa;QACb,WAAW;QACX,4BAA4B;KAC7B,CAAC;IACF,IAAI,OAAO,CAAC,KAAK;QAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;IAEvD,MAAM,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAC/B,OAAO,GAAG,CAAC,YAAY,CAAC,CAAC;IAEzB,MAAM,QAAQ,GAAG,cAAc,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC3D,MAAM,KAAK,GAAG,KAAK,CA
AC,OAAO,CAAC,SAAS,IAAI,QAAQ,EAAE,IAAI,EAAE;QACvD,GAAG;QACH,GAAG;QACH,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC;KACpC,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE;QACpD,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,2BAA2B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;IAC9F,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;QAC5B,QAAQ,CAAC,OAAO,GAAG,IAAI,CAAC;QACxB,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxB,CAAC,EAAE,OAAO,CAAC,SAAS,IAAI,kBAAkB,CAAC,CAAC;IAE5C,IAAI,CAAC;QACH,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,CAAC,EAAE,YAAY,CAAC,CAAC,CAAC;IACjG,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAC;QACpB,IAAI,KAAK,CAAC,QAAQ,KAAK,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,CAAC;IAED,OAAO,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;AACpE,CAAC;AAED,KAAK,UAAU,KAAK,CAClB,MAA6B,EAC7B,IAAgB,EAChB,QAA2C;IAE3C,MAAM,KAAK,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;IACjD,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC/B,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpB,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;YAClB,IAAI,EAAE,CAAC;YACP,MAAM;QACR,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,UAAU,CAAC,IAAY,EAAE,OAAe;IAC/C,OAAO,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AAChE,CAAC;AAED,KAAK,UAAU,YAAY,CACzB,KAAmB,EACnB,KAAa,EACb,MAAkC;IAElC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,EAAE,KAAK,IAAI,EAAE;QAC/E,OAAO,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;YAC3B,MAAM,IAAI,CAAC,CAAC;YACZ,IAAI,IAAI,KAAK,SAAS;gBAAE,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC,CAAC,CAAC;IACH,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;AAC7B,CAAC"}
@@ -0,0 +1,253 @@
1
+ import { z } from "zod";
2
+ declare const Expectation: z.ZodUnion<[z.ZodObject<{ // Union of the four expectation shapes: `first`, `anyOf`, `path` (each with an optional `not` list) and `noSkill`.
3
+ first: z.ZodString;
4
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
5
+ }, "strip", z.ZodTypeAny, {
6
+ first: string;
7
+ not?: string[] | undefined;
8
+ }, {
9
+ first: string;
10
+ not?: string[] | undefined;
11
+ }>, z.ZodObject<{
12
+ anyOf: z.ZodArray<z.ZodString, "many">;
13
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
14
+ }, "strip", z.ZodTypeAny, {
15
+ anyOf: string[];
16
+ not?: string[] | undefined;
17
+ }, {
18
+ anyOf: string[];
19
+ not?: string[] | undefined;
20
+ }>, z.ZodObject<{
21
+ path: z.ZodArray<z.ZodString, "many">;
22
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
23
+ }, "strip", z.ZodTypeAny, {
24
+ path: string[];
25
+ not?: string[] | undefined;
26
+ }, {
27
+ path: string[];
28
+ not?: string[] | undefined;
29
+ }>, z.ZodObject<{
30
+ noSkill: z.ZodLiteral<true>;
31
+ }, "strip", z.ZodTypeAny, {
32
+ noSkill: true;
33
+ }, {
34
+ noSkill: true;
35
+ }>]>;
36
+ declare const Case: z.ZodObject<{ // Schema of a single eval case: required id, prompt and expect, plus optional cwd, runs, threshold and note.
37
+ id: z.ZodString;
38
+ prompt: z.ZodString;
39
+ expect: z.ZodUnion<[z.ZodObject<{
40
+ first: z.ZodString;
41
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
42
+ }, "strip", z.ZodTypeAny, {
43
+ first: string;
44
+ not?: string[] | undefined;
45
+ }, {
46
+ first: string;
47
+ not?: string[] | undefined;
48
+ }>, z.ZodObject<{
49
+ anyOf: z.ZodArray<z.ZodString, "many">;
50
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
51
+ }, "strip", z.ZodTypeAny, {
52
+ anyOf: string[];
53
+ not?: string[] | undefined;
54
+ }, {
55
+ anyOf: string[];
56
+ not?: string[] | undefined;
57
+ }>, z.ZodObject<{
58
+ path: z.ZodArray<z.ZodString, "many">;
59
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
60
+ }, "strip", z.ZodTypeAny, {
61
+ path: string[];
62
+ not?: string[] | undefined;
63
+ }, {
64
+ path: string[];
65
+ not?: string[] | undefined;
66
+ }>, z.ZodObject<{
67
+ noSkill: z.ZodLiteral<true>;
68
+ }, "strip", z.ZodTypeAny, {
69
+ noSkill: true;
70
+ }, {
71
+ noSkill: true;
72
+ }>]>;
73
+ cwd: z.ZodOptional<z.ZodString>;
74
+ runs: z.ZodOptional<z.ZodNumber>;
75
+ threshold: z.ZodOptional<z.ZodNumber>;
76
+ note: z.ZodOptional<z.ZodString>;
77
+ }, "strip", z.ZodTypeAny, {
78
+ id: string;
79
+ prompt: string;
80
+ expect: {
81
+ first: string;
82
+ not?: string[] | undefined;
83
+ } | {
84
+ anyOf: string[];
85
+ not?: string[] | undefined;
86
+ } | {
87
+ path: string[];
88
+ not?: string[] | undefined;
89
+ } | {
90
+ noSkill: true;
91
+ };
92
+ cwd?: string | undefined;
93
+ runs?: number | undefined;
94
+ threshold?: number | undefined;
95
+ note?: string | undefined;
96
+ }, {
97
+ id: string;
98
+ prompt: string;
99
+ expect: {
100
+ first: string;
101
+ not?: string[] | undefined;
102
+ } | {
103
+ anyOf: string[];
104
+ not?: string[] | undefined;
105
+ } | {
106
+ path: string[];
107
+ not?: string[] | undefined;
108
+ } | {
109
+ noSkill: true;
110
+ };
111
+ cwd?: string | undefined;
112
+ runs?: number | undefined;
113
+ threshold?: number | undefined;
114
+ note?: string | undefined;
115
+ }>;
116
+ export declare const TIERS: readonly ["routing", "solving"]; // The tier names; the same two values are accepted by the `tier` field of CaseFileSchema.
117
+ export declare const CaseFileSchema: z.ZodObject<{ // Schema of a whole case file: a suite name, a tier and the list of cases.
118
+ suite: z.ZodString;
119
+ tier: z.ZodEnum<["routing", "solving"]>;
120
+ cases: z.ZodArray<z.ZodObject<{
121
+ id: z.ZodString;
122
+ prompt: z.ZodString;
123
+ expect: z.ZodUnion<[z.ZodObject<{
124
+ first: z.ZodString;
125
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
126
+ }, "strip", z.ZodTypeAny, {
127
+ first: string;
128
+ not?: string[] | undefined;
129
+ }, {
130
+ first: string;
131
+ not?: string[] | undefined;
132
+ }>, z.ZodObject<{
133
+ anyOf: z.ZodArray<z.ZodString, "many">;
134
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
135
+ }, "strip", z.ZodTypeAny, {
136
+ anyOf: string[];
137
+ not?: string[] | undefined;
138
+ }, {
139
+ anyOf: string[];
140
+ not?: string[] | undefined;
141
+ }>, z.ZodObject<{
142
+ path: z.ZodArray<z.ZodString, "many">;
143
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
144
+ }, "strip", z.ZodTypeAny, {
145
+ path: string[];
146
+ not?: string[] | undefined;
147
+ }, {
148
+ path: string[];
149
+ not?: string[] | undefined;
150
+ }>, z.ZodObject<{
151
+ noSkill: z.ZodLiteral<true>;
152
+ }, "strip", z.ZodTypeAny, {
153
+ noSkill: true;
154
+ }, {
155
+ noSkill: true;
156
+ }>]>;
157
+ cwd: z.ZodOptional<z.ZodString>;
158
+ runs: z.ZodOptional<z.ZodNumber>;
159
+ threshold: z.ZodOptional<z.ZodNumber>;
160
+ note: z.ZodOptional<z.ZodString>;
161
+ }, "strip", z.ZodTypeAny, {
162
+ id: string;
163
+ prompt: string;
164
+ expect: {
165
+ first: string;
166
+ not?: string[] | undefined;
167
+ } | {
168
+ anyOf: string[];
169
+ not?: string[] | undefined;
170
+ } | {
171
+ path: string[];
172
+ not?: string[] | undefined;
173
+ } | {
174
+ noSkill: true;
175
+ };
176
+ cwd?: string | undefined;
177
+ runs?: number | undefined;
178
+ threshold?: number | undefined;
179
+ note?: string | undefined;
180
+ }, {
181
+ id: string;
182
+ prompt: string;
183
+ expect: {
184
+ first: string;
185
+ not?: string[] | undefined;
186
+ } | {
187
+ anyOf: string[];
188
+ not?: string[] | undefined;
189
+ } | {
190
+ path: string[];
191
+ not?: string[] | undefined;
192
+ } | {
193
+ noSkill: true;
194
+ };
195
+ cwd?: string | undefined;
196
+ runs?: number | undefined;
197
+ threshold?: number | undefined;
198
+ note?: string | undefined;
199
+ }>, "many">;
200
+ }, "strip", z.ZodTypeAny, {
201
+ suite: string;
202
+ tier: "routing" | "solving";
203
+ cases: {
204
+ id: string;
205
+ prompt: string;
206
+ expect: {
207
+ first: string;
208
+ not?: string[] | undefined;
209
+ } | {
210
+ anyOf: string[];
211
+ not?: string[] | undefined;
212
+ } | {
213
+ path: string[];
214
+ not?: string[] | undefined;
215
+ } | {
216
+ noSkill: true;
217
+ };
218
+ cwd?: string | undefined;
219
+ runs?: number | undefined;
220
+ threshold?: number | undefined;
221
+ note?: string | undefined;
222
+ }[];
223
+ }, {
224
+ suite: string;
225
+ tier: "routing" | "solving";
226
+ cases: {
227
+ id: string;
228
+ prompt: string;
229
+ expect: {
230
+ first: string;
231
+ not?: string[] | undefined;
232
+ } | {
233
+ anyOf: string[];
234
+ not?: string[] | undefined;
235
+ } | {
236
+ path: string[];
237
+ not?: string[] | undefined;
238
+ } | {
239
+ noSkill: true;
240
+ };
241
+ cwd?: string | undefined;
242
+ runs?: number | undefined;
243
+ threshold?: number | undefined;
244
+ note?: string | undefined;
245
+ }[];
246
+ }>;
247
+ export type Tier = (typeof TIERS)[number]; // Union of the TIERS entries: "routing" | "solving".
248
+ export type Expectation = z.infer<typeof Expectation>; // Parsed shape of an expectation.
249
+ export type EvalCase = z.infer<typeof Case>; // Parsed shape of a single case.
250
+ export type CaseFile = z.infer<typeof CaseFileSchema>; // Parsed shape of a whole case file.
251
+ export declare function expectedSkills(expectation: Expectation): readonly string[]; // NOTE(review): presumably returns the skill ids named by the expectation; the implementation is not visible here, confirm in schema.js.
252
+ export {};
253
+ //# sourceMappingURL=schema.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../src/eval/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAyBxB,QAAA,MAAM,WAAW;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAKf,CAAC;AAEH,QAAA,MAAM,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAQR,CAAC;AAEH,eAAO,MAAM,KAAK,iCAAkC,CAAC;AAErD,eAAO,MAAM,cAAc;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAIzB,CAAC;AAEH,MAAM,MAAM,IAAI,GAAG,CAAC,OAAO,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC;AAC1C,MAAM,MAAM,WAAW,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,WAAW,CAAC,CAAC;AACtD,MAAM,MAAM,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,IAAI,CAAC,CAAC;AAC5C,MAAM,MAAM,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,cAAc,CAAC,CAAC;AAEtD,wBAAgB,cAAc,CAAC,WAAW,EAAE,WAAW,GAAG,SAAS,MAAM,EAAE,CAM1E"}
@@ -0,0 +1,50 @@
1
+ import { z } from "zod";
2
+ import { FQ_ID } from "../ids.js";
3
+ const FqId = z.string().regex(FQ_ID, "must be a `plugin:name` id");
4
+ const FirstExpectation = z.object({
5
+ first: FqId,
6
+ not: z.array(FqId).optional(),
7
+ });
8
+ const AnyOfExpectation = z.object({
9
+ anyOf: z.array(FqId).min(1),
10
+ not: z.array(FqId).optional(),
11
+ });
12
+ const PathExpectation = z.object({
13
+ path: z.array(FqId).min(2),
14
+ not: z.array(FqId).optional(),
15
+ });
16
+ const NoSkillExpectation = z.object({
17
+ noSkill: z.literal(true),
18
+ });
19
+ const Expectation = z.union([
20
+ FirstExpectation,
21
+ AnyOfExpectation,
22
+ PathExpectation,
23
+ NoSkillExpectation,
24
+ ]);
25
+ const Case = z.object({
26
+ id: z.string().min(1),
27
+ prompt: z.string().min(1),
28
+ expect: Expectation,
29
+ cwd: z.string().optional(),
30
+ runs: z.number().int().positive().optional(),
31
+ threshold: z.number().min(0).max(1).optional(),
32
+ note: z.string().optional(),
33
+ });
34
+ export const TIERS = ["routing", "solving"];
35
+ export const CaseFileSchema = z.object({
36
+ suite: z.string().min(1),
37
+ tier: z.enum(TIERS),
38
+ cases: z.array(Case).min(1),
39
+ });
40
+ export function expectedSkills(expectation) {
41
+ if ("noSkill" in expectation)
42
+ return [];
43
+ const forbidden = expectation.not ?? [];
44
+ if ("first" in expectation)
45
+ return [expectation.first, ...forbidden];
46
+ if ("anyOf" in expectation)
47
+ return [...expectation.anyOf, ...forbidden];
48
+ return [...expectation.path, ...forbidden];
49
+ }
50
+ //# sourceMappingURL=schema.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/eval/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,KAAK,EAAE,MAAM,WAAW,CAAC;AAElC,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,KAAK,CAAC,KAAK,EAAE,4BAA4B,CAAC,CAAC;AAEnE,MAAM,gBAAgB,GAAG,CAAC,CAAC,MAAM,CAAC;IAChC,KAAK,EAAE,IAAI;IACX,GAAG,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE;CAC9B,CAAC,CAAC;AAEH,MAAM,gBAAgB,GAAG,CAAC,CAAC,MAAM,CAAC;IAChC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3B,GAAG,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE;CAC9B,CAAC,CAAC;AAEH,MAAM,eAAe,GAAG,CAAC,CAAC,MAAM,CAAC;IAC/B,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1B,GAAG,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE;CAC9B,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,OAAO,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;CACzB,CAAC,CAAC;AAEH,MAAM,WAAW,GAAG,CAAC,CAAC,KAAK,CAAC;IAC1B,gBAAgB;IAChB,gBAAgB;IAChB,eAAe;IACf,kBAAkB;CACnB,CAAC,CAAC;AAEH,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC;IACpB,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACrB,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACzB,MAAM,EAAE,WAAW;IACnB,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC1B,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;IAC5C,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE;IAC9C,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CAC5B,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,KAAK,GAAG,CAAC,SAAS,EAAE,SAAS,CAAU,CAAC;AAErD,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,CAAC;IACrC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACxB,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;IACnB,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;CAC5B,CAAC,CAAC;AAOH,MAAM,UAAU,cAAc,CAAC,WAAwB;IACrD,IAAI,SAAS,IAAI,WAAW;QAAE,OAAO,EAAE,CAAC;IACxC,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,IAAI,EAAE,CAAC;IACxC,IAAI,OAAO,IAAI,WAAW;QAAE,OAAO,CAAC,WAAW,CAAC,KAAK,EAAE,GAAG,SAAS,CAAC,CAAC;
IACrE,IAAI,OAAO,IAAI,WAAW;QAAE,OAAO,CAAC,GAAG,WAAW,CAAC,KAAK,EAAE,GAAG,SAAS,CAAC,CAAC;IACxE,OAAO,CAAC,GAAG,WAAW,CAAC,IAAI,EAAE,GAAG,SAAS,CAAC,CAAC;AAC7C,CAAC"}
@@ -0,0 +1,13 @@
1
+ import type { DetectionResult } from "./detect.js";
2
+ import type { Expectation } from "./schema.js";
3
+ export interface CaseScore {
4
+ readonly matched: number;
5
+ readonly runs: number;
6
+ readonly triggerRate: number;
7
+ readonly threshold: number;
8
+ readonly pass: boolean;
9
+ readonly histogram: ReadonlyMap<string, number>;
10
+ }
11
+ export declare function matchesExpectation(expectation: Expectation, run: DetectionResult): boolean;
12
+ export declare function scoreCase(expectation: Expectation, runs: readonly DetectionResult[], threshold?: number): CaseScore;
13
+ //# sourceMappingURL=score.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"score.d.ts","sourceRoot":"","sources":["../../src/eval/score.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAI/C,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,SAAS,EAAE,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjD;AAED,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,WAAW,EAAE,GAAG,EAAE,eAAe,GAAG,OAAO,CAa1F;AAED,wBAAgB,SAAS,CACvB,WAAW,EAAE,WAAW,EACxB,IAAI,EAAE,SAAS,eAAe,EAAE,EAChC,SAAS,SAAoB,GAC5B,SAAS,CAWX"}
@@ -0,0 +1,52 @@
1
+ const DEFAULT_THRESHOLD = 1.0;
2
+ export function matchesExpectation(expectation, run) {
3
+ if (violatesNot(expectation, run))
4
+ return false;
5
+ if ("noSkill" in expectation) {
6
+ return run.firstSkill === null;
7
+ }
8
+ if ("first" in expectation) {
9
+ return run.firstSkill === expectation.first;
10
+ }
11
+ if ("anyOf" in expectation) {
12
+ return run.firstSkill !== null && expectation.anyOf.includes(run.firstSkill);
13
+ }
14
+ return isOrderedSubsequence(expectation.path, run.observed);
15
+ }
16
+ export function scoreCase(expectation, runs, threshold = DEFAULT_THRESHOLD) {
17
+ const matched = runs.filter((run) => matchesExpectation(expectation, run)).length;
18
+ const triggerRate = runs.length === 0 ? 0 : matched / runs.length;
19
+ return {
20
+ matched,
21
+ runs: runs.length,
22
+ triggerRate,
23
+ threshold,
24
+ pass: triggerRate >= threshold,
25
+ histogram: histogramOf(runs),
26
+ };
27
+ }
28
+ function violatesNot(expectation, run) {
29
+ const forbidden = "noSkill" in expectation ? undefined : expectation.not;
30
+ if (!forbidden || forbidden.length === 0)
31
+ return false;
32
+ return run.firstSkill !== null && forbidden.includes(run.firstSkill);
33
+ }
34
+ function isOrderedSubsequence(needle, haystack) {
35
+ let cursor = 0;
36
+ for (const id of haystack) {
37
+ if (id === needle[cursor])
38
+ cursor += 1;
39
+ if (cursor === needle.length)
40
+ return true;
41
+ }
42
+ return cursor === needle.length;
43
+ }
44
+ function histogramOf(runs) {
45
+ const counts = new Map();
46
+ for (const run of runs) {
47
+ const key = run.firstSkill ?? "(no skill)";
48
+ counts.set(key, (counts.get(key) ?? 0) + 1);
49
+ }
50
+ return counts;
51
+ }
52
+ //# sourceMappingURL=score.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"score.js","sourceRoot":"","sources":["../../src/eval/score.ts"],"names":[],"mappings":"AAGA,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAW9B,MAAM,UAAU,kBAAkB,CAAC,WAAwB,EAAE,GAAoB;IAC/E,IAAI,WAAW,CAAC,WAAW,EAAE,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IAEhD,IAAI,SAAS,IAAI,WAAW,EAAE,CAAC;QAC7B,OAAO,GAAG,CAAC,UAAU,KAAK,IAAI,CAAC;IACjC,CAAC;IACD,IAAI,OAAO,IAAI,WAAW,EAAE,CAAC;QAC3B,OAAO,GAAG,CAAC,UAAU,KAAK,WAAW,CAAC,KAAK,CAAC;IAC9C,CAAC;IACD,IAAI,OAAO,IAAI,WAAW,EAAE,CAAC;QAC3B,OAAO,GAAG,CAAC,UAAU,KAAK,IAAI,IAAI,WAAW,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAC/E,CAAC;IACD,OAAO,oBAAoB,CAAC,WAAW,CAAC,IAAI,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC;AAC9D,CAAC;AAED,MAAM,UAAU,SAAS,CACvB,WAAwB,EACxB,IAAgC,EAChC,SAAS,GAAG,iBAAiB;IAE7B,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,kBAAkB,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;IAClF,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC;IAClE,OAAO;QACL,OAAO;QACP,IAAI,EAAE,IAAI,CAAC,MAAM;QACjB,WAAW;QACX,SAAS;QACT,IAAI,EAAE,WAAW,IAAI,SAAS;QAC9B,SAAS,EAAE,WAAW,CAAC,IAAI,CAAC;KAC7B,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,WAAwB,EAAE,GAAoB;IACjE,MAAM,SAAS,GAAG,SAAS,IAAI,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC;IACzE,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACvD,OAAO,GAAG,CAAC,UAAU,KAAK,IAAI,IAAI,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;AACvE,CAAC;AAED,SAAS,oBAAoB,CAAC,MAAyB,EAAE,QAA2B;IAClF,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,IAAI,EAAE,KAAK,MAAM,CAAC,MAAM,CAAC;YAAE,MAAM,IAAI,CAAC,CAAC;QACvC,IAAI,MAAM,KAAK,MAAM,CAAC,MAAM;YAAE,OAAO,IAAI,CAAC;IAC5C,CAAC;IACD,OAAO,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC;AAClC,CAAC;AAED,SAAS,WAAW,CAAC,IAAgC;IACnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,GAAG,CAAC,UAAU,IAAI,YAAY,CAAC;QAC3C,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9C,CAAC;IACD,OAAO,MAAM,CAAC;AAChB
,CAAC"}
package/dist/index.d.ts CHANGED
@@ -27,6 +27,10 @@ export { resolveVendors } from "./vendor/registry.js";
27
27
  export type { DiscoveredVendorPlugin, LinkedFile, Vendor, VendorEmitContext, VendorInstallContext, } from "./vendor/schema.js";
28
28
  export { check } from "./check/index.js";
29
29
  export type { CheckOptions, CheckResult, ReferenceViolation, ReferenceViolationKind, SourceSummary, } from "./check/index.js";
30
+ export { runEval, formatConsole, toJson } from "./eval/index.js";
31
+ export type { EvalOptions, EvalReport, CaseReport, LoadedCase, CaseLoadError, } from "./eval/index.js";
32
+ export { CaseFileSchema, TIERS } from "./eval/schema.js";
33
+ export type { CaseFile, EvalCase, Expectation, Tier } from "./eval/schema.js";
30
34
  export { defaultSources, discoverInstalled, indexInstalled } from "./installed.js";
31
35
  export type { InstalledAgent, InstalledArtifacts, InstalledCommand, InstalledIndex, InstalledSkill, PluginSource, } from "./installed.js";
32
36
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,WAAW,EACX,mBAAmB,EACnB,WAAW,EACX,aAAa,EACb,oBAAoB,EACpB,2BAA2B,EAC3B,SAAS,GACV,MAAM,kBAAkB,CAAC;AAC1B,YAAY,EACV,SAAS,EACT,kBAAkB,EAClB,WAAW,EACX,cAAc,EACd,KAAK,EACL,SAAS,EACT,WAAW,GACZ,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAC/D,YAAY,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAEhD,OAAO,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAC9E,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAE9E,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,MAAM,aAAa,CAAC;AACtC,YAAY,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AACxE,YAAY,EACV,WAAW,EACX,gBAAgB,EAChB,SAAS,EACT,iBAAiB,EACjB,eAAe,GAChB,MAAM,yBAAyB,CAAC;AAEjC,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,YAAY,EAAE,qBAAqB,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAE7E,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,YAAY,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAEnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEzD,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACxD,YAAY,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACnF,YAAY,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,YAAY,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAEnD,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACxD,YAAY,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EACV,sBAAsB,EACtB,UAAU,EACV,MAAM,EACN,iBAAiB,EACjB,oBAAoB,GACrB,MAAM,oBAAoB,CAAC;AAE5B,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACzC,YAAY,EACV,YAAY,EACZ,WAAW,EACX,kBAAkB,EAClB,sBAAsB,EACtB,aAAa,GACd,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACnF,YAAY,EACV,cAAc,EACd,kBAAkB,EAClB,gBAAgB,EAChB,cAAc,EACd,cAAc,EACd,YAAY,GACb,MAAM,gBAAgB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,WAAW,EACX,mBAAmB,EACnB,WAAW,EACX,aAAa,EACb,oBAAoB,EACpB,2BAA2B,EAC3B,SAAS,GACV,MAAM,kBAAkB,CAAC;AAC1B,YAAY,EACV,SAAS,EACT,kBAAkB,EAClB,WAAW,EACX,cAAc,EACd,KAAK,EACL,SAAS,EACT,WAAW,GACZ,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAC/D,YAAY,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAEhD,OAAO,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAC9E,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAE9E,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,MAAM,aAAa,CAAC;AACtC,YAAY,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AACxE,YAAY,EACV,WAAW,EACX,gBAAgB,EAChB,SAAS,EACT,iBAAiB,EACjB,eAAe,GAChB,MAAM,yBAAyB,CAAC;AAEjC,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,YAAY,EAAE,qBAAqB,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAE7E,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,YAAY,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAEnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEzD,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACxD,YAAY,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACnF,YAAY,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,YAAY,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAEnD,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACxD,YAAY,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EACV,sBAAsB,EACtB,UAAU,EACV,MAAM,EACN,iBAAiB,EACjB,oBAAoB,GACrB,MAAM,oBAAoB,CAAC;AAE5B,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACzC,YAAY,EACV,YAAY,EACZ,WAAW,EACX,kBAAkB,EAClB,sBAAsB,EACtB,aAAa,GACd,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EAAE,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AACjE,YAAY,EACV,WAAW,EACX,UAAU,EACV,UAAU,EACV,UAAU,EACV,aAAa,GACd,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACzD,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,kB
AAkB,CAAC;AAE9E,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACnF,YAAY,EACV,cAAc,EACd,kBAAkB,EAClB,gBAAgB,EAChB,cAAc,EACd,cAAc,EACd,YAAY,GACb,MAAM,gBAAgB,CAAC"}
package/dist/index.js CHANGED
@@ -13,5 +13,7 @@ export { loadHarnessConfig } from "./config/harness.js";
13
13
  export { builtinVendors } from "./vendor/builtins.js";
14
14
  export { resolveVendors } from "./vendor/registry.js";
15
15
  export { check } from "./check/index.js";
16
+ export { runEval, formatConsole, toJson } from "./eval/index.js";
17
+ export { CaseFileSchema, TIERS } from "./eval/schema.js";
16
18
  export { defaultSources, discoverInstalled, indexInstalled } from "./installed.js";
17
19
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,WAAW,EACX,mBAAmB,EACnB,WAAW,EACX,aAAa,EACb,oBAAoB,EACpB,2BAA2B,EAC3B,SAAS,GACV,MAAM,kBAAkB,CAAC;AAW1B,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAG/D,OAAO,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAG9E,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,MAAM,aAAa,CAAC;AAGtC,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AASxE,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAGpD,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAGvC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGjC,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAExD,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAGnF,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAG9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AAGxD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAStD,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AASzC,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,WAAW,EACX,mBAAmB,EACnB,WAAW,EACX,aAAa,EACb,oBAAoB,EACpB,2BAA2B,EAC3B,SAAS,GACV,MAAM,kBAAkB,CAAC;AAW1B,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAG/D,OAAO,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAG9E,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,MAAM,aAAa,CAAC;AAGtC,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AASxE,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAGpD,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAGvC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGjC,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAExD,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAGnF,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAG9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AAGxD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAStD,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AASzC,OAAO,EAAE,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAQjE,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAGzD,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jean.gnc/harness-kit",
3
- "version": "0.11.2",
3
+ "version": "0.12.1",
4
4
  "type": "module",
5
5
  "description": "Build your own multi-agent harness: typed toolkit for authoring plugins (skills, agents, commands, hooks) and shipping them to Claude Code and Codex from a single source tree.",
6
6
  "license": "MIT",