@jean.gnc/harness-kit 0.12.7 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -0
- package/dist/cli.js +13 -1
- package/dist/cli.js.map +1 -1
- package/dist/eval/capture.d.ts +23 -0
- package/dist/eval/capture.d.ts.map +1 -0
- package/dist/eval/capture.js +79 -0
- package/dist/eval/capture.js.map +1 -0
- package/dist/eval/cases.d.ts +10 -2
- package/dist/eval/cases.d.ts.map +1 -1
- package/dist/eval/cases.js +9 -3
- package/dist/eval/cases.js.map +1 -1
- package/dist/eval/detect.d.ts +1 -0
- package/dist/eval/detect.d.ts.map +1 -1
- package/dist/eval/detect.js +1 -1
- package/dist/eval/detect.js.map +1 -1
- package/dist/eval/grade-deterministic.d.ts +9 -0
- package/dist/eval/grade-deterministic.d.ts.map +1 -0
- package/dist/eval/grade-deterministic.js +87 -0
- package/dist/eval/grade-deterministic.js.map +1 -0
- package/dist/eval/grade-judge.d.ts +12 -0
- package/dist/eval/grade-judge.d.ts.map +1 -0
- package/dist/eval/grade-judge.js +14 -0
- package/dist/eval/grade-judge.js.map +1 -0
- package/dist/eval/grade.d.ts +5 -0
- package/dist/eval/grade.d.ts.map +1 -0
- package/dist/eval/grade.js +25 -0
- package/dist/eval/grade.js.map +1 -0
- package/dist/eval/index.d.ts +4 -0
- package/dist/eval/index.d.ts.map +1 -1
- package/dist/eval/index.js +27 -5
- package/dist/eval/index.js.map +1 -1
- package/dist/eval/judge.d.ts +26 -0
- package/dist/eval/judge.d.ts.map +1 -0
- package/dist/eval/judge.js +55 -0
- package/dist/eval/judge.js.map +1 -0
- package/dist/eval/report.d.ts +5 -1
- package/dist/eval/report.d.ts.map +1 -1
- package/dist/eval/report.js +66 -13
- package/dist/eval/report.js.map +1 -1
- package/dist/eval/runner.d.ts +13 -5
- package/dist/eval/runner.d.ts.map +1 -1
- package/dist/eval/runner.js +105 -31
- package/dist/eval/runner.js.map +1 -1
- package/dist/eval/schema.d.ts +644 -29
- package/dist/eval/schema.d.ts.map +1 -1
- package/dist/eval/schema.js +57 -6
- package/dist/eval/schema.js.map +1 -1
- package/dist/eval/score.d.ts +8 -0
- package/dist/eval/score.d.ts.map +1 -1
- package/dist/eval/score.js +17 -0
- package/dist/eval/score.js.map +1 -1
- package/dist/skill/includes.d.ts +4 -0
- package/dist/skill/includes.d.ts.map +1 -1
- package/dist/skill/includes.js +38 -32
- package/dist/skill/includes.js.map +1 -1
- package/package.json +2 -1
package/dist/eval/index.js
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import { defaultSources, discoverInstalled, indexInstalled } from "../installed.js";
|
|
2
2
|
import { err, ok } from "../result.js";
|
|
3
3
|
import { loadCases, unresolvedSkills } from "./cases.js";
|
|
4
|
+
import { gradeResults } from "./grade.js";
|
|
5
|
+
import { createAnthropicJudge } from "./judge.js";
|
|
4
6
|
import { buildReport } from "./report.js";
|
|
5
7
|
import { runCases } from "./runner.js";
|
|
6
|
-
|
|
8
|
+
const DEFAULT_JUDGE_MODEL = "claude-sonnet-4-5";
|
|
7
9
|
export async function runEval(options) {
|
|
8
10
|
const loaded = await loadCases(options.casesDir);
|
|
9
11
|
if (!loaded.ok)
|
|
@@ -17,20 +19,40 @@ export async function runEval(options) {
|
|
|
17
19
|
const unresolved = unresolvedSkills(selected, installedIds);
|
|
18
20
|
if (unresolved.length > 0)
|
|
19
21
|
return err(unresolved);
|
|
22
|
+
const judge = resolveJudge(selected, options);
|
|
23
|
+
if (!judge.ok)
|
|
24
|
+
return err(judge.error);
|
|
20
25
|
const runnerOptions = {
|
|
21
26
|
cwd: options.cwd,
|
|
22
27
|
...(options.runs !== undefined && { runs: options.runs }),
|
|
23
28
|
...(options.concurrency !== undefined && { concurrency: options.concurrency }),
|
|
24
29
|
...(options.model !== undefined && { model: options.model }),
|
|
30
|
+
...(options.solvingTimeoutMs !== undefined && { solvingTimeoutMs: options.solvingTimeoutMs }),
|
|
25
31
|
...(options.onRun !== undefined && { onRun: options.onRun }),
|
|
26
32
|
};
|
|
27
33
|
const results = await runCases(selected, runnerOptions);
|
|
28
|
-
const reports = results
|
|
29
|
-
evalCase,
|
|
30
|
-
score: scoreCase(evalCase.expect, runs, evalCase.threshold),
|
|
31
|
-
}));
|
|
34
|
+
const reports = await gradeResults(results, judge.value);
|
|
32
35
|
return ok(buildReport(reports));
|
|
33
36
|
}
|
|
37
|
+
/**
 * Reports whether at least one of the given cases requires an LLM judge.
 *
 * A case requires a judge when its tier is "solving" and its `rubric`
 * property is anything other than `undefined`.
 *
 * @param cases - array of loaded eval cases.
 * @returns `true` if any case needs rubric grading, otherwise `false`.
 */
function needsJudge(cases) {
    const hasRubricToGrade = ({ tier, rubric }) => tier === "solving" && rubric !== undefined;
    return cases.some(hasRubricToGrade);
}
|
|
40
|
+
/**
 * Picks the judge used for rubric grading.
 *
 * Resolution order:
 *  1. a truthy `options.judge` is returned as-is;
 *  2. if no case needs a judge, the result is `ok(undefined)`;
 *  3. otherwise an Anthropic judge is created from the `ANTHROPIC_API_KEY`
 *     environment variable, using `options.judgeModel` or, when that is
 *     nullish, `DEFAULT_JUDGE_MODEL`.
 *
 * @param cases - the selected eval cases.
 * @param options - eval options (`judge`, `judgeModel`, `casesDir` are read).
 * @returns `ok(judge | undefined)`, or `err([{ file, message }])` when a judge
 *   is needed but the API key is unset or empty.
 */
function resolveJudge(cases, options) {
    if (options.judge) {
        return ok(options.judge);
    }
    if (!needsJudge(cases)) {
        return ok(undefined);
    }
    const apiKey = process.env["ANTHROPIC_API_KEY"];
    if (apiKey) {
        const model = options.judgeModel ?? DEFAULT_JUDGE_MODEL;
        return ok(createAnthropicJudge({ model, apiKey }));
    }
    // A judge is required but there is no key to build one with.
    const missingKey = {
        file: options.casesDir,
        message: "solving rubric grading requires ANTHROPIC_API_KEY for the judge",
    };
    return err([missingKey]);
}
|
|
34
56
|
function select(cases, options) {
|
|
35
57
|
return cases.filter((evalCase) => {
|
|
36
58
|
if (options.caseId && evalCase.id !== options.caseId)
|
package/dist/eval/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACpF,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,cAAc,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,gBAAgB,EAAuC,MAAM,YAAY,CAAC;AAC9F,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACpF,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,cAAc,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,gBAAgB,EAAuC,MAAM,YAAY,CAAC;AAC9F,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAC1C,OAAO,EAAE,oBAAoB,EAAc,MAAM,YAAY,CAAC;AAC9D,OAAO,EAAE,WAAW,EAAmB,MAAM,aAAa,CAAC;AAC3D,OAAO,EAAE,QAAQ,EAAsB,MAAM,aAAa,CAAC;AAE3D,MAAM,mBAAmB,GAAG,mBAAmB,CAAC;AAiBhD,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,OAAoB;IAChD,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IACjD,IAAI,CAAC,MAAM,CAAC,EAAE;QAAE,OAAO,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAEzC,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAC/C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,QAAQ,EAAE,OAAO,EAAE,oCAAoC,EAAE,CAAC,CAAC,CAAC;IAC1F,CAAC;IAED,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,iBAAiB,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC;IAC5E,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;IACtD,MAAM,UAAU,GAAG,gBAAgB,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAC5D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,GAAG,CAAC,UAAU,CAAC,CAAC;IAElD,MAAM,KAAK,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC9C,IAAI,CAAC,KAAK,CAAC,EAAE;QAAE,OAAO,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAEvC,MAAM,aAAa,GAAkB;QACnC,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,GAAG,CAAC,OAAO,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;QACzD,GAAG,CAAC,OAAO,CAAC,WAAW,KAAK,SAAS,IAAI,EAAE,WAAW,EAAE,OAAO,CAAC,WAAW,EAAE,CAAC;QAC9E,GAAG,CAAC,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC;QAC5D,GAAG,CAAC,OAAO,CAAC,gBAAgB,KAAK,SAAS,IAAI,EAAE,gBAAgB,EAAE,OAAO,CAAC,gBAAgB,EAAE,CAAC;QAC7F,GAAG,CAAC,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC;KAC7D,CAAC;IACF,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;IACxD,MAAM,OAAO,GAAG,MAAM,YAAY,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;IAEzD,OAAO,EAAE,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;AAClC,CAAC;AAED,SAAS,UAAU,CA
AC,KAA4B;IAC9C,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,IAAI,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;AAC3E,CAAC;AAED,SAAS,YAAY,CACnB,KAA4B,EAC5B,OAAoB;IAEpB,IAAI,OAAO,CAAC,KAAK;QAAE,OAAO,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC5C,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC,SAAS,CAAC,CAAC;IAE7C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IAChD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,GAAG,CAAC;YACT;gBACE,IAAI,EAAE,OAAO,CAAC,QAAQ;gBACtB,OAAO,EAAE,iEAAiE;aAC3E;SACF,CAAC,CAAC;IACL,CAAC;IACD,OAAO,EAAE,CAAC,oBAAoB,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,UAAU,IAAI,mBAAmB,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;AAChG,CAAC;AAED,SAAS,MAAM,CAAC,KAA4B,EAAE,OAAoB;IAChE,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,EAAE;QAC/B,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,CAAC,EAAE,KAAK,OAAO,CAAC,MAAM;YAAE,OAAO,KAAK,CAAC;QACnE,IAAI,OAAO,CAAC,KAAK,IAAI,QAAQ,CAAC,KAAK,KAAK,OAAO,CAAC,KAAK;YAAE,OAAO,KAAK,CAAC;QACpE,IAAI,OAAO,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,KAAK,OAAO,CAAC,IAAI;YAAE,OAAO,KAAK,CAAC;QACjE,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAGD,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
export declare const Verdict: z.ZodObject<{
|
|
3
|
+
pass: z.ZodUnion<[z.ZodBoolean, z.ZodLiteral<"unknown">]>;
|
|
4
|
+
evidence: z.ZodString;
|
|
5
|
+
}, "strip", z.ZodTypeAny, {
|
|
6
|
+
pass: boolean | "unknown";
|
|
7
|
+
evidence: string;
|
|
8
|
+
}, {
|
|
9
|
+
pass: boolean | "unknown";
|
|
10
|
+
evidence: string;
|
|
11
|
+
}>;
|
|
12
|
+
export type Verdict = z.infer<typeof Verdict>;
|
|
13
|
+
/**
 * Input handed to a judge for a single grading call: the name of the rubric
 * dimension, the criterion to check, and the output text being graded.
 */
export interface JudgeRequest {
|
|
14
|
+
readonly dimension: string;
|
|
15
|
+
readonly criterion: string;
|
|
16
|
+
readonly output: string;
|
|
17
|
+
}
|
|
18
|
+
/** A judge: an async function that takes one JudgeRequest and resolves to a Verdict. */
export type Judge = (request: JudgeRequest) => Promise<Verdict>;
|
|
19
|
+
/** Converts an untyped raw value into a Verdict. */
export declare function parseVerdict(raw: unknown): Verdict;
|
|
20
|
+
/**
 * Settings for createAnthropicJudge: the model name, the API key, and an
 * optional cap on response tokens.
 */
export interface AnthropicJudgeOptions {
|
|
21
|
+
readonly model: string;
|
|
22
|
+
readonly apiKey: string;
|
|
23
|
+
readonly maxTokens?: number;
|
|
24
|
+
}
|
|
25
|
+
/** Builds a Judge from the given AnthropicJudgeOptions. */
export declare function createAnthropicJudge(options: AnthropicJudgeOptions): Judge;
|
|
26
|
+
//# sourceMappingURL=judge.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"judge.d.ts","sourceRoot":"","sources":["../../src/eval/judge.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,OAAO;;;;;;;;;EAGlB,CAAC;AAEH,MAAM,MAAM,OAAO,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,OAAO,CAAC,CAAC;AAE9C,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,MAAM,KAAK,GAAG,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;AAMhE,wBAAgB,YAAY,CAAC,GAAG,EAAE,OAAO,GAAG,OAAO,CAIlD;AAwBD,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,qBAAqB,GAAG,KAAK,CAmB1E"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
export const Verdict = z.object({
|
|
4
|
+
pass: z.union([z.boolean(), z.literal("unknown")]),
|
|
5
|
+
evidence: z.string(),
|
|
6
|
+
});
|
|
7
|
+
/**
 * Builds an "abstain" verdict: `pass` is the string "unknown" and `evidence`
 * carries the supplied explanation.
 *
 * @param evidence - text explaining why no definite verdict is available.
 * @returns a verdict object `{ pass: "unknown", evidence }`.
 */
function abstain(evidence) {
    const verdict = { pass: "unknown", evidence };
    return verdict;
}
|
|
10
|
+
/**
 * Turns a raw judge payload into a verdict without ever throwing on bad data.
 *
 * - `undefined` / `null` input yields an abstain verdict ("judge returned no verdict").
 * - Input that fails `Verdict.safeParse` yields an abstain verdict
 *   ("judge returned an unparseable verdict").
 * - Otherwise the parsed data is returned.
 *
 * @param raw - the untyped value to validate.
 * @returns the parsed verdict, or an abstain verdict.
 */
export function parseVerdict(raw) {
    if (raw == null) {
        return abstain("judge returned no verdict");
    }
    const parsed = Verdict.safeParse(raw);
    if (!parsed.success) {
        return abstain("judge returned an unparseable verdict");
    }
    return parsed.data;
}
|
|
16
|
+
const SYSTEM_PREAMBLE = "You are a strict grader for AI agent outputs. Judge the output against the single criterion " +
|
|
17
|
+
"you are given, in isolation. Record a verdict via the tool: pass=true only if the criterion is " +
|
|
18
|
+
'clearly met, pass=false if clearly unmet, and pass="unknown" if you genuinely cannot tell. ' +
|
|
19
|
+
"Always cite concrete evidence from the output.";
|
|
20
|
+
const VERDICT_TOOL = {
|
|
21
|
+
name: "record_verdict",
|
|
22
|
+
description: "Record the grading verdict for one rubric dimension.",
|
|
23
|
+
input_schema: {
|
|
24
|
+
type: "object",
|
|
25
|
+
properties: {
|
|
26
|
+
pass: {
|
|
27
|
+
description: 'true if met, false if unmet, "unknown" if undeterminable',
|
|
28
|
+
anyOf: [{ type: "boolean" }, { type: "string", enum: ["unknown"] }],
|
|
29
|
+
},
|
|
30
|
+
evidence: { type: "string", description: "concrete evidence from the output" },
|
|
31
|
+
},
|
|
32
|
+
required: ["pass", "evidence"],
|
|
33
|
+
},
|
|
34
|
+
};
|
|
35
|
+
/**
 * Creates a judge function backed by the Anthropic Messages API.
 *
 * One client is constructed up front from `options.apiKey`. Every call of the
 * returned function sends a single request that forces the `VERDICT_TOOL`
 * tool, then passes the `input` of the first `tool_use` content block (or
 * `undefined` when there is none) to `parseVerdict`.
 *
 * @param options - `model`, `apiKey`, and optional `maxTokens` (1024 when nullish).
 * @returns an async function `({ dimension, criterion, output }) => verdict`.
 */
export function createAnthropicJudge(options) {
    const client = new Anthropic({ apiKey: options.apiKey });
    const renderPrompt = ({ dimension, criterion, output }) =>
        `Dimension: ${dimension}\nCriterion: ${criterion}\n\nOutput to grade:\n${output}`;
    const isToolUse = (part) => part.type === "tool_use";
    return async (request) => {
        const response = await client.messages.create({
            model: options.model,
            max_tokens: options.maxTokens ?? 1024,
            tools: [VERDICT_TOOL],
            // Force the model to answer through the verdict tool.
            tool_choice: { type: "tool", name: VERDICT_TOOL.name },
            system: [{ type: "text", text: SYSTEM_PREAMBLE, cache_control: { type: "ephemeral" } }],
            messages: [
                {
                    role: "user",
                    content: renderPrompt(request),
                },
            ],
        });
        const toolUse = response.content.find(isToolUse);
        return parseVerdict(toolUse?.input);
    };
}
|
|
55
|
+
//# sourceMappingURL=judge.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"judge.js","sourceRoot":"","sources":["../../src/eval/judge.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAC1C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,MAAM,CAAC,MAAM,OAAO,GAAG,CAAC,CAAC,MAAM,CAAC;IAC9B,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC;IAClD,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE;CACrB,CAAC,CAAC;AAYH,SAAS,OAAO,CAAC,QAAgB;IAC/B,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;AACvC,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,GAAY;IACvC,IAAI,GAAG,KAAK,SAAS,IAAI,GAAG,KAAK,IAAI;QAAE,OAAO,OAAO,CAAC,2BAA2B,CAAC,CAAC;IACnF,MAAM,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IACtC,OAAO,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,uCAAuC,CAAC,CAAC;AACzF,CAAC;AAED,MAAM,eAAe,GACnB,8FAA8F;IAC9F,iGAAiG;IACjG,6FAA6F;IAC7F,gDAAgD,CAAC;AAEnD,MAAM,YAAY,GAAmB;IACnC,IAAI,EAAE,gBAAgB;IACtB,WAAW,EAAE,sDAAsD;IACnE,YAAY,EAAE;QACZ,IAAI,EAAE,QAAQ;QACd,UAAU,EAAE;YACV,IAAI,EAAE;gBACJ,WAAW,EAAE,0DAA0D;gBACvE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,SAAS,CAAC,EAAE,CAAC;aACpE;YACD,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,mCAAmC,EAAE;SAC/E;QACD,QAAQ,EAAE,CAAC,MAAM,EAAE,UAAU,CAAC;KAC/B;CACF,CAAC;AAQF,MAAM,UAAU,oBAAoB,CAAC,OAA8B;IACjE,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACzD,OAAO,KAAK,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE;QAChD,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;YAC5C,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,UAAU,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI;YACrC,KAAK,EAAE,CAAC,YAAY,CAAC;YACrB,WAAW,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,CAAC,IAAI,EAAE;YACtD,MAAM,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,eAAe,EAAE,aAAa,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,EAAE,CAAC;YACvF,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,cAAc,SAAS,gBAAgB,SAAS,yBAAyB,MAAM,EAAE;iBAC3F;aACF;SACF,CAAC,CAAC;QACH,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC;QAClE,OAAO,YAAY,C
AAC,KAAK,EAAE,KAAK,CAAC,CAAC;IACpC,CAAC,CAAC;AACJ,CAAC"}
|
package/dist/eval/report.d.ts
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import type { LoadedCase } from "./cases.js";
|
|
2
|
-
import type { CaseScore } from "./score.js";
|
|
2
|
+
import type { CaseScore, SolvingRunResult } from "./score.js";
|
|
3
|
+
export interface SolvingBreakdown {
|
|
4
|
+
readonly perRun: readonly SolvingRunResult[];
|
|
5
|
+
}
|
|
3
6
|
export interface CaseReport {
|
|
4
7
|
readonly evalCase: LoadedCase;
|
|
5
8
|
readonly score: CaseScore;
|
|
9
|
+
readonly solving?: SolvingBreakdown;
|
|
6
10
|
}
|
|
7
11
|
export interface EvalReport {
|
|
8
12
|
readonly cases: readonly CaseReport[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/eval/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/eval/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAG9D,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,MAAM,EAAE,SAAS,gBAAgB,EAAE,CAAC;CAC9C;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,QAAQ,EAAE,UAAU,CAAC;IAC9B,QAAQ,CAAC,KAAK,EAAE,SAAS,CAAC;IAC1B,QAAQ,CAAC,OAAO,CAAC,EAAE,gBAAgB,CAAC;CACrC;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,KAAK,EAAE,SAAS,UAAU,EAAE,CAAC;IACtC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;CACzB;AAED,wBAAgB,WAAW,CAAC,KAAK,EAAE,SAAS,UAAU,EAAE,GAAG,UAAU,CAGpE;AAED,wBAAgB,aAAa,CAAC,MAAM,EAAE,UAAU,GAAG,MAAM,CAgBxD;AAED,wBAAgB,MAAM,CAAC,MAAM,EAAE,UAAU,GAAG,MAAM,CA2BjD"}
|
package/dist/eval/report.js
CHANGED
|
@@ -7,16 +7,8 @@ export function formatConsole(report) {
|
|
|
7
7
|
const groups = groupBy(report.cases, (c) => `${c.evalCase.tier}/${c.evalCase.suite}`);
|
|
8
8
|
for (const [group, entries] of groups) {
|
|
9
9
|
lines.push(group);
|
|
10
|
-
for (const
|
|
11
|
-
|
|
12
|
-
const tally = `${score.matched}/${score.runs}`;
|
|
13
|
-
lines.push(` ${tag} ${evalCase.id} ${tally} → ${describeExpectation(evalCase.expect)}`);
|
|
14
|
-
if (!score.pass) {
|
|
15
|
-
lines.push(` got: ${formatHistogram(score.histogram)}`);
|
|
16
|
-
lines.push(` prompt: ${truncate(evalCase.prompt)}`);
|
|
17
|
-
if (evalCase.note)
|
|
18
|
-
lines.push(` note: ${evalCase.note}`);
|
|
19
|
-
}
|
|
10
|
+
for (const entry of entries) {
|
|
11
|
+
lines.push(...formatCase(entry));
|
|
20
12
|
}
|
|
21
13
|
lines.push("");
|
|
22
14
|
}
|
|
@@ -32,21 +24,82 @@ export function toJson(report) {
|
|
|
32
24
|
passed: report.passed,
|
|
33
25
|
failed: report.failed,
|
|
34
26
|
},
|
|
35
|
-
cases: report.cases.map(({ evalCase, score }) => ({
|
|
27
|
+
cases: report.cases.map(({ evalCase, score, solving }) => ({
|
|
36
28
|
id: evalCase.id,
|
|
37
29
|
suite: evalCase.suite,
|
|
38
30
|
tier: evalCase.tier,
|
|
39
31
|
prompt: evalCase.prompt,
|
|
40
|
-
expect: evalCase.expect,
|
|
32
|
+
...(evalCase.tier === "routing" && { expect: evalCase.expect }),
|
|
41
33
|
pass: score.pass,
|
|
42
34
|
matched: score.matched,
|
|
43
35
|
runs: score.runs,
|
|
44
36
|
triggerRate: score.triggerRate,
|
|
45
37
|
threshold: score.threshold,
|
|
46
|
-
|
|
38
|
+
...(solving
|
|
39
|
+
? { solving: jsonSolving(solving) }
|
|
40
|
+
: { histogram: Object.fromEntries(score.histogram) }),
|
|
47
41
|
})),
|
|
48
42
|
}, null, 2);
|
|
49
43
|
}
|
|
44
|
+
const INDENT = " ";
|
|
45
|
+
/**
 * Renders one case report as console lines.
 *
 * The first line is always the PASS/FAIL header with the matched/runs tally
 * and the case description. Passing cases produce only that header. Failing
 * cases add either the solving failure lines (when `solving` is present) or
 * the "got:" histogram line, then the prompt, then the note if there is one.
 *
 * @param report - `{ evalCase, score, solving }` for a single case.
 * @returns the array of lines for this case.
 */
function formatCase({ evalCase, score, solving }) {
    const verdict = score.pass ? "PASS" : "FAIL";
    const header = ` ${verdict} ${evalCase.id} ${score.matched}/${score.runs} → ${describeCase(evalCase)}`;
    if (score.pass) {
        return [header];
    }
    const detail = solving
        ? solvingFailures(solving)
        : [`${INDENT}got: ${formatHistogram(score.histogram)}`];
    const out = [header, ...detail, `${INDENT}prompt: ${truncate(evalCase.prompt)}`];
    if (evalCase.note) {
        out.push(`${INDENT}note: ${evalCase.note}`);
    }
    return out;
}
|
|
60
|
+
/**
 * Produces the short description shown after the arrow in a case header.
 *
 * Routing cases delegate to `describeExpectation`. Any other case is
 * summarised as "<n> assertions", followed by ", <m> rubric dims" when the
 * case has a truthy `rubric`.
 *
 * @param evalCase - the loaded case.
 * @returns the description string.
 */
function describeCase(evalCase) {
    if (evalCase.tier === "routing") {
        return describeExpectation(evalCase.expect);
    }
    const { assert: assertions, rubric } = evalCase;
    const summary = `${assertions.length} assertions`;
    return rubric ? `${summary}, ${rubric.dimensions.length} rubric dims` : summary;
}
|
|
68
|
+
/**
 * Lists the failing checks of a solving breakdown as console lines.
 *
 * For each run, in order: one line per assertion whose `pass` is falsy, then
 * one line per rubric dimension whose verdict is not exactly `true` (so both
 * `false` and "unknown" are reported). Lines get a "run N " prefix only when
 * there is more than one run.
 *
 * @param solving - breakdown with a `perRun` array.
 * @returns the failure lines; empty when nothing failed.
 */
function solvingFailures(solving) {
    return solving.perRun.flatMap((run, index) => {
        const prefix = solving.perRun.length > 1 ? `run ${index + 1} ` : "";
        const failedAssertions = run.assertions
            .filter((a) => !a.pass)
            .map((a) => `${INDENT}${prefix}assert ${a.assertion.kind}: ${a.evidence}`);
        const failedDimensions = (run.rubric?.dimensions ?? [])
            .filter((d) => d.verdict.pass !== true)
            .map((d) => `${INDENT}${prefix}rubric ${d.dimension} (${d.verdict.pass}): ${d.verdict.evidence}`);
        return [...failedAssertions, ...failedDimensions];
    });
}
|
|
84
|
+
/**
 * Converts a solving breakdown into the plain structure used in JSON output.
 *
 * Each run becomes `{ assertions, rubric }`: assertions are reduced to
 * `{ kind, pass, evidence }`, and the rubric to `{ pass, dimensions }` with
 * each dimension as `{ dimension, pass, evidence }`. A run without a rubric
 * gets `rubric: null`.
 *
 * @param solving - breakdown with a `perRun` array.
 * @returns one plain object per run.
 */
function jsonSolving(solving) {
    const assertionJson = ({ assertion, pass, evidence }) => ({
        kind: assertion.kind,
        pass,
        evidence,
    });
    const dimensionJson = ({ dimension, verdict }) => ({
        dimension,
        pass: verdict.pass,
        evidence: verdict.evidence,
    });
    return solving.perRun.map(({ assertions, rubric }) => ({
        assertions: assertions.map(assertionJson),
        rubric: rubric
            ? { pass: rubric.pass, dimensions: rubric.dimensions.map(dimensionJson) }
            : null,
    }));
}
|
|
50
103
|
function describeExpectation(expectation) {
|
|
51
104
|
if ("noSkill" in expectation)
|
|
52
105
|
return "(no skill)";
|
package/dist/eval/report.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"report.js","sourceRoot":"","sources":["../../src/eval/report.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"report.js","sourceRoot":"","sources":["../../src/eval/report.ts"],"names":[],"mappings":"AAoBA,MAAM,UAAU,WAAW,CAAC,KAA4B;IACtD,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;IACxD,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,MAAM,EAAE,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAAkB;IAC9C,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;IAEtF,KAAK,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACtC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClB,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC;QACnC,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;IAC5C,MAAM,GAAG,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;IACxE,KAAK,CAAC,IAAI,CAAC,YAAY,MAAM,CAAC,MAAM,IAAI,KAAK,kBAAkB,GAAG,KAAK,CAAC,CAAC;IACzE,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,MAAM,CAAC,MAAkB;IACvC,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,OAAO,EAAE;YACP,KAAK,EAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM;YACpC,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB;QACD,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC;YACzD,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,GAAG,CAAC,QAAQ,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;YAC/D,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,WAAW,EAAE,KAAK,CAAC,WAAW;YAC9B,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,GAAG,CAAC,OAAO;gBACT,CAAC,CAAC,EAAE,OAAO,EAAE,WAAW,CAAC,OAAO,CAAC,EAAE;gBACnC,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC;SACxD,
CAAC,CAAC;KACJ,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAG,YAAY,CAAC;AAE5B,SAAS,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAc;IAC1D,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;IACzC,MAAM,KAAK,GAAG,GAAG,KAAK,CAAC,OAAO,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;IAC/C,MAAM,KAAK,GAAG,CAAC,KAAK,GAAG,KAAK,QAAQ,CAAC,EAAE,KAAK,KAAK,OAAO,YAAY,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IAClF,IAAI,KAAK,CAAC,IAAI;QAAE,OAAO,KAAK,CAAC;IAE7B,IAAI,OAAO;QAAE,KAAK,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC;;QAChD,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,QAAQ,eAAe,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;IACrE,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,WAAW,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,CAAC,IAAI;QAAE,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,SAAS,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;IACjE,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,YAAY,CAAC,QAAoB;IACxC,IAAI,QAAQ,CAAC,IAAI,KAAK,SAAS;QAAE,OAAO,mBAAmB,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC7E,MAAM,KAAK,GAAG,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,MAAM,aAAa,CAAC,CAAC;IACvD,IAAI,QAAQ,CAAC,MAAM;QAAE,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,cAAc,CAAC,CAAC;IACpF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,eAAe,CAAC,OAAyB;IAChD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE;QACpC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACpE,KAAK,MAAM,CAAC,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC;YAC/B,IAAI,CAAC,CAAC,CAAC,IAAI;gBAAE,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,GAAG,MAAM,UAAU,CAAC,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;QACzF,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,GAAG,CAAC,MAAM,EAAE,UAAU,IAAI,EAAE,EAAE,CAAC;YAC7C,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CACR,GAAG,MAAM,GAAG,MAAM,UAAU,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,CACrF,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,WAAW,CAAC,OAAyB;IAC5C,OAAO,OAAO,CAAC,MA
AM,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QAClC,UAAU,EAAE,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACrC,IAAI,EAAE,CAAC,CAAC,SAAS,CAAC,IAAI;YACtB,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,QAAQ,EAAE,CAAC,CAAC,QAAQ;SACrB,CAAC,CAAC;QACH,MAAM,EAAE,GAAG,CAAC,MAAM;YAChB,CAAC,CAAC;gBACE,IAAI,EAAE,GAAG,CAAC,MAAM,CAAC,IAAI;gBACrB,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC5C,SAAS,EAAE,CAAC,CAAC,SAAS;oBACtB,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI;oBACpB,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ;iBAC7B,CAAC,CAAC;aACJ;YACH,CAAC,CAAC,IAAI;KACT,CAAC,CAAC,CAAC;AACN,CAAC;AAED,SAAS,mBAAmB,CAAC,WAAwB;IACnD,IAAI,SAAS,IAAI,WAAW;QAAE,OAAO,YAAY,CAAC;IAClD,IAAI,OAAO,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC,KAAK,CAAC;IACrD,IAAI,OAAO,IAAI,WAAW;QAAE,OAAO,WAAW,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;IAC9E,OAAO,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACtC,CAAC;AAED,SAAS,eAAe,CAAC,SAAsC;IAC7D,OAAO,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC;SAC5B,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;SAC3B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,GAAG,EAAE,KAAK,KAAK,EAAE,CAAC;SACvC,IAAI,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC;AAED,SAAS,QAAQ,CAAC,IAAY,EAAE,GAAG,GAAG,EAAE;IACtC,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAChD,OAAO,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;AACvE,CAAC;AAED,SAAS,OAAO,CAAI,KAAmB,EAAE,GAAwB;IAC/D,MAAM,MAAM,GAAG,IAAI,GAAG,EAAe,CAAC;IACtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;QACpB,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,QAAQ;YAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;;YAC7B,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
package/dist/eval/runner.d.ts
CHANGED
|
@@ -1,17 +1,25 @@
|
|
|
1
|
+
import { type SolvingCapture } from "./capture.js";
|
|
1
2
|
import { type DetectionResult } from "./detect.js";
|
|
2
|
-
import type { LoadedCase } from "./cases.js";
|
|
3
|
+
import type { LoadedCase, LoadedRoutingCase, LoadedSolvingCase } from "./cases.js";
|
|
3
4
|
export interface RunnerOptions {
|
|
4
5
|
readonly cwd: string;
|
|
5
6
|
readonly runs?: number;
|
|
6
7
|
readonly concurrency?: number;
|
|
7
8
|
readonly timeoutMs?: number;
|
|
9
|
+
readonly solvingTimeoutMs?: number;
|
|
8
10
|
readonly model?: string;
|
|
9
11
|
readonly claudeBin?: string;
|
|
10
12
|
readonly onRun?: (caseId: string, result: DetectionResult) => void;
|
|
13
|
+
readonly onCapture?: (caseId: string, capture: SolvingCapture) => void;
|
|
11
14
|
}
|
|
12
|
-
export
|
|
13
|
-
readonly
|
|
15
|
+
export type CaseResult = {
|
|
16
|
+
readonly tier: "routing";
|
|
17
|
+
readonly evalCase: LoadedRoutingCase;
|
|
14
18
|
readonly runs: readonly DetectionResult[];
|
|
15
|
-
}
|
|
16
|
-
|
|
19
|
+
} | {
|
|
20
|
+
readonly tier: "solving";
|
|
21
|
+
readonly evalCase: LoadedSolvingCase;
|
|
22
|
+
readonly captures: readonly SolvingCapture[];
|
|
23
|
+
};
|
|
24
|
+
export declare function runCases(cases: readonly LoadedCase[], options: RunnerOptions): Promise<CaseResult[]>;
|
|
17
25
|
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAKA,OAAO,EAAgB,KAAK,cAAc,EAAoB,MAAM,cAAc,CAAC;AACnF,OAAO,EAAkB,KAAK,eAAe,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EAAE,UAAU,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAQnF,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IACnC,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,eAAe,KAAK,IAAI,CAAC;IACnE,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,cAAc,KAAK,IAAI,CAAC;CACxE;AAED,MAAM,MAAM,UAAU,GAClB;IACE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAC;IACrC,QAAQ,CAAC,IAAI,EAAE,SAAS,eAAe,EAAE,CAAC;CAC3C,GACD;IACE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAC;IACrC,QAAQ,CAAC,QAAQ,EAAE,SAAS,cAAc,EAAE,CAAC;CAC9C,CAAC;AAEN,wBAAsB,QAAQ,CAC5B,KAAK,EAAE,SAAS,UAAU,EAAE,EAC5B,OAAO,EAAE,aAAa,GACrB,OAAO,CAAC,UAAU,EAAE,CAAC,CA2BvB"}
|
package/dist/eval/runner.js
CHANGED
|
@@ -1,46 +1,74 @@
|
|
|
1
1
|
import { spawn } from "node:child_process";
|
|
2
|
+
import { readFile } from "node:fs/promises";
|
|
2
3
|
import { isAbsolute, resolve } from "node:path";
|
|
3
4
|
import { createInterface } from "node:readline";
|
|
5
|
+
import { createCaptor } from "./capture.js";
|
|
4
6
|
import { createDetector } from "./detect.js";
|
|
5
7
|
const DEFAULT_RUNS = 5;
|
|
8
|
+
const DEFAULT_SOLVING_RUNS = 1;
|
|
6
9
|
const DEFAULT_CONCURRENCY = 1;
|
|
7
10
|
const DEFAULT_TIMEOUT_MS = 60_000;
|
|
11
|
+
const DEFAULT_SOLVING_TIMEOUT_MS = 300_000;
|
|
8
12
|
export async function runCases(cases, options) {
|
|
9
|
-
const
|
|
13
|
+
const routing = new Map();
|
|
14
|
+
const solving = new Map();
|
|
10
15
|
const jobs = cases.flatMap((evalCase) => {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
16
|
+
if (evalCase.tier === "routing")
|
|
17
|
+
routing.set(evalCase, []);
|
|
18
|
+
else
|
|
19
|
+
solving.set(evalCase, []);
|
|
20
|
+
return Array.from({ length: runsFor(evalCase, options) }, () => evalCase);
|
|
14
21
|
});
|
|
15
22
|
const concurrency = Math.max(1, options.concurrency ?? DEFAULT_CONCURRENCY);
|
|
16
23
|
await forEachLimit(jobs, concurrency, async (evalCase) => {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
24
|
+
if (evalCase.tier === "routing") {
|
|
25
|
+
const result = await runRouting(evalCase, options);
|
|
26
|
+
routing.get(evalCase)?.push(result);
|
|
27
|
+
options.onRun?.(evalCase.id, result);
|
|
28
|
+
}
|
|
29
|
+
else {
|
|
30
|
+
const capture = await runSolving(evalCase, options);
|
|
31
|
+
solving.get(evalCase)?.push(capture);
|
|
32
|
+
options.onCapture?.(evalCase.id, capture);
|
|
33
|
+
}
|
|
20
34
|
});
|
|
21
|
-
return cases.map((evalCase) =>
|
|
35
|
+
return cases.map((evalCase) => evalCase.tier === "routing"
|
|
36
|
+
? { tier: "routing", evalCase, runs: routing.get(evalCase) ?? [] }
|
|
37
|
+
: { tier: "solving", evalCase, captures: solving.get(evalCase) ?? [] });
|
|
38
|
+
}
|
|
39
|
+
/**
 * Picks how many times a case should be executed.
 *
 * Precedence: the case's own `runs`, then `options.runs`, then a tier-based
 * default (DEFAULT_SOLVING_RUNS for "solving", DEFAULT_RUNS otherwise).
 *
 * @param {object} evalCase - Case with `tier` and optional `runs`.
 * @param {object} options - Options with optional `runs`.
 * @returns {number} The run count to use.
 */
function runsFor(evalCase, options) {
    const tierDefault = evalCase.tier === "solving" ? DEFAULT_SOLVING_RUNS : DEFAULT_RUNS;
    const requested = evalCase.runs ?? options.runs;
    if (requested === undefined || requested === null) {
        return tierDefault;
    }
    return requested;
}
|
|
23
43
|
/**
 * Number handed to createDetector for a routing case.
 *
 * When the case's `expect` has a `path` property its length is used;
 * otherwise the answer is 1.
 *
 * @param {object} evalCase - Case whose `expect` object is inspected.
 * @returns {number} `expect.path.length`, or 1 when `expect` has no `path`.
 */
function skillsToCollect(evalCase) {
    const { expect } = evalCase;
    if (!("path" in expect)) {
        return 1;
    }
    return expect.path.length;
}
|
|
26
|
-
async function
|
|
27
|
-
const cwd = evalCase.cwd ? resolveCwd(options.cwd, evalCase.cwd) : options.cwd;
|
|
28
|
-
const args = [
|
|
29
|
-
"-p",
|
|
30
|
-
evalCase.prompt,
|
|
31
|
-
"--output-format",
|
|
32
|
-
"stream-json",
|
|
33
|
-
"--verbose",
|
|
34
|
-
"--include-partial-messages",
|
|
35
|
-
];
|
|
36
|
-
if (options.model)
|
|
37
|
-
args.push("--model", options.model);
|
|
38
|
-
const env = { ...process.env };
|
|
39
|
-
delete env["CLAUDECODE"];
|
|
46
|
+
/**
 * Executes one routing run: streams a session's output into a detector and
 * kills the session as soon as the detector reports it is done.
 *
 * @param {object} evalCase - Routing case to run.
 * @param {object} options - Run options; `timeoutMs` overrides DEFAULT_TIMEOUT_MS.
 * @returns {Promise<*>} Whatever `detector.result(reason)` produces, where
 *   `reason` is "timeout" if the session deadline fired and "no-skill" otherwise.
 */
async function runRouting(evalCase, options) {
    const detector = createDetector(skillsToCollect(evalCase));
    const handlers = {
        timeoutMs: options.timeoutMs ?? DEFAULT_TIMEOUT_MS,
        onLine: (line) => detector.push(line),
        done: () => detector.done,
        // Nothing further is read once the detector is done, so stop the child.
        killOnDone: true,
    };
    const timedOut = await runSession(evalCase, options, handlers);
    // NOTE(review): presumably the reason only matters when no skill was detected; confirm in detect.js.
    if (timedOut) {
        return detector.result("timeout");
    }
    return detector.result("no-skill");
}
|
|
56
|
+
/**
 * Executes one solving run: streams a session's output into a captor, then
 * overlays on-disk contents for the files the case asserts on.
 *
 * Unlike routing runs, the child is not killed when the captor reports done.
 *
 * @param {object} evalCase - Solving case to run.
 * @param {object} options - Run options; `solvingTimeoutMs` overrides
 *   DEFAULT_SOLVING_TIMEOUT_MS and `cwd` is the base working directory.
 * @returns {Promise<object>} The capture from `captor.result(reason)` after
 *   mergeDiskWrites; `reason` is "timeout" if the deadline fired, else "stream-end".
 */
async function runSolving(evalCase, options) {
    const captor = createCaptor();
    const handlers = {
        timeoutMs: options.solvingTimeoutMs ?? DEFAULT_SOLVING_TIMEOUT_MS,
        onLine: (line) => captor.push(line),
        done: () => captor.done,
        killOnDone: false,
    };
    const timedOut = await runSession(evalCase, options, handlers);
    const endReason = timedOut ? "timeout" : "stream-end";
    const streamed = captor.result(endReason);
    return mergeDiskWrites(streamed, evalCase, options.cwd);
}
|
|
67
|
+
async function runSession(evalCase, options, handlers) {
|
|
68
|
+
const cwd = evalCase.cwd ? resolveCwd(options.cwd, evalCase.cwd) : options.cwd;
|
|
69
|
+
const child = spawn(options.claudeBin ?? "claude", buildArgs(evalCase.prompt, options.model), {
|
|
42
70
|
cwd,
|
|
43
|
-
env,
|
|
71
|
+
env: scrubbedEnv(),
|
|
44
72
|
stdio: ["ignore", "pipe", "ignore"],
|
|
45
73
|
});
|
|
46
74
|
const spawnFailure = new Promise((_, reject) => {
|
|
@@ -50,27 +78,73 @@ async function runOnce(evalCase, options) {
|
|
|
50
78
|
const timer = setTimeout(() => {
|
|
51
79
|
deadline.reached = true;
|
|
52
80
|
child.kill("SIGKILL");
|
|
53
|
-
},
|
|
81
|
+
}, handlers.timeoutMs);
|
|
82
|
+
const killOnDone = handlers.killOnDone ? () => child.kill("SIGKILL") : undefined;
|
|
54
83
|
try {
|
|
55
|
-
await Promise.race([
|
|
84
|
+
await Promise.race([
|
|
85
|
+
drain(child.stdout, handlers.onLine, handlers.done, killOnDone),
|
|
86
|
+
spawnFailure,
|
|
87
|
+
]);
|
|
56
88
|
}
|
|
57
89
|
finally {
|
|
58
90
|
clearTimeout(timer);
|
|
59
91
|
if (child.exitCode === null)
|
|
60
92
|
child.kill("SIGKILL");
|
|
61
93
|
}
|
|
62
|
-
return
|
|
94
|
+
return deadline.reached;
|
|
63
95
|
}
|
|
64
|
-
|
|
96
|
+
/**
 * Builds the argument vector for the spawned CLI.
 *
 * @param {string} prompt - Prompt passed as the value of `-p`.
 * @param {string} [model] - When truthy, appended as `--model <model>`.
 * @returns {string[]} Fresh argument array.
 */
function buildArgs(prompt, model) {
    const modelArgs = model ? ["--model", model] : [];
    return [
        "-p",
        prompt,
        "--output-format",
        "stream-json",
        "--verbose",
        "--include-partial-messages",
        ...modelArgs,
    ];
}
|
|
109
|
+
/**
 * Returns a copy of the current process environment without the
 * `CLAUDECODE` variable. `process.env` itself is left untouched.
 *
 * @returns {Object<string, string>} New environment object for the child process.
 */
function scrubbedEnv() {
    // NOTE(review): presumably removed so the child does not see itself as nested in a parent session; confirm.
    const kept = Object.entries(process.env).filter(([name]) => name !== "CLAUDECODE");
    return Object.fromEntries(kept);
}
|
|
114
|
+
/**
 * Feeds a stream to `onLine` one line at a time until the stream ends or
 * `done()` turns truthy.
 *
 * @param {import("node:stream").Readable} stdout - Stream to read lines from.
 * @param {(line: string) => void} onLine - Called for every line, in order.
 * @param {() => boolean} done - Checked after each line; truthy stops reading.
 * @param {(() => void)=} killOnDone - Optional hook invoked once, right before
 *   reading stops because `done()` was truthy.
 * @returns {Promise<void>}
 */
async function drain(stdout, onLine, done, killOnDone) {
    const reader = createInterface({ input: stdout });
    for await (const text of reader) {
        onLine(text);
        if (!done()) {
            continue;
        }
        killOnDone?.();
        return;
    }
}
|
|
124
|
+
/**
 * Overlays on-disk file contents onto a capture's `writes` for every path
 * the case declares through a `wroteFile` assertion.
 *
 * A declared path that can be read replaces (or adds) the entry keyed by that
 * path; a path that cannot be read leaves the captured entry, if any, as is.
 * The input capture is not mutated.
 *
 * @param {object} capture - Capture with a `writes` array of `{ path, content }`.
 * @param {object} evalCase - Case providing `assert` and optional `cwd`.
 * @param {string} baseCwd - Base directory that a relative `evalCase.cwd` resolves against.
 * @returns {Promise<object>} The original capture when nothing is declared,
 *   otherwise a shallow copy with the merged `writes`.
 */
async function mergeDiskWrites(capture, evalCase, baseCwd) {
    const declared = declaredWritePaths(evalCase);
    if (declared.length === 0)
        return capture;
    const cwd = evalCase.cwd ? resolveCwd(baseCwd, evalCase.cwd) : baseCwd;
    // The reads are independent, so issue them together instead of one by one.
    // readFileOrNull never rejects, so Promise.all cannot fail fast here.
    const contents = await Promise.all(declared.map((path) => readFileOrNull(resolveCwd(cwd, path))));
    const writes = new Map(capture.writes.map((w) => [w.path, w]));
    // Apply in declared order so the merged list matches a sequential pass.
    declared.forEach((path, index) => {
        const onDisk = contents[index];
        if (onDisk !== null)
            writes.set(path, { path, content: onDisk });
    });
    return { ...capture, writes: [...writes.values()] };
}
|
|
137
|
+
/**
 * Lists the paths named by the case's `wroteFile` assertions, in order.
 *
 * @param {object} evalCase - Case whose `assert` array is scanned.
 * @returns {Array<*>} The `path` of every assertion with `kind === "wroteFile"`.
 */
function declaredWritePaths(evalCase) {
    return evalCase.assert
        .filter((assertion) => assertion.kind === "wroteFile")
        .map((assertion) => assertion.path);
}
|
|
140
|
+
/**
 * Best-effort UTF-8 read: any failure to read the file yields `null`
 * instead of an error.
 *
 * @param {string} path - File to read.
 * @returns {Promise<string|null>} File contents, or `null` if reading failed.
 */
async function readFileOrNull(path) {
    let contents = null;
    try {
        contents = await readFile(path, "utf8");
    }
    catch {
        // Deliberately ignored: an unreadable file is reported as null.
    }
    return contents;
}
|
|
74
148
|
/**
 * Resolves a case-supplied path against a base directory.
 *
 * @param {string} base - Directory that relative paths are resolved from.
 * @param {string} caseCwd - Absolute or relative path.
 * @returns {string} `caseCwd` unchanged when it is absolute, otherwise
 *   `resolve(base, caseCwd)`.
 */
function resolveCwd(base, caseCwd) {
    if (isAbsolute(caseCwd)) {
        // Absolute paths are returned verbatim, without normalization.
        return caseCwd;
    }
    return resolve(base, caseCwd);
}
|
package/dist/eval/runner.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAEhD,OAAO,EAAE,cAAc,EAAwB,MAAM,aAAa,CAAC;AAGnE,MAAM,YAAY,GAAG,CAAC,CAAC;AACvB,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAC9B,MAAM,kBAAkB,GAAG,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC3C,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAEhD,OAAO,EAAE,YAAY,EAAyC,MAAM,cAAc,CAAC;AACnF,OAAO,EAAE,cAAc,EAAwB,MAAM,aAAa,CAAC;AAGnE,MAAM,YAAY,GAAG,CAAC,CAAC;AACvB,MAAM,oBAAoB,GAAG,CAAC,CAAC;AAC/B,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAC9B,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAClC,MAAM,0BAA0B,GAAG,OAAO,CAAC;AA0B3C,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,KAA4B,EAC5B,OAAsB;IAEtB,MAAM,OAAO,GAAG,IAAI,GAAG,EAAiC,CAAC;IACzD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAgC,CAAC;IACxD,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QACtC,IAAI,QAAQ,CAAC,IAAI,KAAK,SAAS;YAAE,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;;YACtD,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAC/B,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,CAAC;IAC5E,CAAC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,WAAW,IAAI,mBAAmB,CAAC,CAAC;IAC5E,MAAM,YAAY,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE;QACvD,IAAI,QAAQ,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;YAChC,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YACnD,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YACpC,OAAO,CAAC,KAAK,EAAE,CAAC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QACvC,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YACpD,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;YACrC,OAAO,CAAC,SAAS,EAAE,CAAC,QAAQ,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAC5B,QAAQ,CAAC,IAAI,KAAK,SAAS;QACzB,CAAC,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,IAAI,EAAE,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE;QAClE,CAAC,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CACzE,CAAC;AACJ,CAAC;AAED,SAAS,OAAO,CAAC,QAAoB,EAAE,OAAsB;IAC
3D,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,YAAY,CAAC;IACnF,OAAO,QAAQ,CAAC,IAAI,IAAI,OAAO,CAAC,IAAI,IAAI,QAAQ,CAAC;AACnD,CAAC;AAED,SAAS,eAAe,CAAC,QAA2B;IAClD,OAAO,MAAM,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;AACrE,CAAC;AAED,KAAK,UAAU,UAAU,CACvB,QAA2B,EAC3B,OAAsB;IAEtB,MAAM,QAAQ,GAAG,cAAc,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC3D,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,QAAQ,EAAE,OAAO,EAAE;QAClD,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,kBAAkB;QAClD,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC;QACrC,IAAI,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,IAAI;QACzB,UAAU,EAAE,IAAI;KACjB,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;AAC3D,CAAC;AAED,KAAK,UAAU,UAAU,CACvB,QAA2B,EAC3B,OAAsB;IAEtB,MAAM,MAAM,GAAG,YAAY,EAAE,CAAC;IAC9B,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,QAAQ,EAAE,OAAO,EAAE;QAClD,SAAS,EAAE,OAAO,CAAC,gBAAgB,IAAI,0BAA0B;QACjE,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC;QACnC,IAAI,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI;QACvB,UAAU,EAAE,KAAK;KAClB,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;IAClE,OAAO,eAAe,CAAC,OAAO,EAAE,QAAQ,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC;AACzD,CAAC;AASD,KAAK,UAAU,UAAU,CACvB,QAAoB,EACpB,OAAsB,EACtB,QAAyB;IAEzB,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;IAC/E,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,SAAS,IAAI,QAAQ,EAAE,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,CAAC,EAAE;QAC5F,GAAG;QACH,GAAG,EAAE,WAAW,EAAE;QAClB,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC;KACpC,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE;QACpD,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,2BAA2B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;IAC9F,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;QAC5B,QA
AQ,CAAC,OAAO,GAAG,IAAI,CAAC;QACxB,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxB,CAAC,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC;IAEvB,MAAM,UAAU,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IACjF,IAAI,CAAC;QACH,MAAM,OAAO,CAAC,IAAI,CAAC;YACjB,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC;YAC/D,YAAY;SACb,CAAC,CAAC;IACL,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAC;QACpB,IAAI,KAAK,CAAC,QAAQ,KAAK,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,QAAQ,CAAC,OAAO,CAAC;AAC1B,CAAC;AAED,SAAS,SAAS,CAAC,MAAc,EAAE,KAAyB;IAC1D,MAAM,IAAI,GAAG;QACX,IAAI;QACJ,MAAM;QACN,iBAAiB;QACjB,aAAa;QACb,WAAW;QACX,4BAA4B;KAC7B,CAAC;IACF,IAAI,KAAK;QAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IACvC,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,WAAW;IAClB,MAAM,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAC/B,OAAO,GAAG,CAAC,YAAY,CAAC,CAAC;IACzB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,KAAK,UAAU,KAAK,CAClB,MAA6B,EAC7B,MAA8B,EAC9B,IAAmB,EACnB,UAAuB;IAEvB,MAAM,KAAK,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;IACjD,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,CAAC;QACb,IAAI,IAAI,EAAE,EAAE,CAAC;YACX,UAAU,EAAE,EAAE,CAAC;YACf,MAAM;QACR,CAAC;IACH,CAAC;AACH,CAAC;AAED,KAAK,UAAU,eAAe,CAC5B,OAAuB,EACvB,QAA2B,EAC3B,OAAe;IAEf,MAAM,QAAQ,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IAC9C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IAE1C,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;IACvE,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;QAC3D,IAAI,MAAM,KAAK,IAAI;YAAE,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;IACnE,CAAC;IACD,OAAO,EAAE,GAAG,OAAO,EAAE,MAAM,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAA2B,EAAE,CAAC;AAChF,CAAC;AAED,SAAS
,kBAAkB,CAAC,QAA2B;IACrD,OAAO,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AAClF,CAAC;AAED,KAAK,UAAU,cAAc,CAAC,IAAY;IACxC,IAAI,CAAC;QACH,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACtC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,UAAU,CAAC,IAAY,EAAE,OAAe;IAC/C,OAAO,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AAChE,CAAC;AAED,KAAK,UAAU,YAAY,CACzB,KAAmB,EACnB,KAAa,EACb,MAAkC;IAElC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,EAAE,KAAK,IAAI,EAAE;QAC/E,OAAO,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;YAC3B,MAAM,IAAI,CAAC,CAAC;YACZ,IAAI,IAAI,KAAK,SAAS;gBAAE,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC,CAAC,CAAC;IACH,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;AAC7B,CAAC"}
|