@jean.gnc/harness-kit 0.11.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/agent/schema.d.ts +2 -2
- package/dist/cli.js +65 -0
- package/dist/cli.js.map +1 -1
- package/dist/compile/emit.d.ts +0 -3
- package/dist/compile/emit.d.ts.map +1 -1
- package/dist/compile/emit.js +2 -3
- package/dist/compile/emit.js.map +1 -1
- package/dist/compile/index.d.ts.map +1 -1
- package/dist/compile/index.js +0 -9
- package/dist/compile/index.js.map +1 -1
- package/dist/compile/validators.d.ts +1 -9
- package/dist/compile/validators.d.ts.map +1 -1
- package/dist/compile/validators.js +13 -29
- package/dist/compile/validators.js.map +1 -1
- package/dist/configs/compile.d.ts.map +1 -1
- package/dist/configs/compile.js +1 -3
- package/dist/configs/compile.js.map +1 -1
- package/dist/eval/cases.d.ts +14 -0
- package/dist/eval/cases.d.ts.map +1 -0
- package/dist/eval/cases.js +84 -0
- package/dist/eval/cases.js.map +1 -0
- package/dist/eval/detect.d.ts +14 -0
- package/dist/eval/detect.d.ts.map +1 -0
- package/dist/eval/detect.js +105 -0
- package/dist/eval/detect.js.map +1 -0
- package/dist/eval/index.d.ts +20 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/index.js +46 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/report.d.ts +15 -0
- package/dist/eval/report.d.ts.map +1 -0
- package/dist/eval/report.js +81 -0
- package/dist/eval/report.js.map +1 -0
- package/dist/eval/runner.d.ts +17 -0
- package/dist/eval/runner.d.ts.map +1 -0
- package/dist/eval/runner.js +89 -0
- package/dist/eval/runner.js.map +1 -0
- package/dist/eval/schema.d.ts +253 -0
- package/dist/eval/schema.d.ts.map +1 -0
- package/dist/eval/schema.js +50 -0
- package/dist/eval/schema.js.map +1 -0
- package/dist/eval/score.d.ts +13 -0
- package/dist/eval/score.d.ts.map +1 -0
- package/dist/eval/score.js +52 -0
- package/dist/eval/score.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import { FQ_ID } from "../ids.js";
|
|
2
|
+
/**
 * Extract a skill id from the input object of a Skill tool call.
 *
 * Reads `skill`, falling back to `command` when `skill` is null/undefined.
 *
 * @param {unknown} input - Candidate tool input (any JSON value).
 * @returns {string | null} The id when it is a string accepted by `FQ_ID`,
 *   otherwise `null`.
 */
function skillIdOf(input) {
    if (input === null || typeof input !== "object") {
        return null;
    }
    const candidate = input["skill"] ?? input["command"];
    if (typeof candidate !== "string") {
        return null;
    }
    return FQ_ID.test(candidate) ? candidate : null;
}
|
|
9
|
+
/**
 * Parse a (possibly still incomplete) JSON buffer.
 *
 * @param {string} buffer - Accumulated JSON text.
 * @returns {unknown} The parsed value, or `null` when the text is not valid JSON.
 */
function parseSkillInput(buffer) {
    let parsed = null;
    try {
        parsed = JSON.parse(buffer);
    }
    catch {
        // Incomplete or malformed input is expected while a tool input is still streaming.
        parsed = null;
    }
    return parsed;
}
|
|
17
|
+
/**
 * Create a stateful detector that scans stream-json output, one line at a
 * time, for `Skill` tool invocations.
 *
 * Two event shapes are recognised:
 *  - `stream_event` wrappers: a `content_block_start` for a `tool_use` block
 *    named `Skill` opens a buffer, `input_json_delta` fragments are appended
 *    to it, and the buffer is resolved as soon as it parses to a valid skill
 *    id (or at `content_block_stop` / `message_stop`).
 *  - complete `assistant` messages: every `tool_use` block named `Skill` in
 *    `message.content` is inspected directly.
 *
 * The detector is finished once `stopAfter` skills were observed, or when a
 * `message_stop` stream event or a `result` event is seen.
 *
 * @param {number} [stopAfter=1] - Number of skill ids to collect before finishing.
 * @returns {{ readonly done: boolean, push(line: string): void,
 *   result(reason: string): { observed: string[], firstSkill: string | null, exitReason: string } }}
 */
export function createDetector(stopAfter = 1) {
    const observed = [];
    // Ids of tool_use blocks already counted. The same call can be reported
    // twice (streamed fragments, then the complete assistant message), which
    // would otherwise fill `observed` with duplicates when stopAfter > 1.
    const countedToolUseIds = new Set();
    let pendingSkillJson = null;
    let pendingToolUseId = null;
    let finished = false;

    /** True for non-null objects (arrays included). */
    function isRecord(value) {
        return typeof value === "object" && value !== null;
    }

    /**
     * Count one detection and report whether the detector is finished.
     * A null id, or an id whose tool_use id was already counted, adds nothing.
     */
    function record(id, toolUseId = null) {
        const hasToolUseId = typeof toolUseId === "string";
        const alreadyCounted = hasToolUseId && countedToolUseIds.has(toolUseId);
        if (id !== null && !alreadyCounted) {
            observed.push(id);
            if (hasToolUseId)
                countedToolUseIds.add(toolUseId);
        }
        if (observed.length >= stopAfter)
            finished = true;
        return finished;
    }

    /** Resolve the buffered tool input (if any) and clear the buffer. */
    function flushPending() {
        if (pendingSkillJson === null)
            return;
        record(skillIdOf(parseSkillInput(pendingSkillJson)), pendingToolUseId);
        pendingSkillJson = null;
        pendingToolUseId = null;
    }

    return {
        /** Whether no further input needs to be read. */
        get done() {
            return finished;
        },
        /**
         * Feed one output line. Blank lines, non-JSON lines and JSON values
         * that are not objects are ignored.
         */
        push(line) {
            if (finished)
                return;
            const trimmed = line.trim();
            if (!trimmed)
                return;
            let event;
            try {
                event = JSON.parse(trimmed);
            }
            catch {
                return;
            }
            // `null`, numbers and strings are valid JSON but carry no event;
            // indexing `null` would throw.
            if (!isRecord(event))
                return;
            const type = event["type"];
            if (type === "stream_event") {
                const se = isRecord(event["event"]) ? event["event"] : {};
                const seType = se["type"];
                if (seType === "content_block_start") {
                    const block = isRecord(se["content_block"]) ? se["content_block"] : {};
                    if (block["type"] === "tool_use" && block["name"] === "Skill") {
                        pendingSkillJson = "";
                        pendingToolUseId = typeof block["id"] === "string" ? block["id"] : null;
                    }
                }
                else if (seType === "content_block_delta" && pendingSkillJson !== null) {
                    const delta = isRecord(se["delta"]) ? se["delta"] : {};
                    if (delta["type"] === "input_json_delta") {
                        const partial = delta["partial_json"];
                        if (typeof partial === "string")
                            pendingSkillJson += partial;
                        // Resolve early: once the buffer already yields a valid id
                        // there is no need to wait for content_block_stop.
                        if (skillIdOf(parseSkillInput(pendingSkillJson)) !== null)
                            flushPending();
                    }
                }
                else if (seType === "content_block_stop") {
                    flushPending();
                }
                else if (seType === "message_stop") {
                    flushPending();
                    finished = true;
                }
                return;
            }
            if (type === "assistant") {
                const message = isRecord(event["message"]) ? event["message"] : {};
                // Only arrays are walked; a string, number or object here has no blocks.
                const content = Array.isArray(message["content"]) ? message["content"] : [];
                for (const block of content) {
                    if (!isRecord(block))
                        continue;
                    if (block["type"] === "tool_use" && block["name"] === "Skill") {
                        if (record(skillIdOf(block["input"]), block["id"]))
                            return;
                    }
                }
                return;
            }
            if (type === "result") {
                finished = true;
            }
        },
        /**
         * Summarise what was seen.
         *
         * @param reason - Exit reason to report when no skill was observed.
         */
        result(reason) {
            return {
                observed,
                firstSkill: observed[0] ?? null,
                exitReason: observed.length > 0 ? "skill" : reason,
            };
        },
    };
}
|
|
105
|
+
//# sourceMappingURL=detect.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detect.js","sourceRoot":"","sources":["../../src/eval/detect.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,WAAW,CAAC;AAgBlC,SAAS,SAAS,CAAC,KAAc;IAC/B,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IAC7D,MAAM,MAAM,GAAG,KAAgC,CAAC;IAChD,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,SAAS,CAAC,CAAC;IACnD,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;AACvE,CAAC;AAED,SAAS,eAAe,CAAC,MAAc;IACrC,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,SAAS,GAAG,CAAC;IAC1C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,gBAAgB,GAAkB,IAAI,CAAC;IAC3C,IAAI,QAAQ,GAAG,KAAK,CAAC;IAErB,SAAS,MAAM,CAAC,EAAiB;QAC/B,IAAI,EAAE,KAAK,IAAI;YAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnC,IAAI,QAAQ,CAAC,MAAM,IAAI,SAAS;YAAE,QAAQ,GAAG,IAAI,CAAC;QAClD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,SAAS,YAAY;QACnB,IAAI,gBAAgB,KAAK,IAAI;YAAE,OAAO;QACtC,MAAM,CAAC,SAAS,CAAC,eAAe,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC;QACrD,gBAAgB,GAAG,IAAI,CAAC;IAC1B,CAAC;IAED,OAAO;QACL,IAAI,IAAI;YACN,OAAO,QAAQ,CAAC;QAClB,CAAC;QACD,IAAI,CAAC,IAAY;YACf,IAAI,QAAQ;gBAAE,OAAO;YACrB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO;gBAAE,OAAO;YACrB,IAAI,KAA8B,CAAC;YACnC,IAAI,CAAC;gBACH,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAA4B,CAAC;YACzD,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;YAED,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;YAE3B,IAAI,IAAI,KAAK,cAAc,EAAE,CAAC;gBAC5B,MAAM,EAAE,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,CAA4B,CAAC;gBAC7D,MAAM,MAAM,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC;gBAE1B,IAAI,MAAM,KAAK,qBAAqB,EAAE,CAAC;oBACrC,MAAM,KAAK,GAAG,CAAC,EAAE,CAAC,eAAe,CAAC,IAAI,EAAE,CAA4B,CAAC;oBACrE,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,UAAU,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,OAAO,EAAE,CAAC;wBAC9D,gBAAgB,GAAG,EAAE,CAAC;oBACxB,CAAC;gBACH,CAAC;qBAAM,IAAI,MAAM,KAAK,qBAAqB,IAAI,gBAAgB,KAAK,IAAI,EAAE,CAAC;oBACzE,MAAM,KAAK,GAAG,CAAC,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,CAA4B,CAAC;oBAC7D,IAAI
,KAAK,CAAC,MAAM,CAAC,KAAK,kBAAkB,EAAE,CAAC;wBACzC,MAAM,OAAO,GAAG,KAAK,CAAC,cAAc,CAAC,CAAC;wBACtC,IAAI,OAAO,OAAO,KAAK,QAAQ;4BAAE,gBAAgB,IAAI,OAAO,CAAC;wBAC7D,IAAI,SAAS,CAAC,eAAe,CAAC,gBAAgB,CAAC,CAAC,KAAK,IAAI;4BAAE,YAAY,EAAE,CAAC;oBAC5E,CAAC;gBACH,CAAC;qBAAM,IAAI,MAAM,KAAK,oBAAoB,EAAE,CAAC;oBAC3C,YAAY,EAAE,CAAC;gBACjB,CAAC;qBAAM,IAAI,MAAM,KAAK,cAAc,EAAE,CAAC;oBACrC,YAAY,EAAE,CAAC;oBACf,QAAQ,GAAG,IAAI,CAAC;gBAClB,CAAC;gBACD,OAAO;YACT,CAAC;YAED,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;gBACzB,MAAM,OAAO,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,CAA4B,CAAC;gBACpE,MAAM,OAAO,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,CAAc,CAAC;gBACxD,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;oBAC3B,MAAM,KAAK,GAAG,IAA+B,CAAC;oBAC9C,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,UAAU,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,OAAO,EAAE,CAAC;wBAC9D,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;4BAAE,OAAO;oBAChD,CAAC;gBACH,CAAC;gBACD,OAAO;YACT,CAAC;YAED,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACtB,QAAQ,GAAG,IAAI,CAAC;YAClB,CAAC;QACH,CAAC;QACD,MAAM,CAAC,MAAkB;YACvB,OAAO;gBACL,QAAQ;gBACR,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI;gBAC/B,UAAU,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;aACnD,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { type Result } from "../result.js";
|
|
2
|
+
import { type CaseLoadError, type LoadedCase } from "./cases.js";
|
|
3
|
+
import { type EvalReport } from "./report.js";
|
|
4
|
+
import { type RunnerOptions } from "./runner.js";
|
|
5
|
+
/** Options accepted by `runEval`. */
export interface EvalOptions {
    /** Directory the eval cases are loaded from. */
    readonly casesDir: string;
    /** Base working directory handed to the runner. */
    readonly cwd: string;
    /** When set, keep only cases whose `suite` equals this value. */
    readonly suite?: string;
    /** When set, keep only the case whose `id` equals this value. */
    readonly caseId?: string;
    /** When set, keep only cases whose `tier` equals this value. */
    readonly tier?: LoadedCase["tier"];
    /** Forwarded to the runner as `RunnerOptions.runs` when defined. */
    readonly runs?: number;
    /** Forwarded to the runner as `RunnerOptions.concurrency` when defined. */
    readonly concurrency?: number;
    /** Forwarded to the runner as `RunnerOptions.model` when defined. */
    readonly model?: string;
    /** Forwarded to the runner as `RunnerOptions.onRun` when defined. */
    readonly onRun?: RunnerOptions["onRun"];
}
|
|
16
|
+
/**
 * Load, filter, run and score eval cases.
 *
 * Resolves to an `EvalReport` on success, or to a list of `CaseLoadError`s on failure.
 */
export declare function runEval(options: EvalOptions): Promise<Result<EvalReport, CaseLoadError[]>>;
|
|
17
|
+
export type { EvalReport, CaseReport } from "./report.js";
|
|
18
|
+
export { formatConsole, toJson } from "./report.js";
|
|
19
|
+
export type { LoadedCase, CaseLoadError } from "./cases.js";
|
|
20
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,cAAc,CAAC;AACpD,OAAO,EAA+B,KAAK,aAAa,EAAE,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC;AAC9F,OAAO,EAAgC,KAAK,UAAU,EAAE,MAAM,aAAa,CAAC;AAC5E,OAAO,EAAY,KAAK,aAAa,EAAE,MAAM,aAAa,CAAC;AAG3D,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,KAAK,CAAC,EAAE,aAAa,CAAC,OAAO,CAAC,CAAC;CACzC;AAED,wBAAsB,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,aAAa,EAAE,CAAC,CAAC,CA6BhG;AAWD,YAAY,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACpD,YAAY,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { defaultSources, discoverInstalled, indexInstalled } from "../installed.js";
|
|
2
|
+
import { err, ok } from "../result.js";
|
|
3
|
+
import { loadCases, unresolvedSkills } from "./cases.js";
|
|
4
|
+
import { buildReport } from "./report.js";
|
|
5
|
+
import { runCases } from "./runner.js";
|
|
6
|
+
import { scoreCase } from "./score.js";
|
|
7
|
+
/**
 * Run the eval pipeline: load cases, filter them, check that the skills they
 * reference are installed, execute every case and score the outcome.
 *
 * @param options - See `EvalOptions`.
 * @returns `ok(report)` on success. `err([...])` when loading fails, when no
 *   case matches the filters, or when `unresolvedSkills` reports problems.
 */
export async function runEval(options) {
    const loaded = await loadCases(options.casesDir);
    if (!loaded.ok) {
        return err(loaded.error);
    }

    const selected = select(loaded.value, options);
    if (selected.length === 0) {
        return err([{ file: options.casesDir, message: "no cases matched the given filters" }]);
    }

    // Fail before spawning anything if selected cases reference skills that are not installed.
    const discovered = await discoverInstalled(defaultSources());
    const installedIds = new Set(indexInstalled(discovered).skills.keys());
    const unresolved = unresolvedSkills(selected, installedIds);
    if (unresolved.length > 0) {
        return err(unresolved);
    }

    // Copy only the options that were actually provided, so runner defaults apply otherwise.
    const runnerOptions = { cwd: options.cwd };
    for (const key of ["runs", "concurrency", "model", "onRun"]) {
        if (options[key] !== undefined) {
            runnerOptions[key] = options[key];
        }
    }

    const results = await runCases(selected, runnerOptions);
    const reports = results.map(({ evalCase, runs }) => {
        const score = scoreCase(evalCase.expect, runs, evalCase.threshold);
        return { evalCase, score };
    });
    return ok(buildReport(reports));
}
|
|
34
|
+
/**
 * Keep the cases that satisfy every filter present in `options`.
 * A filter that is missing or falsy (including the empty string) is not applied.
 *
 * @param cases - Loaded cases.
 * @param options - May carry `caseId`, `suite` and `tier` filters.
 * @returns A new array with the matching cases in their original order.
 */
function select(cases, options) {
    const matches = (wanted, actual) => !wanted || actual === wanted;
    return cases.filter((candidate) => matches(options.caseId, candidate.id)
        && matches(options.suite, candidate.suite)
        && matches(options.tier, candidate.tier));
}
|
|
45
|
+
export { formatConsole, toJson } from "./report.js";
|
|
46
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACpF,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,cAAc,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,gBAAgB,EAAuC,MAAM,YAAY,CAAC;AAC9F,OAAO,EAAE,WAAW,EAAoC,MAAM,aAAa,CAAC;AAC5E,OAAO,EAAE,QAAQ,EAAsB,MAAM,aAAa,CAAC;AAC3D,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAcvC,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,OAAoB;IAChD,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IACjD,IAAI,CAAC,MAAM,CAAC,EAAE;QAAE,OAAO,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAEzC,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAC/C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,QAAQ,EAAE,OAAO,EAAE,oCAAoC,EAAE,CAAC,CAAC,CAAC;IAC1F,CAAC;IAED,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,iBAAiB,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC;IAC5E,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;IACtD,MAAM,UAAU,GAAG,gBAAgB,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAC5D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,GAAG,CAAC,UAAU,CAAC,CAAC;IAElD,MAAM,aAAa,GAAkB;QACnC,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,GAAG,CAAC,OAAO,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;QACzD,GAAG,CAAC,OAAO,CAAC,WAAW,KAAK,SAAS,IAAI,EAAE,WAAW,EAAE,OAAO,CAAC,WAAW,EAAE,CAAC;QAC9E,GAAG,CAAC,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC;QAC5D,GAAG,CAAC,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC;KAC7D,CAAC;IACF,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;IAExD,MAAM,OAAO,GAAiB,OAAO,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;QACjE,QAAQ;QACR,KAAK,EAAE,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,QAAQ,CAAC,SAAS,CAAC;KAC5D,CAAC,CAAC,CAAC;IAEJ,OAAO,EAAE,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;AAClC,CAAC;AAED,SAAS,MAAM,CAAC,KAA4B,EAAE,OAAoB;IAChE,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,EAAE;QAC/B,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,CAAC,EAAE,KAAK,OAAO,CAAC,MAAM;YAAE,OAAO,KAAK,CAAC;QACnE,IAAI,OAAO,CAA
C,KAAK,IAAI,QAAQ,CAAC,KAAK,KAAK,OAAO,CAAC,KAAK;YAAE,OAAO,KAAK,CAAC;QACpE,IAAI,OAAO,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,KAAK,OAAO,CAAC,IAAI;YAAE,OAAO,KAAK,CAAC;QACjE,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAGD,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { LoadedCase } from "./cases.js";
|
|
2
|
+
import type { CaseScore } from "./score.js";
|
|
3
|
+
/** One eval case paired with the score computed for it. */
export interface CaseReport {
    /** The case that was run. */
    readonly evalCase: LoadedCase;
    /** The score computed for that case. */
    readonly score: CaseScore;
}
|
|
7
|
+
/** Aggregate result of an eval run. */
export interface EvalReport {
    /** Per-case reports. */
    readonly cases: readonly CaseReport[];
    /** Number of cases whose score passed. */
    readonly passed: number;
    /** Number of cases whose score did not pass. */
    readonly failed: number;
}
|
|
12
|
+
/** Wrap per-case reports in an `EvalReport` with pass/fail totals. */
export declare function buildReport(cases: readonly CaseReport[]): EvalReport;
|
|
13
|
+
/** Render a report as multi-line, human-readable text. */
export declare function formatConsole(report: EvalReport): string;
|
|
14
|
+
/** Render a report as a JSON string. */
export declare function toJson(report: EvalReport): string;
|
|
15
|
+
//# sourceMappingURL=report.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/eval/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAG5C,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,QAAQ,EAAE,UAAU,CAAC;IAC9B,QAAQ,CAAC,KAAK,EAAE,SAAS,CAAC;CAC3B;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,KAAK,EAAE,SAAS,UAAU,EAAE,CAAC;IACtC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;CACzB;AAED,wBAAgB,WAAW,CAAC,KAAK,EAAE,SAAS,UAAU,EAAE,GAAG,UAAU,CAGpE;AAED,wBAAgB,aAAa,CAAC,MAAM,EAAE,UAAU,GAAG,MAAM,CAuBxD;AAED,wBAAgB,MAAM,CAAC,MAAM,EAAE,UAAU,GAAG,MAAM,CAyBjD"}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
 * Wrap per-case reports in a summary object.
 *
 * @param cases - Reports, each carrying `score.pass`.
 * @returns `{ cases, passed, failed }`, where `passed` counts truthy
 *   `score.pass` values and `failed` is the remainder.
 */
export function buildReport(cases) {
    let passed = 0;
    for (const entry of cases) {
        if (entry.score.pass) {
            passed += 1;
        }
    }
    const failed = cases.length - passed;
    return { cases, passed, failed };
}
|
|
5
|
+
/**
 * Render a report as console text.
 *
 * Cases are grouped under a `tier/suite` heading in first-seen order. Each
 * case gets one PASS/FAIL line; failing cases add the observed histogram, the
 * shortened prompt and the case note (when present). A summary line with the
 * pass percentage closes the output.
 *
 * NOTE(review): spacing inside the template literals is reproduced exactly as
 * shown in this diff view; the rendering may have collapsed runs of spaces —
 * confirm against the published file.
 *
 * @param report - Report as produced by `buildReport`.
 * @returns The lines joined with "\n".
 */
export function formatConsole(report) {
    const out = [];
    const byGroup = groupBy(report.cases, (entry) => `${entry.evalCase.tier}/${entry.evalCase.suite}`);
    byGroup.forEach((entries, heading) => {
        out.push(heading);
        entries.forEach(({ evalCase, score }) => {
            const verdict = score.pass ? "PASS" : "FAIL";
            const tally = `${score.matched}/${score.runs}`;
            out.push(` ${verdict} ${evalCase.id} ${tally} → ${describeExpectation(evalCase.expect)}`);
            if (score.pass) {
                return;
            }
            out.push(` got: ${formatHistogram(score.histogram)}`);
            out.push(` prompt: ${truncate(evalCase.prompt)}`);
            if (evalCase.note) {
                out.push(` note: ${evalCase.note}`);
            }
        });
        out.push("");
    });
    const total = report.passed + report.failed;
    // Guard the division so an empty report prints 0% instead of NaN%.
    const pct = total === 0 ? 0 : Math.round((report.passed / total) * 100);
    out.push(`Summary: ${report.passed}/${total} cases passed (${pct}%).`);
    return out.join("\n");
}
|
|
28
|
+
/**
 * Serialise a report as pretty-printed JSON (two-space indent).
 *
 * The output has a `summary` object (`total`, `passed`, `failed`) and a
 * `cases` array; each case's histogram is converted from key/count entries
 * into a plain object.
 *
 * @param report - Report as produced by `buildReport`.
 * @returns The JSON text.
 */
export function toJson(report) {
    const summary = {
        total: report.passed + report.failed,
        passed: report.passed,
        failed: report.failed,
    };
    const cases = report.cases.map((entry) => {
        const { evalCase, score } = entry;
        return {
            id: evalCase.id,
            suite: evalCase.suite,
            tier: evalCase.tier,
            prompt: evalCase.prompt,
            expect: evalCase.expect,
            pass: score.pass,
            matched: score.matched,
            runs: score.runs,
            triggerRate: score.triggerRate,
            threshold: score.threshold,
            histogram: Object.fromEntries(score.histogram),
        };
    });
    return JSON.stringify({ summary, cases }, null, 2);
}
|
|
50
|
+
/**
 * Describe an expectation in one short phrase.
 *
 * Checked in order: `noSkill`, `first`, `anyOf`; anything else is treated as
 * a `path` expectation.
 *
 * @param expectation - The case's `expect` object.
 * @returns The human-readable description.
 */
function describeExpectation(expectation) {
    if ("noSkill" in expectation) {
        return "(no skill)";
    }
    if ("first" in expectation) {
        return expectation.first;
    }
    if ("anyOf" in expectation) {
        const options = expectation.anyOf.join(", ");
        return `one of [${options}]`;
    }
    return expectation.path.join(" → ");
}
|
|
59
|
+
/**
 * Format a histogram as `id ×count` pairs, highest count first.
 *
 * @param histogram - Map-like object whose `entries()` yields `[id, count]` pairs.
 * @returns The pairs joined with ", "; an empty string for an empty histogram.
 */
function formatHistogram(histogram) {
    const ranked = Array.from(histogram.entries());
    ranked.sort((left, right) => right[1] - left[1]);
    const parts = [];
    for (const [id, count] of ranked) {
        parts.push(`${id} ×${count}`);
    }
    return parts.join(", ");
}
|
|
65
|
+
/**
 * Collapse whitespace and shorten text to at most `max` UTF-16 code units,
 * ending with "…" when anything was cut.
 *
 * @param {string} text - Text to shorten; whitespace runs become single spaces
 *   and the result is trimmed.
 * @param {number} [max=80] - Maximum length of the result, ellipsis included.
 *   Values below 1 yield just "…" for non-empty text.
 * @returns {string} The normalised, possibly shortened text.
 */
function truncate(text, max = 80) {
    const single = text.replace(/\s+/g, " ").trim();
    if (single.length <= max) {
        return single;
    }
    // Reserve one unit for the ellipsis; never let the slice end go negative,
    // which would keep almost the whole string.
    let keep = Math.max(0, max - 1);
    if (keep > 0) {
        const last = single.charCodeAt(keep - 1);
        // Do not end on a high surrogate: that would emit half of an astral character.
        if (last >= 0xd800 && last <= 0xdbff) {
            keep -= 1;
        }
    }
    return `${single.slice(0, keep)}…`;
}
|
|
69
|
+
/**
 * Group items by a computed key, preserving first-seen key order and item order.
 *
 * @param items - Iterable of items.
 * @param key - Function returning the grouping key for an item.
 * @returns A Map from key to the array of items that produced it.
 */
function groupBy(items, key) {
    const buckets = new Map();
    for (const item of items) {
        const bucketKey = key(item);
        if (!buckets.has(bucketKey)) {
            buckets.set(bucketKey, []);
        }
        buckets.get(bucketKey).push(item);
    }
    return buckets;
}
|
|
81
|
+
//# sourceMappingURL=report.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"report.js","sourceRoot":"","sources":["../../src/eval/report.ts"],"names":[],"mappings":"AAeA,MAAM,UAAU,WAAW,CAAC,KAA4B;IACtD,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;IACxD,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,MAAM,EAAE,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAAkB;IAC9C,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;IAEtF,KAAK,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACtC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClB,KAAK,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,IAAI,OAAO,EAAE,CAAC;YAC1C,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;YACzC,MAAM,KAAK,GAAG,GAAG,KAAK,CAAC,OAAO,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;YAC/C,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,QAAQ,CAAC,EAAE,KAAK,KAAK,OAAO,mBAAmB,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YAC5F,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;gBAChB,KAAK,CAAC,IAAI,CAAC,kBAAkB,eAAe,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;gBACjE,KAAK,CAAC,IAAI,CAAC,qBAAqB,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;gBAC7D,IAAI,QAAQ,CAAC,IAAI;oBAAE,KAAK,CAAC,IAAI,CAAC,mBAAmB,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;YACpE,CAAC;QACH,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;IAC5C,MAAM,GAAG,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;IACxE,KAAK,CAAC,IAAI,CAAC,YAAY,MAAM,CAAC,MAAM,IAAI,KAAK,kBAAkB,GAAG,KAAK,CAAC,CAAC;IACzE,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,MAAM,CAAC,MAAkB;IACvC,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,OAAO,EAAE;YACP,KAAK,EAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM;YACpC,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB;QACD,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;YAChD,EAAE,E
AAE,QAAQ,CAAC,EAAE;YACf,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,WAAW,EAAE,KAAK,CAAC,WAAW;YAC9B,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,SAAS,EAAE,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,SAAS,CAAC;SAC/C,CAAC,CAAC;KACJ,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED,SAAS,mBAAmB,CAAC,WAAwB;IACnD,IAAI,SAAS,IAAI,WAAW;QAAE,OAAO,YAAY,CAAC;IAClD,IAAI,OAAO,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC,KAAK,CAAC;IACrD,IAAI,OAAO,IAAI,WAAW;QAAE,OAAO,WAAW,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;IAC9E,OAAO,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACtC,CAAC;AAED,SAAS,eAAe,CAAC,SAAsC;IAC7D,OAAO,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC;SAC5B,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;SAC3B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,GAAG,EAAE,KAAK,KAAK,EAAE,CAAC;SACvC,IAAI,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC;AAED,SAAS,QAAQ,CAAC,IAAY,EAAE,GAAG,GAAG,EAAE;IACtC,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAChD,OAAO,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;AACvE,CAAC;AAED,SAAS,OAAO,CAAI,KAAmB,EAAE,GAAwB;IAC/D,MAAM,MAAM,GAAG,IAAI,GAAG,EAAe,CAAC;IACtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;QACpB,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,QAAQ;YAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;;YAC7B,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { type DetectionResult } from "./detect.js";
|
|
2
|
+
import type { LoadedCase } from "./cases.js";
|
|
3
|
+
/** Options controlling how eval cases are executed by `runCases`. */
export interface RunnerOptions {
    /** Base working directory for spawned processes; a case's own `cwd` is resolved against it. */
    readonly cwd: string;
    /** Runs per case when the case does not set its own `runs`. */
    readonly runs?: number;
    /** Maximum number of runs executed at the same time. */
    readonly concurrency?: number;
    /** Milliseconds after which a run's child process is killed. */
    readonly timeoutMs?: number;
    /** When set, passed to the CLI as `--model`. */
    readonly model?: string;
    /** Executable to spawn in place of the default `claude`. */
    readonly claudeBin?: string;
    /** Called after each individual run with the case id and that run's detection result. */
    readonly onRun?: (caseId: string, result: DetectionResult) => void;
}
|
|
12
|
+
/** An eval case together with the detection result of every run executed for it. */
export interface CaseRuns {
    /** The case that was run. */
    readonly evalCase: LoadedCase;
    /** One detection result per completed run. */
    readonly runs: readonly DetectionResult[];
}
|
|
16
|
+
/** Execute every case the configured number of times and collect the results per case, in input order. */
export declare function runCases(cases: readonly LoadedCase[], options: RunnerOptions): Promise<CaseRuns[]>;
|
|
17
|
+
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAIA,OAAO,EAAkB,KAAK,eAAe,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAM7C,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,eAAe,KAAK,IAAI,CAAC;CACpE;AAED,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,QAAQ,EAAE,UAAU,CAAC;IAC9B,QAAQ,CAAC,IAAI,EAAE,SAAS,eAAe,EAAE,CAAC;CAC3C;AAED,wBAAsB,QAAQ,CAC5B,KAAK,EAAE,SAAS,UAAU,EAAE,EAC5B,OAAO,EAAE,aAAa,GACrB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAgBrB"}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { isAbsolute, resolve } from "node:path";
|
|
3
|
+
import { createInterface } from "node:readline";
|
|
4
|
+
import { createDetector } from "./detect.js";
|
|
5
|
+
const DEFAULT_RUNS = 5;
|
|
6
|
+
const DEFAULT_CONCURRENCY = 4;
|
|
7
|
+
const DEFAULT_TIMEOUT_MS = 60_000;
|
|
8
|
+
/**
 * Run every case the requested number of times with bounded concurrency.
 *
 * The run count is taken from the case's own `runs`, then `options.runs`,
 * then `DEFAULT_RUNS`. All runs of all cases share one job queue.
 *
 * @param cases - Cases to execute.
 * @param options - Runner options; `onRun` is invoked after each completed run.
 * @returns One `{ evalCase, runs }` entry per input case, in input order.
 */
export async function runCases(cases, options) {
    const resultsByCase = new Map();
    const queue = [];
    for (const evalCase of cases) {
        resultsByCase.set(evalCase, []);
        const repeat = evalCase.runs ?? options.runs ?? DEFAULT_RUNS;
        // One queue entry per run of this case.
        for (const job of Array.from({ length: repeat }, () => evalCase)) {
            queue.push(job);
        }
    }

    const concurrency = Math.max(1, options.concurrency ?? DEFAULT_CONCURRENCY);
    const executeJob = async (evalCase) => {
        const result = await runOnce(evalCase, options);
        resultsByCase.get(evalCase)?.push(result);
        options.onRun?.(evalCase.id, result);
    };
    await forEachLimit(queue, concurrency, executeJob);

    return cases.map((evalCase) => {
        const runs = resultsByCase.get(evalCase) ?? [];
        return { evalCase, runs };
    });
}
|
|
23
|
+
/**
 * Number of skill invocations a run must observe for this case: the length
 * of the expected path for `path` expectations, otherwise 1.
 *
 * @param evalCase - Case whose `expect` is inspected.
 * @returns The number of skills to collect.
 */
function skillsToCollect(evalCase) {
    const { expect } = evalCase;
    if ("path" in expect) {
        return expect.path.length;
    }
    return 1;
}
|
|
26
|
+
/**
 * Execute one run of a case: spawn the CLI with the case prompt, feed its
 * stdout to a detector line by line, and kill the child as soon as the
 * detector is done or the timeout elapses.
 *
 * @param evalCase - Case to run; its optional `cwd` is resolved against `options.cwd`.
 * @param options - Runner options (`claudeBin`, `model`, `timeoutMs`, `cwd`).
 * @returns The detector's result; the fallback exit reason is "timeout" when
 *   the deadline fired and "no-skill" otherwise.
 * @throws {Error} When the child process emits `error` before output draining completes.
 */
async function runOnce(evalCase, options) {
    const workingDir = evalCase.cwd ? resolveCwd(options.cwd, evalCase.cwd) : options.cwd;

    const cliArgs = [
        "-p",
        evalCase.prompt,
        "--output-format",
        "stream-json",
        "--verbose",
        "--include-partial-messages",
    ];
    if (options.model) {
        cliArgs.push("--model", options.model);
    }

    // The child gets a copy of the environment without CLAUDECODE.
    // NOTE(review): presumably so the child does not see itself as nested in
    // another session — confirm.
    const childEnv = { ...process.env };
    delete childEnv["CLAUDECODE"];

    const detector = createDetector(skillsToCollect(evalCase));
    const child = spawn(options.claudeBin ?? "claude", cliArgs, {
        cwd: workingDir,
        env: childEnv,
        stdio: ["ignore", "pipe", "ignore"],
    });
    const killChild = () => child.kill("SIGKILL");

    // Never resolves; it only rejects, so a spawn error can win the race below.
    const spawnFailure = new Promise((_resolve, reject) => {
        child.on("error", (cause) => {
            reject(new Error(`failed to spawn claude: ${cause.message}`));
        });
    });

    let timedOut = false;
    const timer = setTimeout(() => {
        timedOut = true;
        killChild();
    }, options.timeoutMs ?? DEFAULT_TIMEOUT_MS);

    try {
        await Promise.race([drain(child.stdout, killChild, detector), spawnFailure]);
    }
    finally {
        clearTimeout(timer);
        // No exit code recorded yet: make sure the child does not outlive the run.
        if (child.exitCode === null) {
            killChild();
        }
    }
    return detector.result(timedOut ? "timeout" : "no-skill");
}
|
|
64
|
+
/**
 * Feed a stream to the detector line by line until the stream ends or the
 * detector reports it is done.
 *
 * @param stdout - Readable stream to read lines from.
 * @param stop - Called once, right after the detector first reports `done`.
 * @param detector - Object exposing `push(line)` and a `done` flag.
 * @returns Resolves when reading has stopped.
 */
async function drain(stdout, stop, detector) {
    const reader = createInterface({ input: stdout });
    for await (const line of reader) {
        detector.push(line);
        if (!detector.done) {
            continue;
        }
        stop();
        return;
    }
}
|
|
74
|
+
/**
 * Resolve a case-specific working directory.
 *
 * @param base - Directory that relative paths are resolved against.
 * @param caseCwd - Absolute or relative directory from the case.
 * @returns `caseCwd` unchanged when absolute, otherwise `base` joined with `caseCwd`.
 */
function resolveCwd(base, caseCwd) {
    if (isAbsolute(caseCwd)) {
        return caseCwd;
    }
    return resolve(base, caseCwd);
}
|
|
77
|
+
/**
 * Run `worker` over `items` with at most `limit` invocations in flight.
 *
 * A fixed set of runner loops pulls items from a shared cursor, so items are
 * started in order. `undefined` items are skipped.
 *
 * @param items - Array of work items.
 * @param {number} limit - Maximum parallelism. Fractions are rounded down;
 *   NaN and anything below 1 are treated as 1, so work is never silently
 *   dropped because no runner was created.
 * @param worker - Async function invoked once per item.
 * @returns Resolves when every item was processed; rejects with the first worker error.
 */
async function forEachLimit(items, limit, worker) {
    const requested = Number(limit);
    // Array.from({ length }) turns NaN, 0, fractions below 1 and negatives
    // into zero runners, which would skip all work without any error.
    const maxParallel = Number.isNaN(requested) || requested < 1 ? 1 : Math.floor(requested);
    let cursor = 0;
    const runners = Array.from({ length: Math.min(maxParallel, items.length) }, async () => {
        while (cursor < items.length) {
            // Claim the item before awaiting so no other runner can take it.
            const item = items[cursor];
            cursor += 1;
            if (item !== undefined)
                await worker(item);
        }
    });
    await Promise.all(runners);
}
|
|
89
|
+
//# sourceMappingURL=runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAEhD,OAAO,EAAE,cAAc,EAAwB,MAAM,aAAa,CAAC;AAGnE,MAAM,YAAY,GAAG,CAAC,CAAC;AACvB,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAC9B,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAiBlC,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,KAA4B,EAC5B,OAAsB;IAEtB,MAAM,MAAM,GAAG,IAAI,GAAG,EAAiC,CAAC;IACxD,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QACtC,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACzB,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,IAAI,OAAO,CAAC,IAAI,IAAI,YAAY,CAAC;QAC5D,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,WAAW,IAAI,mBAAmB,CAAC,CAAC;IAC5E,MAAM,YAAY,CAAC,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE;QACvD,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAChD,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACnC,OAAO,CAAC,KAAK,EAAE,CAAC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC;AACnF,CAAC;AAED,SAAS,eAAe,CAAC,QAAoB;IAC3C,OAAO,MAAM,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;AACrE,CAAC;AAED,KAAK,UAAU,OAAO,CAAC,QAAoB,EAAE,OAAsB;IACjE,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;IAC/E,MAAM,IAAI,GAAG;QACX,IAAI;QACJ,QAAQ,CAAC,MAAM;QACf,iBAAiB;QACjB,aAAa;QACb,WAAW;QACX,4BAA4B;KAC7B,CAAC;IACF,IAAI,OAAO,CAAC,KAAK;QAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;IAEvD,MAAM,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAC/B,OAAO,GAAG,CAAC,YAAY,CAAC,CAAC;IAEzB,MAAM,QAAQ,GAAG,cAAc,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC3D,MAAM,KAAK,GAAG,KAAK,CAAC
,OAAO,CAAC,SAAS,IAAI,QAAQ,EAAE,IAAI,EAAE;QACvD,GAAG;QACH,GAAG;QACH,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC;KACpC,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE;QACpD,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,2BAA2B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;IAC9F,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;QAC5B,QAAQ,CAAC,OAAO,GAAG,IAAI,CAAC;QACxB,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxB,CAAC,EAAE,OAAO,CAAC,SAAS,IAAI,kBAAkB,CAAC,CAAC;IAE5C,IAAI,CAAC;QACH,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,CAAC,EAAE,YAAY,CAAC,CAAC,CAAC;IACjG,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAC;QACpB,IAAI,KAAK,CAAC,QAAQ,KAAK,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,CAAC;IAED,OAAO,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;AACpE,CAAC;AAED,KAAK,UAAU,KAAK,CAClB,MAA6B,EAC7B,IAAgB,EAChB,QAA2C;IAE3C,MAAM,KAAK,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;IACjD,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC/B,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpB,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;YAClB,IAAI,EAAE,CAAC;YACP,MAAM;QACR,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,UAAU,CAAC,IAAY,EAAE,OAAe;IAC/C,OAAO,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AAChE,CAAC;AAED,KAAK,UAAU,YAAY,CACzB,KAAmB,EACnB,KAAa,EACb,MAAkC;IAElC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,EAAE,KAAK,IAAI,EAAE;QAC/E,OAAO,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;YAC3B,MAAM,IAAI,CAAC,CAAC;YACZ,IAAI,IAAI,KAAK,SAAS;gBAAE,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC,CAAC,CAAC;IACH,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;AAC7B,CAAC"}
|