@inbrowser/agent 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +2 -2
- package/dist/cli/commands/run.js.map +1 -1
- package/dist/cli/fixtures.d.ts +2 -2
- package/dist/cli/fixtures.d.ts.map +1 -1
- package/dist/cli/fixtures.js +7 -16
- package/dist/cli/fixtures.js.map +1 -1
- package/dist/cli/llm/openrouter.d.ts +4 -4
- package/dist/cli/llm/openrouter.d.ts.map +1 -1
- package/dist/cli/llm/openrouter.js +20 -31
- package/dist/cli/llm/openrouter.js.map +1 -1
- package/dist/diagnostics/index.d.ts +5 -0
- package/dist/diagnostics/index.d.ts.map +1 -0
- package/dist/diagnostics/index.js +3 -0
- package/dist/diagnostics/index.js.map +1 -0
- package/dist/diagnostics/timing.d.ts +48 -0
- package/dist/diagnostics/timing.d.ts.map +1 -0
- package/dist/diagnostics/timing.js +85 -0
- package/dist/diagnostics/timing.js.map +1 -0
- package/dist/diagnostics/truthfulness.d.ts +36 -0
- package/dist/diagnostics/truthfulness.d.ts.map +1 -0
- package/dist/diagnostics/truthfulness.js +180 -0
- package/dist/diagnostics/truthfulness.js.map +1 -0
- package/dist/dispatch-memoization.d.ts +84 -0
- package/dist/dispatch-memoization.d.ts.map +1 -0
- package/dist/dispatch-memoization.js +197 -0
- package/dist/dispatch-memoization.js.map +1 -0
- package/dist/eval/comparison-report.d.ts +164 -0
- package/dist/eval/comparison-report.d.ts.map +1 -0
- package/dist/eval/comparison-report.js +316 -0
- package/dist/eval/comparison-report.js.map +1 -0
- package/dist/eval/fixture.d.ts +74 -0
- package/dist/eval/fixture.d.ts.map +1 -0
- package/dist/eval/fixture.js +217 -0
- package/dist/eval/fixture.js.map +1 -0
- package/dist/eval/index.d.ts +13 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/index.js +7 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/load-node.d.ts +16 -0
- package/dist/eval/load-node.d.ts.map +1 -0
- package/dist/eval/load-node.js +58 -0
- package/dist/eval/load-node.js.map +1 -0
- package/dist/eval/metric-collector.d.ts +209 -0
- package/dist/eval/metric-collector.d.ts.map +1 -0
- package/dist/eval/metric-collector.js +293 -0
- package/dist/eval/metric-collector.js.map +1 -0
- package/dist/eval/run-record.d.ts +76 -0
- package/dist/eval/run-record.d.ts.map +1 -0
- package/dist/eval/run-record.js +32 -0
- package/dist/eval/run-record.js.map +1 -0
- package/dist/eval/runner.d.ts +140 -0
- package/dist/eval/runner.d.ts.map +1 -0
- package/dist/eval/runner.js +310 -0
- package/dist/eval/runner.js.map +1 -0
- package/dist/eval/spec-framework.d.ts +113 -0
- package/dist/eval/spec-framework.d.ts.map +1 -0
- package/dist/eval/spec-framework.js +100 -0
- package/dist/eval/spec-framework.js.map +1 -0
- package/dist/eval/spec-helpers.d.ts +245 -0
- package/dist/eval/spec-helpers.d.ts.map +1 -0
- package/dist/eval/spec-helpers.js +605 -0
- package/dist/eval/spec-helpers.js.map +1 -0
- package/dist/index.d.ts +32 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +14 -3
- package/dist/index.js.map +1 -1
- package/dist/llm-adapter.d.ts +30 -34
- package/dist/llm-adapter.d.ts.map +1 -1
- package/dist/llm-adapter.js +61 -51
- package/dist/llm-adapter.js.map +1 -1
- package/dist/mcp/connect.d.ts +68 -0
- package/dist/mcp/connect.d.ts.map +1 -0
- package/dist/mcp/connect.js +111 -0
- package/dist/mcp/connect.js.map +1 -0
- package/dist/metrics.js +4 -4
- package/dist/metrics.js.map +1 -1
- package/dist/node.d.ts +3 -0
- package/dist/node.d.ts.map +1 -1
- package/dist/node.js +2 -0
- package/dist/node.js.map +1 -1
- package/dist/planner-executor.d.ts +132 -0
- package/dist/planner-executor.d.ts.map +1 -0
- package/dist/planner-executor.js +274 -0
- package/dist/planner-executor.js.map +1 -0
- package/dist/retrieval.d.ts +74 -0
- package/dist/retrieval.d.ts.map +1 -0
- package/dist/retrieval.js +287 -0
- package/dist/retrieval.js.map +1 -0
- package/dist/session.d.ts.map +1 -1
- package/dist/session.js +8 -2
- package/dist/session.js.map +1 -1
- package/dist/skill-catalog.d.ts +81 -0
- package/dist/skill-catalog.d.ts.map +1 -0
- package/dist/skill-catalog.js +388 -0
- package/dist/skill-catalog.js.map +1 -0
- package/dist/skill-router.d.ts +95 -0
- package/dist/skill-router.d.ts.map +1 -0
- package/dist/skill-router.js +130 -0
- package/dist/skill-router.js.map +1 -0
- package/dist/strategy.d.ts +22 -2
- package/dist/strategy.d.ts.map +1 -1
- package/dist/strategy.js +358 -28
- package/dist/strategy.js.map +1 -1
- package/dist/tools.d.ts +15 -1
- package/dist/tools.d.ts.map +1 -1
- package/dist/tools.js +18 -0
- package/dist/tools.js.map +1 -1
- package/dist/types/agent.d.ts +2 -3
- package/dist/types/agent.d.ts.map +1 -1
- package/dist/types/agent.js +1 -1
- package/dist/types/chat.d.ts +0 -15
- package/dist/types/chat.d.ts.map +1 -1
- package/dist/types/llm.d.ts +11 -64
- package/dist/types/llm.d.ts.map +1 -1
- package/dist/types/llm.js +7 -8
- package/dist/types/llm.js.map +1 -1
- package/dist/types/metrics.d.ts +2 -2
- package/dist/types/metrics.d.ts.map +1 -1
- package/dist/types/session.d.ts +2 -2
- package/dist/types/session.d.ts.map +1 -1
- package/dist/types/strategy.d.ts +60 -3
- package/dist/types/strategy.d.ts.map +1 -1
- package/dist/types/tools.d.ts +18 -0
- package/dist/types/tools.d.ts.map +1 -1
- package/dist/types/trace.d.ts +67 -15
- package/dist/types/trace.d.ts.map +1 -1
- package/dist/types/trace.js +5 -3
- package/dist/types/trace.js.map +1 -1
- package/package.json +3 -2
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export { SKILL_NAMES, applyWorkspaceOverrides, parseFixture, validateFixture } from './fixture.js';
|
|
2
|
+
export { defaultSystemPromptBuilder, runFixture, runFixtures } from './runner.js';
|
|
3
|
+
export { createSpecRegistry, evaluateSpec } from './spec-framework.js';
|
|
4
|
+
export { aggregateTrials, collectMetrics, extractTrialMetrics } from './metric-collector.js';
|
|
5
|
+
export { POLARITY, compareMetrics, renderJson, renderMarkdown } from './comparison-report.js';
|
|
6
|
+
export { CUSTOM_SPEC_NAMES, SPEC_FINAL_RULES_EXCLUDES_LITERAL, SPEC_FINAL_RULES_INCLUDES_LITERAL, SPEC_FINAL_RUNTIME_RUN_SUMMARY_OK, SPEC_GAME_RULES_SIMULATOR_ACCEPTS_POSITIVE_AND_REJECTS_CHEAT, SPEC_PYRIC_AGENTS_LINT_CLEAN_AND_RULE_REJECTS_CHEAT, SPEC_REPORT_MENTIONS_ALL_OF, SPEC_REPORT_MENTIONS_AT_LEAST_ONE_OF, SPEC_TRACE_CONTAINS_TOOL_CALL_BY_NAME, STARTER_SPEC_NAMES, finalRulesExcludesLiteral, finalRulesIncludesLiteral, finalRuntimeRunSummaryOk, gameRulesSimulatorAcceptsPositiveAndRejectsCheat, pyricAgentsLintCleanAndRuleRejectsCheat, registerAllSpecs, registerCustomSpecs, registerStarterSpecs, reportMentionsAllOf, reportMentionsAtLeastOneOf, traceContainsToolCallByName, } from './spec-helpers.js';
|
|
7
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAQnG,OAAO,EAAE,0BAA0B,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAGlF,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AASvE,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAW7F,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAE9F,OAAO,EACL,iBAAiB,EACjB,iCAAiC,EACjC,iCAAiC,EACjC,iCAAiC,EACjC,4DAA4D,EAC5D,mDAAmD,EACnD,2BAA2B,EAC3B,oCAAoC,EACpC,qCAAqC,EACrC,kBAAkB,EAClB,yBAAyB,EACzB,yBAAyB,EACzB,wBAAwB,EACxB,gDAAgD,EAChD,uCAAuC,EACvC,gBAAgB,EAChB,mBAAmB,EACnB,oBAAoB,EACpB,mBAAmB,EACnB,0BAA0B,EAC1B,2BAA2B,GAC5B,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Node-only fixture file/directory loader.
|
|
3
|
+
*
|
|
4
|
+
* Browser-safe parsing and validation live in `./fixture.ts`. This
|
|
5
|
+
* module wraps those with `node:fs` reads. Imported by consumers via
|
|
6
|
+
* `@inbrowser/agent/node`, not the universal entry.
|
|
7
|
+
*/
|
|
8
|
+
import { type TaskFixture, type ValidationError } from './fixture.js';
|
|
9
|
+
export declare class FixtureLoadError extends Error {
|
|
10
|
+
readonly file: string;
|
|
11
|
+
readonly errors: ValidationError[];
|
|
12
|
+
constructor(file: string, errors: ValidationError[]);
|
|
13
|
+
}
|
|
14
|
+
export declare function loadFixture(filePath: string): TaskFixture;
|
|
15
|
+
export declare function loadFixtures(dirPath: string): TaskFixture[];
|
|
16
|
+
//# sourceMappingURL=load-node.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"load-node.d.ts","sourceRoot":"","sources":["../../src/eval/load-node.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAKH,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,eAAe,EAAgB,MAAM,cAAc,CAAC;AAEpF,qBAAa,gBAAiB,SAAQ,KAAK;IACzC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,eAAe,EAAE,CAAC;gBAEvB,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,eAAe,EAAE;CAOpD;AAED,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,WAAW,CAOzD;AAED,wBAAgB,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,WAAW,EAAE,CA8B3D"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Node-only fixture file/directory loader.
|
|
3
|
+
*
|
|
4
|
+
* Browser-safe parsing and validation live in `./fixture.ts`. This
|
|
5
|
+
* module wraps those with `node:fs` reads. Imported by consumers via
|
|
6
|
+
* `@inbrowser/agent/node`, not the universal entry.
|
|
7
|
+
*/
|
|
8
|
+
import { readFileSync, readdirSync, statSync } from 'node:fs';
|
|
9
|
+
import { join } from 'node:path';
|
|
10
|
+
import { parseFixture } from './fixture.js';
|
|
11
|
+
/**
 * Error raised when a fixture file fails validation.
 *
 * Keeps the offending file path and the raw validation errors on the
 * instance; `message` is a one-line header followed by one bullet per error.
 */
export class FixtureLoadError extends Error {
    /** Path of the fixture file that failed. */
    file;
    /** Validation errors reported for that file, as handed to the constructor. */
    errors;
    /**
     * @param file Path of the fixture file that failed.
     * @param errors Validation errors; each entry has a `message` and an optional `path`.
     */
    constructor(file, errors) {
        // An entry without a path is rendered by its message alone.
        const describe = (issue) => {
            const location = issue.path ? `${issue.path}: ` : '';
            return ` - ${location}${issue.message}`;
        };
        const details = errors.map((issue) => describe(issue)).join('\n');
        super(`fixture "${file}" failed validation:\n${details}`);
        this.file = file;
        this.errors = errors;
        this.name = 'FixtureLoadError';
    }
}
|
|
22
|
+
/**
 * Read a single fixture file and parse it.
 *
 * @param filePath Path of the fixture file; read synchronously as UTF-8.
 * @returns The fixture produced by `parseFixture` when it reports success.
 * @throws {FixtureLoadError} When `parseFixture` reports failure; the error
 *   carries `filePath` and the reported validation errors.
 */
export function loadFixture(filePath) {
    const parsed = parseFixture(readFileSync(filePath, 'utf8'));
    if (parsed.ok) {
        return parsed.fixture;
    }
    throw new FixtureLoadError(filePath, parsed.errors);
}
|
|
30
|
+
/**
 * Load every `*.fixture.json` regular file found directly inside `dirPath`.
 *
 * Entries whose name does not end in `.fixture.json`, and entries that are
 * not regular files, are skipped. All matching files are parsed before any
 * failure is reported, so one call surfaces every invalid fixture at once.
 *
 * @param dirPath Directory to scan (not recursive).
 * @returns The parsed fixtures, sorted by `id` using `localeCompare`.
 * @throws {Error} When at least one file fails validation. The message lists
 *   every failing file with its errors; the same information is available in
 *   structured form on the error's `failures` property, an array of
 *   `{ file, errors }` objects in directory-listing order.
 */
export function loadFixtures(dirPath) {
    const fixtures = [];
    const failures = [];
    for (const entry of readdirSync(dirPath)) {
        if (!entry.endsWith('.fixture.json')) {
            continue;
        }
        const full = join(dirPath, entry);
        if (!statSync(full).isFile()) {
            continue;
        }
        const result = parseFixture(readFileSync(full, 'utf8'));
        if (result.ok) {
            fixtures.push(result.fixture);
        }
        else {
            // Keep going: collect every failure so the caller sees them all.
            failures.push({ file: full, errors: result.errors });
        }
    }
    if (failures.length > 0) {
        const summary = failures
            .map((f) => `- ${f.file}:\n${f.errors
            .map((e) => ` ${e.path ? `${e.path}: ` : ''}${e.message}`)
            .join('\n')}`)
            .join('\n');
        const error = new Error(`one or more fixtures failed validation:\n${summary}`);
        // Expose the per-file validation errors instead of only a formatted
        // string, mirroring what FixtureLoadError offers for a single file.
        error.failures = failures;
        throw error;
    }
    return fixtures.sort((a, b) => a.id.localeCompare(b.id));
}
|
|
58
|
+
//# sourceMappingURL=load-node.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"load-node.js","sourceRoot":"","sources":["../../src/eval/load-node.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,EAA0C,YAAY,EAAE,MAAM,cAAc,CAAC;AAEpF,MAAM,OAAO,gBAAiB,SAAQ,KAAK;IAChC,IAAI,CAAS;IACb,MAAM,CAAoB;IAEnC,YAAY,IAAY,EAAE,MAAyB;QACjD,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/F,KAAK,CAAC,YAAY,IAAI,yBAAyB,OAAO,EAAE,CAAC,CAAC;QAC1D,IAAI,CAAC,IAAI,GAAG,kBAAkB,CAAC;QAC/B,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;CACF;AAED,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,MAAM,IAAI,GAAG,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC5C,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAClC,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,IAAI,gBAAgB,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;IACtD,CAAC;IACD,OAAO,MAAM,CAAC,OAAO,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,OAAe;IAC1C,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACnC,MAAM,QAAQ,GAAkD,EAAE,CAAC;IAEnE,KAAK,MAAM,KAAK,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACzC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,eAAe,CAAC;YAAE,SAAS;QAC/C,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAClC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE;YAAE,SAAS;QACvC,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;YACf,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QACvD,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,QAAQ;aACrB,GAAG,CACF,CAAC,CAAC,EAAE,EAAE,CACJ,KAAK,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,MAAM;aACtB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC;aAC5D,IAAI
,CAAC,IAAI,CAAC,EAAE,CAClB;aACA,IAAI,CAAC,IAAI,CAAC,CAAC;QACd,MAAM,IAAI,KAAK,CAAC,4CAA4C,OAAO,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AAC3D,CAAC"}
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `collectMetrics` — the eval harness's metric extractor + aggregator.
|
|
3
|
+
*
|
|
4
|
+
* Bridges raw `RunRecord`s (the per-trial captures the runner produces)
|
|
5
|
+
* and the comparison report. Given an array of records, an optional
|
|
6
|
+
* parallel array of spec evaluations, and a tool registry to classify
|
|
7
|
+
* tool calls, the collector returns one `MetricsTable` per fixture.
|
|
8
|
+
* Each table carries:
|
|
9
|
+
*
|
|
10
|
+
* - a row per trial (`TrialMetrics`) with the nine phase-one metrics,
|
|
11
|
+
* - one aggregated row (`AggregatedMetrics`) summarising mean and
|
|
12
|
+
* N-1 standard deviation across trials.
|
|
13
|
+
*
|
|
14
|
+
* The nine metrics are extracted exactly as the implementation plan
|
|
15
|
+
* specifies:
|
|
16
|
+
*
|
|
17
|
+
* 1. `taskSuccess` — pulled from the supplied `SpecResult.ok`.
|
|
18
|
+
* 2. `wallClockMs` — `completedAt - startedAt`.
|
|
19
|
+
* 3. `promptTokens` — sum of `usage.promptTokens` across all
|
|
20
|
+
* `llm_response` events that carry usage.
|
|
21
|
+
* 4. `completionTokens` — sum of `usage.outputTokens` across the
|
|
22
|
+
* same events.
|
|
23
|
+
* 5. `toolCallCount` — total tool calls across `llm_response`
|
|
24
|
+
* events, split into `reads` vs
|
|
25
|
+
* `mutations` by the `parallelSafe` tag.
|
|
26
|
+
* Names not registered in the supplied
|
|
27
|
+
* tool registry count as mutations.
|
|
28
|
+
* 6. `turnCount` — count of distinct `requestId`s in the
|
|
29
|
+
* trace.
|
|
30
|
+
* 7. `peakContextWindowBytes` — max of `JSON.stringify(messages).length`
|
|
31
|
+
* across `llm_request` events.
|
|
32
|
+
* 8. `truthfulnessViolationRate` — `analyzeTruthfulness(trace).violationRate`.
|
|
33
|
+
* 9. `dispatchVsLlmRatio` — sum of `dispatchMs` / sum of `llmMs`
|
|
34
|
+
* across rows from `turnTimingTable(trace)`.
|
|
35
|
+
* `undefined` when either total is zero.
|
|
36
|
+
*
|
|
37
|
+
* Numeric metrics with no data resolve to `undefined`, not `0`, so a
|
|
38
|
+
* downstream consumer can distinguish "no data" from "really zero".
|
|
39
|
+
*
|
|
40
|
+
* Aggregation: mean is the arithmetic average across trials that have
|
|
41
|
+
* a defined value for the metric; `undefined` when no trial has data.
|
|
42
|
+
* Spread is the sample standard deviation (N-1 denominator). A single
|
|
43
|
+
* defined value yields `stdDev: 0`. `taskSuccess` aggregates as a
|
|
44
|
+
* success rate — booleans are cast to `0`/`1` before averaging.
|
|
45
|
+
*
|
|
46
|
+
* No comparison logic, no persistence. The comparison report is a
|
|
47
|
+
* separate branch (`eval/comparison-report`).
|
|
48
|
+
*
|
|
49
|
+
* Browser-safe — no Node imports, no provider-specific code.
|
|
50
|
+
*
|
|
51
|
+
* Note on naming: there are two `RunRecord` types in this package.
|
|
52
|
+
* This collector consumes the eval-harness one defined at
|
|
53
|
+
* `./run-record.js`. The package root re-exports it as
|
|
54
|
+
* `EvalRunRecord` so it does not collide with the unrelated
|
|
55
|
+
* per-MCP-tool-call `RunRecord` at `../metrics/runs.js`.
|
|
56
|
+
*/
|
|
57
|
+
import type { ToolRegistry } from '../types/tools.js';
|
|
58
|
+
import type { RunRecord } from './run-record.js';
|
|
59
|
+
import type { SpecResult } from './spec-framework.js';
|
|
60
|
+
/**
|
|
61
|
+
* Per-trial metric row. One per `RunRecord` consumed. Numeric metrics
|
|
62
|
+
* are `undefined` when the trial produced no data for them (e.g. no
|
|
63
|
+
* `llm_response.usage` events → `promptTokens: undefined`). `taskSuccess`
|
|
64
|
+
* is `undefined` when the caller passed no spec evaluation for the
|
|
65
|
+
* trial.
|
|
66
|
+
*/
|
|
67
|
+
export interface TrialMetrics {
|
|
68
|
+
/** Echoed from `record.fixture.id`. */
|
|
69
|
+
fixtureId: string;
|
|
70
|
+
/** Echoed from `record.trial`. */
|
|
71
|
+
trial: number;
|
|
72
|
+
/** Spec verdict for this trial. `undefined` when no evaluation was
|
|
73
|
+
* supplied or the supplied evaluation was `undefined`. */
|
|
74
|
+
taskSuccess: boolean | undefined;
|
|
75
|
+
/** `completedAt - startedAt` from the record. Always defined. */
|
|
76
|
+
wallClockMs: number;
|
|
77
|
+
/** Sum of `usage.promptTokens` across `llm_response` events that
|
|
78
|
+
* carry usage. `undefined` when no such event was emitted. */
|
|
79
|
+
promptTokens: number | undefined;
|
|
80
|
+
/** Sum of `usage.outputTokens` across `llm_response` events that
|
|
81
|
+
* carry usage. `undefined` when no such event was emitted. */
|
|
82
|
+
completionTokens: number | undefined;
|
|
83
|
+
/** Total tool calls + read/mutation split. `total` is `undefined`
|
|
84
|
+
* when no `llm_response` event carried any tool calls (a no-tool
|
|
85
|
+
* run is not the same as a run that emitted zero tool calls
|
|
86
|
+
* unintentionally — but at the extraction layer both look the
|
|
87
|
+
* same; downstream can decide). */
|
|
88
|
+
toolCallCount: {
|
|
89
|
+
total: number | undefined;
|
|
90
|
+
reads: number | undefined;
|
|
91
|
+
mutations: number | undefined;
|
|
92
|
+
};
|
|
93
|
+
/** Count of distinct `requestId` values across the trace.
|
|
94
|
+
* `undefined` when the trace contains no LLM events. */
|
|
95
|
+
turnCount: number | undefined;
|
|
96
|
+
/** Max of `JSON.stringify(messages).length` across the trace's
|
|
97
|
+
* `llm_request` events. `undefined` when no such event exists. */
|
|
98
|
+
peakContextWindowBytes: number | undefined;
|
|
99
|
+
/** `analyzeTruthfulness(trace).violationRate`. `undefined` when
|
|
100
|
+
* the trace contains no assistant turns (i.e. nothing to score). */
|
|
101
|
+
truthfulnessViolationRate: number | undefined;
|
|
102
|
+
/** Sum-of-dispatchMs divided by sum-of-llmMs across turn-timing
|
|
103
|
+
* rows. `undefined` when either sum is zero. */
|
|
104
|
+
dispatchVsLlmRatio: number | undefined;
|
|
105
|
+
}
|
|
106
|
+
/**
|
|
107
|
+
* Aggregate of one numeric column across the trials of a fixture.
|
|
108
|
+
*
|
|
109
|
+
* `mean` is the arithmetic average across trials that had a defined
|
|
110
|
+
* value for the column. `stdDev` is the sample (N-1) standard
|
|
111
|
+
* deviation across the same trials. Both fields are `undefined` when
|
|
112
|
+
* no trial had data for the column. A single defined value yields
|
|
113
|
+
* `mean` equal to that value and `stdDev: 0`.
|
|
114
|
+
*
|
|
115
|
+
* `count` reports how many trials contributed a defined value, which
|
|
116
|
+
* a downstream report needs to weight or warn about thin samples.
|
|
117
|
+
*/
|
|
118
|
+
export interface AggregateStat {
|
|
119
|
+
mean: number | undefined;
|
|
120
|
+
stdDev: number | undefined;
|
|
121
|
+
count: number;
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* One row per fixture summarising mean + spread across its trials.
|
|
125
|
+
* `taskSuccessRate` is the mean of booleans cast to `0`/`1`. The
|
|
126
|
+
* remaining columns are sample-stat aggregates of the numeric trial
|
|
127
|
+
* metrics. Read/mutation totals follow the same shape as their
|
|
128
|
+
* per-trial counterpart.
|
|
129
|
+
*/
|
|
130
|
+
export interface AggregatedMetrics {
|
|
131
|
+
/** Echoed from the fixture id. */
|
|
132
|
+
fixtureId: string;
|
|
133
|
+
/** Number of trials contributing to this row. */
|
|
134
|
+
trials: number;
|
|
135
|
+
taskSuccessRate: AggregateStat;
|
|
136
|
+
wallClockMs: AggregateStat;
|
|
137
|
+
promptTokens: AggregateStat;
|
|
138
|
+
completionTokens: AggregateStat;
|
|
139
|
+
toolCallCount: {
|
|
140
|
+
total: AggregateStat;
|
|
141
|
+
reads: AggregateStat;
|
|
142
|
+
mutations: AggregateStat;
|
|
143
|
+
};
|
|
144
|
+
turnCount: AggregateStat;
|
|
145
|
+
peakContextWindowBytes: AggregateStat;
|
|
146
|
+
truthfulnessViolationRate: AggregateStat;
|
|
147
|
+
dispatchVsLlmRatio: AggregateStat;
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* One fixture's per-trial rows plus its aggregated row. The
|
|
151
|
+
* comparison report consumes a pair of these (baseline vs variant)
|
|
152
|
+
* and decides whether the variant moved the needle.
|
|
153
|
+
*/
|
|
154
|
+
export interface MetricsTable {
|
|
155
|
+
fixtureId: string;
|
|
156
|
+
trials: TrialMetrics[];
|
|
157
|
+
aggregate: AggregatedMetrics;
|
|
158
|
+
}
|
|
159
|
+
/**
|
|
160
|
+
* Input to `collectMetrics`. `evaluations` is positionally parallel
|
|
161
|
+
* to `records` — index `i` of `evaluations` is the spec result for
|
|
162
|
+
* `records[i]`. A missing entry (either the array is shorter or the
|
|
163
|
+
* slot is `undefined`) leaves `taskSuccess` undefined for that trial.
|
|
164
|
+
*
|
|
165
|
+
* `toolRegistry` is consulted to classify each emitted tool call as a
|
|
166
|
+
* read (parallel-safe) or a mutation (not parallel-safe). Tools whose
|
|
167
|
+
* name is not registered count as mutations.
|
|
168
|
+
*/
|
|
169
|
+
export interface CollectMetricsInput {
|
|
170
|
+
/** The per-trial captures from `runFixture` / `runFixtures`. Order
|
|
171
|
+
* is preserved in the returned tables. */
|
|
172
|
+
records: readonly RunRecord[];
|
|
173
|
+
/** Parallel to `records`. Optional. `undefined` slots and a shorter
|
|
174
|
+
* array both translate to `taskSuccess: undefined` on the row. */
|
|
175
|
+
evaluations?: readonly (SpecResult | undefined)[];
|
|
176
|
+
/** Source of truth for `parallelSafe` tags. The collector reads it
|
|
177
|
+
* via `registry.list()` once; the returned handlers are scanned by
|
|
178
|
+
* `name`. */
|
|
179
|
+
toolRegistry: ToolRegistry;
|
|
180
|
+
}
|
|
181
|
+
/**
|
|
182
|
+
* Compute one `MetricsTable` per fixture from a flat batch of
|
|
183
|
+
* `RunRecord`s. Records are grouped by `fixture.id` in first-seen
|
|
184
|
+
* order; within a group trials are kept in input order. The returned
|
|
185
|
+
* array preserves fixture order from the input.
|
|
186
|
+
*
|
|
187
|
+
* `evaluations` (when supplied) is consumed positionally — index `i`
|
|
188
|
+
* pairs with `records[i]`. A missing slot leaves the trial's
|
|
189
|
+
* `taskSuccess` undefined.
|
|
190
|
+
*
|
|
191
|
+
* Never throws on missing data: every metric extractor degrades to
|
|
192
|
+
* `undefined` rather than throwing. A malformed trace (e.g. an
|
|
193
|
+
* `llm_response` with no `usage`) just contributes nothing to the
|
|
194
|
+
* affected column.
|
|
195
|
+
*/
|
|
196
|
+
export declare function collectMetrics(input: CollectMetricsInput): MetricsTable[];
|
|
197
|
+
/**
|
|
198
|
+
* Extract a single `TrialMetrics` row from a record + optional
|
|
199
|
+
* evaluation. Exported for tests that want to exercise the nine
|
|
200
|
+
* extractors against a hand-built record without going through the
|
|
201
|
+
* fixture-grouping layer.
|
|
202
|
+
*/
|
|
203
|
+
export declare function extractTrialMetrics(record: RunRecord, evaluation: SpecResult | undefined, toolRegistry: ToolRegistry): TrialMetrics;
|
|
204
|
+
/**
|
|
205
|
+
* Aggregate a list of `TrialMetrics` for a single fixture. Exported
|
|
206
|
+
* for tests that want to exercise aggregation in isolation.
|
|
207
|
+
*/
|
|
208
|
+
export declare function aggregateTrials(fixtureId: string, trials: readonly TrialMetrics[]): AggregatedMetrics;
|
|
209
|
+
//# sourceMappingURL=metric-collector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"metric-collector.d.ts","sourceRoot":"","sources":["../../src/eval/metric-collector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuDG;AAKH,OAAO,KAAK,EAAe,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAEnE,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEtD;;;;;;GAMG;AACH,MAAM,WAAW,YAAY;IAC3B,uCAAuC;IACvC,SAAS,EAAE,MAAM,CAAC;IAClB,kCAAkC;IAClC,KAAK,EAAE,MAAM,CAAC;IACd;+DAC2D;IAC3D,WAAW,EAAE,OAAO,GAAG,SAAS,CAAC;IACjC,iEAAiE;IACjE,WAAW,EAAE,MAAM,CAAC;IACpB;mEAC+D;IAC/D,YAAY,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC;mEAC+D;IAC/D,gBAAgB,EAAE,MAAM,GAAG,SAAS,CAAC;IACrC;;;;wCAIoC;IACpC,aAAa,EAAE;QACb,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;QAC1B,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;QAC1B,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;KAC/B,CAAC;IACF;6DACyD;IACzD,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B;uEACmE;IACnE,sBAAsB,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3C;yEACqE;IACrE,yBAAyB,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9C;qDACiD;IACjD,kBAAkB,EAAE,MAAM,GAAG,SAAS,CAAC;CACxC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;IACzB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;;GAMG;AACH,MAAM,WAAW,iBAAiB;IAChC,kCAAkC;IAClC,SAAS,EAAE,MAAM,CAAC;IAClB,iDAAiD;IACjD,MAAM,EAAE,MAAM,CAAC;IACf,eAAe,EAAE,aAAa,CAAC;IAC/B,WAAW,EAAE,aAAa,CAAC;IAC3B,YAAY,EAAE,aAAa,CAAC;IAC5B,gBAAgB,EAAE,aAAa,CAAC;IAChC,aAAa,EAAE;QACb,KAAK,EAAE,aAAa,CAAC;QACrB,KAAK,EAAE,aAAa,CAAC;QACrB,SAAS,EAAE,aAAa,CAAC;KAC1B,CAAC;IACF,SAAS,EAAE,aAAa,CAAC;IACzB,sBAAsB,EAAE,aAAa,CAAC;IACtC,yBAAyB,EAAE,aAAa,CAAC;IACzC,kBAAkB,EAAE,aAAa,CAAC;CACnC;AAED;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,YAAY,EAAE,CAAC;IACvB,SAAS,EAAE,iBAAiB,CAAC;CAC9B;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,mBAAmB;IAClC;+CAC2C;IAC3C,OAAO,EAAE,SAAS,SAAS,EAAE,CAAC;IAC9B;uEACmE;IACnE,WAAW,CAAC,EAAE,SAAS,CAAC,UAAU,GAAG,SAAS,CAAC,EAAE,CAAC;IAClD;;kBAEc;IACd,YAAY,EAAE,YAAY,CAAC;CAC5B;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,mBAAmB,GAAG,YAAY,EAAE,CAqCzE;AAED;
;;;;GAKG;AACH,wBAAgB,mBAAmB,CACjC,MAAM,EAAE,SAAS,EACjB,UAAU,EAAE,UAAU,GAAG,SAAS,EAClC,YAAY,EAAE,YAAY,GACzB,YAAY,CAEd;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,SAAS,YAAY,EAAE,GAC9B,iBAAiB,CAoBnB"}
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `collectMetrics` — the eval harness's metric extractor + aggregator.
|
|
3
|
+
*
|
|
4
|
+
* Bridges raw `RunRecord`s (the per-trial captures the runner produces)
|
|
5
|
+
* and the comparison report. Given an array of records, an optional
|
|
6
|
+
* parallel array of spec evaluations, and a tool registry to classify
|
|
7
|
+
* tool calls, the collector returns one `MetricsTable` per fixture.
|
|
8
|
+
* Each table carries:
|
|
9
|
+
*
|
|
10
|
+
* - a row per trial (`TrialMetrics`) with the nine phase-one metrics,
|
|
11
|
+
* - one aggregated row (`AggregatedMetrics`) summarising mean and
|
|
12
|
+
* N-1 standard deviation across trials.
|
|
13
|
+
*
|
|
14
|
+
* The nine metrics are extracted exactly as the implementation plan
|
|
15
|
+
* specifies:
|
|
16
|
+
*
|
|
17
|
+
* 1. `taskSuccess` — pulled from the supplied `SpecResult.ok`.
|
|
18
|
+
* 2. `wallClockMs` — `completedAt - startedAt`.
|
|
19
|
+
* 3. `promptTokens` — sum of `usage.promptTokens` across all
|
|
20
|
+
* `llm_response` events that carry usage.
|
|
21
|
+
* 4. `completionTokens` — sum of `usage.outputTokens` across the
|
|
22
|
+
* same events.
|
|
23
|
+
* 5. `toolCallCount` — total tool calls across `llm_response`
|
|
24
|
+
* events, split into `reads` vs
|
|
25
|
+
* `mutations` by the `parallelSafe` tag.
|
|
26
|
+
* Names not registered in the supplied
|
|
27
|
+
* tool registry count as mutations.
|
|
28
|
+
* 6. `turnCount` — count of distinct `requestId`s in the
|
|
29
|
+
* trace.
|
|
30
|
+
* 7. `peakContextWindowBytes` — max of `JSON.stringify(messages).length`
|
|
31
|
+
* across `llm_request` events.
|
|
32
|
+
* 8. `truthfulnessViolationRate` — `analyzeTruthfulness(trace).violationRate`.
|
|
33
|
+
* 9. `dispatchVsLlmRatio` — sum of `dispatchMs` / sum of `llmMs`
|
|
34
|
+
* across rows from `turnTimingTable(trace)`.
|
|
35
|
+
* `undefined` when either total is zero.
|
|
36
|
+
*
|
|
37
|
+
* Numeric metrics with no data resolve to `undefined`, not `0`, so a
|
|
38
|
+
* downstream consumer can distinguish "no data" from "really zero".
|
|
39
|
+
*
|
|
40
|
+
* Aggregation: mean is the arithmetic average across trials that have
|
|
41
|
+
* a defined value for the metric; `undefined` when no trial has data.
|
|
42
|
+
* Spread is the sample standard deviation (N-1 denominator). A single
|
|
43
|
+
* defined value yields `stdDev: 0`. `taskSuccess` aggregates as a
|
|
44
|
+
* success rate — booleans are cast to `0`/`1` before averaging.
|
|
45
|
+
*
|
|
46
|
+
* No comparison logic, no persistence. The comparison report is a
|
|
47
|
+
* separate branch (`eval/comparison-report`).
|
|
48
|
+
*
|
|
49
|
+
* Browser-safe — no Node imports, no provider-specific code.
|
|
50
|
+
*
|
|
51
|
+
* Note on naming: there are two `RunRecord` types in this package.
|
|
52
|
+
* This collector consumes the eval-harness one defined at
|
|
53
|
+
* `./run-record.js`. The package root re-exports it as
|
|
54
|
+
* `EvalRunRecord` so it does not collide with the unrelated
|
|
55
|
+
* per-MCP-tool-call `RunRecord` at `../metrics/runs.js`.
|
|
56
|
+
*/
|
|
57
|
+
import { turnTimingTable } from '../diagnostics/timing.js';
|
|
58
|
+
import { analyzeTruthfulness } from '../diagnostics/truthfulness.js';
|
|
59
|
+
import { isParallelSafe } from '../tools.js';
|
|
60
|
+
/**
 * Turn a flat batch of run records into one `MetricsTable` per fixture.
 *
 * Each record becomes a trial row; rows are grouped by their `fixtureId`.
 * Tables come back in the order each fixture id was first seen, and the
 * rows inside a table keep input order.
 *
 * `evaluations`, when present, is read positionally: slot `i` is passed
 * along with `records[i]`. Falsy record slots are skipped.
 *
 * @param input Object with `records`, optional `evaluations`, and `toolRegistry`.
 * @returns One `{ fixtureId, trials, aggregate }` entry per distinct fixture id.
 */
export function collectMetrics(input) {
    const { records, evaluations, toolRegistry } = input;
    // Built once and shared by every trial in the batch.
    const readNameSet = buildReadNameSet(toolRegistry);
    // An explicit id list records first-seen fixture order next to the
    // per-id buckets, so the output order does not hinge on Map iteration.
    const fixtureOrder = [];
    const rowsByFixture = new Map();
    for (const [index, record] of records.entries()) {
        if (!record) {
            continue;
        }
        const evaluation = evaluations?.[index];
        const row = extractTrial(record, evaluation, readNameSet);
        const existing = rowsByFixture.get(row.fixtureId);
        if (existing) {
            existing.push(row);
        }
        else {
            rowsByFixture.set(row.fixtureId, [row]);
            fixtureOrder.push(row.fixtureId);
        }
    }
    const tables = [];
    for (const fixtureId of fixtureOrder) {
        const trials = rowsByFixture.get(fixtureId);
        // Only fixtures with at least one row produce a table.
        if (trials?.length) {
            tables.push({
                fixtureId,
                trials,
                aggregate: aggregateTrials(fixtureId, trials),
            });
        }
    }
    return tables;
}
|
|
112
|
+
/**
 * Produce the `TrialMetrics` row for one record and its optional
 * evaluation, bypassing the per-fixture grouping done by
 * `collectMetrics`. Exported so tests can run the extractors against a
 * hand-built record.
 *
 * @param record Eval run record to extract metrics from.
 * @param evaluation Optional evaluation paired with the record.
 * @param toolRegistry Registry used to decide which tool names are reads.
 * @returns The extracted trial metrics row.
 */
export function extractTrialMetrics(record, evaluation, toolRegistry) {
    const readNames = buildReadNameSet(toolRegistry);
    return extractTrial(record, evaluation, readNames);
}
|
|
121
|
+
/**
 * Summarise the `TrialMetrics` rows of one fixture. Each metric column
 * is reduced with `aggregateSamples`, giving `{ mean, stdDev, count }`
 * over the trials where that metric is a finite number. Exported so
 * tests can exercise aggregation on its own.
 *
 * @param fixtureId Fixture id copied onto the result.
 * @param trials Trial rows belonging to that fixture.
 * @returns Aggregate row with the trial count and one summary per metric.
 */
export function aggregateTrials(fixtureId, trials) {
    // Reduce one column: pick the metric from every trial, then summarise.
    const summarize = (pick) => aggregateSamples(trials.map(pick));
    // Encode success as 1 / 0 so its mean is a rate; an unevaluated
    // trial stays undefined and is left out of the rate.
    const successAsNumber = (t) => {
        if (t.taskSuccess === undefined) {
            return undefined;
        }
        return t.taskSuccess ? 1 : 0;
    };
    return {
        fixtureId,
        trials: trials.length,
        taskSuccessRate: summarize(successAsNumber),
        wallClockMs: summarize((t) => t.wallClockMs),
        promptTokens: summarize((t) => t.promptTokens),
        completionTokens: summarize((t) => t.completionTokens),
        toolCallCount: {
            total: summarize((t) => t.toolCallCount.total),
            reads: summarize((t) => t.toolCallCount.reads),
            mutations: summarize((t) => t.toolCallCount.mutations),
        },
        turnCount: summarize((t) => t.turnCount),
        peakContextWindowBytes: summarize((t) => t.peakContextWindowBytes),
        truthfulnessViolationRate: summarize((t) => t.truthfulnessViolationRate),
        dispatchVsLlmRatio: summarize((t) => t.dispatchVsLlmRatio),
    };
}
|
|
144
|
+
// ---------- internals ----------
|
|
145
|
+
/**
 * Run every metric extractor over one record's trace and assemble the
 * `TrialMetrics` row.
 *
 * @param record Run record; reads `trace`, `fixture.id`, `trial`,
 *   `startedAt` and `completedAt`.
 * @param evaluation Optional evaluation; only its `ok` flag is used.
 * @param readNameSet Set of tool names counted as reads.
 * @returns The trial row. Metrics with no data are `undefined`.
 */
function extractTrial(record, evaluation, readNameSet) {
    const { trace } = record;
    const tokens = sumTokens(trace);
    const toolCalls = countToolCalls(trace, readNameSet);
    const turnCount = countDistinctRequestIds(trace);
    const peakContextWindowBytes = peakContextBytes(trace);
    const truthfulnessViolationRate = computeTruthfulness(trace);
    const dispatchVsLlmRatio = computeDispatchVsLlmRatio(trace);
    // Missing or malformed timestamps would give NaN; report "no data"
    // as undefined like the other extractors do.
    const elapsedMs = record.completedAt - record.startedAt;
    return {
        fixtureId: record.fixture.id,
        trial: record.trial,
        // `?.` also covers a `null` slot, which is what an `undefined`
        // array entry becomes after a JSON round-trip.
        taskSuccess: evaluation?.ok,
        wallClockMs: Number.isFinite(elapsedMs) ? elapsedMs : undefined,
        promptTokens: tokens.promptTokens,
        completionTokens: tokens.completionTokens,
        toolCallCount: toolCalls,
        turnCount,
        peakContextWindowBytes,
        truthfulnessViolationRate,
        dispatchVsLlmRatio,
    };
}
|
|
167
|
+
/**
 * Collect the names of all registered tool handlers for which
 * `isParallelSafe` returns true. Callers treat any tool name missing
 * from the returned set, including unknown names, as a mutation.
 *
 * @param registry Tool registry exposing `list()`.
 * @returns Set of read (parallel-safe) tool names.
 */
function buildReadNameSet(registry) {
    const readNames = new Set();
    for (const handler of registry.list()) {
        if (!isParallelSafe(handler)) {
            continue;
        }
        readNames.add(handler.name);
    }
    return readNames;
}
|
|
179
|
+
/**
 * Sum token usage across every `llm_response` event in the trace.
 *
 * Reads `usage.promptTokens` and `usage.outputTokens` from each event's
 * `data`. Events without `data` or without `usage` are skipped, and a
 * token field that is missing or not a finite number adds 0. Both
 * totals stay `undefined` when no event carried a `usage` object.
 *
 * @param trace Iterable of trace events.
 * @returns `{ promptTokens, completionTokens }`, each a number or undefined.
 */
function sumTokens(trace) {
    // Guard against strings / NaN so the running total stays a number.
    const finiteOrZero = (value) => (typeof value === 'number' && Number.isFinite(value) ? value : 0);
    let promptTokens;
    let completionTokens;
    for (const ev of trace) {
        if (ev.kind !== 'llm_response') {
            continue;
        }
        // A response event with no `data` is skipped instead of throwing.
        const usage = ev.data?.usage;
        if (!usage) {
            continue;
        }
        promptTokens = (promptTokens ?? 0) + finiteOrZero(usage.promptTokens);
        completionTokens = (completionTokens ?? 0) + finiteOrZero(usage.outputTokens);
    }
    return { promptTokens, completionTokens };
}
|
|
193
|
+
/**
 * Count the tool calls requested across all `llm_response` events,
 * split into reads and mutations.
 *
 * A call is a read when its `name` is in `readNameSet`; every other
 * call, including one with an unknown or missing name, is a mutation.
 * A response event with no `data` or no iterable `toolCalls` counts as
 * a response with zero calls.
 *
 * @param trace Iterable of trace events.
 * @param readNameSet Set of tool names counted as reads.
 * @returns `{ total, reads, mutations }`; all three are `undefined`
 *   when the trace has no `llm_response` event at all.
 */
function countToolCalls(trace, readNameSet) {
    let total = 0;
    let reads = 0;
    let mutations = 0;
    let sawAny = false;
    for (const ev of trace) {
        if (ev.kind !== 'llm_response') {
            continue;
        }
        sawAny = true;
        const toolCalls = ev.data?.toolCalls;
        // Skip a missing or non-iterable `toolCalls` instead of letting
        // `for...of` throw on it.
        if (typeof toolCalls?.[Symbol.iterator] !== 'function') {
            continue;
        }
        for (const call of toolCalls) {
            total += 1;
            if (readNameSet.has(call?.name)) {
                reads += 1;
            }
            else {
                mutations += 1;
            }
        }
    }
    if (!sawAny) {
        // No `llm_response` events at all: undefined signals "no data"
        // and distinguishes a fully aborted trial from one that simply
        // emitted zero tool calls.
        return { total: undefined, reads: undefined, mutations: undefined };
    }
    return { total, reads, mutations };
}
|
|
218
|
+
/**
 * Count turns as the number of distinct `requestId`s seen on
 * `llm_request`, `llm_response` and `turn_dispatch_complete` events.
 *
 * Events with no `data` or no `requestId` are ignored, so a malformed
 * event neither throws nor adds a phantom turn with id `undefined`.
 *
 * @param trace Iterable of trace events.
 * @returns Number of distinct request ids, or `undefined` when none were found.
 */
function countDistinctRequestIds(trace) {
    const ids = new Set();
    for (const ev of trace) {
        const isTurnEvent = ev.kind === 'llm_request' ||
            ev.kind === 'llm_response' ||
            ev.kind === 'turn_dispatch_complete';
        if (!isTurnEvent) {
            continue;
        }
        const requestId = ev.data?.requestId;
        // `== null` matches both null and undefined.
        if (requestId == null) {
            continue;
        }
        ids.add(requestId);
    }
    return ids.size === 0 ? undefined : ids.size;
}
|
|
229
|
+
/**
 * Find the largest serialized `messages` payload across all
 * `llm_request` events.
 *
 * Size is the `length` of `JSON.stringify(messages)`, i.e. a count of
 * UTF-16 code units, which equals the UTF-8 byte count only for ASCII
 * content. Requests whose `messages` cannot be serialized to a string
 * (for example because it is missing) are skipped.
 *
 * @param trace Iterable of trace events.
 * @returns The peak size, or `undefined` when no request could be measured.
 */
function peakContextBytes(trace) {
    let peak;
    for (const ev of trace) {
        if (ev.kind !== 'llm_request') {
            continue;
        }
        // JSON.stringify(undefined) returns undefined, not a string;
        // skip those events instead of throwing on `.length`.
        const serialized = JSON.stringify(ev.data?.messages);
        if (typeof serialized !== 'string') {
            continue;
        }
        const bytes = serialized.length;
        if (peak === undefined || bytes > peak) {
            peak = bytes;
        }
    }
    return peak;
}
|
|
240
|
+
/**
 * Truthfulness violation rate for one trace, as reported by
 * `analyzeTruthfulness`.
 *
 * When the report contains zero assistant turns the result is
 * `undefined` rather than the report's own rate, so "nothing to score"
 * is not mistaken for a measured rate.
 *
 * @param trace Trace events to analyse.
 * @returns The violation rate, or `undefined` when there were no assistant turns.
 */
function computeTruthfulness(trace) {
    const { totalAssistantTurns, violationRate } = analyzeTruthfulness(trace);
    return totalAssistantTurns === 0 ? undefined : violationRate;
}
|
|
249
|
+
/**
 * Ratio of total dispatch time to total LLM time for one trace, using
 * the per-turn rows from `turnTimingTable`.
 *
 * Only numeric `llmMs` / `dispatchMs` cells are summed. If either total
 * is zero the ratio is reported as `undefined`.
 *
 * @param trace Trace events to time.
 * @returns `dispatchTotal / llmTotal`, or `undefined` when either total is zero.
 */
function computeDispatchVsLlmRatio(trace) {
    const totals = { llm: 0, dispatch: 0 };
    for (const { llmMs, dispatchMs } of turnTimingTable(trace)) {
        if (typeof llmMs === 'number') {
            totals.llm += llmMs;
        }
        if (typeof dispatchMs === 'number') {
            totals.dispatch += dispatchMs;
        }
    }
    if (totals.llm !== 0 && totals.dispatch !== 0) {
        return totals.dispatch / totals.llm;
    }
    return undefined;
}
|
|
263
|
+
/**
 * Summarise a list of optional numeric samples.
 *
 * Only finite numbers are used; anything else (undefined, NaN,
 * infinities, non-numbers) is ignored. `count` is the number of samples
 * used. With no usable samples, `mean` and `stdDev` are `undefined`.
 * With exactly one, `stdDev` is 0. Otherwise `stdDev` is the sample
 * standard deviation (N-1 denominator).
 *
 * @param samples Iterable of candidate samples.
 * @returns `{ mean, stdDev, count }`.
 */
function aggregateSamples(samples) {
    const usable = [...samples].filter((s) => typeof s === 'number' && Number.isFinite(s));
    const count = usable.length;
    if (count === 0) {
        return { mean: undefined, stdDev: undefined, count: 0 };
    }
    const mean = usable.reduce((acc, value) => acc + value, 0) / count;
    if (count === 1) {
        return { mean, stdDev: 0, count };
    }
    const squaredDeviations = usable.reduce((acc, value) => {
        const delta = value - mean;
        return acc + delta * delta;
    }, 0);
    // count >= 2 here, so the N-1 denominator is never zero.
    return { mean, stdDev: Math.sqrt(squaredDeviations / (count - 1)), count };
}
|
|
293
|
+
//# sourceMappingURL=metric-collector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"metric-collector.js","sourceRoot":"","sources":["../../src/eval/metric-collector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuDG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAC3D,OAAO,EAAE,mBAAmB,EAAE,MAAM,gCAAgC,CAAC;AACrE,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAoI7C;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,cAAc,CAAC,KAA0B;IACvD,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,GAAG,KAAK,CAAC;IACrD,MAAM,WAAW,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAEnD,qEAAqE;IACrE,iEAAiE;IACjE,mEAAmE;IACnE,8DAA8D;IAC9D,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,OAAO,GAAG,IAAI,GAAG,EAA0B,CAAC;IAElD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,CAAC,MAAM;YAAE,SAAS;QACtB,MAAM,UAAU,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QACpC,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,EAAE,UAAU,EAAE,WAAW,CAAC,CAAC;QAC5D,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,CAAC;QAClC,IAAI,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACpC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,GAAG,EAAE,CAAC;YACZ,OAAO,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YAC/B,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACxB,CAAC;QACD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACrB,CAAC;IAED,MAAM,MAAM,GAAmB,EAAE,CAAC;IAClC,KAAK,MAAM,SAAS,IAAI,KAAK,EAAE,CAAC;QAC9B,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACtC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAC7C,MAAM,CAAC,IAAI,CAAC;YACV,SAAS;YACT,MAAM;YACN,SAAS,EAAE,eAAe,CAAC,SAAS,EAAE,MAAM,CAAC;SAC9C,CAAC,CAAC;IACL,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,mBAAmB,CACjC,MAAiB,EACjB,UAAkC,EAClC,YAA0B;IAE1B,OAAO,YAAY,CAAC,MAAM,EAAE,UAAU,EAAE,gBAAgB,CAAC,YAAY,CAAC,CAAC,CAAC;AAC1E,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,eAAe,CAC7B,SAAiB,EACjB,MAA+B;IAE/B,OAAO;QACL,SAAS;QACT,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,eAAe,EAAE,gBAAgB,CAC/B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,KAAK,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CA
AC,CACrF;QACD,WAAW,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QAC/D,YAAY,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;QACjE,gBAAgB,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC;QACzE,aAAa,EAAE;YACb,KAAK,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YACjE,KAAK,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YACjE,SAAS,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;SAC1E;QACD,SAAS,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QAC3D,sBAAsB,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC;QACrF,yBAAyB,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC;QAC3F,kBAAkB,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC;KAC9E,CAAC;AACJ,CAAC;AAED,kCAAkC;AAElC,SAAS,YAAY,CACnB,MAAiB,EACjB,UAAkC,EAClC,WAAgC;IAEhC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;IAC3B,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;IAChC,MAAM,SAAS,GAAG,cAAc,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;IACrD,MAAM,SAAS,GAAG,uBAAuB,CAAC,KAAK,CAAC,CAAC;IACjD,MAAM,sBAAsB,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC;IACvD,MAAM,yBAAyB,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAC7D,MAAM,kBAAkB,GAAG,yBAAyB,CAAC,KAAK,CAAC,CAAC;IAE5D,OAAO;QACL,SAAS,EAAE,MAAM,CAAC,OAAO,CAAC,EAAE;QAC5B,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,WAAW,EAAE,UAAU,KAAK,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE;QACjE,WAAW,EAAE,MAAM,CAAC,WAAW,GAAG,MAAM,CAAC,SAAS;QAClD,YAAY,EAAE,MAAM,CAAC,YAAY;QACjC,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,aAAa,EAAE,SAAS;QACxB,SAAS;QACT,sBAAsB;QACtB,yBAAyB;QACzB,kBAAkB;KACnB,CAAC;AACJ,CAAC;AAED;;uDAEuD;AACvD,SAAS,gBAAgB,CAAC,QAAsB;IAC9C,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,MAAM,QAAQ,GAAkB,QAAQ,CAAC,IAAI,EAAE,CAAC;IAChD,KAAK,MAAM,CAA
C,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,cAAc,CAAC,CAAC,CAAC;YAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,SAAS,CAAC,KAA4B;IAI7C,IAAI,MAA0B,CAAC;IAC/B,IAAI,UAA8B,CAAC;IACnC,KAAK,MAAM,EAAE,IAAI,KAAK,EAAE,CAAC;QACvB,IAAI,EAAE,CAAC,IAAI,KAAK,cAAc;YAAE,SAAS;QACzC,MAAM,KAAK,GAAG,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC;QAC5B,IAAI,CAAC,KAAK;YAAE,SAAS;QACrB,MAAM,GAAG,CAAC,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC;QACnD,UAAU,GAAG,CAAC,UAAU,IAAI,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC;IAC7D,CAAC;IACD,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,gBAAgB,EAAE,UAAU,EAAE,CAAC;AAChE,CAAC;AAED,SAAS,cAAc,CACrB,KAA4B,EAC5B,WAAgC;IAEhC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,MAAM,GAAG,KAAK,CAAC;IACnB,KAAK,MAAM,EAAE,IAAI,KAAK,EAAE,CAAC;QACvB,IAAI,EAAE,CAAC,IAAI,KAAK,cAAc;YAAE,SAAS;QACzC,MAAM,GAAG,IAAI,CAAC;QACd,KAAK,MAAM,IAAI,IAAI,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACrC,KAAK,IAAI,CAAC,CAAC;YACX,IAAI,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,KAAK,IAAI,CAAC,CAAC;;gBACtC,SAAS,IAAI,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IACD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,kEAAkE;QAClE,8DAA8D;QAC9D,uCAAuC;QACvC,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC;IACtE,CAAC;IACD,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,uBAAuB,CAAC,KAA4B;IAC3D,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,IAAI,KAAK,EAAE,CAAC;QACvB,IACE,EAAE,CAAC,IAAI,KAAK,aAAa;YACzB,EAAE,CAAC,IAAI,KAAK,cAAc;YAC1B,EAAE,CAAC,IAAI,KAAK,wBAAwB,EACpC,CAAC;YACD,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC;AAC/C,CAAC;AAED,SAAS,gBAAgB,CAAC,KAA4B;IACpD,IAAI,IAAwB,CAAC;IAC7B,KAAK,MAAM,EAAE,IAAI,KAAK,EAAE,CAAC;QACvB,IAAI,EAAE,CAAC,IAAI,KAAK,aAAa;YAAE,SAAS;QACxC,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;QACtD,IAAI,IAAI,KAAK,SAAS,IAA
I,KAAK,GAAG,IAAI;YAAE,IAAI,GAAG,KAAK,CAAC;IACvD,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,mBAAmB,CAAC,KAA4B;IACvD,kEAAkE;IAClE,sEAAsE;IACtE,8DAA8D;IAC9D,MAAM,MAAM,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAC1C,IAAI,MAAM,CAAC,mBAAmB,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IACvD,OAAO,MAAM,CAAC,aAAa,CAAC;AAC9B,CAAC;AAED,SAAS,yBAAyB,CAAC,KAA4B;IAC7D,MAAM,IAAI,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;IACpC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,IAAI,OAAO,GAAG,CAAC,KAAK,KAAK,QAAQ;YAAE,MAAM,IAAI,GAAG,CAAC,KAAK,CAAC;QACvD,IAAI,OAAO,GAAG,CAAC,UAAU,KAAK,QAAQ;YAAE,WAAW,IAAI,GAAG,CAAC,UAAU,CAAC;IACxE,CAAC;IACD,IAAI,MAAM,KAAK,CAAC,IAAI,WAAW,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IACxD,OAAO,WAAW,GAAG,MAAM,CAAC;AAC9B,CAAC;AAED;;;;;GAKG;AACH,SAAS,gBAAgB,CAAC,OAAwC;IAChE,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnE,CAAC;IACD,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;IAC7B,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IACzE,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,CAAC,IAAI,OAAO;QAAE,GAAG,IAAI,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,GAAG,GAAG,KAAK,CAAC;IACzB,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC;IACnD,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC;QACnB,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IACjB,CAAC;IACD,iDAAiD;IACjD,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC;IAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;AACjC,CAAC"}
|