@inbrowser/agent 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/LICENSE +21 -0
  2. package/dist/diagnostics/index.d.ts +5 -0
  3. package/dist/diagnostics/index.d.ts.map +1 -0
  4. package/dist/diagnostics/index.js +3 -0
  5. package/dist/diagnostics/index.js.map +1 -0
  6. package/dist/diagnostics/timing.d.ts +48 -0
  7. package/dist/diagnostics/timing.d.ts.map +1 -0
  8. package/dist/diagnostics/timing.js +85 -0
  9. package/dist/diagnostics/timing.js.map +1 -0
  10. package/dist/diagnostics/truthfulness.d.ts +36 -0
  11. package/dist/diagnostics/truthfulness.d.ts.map +1 -0
  12. package/dist/diagnostics/truthfulness.js +180 -0
  13. package/dist/diagnostics/truthfulness.js.map +1 -0
  14. package/dist/dispatch-memoization.d.ts +84 -0
  15. package/dist/dispatch-memoization.d.ts.map +1 -0
  16. package/dist/dispatch-memoization.js +197 -0
  17. package/dist/dispatch-memoization.js.map +1 -0
  18. package/dist/eval/comparison-report.d.ts +164 -0
  19. package/dist/eval/comparison-report.d.ts.map +1 -0
  20. package/dist/eval/comparison-report.js +316 -0
  21. package/dist/eval/comparison-report.js.map +1 -0
  22. package/dist/eval/fixture.d.ts +74 -0
  23. package/dist/eval/fixture.d.ts.map +1 -0
  24. package/dist/eval/fixture.js +217 -0
  25. package/dist/eval/fixture.js.map +1 -0
  26. package/dist/eval/index.d.ts +13 -0
  27. package/dist/eval/index.d.ts.map +1 -0
  28. package/dist/eval/index.js +7 -0
  29. package/dist/eval/index.js.map +1 -0
  30. package/dist/eval/load-node.d.ts +16 -0
  31. package/dist/eval/load-node.d.ts.map +1 -0
  32. package/dist/eval/load-node.js +58 -0
  33. package/dist/eval/load-node.js.map +1 -0
  34. package/dist/eval/metric-collector.d.ts +209 -0
  35. package/dist/eval/metric-collector.d.ts.map +1 -0
  36. package/dist/eval/metric-collector.js +293 -0
  37. package/dist/eval/metric-collector.js.map +1 -0
  38. package/dist/eval/run-record.d.ts +76 -0
  39. package/dist/eval/run-record.d.ts.map +1 -0
  40. package/dist/eval/run-record.js +32 -0
  41. package/dist/eval/run-record.js.map +1 -0
  42. package/dist/eval/runner.d.ts +140 -0
  43. package/dist/eval/runner.d.ts.map +1 -0
  44. package/dist/eval/runner.js +310 -0
  45. package/dist/eval/runner.js.map +1 -0
  46. package/dist/eval/spec-framework.d.ts +113 -0
  47. package/dist/eval/spec-framework.d.ts.map +1 -0
  48. package/dist/eval/spec-framework.js +100 -0
  49. package/dist/eval/spec-framework.js.map +1 -0
  50. package/dist/eval/spec-helpers.d.ts +245 -0
  51. package/dist/eval/spec-helpers.d.ts.map +1 -0
  52. package/dist/eval/spec-helpers.js +605 -0
  53. package/dist/eval/spec-helpers.js.map +1 -0
  54. package/dist/index.d.ts +24 -3
  55. package/dist/index.d.ts.map +1 -1
  56. package/dist/index.js +11 -1
  57. package/dist/index.js.map +1 -1
  58. package/dist/node.d.ts +1 -0
  59. package/dist/node.d.ts.map +1 -1
  60. package/dist/node.js +1 -0
  61. package/dist/node.js.map +1 -1
  62. package/dist/planner-executor.d.ts +132 -0
  63. package/dist/planner-executor.d.ts.map +1 -0
  64. package/dist/planner-executor.js +274 -0
  65. package/dist/planner-executor.js.map +1 -0
  66. package/dist/skill-catalog.d.ts +81 -0
  67. package/dist/skill-catalog.d.ts.map +1 -0
  68. package/dist/skill-catalog.js +388 -0
  69. package/dist/skill-catalog.js.map +1 -0
  70. package/dist/skill-router.d.ts +95 -0
  71. package/dist/skill-router.d.ts.map +1 -0
  72. package/dist/skill-router.js +130 -0
  73. package/dist/skill-router.js.map +1 -0
  74. package/dist/strategy.d.ts +20 -1
  75. package/dist/strategy.d.ts.map +1 -1
  76. package/dist/strategy.js +333 -13
  77. package/dist/strategy.js.map +1 -1
  78. package/dist/tools.d.ts +15 -1
  79. package/dist/tools.d.ts.map +1 -1
  80. package/dist/tools.js +18 -0
  81. package/dist/tools.js.map +1 -1
  82. package/dist/types/strategy.d.ts +48 -0
  83. package/dist/types/strategy.d.ts.map +1 -1
  84. package/dist/types/tools.d.ts +18 -0
  85. package/dist/types/tools.d.ts.map +1 -1
  86. package/dist/types/trace.d.ts +59 -9
  87. package/dist/types/trace.d.ts.map +1 -1
  88. package/dist/types/trace.js +5 -3
  89. package/dist/types/trace.js.map +1 -1
  90. package/package.json +1 -1
@@ -0,0 +1,7 @@
1
+ export { SKILL_NAMES, applyWorkspaceOverrides, parseFixture, validateFixture } from './fixture.js';
2
+ export { defaultSystemPromptBuilder, runFixture, runFixtures } from './runner.js';
3
+ export { createSpecRegistry, evaluateSpec } from './spec-framework.js';
4
+ export { aggregateTrials, collectMetrics, extractTrialMetrics } from './metric-collector.js';
5
+ export { POLARITY, compareMetrics, renderJson, renderMarkdown } from './comparison-report.js';
6
+ export { CUSTOM_SPEC_NAMES, SPEC_FINAL_RULES_EXCLUDES_LITERAL, SPEC_FINAL_RULES_INCLUDES_LITERAL, SPEC_FINAL_RUNTIME_RUN_SUMMARY_OK, SPEC_GAME_RULES_SIMULATOR_ACCEPTS_POSITIVE_AND_REJECTS_CHEAT, SPEC_PYRIC_AGENTS_LINT_CLEAN_AND_RULE_REJECTS_CHEAT, SPEC_REPORT_MENTIONS_ALL_OF, SPEC_REPORT_MENTIONS_AT_LEAST_ONE_OF, SPEC_TRACE_CONTAINS_TOOL_CALL_BY_NAME, STARTER_SPEC_NAMES, finalRulesExcludesLiteral, finalRulesIncludesLiteral, finalRuntimeRunSummaryOk, gameRulesSimulatorAcceptsPositiveAndRejectsCheat, pyricAgentsLintCleanAndRuleRejectsCheat, registerAllSpecs, registerCustomSpecs, registerStarterSpecs, reportMentionsAllOf, reportMentionsAtLeastOneOf, traceContainsToolCallByName, } from './spec-helpers.js';
7
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAQnG,OAAO,EAAE,0BAA0B,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAGlF,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AASvE,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAW7F,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAE9F,OAAO,EACL,iBAAiB,EACjB,iCAAiC,EACjC,iCAAiC,EACjC,iCAAiC,EACjC,4DAA4D,EAC5D,mDAAmD,EACnD,2BAA2B,EAC3B,oCAAoC,EACpC,qCAAqC,EACrC,kBAAkB,EAClB,yBAAyB,EACzB,yBAAyB,EACzB,wBAAwB,EACxB,gDAAgD,EAChD,uCAAuC,EACvC,gBAAgB,EAChB,mBAAmB,EACnB,oBAAoB,EACpB,mBAAmB,EACnB,0BAA0B,EAC1B,2BAA2B,GAC5B,MAAM,mBAAmB,CAAC"}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Node-only fixture file/directory loader.
3
+ *
4
+ * Browser-safe parsing and validation live in `./fixture.ts`. This
5
+ * module wraps those with `node:fs` reads. Imported by consumers via
6
+ * `@inbrowser/agent/node`, not the universal entry.
7
+ */
8
+ import { type TaskFixture, type ValidationError } from './fixture.js';
9
+ export declare class FixtureLoadError extends Error { // thrown by `loadFixture` when a fixture file fails validation
10
+ readonly file: string; // the `file` argument handed to the constructor
11
+ readonly errors: ValidationError[]; // the validation errors handed to the constructor
12
+ constructor(file: string, errors: ValidationError[]); // message names the file and lists one line per error
13
+ }
14
+ export declare function loadFixture(filePath: string): TaskFixture; // reads and parses one fixture file; throws `FixtureLoadError` when parsing reports errors
15
+ export declare function loadFixtures(dirPath: string): TaskFixture[]; // loads every `*.fixture.json` regular file in `dirPath`, sorted by `id`; throws a plain `Error` listing all invalid files
16
+ //# sourceMappingURL=load-node.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"load-node.d.ts","sourceRoot":"","sources":["../../src/eval/load-node.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAKH,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,eAAe,EAAgB,MAAM,cAAc,CAAC;AAEpF,qBAAa,gBAAiB,SAAQ,KAAK;IACzC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,eAAe,EAAE,CAAC;gBAEvB,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,eAAe,EAAE;CAOpD;AAED,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,WAAW,CAOzD;AAED,wBAAgB,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,WAAW,EAAE,CA8B3D"}
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Node-only fixture file/directory loader.
3
+ *
4
+ * Browser-safe parsing and validation live in `./fixture.ts`. This
5
+ * module wraps those with `node:fs` reads. Imported by consumers via
6
+ * `@inbrowser/agent/node`, not the universal entry.
7
+ */
8
+ import { readFileSync, readdirSync, statSync } from 'node:fs';
9
+ import { join } from 'node:path';
10
+ import { parseFixture } from './fixture.js';
11
+ export class FixtureLoadError extends Error { // error carrying the file name and validation errors of a fixture that failed to parse
12
+ file; // set from the constructor's `file` argument
13
+ errors; // set from the constructor's `errors` argument
14
+ constructor(file, errors) {
15
+ const summary = errors.map((e) => ` - ${e.path ? `${e.path}: ` : ''}${e.message}`).join('\n'); // one bullet per error; the `path: ` prefix is dropped when `e.path` is falsy
16
+ super(`fixture "${file}" failed validation:\n${summary}`);
17
+ this.name = 'FixtureLoadError'; // report the subclass name instead of the inherited "Error"
18
+ this.file = file;
19
+ this.errors = errors;
20
+ }
21
+ }
22
+ export function loadFixture(filePath) { // load a single fixture file and return the parsed fixture
23
+ const json = readFileSync(filePath, 'utf8'); // synchronous read; no try/catch here, so read errors propagate to the caller
24
+ const result = parseFixture(json);
25
+ if (!result.ok) {
26
+ throw new FixtureLoadError(filePath, result.errors); // attach the file path and the errors reported by `parseFixture`
27
+ }
28
+ return result.fixture;
29
+ }
30
+ export function loadFixtures(dirPath) { // load every `*.fixture.json` file directly inside `dirPath`; returns the fixtures sorted by `id`
31
+ const fixtures = [];
32
+ const failures = []; // `{ file, errors }` entries for files that failed to parse
33
+ for (const entry of readdirSync(dirPath)) {
34
+ if (!entry.endsWith('.fixture.json')) // only names with the fixture suffix are considered
35
+ continue;
36
+ const full = join(dirPath, entry);
37
+ if (!statSync(full).isFile()) // skip entries that are not regular files, e.g. a directory with a matching name
38
+ continue;
39
+ const json = readFileSync(full, 'utf8');
40
+ const result = parseFixture(json);
41
+ if (!result.ok) {
42
+ failures.push({ file: full, errors: result.errors }); // collect instead of throwing so every bad file is reported together
43
+ }
44
+ else {
45
+ fixtures.push(result.fixture);
46
+ }
47
+ }
48
+ if (failures.length > 0) {
49
+ const summary = failures
50
+ .map((f) => `- ${f.file}:\n${f.errors
51
+ .map((e) => ` ${e.path ? `${e.path}: ` : ''}${e.message}`)
52
+ .join('\n')}`)
53
+ .join('\n');
54
+ throw new Error(`one or more fixtures failed validation:\n${summary}`); // NOTE(review): plain `Error`, not `FixtureLoadError` as in `loadFixture` — confirm callers do not rely on the subclass here
55
+ }
56
+ return fixtures.sort((a, b) => a.id.localeCompare(b.id)); // in-place sort of the local array, ordered by `id` via `localeCompare`
57
+ }
58
+ //# sourceMappingURL=load-node.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"load-node.js","sourceRoot":"","sources":["../../src/eval/load-node.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,EAA0C,YAAY,EAAE,MAAM,cAAc,CAAC;AAEpF,MAAM,OAAO,gBAAiB,SAAQ,KAAK;IAChC,IAAI,CAAS;IACb,MAAM,CAAoB;IAEnC,YAAY,IAAY,EAAE,MAAyB;QACjD,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/F,KAAK,CAAC,YAAY,IAAI,yBAAyB,OAAO,EAAE,CAAC,CAAC;QAC1D,IAAI,CAAC,IAAI,GAAG,kBAAkB,CAAC;QAC/B,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;CACF;AAED,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,MAAM,IAAI,GAAG,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC5C,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAClC,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,IAAI,gBAAgB,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;IACtD,CAAC;IACD,OAAO,MAAM,CAAC,OAAO,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,OAAe;IAC1C,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACnC,MAAM,QAAQ,GAAkD,EAAE,CAAC;IAEnE,KAAK,MAAM,KAAK,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACzC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,eAAe,CAAC;YAAE,SAAS;QAC/C,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAClC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE;YAAE,SAAS;QACvC,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;YACf,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QACvD,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,QAAQ;aACrB,GAAG,CACF,CAAC,CAAC,EAAE,EAAE,CACJ,KAAK,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,MAAM;aACtB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC;aAC5D,IA
AI,CAAC,IAAI,CAAC,EAAE,CAClB;aACA,IAAI,CAAC,IAAI,CAAC,CAAC;QACd,MAAM,IAAI,KAAK,CAAC,4CAA4C,OAAO,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AAC3D,CAAC"}
@@ -0,0 +1,209 @@
1
+ /**
2
+ * `collectMetrics` — the eval harness's metric extractor + aggregator.
3
+ *
4
+ * Bridges raw `RunRecord`s (the per-trial captures the runner produces)
5
+ * and the comparison report. Given an array of records, an optional
6
+ * parallel array of spec evaluations, and a tool registry to classify
7
+ * tool calls, the collector returns one `MetricsTable` per fixture.
8
+ * Each table carries:
9
+ *
10
+ * - a row per trial (`TrialMetrics`) with the nine metrics listed below,
11
+ * - one aggregated row (`AggregatedMetrics`) summarising mean and
12
+ * N-1 standard deviation across trials.
13
+ *
14
+ * The nine metrics are extracted exactly as the implementation plan
15
+ * specifies:
16
+ *
17
+ * 1. `taskSuccess` — pulled from the supplied `SpecResult.ok`.
18
+ * 2. `wallClockMs` — `completedAt - startedAt`.
19
+ * 3. `promptTokens` — sum of `usage.promptTokens` across all
20
+ * `llm_response` events that carry usage.
21
+ * 4. `completionTokens` — sum of `usage.outputTokens` across the
22
+ * same events.
23
+ * 5. `toolCallCount` — total tool calls across `llm_response`
24
+ * events, split into `reads` vs
25
+ * `mutations` by the `parallelSafe` tag.
26
+ * Names not registered in the supplied
27
+ * tool registry count as mutations.
28
+ * 6. `turnCount` — count of distinct `requestId`s in the
29
+ * trace.
30
+ * 7. `peakContextWindowBytes` — max of `JSON.stringify(messages).length`
31
+ * across `llm_request` events.
32
+ * 8. `truthfulnessViolationRate` — `analyzeTruthfulness(trace).violationRate`.
33
+ * 9. `dispatchVsLlmRatio` — sum of `dispatchMs` / sum of `llmMs`
34
+ * across rows from `turnTimingTable(trace)`.
35
+ * `undefined` when either total is zero.
36
+ *
37
+ * Numeric metrics with no data resolve to `undefined`, not `0`, so a
38
+ * downstream consumer can distinguish "no data" from "really zero".
39
+ *
40
+ * Aggregation: mean is the arithmetic average across trials that have
41
+ * a defined value for the metric; `undefined` when no trial has data.
42
+ * Spread is the sample standard deviation (N-1 denominator). A single
43
+ * defined value yields `stdDev: 0`. `taskSuccess` aggregates as a
44
+ * success rate — booleans are cast to `0`/`1` before averaging.
45
+ *
46
+ * No comparison logic, no persistence. The comparison report is a
47
+ * separate branch (`eval/comparison-report`).
48
+ *
49
+ * Browser-safe — no Node imports, no provider-specific code.
50
+ *
51
+ * Note on naming: there are two `RunRecord` types in this package.
52
+ * This collector consumes the eval-harness one defined at
53
+ * `./run-record.js`. The package root re-exports it as
54
+ * `EvalRunRecord` so it does not collide with the unrelated
55
+ * per-MCP-tool-call `RunRecord` at `../metrics/runs.js`.
56
+ */
57
+ import type { ToolRegistry } from '../types/tools.js';
58
+ import type { RunRecord } from './run-record.js';
59
+ import type { SpecResult } from './spec-framework.js';
60
+ /**
61
+ * Per-trial metric row. One per `RunRecord` consumed. Numeric metrics
62
+ * are `undefined` when the trial produced no data for them (e.g. no
63
+ * `llm_response.usage` events → `promptTokens: undefined`). `taskSuccess`
64
+ * is `undefined` when the caller passed no spec evaluation for the
65
+ * trial.
66
+ */
67
+ export interface TrialMetrics {
68
+ /** Echoed from `record.fixture.id`. */
69
+ fixtureId: string;
70
+ /** Echoed from `record.trial`. */
71
+ trial: number;
72
+ /** Spec verdict for this trial. `undefined` when no evaluation was
73
+ * supplied or the supplied evaluation was `undefined`. */
74
+ taskSuccess: boolean | undefined;
75
+ /** `completedAt - startedAt` from the record. Always defined. */
76
+ wallClockMs: number;
77
+ /** Sum of `usage.promptTokens` across `llm_response` events that
78
+ * carry usage. `undefined` when no such event was emitted. */
79
+ promptTokens: number | undefined;
80
+ /** Sum of `usage.outputTokens` across `llm_response` events that
81
+ * carry usage. `undefined` when no such event was emitted. */
82
+ completionTokens: number | undefined;
83
+ /** Total tool calls + read/mutation split. All three fields are
84
+ * `undefined` only when the trace contains no `llm_response` event
85
+ * at all; a trace whose responses carried zero tool calls reports
86
+ * `0` for each, so the two cases stay distinguishable. A call whose
87
+ * name is not in the parallel-safe set counts as a mutation. */
88
+ toolCallCount: {
89
+ total: number | undefined;
90
+ reads: number | undefined;
91
+ mutations: number | undefined;
92
+ };
93
+ /** Count of distinct `requestId` values across the trace's `llm_request`, `llm_response` and `turn_dispatch_complete` events.
94
+ * `undefined` when the trace contains none of those events. */
95
+ turnCount: number | undefined;
96
+ /** Max of `JSON.stringify(messages).length` across the trace's
97
+ * `llm_request` events. `undefined` when no such event exists. */
98
+ peakContextWindowBytes: number | undefined;
99
+ /** `analyzeTruthfulness(trace).violationRate`. `undefined` when
100
+ * the trace contains no assistant turns (i.e. nothing to score). */
101
+ truthfulnessViolationRate: number | undefined;
102
+ /** Sum-of-dispatchMs divided by sum-of-llmMs across turn-timing
103
+ * rows. `undefined` when either sum is zero. */
104
+ dispatchVsLlmRatio: number | undefined;
105
+ }
106
+ /**
107
+ * Aggregate of one numeric column across the trials of a fixture.
108
+ *
109
+ * `mean` is the arithmetic average across trials that had a defined
110
+ * value for the column. `stdDev` is the sample (N-1) standard
111
+ * deviation across the same trials. Both fields are `undefined` when
112
+ * no trial had data for the column. A single defined value yields
113
+ * `mean` equal to that value and `stdDev: 0`.
114
+ *
115
+ * `count` reports how many trials contributed a defined value, which
116
+ * a downstream report needs to weight or warn about thin samples.
117
+ */
118
+ export interface AggregateStat { // summary of one numeric column across a fixture's trials
119
+ mean: number | undefined; // arithmetic average over trials with a defined value; `undefined` when none had data
120
+ stdDev: number | undefined; // sample (N-1) standard deviation over the same trials; `undefined` when none had data
121
+ count: number; // how many trials contributed a defined value
122
+ }
123
+ /**
124
+ * One row per fixture summarising mean + spread across its trials.
125
+ * `taskSuccessRate` is the mean of booleans cast to `0`/`1`. The
126
+ * remaining columns are sample-stat aggregates of the numeric trial
127
+ * metrics. Read/mutation totals follow the same shape as their
128
+ * per-trial counterpart.
129
+ */
130
+ export interface AggregatedMetrics {
131
+ /** Echoed from the fixture id. */
132
+ fixtureId: string;
133
+ /** Number of trials contributing to this row. */
134
+ trials: number;
135
+ taskSuccessRate: AggregateStat; // built from per-trial `taskSuccess` mapped to 1/0, with `undefined` left as `undefined`
136
+ wallClockMs: AggregateStat; // aggregate of per-trial `wallClockMs`
137
+ promptTokens: AggregateStat; // aggregate of per-trial `promptTokens`
138
+ completionTokens: AggregateStat; // aggregate of per-trial `completionTokens`
139
+ toolCallCount: { // same three-way split as the per-trial `toolCallCount`
140
+ total: AggregateStat;
141
+ reads: AggregateStat;
142
+ mutations: AggregateStat;
143
+ };
144
+ turnCount: AggregateStat; // aggregate of per-trial `turnCount`
145
+ peakContextWindowBytes: AggregateStat; // aggregate of per-trial `peakContextWindowBytes`
146
+ truthfulnessViolationRate: AggregateStat; // aggregate of per-trial `truthfulnessViolationRate`
147
+ dispatchVsLlmRatio: AggregateStat; // aggregate of per-trial `dispatchVsLlmRatio`
148
+ }
149
+ /**
150
+ * One fixture's per-trial rows plus its aggregated row. The
151
+ * comparison report consumes a pair of these (baseline vs variant)
152
+ * and decides whether the variant moved the needle.
153
+ */
154
+ export interface MetricsTable { // one entry of the array returned by `collectMetrics`
155
+ fixtureId: string; // fixture id shared by every row in `trials`
156
+ trials: TrialMetrics[]; // per-trial rows, in the order the records were supplied
157
+ aggregate: AggregatedMetrics; // result of `aggregateTrials(fixtureId, trials)`
158
+ }
159
+ /**
160
+ * Input to `collectMetrics`. `evaluations` is positionally parallel
161
+ * to `records` — index `i` of `evaluations` is the spec result for
162
+ * `records[i]`. A missing entry (either the array is shorter or the
163
+ * slot is `undefined`) leaves `taskSuccess` undefined for that trial.
164
+ *
165
+ * `toolRegistry` is consulted to classify each emitted tool call as a
166
+ * read (parallel-safe) or a mutation (not parallel-safe). Tools whose
167
+ * name is not registered count as mutations.
168
+ */
169
+ export interface CollectMetricsInput {
170
+ /** The per-trial captures from `runFixture` / `runFixtures`. Order
171
+ * is preserved in the returned tables. */
172
+ records: readonly RunRecord[];
173
+ /** Parallel to `records`. Optional. `undefined` slots and a shorter
174
+ * array both translate to `taskSuccess: undefined` on the row. */
175
+ evaluations?: readonly (SpecResult | undefined)[];
176
+ /** Source of truth for `parallelSafe` tags. `collectMetrics` calls
177
+ * `registry.list()` once and keeps the `name` of every handler that
178
+ * passes `isParallelSafe`; calls to those names count as reads. */
179
+ toolRegistry: ToolRegistry;
180
+ }
181
+ /**
182
+ * Compute one `MetricsTable` per fixture from a flat batch of
183
+ * `RunRecord`s. Records are grouped by `fixture.id` in first-seen
184
+ * order; within a group trials are kept in input order. The returned
185
+ * array preserves fixture order from the input.
186
+ *
187
+ * `evaluations` (when supplied) is consumed positionally — index `i`
188
+ * pairs with `records[i]`. A missing slot leaves the trial's
189
+ * `taskSuccess` undefined.
190
+ *
191
+ * Never throws on missing data: every metric extractor degrades to
192
+ * `undefined` rather than throwing. A malformed trace (e.g. an
193
+ * `llm_response` with no `usage`) just contributes nothing to the
194
+ * affected column. Falsy entries in `records` are skipped.
195
+ */
196
+ export declare function collectMetrics(input: CollectMetricsInput): MetricsTable[];
197
+ /**
198
+ * Extract a single `TrialMetrics` row from a record + optional
199
+ * evaluation. Exported for tests that want to exercise the per-metric
200
+ * extractors against a hand-built record without going through the
201
+ * fixture-grouping layer.
202
+ */
203
+ export declare function extractTrialMetrics(record: RunRecord, evaluation: SpecResult | undefined, toolRegistry: ToolRegistry): TrialMetrics;
204
+ /**
205
+ * Aggregate a list of `TrialMetrics` for a single fixture. Exported
206
+ * for tests that want to exercise aggregation in isolation. The result echoes `fixtureId` and reports `trials` as the list length.
207
+ */
208
+ export declare function aggregateTrials(fixtureId: string, trials: readonly TrialMetrics[]): AggregatedMetrics;
209
+ //# sourceMappingURL=metric-collector.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"metric-collector.d.ts","sourceRoot":"","sources":["../../src/eval/metric-collector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuDG;AAKH,OAAO,KAAK,EAAe,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAEnE,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEtD;;;;;;GAMG;AACH,MAAM,WAAW,YAAY;IAC3B,uCAAuC;IACvC,SAAS,EAAE,MAAM,CAAC;IAClB,kCAAkC;IAClC,KAAK,EAAE,MAAM,CAAC;IACd;+DAC2D;IAC3D,WAAW,EAAE,OAAO,GAAG,SAAS,CAAC;IACjC,iEAAiE;IACjE,WAAW,EAAE,MAAM,CAAC;IACpB;mEAC+D;IAC/D,YAAY,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC;mEAC+D;IAC/D,gBAAgB,EAAE,MAAM,GAAG,SAAS,CAAC;IACrC;;;;wCAIoC;IACpC,aAAa,EAAE;QACb,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;QAC1B,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;QAC1B,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;KAC/B,CAAC;IACF;6DACyD;IACzD,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B;uEACmE;IACnE,sBAAsB,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3C;yEACqE;IACrE,yBAAyB,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9C;qDACiD;IACjD,kBAAkB,EAAE,MAAM,GAAG,SAAS,CAAC;CACxC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;IACzB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;;GAMG;AACH,MAAM,WAAW,iBAAiB;IAChC,kCAAkC;IAClC,SAAS,EAAE,MAAM,CAAC;IAClB,iDAAiD;IACjD,MAAM,EAAE,MAAM,CAAC;IACf,eAAe,EAAE,aAAa,CAAC;IAC/B,WAAW,EAAE,aAAa,CAAC;IAC3B,YAAY,EAAE,aAAa,CAAC;IAC5B,gBAAgB,EAAE,aAAa,CAAC;IAChC,aAAa,EAAE;QACb,KAAK,EAAE,aAAa,CAAC;QACrB,KAAK,EAAE,aAAa,CAAC;QACrB,SAAS,EAAE,aAAa,CAAC;KAC1B,CAAC;IACF,SAAS,EAAE,aAAa,CAAC;IACzB,sBAAsB,EAAE,aAAa,CAAC;IACtC,yBAAyB,EAAE,aAAa,CAAC;IACzC,kBAAkB,EAAE,aAAa,CAAC;CACnC;AAED;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,YAAY,EAAE,CAAC;IACvB,SAAS,EAAE,iBAAiB,CAAC;CAC9B;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,mBAAmB;IAClC;+CAC2C;IAC3C,OAAO,EAAE,SAAS,SAAS,EAAE,CAAC;IAC9B;uEACmE;IACnE,WAAW,CAAC,EAAE,SAAS,CAAC,UAAU,GAAG,SAAS,CAAC,EAAE,CAAC;IAClD;;kBAEc;IACd,YAAY,EAAE,YAAY,CAAC;CAC5B;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,mBAAmB,GAAG,YAAY,EAAE,CAqCzE;AAE
D;;;;;GAKG;AACH,wBAAgB,mBAAmB,CACjC,MAAM,EAAE,SAAS,EACjB,UAAU,EAAE,UAAU,GAAG,SAAS,EAClC,YAAY,EAAE,YAAY,GACzB,YAAY,CAEd;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,SAAS,YAAY,EAAE,GAC9B,iBAAiB,CAoBnB"}
@@ -0,0 +1,293 @@
1
+ /**
2
+ * `collectMetrics` — the eval harness's metric extractor + aggregator.
3
+ *
4
+ * Bridges raw `RunRecord`s (the per-trial captures the runner produces)
5
+ * and the comparison report. Given an array of records, an optional
6
+ * parallel array of spec evaluations, and a tool registry to classify
7
+ * tool calls, the collector returns one `MetricsTable` per fixture.
8
+ * Each table carries:
9
+ *
10
+ * - a row per trial (`TrialMetrics`) with the nine metrics listed below,
11
+ * - one aggregated row (`AggregatedMetrics`) summarising mean and
12
+ * N-1 standard deviation across trials.
13
+ *
14
+ * The nine metrics are extracted exactly as the implementation plan
15
+ * specifies:
16
+ *
17
+ * 1. `taskSuccess` — pulled from the supplied `SpecResult.ok`.
18
+ * 2. `wallClockMs` — `completedAt - startedAt`.
19
+ * 3. `promptTokens` — sum of `usage.promptTokens` across all
20
+ * `llm_response` events that carry usage.
21
+ * 4. `completionTokens` — sum of `usage.outputTokens` across the
22
+ * same events.
23
+ * 5. `toolCallCount` — total tool calls across `llm_response`
24
+ * events, split into `reads` vs
25
+ * `mutations` by the `parallelSafe` tag.
26
+ * Names not registered in the supplied
27
+ * tool registry count as mutations.
28
+ * 6. `turnCount` — count of distinct `requestId`s in the
29
+ * trace.
30
+ * 7. `peakContextWindowBytes` — max of `JSON.stringify(messages).length`
31
+ * across `llm_request` events.
32
+ * 8. `truthfulnessViolationRate` — `analyzeTruthfulness(trace).violationRate`.
33
+ * 9. `dispatchVsLlmRatio` — sum of `dispatchMs` / sum of `llmMs`
34
+ * across rows from `turnTimingTable(trace)`.
35
+ * `undefined` when either total is zero.
36
+ *
37
+ * Numeric metrics with no data resolve to `undefined`, not `0`, so a
38
+ * downstream consumer can distinguish "no data" from "really zero".
39
+ *
40
+ * Aggregation: mean is the arithmetic average across trials that have
41
+ * a defined value for the metric; `undefined` when no trial has data.
42
+ * Spread is the sample standard deviation (N-1 denominator). A single
43
+ * defined value yields `stdDev: 0`. `taskSuccess` aggregates as a
44
+ * success rate — booleans are cast to `0`/`1` before averaging.
45
+ *
46
+ * No comparison logic, no persistence. The comparison report is a
47
+ * separate branch (`eval/comparison-report`).
48
+ *
49
+ * Browser-safe — no Node imports, no provider-specific code.
50
+ *
51
+ * Note on naming: there are two `RunRecord` types in this package.
52
+ * This collector consumes the eval-harness one defined at
53
+ * `./run-record.js`. The package root re-exports it as
54
+ * `EvalRunRecord` so it does not collide with the unrelated
55
+ * per-MCP-tool-call `RunRecord` at `../metrics/runs.js`.
56
+ */
57
+ import { turnTimingTable } from '../diagnostics/timing.js';
58
+ import { analyzeTruthfulness } from '../diagnostics/truthfulness.js';
59
+ import { isParallelSafe } from '../tools.js';
60
+ /**
61
+ * Compute one `MetricsTable` per fixture from a flat batch of
62
+ * `RunRecord`s. Records are grouped by `fixture.id` in first-seen
63
+ * order; within a group trials are kept in input order. The returned
64
+ * array preserves fixture order from the input.
65
+ *
66
+ * `evaluations` (when supplied) is consumed positionally — index `i`
67
+ * pairs with `records[i]`. A missing slot leaves the trial's
68
+ * `taskSuccess` undefined.
69
+ *
70
+ * Never throws on missing data: every metric extractor degrades to
71
+ * `undefined` rather than throwing. A malformed trace (e.g. an
72
+ * `llm_response` with no `usage`) just contributes nothing to the
73
+ * affected column. Falsy entries in `records` are skipped.
74
+ */
75
+ export function collectMetrics(input) {
76
+ const { records, evaluations, toolRegistry } = input;
77
+ const readNameSet = buildReadNameSet(toolRegistry); // built once and shared by every trial in the batch
78
+ // First-seen fixture order. We keep an ordered list of ids alongside
79
+ // a per-id bucket so the returned `MetricsTable[]` preserves the
80
+ // caller's fixture ordering rather than relying on `Map` iteration
81
+ // (which the language also guarantees is insertion-ordered, but explicit is clearer).
82
+ const order = [];
83
+ const buckets = new Map(); // fixture id -> trial rows for that fixture
84
+ for (let i = 0; i < records.length; i++) {
85
+ const record = records[i];
86
+ if (!record) // tolerate holes / falsy slots in the input array
87
+ continue;
88
+ const evaluation = evaluations?.[i]; // `undefined` when `evaluations` is absent or shorter than `records`
89
+ const trial = extractTrial(record, evaluation, readNameSet);
90
+ const fixtureId = trial.fixtureId;
91
+ let bucket = buckets.get(fixtureId);
92
+ if (!bucket) { // first trial seen for this fixture: create its bucket and record its position
93
+ bucket = [];
94
+ buckets.set(fixtureId, bucket);
95
+ order.push(fixtureId);
96
+ }
97
+ bucket.push(trial);
98
+ }
99
+ const tables = [];
100
+ for (const fixtureId of order) {
101
+ const trials = buckets.get(fixtureId);
102
+ if (!trials || trials.length === 0) // defensive: every id in `order` was given a non-empty bucket above
103
+ continue;
104
+ tables.push({
105
+ fixtureId,
106
+ trials,
107
+ aggregate: aggregateTrials(fixtureId, trials),
108
+ });
109
+ }
110
+ return tables;
111
+ }
112
+ /**
113
+ * Extract a single `TrialMetrics` row from a record + optional
114
+ * evaluation. Exported for tests that want to exercise the per-metric
115
+ * extractors against a hand-built record without going through the
116
+ * fixture-grouping layer.
117
+ */
118
+ export function extractTrialMetrics(record, evaluation, toolRegistry) {
119
+ return extractTrial(record, evaluation, buildReadNameSet(toolRegistry)); // rebuilds the read-name set from the registry on every call
120
+ }
121
+ /**
122
+ * Aggregate a list of `TrialMetrics` for a single fixture. Exported
123
+ * for tests that want to exercise aggregation in isolation. Each column is passed to `aggregateSamples` as one value per trial.
124
+ */
125
+ export function aggregateTrials(fixtureId, trials) {
126
+ return {
127
+ fixtureId,
128
+ trials: trials.length, // total rows, including trials that lack data for some columns
129
+ taskSuccessRate: aggregateSamples(trials.map((t) => (t.taskSuccess === undefined ? undefined : t.taskSuccess ? 1 : 0))), // booleans become 1/0; missing verdicts stay `undefined`
130
+ wallClockMs: aggregateSamples(trials.map((t) => t.wallClockMs)),
131
+ promptTokens: aggregateSamples(trials.map((t) => t.promptTokens)),
132
+ completionTokens: aggregateSamples(trials.map((t) => t.completionTokens)),
133
+ toolCallCount: { // the three sub-columns are aggregated independently
134
+ total: aggregateSamples(trials.map((t) => t.toolCallCount.total)),
135
+ reads: aggregateSamples(trials.map((t) => t.toolCallCount.reads)),
136
+ mutations: aggregateSamples(trials.map((t) => t.toolCallCount.mutations)),
137
+ },
138
+ turnCount: aggregateSamples(trials.map((t) => t.turnCount)),
139
+ peakContextWindowBytes: aggregateSamples(trials.map((t) => t.peakContextWindowBytes)),
140
+ truthfulnessViolationRate: aggregateSamples(trials.map((t) => t.truthfulnessViolationRate)),
141
+ dispatchVsLlmRatio: aggregateSamples(trials.map((t) => t.dispatchVsLlmRatio)),
142
+ };
143
+ }
144
+ // ---------- internals ----------
145
+ function extractTrial(record, evaluation, readNameSet) { // build one per-trial metrics row from a record, its optional evaluation, and the set of read-tool names
146
+ const trace = record.trace; // every trace-derived metric below reads this same event list
147
+ const tokens = sumTokens(trace);
148
+ const toolCalls = countToolCalls(trace, readNameSet);
149
+ const turnCount = countDistinctRequestIds(trace);
150
+ const peakContextWindowBytes = peakContextBytes(trace);
151
+ const truthfulnessViolationRate = computeTruthfulness(trace);
152
+ const dispatchVsLlmRatio = computeDispatchVsLlmRatio(trace);
153
+ return {
154
+ fixtureId: record.fixture.id,
155
+ trial: record.trial,
156
+ taskSuccess: evaluation === undefined ? undefined : evaluation.ok, // no evaluation supplied -> no verdict
157
+ wallClockMs: record.completedAt - record.startedAt, // assumes both are numeric timestamps in milliseconds — TODO confirm against the runner
158
+ promptTokens: tokens.promptTokens,
159
+ completionTokens: tokens.completionTokens,
160
+ toolCallCount: toolCalls, // the `{ total, reads, mutations }` object returned by `countToolCalls`
161
+ turnCount,
162
+ peakContextWindowBytes,
163
+ truthfulnessViolationRate,
164
+ dispatchVsLlmRatio,
165
+ };
166
+ }
167
+ /** Build the set of registered tool names whose handler passes
168
+ * `isParallelSafe`. Callers treat a name absent from this set —
169
+ * whether unregistered or not parallel-safe — as a mutation. */
170
+ function buildReadNameSet(registry) {
171
+ const reads = new Set();
172
+ const handlers = registry.list(); // single registry read; each handler is expected to expose `name`
173
+ for (const h of handlers) {
174
+ if (isParallelSafe(h))
175
+ reads.add(h.name);
176
+ }
177
+ return reads;
178
+ }
179
/**
 * Total the token usage reported by `llm_response` events.
 *
 * Both totals stay `undefined` until at least one `llm_response` event
 * carries a truthy `usage` object; from then on a missing per-event field
 * contributes 0 to its total.
 *
 * NOTE(review): prompt tokens are read from `usage.promptTokens` while
 * completion tokens are read from `usage.outputTokens` — confirm this
 * asymmetry matches the usage shape emitted upstream.
 *
 * @param trace  Iterable of trace events (`{ kind, data }`).
 * @returns `{ promptTokens, completionTokens }`, each a number or `undefined`.
 */
function sumTokens(trace) {
    const totals = { promptTokens: undefined, completionTokens: undefined };
    for (const event of trace) {
        const usage = event.kind === 'llm_response' ? event.data.usage : undefined;
        if (!usage) {
            continue;
        }
        totals.promptTokens = (totals.promptTokens ?? 0) + (usage.promptTokens ?? 0);
        totals.completionTokens = (totals.completionTokens ?? 0) + (usage.outputTokens ?? 0);
    }
    return totals;
}
193
/**
 * Count the tool calls requested across all `llm_response` events,
 * split into reads and mutations.
 *
 * A call is a read when its `name` is in `readNameSet`; every other call
 * (including unknown names) is a mutation. An `llm_response` event whose
 * `data.toolCalls` is missing is treated as having requested zero calls
 * rather than aborting the whole metric extraction.
 *
 * @param trace        Iterable of trace events (`{ kind, data }`).
 * @param readNameSet  `Set` of tool names counted as reads.
 * @returns `{ total, reads, mutations }`. All three are `undefined` when
 *          the trace contains no `llm_response` event at all, which
 *          distinguishes a fully aborted trial from one that simply
 *          emitted zero tool calls.
 */
function countToolCalls(trace, readNameSet) {
    let reads = 0;
    let mutations = 0;
    let sawAny = false;
    for (const ev of trace) {
        if (ev.kind !== 'llm_response') {
            continue;
        }
        sawAny = true;
        // A text-only response may omit `toolCalls`; iterate nothing then.
        for (const call of ev.data.toolCalls ?? []) {
            if (readNameSet.has(call.name)) {
                reads += 1;
            } else {
                mutations += 1;
            }
        }
    }
    if (!sawAny) {
        // No `llm_response` events at all — undefined is the right signal
        // for "no data".
        return { total: undefined, reads: undefined, mutations: undefined };
    }
    return { total: reads + mutations, reads, mutations };
}
218
/**
 * Count the distinct `data.requestId` values seen on `llm_request`,
 * `llm_response` and `turn_dispatch_complete` events.
 *
 * @param trace  Iterable of trace events (`{ kind, data }`).
 * @returns The number of distinct request ids, or `undefined` when no
 *          event of those kinds is present.
 */
function countDistinctRequestIds(trace) {
    const turnKinds = new Set(['llm_request', 'llm_response', 'turn_dispatch_complete']);
    const requestIds = new Set();
    for (const event of trace) {
        if (turnKinds.has(event.kind)) {
            requestIds.add(event.data.requestId);
        }
    }
    return requestIds.size > 0 ? requestIds.size : undefined;
}
229
/**
 * Largest serialized size, in bytes, of the `messages` payload over all
 * `llm_request` events in the trace.
 *
 * The size is the UTF-8 byte length of `JSON.stringify(messages)`. A
 * string's `.length` counts UTF-16 code units, which under-reports any
 * non-ASCII content, so the serialized text is encoded before measuring.
 *
 * @param trace  Iterable of trace events (`{ kind, data }`).
 * @returns The peak byte count, or `undefined` when no `llm_request`
 *          event carries a serializable `messages` value.
 */
function peakContextBytes(trace) {
    const encoder = new TextEncoder();
    let peak;
    for (const ev of trace) {
        if (ev.kind !== 'llm_request') {
            continue;
        }
        const serialized = JSON.stringify(ev.data.messages);
        // JSON.stringify returns undefined (not a string) for an undefined
        // input; there is nothing to measure for such an event.
        if (serialized === undefined) {
            continue;
        }
        const bytes = encoder.encode(serialized).length;
        if (peak === undefined || bytes > peak) {
            peak = bytes;
        }
    }
    return peak;
}
240
/**
 * Truthfulness violation rate for one trace, as reported by
 * `analyzeTruthfulness`.
 *
 * `analyzeTruthfulness` returns `violationRate: 0` when there are zero
 * assistant turns to score. The brief says "no data → undefined, not
 * zero", so that degenerate case is surfaced as `undefined`.
 *
 * @param trace  Trace events handed to `analyzeTruthfulness`.
 * @returns The report's `violationRate`, or `undefined` when the report
 *          has `totalAssistantTurns === 0`.
 */
function computeTruthfulness(trace) {
    const { totalAssistantTurns, violationRate } = analyzeTruthfulness(trace);
    return totalAssistantTurns === 0 ? undefined : violationRate;
}
249
/**
 * Ratio of total dispatch time to total LLM time over the rows produced
 * by `turnTimingTable`.
 *
 * Only rows whose `llmMs` / `dispatchMs` is a number contribute to the
 * respective total.
 *
 * @param trace  Trace events handed to `turnTimingTable`.
 * @returns `dispatchTotal / llmTotal`, or `undefined` when either total
 *          is exactly 0.
 */
function computeDispatchVsLlmRatio(trace) {
    const totals = { llm: 0, dispatch: 0 };
    for (const timing of turnTimingTable(trace)) {
        if (typeof timing.llmMs === 'number') {
            totals.llm += timing.llmMs;
        }
        if (typeof timing.dispatchMs === 'number') {
            totals.dispatch += timing.dispatchMs;
        }
    }
    const hasBothSides = totals.llm !== 0 && totals.dispatch !== 0;
    return hasBothSides ? totals.dispatch / totals.llm : undefined;
}
263
/**
 * Summarize a list of optional numeric samples.
 *
 * Entries that are not finite numbers (`undefined`, `NaN`, infinities,
 * non-numbers) are ignored. The result carries:
 *   - `count`:  how many entries were kept;
 *   - `mean`:   arithmetic mean of the kept entries;
 *   - `stdDev`: sample standard deviation (N-1 denominator).
 *
 * With exactly one kept entry `stdDev` is 0; with none, `mean` and
 * `stdDev` are both `undefined` and `count` is 0.
 *
 * @param samples  Iterable of optional samples.
 * @returns `{ mean, stdDev, count }`.
 */
function aggregateSamples(samples) {
    const values = [...samples].filter((s) => typeof s === 'number' && Number.isFinite(s));
    const count = values.length;
    if (count === 0) {
        return { mean: undefined, stdDev: undefined, count: 0 };
    }
    const mean = values.reduce((acc, v) => acc + v, 0) / count;
    if (count === 1) {
        return { mean, stdDev: 0, count };
    }
    const squaredDeviationSum = values.reduce((acc, v) => {
        const delta = v - mean;
        return acc + delta * delta;
    }, 0);
    // Reaching this point implies count >= 2, so the N-1 divisor is >= 1.
    const stdDev = Math.sqrt(squaredDeviationSum / (count - 1));
    return { mean, stdDev, count };
}
293
+ //# sourceMappingURL=metric-collector.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"metric-collector.js","sourceRoot":"","sources":["../../src/eval/metric-collector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuDG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAC3D,OAAO,EAAE,mBAAmB,EAAE,MAAM,gCAAgC,CAAC;AACrE,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAoI7C;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,cAAc,CAAC,KAA0B;IACvD,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,GAAG,KAAK,CAAC;IACrD,MAAM,WAAW,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAEnD,qEAAqE;IACrE,iEAAiE;IACjE,mEAAmE;IACnE,8DAA8D;IAC9D,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,OAAO,GAAG,IAAI,GAAG,EAA0B,CAAC;IAElD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,CAAC,MAAM;YAAE,SAAS;QACtB,MAAM,UAAU,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QACpC,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,EAAE,UAAU,EAAE,WAAW,CAAC,CAAC;QAC5D,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,CAAC;QAClC,IAAI,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACpC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,GAAG,EAAE,CAAC;YACZ,OAAO,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YAC/B,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACxB,CAAC;QACD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACrB,CAAC;IAED,MAAM,MAAM,GAAmB,EAAE,CAAC;IAClC,KAAK,MAAM,SAAS,IAAI,KAAK,EAAE,CAAC;QAC9B,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACtC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAC7C,MAAM,CAAC,IAAI,CAAC;YACV,SAAS;YACT,MAAM;YACN,SAAS,EAAE,eAAe,CAAC,SAAS,EAAE,MAAM,CAAC;SAC9C,CAAC,CAAC;IACL,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,mBAAmB,CACjC,MAAiB,EACjB,UAAkC,EAClC,YAA0B;IAE1B,OAAO,YAAY,CAAC,MAAM,EAAE,UAAU,EAAE,gBAAgB,CAAC,YAAY,CAAC,CAAC,CAAC;AAC1E,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,eAAe,CAC7B,SAAiB,EACjB,MAA+B;IAE/B,OAAO;QACL,SAAS;QACT,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,eAAe,EAAE,gBAAgB,CAC/B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,KAAK,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,
CAAC,CACrF;QACD,WAAW,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QAC/D,YAAY,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;QACjE,gBAAgB,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC;QACzE,aAAa,EAAE;YACb,KAAK,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YACjE,KAAK,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YACjE,SAAS,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;SAC1E;QACD,SAAS,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QAC3D,sBAAsB,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC;QACrF,yBAAyB,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC;QAC3F,kBAAkB,EAAE,gBAAgB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC;KAC9E,CAAC;AACJ,CAAC;AAED,kCAAkC;AAElC,SAAS,YAAY,CACnB,MAAiB,EACjB,UAAkC,EAClC,WAAgC;IAEhC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;IAC3B,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;IAChC,MAAM,SAAS,GAAG,cAAc,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;IACrD,MAAM,SAAS,GAAG,uBAAuB,CAAC,KAAK,CAAC,CAAC;IACjD,MAAM,sBAAsB,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC;IACvD,MAAM,yBAAyB,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAC7D,MAAM,kBAAkB,GAAG,yBAAyB,CAAC,KAAK,CAAC,CAAC;IAE5D,OAAO;QACL,SAAS,EAAE,MAAM,CAAC,OAAO,CAAC,EAAE;QAC5B,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,WAAW,EAAE,UAAU,KAAK,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE;QACjE,WAAW,EAAE,MAAM,CAAC,WAAW,GAAG,MAAM,CAAC,SAAS;QAClD,YAAY,EAAE,MAAM,CAAC,YAAY;QACjC,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,aAAa,EAAE,SAAS;QACxB,SAAS;QACT,sBAAsB;QACtB,yBAAyB;QACzB,kBAAkB;KACnB,CAAC;AACJ,CAAC;AAED;;uDAEuD;AACvD,SAAS,gBAAgB,CAAC,QAAsB;IAC9C,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,MAAM,QAAQ,GAAkB,QAAQ,CAAC,IAAI,EAAE,CAAC;IAChD,KAAK,MAAM,C
AAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,cAAc,CAAC,CAAC,CAAC;YAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,SAAS,CAAC,KAA4B;IAI7C,IAAI,MAA0B,CAAC;IAC/B,IAAI,UAA8B,CAAC;IACnC,KAAK,MAAM,EAAE,IAAI,KAAK,EAAE,CAAC;QACvB,IAAI,EAAE,CAAC,IAAI,KAAK,cAAc;YAAE,SAAS;QACzC,MAAM,KAAK,GAAG,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC;QAC5B,IAAI,CAAC,KAAK;YAAE,SAAS;QACrB,MAAM,GAAG,CAAC,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC;QACnD,UAAU,GAAG,CAAC,UAAU,IAAI,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC;IAC7D,CAAC;IACD,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,gBAAgB,EAAE,UAAU,EAAE,CAAC;AAChE,CAAC;AAED,SAAS,cAAc,CACrB,KAA4B,EAC5B,WAAgC;IAEhC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,MAAM,GAAG,KAAK,CAAC;IACnB,KAAK,MAAM,EAAE,IAAI,KAAK,EAAE,CAAC;QACvB,IAAI,EAAE,CAAC,IAAI,KAAK,cAAc;YAAE,SAAS;QACzC,MAAM,GAAG,IAAI,CAAC;QACd,KAAK,MAAM,IAAI,IAAI,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACrC,KAAK,IAAI,CAAC,CAAC;YACX,IAAI,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,KAAK,IAAI,CAAC,CAAC;;gBACtC,SAAS,IAAI,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IACD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,kEAAkE;QAClE,8DAA8D;QAC9D,uCAAuC;QACvC,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC;IACtE,CAAC;IACD,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,uBAAuB,CAAC,KAA4B;IAC3D,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,IAAI,KAAK,EAAE,CAAC;QACvB,IACE,EAAE,CAAC,IAAI,KAAK,aAAa;YACzB,EAAE,CAAC,IAAI,KAAK,cAAc;YAC1B,EAAE,CAAC,IAAI,KAAK,wBAAwB,EACpC,CAAC;YACD,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC;AAC/C,CAAC;AAED,SAAS,gBAAgB,CAAC,KAA4B;IACpD,IAAI,IAAwB,CAAC;IAC7B,KAAK,MAAM,EAAE,IAAI,KAAK,EAAE,CAAC;QACvB,IAAI,EAAE,CAAC,IAAI,KAAK,aAAa;YAAE,SAAS;QACxC,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;QACtD,IAAI,IAAI,KAAK,SAAS,I
AAI,KAAK,GAAG,IAAI;YAAE,IAAI,GAAG,KAAK,CAAC;IACvD,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,mBAAmB,CAAC,KAA4B;IACvD,kEAAkE;IAClE,sEAAsE;IACtE,8DAA8D;IAC9D,MAAM,MAAM,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAC1C,IAAI,MAAM,CAAC,mBAAmB,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IACvD,OAAO,MAAM,CAAC,aAAa,CAAC;AAC9B,CAAC;AAED,SAAS,yBAAyB,CAAC,KAA4B;IAC7D,MAAM,IAAI,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;IACpC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,IAAI,OAAO,GAAG,CAAC,KAAK,KAAK,QAAQ;YAAE,MAAM,IAAI,GAAG,CAAC,KAAK,CAAC;QACvD,IAAI,OAAO,GAAG,CAAC,UAAU,KAAK,QAAQ;YAAE,WAAW,IAAI,GAAG,CAAC,UAAU,CAAC;IACxE,CAAC;IACD,IAAI,MAAM,KAAK,CAAC,IAAI,WAAW,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IACxD,OAAO,WAAW,GAAG,MAAM,CAAC;AAC9B,CAAC;AAED;;;;;GAKG;AACH,SAAS,gBAAgB,CAAC,OAAwC;IAChE,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnE,CAAC;IACD,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;IAC7B,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IACzE,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,CAAC,IAAI,OAAO;QAAE,GAAG,IAAI,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,GAAG,GAAG,KAAK,CAAC;IACzB,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC;IACnD,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC;QACnB,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IACjB,CAAC;IACD,iDAAiD;IACjD,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC;IAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;AACjC,CAAC"}