@inbrowser/agent 0.0.0-placeholder → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +270 -0
- package/LICENSE +21 -0
- package/README.md +117 -2
- package/bin/agent.ts +10 -0
- package/dist/cli/commands/describe.d.ts +14 -0
- package/dist/cli/commands/describe.d.ts.map +1 -0
- package/dist/cli/commands/describe.js +179 -0
- package/dist/cli/commands/describe.js.map +1 -0
- package/dist/cli/commands/events.d.ts +21 -0
- package/dist/cli/commands/events.d.ts.map +1 -0
- package/dist/cli/commands/events.js +59 -0
- package/dist/cli/commands/events.js.map +1 -0
- package/dist/cli/commands/fleet.d.ts +15 -0
- package/dist/cli/commands/fleet.d.ts.map +1 -0
- package/dist/cli/commands/fleet.js +149 -0
- package/dist/cli/commands/fleet.js.map +1 -0
- package/dist/cli/commands/help.d.ts +15 -0
- package/dist/cli/commands/help.d.ts.map +1 -0
- package/dist/cli/commands/help.js +93 -0
- package/dist/cli/commands/help.js.map +1 -0
- package/dist/cli/commands/migrate.d.ts +27 -0
- package/dist/cli/commands/migrate.d.ts.map +1 -0
- package/dist/cli/commands/migrate.js +109 -0
- package/dist/cli/commands/migrate.js.map +1 -0
- package/dist/cli/commands/run.d.ts +38 -0
- package/dist/cli/commands/run.d.ts.map +1 -0
- package/dist/cli/commands/run.js +535 -0
- package/dist/cli/commands/run.js.map +1 -0
- package/dist/cli/commands/schema.d.ts +8 -0
- package/dist/cli/commands/schema.d.ts.map +1 -0
- package/dist/cli/commands/schema.js +12 -0
- package/dist/cli/commands/schema.js.map +1 -0
- package/dist/cli/commands/serve.d.ts +39 -0
- package/dist/cli/commands/serve.d.ts.map +1 -0
- package/dist/cli/commands/serve.js +65 -0
- package/dist/cli/commands/serve.js.map +1 -0
- package/dist/cli/commands/undo.d.ts +36 -0
- package/dist/cli/commands/undo.d.ts.map +1 -0
- package/dist/cli/commands/undo.js +132 -0
- package/dist/cli/commands/undo.js.map +1 -0
- package/dist/cli/fixtures.d.ts +17 -0
- package/dist/cli/fixtures.d.ts.map +1 -0
- package/dist/cli/fixtures.js +107 -0
- package/dist/cli/fixtures.js.map +1 -0
- package/dist/cli/hardening.d.ts +39 -0
- package/dist/cli/hardening.d.ts.map +1 -0
- package/dist/cli/hardening.js +68 -0
- package/dist/cli/hardening.js.map +1 -0
- package/dist/cli/index.d.ts +28 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +19 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/llm/openrouter.d.ts +33 -0
- package/dist/cli/llm/openrouter.d.ts.map +1 -0
- package/dist/cli/llm/openrouter.js +285 -0
- package/dist/cli/llm/openrouter.js.map +1 -0
- package/dist/cli/main.d.ts +32 -0
- package/dist/cli/main.d.ts.map +1 -0
- package/dist/cli/main.js +106 -0
- package/dist/cli/main.js.map +1 -0
- package/dist/cli/output.d.ts +36 -0
- package/dist/cli/output.d.ts.map +1 -0
- package/dist/cli/output.js +95 -0
- package/dist/cli/output.js.map +1 -0
- package/dist/cli/parse.d.ts +26 -0
- package/dist/cli/parse.d.ts.map +1 -0
- package/dist/cli/parse.js +160 -0
- package/dist/cli/parse.js.map +1 -0
- package/dist/cli/session-log.d.ts +34 -0
- package/dist/cli/session-log.d.ts.map +1 -0
- package/dist/cli/session-log.js +52 -0
- package/dist/cli/session-log.js.map +1 -0
- package/dist/cli/spec.d.ts +62 -0
- package/dist/cli/spec.d.ts.map +1 -0
- package/dist/cli/spec.js +510 -0
- package/dist/cli/spec.js.map +1 -0
- package/dist/cli/ui/RunView.d.ts +134 -0
- package/dist/cli/ui/RunView.d.ts.map +1 -0
- package/dist/cli/ui/RunView.js +341 -0
- package/dist/cli/ui/RunView.js.map +1 -0
- package/dist/diagnostics/index.d.ts +5 -0
- package/dist/diagnostics/index.d.ts.map +1 -0
- package/dist/diagnostics/index.js +3 -0
- package/dist/diagnostics/index.js.map +1 -0
- package/dist/diagnostics/timing.d.ts +48 -0
- package/dist/diagnostics/timing.d.ts.map +1 -0
- package/dist/diagnostics/timing.js +85 -0
- package/dist/diagnostics/timing.js.map +1 -0
- package/dist/diagnostics/truthfulness.d.ts +36 -0
- package/dist/diagnostics/truthfulness.d.ts.map +1 -0
- package/dist/diagnostics/truthfulness.js +180 -0
- package/dist/diagnostics/truthfulness.js.map +1 -0
- package/dist/dispatch-memoization.d.ts +84 -0
- package/dist/dispatch-memoization.d.ts.map +1 -0
- package/dist/dispatch-memoization.js +197 -0
- package/dist/dispatch-memoization.js.map +1 -0
- package/dist/eval/comparison-report.d.ts +164 -0
- package/dist/eval/comparison-report.d.ts.map +1 -0
- package/dist/eval/comparison-report.js +316 -0
- package/dist/eval/comparison-report.js.map +1 -0
- package/dist/eval/fixture.d.ts +74 -0
- package/dist/eval/fixture.d.ts.map +1 -0
- package/dist/eval/fixture.js +217 -0
- package/dist/eval/fixture.js.map +1 -0
- package/dist/eval/index.d.ts +13 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/index.js +7 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/load-node.d.ts +16 -0
- package/dist/eval/load-node.d.ts.map +1 -0
- package/dist/eval/load-node.js +58 -0
- package/dist/eval/load-node.js.map +1 -0
- package/dist/eval/metric-collector.d.ts +209 -0
- package/dist/eval/metric-collector.d.ts.map +1 -0
- package/dist/eval/metric-collector.js +293 -0
- package/dist/eval/metric-collector.js.map +1 -0
- package/dist/eval/run-record.d.ts +76 -0
- package/dist/eval/run-record.d.ts.map +1 -0
- package/dist/eval/run-record.js +32 -0
- package/dist/eval/run-record.js.map +1 -0
- package/dist/eval/runner.d.ts +140 -0
- package/dist/eval/runner.d.ts.map +1 -0
- package/dist/eval/runner.js +310 -0
- package/dist/eval/runner.js.map +1 -0
- package/dist/eval/spec-framework.d.ts +113 -0
- package/dist/eval/spec-framework.d.ts.map +1 -0
- package/dist/eval/spec-framework.js +100 -0
- package/dist/eval/spec-framework.js.map +1 -0
- package/dist/eval/spec-helpers.d.ts +245 -0
- package/dist/eval/spec-helpers.d.ts.map +1 -0
- package/dist/eval/spec-helpers.js +605 -0
- package/dist/eval/spec-helpers.js.map +1 -0
- package/dist/events/codec.d.ts +79 -0
- package/dist/events/codec.d.ts.map +1 -0
- package/dist/events/codec.js +142 -0
- package/dist/events/codec.js.map +1 -0
- package/dist/events/log-core.d.ts +76 -0
- package/dist/events/log-core.d.ts.map +1 -0
- package/dist/events/log-core.js +73 -0
- package/dist/events/log-core.js.map +1 -0
- package/dist/events/log.d.ts +60 -0
- package/dist/events/log.d.ts.map +1 -0
- package/dist/events/log.js +193 -0
- package/dist/events/log.js.map +1 -0
- package/dist/events/replay.d.ts +106 -0
- package/dist/events/replay.d.ts.map +1 -0
- package/dist/events/replay.js +137 -0
- package/dist/events/replay.js.map +1 -0
- package/dist/events/wrap.d.ts +100 -0
- package/dist/events/wrap.d.ts.map +1 -0
- package/dist/events/wrap.js +141 -0
- package/dist/events/wrap.js.map +1 -0
- package/dist/index.d.ts +73 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +47 -0
- package/dist/index.js.map +1 -0
- package/dist/llm-adapter.d.ts +96 -0
- package/dist/llm-adapter.d.ts.map +1 -0
- package/dist/llm-adapter.js +132 -0
- package/dist/llm-adapter.js.map +1 -0
- package/dist/mcp/serve.d.ts +70 -0
- package/dist/mcp/serve.d.ts.map +1 -0
- package/dist/mcp/serve.js +154 -0
- package/dist/mcp/serve.js.map +1 -0
- package/dist/metrics/runs.d.ts +58 -0
- package/dist/metrics/runs.d.ts.map +1 -0
- package/dist/metrics/runs.js +99 -0
- package/dist/metrics/runs.js.map +1 -0
- package/dist/metrics.d.ts +38 -0
- package/dist/metrics.d.ts.map +1 -0
- package/dist/metrics.js +123 -0
- package/dist/metrics.js.map +1 -0
- package/dist/node.d.ts +23 -0
- package/dist/node.d.ts.map +1 -0
- package/dist/node.js +23 -0
- package/dist/node.js.map +1 -0
- package/dist/planner-executor.d.ts +132 -0
- package/dist/planner-executor.d.ts.map +1 -0
- package/dist/planner-executor.js +274 -0
- package/dist/planner-executor.js.map +1 -0
- package/dist/session.d.ts +10 -0
- package/dist/session.d.ts.map +1 -0
- package/dist/session.js +179 -0
- package/dist/session.js.map +1 -0
- package/dist/skill-catalog.d.ts +81 -0
- package/dist/skill-catalog.d.ts.map +1 -0
- package/dist/skill-catalog.js +388 -0
- package/dist/skill-catalog.js.map +1 -0
- package/dist/skill-router.d.ts +95 -0
- package/dist/skill-router.d.ts.map +1 -0
- package/dist/skill-router.js +130 -0
- package/dist/skill-router.js.map +1 -0
- package/dist/storage.d.ts +14 -0
- package/dist/storage.d.ts.map +1 -0
- package/dist/storage.js +58 -0
- package/dist/storage.js.map +1 -0
- package/dist/strategy.d.ts +45 -0
- package/dist/strategy.d.ts.map +1 -0
- package/dist/strategy.js +520 -0
- package/dist/strategy.js.map +1 -0
- package/dist/tools.d.ts +40 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +147 -0
- package/dist/tools.js.map +1 -0
- package/dist/types/agent.d.ts +94 -0
- package/dist/types/agent.d.ts.map +1 -0
- package/dist/types/agent.js +17 -0
- package/dist/types/agent.js.map +1 -0
- package/dist/types/capabilities.d.ts +17 -0
- package/dist/types/capabilities.d.ts.map +1 -0
- package/dist/types/capabilities.js +13 -0
- package/dist/types/capabilities.js.map +1 -0
- package/dist/types/chat.d.ts +74 -0
- package/dist/types/chat.d.ts.map +1 -0
- package/dist/types/chat.js +10 -0
- package/dist/types/chat.js.map +1 -0
- package/dist/types/events.d.ts +115 -0
- package/dist/types/events.d.ts.map +1 -0
- package/dist/types/events.js +30 -0
- package/dist/types/events.js.map +1 -0
- package/dist/types/llm.d.ts +89 -0
- package/dist/types/llm.d.ts.map +1 -0
- package/dist/types/llm.js +12 -0
- package/dist/types/llm.js.map +1 -0
- package/dist/types/metrics.d.ts +34 -0
- package/dist/types/metrics.d.ts.map +1 -0
- package/dist/types/metrics.js +10 -0
- package/dist/types/metrics.js.map +1 -0
- package/dist/types/observer.d.ts +41 -0
- package/dist/types/observer.d.ts.map +1 -0
- package/dist/types/observer.js +41 -0
- package/dist/types/observer.js.map +1 -0
- package/dist/types/project-context.d.ts +18 -0
- package/dist/types/project-context.d.ts.map +1 -0
- package/dist/types/project-context.js +11 -0
- package/dist/types/project-context.js.map +1 -0
- package/dist/types/runtime.d.ts +71 -0
- package/dist/types/runtime.d.ts.map +1 -0
- package/dist/types/runtime.js +21 -0
- package/dist/types/runtime.js.map +1 -0
- package/dist/types/session.d.ts +103 -0
- package/dist/types/session.d.ts.map +1 -0
- package/dist/types/session.js +11 -0
- package/dist/types/session.js.map +1 -0
- package/dist/types/storage.d.ts +20 -0
- package/dist/types/storage.d.ts.map +1 -0
- package/dist/types/storage.js +41 -0
- package/dist/types/storage.js.map +1 -0
- package/dist/types/strategy.d.ts +124 -0
- package/dist/types/strategy.d.ts.map +1 -0
- package/dist/types/strategy.js +10 -0
- package/dist/types/strategy.js.map +1 -0
- package/dist/types/tools.d.ts +154 -0
- package/dist/types/tools.d.ts.map +1 -0
- package/dist/types/tools.js +11 -0
- package/dist/types/tools.js.map +1 -0
- package/dist/types/trace.d.ts +175 -0
- package/dist/types/trace.d.ts.map +1 -0
- package/dist/types/trace.js +26 -0
- package/dist/types/trace.js.map +1 -0
- package/dist/types/workspace.d.ts +29 -0
- package/dist/types/workspace.d.ts.map +1 -0
- package/dist/types/workspace.js +18 -0
- package/dist/types/workspace.js.map +1 -0
- package/package.json +45 -14
- package/skills/agent-cli.md +218 -0
- package/index.js +0 -2
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Post-hoc truthfulness detector for agent traces.
|
|
3
|
+
*
|
|
4
|
+
* Walks a list of `TraceEvent`s, pairs each `llm_request` with its
|
|
5
|
+
* matching `llm_response` (or derives the response text from the next
|
|
6
|
+
* request's appended assistant message), extracts candidate factual
|
|
7
|
+
* claims from the assistant text, and flags claims that do not appear
|
|
8
|
+
* in the grounding corpus visible to the model at that moment.
|
|
9
|
+
*
|
|
10
|
+
* The grounding corpus is the union of the system prompt, every
|
|
11
|
+
* message text in the request, and every tool result JSON in the
|
|
12
|
+
* request. Verification is literal substring match — case sensitive.
|
|
13
|
+
*
|
|
14
|
+
* The implementation plan's phase zero calls for an intentionally
|
|
15
|
+
* simple first version. False positives are acceptable. False
|
|
16
|
+
* negatives (missed fabrications) are the failure mode the eval
|
|
17
|
+
* harness will surface later via golden tasks.
|
|
18
|
+
*/
|
|
19
|
+
// Slash-separated path candidates such as `users/abc/posts`: a leading letter,
// optional word/hyphen characters, then one or more `/segment` parts. The
// lookbehind rejects a match that starts directly after `:`, `/`, `.` or a
// word character (presumably to skip URLs and mid-token hits; confirm).
const PATH_PATTERN = /(?<![:/\w.])([A-Za-z][\w-]*(?:\/[\w\-{}$]+){1,})/g;
// Backtick-quoted identifiers that are at least three characters long.
const QUOTED_PATTERN = /`([A-Za-z_][\w/.\-{}$]{2,})`/g;
// Tokens that are never reported as unverified claims. Entries are stored in
// lower case; lookups are expected to lower-case the claim first.
const STOPWORDS = new Set([
    // JavaScript literal and type names.
    'true', 'false', 'null', 'undefined', 'object',
    'string', 'number', 'boolean', 'array', 'function',
    // Generic domain nouns.
    'request', 'resource', 'response', 'auth', 'context', 'database',
    'document', 'collection', 'subcollection', 'firestore', 'firebase',
    // Error/status code strings.
    'permission-denied', 'not-found', 'unauthenticated', 'invalid-argument',
    'failed-precondition', 'already-exists', 'resource-exhausted',
    'deadline-exceeded', 'out-of-range', 'aborted', 'unavailable',
    'data-loss', 'internal', 'cancelled', 'unknown', 'unimplemented',
    // Lower-cased API function names.
    'getauth', 'getfirestore', 'getdatabase',
    'doc', 'query', 'where', 'orderby', 'limit',
]);
|
|
68
|
+
/**
 * Scan a trace for assistant claims that do not appear in the text the model
 * was shown for that request.
 *
 * Each request/response pair is checked independently: candidate claims are
 * extracted from the response text, stopwords are dropped, and any claim that
 * is not a literal (case-sensitive) substring of the request's grounding
 * corpus is flagged. A given `(category, claim)` is flagged at most once per
 * request.
 *
 * @param events Trace events to analyse.
 * @returns `flags` (one entry per unverified claim), `totalFlags`,
 *   `totalAssistantTurns` (pairs with non-empty response text) and
 *   `violationRate` (`totalFlags / totalAssistantTurns`, or 0 with no turns).
 */
export function analyzeTruthfulness(events) {
    const pairs = pairEvents(events);
    const flags = [];
    let totalAssistantTurns = 0;
    for (const pair of pairs) {
        // One truthiness check decides both "skip" and "count", so a pair
        // whose response text is missing (not just empty) cannot throw later.
        if (!pair.responseText)
            continue;
        totalAssistantTurns += 1;
        const corpus = buildGroundingCorpus(pair.request);
        // Dedupe is scoped to this request; the same claim in a later
        // request is flagged again.
        const seen = new Set();
        for (const candidate of extractCandidates(pair.responseText)) {
            if (isStopword(candidate.claim))
                continue;
            if (corpus.includes(candidate.claim))
                continue;
            const dedupeKey = `${candidate.category}::${candidate.claim}`;
            if (seen.has(dedupeKey))
                continue;
            seen.add(dedupeKey);
            flags.push({
                requestId: pair.request.requestId,
                turnId: pair.request.turnId,
                iteration: pair.request.iteration,
                claim: candidate.claim,
                category: candidate.category,
                context: candidate.context,
            });
        }
    }
    return {
        totalAssistantTurns,
        totalFlags: flags.length,
        flags,
        violationRate: totalAssistantTurns === 0 ? 0 : flags.length / totalAssistantTurns,
    };
}
|
|
103
|
+
/**
 * Decide whether a claim should be ignored rather than verified.
 *
 * A claim is ignored when its lower-cased form is in `STOPWORDS`, or when it
 * contains both `{` and `}` (treated as a template placeholder rather than a
 * concrete value).
 */
function isStopword(claim) {
    const isListed = STOPWORDS.has(claim.toLowerCase());
    const hasPlaceholderBraces = claim.includes('{') && claim.includes('}');
    return isListed || hasPlaceholderBraces;
}
|
|
110
|
+
/**
 * Pair every `llm_request` event with the text the assistant produced for it.
 *
 * Requests keep their order of appearance. The response text comes from the
 * `llm_response` event with the same `requestId` when one exists; otherwise it
 * is derived from the assistant message appended to the next request. When
 * neither source yields text, `responseText` is the empty string.
 *
 * @param events Trace events; only `llm_request` and `llm_response` are read.
 * @returns One `{ request, responseText }` entry per request, where
 *   `responseText` is never `null` or `undefined`.
 */
function pairEvents(events) {
    const requestOrder = [];
    const responses = new Map();
    for (const ev of events) {
        if (ev.kind === 'llm_request') {
            requestOrder.push(ev.data);
        }
        else if (ev.kind === 'llm_response') {
            // A later response with the same requestId replaces an earlier one.
            responses.set(ev.data.requestId, ev.data);
        }
    }
    const pairs = [];
    for (let i = 0; i < requestOrder.length; i++) {
        const req = requestOrder[i];
        if (!req)
            continue;
        const resp = responses.get(req.requestId);
        let responseText = '';
        if (resp) {
            // A response recorded without text is normalised to '' so
            // consumers can rely on `responseText` having string members.
            responseText = resp.text ?? '';
        }
        else {
            // No recorded response: recover the text from the messages the
            // following request added on top of this one.
            const next = requestOrder[i + 1];
            if (next) {
                const derived = trailingAssistantText(req.messages, next.messages);
                if (derived)
                    responseText = derived;
            }
        }
        pairs.push({ request: req, responseText });
    }
    return pairs;
}
|
|
143
|
+
/**
 * Return the text of the first assistant message that `next` contains beyond
 * the length of `prev`.
 *
 * Only positions `prev.length .. next.length - 1` of `next` are inspected;
 * messages with a non-assistant role or empty text are skipped.
 *
 * @returns The message text, or `undefined` when no such message exists.
 */
function trailingAssistantText(prev, next) {
    const appended = next.slice(prev.length);
    const reply = appended.find((msg) => msg && msg.role === 'assistant' && msg.text);
    return reply ? reply.text : undefined;
}
|
|
151
|
+
/**
 * Assemble the text a request exposed to the model into one searchable string.
 *
 * The result is the system prompt followed by, for each message in order, its
 * `text` and then its `resultJson` (each only when non-empty), joined with
 * newlines.
 */
function buildGroundingCorpus(req) {
    const segments = [req.systemPrompt];
    for (const { text, resultJson } of req.messages) {
        for (const piece of [text, resultJson]) {
            if (piece)
                segments.push(piece);
        }
    }
    return segments.join('\n');
}
|
|
161
|
+
/**
 * Pull every candidate claim out of a piece of assistant text.
 *
 * Two passes run over the same text: path-like tokens are reported under
 * `'firestore-path'`, then backtick-quoted identifiers under
 * `'quoted-identifier'`. Results keep that pass order.
 */
function extractCandidates(text) {
    const found = [];
    const passes = [
        [PATH_PATTERN, 'firestore-path'],
        [QUOTED_PATTERN, 'quoted-identifier'],
    ];
    for (const [pattern, category] of passes) {
        collect(text, pattern, category, found);
    }
    return found;
}
|
|
167
|
+
function collect(text, pattern, category, out) {
|
|
168
|
+
const re = new RegExp(pattern.source, pattern.flags);
|
|
169
|
+
let m = re.exec(text);
|
|
170
|
+
while (m !== null) {
|
|
171
|
+
const claim = m[1] ?? m[0];
|
|
172
|
+
if (claim) {
|
|
173
|
+
const start = Math.max(0, m.index - 32);
|
|
174
|
+
const end = Math.min(text.length, m.index + claim.length + 32);
|
|
175
|
+
out.push({ claim, category, context: text.slice(start, end) });
|
|
176
|
+
}
|
|
177
|
+
m = re.exec(text);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
//# sourceMappingURL=truthfulness.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"truthfulness.js","sourceRoot":"","sources":["../../src/diagnostics/truthfulness.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAuBH,MAAM,YAAY,GAAG,mDAAmD,CAAC;AACzE,MAAM,cAAc,GAAG,+BAA+B,CAAC;AAEvD,MAAM,SAAS,GAAwB,IAAI,GAAG,CAAC;IAC7C,MAAM;IACN,OAAO;IACP,MAAM;IACN,WAAW;IACX,QAAQ;IACR,QAAQ;IACR,QAAQ;IACR,SAAS;IACT,OAAO;IACP,UAAU;IACV,SAAS;IACT,UAAU;IACV,UAAU;IACV,MAAM;IACN,SAAS;IACT,UAAU;IACV,UAAU;IACV,YAAY;IACZ,eAAe;IACf,WAAW;IACX,UAAU;IACV,mBAAmB;IACnB,WAAW;IACX,iBAAiB;IACjB,kBAAkB;IAClB,qBAAqB;IACrB,gBAAgB;IAChB,oBAAoB;IACpB,mBAAmB;IACnB,cAAc;IACd,SAAS;IACT,aAAa;IACb,WAAW;IACX,UAAU;IACV,WAAW;IACX,SAAS;IACT,eAAe;IACf,SAAS;IACT,cAAc;IACd,aAAa;IACb,KAAK;IACL,OAAO;IACP,OAAO;IACP,SAAS;IACT,OAAO;CACR,CAAC,CAAC;AAEH,MAAM,UAAU,mBAAmB,CAAC,MAA6B;IAC/D,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;IACjC,MAAM,KAAK,GAAuB,EAAE,CAAC;IAErC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,YAAY;YAAE,SAAS;QACjC,MAAM,MAAM,GAAG,oBAAoB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAClD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,KAAK,MAAM,SAAS,IAAI,iBAAiB,CAAC,IAAI,CAAC,YAAY,CAAC,EAAE,CAAC;YAC7D,IAAI,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC;gBAAE,SAAS;YAC1C,IAAI,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,KAAK,CAAC;gBAAE,SAAS;YAC/C,MAAM,SAAS,GAAG,GAAG,SAAS,CAAC,QAAQ,KAAK,SAAS,CAAC,KAAK,EAAE,CAAC;YAC9D,IAAI,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC;gBAAE,SAAS;YAClC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YACpB,KAAK,CAAC,IAAI,CAAC;gBACT,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,SAAS;gBACjC,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM;gBAC3B,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,SAAS;gBACjC,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,QAAQ,EAAE,SAAS,CAAC,QAAQ;gBAC5B,OAAO,EAAE,SAAS,CAAC,OAAO;aAC3B,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,mBAAmB,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;IAClF,OAAO;QACL,mBAAmB;QACnB,UAAU,EAAE,KAAK,CAAC,MAAM;QACxB,KAAK;QACL,aAAa,EAAE,mBAAmB,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,mBAAmB;KAClF,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,KAAa;IAC/B,
IAAI,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;QAAE,OAAO,IAAI,CAAC;IACpD,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAC5D,OAAO,KAAK,CAAC;AACf,CAAC;AAOD,SAAS,UAAU,CAAC,MAA6B;IAC/C,MAAM,YAAY,GAAsB,EAAE,CAAC;IAC3C,MAAM,SAAS,GAAG,IAAI,GAAG,EAA4B,CAAC;IACtD,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;QACxB,IAAI,EAAE,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YAC9B,YAAY,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;QAC7B,CAAC;aAAM,IAAI,EAAE,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;YACtC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IACD,MAAM,KAAK,GAAmB,EAAE,CAAC;IACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,IAAI,CAAC,GAAG;YAAE,SAAS;QACnB,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAC1C,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,IAAI,EAAE,CAAC;YACT,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC;QAC3B,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACjC,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,OAAO,GAAG,qBAAqB,CAAC,GAAG,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACnE,IAAI,OAAO;oBAAE,YAAY,GAAG,OAAO,CAAC;YACtC,CAAC;QACH,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,GAAG,EAAE,YAAY,EAAE,CAAC,CAAC;IAC7C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,qBAAqB,CAC5B,IAAkC,EAClC,IAAkC;IAElC,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/C,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,CAAC,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC,IAAI,CAAC;IAC3D,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,oBAAoB,CAAC,GAAoB;IAChD,MAAM,KAAK,GAAa,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IAC3C,KAAK,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC7B,IAAI,CAAC,CAAC,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAI,CAAC,CAAC,UAAU;YAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;IAC7C,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAQD,SAAS,iBAAiB,CAAC,IAAY;IAC
rC,MAAM,GAAG,GAAgB,EAAE,CAAC;IAC5B,OAAO,CAAC,IAAI,EAAE,YAAY,EAAE,gBAAgB,EAAE,GAAG,CAAC,CAAC;IACnD,OAAO,CAAC,IAAI,EAAE,cAAc,EAAE,mBAAmB,EAAE,GAAG,CAAC,CAAC;IACxD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,OAAO,CACd,IAAY,EACZ,OAAe,EACf,QAAkC,EAClC,GAAgB;IAEhB,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;IACrD,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtB,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;QAClB,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;YACxC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;YAC/D,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QACjE,CAAC;QACD,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content-addressed memoization layer over `createDispatch`.
|
|
3
|
+
*
|
|
4
|
+
* `createMemoizedDispatch(registry, options?)` returns a `ToolDispatch`-shaped
|
|
5
|
+
* object that caches `ToolResult`s keyed on `(toolName, argsHash,
|
|
6
|
+
* workspaceHash, runtimeHash)`. The cache is consulted only for handlers
|
|
7
|
+
* tagged `pure` (see `isPure` in `./tools.ts`). Non-pure handlers bypass
|
|
8
|
+
* the cache entirely and always execute. Errors from the underlying
|
|
9
|
+
* dispatch propagate; they are NOT cached, since they may be transient.
|
|
10
|
+
*
|
|
11
|
+
* The returned object is structurally a `ToolDispatch` — strategies and
|
|
12
|
+
* downstream code that already accept `ToolDispatch` use it transparently.
|
|
13
|
+
* The one addition is `stats()`, which returns the running counters for
|
|
14
|
+
* hits / misses / bypassed calls. The cache lives for the lifetime of
|
|
15
|
+
* one `MemoizedDispatch` instance; there is no global state.
|
|
16
|
+
*
|
|
17
|
+
* Design notes:
|
|
18
|
+
*
|
|
19
|
+
* - Hashing uses FNV-1a 32-bit over a stable-stringified JSON
|
|
20
|
+
* representation. The cache is for short-running test loops; a
|
|
21
|
+
* cryptographic hash is overkill. Collisions are tolerable at our
|
|
22
|
+
* cache sizes, and the cost of a missed hit is at worst a recomputation.
|
|
23
|
+
* - Argument keys are sorted at every level via `stableStringify` so two
|
|
24
|
+
* structurally-equal arg objects produce the same key regardless of
|
|
25
|
+
* property insertion order.
|
|
26
|
+
* - Workspace hash covers `presetId`, `rules`, `code`, and `appSource`.
|
|
27
|
+
* `stitch` is excluded per the brief — pure tools don't read from it.
|
|
28
|
+
* - Runtime hash is included only when `keyComponents` contains `'runtime'`.
|
|
29
|
+
* Defaults to `['workspace']`; making runtime opt-in keeps the
|
|
30
|
+
* default key small for the dominant pure-tool population.
|
|
31
|
+
* - No eviction in v1. Eval runs are bounded; one instance per harness
|
|
32
|
+
* trial keeps cache growth bounded too.
|
|
33
|
+
*/
|
|
34
|
+
import type { ToolDispatch, ToolRegistry } from './types/tools.js';
|
|
35
|
+
/** Which `ctx` fields contribute to the cache key. */
export type MemoKeyComponent = 'workspace' | 'runtime';
/** Options accepted by `createMemoizedDispatch`. */
export interface MemoOptions {
    /**
     * Which `ctx` fields contribute to the cache key. Defaults to
     * `['workspace']`. Some pure tools depend on runtime; opt-in keeps
     * the default key small for tools that are workspace-determined.
     *
     * Declared `readonly` because the list is only inspected, never
     * modified, so frozen and `as const` arrays are accepted alongside
     * ordinary mutable arrays.
     */
    keyComponents?: readonly MemoKeyComponent[];
}
|
|
45
|
+
/** Running call counters kept by a memoized dispatcher. */
export interface MemoStats {
    /** Calls to a pure tool that were answered from the cache. */
    hits: number;
    /** Calls to a pure tool that found no cache entry, so the handler ran. */
    misses: number;
    /** Calls to a non-pure tool, which skip the cache layer altogether. */
    bypassed: number;
}
|
|
53
|
+
/** A `ToolDispatch` that additionally exposes its cache counters. */
export interface MemoizedDispatch extends ToolDispatch {
    /** Current counter values, copied into a new object on each call. */
    stats(): MemoStats;
}
|
|
57
|
+
/**
 * Create a memoizing dispatcher for the tools in `registry`.
 *
 * Calls are forwarded to the standard `createDispatch(registry)` dispatcher;
 * the returned wrapper owns a private cache in front of it.
 *
 * Calls to non-pure handlers, and calls naming an unknown tool, are not
 * cached: they go straight to the underlying dispatcher and are counted
 * under `bypassed`.
 */
export declare function createMemoizedDispatch(registry: ToolRegistry, options?: MemoOptions): MemoizedDispatch;
|
|
65
|
+
/**
 * Serialize a value to JSON with a deterministic key order.
 *
 * Object keys are emitted in alphabetical order at every nesting level, while
 * array elements keep their positions. As in standard JSON, functions,
 * symbols and `undefined` properties are dropped and `null` is kept.
 *
 * Plain `JSON.stringify(value)` is not a substitute: it follows insertion
 * order, so two structurally-equal objects built in a different order would
 * serialize differently and yield different cache keys.
 */
export declare function stableStringify(value: unknown): string;
|
|
76
|
+
/**
 * Compute the 32-bit FNV-1a hash of `input` as a lowercase hex string.
 *
 * The hash is fast and dependency-free; collisions are considered tolerable
 * at the cache sizes this module targets. The input is consumed one UTF-16
 * code unit at a time, which is sufficient for stably-stringified JSON
 * payloads (ASCII control characters, JSON syntax tokens and user-supplied
 * string data).
 */
export declare function hashFnv1a32(input: string): string;
|
|
84
|
+
//# sourceMappingURL=dispatch-memoization.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dispatch-memoization.d.ts","sourceRoot":"","sources":["../src/dispatch-memoization.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAIH,OAAO,KAAK,EAGV,YAAY,EAEZ,YAAY,EAEb,MAAM,kBAAkB,CAAC;AAG1B,sDAAsD;AACtD,MAAM,MAAM,gBAAgB,GAAG,WAAW,GAAG,SAAS,CAAC;AAEvD,MAAM,WAAW,WAAW;IAC1B;;;;OAIG;IACH,aAAa,CAAC,EAAE,gBAAgB,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,SAAS;IACxB,2DAA2D;IAC3D,IAAI,EAAE,MAAM,CAAC;IACb,kEAAkE;IAClE,MAAM,EAAE,MAAM,CAAC;IACf,0DAA0D;IAC1D,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,gBAAiB,SAAQ,YAAY;IACpD,8EAA8E;IAC9E,KAAK,IAAI,SAAS,CAAC;CACpB;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,YAAY,EACtB,OAAO,CAAC,EAAE,WAAW,GACpB,gBAAgB,CA6ClB;AAoDD;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAEtD;AA8BD;;;;;;GAMG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAQjD"}
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content-addressed memoization layer over `createDispatch`.
|
|
3
|
+
*
|
|
4
|
+
* `createMemoizedDispatch(registry, options?)` returns a `ToolDispatch`-shaped
|
|
5
|
+
* object that caches `ToolResult`s keyed on `(toolName, argsHash,
|
|
6
|
+
* workspaceHash, runtimeHash)`. The cache is consulted only for handlers
|
|
7
|
+
* tagged `pure` (see `isPure` in `./tools.ts`). Non-pure handlers bypass
|
|
8
|
+
* the cache entirely and always execute. Errors from the underlying
|
|
9
|
+
* dispatch propagate; they are NOT cached, since they may be transient.
|
|
10
|
+
*
|
|
11
|
+
* The returned object is structurally a `ToolDispatch` — strategies and
|
|
12
|
+
* downstream code that already accept `ToolDispatch` use it transparently.
|
|
13
|
+
* The one addition is `stats()`, which returns the running counters for
|
|
14
|
+
* hits / misses / bypassed calls. The cache lives for the lifetime of
|
|
15
|
+
* one `MemoizedDispatch` instance; there is no global state.
|
|
16
|
+
*
|
|
17
|
+
* Design notes:
|
|
18
|
+
*
|
|
19
|
+
* - Hashing uses FNV-1a 32-bit over a stable-stringified JSON
|
|
20
|
+
* representation. The cache is for short-running test loops; a
|
|
21
|
+
* cryptographic hash is overkill. Collisions are tolerable at our
|
|
22
|
+
* cache sizes, and the cost of a missed hit is at worst a recomputation.
|
|
23
|
+
* - Argument keys are sorted at every level via `stableStringify` so two
|
|
24
|
+
* structurally-equal arg objects produce the same key regardless of
|
|
25
|
+
* property insertion order.
|
|
26
|
+
* - Workspace hash covers `presetId`, `rules`, `code`, and `appSource`.
|
|
27
|
+
* `stitch` is excluded per the brief — pure tools don't read from it.
|
|
28
|
+
* - Runtime hash is included only when `keyComponents` contains `'runtime'`.
|
|
29
|
+
* Defaults to `['workspace']`; making runtime opt-in keeps the
|
|
30
|
+
* default key small for the dominant pure-tool population.
|
|
31
|
+
* - No eviction in v1. Eval runs are bounded; one instance per harness
|
|
32
|
+
* trial keeps cache growth bounded too.
|
|
33
|
+
*/
|
|
34
|
+
import { createDispatch, isPure } from './tools.js';
|
|
35
|
+
/**
 * Create a memoizing dispatcher for the tools in `registry`.
 *
 * Calls are forwarded to the standard `createDispatch(registry)` dispatcher;
 * the returned wrapper owns a private cache and its own counters.
 *
 * Calls to non-pure handlers, and calls naming an unknown tool, are not
 * cached: they go straight to the underlying dispatcher and are counted
 * under `bypassed`.
 */
export function createMemoizedDispatch(registry, options) {
    const components = options?.keyComponents ?? ['workspace'];
    const useWorkspace = components.includes('workspace');
    const useRuntime = components.includes('runtime');
    const inner = createDispatch(registry);
    const store = new Map();
    const tally = { hits: 0, misses: 0, bypassed: 0 };
    const execute = async (call, ctx) => {
        const handler = findHandler(registry, call.name);
        const cacheable = handler && isPure(handler);
        if (!cacheable) {
            // Unknown or non-pure tool: hand the call to the underlying
            // dispatcher untouched, so any error it reports is unchanged.
            tally.bypassed += 1;
            return inner.execute(call, ctx);
        }
        const key = buildCacheKey(call, ctx, useWorkspace, useRuntime);
        const stored = store.get(key);
        if (stored !== undefined) {
            tally.hits += 1;
            return stored;
        }
        // Nothing cached yet, so run the handler. `createDispatch` turns a
        // throwing handler into an `{ ok: false, ... }` result, so failure
        // shows up here as `ok === false` rather than as an exception. Such
        // results are returned to the caller but never stored.
        const outcome = await inner.execute(call, ctx);
        tally.misses += 1;
        if (outcome.ok) {
            store.set(key, outcome);
        }
        return outcome;
    };
    // Hand out a copy so callers cannot mutate the live counters.
    const stats = () => ({ ...tally });
    return { execute, stats };
}
|
|
84
|
+
/**
 * Compose the cache key string `name|argsHash|workspaceHash|runtimeHash`.
 *
 * Each hash is the FNV-1a digest of the stably-stringified component.
 * A component that is switched off contributes an empty segment, so the
 * key always has four `|`-separated fields.
 *
 * @param call Tool call; `call.name` and `call.args` are read.
 * @param ctx Dispatch context; `ctx.workspace` / `ctx.runtime` are read only
 *   when the matching flag is set.
 * @param includeWorkspace Whether the workspace projection is hashed into the key.
 * @param includeRuntime Whether the runtime projection is hashed into the key.
 * @returns The composed key string.
 */
function buildCacheKey(call, ctx, includeWorkspace, includeRuntime) {
    const digest = (value) => hashFnv1a32(stableStringify(value));
    const hashes = [
        digest(call.args),
        includeWorkspace ? digest(workspaceShape(ctx.workspace)) : '',
        includeRuntime ? digest(runtimeShape(ctx.runtime)) : '',
    ];
    return `${call.name}|${hashes.join('|')}`;
}
|
|
95
|
+
/**
 * Reduce a workspace to the fields that take part in the cache key:
 * `presetId`, `rules`, `code` and `appSource`. Every other field
 * (notably `stitch`) is ignored, so workspaces that differ only in
 * those fields yield the same projection.
 *
 * @param ws Workspace object, or a falsy value when there is none.
 * @returns The four-field projection, or `{ _present: false }` for a falsy `ws`.
 */
function workspaceShape(ws) {
    if (ws) {
        const { presetId, rules, code, appSource } = ws;
        return { presetId, rules, code, appSource };
    }
    return { _present: false };
}
|
|
112
|
+
/**
 * Reduce a runtime object to a fixed set of fields for hashing:
 * `terminal`, `runSummary`, `deploy`, `parseError`, `uiErrors` and
 * `sandboxVersion`. The caller only uses this projection when
 * `'runtime'` is part of `keyComponents`.
 *
 * @param rt Runtime object, or a falsy value when there is none.
 * @returns The six-field projection, or `{ _present: false }` for a falsy `rt`.
 */
function runtimeShape(rt) {
    if (!rt) {
        return { _present: false };
    }
    const shape = {};
    for (const field of ['terminal', 'runSummary', 'deploy', 'parseError', 'uiErrors', 'sandboxVersion']) {
        shape[field] = rt[field];
    }
    return shape;
}
|
|
128
|
+
/**
 * Deterministic JSON-like serialization used to build cache keys.
 *
 * - Object keys are sorted at every nesting level, so structurally equal
 *   objects serialize identically regardless of property insertion order
 *   (plain `JSON.stringify` emits keys in insertion order).
 * - Arrays keep their element order.
 * - Object properties whose value is `undefined` are omitted; a bare
 *   `undefined` (top level or array element) serializes as `undefined`.
 * - Non-finite numbers serialize as `null`; `null` is preserved.
 * - Objects exposing a `toJSON()` method (for example `Date`) are
 *   serialized through it, as `JSON.stringify` does. Without this, such
 *   objects have no own enumerable keys and would all collapse to `{}`,
 *   making distinct values share one cache key.
 * - Functions, symbols and bigints are NOT omitted: they fall back to
 *   the JSON-quoted result of `String(value)`.
 *
 * @param value Any value to serialize.
 * @returns The stable string form.
 * @throws {TypeError} If `value` contains a circular reference.
 */
export function stableStringify(value) {
    return serialize(value, new Set());
}
/**
 * Recursive worker behind `stableStringify`.
 *
 * @param value Value to serialize.
 * @param ancestors Objects currently being serialized on the path from
 *   the root to `value`; used to detect cycles. Objects that are merely
 *   shared (referenced twice without forming a cycle) are allowed.
 * @returns The stable string form of `value`.
 */
function serialize(value, ancestors = new Set()) {
    if (value === null)
        return 'null';
    if (value === undefined)
        return 'undefined';
    if (typeof value === 'number') {
        return Number.isFinite(value) ? JSON.stringify(value) : 'null';
    }
    if (typeof value === 'string' || typeof value === 'boolean') {
        return JSON.stringify(value);
    }
    if (typeof value !== 'object') {
        // Functions, symbols, bigints — fall back to a stable string form.
        // Pure tool args should never contain these; the fallback is defensive.
        return JSON.stringify(String(value));
    }
    if (ancestors.has(value)) {
        // Fail fast with a clear error instead of recursing until the stack overflows.
        throw new TypeError('stableStringify: cannot serialize a circular structure');
    }
    ancestors.add(value);
    try {
        if (typeof value.toJSON === 'function') {
            const replacement = value.toJSON();
            // A toJSON that returns the object itself is serialized structurally below.
            if (replacement !== value) {
                return serialize(replacement, ancestors);
            }
        }
        if (Array.isArray(value)) {
            return `[${value.map((item) => serialize(item, ancestors)).join(',')}]`;
        }
        const parts = [];
        for (const k of Object.keys(value).sort()) {
            const v = value[k];
            if (v === undefined)
                continue;
            parts.push(`${JSON.stringify(k)}:${serialize(v, ancestors)}`);
        }
        return `{${parts.join(',')}}`;
    }
    finally {
        // Leaving this node: siblings may legitimately reference the same object.
        ancestors.delete(value);
    }
}
|
|
171
|
+
/**
 * 32-bit FNV-1a hash of a string, returned as eight lowercase hex digits.
 *
 * The hash folds in one UTF-16 code unit per step (not one byte), so it
 * matches the byte-oriented reference algorithm only for input whose
 * code units are all below 256.
 *
 * @param input String to hash.
 * @returns Zero-padded, 8-character lowercase hexadecimal digest.
 */
export function hashFnv1a32(input) {
    const FNV_OFFSET_BASIS = 0x811c9dc5;
    const FNV_PRIME = 0x01000193;
    let state = FNV_OFFSET_BASIS;
    let index = 0;
    while (index < input.length) {
        // XOR in the code unit, then multiply by the prime modulo 2^32.
        // `Math.imul` performs the 32-bit multiply; `>>> 0` maps it to unsigned.
        state = Math.imul(state ^ input.charCodeAt(index), FNV_PRIME) >>> 0;
        index += 1;
    }
    return state.toString(16).padStart(8, '0');
}
|
|
187
|
+
/**
 * Resolve a handler by name using only the registry's `has` and `list`
 * methods. The existing note says this duplicates a helper in `tools.ts`
 * so that this module does not depend on internal helpers.
 *
 * @param registry Object exposing `has(name)` and `list()`.
 * @param name Tool name to look up.
 * @returns The first listed handler whose `name` matches, or `undefined`
 *   when `registry.has(name)` is falsy or no listed handler matches.
 */
function findHandler(registry, name) {
    return registry.has(name)
        ? registry.list().find((candidate) => candidate.name === name)
        : undefined;
}
|
|
197
|
+
//# sourceMappingURL=dispatch-memoization.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dispatch-memoization.js","sourceRoot":"","sources":["../src/dispatch-memoization.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAsCpD;;;;;;GAMG;AACH,MAAM,UAAU,sBAAsB,CACpC,QAAsB,EACtB,OAAqB;IAErB,MAAM,aAAa,GAAuB,OAAO,EAAE,aAAa,IAAI,CAAC,WAAW,CAAC,CAAC;IAClF,MAAM,cAAc,GAAG,aAAa,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;IACzD,MAAM,gBAAgB,GAAG,aAAa,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE7D,MAAM,UAAU,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC5C,MAAM,KAAK,GAAG,IAAI,GAAG,EAAsB,CAAC;IAC5C,MAAM,QAAQ,GAAc,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;IAEhE,OAAO;QACL,KAAK,CAAC,OAAO,CAAC,IAAc,EAAE,GAAgB;YAC5C,MAAM,OAAO,GAAG,WAAW,CAAC,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;YACjD,gEAAgE;YAChE,2DAA2D;YAC3D,2DAA2D;YAC3D,iBAAiB;YACjB,IAAI,CAAC,OAAO,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;gBACjC,QAAQ,CAAC,QAAQ,IAAI,CAAC,CAAC;gBACvB,OAAO,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;YACvC,CAAC;YAED,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,EAAE,GAAG,EAAE,gBAAgB,EAAE,cAAc,CAAC,CAAC;YACvE,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC9B,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;gBACzB,QAAQ,CAAC,IAAI,IAAI,CAAC,CAAC;gBACnB,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,gEAAgE;YAChE,0DAA0D;YAC1D,2DAA2D;YAC3D,8DAA8D;YAC9D,yDAAyD;YACzD,yDAAyD;YACzD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;YACnD,QAAQ,CAAC,MAAM,IAAI,CAAC,CAAC;YACrB,IAAI,MAAM,CAAC,EAAE,EAAE,CAAC;gBACd,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;YACzB,CAAC;YACD,OAAO,MAAM,CAAC;QAChB,CAAC;QACD,KAAK;YACH,OAAO,EAAE,GAAG,QAAQ,EAAE,CAAC;QACzB,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,SAAS,aAAa,CACpB,IAAc,EACd,GAAgB,EAChB,gBAAyB,EACzB,cAAuB;IAEvB,MAAM,OAAO,GAAG,WAAW,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IACxD,MAAM,KAAK,GAAG,gBAAgB,CAAC,CAAC,CAAC,WAAW,CAAC,eAAe,CAAC,cAAc,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAClG,MAAM,KAAK,GAAG,cAAc,CAAC,CAAC,CAAC,WAAW,CAAC,eAAe,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,C
AAC;IAC5F,OAAO,GAAG,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,IAAI,KAAK,EAAE,CAAC;AACrD,CAAC;AAED;;;;;;GAMG;AACH,SAAS,cAAc,CAAC,EAAc;IACpC,IAAI,CAAC,EAAE;QAAE,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;IACpC,OAAO;QACL,QAAQ,EAAE,EAAE,CAAC,QAAQ;QACrB,KAAK,EAAE,EAAE,CAAC,KAAK;QACf,IAAI,EAAE,EAAE,CAAC,IAAI;QACb,SAAS,EAAE,EAAE,CAAC,SAAS;KACxB,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,SAAS,YAAY,CAAC,EAAiB;IACrC,IAAI,CAAC,EAAE;QAAE,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;IACpC,OAAO;QACL,QAAQ,EAAE,EAAE,CAAC,QAAQ;QACrB,UAAU,EAAE,EAAE,CAAC,UAAU;QACzB,MAAM,EAAE,EAAE,CAAC,MAAM;QACjB,UAAU,EAAE,EAAE,CAAC,UAAU;QACzB,QAAQ,EAAE,EAAE,CAAC,QAAQ;QACrB,cAAc,EAAE,EAAE,CAAC,cAAc;KAClC,CAAC;AACJ,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,eAAe,CAAC,KAAc;IAC5C,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,SAAS,CAAC,KAAc;IAC/B,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,MAAM,CAAC;IAClC,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,WAAW,CAAC;IAC5C,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,OAAO,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACjE,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,SAAS,EAAE,CAAC;QAC5D,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IAC/B,CAAC;IACD,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;IAC/C,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,KAAgC,CAAC;QAC7C,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACrC,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YACrB,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;YACjB,IAAI,CAAC,KAAK,SAAS;gBAAE,SAAS;YAC9B,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,OAAO,IAAI,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;IAChC,CAAC;IACD,mEAAmE;IACnE,wEAAwE;IACxE,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;AACvC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,WAAW,CAAC,KAAa;IACvC,IAAI,IAAI,GAAG,UAAU,CAAC,CAAC,4BAA4B;IACnD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAA
G,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,IAAI,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC5B,sEAAsE;QACtE,IAAI,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;IAC/F,CAAC;IACD,OAAO,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AAC5C,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,QAAsB,EAAE,IAAY;IACvD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,SAAS,CAAC;IAC1C,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;AACtD,CAAC"}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `compareMetrics` — the eval harness's A/B comparison report.
|
|
3
|
+
*
|
|
4
|
+
* Consumes two named `MetricsTable[]`s (typically a "baseline" and a
|
|
5
|
+
* "variant"), pairs them by `fixtureId`, and for each per-metric
|
|
6
|
+
* column emits a row containing the inputs (mean +/- spread for both
|
|
7
|
+
* sides), the delta (`variantMean - baselineMean`), the threshold
|
|
8
|
+
* (`max(baselineSpread, variantSpread)`), and a label.
|
|
9
|
+
*
|
|
10
|
+
* The label uses the implementation plan's no-effect rule:
|
|
11
|
+
*
|
|
12
|
+
* - If `Math.abs(delta) < threshold`, label `no-effect`. This is the
|
|
13
|
+
* central discipline: noisy trial-to-trial variance must not be
|
|
14
|
+
* reported as a winner.
|
|
15
|
+
* - Otherwise consult the metric's polarity (see `POLARITY` below):
|
|
16
|
+
* higher-is-better -> sign of `delta` picks the winner;
|
|
17
|
+
* lower-is-better -> sign of `delta` picks the loser;
|
|
18
|
+
* neutral -> emit `changed` (no winner, just a flag).
|
|
19
|
+
*
|
|
20
|
+
* Polarity is a static map declared in this module. Adding a new
|
|
21
|
+
* metric to the collector forces an entry here because the map is
|
|
22
|
+
* keyed by a string-literal union derived from `MetricsTable` fields.
|
|
23
|
+
*
|
|
24
|
+
* Two renderers ship with the comparator:
|
|
25
|
+
*
|
|
26
|
+
* - `renderMarkdown(report)` — a human-readable markdown table for
|
|
27
|
+
* piping to stdout from a CLI consumer.
|
|
28
|
+
* - `renderJson(report)` — `JSON.stringify(report, null, 2)`. Plain
|
|
29
|
+
* data; round-trips through `JSON.parse(JSON.stringify(report))`
|
|
30
|
+
* without loss.
|
|
31
|
+
*
|
|
32
|
+
* No statistical sophistication beyond the no-effect rule. Confidence
|
|
33
|
+
* intervals, cross-fixture aggregation, and multi-variant comparisons
|
|
34
|
+
* are deliberately deferred to follow-up branches.
|
|
35
|
+
*
|
|
36
|
+
* Browser-safe — no Node imports.
|
|
37
|
+
*
|
|
38
|
+
* Note on naming: metric names match the collector exactly. Do not
|
|
39
|
+
* rename. The CLI, the report, and any later UI all key on these
|
|
40
|
+
* identifiers, and renaming in one layer without the others silently
|
|
41
|
+
* breaks consumers.
|
|
42
|
+
*/
|
|
43
|
+
import type { AggregateStat, MetricsTable } from './metric-collector.js';
|
|
44
|
+
/**
 * Names of the metrics a comparison row may refer to.
 *
 * Each name is the dotted path from `AggregatedMetrics` to an
 * `AggregateStat`; the nested `toolCallCount` object contributes one
 * name per child (`total`, `reads`, `mutations`).
 *
 * Must stay in step with `AggregatedMetrics` in `./metric-collector.ts`.
 * `POLARITY` is typed as a `Record` over this union, so every name
 * added here needs a polarity entry.
 */
export type ComparisonMetricName =
    | 'taskSuccessRate'
    | 'wallClockMs'
    | 'promptTokens'
    | 'completionTokens'
    | 'toolCallCount.total'
    | 'toolCallCount.reads'
    | 'toolCallCount.mutations'
    | 'turnCount'
    | 'peakContextWindowBytes'
    | 'truthfulnessViolationRate'
    | 'dispatchVsLlmRatio';
|
|
55
|
+
/**
 * How an increase in a metric should be read: as an improvement
 * (`higher-is-better`), a regression (`lower-is-better`), or neither
 * (`neutral`).
 */
export type Polarity =
    | 'higher-is-better'
    | 'lower-is-better'
    | 'neutral';
|
|
57
|
+
/**
 * Default polarity for every comparison metric.
 *
 * The values are defined in the implementation, not in this declaration;
 * the existing note says the `neutral` entries are the metrics the
 * implementation plan treats as context-dependent.
 *
 * Because the type is `Record<ComparisonMetricName, Polarity>`, extending
 * `ComparisonMetricName` without adding an entry here is a compile error.
 */
export declare const POLARITY: Record<ComparisonMetricName, Polarity>;
|
|
67
|
+
/**
 * Verdict attached to a per-metric row once the no-effect rule has been
 * applied: no meaningful difference, a winner on either side, or a
 * change with no winner (`changed`).
 */
export type ComparisonLabel =
    | 'no-effect'
    | 'winner-baseline'
    | 'winner-variant'
    | 'changed';
|
|
69
|
+
/**
 * One row of the report: a single metric for a single fixture.
 *
 * The baseline and variant aggregates are carried over unchanged from
 * the inputs. Their means may be `undefined` when the collector could
 * not compute the metric; in that case there is nothing to compare, so
 * `delta` and `threshold` are `undefined` and `label` is `no-effect`.
 */
export interface ComparisonRow {
    /** Fixture this row belongs to. */
    fixtureId: string;
    /** Metric being compared. */
    metric: ComparisonMetricName;
    /** Polarity used when labelling this row. */
    polarity: Polarity;
    /** Aggregate for the baseline side. */
    baseline: AggregateStat;
    /** Aggregate for the variant side. */
    variant: AggregateStat;
    /** `variantMean - baselineMean`; `undefined` if either mean is `undefined`. */
    delta: number | undefined;
    /**
     * `max(baselineSpread, variantSpread)`; `undefined` when it cannot be
     * computed (either spread is `undefined`, or a mean is missing).
     */
    threshold: number | undefined;
    /** Outcome of the no-effect rule for this row. */
    label: ComparisonLabel;
}
|
|
91
|
+
/**
 * All comparison rows for one fixture, together with a flag saying
 * which of the two inputs contained that fixture.
 */
export interface ComparisonFixture {
    /** Identifier of the fixture. */
    fixtureId: string;
    /**
     * Coverage of the fixture across the inputs: `both` if it is present
     * on each side, otherwise `baseline-only` or `variant-only`. When a
     * side is missing, `rows` holds no per-metric rows.
     */
    status: 'both' | 'baseline-only' | 'variant-only';
    /** Per-metric rows for this fixture. */
    rows: ComparisonRow[];
}
|
|
102
|
+
/**
 * The complete comparison result. Both `renderJson` and
 * `renderMarkdown` take this shape as input.
 */
export interface ComparisonReport {
    /** Display name of the left-hand (baseline) side; `'baseline'` by default. */
    baselineName: string;
    /** Display name of the right-hand (variant) side; `'variant'` by default. */
    variantName: string;
    /**
     * One entry for each fixture found in either input, ordered as the
     * baseline-first union of the two.
     */
    fixtures: ComparisonFixture[];
}
|
|
111
|
+
/** Arguments accepted by `compareMetrics`. */
export interface CompareMetricsInput {
    /** Metric tables for the baseline side. */
    baseline: readonly MetricsTable[];
    /** Metric tables for the variant side. */
    variant: readonly MetricsTable[];
    /** Display name for the baseline column; `'baseline'` when omitted. */
    baselineName?: string;
    /** Display name for the variant column; `'variant'` when omitted. */
    variantName?: string;
    /**
     * Per-report replacements for entries of the static `POLARITY` map.
     *
     * Intended for experiments that have a directional expectation for a
     * metric whose default polarity does not express it. Only the listed
     * metrics are overridden; the rest keep their defaults. The existing
     * note says the defaults are normally correct, so this is rarely
     * needed.
     */
    polarityOverrides?: Partial<Record<ComparisonMetricName, Polarity>>;
}
|
|
130
|
+
/**
 * Produce a `ComparisonReport` from a baseline and a variant set of
 * metric tables.
 *
 * A fixture found on both sides is paired and receives one row for each
 * entry of `POLARITY`. A fixture found on only one side is still listed,
 * with an explicit `status` and an empty `rows` array.
 *
 * Documented as never throwing on missing data: an `undefined` mean or
 * spread yields an `undefined` delta / threshold and a `no-effect` label.
 *
 * @param input The two metric tables plus optional names and polarity overrides.
 * @returns The assembled report.
 */
export declare function compareMetrics(input: CompareMetricsInput): ComparisonReport;
|
|
141
|
+
/**
 * Serialize a `ComparisonReport` to a JSON string.
 *
 * Documented as equivalent to `JSON.stringify(report, null, 2)`; the
 * report is plain data with no functions.
 *
 * NOTE(review): `JSON.stringify` drops object properties whose value is
 * `undefined` rather than writing `null`, so an `undefined` `delta` or
 * `threshold` may be missing from the output. The existing note says
 * consumers treat `null` and missing alike — confirm against the
 * implementation.
 *
 * @param report Report to serialize.
 * @returns The JSON text.
 */
export declare function renderJson(report: ComparisonReport): string;
|
|
150
|
+
/**
 * Format a `ComparisonReport` as a markdown document.
 *
 * The output has one section per fixture. Each section starts with a
 * header giving the fixture id and its coverage status, followed by a
 * seven-column table: metric, baseline (mean +/- spread), variant
 * (mean +/- spread), delta, threshold, polarity, label.
 *
 * Number formatting:
 *   - means and spreads are shown with at most four significant digits;
 *   - delta and threshold go through the same formatter as the mean;
 *   - an `undefined` value is shown as `-`.
 *
 * @param report Report to render.
 * @returns The markdown text.
 */
export declare function renderMarkdown(report: ComparisonReport): string;
|
|
164
|
+
//# sourceMappingURL=comparison-report.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"comparison-report.d.ts","sourceRoot":"","sources":["../../src/eval/comparison-report.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyCG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAIzE;;;;;;;;;GASG;AACH,MAAM,MAAM,oBAAoB,GAC5B,iBAAiB,GACjB,aAAa,GACb,cAAc,GACd,kBAAkB,GAClB,qBAAqB,GACrB,qBAAqB,GACrB,yBAAyB,GACzB,WAAW,GACX,wBAAwB,GACxB,2BAA2B,GAC3B,oBAAoB,CAAC;AAEzB,8DAA8D;AAC9D,MAAM,MAAM,QAAQ,GAAG,kBAAkB,GAAG,iBAAiB,GAAG,SAAS,CAAC;AAE1E;;;;;;;;GAQG;AACH,eAAO,MAAM,QAAQ,EAAE,MAAM,CAAC,oBAAoB,EAAE,QAAQ,CAqB3D,CAAC;AAIF,4EAA4E;AAC5E,MAAM,MAAM,eAAe,GAAG,WAAW,GAAG,iBAAiB,GAAG,gBAAgB,GAAG,SAAS,CAAC;AAE7F;;;;;;;;;GASG;AACH,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,oBAAoB,CAAC;IAC7B,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,aAAa,CAAC;IACxB,OAAO,EAAE,aAAa,CAAC;IACvB,iFAAiF;IACjF,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAC1B,2FAA2F;IAC3F,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,KAAK,EAAE,eAAe,CAAC;CACxB;AAED,gFAAgF;AAChF,MAAM,WAAW,iBAAiB;IAChC,SAAS,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,MAAM,EAAE,MAAM,GAAG,eAAe,GAAG,cAAc,CAAC;IAClD,IAAI,EAAE,aAAa,EAAE,CAAC;CACvB;AAED,oDAAoD;AACpD,MAAM,WAAW,gBAAgB;IAC/B,8DAA8D;IAC9D,YAAY,EAAE,MAAM,CAAC;IACrB,8DAA8D;IAC9D,WAAW,EAAE,MAAM,CAAC;IACpB,+EAA+E;IAC/E,QAAQ,EAAE,iBAAiB,EAAE,CAAC;CAC/B;AAID,iCAAiC;AACjC,MAAM,WAAW,mBAAmB;IAClC,QAAQ,EAAE,SAAS,YAAY,EAAE,CAAC;IAClC,OAAO,EAAE,SAAS,YAAY,EAAE,CAAC;IACjC,wEAAwE;IACxE,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,sEAAsE;IACtE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;;;;;;OAQG;IACH,iBAAiB,CAAC,EAAE,OAAO,CAAC,MAAM,CAAC,oBAAoB,EAAE,QAAQ,CAAC,CAAC,CAAC;CACrE;AAED;;;;;;;;;GASG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,mBAAmB,GAAG,gBAAgB,CA8C3E;AAgHD;;;;;;;GAOG;AACH,wBAAgB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,MAAM,CAE3D;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,gBAAgB,GAAG,MAAM,CA6C/D"}
|