@bradtaylorsf/alpha-loop 1.14.0 → 1.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/README.md +73 -1
  2. package/dist/cli.js +37 -2
  3. package/dist/cli.js.map +1 -1
  4. package/dist/commands/eval.d.ts +22 -0
  5. package/dist/commands/eval.js +105 -1
  6. package/dist/commands/eval.js.map +1 -1
  7. package/dist/commands/evolve-routing.d.ts +24 -0
  8. package/dist/commands/evolve-routing.js +320 -0
  9. package/dist/commands/evolve-routing.js.map +1 -0
  10. package/dist/commands/history.d.ts +2 -0
  11. package/dist/commands/history.js +95 -1
  12. package/dist/commands/history.js.map +1 -1
  13. package/dist/commands/init.d.ts +6 -0
  14. package/dist/commands/init.js +26 -1
  15. package/dist/commands/init.js.map +1 -1
  16. package/dist/commands/report.d.ts +7 -0
  17. package/dist/commands/report.js +27 -0
  18. package/dist/commands/report.js.map +1 -0
  19. package/dist/commands/scan.d.ts +1 -1
  20. package/dist/commands/scan.js.map +1 -1
  21. package/dist/engine/agents.d.ts +30 -8
  22. package/dist/engine/agents.js +94 -10
  23. package/dist/engine/agents.js.map +1 -1
  24. package/dist/engine/prerequisites.d.ts +40 -2
  25. package/dist/engine/prerequisites.js +126 -2
  26. package/dist/engine/prerequisites.js.map +1 -1
  27. package/dist/lib/agent.d.ts +39 -2
  28. package/dist/lib/agent.js +106 -4
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/config.d.ts +73 -1
  31. package/dist/lib/config.js +214 -1
  32. package/dist/lib/config.js.map +1 -1
  33. package/dist/lib/escalation.d.ts +102 -0
  34. package/dist/lib/escalation.js +241 -0
  35. package/dist/lib/escalation.js.map +1 -0
  36. package/dist/lib/eval-matrix.d.ts +125 -0
  37. package/dist/lib/eval-matrix.js +317 -0
  38. package/dist/lib/eval-matrix.js.map +1 -0
  39. package/dist/lib/eval-report.d.ts +12 -0
  40. package/dist/lib/eval-report.js +132 -0
  41. package/dist/lib/eval-report.js.map +1 -0
  42. package/dist/lib/eval-secret-scan.d.ts +41 -0
  43. package/dist/lib/eval-secret-scan.js +163 -0
  44. package/dist/lib/eval-secret-scan.js.map +1 -0
  45. package/dist/lib/eval.js +7 -4
  46. package/dist/lib/eval.js.map +1 -1
  47. package/dist/lib/hardware.d.ts +9 -0
  48. package/dist/lib/hardware.js +32 -0
  49. package/dist/lib/hardware.js.map +1 -0
  50. package/dist/lib/pipeline.d.ts +5 -1
  51. package/dist/lib/pipeline.js +217 -16
  52. package/dist/lib/pipeline.js.map +1 -1
  53. package/dist/lib/prerequisites.js +11 -3
  54. package/dist/lib/prerequisites.js.map +1 -1
  55. package/dist/lib/routing-history.d.ts +43 -0
  56. package/dist/lib/routing-history.js +112 -0
  57. package/dist/lib/routing-history.js.map +1 -0
  58. package/dist/lib/routing-promotion.d.ts +95 -0
  59. package/dist/lib/routing-promotion.js +229 -0
  60. package/dist/lib/routing-promotion.js.map +1 -0
  61. package/dist/lib/session.js +13 -0
  62. package/dist/lib/session.js.map +1 -1
  63. package/dist/lib/telemetry.d.ts +147 -0
  64. package/dist/lib/telemetry.js +353 -0
  65. package/dist/lib/telemetry.js.map +1 -0
  66. package/package.json +1 -1
@@ -0,0 +1,317 @@
1
+ /**
2
+ * eval-matrix — run every case under multiple routing profiles and
3
+ * aggregate a side-by-side comparison.
4
+ *
5
+ * Profiles are loaded from YAML and deep-merged into a base Config before
6
+ * each profile's run. Pipeline costs, wall time, tool error rate, and a
7
+ * diff-similarity signal are aggregated per-case per-profile. Deltas are
8
+ * computed against a designated baseline profile (defaults to
9
+ * `all-frontier`).
10
+ */
11
+ import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs';
12
+ import { join, basename, extname } from 'node:path';
13
+ import { parse as parseYaml } from 'yaml';
14
+ import { loadConfig } from './config.js';
15
+ import { runEvalSuite } from './eval-runner.js';
/**
 * Read a profile YAML file and reduce it to the narrow set of overrides the
 * matrix understands: `agent`, `model`, `reviewModel`, per-step `pipeline`
 * entries, and `routing`.
 *
 * Keys that are missing or hold a value of the wrong type are skipped, so the
 * result only contains fields that passed the checks below.
 *
 * @param profileNameOrPath Bare profile name ("hybrid-v1") or a path to a YAML file.
 * @param projectDir Directory bare names are resolved against; defaults to the
 *   current working directory.
 * @returns The override object; `{}` when the document yields nothing usable.
 * @throws Error if the resolved profile file does not exist.
 */
export function loadProfileOverrides(profileNameOrPath, projectDir = process.cwd()) {
    const profileFile = resolveProfilePath(profileNameOrPath, projectDir);
    if (!existsSync(profileFile)) {
        throw new Error(`Profile file not found: ${profileFile}`);
    }
    const doc = parseYaml(readFileSync(profileFile, 'utf-8'));
    if (!doc || typeof doc !== 'object') {
        return {};
    }
    const isString = (value) => typeof value === 'string';
    const isObject = (value) => Boolean(value) && typeof value === 'object';
    const overrides = {};
    // Top-level scalar settings. The YAML key is snake_case (`review_model`),
    // the override key is camelCase (`reviewModel`).
    if (isString(doc.agent)) {
        overrides.agent = doc.agent;
    }
    if (isString(doc.model)) {
        overrides.model = doc.model;
    }
    if (isString(doc.review_model)) {
        overrides.reviewModel = doc.review_model;
    }
    if (isObject(doc.pipeline)) {
        const steps = {};
        // Only these step names are recognised; anything else under
        // `pipeline:` is ignored.
        for (const stepName of ['plan', 'implement', 'test_fix', 'review', 'verify', 'learn']) {
            const rawStep = doc.pipeline[stepName];
            if (!isObject(rawStep)) {
                continue;
            }
            const stepOverride = {};
            if (isString(rawStep.agent)) {
                stepOverride.agent = rawStep.agent;
            }
            if (isString(rawStep.model)) {
                stepOverride.model = rawStep.model;
            }
            // A step that contributed neither field is left out entirely.
            if (Object.keys(stepOverride).length > 0) {
                steps[stepName] = stepOverride;
            }
        }
        if (Object.keys(steps).length > 0) {
            overrides.pipeline = steps;
        }
    }
    if (isObject(doc.routing)) {
        // Re-use loadConfig's routing parser by round-tripping through a fake
        // config. Keeps the validation logic single-sourced.
        const parsedConfig = loadConfig({ routing: doc.routing });
        if (parsedConfig.routing) {
            overrides.routing = parsedConfig.routing;
        }
    }
    return overrides;
}
/**
 * Turn a profile reference into a filesystem path.
 *
 * A reference that contains `/` or ends in `.yaml` / `.yml` is treated as a
 * path already and returned untouched. Anything else is a bare name and maps
 * to `<projectDir>/.alpha-loop/evals/profiles/<name>.yaml`.
 */
export function resolveProfilePath(nameOrPath, projectDir) {
    const yamlExtensions = ['.yaml', '.yml'];
    const isAlreadyPath = nameOrPath.includes('/')
        || yamlExtensions.some((ext) => nameOrPath.endsWith(ext));
    return isAlreadyPath
        ? nameOrPath
        : join(projectDir, '.alpha-loop', 'evals', 'profiles', `${nameOrPath}.yaml`);
}
/**
 * Short profile label used in reports.
 *
 * Bare names come back unchanged; anything that looks like a path (contains
 * `/` or ends in `.yaml` / `.yml`) is cut down to its file name without the
 * extension.
 */
export function profileDisplayName(nameOrPath) {
    const looksLikePath = nameOrPath.includes('/')
        || nameOrPath.endsWith('.yaml')
        || nameOrPath.endsWith('.yml');
    if (looksLikePath) {
        return basename(nameOrPath, extname(nameOrPath));
    }
    return nameOrPath;
}
/**
 * Lay profile overrides over a base config and return the combined config.
 *
 * `agent`, `model`, `reviewModel` and `routing` replace the base value when
 * the override is truthy. `pipeline` is merged step by step: an overridden
 * step keeps the base step's fields and only the fields named in the override
 * change. The base config object itself is not modified.
 */
export function applyProfileToConfig(base, overrides) {
    const pipeline = { ...base.pipeline };
    const stepOverrides = overrides.pipeline ?? {};
    for (const step of Object.keys(stepOverrides)) {
        pipeline[step] = { ...(pipeline[step] ?? {}), ...stepOverrides[step] };
    }
    const merged = { ...base };
    if (overrides.agent) {
        merged.agent = overrides.agent;
    }
    if (overrides.model) {
        merged.model = overrides.model;
    }
    if (overrides.reviewModel) {
        merged.reviewModel = overrides.reviewModel;
    }
    merged.pipeline = pipeline;
    if (overrides.routing) {
        merged.routing = overrides.routing;
    }
    return merged;
}
/**
 * Rough similarity between a produced diff and the golden patch.
 *
 * Each input is reduced to the set of its trimmed, non-empty lines, leaving
 * out diff headers (`diff --git`, `index `, `---`, `+++`, `@@`) and `#`
 * comment lines. The score is the Jaccard index of the two sets:
 * |intersection| / |union|. Cheap and stable enough to flag drift; not a
 * substitute for a real diff comparison.
 *
 * @returns 1 when both sets are empty, 0 when exactly one is empty,
 *   otherwise a value in (0, 1].
 */
export function diffSimilarity(produced, golden) {
    const ignoredPrefixes = ['diff --git', 'index ', '---', '+++', '@@', '#'];
    const significantLines = (text) => {
        const kept = new Set();
        for (const rawLine of text.split('\n')) {
            const line = rawLine.trim();
            if (line.length === 0) {
                continue;
            }
            if (ignoredPrefixes.some((prefix) => line.startsWith(prefix))) {
                continue;
            }
            kept.add(line);
        }
        return kept;
    };
    const producedLines = significantLines(produced);
    const goldenLines = significantLines(golden);
    if (producedLines.size === 0 || goldenLines.size === 0) {
        // Two empty diffs count as identical; one empty side shares nothing.
        return producedLines.size === goldenLines.size ? 1 : 0;
    }
    const shared = [...producedLines].filter((line) => goldenLines.has(line)).length;
    // Both sets are non-empty here, so the union is at least 1.
    return shared / (producedLines.size + goldenLines.size - shared);
}
/**
 * Tell whether a golden.patch is only a stub with no real diff to compare.
 *
 * A patch counts as a stub when every line, after trimming, is either blank
 * or starts with `#` (for example a lone "# TODO").
 */
export function isStubPatch(content) {
    return content.split('\n').every((rawLine) => {
        const line = rawLine.trim();
        return line.length === 0 || line.startsWith('#');
    });
}
/**
 * Convert a suite result into matrix entries keyed by case id.
 *
 * @param result Suite result; every element of `result.cases` becomes one entry.
 * @param diffLookup Optional map from case id to a diff-similarity score.
 *   Cases with no (or a nullish) score get `null`.
 */
export function toMatrixEntries(result, diffLookup) {
    const byCaseId = {};
    for (const caseResult of result.cases) {
        const similarity = diffLookup?.get(caseResult.caseId) ?? null;
        byCaseId[caseResult.caseId] = buildCaseEntry(caseResult, similarity);
    }
    return byCaseId;
}
/**
 * Build the matrix entry for a single case result.
 *
 * @param r Case result supplying `passed`, `partialCredit`, `costUsd`,
 *   `duration` and `error`.
 * @param diffSim Diff-similarity score for the case, or `null`.
 */
function buildCaseEntry(r, diffSim) {
    const { passed, partialCredit, duration, error } = r;
    return {
        passed,
        partialCredit,
        // A missing cost is reported as zero.
        costUsd: r.costUsd ?? 0,
        wallTimeS: duration,
        // Tool error rate isn't persisted on EvalResult today; keep 0 for now,
        // the plumbing is here so #161's telemetry can back-fill without a
        // downstream signature change.
        toolErrorRate: 0,
        diffSimilarity: diffSim,
        errored: !!error,
        error,
    };
}
/**
 * Roll the per-case entries of one profile up into its summary row.
 *
 * @param profile Profile name, copied into the result.
 * @param entries Per-case entries keyed by case id.
 * @returns Case and pass counts, pass rate, summed cost, and the mean wall
 *   time and tool-error rate. Every rate and mean is 0 when there are no cases.
 */
export function aggregateTotals(profile, entries) {
    const rows = Object.values(entries);
    const caseCount = rows.length;
    let passCount = 0;
    let totalCostUsd = 0;
    let wallTimeSum = 0;
    let toolErrorSum = 0;
    for (const row of rows) {
        if (row.passed) {
            passCount += 1;
        }
        totalCostUsd += row.costUsd;
        wallTimeSum += row.wallTimeS;
        toolErrorSum += row.toolErrorRate;
    }
    // Guard against division by zero for an empty case list.
    const perCase = (sum) => (caseCount === 0 ? 0 : sum / caseCount);
    return {
        profile,
        caseCount,
        passCount,
        passRate: perCase(passCount),
        totalCostUsd,
        meanWallTimeS: perCase(wallTimeSum),
        meanToolErrorRate: perCase(toolErrorSum),
    };
}
/**
 * Compare every profile's totals with the baseline profile's totals.
 *
 * @param totals Per-profile totals (as produced by `aggregateTotals`).
 * @param baseline Name of the profile to compare against.
 * @returns Map of profile name to `{ pipelineSuccessDelta, costPerIssueDelta }`
 *   (profile value minus baseline value). Empty when the baseline is not
 *   among `totals`. The baseline itself is included, with zero deltas.
 */
export function computeDeltas(totals, baseline) {
    // Cost per case; a profile with no cases counts as zero cost.
    const costPerCase = (t) => (t.caseCount === 0 ? 0 : t.totalCostUsd / t.caseCount);
    const totalsByProfile = new Map();
    for (const t of totals) {
        totalsByProfile.set(t.profile, t);
    }
    const deltas = {};
    const reference = totalsByProfile.get(baseline);
    if (reference) {
        const referenceCost = costPerCase(reference);
        for (const t of totals) {
            deltas[t.profile] = {
                pipelineSuccessDelta: t.passRate - reference.passRate,
                costPerIssueDelta: costPerCase(t) - referenceCost,
            };
        }
    }
    return deltas;
}
/**
 * Run the full matrix: every profile × every case.
 *
 * For each profile, in the order given, the overrides are loaded and merged
 * onto `baseConfig`; unless `opts.dryRun` is set, the whole case list is then
 * executed through `runner`. Profiles run strictly one after another.
 *
 * @param cases Eval cases; `id` and `description` are read from each.
 * @param opts `profiles` (names or paths, at least one), optional `baseline`
 *   (defaults to `all-frontier`), `dryRun`, and `verbose`.
 * @param baseConfig Config that each profile's overrides are merged onto.
 * @param runner Suite runner, injectable for tests; defaults to the real
 *   `runEvalSuite`.
 * @returns `{ profiles, baseline, cases, totals, deltas }`, plus
 *   `dryRun: true` when nothing was executed.
 * @throws Error when no profile is given, or when two profiles reduce to the
 *   same display name.
 */
export async function runMatrix(cases, opts, baseConfig, runner = runEvalSuite) {
    if (opts.profiles.length === 0) {
        throw new Error('runMatrix: at least one profile is required');
    }
    const profileNames = opts.profiles.map((profile) => profileDisplayName(profile));
    // Results are keyed by display name. Two profiles sharing a base name
    // (e.g. `a/x.yaml` and `b/x.yaml`) would overwrite each other and yield a
    // report whose columns silently show the wrong data, so reject them before
    // any pipeline is executed.
    const seenNames = new Set();
    for (const name of profileNames) {
        if (seenNames.has(name)) {
            throw new Error(`runMatrix: duplicate profile name "${name}"; profiles must have distinct base names`);
        }
        seenNames.add(name);
    }
    const baselineName = profileDisplayName(opts.baseline ?? 'all-frontier');
    const entriesByProfile = new Map();
    const totals = [];
    for (const [index, profileRef] of opts.profiles.entries()) {
        const displayName = profileNames[index];
        // Resolve the merged config so validation surfaces bad profile YAML even
        // in dry-run mode. In execute mode the same merged config is what the
        // runner receives.
        const mergedConfig = applyProfileToConfig(baseConfig, loadProfileOverrides(profileRef));
        let entries;
        if (opts.dryRun) {
            entries = buildSkippedEntries(cases);
        }
        else {
            const suiteResult = await runner(cases, mergedConfig, { verbose: opts.verbose });
            // Placeholder diff similarities: real diff comes from the run's worktree
            // output, which lives inside runEvalSuite. For now we mark all as null
            // unless a golden stub tells us to skip — downstream reports treat null
            // as "informational, not scored".
            const diffLookup = new Map();
            for (const c of cases) {
                diffLookup.set(c.id, null);
            }
            entries = toMatrixEntries(suiteResult, diffLookup);
        }
        entriesByProfile.set(displayName, entries);
        totals.push(aggregateTotals(displayName, entries));
    }
    // Entry used when a profile's run returned nothing for a case.
    const missingResult = () => ({
        passed: false,
        partialCredit: 0,
        costUsd: 0,
        wallTimeS: 0,
        toolErrorRate: 0,
        diffSimilarity: null,
        errored: true,
        error: 'missing result',
    });
    // Flatten per-case table: iterate cases once, attach per-profile entries.
    const flatCases = cases.map((c) => {
        const perProfile = {};
        for (const name of profileNames) {
            perProfile[name] = entriesByProfile.get(name)[c.id] ?? missingResult();
        }
        return { caseId: c.id, description: c.description, perProfile };
    });
    const matrix = {
        profiles: profileNames,
        baseline: baselineName,
        cases: flatCases,
        totals,
        deltas: computeDeltas(totals, baselineName),
    };
    if (opts.dryRun) {
        matrix.dryRun = true;
    }
    return matrix;
}
262
+ /**
263
+ * Locate the most recent matrix report, returning its mtime (epoch ms) and
264
+ * path. Used by `alpha-loop evolve routing` to enforce a freshness gate on
265
+ * promotion proposals — promotion must not fire without a recent matrix run.
266
+ *
267
+ * Checks the default `eval/reports/` directory first, then
268
+ * `.alpha-loop/evals/reports/` (where older runs land). Returns null when
269
+ * neither directory contains a `routing-*.{md,csv}` file.
270
+ */
271
+ export function latestMatrixRun(projectDir = process.cwd()) {
272
+ const candidates = [
273
+ join(projectDir, 'eval', 'reports'),
274
+ join(projectDir, '.alpha-loop', 'evals', 'reports'),
275
+ ];
276
+ let best = null;
277
+ for (const dir of candidates) {
278
+ if (!existsSync(dir))
279
+ continue;
280
+ for (const name of readdirSync(dir)) {
281
+ if (!name.startsWith('routing-'))
282
+ continue;
283
+ if (!name.endsWith('.md') && !name.endsWith('.csv'))
284
+ continue;
285
+ try {
286
+ const full = join(dir, name);
287
+ const stat = statSync(full);
288
+ const ms = stat.mtimeMs;
289
+ if (!best || ms > best.timestampMs) {
290
+ best = { timestampMs: ms, summaryPath: full };
291
+ }
292
+ }
293
+ catch {
294
+ /* ignore unreadable entry */
295
+ }
296
+ }
297
+ }
298
+ return best;
299
+ }
/**
 * Placeholder entries for a dry run: one entry per case id, each marked
 * `skipped` with zeroed metrics and no diff similarity. Every case gets its
 * own object.
 */
function buildSkippedEntries(cases) {
    const skippedEntry = () => ({
        passed: false,
        partialCredit: 0,
        costUsd: 0,
        wallTimeS: 0,
        toolErrorRate: 0,
        diffSimilarity: null,
        errored: false,
        skipped: true,
    });
    const byCaseId = {};
    for (const { id } of cases) {
        byCaseId[id] = skippedEntry();
    }
    return byCaseId;
}
317
+ //# sourceMappingURL=eval-matrix.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-matrix.js","sourceRoot":"","sources":["../../src/lib/eval-matrix.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAC1E,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpD,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAE1C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGzC,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AA2EhD;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAAC,iBAAyB,EAAE,aAAqB,OAAO,CAAC,GAAG,EAAE;IAChG,MAAM,IAAI,GAAG,kBAAkB,CAAC,iBAAiB,EAAE,UAAU,CAAC,CAAC;IAC/D,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,2BAA2B,IAAI,EAAE,CAAC,CAAC;IACrD,CAAC;IACD,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACxC,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAmC,CAAC;IAChE,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ;QAAE,OAAO,EAAE,CAAC;IAErD,MAAM,MAAM,GAAqB,EAAE,CAAC;IACpC,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ;QAAE,MAAM,CAAC,KAAK,GAAG,MAAM,CAAC,KAAwB,CAAC;IACrF,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ;QAAE,MAAM,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;IAClE,IAAI,OAAO,MAAM,CAAC,YAAY,KAAK,QAAQ;QAAE,MAAM,CAAC,WAAW,GAAG,MAAM,CAAC,YAAY,CAAC;IAEtF,IAAI,MAAM,CAAC,QAAQ,IAAI,OAAO,MAAM,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC3D,MAAM,WAAW,GAAG,MAAM,CAAC,QAAmC,CAAC;QAC/D,MAAM,QAAQ,GAAmB,EAAE,CAAC;QACpC,MAAM,UAAU,GAAuB,CAAC,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;QACtG,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,MAAM,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;YAChC,IAAI,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;gBACvC,MAAM,CAAC,GAAG,KAAgC,CAAC;gBAC3C,MAAM,OAAO,GAAe,EAAE,CAAC;gBAC/B,IAAI,OAAO,CAAC,CAAC,KAAK,KAAK,QAAQ;oBAAE,OAAO,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;gBACzD,IAAI,OAAO,CAAC,CAAC,KAAK,KAAK,QAAQ;oBAAE,OAAO,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;gBACzD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC;oBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC;YAChE,CAAC;QACH,CAAC;QACD,IAAI,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,MAAM,GAAG,CAAC;YAAE,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC;IACnE,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,IAAI,OAAO,MAAM,C
AAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QACzD,sEAAsE;QACtE,qDAAqD;QACrD,MAAM,IAAI,GAAG,UAAU,CAAC,EAAE,OAAO,EAAE,MAAM,CAAC,OAAmC,EAAE,CAAC,CAAC;QACjF,IAAI,IAAI,CAAC,OAAO;YAAE,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;IAClD,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,4EAA4E;AAC5E,MAAM,UAAU,kBAAkB,CAAC,UAAkB,EAAE,UAAkB;IACvE,IAAI,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,UAAU,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5F,OAAO,UAAU,CAAC;IACpB,CAAC;IACD,OAAO,IAAI,CAAC,UAAU,EAAE,aAAa,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,UAAU,OAAO,CAAC,CAAC;AACpF,CAAC;AAED,sEAAsE;AACtE,MAAM,UAAU,kBAAkB,CAAC,UAAkB;IACnD,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC/F,OAAO,UAAU,CAAC;IACpB,CAAC;IACD,OAAO,QAAQ,CAAC,UAAU,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;AACnD,CAAC;AAED,8EAA8E;AAC9E,MAAM,UAAU,oBAAoB,CAAC,IAAY,EAAE,SAA2B;IAC5E,MAAM,cAAc,GAAmB,EAAE,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;IAC5D,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,QAAQ,IAAI,EAAE,CAA0C,EAAE,CAAC;QAChH,cAAc,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,EAAE,GAAG,OAAO,EAAE,CAAC;IACzE,CAAC;IACD,OAAO;QACL,GAAG,IAAI;QACP,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,SAAS,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACtD,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,SAAS,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACtD,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACxE,QAAQ,EAAE,cAAc;QACxB,GAAG,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC7D,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,QAAgB,EAAE,MAAc;IAC7D,MAAM,OAAO,GAAG,CAAC,CAAS,EAAe,EAAE;QACzC,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC;aACxB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;aAC3B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAA
C,UAAU,CAAC,YAAY,CAAC,CAAC;aAC1C,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;aACtC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;aAC3D,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;aAClC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;QACrC,OAAO,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC;IACxB,CAAC,CAAC;IACF,MAAM,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC5B,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAC1B,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC3C,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC3C,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,IAAI,IAAI,CAAC;QAAE,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,SAAS,EAAE,CAAC;IACnD,MAAM,SAAS,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,SAAS,CAAC;IAC9C,OAAO,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,SAAS,CAAC;AACrD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,WAAW,CAAC,OAAe;IACzC,MAAM,eAAe,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC;SACxC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;IACrD,OAAO,eAAe,CAAC,MAAM,KAAK,CAAC,CAAC;AACtC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,eAAe,CAC7B,MAAuB,EACvB,UAAuC;IAEvC,MAAM,OAAO,GAAoC,EAAE,CAAC;IACpD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QAC7B,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,cAAc,CAAC,CAAC,EAAE,UAAU,EAAE,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,CAAC;IAC3E,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,cAAc,CAAC,CAAa,EAAE,OAAsB;IAC3D,OAAO;QACL,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,aAAa,EAAE,CAAC,CAAC,aAAa;QAC9B,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,CAAC;QACvB,SAAS,EAAE,CAAC,CAAC,QAAQ;QACrB,uEAAuE;QACvE,mEAAmE;QACnE,+BAA+B;QAC/B,aAAa,EAAE,CAAC;QAChB,cAAc,EAAE,OAAO;QACvB,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;QACzB,KAAK,EAAE,CAAC,CAAC,KAAK;KACf,CAAC;AACJ,CAAC;AAED,0DAA0D;
AAC1D,MAAM,UAAU,eAAe,CAC7B,OAAe,EACf,OAAwC;IAExC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACtC,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC;IAChC,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IACxD,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAC/D,MAAM,aAAa,GAAG,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC;IACpG,MAAM,iBAAiB,GAAG,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC;IAC5G,OAAO;QACL,OAAO;QACP,SAAS;QACT,SAAS;QACT,QAAQ,EAAE,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,SAAS;QACrD,YAAY;QACZ,aAAa;QACb,iBAAiB;KAClB,CAAC;AACJ,CAAC;AAED,uDAAuD;AACvD,MAAM,UAAU,aAAa,CAC3B,MAA6B,EAC7B,QAAgB;IAEhB,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7D,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACrC,MAAM,MAAM,GAAgF,EAAE,CAAC;IAC/F,IAAI,CAAC,IAAI;QAAE,OAAO,MAAM,CAAC;IACzB,MAAM,gBAAgB,GAAG,IAAI,CAAC,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC;IACvF,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACvB,MAAM,YAAY,GAAG,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,SAAS,CAAC;QAC1E,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG;YAClB,oBAAoB,EAAE,CAAC,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ;YAChD,iBAAiB,EAAE,YAAY,GAAG,gBAAgB;SACnD,CAAC;IACJ,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,KAA2B,EAC3B,IAAmB,EACnB,UAAkB,EAClB,SAIgC,YAAY;IAE5C,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;IACjE,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;IAC3D,MAAM,YAAY,GAAG,kBAAkB,CAAC,IAAI,CAAC
,QAAQ,IAAI,cAAc,CAAC,CAAC;IAEzE,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAA2C,CAAC;IAC7E,MAAM,gBAAgB,GAA0B,EAAE,CAAC;IAEnD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9C,MAAM,iBAAiB,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QACpC,MAAM,SAAS,GAAG,oBAAoB,CAAC,iBAAiB,CAAC,CAAC;QAC1D,yEAAyE;QACzE,sEAAsE;QACtE,mBAAmB;QACnB,MAAM,YAAY,GAAG,oBAAoB,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;QAEjE,IAAI,OAAwC,CAAC;QAC7C,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,OAAO,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACvC,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,EAAE,YAAY,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;YAC5E,MAAM,UAAU,GAAG,IAAI,GAAG,EAAyB,CAAC;YACpD,yEAAyE;YACzE,uEAAuE;YACvE,wEAAwE;YACxE,kCAAkC;YAClC,KAAK,MAAM,CAAC,IAAI,KAAK;gBAAE,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC;YAClD,OAAO,GAAG,eAAe,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QAChD,CAAC;QACD,iBAAiB,CAAC,GAAG,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QAC5C,gBAAgB,CAAC,IAAI,CAAC,eAAe,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC;IAC/D,CAAC;IAED,0EAA0E;IAC1E,MAAM,SAAS,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QAChC,MAAM,UAAU,GAAoC,EAAE,CAAC;QACvD,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;YAChC,MAAM,OAAO,GAAG,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC;YAC7C,UAAU,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI;gBAClC,MAAM,EAAE,KAAK;gBACb,aAAa,EAAE,CAAC;gBAChB,OAAO,EAAE,CAAC;gBACV,SAAS,EAAE,CAAC;gBACZ,aAAa,EAAE,CAAC;gBAChB,cAAc,EAAE,IAAI;gBACpB,OAAO,EAAE,IAAI;gBACb,KAAK,EAAE,gBAAgB;aACxB,CAAC;QACJ,CAAC;QACD,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,WAAW,EAAE,CAAC,CAAC,WAAW,EAAE,UAAU,EAAE,CAAC;IAClE,CAAC,CAAC,CAAC;IAEH,OAAO;QACL,QAAQ,EAAE,YAAY;QACtB,QAAQ,EAAE,YAAY;QACtB,KAAK,EAAE,SAAS;QAChB,MAAM,EAAE,gBAAgB;QACxB,MAAM,EAAE,aAAa,CAAC,gBAAgB,EAAE,YAAY,CAAC;QACrD,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACzC,CAAC;AACJ,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,eAAe,CAC7B,aAAqB,OAAO,CAAC,GAAG,EAAE;IAElC,MAAM,UAAU,GAAG;QACjB,IAAI,CAAC,UAAU,E
AAE,MAAM,EAAE,SAAS,CAAC;QACnC,IAAI,CAAC,UAAU,EAAE,aAAa,EAAE,OAAO,EAAE,SAAS,CAAC;KACpD,CAAC;IACF,IAAI,IAAI,GAAwD,IAAI,CAAC;IACrE,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC7B,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAC/B,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC;gBAAE,SAAS;YAC3C,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAE,SAAS;YAC9D,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;gBAC7B,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;gBAC5B,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC;gBACxB,IAAI,CAAC,IAAI,IAAI,EAAE,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;oBACnC,IAAI,GAAG,EAAE,WAAW,EAAE,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;gBAChD,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,6BAA6B;YAC/B,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,iFAAiF;AACjF,SAAS,mBAAmB,CAAC,KAA2B;IACtD,MAAM,OAAO,GAAoC,EAAE,CAAC;IACpD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG;YACd,MAAM,EAAE,KAAK;YACb,aAAa,EAAE,CAAC;YAChB,OAAO,EAAE,CAAC;YACV,SAAS,EAAE,CAAC;YACZ,aAAa,EAAE,CAAC;YAChB,cAAc,EAAE,IAAI;YACpB,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,IAAI;SACd,CAAC;IACJ,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * eval-report — render matrix results as Markdown or CSV.
3
+ *
4
+ * The markdown form is what's posted to the tracking epic by the nightly
5
+ * workflow. The CSV form is what downstream spreadsheets / Grafana /
6
+ * analytic scripts consume.
7
+ */
8
+ import type { MatrixResult } from './eval-matrix.js';
9
+ /**
+  * Render a matrix result as Markdown suitable for a GitHub comment.
+  *
+  * @param result Matrix result to render.
+  * @param title Optional heading line. When omitted, the implementation uses
+  *   `# Routing regression — <YYYY-MM-DD>` with the current date.
+  * @returns The report as one string with lines joined by `\n`.
+  */
10
+ export declare function renderMatrixMarkdown(result: MatrixResult, title?: string): string;
11
+ /**
+  * Render a matrix result as CSV. One row per (case, profile).
+  *
+  * @param result Matrix result to render.
+  * @returns CSV text: a header row, then the data rows, ending with a
+  *   trailing newline.
+  */
12
+ export declare function renderMatrixCsv(result: MatrixResult): string;
@@ -0,0 +1,132 @@
/**
 * Render a matrix result as a Markdown report suitable for a GitHub comment.
 *
 * The report has a heading, a baseline/size line, a per-profile summary
 * table, a per-case grid, and (for real runs only) a table of deltas against
 * the baseline. Dry runs get a notice and a validation-style summary table
 * instead of numeric columns.
 *
 * @param result Matrix result to render.
 * @param title Optional heading line; defaults to
 *   `# Routing regression — <YYYY-MM-DD>` using the current date.
 * @returns The report as one string with lines joined by `\n`.
 */
export function renderMatrixMarkdown(result, title) {
    const out = [
        title ?? `# Routing regression — ${new Date().toISOString().slice(0, 10)}`,
        '',
        `Baseline: \`${result.baseline}\` · ${result.cases.length} case(s) · ${result.profiles.length} profile(s)`,
        '',
    ];
    if (result.dryRun) {
        out.push('> **Dry-run** — no pipelines executed. This report validates profile loading and case structure only. Pass `--execute` to run for real (requires fixture isolation; see CASE_FORMAT.md).', '');
    }
    // Per-profile summary table. In dry-run every numeric column is zero by
    // construction (no pipelines ran), so we show a validation table instead
    // to avoid misreading "0/N" as real regressions.
    out.push('## Per-profile summary', '');
    if (result.dryRun) {
        out.push('| Profile | Cases loaded | Profile applied | Executed |', '| --- | --- | --- | --- |');
        for (const t of result.totals) {
            out.push(`| \`${t.profile}\` | ${t.caseCount} | yes | no (dry-run) |`);
        }
    }
    else {
        out.push('| Profile | Pass | Pass rate | Total cost | Mean wall time | Mean tool-error rate |', '| --- | --- | --- | --- | --- | --- |');
        for (const t of result.totals) {
            out.push(`| \`${t.profile}\` | ${t.passCount}/${t.caseCount} | ${percent(t.passRate)} | ${usd(t.totalCostUsd)} | ${seconds(t.meanWallTimeS)} | ${percent(t.meanToolErrorRate)} |`);
        }
    }
    out.push('');
    // Per-case grid: one row per case, one column per profile.
    out.push('## Per-case results', '');
    const columns = ['Case', ...result.profiles];
    out.push(`| ${columns.join(' | ')} |`);
    out.push(`| ${columns.map(() => '---').join(' | ')} |`);
    for (const c of result.cases) {
        const cells = [c.caseId];
        for (const profile of result.profiles) {
            cells.push(formatCaseCell(c.perProfile[profile]));
        }
        out.push(`| ${cells.join(' | ')} |`);
    }
    out.push('');
    // Deltas vs baseline — only meaningful when pipelines actually ran.
    if (!result.dryRun) {
        out.push(`## Deltas vs \`${result.baseline}\``, '', '| Profile | Δ pipeline_success | Δ cost_per_issue |', '| --- | --- | --- |');
        for (const t of result.totals) {
            const delta = result.deltas[t.profile];
            // Profiles without a delta entry are left out of the table.
            if (delta) {
                out.push(`| \`${t.profile}\` | ${deltaPct(delta.pipelineSuccessDelta)} | ${deltaUsd(delta.costPerIssueDelta)} |`);
            }
        }
        out.push('');
    }
    return out.join('\n');
}
/**
 * Render a matrix result as CSV: a header row, then one row per
 * (case, profile) pair, cases in the outer loop. The output ends with a
 * trailing newline.
 *
 * Booleans are written as `1`/`0`, numbers with fixed decimals, and a `null`
 * diff similarity as an empty field.
 */
export function renderMatrixCsv(result) {
    const columns = [
        'case_id',
        'profile',
        'passed',
        'partial_credit',
        'cost_usd',
        'wall_time_s',
        'tool_error_rate',
        'diff_similarity',
        'errored',
    ];
    const flag = (value) => (value ? '1' : '0');
    const lines = [columns.join(',')];
    for (const c of result.cases) {
        for (const profile of result.profiles) {
            const cell = c.perProfile[profile];
            const fields = [
                csvField(c.caseId),
                csvField(profile),
                flag(cell.passed),
                cell.partialCredit.toFixed(3),
                cell.costUsd.toFixed(4),
                cell.wallTimeS.toFixed(1),
                cell.toolErrorRate.toFixed(3),
                cell.diffSimilarity === null ? '' : cell.diffSimilarity.toFixed(3),
                flag(cell.errored),
            ];
            lines.push(fields.join(','));
        }
    }
    return `${lines.join('\n')}\n`;
}
/**
 * Text for one cell of the per-case Markdown grid.
 *
 * Missing entry → `—`; skipped → `SKIP`; errored → `ERR` (skipped wins over
 * errored); otherwise `PASS` or `FAIL` followed by the cost in parentheses.
 */
function formatCaseCell(entry) {
    if (!entry) {
        return '—';
    }
    if (entry.skipped || entry.errored) {
        return entry.skipped ? 'SKIP' : 'ERR';
    }
    const cost = usd(entry.costUsd);
    return entry.passed ? `PASS (${cost})` : `FAIL (${cost})`;
}
/** Format a fraction (0.5) as a percentage with one decimal ("50.0%"). */
function percent(value) {
    const scaled = value * 100;
    return scaled.toFixed(1) + '%';
}
/** Format a dollar amount with a `$` prefix and two decimals ("$1.50"). */
function usd(value) {
    const amount = value.toFixed(2);
    return '$' + amount;
}
/** Format a duration as whole seconds with an `s` suffix ("12s"). */
function seconds(value) {
    const whole = value.toFixed(0);
    return whole + 's';
}
/**
 * Format a pass-rate difference in percentage points with one decimal
 * ("+25.0 pp"). Only strictly positive values get an explicit `+`; negative
 * values carry their own minus sign.
 */
function deltaPct(value) {
    const points = (value * 100).toFixed(1);
    if (value > 0) {
        return `+${points} pp`;
    }
    return `${points} pp`;
}
/**
 * Format a cost difference with an explicit sign in front of the dollar sign
 * ("+$1.24", "-$1.50"). Zero is shown as "+$0.00".
 */
function deltaUsd(value) {
    const nonNegative = value >= 0;
    const magnitude = nonNegative ? value : Math.abs(value);
    return `${nonNegative ? '+' : '-'}$${magnitude.toFixed(2)}`;
}
/**
 * Quote a CSV field when it needs it.
 *
 * Values containing a comma, double quote, line feed or carriage return are
 * wrapped in double quotes with embedded quotes doubled; anything else is
 * returned as-is.
 */
function csvField(value) {
    const needsQuoting = /[",\n\r]/.test(value);
    if (!needsQuoting) {
        return value;
    }
    const escaped = value.replace(/"/g, '""');
    return `"${escaped}"`;
}
132
+ //# sourceMappingURL=eval-report.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-report.js","sourceRoot":"","sources":["../../src/lib/eval-report.ts"],"names":[],"mappings":"AASA,wEAAwE;AACxE,MAAM,UAAU,oBAAoB,CAAC,MAAoB,EAAE,KAAc;IACvE,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,OAAO,GAAG,KAAK,IAAI,0BAA0B,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;IAC3F,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACpB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,eAAe,MAAM,CAAC,QAAQ,QAAQ,MAAM,CAAC,KAAK,CAAC,MAAM,cAAc,MAAM,CAAC,QAAQ,CAAC,MAAM,aAAa,CAAC,CAAC;IACvH,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QAClB,KAAK,CAAC,IAAI,CAAC,0LAA0L,CAAC,CAAC;QACvM,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,wEAAwE;IACxE,yEAAyE;IACzE,iDAAiD;IACjD,KAAK,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;IACrC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QAClB,KAAK,CAAC,IAAI,CAAC,yDAAyD,CAAC,CAAC;QACtE,KAAK,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACxC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAC9B,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,QAAQ,CAAC,CAAC,SAAS,yBAAyB,CAAC,CAAC;QAC3E,CAAC;IACH,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,qFAAqF,CAAC,CAAC;QAClG,KAAK,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;QACpD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAC9B,KAAK,CAAC,IAAI,CACR,OAAO,CAAC,CAAC,OAAO,QAAQ,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,SAAS,MAAM,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,OAAO,CAAC,CAAC,CAAC,aAAa,CAAC,MAAM,OAAO,CAAC,CAAC,CAAC,iBAAiB,CAAC,IAAI,CACzK,CAAC;QACJ,CAAC;IACH,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,gBAAgB;IAChB,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IAClC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,MAAM,MAAM,GAAG,CAAC,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;IAC5C,KAAK,CAAC,IAAI,CAAC,KAAK,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACxC,KAAK,CAAC,IAAI,CAAC,KAAK,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAC
zB,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,MAAM,KAAK,GAAG,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;YACpC,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC;QACpC,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzC,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,oEAAoE;IACpE,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,kBAAkB,MAAM,CAAC,QAAQ,IAAI,CAAC,CAAC;QAClD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,qDAAqD,CAAC,CAAC;QAClE,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAClC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAC9B,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACnC,IAAI,CAAC,CAAC;gBAAE,SAAS;YACjB,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,QAAQ,QAAQ,CAAC,CAAC,CAAC,oBAAoB,CAAC,MAAM,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;QAC9G,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,kEAAkE;AAClE,MAAM,UAAU,eAAe,CAAC,MAAoB;IAClD,MAAM,MAAM,GAAG;QACb,SAAS;QACT,SAAS;QACT,QAAQ;QACR,gBAAgB;QAChB,UAAU;QACV,aAAa;QACb,iBAAiB;QACjB,iBAAiB;QACjB,SAAS;KACV,CAAC;IACF,MAAM,IAAI,GAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1C,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QAC7B,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,MAAM,KAAK,GAAG,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;YACpC,IAAI,CAAC,IAAI,CAAC;gBACR,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC;gBAClB,QAAQ,CAAC,OAAO,CAAC;gBACjB,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;gBACxB,KAAK,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC9B,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;gBACxB,KAAK,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC1B,KAAK,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC9B,KAAK,CAAC,cAAc,KAAK,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC;gBACpE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;aAC1B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;AAChC,CAAC;AAED,0DAA0D;AAC1D,SAAS,cAAc
,CAAC,KAA4F;IAClH,IAAI,CAAC,KAAK;QAAE,OAAO,GAAG,CAAC;IACvB,IAAI,KAAK,CAAC,OAAO;QAAE,OAAO,MAAM,CAAC;IACjC,IAAI,KAAK,CAAC,OAAO;QAAE,OAAO,KAAK,CAAC;IAChC,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;IAC3C,OAAO,GAAG,GAAG,KAAK,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC;AAC1C,CAAC;AAED,SAAS,OAAO,CAAC,KAAa;IAC5B,OAAO,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;AACxC,CAAC;AAED,SAAS,GAAG,CAAC,KAAa;IACxB,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;AAChC,CAAC;AAED,SAAS,OAAO,CAAC,KAAa;IAC5B,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;AAChC,CAAC;AAED,SAAS,QAAQ,CAAC,KAAa;IAC7B,MAAM,IAAI,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IAClC,OAAO,GAAG,IAAI,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;AACjD,CAAC;AAED,SAAS,QAAQ,CAAC,KAAa;IAC7B,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,KAAK,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C,OAAO,KAAK,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;AAC3C,CAAC;AAED,qEAAqE;AACrE,SAAS,QAAQ,CAAC,KAAa;IAC7B,IAAI,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC;IAC1C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC"}
@@ -0,0 +1,41 @@
1
+ export type SecretRule = { // One detection rule: an id, a description, and the regex to apply.
2
+ /** Short id for the rule (used in messages and tests). */
3
+ id: string;
4
+ /** Human-readable description of what this rule detects. */
5
+ description: string;
6
+ /** Regex to match against file contents. */
7
+ pattern: RegExp;
8
+ };
9
+ /**
10
+ * Built-in secret patterns. Deliberately conservative: we would rather report
11
+ * a false positive on an obvious-looking token than let a real one slip through.
12
+ */
13
+ export declare const SECRET_RULES: SecretRule[];
14
+ export type SecretFinding = { // One match reported by a scan.
15
+ ruleId: string; // presumably the `id` of the SecretRule that matched — confirm in the implementation
16
+ description: string; // presumably the matching rule's `description` — confirm in the implementation
17
+ match: string; // matched text; the scanContent docs state matches are truncated to 40 characters
18
+ line: number; // line of the match; whether it is 0- or 1-based is not visible here — TODO confirm
19
+ };
20
+ export type SecretScanResult = { // Scan outcome for a single file.
21
+ /** Path of the file that was scanned. */
22
+ path: string;
23
+ /** Findings for this file. Non-empty when the file contains a secret. */
24
+ findings: SecretFinding[];
25
+ };
26
+ /**
27
+ * Scan a single string of content against the rule set.
28
+ * Returned matches are truncated to 40 characters so secrets don't leak
29
+ * into CI logs when the scan itself reports failures.
+ *
+ * @param content Text to scan.
+ * @returns The findings for `content` (presumably empty when nothing matches — confirm).
30
+ */
31
+ export declare function scanContent(content: string): SecretFinding[];
32
+ /**
33
+ * Walk a case directory and scan each text file. Returns per-file results.
34
+ * Directories that don't exist return an empty array.
+ *
+ * @param dirPath Path of the case directory to walk.
+ * @returns One result per scanned file; whether clean files are included is
+ *   not visible from this declaration — TODO confirm.
35
+ */
36
+ export declare function scanCaseDir(dirPath: string): SecretScanResult[];
37
+ /**
38
+ * Format findings as a single human-readable block. Used by the build
39
+ * script and CLI to print a dirty-case report.
+ *
+ * @param results Per-file scan results, e.g. as returned by scanCaseDir.
+ * @returns The formatted report text.
40
+ */
41
+ export declare function formatFindings(results: SecretScanResult[]): string;