@evalgate/sdk 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +638 -0
  2. package/README.md +398 -0
  3. package/dist/assertions.d.ts +189 -0
  4. package/dist/assertions.js +662 -0
  5. package/dist/batch.d.ts +68 -0
  6. package/dist/batch.js +179 -0
  7. package/dist/cache.d.ts +65 -0
  8. package/dist/cache.js +131 -0
  9. package/dist/cli/api.d.ts +108 -0
  10. package/dist/cli/api.js +132 -0
  11. package/dist/cli/baseline.d.ts +10 -0
  12. package/dist/cli/baseline.js +172 -0
  13. package/dist/cli/check.d.ts +73 -0
  14. package/dist/cli/check.js +355 -0
  15. package/dist/cli/ci-context.d.ts +6 -0
  16. package/dist/cli/ci-context.js +112 -0
  17. package/dist/cli/ci.d.ts +45 -0
  18. package/dist/cli/ci.js +192 -0
  19. package/dist/cli/config.d.ts +30 -0
  20. package/dist/cli/config.js +230 -0
  21. package/dist/cli/constants.d.ts +15 -0
  22. package/dist/cli/constants.js +18 -0
  23. package/dist/cli/diff.d.ts +173 -0
  24. package/dist/cli/diff.js +685 -0
  25. package/dist/cli/discover.d.ts +84 -0
  26. package/dist/cli/discover.js +419 -0
  27. package/dist/cli/doctor.d.ts +88 -0
  28. package/dist/cli/doctor.js +675 -0
  29. package/dist/cli/env.d.ts +21 -0
  30. package/dist/cli/env.js +42 -0
  31. package/dist/cli/explain.d.ts +58 -0
  32. package/dist/cli/explain.js +561 -0
  33. package/dist/cli/formatters/github.d.ts +8 -0
  34. package/dist/cli/formatters/github.js +135 -0
  35. package/dist/cli/formatters/human.d.ts +6 -0
  36. package/dist/cli/formatters/human.js +110 -0
  37. package/dist/cli/formatters/json.d.ts +6 -0
  38. package/dist/cli/formatters/json.js +10 -0
  39. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  40. package/dist/cli/formatters/pr-comment.js +103 -0
  41. package/dist/cli/formatters/types.d.ts +103 -0
  42. package/dist/cli/formatters/types.js +8 -0
  43. package/dist/cli/gate.d.ts +21 -0
  44. package/dist/cli/gate.js +179 -0
  45. package/dist/cli/impact-analysis.d.ts +63 -0
  46. package/dist/cli/impact-analysis.js +252 -0
  47. package/dist/cli/index.d.ts +9 -0
  48. package/dist/cli/index.js +332 -0
  49. package/dist/cli/init.d.ts +16 -0
  50. package/dist/cli/init.js +292 -0
  51. package/dist/cli/manifest.d.ts +103 -0
  52. package/dist/cli/manifest.js +282 -0
  53. package/dist/cli/migrate.d.ts +41 -0
  54. package/dist/cli/migrate.js +349 -0
  55. package/dist/cli/policy-packs.d.ts +23 -0
  56. package/dist/cli/policy-packs.js +89 -0
  57. package/dist/cli/print-config.d.ts +29 -0
  58. package/dist/cli/print-config.js +270 -0
  59. package/dist/cli/profiles.d.ts +28 -0
  60. package/dist/cli/profiles.js +30 -0
  61. package/dist/cli/reason-codes.d.ts +17 -0
  62. package/dist/cli/reason-codes.js +19 -0
  63. package/dist/cli/regression-gate.d.ts +15 -0
  64. package/dist/cli/regression-gate.js +341 -0
  65. package/dist/cli/render/snippet.d.ts +5 -0
  66. package/dist/cli/render/snippet.js +15 -0
  67. package/dist/cli/render/sort.d.ts +10 -0
  68. package/dist/cli/render/sort.js +24 -0
  69. package/dist/cli/report/build-check-report.d.ts +19 -0
  70. package/dist/cli/report/build-check-report.js +132 -0
  71. package/dist/cli/run.d.ts +101 -0
  72. package/dist/cli/run.js +395 -0
  73. package/dist/cli/share.d.ts +17 -0
  74. package/dist/cli/share.js +91 -0
  75. package/dist/cli/upgrade.d.ts +15 -0
  76. package/dist/cli/upgrade.js +492 -0
  77. package/dist/cli/workspace.d.ts +31 -0
  78. package/dist/cli/workspace.js +68 -0
  79. package/dist/client.d.ts +368 -0
  80. package/dist/client.js +893 -0
  81. package/dist/client.request.test.d.ts +1 -0
  82. package/dist/client.request.test.js +232 -0
  83. package/dist/context.d.ts +134 -0
  84. package/dist/context.js +215 -0
  85. package/dist/errors.d.ts +82 -0
  86. package/dist/errors.js +298 -0
  87. package/dist/export.d.ts +195 -0
  88. package/dist/export.js +344 -0
  89. package/dist/index.d.ts +44 -0
  90. package/dist/index.js +153 -0
  91. package/dist/integrations/anthropic.d.ts +91 -0
  92. package/dist/integrations/anthropic.js +163 -0
  93. package/dist/integrations/openai-eval.d.ts +57 -0
  94. package/dist/integrations/openai-eval.js +232 -0
  95. package/dist/integrations/openai.d.ts +92 -0
  96. package/dist/integrations/openai.js +160 -0
  97. package/dist/local.d.ts +39 -0
  98. package/dist/local.js +148 -0
  99. package/dist/logger.d.ts +128 -0
  100. package/dist/logger.js +227 -0
  101. package/dist/matchers/index.d.ts +1 -0
  102. package/dist/matchers/index.js +6 -0
  103. package/dist/matchers/to-pass-gate.d.ts +29 -0
  104. package/dist/matchers/to-pass-gate.js +35 -0
  105. package/dist/pagination.d.ts +74 -0
  106. package/dist/pagination.js +139 -0
  107. package/dist/regression.d.ts +100 -0
  108. package/dist/regression.js +44 -0
  109. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  110. package/dist/runtime/adapters/config-to-dsl.js +400 -0
  111. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  112. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  113. package/dist/runtime/context.d.ts +26 -0
  114. package/dist/runtime/context.js +74 -0
  115. package/dist/runtime/eval.d.ts +46 -0
  116. package/dist/runtime/eval.js +244 -0
  117. package/dist/runtime/execution-mode.d.ts +80 -0
  118. package/dist/runtime/execution-mode.js +357 -0
  119. package/dist/runtime/executor.d.ts +16 -0
  120. package/dist/runtime/executor.js +152 -0
  121. package/dist/runtime/registry.d.ts +78 -0
  122. package/dist/runtime/registry.js +403 -0
  123. package/dist/runtime/run-report.d.ts +200 -0
  124. package/dist/runtime/run-report.js +222 -0
  125. package/dist/runtime/types.d.ts +356 -0
  126. package/dist/runtime/types.js +76 -0
  127. package/dist/snapshot.d.ts +176 -0
  128. package/dist/snapshot.js +322 -0
  129. package/dist/streaming.d.ts +173 -0
  130. package/dist/streaming.js +268 -0
  131. package/dist/testing.d.ts +273 -0
  132. package/dist/testing.js +317 -0
  133. package/dist/types.d.ts +754 -0
  134. package/dist/types.js +54 -0
  135. package/dist/utils/input-hash.d.ts +8 -0
  136. package/dist/utils/input-hash.js +41 -0
  137. package/dist/version.d.ts +7 -0
  138. package/dist/version.js +10 -0
  139. package/dist/workflows.d.ts +389 -0
  140. package/dist/workflows.js +671 -0
  141. package/package.json +117 -0
@@ -0,0 +1,341 @@
1
+ "use strict";
2
+ /**
3
+ * evalgate gate — Run the regression gate
4
+ *
5
+ * Two modes:
6
+ * 1. Project mode: delegates to eval:regression-gate npm script (full gate)
7
+ * 2. Built-in mode: runs the `test` script via the detected package manager (pnpm, yarn or npm), compares against evals/baseline.json
8
+ *
9
+ * Built-in mode activates when no eval:regression-gate script is defined,
10
+ * making `npx evalgate gate` work for any project after `npx evalgate init`.
11
+ */
12
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
13
+ if (k2 === undefined) k2 = k;
14
+ var desc = Object.getOwnPropertyDescriptor(m, k);
15
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
16
+ desc = { enumerable: true, get: function() { return m[k]; } };
17
+ }
18
+ Object.defineProperty(o, k2, desc);
19
+ }) : (function(o, m, k, k2) {
20
+ if (k2 === undefined) k2 = k;
21
+ o[k2] = m[k];
22
+ }));
23
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
24
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
25
+ }) : function(o, v) {
26
+ o["default"] = v;
27
+ });
28
+ var __importStar = (this && this.__importStar) || (function () {
29
+ var ownKeys = function(o) {
30
+ ownKeys = Object.getOwnPropertyNames || function (o) {
31
+ var ar = [];
32
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
33
+ return ar;
34
+ };
35
+ return ownKeys(o);
36
+ };
37
+ return function (mod) {
38
+ if (mod && mod.__esModule) return mod;
39
+ var result = {};
40
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
41
+ __setModuleDefault(result, mod);
42
+ return result;
43
+ };
44
+ })();
45
+ Object.defineProperty(exports, "__esModule", { value: true });
46
+ exports.parseGateArgs = parseGateArgs;
47
+ exports.runGate = runGate;
48
+ const node_child_process_1 = require("node:child_process");
49
+ const fs = __importStar(require("node:fs"));
50
+ const path = __importStar(require("node:path"));
51
+ const REPORT_REL = "evals/regression-report.json";
52
+ const BASELINE_REL = "evals/baseline.json";
53
/**
 * Infer which package manager the project uses from the lockfile found in `cwd`.
 *
 * A pnpm lockfile takes precedence over a yarn lockfile; npm is the fallback
 * when neither is present.
 *
 * @param cwd Directory to inspect.
 * @returns "pnpm", "yarn" or "npm".
 */
function detectPackageManager(cwd) {
    // Checked in order: the first lockfile that exists decides the result.
    const lockfiles = [
        ["pnpm-lock.yaml", "pnpm"],
        ["yarn.lock", "yarn"],
    ];
    for (const [lockfile, manager] of lockfiles) {
        if (fs.existsSync(path.join(cwd, lockfile))) {
            return manager;
        }
    }
    return "npm";
}
61
/**
 * Parse the argument list of `evalgate gate`.
 *
 * Recognises the output format in either spelling:
 *   --format <fmt>     (value in the next argument)
 *   --format=<fmt>     (value attached with "=")
 * where <fmt> is "human", "json" or "github". Unknown values are ignored and
 * the previous/default format ("human") is kept. When the flag is repeated the
 * last valid occurrence wins.
 *
 * @param argv Raw CLI arguments (without the command name).
 * @returns An object of the shape `{ format }`.
 */
function parseGateArgs(argv) {
    const FORMATS = ["human", "json", "github"];
    const INLINE_PREFIX = "--format=";
    const args = { format: "human" };
    for (let i = 0; i < argv.length; i++) {
        const token = argv[i];
        let value;
        if (token === "--format" && argv[i + 1]) {
            value = argv[i + 1];
            i++; // the next argument was consumed as the flag's value
        }
        else if (typeof token === "string" && token.startsWith(INLINE_PREFIX)) {
            value = token.slice(INLINE_PREFIX.length);
        }
        if (value !== undefined && FORMATS.includes(value)) {
            args.format = value;
        }
    }
    return args;
}
74
+ function detectRunner(cwd) {
75
+ const pkgPath = path.join(cwd, "package.json");
76
+ try {
77
+ const pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
78
+ const testCmd = pkg.scripts?.test ?? "";
79
+ if (testCmd.includes("vitest"))
80
+ return "vitest";
81
+ if (testCmd.includes("jest"))
82
+ return "jest";
83
+ if (testCmd.includes("mocha"))
84
+ return "mocha";
85
+ if (testCmd.includes("node --test"))
86
+ return "node:test";
87
+ if (testCmd.includes("ava"))
88
+ return "ava";
89
+ if (testCmd.includes("tap"))
90
+ return "tap";
91
+ }
92
+ catch {
93
+ // ignore
94
+ }
95
+ return "unknown";
96
+ }
97
/**
 * Built-in gate: run the project's `test` script with the detected package
 * manager and compare the outcome against evals/baseline.json.
 *
 * Outcomes:
 *  - exitCode 2 / "infra_error": the baseline is missing, unparsable or not a
 *    JSON object, or the test command could not be run to completion (spawn
 *    failure, timeout, output overflow).
 *  - exitCode 1 / "regression": tests passed in the baseline but fail now, or
 *    the parsed test count is lower than the baseline total.
 *  - exitCode 0 / "pass": otherwise.
 *
 * @param cwd Project root.
 * @returns The gate report object (schemaVersion 1).
 */
function runBuiltinGate(cwd) {
    const t0 = Date.now();
    const baselinePath = path.join(cwd, BASELINE_REL);
    const now = new Date().toISOString();
    const pm = detectPackageManager(cwd);
    const command = `${pm} test`;
    const runner = detectRunner(cwd);
    // Single place that shapes every infrastructure-error report.
    const infraError = (message, baseline = null) => ({
        schemaVersion: 1,
        timestamp: now,
        exitCode: 2,
        category: "infra_error",
        passed: false,
        failures: [message],
        deltas: [],
        baseline,
        durationMs: Date.now() - t0,
        command,
        runner,
    });
    // Load baseline
    if (!fs.existsSync(baselinePath)) {
        return infraError("Baseline file not found. Run: npx evalgate init");
    }
    let baselineData;
    try {
        baselineData = JSON.parse(fs.readFileSync(baselinePath, "utf-8"));
    }
    catch {
        return infraError("Failed to parse evals/baseline.json");
    }
    // Valid JSON that is not an object (e.g. `null`) cannot be a baseline;
    // without this guard `null` would crash on the property reads below.
    if (baselineData === null || typeof baselineData !== "object") {
        return infraError("evals/baseline.json must contain a JSON object");
    }
    const baselineMeta = baselineData.updatedAt
        ? {
            updatedAt: baselineData.updatedAt,
            updatedBy: baselineData.updatedBy ?? "unknown",
        }
        : null;
    // Run tests
    const isWin = process.platform === "win32";
    const result = (0, node_child_process_1.spawnSync)(pm, ["test"], {
        cwd,
        stdio: "pipe",
        shell: isWin,
        timeout: 300000,
        // The 1 MiB default terminates the child once its output exceeds it,
        // which verbose test suites easily do.
        maxBuffer: 64 * 1024 * 1024,
    });
    // `error` is set when the process could not be spawned, timed out or
    // overflowed the buffer. That says nothing about the tests themselves, so
    // it must not be reported as a regression.
    if (result.error) {
        return infraError(`Failed to run "${command}": ${result.error.message}`, baselineMeta);
    }
    const testsPassed = result.status === 0;
    const output = (result.stdout?.toString() ?? "") + (result.stderr?.toString() ?? "");
    // Try to extract test count (patterns cover several common reporter formats)
    let testCount = 0;
    const countMatch = output.match(/(\d+)\s+(?:tests?|specs?)\s+(?:passed|completed)/i) ??
        output.match(/Tests:\s+(\d+)\s+passed/i) ??
        output.match(/(\d+)\s+passing/i) ??
        output.match(/Test Files\s+\d+\s+passed.*\n\s+Tests\s+(\d+)\s+passed/i);
    if (countMatch)
        testCount = parseInt(countMatch[1], 10);
    // Compare against baseline
    const baselinePassed = baselineData.confidenceTests?.passed ?? true;
    const baselineTotal = baselineData.confidenceTests?.total ?? 0;
    const failures = [];
    const deltas = [];
    // Delta: tests passing
    deltas.push({
        metric: "tests_passing",
        baseline: baselinePassed,
        current: testsPassed,
        delta: testsPassed === baselinePassed ? "0" : testsPassed ? "+1" : "-1",
        status: testsPassed ? "pass" : "fail",
    });
    if (!testsPassed && baselinePassed) {
        failures.push("Tests were passing in baseline but are now failing");
    }
    // Delta: test count (emitted when either side has a non-zero count)
    if (testCount > 0 || baselineTotal > 0) {
        const countDelta = testCount - baselineTotal;
        deltas.push({
            metric: "test_count",
            baseline: baselineTotal,
            current: testCount,
            delta: countDelta >= 0 ? `+${countDelta}` : `${countDelta}`,
            status: testCount >= baselineTotal ? "pass" : "fail",
        });
        if (testCount < baselineTotal) {
            failures.push(`Test count dropped from ${baselineTotal} to ${testCount} (${countDelta})`);
        }
    }
    const hasRegression = failures.length > 0;
    return {
        schemaVersion: 1,
        timestamp: now,
        exitCode: hasRegression ? 1 : 0,
        category: hasRegression ? "regression" : "pass",
        passed: !hasRegression,
        failures,
        deltas,
        baseline: baselineMeta,
        durationMs: Date.now() - t0,
        command,
        runner,
    };
}
208
+ // ── Format helpers ──
209
+ function formatHuman(report) {
210
+ const icon = report.passed ? "✅" : "❌";
211
+ console.log(`\n${icon} EvalGate Gate: ${report.category.toUpperCase()}\n`);
212
+ if (report.deltas.length > 0) {
213
+ const pad = (s, n) => s.padEnd(n);
214
+ console.log(` ${pad("Metric", 16)} ${pad("Baseline", 10)} ${pad("Current", 10)} ${pad("Delta", 8)} Status`);
215
+ console.log(` ${"-".repeat(16)} ${"-".repeat(10)} ${"-".repeat(10)} ${"-".repeat(8)} ------`);
216
+ for (const d of report.deltas) {
217
+ const si = d.status === "pass" ? "✔" : "✖";
218
+ console.log(` ${pad(d.metric, 16)} ${pad(String(d.baseline), 10)} ${pad(String(d.current), 10)} ${pad(d.delta, 8)} ${si}`);
219
+ }
220
+ }
221
+ if (report.failures.length > 0) {
222
+ console.log("\n Failures:");
223
+ for (const f of report.failures) {
224
+ console.log(` • ${f}`);
225
+ }
226
+ }
227
+ console.log("");
228
+ }
229
+ function formatGithub(report) {
230
+ const icon = report.passed ? "✅" : "❌";
231
+ const lines = [
232
+ `## ${icon} EvalGate Gate: ${report.category}`,
233
+ "",
234
+ "| Metric | Baseline | Current | Delta | Status |",
235
+ "|--------|----------|---------|-------|--------|",
236
+ ];
237
+ for (const d of report.deltas) {
238
+ const si = d.status === "pass" ? "✅" : "❌";
239
+ lines.push(`| ${d.metric} | ${d.baseline} | ${d.current} | ${d.delta} | ${si} |`);
240
+ }
241
+ if (report.failures.length > 0) {
242
+ lines.push("", "### Failures", "");
243
+ for (const f of report.failures) {
244
+ lines.push(`- ${f}`);
245
+ }
246
+ }
247
+ lines.push("", `Schema version: ${report.schemaVersion}`);
248
+ const md = lines.join("\n");
249
+ // Write to $GITHUB_STEP_SUMMARY if available
250
+ const summaryPath = process.env.GITHUB_STEP_SUMMARY;
251
+ if (summaryPath) {
252
+ try {
253
+ fs.appendFileSync(summaryPath, `${md}\n`);
254
+ }
255
+ catch {
256
+ // ignore if not writable
257
+ }
258
+ }
259
+ console.log(md);
260
+ }
261
/**
 * Emit the report in the format selected on the command line.
 *
 * "json" writes pretty-printed JSON to stdout (no trailing newline),
 * "github" delegates to formatGithub, anything else to formatHuman.
 *
 * @param report Gate report to print.
 * @param args   Parsed gate arguments; only `format` is read.
 */
function formatReport(report, args) {
    switch (args.format) {
        case "json":
            process.stdout.write(JSON.stringify(report, null, 2));
            return;
        case "github":
            formatGithub(report);
            return;
        default:
            formatHuman(report);
    }
}
272
+ // ── Main ──
273
/**
 * Entry point of `evalgate gate`.
 *
 * Requires a parsable package.json in the current working directory
 * (otherwise prints an error and returns 1). Then:
 *  - Project mode: when package.json defines an `eval:regression-gate`
 *    script, that script is run through the detected package manager and its
 *    exit status is returned (1 when no status is available). For the "json"
 *    and "github" formats the report file written by the script is relayed.
 *  - Built-in mode: otherwise the built-in gate runs, its report is written
 *    to evals/regression-report.json, printed in the requested format, and
 *    its exit code is returned.
 *
 * @param argv CLI arguments for the gate command.
 * @returns Process exit code.
 */
function runGate(argv) {
    const cwd = process.cwd();
    const args = parseGateArgs(argv);
    // Check for package.json
    const manifestPath = path.join(cwd, "package.json");
    if (!fs.existsSync(manifestPath)) {
        console.error("❌ No package.json found. Run this from your project root.");
        return 1;
    }
    let manifest;
    try {
        manifest = JSON.parse(fs.readFileSync(manifestPath, "utf-8"));
    }
    catch {
        console.error("❌ Failed to parse package.json");
        return 1;
    }
    // ── Built-in mode: run tests + compare against baseline ──
    if (!manifest.scripts?.["eval:regression-gate"]) {
        if (args.format === "human") {
            console.log("\n Running EvalGate regression gate (built-in mode)...\n");
        }
        const builtinReport = runBuiltinGate(cwd);
        // Write report artifact
        const evalsDir = path.join(cwd, "evals");
        if (!fs.existsSync(evalsDir)) {
            fs.mkdirSync(evalsDir, { recursive: true });
        }
        fs.writeFileSync(path.join(cwd, REPORT_REL), `${JSON.stringify(builtinReport, null, 2)}\n`);
        formatReport(builtinReport, args);
        return builtinReport.exitCode;
    }
    // ── Project mode: delegate to eval:regression-gate ──
    const wantsJson = args.format === "json";
    const child = (0, node_child_process_1.spawnSync)(detectPackageManager(cwd), ["run", "eval:regression-gate"], {
        cwd,
        // In JSON mode the script's own output is captured so stdout carries only the report.
        stdio: wantsJson ? "pipe" : "inherit",
        shell: process.platform === "win32",
    });
    const exitCode = child.status ?? 1;
    // Only the json/github formats post-process the report file.
    if (!wantsJson && args.format !== "github") {
        return exitCode;
    }
    const reportPath = path.join(cwd, REPORT_REL);
    const reportExists = fs.existsSync(reportPath);
    if (wantsJson) {
        if (reportExists) {
            process.stdout.write(fs.readFileSync(reportPath, "utf-8"));
        }
        else {
            console.error(JSON.stringify({
                error: "regression-report.json not found",
                exitCode,
            }));
        }
        return exitCode;
    }
    if (reportExists) {
        try {
            formatGithub(JSON.parse(fs.readFileSync(reportPath, "utf-8")));
        }
        catch {
            // human output already printed
        }
    }
    return exitCode;
}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Truncate a string for deterministic output; null/undefined yields "".
3
+ * Collapses each whitespace run (newlines included) to one space, trims, and caps length at maxLen (default 140), appending "…" when text was cut.
4
+ */
5
+ export declare function truncateSnippet(s: string | undefined | null, maxLen?: number): string;
@@ -0,0 +1,15 @@
1
+ "use strict";
2
+ /**
3
+ * Truncate a string for deterministic output.
4
+ * Collapses whitespace runs (newlines included) to a single space, trims, caps length (adding "…" when cut).
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.truncateSnippet = truncateSnippet;
8
/**
 * Normalise and shorten a string for compact, deterministic display.
 *
 * Every run of whitespace (including newlines) becomes a single space and the
 * result is trimmed. If it is longer than `maxLen` UTF-16 code units it is cut
 * there and "…" is appended. The cut never leaves half of a surrogate pair
 * behind: if it would fall between the two halves of an astral character
 * (e.g. an emoji), that character is dropped entirely.
 *
 * @param s      Text to shorten; null/undefined yields "".
 * @param maxLen Maximum number of code units kept before the ellipsis (default 140).
 * @returns The normalised, possibly truncated text.
 */
function truncateSnippet(s, maxLen = 140) {
    if (s == null)
        return "";
    const normalized = s.replace(/\s+/g, " ").trim();
    if (normalized.length <= maxLen)
        return normalized;
    let head = normalized.slice(0, maxLen);
    // A trailing high surrogate means the cut split a pair; drop the orphan so
    // the output stays well-formed UTF-16.
    const lastUnit = head.charCodeAt(head.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        head = head.slice(0, -1);
    }
    return `${head}…`;
}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Deterministic ordering for failed cases; returns a new array and leaves the input untouched.
3
+ * Sort by status severity (failed > error > skipped > passed, matched case-insensitively; unknown or missing statuses last), then by testCaseId asc (missing id counts as 0).
4
+ */
5
+ export interface SortableCase {
6
+ status?: string;
7
+ testCaseId?: number;
8
+ [key: string]: unknown;
9
+ }
10
+ export declare function sortFailedCases<T extends SortableCase>(cases: T[]): T[];
@@ -0,0 +1,24 @@
1
+ "use strict";
2
+ /**
3
+ * Deterministic ordering for failed cases.
4
+ * Sort by status severity (failed > error > skipped > passed), then by testCaseId asc.
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.sortFailedCases = sortFailedCases;
8
+ const STATUS_SEVERITY = {
9
+ failed: 0,
10
+ error: 1,
11
+ skipped: 2,
12
+ passed: 3,
13
+ };
14
+ function sortFailedCases(cases) {
15
+ return [...cases].sort((a, b) => {
16
+ const sevA = STATUS_SEVERITY[a.status?.toLowerCase() ?? ""] ?? 4;
17
+ const sevB = STATUS_SEVERITY[b.status?.toLowerCase() ?? ""] ?? 4;
18
+ if (sevA !== sevB)
19
+ return sevA - sevB;
20
+ const idA = a.testCaseId ?? 0;
21
+ const idB = b.testCaseId ?? 0;
22
+ return idA - idB;
23
+ });
24
+ }
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Build CheckReport from API data and gate result.
3
+ * Normalizes failed cases (sort, then truncate snippets), builds the dashboard URL, and reports how many failed cases fall within the top N versus beyond it.
4
+ */
5
+ import type { QualityLatestData, RunDetailsData } from "../api";
6
+ import type { CheckArgs } from "../check";
7
+ import { type CheckReport } from "../formatters/types";
8
+ import type { GateResult } from "../gate";
9
+ export type BuildReportInput = { // everything buildCheckReport needs to assemble a CheckReport
10
+ args: CheckArgs; // supplies baseUrl, evaluationId, policy, explain and the threshold values echoed into the report
11
+ quality: QualityLatestData; // score, total, baseline fields, breakdown and flags are read from here
12
+ runDetails?: RunDetailsData | null; // failed cases are extracted only when results exist and quality carries an evaluationRunId
13
+ gateResult: GateResult; // drives verdict, gateApplied/gateMode and the reason fields
14
+ requestId?: string; // copied into the report unchanged
15
+ shareUrl?: string; // copied into the report unchanged
16
+ baselineRunId?: number | null; // when set, takes precedence over quality.baselineRunId
17
+ ciRunUrl?: string | null; // null is reported as undefined
18
+ };
19
+ export declare function buildCheckReport(input: BuildReportInput): CheckReport; // assembles the report from the given input and returns it
@@ -0,0 +1,132 @@
1
+ "use strict";
2
+ /**
3
+ * Build CheckReport from API data and gate result.
4
+ * Normalizes failed cases (sort, then truncate snippets), builds the dashboard URL, and reports how many failed cases fall within the top N versus beyond it.
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.buildCheckReport = buildCheckReport;
8
+ const types_1 = require("../formatters/types");
9
+ const snippet_1 = require("../render/snippet");
10
+ const sort_1 = require("../render/sort");
11
+ const TOP_N = 3;
12
/**
 * Convert a 0..1 breakdown into weighted contribution points, each rounded to
 * one decimal place. Missing (null/undefined) components count as 0.
 *
 * Weights: passRate x 50, safety x 25,
 * (0.6 * judge + 0.4 * schema) x 15, (0.6 * latency + 0.4 * cost) x 10.
 *
 * @param b Breakdown with optional passRate, safety, judge, schema, latency, cost.
 * @returns `{ passRatePts, safetyPts, compliancePts, performancePts }`.
 */
function computeContribPts(b) {
    const oneDecimal = (points) => Math.round(points * 10) / 10;
    const passRate = b.passRate ?? 0;
    const safety = b.safety ?? 0;
    const compliance = 0.6 * (b.judge ?? 0) + 0.4 * (b.schema ?? 0);
    const performance = 0.6 * (b.latency ?? 0) + 0.4 * (b.cost ?? 0);
    return {
        passRatePts: oneDecimal(passRate * 50),
        safetyPts: oneDecimal(safety * 25),
        compliancePts: oneDecimal(compliance * 15),
        performancePts: oneDecimal(performance * 10),
    };
}
27
+ const SNIPPET_MAX = 50;
28
/**
 * Assemble the CheckReport from CLI arguments, the quality payload, optional
 * run details and the gate decision.
 *
 * - The dashboard URL is built only when the quality payload names a run.
 * - Failed cases are taken from run details (status "failed" only), ordered
 *   with sortFailedCases and given truncated input/expected/output snippets.
 * - The verdict is "warn" for reason code WARN_REGRESSION, otherwise "pass" or
 *   "fail" according to the gate result.
 * - contribPts and policyEvidence are included only when `args.explain` is set.
 *
 * @param input See BuildReportInput.
 * @returns The populated report object.
 */
function buildCheckReport(input) {
    const { args, quality, runDetails, gateResult, requestId } = input;
    const score = quality?.score ?? 0;
    const sampleSize = quality?.total ?? null;
    const baselineScore = quality?.baselineScore ?? null;
    const regressionDelta = quality?.regressionDelta ?? null;
    const runId = quality?.evaluationRunId;
    const rawBreakdown = quality?.breakdown ?? {};
    const flagList = (quality?.flags ?? []);
    const hasRun = runId != null;
    // Strip a single trailing slash so the joined URL has no "//".
    const trimmedBase = args.baseUrl.replace(/\/$/, "");
    let dashboardUrl;
    if (hasRun) {
        dashboardUrl = `${trimmedBase}/evaluations/${args.evaluationId}/runs/${runId}`;
    }
    // Build failed cases from run details
    let failedCases = [];
    if (runDetails?.results && hasRun) {
        const toFailedCase = (r) => ({
            testCaseId: r.testCaseId,
            status: "failed",
            name: r.test_cases?.name,
            input: r.test_cases?.input,
            expectedOutput: r.test_cases?.expectedOutput,
            output: r.output,
        });
        const withSnippets = (fc) => ({
            ...fc,
            inputSnippet: (0, snippet_1.truncateSnippet)(fc.input, SNIPPET_MAX),
            expectedSnippet: (0, snippet_1.truncateSnippet)(fc.expectedOutput, SNIPPET_MAX),
            outputSnippet: (0, snippet_1.truncateSnippet)(fc.output, SNIPPET_MAX),
        });
        const failedOnly = runDetails.results
            .filter((r) => r.status === "failed")
            .map((r) => toFailedCase(r));
        failedCases = (0, sort_1.sortFailedCases)(failedOnly).map((fc) => withSnippets(fc));
    }
    const failedCasesShown = Math.min(failedCases.length, TOP_N);
    const failedCasesMore = failedCases.length - failedCasesShown;
    // An empty breakdown is reported as absent.
    let breakdown01;
    if (Object.keys(rawBreakdown).length > 0) {
        breakdown01 = rawBreakdown;
    }
    let contribPts;
    if (args.explain && breakdown01) {
        contribPts = computeContribPts(breakdown01);
    }
    const gateSkipped = gateResult.gateSkipped === true;
    let gateMode;
    let actionableMessage;
    if (gateSkipped) {
        gateMode = "neutral";
        actionableMessage = "Gate not applied: baseline missing. Publish a baseline from the dashboard, or run with --baseline previous once you have runs.";
    }
    else {
        gateMode = "enforced";
        actionableMessage = gateResult.reasonMessage ?? undefined;
    }
    let verdict;
    if (gateResult.reasonCode === "WARN_REGRESSION") {
        verdict = "warn";
    }
    else if (gateResult.passed) {
        verdict = "pass";
    }
    else {
        verdict = "fail";
    }
    const baselineMissing = quality?.baselineMissing === true;
    let baselineStatus;
    if (baselineMissing) {
        baselineStatus = "missing";
    }
    else if (quality?.baselineScore != null) {
        baselineStatus = "found";
    }
    let policyEvidence;
    if (args.explain && gateResult.policyEvidence) {
        policyEvidence = {
            failedCheck: gateResult.policyEvidence.failedCheck,
            remediation: gateResult.policyEvidence.remediation,
            snapshot: gateResult.policyEvidence.snapshot,
        };
    }
    // Key order below is kept stable because it determines serialized output order.
    return {
        schemaVersion: types_1.CHECK_REPORT_SCHEMA_VERSION,
        evaluationId: args.evaluationId,
        runId,
        verdict,
        gateApplied: !gateSkipped,
        gateMode,
        actionableMessage,
        shareUrl: input.shareUrl,
        policy: args.policy,
        baselineRunId: input.baselineRunId ?? quality?.baselineRunId ?? undefined,
        ciRunUrl: input.ciRunUrl ?? undefined,
        reasonCode: gateResult.reasonCode,
        reasonMessage: gateResult.reasonMessage ?? undefined,
        score,
        baselineScore: baselineScore ?? undefined,
        delta: regressionDelta ?? undefined,
        n: sampleSize ?? undefined,
        evidenceLevel: quality?.evidenceLevel ?? undefined,
        baselineMissing,
        baselineStatus,
        flags: flagList.length > 0 ? [...flagList].sort() : undefined,
        breakdown01,
        contribPts,
        thresholds: {
            minScore: args.minScore,
            maxDrop: args.maxDrop,
            warnDrop: args.warnDrop,
            minN: args.minN,
            allowWeakEvidence: args.allowWeakEvidence,
            baseline: args.baseline,
            maxCostUsd: args.maxCostUsd,
            maxLatencyMs: args.maxLatencyMs,
            maxCostDeltaUsd: args.maxCostDeltaUsd,
        },
        dashboardUrl,
        failedCases,
        failedCasesShown: failedCases.length > 0 ? failedCasesShown : undefined,
        failedCasesMore: failedCasesMore > 0 ? failedCasesMore : undefined,
        requestId,
        explain: args.explain,
        policyEvidence,
    };
}
@@ -0,0 +1,101 @@
1
+ /**
2
+ * TICKET 4 — Unified evalgate run CLI Command
3
+ *
4
+ * Goal: Consolidated execution interface that consumes manifest
5
+ *
6
+ * Features:
7
+ * - Manifest loading and spec filtering
8
+ * - --impacted-only integration with impact analysis
9
+ * - Local executor integration
10
+ * - .evalgate/last-run.json output
11
+ * - Legacy mode compatibility
12
+ */
13
+ /**
14
+ * Run execution options accepted by runEvaluations and runEvaluationsCLI; every field is optional.
15
+ */
16
+ export interface RunOptions {
17
+ /** Restrict the run to these spec IDs */
18
+ specIds?: string[];
19
+ /** Run only impacted specs (requires a base branch — see baseBranch) */
20
+ impactedOnly?: boolean;
21
+ /** Base branch for impact analysis (relevant together with impactedOnly) */
22
+ baseBranch?: string;
23
+ /** Output format: "human" or "json" */
24
+ format?: "human" | "json";
25
+ /** Write run results to file (presumably .evalgate/last-run.json as named in the file header — TODO confirm) */
26
+ writeResults?: boolean;
27
+ }
28
+ /**
29
+ * Run execution result: run metadata, per-spec results and aggregate counts.
30
+ */
31
+ export interface RunResult {
32
+ /** Schema version for compatibility checking */
33
+ schemaVersion: number;
34
+ /** Unique run identifier */
35
+ runId: string;
36
+ /** Execution metadata (time fields are plain numbers; presumably epoch/elapsed milliseconds — TODO confirm) */
37
+ metadata: {
38
+ startedAt: number;
39
+ completedAt: number;
40
+ duration: number;
41
+ totalSpecs: number;
42
+ executedSpecs: number;
43
+ mode: "spec" | "legacy";
44
+ };
45
+ /** Individual spec results, one SpecResult per entry */
46
+ results: SpecResult[];
47
+ /** Summary statistics (passRate scale — fraction vs. percent — is not visible here; TODO confirm) */
48
+ summary: {
49
+ passed: number;
50
+ failed: number;
51
+ skipped: number;
52
+ passRate: number;
53
+ };
54
+ }
55
+ /**
56
+ * Individual spec result: identifies one spec and carries its outcome.
57
+ */
58
+ export interface SpecResult {
59
+ /** Spec identifier */
60
+ specId: string;
61
+ /** Spec name */
62
+ name: string;
63
+ /** File path */
64
+ filePath: string;
65
+ /** Execution result: status, optional score and error text, and duration (units not visible here — TODO confirm) */
66
+ result: {
67
+ status: "passed" | "failed" | "skipped";
68
+ score?: number;
69
+ error?: string;
70
+ duration: number;
71
+ };
72
+ }
73
+ /**
74
+ * Run evaluation specifications and resolve with a RunResult. projectRoot is optional; its default is not visible in this declaration.
75
+ */
76
+ export declare function runEvaluations(options: RunOptions, projectRoot?: string): Promise<RunResult>;
77
+ /**
78
+ * Run index entry: per-run summary (id, creation time, optional git SHA and branch, mode, spec count, pass rate, average score).
79
+ */
80
+ export interface RunIndexEntry {
81
+ runId: string;
82
+ createdAt: number;
83
+ gitSha?: string;
84
+ branch?: string;
85
+ mode: "spec" | "legacy";
86
+ specCount: number;
87
+ passRate: number;
88
+ avgScore: number;
89
+ }
90
+ /**
91
+ * Print a RunResult in human-readable form; returns nothing.
92
+ */
93
+ export declare function printHumanResults(result: RunResult): void;
94
+ /**
95
+ * Print a RunResult as JSON; returns nothing.
96
+ */
97
+ export declare function printJsonResults(result: RunResult): void;
98
+ /**
99
+ * CLI entry point: takes RunOptions and resolves with no value.
100
+ */
101
+ export declare function runEvaluationsCLI(options: RunOptions): Promise<void>;