@evalgate/sdk 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. package/CHANGELOG.md +638 -0
  2. package/README.md +398 -0
  3. package/dist/assertions.d.ts +189 -0
  4. package/dist/assertions.js +662 -0
  5. package/dist/batch.d.ts +68 -0
  6. package/dist/batch.js +179 -0
  7. package/dist/cache.d.ts +65 -0
  8. package/dist/cache.js +131 -0
  9. package/dist/cli/api.d.ts +108 -0
  10. package/dist/cli/api.js +132 -0
  11. package/dist/cli/baseline.d.ts +10 -0
  12. package/dist/cli/baseline.js +172 -0
  13. package/dist/cli/check.d.ts +73 -0
  14. package/dist/cli/check.js +355 -0
  15. package/dist/cli/ci-context.d.ts +6 -0
  16. package/dist/cli/ci-context.js +112 -0
  17. package/dist/cli/ci.d.ts +45 -0
  18. package/dist/cli/ci.js +192 -0
  19. package/dist/cli/config.d.ts +30 -0
  20. package/dist/cli/config.js +230 -0
  21. package/dist/cli/constants.d.ts +15 -0
  22. package/dist/cli/constants.js +18 -0
  23. package/dist/cli/diff.d.ts +173 -0
  24. package/dist/cli/diff.js +685 -0
  25. package/dist/cli/discover.d.ts +84 -0
  26. package/dist/cli/discover.js +419 -0
  27. package/dist/cli/doctor.d.ts +88 -0
  28. package/dist/cli/doctor.js +675 -0
  29. package/dist/cli/env.d.ts +21 -0
  30. package/dist/cli/env.js +42 -0
  31. package/dist/cli/explain.d.ts +58 -0
  32. package/dist/cli/explain.js +561 -0
  33. package/dist/cli/formatters/github.d.ts +8 -0
  34. package/dist/cli/formatters/github.js +135 -0
  35. package/dist/cli/formatters/human.d.ts +6 -0
  36. package/dist/cli/formatters/human.js +110 -0
  37. package/dist/cli/formatters/json.d.ts +6 -0
  38. package/dist/cli/formatters/json.js +10 -0
  39. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  40. package/dist/cli/formatters/pr-comment.js +103 -0
  41. package/dist/cli/formatters/types.d.ts +103 -0
  42. package/dist/cli/formatters/types.js +8 -0
  43. package/dist/cli/gate.d.ts +21 -0
  44. package/dist/cli/gate.js +179 -0
  45. package/dist/cli/impact-analysis.d.ts +63 -0
  46. package/dist/cli/impact-analysis.js +252 -0
  47. package/dist/cli/index.d.ts +9 -0
  48. package/dist/cli/index.js +332 -0
  49. package/dist/cli/init.d.ts +16 -0
  50. package/dist/cli/init.js +292 -0
  51. package/dist/cli/manifest.d.ts +103 -0
  52. package/dist/cli/manifest.js +282 -0
  53. package/dist/cli/migrate.d.ts +41 -0
  54. package/dist/cli/migrate.js +349 -0
  55. package/dist/cli/policy-packs.d.ts +23 -0
  56. package/dist/cli/policy-packs.js +89 -0
  57. package/dist/cli/print-config.d.ts +29 -0
  58. package/dist/cli/print-config.js +270 -0
  59. package/dist/cli/profiles.d.ts +28 -0
  60. package/dist/cli/profiles.js +30 -0
  61. package/dist/cli/reason-codes.d.ts +17 -0
  62. package/dist/cli/reason-codes.js +19 -0
  63. package/dist/cli/regression-gate.d.ts +15 -0
  64. package/dist/cli/regression-gate.js +341 -0
  65. package/dist/cli/render/snippet.d.ts +5 -0
  66. package/dist/cli/render/snippet.js +15 -0
  67. package/dist/cli/render/sort.d.ts +10 -0
  68. package/dist/cli/render/sort.js +24 -0
  69. package/dist/cli/report/build-check-report.d.ts +19 -0
  70. package/dist/cli/report/build-check-report.js +132 -0
  71. package/dist/cli/run.d.ts +101 -0
  72. package/dist/cli/run.js +395 -0
  73. package/dist/cli/share.d.ts +17 -0
  74. package/dist/cli/share.js +91 -0
  75. package/dist/cli/upgrade.d.ts +15 -0
  76. package/dist/cli/upgrade.js +492 -0
  77. package/dist/cli/workspace.d.ts +31 -0
  78. package/dist/cli/workspace.js +68 -0
  79. package/dist/client.d.ts +368 -0
  80. package/dist/client.js +893 -0
  81. package/dist/client.request.test.d.ts +1 -0
  82. package/dist/client.request.test.js +232 -0
  83. package/dist/context.d.ts +134 -0
  84. package/dist/context.js +215 -0
  85. package/dist/errors.d.ts +82 -0
  86. package/dist/errors.js +298 -0
  87. package/dist/export.d.ts +195 -0
  88. package/dist/export.js +344 -0
  89. package/dist/index.d.ts +44 -0
  90. package/dist/index.js +153 -0
  91. package/dist/integrations/anthropic.d.ts +91 -0
  92. package/dist/integrations/anthropic.js +163 -0
  93. package/dist/integrations/openai-eval.d.ts +57 -0
  94. package/dist/integrations/openai-eval.js +232 -0
  95. package/dist/integrations/openai.d.ts +92 -0
  96. package/dist/integrations/openai.js +160 -0
  97. package/dist/local.d.ts +39 -0
  98. package/dist/local.js +148 -0
  99. package/dist/logger.d.ts +128 -0
  100. package/dist/logger.js +227 -0
  101. package/dist/matchers/index.d.ts +1 -0
  102. package/dist/matchers/index.js +6 -0
  103. package/dist/matchers/to-pass-gate.d.ts +29 -0
  104. package/dist/matchers/to-pass-gate.js +35 -0
  105. package/dist/pagination.d.ts +74 -0
  106. package/dist/pagination.js +139 -0
  107. package/dist/regression.d.ts +100 -0
  108. package/dist/regression.js +44 -0
  109. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  110. package/dist/runtime/adapters/config-to-dsl.js +400 -0
  111. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  112. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  113. package/dist/runtime/context.d.ts +26 -0
  114. package/dist/runtime/context.js +74 -0
  115. package/dist/runtime/eval.d.ts +46 -0
  116. package/dist/runtime/eval.js +244 -0
  117. package/dist/runtime/execution-mode.d.ts +80 -0
  118. package/dist/runtime/execution-mode.js +357 -0
  119. package/dist/runtime/executor.d.ts +16 -0
  120. package/dist/runtime/executor.js +152 -0
  121. package/dist/runtime/registry.d.ts +78 -0
  122. package/dist/runtime/registry.js +403 -0
  123. package/dist/runtime/run-report.d.ts +200 -0
  124. package/dist/runtime/run-report.js +222 -0
  125. package/dist/runtime/types.d.ts +356 -0
  126. package/dist/runtime/types.js +76 -0
  127. package/dist/snapshot.d.ts +176 -0
  128. package/dist/snapshot.js +322 -0
  129. package/dist/streaming.d.ts +173 -0
  130. package/dist/streaming.js +268 -0
  131. package/dist/testing.d.ts +273 -0
  132. package/dist/testing.js +317 -0
  133. package/dist/types.d.ts +754 -0
  134. package/dist/types.js +54 -0
  135. package/dist/utils/input-hash.d.ts +8 -0
  136. package/dist/utils/input-hash.js +41 -0
  137. package/dist/version.d.ts +7 -0
  138. package/dist/version.js +10 -0
  139. package/dist/workflows.d.ts +389 -0
  140. package/dist/workflows.js +671 -0
  141. package/package.json +117 -0
@@ -0,0 +1,492 @@
1
+ "use strict";
2
+ /**
3
+ * evalgate upgrade --full — Upgrade from Tier 1 (built-in gate) to Tier 2 (full gate)
4
+ *
5
+ * What it does:
6
+ * 1. Adds full regression gate script (scripts/regression-gate.ts)
7
+ * 2. Adds baseline governance workflow (.github/workflows/baseline-governance.yml)
8
+ * 3. Updates package.json with eval:regression-gate + eval:baseline-update scripts
9
+ * 4. Updates .github/workflows/evalgate-gate.yml to use project mode
10
+ * 5. Adds a CODEOWNERS entry for evals/baseline.json, then prints next steps
11
+ */
// TypeScript-emitted CommonJS interop helpers. Each `var` first reuses a helper of the
// same name found on `this` (when `this` is truthy) and only otherwise defines its own.
//
// __createBinding(o, m, k, k2): exposes m[k] on o under the name k2 (k2 defaults to k).
// With Object.create available it defines the property via a descriptor: the existing
// descriptor of m[k] is reused only when it is an accessor on a module flagged
// __esModule, or a data property that is neither writable nor configurable; in every
// other case an enumerable getter returning m[k] is installed, so later changes to m[k]
// stay visible through o. Without Object.create it falls back to a plain assignment.
12
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
13
+ if (k2 === undefined) k2 = k;
14
+ var desc = Object.getOwnPropertyDescriptor(m, k);
15
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
16
+ desc = { enumerable: true, get: function() { return m[k]; } };
17
+ }
18
+ Object.defineProperty(o, k2, desc);
19
+ }) : (function(o, m, k, k2) {
20
+ if (k2 === undefined) k2 = k;
21
+ o[k2] = m[k];
22
+ }));
// __setModuleDefault(o, v): stores v as the enumerable "default" property of o
// (plain assignment when Object.create is unavailable).
23
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
24
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
25
+ }) : function(o, v) {
26
+ o["default"] = v;
27
+ });
// __importStar(mod): returns mod unchanged when it is flagged __esModule; otherwise
// builds a new object that binds every own property of mod except "default" (via
// __createBinding) and sets "default" to mod itself (via __setModuleDefault).
28
+ var __importStar = (this && this.__importStar) || (function () {
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or with a
// for-in + hasOwnProperty fallback when that function is missing.
29
+ var ownKeys = function(o) {
30
+ ownKeys = Object.getOwnPropertyNames || function (o) {
31
+ var ar = [];
32
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
33
+ return ar;
34
+ };
35
+ return ownKeys(o);
36
+ };
37
+ return function (mod) {
38
+ if (mod && mod.__esModule) return mod;
39
+ var result = {};
// `mod != null` also skips undefined, so a missing module yields { default: mod }.
40
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
41
+ __setModuleDefault(result, mod);
42
+ return result;
43
+ };
44
+ })();
45
+ Object.defineProperty(exports, "__esModule", { value: true });
46
+ exports.parseUpgradeArgs = parseUpgradeArgs;
47
+ exports.runUpgrade = runUpgrade;
48
+ const fs = __importStar(require("node:fs"));
49
+ const path = __importStar(require("node:path"));
50
+ // ── Detect environment ──
/**
 * Guess the project's package manager from the lockfile present in `cwd`.
 *
 * Lockfiles are probed in priority order (pnpm first, then yarn); when neither
 * exists the project is assumed to use npm.
 *
 * @param {string} cwd - Directory to inspect.
 * @returns {"pnpm" | "yarn" | "npm"} Name of the detected package manager.
 */
function detectPackageManager(cwd) {
    const lockfileOwners = [
        ["pnpm-lock.yaml", "pnpm"],
        ["yarn.lock", "yarn"],
    ];
    const match = lockfileOwners.find(([lockfile]) => fs.existsSync(path.join(cwd, lockfile)));
    return match ? match[1] : "npm";
}
/**
 * Print a success line (check mark followed by the message) to stdout.
 *
 * @param {string} msg - Text describing the step that succeeded.
 */
function ok(msg) {
    const line = " ✔ ".concat(msg);
    console.log(line);
}
/**
 * Print a "nothing to do" line (dash followed by the message) to stdout.
 *
 * @param {string} msg - Text describing the step that was skipped.
 */
function skip(msg) {
    const line = " – ".concat(msg);
    console.log(line);
}
64
+ // ── 1. Create scripts/regression-gate.ts ──
/**
 * Write the Tier 2 gate script to `<cwd>/scripts/regression-gate.ts`.
 *
 * An existing script is never overwritten: the step is reported as skipped.
 * The `scripts/` directory is created when missing. The generated TypeScript
 * runs `npm test`, compares pass/fail state and test count with
 * `evals/baseline.json`, writes `evals/regression-report.json`, and supports
 * `--update-baseline`.
 *
 * @param {string} cwd - Project root directory.
 * @returns {boolean} Always `true` (the script exists when this returns).
 */
function createGateScript(cwd) {
    const scriptsDir = path.join(cwd, "scripts");
    const scriptPath = path.join(scriptsDir, "regression-gate.ts");
    if (fs.existsSync(scriptPath)) {
        skip("scripts/regression-gate.ts already exists");
        return true;
    }
    if (!fs.existsSync(scriptsDir)) {
        fs.mkdirSync(scriptsDir, { recursive: true });
    }
    // Backslashes, backticks and `${` are escaped so they reach the generated file literally.
    const gateSource = `#!/usr/bin/env npx tsx
/**
 * Full regression gate — compares current test results against baseline.
 *
 * Usage:
 * npx tsx scripts/regression-gate.ts # run gate
 * npx tsx scripts/regression-gate.ts --update-baseline # update baseline with current values
 *
 * Generated by: npx evalgate upgrade --full
 */
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
import { execSync, spawnSync } from "node:child_process";
import { resolve } from "node:path";

const BASELINE_PATH = resolve("evals/baseline.json");
const REPORT_PATH = resolve("evals/regression-report.json");
const CONFIDENCE_PATH = resolve("evals/confidence-summary.json");

const isUpdateBaseline = process.argv.includes("--update-baseline");

// ── Helpers ──

function loadJSON(p: string): Record<string, unknown> | null {
  try {
    return JSON.parse(readFileSync(p, "utf-8"));
  } catch {
    return null;
  }
}

function getHeadSha(): string {
  try {
    return execSync("git rev-parse --short HEAD").toString().trim();
  } catch {
    return "0000000";
  }
}

function writeReport(report: Record<string, unknown>): void {
  const dir = resolve("evals");
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
  writeFileSync(REPORT_PATH, JSON.stringify(report, null, 2) + "\\n");
}

// ── Run tests ──

function runTests(): { passed: boolean; total: number; durationMs: number } {
  const t0 = Date.now();
  const result = spawnSync("npm", ["test"], {
    stdio: "pipe",
    shell: process.platform === "win32",
    timeout: 300_000,
  });
  const durationMs = Date.now() - t0;
  const passed = result.status === 0;
  const output = (result.stdout?.toString() ?? "") + (result.stderr?.toString() ?? "");

  let total = 0;
  const m =
    output.match(/(\\d+)\\s+(?:tests?|specs?)\\s+(?:passed|completed)/i) ??
    output.match(/Tests:\\s+(\\d+)\\s+passed/i) ??
    output.match(/(\\d+)\\s+passing/i);
  if (m) total = parseInt(m[1], 10);

  return { passed, total, durationMs };
}

// ── Main ──

const baseline = loadJSON(BASELINE_PATH);
if (!baseline) {
  console.error("❌ Baseline not found. Run: npx evalgate init");
  const report = {
    schemaVersion: 1,
    timestamp: new Date().toISOString(),
    exitCode: 2,
    category: "infra_error",
    passed: false,
    failures: ["Baseline file not found"],
    deltas: [],
    baseline: null,
    durationMs: 0,
    command: "npm test",
    runner: "unknown",
  };
  writeReport(report);
  process.exit(2);
}

const tests = runTests();

if (isUpdateBaseline) {
  const user = process.env.USER || process.env.USERNAME || "unknown";
  const now = new Date().toISOString();
  const updated = {
    ...baseline,
    updatedAt: now,
    updatedBy: user,
    commitSha: getHeadSha(),
    confidenceTests: {
      ...(baseline.confidenceTests as Record<string, unknown> ?? {}),
      passed: tests.passed,
      total: tests.total,
    },
  };
  writeFileSync(BASELINE_PATH, JSON.stringify(updated, null, 2) + "\\n");
  console.log("✅ Baseline updated with current test results");
  console.log(\` Tests: \${tests.total} (\${tests.passed ? "passing" : "FAILING"})\`);
  process.exit(0);
}

// ── Compare ──

const bConf = baseline.confidenceTests as { passed?: boolean; total?: number } | undefined;
const baselinePassed = bConf?.passed ?? true;
const baselineTotal = bConf?.total ?? 0;

const failures: string[] = [];
const deltas: Array<Record<string, unknown>> = [];

deltas.push({
  metric: "tests_passing",
  baseline: baselinePassed,
  current: tests.passed,
  delta: tests.passed === baselinePassed ? "0" : tests.passed ? "+1" : "-1",
  status: tests.passed ? "pass" : "fail",
});

if (!tests.passed && baselinePassed) {
  failures.push("Tests were passing in baseline but are now failing");
}

if (tests.total > 0 || baselineTotal > 0) {
  const d = tests.total - baselineTotal;
  deltas.push({
    metric: "test_count",
    baseline: baselineTotal,
    current: tests.total,
    delta: d >= 0 ? \`+\${d}\` : \`\${d}\`,
    status: tests.total >= baselineTotal ? "pass" : "fail",
  });
  if (tests.total < baselineTotal) {
    failures.push(\`Test count dropped from \${baselineTotal} to \${tests.total} (\${d})\`);
  }
}

const hasRegression = failures.length > 0;
const report = {
  schemaVersion: 1,
  timestamp: new Date().toISOString(),
  exitCode: hasRegression ? 1 : 0,
  category: hasRegression ? "regression" : "pass",
  passed: !hasRegression,
  failures,
  deltas,
  baseline: {
    updatedAt: (baseline.updatedAt as string) ?? "unknown",
    updatedBy: (baseline.updatedBy as string) ?? "unknown",
  },
  durationMs: tests.durationMs,
  command: "npm test",
  runner: "unknown",
};

writeReport(report);

if (hasRegression) {
  console.error("❌ REGRESSION DETECTED");
  for (const f of failures) console.error(\` \${f}\`);
} else {
  console.log("✅ NO REGRESSION — gate passed");
}

for (const d of deltas) {
  const icon = d.status === "pass" ? "✔" : "✖";
  console.log(\` \${icon} \${d.metric}: \${d.baseline} → \${d.current} (\${d.delta})\`);
}

process.exit(report.exitCode);
`;
    fs.writeFileSync(scriptPath, gateSource);
    ok("Created scripts/regression-gate.ts");
    return true;
}
259
+ // ── 2. Add npm scripts to package.json ──
/**
 * Add the `eval:regression-gate` and `eval:baseline-update` npm scripts to
 * `<cwd>/package.json`, leaving any script of the same name untouched.
 *
 * The file is rewritten (2-space indented JSON plus trailing newline) only when
 * at least one script was actually added.
 *
 * @param {string} cwd - Project root directory.
 * @returns {boolean} `true` when both scripts are present afterwards; `false`
 *   when package.json is missing, unreadable, not valid JSON, not a JSON
 *   object, or has a `scripts` field that is not an object.
 */
function addNpmScripts(cwd) {
    const pkgPath = path.join(cwd, "package.json");
    if (!fs.existsSync(pkgPath)) {
        return false;
    }
    let pkg;
    try {
        pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
    }
    catch {
        return false;
    }
    // JSON.parse happily returns null, arrays or primitives; none of those can hold
    // a `scripts` map, and touching `.scripts` on null would throw.
    const isPlainObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
    if (!isPlainObject(pkg)) {
        return false;
    }
    const scripts = pkg.scripts ?? {};
    if (!isPlainObject(scripts)) {
        // e.g. "scripts": "oops" — assigning a property to a primitive throws in strict mode.
        return false;
    }
    const wanted = [
        ["eval:regression-gate", "npx tsx scripts/regression-gate.ts"],
        ["eval:baseline-update", "npx tsx scripts/regression-gate.ts --update-baseline"],
    ];
    const added = [];
    for (const [name, command] of wanted) {
        if (!scripts[name]) {
            scripts[name] = command;
            added.push(name);
        }
    }
    if (added.length === 0) {
        skip("eval:regression-gate and eval:baseline-update scripts already exist");
        return true;
    }
    pkg.scripts = scripts;
    fs.writeFileSync(pkgPath, `${JSON.stringify(pkg, null, 2)}\n`);
    // Name only the scripts that were really added, so a partial update is reported accurately.
    ok(`Added ${added.join(" and ")} script${added.length > 1 ? "s" : ""} to package.json`);
    return true;
}
292
+ // ── 3. Create baseline governance workflow ──
/**
 * Create `.github/workflows/baseline-governance.yml`, a workflow that fails any
 * pull request touching `evals/baseline.json` unless it carries the
 * `baseline-update` (or `baseline-exception`) label, and that appends the
 * baseline diff to the job summary.
 *
 * An existing workflow file is never overwritten: the step is reported as skipped.
 *
 * Notes on the generated YAML:
 * - The label list is handed to the shell through an `env:` variable. Expanding
 *   `${{ toJSON(...) }}` straight into the script would paste multi-line JSON
 *   into a shell assignment (breaking the step as soon as a label exists) and
 *   would let label text be interpreted as shell code.
 * - Labels are matched with their surrounding JSON quotes, so only the exact
 *   label names count, not labels that merely contain them.
 * - `labeled` / `unlabeled` events re-run the check, so adding the label to an
 *   open PR is enough to turn the check green.
 * - The checkout uses `fetch-depth: 2`; with the default depth of 1, `HEAD~1`
 *   does not exist and the diff step could only ever print its fallback text.
 *
 * @param {string} cwd - Project root directory.
 * @returns {boolean} Always `true` (the workflow exists when this returns).
 */
function createGovernanceWorkflow(cwd) {
    const workflowDir = path.join(cwd, ".github", "workflows");
    const workflowPath = path.join(workflowDir, "baseline-governance.yml");
    if (fs.existsSync(workflowPath)) {
        skip(".github/workflows/baseline-governance.yml already exists");
        return true;
    }
    if (!fs.existsSync(workflowDir)) {
        fs.mkdirSync(workflowDir, { recursive: true });
    }
    // `\${{` keeps GitHub expressions literal; `\\\`` emits an escaped backtick for the shell.
    const workflow = `# Baseline Governance — requires label + approval for baseline changes
# Auto-generated by: npx evalgate upgrade --full
name: Baseline Governance

on:
  pull_request:
    types: [opened, synchronize, reopened, labeled, unlabeled]
    paths:
      - 'evals/baseline.json'

jobs:
  governance:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: Check label
        env:
          LABELS: \${{ toJSON(github.event.pull_request.labels.*.name) }}
        run: |
          if echo "$LABELS" | grep -qF '"baseline-update"'; then
            echo "✅ baseline-update label found"
          elif echo "$LABELS" | grep -qF '"baseline-exception"'; then
            echo "⚠️ baseline-exception label found — bypassing delta checks"
          else
            echo "❌ Missing 'baseline-update' label"
            echo "Add the 'baseline-update' label to this PR to update the baseline."
            exit 1
          fi

      - name: Show baseline diff
        run: |
          echo "## Baseline Changes" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "\\\`\\\`\\\`diff" >> "$GITHUB_STEP_SUMMARY"
          git diff HEAD~1 -- evals/baseline.json >> "$GITHUB_STEP_SUMMARY" || echo "No previous baseline" >> "$GITHUB_STEP_SUMMARY"
          echo "\\\`\\\`\\\`" >> "$GITHUB_STEP_SUMMARY"
`;
    fs.writeFileSync(workflowPath, workflow);
    ok("Created .github/workflows/baseline-governance.yml");
    return true;
}
343
+ // ── 4. Upgrade evalgate-gate.yml to project mode ──
/**
 * Replace `.github/workflows/evalgate-gate.yml` with the Tier 2 ("project mode")
 * workflow that runs the project's `eval:regression-gate` script.
 *
 * Nothing is written when the workflow file does not exist, or when it already
 * mentions `eval:regression-gate`. Install and cache steps are tailored to the
 * package manager detected from the lockfiles in `cwd`.
 *
 * @param {string} cwd - Project root directory.
 * @returns {boolean} `false` when no workflow file exists; `true` otherwise.
 */
function upgradeGateWorkflow(cwd) {
    const pm = detectPackageManager(cwd);
    const workflowPath = path.join(cwd, ".github", "workflows", "evalgate-gate.yml");
    if (!fs.existsSync(workflowPath)) {
        skip("No .github/workflows/evalgate-gate.yml found — run evalgate init first");
        return false;
    }
    const existing = fs.readFileSync(workflowPath, "utf-8");
    // A workflow that already references the npm script has been upgraded before.
    if (existing.includes("eval:regression-gate")) {
        skip("evalgate-gate.yml already uses project mode");
        return true;
    }
    let installCmd;
    switch (pm) {
        case "pnpm":
            installCmd = "pnpm install --frozen-lockfile";
            break;
        case "yarn":
            installCmd = "yarn install --frozen-lockfile";
            break;
        default:
            installCmd = "npm ci";
    }
    // Node setup + install are shared; pnpm additionally needs its setup action first.
    const stepLines = [
        "      - uses: actions/setup-node@v4",
        "        with:",
        "          node-version: '20'",
        `          cache: ${pm}`,
        `      - run: ${installCmd}`,
    ];
    if (pm === "pnpm") {
        stepLines.unshift("      - uses: pnpm/action-setup@v4");
    }
    const setupSteps = stepLines.join("\n");
    const workflow = `# EvalGate Regression Gate (Full / Tier 2)
# Upgraded by: npx evalgate upgrade --full
name: EvalGate Gate

on:
  pull_request:
    branches: [main]

concurrency:
  group: evalgate-\${{ github.ref }}
  cancel-in-progress: true

jobs:
  regression-gate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
${setupSteps}
      - name: Run regression gate
        run: ${pm} run eval:regression-gate

      - name: Gate summary
        if: always()
        run: npx -y @evalgate/sdk@^2 gate --format github

      - name: Upload report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: regression-report
          path: evals/regression-report.json
          if-no-files-found: ignore
`;
    fs.writeFileSync(workflowPath, workflow);
    ok("Upgraded .github/workflows/evalgate-gate.yml to project mode (Tier 2)");
    return true;
}
411
+ // ── 5. Add CODEOWNERS entry ──
/**
 * Ensure `.github/CODEOWNERS` contains an entry for `evals/baseline.json`.
 *
 * A missing CODEOWNERS file (and `.github/` directory) is created; an existing
 * file gets the entry appended after a blank line. When the file already
 * mentions the baseline path nothing is written. The owner is the placeholder
 * `@YOUR_TEAM`, which the user is expected to edit.
 *
 * @param {string} cwd - Project root directory.
 * @returns {boolean} Always `true`.
 */
function addCodeowners(cwd) {
    const githubDir = path.join(cwd, ".github");
    const codeownersPath = path.join(githubDir, "CODEOWNERS");
    const entry = "evals/baseline.json";
    const snippet = `# EvalGate baseline — requires approval\n${entry} @YOUR_TEAM\n`;
    if (!fs.existsSync(codeownersPath)) {
        if (!fs.existsSync(githubDir)) {
            fs.mkdirSync(githubDir, { recursive: true });
        }
        fs.writeFileSync(codeownersPath, snippet);
    }
    else {
        const current = fs.readFileSync(codeownersPath, "utf-8");
        if (current.includes(entry)) {
            skip("CODEOWNERS already has evals/baseline.json entry");
            return true;
        }
        // Leading newline separates the new entry from whatever the file ended with.
        fs.appendFileSync(codeownersPath, `\n${snippet}`);
    }
    ok("Added evals/baseline.json to .github/CODEOWNERS (edit @YOUR_TEAM)");
    return true;
}
/**
 * Parse the arguments of `evalgate upgrade`.
 *
 * @param {string[]} argv - Arguments following the `upgrade` command.
 * @returns {{ full: boolean }} `full` is true when `--full` was passed.
 */
function parseUpgradeArgs(argv) {
    const full = argv.includes("--full");
    return { full };
}
435
+ // ── Main ──
/**
 * Entry point of `evalgate upgrade`.
 *
 * Without `--full` it prints the usage text and returns 0 only when help was
 * explicitly requested (`--help` / `-h`). With `--full` it verifies that the
 * current working directory has a `package.json` and an `evals/baseline.json`,
 * then runs the five upgrade steps and prints a summary with next steps.
 *
 * @param {string[]} argv - Arguments following the `upgrade` command.
 * @returns {number} Process exit code: 0 on success or explicit help; 1 on
 *   usage error, unmet precondition, or when package.json could not be updated.
 */
function runUpgrade(argv) {
    const args = parseUpgradeArgs(argv);
    const cwd = process.cwd();
    if (!args.full) {
        console.log(`evalgate upgrade — Upgrade regression gate

Usage:
evalgate upgrade --full Upgrade from Tier 1 (built-in) to Tier 2 (full gate)

What --full does:
1. Creates scripts/regression-gate.ts (full gate script)
2. Adds eval:regression-gate + eval:baseline-update npm scripts
3. Creates baseline governance workflow
4. Upgrades CI workflow to project mode
5. Adds CODEOWNERS entry for baseline

After upgrading:
- evalgate gate delegates to your eval:regression-gate script
- Baseline changes require PR label + approval
- Full metric comparison: golden eval, confidence, latency, cost
`);
        return argv.includes("--help") || argv.includes("-h") ? 0 : 1;
    }
    console.log("");
    console.log(" evalgate upgrade --full — upgrading to Tier 2\n");
    // Check preconditions
    const pkgPath = path.join(cwd, "package.json");
    if (!fs.existsSync(pkgPath)) {
        console.error(" ✖ No package.json found. Run this from a Node.js project root.");
        return 1;
    }
    if (!fs.existsSync(path.join(cwd, "evals", "baseline.json"))) {
        console.error(" ✖ No evals/baseline.json found. Run 'npx evalgate init' first.");
        return 1;
    }
    createGateScript(cwd);
    // package.json is known to exist here, so `false` means it could not be read,
    // parsed or edited.
    const scriptsUpdated = addNpmScripts(cwd);
    createGovernanceWorkflow(cwd);
    upgradeGateWorkflow(cwd);
    addCodeowners(cwd);
    if (!scriptsUpdated) {
        // Without the npm scripts the upgraded CI workflow has nothing to run, so do
        // not claim success. The other steps skip existing files and are safe to re-run.
        console.log("");
        console.error(" ✖ Could not update package.json (unreadable or not valid JSON). Fix it and re-run 'npx evalgate upgrade --full'.");
        return 1;
    }
    const summary = [
        "",
        " Done! Your repo is now Tier 2.\n",
        " What changed:",
        " - scripts/regression-gate.ts Full gate script",
        " - package.json eval:regression-gate + eval:baseline-update",
        " - .github/workflows/ Gate + governance workflows",
        " - .github/CODEOWNERS Baseline requires approval\n",
        " Next:",
        " git add -A",
        " git commit -m 'chore: upgrade EvalGate gate to Tier 2'",
        " git push\n",
        " Commands:",
        " npx evalgate gate Run full gate locally",
        " npx evalgate baseline update Update baseline with real scores",
        "",
    ];
    for (const line of summary) {
        console.log(line);
    }
    return 0;
}
@@ -0,0 +1,31 @@
1
+ /**
2
+ * CORE-402: Centralized .evalgate workspace resolution
3
+ *
4
+ * Provides unified workspace path resolution for all EvalGate CLI commands.
5
+ * Prefers .evalgate/; falls back to .evalai/ for backward compatibility.
6
+ */
7
+ /**
8
+ * Resolved filesystem paths of an EvalGate workspace, as returned by resolveEvalWorkspace(). Every path is built from `root`; nothing here guarantees that the files or directories exist.
9
+ */
10
+ export interface EvalWorkspace {
11
+ /** Project root directory that all other paths are joined onto */
12
+ root: string;
13
+ /** Workspace directory: <root>/.evalgate, or <root>/.evalai when only the legacy directory exists */
14
+ evalDir: string;
15
+ /** @deprecated Use evalDir (the resolver sets both fields to the same path) */
16
+ evalgateDir: string;
17
+ /** Runs directory: <evalDir>/runs */
18
+ runsDir: string;
19
+ /** Path of <evalDir>/manifest.json */
20
+ manifestPath: string;
21
+ /** Path of <evalDir>/last-run.json */
22
+ lastRunPath: string;
23
+ /** Path of <runsDir>/index.json */
24
+ indexPath: string;
25
+ /** Path of <evalDir>/baseline-run.json */
26
+ baselinePath: string;
27
+ }
28
+ /**
29
+ * Resolve EvalGate workspace paths for projectRoot (the implementation defaults it to process.cwd()). Prefers .evalgate/; falls back to .evalai/ only when that legacy directory exists and .evalgate/ does not, logging a deprecation warning the first time this happens in a process.
30
+ */
31
+ export declare function resolveEvalWorkspace(projectRoot?: string): EvalWorkspace;
@@ -0,0 +1,68 @@
1
+ "use strict";
2
+ /**
3
+ * CORE-402: Centralized .evalgate workspace resolution
4
+ *
5
+ * Provides unified workspace path resolution for all EvalGate CLI commands.
6
+ * Prefers .evalgate/; falls back to .evalai/ for backward compatibility.
7
+ */
// TypeScript-emitted CommonJS interop helpers. Each `var` first reuses a helper of the
// same name found on `this` (when `this` is truthy) and only otherwise defines its own.
//
// __createBinding(o, m, k, k2): exposes m[k] on o under the name k2 (k2 defaults to k).
// With Object.create available it defines the property via a descriptor: the existing
// descriptor of m[k] is reused only when it is an accessor on a module flagged
// __esModule, or a data property that is neither writable nor configurable; in every
// other case an enumerable getter returning m[k] is installed, so later changes to m[k]
// stay visible through o. Without Object.create it falls back to a plain assignment.
8
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
9
+ if (k2 === undefined) k2 = k;
10
+ var desc = Object.getOwnPropertyDescriptor(m, k);
11
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
12
+ desc = { enumerable: true, get: function() { return m[k]; } };
13
+ }
14
+ Object.defineProperty(o, k2, desc);
15
+ }) : (function(o, m, k, k2) {
16
+ if (k2 === undefined) k2 = k;
17
+ o[k2] = m[k];
18
+ }));
// __setModuleDefault(o, v): stores v as the enumerable "default" property of o
// (plain assignment when Object.create is unavailable).
19
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
20
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
21
+ }) : function(o, v) {
22
+ o["default"] = v;
23
+ });
// __importStar(mod): returns mod unchanged when it is flagged __esModule; otherwise
// builds a new object that binds every own property of mod except "default" (via
// __createBinding) and sets "default" to mod itself (via __setModuleDefault).
24
+ var __importStar = (this && this.__importStar) || (function () {
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or with a
// for-in + hasOwnProperty fallback when that function is missing.
25
+ var ownKeys = function(o) {
26
+ ownKeys = Object.getOwnPropertyNames || function (o) {
27
+ var ar = [];
28
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
29
+ return ar;
30
+ };
31
+ return ownKeys(o);
32
+ };
33
+ return function (mod) {
34
+ if (mod && mod.__esModule) return mod;
35
+ var result = {};
// `mod != null` also skips undefined, so a missing module yields { default: mod }.
36
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
37
+ __setModuleDefault(result, mod);
38
+ return result;
39
+ };
40
+ })();
41
+ Object.defineProperty(exports, "__esModule", { value: true });
42
+ exports.resolveEvalWorkspace = resolveEvalWorkspace;
43
+ const fs = __importStar(require("node:fs"));
44
+ const path = __importStar(require("node:path"));
/**
 * Compute the EvalGate workspace paths for a project.
 *
 * The workspace lives in `<projectRoot>/.evalgate`. The legacy
 * `<projectRoot>/.evalai` directory is used only when it exists and
 * `.evalgate` does not; in that case a deprecation warning is printed once per
 * process (tracked by a flag stored on `process`). Paths are only computed —
 * nothing is created on disk.
 *
 * @param {string} [projectRoot=process.cwd()] - Project root directory.
 * @returns {{root: string, evalDir: string, evalgateDir: string, runsDir: string,
 *   manifestPath: string, lastRunPath: string, indexPath: string, baselinePath: string}}
 *   Resolved paths; `evalgateDir` is a deprecated alias of `evalDir`.
 */
function resolveEvalWorkspace(projectRoot = process.cwd()) {
    const preferredDir = path.join(projectRoot, ".evalgate");
    const legacyDir = path.join(projectRoot, ".evalai");
    let evalDir = preferredDir;
    if (fs.existsSync(legacyDir) && !fs.existsSync(preferredDir)) {
        evalDir = legacyDir;
        if (!process.__EVALGATE_LEGACY_EVALAI_WARNED) {
            console.warn("[EvalGate] Deprecation: .evalai/ is deprecated. Migrate to .evalgate/ (e.g. mv .evalai .evalgate).");
            process.__EVALGATE_LEGACY_EVALAI_WARNED = true;
        }
    }
    const inEvalDir = (name) => path.join(evalDir, name);
    const runsDir = inEvalDir("runs");
    return {
        root: projectRoot,
        evalDir,
        evalgateDir: evalDir,
        runsDir,
        manifestPath: inEvalDir("manifest.json"),
        lastRunPath: inEvalDir("last-run.json"),
        indexPath: path.join(runsDir, "index.json"),
        baselinePath: inEvalDir("baseline-run.json"),
    };
}