@evalgate/sdk 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +638 -0
- package/README.md +398 -0
- package/dist/assertions.d.ts +189 -0
- package/dist/assertions.js +662 -0
- package/dist/batch.d.ts +68 -0
- package/dist/batch.js +179 -0
- package/dist/cache.d.ts +65 -0
- package/dist/cache.js +131 -0
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +132 -0
- package/dist/cli/baseline.d.ts +10 -0
- package/dist/cli/baseline.js +172 -0
- package/dist/cli/check.d.ts +73 -0
- package/dist/cli/check.js +355 -0
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +112 -0
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +230 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +685 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +419 -0
- package/dist/cli/doctor.d.ts +88 -0
- package/dist/cli/doctor.js +675 -0
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.d.ts +58 -0
- package/dist/cli/explain.js +561 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +135 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +110 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +103 -0
- package/dist/cli/formatters/types.d.ts +103 -0
- package/dist/cli/formatters/types.js +8 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +179 -0
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +252 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.js +332 -0
- package/dist/cli/init.d.ts +16 -0
- package/dist/cli/init.js +292 -0
- package/dist/cli/manifest.d.ts +103 -0
- package/dist/cli/manifest.js +282 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +89 -0
- package/dist/cli/print-config.d.ts +29 -0
- package/dist/cli/print-config.js +270 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/regression-gate.d.ts +15 -0
- package/dist/cli/regression-gate.js +341 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +132 -0
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +395 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +91 -0
- package/dist/cli/upgrade.d.ts +15 -0
- package/dist/cli/upgrade.js +492 -0
- package/dist/cli/workspace.d.ts +31 -0
- package/dist/cli/workspace.js +68 -0
- package/dist/client.d.ts +368 -0
- package/dist/client.js +893 -0
- package/dist/client.request.test.d.ts +1 -0
- package/dist/client.request.test.js +232 -0
- package/dist/context.d.ts +134 -0
- package/dist/context.js +215 -0
- package/dist/errors.d.ts +82 -0
- package/dist/errors.js +298 -0
- package/dist/export.d.ts +195 -0
- package/dist/export.js +344 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.js +153 -0
- package/dist/integrations/anthropic.d.ts +91 -0
- package/dist/integrations/anthropic.js +163 -0
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +232 -0
- package/dist/integrations/openai.d.ts +92 -0
- package/dist/integrations/openai.js +160 -0
- package/dist/local.d.ts +39 -0
- package/dist/local.js +148 -0
- package/dist/logger.d.ts +128 -0
- package/dist/logger.js +227 -0
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +74 -0
- package/dist/pagination.js +139 -0
- package/dist/regression.d.ts +100 -0
- package/dist/regression.js +44 -0
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +400 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +244 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +357 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +403 -0
- package/dist/runtime/run-report.d.ts +200 -0
- package/dist/runtime/run-report.js +222 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/snapshot.d.ts +176 -0
- package/dist/snapshot.js +322 -0
- package/dist/streaming.d.ts +173 -0
- package/dist/streaming.js +268 -0
- package/dist/testing.d.ts +273 -0
- package/dist/testing.js +317 -0
- package/dist/types.d.ts +754 -0
- package/dist/types.js +54 -0
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +41 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +389 -0
- package/dist/workflows.js +671 -0
- package/package.json +117 -0
|
@@ -0,0 +1,492 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* evalgate upgrade --full — Upgrade from Tier 1 (built-in gate) to Tier 2 (full gate)
|
|
4
|
+
*
|
|
5
|
+
* What it does:
|
|
6
|
+
* 1. Adds full regression gate script (scripts/regression-gate.ts)
|
|
7
|
+
* 2. Adds baseline governance workflow (.github/workflows/baseline-governance.yml)
|
|
8
|
+
* 3. Updates package.json with eval:regression-gate + eval:baseline-update scripts
|
|
9
|
+
* 4. Updates .github/workflows/evalgate-gate.yml to use project mode
|
|
10
|
+
 * 5. Adds an evals/baseline.json entry to .github/CODEOWNERS, then prints next steps
|
|
11
|
+
*/
|
|
12
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
15
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
16
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
17
|
+
}
|
|
18
|
+
Object.defineProperty(o, k2, desc);
|
|
19
|
+
}) : (function(o, m, k, k2) {
|
|
20
|
+
if (k2 === undefined) k2 = k;
|
|
21
|
+
o[k2] = m[k];
|
|
22
|
+
}));
|
|
23
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
24
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
25
|
+
}) : function(o, v) {
|
|
26
|
+
o["default"] = v;
|
|
27
|
+
});
|
|
28
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
29
|
+
var ownKeys = function(o) {
|
|
30
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
31
|
+
var ar = [];
|
|
32
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
33
|
+
return ar;
|
|
34
|
+
};
|
|
35
|
+
return ownKeys(o);
|
|
36
|
+
};
|
|
37
|
+
return function (mod) {
|
|
38
|
+
if (mod && mod.__esModule) return mod;
|
|
39
|
+
var result = {};
|
|
40
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
41
|
+
__setModuleDefault(result, mod);
|
|
42
|
+
return result;
|
|
43
|
+
};
|
|
44
|
+
})();
|
|
45
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
46
|
+
exports.parseUpgradeArgs = parseUpgradeArgs;
|
|
47
|
+
exports.runUpgrade = runUpgrade;
|
|
48
|
+
const fs = __importStar(require("node:fs"));
|
|
49
|
+
const path = __importStar(require("node:path"));
|
|
50
|
+
// ── Detect environment ──
|
|
51
|
+
/**
 * Infer which package manager a project uses from the lockfile found in `cwd`.
 *
 * A pnpm lockfile takes precedence over a yarn lockfile; when neither is
 * present the project is assumed to use npm.
 *
 * @param {string} cwd - Directory to inspect for lockfiles.
 * @returns {"pnpm" | "yarn" | "npm"} The detected package manager name.
 */
function detectPackageManager(cwd) {
    // Ordered by precedence: the first lockfile that exists wins.
    const lockfiles = [
        ["pnpm-lock.yaml", "pnpm"],
        ["yarn.lock", "yarn"],
    ];
    for (const [lockfile, manager] of lockfiles) {
        if (fs.existsSync(path.join(cwd, lockfile))) {
            return manager;
        }
    }
    return "npm";
}
|
|
58
|
+
/**
 * Print a "step succeeded" line to stdout, prefixed with a check mark.
 *
 * @param {string} msg - Human-readable description of what was done.
 * @returns {void}
 */
function ok(msg) {
    const line = ` ✔ ${msg}`;
    console.log(line);
}
|
|
61
|
+
/**
 * Print a "step skipped" line to stdout, prefixed with a dash.
 *
 * @param {string} msg - Human-readable reason the step was skipped.
 * @returns {void}
 */
function skip(msg) {
    const line = ` – ${msg}`;
    console.log(line);
}
|
|
64
|
+
// ── 1. Create scripts/regression-gate.ts ──
|
|
65
|
+
/**
 * Step 1: write `scripts/regression-gate.ts` into the project.
 *
 * The generated TypeScript script runs `npm test`, compares the outcome with
 * `evals/baseline.json`, writes `evals/regression-report.json`, and exits with
 * 0 (pass), 1 (regression) or 2 (baseline missing). With `--update-baseline`
 * it rewrites the baseline from the current test results instead.
 *
 * An existing script is never overwritten.
 *
 * @param {string} cwd - Project root directory.
 * @returns {boolean} Always `true` (file created or already present).
 */
function createGateScript(cwd) {
    const scriptsDir = path.join(cwd, "scripts");
    const scriptPath = path.join(scriptsDir, "regression-gate.ts");
    if (fs.existsSync(scriptPath)) {
        skip("scripts/regression-gate.ts already exists");
        return true;
    }
    // `recursive: true` makes this a no-op when the directory already exists,
    // so no separate existence check is needed.
    fs.mkdirSync(scriptsDir, { recursive: true });
    // NOTE: this is a template literal that emits TypeScript source, so
    // backslashes, backticks and `${` that belong to the generated file are
    // escaped (`\\n`, `\``, `\${`).
    // The shebang uses `env -S` because a plain `#!/usr/bin/env npx tsx` hands
    // "npx tsx" to env as a single program name on Linux and cannot execute.
    const content = `#!/usr/bin/env -S npx tsx
/**
 * Full regression gate — compares current test results against baseline.
 *
 * Usage:
 *   npx tsx scripts/regression-gate.ts                    # run gate
 *   npx tsx scripts/regression-gate.ts --update-baseline  # update baseline with current values
 *
 * Generated by: npx evalgate upgrade --full
 */
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
import { execSync, spawnSync } from "node:child_process";
import { resolve } from "node:path";

const BASELINE_PATH = resolve("evals/baseline.json");
const REPORT_PATH = resolve("evals/regression-report.json");
const CONFIDENCE_PATH = resolve("evals/confidence-summary.json");

const isUpdateBaseline = process.argv.includes("--update-baseline");

// ── Helpers ──

function loadJSON(p: string): Record<string, unknown> | null {
  try {
    return JSON.parse(readFileSync(p, "utf-8"));
  } catch {
    return null;
  }
}

function getHeadSha(): string {
  try {
    return execSync("git rev-parse --short HEAD").toString().trim();
  } catch {
    return "0000000";
  }
}

function writeReport(report: Record<string, unknown>): void {
  const dir = resolve("evals");
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
  writeFileSync(REPORT_PATH, JSON.stringify(report, null, 2) + "\\n");
}

// ── Run tests ──

function runTests(): { passed: boolean; total: number; durationMs: number } {
  const t0 = Date.now();
  const result = spawnSync("npm", ["test"], {
    stdio: "pipe",
    shell: process.platform === "win32",
    timeout: 300_000,
  });
  const durationMs = Date.now() - t0;
  const passed = result.status === 0;
  const output = (result.stdout?.toString() ?? "") + (result.stderr?.toString() ?? "");

  let total = 0;
  const m =
    output.match(/(\\d+)\\s+(?:tests?|specs?)\\s+(?:passed|completed)/i) ??
    output.match(/Tests:\\s+(\\d+)\\s+passed/i) ??
    output.match(/(\\d+)\\s+passing/i);
  if (m) total = parseInt(m[1], 10);

  return { passed, total, durationMs };
}

// ── Main ──

const baseline = loadJSON(BASELINE_PATH);
if (!baseline) {
  console.error("❌ Baseline not found. Run: npx evalgate init");
  const report = {
    schemaVersion: 1,
    timestamp: new Date().toISOString(),
    exitCode: 2,
    category: "infra_error",
    passed: false,
    failures: ["Baseline file not found"],
    deltas: [],
    baseline: null,
    durationMs: 0,
    command: "npm test",
    runner: "unknown",
  };
  writeReport(report);
  process.exit(2);
}

const tests = runTests();

if (isUpdateBaseline) {
  const user = process.env.USER || process.env.USERNAME || "unknown";
  const now = new Date().toISOString();
  const updated = {
    ...baseline,
    updatedAt: now,
    updatedBy: user,
    commitSha: getHeadSha(),
    confidenceTests: {
      ...(baseline.confidenceTests as Record<string, unknown> ?? {}),
      passed: tests.passed,
      total: tests.total,
    },
  };
  writeFileSync(BASELINE_PATH, JSON.stringify(updated, null, 2) + "\\n");
  console.log("✅ Baseline updated with current test results");
  console.log(\` Tests: \${tests.total} (\${tests.passed ? "passing" : "FAILING"})\`);
  process.exit(0);
}

// ── Compare ──

const bConf = baseline.confidenceTests as { passed?: boolean; total?: number } | undefined;
const baselinePassed = bConf?.passed ?? true;
const baselineTotal = bConf?.total ?? 0;

const failures: string[] = [];
const deltas: Array<Record<string, unknown>> = [];

deltas.push({
  metric: "tests_passing",
  baseline: baselinePassed,
  current: tests.passed,
  delta: tests.passed === baselinePassed ? "0" : tests.passed ? "+1" : "-1",
  status: tests.passed ? "pass" : "fail",
});

if (!tests.passed && baselinePassed) {
  failures.push("Tests were passing in baseline but are now failing");
}

if (tests.total > 0 || baselineTotal > 0) {
  const d = tests.total - baselineTotal;
  deltas.push({
    metric: "test_count",
    baseline: baselineTotal,
    current: tests.total,
    delta: d >= 0 ? \`+\${d}\` : \`\${d}\`,
    status: tests.total >= baselineTotal ? "pass" : "fail",
  });
  if (tests.total < baselineTotal) {
    failures.push(\`Test count dropped from \${baselineTotal} to \${tests.total} (\${d})\`);
  }
}

const hasRegression = failures.length > 0;
const report = {
  schemaVersion: 1,
  timestamp: new Date().toISOString(),
  exitCode: hasRegression ? 1 : 0,
  category: hasRegression ? "regression" : "pass",
  passed: !hasRegression,
  failures,
  deltas,
  baseline: {
    updatedAt: (baseline.updatedAt as string) ?? "unknown",
    updatedBy: (baseline.updatedBy as string) ?? "unknown",
  },
  durationMs: tests.durationMs,
  command: "npm test",
  runner: "unknown",
};

writeReport(report);

if (hasRegression) {
  console.error("❌ REGRESSION DETECTED");
  for (const f of failures) console.error(\` \${f}\`);
} else {
  console.log("✅ NO REGRESSION — gate passed");
}

for (const d of deltas) {
  const icon = d.status === "pass" ? "✔" : "✖";
  console.log(\` \${icon} \${d.metric}: \${d.baseline} → \${d.current} (\${d.delta})\`);
}

process.exit(report.exitCode);
`;
    fs.writeFileSync(scriptPath, content);
    ok("Created scripts/regression-gate.ts");
    return true;
}
|
|
259
|
+
// ── 2. Add npm scripts to package.json ──
|
|
260
|
+
/**
 * Step 2: register the gate scripts in the project's package.json.
 *
 * Adds `eval:regression-gate` and `eval:baseline-update` when they are not
 * already defined (existing non-empty entries are left untouched) and rewrites
 * package.json with 2-space indentation only if something was added.
 *
 * @param {string} cwd - Project root directory.
 * @returns {boolean} `true` when the scripts are present afterwards; `false`
 *   when package.json is missing, unreadable, or not a JSON object.
 */
function addNpmScripts(cwd) {
    const pkgPath = path.join(cwd, "package.json");
    if (!fs.existsSync(pkgPath)) {
        return false;
    }
    let pkg;
    try {
        pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
    }
    catch (err) {
        // Surface the reason instead of failing silently; the caller only sees a boolean.
        const reason = err instanceof Error ? err.message : String(err);
        console.error(` ✖ Could not read package.json: ${reason}`);
        return false;
    }
    // JSON.parse accepts `null`, arrays and scalars; none of them can hold scripts.
    if (pkg === null || typeof pkg !== "object" || Array.isArray(pkg)) {
        console.error(" ✖ package.json does not contain a JSON object");
        return false;
    }
    const wanted = [
        ["eval:regression-gate", "npx tsx scripts/regression-gate.ts"],
        ["eval:baseline-update", "npx tsx scripts/regression-gate.ts --update-baseline"],
    ];
    const scripts = pkg.scripts ?? {};
    const added = [];
    for (const [name, command] of wanted) {
        if (!scripts[name]) {
            scripts[name] = command;
            added.push(name);
        }
    }
    if (added.length === 0) {
        skip("eval:regression-gate and eval:baseline-update scripts already exist");
        return true;
    }
    pkg.scripts = scripts;
    fs.writeFileSync(pkgPath, `${JSON.stringify(pkg, null, 2)}\n`);
    // Report only what was actually added (one or both scripts).
    ok(`Added ${added.join(" and ")} script${added.length > 1 ? "s" : ""} to package.json`);
    return true;
}
|
|
292
|
+
// ── 3. Create baseline governance workflow ──
|
|
293
|
+
/**
 * Step 3: write `.github/workflows/baseline-governance.yml`.
 *
 * The generated GitHub Actions workflow runs on pull requests that touch
 * `evals/baseline.json`. It fails unless the PR carries a `baseline-update`
 * or `baseline-exception` label, and appends the baseline diff to the step
 * summary.
 *
 * An existing workflow file is never overwritten.
 *
 * @param {string} cwd - Project root directory.
 * @returns {boolean} Always `true` (file created or already present).
 */
function createGovernanceWorkflow(cwd) {
    const workflowDir = path.join(cwd, ".github", "workflows");
    const workflowPath = path.join(workflowDir, "baseline-governance.yml");
    if (fs.existsSync(workflowPath)) {
        skip(".github/workflows/baseline-governance.yml already exists");
        return true;
    }
    fs.mkdirSync(workflowDir, { recursive: true });
    // Template notes (`\${{` and `\\\`` are escapes for this JS template literal):
    //  - The label list is handed to the shell through `env:` instead of being
    //    pasted into the script. toJSON() output spans several lines, which
    //    breaks an unquoted `LABELS=...` assignment, and inlining PR-controlled
    //    text into `run:` is a script-injection vector.
    //  - `labeled`/`unlabeled` are listed so adding the label re-runs the check.
    //  - `fetch-depth: 2` is required for `HEAD~1` to exist in the checkout.
    const workflow = `# Baseline Governance — requires label + approval for baseline changes
# Auto-generated by: npx evalgate upgrade --full
name: Baseline Governance

on:
  pull_request:
    types: [opened, synchronize, reopened, labeled, unlabeled]
    paths:
      - 'evals/baseline.json'

jobs:
  governance:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: Check label
        env:
          LABELS: \${{ toJSON(github.event.pull_request.labels.*.name) }}
        run: |
          if echo "$LABELS" | grep -q "baseline-update"; then
            echo "✅ baseline-update label found"
          elif echo "$LABELS" | grep -q "baseline-exception"; then
            echo "⚠️ baseline-exception label found — bypassing delta checks"
          else
            echo "❌ Missing 'baseline-update' label"
            echo "Add the 'baseline-update' label to this PR to update the baseline."
            exit 1
          fi

      - name: Show baseline diff
        run: |
          echo "## Baseline Changes" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "\\\`\\\`\\\`diff" >> "$GITHUB_STEP_SUMMARY"
          git diff HEAD~1 -- evals/baseline.json >> "$GITHUB_STEP_SUMMARY" || echo "No previous baseline" >> "$GITHUB_STEP_SUMMARY"
          echo "\\\`\\\`\\\`" >> "$GITHUB_STEP_SUMMARY"
`;
    fs.writeFileSync(workflowPath, workflow);
    ok("Created .github/workflows/baseline-governance.yml");
    return true;
}
|
|
343
|
+
// ── 4. Upgrade evalgate-gate.yml to project mode ──
|
|
344
|
+
/**
 * Step 4: replace `.github/workflows/evalgate-gate.yml` with the Tier 2
 * ("project mode") workflow that runs the project's `eval:regression-gate`
 * script using the detected package manager.
 *
 * The file is left alone when it already mentions `eval:regression-gate`.
 * Otherwise its previous content is overwritten.
 *
 * @param {string} cwd - Project root directory.
 * @returns {boolean} `true` when the workflow is (now) in project mode;
 *   `false` when no evalgate-gate.yml exists to upgrade.
 */
function upgradeGateWorkflow(cwd) {
    const pm = detectPackageManager(cwd);
    const workflowPath = path.join(cwd, ".github", "workflows", "evalgate-gate.yml");
    if (!fs.existsSync(workflowPath)) {
        skip("No .github/workflows/evalgate-gate.yml found — run evalgate init first");
        return false;
    }
    const existing = fs.readFileSync(workflowPath, "utf-8");
    // A reference to the project script means a previous upgrade already ran.
    if (existing.includes("eval:regression-gate")) {
        skip("evalgate-gate.yml already uses project mode");
        return true;
    }
    let installCmd = "npm ci";
    if (pm === "pnpm") {
        installCmd = "pnpm install --frozen-lockfile";
    }
    else if (pm === "yarn") {
        installCmd = "yarn install --frozen-lockfile";
    }
    // Node setup + dependency install; pnpm additionally needs its own setup action first.
    const setupLines = [
        "      - uses: actions/setup-node@v4",
        "        with:",
        "          node-version: '20'",
        `          cache: ${pm}`,
        `      - run: ${installCmd}`,
    ];
    if (pm === "pnpm") {
        setupLines.unshift("      - uses: pnpm/action-setup@v4");
    }
    const setupSteps = setupLines.join("\n");
    const workflow = `# EvalGate Regression Gate (Full / Tier 2)
# Upgraded by: npx evalgate upgrade --full
name: EvalGate Gate

on:
  pull_request:
    branches: [main]

concurrency:
  group: evalgate-\${{ github.ref }}
  cancel-in-progress: true

jobs:
  regression-gate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
${setupSteps}
      - name: Run regression gate
        run: ${pm} run eval:regression-gate

      - name: Gate summary
        if: always()
        run: npx -y @evalgate/sdk@^2 gate --format github

      - name: Upload report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: regression-report
          path: evals/regression-report.json
          if-no-files-found: ignore
`;
    fs.writeFileSync(workflowPath, workflow);
    ok("Upgraded .github/workflows/evalgate-gate.yml to project mode (Tier 2)");
    return true;
}
|
|
411
|
+
// ── 5. Add CODEOWNERS entry ──
|
|
412
|
+
/**
 * Step 5: make sure `.github/CODEOWNERS` lists `evals/baseline.json`.
 *
 * Creates the file (and `.github/`) when missing, appends the entry when the
 * file exists without it, and does nothing when the path is already mentioned.
 * The owner is written as the placeholder `@YOUR_TEAM` for the user to edit.
 *
 * @param {string} cwd - Project root directory.
 * @returns {boolean} Always `true`.
 */
function addCodeowners(cwd) {
    const entry = "evals/baseline.json";
    const githubDir = path.join(cwd, ".github");
    const codeownersPath = path.join(githubDir, "CODEOWNERS");
    const block = `# EvalGate baseline — requires approval\n${entry} @YOUR_TEAM\n`;
    if (!fs.existsSync(codeownersPath)) {
        if (!fs.existsSync(githubDir)) {
            fs.mkdirSync(githubDir, { recursive: true });
        }
        fs.writeFileSync(codeownersPath, block);
    }
    else if (fs.readFileSync(codeownersPath, "utf-8").includes(entry)) {
        skip("CODEOWNERS already has evals/baseline.json entry");
        return true;
    }
    else {
        // Leading newline separates the new block from whatever the file ends with.
        fs.appendFileSync(codeownersPath, `\n${block}`);
    }
    ok("Added evals/baseline.json to .github/CODEOWNERS (edit @YOUR_TEAM)");
    return true;
}
|
|
432
|
+
/**
 * Parse the arguments of `evalgate upgrade`.
 *
 * @param {string[]} argv - Arguments following the `upgrade` subcommand.
 * @returns {{ full: boolean }} `full` is true when `--full` is present.
 */
function parseUpgradeArgs(argv) {
    const full = argv.includes("--full");
    return { full };
}
|
|
435
|
+
// ── Main ──
|
|
436
|
+
/**
 * Entry point for `evalgate upgrade`.
 *
 * Without `--full` it prints usage. With `--full` it checks that the current
 * directory has a package.json and an evals/baseline.json, then runs the five
 * upgrade steps in order. Each step skips work that is already done, so the
 * command can safely be re-run after fixing a problem.
 *
 * @param {string[]} argv - Arguments following the `upgrade` subcommand.
 * @returns {number} Process exit code: 0 on success (or when help was
 *   explicitly requested); 1 when `--full` is missing, a precondition fails,
 *   or any upgrade step reports that it could not complete.
 */
function runUpgrade(argv) {
    const args = parseUpgradeArgs(argv);
    const cwd = process.cwd();
    if (!args.full) {
        console.log(`evalgate upgrade — Upgrade regression gate

Usage:
  evalgate upgrade --full   Upgrade from Tier 1 (built-in) to Tier 2 (full gate)

What --full does:
  1. Creates scripts/regression-gate.ts (full gate script)
  2. Adds eval:regression-gate + eval:baseline-update npm scripts
  3. Creates baseline governance workflow
  4. Upgrades CI workflow to project mode
  5. Adds CODEOWNERS entry for baseline

After upgrading:
  - evalgate gate delegates to your eval:regression-gate script
  - Baseline changes require PR label + approval
  - Full metric comparison: golden eval, confidence, latency, cost
`);
        // Asking for help is not an error; omitting --full otherwise is.
        return argv.includes("--help") || argv.includes("-h") ? 0 : 1;
    }
    console.log("");
    console.log(" evalgate upgrade --full — upgrading to Tier 2\n");
    // Check preconditions
    const pkgPath = path.join(cwd, "package.json");
    if (!fs.existsSync(pkgPath)) {
        console.error(" ✖ No package.json found. Run this from a Node.js project root.");
        return 1;
    }
    if (!fs.existsSync(path.join(cwd, "evals", "baseline.json"))) {
        console.error(" ✖ No evals/baseline.json found. Run 'npx evalgate init' first.");
        return 1;
    }
    // Run every step (even after a failure) and remember which ones returned
    // false, so the summary never claims success for an incomplete upgrade.
    const steps = [
        ["scripts/regression-gate.ts", createGateScript],
        ["package.json scripts", addNpmScripts],
        [".github/workflows/baseline-governance.yml", createGovernanceWorkflow],
        [".github/workflows/evalgate-gate.yml", upgradeGateWorkflow],
        [".github/CODEOWNERS", addCodeowners],
    ];
    const incomplete = [];
    for (const [label, runStep] of steps) {
        if (!runStep(cwd)) {
            incomplete.push(label);
        }
    }
    console.log("");
    if (incomplete.length > 0) {
        console.error(" ✖ Upgrade incomplete — these steps did not finish:");
        for (const label of incomplete) {
            console.error(`   - ${label}`);
        }
        console.error(" Fix the issues above and re-run: npx evalgate upgrade --full\n");
        return 1;
    }
    console.log(" Done! Your repo is now Tier 2.\n");
    console.log(" What changed:");
    console.log(" - scripts/regression-gate.ts Full gate script");
    console.log(" - package.json eval:regression-gate + eval:baseline-update");
    console.log(" - .github/workflows/ Gate + governance workflows");
    console.log(" - .github/CODEOWNERS Baseline requires approval\n");
    console.log(" Next:");
    console.log(" git add -A");
    console.log(" git commit -m 'chore: upgrade EvalGate gate to Tier 2'");
    console.log(" git push\n");
    console.log(" Commands:");
    console.log(" npx evalgate gate Run full gate locally");
    console.log(" npx evalgate baseline update Update baseline with real scores");
    console.log("");
    return 0;
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE-402: Centralized .evalgate workspace resolution
|
|
3
|
+
*
|
|
4
|
+
* Provides unified workspace path resolution for all EvalGate CLI commands.
|
|
5
|
+
* Prefers .evalgate/; falls back to .evalai/ for backward compatibility.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Absolute-or-relative filesystem locations that make up an EvalGate
 * workspace, all derived from a single project root.
 */
export interface EvalWorkspace {
    /** Directory the workspace was resolved from (the project root). */
    root: string;
    /** Workspace directory: `<root>/.evalgate`, or `<root>/.evalai` for legacy projects. */
    evalDir: string;
    /** @deprecated Same value as `evalDir`; use `evalDir` instead. */
    evalgateDir: string;
    /** `runs` directory inside `evalDir`. */
    runsDir: string;
    /** Path of `manifest.json` inside `evalDir`. */
    manifestPath: string;
    /** Path of `last-run.json` inside `evalDir`. */
    lastRunPath: string;
    /** Path of `index.json` inside `runsDir`. */
    indexPath: string;
    /** Path of `baseline-run.json` inside `evalDir`. */
    baselinePath: string;
}
|
|
28
|
+
/**
 * Compute the EvalGate workspace paths for a project.
 *
 * `.evalgate/` is preferred; `.evalai/` is used only for backward
 * compatibility with legacy projects.
 *
 * @param projectRoot - Project root directory (optional).
 * @returns The resolved workspace paths.
 */
export declare function resolveEvalWorkspace(projectRoot?: string): EvalWorkspace;
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* CORE-402: Centralized .evalgate workspace resolution
|
|
4
|
+
*
|
|
5
|
+
* Provides unified workspace path resolution for all EvalGate CLI commands.
|
|
6
|
+
* Prefers .evalgate/; falls back to .evalai/ for backward compatibility.
|
|
7
|
+
*/
|
|
8
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
9
|
+
if (k2 === undefined) k2 = k;
|
|
10
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
11
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
12
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
13
|
+
}
|
|
14
|
+
Object.defineProperty(o, k2, desc);
|
|
15
|
+
}) : (function(o, m, k, k2) {
|
|
16
|
+
if (k2 === undefined) k2 = k;
|
|
17
|
+
o[k2] = m[k];
|
|
18
|
+
}));
|
|
19
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
20
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
21
|
+
}) : function(o, v) {
|
|
22
|
+
o["default"] = v;
|
|
23
|
+
});
|
|
24
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
25
|
+
var ownKeys = function(o) {
|
|
26
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
27
|
+
var ar = [];
|
|
28
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
29
|
+
return ar;
|
|
30
|
+
};
|
|
31
|
+
return ownKeys(o);
|
|
32
|
+
};
|
|
33
|
+
return function (mod) {
|
|
34
|
+
if (mod && mod.__esModule) return mod;
|
|
35
|
+
var result = {};
|
|
36
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
37
|
+
__setModuleDefault(result, mod);
|
|
38
|
+
return result;
|
|
39
|
+
};
|
|
40
|
+
})();
|
|
41
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
42
|
+
exports.resolveEvalWorkspace = resolveEvalWorkspace;
|
|
43
|
+
const fs = __importStar(require("node:fs"));
|
|
44
|
+
const path = __importStar(require("node:path"));
|
|
45
|
+
/**
|
|
46
|
+
* Resolve EvalGate workspace paths. Prefers .evalgate/, falls back to .evalai/.
|
|
47
|
+
*/
|
|
48
|
+
/**
 * Compute the EvalGate workspace paths for a project.
 *
 * Uses `<projectRoot>/.evalgate` unless only the legacy `<projectRoot>/.evalai`
 * directory exists. In the legacy case a deprecation warning is printed once
 * per process, tracked through a flag stored on the `process` object.
 *
 * @param {string} [projectRoot=process.cwd()] - Project root directory.
 * @returns {object} Paths for root, evalDir, evalgateDir (alias of evalDir),
 *   runsDir, manifestPath, lastRunPath, indexPath and baselinePath.
 */
function resolveEvalWorkspace(projectRoot = process.cwd()) {
    const preferredDir = path.join(projectRoot, ".evalgate");
    const legacyDir = path.join(projectRoot, ".evalai");
    let evalDir = preferredDir;
    // Fall back to the legacy directory only when it is the sole one present.
    if (fs.existsSync(legacyDir) && !fs.existsSync(preferredDir)) {
        evalDir = legacyDir;
        if (!process.__EVALGATE_LEGACY_EVALAI_WARNED) {
            console.warn("[EvalGate] Deprecation: .evalai/ is deprecated. Migrate to .evalgate/ (e.g. mv .evalai .evalgate).");
            process.__EVALGATE_LEGACY_EVALAI_WARNED = true;
        }
    }
    const inEvalDir = (name) => path.join(evalDir, name);
    const runsDir = inEvalDir("runs");
    return {
        root: projectRoot,
        evalDir,
        evalgateDir: evalDir,
        runsDir,
        manifestPath: inEvalDir("manifest.json"),
        lastRunPath: inEvalDir("last-run.json"),
        indexPath: path.join(runsDir, "index.json"),
        baselinePath: inEvalDir("baseline-run.json"),
    };
}
|