@pauly4010/evalai-sdk 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/init.js CHANGED
@@ -1,9 +1,18 @@
1
1
  #!/usr/bin/env node
2
2
  "use strict";
3
3
  /**
4
- * evalai init — Create evalai.config.json
4
+ * evalai init — Full project scaffolder
5
5
  *
6
- * Creates the smallest possible config file. Defaults belong in code.
6
+ * Zero-to-gate in under 5 minutes:
7
+ * npx evalai init
8
+ * git push
9
+ * …CI starts blocking regressions.
10
+ *
11
+ * What it does:
12
+ * 1. Detects Node repo + package manager
13
+ * 2. Creates evals/ directory + baseline.json
14
+ * 3. Installs .github/workflows/evalai-gate.yml
15
+ * 4. Prints next steps (no docs required)
7
16
  */
8
17
  var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
9
18
  if (k2 === undefined) k2 = k;
@@ -40,30 +49,232 @@ var __importStar = (this && this.__importStar) || (function () {
40
49
  })();
41
50
  Object.defineProperty(exports, "__esModule", { value: true });
42
51
  exports.runInit = runInit;
52
+ const node_child_process_1 = require("node:child_process");
43
53
  const fs = __importStar(require("node:fs"));
44
54
  const path = __importStar(require("node:path"));
45
- const CONFIG_CONTENT = `{
46
- "evaluationId": ""
55
/**
 * Inspect `cwd` for a Node.js project and collect what the scaffolder needs.
 *
 * The package manager is inferred from lockfiles: pnpm-lock.yaml wins over
 * yarn.lock, and npm is the default when neither exists.
 *
 * @param {string} cwd Directory expected to contain package.json.
 * @returns {{cwd: string, pm: string, hasTestScript: boolean, testScript: string, name: string} | null}
 *   The project description, or null when package.json is missing,
 *   unreadable, not valid JSON, or not a JSON object.
 */
function detectProject(cwd) {
    const pkgPath = path.join(cwd, "package.json");
    if (!fs.existsSync(pkgPath))
        return null;
    let pkg;
    try {
        pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
    }
    catch {
        return null;
    }
    // JSON.parse also accepts `null`, arrays and scalars; none of them describe
    // a package, and `null` would crash on the property reads below.
    if (pkg === null || typeof pkg !== "object" || Array.isArray(pkg))
        return null;
    let pm = "npm";
    if (fs.existsSync(path.join(cwd, "pnpm-lock.yaml")))
        pm = "pnpm";
    else if (fs.existsSync(path.join(cwd, "yarn.lock")))
        pm = "yarn";
    // Only a string can be a runnable script; anything else counts as absent.
    const rawTestScript = pkg.scripts?.test;
    const testScript = typeof rawTestScript === "string" ? rawTestScript : "";
    // The second operand is the placeholder script `npm init` generates; it
    // always fails, so it is treated the same as having no test script.
    const hasTestScript = !!testScript && testScript !== 'echo "Error: no test specified" && exit 1';
    return {
        cwd,
        pm,
        hasTestScript,
        testScript,
        name: pkg.name ?? path.basename(cwd),
    };
}
81
+ // ── Step helpers ──
82
/** Print one progress line: a leading space, the marker, a space, the message. */
function printStep(marker, msg) {
    console.log(` ${marker} ${msg}`);
}
/** Report a step that was carried out. */
function ok(msg) {
    printStep("✔", msg);
}
/** Report a step that was skipped. */
function skip(msg) {
    printStep("–", msg);
}
88
+ // ── 1. Create evals/ + baseline.json ──
89
/**
 * Extract the number of passed tests from combined runner output.
 * Patterns are tried in order and the first hit wins; returns 0 when no
 * known summary line is present.
 */
function extractTestCount(output) {
    const countMatch = output.match(/(\d+)\s+(?:tests?|specs?)\s+(?:passed|completed)/i) ??
        output.match(/Tests:\s+(\d+)\s+passed/i) ??
        output.match(/(\d+)\s+passing/i) ??
        // Two-line summary form ("Test Files N passed" then "Tests N passed").
        output.match(/Test Files\s+\d+\s+passed.*\n\s+Tests\s+(\d+)\s+passed/i);
    return countMatch ? parseInt(countMatch[1], 10) : 0;
}
/**
 * Create evals/baseline.json for the project unless it already exists.
 *
 * When the project has a test script, `<pm> test` is run once (120 s limit)
 * so the baseline records whether the suite currently passes and, when the
 * output contains a recognisable summary, how many tests passed.
 *
 * @param {string} cwd Project root; evals/ is created beneath it if needed.
 * @param {{pm: string, hasTestScript: boolean, name: string}} project
 *   Result of project detection.
 * @returns {boolean} Always true, whether the file was written or skipped.
 */
function createBaseline(cwd, project) {
    const evalsDir = path.join(cwd, "evals");
    const baselinePath = path.join(evalsDir, "baseline.json");
    if (fs.existsSync(baselinePath)) {
        skip("evals/baseline.json already exists");
        return true;
    }
    if (!fs.existsSync(evalsDir)) {
        fs.mkdirSync(evalsDir, { recursive: true });
    }
    const user = process.env.USER || process.env.USERNAME || "unknown";
    const now = new Date().toISOString();
    // Run tests to capture real count if possible
    let testTotal = 0;
    let testsPassed = true;
    if (project.hasTestScript) {
        const isWin = process.platform === "win32";
        const result = (0, node_child_process_1.spawnSync)(project.pm, ["test"], {
            cwd,
            stdio: "pipe",
            shell: isWin,
            timeout: 120000,
            // The 1 MiB default kills the child once output exceeds it, which
            // would record a passing suite as failing in the baseline.
            maxBuffer: 64 * 1024 * 1024,
        });
        testsPassed = result.status === 0;
        const output = (result.stdout?.toString() ?? "") + (result.stderr?.toString() ?? "");
        testTotal = extractTestCount(output);
    }
    const baseline = {
        schemaVersion: 1,
        description: `Regression gate baseline for ${project.name}`,
        generatedAt: now,
        generatedBy: user,
        commitSha: getHeadSha(cwd),
        updatedAt: now,
        updatedBy: user,
        tolerance: {
            scoreDrop: 5,
            passRateDrop: 5,
            maxLatencyIncreaseMs: 200,
            maxCostIncreaseUsd: 0.05,
        },
        // Fixed starting values; nothing in this function measures them.
        goldenEval: {
            score: 100,
            passRate: 100,
            totalCases: 3,
            passedCases: 3,
        },
        confidenceTests: {
            passed: testsPassed,
            total: testTotal,
        },
        productMetrics: {},
    };
    fs.writeFileSync(baselinePath, `${JSON.stringify(baseline, null, 2)}\n`);
    ok("Created evals/baseline.json");
    return true;
}
151
/**
 * Abbreviated commit id of HEAD as printed by `git rev-parse --short HEAD`
 * run in `cwd`.
 *
 * @param {string} cwd Directory in which git is invoked.
 * @returns {string} The trimmed git output, or "0000000" when git prints
 *   nothing or the call throws.
 */
function getHeadSha(cwd) {
    const FALLBACK_SHA = "0000000";
    let sha;
    try {
        const git = (0, node_child_process_1.spawnSync)("git", ["rev-parse", "--short", "HEAD"], { cwd, stdio: "pipe" });
        sha = git.stdout?.toString().trim();
    }
    catch {
        sha = undefined;
    }
    // An empty string (git printed nothing) also takes the fallback.
    return sha || FALLBACK_SHA;
}
163
+ // ── 2. Install GitHub Actions workflow ──
164
+ function installWorkflow(cwd, project) {
165
+ const workflowDir = path.join(cwd, ".github", "workflows");
166
+ const workflowPath = path.join(workflowDir, "evalai-gate.yml");
167
+ if (fs.existsSync(workflowPath)) {
168
+ skip(".github/workflows/evalai-gate.yml already exists");
169
+ return true;
170
+ }
171
+ if (!fs.existsSync(workflowDir)) {
172
+ fs.mkdirSync(workflowDir, { recursive: true });
173
+ }
174
+ const installCmd = project.pm === "pnpm"
175
+ ? "pnpm install --frozen-lockfile"
176
+ : project.pm === "yarn"
177
+ ? "yarn install --frozen-lockfile"
178
+ : "npm ci";
179
+ const setupSteps = project.pm === "pnpm"
180
+ ? ` - uses: pnpm/action-setup@v4
181
+ - uses: actions/setup-node@v4
182
+ with:
183
+ node-version: '20'
184
+ cache: pnpm
185
+ - run: ${installCmd}`
186
+ : ` - uses: actions/setup-node@v4
187
+ with:
188
+ node-version: '20'
189
+ cache: ${project.pm}
190
+ - run: ${installCmd}`;
191
+ const workflow = `# EvalAI Regression Gate
192
+ # Auto-generated by: npx evalai init
193
+ # Blocks PRs that regress test health.
194
+ name: EvalAI Gate
195
+
196
+ on:
197
+ pull_request:
198
+ branches: [main]
199
+
200
+ concurrency:
201
+ group: evalai-\${{ github.ref }}
202
+ cancel-in-progress: true
203
+
204
+ jobs:
205
+ regression-gate:
206
+ runs-on: ubuntu-latest
207
+ steps:
208
+ - uses: actions/checkout@v4
209
+ ${setupSteps}
210
+ - name: EvalAI Regression Gate
211
+ run: npx -y @pauly4010/evalai-sdk@^1 gate --format github
212
+
213
+ - name: Upload report
214
+ if: always()
215
+ uses: actions/upload-artifact@v4
216
+ with:
217
+ name: regression-report
218
+ path: evals/regression-report.json
219
+ if-no-files-found: ignore
48
220
  `;
49
- function runInit(cwd = process.cwd()) {
221
+ fs.writeFileSync(workflowPath, workflow);
222
+ ok("Created .github/workflows/evalai-gate.yml");
223
+ return true;
224
+ }
225
+ // ── 3. Create evalai.config.json ──
226
+ function createConfig(cwd) {
50
227
  const configPath = path.join(cwd, "evalai.config.json");
51
228
  if (fs.existsSync(configPath)) {
52
- console.log(`evalai.config.json already exists at ${path.resolve(configPath)}`);
229
+ skip("evalai.config.json already exists");
230
+ return true;
231
+ }
232
+ const config = {
233
+ evaluationId: "",
234
+ gate: {
235
+ baseline: "evals/baseline.json",
236
+ report: "evals/regression-report.json",
237
+ },
238
+ };
239
+ fs.writeFileSync(configPath, `${JSON.stringify(config, null, 2)}\n`);
240
+ ok("Created evalai.config.json");
241
+ return true;
242
+ }
243
+ // ── Main ──
244
+ function runInit(cwd = process.cwd()) {
245
+ console.log("");
246
+ console.log(" evalai init — setting up regression gate\n");
247
+ // Detect
248
+ const project = detectProject(cwd);
249
+ if (!project) {
250
+ console.error(" ✖ No package.json found. Run this from a Node.js project root.");
53
251
  return false;
54
252
  }
55
- fs.writeFileSync(configPath, CONFIG_CONTENT, "utf-8");
56
- const resolvedPath = path.resolve(configPath);
57
- console.log(`Wrote evalai.config.json at ${resolvedPath}`);
253
+ ok(`Detected ${project.pm} project: ${project.name}`);
254
+ if (!project.hasTestScript) {
255
+ console.log(` No test script found in package.json`);
256
+ console.log(` The gate will still work — add a "test" script later for full coverage.\n`);
257
+ }
258
+ // Scaffold
259
+ createBaseline(cwd, project);
260
+ installWorkflow(cwd, project);
261
+ createConfig(cwd);
262
+ // Next steps
263
+ console.log("");
264
+ console.log(" Done! Next:");
265
+ console.log("");
266
+ console.log(" git add evals/ .github/workflows/evalai-gate.yml evalai.config.json");
267
+ console.log(" git commit -m 'chore: add EvalAI regression gate'");
268
+ console.log(" git push");
269
+ console.log("");
270
+ console.log(" That's it. Open a PR and the gate runs automatically.");
58
271
  console.log("");
59
- console.log("Next: paste evaluationId into evalai.config.json, then run npx -y @pauly4010/evalai-sdk@^1 check --format github --onFail import");
272
+ console.log(" Commands:");
273
+ console.log(" npx evalai gate Run gate locally");
274
+ console.log(" npx evalai gate --format json Machine-readable output");
275
+ console.log(" npx evalai baseline update Update baseline after intentional changes");
60
276
  console.log("");
61
- console.log("GitHub Actions snippet (add to your workflow):");
62
- console.log(" - name: EvalAI gate");
63
- console.log(" env:");
64
- console.log(" EVALAI_API_KEY: ${{ secrets.EVALAI_API_KEY }}");
65
- console.log(" run: npx -y @pauly4010/evalai-sdk@^1 check --format github --onFail import");
277
+ console.log(" To remove: delete evals/, evalai.config.json, and .github/workflows/evalai-gate.yml");
66
278
  console.log("");
67
- console.log("To uninstall: delete evalai.config.json.");
68
279
  return true;
69
280
  }
@@ -1,8 +1,12 @@
1
1
  /**
2
2
  * evalai gate — Run the regression gate
3
3
  *
4
- * Delegates to the project's eval:regression-gate npm script.
5
- * Supports --format json to output the regression-report.json contents.
4
+ * Two modes:
5
+ * 1. Project mode: delegates to eval:regression-gate npm script (full gate)
6
+ * 2. Built-in mode: runs `npm test`, compares against evals/baseline.json
7
+ *
8
+ * Built-in mode activates when no eval:regression-gate script is defined,
9
+ * making `npx evalai gate` work for any project after `npx evalai init`.
6
10
  */
7
11
  export interface GateArgs {
8
12
  format: "human" | "json" | "github";
@@ -2,8 +2,12 @@
2
2
  /**
3
3
  * evalai gate — Run the regression gate
4
4
  *
5
- * Delegates to the project's eval:regression-gate npm script.
6
- * Supports --format json to output the regression-report.json contents.
5
+ * Two modes:
6
+ * 1. Project mode: delegates to eval:regression-gate npm script (full gate)
7
+ * 2. Built-in mode: runs `npm test`, compares against evals/baseline.json
8
+ *
9
+ * Built-in mode activates when no eval:regression-gate script is defined,
10
+ * making `npx evalai gate` work for any project after `npx evalai init`.
7
11
  */
8
12
  var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
9
13
  if (k2 === undefined) k2 = k;
@@ -45,6 +49,7 @@ const node_child_process_1 = require("node:child_process");
45
49
  const fs = __importStar(require("node:fs"));
46
50
  const path = __importStar(require("node:path"));
47
51
  const REPORT_REL = "evals/regression-report.json";
52
+ const BASELINE_REL = "evals/baseline.json";
48
53
  /** Detect the package manager used in the project */
49
54
  function detectPackageManager(cwd) {
50
55
  if (fs.existsSync(path.join(cwd, "pnpm-lock.yaml")))
@@ -66,10 +71,206 @@ function parseGateArgs(argv) {
66
71
  }
67
72
  return args;
68
73
  }
74
/**
 * Guess which test runner the project's `test` script invokes.
 *
 * @param {string} cwd Directory containing package.json.
 * @returns {string} One of "vitest", "jest", "mocha", "node:test", "ava",
 *   "tap", or "unknown" when package.json cannot be read or parsed, has no
 *   string `scripts.test`, or names no recognised runner.
 */
function detectRunner(cwd) {
    let testCmd;
    try {
        const pkg = JSON.parse(fs.readFileSync(path.join(cwd, "package.json"), "utf-8"));
        testCmd = pkg?.scripts?.test;
    }
    catch {
        return "unknown";
    }
    if (typeof testCmd !== "string")
        return "unknown";
    // Checked in order; the first match wins.
    if (testCmd.includes("vitest"))
        return "vitest";
    if (testCmd.includes("jest"))
        return "jest";
    if (testCmd.includes("mocha"))
        return "mocha";
    if (testCmd.includes("node --test"))
        return "node:test";
    // "ava" and "tap" are short enough to occur inside unrelated commands
    // ("java", "tape", "tap-spec"), so they must stand alone as a token.
    if (/(?<![\w-])ava(?![\w-])/.test(testCmd))
        return "ava";
    if (/(?<![\w-])tap(?![\w-])/.test(testCmd))
        return "tap";
    return "unknown";
}
97
/**
 * Build the report returned when the gate itself could not do its job
 * (exit code 2, category "infra_error"), as opposed to a regression.
 */
function builtinInfraError(ctx, message, baseline) {
    return {
        schemaVersion: 1,
        timestamp: ctx.now,
        exitCode: 2,
        category: "infra_error",
        passed: false,
        failures: [message],
        deltas: [],
        baseline,
        durationMs: Date.now() - ctx.t0,
        command: ctx.command,
        runner: ctx.runner,
    };
}
/**
 * Built-in gate: run `<pm> test` (300 s limit) and compare the outcome with
 * evals/baseline.json.
 *
 * A regression (exit code 1) is reported when the baseline recorded passing
 * tests and they now fail, or when the parsed test count is lower than the
 * baseline total. A missing or unusable baseline, or a test command that
 * could not be run to completion, yields an infra_error report (exit code 2).
 *
 * @param {string} cwd Project root.
 * @returns {object} Report with schemaVersion, timestamp, exitCode, category,
 *   passed, failures, deltas, baseline, durationMs, command and runner.
 */
function runBuiltinGate(cwd) {
    const t0 = Date.now();
    const baselinePath = path.join(cwd, BASELINE_REL);
    const now = new Date().toISOString();
    const pm = detectPackageManager(cwd);
    const command = `${pm} test`;
    const runner = detectRunner(cwd);
    const ctx = { t0, now, command, runner };
    // Load baseline
    if (!fs.existsSync(baselinePath)) {
        return builtinInfraError(ctx, "Baseline file not found. Run: npx evalai init", null);
    }
    let baselineData;
    try {
        baselineData = JSON.parse(fs.readFileSync(baselinePath, "utf-8"));
    }
    catch {
        return builtinInfraError(ctx, "Failed to parse evals/baseline.json", null);
    }
    // Valid JSON that is not an object (e.g. `null`) is just as unusable and
    // would crash on the property reads below.
    if (baselineData === null || typeof baselineData !== "object") {
        return builtinInfraError(ctx, "Failed to parse evals/baseline.json", null);
    }
    const baselineMeta = baselineData.updatedAt
        ? { updatedAt: baselineData.updatedAt, updatedBy: baselineData.updatedBy ?? "unknown" }
        : null;
    // Run tests
    const isWin = process.platform === "win32";
    const result = (0, node_child_process_1.spawnSync)(pm, ["test"], {
        cwd,
        stdio: "pipe",
        shell: isWin,
        timeout: 300000,
        // The 1 MiB default kills the child once output exceeds it.
        maxBuffer: 64 * 1024 * 1024,
    });
    // `error` is set when the process could not be spawned, timed out, or
    // overflowed the buffer. None of those says anything about the tests, so
    // they must not be reported as a regression.
    if (result.error) {
        return builtinInfraError(ctx, `Failed to run "${command}": ${result.error.message}`, baselineMeta);
    }
    const testsPassed = result.status === 0;
    const output = (result.stdout?.toString() ?? "") + (result.stderr?.toString() ?? "");
    // Try to extract test count
    let testCount = 0;
    const countMatch = output.match(/(\d+)\s+(?:tests?|specs?)\s+(?:passed|completed)/i) ??
        output.match(/Tests:\s+(\d+)\s+passed/i) ??
        output.match(/(\d+)\s+passing/i) ??
        output.match(/Test Files\s+\d+\s+passed.*\n\s+Tests\s+(\d+)\s+passed/i);
    if (countMatch)
        testCount = parseInt(countMatch[1], 10);
    // Compare against baseline
    const baselinePassed = baselineData.confidenceTests?.passed ?? true;
    const baselineTotal = baselineData.confidenceTests?.total ?? 0;
    const failures = [];
    const deltas = [];
    // Delta: tests passing
    deltas.push({
        metric: "tests_passing",
        baseline: baselinePassed,
        current: testsPassed,
        delta: testsPassed === baselinePassed ? "0" : testsPassed ? "+1" : "-1",
        status: testsPassed ? "pass" : "fail",
    });
    if (!testsPassed && baselinePassed) {
        failures.push("Tests were passing in baseline but are now failing");
    }
    // Delta: test count, emitted when either side has a non-zero count.
    // NOTE(review): an unparsed current count (0) against a non-zero baseline
    // is reported as a drop; confirm that is intended.
    if (testCount > 0 || baselineTotal > 0) {
        const countDelta = testCount - baselineTotal;
        deltas.push({
            metric: "test_count",
            baseline: baselineTotal,
            current: testCount,
            delta: countDelta >= 0 ? `+${countDelta}` : `${countDelta}`,
            status: testCount >= baselineTotal ? "pass" : "fail",
        });
        if (testCount < baselineTotal) {
            failures.push(`Test count dropped from ${baselineTotal} to ${testCount} (${countDelta})`);
        }
    }
    const hasRegression = failures.length > 0;
    return {
        schemaVersion: 1,
        timestamp: now,
        exitCode: hasRegression ? 1 : 0,
        category: hasRegression ? "regression" : "pass",
        passed: !hasRegression,
        failures,
        deltas,
        baseline: baselineMeta,
        durationMs: Date.now() - t0,
        command,
        runner,
    };
}
205
+ // ── Format helpers ──
206
/**
 * Print the gate report for a terminal: a headline, a fixed-width table of
 * deltas (when there are any), a bulleted failure list (when there are any),
 * and a trailing blank line.
 *
 * @param {object} report Gate report; reads passed, category, deltas, failures.
 */
function formatHuman(report) {
    const headlineIcon = report.passed ? "✅" : "❌";
    console.log(`\n${headlineIcon} EvalAI Gate: ${report.category.toUpperCase()}\n`);
    if (report.deltas.length > 0) {
        // Widths of the four padded columns; the final column is unpadded.
        const widths = [16, 10, 10, 8];
        const tableRow = (cells, last) => ` ${cells.map((cell, i) => cell.padEnd(widths[i])).join(" ")} ${last}`;
        console.log(tableRow(["Metric", "Baseline", "Current", "Delta"], "Status"));
        console.log(tableRow(widths.map((w) => "-".repeat(w)), "------"));
        report.deltas.forEach((d) => {
            const mark = d.status === "pass" ? "✔" : "✖";
            console.log(tableRow([d.metric, String(d.baseline), String(d.current), d.delta], mark));
        });
    }
    if (report.failures.length > 0) {
        console.log("\n Failures:");
        report.failures.forEach((failure) => {
            console.log(` • ${failure}`);
        });
    }
    console.log("");
}
226
/**
 * Render the gate report as Markdown, append it to the file named by
 * $GITHUB_STEP_SUMMARY when that variable is set, and print it to stdout.
 *
 * The report may come from a project's own gate script rather than the
 * built-in gate, so `deltas`, `failures` and `schemaVersion` are treated as
 * optional instead of throwing on a partial report.
 *
 * @param {object} report Gate report; reads passed, category, deltas,
 *   failures and schemaVersion.
 */
function formatGithub(report) {
    const icon = report.passed ? "✅" : "❌";
    const deltas = report.deltas ?? [];
    const failures = report.failures ?? [];
    const lines = [
        `## ${icon} EvalAI Gate: ${report.category}`,
        "",
        "| Metric | Baseline | Current | Delta | Status |",
        "|--------|----------|---------|-------|--------|",
    ];
    for (const d of deltas) {
        const si = d.status === "pass" ? "✅" : "❌";
        lines.push(`| ${d.metric} | ${d.baseline} | ${d.current} | ${d.delta} | ${si} |`);
    }
    if (failures.length > 0) {
        lines.push("", "### Failures", "");
        for (const f of failures) {
            lines.push(`- ${f}`);
        }
    }
    lines.push("", `Schema version: ${report.schemaVersion ?? "unknown"}`);
    const md = lines.join("\n");
    // Write to $GITHUB_STEP_SUMMARY if available
    const summaryPath = process.env.GITHUB_STEP_SUMMARY;
    if (summaryPath) {
        try {
            fs.appendFileSync(summaryPath, `${md}\n`);
        }
        catch {
            // Best effort: the summary is optional, stdout still gets the report.
        }
    }
    console.log(md);
}
258
/**
 * Emit the report in the format selected by `args.format`: "json" writes
 * pretty-printed JSON to stdout, "github" renders Markdown, and any other
 * value falls back to the human-readable table.
 *
 * @param {object} report Gate report to emit.
 * @param {{format: string}} args Parsed gate arguments.
 */
function formatReport(report, args) {
    switch (args.format) {
        case "json":
            process.stdout.write(JSON.stringify(report, null, 2));
            return;
        case "github":
            formatGithub(report);
            return;
        default:
            formatHuman(report);
    }
}
269
+ // ── Main ──
69
270
  function runGate(argv) {
70
271
  const cwd = process.cwd();
71
272
  const args = parseGateArgs(argv);
72
- // Check if eval:regression-gate script exists
273
+ // Check for package.json
73
274
  const pkgPath = path.join(cwd, "package.json");
74
275
  if (!fs.existsSync(pkgPath)) {
75
276
  console.error("❌ No package.json found. Run this from your project root.");
@@ -83,68 +284,52 @@ function runGate(argv) {
83
284
  console.error("❌ Failed to parse package.json");
84
285
  return 1;
85
286
  }
86
- if (!pkg.scripts?.["eval:regression-gate"]) {
87
- console.error("❌ Missing 'eval:regression-gate' script in package.json.");
88
- console.error(' Add it: "eval:regression-gate": "npx tsx scripts/regression-gate.ts"');
89
- return 1;
90
- }
91
- const pm = detectPackageManager(cwd);
92
- const isWin = process.platform === "win32";
93
- // For json format, suppress human output and print report JSON
94
- const stdio = args.format === "json" ? "pipe" : "inherit";
95
- const result = (0, node_child_process_1.spawnSync)(pm, ["run", "eval:regression-gate"], {
96
- cwd,
97
- stdio: stdio,
98
- shell: isWin,
99
- });
100
- const exitCode = result.status ?? 1;
101
- if (args.format === "json") {
102
- // Output the regression report as JSON
103
- const reportPath = path.join(cwd, REPORT_REL);
104
- if (fs.existsSync(reportPath)) {
105
- const report = fs.readFileSync(reportPath, "utf-8");
106
- process.stdout.write(report);
107
- }
108
- else {
109
- console.error(JSON.stringify({ error: "regression-report.json not found", exitCode }));
287
+ // ── Project mode: delegate to eval:regression-gate if it exists ──
288
+ if (pkg.scripts?.["eval:regression-gate"]) {
289
+ const pm = detectPackageManager(cwd);
290
+ const isWin = process.platform === "win32";
291
+ const stdio = args.format === "json" ? "pipe" : "inherit";
292
+ const result = (0, node_child_process_1.spawnSync)(pm, ["run", "eval:regression-gate"], {
293
+ cwd,
294
+ stdio: stdio,
295
+ shell: isWin,
296
+ });
297
+ const exitCode = result.status ?? 1;
298
+ // Post-process report for json/github formats
299
+ if (args.format === "json") {
300
+ const reportPath = path.join(cwd, REPORT_REL);
301
+ if (fs.existsSync(reportPath)) {
302
+ process.stdout.write(fs.readFileSync(reportPath, "utf-8"));
303
+ }
304
+ else {
305
+ console.error(JSON.stringify({ error: "regression-report.json not found", exitCode }));
306
+ }
110
307
  }
111
- }
112
- else if (args.format === "github") {
113
- // Output GitHub Step Summary markdown
114
- const reportPath = path.join(cwd, REPORT_REL);
115
- if (fs.existsSync(reportPath)) {
116
- try {
117
- const report = JSON.parse(fs.readFileSync(reportPath, "utf-8"));
118
- const icon = report.passed ? "✅" : "❌";
119
- const lines = [
120
- `## ${icon} Regression Gate: ${report.category}`,
121
- "",
122
- "| Metric | Baseline | Current | Delta | Status |",
123
- "|--------|----------|---------|-------|--------|",
124
- ];
125
- for (const d of report.deltas ?? []) {
126
- const statusIcon = d.status === "pass" ? "✅" : "❌";
127
- lines.push(`| ${d.metric} | ${d.baseline} | ${d.current} | ${d.delta} | ${statusIcon} |`);
128
- }
129
- if (report.failures?.length > 0) {
130
- lines.push("", "### Failures", "");
131
- for (const f of report.failures) {
132
- lines.push(`- ${f}`);
133
- }
308
+ else if (args.format === "github") {
309
+ const reportPath = path.join(cwd, REPORT_REL);
310
+ if (fs.existsSync(reportPath)) {
311
+ try {
312
+ const report = JSON.parse(fs.readFileSync(reportPath, "utf-8"));
313
+ formatGithub(report);
134
314
  }
135
- lines.push("", `Schema version: ${report.schemaVersion ?? "unknown"}`);
136
- const md = lines.join("\n");
137
- // Write to $GITHUB_STEP_SUMMARY if available
138
- const summaryPath = process.env.GITHUB_STEP_SUMMARY;
139
- if (summaryPath) {
140
- fs.appendFileSync(summaryPath, `${md}\n`);
315
+ catch {
316
+ // human output already printed
141
317
  }
142
- console.log(md);
143
- }
144
- catch {
145
- // Fall through — human output already printed
146
318
  }
147
319
  }
320
+ return exitCode;
321
+ }
322
+ // ── Built-in mode: run tests + compare against baseline ──
323
+ if (args.format === "human") {
324
+ console.log("\n Running EvalAI regression gate (built-in mode)...\n");
325
+ }
326
+ const report = runBuiltinGate(cwd);
327
+ // Write report artifact
328
+ const evalsDir = path.join(cwd, "evals");
329
+ if (!fs.existsSync(evalsDir)) {
330
+ fs.mkdirSync(evalsDir, { recursive: true });
148
331
  }
149
- return exitCode;
332
+ fs.writeFileSync(path.join(cwd, REPORT_REL), `${JSON.stringify(report, null, 2)}\n`);
333
+ formatReport(report, args);
334
+ return report.exitCode;
150
335
  }
@@ -0,0 +1,15 @@
1
+ /**
2
+ * evalai upgrade --full — Upgrade from Tier 1 (built-in gate) to Tier 2 (full gate)
3
+ *
4
+ * What it does:
5
+ * 1. Adds full regression gate script (scripts/regression-gate.ts)
6
+ * 2. Adds baseline governance workflow (.github/workflows/baseline-governance.yml)
7
+ * 3. Updates package.json with eval:regression-gate + eval:baseline-update scripts
8
+ * 4. Updates .github/workflows/evalai-gate.yml to use project mode
9
+ * 5. Prints next steps
10
+ */
11
+ export interface UpgradeArgs {
12
+ full: boolean;
13
+ }
14
+ export declare function parseUpgradeArgs(argv: string[]): UpgradeArgs;
15
+ export declare function runUpgrade(argv: string[]): number;