@evalgate/sdk 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +638 -0
- package/README.md +398 -0
- package/dist/assertions.d.ts +189 -0
- package/dist/assertions.js +662 -0
- package/dist/batch.d.ts +68 -0
- package/dist/batch.js +179 -0
- package/dist/cache.d.ts +65 -0
- package/dist/cache.js +131 -0
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +132 -0
- package/dist/cli/baseline.d.ts +10 -0
- package/dist/cli/baseline.js +172 -0
- package/dist/cli/check.d.ts +73 -0
- package/dist/cli/check.js +355 -0
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +112 -0
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +230 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +685 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +419 -0
- package/dist/cli/doctor.d.ts +88 -0
- package/dist/cli/doctor.js +675 -0
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.d.ts +58 -0
- package/dist/cli/explain.js +561 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +135 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +110 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +103 -0
- package/dist/cli/formatters/types.d.ts +103 -0
- package/dist/cli/formatters/types.js +8 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +179 -0
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +252 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.js +332 -0
- package/dist/cli/init.d.ts +16 -0
- package/dist/cli/init.js +292 -0
- package/dist/cli/manifest.d.ts +103 -0
- package/dist/cli/manifest.js +282 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +89 -0
- package/dist/cli/print-config.d.ts +29 -0
- package/dist/cli/print-config.js +270 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/regression-gate.d.ts +15 -0
- package/dist/cli/regression-gate.js +341 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +132 -0
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +395 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +91 -0
- package/dist/cli/upgrade.d.ts +15 -0
- package/dist/cli/upgrade.js +492 -0
- package/dist/cli/workspace.d.ts +31 -0
- package/dist/cli/workspace.js +68 -0
- package/dist/client.d.ts +368 -0
- package/dist/client.js +893 -0
- package/dist/client.request.test.d.ts +1 -0
- package/dist/client.request.test.js +232 -0
- package/dist/context.d.ts +134 -0
- package/dist/context.js +215 -0
- package/dist/errors.d.ts +82 -0
- package/dist/errors.js +298 -0
- package/dist/export.d.ts +195 -0
- package/dist/export.js +344 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.js +153 -0
- package/dist/integrations/anthropic.d.ts +91 -0
- package/dist/integrations/anthropic.js +163 -0
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +232 -0
- package/dist/integrations/openai.d.ts +92 -0
- package/dist/integrations/openai.js +160 -0
- package/dist/local.d.ts +39 -0
- package/dist/local.js +148 -0
- package/dist/logger.d.ts +128 -0
- package/dist/logger.js +227 -0
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +74 -0
- package/dist/pagination.js +139 -0
- package/dist/regression.d.ts +100 -0
- package/dist/regression.js +44 -0
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +400 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +244 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +357 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +403 -0
- package/dist/runtime/run-report.d.ts +200 -0
- package/dist/runtime/run-report.js +222 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/snapshot.d.ts +176 -0
- package/dist/snapshot.js +322 -0
- package/dist/streaming.d.ts +173 -0
- package/dist/streaming.js +268 -0
- package/dist/testing.d.ts +273 -0
- package/dist/testing.js +317 -0
- package/dist/types.d.ts +754 -0
- package/dist/types.js +54 -0
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +41 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +389 -0
- package/dist/workflows.js +671 -0
- package/package.json +117 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
/**
|
|
4
|
+
* evalgate — EvalGate CLI
|
|
5
|
+
*
|
|
6
|
+
* Commands:
|
|
7
|
+
* evalgate init — Create evalgate.config.json
|
|
8
|
+
* evalgate check — CI/CD evaluation gate (see evalgate check --help)
|
|
9
|
+
*/
|
|
10
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
+
const baseline_1 = require("./baseline");
|
|
12
|
+
const check_1 = require("./check");
|
|
13
|
+
const ci_1 = require("./ci");
|
|
14
|
+
const diff_1 = require("./diff");
|
|
15
|
+
const discover_1 = require("./discover");
|
|
16
|
+
const doctor_1 = require("./doctor");
|
|
17
|
+
const explain_1 = require("./explain");
|
|
18
|
+
const impact_analysis_1 = require("./impact-analysis");
|
|
19
|
+
const init_1 = require("./init");
|
|
20
|
+
const migrate_1 = require("./migrate");
|
|
21
|
+
const print_config_1 = require("./print-config");
|
|
22
|
+
const regression_gate_1 = require("./regression-gate");
|
|
23
|
+
const run_1 = require("./run");
|
|
24
|
+
const share_1 = require("./share");
|
|
25
|
+
const upgrade_1 = require("./upgrade");
|
|
26
|
+
// ── CLI dispatcher ──
// Routes `evalgate <subcommand> [args]` to the matching handler module and
// turns the handler's result into a process exit code. Unknown or missing
// subcommands print the usage text.
const argv = process.argv.slice(2);
const subcommand = argv[0];
/**
 * Return the token that follows `flag` in `args`.
 *
 * Falls back when the flag is absent, when it is the last token, or when the
 * next token is itself a `--option` — so `--format` with no value yields the
 * default instead of `undefined` or a neighbouring flag name.
 *
 * @param {string[]} args - Arguments after the subcommand.
 * @param {string} flag - Option name, e.g. "--base".
 * @param {string} [fallback] - Value to use when no usable value is present.
 * @returns {string | undefined} The flag's value or `fallback`.
 */
function readFlagValue(args, flag, fallback = undefined) {
    const at = args.indexOf(flag);
    if (at === -1) {
        return fallback;
    }
    const value = args[at + 1];
    if (value === undefined || value.startsWith("--")) {
        return fallback;
    }
    return value;
}
/**
 * Build a rejection handler that prints the standard "EvalGate ERROR" line
 * and terminates the process with `exitCode`.
 *
 * @param {number} exitCode - Exit code to use for this subcommand's failures.
 * @returns {(err: unknown) => void} Handler suitable for `.catch()`.
 */
function failWith(exitCode) {
    return (err) => {
        console.error(`EvalGate ERROR: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(exitCode);
    };
}
if (subcommand === "init") {
    const cwd = process.cwd();
    const ok = (0, init_1.runInit)(cwd);
    process.exit(ok ? 0 : 1);
}
else if (subcommand === "baseline") {
    const code = (0, baseline_1.runBaseline)(argv.slice(1));
    process.exit(code);
}
else if (subcommand === "gate") {
    const code = (0, regression_gate_1.runGate)(argv.slice(1));
    process.exit(code);
}
else if (subcommand === "migrate") {
    // Only `migrate config` is implemented.
    const migrateSubcommand = argv[1];
    if (migrateSubcommand === "config") {
        let inputPath = "";
        let outputPath = "";
        let verbose = false;
        let helpers = true;
        let preserveIds = true;
        let provenance = true;
        // Start at 2: argv[0] is "migrate", argv[1] is "config".
        for (let i = 2; i < argv.length; i++) {
            const arg = argv[i];
            if (arg === "--in" || arg === "-i") {
                // Consumes the next token; a missing value is caught by the check below.
                inputPath = argv[++i];
            }
            else if (arg === "--out" || arg === "-o") {
                outputPath = argv[++i];
            }
            else if (arg === "--verbose" || arg === "-v") {
                verbose = true;
            }
            else if (arg === "--no-helpers") {
                helpers = false;
            }
            else if (arg === "--no-preserve-ids") {
                preserveIds = false;
            }
            else if (arg === "--no-provenance") {
                provenance = false;
            }
        }
        if (!inputPath || !outputPath) {
            console.error("Error: Both --in and --out options are required");
            console.error("Usage: evalgate migrate config --in <input> --out <output> [options]");
            process.exit(1);
        }
        (0, migrate_1.migrateConfig)({
            input: inputPath,
            output: outputPath,
            verbose,
            helpers,
            preserveIds,
            provenance,
        }).catch((err) => {
            console.error(`Migration failed: ${err instanceof Error ? err.message : String(err)}`);
            process.exit(1);
        });
    }
    else {
        console.error("Error: Unknown migrate subcommand. Use 'evalgate migrate config'");
        process.exit(1);
    }
}
else if (subcommand === "upgrade") {
    const code = (0, upgrade_1.runUpgrade)(argv.slice(1));
    process.exit(code);
}
else if (subcommand === "doctor") {
    (0, doctor_1.runDoctor)(argv.slice(1))
        .then((code) => process.exit(code))
        .catch(failWith(1));
}
else if (subcommand === "check") {
    const parsed = (0, check_1.parseArgs)(argv.slice(1));
    if (!parsed.ok) {
        console.error(parsed.message);
        process.exit(parsed.exitCode);
    }
    (0, check_1.runCheck)(parsed.args)
        .then((code) => process.exit(code))
        .catch(failWith(4));
}
else if (subcommand === "explain") {
    (0, explain_1.runExplain)(argv.slice(1))
        .then((code) => process.exit(code))
        .catch(failWith(1));
}
else if (subcommand === "print-config") {
    const code = (0, print_config_1.runPrintConfig)(argv.slice(1));
    process.exit(code);
}
else if (subcommand === "share") {
    const parsed = (0, share_1.parseShareArgs)(argv.slice(1));
    if ("error" in parsed) {
        console.error(parsed.error);
        process.exit(1);
    }
    (0, share_1.runShare)(parsed)
        .then((code) => process.exit(code))
        .catch(failWith(1));
}
else if (subcommand === "discover") {
    const args = argv.slice(1);
    (0, discover_1.discoverSpecs)({ manifest: args.includes("--manifest") })
        .then(() => process.exit(0))
        .catch(failWith(1));
}
else if (subcommand === "impact-analysis") {
    const args = argv.slice(1);
    const baseBranch = readFlagValue(args, "--base", "main");
    const changedFiles = readFlagValue(args, "--changed-files")?.split(",");
    const format = readFlagValue(args, "--format", "human");
    (0, impact_analysis_1.runImpactAnalysisCLI)({ baseBranch, changedFiles, format })
        .then(() => process.exit(0))
        .catch(failWith(2));
}
else if (subcommand === "run") {
    const args = argv.slice(1);
    const baseBranch = readFlagValue(args, "--base");
    (0, run_1.runEvaluationsCLI)({
        specIds: readFlagValue(args, "--spec-ids")?.split(","),
        // --impacted-only only takes effect when a --base value was supplied.
        impactedOnly: args.includes("--impacted-only") && Boolean(baseBranch),
        baseBranch,
        format: readFlagValue(args, "--format", "human"),
        writeResults: args.includes("--write-results"),
    })
        .then(() => process.exit(0))
        .catch(failWith(2));
}
else if (subcommand === "diff") {
    const args = argv.slice(1);
    const base = readFlagValue(args, "--base");
    const head = readFlagValue(args, "--head");
    const format = readFlagValue(args, "--format", "human");
    (0, diff_1.runDiffCLI)({ base, head, format })
        .then(() => process.exit(0))
        .catch(failWith(2));
}
else if (subcommand === "ci") {
    const args = argv.slice(1);
    const base = readFlagValue(args, "--base");
    const impactedOnly = args.includes("--impacted-only");
    const format = readFlagValue(args, "--format", "human");
    const writeResults = args.includes("--write-results");
    (0, ci_1.runCICLI)({ base, impactedOnly, format, writeResults })
        .then(() => process.exit(0))
        .catch(failWith(2));
}
else {
    // No subcommand, an unknown one, or an explicit help request: print usage.
    console.log(`EvalGate CLI

Usage:
evalgate init Create evalgate.config.json + baseline + CI workflow
evalgate discover Discover behavioral specs in project and show statistics
evalgate discover --manifest Generate evaluation manifest for incremental analysis
evalgate impact-analysis Analyze impact of changes and suggest targeted tests
--base <branch> Base branch to compare against (default: main)
--changed-files <files> Comma-separated list of changed files (for CI)
--format <fmt> Output format: human (default), json
evalgate ci One-command CI loop (manifest → impact → run → diff)
--base <ref> Base reference for diff (baseline|last|<runId>|<path>|<gitref>)
--impacted-only Run only specs impacted by changes
--format <fmt> Output format: human (default), json, github
--write-results Write run results to .evalgate/last-run.json
evalgate run Run evaluation specifications
--spec-ids <ids> Comma-separated list of spec IDs to run
--impacted-only Run only specs impacted by changes (requires --base)
--base <branch> Base branch for impact analysis (with --impacted-only)
--format <fmt> Output format: human (default), json
--write-results Write results to .evalgate/last-run.json
evalgate diff Compare two run reports and show behavioral changes
--base <branch> Base branch or report path (default: main)
--head <path> Head report path (default: .evalgate/last-run.json)
--format <fmt> Output format: human (default), json
evalgate gate [options] Run regression gate (local test-based, no API needed)
evalgate check [options] CI/CD evaluation gate (API-based)
evalgate explain [options] Explain last gate/check failure with root causes + fixes
evalgate doctor [options] Comprehensive CI/CD readiness checklist
evalgate baseline init Create starter evals/baseline.json
evalgate baseline update Run tests and update baseline with real scores
evalgate upgrade --full Upgrade from Tier 1 to Tier 2 (full gate)
evalgate print-config Show resolved config with source-of-truth annotations
evalgate share [options] Create share link for a run

Options for gate:
--format <fmt> Output format: human (default), json, github

Options for check:
--evaluationId <id> Evaluation to gate on (or from config)
--apiKey <key> API key (or EVALAI_API_KEY env)
--format <fmt> Output format: human (default), json, github
--explain Show score breakdown and thresholds
--onFail import When gate fails, import run with CI context
--minScore <n> Fail if score < n (0-100)
--maxDrop <n> Fail if score dropped > n from baseline
--warnDrop <n> Warn (exit 8) if score dropped > n but < maxDrop
--minN <n> Fail if total test cases < n
--allowWeakEvidence Allow weak evidence level
--policy <name> Enforce policy (HIPAA, SOC2, GDPR, etc.)
--baseline <mode> "published", "previous", or "production"
--share <mode> Share link: always | fail | never (fail = only when gate fails)
--baseUrl <url> API base URL

Options for explain:
--report <path> Path to report JSON (default: evals/regression-report.json)
--format <fmt> Output format: human (default), json

Options for print-config:
--format <fmt> Output format: human (default), json

Options for doctor:
--report Output JSON diagnostic bundle
--format <fmt> Output format: human (default), json
--strict Treat warnings as failures (exit 2)
--apiKey <key> API key (or EVALAI_API_KEY env)
--baseUrl <url> API base URL
--evaluationId <id> Evaluation to verify

Examples:
evalgate init
evalgate discover
evalgate discover --manifest
evalgate impact-analysis --base main
evalgate impact-analysis --base main --format json
evalgate impact-analysis --changed-files src/utils.ts,datasets/test.json
evalgate run
evalgate run --spec-ids spec1,spec2
evalgate run --impacted-only --base main
evalgate run --format json --write-results
evalgate diff
evalgate diff --base main
evalgate diff --base main --format json
evalgate diff --base .evalgate/runs/base.json --head .evalgate/last-run.json
evalgate gate
evalgate gate --format json
evalgate explain
evalgate doctor
evalgate print-config
evalgate doctor --report
evalgate check --minScore 92 --evaluationId 42 --apiKey $EVALAI_API_KEY
evalgate check --policy HIPAA --evaluationId 42 --apiKey $EVALAI_API_KEY
evalgate share --scope run --evaluationId 42 --runId 123 --expires 7d --apiKey $EVALAI_API_KEY
`);
    // Explicit help is a success; anything else that reached here is a usage error.
    process.exit(subcommand === "--help" || subcommand === "-h" ? 0 : 1);
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* evalgate init — Full project scaffolder
|
|
4
|
+
*
|
|
5
|
+
* Zero-to-gate in under 5 minutes:
|
|
6
|
+
* npx evalgate init
|
|
7
|
+
* git push
|
|
8
|
+
* …CI starts blocking regressions.
|
|
9
|
+
*
|
|
10
|
+
* What it does:
|
|
11
|
+
* 1. Detects Node repo + package manager
|
|
12
|
+
* 2. Creates evals/ directory + baseline.json
|
|
13
|
+
* 3. Installs .github/workflows/evalgate-gate.yml
|
|
14
|
+
* 4. Creates evalgate.config.json
* 5. Prints next steps (no docs required)
|
|
15
|
+
*/
|
|
16
|
+
export declare function runInit(cwd?: string): boolean;
|
package/dist/cli/init.js
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
/**
|
|
4
|
+
* evalgate init — Full project scaffolder
|
|
5
|
+
*
|
|
6
|
+
* Zero-to-gate in under 5 minutes:
|
|
7
|
+
* npx evalgate init
|
|
8
|
+
* git push
|
|
9
|
+
* …CI starts blocking regressions.
|
|
10
|
+
*
|
|
11
|
+
* What it does:
|
|
12
|
+
* 1. Detects Node repo + package manager
|
|
13
|
+
* 2. Creates evals/ directory + baseline.json
|
|
14
|
+
* 3. Installs .github/workflows/evalgate-gate.yml
|
|
15
|
+
* 4. Creates evalgate.config.json
* 5. Prints next steps (no docs required)
|
|
16
|
+
*/
|
|
17
|
+
// Module-interop helpers backing the `__importStar(require(...))` calls below.
// NOTE(review): these have the shape of compiler-emitted CommonJS shims —
// presumably generated; confirm against the build before hand-editing.

// Exposes property `k` of module `m` on object `o` under the name `k2`
// (defaults to `k`). When `m[k]` has no own descriptor, is an accessor on a
// non-`__esModule` object, or is a writable/configurable data property, a
// getter is installed instead so `o[k2]` always reads the current `m[k]`.
// Falls back to a plain assignment where `Object.create` is unavailable.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// Sets `o.default = v` (as an enumerable property where `Object.create` exists).
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
// Wraps a required module as a namespace object: modules flagged `__esModule`
// are returned unchanged; otherwise every own property except "default" is
// re-bound onto a fresh object and the module itself becomes its `default`.
var __importStar = (this && this.__importStar) || (function () {
    // Lazily picks the own-key enumerator on first call, then reuses it.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
|
|
50
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
51
|
+
exports.runInit = runInit;
|
|
52
|
+
const node_child_process_1 = require("node:child_process");
|
|
53
|
+
const fs = __importStar(require("node:fs"));
|
|
54
|
+
const path = __importStar(require("node:path"));
|
|
55
|
+
/**
 * Inspect `cwd` for a Node.js project.
 *
 * Reads `package.json`, picks the package manager from the lockfile present
 * (pnpm-lock.yaml → pnpm, else yarn.lock → yarn, else npm) and reports whether
 * a real `test` script is configured (npm's default "no test specified"
 * placeholder does not count).
 *
 * @param {string} cwd - Directory to inspect.
 * @returns {{cwd: string, pm: string, hasTestScript: boolean, testScript: string, name: string} | null}
 *   Project description, or `null` when `package.json` is missing, is not
 *   valid JSON, or does not contain a JSON object.
 */
function detectProject(cwd) {
    const pkgPath = path.join(cwd, "package.json");
    if (!fs.existsSync(pkgPath))
        return null;
    let pkg;
    try {
        pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
    }
    catch {
        return null;
    }
    // Valid JSON that is not an object (e.g. a bare `null`) is not a usable
    // manifest; without this guard `pkg.scripts` below throws on `null`.
    if (pkg === null || typeof pkg !== "object" || Array.isArray(pkg))
        return null;
    let pm = "npm";
    if (fs.existsSync(path.join(cwd, "pnpm-lock.yaml")))
        pm = "pnpm";
    else if (fs.existsSync(path.join(cwd, "yarn.lock")))
        pm = "yarn";
    // Only a string can be a runnable script; anything else counts as "no script".
    const rawTestScript = pkg.scripts?.test;
    const testScript = typeof rawTestScript === "string" ? rawTestScript : "";
    const hasTestScript = testScript !== "" && testScript !== 'echo "Error: no test specified" && exit 1';
    return {
        cwd,
        pm,
        hasTestScript,
        testScript,
        name: pkg.name ?? path.basename(cwd),
    };
}
|
|
81
|
+
// ── Step helpers ──
|
|
82
|
+
/**
 * Report a completed scaffolding step on stdout, prefixed with a check mark.
 *
 * @param {string} msg - Description of the step that succeeded.
 */
function ok(msg) {
    const line = ` ✔ ${msg}`;
    console.log(line);
}
|
|
85
|
+
/**
 * Report a scaffolding step that was skipped, prefixed with a dash.
 *
 * @param {string} msg - Reason the step was not performed.
 */
function skip(msg) {
    const line = ` – ${msg}`;
    console.log(line);
}
|
|
88
|
+
// ── 1. Create evals/ + baseline.json ──
|
|
89
|
+
/**
 * Create `evals/baseline.json` under `cwd` unless it already exists.
 *
 * When the project has a test script, the package manager's `test` command is
 * run once (2-minute timeout) to record whether it exits cleanly and, if the
 * output contains a recognisable summary line, how many tests it reported.
 *
 * @param {string} cwd - Project root.
 * @param {{pm: string, hasTestScript: boolean, name: string}} project - Result of project detection.
 * @returns {boolean} Always `true` (file created or already present).
 */
function createBaseline(cwd, project) {
    const evalsDir = path.join(cwd, "evals");
    const baselinePath = path.join(evalsDir, "baseline.json");
    // Never overwrite an existing baseline.
    if (fs.existsSync(baselinePath)) {
        skip("evals/baseline.json already exists");
        return true;
    }
    if (!fs.existsSync(evalsDir)) {
        fs.mkdirSync(evalsDir, { recursive: true });
    }
    const author = process.env.USER || process.env.USERNAME || "unknown";
    // Timestamp is taken before the test run, matching the recorded "generatedAt".
    const timestamp = new Date().toISOString();
    const confidence = { passed: true, total: 0 };
    if (project.hasTestScript) {
        const run = (0, node_child_process_1.spawnSync)(project.pm, ["test"], {
            cwd,
            stdio: "pipe",
            shell: process.platform === "win32",
            timeout: 120000,
        });
        confidence.passed = run.status === 0;
        // Search stdout followed by stderr for the first summary format that matches.
        const combined = `${run.stdout?.toString() ?? ""}${run.stderr?.toString() ?? ""}`;
        const summaryPatterns = [
            /(\d+)\s+(?:tests?|specs?)\s+(?:passed|completed)/i,
            /Tests:\s+(\d+)\s+passed/i,
            /(\d+)\s+passing/i,
        ];
        let hit = null;
        for (const pattern of summaryPatterns) {
            hit = combined.match(pattern);
            if (hit) {
                break;
            }
        }
        if (hit) {
            confidence.total = parseInt(hit[1], 10);
        }
    }
    // Key order is significant: it is the order written to the JSON file.
    const baseline = {
        schemaVersion: 1,
        description: `Regression gate baseline for ${project.name}`,
        generatedAt: timestamp,
        generatedBy: author,
        commitSha: getHeadSha(cwd),
        updatedAt: timestamp,
        updatedBy: author,
        tolerance: {
            scoreDrop: 5,
            passRateDrop: 5,
            maxLatencyIncreaseMs: 200,
            maxCostIncreaseUsd: 0.05,
        },
        goldenEval: {
            score: 100,
            passRate: 100,
            totalCases: 3,
            passedCases: 3,
        },
        confidenceTests: {
            passed: confidence.passed,
            total: confidence.total,
        },
        productMetrics: {},
    };
    const serialized = JSON.stringify(baseline, null, 2);
    fs.writeFileSync(baselinePath, `${serialized}\n`);
    ok("Created evals/baseline.json");
    return true;
}
|
|
151
|
+
/**
 * Resolve the abbreviated commit SHA of `HEAD` for the repository at `cwd`.
 *
 * @param {string} cwd - Directory in which to run git.
 * @returns {string} The short SHA, or the placeholder "0000000" when git
 *   cannot be started, exits non-zero (not a repository, no commits yet), or
 *   prints nothing.
 */
function getHeadSha(cwd) {
    const placeholder = "0000000";
    try {
        const result = (0, node_child_process_1.spawnSync)("git", ["rev-parse", "--short", "HEAD"], {
            cwd,
            stdio: "pipe",
        });
        // Trust stdout only on a clean exit: a failing rev-parse can still
        // write the unresolved argument to stdout, which is not a SHA.
        if (result.error || result.status !== 0) {
            return placeholder;
        }
        return result.stdout?.toString().trim() || placeholder;
    }
    catch {
        return placeholder;
    }
}
|
|
163
|
+
// ── 2. Install GitHub Actions workflow ──
|
|
164
|
+
/**
 * Write `.github/workflows/evalgate-gate.yml` under `cwd` unless it already
 * exists. The workflow checks out the repo, sets up Node 20 with the detected
 * package manager, runs `evalgate doctor` (non-blocking) and `evalgate gate`,
 * then uploads the report files as an artifact.
 *
 * @param {string} cwd - Project root.
 * @param {{pm: string}} project - Result of project detection; `pm` selects
 *   the install command and the setup-node cache key.
 * @returns {boolean} Always `true` (file created or already present).
 */
function installWorkflow(cwd, project) {
    const workflowDir = path.join(cwd, ".github", "workflows");
    const workflowPath = path.join(workflowDir, "evalgate-gate.yml");
    // Never overwrite a workflow the user may have customised.
    if (fs.existsSync(workflowPath)) {
        skip(".github/workflows/evalgate-gate.yml already exists");
        return true;
    }
    if (!fs.existsSync(workflowDir)) {
        fs.mkdirSync(workflowDir, { recursive: true });
    }
    const installCmd = project.pm === "pnpm"
        ? "pnpm install --frozen-lockfile"
        : project.pm === "yarn"
            ? "yarn install --frozen-lockfile"
            : "npm ci";
    // YAML nesting is significant: these lines are spliced in as items of the
    // job's `steps:` list, so they carry the same six-space indent as the
    // other steps in the template below. pnpm needs its own setup action
    // before setup-node.
    const setupSteps = project.pm === "pnpm"
        ? `      - uses: pnpm/action-setup@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: pnpm
      - run: ${installCmd}`
        : `      - uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: ${project.pm}
      - run: ${installCmd}`;
    // `\${{ ... }}` keeps the GitHub expression literal instead of
    // interpolating it as a JavaScript template placeholder.
    const workflow = `# EvalGate Regression Gate
# Auto-generated by: npx evalgate init
# Blocks PRs that regress test health.
name: EvalGate Gate

on:
  pull_request:
    branches: [main]

concurrency:
  group: evalgate-\${{ github.ref }}
  cancel-in-progress: true

jobs:
  regression-gate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
${setupSteps}
      - name: EvalGate Doctor (preflight)
        continue-on-error: true # Strict: set to false, or use: evalgate doctor --strict
        run: npx -y @evalgate/sdk@^2 doctor

      - name: EvalGate Regression Gate
        run: npx -y @evalgate/sdk@^2 gate --format github

      - name: Upload report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: evalgate-report
          path: |
            evals/regression-report.json
            .evalgate/last-report.json
          if-no-files-found: ignore
`;
    fs.writeFileSync(workflowPath, workflow);
    ok("Created .github/workflows/evalgate-gate.yml");
    return true;
}
|
|
231
|
+
// ── 3. Create evalgate.config.json ──
|
|
232
|
+
/**
 * Write a starter `evalgate.config.json` into `cwd` unless one already exists.
 * The starter config has an empty `evaluationId` and points the gate at the
 * baseline and report files under `evals/`.
 *
 * @param {string} cwd - Project root.
 * @returns {boolean} Always `true` (file created or already present).
 */
function createConfig(cwd) {
    const target = path.join(cwd, "evalgate.config.json");
    if (!fs.existsSync(target)) {
        const serialized = JSON.stringify({
            evaluationId: "",
            gate: {
                baseline: "evals/baseline.json",
                report: "evals/regression-report.json",
            },
        }, null, 2);
        fs.writeFileSync(target, `${serialized}\n`);
        ok("Created evalgate.config.json");
        return true;
    }
    // Leave an existing config untouched.
    skip("evalgate.config.json already exists");
    return true;
}
|
|
249
|
+
// ── Main ──
|
|
250
|
+
/**
 * Entry point for `evalgate init`: detect the project in `cwd`, scaffold the
 * baseline, the GitHub Actions workflow and the config file, then print the
 * follow-up instructions.
 *
 * @param {string} [cwd=process.cwd()] - Project root to scaffold into.
 * @returns {boolean} `false` when no usable package.json is detected,
 *   otherwise `true`.
 */
function runInit(cwd = process.cwd()) {
    const banner = ["", " evalgate init — setting up regression gate\n"];
    for (const line of banner) {
        console.log(line);
    }
    const project = detectProject(cwd);
    if (!project) {
        console.error(" ✖ No package.json found. Run this from a Node.js project root.");
        return false;
    }
    ok(`Detected ${project.pm} project: ${project.name}`);
    if (!project.hasTestScript) {
        // A missing test script is only a warning; scaffolding continues.
        console.log(` ⚠ No test script found in package.json`);
        console.log(` The gate will still work — add a "test" script later for full coverage.\n`);
    }
    // Scaffold in a fixed order: baseline, workflow, config.
    createBaseline(cwd, project);
    installWorkflow(cwd, project);
    createConfig(cwd);
    // Each entry is printed with its own console.log call, in order.
    const nextSteps = [
        "",
        " Done! Next:",
        "",
        " npx evalgate doctor Verify your setup is complete",
        "",
        " Then commit:",
        "",
        " git add evals/ .github/workflows/evalgate-gate.yml evalgate.config.json",
        " git commit -m 'chore: add EvalGate regression gate'",
        " git push",
        "",
        " That's it. Open a PR and the gate runs automatically.",
        "",
        " Commands:",
        " npx evalgate doctor Preflight check — verify config, baseline, CI",
        " npx evalgate gate Run regression gate locally",
        " npx evalgate check API-based gate (requires account)",
        " npx evalgate explain Explain last failure with root causes + fixes",
        " npx evalgate baseline update Update baseline after intentional changes",
        "",
        " To remove: delete evals/, evalgate.config.json, and .github/workflows/evalgate-gate.yml",
        "",
    ];
    for (const line of nextSteps) {
        console.log(line);
    }
    return true;
}
|