@evalgate/sdk 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +638 -0
  2. package/README.md +398 -0
  3. package/dist/assertions.d.ts +189 -0
  4. package/dist/assertions.js +662 -0
  5. package/dist/batch.d.ts +68 -0
  6. package/dist/batch.js +179 -0
  7. package/dist/cache.d.ts +65 -0
  8. package/dist/cache.js +131 -0
  9. package/dist/cli/api.d.ts +108 -0
  10. package/dist/cli/api.js +132 -0
  11. package/dist/cli/baseline.d.ts +10 -0
  12. package/dist/cli/baseline.js +172 -0
  13. package/dist/cli/check.d.ts +73 -0
  14. package/dist/cli/check.js +355 -0
  15. package/dist/cli/ci-context.d.ts +6 -0
  16. package/dist/cli/ci-context.js +112 -0
  17. package/dist/cli/ci.d.ts +45 -0
  18. package/dist/cli/ci.js +192 -0
  19. package/dist/cli/config.d.ts +30 -0
  20. package/dist/cli/config.js +230 -0
  21. package/dist/cli/constants.d.ts +15 -0
  22. package/dist/cli/constants.js +18 -0
  23. package/dist/cli/diff.d.ts +173 -0
  24. package/dist/cli/diff.js +685 -0
  25. package/dist/cli/discover.d.ts +84 -0
  26. package/dist/cli/discover.js +419 -0
  27. package/dist/cli/doctor.d.ts +88 -0
  28. package/dist/cli/doctor.js +675 -0
  29. package/dist/cli/env.d.ts +21 -0
  30. package/dist/cli/env.js +42 -0
  31. package/dist/cli/explain.d.ts +58 -0
  32. package/dist/cli/explain.js +561 -0
  33. package/dist/cli/formatters/github.d.ts +8 -0
  34. package/dist/cli/formatters/github.js +135 -0
  35. package/dist/cli/formatters/human.d.ts +6 -0
  36. package/dist/cli/formatters/human.js +110 -0
  37. package/dist/cli/formatters/json.d.ts +6 -0
  38. package/dist/cli/formatters/json.js +10 -0
  39. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  40. package/dist/cli/formatters/pr-comment.js +103 -0
  41. package/dist/cli/formatters/types.d.ts +103 -0
  42. package/dist/cli/formatters/types.js +8 -0
  43. package/dist/cli/gate.d.ts +21 -0
  44. package/dist/cli/gate.js +179 -0
  45. package/dist/cli/impact-analysis.d.ts +63 -0
  46. package/dist/cli/impact-analysis.js +252 -0
  47. package/dist/cli/index.d.ts +9 -0
  48. package/dist/cli/index.js +332 -0
  49. package/dist/cli/init.d.ts +16 -0
  50. package/dist/cli/init.js +292 -0
  51. package/dist/cli/manifest.d.ts +103 -0
  52. package/dist/cli/manifest.js +282 -0
  53. package/dist/cli/migrate.d.ts +41 -0
  54. package/dist/cli/migrate.js +349 -0
  55. package/dist/cli/policy-packs.d.ts +23 -0
  56. package/dist/cli/policy-packs.js +89 -0
  57. package/dist/cli/print-config.d.ts +29 -0
  58. package/dist/cli/print-config.js +270 -0
  59. package/dist/cli/profiles.d.ts +28 -0
  60. package/dist/cli/profiles.js +30 -0
  61. package/dist/cli/reason-codes.d.ts +17 -0
  62. package/dist/cli/reason-codes.js +19 -0
  63. package/dist/cli/regression-gate.d.ts +15 -0
  64. package/dist/cli/regression-gate.js +341 -0
  65. package/dist/cli/render/snippet.d.ts +5 -0
  66. package/dist/cli/render/snippet.js +15 -0
  67. package/dist/cli/render/sort.d.ts +10 -0
  68. package/dist/cli/render/sort.js +24 -0
  69. package/dist/cli/report/build-check-report.d.ts +19 -0
  70. package/dist/cli/report/build-check-report.js +132 -0
  71. package/dist/cli/run.d.ts +101 -0
  72. package/dist/cli/run.js +395 -0
  73. package/dist/cli/share.d.ts +17 -0
  74. package/dist/cli/share.js +91 -0
  75. package/dist/cli/upgrade.d.ts +15 -0
  76. package/dist/cli/upgrade.js +492 -0
  77. package/dist/cli/workspace.d.ts +31 -0
  78. package/dist/cli/workspace.js +68 -0
  79. package/dist/client.d.ts +368 -0
  80. package/dist/client.js +893 -0
  81. package/dist/client.request.test.d.ts +1 -0
  82. package/dist/client.request.test.js +232 -0
  83. package/dist/context.d.ts +134 -0
  84. package/dist/context.js +215 -0
  85. package/dist/errors.d.ts +82 -0
  86. package/dist/errors.js +298 -0
  87. package/dist/export.d.ts +195 -0
  88. package/dist/export.js +344 -0
  89. package/dist/index.d.ts +44 -0
  90. package/dist/index.js +153 -0
  91. package/dist/integrations/anthropic.d.ts +91 -0
  92. package/dist/integrations/anthropic.js +163 -0
  93. package/dist/integrations/openai-eval.d.ts +57 -0
  94. package/dist/integrations/openai-eval.js +232 -0
  95. package/dist/integrations/openai.d.ts +92 -0
  96. package/dist/integrations/openai.js +160 -0
  97. package/dist/local.d.ts +39 -0
  98. package/dist/local.js +148 -0
  99. package/dist/logger.d.ts +128 -0
  100. package/dist/logger.js +227 -0
  101. package/dist/matchers/index.d.ts +1 -0
  102. package/dist/matchers/index.js +6 -0
  103. package/dist/matchers/to-pass-gate.d.ts +29 -0
  104. package/dist/matchers/to-pass-gate.js +35 -0
  105. package/dist/pagination.d.ts +74 -0
  106. package/dist/pagination.js +139 -0
  107. package/dist/regression.d.ts +100 -0
  108. package/dist/regression.js +44 -0
  109. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  110. package/dist/runtime/adapters/config-to-dsl.js +400 -0
  111. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  112. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  113. package/dist/runtime/context.d.ts +26 -0
  114. package/dist/runtime/context.js +74 -0
  115. package/dist/runtime/eval.d.ts +46 -0
  116. package/dist/runtime/eval.js +244 -0
  117. package/dist/runtime/execution-mode.d.ts +80 -0
  118. package/dist/runtime/execution-mode.js +357 -0
  119. package/dist/runtime/executor.d.ts +16 -0
  120. package/dist/runtime/executor.js +152 -0
  121. package/dist/runtime/registry.d.ts +78 -0
  122. package/dist/runtime/registry.js +403 -0
  123. package/dist/runtime/run-report.d.ts +200 -0
  124. package/dist/runtime/run-report.js +222 -0
  125. package/dist/runtime/types.d.ts +356 -0
  126. package/dist/runtime/types.js +76 -0
  127. package/dist/snapshot.d.ts +176 -0
  128. package/dist/snapshot.js +322 -0
  129. package/dist/streaming.d.ts +173 -0
  130. package/dist/streaming.js +268 -0
  131. package/dist/testing.d.ts +273 -0
  132. package/dist/testing.js +317 -0
  133. package/dist/types.d.ts +754 -0
  134. package/dist/types.js +54 -0
  135. package/dist/utils/input-hash.d.ts +8 -0
  136. package/dist/utils/input-hash.js +41 -0
  137. package/dist/version.d.ts +7 -0
  138. package/dist/version.js +10 -0
  139. package/dist/workflows.d.ts +389 -0
  140. package/dist/workflows.js +671 -0
  141. package/package.json +117 -0
@@ -0,0 +1,332 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ /**
4
+ * evalgate — EvalGate CLI
5
+ *
6
+ * Commands:
7
+ * evalgate init — Create evalgate.config.json
8
+ * evalgate check — CI/CD evaluation gate (see evalgate check --help)
9
+ */
10
+ Object.defineProperty(exports, "__esModule", { value: true });
11
+ const baseline_1 = require("./baseline");
12
+ const check_1 = require("./check");
13
+ const ci_1 = require("./ci");
14
+ const diff_1 = require("./diff");
15
+ const discover_1 = require("./discover");
16
+ const doctor_1 = require("./doctor");
17
+ const explain_1 = require("./explain");
18
+ const impact_analysis_1 = require("./impact-analysis");
19
+ const init_1 = require("./init");
20
+ const migrate_1 = require("./migrate");
21
+ const print_config_1 = require("./print-config");
22
+ const regression_gate_1 = require("./regression-gate");
23
+ const run_1 = require("./run");
24
+ const share_1 = require("./share");
25
+ const upgrade_1 = require("./upgrade");
26
const argv = process.argv.slice(2);
const subcommand = argv[0];
/**
 * Read the value that follows `flag` in `args`.
 *
 * @param {string[]} args - Arguments after the subcommand.
 * @param {string} flag - Long option name, e.g. "--base".
 * @param {string} [fallback] - Returned when the flag is absent or has no value.
 * @returns {string | undefined} The option value, or `fallback`.
 */
function readOption(args, flag, fallback) {
    const at = args.indexOf(flag);
    if (at === -1)
        return fallback;
    const value = args[at + 1];
    // A flag at the end of the line, or one followed directly by another
    // "--flag", has no value: use the default rather than `undefined` or the
    // next flag's name.
    return value === undefined || value.startsWith("--") ? fallback : value;
}
/**
 * Build a promise rejection handler that prints "<label>: <message>" to
 * stderr and terminates the process with `exitCode`.
 *
 * @param {number} exitCode - Process exit code to use on failure.
 * @param {string} [label] - Prefix for the error line.
 * @returns {(err: unknown) => void}
 */
function exitOnError(exitCode, label = "EvalGate ERROR") {
    return (err) => {
        console.error(`${label}: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(exitCode);
    };
}
// Success continuations shared by the async subcommands.
const exitWithCode = (code) => process.exit(code);
const exitOk = () => process.exit(0);
if (subcommand === "init") {
    const ok = (0, init_1.runInit)(process.cwd());
    process.exit(ok ? 0 : 1);
}
else if (subcommand === "baseline") {
    process.exit((0, baseline_1.runBaseline)(argv.slice(1)));
}
else if (subcommand === "gate") {
    process.exit((0, regression_gate_1.runGate)(argv.slice(1)));
}
else if (subcommand === "migrate") {
    if (argv[1] === "config") {
        // Options for `evalgate migrate config`; the --no-* switches turn off
        // behaviour that is enabled by default.
        let inputPath = "";
        let outputPath = "";
        let verbose = false;
        let helpers = true;
        let preserveIds = true;
        let provenance = true;
        for (let i = 2; i < argv.length; i++) {
            switch (argv[i]) {
                case "--in":
                case "-i":
                    // Consume the following token as the value; a missing
                    // value is caught by the required-option check below.
                    inputPath = argv[++i] ?? "";
                    break;
                case "--out":
                case "-o":
                    outputPath = argv[++i] ?? "";
                    break;
                case "--verbose":
                case "-v":
                    verbose = true;
                    break;
                case "--no-helpers":
                    helpers = false;
                    break;
                case "--no-preserve-ids":
                    preserveIds = false;
                    break;
                case "--no-provenance":
                    provenance = false;
                    break;
                default:
                    // Unrecognised tokens are ignored.
                    break;
            }
        }
        if (!inputPath || !outputPath) {
            console.error("Error: Both --in and --out options are required");
            console.error("Usage: evalgate migrate config --in <input> --out <output> [options]");
            process.exit(1);
        }
        // No explicit exit on success: the process ends when migrateConfig settles.
        (0, migrate_1.migrateConfig)({
            input: inputPath,
            output: outputPath,
            verbose,
            helpers,
            preserveIds,
            provenance,
        }).catch(exitOnError(1, "Migration failed"));
    }
    else {
        console.error("Error: Unknown migrate subcommand. Use 'evalgate migrate config'");
        process.exit(1);
    }
}
else if (subcommand === "upgrade") {
    process.exit((0, upgrade_1.runUpgrade)(argv.slice(1)));
}
else if (subcommand === "doctor") {
    (0, doctor_1.runDoctor)(argv.slice(1))
        .then(exitWithCode)
        .catch(exitOnError(1));
}
else if (subcommand === "check") {
    const parsed = (0, check_1.parseArgs)(argv.slice(1));
    if (!parsed.ok) {
        console.error(parsed.message);
        process.exit(parsed.exitCode);
    }
    // Unexpected failures of `check` exit with 4 (other subcommands use 1 or 2).
    (0, check_1.runCheck)(parsed.args)
        .then(exitWithCode)
        .catch(exitOnError(4));
}
else if (subcommand === "explain") {
    (0, explain_1.runExplain)(argv.slice(1))
        .then(exitWithCode)
        .catch(exitOnError(1));
}
else if (subcommand === "print-config") {
    process.exit((0, print_config_1.runPrintConfig)(argv.slice(1)));
}
else if (subcommand === "share") {
    const parsed = (0, share_1.parseShareArgs)(argv.slice(1));
    if ("error" in parsed) {
        console.error(parsed.error);
        process.exit(1);
    }
    (0, share_1.runShare)(parsed)
        .then(exitWithCode)
        .catch(exitOnError(1));
}
else if (subcommand === "discover") {
    const args = argv.slice(1);
    (0, discover_1.discoverSpecs)({ manifest: args.includes("--manifest") })
        .then(exitOk)
        .catch(exitOnError(1));
}
else if (subcommand === "impact-analysis") {
    const args = argv.slice(1);
    const baseBranch = readOption(args, "--base", "main");
    // Comma-separated list; stays undefined when the option is not supplied.
    const changedFiles = readOption(args, "--changed-files")?.split(",");
    const format = readOption(args, "--format", "human");
    (0, impact_analysis_1.runImpactAnalysisCLI)({ baseBranch, changedFiles, format })
        .then(exitOk)
        .catch(exitOnError(2));
}
else if (subcommand === "run") {
    const args = argv.slice(1);
    const specIds = readOption(args, "--spec-ids")?.split(",");
    const baseBranch = readOption(args, "--base");
    const format = readOption(args, "--format", "human");
    const writeResults = args.includes("--write-results");
    const impactedOnlyRequested = args.includes("--impacted-only");
    if (impactedOnlyRequested && !baseBranch) {
        // Impact filtering is only forwarded together with a base branch;
        // say so instead of silently dropping the flag.
        console.error("Warning: --impacted-only requires --base <branch>; ignoring --impacted-only");
    }
    (0, run_1.runEvaluationsCLI)({
        specIds,
        impactedOnly: impactedOnlyRequested && !!baseBranch,
        baseBranch,
        format,
        writeResults,
    })
        .then(exitOk)
        .catch(exitOnError(2));
}
else if (subcommand === "diff") {
    const args = argv.slice(1);
    const base = readOption(args, "--base");
    const head = readOption(args, "--head");
    const format = readOption(args, "--format", "human");
    (0, diff_1.runDiffCLI)({ base, head, format })
        .then(exitOk)
        .catch(exitOnError(2));
}
else if (subcommand === "ci") {
    const args = argv.slice(1);
    const base = readOption(args, "--base");
    const impactedOnly = args.includes("--impacted-only");
    const format = readOption(args, "--format", "human");
    const writeResults = args.includes("--write-results");
    (0, ci_1.runCICLI)({ base, impactedOnly, format, writeResults })
        .then(exitOk)
        .catch(exitOnError(2));
}
236
+ else {
237
+ console.log(`EvalGate CLI
238
+
239
+ Usage:
240
+ evalgate init Create evalgate.config.json + baseline + CI workflow
241
+ evalgate discover Discover behavioral specs in project and show statistics
242
+ evalgate discover --manifest Generate evaluation manifest for incremental analysis
243
+ evalgate impact-analysis Analyze impact of changes and suggest targeted tests
244
+ --base <branch> Base branch to compare against (default: main)
245
+ --changed-files <files> Comma-separated list of changed files (for CI)
246
+ --format <fmt> Output format: human (default), json
247
+ evalgate ci One-command CI loop (manifest → impact → run → diff)
248
+ --base <ref> Base reference for diff (baseline|last|<runId>|<path>|<gitref>)
249
+ --impacted-only Run only specs impacted by changes
250
+ --format <fmt> Output format: human (default), json, github
251
+ --write-results Write run results to .evalgate/last-run.json
252
+ evalgate run Run evaluation specifications
253
+ --spec-ids <ids> Comma-separated list of spec IDs to run
254
+ --impacted-only Run only specs impacted by changes (requires --base)
255
+ --base <branch> Base branch for impact analysis (with --impacted-only)
256
+ --format <fmt> Output format: human (default), json
257
+ --write-results Write results to .evalgate/last-run.json
258
+ evalgate diff Compare two run reports and show behavioral changes
259
+ --base <branch> Base branch or report path (default: main)
260
+ --head <path> Head report path (default: .evalgate/last-run.json)
261
+ --format <fmt> Output format: human (default), json
262
+ evalgate gate [options] Run regression gate (local test-based, no API needed)
263
+ evalgate check [options] CI/CD evaluation gate (API-based)
264
+ evalgate explain [options] Explain last gate/check failure with root causes + fixes
265
+ evalgate doctor [options] Comprehensive CI/CD readiness checklist
266
+ evalgate baseline init Create starter evals/baseline.json
267
+ evalgate baseline update Run tests and update baseline with real scores
268
+ evalgate upgrade --full Upgrade from Tier 1 to Tier 2 (full gate)
269
+ evalgate print-config Show resolved config with source-of-truth annotations
270
+ evalgate share [options] Create share link for a run
271
+
272
+ Options for gate:
273
+ --format <fmt> Output format: human (default), json, github
274
+
275
+ Options for check:
276
+ --evaluationId <id> Evaluation to gate on (or from config)
277
+ --apiKey <key> API key (or EVALAI_API_KEY env)
278
+ --format <fmt> Output format: human (default), json, github
279
+ --explain Show score breakdown and thresholds
280
+ --onFail import When gate fails, import run with CI context
281
+ --minScore <n> Fail if score < n (0-100)
282
+ --maxDrop <n> Fail if score dropped > n from baseline
283
+ --warnDrop <n> Warn (exit 8) if score dropped > n but < maxDrop
284
+ --minN <n> Fail if total test cases < n
285
+ --allowWeakEvidence Allow weak evidence level
286
+ --policy <name> Enforce policy (HIPAA, SOC2, GDPR, etc.)
287
+ --baseline <mode> "published", "previous", or "production"
288
+ --share <mode> Share link: always | fail | never (fail = only when gate fails)
289
+ --baseUrl <url> API base URL
290
+
291
+ Options for explain:
292
+ --report <path> Path to report JSON (default: evals/regression-report.json)
293
+ --format <fmt> Output format: human (default), json
294
+
295
+ Options for print-config:
296
+ --format <fmt> Output format: human (default), json
297
+
298
+ Options for doctor:
299
+ --report Output JSON diagnostic bundle
300
+ --format <fmt> Output format: human (default), json
301
+ --strict Treat warnings as failures (exit 2)
302
+ --apiKey <key> API key (or EVALAI_API_KEY env)
303
+ --baseUrl <url> API base URL
304
+ --evaluationId <id> Evaluation to verify
305
+
306
+ Examples:
307
+ evalgate init
308
+ evalgate discover
309
+ evalgate discover --manifest
310
+ evalgate impact-analysis --base main
311
+ evalgate impact-analysis --base main --format json
312
+ evalgate impact-analysis --changed-files src/utils.ts,datasets/test.json
313
+ evalgate run
314
+ evalgate run --spec-ids spec1,spec2
315
+ evalgate run --impacted-only --base main
316
+ evalgate run --format json --write-results
317
+ evalgate diff
318
+ evalgate diff --base main
319
+ evalgate diff --base main --format json
320
+ evalgate diff --base .evalgate/runs/base.json --head .evalgate/last-run.json
321
+ evalgate gate
322
+ evalgate gate --format json
323
+ evalgate explain
324
+ evalgate doctor
325
+ evalgate print-config
326
+ evalgate doctor --report
327
+ evalgate check --minScore 92 --evaluationId 42 --apiKey $EVALAI_API_KEY
328
+ evalgate check --policy HIPAA --evaluationId 42 --apiKey $EVALAI_API_KEY
329
+ evalgate share --scope run --evaluationId 42 --runId 123 --expires 7d --apiKey $EVALAI_API_KEY
330
+ `);
331
+ process.exit(subcommand === "--help" || subcommand === "-h" ? 0 : 1);
332
+ }
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * evalgate init — Full project scaffolder
4
+ *
5
+ * Zero-to-gate in under 5 minutes:
6
+ * npx evalgate init
7
+ * git push
8
+ * …CI starts blocking regressions.
9
+ *
10
+ * What it does:
11
+ * 1. Detects Node repo + package manager
12
+ * 2. Creates evals/ directory + baseline.json
13
+ * 3. Installs .github/workflows/evalgate-gate.yml
14
+ * 4. Creates evalgate.config.json, then prints next steps (no docs required)
15
+ */
16
+ export declare function runInit(cwd?: string): boolean;
@@ -0,0 +1,292 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ /**
4
+ * evalgate init — Full project scaffolder
5
+ *
6
+ * Zero-to-gate in under 5 minutes:
7
+ * npx evalgate init
8
+ * git push
9
+ * …CI starts blocking regressions.
10
+ *
11
+ * What it does:
12
+ * 1. Detects Node repo + package manager
13
+ * 2. Creates evals/ directory + baseline.json
14
+ * 3. Installs .github/workflows/evalgate-gate.yml
15
+ * 4. Creates evalgate.config.json, then prints next steps (no docs required)
16
+ */
17
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
18
+ if (k2 === undefined) k2 = k;
19
+ var desc = Object.getOwnPropertyDescriptor(m, k);
20
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
21
+ desc = { enumerable: true, get: function() { return m[k]; } };
22
+ }
23
+ Object.defineProperty(o, k2, desc);
24
+ }) : (function(o, m, k, k2) {
25
+ if (k2 === undefined) k2 = k;
26
+ o[k2] = m[k];
27
+ }));
28
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
29
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
30
+ }) : function(o, v) {
31
+ o["default"] = v;
32
+ });
33
+ var __importStar = (this && this.__importStar) || (function () {
34
+ var ownKeys = function(o) {
35
+ ownKeys = Object.getOwnPropertyNames || function (o) {
36
+ var ar = [];
37
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
38
+ return ar;
39
+ };
40
+ return ownKeys(o);
41
+ };
42
+ return function (mod) {
43
+ if (mod && mod.__esModule) return mod;
44
+ var result = {};
45
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
46
+ __setModuleDefault(result, mod);
47
+ return result;
48
+ };
49
+ })();
50
+ Object.defineProperty(exports, "__esModule", { value: true });
51
+ exports.runInit = runInit;
52
+ const node_child_process_1 = require("node:child_process");
53
+ const fs = __importStar(require("node:fs"));
54
+ const path = __importStar(require("node:path"));
55
/**
 * Inspect `cwd` for a Node.js project and collect what the scaffolder needs.
 *
 * The package manager is inferred from lockfiles: pnpm-lock.yaml wins over
 * yarn.lock, and npm is the default when neither is present.
 *
 * @param {string} cwd - Directory expected to contain package.json.
 * @returns {{cwd: string, pm: string, hasTestScript: boolean, testScript: string, name: string} | null}
 *   Project summary, or null when package.json is missing, unreadable, not
 *   valid JSON, or does not contain a JSON object.
 */
function detectProject(cwd) {
    const pkgPath = path.join(cwd, "package.json");
    if (!fs.existsSync(pkgPath))
        return null;
    let pkg;
    try {
        pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
    }
    catch {
        return null;
    }
    // JSON.parse also accepts `null`, arrays and primitives. None of them is a
    // usable manifest, and `null` would throw on the property reads below.
    if (pkg === null || typeof pkg !== "object" || Array.isArray(pkg))
        return null;
    let pm = "npm";
    if (fs.existsSync(path.join(cwd, "pnpm-lock.yaml")))
        pm = "pnpm";
    else if (fs.existsSync(path.join(cwd, "yarn.lock")))
        pm = "yarn";
    // Only a string can be a runnable script; anything else counts as "no test script".
    const declaredTest = pkg.scripts?.test;
    const testScript = typeof declaredTest === "string" ? declaredTest : "";
    // The literal below is the placeholder script of a freshly initialised
    // package.json; it is not a real test command.
    const hasTestScript = testScript !== "" && testScript !== 'echo "Error: no test specified" && exit 1';
    return {
        cwd,
        pm,
        hasTestScript,
        testScript,
        name: pkg.name ?? path.basename(cwd),
    };
}
81
+ // ── Step helpers ──
82
+ function ok(msg) { // Print `msg` to stdout as a completed step, prefixed with a check mark.
83
+ console.log(` ✔ ${msg}`);
84
+ }
85
+ function skip(msg) { // Print `msg` to stdout as a step that was skipped, prefixed with a dash.
86
+ console.log(` – ${msg}`);
87
+ }
88
+ // ── 1. Create evals/ + baseline.json ──
89
/**
 * Pull the number of passing tests out of a test runner's combined output.
 *
 * Recognised summary shapes, tried in order:
 *   "12 tests passed" / "12 specs completed"
 *   "Tests: 12 passed" (colon optional, so the colon-less "Tests  12 passed" form is covered too)
 *   "12 passing"
 *
 * @param {string} output - stdout and stderr of the test run, concatenated.
 * @returns {number} The parsed count, or 0 when no summary line is recognised.
 */
function extractPassedTestCount(output) {
    const match = output.match(/(\d+)\s+(?:tests?|specs?)\s+(?:passed|completed)/i) ??
        output.match(/Tests:?\s+(\d+)\s+passed/i) ??
        output.match(/(\d+)\s+passing/i);
    return match ? Number.parseInt(match[1], 10) : 0;
}
/**
 * Create evals/baseline.json under `cwd` unless it already exists.
 *
 * When the project has a test script, the suite is run once through the
 * detected package manager so the baseline records whether it passed and,
 * when the output can be parsed, how many tests passed.
 *
 * @param {string} cwd - Project root.
 * @param {{pm: string, hasTestScript: boolean, name: string}} project - Result of detectProject.
 * @returns {boolean} Always true; filesystem errors propagate as exceptions.
 */
function createBaseline(cwd, project) {
    const evalsDir = path.join(cwd, "evals");
    const baselinePath = path.join(evalsDir, "baseline.json");
    if (fs.existsSync(baselinePath)) {
        skip("evals/baseline.json already exists");
        return true;
    }
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(evalsDir, { recursive: true });
    const user = process.env.USER || process.env.USERNAME || "unknown";
    const now = new Date().toISOString();
    // Defaults used when there is no test script to run.
    let testTotal = 0;
    let testsPassed = true;
    if (project.hasTestScript) {
        const result = (0, node_child_process_1.spawnSync)(project.pm, ["test"], {
            cwd,
            stdio: "pipe",
            // A shell is used on Windows only.
            shell: process.platform === "win32",
            // Cap the run at two minutes; a killed run has a null status and
            // is therefore recorded as not passed.
            timeout: 120000,
        });
        testsPassed = result.status === 0;
        const output = (result.stdout?.toString() ?? "") + (result.stderr?.toString() ?? "");
        testTotal = extractPassedTestCount(output);
    }
    const baseline = {
        schemaVersion: 1,
        description: `Regression gate baseline for ${project.name}`,
        generatedAt: now,
        generatedBy: user,
        commitSha: getHeadSha(cwd),
        updatedAt: now,
        updatedBy: user,
        tolerance: {
            scoreDrop: 5,
            passRateDrop: 5,
            maxLatencyIncreaseMs: 200,
            maxCostIncreaseUsd: 0.05,
        },
        // NOTE(review): fixed starter values, presumably replaced by
        // `evalgate baseline update` — confirm against the baseline command.
        goldenEval: {
            score: 100,
            passRate: 100,
            totalCases: 3,
            passedCases: 3,
        },
        confidenceTests: {
            passed: testsPassed,
            total: testTotal,
        },
        productMetrics: {},
    };
    fs.writeFileSync(baselinePath, `${JSON.stringify(baseline, null, 2)}\n`);
    ok("Created evals/baseline.json");
    return true;
}
151
/**
 * Resolve the abbreviated SHA of HEAD for the repository containing `cwd`.
 *
 * @param {string} cwd - Directory in which to run git.
 * @returns {string} The short commit SHA, or "0000000" when git cannot be
 *   run, `cwd` is not inside a repository, or HEAD does not resolve to a
 *   commit (for example a repository without any commits yet).
 */
function getHeadSha(cwd) {
    try {
        const result = (0, node_child_process_1.spawnSync)("git", ["rev-parse", "--short", "HEAD"], {
            cwd,
            stdio: "pipe",
        });
        // spawnSync reports failure through `status`/`error` instead of
        // throwing. A failing rev-parse can still write to stdout (it echoes
        // the unresolved "HEAD" argument), so stdout is trusted only on success.
        if (result.status !== 0)
            return "0000000";
        return result.stdout?.toString().trim() || "0000000";
    }
    catch {
        return "0000000";
    }
}
163
+ // ── 2. Install GitHub Actions workflow ──
164
+ function installWorkflow(cwd, project) { // Write .github/workflows/evalgate-gate.yml under cwd unless it already exists; always returns true.
165
+ const workflowDir = path.join(cwd, ".github", "workflows");
166
+ const workflowPath = path.join(workflowDir, "evalgate-gate.yml");
167
+ if (fs.existsSync(workflowPath)) { // An existing workflow file is never overwritten.
168
+ skip(".github/workflows/evalgate-gate.yml already exists");
169
+ return true;
170
+ }
171
+ if (!fs.existsSync(workflowDir)) {
172
+ fs.mkdirSync(workflowDir, { recursive: true });
173
+ }
174
+ const installCmd = project.pm === "pnpm" // Install command matching the detected package manager (project.pm).
175
+ ? "pnpm install --frozen-lockfile"
176
+ : project.pm === "yarn"
177
+ ? "yarn install --frozen-lockfile"
178
+ : "npm ci";
179
+ const setupSteps = project.pm === "pnpm" // The pnpm variant adds a pnpm/action-setup step before setup-node; npm and yarn use setup-node alone.
180
+ ? ` - uses: pnpm/action-setup@v4
181
+ - uses: actions/setup-node@v4
182
+ with:
183
+ node-version: '20'
184
+ cache: pnpm
185
+ - run: ${installCmd}`
186
+ : ` - uses: actions/setup-node@v4
187
+ with:
188
+ node-version: '20'
189
+ cache: ${project.pm}
190
+ - run: ${installCmd}`;
191
+ const workflow = `# EvalGate Regression Gate
192
+ # Auto-generated by: npx evalgate init
193
+ # Blocks PRs that regress test health.
194
+ name: EvalGate Gate
195
+
196
+ on:
197
+ pull_request:
198
+ branches: [main]
199
+
200
+ concurrency:
201
+ group: evalgate-\${{ github.ref }}
202
+ cancel-in-progress: true
203
+
204
+ jobs:
205
+ regression-gate:
206
+ runs-on: ubuntu-latest
207
+ steps:
208
+ - uses: actions/checkout@v4
209
+ ${setupSteps}
210
+ - name: EvalGate Doctor (preflight)
211
+ continue-on-error: true # Strict: set to false, or use: evalgate doctor --strict
212
+ run: npx -y @evalgate/sdk@^2 doctor
213
+
214
+ - name: EvalGate Regression Gate
215
+ run: npx -y @evalgate/sdk@^2 gate --format github
216
+
217
+ - name: Upload report
218
+ if: always()
219
+ uses: actions/upload-artifact@v4
220
+ with:
221
+ name: evalgate-report
222
+ path: |
223
+ evals/regression-report.json
224
+ .evalgate/last-report.json
225
+ if-no-files-found: ignore
226
+ `;
227
+ fs.writeFileSync(workflowPath, workflow);
228
+ ok("Created .github/workflows/evalgate-gate.yml");
229
+ return true;
230
+ }
231
+ // ── 3. Create evalgate.config.json ──
232
/**
 * Write a starter evalgate.config.json into `cwd`, leaving any existing file
 * untouched.
 *
 * @param {string} cwd - Project root.
 * @returns {boolean} Always true.
 */
function createConfig(cwd) {
    const target = path.join(cwd, "evalgate.config.json");
    const alreadyPresent = fs.existsSync(target);
    if (!alreadyPresent) {
        // Starter settings: empty evaluation id plus the gate's baseline and report paths.
        const gate = {
            baseline: "evals/baseline.json",
            report: "evals/regression-report.json",
        };
        const serialized = JSON.stringify({ evaluationId: "", gate }, null, 2);
        fs.writeFileSync(target, `${serialized}\n`);
        ok("Created evalgate.config.json");
    }
    else {
        skip("evalgate.config.json already exists");
    }
    return true;
}
249
+ // ── Main ──
250
+ function runInit(cwd = process.cwd()) { // Scaffold the regression gate in `cwd`; returns false when detectProject finds no usable package.json, true otherwise.
251
+ console.log("");
252
+ console.log(" evalgate init — setting up regression gate\n");
253
+ // Detect
254
+ const project = detectProject(cwd);
255
+ if (!project) { // detectProject returns null when package.json is missing or cannot be parsed.
256
+ console.error(" ✖ No package.json found. Run this from a Node.js project root.");
257
+ return false;
258
+ }
259
+ ok(`Detected ${project.pm} project: ${project.name}`);
260
+ if (!project.hasTestScript) { // Warning only: scaffolding continues without a test script.
261
+ console.log(` ⚠ No test script found in package.json`);
262
+ console.log(` The gate will still work — add a "test" script later for full coverage.\n`);
263
+ }
264
+ // Scaffold
265
+ createBaseline(cwd, project); // The return values of the three scaffold steps are not inspected here.
266
+ installWorkflow(cwd, project);
267
+ createConfig(cwd);
268
+ // Next steps
269
+ console.log("");
270
+ console.log(" Done! Next:");
271
+ console.log("");
272
+ console.log(" npx evalgate doctor Verify your setup is complete");
273
+ console.log("");
274
+ console.log(" Then commit:");
275
+ console.log("");
276
+ console.log(" git add evals/ .github/workflows/evalgate-gate.yml evalgate.config.json");
277
+ console.log(" git commit -m 'chore: add EvalGate regression gate'");
278
+ console.log(" git push");
279
+ console.log("");
280
+ console.log(" That's it. Open a PR and the gate runs automatically.");
281
+ console.log("");
282
+ console.log(" Commands:");
283
+ console.log(" npx evalgate doctor Preflight check — verify config, baseline, CI");
284
+ console.log(" npx evalgate gate Run regression gate locally");
285
+ console.log(" npx evalgate check API-based gate (requires account)");
286
+ console.log(" npx evalgate explain Explain last failure with root causes + fixes");
287
+ console.log(" npx evalgate baseline update Update baseline after intentional changes");
288
+ console.log("");
289
+ console.log(" To remove: delete evals/, evalgate.config.json, and .github/workflows/evalgate-gate.yml");
290
+ console.log("");
291
+ return true;
292
+ }