@evalgate/sdk 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +638 -0
  2. package/README.md +398 -0
  3. package/dist/assertions.d.ts +189 -0
  4. package/dist/assertions.js +662 -0
  5. package/dist/batch.d.ts +68 -0
  6. package/dist/batch.js +179 -0
  7. package/dist/cache.d.ts +65 -0
  8. package/dist/cache.js +131 -0
  9. package/dist/cli/api.d.ts +108 -0
  10. package/dist/cli/api.js +132 -0
  11. package/dist/cli/baseline.d.ts +10 -0
  12. package/dist/cli/baseline.js +172 -0
  13. package/dist/cli/check.d.ts +73 -0
  14. package/dist/cli/check.js +355 -0
  15. package/dist/cli/ci-context.d.ts +6 -0
  16. package/dist/cli/ci-context.js +112 -0
  17. package/dist/cli/ci.d.ts +45 -0
  18. package/dist/cli/ci.js +192 -0
  19. package/dist/cli/config.d.ts +30 -0
  20. package/dist/cli/config.js +230 -0
  21. package/dist/cli/constants.d.ts +15 -0
  22. package/dist/cli/constants.js +18 -0
  23. package/dist/cli/diff.d.ts +173 -0
  24. package/dist/cli/diff.js +685 -0
  25. package/dist/cli/discover.d.ts +84 -0
  26. package/dist/cli/discover.js +419 -0
  27. package/dist/cli/doctor.d.ts +88 -0
  28. package/dist/cli/doctor.js +675 -0
  29. package/dist/cli/env.d.ts +21 -0
  30. package/dist/cli/env.js +42 -0
  31. package/dist/cli/explain.d.ts +58 -0
  32. package/dist/cli/explain.js +561 -0
  33. package/dist/cli/formatters/github.d.ts +8 -0
  34. package/dist/cli/formatters/github.js +135 -0
  35. package/dist/cli/formatters/human.d.ts +6 -0
  36. package/dist/cli/formatters/human.js +110 -0
  37. package/dist/cli/formatters/json.d.ts +6 -0
  38. package/dist/cli/formatters/json.js +10 -0
  39. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  40. package/dist/cli/formatters/pr-comment.js +103 -0
  41. package/dist/cli/formatters/types.d.ts +103 -0
  42. package/dist/cli/formatters/types.js +8 -0
  43. package/dist/cli/gate.d.ts +21 -0
  44. package/dist/cli/gate.js +179 -0
  45. package/dist/cli/impact-analysis.d.ts +63 -0
  46. package/dist/cli/impact-analysis.js +252 -0
  47. package/dist/cli/index.d.ts +9 -0
  48. package/dist/cli/index.js +332 -0
  49. package/dist/cli/init.d.ts +16 -0
  50. package/dist/cli/init.js +292 -0
  51. package/dist/cli/manifest.d.ts +103 -0
  52. package/dist/cli/manifest.js +282 -0
  53. package/dist/cli/migrate.d.ts +41 -0
  54. package/dist/cli/migrate.js +349 -0
  55. package/dist/cli/policy-packs.d.ts +23 -0
  56. package/dist/cli/policy-packs.js +89 -0
  57. package/dist/cli/print-config.d.ts +29 -0
  58. package/dist/cli/print-config.js +270 -0
  59. package/dist/cli/profiles.d.ts +28 -0
  60. package/dist/cli/profiles.js +30 -0
  61. package/dist/cli/reason-codes.d.ts +17 -0
  62. package/dist/cli/reason-codes.js +19 -0
  63. package/dist/cli/regression-gate.d.ts +15 -0
  64. package/dist/cli/regression-gate.js +341 -0
  65. package/dist/cli/render/snippet.d.ts +5 -0
  66. package/dist/cli/render/snippet.js +15 -0
  67. package/dist/cli/render/sort.d.ts +10 -0
  68. package/dist/cli/render/sort.js +24 -0
  69. package/dist/cli/report/build-check-report.d.ts +19 -0
  70. package/dist/cli/report/build-check-report.js +132 -0
  71. package/dist/cli/run.d.ts +101 -0
  72. package/dist/cli/run.js +395 -0
  73. package/dist/cli/share.d.ts +17 -0
  74. package/dist/cli/share.js +91 -0
  75. package/dist/cli/upgrade.d.ts +15 -0
  76. package/dist/cli/upgrade.js +492 -0
  77. package/dist/cli/workspace.d.ts +31 -0
  78. package/dist/cli/workspace.js +68 -0
  79. package/dist/client.d.ts +368 -0
  80. package/dist/client.js +893 -0
  81. package/dist/client.request.test.d.ts +1 -0
  82. package/dist/client.request.test.js +232 -0
  83. package/dist/context.d.ts +134 -0
  84. package/dist/context.js +215 -0
  85. package/dist/errors.d.ts +82 -0
  86. package/dist/errors.js +298 -0
  87. package/dist/export.d.ts +195 -0
  88. package/dist/export.js +344 -0
  89. package/dist/index.d.ts +44 -0
  90. package/dist/index.js +153 -0
  91. package/dist/integrations/anthropic.d.ts +91 -0
  92. package/dist/integrations/anthropic.js +163 -0
  93. package/dist/integrations/openai-eval.d.ts +57 -0
  94. package/dist/integrations/openai-eval.js +232 -0
  95. package/dist/integrations/openai.d.ts +92 -0
  96. package/dist/integrations/openai.js +160 -0
  97. package/dist/local.d.ts +39 -0
  98. package/dist/local.js +148 -0
  99. package/dist/logger.d.ts +128 -0
  100. package/dist/logger.js +227 -0
  101. package/dist/matchers/index.d.ts +1 -0
  102. package/dist/matchers/index.js +6 -0
  103. package/dist/matchers/to-pass-gate.d.ts +29 -0
  104. package/dist/matchers/to-pass-gate.js +35 -0
  105. package/dist/pagination.d.ts +74 -0
  106. package/dist/pagination.js +139 -0
  107. package/dist/regression.d.ts +100 -0
  108. package/dist/regression.js +44 -0
  109. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  110. package/dist/runtime/adapters/config-to-dsl.js +400 -0
  111. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  112. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  113. package/dist/runtime/context.d.ts +26 -0
  114. package/dist/runtime/context.js +74 -0
  115. package/dist/runtime/eval.d.ts +46 -0
  116. package/dist/runtime/eval.js +244 -0
  117. package/dist/runtime/execution-mode.d.ts +80 -0
  118. package/dist/runtime/execution-mode.js +357 -0
  119. package/dist/runtime/executor.d.ts +16 -0
  120. package/dist/runtime/executor.js +152 -0
  121. package/dist/runtime/registry.d.ts +78 -0
  122. package/dist/runtime/registry.js +403 -0
  123. package/dist/runtime/run-report.d.ts +200 -0
  124. package/dist/runtime/run-report.js +222 -0
  125. package/dist/runtime/types.d.ts +356 -0
  126. package/dist/runtime/types.js +76 -0
  127. package/dist/snapshot.d.ts +176 -0
  128. package/dist/snapshot.js +322 -0
  129. package/dist/streaming.d.ts +173 -0
  130. package/dist/streaming.js +268 -0
  131. package/dist/testing.d.ts +273 -0
  132. package/dist/testing.js +317 -0
  133. package/dist/types.d.ts +754 -0
  134. package/dist/types.js +54 -0
  135. package/dist/utils/input-hash.d.ts +8 -0
  136. package/dist/utils/input-hash.js +41 -0
  137. package/dist/version.d.ts +7 -0
  138. package/dist/version.js +10 -0
  139. package/dist/workflows.d.ts +389 -0
  140. package/dist/workflows.js +671 -0
  141. package/package.json +117 -0
@@ -0,0 +1,270 @@
1
+ "use strict";
2
+ /**
3
+ * evalgate print-config — Show resolved configuration with source-of-truth annotations.
4
+ *
5
+ * Prints every config field, where it came from (file, env, default, CLI arg),
6
+ * and redacts secrets. Useful for debugging "why is it using this baseUrl?"
7
+ *
8
+ * Usage:
9
+ * evalgate print-config
10
+ * evalgate print-config --format json
11
+ *
12
+ * Exit codes:
13
+ * 0 — Always (informational only)
14
+ */
15
// Compiler-emitted interop helper: re-exposes property `k` of module `m` on object `o`
// under the name `k2` (which defaults to `k`). An already-defined `this.__createBinding`
// is reused when present.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
16
+ if (k2 === undefined) k2 = k;
17
+ var desc = Object.getOwnPropertyDescriptor(m, k);
18
// Swap in an enumerable getter that reads m[k] on every access when there is no own
// descriptor, when it is an accessor on a module not flagged __esModule, or when it is
// a data property that is writable or configurable.
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
19
+ desc = { enumerable: true, get: function() { return m[k]; } };
20
+ }
21
+ Object.defineProperty(o, k2, desc);
22
// Fallback used when Object.create is unavailable: plain value copy.
+ }) : (function(o, m, k, k2) {
23
+ if (k2 === undefined) k2 = k;
24
+ o[k2] = m[k];
25
+ }));
26
// Compiler-emitted interop helper: stores `v` as the "default" property of `o`.
// Uses an enumerable defineProperty when Object.create exists, otherwise a plain assignment.
// An already-defined `this.__setModuleDefault` is reused when present.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
27
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
28
+ }) : function(o, v) {
29
+ o["default"] = v;
30
+ });
31
// Compiler-emitted interop helper backing `import * as x`: returns a namespace-like
// object for a required module. An already-defined `this.__importStar` is reused when present.
+ var __importStar = (this && this.__importStar) || (function () {
32
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or with a
// for-in + hasOwnProperty fallback when that function is missing.
+ var ownKeys = function(o) {
33
+ ownKeys = Object.getOwnPropertyNames || function (o) {
34
+ var ar = [];
35
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
36
+ return ar;
37
+ };
38
+ return ownKeys(o);
39
+ };
40
+ return function (mod) {
41
// Modules flagged __esModule are returned untouched.
+ if (mod && mod.__esModule) return mod;
42
+ var result = {};
43
// Bind every own key except "default"; the module itself becomes `result.default` below.
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
44
+ __setModuleDefault(result, mod);
45
+ return result;
46
+ };
47
+ })();
48
+ Object.defineProperty(exports, "__esModule", { value: true });
49
+ exports.runPrintConfig = runPrintConfig;
50
+ const path = __importStar(require("node:path"));
51
+ const version_1 = require("../version");
52
+ const config_1 = require("./config");
53
+ const profiles_1 = require("./profiles");
54
/**
 * Parse the command-line flags understood by `print-config`.
 *
 * Three spellings are accepted for each flag:
 *   --key value   the following token is the value
 *   --key=value   the value is everything after the first "="
 *   --key         bare flag, recorded as the string "true"
 *
 * Tokens that do not start with "--" and are not consumed as a value are
 * ignored. When a flag is repeated, the last occurrence wins.
 *
 * @param argv Argument tokens following the sub-command name.
 * @returns The recognised flags. `format` is "json" only when the format flag
 *   equals "json", otherwise "human". Every other field is the raw string
 *   value, or `undefined` when the flag was not given.
 */
function parseFlags(argv) {
    // Null-prototype map: flag names are user input and must never resolve to
    // inherited Object.prototype members.
    const raw = Object.create(null);
    for (let i = 0; i < argv.length; i++) {
        const arg = argv[i];
        if (!arg.startsWith("--")) {
            continue;
        }
        const body = arg.slice(2);
        // `--key=value`: previously this was stored under the literal key
        // "key=value" and therefore silently ignored.
        const eq = body.indexOf("=");
        if (eq > 0) {
            raw[body.slice(0, eq)] = body.slice(eq + 1);
            continue;
        }
        const next = argv[i + 1];
        if (next !== undefined && !next.startsWith("--")) {
            raw[body] = next;
            i++; // the value token has been consumed
        }
        else {
            raw[body] = "true";
        }
    }
    return {
        format: raw.format === "json" ? "json" : "human",
        evaluationId: raw.evaluationId,
        baseUrl: raw.baseUrl,
        apiKey: raw.apiKey,
        baseline: raw.baseline,
        profile: raw.profile,
        minScore: raw.minScore,
        maxDrop: raw.maxDrop,
        warnDrop: raw.warnDrop,
        minN: raw.minN,
    };
}
83
+ // ── Helpers ──
84
/**
 * Mask a secret for display.
 *
 * @param value The secret; may be undefined, null or empty.
 * @returns `null` when no value is set; `"abcd...wxyz"` (first and last four
 *   characters) when the value is long enough that at least half of it stays
 *   hidden; `"****"` otherwise.
 */
function redact(value) {
    if (!value)
        return null;
    const visiblePerSide = 4;
    // Only reveal a prefix/suffix when the hidden middle is at least as long as
    // what is shown. The previous cut-off (length > 8) exposed 8 of 9
    // characters for short secrets.
    const minLengthForPreview = visiblePerSide * 4;
    if (typeof value === "string" && value.length >= minLengthForPreview)
        return `${value.slice(0, visiblePerSide)}...${value.slice(-visiblePerSide)}`;
    return "****";
}
91
+ // ── Build resolved config ──
92
/**
 * Assemble the report shown by `print-config`.
 *
 * Loads the config file for `cwd`, merges it with the flags that were actually
 * supplied, and records for every reported field its value and where that
 * value came from ("arg", "env", "file", "profile" or "default").
 *
 * @param cwd Directory used to locate the config file and to relativise its path.
 * @param flags Parsed flags as produced by `parseFlags`.
 * @returns An object with `cliVersion`, `configFile` (path relative to `cwd`,
 *   or null), `cwd`, `resolved` (ordered list of field records) and `env`
 *   (selected environment variables, secrets redacted, null when unset).
 */
function buildResolvedConfig(cwd, flags) {
    const configPath = (0, config_1.findConfigPath)(cwd);
    const fileConfig = (0, config_1.loadConfig)(cwd);
    // Forward only the flags that were explicitly supplied (truthy values).
    const forwardedFlags = [
        "evaluationId",
        "baseUrl",
        "baseline",
        "profile",
        "minScore",
        "maxDrop",
        "warnDrop",
        "minN",
    ];
    const cliArgs = {};
    for (const name of forwardedFlags) {
        if (flags[name]) {
            cliArgs[name] = flags[name];
        }
    }
    const merged = (0, config_1.mergeConfigWithArgs)(fileConfig, cliArgs);
    const envBaseUrl = process.env.EVALGATE_BASE_URL;
    const envApiKey = process.env.EVALGATE_API_KEY;
    const fields = [];
    const record = (key, value, source) => {
        fields.push({ key, value, source });
    };
    // evaluationId: flag, then config file.
    let evaluationIdOrigin = "default";
    if (flags.evaluationId) {
        evaluationIdOrigin = "arg";
    }
    else if (fileConfig?.evaluationId) {
        evaluationIdOrigin = "file";
    }
    record("evaluationId", merged.evaluationId ?? null, evaluationIdOrigin);
    // baseUrl: flag, then environment, then config file, then the built-in fallback.
    let baseUrlValue = "http://localhost:3000";
    let baseUrlOrigin = "default";
    if (flags.baseUrl) {
        baseUrlValue = flags.baseUrl;
        baseUrlOrigin = "arg";
    }
    else if (envBaseUrl) {
        baseUrlValue = envBaseUrl;
        baseUrlOrigin = "env";
    }
    else if (fileConfig?.baseUrl) {
        baseUrlValue = fileConfig.baseUrl;
        baseUrlOrigin = "file";
    }
    record("baseUrl", baseUrlValue, baseUrlOrigin);
    // apiKey: flag, then environment. The value is never reported unredacted.
    let rawApiKey = "";
    let apiKeyOrigin = "default";
    if (flags.apiKey) {
        rawApiKey = flags.apiKey;
        apiKeyOrigin = "arg";
    }
    else if (envApiKey) {
        rawApiKey = envApiKey;
        apiKeyOrigin = "env";
    }
    fields.push({
        key: "apiKey",
        value: redact(rawApiKey) ?? "(not set)",
        source: apiKeyOrigin,
        raw: rawApiKey ? "(redacted)" : undefined,
    });
    // profile: flag, then config file.
    const profileName = (flags.profile || fileConfig?.profile);
    let profileOrigin = "default";
    if (flags.profile) {
        profileOrigin = "arg";
    }
    else if (fileConfig?.profile) {
        profileOrigin = "file";
    }
    record("profile", profileName ?? null, profileOrigin);
    // Gate threshold fields: the first defined value among flag, config file
    // and selected profile decides the reported source.
    for (const key of ["minScore", "maxDrop", "warnDrop", "minN", "allowWeakEvidence"]) {
        const fromArgs = cliArgs[key];
        const fromFile = fileConfig?.[key];
        let fromProfile;
        if (profileName && profileName in profiles_1.PROFILES) {
            fromProfile = profiles_1.PROFILES[profileName][key];
        }
        let origin = "default";
        if (fromArgs !== undefined) {
            origin = "arg";
        }
        else if (fromFile !== undefined) {
            origin = "file";
        }
        else if (fromProfile !== undefined) {
            origin = "profile";
        }
        record(key, merged[key] ?? null, origin);
    }
    // baseline: flag, then config file; reported as "published" when unset.
    let baselineOrigin = "default";
    if (flags.baseline) {
        baselineOrigin = "arg";
    }
    else if (fileConfig?.baseline) {
        baselineOrigin = "file";
    }
    record("baseline", merged.baseline ?? "published", baselineOrigin);
    // Snapshot of relevant environment variables; API keys are redacted.
    const envVars = {
        EVALGATE_API_KEY: redact(envApiKey),
        EVALGATE_BASE_URL: envBaseUrl ?? null,
        OPENAI_API_KEY: redact(process.env.OPENAI_API_KEY),
        ANTHROPIC_API_KEY: redact(process.env.ANTHROPIC_API_KEY),
        AZURE_OPENAI_API_KEY: redact(process.env.AZURE_OPENAI_API_KEY),
        GITHUB_ACTIONS: process.env.GITHUB_ACTIONS ?? null,
        CI: process.env.CI ?? null,
    };
    const configFile = configPath ? path.relative(cwd, configPath) : null;
    return {
        cliVersion: version_1.SDK_VERSION,
        configFile,
        cwd,
        resolved: fields,
        env: envVars,
    };
}
226
+ // ── Output formatting ──
227
/**
 * Write the resolved-config report to stdout in human-readable form.
 *
 * Prints a header (CLI version, config file, working directory), then one
 * line per resolved field with keys padded to a common width and the source
 * in square brackets, then the environment variables that are set, followed
 * by a single line naming those that are not.
 *
 * @param output Report object as returned by `buildResolvedConfig`.
 */
function printHuman(output) {
    const say = (line) => console.log(line);
    say("\n evalgate print-config\n");
    say(` CLI version: ${output.cliVersion}`);
    say(` Config file: ${output.configFile ?? "(none found)"}`);
    say(` Working dir: ${output.cwd}`);
    say("");
    say(" Resolved configuration:");
    say("");
    // Widest key decides the padding so the value column lines up.
    const widestKey = output.resolved.reduce((widest, entry) => Math.max(widest, entry.key.length), -Infinity);
    for (const entry of output.resolved) {
        const shown = entry.value === null ? "(not set)" : String(entry.value);
        const filler = " ".repeat(widestKey - entry.key.length);
        say(` ${entry.key}${filler} ${shown} [${entry.source}]`);
    }
    say("");
    say(" Environment variables:");
    say("");
    // Set variables are printed as encountered; unset ones are collected and
    // listed together afterwards.
    const missing = [];
    for (const [name, shown] of Object.entries(output.env)) {
        if (shown === null) {
            missing.push(name);
            continue;
        }
        say(` ${name} = ${shown}`);
    }
    if (missing.length > 0) {
        say(` (not set: ${missing.join(", ")})`);
    }
    say("");
}
258
+ // ── Main ──
259
/**
 * Entry point for `evalgate print-config`.
 *
 * Parses the flags, resolves the configuration for the current working
 * directory and prints it, as pretty-printed JSON when the format flag is
 * "json" and as a human-readable report otherwise.
 *
 * @param argv Argument tokens following the sub-command name.
 * @returns Always 0; the command is informational only.
 */
function runPrintConfig(argv) {
    const flags = parseFlags(argv);
    const report = buildResolvedConfig(process.cwd(), flags);
    if (flags.format !== "json") {
        printHuman(report);
        return 0;
    }
    console.log(JSON.stringify(report, null, 2));
    return 0;
}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Gate profile presets: strict, balanced, fast.
3
+ * A profile supplies the gate thresholds; any flag that is set explicitly takes precedence over the profile's value.
4
+ */
5
// Preset table keyed by profile name. Every preset fixes the same five settings
// (minScore, maxDrop, warnDrop, minN, allowWeakEvidence) to literal values.
// From strict to fast, minScore and minN decrease while maxDrop and warnDrop increase.
+ export declare const PROFILES: {
6
// strict: highest minScore (95) and minN (30); maxDrop and warnDrop are both 0.
+ readonly strict: {
7
+ readonly minScore: 95;
8
+ readonly maxDrop: 0;
9
+ readonly warnDrop: 0;
10
+ readonly minN: 30;
11
+ readonly allowWeakEvidence: false;
12
+ };
13
// balanced: middle setting for every numeric value.
+ readonly balanced: {
14
+ readonly minScore: 90;
15
+ readonly maxDrop: 2;
16
+ readonly warnDrop: 1;
17
+ readonly minN: 10;
18
+ readonly allowWeakEvidence: false;
19
+ };
20
// fast: lowest minScore (85) and minN (5); the only preset with allowWeakEvidence true.
+ readonly fast: {
21
+ readonly minScore: 85;
22
+ readonly maxDrop: 5;
23
+ readonly warnDrop: 2;
24
+ readonly minN: 5;
25
+ readonly allowWeakEvidence: true;
26
+ };
27
+ };
28
// Union of the preset names declared on PROFILES: "strict" | "balanced" | "fast".
+ export type ProfileName = keyof typeof PROFILES;
@@ -0,0 +1,30 @@
1
+ "use strict";
2
+ /**
3
+ * Gate profile presets: strict, balanced, fast.
4
+ * A profile supplies the gate thresholds; any flag that is set explicitly takes precedence over the profile's value.
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.PROFILES = void 0;
8
// Preset table keyed by profile name. Every preset defines the same five settings:
// minScore, maxDrop, warnDrop, minN and allowWeakEvidence.
// NOTE(review): the object is not frozen here — confirm that no caller mutates it.
+ exports.PROFILES = {
9
// Highest minScore and minN; maxDrop and warnDrop are both 0.
+ strict: {
10
+ minScore: 95,
11
+ maxDrop: 0,
12
+ warnDrop: 0,
13
+ minN: 30,
14
+ allowWeakEvidence: false,
15
+ },
16
// Middle setting for every numeric value.
+ balanced: {
17
+ minScore: 90,
18
+ maxDrop: 2,
19
+ warnDrop: 1,
20
+ minN: 10,
21
+ allowWeakEvidence: false,
22
+ },
23
// Lowest minScore and minN; the only preset that sets allowWeakEvidence to true.
+ fast: {
24
+ minScore: 85,
25
+ maxDrop: 5,
26
+ warnDrop: 2,
27
+ minN: 5,
28
+ allowWeakEvidence: true,
29
+ },
30
+ };
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Canonical reason codes for gate evaluation.
3
+ * Used by gate.ts and formatters for consistent failure classification.
4
+ */
5
// Lookup object of reason codes. Each member's value is the same string literal as its key,
// so a code can be written as REASON_CODES.X and compared against plain strings.
+ export declare const REASON_CODES: {
6
+ readonly PASS: "PASS";
7
+ readonly WARN_REGRESSION: "WARN_REGRESSION";
8
+ readonly LOW_SAMPLE_SIZE: "LOW_SAMPLE_SIZE";
9
+ readonly BASELINE_MISSING: "BASELINE_MISSING";
10
+ readonly SCORE_TOO_LOW: "SCORE_TOO_LOW";
11
+ readonly DELTA_TOO_HIGH: "DELTA_TOO_HIGH";
12
+ readonly COST_BUDGET_EXCEEDED: "COST_BUDGET_EXCEEDED";
13
+ readonly LATENCY_BUDGET_EXCEEDED: "LATENCY_BUDGET_EXCEEDED";
14
+ readonly POLICY_FAILED: "POLICY_FAILED";
15
+ readonly UNKNOWN: "UNKNOWN";
16
+ };
17
// Union of all string values held by REASON_CODES ("PASS" | "WARN_REGRESSION" | ... | "UNKNOWN").
+ export type ReasonCode = (typeof REASON_CODES)[keyof typeof REASON_CODES];
@@ -0,0 +1,19 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.REASON_CODES = void 0;
4
+ /**
5
+ * Canonical reason codes for gate evaluation.
6
+ * Used by gate.ts and formatters for consistent failure classification.
7
+ */
8
// Each key maps to the identical string, so the table serves as an enum of string constants.
+ exports.REASON_CODES = {
9
+ PASS: "PASS",
10
+ WARN_REGRESSION: "WARN_REGRESSION",
11
+ LOW_SAMPLE_SIZE: "LOW_SAMPLE_SIZE",
12
+ BASELINE_MISSING: "BASELINE_MISSING",
13
+ SCORE_TOO_LOW: "SCORE_TOO_LOW",
14
+ DELTA_TOO_HIGH: "DELTA_TOO_HIGH",
15
+ COST_BUDGET_EXCEEDED: "COST_BUDGET_EXCEEDED",
16
+ LATENCY_BUDGET_EXCEEDED: "LATENCY_BUDGET_EXCEEDED",
17
+ POLICY_FAILED: "POLICY_FAILED",
18
+ UNKNOWN: "UNKNOWN",
19
+ };
@@ -0,0 +1,15 @@
1
+ /**
2
+ * evalgate gate — Run the regression gate
3
+ *
4
+ * Two modes:
5
+ * 1. Project mode: delegates to eval:regression-gate npm script (full gate)
6
+ * 2. Built-in mode: runs `npm test`, compares against evals/baseline.json
7
+ *
8
+ * Built-in mode activates when no eval:regression-gate script is defined,
9
+ * making `npx evalgate gate` work for any project after `npx evalgate init`.
10
+ */
11
// Parsed arguments of the `gate` command.
+ export interface GateArgs {
12
// Output format selector; one of "human", "json" or "github".
+ format: "human" | "json" | "github";
13
+ }
14
// Turns the raw argument list into a GateArgs object.
// NOTE(review): the default format and the handling of unknown flags are not visible in this declaration.
+ export declare function parseGateArgs(argv: string[]): GateArgs;
15
// Runs the `gate` command for the given argument list and returns a number.
// NOTE(review): presumably the process exit code — confirm against the implementation.
+ export declare function runGate(argv: string[]): number;