@evalgate/sdk 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +638 -0
  2. package/README.md +398 -0
  3. package/dist/assertions.d.ts +189 -0
  4. package/dist/assertions.js +662 -0
  5. package/dist/batch.d.ts +68 -0
  6. package/dist/batch.js +179 -0
  7. package/dist/cache.d.ts +65 -0
  8. package/dist/cache.js +131 -0
  9. package/dist/cli/api.d.ts +108 -0
  10. package/dist/cli/api.js +132 -0
  11. package/dist/cli/baseline.d.ts +10 -0
  12. package/dist/cli/baseline.js +172 -0
  13. package/dist/cli/check.d.ts +73 -0
  14. package/dist/cli/check.js +355 -0
  15. package/dist/cli/ci-context.d.ts +6 -0
  16. package/dist/cli/ci-context.js +112 -0
  17. package/dist/cli/ci.d.ts +45 -0
  18. package/dist/cli/ci.js +192 -0
  19. package/dist/cli/config.d.ts +30 -0
  20. package/dist/cli/config.js +230 -0
  21. package/dist/cli/constants.d.ts +15 -0
  22. package/dist/cli/constants.js +18 -0
  23. package/dist/cli/diff.d.ts +173 -0
  24. package/dist/cli/diff.js +685 -0
  25. package/dist/cli/discover.d.ts +84 -0
  26. package/dist/cli/discover.js +419 -0
  27. package/dist/cli/doctor.d.ts +88 -0
  28. package/dist/cli/doctor.js +675 -0
  29. package/dist/cli/env.d.ts +21 -0
  30. package/dist/cli/env.js +42 -0
  31. package/dist/cli/explain.d.ts +58 -0
  32. package/dist/cli/explain.js +561 -0
  33. package/dist/cli/formatters/github.d.ts +8 -0
  34. package/dist/cli/formatters/github.js +135 -0
  35. package/dist/cli/formatters/human.d.ts +6 -0
  36. package/dist/cli/formatters/human.js +110 -0
  37. package/dist/cli/formatters/json.d.ts +6 -0
  38. package/dist/cli/formatters/json.js +10 -0
  39. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  40. package/dist/cli/formatters/pr-comment.js +103 -0
  41. package/dist/cli/formatters/types.d.ts +103 -0
  42. package/dist/cli/formatters/types.js +8 -0
  43. package/dist/cli/gate.d.ts +21 -0
  44. package/dist/cli/gate.js +179 -0
  45. package/dist/cli/impact-analysis.d.ts +63 -0
  46. package/dist/cli/impact-analysis.js +252 -0
  47. package/dist/cli/index.d.ts +9 -0
  48. package/dist/cli/index.js +332 -0
  49. package/dist/cli/init.d.ts +16 -0
  50. package/dist/cli/init.js +292 -0
  51. package/dist/cli/manifest.d.ts +103 -0
  52. package/dist/cli/manifest.js +282 -0
  53. package/dist/cli/migrate.d.ts +41 -0
  54. package/dist/cli/migrate.js +349 -0
  55. package/dist/cli/policy-packs.d.ts +23 -0
  56. package/dist/cli/policy-packs.js +89 -0
  57. package/dist/cli/print-config.d.ts +29 -0
  58. package/dist/cli/print-config.js +270 -0
  59. package/dist/cli/profiles.d.ts +28 -0
  60. package/dist/cli/profiles.js +30 -0
  61. package/dist/cli/reason-codes.d.ts +17 -0
  62. package/dist/cli/reason-codes.js +19 -0
  63. package/dist/cli/regression-gate.d.ts +15 -0
  64. package/dist/cli/regression-gate.js +341 -0
  65. package/dist/cli/render/snippet.d.ts +5 -0
  66. package/dist/cli/render/snippet.js +15 -0
  67. package/dist/cli/render/sort.d.ts +10 -0
  68. package/dist/cli/render/sort.js +24 -0
  69. package/dist/cli/report/build-check-report.d.ts +19 -0
  70. package/dist/cli/report/build-check-report.js +132 -0
  71. package/dist/cli/run.d.ts +101 -0
  72. package/dist/cli/run.js +395 -0
  73. package/dist/cli/share.d.ts +17 -0
  74. package/dist/cli/share.js +91 -0
  75. package/dist/cli/upgrade.d.ts +15 -0
  76. package/dist/cli/upgrade.js +492 -0
  77. package/dist/cli/workspace.d.ts +31 -0
  78. package/dist/cli/workspace.js +68 -0
  79. package/dist/client.d.ts +368 -0
  80. package/dist/client.js +893 -0
  81. package/dist/client.request.test.d.ts +1 -0
  82. package/dist/client.request.test.js +232 -0
  83. package/dist/context.d.ts +134 -0
  84. package/dist/context.js +215 -0
  85. package/dist/errors.d.ts +82 -0
  86. package/dist/errors.js +298 -0
  87. package/dist/export.d.ts +195 -0
  88. package/dist/export.js +344 -0
  89. package/dist/index.d.ts +44 -0
  90. package/dist/index.js +153 -0
  91. package/dist/integrations/anthropic.d.ts +91 -0
  92. package/dist/integrations/anthropic.js +163 -0
  93. package/dist/integrations/openai-eval.d.ts +57 -0
  94. package/dist/integrations/openai-eval.js +232 -0
  95. package/dist/integrations/openai.d.ts +92 -0
  96. package/dist/integrations/openai.js +160 -0
  97. package/dist/local.d.ts +39 -0
  98. package/dist/local.js +148 -0
  99. package/dist/logger.d.ts +128 -0
  100. package/dist/logger.js +227 -0
  101. package/dist/matchers/index.d.ts +1 -0
  102. package/dist/matchers/index.js +6 -0
  103. package/dist/matchers/to-pass-gate.d.ts +29 -0
  104. package/dist/matchers/to-pass-gate.js +35 -0
  105. package/dist/pagination.d.ts +74 -0
  106. package/dist/pagination.js +139 -0
  107. package/dist/regression.d.ts +100 -0
  108. package/dist/regression.js +44 -0
  109. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  110. package/dist/runtime/adapters/config-to-dsl.js +400 -0
  111. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  112. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  113. package/dist/runtime/context.d.ts +26 -0
  114. package/dist/runtime/context.js +74 -0
  115. package/dist/runtime/eval.d.ts +46 -0
  116. package/dist/runtime/eval.js +244 -0
  117. package/dist/runtime/execution-mode.d.ts +80 -0
  118. package/dist/runtime/execution-mode.js +357 -0
  119. package/dist/runtime/executor.d.ts +16 -0
  120. package/dist/runtime/executor.js +152 -0
  121. package/dist/runtime/registry.d.ts +78 -0
  122. package/dist/runtime/registry.js +403 -0
  123. package/dist/runtime/run-report.d.ts +200 -0
  124. package/dist/runtime/run-report.js +222 -0
  125. package/dist/runtime/types.d.ts +356 -0
  126. package/dist/runtime/types.js +76 -0
  127. package/dist/snapshot.d.ts +176 -0
  128. package/dist/snapshot.js +322 -0
  129. package/dist/streaming.d.ts +173 -0
  130. package/dist/streaming.js +268 -0
  131. package/dist/testing.d.ts +273 -0
  132. package/dist/testing.js +317 -0
  133. package/dist/types.d.ts +754 -0
  134. package/dist/types.js +54 -0
  135. package/dist/utils/input-hash.d.ts +8 -0
  136. package/dist/utils/input-hash.js +41 -0
  137. package/dist/version.d.ts +7 -0
  138. package/dist/version.js +10 -0
  139. package/dist/workflows.d.ts +389 -0
  140. package/dist/workflows.js +671 -0
  141. package/package.json +117 -0
@@ -0,0 +1,172 @@
1
+ "use strict";
2
+ /**
3
+ * evalgate baseline — Baseline management commands
4
+ *
5
+ * Subcommands:
6
+ * evalgate baseline init — Create a starter evals/baseline.json
7
+ * evalgate baseline update — Run tests + update baseline with real scores
8
+ */
9
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ var desc = Object.getOwnPropertyDescriptor(m, k);
12
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
13
+ desc = { enumerable: true, get: function() { return m[k]; } };
14
+ }
15
+ Object.defineProperty(o, k2, desc);
16
+ }) : (function(o, m, k, k2) {
17
+ if (k2 === undefined) k2 = k;
18
+ o[k2] = m[k];
19
+ }));
20
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
21
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
22
+ }) : function(o, v) {
23
+ o["default"] = v;
24
+ });
25
+ var __importStar = (this && this.__importStar) || (function () {
26
+ var ownKeys = function(o) {
27
+ ownKeys = Object.getOwnPropertyNames || function (o) {
28
+ var ar = [];
29
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
30
+ return ar;
31
+ };
32
+ return ownKeys(o);
33
+ };
34
+ return function (mod) {
35
+ if (mod && mod.__esModule) return mod;
36
+ var result = {};
37
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
38
+ __setModuleDefault(result, mod);
39
+ return result;
40
+ };
41
+ })();
42
+ Object.defineProperty(exports, "__esModule", { value: true });
43
+ exports.runBaselineInit = runBaselineInit;
44
+ exports.runBaselineUpdate = runBaselineUpdate;
45
+ exports.runBaseline = runBaseline;
46
+ const node_child_process_1 = require("node:child_process");
47
+ const fs = __importStar(require("node:fs"));
48
+ const path = __importStar(require("node:path"));
49
+ const BASELINE_REL = "evals/baseline.json";
50
/**
 * Pick the package manager for a project by probing for lockfiles in `cwd`.
 * A pnpm lockfile wins over a yarn lockfile; npm is the fallback when
 * neither is present.
 */
function detectPackageManager(cwd) {
    // Ordered by precedence: the first lockfile found decides the result.
    const lockfileToManager = [
        ["pnpm-lock.yaml", "pnpm"],
        ["yarn.lock", "yarn"],
    ];
    for (const [lockfile, manager] of lockfileToManager) {
        if (fs.existsSync(path.join(cwd, lockfile))) {
            return manager;
        }
    }
    return "npm";
}
58
/**
 * Run a package.json script (`<pm> run <scriptName>`) through the package
 * manager detected for `cwd`. The child inherits this process's stdio.
 *
 * @param cwd Directory to run in; also used to detect the package manager.
 * @param scriptName Name of the script to run.
 * @returns The child's exit status, or 1 when no status is available
 *   (the process could not be spawned or was terminated by a signal).
 */
function runScript(cwd, scriptName) {
    const pm = detectPackageManager(cwd);
    // A shell is used on Windows only — presumably because package managers
    // are installed as .cmd shims there (confirm).
    const isWin = process.platform === "win32";
    const result = (0, node_child_process_1.spawnSync)(pm, ["run", scriptName], {
        cwd,
        stdio: "inherit",
        shell: isWin,
    });
    // spawnSync reports launch failures via `error` with a null status;
    // say why instead of silently returning 1.
    if (result.error) {
        console.error(`❌ Failed to run '${pm} run ${scriptName}': ${result.error.message}`);
    }
    return result.status ?? 1;
}
69
+ function runBaselineInit(cwd) {
70
+ const baselinePath = path.join(cwd, BASELINE_REL);
71
+ if (fs.existsSync(baselinePath)) {
72
+ console.log(`⚠ ${BASELINE_REL} already exists. Delete it first or use 'evalgate baseline update'.`);
73
+ return 1;
74
+ }
75
+ // Ensure evals/ directory exists
76
+ const evalsDir = path.join(cwd, "evals");
77
+ if (!fs.existsSync(evalsDir)) {
78
+ fs.mkdirSync(evalsDir, { recursive: true });
79
+ }
80
+ const user = process.env.USER || process.env.USERNAME || "unknown";
81
+ const now = new Date().toISOString();
82
+ const baseline = {
83
+ schemaVersion: 1,
84
+ description: "Regression gate baseline — created by evalgate baseline init",
85
+ generatedAt: now,
86
+ generatedBy: user,
87
+ commitSha: "0000000",
88
+ updatedAt: now,
89
+ updatedBy: user,
90
+ tolerance: {
91
+ scoreDrop: 5,
92
+ passRateDrop: 5,
93
+ maxLatencyIncreaseMs: 200,
94
+ maxCostIncreaseUsd: 0.05,
95
+ },
96
+ goldenEval: {
97
+ score: 100,
98
+ passRate: 100,
99
+ totalCases: 3,
100
+ passedCases: 3,
101
+ },
102
+ qualityScore: {
103
+ overall: 90,
104
+ grade: "A",
105
+ accuracy: 85,
106
+ safety: 100,
107
+ latency: 90,
108
+ cost: 90,
109
+ consistency: 90,
110
+ },
111
+ confidenceTests: {
112
+ unitPassed: true,
113
+ unitTotal: 0,
114
+ dbPassed: true,
115
+ dbTotal: 0,
116
+ },
117
+ productMetrics: {},
118
+ };
119
+ fs.writeFileSync(baselinePath, `${JSON.stringify(baseline, null, 2)}\n`);
120
+ console.log(`✅ Created ${BASELINE_REL} with sample values\n`);
121
+ console.log("Next steps:");
122
+ console.log(` 1. Commit ${BASELINE_REL} to your repo`);
123
+ console.log(" 2. Run 'evalgate baseline update' to populate with real scores");
124
+ console.log(" 3. Run 'evalgate gate' to verify the regression gate\n");
125
+ return 0;
126
+ }
127
+ // ── baseline update ──
128
+ function runBaselineUpdate(cwd) {
129
+ // Check if eval:baseline-update script exists in package.json
130
+ const pkgPath = path.join(cwd, "package.json");
131
+ if (!fs.existsSync(pkgPath)) {
132
+ console.error("❌ No package.json found. Run this from your project root.");
133
+ return 1;
134
+ }
135
+ let pkg;
136
+ try {
137
+ pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
138
+ }
139
+ catch {
140
+ console.error("❌ Failed to parse package.json");
141
+ return 1;
142
+ }
143
+ if (!pkg.scripts?.["eval:baseline-update"]) {
144
+ console.error("❌ Missing 'eval:baseline-update' script in package.json.");
145
+ console.error(' Add it: "eval:baseline-update": "npx tsx scripts/regression-gate.ts --update-baseline"');
146
+ return 1;
147
+ }
148
+ console.log("📊 Running baseline update...\n");
149
+ return runScript(cwd, "eval:baseline-update");
150
+ }
151
+ // ── baseline router ──
152
+ function runBaseline(argv) {
153
+ const sub = argv[0];
154
+ const cwd = process.cwd();
155
+ if (sub === "init") {
156
+ return runBaselineInit(cwd);
157
+ }
158
+ if (sub === "update") {
159
+ return runBaselineUpdate(cwd);
160
+ }
161
+ console.log(`evalai baseline — Manage regression gate baselines
162
+
163
+ Usage:
164
+ evalgate baseline init Create starter ${BASELINE_REL}
165
+ evalgate baseline update Run tests and update baseline with real scores
166
+
167
+ Examples:
168
+ evalgate baseline init
169
+ evalgate baseline update
170
+ `);
171
+ return sub === "--help" || sub === "-h" ? 0 : 1;
172
+ }
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * evalgate check — CI/CD evaluation gate
4
+ *
5
+ * Usage:
6
+ * evalgate check --minScore 92 --evaluationId 42
7
+ * evalgate check --minScore 90 --maxDrop 5 --evaluationId 42
8
+ * evalgate check --policy HIPAA --evaluationId 42
9
+ * evalgate check --baseline published --evaluationId 42
10
+ *
11
+ * Flags:
12
+ * --minScore <n> Fail if quality score < n (0-100)
13
+ * --maxDrop <n> Fail if score dropped > n points from baseline
14
+ * --minN <n> Fail if total test cases < n (low sample size)
15
+ * --allowWeakEvidence If false (default), fail when evidenceLevel is 'weak'
16
+ * --policy <name> Enforce a compliance policy (e.g. HIPAA, SOC2, GDPR)
17
+ * --baseline <mode> Baseline comparison mode: "published" (default), "previous", "production", or "auto"
18
+ * --evaluationId <id> Required. The evaluation to gate on.
19
+ * --baseUrl <url> API base URL (default: EVALGATE_BASE_URL or http://localhost:3000)
20
+ * --apiKey <key> API key (default: EVALGATE_API_KEY env var)
21
+ * --share <mode> Share link: "always" | "fail" | "never" (default: never)
22
+ * fail = create public share link only when gate fails (CI-friendly)
23
+ * --pr-comment-out <file> Write PR comment markdown to file (for GitHub Action to post)
24
+ * --profile <name> Preset: strict (95/0/30), balanced (90/2/10), fast (85/5/5). Explicit flags override.
25
+ *
26
+ * Exit codes:
27
+ * 0 — Gate passed
28
+ * 1 — Gate failed: score below threshold
29
+ * 2 — Gate failed: regression exceeded maxDrop
30
+ * 3 — Gate failed: policy violation
31
+ * 4 — API error / network failure
32
+ * 5 — Invalid arguments
33
+ * 6 — Gate failed: total test cases < minN
34
+ * 7 — Gate failed: weak evidence (evidenceLevel === 'weak')
35
+ * 8 — Gate warned: near-regression (warnDrop ≤ drop < maxDrop)
36
+ *
37
+ * Environment:
38
+ * EVALGATE_BASE_URL — API base URL (default: http://localhost:3000)
39
+ * EVALGATE_API_KEY — API key for authentication
40
+ */
41
+ export { EXIT } from "./constants";
42
/** Output formats selectable for the check report. */
export type FormatType =
    | "human"
    | "json"
    | "github";
/** Share-link modes: "always" | "fail" | "never". */
export type ShareMode =
    | "always"
    | "fail"
    | "never";
44
/** Fully resolved options for one `evalgate check` run. */
export interface CheckArgs {
    /** API base URL. */
    baseUrl: string;
    /** API key used for authentication. */
    apiKey: string;
    /** Gate fails if the quality score is below this value (0-100). */
    minScore: number;
    /** Gate fails if the score dropped more than this many points from the baseline. */
    maxDrop?: number;
    /** Near-regression warning threshold (warnDrop ≤ drop < maxDrop yields the "warned" exit code). */
    warnDrop?: number;
    /** Gate fails if the total number of test cases is below this value. */
    minN?: number;
    /** When false, the gate fails when evidenceLevel is 'weak'. */
    allowWeakEvidence: boolean;
    /** The evaluation to gate on. */
    evaluationId: string;
    /** Compliance policy to enforce (e.g. HIPAA, SOC2, GDPR). */
    policy?: string;
    /** Baseline comparison mode. */
    baseline: "published" | "previous" | "production" | "auto";
    /** Output format of the report. */
    format: FormatType;
    /** NOTE(review): effect not described in this file — confirm against the formatters. */
    explain: boolean;
    /** "import": when the gate fails, import the run with CI context. */
    onFail?: "import";
    /** Share link mode; "fail" creates a public share link only when the gate fails. */
    share: ShareMode;
    /** File to write the PR comment markdown to. */
    prCommentOut?: string;
    /** Presumably a cost ceiling in USD — confirm against the gate evaluation. */
    maxCostUsd?: number;
    /** Presumably a latency ceiling in milliseconds — confirm against the gate evaluation. */
    maxLatencyMs?: number;
    /** Presumably a ceiling on cost increase in USD — confirm against the gate evaluation. */
    maxCostDeltaUsd?: number;
}
64
/**
 * Outcome of argument parsing, discriminated on `ok`: either the resolved
 * arguments, or the exit code and message to report.
 */
export type ParseArgsResult =
    | { ok: true; args: CheckArgs }
    | { ok: false; exitCode: number; message: string };
72
/** Parse `evalgate check` command-line arguments into a {@link ParseArgsResult}. */
export declare function parseArgs(
    argv: string[],
): ParseArgsResult;
/** Run the gate for the given arguments; resolves to the process exit code. */
export declare function runCheck(
    args: CheckArgs,
): Promise<number>;
@@ -0,0 +1,355 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ /**
4
+ * evalgate check — CI/CD evaluation gate
5
+ *
6
+ * Usage:
7
+ * evalgate check --minScore 92 --evaluationId 42
8
+ * evalgate check --minScore 90 --maxDrop 5 --evaluationId 42
9
+ * evalgate check --policy HIPAA --evaluationId 42
10
+ * evalgate check --baseline published --evaluationId 42
11
+ *
12
+ * Flags:
13
+ * --minScore <n> Fail if quality score < n (0-100)
14
+ * --maxDrop <n> Fail if score dropped > n points from baseline
15
+ * --minN <n> Fail if total test cases < n (low sample size)
16
+ * --allowWeakEvidence If false (default), fail when evidenceLevel is 'weak'
17
+ * --policy <name> Enforce a compliance policy (e.g. HIPAA, SOC2, GDPR)
18
+ * --baseline <mode> Baseline comparison mode: "published" (default), "previous", "production", or "auto"
19
+ * --evaluationId <id> Required. The evaluation to gate on.
20
+ * --baseUrl <url> API base URL (default: EVALGATE_BASE_URL or http://localhost:3000)
21
+ * --apiKey <key> API key (default: EVALGATE_API_KEY env var)
22
+ * --share <mode> Share link: "always" | "fail" | "never" (default: never)
23
+ * fail = create public share link only when gate fails (CI-friendly)
24
+ * --pr-comment-out <file> Write PR comment markdown to file (for GitHub Action to post)
25
+ * --profile <name> Preset: strict (95/0/30), balanced (90/2/10), fast (85/5/5). Explicit flags override.
26
+ *
27
+ * Exit codes:
28
+ * 0 — Gate passed
29
+ * 1 — Gate failed: score below threshold
30
+ * 2 — Gate failed: regression exceeded maxDrop
31
+ * 3 — Gate failed: policy violation
32
+ * 4 — API error / network failure
33
+ * 5 — Invalid arguments
34
+ * 6 — Gate failed: total test cases < minN
35
+ * 7 — Gate failed: weak evidence (evidenceLevel === 'weak')
36
+ * 8 — Gate warned: near-regression (warnDrop ≤ drop < maxDrop)
37
+ *
38
+ * Environment:
39
+ * EVALGATE_BASE_URL — API base URL (default: http://localhost:3000)
40
+ * EVALGATE_API_KEY — API key for authentication
41
+ */
42
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
43
+ if (k2 === undefined) k2 = k;
44
+ var desc = Object.getOwnPropertyDescriptor(m, k);
45
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
46
+ desc = { enumerable: true, get: function() { return m[k]; } };
47
+ }
48
+ Object.defineProperty(o, k2, desc);
49
+ }) : (function(o, m, k, k2) {
50
+ if (k2 === undefined) k2 = k;
51
+ o[k2] = m[k];
52
+ }));
53
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
54
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
55
+ }) : function(o, v) {
56
+ o["default"] = v;
57
+ });
58
+ var __importStar = (this && this.__importStar) || (function () {
59
+ var ownKeys = function(o) {
60
+ ownKeys = Object.getOwnPropertyNames || function (o) {
61
+ var ar = [];
62
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
63
+ return ar;
64
+ };
65
+ return ownKeys(o);
66
+ };
67
+ return function (mod) {
68
+ if (mod && mod.__esModule) return mod;
69
+ var result = {};
70
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
71
+ __setModuleDefault(result, mod);
72
+ return result;
73
+ };
74
+ })();
75
+ Object.defineProperty(exports, "__esModule", { value: true });
76
+ exports.EXIT = void 0;
77
+ exports.parseArgs = parseArgs;
78
+ exports.runCheck = runCheck;
79
+ const fs = __importStar(require("node:fs"));
80
+ const path = __importStar(require("node:path"));
81
+ const api_1 = require("./api");
82
+ const ci_context_1 = require("./ci-context");
83
+ const config_1 = require("./config");
84
+ const constants_1 = require("./constants");
85
+ const github_1 = require("./formatters/github");
86
+ const human_1 = require("./formatters/human");
87
+ const json_1 = require("./formatters/json");
88
+ const pr_comment_1 = require("./formatters/pr-comment");
89
+ const gate_1 = require("./gate");
90
+ const build_check_report_1 = require("./report/build-check-report");
91
+ var constants_2 = require("./constants");
92
+ Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return constants_2.EXIT; } });
93
+ function parseArgs(argv) {
94
+ const args = {};
95
+ for (let i = 0; i < argv.length; i++) {
96
+ const arg = argv[i];
97
+ if (arg.startsWith("--")) {
98
+ const key = arg.slice(2);
99
+ const next = argv[i + 1];
100
+ if (next !== undefined && !next.startsWith("--")) {
101
+ args[key] = next;
102
+ i++;
103
+ }
104
+ else {
105
+ args[key] = "true";
106
+ }
107
+ }
108
+ }
109
+ let baseUrl = args.baseUrl || process.env.EVALGATE_BASE_URL || "http://localhost:3000";
110
+ const apiKey = args.apiKey ||
111
+ process.env.EVALGATE_API_KEY ||
112
+ process.env.EVALAI_API_KEY ||
113
+ "";
114
+ let minScore = parseInt(args.minScore || "0", 10);
115
+ let maxDrop = args.maxDrop ? parseInt(args.maxDrop, 10) : undefined;
116
+ let warnDrop = args.warnDrop ? parseInt(args.warnDrop, 10) : undefined;
117
+ let minN = args.minN ? parseInt(args.minN, 10) : undefined;
118
+ let allowWeakEvidence = args.allowWeakEvidence === "true" || args.allowWeakEvidence === "1";
119
+ let evaluationId = args.evaluationId || "";
120
+ const policy = args.policy || undefined;
121
+ const formatRaw = args.format || "human";
122
+ const format = formatRaw === "json" ? "json" : formatRaw === "github" ? "github" : "human";
123
+ const explain = args.explain === "true" || args.explain === "1";
124
+ const onFail = args.onFail === "import" ? "import" : undefined;
125
+ const shareRaw = args.share || "never";
126
+ const share = shareRaw === "always" ? "always" : shareRaw === "fail" ? "fail" : "never";
127
+ const prCommentOut = args["pr-comment-out"] || args.prCommentOut || undefined;
128
+ const maxCostUsd = args["max-cost-usd"] || args.maxCostUsd
129
+ ? parseFloat(args["max-cost-usd"] || args.maxCostUsd || "0")
130
+ : undefined;
131
+ const maxLatencyMs = args["max-latency-ms"] || args.maxLatencyMs
132
+ ? parseInt(args["max-latency-ms"] || args.maxLatencyMs || "0", 10)
133
+ : undefined;
134
+ const maxCostDeltaUsd = args["max-cost-delta-usd"] || args.maxCostDeltaUsd
135
+ ? parseFloat(args["max-cost-delta-usd"] || args.maxCostDeltaUsd || "0")
136
+ : undefined;
137
+ const profile = (args.profile || args.profile);
138
+ let baseline = (args.baseline === "auto"
139
+ ? "auto"
140
+ : args.baseline === "previous"
141
+ ? "previous"
142
+ : args.baseline === "production"
143
+ ? "production"
144
+ : "published");
145
+ const config = (0, config_1.loadConfig)(process.cwd());
146
+ const merged = (0, config_1.mergeConfigWithArgs)(config, {
147
+ evaluationId: args.evaluationId,
148
+ baseUrl: args.baseUrl ||
149
+ process.env.EVALGATE_BASE_URL ||
150
+ process.env.EVALAI_BASE_URL,
151
+ minScore: args.minScore,
152
+ maxDrop: args.maxDrop,
153
+ warnDrop: args.warnDrop,
154
+ minN: args.minN,
155
+ allowWeakEvidence: args.allowWeakEvidence,
156
+ baseline: args.baseline,
157
+ profile: profile,
158
+ prCommentOut: args["pr-comment-out"] ?? args.prCommentOut,
159
+ });
160
+ if (!evaluationId && merged.evaluationId)
161
+ evaluationId = merged.evaluationId;
162
+ if (merged.baseUrl)
163
+ baseUrl = merged.baseUrl;
164
+ if (merged.minScore != null && args.minScore === undefined)
165
+ minScore = merged.minScore ?? 0;
166
+ if (merged.maxDrop != null && args.maxDrop === undefined)
167
+ maxDrop = merged.maxDrop;
168
+ if (merged.warnDrop != null && args.warnDrop === undefined)
169
+ warnDrop = merged.warnDrop;
170
+ if (merged.minN != null && args.minN === undefined)
171
+ minN = merged.minN;
172
+ if (merged.allowWeakEvidence != null && args.allowWeakEvidence === undefined)
173
+ allowWeakEvidence = merged.allowWeakEvidence ?? false;
174
+ if (merged.baseline && !args.baseline)
175
+ baseline = merged.baseline;
176
+ if (!apiKey) {
177
+ return {
178
+ ok: false,
179
+ exitCode: constants_1.EXIT.BAD_ARGS,
180
+ message: "Error: --apiKey or EVALGATE_API_KEY is required",
181
+ };
182
+ }
183
+ if (!evaluationId) {
184
+ return {
185
+ ok: false,
186
+ exitCode: constants_1.EXIT.BAD_ARGS,
187
+ message: "Run npx evalgate init and paste your evaluationId, or pass --evaluationId.",
188
+ };
189
+ }
190
+ if (Number.isNaN(minScore) || minScore < 0 || minScore > 100) {
191
+ return {
192
+ ok: false,
193
+ exitCode: constants_1.EXIT.BAD_ARGS,
194
+ message: "Error: --minScore must be 0-100",
195
+ };
196
+ }
197
+ if (minN !== undefined && (Number.isNaN(minN) || minN < 1)) {
198
+ return {
199
+ ok: false,
200
+ exitCode: constants_1.EXIT.BAD_ARGS,
201
+ message: "Error: --minN must be a positive number",
202
+ };
203
+ }
204
+ return {
205
+ ok: true,
206
+ args: {
207
+ baseUrl,
208
+ apiKey,
209
+ minScore,
210
+ maxDrop,
211
+ warnDrop,
212
+ minN,
213
+ allowWeakEvidence,
214
+ evaluationId,
215
+ policy,
216
+ baseline,
217
+ format,
218
+ explain,
219
+ onFail,
220
+ share,
221
+ prCommentOut,
222
+ maxCostUsd: maxCostUsd != null && !Number.isNaN(maxCostUsd)
223
+ ? maxCostUsd
224
+ : undefined,
225
+ maxLatencyMs: maxLatencyMs != null && !Number.isNaN(maxLatencyMs)
226
+ ? maxLatencyMs
227
+ : undefined,
228
+ maxCostDeltaUsd: maxCostDeltaUsd != null && !Number.isNaN(maxCostDeltaUsd)
229
+ ? maxCostDeltaUsd
230
+ : undefined,
231
+ },
232
+ };
233
+ }
234
+ async function runCheck(args) {
235
+ const qualityResult = await (0, api_1.fetchQualityLatest)(args.baseUrl, args.apiKey, args.evaluationId, args.baseline);
236
+ if (!qualityResult.ok) {
237
+ if (qualityResult.status === 0) {
238
+ console.error(`EvalGate gate ERROR: Network failure — ${qualityResult.body}`);
239
+ }
240
+ else {
241
+ console.error(`EvalGate gate ERROR: API returned ${qualityResult.status} — ${qualityResult.body}`);
242
+ }
243
+ return constants_1.EXIT.API_ERROR;
244
+ }
245
+ const { data: quality, requestId } = qualityResult;
246
+ const evaluationRunId = quality?.evaluationRunId;
247
+ let runDetails = null;
248
+ if (evaluationRunId != null) {
249
+ const runRes = await (0, api_1.fetchRunDetails)(args.baseUrl, args.apiKey, args.evaluationId, evaluationRunId);
250
+ if (runRes.ok)
251
+ runDetails = runRes.data;
252
+ }
253
+ const gateResult = (0, gate_1.evaluateGate)(args, quality);
254
+ // Create share before report when PR comment needs shareUrl (--pr-comment-out + --share fail + gate failed)
255
+ let shareUrl;
256
+ const shouldCreateShare = quality?.evaluationRunId != null &&
257
+ (args.share === "always" || (args.share === "fail" && !gateResult.passed));
258
+ if (shouldCreateShare) {
259
+ const exportRes = await (0, api_1.fetchRunExport)(args.baseUrl, args.apiKey, args.evaluationId, quality.evaluationRunId);
260
+ if (exportRes.ok) {
261
+ const publishRes = await (0, api_1.publishShare)(args.baseUrl, args.apiKey, args.evaluationId, exportRes.exportData, quality.evaluationRunId);
262
+ if (publishRes.ok) {
263
+ shareUrl = publishRes.data.shareUrl;
264
+ console.error(`\nPublic share link created: ${shareUrl}`);
265
+ }
266
+ }
267
+ }
268
+ const ci = (0, ci_context_1.captureCiContext)();
269
+ const report = (0, build_check_report_1.buildCheckReport)({
270
+ args,
271
+ quality,
272
+ runDetails,
273
+ gateResult,
274
+ requestId,
275
+ shareUrl,
276
+ baselineRunId: quality?.baselineRunId ?? undefined,
277
+ ciRunUrl: ci?.runUrl ?? undefined,
278
+ });
279
+ // Persist report artifact so `evalgate explain` works with zero flags
280
+ try {
281
+ const reportDir = path.join(process.cwd(), ".evalgate");
282
+ if (!fs.existsSync(reportDir))
283
+ fs.mkdirSync(reportDir, { recursive: true });
284
+ fs.writeFileSync(path.join(reportDir, "last-report.json"), JSON.stringify(report, null, 2), "utf8");
285
+ }
286
+ catch {
287
+ // Non-fatal: best-effort artifact write
288
+ }
289
+ const formatted = args.format === "json"
290
+ ? (0, json_1.formatJson)(report)
291
+ : args.format === "github"
292
+ ? (0, github_1.formatGitHub)(report)
293
+ : (0, human_1.formatHuman)(report);
294
+ console.log(formatted);
295
+ // Guided flow hint on failure
296
+ if (!gateResult.passed) {
297
+ console.error("\nNext: evalgate explain");
298
+ }
299
+ // --pr-comment-out: write markdown to file for GitHub Action to post
300
+ if (args.prCommentOut) {
301
+ try {
302
+ const markdown = (0, pr_comment_1.buildPrComment)(report);
303
+ fs.writeFileSync(args.prCommentOut, markdown, "utf8");
304
+ }
305
+ catch (err) {
306
+ console.error(`EvalGate: failed to write PR comment to ${args.prCommentOut}: ${err instanceof Error ? err.message : String(err)}`);
307
+ }
308
+ }
309
+ // --onFail import: when gate fails, import run with CI context
310
+ if (!gateResult.passed &&
311
+ args.onFail === "import" &&
312
+ runDetails?.results &&
313
+ quality?.evaluationRunId) {
314
+ const importResults = runDetails.results
315
+ .filter((r) => r.testCaseId != null &&
316
+ (r.status === "passed" || r.status === "failed"))
317
+ .map((r) => ({
318
+ testCaseId: r.testCaseId,
319
+ status: r.status,
320
+ output: r.output ?? "",
321
+ latencyMs: r.durationMs,
322
+ assertionsJson: r.assertionsJson,
323
+ }));
324
+ if (importResults.length > 0) {
325
+ const idempotencyKey = ci
326
+ ? (0, ci_context_1.computeIdempotencyKey)(args.evaluationId, ci)
327
+ : undefined;
328
+ const importRes = await (0, api_1.importRunOnFail)(args.baseUrl, args.apiKey, args.evaluationId, importResults, {
329
+ idempotencyKey,
330
+ ci,
331
+ importClientVersion: "evalgate-cli",
332
+ checkReport: report,
333
+ });
334
+ if (!importRes.ok) {
335
+ console.error(`EvalGate import (onFail): ${importRes.status} — ${importRes.body}`);
336
+ }
337
+ }
338
+ }
339
+ return gateResult.exitCode;
340
+ }
341
// CLI entry point: runs only when this file is executed directly, not when
// it is loaded through require().
const isDirectRun = typeof require !== "undefined" && require.main === module;
if (isDirectRun) {
    const cliArgs = process.argv.slice(2);
    const outcome = parseArgs(cliArgs);
    if (!outcome.ok) {
        // Bad arguments: print the reason and exit with the code chosen by the parser.
        console.error(outcome.message);
        process.exit(outcome.exitCode);
    }
    // Anything runCheck rejects with is reported as an API error.
    const reportFatal = (err) => {
        console.error(`EvalGate gate ERROR: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(constants_1.EXIT.API_ERROR);
    };
    runCheck(outcome.args)
        .then((exitCode) => process.exit(exitCode))
        .catch(reportFatal);
}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * CI context capture and idempotency key for --onFail import.
3
+ */
4
+ import type { CiContext } from "./api";
5
/**
 * Capture the CI context of the current process.
 * Returns undefined when no context is available (presumably when not
 * running under a recognised CI system — confirm against the implementation).
 */
export declare function captureCiContext(): CiContext | undefined;
/**
 * Compute the idempotency key used for the `--onFail import` request from
 * the evaluation id and the captured CI context. May return undefined.
 */
export declare function computeIdempotencyKey(
    evaluationId: string,
    ci: CiContext,
): string | undefined;