@evalgate/sdk 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +638 -0
- package/README.md +398 -0
- package/dist/assertions.d.ts +189 -0
- package/dist/assertions.js +662 -0
- package/dist/batch.d.ts +68 -0
- package/dist/batch.js +179 -0
- package/dist/cache.d.ts +65 -0
- package/dist/cache.js +131 -0
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +132 -0
- package/dist/cli/baseline.d.ts +10 -0
- package/dist/cli/baseline.js +172 -0
- package/dist/cli/check.d.ts +73 -0
- package/dist/cli/check.js +355 -0
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +112 -0
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +230 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +685 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +419 -0
- package/dist/cli/doctor.d.ts +88 -0
- package/dist/cli/doctor.js +675 -0
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.d.ts +58 -0
- package/dist/cli/explain.js +561 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +135 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +110 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +103 -0
- package/dist/cli/formatters/types.d.ts +103 -0
- package/dist/cli/formatters/types.js +8 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +179 -0
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +252 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.js +332 -0
- package/dist/cli/init.d.ts +16 -0
- package/dist/cli/init.js +292 -0
- package/dist/cli/manifest.d.ts +103 -0
- package/dist/cli/manifest.js +282 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +89 -0
- package/dist/cli/print-config.d.ts +29 -0
- package/dist/cli/print-config.js +270 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/regression-gate.d.ts +15 -0
- package/dist/cli/regression-gate.js +341 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +132 -0
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +395 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +91 -0
- package/dist/cli/upgrade.d.ts +15 -0
- package/dist/cli/upgrade.js +492 -0
- package/dist/cli/workspace.d.ts +31 -0
- package/dist/cli/workspace.js +68 -0
- package/dist/client.d.ts +368 -0
- package/dist/client.js +893 -0
- package/dist/client.request.test.d.ts +1 -0
- package/dist/client.request.test.js +232 -0
- package/dist/context.d.ts +134 -0
- package/dist/context.js +215 -0
- package/dist/errors.d.ts +82 -0
- package/dist/errors.js +298 -0
- package/dist/export.d.ts +195 -0
- package/dist/export.js +344 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.js +153 -0
- package/dist/integrations/anthropic.d.ts +91 -0
- package/dist/integrations/anthropic.js +163 -0
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +232 -0
- package/dist/integrations/openai.d.ts +92 -0
- package/dist/integrations/openai.js +160 -0
- package/dist/local.d.ts +39 -0
- package/dist/local.js +148 -0
- package/dist/logger.d.ts +128 -0
- package/dist/logger.js +227 -0
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +74 -0
- package/dist/pagination.js +139 -0
- package/dist/regression.d.ts +100 -0
- package/dist/regression.js +44 -0
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +400 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +244 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +357 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +403 -0
- package/dist/runtime/run-report.d.ts +200 -0
- package/dist/runtime/run-report.js +222 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/snapshot.d.ts +176 -0
- package/dist/snapshot.js +322 -0
- package/dist/streaming.d.ts +173 -0
- package/dist/streaming.js +268 -0
- package/dist/testing.d.ts +273 -0
- package/dist/testing.js +317 -0
- package/dist/types.d.ts +754 -0
- package/dist/types.js +54 -0
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +41 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +389 -0
- package/dist/workflows.js +671 -0
- package/package.json +117 -0
package/dist/cli/run.js
ADDED
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* TICKET 4 — Unified evalgate run CLI Command
|
|
4
|
+
*
|
|
5
|
+
* Goal: Consolidated execution interface that consumes manifest
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Manifest loading and spec filtering
|
|
9
|
+
* - --impacted-only integration with impact analysis
|
|
10
|
+
* - Local executor integration
|
|
11
|
+
* - .evalgate/last-run.json output
|
|
12
|
+
* - Legacy mode compatibility
|
|
13
|
+
*/
|
|
14
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
15
|
+
if (k2 === undefined) k2 = k;
|
|
16
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
17
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
18
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
19
|
+
}
|
|
20
|
+
Object.defineProperty(o, k2, desc);
|
|
21
|
+
}) : (function(o, m, k, k2) {
|
|
22
|
+
if (k2 === undefined) k2 = k;
|
|
23
|
+
o[k2] = m[k];
|
|
24
|
+
}));
|
|
25
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
26
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
27
|
+
}) : function(o, v) {
|
|
28
|
+
o["default"] = v;
|
|
29
|
+
});
|
|
30
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
31
|
+
var ownKeys = function(o) {
|
|
32
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
33
|
+
var ar = [];
|
|
34
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
35
|
+
return ar;
|
|
36
|
+
};
|
|
37
|
+
return ownKeys(o);
|
|
38
|
+
};
|
|
39
|
+
return function (mod) {
|
|
40
|
+
if (mod && mod.__esModule) return mod;
|
|
41
|
+
var result = {};
|
|
42
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
43
|
+
__setModuleDefault(result, mod);
|
|
44
|
+
return result;
|
|
45
|
+
};
|
|
46
|
+
})();
|
|
47
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
48
|
+
exports.runEvaluations = runEvaluations;
|
|
49
|
+
exports.printHumanResults = printHumanResults;
|
|
50
|
+
exports.printJsonResults = printJsonResults;
|
|
51
|
+
exports.runEvaluationsCLI = runEvaluationsCLI;
|
|
52
|
+
const node_child_process_1 = require("node:child_process");
|
|
53
|
+
const fs = __importStar(require("node:fs/promises"));
|
|
54
|
+
const path = __importStar(require("node:path"));
|
|
55
|
+
const impact_analysis_1 = require("./impact-analysis");
|
|
56
|
+
/**
 * Generate a run ID of the form `run-<timestamp>-<suffix>`.
 *
 * The ID is NOT deterministic: `<timestamp>` is the current time in
 * milliseconds encoded in base 36, and `<suffix>` is derived from
 * `Math.random()`. The suffix is not cryptographically secure; it only makes
 * collisions between runs started in the same millisecond unlikely.
 *
 * @returns {string} The generated run ID; the suffix is always 6 characters.
 */
function generateRunId() {
    const timestamp = Date.now().toString(36);
    // Math.random().toString(36) may have fewer than six fractional digits
    // (0.5 -> "0.i", 0 -> "0"), so pad the suffix to a fixed width.
    const random = Math.random().toString(36).substring(2, 8).padEnd(6, "0");
    return `run-${timestamp}-${random}`;
}
|
|
64
|
+
/**
 * Run the evaluation specs listed in the project manifest and build a run report.
 *
 * Spec selection, in priority order:
 *  1. `options.impactedOnly` together with `options.baseBranch`: only specs
 *     reported by impact analysis against that branch.
 *  2. A non-empty `options.specIds`: only specs whose id is in the list.
 *  3. An empty `options.specIds` array: nothing is run.
 *  4. Otherwise: every spec in the manifest.
 *
 * When `options.writeResults` is truthy the report is written to disk and the
 * run index is updated.
 *
 * Progress messages go to stdout, except when `options.format === "json"`:
 * then they go to stderr so they do not end up in front of the JSON document
 * the CLI prints on stdout.
 *
 * @param options Run options (`impactedOnly`, `baseBranch`, `specIds`,
 *   `writeResults`, `format`).
 * @param projectRoot Directory containing `.evalgate/`; defaults to the current
 *   working directory.
 * @returns The run report (`schemaVersion`, `runId`, `metadata`, `results`, `summary`).
 * @throws {Error} When no manifest can be loaded.
 */
async function runEvaluations(options, projectRoot = process.cwd()) {
    const startTime = Date.now();
    const jsonMode = options.format === "json";
    // Keep stdout machine-readable in JSON mode.
    const logProgress = (message) => {
        if (jsonMode) {
            console.error(message);
        }
        else {
            console.log(message);
        }
    };
    // Load manifest
    const manifest = await loadManifest(projectRoot);
    if (!manifest) {
        throw new Error("No evaluation manifest found. Run 'evalgate discover --manifest' first.");
    }
    // Determine which specs to run
    let specsToRun = manifest.specs;
    if (options.impactedOnly && options.baseBranch) {
        // Run impact analysis first, then keep only the impacted specs
        const impactResult = await (0, impact_analysis_1.runImpactAnalysis)({
            baseBranch: options.baseBranch,
        }, projectRoot);
        const impactedSpecIds = new Set(impactResult.impactedSpecIds);
        specsToRun = manifest.specs.filter((spec) => impactedSpecIds.has(spec.id));
        logProgress(`🎯 Running ${specsToRun.length} impacted specs (out of ${manifest.specs.length} total)`);
    }
    else if (options.specIds && options.specIds.length > 0) {
        // Filter to specific spec IDs
        const specIdSet = new Set(options.specIds);
        specsToRun = manifest.specs.filter((spec) => specIdSet.has(spec.id));
        logProgress(`🎯 Running ${specsToRun.length} specific specs`);
    }
    else if (options.specIds && options.specIds.length === 0) {
        // Explicit empty list means run nothing
        specsToRun = [];
        logProgress(`🎯 Running 0 specs (explicit empty list)`);
    }
    else {
        logProgress(`🎯 Running all ${specsToRun.length} specs`);
    }
    // Execute specs
    const results = await executeSpecs(specsToRun);
    const completedAt = Date.now();
    const duration = completedAt - startTime;
    // Calculate summary
    const summary = calculateSummary(results);
    const runResult = {
        schemaVersion: 1,
        runId: generateRunId(),
        metadata: {
            startedAt: startTime,
            completedAt,
            duration,
            totalSpecs: manifest.specs.length,
            executedSpecs: specsToRun.length,
            mode: manifest.runtime.mode,
        },
        results,
        summary,
    };
    // Write results if requested
    if (options.writeResults) {
        await writeRunResults(runResult, projectRoot);
        await updateRunIndex(runResult, projectRoot);
    }
    return runResult;
}
|
|
127
|
+
/**
 * Load `.evalgate/manifest.json` from the project root.
 *
 * A missing manifest is an expected state and yields `null`. Any other failure
 * (unreadable file, malformed JSON) is reported as an error so that a corrupt
 * manifest is not mistaken for "no manifest found".
 *
 * @param projectRoot Directory containing `.evalgate/`; defaults to the current
 *   working directory.
 * @returns The parsed manifest, or `null` when the file does not exist.
 * @throws {Error} When the file exists but cannot be read or parsed.
 */
async function loadManifest(projectRoot = process.cwd()) {
    const manifestPath = path.join(projectRoot, ".evalgate", "manifest.json");
    let content;
    try {
        content = await fs.readFile(manifestPath, "utf-8");
    }
    catch (error) {
        const code = error && error.code;
        // ENOENT: file or directory missing; ENOTDIR: a path segment is not a directory.
        if (code === "ENOENT" || code === "ENOTDIR") {
            return null;
        }
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Failed to read evaluation manifest at ${manifestPath}: ${reason}`);
    }
    try {
        return JSON.parse(content);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Evaluation manifest at ${manifestPath} is not valid JSON: ${reason}`);
    }
}
|
|
140
|
+
/**
 * Execute the given specs one after another.
 *
 * Specs run sequentially; the returned array has one entry per spec, in the
 * same order as the input.
 *
 * @param specs Specs to execute.
 * @returns The per-spec results.
 */
async function executeSpecs(specs) {
    const collected = [];
    for (const current of specs) {
        // Await inside the loop on purpose: one spec at a time.
        collected.push(await executeSpec(current));
    }
    return collected;
}
|
|
151
|
+
/**
|
|
152
|
+
* Execute individual specification
|
|
153
|
+
*/
|
|
154
|
+
async function executeSpec(spec) {
|
|
155
|
+
const startTime = Date.now();
|
|
156
|
+
try {
|
|
157
|
+
// For now, simulate execution
|
|
158
|
+
// In a real implementation, this would:
|
|
159
|
+
// 1. Load the spec file
|
|
160
|
+
// 2. Execute the defineEval function
|
|
161
|
+
// 3. Capture the result
|
|
162
|
+
// Simulate some work
|
|
163
|
+
await new Promise((resolve) => setTimeout(resolve, Math.random() * 100 + 50));
|
|
164
|
+
// Simulate success/failure (90% success rate for demo)
|
|
165
|
+
const success = Math.random() > 0.1;
|
|
166
|
+
const duration = Date.now() - startTime;
|
|
167
|
+
if (success) {
|
|
168
|
+
return {
|
|
169
|
+
specId: spec.id,
|
|
170
|
+
name: spec.name,
|
|
171
|
+
filePath: spec.filePath,
|
|
172
|
+
result: {
|
|
173
|
+
status: "passed",
|
|
174
|
+
score: Math.random() * 0.3 + 0.7, // 0.7-1.0
|
|
175
|
+
duration,
|
|
176
|
+
},
|
|
177
|
+
};
|
|
178
|
+
}
|
|
179
|
+
else {
|
|
180
|
+
return {
|
|
181
|
+
specId: spec.id,
|
|
182
|
+
name: spec.name,
|
|
183
|
+
filePath: spec.filePath,
|
|
184
|
+
result: {
|
|
185
|
+
status: "failed",
|
|
186
|
+
error: "Simulated execution failure",
|
|
187
|
+
duration,
|
|
188
|
+
},
|
|
189
|
+
};
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
catch (error) {
|
|
193
|
+
return {
|
|
194
|
+
specId: spec.id,
|
|
195
|
+
name: spec.name,
|
|
196
|
+
filePath: spec.filePath,
|
|
197
|
+
result: {
|
|
198
|
+
status: "failed",
|
|
199
|
+
error: error instanceof Error ? error.message : String(error),
|
|
200
|
+
duration: Date.now() - startTime,
|
|
201
|
+
},
|
|
202
|
+
};
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
/**
|
|
206
|
+
* Calculate summary statistics
|
|
207
|
+
*/
|
|
208
|
+
function calculateSummary(results) {
|
|
209
|
+
const passed = results.filter((r) => r.result.status === "passed").length;
|
|
210
|
+
const failed = results.filter((r) => r.result.status === "failed").length;
|
|
211
|
+
const skipped = results.filter((r) => r.result.status === "skipped").length;
|
|
212
|
+
const passRate = results.length > 0 ? passed / results.length : 0;
|
|
213
|
+
return {
|
|
214
|
+
passed,
|
|
215
|
+
failed,
|
|
216
|
+
skipped,
|
|
217
|
+
passRate,
|
|
218
|
+
};
|
|
219
|
+
}
|
|
220
|
+
/**
|
|
221
|
+
* Write run results to file
|
|
222
|
+
*/
|
|
223
|
+
async function writeRunResults(result, projectRoot = process.cwd()) {
|
|
224
|
+
const evalgateDir = path.join(projectRoot, ".evalgate");
|
|
225
|
+
await fs.mkdir(evalgateDir, { recursive: true });
|
|
226
|
+
// Write last-run.json (existing behavior)
|
|
227
|
+
const lastRunPath = path.join(evalgateDir, "last-run.json");
|
|
228
|
+
await fs.writeFile(lastRunPath, JSON.stringify(result, null, 2), "utf-8");
|
|
229
|
+
// Create runs directory and write timestamped artifact
|
|
230
|
+
if (result.runId) {
|
|
231
|
+
const runsDir = path.join(evalgateDir, "runs");
|
|
232
|
+
await fs.mkdir(runsDir, { recursive: true });
|
|
233
|
+
const timestampedPath = path.join(runsDir, `${result.runId}.json`);
|
|
234
|
+
await fs.writeFile(timestampedPath, JSON.stringify(result, null, 2), "utf-8");
|
|
235
|
+
// Optional: Create latest.json mirror
|
|
236
|
+
const latestPath = path.join(runsDir, "latest.json");
|
|
237
|
+
await fs.writeFile(latestPath, JSON.stringify(result, null, 2), "utf-8");
|
|
238
|
+
}
|
|
239
|
+
console.log(`✅ Run results written to .evalgate/last-run.json`);
|
|
240
|
+
if (result.runId) {
|
|
241
|
+
console.log(`📁 Run artifact: .evalgate/runs/${result.runId}.json`);
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
/**
 * Append an entry for this run to `<projectRoot>/.evalgate/runs/index.json`.
 *
 * The entry records the run id, start time, git SHA and branch (when they can
 * be determined), mode, number of results, pass rate and the mean of all
 * defined scores (0 when no result has a score). The index is kept sorted
 * newest-first and is replaced via a temp file plus rename.
 *
 * An index file that is missing, unreadable, not valid JSON, or not a JSON
 * array is treated as empty and overwritten.
 *
 * @param result Run report to index.
 * @param projectRoot Directory containing `.evalgate/`; defaults to the current
 *   working directory.
 */
async function updateRunIndex(result, projectRoot = process.cwd()) {
    const runsDir = path.join(projectRoot, ".evalgate", "runs");
    const indexPath = path.join(runsDir, "index.json");
    await fs.mkdir(runsDir, { recursive: true });
    // Calculate average score over results that have one
    const scores = result.results
        .filter((r) => r.result.score !== undefined)
        .map((r) => r.result.score);
    const avgScore = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0;
    // Get git info if available
    let gitSha;
    let branch;
    try {
        gitSha = await getGitSha();
        branch = await getGitBranch();
    }
    catch {
        // Git commands not available, continue without git info
    }
    const indexEntry = {
        runId: result.runId,
        createdAt: result.metadata.startedAt,
        gitSha,
        branch,
        mode: result.metadata.mode,
        specCount: result.results.length,
        passRate: result.summary.passRate,
        avgScore,
    };
    // Read existing index or start a new one
    let index = [];
    try {
        const parsed = JSON.parse(await fs.readFile(indexPath, "utf-8"));
        // Guard against a hand-edited or corrupted index that is valid JSON but
        // not an array; push/sort below would otherwise throw.
        if (Array.isArray(parsed)) {
            index = parsed;
        }
    }
    catch (_error) {
        // Index doesn't exist yet (or is unreadable), start with empty array
    }
    // Add new entry
    index.push(indexEntry);
    // Sort by creation time (newest first)
    index.sort((a, b) => b.createdAt - a.createdAt);
    // Write to temp file first, then rename so readers never see a partial file
    const tempPath = `${indexPath}.tmp`;
    await fs.writeFile(tempPath, JSON.stringify(index, null, 2), "utf-8");
    await fs.rename(tempPath, indexPath);
}
|
|
294
|
+
/**
|
|
295
|
+
* Get current git SHA
|
|
296
|
+
*/
|
|
297
|
+
async function getGitSha() {
|
|
298
|
+
return new Promise((resolve) => {
|
|
299
|
+
const git = (0, node_child_process_1.spawn)("git", ["rev-parse", "HEAD"], {
|
|
300
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
301
|
+
});
|
|
302
|
+
let output = "";
|
|
303
|
+
git.stdout.on("data", (data) => {
|
|
304
|
+
output += data.toString();
|
|
305
|
+
});
|
|
306
|
+
git.on("close", (code) => {
|
|
307
|
+
if (code === 0 && output.trim()) {
|
|
308
|
+
resolve(output.trim());
|
|
309
|
+
}
|
|
310
|
+
else {
|
|
311
|
+
resolve(undefined);
|
|
312
|
+
}
|
|
313
|
+
});
|
|
314
|
+
});
|
|
315
|
+
}
|
|
316
|
+
/**
|
|
317
|
+
* Get current git branch
|
|
318
|
+
*/
|
|
319
|
+
async function getGitBranch() {
|
|
320
|
+
return new Promise((resolve) => {
|
|
321
|
+
const git = (0, node_child_process_1.spawn)("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
|
|
322
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
323
|
+
});
|
|
324
|
+
let output = "";
|
|
325
|
+
git.stdout.on("data", (data) => {
|
|
326
|
+
output += data.toString();
|
|
327
|
+
});
|
|
328
|
+
git.on("close", (code) => {
|
|
329
|
+
if (code === 0 && output.trim()) {
|
|
330
|
+
resolve(output.trim());
|
|
331
|
+
}
|
|
332
|
+
else {
|
|
333
|
+
resolve(undefined);
|
|
334
|
+
}
|
|
335
|
+
});
|
|
336
|
+
});
|
|
337
|
+
}
|
|
338
|
+
/**
|
|
339
|
+
* Print human-readable results
|
|
340
|
+
*/
|
|
341
|
+
function printHumanResults(result) {
|
|
342
|
+
console.log("\n🏃 Evaluation Run Results");
|
|
343
|
+
console.log(`⏱️ Duration: ${result.metadata.duration}ms`);
|
|
344
|
+
console.log(`📊 Specs: ${result.metadata.executedSpecs}/${result.metadata.totalSpecs} executed`);
|
|
345
|
+
console.log(`🎯 Mode: ${result.metadata.mode}`);
|
|
346
|
+
console.log("\n📈 Summary:");
|
|
347
|
+
console.log(` ✅ Passed: ${result.summary.passed}`);
|
|
348
|
+
console.log(` ❌ Failed: ${result.summary.failed}`);
|
|
349
|
+
console.log(` ⏭️ Skipped: ${result.summary.skipped}`);
|
|
350
|
+
console.log(` 📊 Pass Rate: ${(result.summary.passRate * 100).toFixed(1)}%`);
|
|
351
|
+
console.log("\n📋 Individual Results:");
|
|
352
|
+
for (const spec of result.results) {
|
|
353
|
+
const status = spec.result.status === "passed"
|
|
354
|
+
? "✅"
|
|
355
|
+
: spec.result.status === "failed"
|
|
356
|
+
? "❌"
|
|
357
|
+
: "⏭️";
|
|
358
|
+
const score = spec.result.score
|
|
359
|
+
? ` (${(spec.result.score * 100).toFixed(1)}%)`
|
|
360
|
+
: "";
|
|
361
|
+
const error = spec.result.error ? ` - ${spec.result.error}` : "";
|
|
362
|
+
console.log(` ${status} ${spec.name}${score}${error}`);
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
/**
|
|
366
|
+
* Print JSON results
|
|
367
|
+
*/
|
|
368
|
+
function printJsonResults(result) {
|
|
369
|
+
console.log(JSON.stringify(result, null, 2));
|
|
370
|
+
}
|
|
371
|
+
/**
 * CLI entry point for `evalgate run`.
 *
 * Runs the evaluations, prints the report (JSON when `options.format` is
 * "json", human-readable otherwise) and terminates the process:
 *  - exit code 0 when no spec failed,
 *  - exit code 1 when at least one spec failed,
 *  - exit code 2 when running or printing threw.
 *
 * @param options Run options, forwarded unchanged to `runEvaluations`.
 */
async function runEvaluationsCLI(options) {
    try {
        const report = await runEvaluations(options);
        const render = options.format === "json" ? printJsonResults : printHumanResults;
        render(report);
        // Exit with appropriate code
        process.exit(report.summary.failed > 0 ? 1 : 0);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error("❌ Run failed:", reason);
        process.exit(2);
    }
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* evalgate share — Create a share link for a run.
|
|
3
|
+
* Usage: evalgate share --evaluationId <id> --runId <n> [--scope run] [--expires 7d]
|
|
4
|
+
*/
|
|
5
|
+
/** Validated arguments for the `evalgate share` command. */
export type ShareArgs = {
    /** Base URL of the EvalGate server. */
    baseUrl: string;
    /** API key used to authenticate the requests. */
    apiKey: string;
    /** ID of the evaluation that owns the run. */
    evaluationId: string;
    /** Numeric ID of the run to share. */
    runId: number;
    /** Share scope; "run" is the only value the type allows. */
    scope: "run";
    /** Expiry as given by the user, e.g. "7d". */
    expires: string;
    /** Expiry expressed in days. */
    expiresInDays: number;
};
|
|
14
|
+
/**
 * Parse command-line arguments for `evalgate share`.
 *
 * @param argv Raw argument list (flags of the form `--name value`).
 * @returns The parsed arguments, or an object with an `error` message when
 *   the arguments are invalid.
 */
export declare function parseShareArgs(argv: string[]): ShareArgs | { error: string };
|
|
17
|
+
/**
 * Create a share link for the run described by `args` and print it.
 *
 * @param args Validated arguments, e.g. as returned by `parseShareArgs`.
 * @returns A promise resolving to 0 on success, or 1 when fetching the run
 *   export or publishing the share fails.
 */
export declare function runShare(args: ShareArgs): Promise<number>;
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* evalgate share — Create a share link for a run.
|
|
4
|
+
* Usage: evalgate share --evaluationId <id> --runId <n> [--scope run] [--expires 7d]
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.parseShareArgs = parseShareArgs;
|
|
8
|
+
exports.runShare = runShare;
|
|
9
|
+
const api_1 = require("./api");
|
|
10
|
+
/**
 * Convert an expiry spec such as "7d", "24h", "60m" or "30s" into days.
 *
 * The spec must be a whole number directly followed by a unit letter
 * (d, h, m or s; case-insensitive).
 *
 * @param spec Expiry spec.
 * @returns The duration in days (possibly fractional), or `null` when the
 *   spec does not match the expected format.
 */
function parseExpires(spec) {
    const parts = spec.match(/^(\d+)(d|h|m|s)$/i);
    if (!parts) {
        return null;
    }
    const amount = parseInt(parts[1], 10);
    switch (parts[2].toLowerCase()) {
        case "d":
            return amount;
        case "h":
            return amount / 24;
        case "m":
            return amount / (24 * 60);
        case "s":
            return amount / (24 * 60 * 60);
        default:
            return null;
    }
}
|
|
26
|
+
function parseShareArgs(argv) {
|
|
27
|
+
const args = {};
|
|
28
|
+
for (let i = 0; i < argv.length; i++) {
|
|
29
|
+
const arg = argv[i];
|
|
30
|
+
if (arg.startsWith("--")) {
|
|
31
|
+
const key = arg.slice(2);
|
|
32
|
+
const next = argv[i + 1];
|
|
33
|
+
if (next !== undefined && !next.startsWith("--")) {
|
|
34
|
+
args[key] = next;
|
|
35
|
+
i++;
|
|
36
|
+
}
|
|
37
|
+
else {
|
|
38
|
+
args[key] = "true";
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
const baseUrl = args.baseUrl ||
|
|
43
|
+
process.env.EVALGATE_BASE_URL ||
|
|
44
|
+
process.env.EVALAI_BASE_URL ||
|
|
45
|
+
"http://localhost:3000";
|
|
46
|
+
const apiKey = args.apiKey ||
|
|
47
|
+
process.env.EVALGATE_API_KEY ||
|
|
48
|
+
process.env.EVALAI_API_KEY ||
|
|
49
|
+
"";
|
|
50
|
+
const evaluationId = args.evaluationId || "";
|
|
51
|
+
const runId = args.runId ? parseInt(args.runId, 10) : NaN;
|
|
52
|
+
const scope = args.scope === "run" ? "run" : "run";
|
|
53
|
+
const expires = args.expires || "7d";
|
|
54
|
+
if (!apiKey)
|
|
55
|
+
return { error: "Error: --apiKey or EVALGATE_API_KEY is required" };
|
|
56
|
+
if (!evaluationId)
|
|
57
|
+
return { error: "Error: --evaluationId is required" };
|
|
58
|
+
if (Number.isNaN(runId) || runId < 1)
|
|
59
|
+
return {
|
|
60
|
+
error: "Error: --runId is required and must be a positive number",
|
|
61
|
+
};
|
|
62
|
+
const expiresInDays = parseExpires(expires);
|
|
63
|
+
if (expiresInDays == null || expiresInDays <= 0)
|
|
64
|
+
return { error: "Error: --expires must be e.g. 7d, 24h, 60m, 1s" };
|
|
65
|
+
return {
|
|
66
|
+
baseUrl,
|
|
67
|
+
apiKey,
|
|
68
|
+
evaluationId,
|
|
69
|
+
runId,
|
|
70
|
+
scope,
|
|
71
|
+
expires,
|
|
72
|
+
expiresInDays,
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
/**
 * Create a share link for a run and print it.
 *
 * Fetches the run export, publishes it as a share with the requested expiry,
 * then prints the share URL. When the publish response has no `shareUrl`, the
 * URL is built from `baseUrl` (one trailing slash removed) and the returned
 * `shareId`.
 *
 * @param args Validated arguments (see `parseShareArgs`).
 * @returns 0 on success; 1 when fetching the export or publishing fails
 *   (details are written to stderr).
 */
async function runShare(args) {
    const { baseUrl, apiKey, evaluationId, runId } = args;
    const fetched = await (0, api_1.fetchRunExport)(baseUrl, apiKey, evaluationId, runId);
    if (!fetched.ok) {
        console.error(`EvalGate share: failed to fetch export — ${fetched.status} ${fetched.body}`);
        return 1;
    }
    const published = await (0, api_1.publishShare)(baseUrl, apiKey, evaluationId, fetched.exportData, runId, { expiresInDays: args.expiresInDays });
    if (!published.ok) {
        console.error(`EvalGate share: failed to publish — ${published.status} ${published.body}`);
        return 1;
    }
    // Prefer the server-provided URL; otherwise derive it from the share ID.
    const link = published.data.shareUrl ??
        `${baseUrl.replace(/\/$/, "")}/share/${published.data.shareId}`;
    console.log(`Share link created (expires in ${args.expires}):`);
    console.log(link);
    return 0;
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* evalgate upgrade --full — Upgrade from Tier 1 (built-in gate) to Tier 2 (full gate)
|
|
3
|
+
*
|
|
4
|
+
* What it does:
|
|
5
|
+
* 1. Adds full regression gate script (scripts/regression-gate.ts)
|
|
6
|
+
* 2. Adds baseline governance workflow (.github/workflows/baseline-governance.yml)
|
|
7
|
+
* 3. Updates package.json with eval:regression-gate + eval:baseline-update scripts
|
|
8
|
+
* 4. Updates .github/workflows/evalgate-gate.yml to use project mode
|
|
9
|
+
* 5. Prints next steps
|
|
10
|
+
*/
|
|
11
|
+
/** Parsed arguments for the `evalgate upgrade` command. */
export interface UpgradeArgs {
    /** NOTE(review): presumably true when `--full` was passed — confirm against upgrade.js. */
    full: boolean;
}
|
|
14
|
+
/**
 * Parse command-line arguments for `evalgate upgrade`.
 *
 * NOTE(review): the implementation is not visible in this declaration file;
 * presumably `full` reflects whether `--full` is present — confirm in upgrade.js.
 *
 * @param argv Raw argument list.
 * @returns The parsed arguments.
 */
export declare function parseUpgradeArgs(argv: string[]): UpgradeArgs;
|
|
15
|
+
/**
 * Run the `evalgate upgrade` command.
 *
 * NOTE(review): the implementation is not visible in this declaration file;
 * the numeric return value is presumably a process exit code — confirm in upgrade.js.
 *
 * @param argv Raw argument list.
 * @returns A numeric status.
 */
export declare function runUpgrade(argv: string[]): number;
|