@pauly4010/evalai-sdk 1.8.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -0
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +680 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +408 -0
- package/dist/cli/doctor.js +19 -10
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.js +143 -37
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +251 -0
- package/dist/cli/index.js +173 -0
- package/dist/cli/manifest.d.ts +105 -0
- package/dist/cli/manifest.js +275 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/print-config.js +18 -14
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +389 -0
- package/dist/cli/workspace.d.ts +28 -0
- package/dist/cli/workspace.js +58 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +30 -5
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +391 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +271 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +237 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +353 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +416 -0
- package/dist/runtime/run-report.d.ts +202 -0
- package/dist/runtime/run-report.js +220 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/testing.d.ts +65 -0
- package/dist/testing.js +42 -0
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +4 -3
package/dist/cli/run.js
ADDED
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* TICKET 4 — Unified evalai run CLI Command
|
|
4
|
+
*
|
|
5
|
+
* Goal: Consolidated execution interface that consumes manifest
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Manifest loading and spec filtering
|
|
9
|
+
* - --impacted-only integration with impact analysis
|
|
10
|
+
* - Local executor integration
|
|
11
|
+
* - .evalai/last-run.json output
|
|
12
|
+
* - Legacy mode compatibility
|
|
13
|
+
*/
|
|
14
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
15
|
+
if (k2 === undefined) k2 = k;
|
|
16
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
17
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
18
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
19
|
+
}
|
|
20
|
+
Object.defineProperty(o, k2, desc);
|
|
21
|
+
}) : (function(o, m, k, k2) {
|
|
22
|
+
if (k2 === undefined) k2 = k;
|
|
23
|
+
o[k2] = m[k];
|
|
24
|
+
}));
|
|
25
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
26
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
27
|
+
}) : function(o, v) {
|
|
28
|
+
o["default"] = v;
|
|
29
|
+
});
|
|
30
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
31
|
+
var ownKeys = function(o) {
|
|
32
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
33
|
+
var ar = [];
|
|
34
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
35
|
+
return ar;
|
|
36
|
+
};
|
|
37
|
+
return ownKeys(o);
|
|
38
|
+
};
|
|
39
|
+
return function (mod) {
|
|
40
|
+
if (mod && mod.__esModule) return mod;
|
|
41
|
+
var result = {};
|
|
42
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
43
|
+
__setModuleDefault(result, mod);
|
|
44
|
+
return result;
|
|
45
|
+
};
|
|
46
|
+
})();
|
|
47
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
48
|
+
exports.runEvaluations = runEvaluations;
|
|
49
|
+
exports.printHumanResults = printHumanResults;
|
|
50
|
+
exports.printJsonResults = printJsonResults;
|
|
51
|
+
exports.runEvaluationsCLI = runEvaluationsCLI;
|
|
52
|
+
const fs = __importStar(require("node:fs/promises"));
|
|
53
|
+
const path = __importStar(require("node:path"));
|
|
54
|
+
const node_child_process_1 = require("node:child_process");
|
|
55
|
+
const impact_analysis_1 = require("./impact-analysis");
|
|
56
|
+
/**
 * Generate a unique run ID of the form `run-<timestamp>-<suffix>`.
 *
 * The ID is NOT deterministic: `<timestamp>` is `Date.now()` in base 36 and
 * `<suffix>` is derived from `Math.random()`.
 *
 * The suffix is always exactly six base-36 characters. Short random
 * fractions (for example 0.5, which stringifies to "0.i") would otherwise
 * yield fewer characters, so the suffix is right-padded with "0".
 *
 * @returns The generated run identifier.
 */
function generateRunId() {
    const timestamp = Date.now().toString(36);
    const random = Math.random().toString(36).substring(2, 8).padEnd(6, "0");
    return `run-${timestamp}-${random}`;
}
|
|
64
|
+
/**
 * Run the evaluation specs listed in the project manifest.
 *
 * Spec selection, in priority order:
 *   1. `options.impactedOnly` together with `options.baseBranch`: only specs
 *      reported by impact analysis against that branch.
 *   2. A non-empty `options.specIds`: only specs with those IDs.
 *   3. An empty `options.specIds` array: nothing is run.
 *   4. Otherwise: every spec in the manifest.
 *
 * When `options.writeResults` is set, the run is persisted under `.evalai`
 * and appended to the run index.
 *
 * @param options - Run options (`impactedOnly`, `baseBranch`, `specIds`, `writeResults`).
 * @param projectRoot - Directory containing `.evalai`; defaults to the current working directory.
 * @returns The run result (schema version, run ID, metadata, per-spec results, summary).
 * @throws Error when no manifest can be loaded.
 */
async function runEvaluations(options, projectRoot = process.cwd()) {
    const startTime = Date.now();
    const manifest = await loadManifest(projectRoot);
    if (!manifest) {
        throw new Error("No evaluation manifest found. Run 'evalai discover --manifest' first.");
    }
    const allSpecs = manifest.specs;
    const requestedIds = options.specIds;
    let specsToRun = allSpecs;
    if (options.impactedOnly && options.baseBranch) {
        // Impact analysis decides which specs are affected by the change set.
        const impact = await (0, impact_analysis_1.runImpactAnalysis)({
            baseBranch: options.baseBranch,
        }, projectRoot);
        const impactedIds = new Set(impact.impactedSpecIds);
        specsToRun = allSpecs.filter((candidate) => impactedIds.has(candidate.id));
        console.log(`🎯 Running ${specsToRun.length} impacted specs (out of ${allSpecs.length} total)`);
    }
    else if (requestedIds && requestedIds.length > 0) {
        const wantedIds = new Set(requestedIds);
        specsToRun = allSpecs.filter((candidate) => wantedIds.has(candidate.id));
        console.log(`🎯 Running ${specsToRun.length} specific specs`);
    }
    else if (requestedIds && requestedIds.length === 0) {
        // An explicitly empty ID list means "run nothing", not "run everything".
        specsToRun = [];
        console.log(`🎯 Running 0 specs (explicit empty list)`);
    }
    else {
        console.log(`🎯 Running all ${specsToRun.length} specs`);
    }
    const results = await executeSpecs(specsToRun);
    const completedAt = Date.now();
    const summary = calculateSummary(results);
    const runResult = {
        schemaVersion: 1,
        runId: generateRunId(),
        metadata: {
            startedAt: startTime,
            completedAt,
            duration: completedAt - startTime,
            totalSpecs: allSpecs.length,
            executedSpecs: specsToRun.length,
            mode: manifest.runtime.mode,
        },
        results,
        summary,
    };
    if (options.writeResults) {
        await writeRunResults(runResult, projectRoot);
        await updateRunIndex(runResult, projectRoot);
    }
    return runResult;
}
|
|
127
|
+
/**
 * Load the evaluation manifest from `<projectRoot>/.evalai/manifest.json`.
 *
 * Only a missing file is treated as "no manifest". Any other read failure,
 * or a file that is not valid JSON, is reported as an error so the caller
 * does not tell the user to re-run discovery when the real problem is a
 * corrupt or unreadable manifest.
 *
 * @param projectRoot - Directory containing `.evalai`; defaults to the current working directory.
 * @returns The parsed manifest, or `null` when the manifest file does not exist.
 * @throws Error when the manifest exists but cannot be read or parsed.
 */
async function loadManifest(projectRoot = process.cwd()) {
    const manifestPath = path.join(projectRoot, ".evalai", "manifest.json");
    let content;
    try {
        content = await fs.readFile(manifestPath, "utf-8");
    }
    catch (error) {
        const code = typeof error === "object" && error !== null && "code" in error ? error.code : undefined;
        if (code === "ENOENT") {
            return null;
        }
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Failed to read evaluation manifest at ${manifestPath}: ${reason}`);
    }
    try {
        return JSON.parse(content);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Invalid JSON in evaluation manifest at ${manifestPath}: ${reason}`);
    }
}
|
|
140
|
+
/**
 * Execute the given specs one after another.
 *
 * Each spec is awaited before the next one starts, so the returned array is
 * in the same order as the input.
 *
 * @param specs - Specs to execute.
 * @returns One result entry per spec, in input order.
 */
async function executeSpecs(specs) {
    const outcomes = [];
    for (const current of specs) {
        outcomes.push(await executeSpec(current));
    }
    return outcomes;
}
|
|
151
|
+
/**
|
|
152
|
+
* Execute individual specification
|
|
153
|
+
*/
|
|
154
|
+
async function executeSpec(spec) {
|
|
155
|
+
const startTime = Date.now();
|
|
156
|
+
try {
|
|
157
|
+
// For now, simulate execution
|
|
158
|
+
// In a real implementation, this would:
|
|
159
|
+
// 1. Load the spec file
|
|
160
|
+
// 2. Execute the defineEval function
|
|
161
|
+
// 3. Capture the result
|
|
162
|
+
// Simulate some work
|
|
163
|
+
await new Promise((resolve) => setTimeout(resolve, Math.random() * 100 + 50));
|
|
164
|
+
// Simulate success/failure (90% success rate for demo)
|
|
165
|
+
const success = Math.random() > 0.1;
|
|
166
|
+
const duration = Date.now() - startTime;
|
|
167
|
+
if (success) {
|
|
168
|
+
return {
|
|
169
|
+
specId: spec.id,
|
|
170
|
+
name: spec.name,
|
|
171
|
+
filePath: spec.filePath,
|
|
172
|
+
result: {
|
|
173
|
+
status: "passed",
|
|
174
|
+
score: Math.random() * 0.3 + 0.7, // 0.7-1.0
|
|
175
|
+
duration,
|
|
176
|
+
},
|
|
177
|
+
};
|
|
178
|
+
}
|
|
179
|
+
else {
|
|
180
|
+
return {
|
|
181
|
+
specId: spec.id,
|
|
182
|
+
name: spec.name,
|
|
183
|
+
filePath: spec.filePath,
|
|
184
|
+
result: {
|
|
185
|
+
status: "failed",
|
|
186
|
+
error: "Simulated execution failure",
|
|
187
|
+
duration,
|
|
188
|
+
},
|
|
189
|
+
};
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
catch (error) {
|
|
193
|
+
return {
|
|
194
|
+
specId: spec.id,
|
|
195
|
+
name: spec.name,
|
|
196
|
+
filePath: spec.filePath,
|
|
197
|
+
result: {
|
|
198
|
+
status: "failed",
|
|
199
|
+
error: error instanceof Error ? error.message : String(error),
|
|
200
|
+
duration: Date.now() - startTime,
|
|
201
|
+
},
|
|
202
|
+
};
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
/**
|
|
206
|
+
* Calculate summary statistics
|
|
207
|
+
*/
|
|
208
|
+
function calculateSummary(results) {
|
|
209
|
+
const passed = results.filter((r) => r.result.status === "passed").length;
|
|
210
|
+
const failed = results.filter((r) => r.result.status === "failed").length;
|
|
211
|
+
const skipped = results.filter((r) => r.result.status === "skipped").length;
|
|
212
|
+
const passRate = results.length > 0 ? passed / results.length : 0;
|
|
213
|
+
return {
|
|
214
|
+
passed,
|
|
215
|
+
failed,
|
|
216
|
+
skipped,
|
|
217
|
+
passRate,
|
|
218
|
+
};
|
|
219
|
+
}
|
|
220
|
+
/**
|
|
221
|
+
* Write run results to file
|
|
222
|
+
*/
|
|
223
|
+
async function writeRunResults(result, projectRoot = process.cwd()) {
|
|
224
|
+
const evalaiDir = path.join(projectRoot, ".evalai");
|
|
225
|
+
await fs.mkdir(evalaiDir, { recursive: true });
|
|
226
|
+
// Write last-run.json (existing behavior)
|
|
227
|
+
const lastRunPath = path.join(evalaiDir, "last-run.json");
|
|
228
|
+
await fs.writeFile(lastRunPath, JSON.stringify(result, null, 2), "utf-8");
|
|
229
|
+
// Create runs directory and write timestamped artifact
|
|
230
|
+
if (result.runId) {
|
|
231
|
+
const runsDir = path.join(evalaiDir, "runs");
|
|
232
|
+
await fs.mkdir(runsDir, { recursive: true });
|
|
233
|
+
const timestampedPath = path.join(runsDir, `${result.runId}.json`);
|
|
234
|
+
await fs.writeFile(timestampedPath, JSON.stringify(result, null, 2), "utf-8");
|
|
235
|
+
// Optional: Create latest.json mirror
|
|
236
|
+
const latestPath = path.join(runsDir, "latest.json");
|
|
237
|
+
await fs.writeFile(latestPath, JSON.stringify(result, null, 2), "utf-8");
|
|
238
|
+
}
|
|
239
|
+
console.log(`✅ Run results written to .evalai/last-run.json`);
|
|
240
|
+
if (result.runId) {
|
|
241
|
+
console.log(`📁 Run artifact: .evalai/runs/${result.runId}.json`);
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
/**
 * Append an entry for this run to `<projectRoot>/.evalai/runs/index.json`.
 *
 * The entry records the run ID, start time, git SHA and branch (when
 * available), mode, spec count, pass rate and average score. The index is
 * kept sorted newest-first and is rewritten via a temp file plus rename.
 *
 * An index file that is missing, unparsable, or does not contain a JSON
 * array is treated as empty, so a damaged index cannot fail a run whose
 * results have already been written.
 *
 * @param result - Run result; reads `runId`, `metadata.startedAt`, `metadata.mode`, `results` and `summary.passRate`.
 * @param projectRoot - Directory containing `.evalai`; defaults to the current working directory.
 */
async function updateRunIndex(result, projectRoot = process.cwd()) {
    const runsDir = path.join(projectRoot, ".evalai", "runs");
    const indexPath = path.join(runsDir, "index.json");
    await fs.mkdir(runsDir, { recursive: true });
    // Average only over specs that actually reported a numeric score.
    const scores = result.results
        .map((r) => r.result.score)
        .filter((score) => typeof score === "number" && !Number.isNaN(score));
    const avgScore = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0;
    // Git info is best-effort; the entry is still written without it.
    let gitSha;
    let branch;
    try {
        gitSha = await getGitSha();
        branch = await getGitBranch();
    }
    catch {
        // Git commands not available, continue without git info
    }
    const indexEntry = {
        runId: result.runId,
        createdAt: result.metadata.startedAt,
        gitSha,
        branch,
        mode: result.metadata.mode,
        specCount: result.results.length,
        passRate: result.summary.passRate,
        avgScore,
    };
    // Read existing index or start a new one.
    let index = [];
    try {
        const parsed = JSON.parse(await fs.readFile(indexPath, "utf-8"));
        // Valid JSON that is not an array (e.g. `{}`) would make push() throw.
        if (Array.isArray(parsed)) {
            index = parsed;
        }
    }
    catch {
        // Index doesn't exist yet (or is unreadable), start with empty array
    }
    index.push(indexEntry);
    // Newest first.
    index.sort((a, b) => b.createdAt - a.createdAt);
    // Write to temp file first, then rename for atomicity.
    const tempPath = `${indexPath}.tmp`;
    await fs.writeFile(tempPath, JSON.stringify(index, null, 2), "utf-8");
    await fs.rename(tempPath, indexPath);
}
|
|
294
|
+
/**
|
|
295
|
+
* Get current git SHA
|
|
296
|
+
*/
|
|
297
|
+
async function getGitSha() {
|
|
298
|
+
return new Promise((resolve) => {
|
|
299
|
+
const git = (0, node_child_process_1.spawn)("git", ["rev-parse", "HEAD"], {
|
|
300
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
301
|
+
});
|
|
302
|
+
let output = "";
|
|
303
|
+
git.stdout.on("data", (data) => {
|
|
304
|
+
output += data.toString();
|
|
305
|
+
});
|
|
306
|
+
git.on("close", (code) => {
|
|
307
|
+
if (code === 0 && output.trim()) {
|
|
308
|
+
resolve(output.trim());
|
|
309
|
+
}
|
|
310
|
+
else {
|
|
311
|
+
resolve(undefined);
|
|
312
|
+
}
|
|
313
|
+
});
|
|
314
|
+
});
|
|
315
|
+
}
|
|
316
|
+
/**
|
|
317
|
+
* Get current git branch
|
|
318
|
+
*/
|
|
319
|
+
async function getGitBranch() {
|
|
320
|
+
return new Promise((resolve) => {
|
|
321
|
+
const git = (0, node_child_process_1.spawn)("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
|
|
322
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
323
|
+
});
|
|
324
|
+
let output = "";
|
|
325
|
+
git.stdout.on("data", (data) => {
|
|
326
|
+
output += data.toString();
|
|
327
|
+
});
|
|
328
|
+
git.on("close", (code) => {
|
|
329
|
+
if (code === 0 && output.trim()) {
|
|
330
|
+
resolve(output.trim());
|
|
331
|
+
}
|
|
332
|
+
else {
|
|
333
|
+
resolve(undefined);
|
|
334
|
+
}
|
|
335
|
+
});
|
|
336
|
+
});
|
|
337
|
+
}
|
|
338
|
+
/**
|
|
339
|
+
* Print human-readable results
|
|
340
|
+
*/
|
|
341
|
+
function printHumanResults(result) {
|
|
342
|
+
console.log("\n🏃 Evaluation Run Results");
|
|
343
|
+
console.log(`⏱️ Duration: ${result.metadata.duration}ms`);
|
|
344
|
+
console.log(`📊 Specs: ${result.metadata.executedSpecs}/${result.metadata.totalSpecs} executed`);
|
|
345
|
+
console.log(`🎯 Mode: ${result.metadata.mode}`);
|
|
346
|
+
console.log("\n📈 Summary:");
|
|
347
|
+
console.log(` ✅ Passed: ${result.summary.passed}`);
|
|
348
|
+
console.log(` ❌ Failed: ${result.summary.failed}`);
|
|
349
|
+
console.log(` ⏭️ Skipped: ${result.summary.skipped}`);
|
|
350
|
+
console.log(` 📊 Pass Rate: ${(result.summary.passRate * 100).toFixed(1)}%`);
|
|
351
|
+
console.log("\n📋 Individual Results:");
|
|
352
|
+
for (const spec of result.results) {
|
|
353
|
+
const status = spec.result.status === "passed" ? "✅" : spec.result.status === "failed" ? "❌" : "⏭️";
|
|
354
|
+
const score = spec.result.score ? ` (${(spec.result.score * 100).toFixed(1)}%)` : "";
|
|
355
|
+
const error = spec.result.error ? ` - ${spec.result.error}` : "";
|
|
356
|
+
console.log(` ${status} ${spec.name}${score}${error}`);
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
/**
|
|
360
|
+
* Print JSON results
|
|
361
|
+
*/
|
|
362
|
+
function printJsonResults(result) {
|
|
363
|
+
console.log(JSON.stringify(result, null, 2));
|
|
364
|
+
}
|
|
365
|
+
/**
 * CLI entry point for `evalai run`.
 *
 * Runs the evaluations, prints the report (JSON when `options.format` is
 * "json", human-readable otherwise) and terminates the process:
 *   - exit code 0 when no spec failed,
 *   - exit code 1 when at least one spec failed,
 *   - exit code 2 when the run itself threw.
 *
 * @param options - Run options forwarded to `runEvaluations`, plus `format`.
 */
async function runEvaluationsCLI(options) {
    try {
        const outcome = await runEvaluations(options);
        const print = options.format === "json" ? printJsonResults : printHumanResults;
        print(outcome);
        // Exit with appropriate code
        process.exit(outcome.summary.failed > 0 ? 1 : 0);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error("❌ Run failed:", reason);
        process.exit(2);
    }
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE-402: Centralized .evalai workspace resolution
|
|
3
|
+
*
|
|
4
|
+
* Provides unified workspace path resolution for all EvalAI CLI commands
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Filesystem locations used by the EvalAI CLI, all derived from one project root.
 */
export interface EvalWorkspace {
    /** Project root the other paths are derived from. */
    root: string;
    /** The `.evalai` directory inside the project root. */
    evalaiDir: string;
    /** The `runs` directory inside `.evalai`. */
    runsDir: string;
    /** Path of `manifest.json` inside `.evalai`. */
    manifestPath: string;
    /** Path of `last-run.json` inside `.evalai`. */
    lastRunPath: string;
    /** Path of `index.json` inside the runs directory. */
    indexPath: string;
    /** Path of `baseline-run.json` inside `.evalai`. */
    baselinePath: string;
}
/**
 * Compute the EvalAI workspace paths for a project.
 *
 * @param projectRoot - Project root directory; when omitted, the current working directory is used.
 * @returns The resolved workspace paths.
 */
export declare function resolveEvalWorkspace(projectRoot?: string): EvalWorkspace;
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* CORE-402: Centralized .evalai workspace resolution
|
|
4
|
+
*
|
|
5
|
+
* Provides unified workspace path resolution for all EvalAI CLI commands
|
|
6
|
+
*/
|
|
7
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
8
|
+
if (k2 === undefined) k2 = k;
|
|
9
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
10
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
11
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
12
|
+
}
|
|
13
|
+
Object.defineProperty(o, k2, desc);
|
|
14
|
+
}) : (function(o, m, k, k2) {
|
|
15
|
+
if (k2 === undefined) k2 = k;
|
|
16
|
+
o[k2] = m[k];
|
|
17
|
+
}));
|
|
18
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
19
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
20
|
+
}) : function(o, v) {
|
|
21
|
+
o["default"] = v;
|
|
22
|
+
});
|
|
23
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
24
|
+
var ownKeys = function(o) {
|
|
25
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
26
|
+
var ar = [];
|
|
27
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
28
|
+
return ar;
|
|
29
|
+
};
|
|
30
|
+
return ownKeys(o);
|
|
31
|
+
};
|
|
32
|
+
return function (mod) {
|
|
33
|
+
if (mod && mod.__esModule) return mod;
|
|
34
|
+
var result = {};
|
|
35
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
36
|
+
__setModuleDefault(result, mod);
|
|
37
|
+
return result;
|
|
38
|
+
};
|
|
39
|
+
})();
|
|
40
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
41
|
+
exports.resolveEvalWorkspace = resolveEvalWorkspace;
|
|
42
|
+
const path = __importStar(require("node:path"));
|
|
43
|
+
/**
 * Compute the EvalAI workspace paths for a project.
 *
 * Every path is built with `path.join` from `projectRoot`; nothing is
 * created or checked on disk.
 *
 * @param projectRoot - Project root directory; defaults to the current working directory.
 * @returns An object with `root`, `evalaiDir`, `runsDir`, `manifestPath`, `lastRunPath`, `indexPath` and `baselinePath`.
 */
function resolveEvalWorkspace(projectRoot = process.cwd()) {
    // Helper: a path below <projectRoot>/.evalai.
    const underEvalai = (...segments) => path.join(projectRoot, ".evalai", ...segments);
    return {
        root: projectRoot,
        evalaiDir: underEvalai(),
        runsDir: underEvalai("runs"),
        manifestPath: underEvalai("manifest.json"),
        lastRunPath: underEvalai("last-run.json"),
        indexPath: underEvalai("runs", "index.json"),
        baselinePath: underEvalai("baseline-run.json"),
    };
}
|
package/dist/index.d.ts
CHANGED
|
@@ -14,6 +14,12 @@ export { containsAllRequiredFields, containsJSON, containsKeywords, containsLang
|
|
|
14
14
|
import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
|
|
15
15
|
export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
|
|
16
16
|
export { createTestSuite, type TestCaseResult, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteConfig, TestSuiteResult, } from "./testing";
|
|
17
|
+
export { defineEval, evalai, defineSuite, createContext as createEvalContext, createResult, } from "./runtime/eval";
|
|
18
|
+
export { createEvalRuntime, getActiveRuntime, setActiveRuntime, disposeActiveRuntime, } from "./runtime/registry";
|
|
19
|
+
export { createLocalExecutor, defaultLocalExecutor, } from "./runtime/executor";
|
|
20
|
+
export { mergeContexts, cloneContext, validateContext, } from "./runtime/context";
|
|
21
|
+
export type { EvalSpec, EvalContext, EvalResult, EvalOptions, EvalRuntime, EvalExecutor, EvalExecutorInterface, LocalExecutor, CloudExecutor, WorkerExecutor, SpecConfig, SpecOptions, DefineEvalFunction, ExecutorCapabilities, } from "./runtime/types";
|
|
22
|
+
export { EvalRuntimeError, SpecRegistrationError, SpecExecutionError, RuntimeError, } from "./runtime/types";
|
|
17
23
|
import { compareWithSnapshot, snapshot } from "./snapshot";
|
|
18
24
|
export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots, };
|
|
19
25
|
import type { ExportFormat } from "./export";
|
package/dist/index.js
CHANGED
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
* @packageDocumentation
|
|
9
9
|
*/
|
|
10
10
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
-
exports.
|
|
12
|
-
exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = void 0;
|
|
11
|
+
exports.SpecExecutionError = exports.SpecRegistrationError = exports.EvalRuntimeError = exports.validateContext = exports.cloneContext = exports.mergeContexts = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.disposeActiveRuntime = exports.setActiveRuntime = exports.getActiveRuntime = exports.createEvalRuntime = exports.createResult = exports.createEvalContext = exports.defineSuite = exports.evalai = exports.defineEval = exports.TestSuite = exports.createTestSuite = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
|
|
12
|
+
exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.RuntimeError = void 0;
|
|
13
13
|
// Main SDK exports
|
|
14
14
|
var client_1 = require("./client");
|
|
15
15
|
Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
|
|
@@ -49,10 +49,35 @@ Object.defineProperty(exports, "createContext", { enumerable: true, get: functio
|
|
|
49
49
|
Object.defineProperty(exports, "ContextManager", { enumerable: true, get: function () { return context_1.EvalContext; } });
|
|
50
50
|
Object.defineProperty(exports, "getContext", { enumerable: true, get: function () { return context_1.getCurrentContext; } });
|
|
51
51
|
Object.defineProperty(exports, "withContext", { enumerable: true, get: function () { return context_1.withContext; } });
|
|
52
|
-
// Test suite builder (Tier 2.7)
|
|
52
|
+
// Test suite builder (Tier 2.7) - BACKWARD COMPATIBILITY LAYER
|
|
53
53
|
var testing_1 = require("./testing");
|
|
54
54
|
Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: function () { return testing_1.createTestSuite; } });
|
|
55
55
|
Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
|
|
56
|
+
// LAYER 1: Runtime Foundation - NEW PROGRAMMING MODEL
|
|
57
|
+
var eval_1 = require("./runtime/eval");
|
|
58
|
+
Object.defineProperty(exports, "defineEval", { enumerable: true, get: function () { return eval_1.defineEval; } });
|
|
59
|
+
Object.defineProperty(exports, "evalai", { enumerable: true, get: function () { return eval_1.evalai; } });
|
|
60
|
+
Object.defineProperty(exports, "defineSuite", { enumerable: true, get: function () { return eval_1.defineSuite; } });
|
|
61
|
+
Object.defineProperty(exports, "createEvalContext", { enumerable: true, get: function () { return eval_1.createContext; } });
|
|
62
|
+
Object.defineProperty(exports, "createResult", { enumerable: true, get: function () { return eval_1.createResult; } });
|
|
63
|
+
var registry_1 = require("./runtime/registry");
|
|
64
|
+
Object.defineProperty(exports, "createEvalRuntime", { enumerable: true, get: function () { return registry_1.createEvalRuntime; } });
|
|
65
|
+
Object.defineProperty(exports, "getActiveRuntime", { enumerable: true, get: function () { return registry_1.getActiveRuntime; } });
|
|
66
|
+
Object.defineProperty(exports, "setActiveRuntime", { enumerable: true, get: function () { return registry_1.setActiveRuntime; } });
|
|
67
|
+
Object.defineProperty(exports, "disposeActiveRuntime", { enumerable: true, get: function () { return registry_1.disposeActiveRuntime; } });
|
|
68
|
+
var executor_1 = require("./runtime/executor");
|
|
69
|
+
Object.defineProperty(exports, "createLocalExecutor", { enumerable: true, get: function () { return executor_1.createLocalExecutor; } });
|
|
70
|
+
Object.defineProperty(exports, "defaultLocalExecutor", { enumerable: true, get: function () { return executor_1.defaultLocalExecutor; } });
|
|
71
|
+
var context_2 = require("./runtime/context");
|
|
72
|
+
Object.defineProperty(exports, "mergeContexts", { enumerable: true, get: function () { return context_2.mergeContexts; } });
|
|
73
|
+
Object.defineProperty(exports, "cloneContext", { enumerable: true, get: function () { return context_2.cloneContext; } });
|
|
74
|
+
Object.defineProperty(exports, "validateContext", { enumerable: true, get: function () { return context_2.validateContext; } });
|
|
75
|
+
// Runtime errors
|
|
76
|
+
var types_1 = require("./runtime/types");
|
|
77
|
+
Object.defineProperty(exports, "EvalRuntimeError", { enumerable: true, get: function () { return types_1.EvalRuntimeError; } });
|
|
78
|
+
Object.defineProperty(exports, "SpecRegistrationError", { enumerable: true, get: function () { return types_1.SpecRegistrationError; } });
|
|
79
|
+
Object.defineProperty(exports, "SpecExecutionError", { enumerable: true, get: function () { return types_1.SpecExecutionError; } });
|
|
80
|
+
Object.defineProperty(exports, "RuntimeError", { enumerable: true, get: function () { return types_1.RuntimeError; } });
|
|
56
81
|
// Snapshot testing (Tier 2.8)
|
|
57
82
|
const snapshot_1 = require("./snapshot");
|
|
58
83
|
Object.defineProperty(exports, "compareWithSnapshot", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
|
|
@@ -112,8 +137,8 @@ Object.defineProperty(exports, "batchRead", { enumerable: true, get: function ()
|
|
|
112
137
|
Object.defineProperty(exports, "RateLimiter", { enumerable: true, get: function () { return streaming_1.RateLimiter; } });
|
|
113
138
|
Object.defineProperty(exports, "streamEvaluation", { enumerable: true, get: function () { return streaming_1.streamEvaluation; } });
|
|
114
139
|
// New exports for v1.1.0
|
|
115
|
-
var
|
|
116
|
-
Object.defineProperty(exports, "EvaluationTemplates", { enumerable: true, get: function () { return
|
|
140
|
+
var types_2 = require("./types");
|
|
141
|
+
Object.defineProperty(exports, "EvaluationTemplates", { enumerable: true, get: function () { return types_2.EvaluationTemplates; } });
|
|
117
142
|
// Workflow tracing (Orchestration Layer)
|
|
118
143
|
var workflows_1 = require("./workflows");
|
|
119
144
|
Object.defineProperty(exports, "createWorkflowTracer", { enumerable: true, get: function () { return workflows_1.createWorkflowTracer; } });
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Config → DSL Adapter - LAYER 2 Compatibility Bridge
|
|
3
|
+
*
|
|
4
|
+
* Migrates existing evalai.config.json and TestSuite configurations
|
|
5
|
+
* to the new defineEval() DSL without breaking user workflows.
|
|
6
|
+
*/
|
|
7
|
+
import type { TestSuite } from "../../testing";
|
|
8
|
+
/**
 * Outcome of a migration to the defineEval() DSL.
 *
 * NOTE(review): field semantics below are inferred from the names; the
 * implementation is not part of this declaration file — confirm against it.
 */
interface MigrationResult {
    /** Overall success flag. */
    success: boolean;
    /** Number of specs generated. */
    specsGenerated: number;
    /** Error messages collected during migration. */
    errors: string[];
    /** Warning messages collected during migration. */
    warnings: string[];
    /** Output path associated with the migration. */
    outputPath: string;
}
/**
 * Convert a TestSuite into defineEval() specifications.
 *
 * @param testSuite - The TestSuite to convert.
 * @param outputPath - Output location (presumably where generated specs are written — confirm against the implementation).
 */
export declare function migrateTestSuiteToDSL(testSuite: TestSuite, outputPath: string): MigrationResult;
/**
 * Convert an evalai.config.json file into DSL specifications.
 *
 * @param configPath - Path of the configuration file to convert.
 * @param outputPath - Output location (presumably where generated specs are written — confirm against the implementation).
 */
export declare function migrateConfigToDSL(configPath: string, outputPath: string): MigrationResult;
/**
 * Discover and migrate all TestSuite configurations in a project.
 *
 * @param projectRoot - Root directory of the project to scan.
 * @param options - Optional settings: `outputDir` and `dryRun` (their exact effect is not visible from this declaration).
 */
export declare function migrateProjectToDSL(projectRoot: string, options?: {
    outputDir?: string;
    dryRun?: boolean;
}): MigrationResult;
export {};
|