@pauly4010/evalai-sdk 1.8.0 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -0
- package/README.md +136 -23
- package/dist/assertions.js +51 -18
- package/dist/batch.js +8 -2
- package/dist/cli/api.js +3 -1
- package/dist/cli/check.js +19 -6
- package/dist/cli/ci-context.js +3 -1
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/config.js +28 -8
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +685 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +419 -0
- package/dist/cli/doctor.js +62 -19
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.js +168 -36
- package/dist/cli/formatters/human.js +4 -1
- package/dist/cli/formatters/pr-comment.js +3 -1
- package/dist/cli/gate.js +6 -2
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +252 -0
- package/dist/cli/index.js +185 -0
- package/dist/cli/manifest.d.ts +103 -0
- package/dist/cli/manifest.js +282 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/policy-packs.js +8 -2
- package/dist/cli/print-config.js +33 -14
- package/dist/cli/regression-gate.js +8 -2
- package/dist/cli/report/build-check-report.js +8 -2
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +395 -0
- package/dist/cli/share.js +3 -1
- package/dist/cli/upgrade.js +2 -1
- package/dist/cli/workspace.d.ts +28 -0
- package/dist/cli/workspace.js +58 -0
- package/dist/client.d.ts +16 -19
- package/dist/client.js +60 -43
- package/dist/client.request.test.d.ts +1 -1
- package/dist/client.request.test.js +222 -147
- package/dist/context.js +3 -1
- package/dist/errors.js +11 -4
- package/dist/export.js +3 -1
- package/dist/index.d.ts +8 -2
- package/dist/index.js +30 -5
- package/dist/integrations/anthropic.d.ts +20 -1
- package/dist/integrations/openai-eval.js +4 -2
- package/dist/integrations/openai.d.ts +24 -1
- package/dist/local.js +3 -1
- package/dist/logger.js +6 -2
- package/dist/pagination.js +6 -2
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +394 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +244 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +357 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +403 -0
- package/dist/runtime/run-report.d.ts +200 -0
- package/dist/runtime/run-report.js +222 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/testing.d.ts +65 -0
- package/dist/testing.js +49 -2
- package/dist/types.d.ts +100 -69
- package/dist/utils/input-hash.js +4 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/workflows.js +62 -14
- package/package.json +115 -110
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* TICKET 2 — Evaluation Manifest Generation
|
|
4
|
+
*
|
|
5
|
+
* Goal: turn discovery output into a stable, versioned, machine-consumable artifact
|
|
6
|
+
* that becomes the input to run / impact / diff.
|
|
7
|
+
*
|
|
8
|
+
* This is the compiler output that everything else consumes.
|
|
9
|
+
*/
|
|
10
|
+
// Module-interop helpers. Each one reuses a definition already present on
// `this` (if any) before falling back to the local implementation below.
// NOTE(review): these match the helpers the TypeScript compiler emits for
// `import * as x` — presumably generated, so avoid hand edits.

// __createBinding(o, m, k, k2): exposes property `k` of `m` on `o` under the name `k2` (defaults to `k`).
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Swap a missing or unsuitable descriptor for an enumerable getter that reads m[k] on every access.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    // Fallback used when Object.create is unavailable: plain value copy.
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// __setModuleDefault(o, v): stores `v` as the `default` property of `o`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
// __importStar(mod): returns `mod` untouched when it is flagged `__esModule`; otherwise builds a new
// object that binds every own key of `mod` except "default" and sets `default` to `mod` itself.
var __importStar = (this && this.__importStar) || (function () {
    // ownKeys replaces itself on first use with Object.getOwnPropertyNames or a for-in based fallback.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
|
|
43
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
44
|
+
exports.MANIFEST_SCHEMA_VERSION = exports.SDK_VERSION = void 0;
|
|
45
|
+
exports.generateManifest = generateManifest;
|
|
46
|
+
exports.writeManifest = writeManifest;
|
|
47
|
+
exports.readManifest = readManifest;
|
|
48
|
+
exports.readLock = readLock;
|
|
49
|
+
const crypto = __importStar(require("node:crypto"));
|
|
50
|
+
const fs = __importStar(require("node:fs/promises"));
|
|
51
|
+
const path = __importStar(require("node:path"));
|
|
52
|
+
const version_1 = require("../version");
|
|
53
|
+
Object.defineProperty(exports, "SDK_VERSION", { enumerable: true, get: function () { return version_1.SDK_VERSION; } });
|
|
54
|
+
/**
|
|
55
|
+
* Manifest schema version
|
|
56
|
+
*/
|
|
57
|
+
exports.MANIFEST_SCHEMA_VERSION = 1;
|
|
58
|
+
/**
 * Build the evaluation manifest object from discovered specs.
 *
 * Specs are bucketed by their project-relative POSIX path; every bucket
 * contributes one `specFiles` entry (path, content hash, spec count) and one
 * processed entry per spec. Files and specs are handled sequentially, in
 * first-seen order.
 *
 * @param {Array<object>} specs - Discovered specs; `spec.file` is read here, the rest by processSpec().
 * @param {string} projectRoot - Root directory that spec paths are made relative to.
 * @param {string} projectName - Stored as `project.name`.
 * @param {{mode: *}} executionMode - Only `mode` is read, stored as `runtime.mode`.
 * @returns {Promise<object>} The manifest (schemaVersion, generatedAt, project, runtime, specFiles, specs).
 */
async function generateManifest(specs, projectRoot, projectName, executionMode) {
    // Unix timestamp in whole seconds.
    const generatedAt = Math.floor(Date.now() / 1000);
    const namespace = generateNamespace(projectRoot);
    // A Map keeps insertion order, so files appear in the order they were first seen.
    const buckets = new Map();
    for (const spec of specs) {
        const relativeFile = normalizePath(spec.file, projectRoot);
        const bucket = buckets.get(relativeFile);
        if (bucket === undefined) {
            buckets.set(relativeFile, [spec]);
        }
        else {
            bucket.push(spec);
        }
    }
    const specFiles = [];
    const processedSpecs = [];
    for (const [filePath, fileSpecs] of buckets) {
        const fileHash = await hashFile(path.join(projectRoot, filePath));
        specFiles.push({ filePath, fileHash, specCount: fileSpecs.length });
        for (const spec of fileSpecs) {
            processedSpecs.push(await processSpec(spec, filePath, projectRoot));
        }
    }
    const project = { name: projectName, root: ".", namespace };
    const runtime = { mode: executionMode.mode, sdkVersion: version_1.SDK_VERSION };
    return {
        schemaVersion: exports.MANIFEST_SCHEMA_VERSION,
        generatedAt,
        project,
        runtime,
        specFiles,
        specs: processedSpecs,
    };
}
|
|
107
|
+
/**
 * Turn one discovered spec into its manifest entry.
 *
 * Reads the spec's source file and derives the source position, the
 * dependency lists and the suite path from it.
 *
 * @param {object} spec - Discovered spec (`id`, `name`, `file`, `tags` are read).
 * @param {string} filePath - Project-relative path of the spec file.
 * @param {string} projectRoot - Project root used to resolve `filePath`.
 * @returns {Promise<object>} Entry with id, name, suitePath, filePath, position, tags, dependsOn.
 */
async function processSpec(spec, filePath, projectRoot) {
    const source = await fs.readFile(path.join(projectRoot, filePath), "utf-8");
    // Text-based lookups, not a real AST walk.
    const where = extractPosition(source, spec.name);
    const deps = extractDependencies(source);
    const suite = generateSuitePath(spec.tags, filePath);
    const entry = {
        id: spec.id,
        name: spec.name,
        suitePath: suite,
        filePath: normalizePath(spec.file, projectRoot),
        position: where,
        tags: spec.tags,
        dependsOn: deps,
    };
    return entry;
}
|
|
129
|
+
/**
 * Locate the `defineEval("<specName>"` call for a spec inside a source file.
 *
 * The search is line based: the first line containing `defineEval(` followed
 * by the quoted spec name (single, double or backtick quotes) wins.
 *
 * @param {string} content - Full text of the spec file.
 * @param {string} specName - Spec name to look for; matched literally.
 * @returns {{line: number, column: number}} 1-based position of the match,
 *   or `{ line: 1, column: 1 }` when the call is not found.
 */
function extractPosition(content, specName) {
    // Escape regex metacharacters so names such as "sum (a+b)" or "v1.2" are
    // matched literally instead of being interpreted as pattern syntax (or
    // throwing a SyntaxError for names like "broken(").
    const literalName = String(specName).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // No `g` flag: a fresh, stateless match is wanted for every line.
    const specPattern = new RegExp(`defineEval\\s*\\(\\s*["'\`]${literalName}["'\`]`);
    const lines = content.split("\n");
    for (let i = 0; i < lines.length; i++) {
        const match = specPattern.exec(lines[i]);
        if (match) {
            return { line: i + 1, column: match.index + 1 };
        }
    }
    return { line: 1, column: 1 };
}
|
|
150
|
+
/**
 * Collect the dependencies referenced by a spec file.
 *
 * If the text contains a `dependsOn: { ... }` option, its braces are parsed
 * as JSON and used as the answer; when that parse fails, all lists come back
 * empty. Without such an option the text is scanned for quoted `.md`,
 * `.json` and `.ts` strings and for `import ... from "<module>"` specifiers.
 *
 * @param {string} content - Full text of the spec file.
 * @returns {{prompts: string[], datasets: string[], tools: string[], code: string[]}}
 */
function extractDependencies(content) {
    const blank = () => ({ prompts: [], datasets: [], tools: [], code: [] });
    const declared = content.match(/dependsOn\s*:\s*({[^}]+})/s);
    if (declared) {
        try {
            // JSON.parse (never eval) — object literals with unquoted keys land in the catch.
            const parsed = JSON.parse(declared[1]);
            const result = blank();
            for (const key of Object.keys(result)) {
                result[key] = parsed[key] || [];
            }
            return result;
        }
        catch (_error) {
            return blank();
        }
    }
    // Heuristic fallback: classify quoted strings by extension, imports as code.
    const scanners = [
        ["prompts", /["']([^"']*\.md)["']/g],
        ["datasets", /["']([^"']*\.json)["']/g],
        ["tools", /["']([^"']*\.ts)["']/g],
        ["code", /import.*from\s*["']([^"']+)["']/g],
    ];
    const found = blank();
    for (const [kind, pattern] of scanners) {
        for (const hit of content.matchAll(pattern)) {
            found[kind].push(hit[1]);
        }
    }
    return found;
}
|
|
200
|
+
/**
 * Derive the single-element suite path for a spec.
 *
 * Preference order: the first tag, then the first directory segment of the
 * file path, then the literal "general".
 *
 * @param {string[]} tags - Spec tags.
 * @param {string} filePath - "/"-separated path of the spec file.
 * @returns {string[]} One-element array naming the suite.
 */
function generateSuitePath(tags, filePath) {
    if (tags.length !== 0) {
        return [tags[0]];
    }
    const segments = filePath.split("/");
    // A path without any "/" has no directory to borrow a name from.
    return segments.length <= 1 ? ["general"] : [segments[0]];
}
|
|
215
|
+
/**
 * Derive a short namespace identifier for a project.
 *
 * @param {string} projectRoot - Project root string that gets hashed.
 * @returns {string} First 8 hex characters of the SHA-256 digest of `projectRoot`.
 */
function generateNamespace(projectRoot) {
    const digest = crypto.createHash("sha256").update(projectRoot).digest("hex");
    return digest.substring(0, 8);
}
|
|
223
|
+
/**
 * Express a file path relative to the project root, using "/" separators.
 *
 * @param {string} filePath - Path of the file.
 * @param {string} projectRoot - Directory the result is relative to.
 * @returns {string} Relative path with every backslash replaced by "/".
 */
function normalizePath(filePath, projectRoot) {
    const fromRoot = path.relative(projectRoot, filePath);
    return fromRoot.split("\\").join("/");
}
|
|
230
|
+
/**
 * Compute a content hash for a file.
 *
 * The file is read as UTF-8 text and that text is hashed with SHA-256.
 *
 * @param {string} filePath - File to hash.
 * @returns {Promise<string>} `sha256:` followed by the hex digest.
 */
async function hashFile(filePath) {
    const text = await fs.readFile(filePath, "utf-8");
    const digest = crypto.createHash("sha256").update(text).digest("hex");
    return "sha256:" + digest;
}
|
|
239
|
+
/**
 * Persist a manifest and its lock file under `<projectRoot>/.evalai`.
 *
 * Writes `manifest.json` (the manifest, pretty-printed with 2 spaces) and then
 * `manifest.lock.json` (the `generatedAt` value plus a path → hash map built
 * from `manifest.specFiles`). The directory is created when missing.
 *
 * @param {object} manifest - Manifest as produced by generateManifest().
 * @param {string} projectRoot - Project root directory.
 * @returns {Promise<void>}
 */
async function writeManifest(manifest, projectRoot) {
    const pretty = (value) => JSON.stringify(value, null, 2);
    const evalaiDir = path.join(projectRoot, ".evalai");
    await fs.mkdir(evalaiDir, { recursive: true });
    await fs.writeFile(path.join(evalaiDir, "manifest.json"), pretty(manifest), "utf-8");
    // The lock is built only after manifest.json has been written.
    const hashPairs = manifest.specFiles.map((entry) => [entry.filePath, entry.fileHash]);
    const lock = {
        generatedAt: manifest.generatedAt,
        fileHashes: Object.fromEntries(hashPairs),
    };
    await fs.writeFile(path.join(evalaiDir, "manifest.lock.json"), pretty(lock), "utf-8");
}
|
|
257
|
+
/**
 * Load `<projectRoot>/.evalai/manifest.json`.
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {Promise<object|null>} Parsed manifest, or `null` when the file
 *   cannot be read or does not contain valid JSON.
 */
async function readManifest(projectRoot) {
    const manifestPath = path.join(projectRoot, ".evalai", "manifest.json");
    // Read and parse failures are both mapped to null (best-effort lookup).
    return fs
        .readFile(manifestPath, "utf-8")
        .then((raw) => JSON.parse(raw))
        .catch(() => null);
}
|
|
270
|
+
/**
 * Load `<projectRoot>/.evalai/manifest.lock.json`.
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {Promise<object|null>} Parsed lock data, or `null` when the file
 *   cannot be read or does not contain valid JSON.
 */
async function readLock(projectRoot) {
    const lockPath = path.join(projectRoot, ".evalai", "manifest.lock.json");
    // Read and parse failures are both mapped to null (best-effort lookup).
    return fs
        .readFile(lockPath, "utf-8")
        .then((raw) => JSON.parse(raw))
        .catch(() => null);
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* COMPAT-203: Config → DSL migration generator (file-based)
|
|
3
|
+
*
|
|
4
|
+
* CLI command: evalai migrate config --in evalai.config.json --out eval/legacy.spec.ts
|
|
5
|
+
* Generates defineEval() calls with comments and TODOs for manual completion
|
|
6
|
+
*/
|
|
7
|
+
import { Command } from "commander";
|
|
8
|
+
/**
 * Options accepted by migrateConfig().
 */
interface MigrateOptions {
    /** Path of the config file to read (JSON). */
    input: string;
    /** Path of the DSL file to write. */
    output: string;
    /** Include detailed comments */
    verbose?: boolean;
    /** Generate helper functions */
    helpers?: boolean;
    /** Preserve original test IDs */
    preserveIds?: boolean;
    /** Include provenance metadata */
    provenance?: boolean;
}
|
|
25
|
+
/**
 * Main migration function: reads the input config, generates the DSL file
 * and writes it to `options.output`.
 */
export declare function migrateConfig(options: MigrateOptions): Promise<void>;
/**
 * CLI command definition for the `migrate` / `config` commands.
 */
export declare function createMigrateCommand(): Command;
/**
 * Validate config file structure.
 * Resolves to `true` when the file looks migratable, `false` otherwise.
 */
export declare function validateConfigFile(filePath: string): Promise<boolean>;
/**
 * Show migration preview without writing files
 */
export declare function previewMigration(filePath: string): Promise<void>;
|
|
41
|
+
export {};
|
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* COMPAT-203: Config → DSL migration generator (file-based)
|
|
4
|
+
*
|
|
5
|
+
* CLI command: evalai migrate config --in evalai.config.json --out eval/legacy.spec.ts
|
|
6
|
+
* Generates defineEval() calls with comments and TODOs for manual completion
|
|
7
|
+
*/
|
|
8
|
+
// Module-interop helpers. Each one reuses a definition already present on
// `this` (if any) before falling back to the local implementation below.
// NOTE(review): these match the helpers the TypeScript compiler emits for
// `import * as x` — presumably generated, so avoid hand edits.

// __createBinding(o, m, k, k2): exposes property `k` of `m` on `o` under the name `k2` (defaults to `k`).
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Swap a missing or unsuitable descriptor for an enumerable getter that reads m[k] on every access.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    // Fallback used when Object.create is unavailable: plain value copy.
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// __setModuleDefault(o, v): stores `v` as the `default` property of `o`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
// __importStar(mod): returns `mod` untouched when it is flagged `__esModule`; otherwise builds a new
// object that binds every own key of `mod` except "default" and sets `default` to `mod` itself.
var __importStar = (this && this.__importStar) || (function () {
    // ownKeys replaces itself on first use with Object.getOwnPropertyNames or a for-in based fallback.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
|
|
41
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
42
|
+
exports.migrateConfig = migrateConfig;
|
|
43
|
+
exports.createMigrateCommand = createMigrateCommand;
|
|
44
|
+
exports.validateConfigFile = validateConfigFile;
|
|
45
|
+
exports.previewMigration = previewMigration;
|
|
46
|
+
const fs = __importStar(require("node:fs/promises"));
|
|
47
|
+
const path = __importStar(require("node:path"));
|
|
48
|
+
const commander_1 = require("commander");
|
|
49
|
+
const testsuite_to_dsl_1 = require("../runtime/adapters/testsuite-to-dsl");
|
|
50
|
+
const testing_1 = require("../testing");
|
|
51
|
+
/**
 * Read and parse a JSON config file (e.g. evalai.config.json).
 *
 * @param {string} filePath - Path of the config file.
 * @returns {Promise<*>} The parsed JSON value.
 * @throws {Error} "Failed to read config file <path>: <reason>" when the file
 *   cannot be read or is not valid JSON. The underlying error is attached as
 *   `cause`, so its code (e.g. ENOENT) and stack stay available to callers.
 */
async function readConfigFile(filePath) {
    try {
        const content = await fs.readFile(filePath, "utf-8");
        return JSON.parse(content);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        // Keep the original error reachable instead of flattening it to text only.
        throw new Error(`Failed to read config file ${filePath}: ${reason}`, { cause: error });
    }
}
|
|
63
|
+
/**
 * Build test suites from the supported config layouts.
 *
 * Three shapes are recognised, in this order:
 *  - `config.tests`: one suite named "config-tests" using the top-level
 *    executor / timeout / parallel / stopOnFailure / retries settings;
 *  - `config.suites`: one suite per entry;
 *  - `config.testSuites`: alternative property name, one suite per entry.
 *
 * @param {object} config - Parsed config file.
 * @returns {Array<{name: string, suite: *}>} Named suites; empty when none of the properties is set.
 */
function extractTestSuitesFromConfig(config) {
    const suites = [];
    const addSuite = (name, suiteConfig) => {
        const suite = (0, testing_1.createTestSuite)(name, suiteConfig);
        suites.push({ name, suite });
    };
    if (config.tests) {
        const { tests: cases, executor, timeout, parallel, stopOnFailure, retries } = config;
        addSuite("config-tests", { cases, executor, timeout, parallel, stopOnFailure, retries });
    }
    for (const property of ["suites", "testSuites"]) {
        const group = config[property];
        if (!group) {
            continue;
        }
        for (const [suiteName, suiteConfig] of Object.entries(group)) {
            addSuite(suiteName, suiteConfig);
        }
    }
    return suites;
}
|
|
97
|
+
/**
 * Produce the banner comment and SDK import that open the generated DSL file.
 *
 * @param {object} _config - Unused.
 * @param {{input: string, output: string}} options - Paths; both are resolved to absolute form for the banner.
 * @returns {string} Header text ending with a newline.
 */
function generateFileHeader(_config, options) {
    const timestamp = new Date().toISOString();
    const inputPath = path.resolve(options.input);
    const outputPath = path.resolve(options.output);
    // Text of each banner line; an empty entry becomes a bare " * " separator line.
    const bannerText = [
        "Auto-generated EvalAI DSL from configuration",
        "",
        `Generated at: ${timestamp}`,
        `Source config: ${inputPath}`,
        `Output file: ${outputPath}`,
        "",
        "This file contains defineEval() specifications migrated from evalai.config.json",
        "",
        "⚠️ IMPORTANT: This is a best-effort migration. Manual review and completion required.",
        "",
        "Migration notes:",
        "- Executors have been converted to async functions",
        "- Assertions have been converted where possible",
        "- Complex logic may need manual adaptation",
        "- Review TODO comments for items requiring attention",
    ];
    const banner = ["/**", ...bannerText.map((text) => ` * ${text}`), " */"];
    const trailer = ["", "import { defineEval, createResult } from '@pauly4010/evalai-sdk';", ""];
    return banner.concat(trailer).join("\n");
}
|
|
127
|
+
/**
 * Generate the helper functions shared by the whole generated DSL file:
 * `legacyExecutor`, `evaluateAssertions` and `evaluateLegacyTest`.
 *
 * The `legacyExecutor` placeholder is always emitted, because the generated
 * `evaluateLegacyTest` calls it unconditionally; omitting it for configs
 * without an `executor` would leave the generated file referencing an
 * undefined function.
 *
 * @param {object} config - Parsed config; only `executor` is read.
 * @param {object} _options - Unused.
 * @returns {string} Source text of the helpers.
 */
function generateGlobalHelpers(config, _options) {
    // The executor description lives in a `//` comment, so it must stay on one
    // line: a raw line break would push the remainder into generated code.
    const executorNote = config.executor
        ? `Original executor was: ${String(config.executor).replace(/\s*[\r\n\u2028\u2029]+\s*/g, " ")}`
        : "No executor was defined in the original config";
    const helpers = [];
    // Executor placeholder
    helpers.push([
        `/**`,
        ` * Legacy executor function from config`,
        ` * TODO: Replace with actual executor implementation`,
        ` */`,
        `async function legacyExecutor(input: string): Promise<string> {`,
        ` // ${executorNote}`,
        ` // TODO: Implement actual executor logic here`,
        ` return input; // Placeholder`,
        `}`,
        ``,
    ].join("\n"));
    // Assertion helper
    helpers.push([
        `/**`,
        ` * Helper function for legacy assertion evaluation`,
        ` * TODO: Implement actual assertion logic based on original config`,
        ` */`,
        `function evaluateAssertions(output: string, expected?: string): boolean {`,
        ` if (expected !== undefined) {`,
        ` return output === expected;`,
        ` }`,
        ` return output.length > 0;`,
        `}`,
        ``,
    ].join("\n"));
    // Evaluation helper
    helpers.push([
        `/**`,
        ` * Legacy test evaluation function`,
        ` * TODO: Adapt based on your original test logic`,
        ` */`,
        `async function evaluateLegacyTest(input: string, expected?: string): Promise<unknown> {`,
        ` const output = await legacyExecutor(input);`,
        ` const passed = evaluateAssertions(output, expected);`,
        ` `,
        ` return createResult({`,
        ` pass: passed,`,
        ` score: passed ? 100 : 0,`,
        ` metadata: { input, expected },`,
        ` });`,
        `}`,
        ``,
    ].join("\n"));
    return helpers.join("\n");
}
|
|
181
|
+
/**
 * Generate the DSL text for one suite: a banner comment naming the suite,
 * followed by the code produced by generateDefineEvalCode().
 *
 * @param {string} suiteName - Name shown in the banner.
 * @param {*} suite - Suite handed to the code generator.
 * @param {{helpers?: boolean, preserveIds?: boolean, provenance?: boolean}} options - Migration options
 *   forwarded as generateHelpers / preserveIds / includeProvenance.
 * @returns {string} Banner plus generated code.
 */
function generateSuiteDSL(suiteName, suite, options) {
    const generatorOptions = {
        generateHelpers: options.helpers,
        preserveIds: options.preserveIds,
        includeProvenance: options.provenance,
    };
    const body = (0, testsuite_to_dsl_1.generateDefineEvalCode)(suite, generatorOptions);
    // Text of each banner line; an empty entry becomes a bare " * " separator line.
    const bannerText = [
        `Test suite: ${suiteName}`,
        "Migrated from evalai.config.json",
        "",
        "TODO items for this suite:",
        "- Review executor implementation",
        "- Verify assertion logic",
        "- Test with actual data",
    ];
    const banner = ["/**", ...bannerText.map((text) => ` * ${text}`), " */", ""].join("\n");
    return `${banner}${body}`;
}
|
|
205
|
+
/**
 * Produce the closing summary comment of the generated DSL file.
 *
 * @param {Array<{suite: {getTests: Function}}>} suites - Migrated suites; tests are counted via `suite.getTests()`.
 * @param {{helpers?: boolean, preserveIds?: boolean, provenance?: boolean}} options - Options echoed in the summary.
 * @returns {string} Comment block ending with a newline.
 */
function generateSummary(suites, options) {
    let totalTests = 0;
    for (const { suite } of suites) {
        totalTests += suite.getTests().length;
    }
    const totalSuites = suites.length;
    // Text of each comment line; an empty entry becomes a bare " * " separator line.
    const summaryText = [
        "Migration Summary",
        "=================",
        "",
        `Total suites migrated: ${totalSuites}`,
        `Total tests migrated: ${totalTests}`,
        "",
        "Migration options used:",
        `- Include helpers: ${options.helpers}`,
        `- Preserve IDs: ${options.preserveIds}`,
        `- Include provenance: ${options.provenance}`,
        "",
        "Next steps:",
        "1. Review all TODO comments in this file",
        "2. Implement actual executor logic",
        "3. Adapt complex assertions",
        "4. Test with real data",
        "5. Remove evalai.config.json when satisfied",
        "",
        "For help with migration, see: https://github.com/pauly7610/ai-evaluation-platform/docs/MIGRATION.md",
    ];
    return ["/**", ...summaryText.map((text) => ` * ${text}`), " */", ""].join("\n");
}
|
|
236
|
+
/**
 * Run the config → DSL migration end to end.
 *
 * Reads `options.input`, extracts its suites, assembles header, global
 * helpers, per-suite DSL and summary, then writes the result to
 * `options.output` (creating the directory when needed) and logs a report.
 *
 * On any failure the error is logged and the process exits with code 1.
 *
 * @param {object} options - Migration options (input, output, helpers, preserveIds, provenance).
 * @returns {Promise<void>}
 */
async function migrateConfig(options) {
    try {
        const config = await readConfigFile(options.input);
        const suites = extractTestSuitesFromConfig(config);
        if (suites.length === 0) {
            throw new Error("No test suites found in config file. Check config structure.");
        }
        // Assemble the file section by section, in output order.
        const sections = [
            generateFileHeader(config, options),
            generateGlobalHelpers(config, options),
        ];
        for (const { name, suite } of suites) {
            sections.push(generateSuiteDSL(name, suite, options));
        }
        sections.push(generateSummary(suites, options));
        const content = sections.join("\n");
        await fs.mkdir(path.dirname(options.output), { recursive: true });
        await fs.writeFile(options.output, content, "utf-8");
        console.log(`✅ Migration complete!`);
        console.log(`📁 Output written to: ${path.resolve(options.output)}`);
        let testCount = 0;
        for (const { suite } of suites) {
            testCount += suite.getTests().length;
        }
        console.log(`📊 Migrated ${suites.length} suites with ${testCount} tests`);
        console.log(`\n⚠️ Remember to review TODO comments and test the migration!`);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`❌ Migration failed: ${reason}`);
        process.exit(1);
    }
}
|
|
270
|
+
/**
 * Build the `migrate` CLI command with its `config` subcommand, i.e.
 * `evalai migrate config --in <path> --out <path>`.
 *
 * Commander's `.command("config")` returns the newly created subcommand, not
 * the parent. The subcommand is therefore configured in a separate chain and
 * the `migrate` parent is what gets returned, so attaching the result to a
 * program keeps the documented `migrate config` hierarchy.
 *
 * NOTE(review): callers that relied on receiving the bare `config` command
 * should be checked — the call sites are not visible from this file.
 *
 * @returns {Command} The `migrate` command.
 */
function createMigrateCommand() {
    const migrate = new commander_1.Command("migrate")
        .description("Migrate legacy configuration to new DSL format");
    migrate
        .command("config")
        .description("Migrate evalai.config.json to defineEval() specifications")
        .requiredOption("-i, --in <path>", "Input config file path")
        .requiredOption("-o, --out <path>", "Output DSL file path")
        .option("-v, --verbose", "Include detailed comments and logging", false)
        .option("--no-helpers", "Don't generate helper functions")
        .option("--no-preserve-ids", "Don't preserve original test IDs")
        .option("--no-provenance", "Don't include provenance metadata")
        .action(async (options) => {
            // `--no-x` flags default to true; only an explicit false disables them.
            const migrateOptions = {
                input: options.in,
                output: options.out,
                verbose: options.verbose,
                helpers: options.helpers !== false,
                preserveIds: options.preserveIds !== false,
                provenance: options.provenance !== false,
            };
            await migrateConfig(migrateOptions);
        });
    return migrate;
}
|
|
297
|
+
/**
 * Check whether a config file looks migratable.
 *
 * The file must parse to an object and carry at least one truthy `tests`,
 * `suites` or `testSuites` property. The outcome is logged to the console.
 *
 * @param {string} filePath - Config file to check.
 * @returns {Promise<boolean>} `true` when the checks pass, `false` otherwise (never rejects on check failure).
 */
async function validateConfigFile(filePath) {
    try {
        const config = await readConfigFile(filePath);
        const isObject = Boolean(config) && typeof config === "object";
        if (!isObject) {
            throw new Error("Config file must contain a valid JSON object");
        }
        const hasTests = config.tests || config.suites || config.testSuites;
        if (!hasTests) {
            throw new Error("Config file must contain 'tests', 'suites', or 'testSuites' property");
        }
        console.log(`✅ Config file ${filePath} appears valid for migration`);
        return true;
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`❌ Config validation failed: ${reason}`);
        return false;
    }
}
|
|
320
|
+
/**
 * Print what a migration would produce, without writing any file.
 *
 * Lists every suite found in the config with its test count and up to three
 * test ids, then the overall total and the command to run. Failures are
 * logged and swallowed.
 *
 * @param {string} filePath - Config file to preview.
 * @returns {Promise<void>}
 */
async function previewMigration(filePath) {
    try {
        const config = await readConfigFile(filePath);
        const suites = extractTestSuitesFromConfig(config);
        console.log(`📋 Migration preview for: ${filePath}`);
        console.log(``);
        console.log(`Found ${suites.length} test suites:`);
        console.log(``);
        for (const { name, suite } of suites) {
            const tests = suite.getTests();
            console.log(` 📁 ${name}: ${tests.length} tests`);
            if (tests.length > 0) {
                // Show at most three ids; "..." signals that more exist.
                const sampleIds = tests.slice(0, 3).map((t) => t.id).join(", ");
                const ellipsis = tests.length > 3 ? "..." : "";
                console.log(` Tests: ${sampleIds}${ellipsis}`);
            }
        }
        console.log(``);
        let total = 0;
        for (const { suite } of suites) {
            total += suite.getTests().length;
        }
        console.log(`Total tests to migrate: ${total}`);
        console.log(``);
        console.log(`To migrate, run: evalai migrate config --in ${filePath} --out eval/migrated.spec.ts`);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`❌ Preview failed: ${reason}`);
    }
}
|
package/dist/cli/policy-packs.js
CHANGED
|
@@ -22,7 +22,10 @@ exports.POLICY_PACKS = {
|
|
|
22
22
|
1: {
|
|
23
23
|
policyId: "SOC2",
|
|
24
24
|
version: 1,
|
|
25
|
-
thresholds: {
|
|
25
|
+
thresholds: {
|
|
26
|
+
requiredSafetyRate: 0.95,
|
|
27
|
+
maxFlags: ["SAFETY_RISK", "LOW_PASS_RATE"],
|
|
28
|
+
},
|
|
26
29
|
rationale: "SOC2 trust criteria for security and availability.",
|
|
27
30
|
checks: ["safety_rate", "flag_restrictions"],
|
|
28
31
|
},
|
|
@@ -40,7 +43,10 @@ exports.POLICY_PACKS = {
|
|
|
40
43
|
1: {
|
|
41
44
|
policyId: "PCI_DSS",
|
|
42
45
|
version: 1,
|
|
43
|
-
thresholds: {
|
|
46
|
+
thresholds: {
|
|
47
|
+
requiredSafetyRate: 0.99,
|
|
48
|
+
maxFlags: ["SAFETY_RISK", "LOW_PASS_RATE"],
|
|
49
|
+
},
|
|
44
50
|
rationale: "PCI DSS cardholder data security standards.",
|
|
45
51
|
checks: ["safety_rate", "flag_restrictions"],
|
|
46
52
|
},
|