@pauly4010/evalai-sdk 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/CHANGELOG.md +54 -0
  2. package/dist/cli/ci.d.ts +45 -0
  3. package/dist/cli/ci.js +192 -0
  4. package/dist/cli/diff.d.ts +173 -0
  5. package/dist/cli/diff.js +680 -0
  6. package/dist/cli/discover.d.ts +84 -0
  7. package/dist/cli/discover.js +408 -0
  8. package/dist/cli/doctor.js +19 -10
  9. package/dist/cli/env.d.ts +21 -0
  10. package/dist/cli/env.js +42 -0
  11. package/dist/cli/explain.js +143 -37
  12. package/dist/cli/impact-analysis.d.ts +63 -0
  13. package/dist/cli/impact-analysis.js +251 -0
  14. package/dist/cli/index.js +173 -0
  15. package/dist/cli/manifest.d.ts +105 -0
  16. package/dist/cli/manifest.js +275 -0
  17. package/dist/cli/migrate.d.ts +41 -0
  18. package/dist/cli/migrate.js +349 -0
  19. package/dist/cli/print-config.js +18 -14
  20. package/dist/cli/run.d.ts +101 -0
  21. package/dist/cli/run.js +389 -0
  22. package/dist/cli/workspace.d.ts +28 -0
  23. package/dist/cli/workspace.js +58 -0
  24. package/dist/index.d.ts +6 -0
  25. package/dist/index.js +30 -5
  26. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  27. package/dist/runtime/adapters/config-to-dsl.js +391 -0
  28. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  29. package/dist/runtime/adapters/testsuite-to-dsl.js +271 -0
  30. package/dist/runtime/context.d.ts +26 -0
  31. package/dist/runtime/context.js +74 -0
  32. package/dist/runtime/eval.d.ts +46 -0
  33. package/dist/runtime/eval.js +237 -0
  34. package/dist/runtime/execution-mode.d.ts +80 -0
  35. package/dist/runtime/execution-mode.js +353 -0
  36. package/dist/runtime/executor.d.ts +16 -0
  37. package/dist/runtime/executor.js +152 -0
  38. package/dist/runtime/registry.d.ts +78 -0
  39. package/dist/runtime/registry.js +416 -0
  40. package/dist/runtime/run-report.d.ts +202 -0
  41. package/dist/runtime/run-report.js +220 -0
  42. package/dist/runtime/types.d.ts +356 -0
  43. package/dist/runtime/types.js +76 -0
  44. package/dist/testing.d.ts +65 -0
  45. package/dist/testing.js +42 -0
  46. package/dist/version.d.ts +1 -1
  47. package/dist/version.js +1 -1
  48. package/package.json +4 -3
@@ -0,0 +1,349 @@
1
+ "use strict";
2
+ /**
3
+ * COMPAT-203: Config → DSL migration generator (file-based)
4
+ *
5
+ * CLI command: evalai migrate config --in evalai.config.json --out eval/legacy.spec.ts
6
+ * Generates defineEval() calls with comments and TODOs for manual completion
7
+ */
8
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
9
+ if (k2 === undefined) k2 = k;
10
+ var desc = Object.getOwnPropertyDescriptor(m, k);
11
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
12
+ desc = { enumerable: true, get: function() { return m[k]; } };
13
+ }
14
+ Object.defineProperty(o, k2, desc);
15
+ }) : (function(o, m, k, k2) {
16
+ if (k2 === undefined) k2 = k;
17
+ o[k2] = m[k];
18
+ }));
19
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
20
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
21
+ }) : function(o, v) {
22
+ o["default"] = v;
23
+ });
24
+ var __importStar = (this && this.__importStar) || (function () {
25
+ var ownKeys = function(o) {
26
+ ownKeys = Object.getOwnPropertyNames || function (o) {
27
+ var ar = [];
28
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
29
+ return ar;
30
+ };
31
+ return ownKeys(o);
32
+ };
33
+ return function (mod) {
34
+ if (mod && mod.__esModule) return mod;
35
+ var result = {};
36
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
37
+ __setModuleDefault(result, mod);
38
+ return result;
39
+ };
40
+ })();
41
+ Object.defineProperty(exports, "__esModule", { value: true });
42
+ exports.migrateConfig = migrateConfig;
43
+ exports.createMigrateCommand = createMigrateCommand;
44
+ exports.validateConfigFile = validateConfigFile;
45
+ exports.previewMigration = previewMigration;
46
+ const commander_1 = require("commander");
47
+ const fs = __importStar(require("node:fs/promises"));
48
+ const path = __importStar(require("node:path"));
49
+ const testsuite_to_dsl_1 = require("../runtime/adapters/testsuite-to-dsl");
50
+ const testing_1 = require("../testing");
51
/**
 * Load a JSON config file from disk and return the parsed value.
 *
 * Both read failures and JSON syntax errors are rethrown as a single Error
 * whose message names the offending path.
 *
 * @param filePath Path of the config file to load.
 * @returns The value produced by JSON.parse.
 * @throws Error when the file cannot be read or is not valid JSON.
 */
async function readConfigFile(filePath) {
    let parsed;
    try {
        const raw = await fs.readFile(filePath, "utf-8");
        parsed = JSON.parse(raw);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Failed to read config file ${filePath}: ${reason}`);
    }
    return parsed;
}
63
+ /**
64
+ * Extract TestSuite data from config
65
+ */
66
+ function extractTestSuitesFromConfig(config) {
67
+ const suites = [];
68
+ // Handle different config structures
69
+ if (config.tests) {
70
+ // Direct tests array
71
+ const suite = (0, testing_1.createTestSuite)("config-tests", {
72
+ cases: config.tests,
73
+ executor: config.executor,
74
+ timeout: config.timeout,
75
+ parallel: config.parallel,
76
+ stopOnFailure: config.stopOnFailure,
77
+ retries: config.retries,
78
+ });
79
+ suites.push({ name: "config-tests", suite });
80
+ }
81
+ if (config.suites) {
82
+ // Multiple named suites
83
+ for (const [suiteName, suiteConfig] of Object.entries(config.suites)) {
84
+ const suite = (0, testing_1.createTestSuite)(suiteName, suiteConfig);
85
+ suites.push({ name: suiteName, suite });
86
+ }
87
+ }
88
+ if (config.testSuites) {
89
+ // Alternative property name
90
+ for (const [suiteName, suiteConfig] of Object.entries(config.testSuites)) {
91
+ const suite = (0, testing_1.createTestSuite)(suiteName, suiteConfig);
92
+ suites.push({ name: suiteName, suite });
93
+ }
94
+ }
95
+ return suites;
96
+ }
97
+ /**
98
+ * Generate DSL file header
99
+ */
100
+ function generateFileHeader(config, options) {
101
+ const timestamp = new Date().toISOString();
102
+ const inputPath = path.resolve(options.input);
103
+ const outputPath = path.resolve(options.output);
104
+ return [
105
+ `/**`,
106
+ ` * Auto-generated EvalAI DSL from configuration`,
107
+ ` * `,
108
+ ` * Generated at: ${timestamp}`,
109
+ ` * Source config: ${inputPath}`,
110
+ ` * Output file: ${outputPath}`,
111
+ ` * `,
112
+ ` * This file contains defineEval() specifications migrated from evalai.config.json`,
113
+ ` * `,
114
+ ` * ⚠️ IMPORTANT: This is a best-effort migration. Manual review and completion required.`,
115
+ ` * `,
116
+ ` * Migration notes:`,
117
+ ` * - Executors have been converted to async functions`,
118
+ ` * - Assertions have been converted where possible`,
119
+ ` * - Complex logic may need manual adaptation`,
120
+ ` * - Review TODO comments for items requiring attention`,
121
+ ` */`,
122
+ ``,
123
+ `import { defineEval, createResult } from '@pauly4010/evalai-sdk';`,
124
+ ``,
125
+ ].join("\n");
126
+ }
127
+ /**
128
+ * Generate helper functions for the entire file
129
+ */
130
+ function generateGlobalHelpers(config, options) {
131
+ const helpers = [];
132
+ // Add executor helper if config has executor
133
+ if (config.executor) {
134
+ helpers.push([
135
+ `/**`,
136
+ ` * Legacy executor function from config`,
137
+ ` * TODO: Replace with actual executor implementation`,
138
+ ` */`,
139
+ `async function legacyExecutor(input: string): Promise<string> {`,
140
+ ` // Original executor was: ${config.executor.toString()}`,
141
+ ` // TODO: Implement actual executor logic here`,
142
+ ` return input; // Placeholder`,
143
+ `}`,
144
+ ``,
145
+ ].join("\n"));
146
+ }
147
+ // Add assertion helpers
148
+ helpers.push([
149
+ `/**`,
150
+ ` * Helper function for legacy assertion evaluation`,
151
+ ` * TODO: Implement actual assertion logic based on original config`,
152
+ ` */`,
153
+ `function evaluateAssertions(output: string, expected?: string): boolean {`,
154
+ ` if (expected !== undefined) {`,
155
+ ` return output === expected;`,
156
+ ` }`,
157
+ ` return output.length > 0;`,
158
+ `}`,
159
+ ``,
160
+ ].join("\n"));
161
+ // Add evaluation helper
162
+ helpers.push([
163
+ `/**`,
164
+ ` * Legacy test evaluation function`,
165
+ ` * TODO: Adapt based on your original test logic`,
166
+ ` */`,
167
+ `async function evaluateLegacyTest(input: string, expected?: string): Promise<any> {`,
168
+ ` const output = await legacyExecutor(input);`,
169
+ ` const passed = evaluateAssertions(output, expected);`,
170
+ ` `,
171
+ ` return createResult({`,
172
+ ` pass: passed,`,
173
+ ` score: passed ? 100 : 0,`,
174
+ ` metadata: { input, expected },`,
175
+ ` });`,
176
+ `}`,
177
+ ``,
178
+ ].join("\n"));
179
+ return helpers.join("\n");
180
+ }
181
+ /**
182
+ * Generate DSL content for a single suite
183
+ */
184
+ function generateSuiteDSL(suiteName, suite, options) {
185
+ const dslCode = (0, testsuite_to_dsl_1.generateDefineEvalCode)(suite, {
186
+ generateHelpers: options.helpers,
187
+ preserveIds: options.preserveIds,
188
+ includeProvenance: options.provenance,
189
+ });
190
+ // Add suite-specific comments
191
+ const header = [
192
+ `/**`,
193
+ ` * Test suite: ${suiteName}`,
194
+ ` * Migrated from evalai.config.json`,
195
+ ` * `,
196
+ ` * TODO items for this suite:`,
197
+ ` * - Review executor implementation`,
198
+ ` * - Verify assertion logic`,
199
+ ` * - Test with actual data`,
200
+ ` */`,
201
+ ``,
202
+ ].join("\n");
203
+ return header + dslCode;
204
+ }
205
+ /**
206
+ * Generate migration summary
207
+ */
208
+ function generateSummary(suites, options) {
209
+ const totalTests = suites.reduce((sum, { suite }) => sum + suite.getTests().length, 0);
210
+ const totalSuites = suites.length;
211
+ return [
212
+ `/**`,
213
+ ` * Migration Summary`,
214
+ ` * =================`,
215
+ ` * `,
216
+ ` * Total suites migrated: ${totalSuites}`,
217
+ ` * Total tests migrated: ${totalTests}`,
218
+ ` * `,
219
+ ` * Migration options used:`,
220
+ ` * - Include helpers: ${options.helpers}`,
221
+ ` * - Preserve IDs: ${options.preserveIds}`,
222
+ ` * - Include provenance: ${options.provenance}`,
223
+ ` * `,
224
+ ` * Next steps:`,
225
+ ` * 1. Review all TODO comments in this file`,
226
+ ` * 2. Implement actual executor logic`,
227
+ ` * 3. Adapt complex assertions`,
228
+ ` * 4. Test with real data`,
229
+ ` * 5. Remove evalai.config.json when satisfied`,
230
+ ` * `,
231
+ ` * For help with migration, see: https://github.com/pauly7610/ai-evaluation-platform/docs/MIGRATION.md`,
232
+ ` */`,
233
+ ``,
234
+ ].join("\n");
235
+ }
236
/**
 * Run the whole migration: read the config, turn its suites into DSL source,
 * write the result to `options.output` and report progress on the console.
 *
 * Any failure is printed to stderr and terminates the process with exit
 * code 1, so this function never rejects.
 *
 * @param options Migration options (`input`, `output`, plus the flags that
 *                are passed through to the generator functions).
 */
async function migrateConfig(options) {
    try {
        const config = await readConfigFile(options.input);
        const suites = extractTestSuitesFromConfig(config);
        if (suites.length === 0) {
            throw new Error("No test suites found in config file. Check config structure.");
        }
        // Assemble the file section by section: header, helpers, one block per suite, summary.
        const sections = [
            generateFileHeader(config, options),
            generateGlobalHelpers(config, options),
        ];
        for (const { name, suite } of suites) {
            sections.push(generateSuiteDSL(name, suite, options));
        }
        sections.push(generateSummary(suites, options));
        const content = sections.join("\n");
        // The target directory may not exist yet.
        const targetFile = options.output;
        await fs.mkdir(path.dirname(targetFile), { recursive: true });
        await fs.writeFile(targetFile, content, "utf-8");
        console.log(`✅ Migration complete!`);
        console.log(`📁 Output written to: ${path.resolve(targetFile)}`);
        let migratedTests = 0;
        for (const { suite } of suites) {
            migratedTests += suite.getTests().length;
        }
        console.log(`📊 Migrated ${suites.length} suites with ${migratedTests} tests`);
        console.log(`\n⚠️ Remember to review TODO comments and test the migration!`);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`❌ Migration failed: ${reason}`);
        process.exit(1);
    }
}
270
+ /**
271
+ * CLI command definition
272
+ */
273
+ function createMigrateCommand() {
274
+ const command = new commander_1.Command("migrate")
275
+ .description("Migrate legacy configuration to new DSL format")
276
+ .command("config")
277
+ .description("Migrate evalai.config.json to defineEval() specifications")
278
+ .requiredOption("-i, --in <path>", "Input config file path")
279
+ .requiredOption("-o, --out <path>", "Output DSL file path")
280
+ .option("-v, --verbose", "Include detailed comments and logging", false)
281
+ .option("--no-helpers", "Don't generate helper functions")
282
+ .option("--no-preserve-ids", "Don't preserve original test IDs")
283
+ .option("--no-provenance", "Don't include provenance metadata")
284
+ .action(async (options) => {
285
+ const migrateOptions = {
286
+ input: options.in,
287
+ output: options.out,
288
+ verbose: options.verbose,
289
+ helpers: options.helpers !== false,
290
+ preserveIds: options.preserveIds !== false,
291
+ provenance: options.provenance !== false,
292
+ };
293
+ await migrateConfig(migrateOptions);
294
+ });
295
+ return command;
296
+ }
297
+ /**
298
+ * Validate config file structure
299
+ */
300
+ async function validateConfigFile(filePath) {
301
+ try {
302
+ const config = await readConfigFile(filePath);
303
+ // Basic validation
304
+ if (!config || typeof config !== "object") {
305
+ throw new Error("Config file must contain a valid JSON object");
306
+ }
307
+ // Check for test data
308
+ const hasTests = config.tests || config.suites || config.testSuites;
309
+ if (!hasTests) {
310
+ throw new Error("Config file must contain 'tests', 'suites', or 'testSuites' property");
311
+ }
312
+ console.log(`✅ Config file ${filePath} appears valid for migration`);
313
+ return true;
314
+ }
315
+ catch (error) {
316
+ console.error(`❌ Config validation failed: ${error instanceof Error ? error.message : String(error)}`);
317
+ return false;
318
+ }
319
+ }
320
+ /**
321
+ * Show migration preview without writing files
322
+ */
323
+ async function previewMigration(filePath) {
324
+ try {
325
+ const config = await readConfigFile(filePath);
326
+ const suites = extractTestSuitesFromConfig(config);
327
+ console.log(`📋 Migration preview for: ${filePath}`);
328
+ console.log(``);
329
+ console.log(`Found ${suites.length} test suites:`);
330
+ console.log(``);
331
+ for (const { name, suite } of suites) {
332
+ const tests = suite.getTests();
333
+ console.log(` 📁 ${name}: ${tests.length} tests`);
334
+ if (tests.length > 0) {
335
+ console.log(` Tests: ${tests
336
+ .slice(0, 3)
337
+ .map((t) => t.id)
338
+ .join(", ")}${tests.length > 3 ? "..." : ""}`);
339
+ }
340
+ }
341
+ console.log(``);
342
+ console.log(`Total tests to migrate: ${suites.reduce((sum, { suite }) => sum + suite.getTests().length, 0)}`);
343
+ console.log(``);
344
+ console.log(`To migrate, run: evalai migrate config --in ${filePath} --out eval/migrated.spec.ts`);
345
+ }
346
+ catch (error) {
347
+ console.error(`❌ Preview failed: ${error instanceof Error ? error.message : String(error)}`);
348
+ }
349
+ }
@@ -114,8 +114,10 @@ function buildResolvedConfig(cwd, flags) {
114
114
  // Determine source of each field
115
115
  const fields = [];
116
116
  // evaluationId
117
- const evalIdSource = flags.evaluationId ? "arg"
118
- : fileConfig?.evaluationId ? "file"
117
+ const evalIdSource = flags.evaluationId
118
+ ? "arg"
119
+ : fileConfig?.evaluationId
120
+ ? "file"
119
121
  : "default";
120
122
  fields.push({
121
123
  key: "evaluationId",
@@ -124,9 +126,12 @@ function buildResolvedConfig(cwd, flags) {
124
126
  });
125
127
  // baseUrl
126
128
  const envBaseUrl = process.env.EVALAI_BASE_URL;
127
- const baseUrlSource = flags.baseUrl ? "arg"
128
- : envBaseUrl ? "env"
129
- : fileConfig?.baseUrl ? "file"
129
+ const baseUrlSource = flags.baseUrl
130
+ ? "arg"
131
+ : envBaseUrl
132
+ ? "env"
133
+ : fileConfig?.baseUrl
134
+ ? "file"
130
135
  : "default";
131
136
  fields.push({
132
137
  key: "baseUrl",
@@ -136,9 +141,7 @@ function buildResolvedConfig(cwd, flags) {
136
141
  // apiKey (always redacted)
137
142
  const envApiKey = process.env.EVALAI_API_KEY;
138
143
  const rawApiKey = flags.apiKey || envApiKey || "";
139
- const apiKeySource = flags.apiKey ? "arg"
140
- : envApiKey ? "env"
141
- : "default";
144
+ const apiKeySource = flags.apiKey ? "arg" : envApiKey ? "env" : "default";
142
145
  fields.push({
143
146
  key: "apiKey",
144
147
  value: redact(rawApiKey) ?? "(not set)",
@@ -167,9 +170,12 @@ function buildResolvedConfig(cwd, flags) {
167
170
  const profileVal = profileName && profileName in profiles_1.PROFILES
168
171
  ? profiles_1.PROFILES[profileName][key]
169
172
  : undefined;
170
- const source = argVal !== undefined ? "arg"
171
- : fileVal !== undefined ? "file"
172
- : profileVal !== undefined ? "profile"
173
+ const source = argVal !== undefined
174
+ ? "arg"
175
+ : fileVal !== undefined
176
+ ? "file"
177
+ : profileVal !== undefined
178
+ ? "profile"
173
179
  : "default";
174
180
  fields.push({
175
181
  key,
@@ -178,9 +184,7 @@ function buildResolvedConfig(cwd, flags) {
178
184
  });
179
185
  }
180
186
  // baseline
181
- const baselineSource = flags.baseline ? "arg"
182
- : fileConfig?.baseline ? "file"
183
- : "default";
187
+ const baselineSource = flags.baseline ? "arg" : fileConfig?.baseline ? "file" : "default";
184
188
  fields.push({
185
189
  key: "baseline",
186
190
  value: merged.baseline ?? "published",
@@ -0,0 +1,101 @@
1
+ /**
2
+ * TICKET 4 — Unified evalai run CLI Command
3
+ *
4
+ * Goal: Consolidated execution interface that consumes manifest
5
+ *
6
+ * Features:
7
+ * - Manifest loading and spec filtering
8
+ * - --impacted-only integration with impact analysis
9
+ * - Local executor integration
10
+ * - .evalai/last-run.json output
11
+ * - Legacy mode compatibility
12
+ */
13
+ /**
14
+ * Options accepted by runEvaluations() and runEvaluationsCLI(). Every field is optional.
15
+ */
16
+ export interface RunOptions {
17
+ /** Restrict the run to these spec IDs */
18
+ specIds?: string[];
19
+ /** Run only impacted specs (requires a base branch, see baseBranch) */
20
+ impactedOnly?: boolean;
21
+ /** Base branch used for impact analysis */
22
+ baseBranch?: string;
23
+ /** Output format: "human" or "json" */
24
+ format?: "human" | "json";
25
+ /** Write run results to a file */
26
+ writeResults?: boolean;
27
+ }
28
+ /**
29
+ * Result of one run, as resolved by runEvaluations(): run metadata, per-spec results and summary counts.
30
+ */
31
+ export interface RunResult {
32
+ /** Schema version for compatibility checking */
33
+ schemaVersion: number;
34
+ /** Unique run identifier */
35
+ runId: string;
36
+ /** Execution metadata (timing, spec counts and run mode) */
37
+ metadata: {
38
+ startedAt: number; // presumably a timestamp; units not visible in this declaration (TODO confirm)
39
+ completedAt: number; // presumably a timestamp; units not visible in this declaration (TODO confirm)
40
+ duration: number; // units not visible in this declaration (TODO confirm)
41
+ totalSpecs: number;
42
+ executedSpecs: number;
43
+ mode: "spec" | "legacy";
44
+ };
45
+ /** Individual spec results, one SpecResult per entry */
46
+ results: SpecResult[];
47
+ /** Summary statistics: counts per status plus a pass rate */
48
+ summary: {
49
+ passed: number;
50
+ failed: number;
51
+ skipped: number;
52
+ passRate: number; // scale (0-1 or 0-100) is not visible in this declaration (TODO confirm)
53
+ };
54
+ }
55
+ /**
56
+ * Result of a single spec; RunResult.results is an array of these.
57
+ */
58
+ export interface SpecResult {
59
+ /** Spec identifier */
60
+ specId: string;
61
+ /** Spec name */
62
+ name: string;
63
+ /** File path */
64
+ filePath: string;
65
+ /** Execution outcome: status and duration are always present, score and error are optional */
66
+ result: {
67
+ status: "passed" | "failed" | "skipped";
68
+ score?: number;
69
+ error?: string;
70
+ duration: number; // units not visible in this declaration (TODO confirm)
71
+ };
72
+ }
73
+ /**
74
+ * Run evaluation specifications and resolve with a RunResult. `projectRoot` is optional; its default is not visible in this declaration.
75
+ */
76
+ export declare function runEvaluations(options: RunOptions, projectRoot?: string): Promise<RunResult>;
77
+ /**
78
+ * Run index entry: identity, optional git info, mode, spec count, pass rate and average score for one run.
79
+ */
80
+ export interface RunIndexEntry {
81
+ runId: string;
82
+ createdAt: number; // presumably a timestamp; units not visible in this declaration (TODO confirm)
83
+ gitSha?: string;
84
+ branch?: string;
85
+ mode: "spec" | "legacy";
86
+ specCount: number;
87
+ passRate: number; // scale not visible in this declaration (TODO confirm)
88
+ avgScore: number; // scale not visible in this declaration (TODO confirm)
89
+ }
90
+ /**
91
+ * Print a RunResult in human-readable form. Returns nothing.
92
+ */
93
+ export declare function printHumanResults(result: RunResult): void;
94
+ /**
95
+ * Print a RunResult as JSON. Returns nothing.
96
+ */
97
+ export declare function printJsonResults(result: RunResult): void;
98
+ /**
99
+ * CLI entry point: takes the same RunOptions as runEvaluations() and resolves with no value.
100
+ */
101
+ export declare function runEvaluationsCLI(options: RunOptions): Promise<void>;