@pauly4010/evalai-sdk 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/CHANGELOG.md +54 -0
  2. package/dist/cli/ci.d.ts +45 -0
  3. package/dist/cli/ci.js +192 -0
  4. package/dist/cli/diff.d.ts +173 -0
  5. package/dist/cli/diff.js +680 -0
  6. package/dist/cli/discover.d.ts +84 -0
  7. package/dist/cli/discover.js +408 -0
  8. package/dist/cli/doctor.js +19 -10
  9. package/dist/cli/env.d.ts +21 -0
  10. package/dist/cli/env.js +42 -0
  11. package/dist/cli/explain.js +143 -37
  12. package/dist/cli/impact-analysis.d.ts +63 -0
  13. package/dist/cli/impact-analysis.js +251 -0
  14. package/dist/cli/index.js +173 -0
  15. package/dist/cli/manifest.d.ts +105 -0
  16. package/dist/cli/manifest.js +275 -0
  17. package/dist/cli/migrate.d.ts +41 -0
  18. package/dist/cli/migrate.js +349 -0
  19. package/dist/cli/print-config.js +18 -14
  20. package/dist/cli/run.d.ts +101 -0
  21. package/dist/cli/run.js +389 -0
  22. package/dist/cli/workspace.d.ts +28 -0
  23. package/dist/cli/workspace.js +58 -0
  24. package/dist/index.d.ts +6 -0
  25. package/dist/index.js +30 -5
  26. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  27. package/dist/runtime/adapters/config-to-dsl.js +391 -0
  28. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  29. package/dist/runtime/adapters/testsuite-to-dsl.js +271 -0
  30. package/dist/runtime/context.d.ts +26 -0
  31. package/dist/runtime/context.js +74 -0
  32. package/dist/runtime/eval.d.ts +46 -0
  33. package/dist/runtime/eval.js +237 -0
  34. package/dist/runtime/execution-mode.d.ts +80 -0
  35. package/dist/runtime/execution-mode.js +353 -0
  36. package/dist/runtime/executor.d.ts +16 -0
  37. package/dist/runtime/executor.js +152 -0
  38. package/dist/runtime/registry.d.ts +78 -0
  39. package/dist/runtime/registry.js +416 -0
  40. package/dist/runtime/run-report.d.ts +202 -0
  41. package/dist/runtime/run-report.js +220 -0
  42. package/dist/runtime/types.d.ts +356 -0
  43. package/dist/runtime/types.js +76 -0
  44. package/dist/testing.d.ts +65 -0
  45. package/dist/testing.js +42 -0
  46. package/dist/version.d.ts +1 -1
  47. package/dist/version.js +1 -1
  48. package/package.json +4 -3
@@ -0,0 +1,416 @@
1
+ "use strict";
2
+ /**
3
+ * EvalAI Runtime Registry - Layer 1 Foundation
4
+ *
5
+ * Scoped registry with proper lifecycle management.
6
+ * Prevents cross-run contamination and memory leaks.
7
+ */
8
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // Module-interop helper (looks compiler-emitted): exposes m[k] on o under key k2; an existing this.__createBinding is reused when present.
9
+ if (k2 === undefined) k2 = k; // target key defaults to the source key
10
+ var desc = Object.getOwnPropertyDescriptor(m, k);
11
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
12
+ desc = { enumerable: true, get: function() { return m[k]; } }; // getter reads m[k] on every access, so later changes to m[k] show through o[k2]
13
+ }
14
+ Object.defineProperty(o, k2, desc);
15
+ }) : (function(o, m, k, k2) { // fallback used when Object.create is falsy: plain one-time value copy
16
+ if (k2 === undefined) k2 = k;
17
+ o[k2] = m[k];
18
+ }));
19
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // Module-interop helper: stores v as the enumerable "default" property of o.
20
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
21
+ }) : function(o, v) {
22
+ o["default"] = v;
23
+ });
24
+ var __importStar = (this && this.__importStar) || (function () { // Module-interop helper: wraps a required module in a namespace-style object.
25
+ var ownKeys = function(o) {
26
+ ownKeys = Object.getOwnPropertyNames || function (o) { // first call replaces ownKeys with the native function or the for-in fallback
27
+ var ar = [];
28
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
29
+ return ar;
30
+ };
31
+ return ownKeys(o);
32
+ };
33
+ return function (mod) {
34
+ if (mod && mod.__esModule) return mod; // modules flagged __esModule are returned unchanged
35
+ var result = {};
36
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // bind every own key except "default"
37
+ __setModuleDefault(result, mod); // the original module itself becomes result.default
38
+ return result;
39
+ };
40
+ })();
41
+ Object.defineProperty(exports, "__esModule", { value: true });
42
+ exports.createEvalRuntime = createEvalRuntime;
43
+ exports.withRuntime = withRuntime;
44
+ exports.getActiveRuntime = getActiveRuntime;
45
+ exports.setActiveRuntime = setActiveRuntime;
46
+ exports.disposeActiveRuntime = disposeActiveRuntime;
47
+ const crypto = __importStar(require("node:crypto"));
48
+ const path = __importStar(require("node:path"));
49
+ const types_1 = require("./types");
50
/**
 * Runtime registry implementation.
 *
 * Holds evaluation specifications in a per-instance Map keyed by spec ID.
 * Every instance carries a random UUID (`id`), a 12-hex-character namespace
 * derived from the resolved project root, and its creation time. Calling
 * clear() empties the registry and marks the instance disposed; afterwards
 * every accessor except getHealth() throws a RuntimeError.
 */
class EvalRuntimeImpl {
    /**
     * @param projectRoot Directory whose resolved absolute path seeds the namespace hash.
     */
    constructor(projectRoot) {
        this.specs = new Map();
        this.disposed = false;
        this.id = crypto.randomUUID();
        this.namespace = this.generateNamespace(projectRoot);
        this.createdAt = new Date();
    }
    /**
     * Guard shared by every operation that needs a live runtime.
     * Always attaches the runtime ID and namespace so the error can be traced
     * back to the instance that raised it.
     *
     * @throws RuntimeError when clear() has already been called.
     */
    assertNotDisposed() {
        if (this.disposed) {
            throw new types_1.RuntimeError("Runtime has been disposed", {
                runtimeId: this.id,
                namespace: this.namespace,
            });
        }
    }
    /**
     * Generate the project namespace from the project root.
     *
     * @returns First 12 hex characters of the SHA-256 of the resolved root path.
     */
    generateNamespace(projectRoot) {
        return crypto.createHash("sha256").update(path.resolve(projectRoot)).digest("hex").slice(0, 12);
    }
    /**
     * Generate a content-addressable specification ID.
     *
     * The ID is the first 20 hex characters of the SHA-256 over
     * `namespace|canonicalPath|name|suitePath|line:column`.
     *
     * NOTE(review): the path is made relative to process.cwd(), not to the
     * project root given to the constructor, so IDs depend on the working
     * directory. Left unchanged because other modules may compute matching
     * IDs the same way - confirm before switching to the stored root.
     */
    generateSpecId(identity) {
        // Canonicalize path: relative to the working directory, POSIX separators.
        const projectRoot = process.cwd();
        const relativePath = path.relative(projectRoot, identity.filePath);
        const canonicalPath = relativePath.split(path.sep).join("/");
        const components = [
            identity.namespace,
            canonicalPath,
            identity.name,
            identity.suitePath || "",
            `${identity.position.line}:${identity.position.column}`,
        ];
        return crypto.createHash("sha256").update(components.join("|")).digest("hex").slice(0, 20);
    }
    /**
     * Register a new specification.
     *
     * @throws RuntimeError when the runtime is disposed.
     * @throws SpecRegistrationError when the spec is invalid or its ID is already registered.
     */
    register(spec) {
        this.assertNotDisposed();
        this.validateSpec(spec);
        if (this.specs.has(spec.id)) {
            throw new types_1.SpecRegistrationError(`Specification with ID '${spec.id}' already exists`, {
                specId: spec.id,
                specName: spec.name,
                filePath: spec.filePath,
            });
        }
        this.specs.set(spec.id, spec);
    }
    /**
     * Get a specification by ID.
     *
     * @returns The registered spec, or undefined when the ID is unknown.
     * @throws RuntimeError when the runtime is disposed.
     */
    get(id) {
        this.assertNotDisposed();
        return this.specs.get(id);
    }
    /**
     * List all registered specifications in registration order.
     *
     * @throws RuntimeError when the runtime is disposed.
     */
    list() {
        this.assertNotDisposed();
        return Array.from(this.specs.values());
    }
    /**
     * Find specifications by criteria.
     *
     * Criteria are ANDed together. Within `tags` a spec matches when it has
     * at least one of the requested tags; `files` and `names` are exact
     * membership tests; every `metadata` entry must be strictly equal.
     * Empty or missing criteria fields do not filter.
     *
     * @throws RuntimeError when the runtime is disposed.
     */
    find(criteria) {
        this.assertNotDisposed();
        return Array.from(this.specs.values()).filter((spec) => {
            // Tag filtering: any overlap is enough.
            if (criteria.tags && criteria.tags.length > 0) {
                const specTags = spec.tags || [];
                if (!criteria.tags.some((tag) => specTags.includes(tag))) {
                    return false;
                }
            }
            // File filtering
            if (criteria.files && criteria.files.length > 0) {
                if (!criteria.files.includes(spec.filePath)) {
                    return false;
                }
            }
            // Name filtering
            if (criteria.names && criteria.names.length > 0) {
                if (!criteria.names.includes(spec.name)) {
                    return false;
                }
            }
            // Metadata filtering: every requested key must match exactly.
            if (criteria.metadata) {
                const specMetadata = spec.metadata || {};
                for (const [key, value] of Object.entries(criteria.metadata)) {
                    if (specMetadata[key] !== value) {
                        return false;
                    }
                }
            }
            return true;
        });
    }
    /**
     * Clear all specifications and dispose the runtime.
     * Safe to call more than once.
     */
    clear() {
        this.specs.clear();
        this.disposed = true;
    }
    /**
     * Create a runtime snapshot for persistence.
     *
     * Executors are functions and are never included; each entry carries
     * `executorSerialized: false`. Nested values (position, tags, metadata,
     * config) are referenced, not copied.
     *
     * @throws RuntimeError when the runtime is disposed.
     */
    snapshot() {
        this.assertNotDisposed();
        const serializedSpecs = Array.from(this.specs.values()).map((spec) => ({
            id: spec.id,
            name: spec.name,
            filePath: spec.filePath,
            position: spec.position,
            description: spec.description,
            tags: spec.tags,
            metadata: spec.metadata,
            config: spec.config,
            executorSerialized: false, // Cannot serialize functions
        }));
        return {
            runtimeId: this.id,
            namespace: this.namespace,
            createdAt: this.createdAt.toISOString(),
            specs: serializedSpecs,
            version: "1.0.0",
        };
    }
    /**
     * Load runtime state from a snapshot.
     *
     * Executors cannot be restored, so after validating the snapshot this
     * empties the registry and emits one warning per snapshot entry; no spec
     * is re-registered.
     *
     * NOTE(review): the runtime-ID check only accepts snapshots taken from
     * this very instance, and the registry is left empty afterwards -
     * confirm this is the intended contract.
     *
     * @throws RuntimeError when disposed, or when the snapshot's runtime ID
     *         or namespace does not match this runtime.
     */
    load(snapshot) {
        this.assertNotDisposed();
        if (snapshot.runtimeId !== this.id) {
            throw new types_1.RuntimeError("Snapshot runtime ID does not match current runtime");
        }
        if (snapshot.namespace !== this.namespace) {
            throw new types_1.RuntimeError("Snapshot namespace does not match current runtime");
        }
        this.specs.clear();
        for (const serializedSpec of snapshot.specs) {
            // Users must re-run their defineEval calls to restore executors.
            console.warn(`Cannot restore executor for spec '${serializedSpec.name}' from snapshot`);
        }
    }
    /**
     * Get runtime statistics.
     *
     * Counts are accumulated in Maps and converted with Object.fromEntries,
     * so tags or file paths that collide with Object.prototype members
     * ("constructor", "toString", "__proto__", ...) are counted correctly
     * instead of picking up the inherited value.
     *
     * @throws RuntimeError when the runtime is disposed.
     */
    get stats() {
        this.assertNotDisposed();
        const tagCounts = new Map();
        const fileCounts = new Map();
        for (const spec of this.specs.values()) {
            for (const tag of spec.tags || []) {
                tagCounts.set(tag, (tagCounts.get(tag) || 0) + 1);
            }
            fileCounts.set(spec.filePath, (fileCounts.get(spec.filePath) || 0) + 1);
        }
        return {
            totalSpecs: this.specs.size,
            specsByTag: Object.fromEntries(tagCounts),
            specsByFile: Object.fromEntries(fileCounts),
            memoryUsage: this.estimateMemoryUsage(), // rough estimate
            lastUpdated: new Date(),
        };
    }
    /**
     * Get runtime health information. Never throws: a disposed runtime
     * reports status "error" with zeroed counters.
     */
    getHealth() {
        if (this.disposed) {
            return {
                status: "error",
                memoryUsage: 0,
                specCount: 0,
                issues: ["Runtime has been disposed"],
            };
        }
        const issues = [];
        const memoryUsage = this.estimateMemoryUsage();
        // Warn above 50MB of estimated usage.
        if (memoryUsage > 50 * 1024 * 1024) {
            issues.push("High memory usage detected");
        }
        if (this.specs.size > 10000) {
            issues.push("Large number of specifications may impact performance");
        }
        let status = "healthy";
        if (issues.length > 0) {
            // NOTE(review): neither message above contains "error", so a live
            // runtime currently only ever reports "healthy" or "warning".
            status = issues.some((issue) => issue.includes("error")) ? "error" : "warning";
        }
        return {
            status,
            memoryUsage,
            specCount: this.specs.size,
            issues,
        };
    }
    /**
     * Validate a specification before registration.
     *
     * String fields are type-checked before use so that malformed input
     * surfaces as SpecRegistrationError rather than a raw TypeError.
     *
     * @throws SpecRegistrationError describing the first failed check.
     */
    validateSpec(spec) {
        if (spec === null || typeof spec !== "object") {
            throw new types_1.SpecRegistrationError("Specification must be an object", {
                spec,
            });
        }
        if (typeof spec.name !== "string" || spec.name.trim() === "") {
            throw new types_1.SpecRegistrationError("Specification name is required", {
                spec,
            });
        }
        if (typeof spec.filePath !== "string" || spec.filePath.trim() === "") {
            throw new types_1.SpecRegistrationError("Specification file path is required", {
                spec,
            });
        }
        if (typeof spec.executor !== "function") {
            throw new types_1.SpecRegistrationError("Specification executor is required and must be a function", {
                spec,
            });
        }
        if (!spec.position ||
            typeof spec.position.line !== "number" ||
            typeof spec.position.column !== "number") {
            throw new types_1.SpecRegistrationError("Specification AST position is required", {
                spec,
            });
        }
        // IDs produced by generateSpecId are 20 hex characters long.
        if (typeof spec.id !== "string" || spec.id.length !== 20) {
            throw new types_1.SpecRegistrationError("Specification ID must be 20 characters long", {
                spec,
            });
        }
    }
    /**
     * Estimate memory usage of the registry in bytes.
     * Rough heuristic: a flat 1KB per registered spec.
     */
    estimateMemoryUsage() {
        return this.specs.size * 1024;
    }
}
327
+ /**
328
+ * Create a new scoped runtime (an EvalRuntimeImpl for projectRoot, which defaults to process.cwd())
329
+ * Returns a handle exposing the runtime plus a bound defineEval, dispose, snapshot and load
330
+ */
331
+ function createEvalRuntime(projectRoot = process.cwd()) {
332
+ const runtime = new EvalRuntimeImpl(projectRoot);
333
+ // Create bound defineEval function
334
+ const boundDefineEval = ((nameOrConfig, executor, options) => {
335
+ // Temporarily set this runtime as active
336
+ const previousRuntime = activeRuntime;
337
+ activeRuntime = runtime;
338
+ try {
339
+ // Import and call defineEval
340
+ const { defineEval } = require("./eval"); // resolved at call time, not at module load
341
+ return defineEval(nameOrConfig, executor, options);
342
+ }
343
+ finally {
344
+ // Restore previous runtime
345
+ activeRuntime = previousRuntime; // runs as soon as defineEval returns or throws
346
+ }
347
+ });
348
+ return {
349
+ runtime,
350
+ defineEval: boundDefineEval,
351
+ dispose: () => {
352
+ runtime.clear(); // empties the registry and marks it disposed
353
+ if (activeRuntime === runtime) { // only reset the module-level slot when it still points at this runtime
354
+ activeRuntime = null;
355
+ }
356
+ },
357
+ snapshot: () => runtime.snapshot(),
358
+ load: (snapshot) => runtime.load(snapshot),
359
+ };
360
+ }
361
+ /**
362
+ * Run fn with a freshly created runtime handle and resolve to fn's awaited result
363
+ * The handle is disposed in a finally block, so cleanup also runs when fn throws or rejects
364
+ */
365
+ async function withRuntime(projectRoot, fn) {
366
+ const handle = createEvalRuntime(projectRoot);
367
+ try {
368
+ return await fn(handle); // awaited here so disposal happens only after fn settles
369
+ }
370
+ finally {
371
+ // Always dispose, even on exception
372
+ handle.dispose();
373
+ }
374
+ }
375
+ /**
376
+ * Get the currently active runtime, lazily creating one for process.cwd() when none is set (for backward compatibility)
377
+ */
378
+ let activeRuntime = null; // module-level slot shared by createEvalRuntime, setActiveRuntime and disposeActiveRuntime
379
+ function getActiveRuntime() {
380
+ if (!activeRuntime) {
381
+ activeRuntime = new EvalRuntimeImpl(process.cwd());
382
+ }
383
+ return activeRuntime; // NOTE(review): no disposed check here, so a runtime cleared directly via clear() would still be returned
384
+ }
385
+ /**
386
+ * Set the active runtime (for backward compatibility); throws RuntimeError when one is already set, including one created lazily by getActiveRuntime
387
+ */
388
+ function setActiveRuntime(runtime) {
389
+ if (activeRuntime) { // never replaces an existing runtime; call disposeActiveRuntime first
390
+ throw new types_1.RuntimeError("Active runtime already exists");
391
+ }
392
+ activeRuntime = runtime;
393
+ }
394
+ /**
395
+ * Dispose the active runtime (for backward compatibility); does nothing when no runtime is active
396
+ */
397
+ function disposeActiveRuntime() {
398
+ if (activeRuntime) {
399
+ activeRuntime.clear(); // empties the registry and marks the runtime disposed
400
+ activeRuntime = null; // lets a later getActiveRuntime() create a fresh runtime
401
+ }
402
+ }
403
/**
 * Runtime cleanup hooks for process termination.
 *
 * Registered once, when this module is first loaded. Each hook disposes the
 * active runtime. The signal hooks then exit with the conventional
 * "terminated by signal" status (128 + signal number) instead of 0, so that
 * shells and CI systems do not mistake an interrupted run for a successful
 * one.
 */
process.on("exit", () => {
    disposeActiveRuntime();
});
process.on("SIGINT", () => {
    disposeActiveRuntime();
    // 128 + 2 (SIGINT)
    process.exit(130);
});
process.on("SIGTERM", () => {
    disposeActiveRuntime();
    // 128 + 15 (SIGTERM)
    process.exit(143);
});
@@ -0,0 +1,202 @@
1
+ /**
2
+ * RUNTIME-104: Deterministic Report Serialization (RunReport v1)
3
+ *
4
+ * Stable report format for downstream processing (explain, diff, history).
5
+ * Mirrors CheckReport conventions for consistency.
6
+ */
7
+ import type { EnhancedEvalResult, ExecutionErrorEnvelope } from "./types";
8
+ /**
9
+ * RunReport schema version (currently the string "1") - increment when breaking changes occur
10
+ */
11
+ export declare const RUN_REPORT_SCHEMA_VERSION = "1";
12
+ /**
13
+ * Main run report structure: run identity, runtime info, per-test results, failures, summary and config
14
+ * Mirrors CheckReport conventions for consistency
15
+ */
16
+ export interface RunReport {
17
+ /** Schema version for compatibility (presumably RUN_REPORT_SCHEMA_VERSION - confirm in the implementation) */
18
+ schemaVersion: string;
19
+ /** Unique run identifier */
20
+ runId: string;
21
+ /** Run start timestamp, as a string (exact format is not visible in this declaration) */
22
+ startedAt: string;
23
+ /** Run completion timestamp, as a string (exact format is not visible in this declaration) */
24
+ finishedAt: string;
25
+ /** Runtime information */
26
+ runtime: {
27
+ /** Runtime ID */
28
+ id: string;
29
+ /** Project namespace */
30
+ namespace: string;
31
+ /** Project root path */
32
+ projectRoot: string;
33
+ };
34
+ /** Execution results (sorted by testId for determinism) */
35
+ results: RunResult[];
36
+ /** Failures and errors (sorted by testId for determinism) */
37
+ failures: RunFailure[];
38
+ /** Execution summary */
39
+ summary: RunSummary;
40
+ /** Execution configuration */
41
+ config: RunConfig;
42
+ /** Serialize to JSON string. NOTE(review): JSON.stringify(report) calls toJSON() and then encodes the returned string as a JSON string literal, so call toJSON() directly to obtain the document - confirm intended usage */
43
+ toJSON(): string;
44
+ }
45
+ /**
46
+ * Individual test result (one entry of RunReport.results)
47
+ */
48
+ export interface RunResult {
49
+ /** Test specification ID */
50
+ testId: string;
51
+ /** Test specification name */
52
+ testName: string;
53
+ /** File path where test is defined */
54
+ filePath: string;
55
+ /** AST position in file (whether line/column are 0- or 1-based is not stated here) */
56
+ position: {
57
+ line: number;
58
+ column: number;
59
+ };
60
+ /** Test input */
61
+ input: string;
62
+ /** Pass/fail determination */
63
+ pass: boolean;
64
+ /** Numeric score (0-100) */
65
+ score: number;
66
+ /** Execution duration in milliseconds */
67
+ durationMs: number;
68
+ /** Test metadata (optional, free-form key/value pairs) */
69
+ metadata?: Record<string, unknown>;
70
+ /** Test tags (optional) */
71
+ tags?: string[];
72
+ /** Assertion results if available; each entry has a name, a passed flag and an optional message */
73
+ assertions?: Array<{
74
+ name: string;
75
+ passed: boolean;
76
+ message?: string;
77
+ }>;
78
+ }
79
+ /**
80
+ * Failure or error information (one entry of RunReport.failures)
81
+ */
82
+ export interface RunFailure {
83
+ /** Test specification ID */
84
+ testId: string;
85
+ /** Test specification name */
86
+ testName: string;
87
+ /** File path where test is defined */
88
+ filePath: string;
89
+ /** AST position in file */
90
+ position: {
91
+ line: number;
92
+ column: number;
93
+ };
94
+ /** Failure classification: one of "failed", "error" or "timeout" */
95
+ classification: "failed" | "error" | "timeout";
96
+ /** Error envelope for errors/timeouts (optional; presumably absent for plain "failed" entries - confirm) */
97
+ errorEnvelope?: ExecutionErrorEnvelope;
98
+ /** Human-readable error message */
99
+ message: string;
100
+ /** Failure timestamp, as a string */
101
+ timestamp: string;
102
+ }
103
+ /**
104
+ * Execution summary statistics (RunReport.summary)
105
+ */
106
+ export interface RunSummary {
107
+ /** Total number of tests */
108
+ total: number;
109
+ /** Number of passed tests */
110
+ passed: number;
111
+ /** Number of failed tests */
112
+ failed: number;
113
+ /** Number of errors */
114
+ errors: number;
115
+ /** Number of timeouts */
116
+ timeouts: number;
117
+ /** Overall pass rate (0-100) */
118
+ passRate: number;
119
+ /** Average score (0-100) */
120
+ averageScore: number;
121
+ /** Total execution duration in milliseconds */
122
+ totalDurationMs: number;
123
+ /** Execution success (no errors/timeouts). NOTE(review): as worded, failed tests alone would not clear this flag - confirm against the implementation */
124
+ success: boolean;
125
+ }
126
+ /**
127
+ * Execution configuration (RunReport.config)
128
+ */
129
+ export interface RunConfig {
130
+ /** Executor type */
131
+ executorType: string;
132
+ /** Maximum parallel workers (optional) */
133
+ maxParallel?: number;
134
+ /** Default timeout in milliseconds */
135
+ defaultTimeout: number;
136
+ /** Environment information (presumably taken from the Node.js process - confirm in the implementation) */
137
+ environment: {
138
+ nodeVersion: string;
139
+ platform: string;
140
+ arch: string;
141
+ };
142
+ }
143
+ /**
144
+ * RunReport builder for creating deterministic reports; results are added one at a time and build() returns the finished RunReport
145
+ */
146
+ export declare class RunReportBuilder {
147
+ private runId;
148
+ private runtimeInfo;
149
+ private report;
150
+ /**
151
+ * Initialize report with basic metadata: the run ID and the runtime's id, namespace and project root
152
+ */
153
+ constructor(runId: string, runtimeInfo: {
154
+ id: string;
155
+ namespace: string;
156
+ projectRoot: string;
157
+ });
158
+ /**
159
+ * Add a test result to the report, identified by spec ID, name, file path and source position
160
+ */
161
+ addResult(testId: string, testName: string, filePath: string, position: {
162
+ line: number;
163
+ column: number;
164
+ }, input: string, result: EnhancedEvalResult): void;
165
+ /**
166
+ * Update summary statistics (private; the call sites are not visible in this declaration)
167
+ */
168
+ private updateSummary;
169
+ /**
170
+ * Add a failure to the report (private; the call sites are not visible in this declaration)
171
+ */
172
+ private addFailure;
173
+ /**
174
+ * Set execution configuration; accepts a partial RunConfig, so any subset of its fields may be supplied
175
+ */
176
+ setConfig(config: Partial<RunConfig>): void;
177
+ /**
178
+ * Finalize and return the complete report
179
+ */
180
+ build(): RunReport;
181
+ /**
182
+ * Serialize report to JSON string
183
+ * Ensures deterministic output
184
+ */
185
+ toJSON(): string;
186
+ /**
187
+ * Write report to file at filePath; the returned promise resolves with no value
188
+ */
189
+ writeToFile(filePath: string): Promise<void>;
190
+ }
191
+ /**
192
+ * Create a new RunReport builder; takes the same runId and runtimeInfo arguments as the RunReportBuilder constructor
193
+ */
194
+ export declare function createRunReport(runId: string, runtimeInfo: {
195
+ id: string;
196
+ namespace: string;
197
+ projectRoot: string;
198
+ }): RunReportBuilder;
199
+ /**
200
+ * Parse a RunReport from JSON string (what validation is applied, if any, is not visible in this declaration)
201
+ */
202
+ export declare function parseRunReport(json: string): RunReport;