@evalgate/sdk 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +638 -0
  2. package/README.md +398 -0
  3. package/dist/assertions.d.ts +189 -0
  4. package/dist/assertions.js +662 -0
  5. package/dist/batch.d.ts +68 -0
  6. package/dist/batch.js +179 -0
  7. package/dist/cache.d.ts +65 -0
  8. package/dist/cache.js +131 -0
  9. package/dist/cli/api.d.ts +108 -0
  10. package/dist/cli/api.js +132 -0
  11. package/dist/cli/baseline.d.ts +10 -0
  12. package/dist/cli/baseline.js +172 -0
  13. package/dist/cli/check.d.ts +73 -0
  14. package/dist/cli/check.js +355 -0
  15. package/dist/cli/ci-context.d.ts +6 -0
  16. package/dist/cli/ci-context.js +112 -0
  17. package/dist/cli/ci.d.ts +45 -0
  18. package/dist/cli/ci.js +192 -0
  19. package/dist/cli/config.d.ts +30 -0
  20. package/dist/cli/config.js +230 -0
  21. package/dist/cli/constants.d.ts +15 -0
  22. package/dist/cli/constants.js +18 -0
  23. package/dist/cli/diff.d.ts +173 -0
  24. package/dist/cli/diff.js +685 -0
  25. package/dist/cli/discover.d.ts +84 -0
  26. package/dist/cli/discover.js +419 -0
  27. package/dist/cli/doctor.d.ts +88 -0
  28. package/dist/cli/doctor.js +675 -0
  29. package/dist/cli/env.d.ts +21 -0
  30. package/dist/cli/env.js +42 -0
  31. package/dist/cli/explain.d.ts +58 -0
  32. package/dist/cli/explain.js +561 -0
  33. package/dist/cli/formatters/github.d.ts +8 -0
  34. package/dist/cli/formatters/github.js +135 -0
  35. package/dist/cli/formatters/human.d.ts +6 -0
  36. package/dist/cli/formatters/human.js +110 -0
  37. package/dist/cli/formatters/json.d.ts +6 -0
  38. package/dist/cli/formatters/json.js +10 -0
  39. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  40. package/dist/cli/formatters/pr-comment.js +103 -0
  41. package/dist/cli/formatters/types.d.ts +103 -0
  42. package/dist/cli/formatters/types.js +8 -0
  43. package/dist/cli/gate.d.ts +21 -0
  44. package/dist/cli/gate.js +179 -0
  45. package/dist/cli/impact-analysis.d.ts +63 -0
  46. package/dist/cli/impact-analysis.js +252 -0
  47. package/dist/cli/index.d.ts +9 -0
  48. package/dist/cli/index.js +332 -0
  49. package/dist/cli/init.d.ts +16 -0
  50. package/dist/cli/init.js +292 -0
  51. package/dist/cli/manifest.d.ts +103 -0
  52. package/dist/cli/manifest.js +282 -0
  53. package/dist/cli/migrate.d.ts +41 -0
  54. package/dist/cli/migrate.js +349 -0
  55. package/dist/cli/policy-packs.d.ts +23 -0
  56. package/dist/cli/policy-packs.js +89 -0
  57. package/dist/cli/print-config.d.ts +29 -0
  58. package/dist/cli/print-config.js +270 -0
  59. package/dist/cli/profiles.d.ts +28 -0
  60. package/dist/cli/profiles.js +30 -0
  61. package/dist/cli/reason-codes.d.ts +17 -0
  62. package/dist/cli/reason-codes.js +19 -0
  63. package/dist/cli/regression-gate.d.ts +15 -0
  64. package/dist/cli/regression-gate.js +341 -0
  65. package/dist/cli/render/snippet.d.ts +5 -0
  66. package/dist/cli/render/snippet.js +15 -0
  67. package/dist/cli/render/sort.d.ts +10 -0
  68. package/dist/cli/render/sort.js +24 -0
  69. package/dist/cli/report/build-check-report.d.ts +19 -0
  70. package/dist/cli/report/build-check-report.js +132 -0
  71. package/dist/cli/run.d.ts +101 -0
  72. package/dist/cli/run.js +395 -0
  73. package/dist/cli/share.d.ts +17 -0
  74. package/dist/cli/share.js +91 -0
  75. package/dist/cli/upgrade.d.ts +15 -0
  76. package/dist/cli/upgrade.js +492 -0
  77. package/dist/cli/workspace.d.ts +31 -0
  78. package/dist/cli/workspace.js +68 -0
  79. package/dist/client.d.ts +368 -0
  80. package/dist/client.js +893 -0
  81. package/dist/client.request.test.d.ts +1 -0
  82. package/dist/client.request.test.js +232 -0
  83. package/dist/context.d.ts +134 -0
  84. package/dist/context.js +215 -0
  85. package/dist/errors.d.ts +82 -0
  86. package/dist/errors.js +298 -0
  87. package/dist/export.d.ts +195 -0
  88. package/dist/export.js +344 -0
  89. package/dist/index.d.ts +44 -0
  90. package/dist/index.js +153 -0
  91. package/dist/integrations/anthropic.d.ts +91 -0
  92. package/dist/integrations/anthropic.js +163 -0
  93. package/dist/integrations/openai-eval.d.ts +57 -0
  94. package/dist/integrations/openai-eval.js +232 -0
  95. package/dist/integrations/openai.d.ts +92 -0
  96. package/dist/integrations/openai.js +160 -0
  97. package/dist/local.d.ts +39 -0
  98. package/dist/local.js +148 -0
  99. package/dist/logger.d.ts +128 -0
  100. package/dist/logger.js +227 -0
  101. package/dist/matchers/index.d.ts +1 -0
  102. package/dist/matchers/index.js +6 -0
  103. package/dist/matchers/to-pass-gate.d.ts +29 -0
  104. package/dist/matchers/to-pass-gate.js +35 -0
  105. package/dist/pagination.d.ts +74 -0
  106. package/dist/pagination.js +139 -0
  107. package/dist/regression.d.ts +100 -0
  108. package/dist/regression.js +44 -0
  109. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  110. package/dist/runtime/adapters/config-to-dsl.js +400 -0
  111. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  112. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  113. package/dist/runtime/context.d.ts +26 -0
  114. package/dist/runtime/context.js +74 -0
  115. package/dist/runtime/eval.d.ts +46 -0
  116. package/dist/runtime/eval.js +244 -0
  117. package/dist/runtime/execution-mode.d.ts +80 -0
  118. package/dist/runtime/execution-mode.js +357 -0
  119. package/dist/runtime/executor.d.ts +16 -0
  120. package/dist/runtime/executor.js +152 -0
  121. package/dist/runtime/registry.d.ts +78 -0
  122. package/dist/runtime/registry.js +403 -0
  123. package/dist/runtime/run-report.d.ts +200 -0
  124. package/dist/runtime/run-report.js +222 -0
  125. package/dist/runtime/types.d.ts +356 -0
  126. package/dist/runtime/types.js +76 -0
  127. package/dist/snapshot.d.ts +176 -0
  128. package/dist/snapshot.js +322 -0
  129. package/dist/streaming.d.ts +173 -0
  130. package/dist/streaming.js +268 -0
  131. package/dist/testing.d.ts +273 -0
  132. package/dist/testing.js +317 -0
  133. package/dist/types.d.ts +754 -0
  134. package/dist/types.js +54 -0
  135. package/dist/utils/input-hash.d.ts +8 -0
  136. package/dist/utils/input-hash.js +41 -0
  137. package/dist/version.d.ts +7 -0
  138. package/dist/version.js +10 -0
  139. package/dist/workflows.d.ts +389 -0
  140. package/dist/workflows.js +671 -0
  141. package/package.json +117 -0
@@ -0,0 +1,403 @@
1
+ "use strict";
2
+ /**
3
+ * EvalGate Runtime Registry - Layer 1 Foundation
4
+ *
5
+ * Scoped registry with proper lifecycle management.
6
+ * Prevents cross-run contamination and memory leaks.
7
+ */
8
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // Interop helper: expose m[k] on o under the name k2. A helper already present on `this` is reused instead of redefining it.
9
+ if (k2 === undefined) k2 = k; // Default the target name to the source name.
10
+ var desc = Object.getOwnPropertyDescriptor(m, k);
11
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
12
+ desc = { enumerable: true, get: function() { return m[k]; } }; // Replace the descriptor with a live getter so later changes to m[k] stay visible through o[k2].
13
+ }
14
+ Object.defineProperty(o, k2, desc);
15
+ }) : (function(o, m, k, k2) { // Fallback when Object.create is unavailable: plain value copy (not live).
16
+ if (k2 === undefined) k2 = k;
17
+ o[k2] = m[k];
18
+ }));
19
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // Interop helper: attach v to o as its "default" property.
20
+ Object.defineProperty(o, "default", { enumerable: true, value: v }); // Enumerable; writable/configurable are not specified, so defineProperty leaves them false.
21
+ }) : function(o, v) { // Fallback when Object.create is unavailable: ordinary assignment.
22
+ o["default"] = v;
23
+ });
24
+ var __importStar = (this && this.__importStar) || (function () { // Interop helper wrapped around the node:crypto and node:path require calls in this file; builds a namespace-style object for a module.
25
+ var ownKeys = function(o) {
26
+ ownKeys = Object.getOwnPropertyNames || function (o) { // Self-replacing on first call: binds the native lister, or a for-in fallback, once.
27
+ var ar = [];
28
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
29
+ return ar;
30
+ };
31
+ return ownKeys(o);
32
+ };
33
+ return function (mod) {
34
+ if (mod && mod.__esModule) return mod; // Modules already flagged __esModule are returned unchanged.
35
+ var result = {};
36
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // Bind every own key except "default".
37
+ __setModuleDefault(result, mod); // The original module value becomes result.default.
38
+ return result;
39
+ };
40
+ })();
41
+ Object.defineProperty(exports, "__esModule", { value: true }); // Flag this CommonJS module as ES-module-shaped (the flag that helpers like __importStar check).
42
+ exports.createEvalRuntime = createEvalRuntime; // Public API. These assignments run before the function bodies appear in the file; function declarations are hoisted, so the names are already bound.
43
+ exports.withRuntime = withRuntime;
44
+ exports.getActiveRuntime = getActiveRuntime;
45
+ exports.setActiveRuntime = setActiveRuntime;
46
+ exports.disposeActiveRuntime = disposeActiveRuntime;
47
+ const crypto = __importStar(require("node:crypto"));
48
+ const path = __importStar(require("node:path"));
49
+ const types_1 = require("./types");
50
+ /**
51
+ * Runtime registry implementation
52
+ * Scoped lifecycle with proper memory management
53
+ */
54
+ class EvalRuntimeImpl {
55
+ constructor(projectRoot) {
56
+ this.specs = new Map();
57
+ this.disposed = false;
58
+ this.id = crypto.randomUUID();
59
+ this.namespace = this.generateNamespace(projectRoot);
60
+ this.createdAt = new Date();
61
+ }
62
+ /**
63
+ * Generate project namespace from project root
64
+ * Content-addressable to prevent collisions
65
+ */
66
+ generateNamespace(projectRoot) {
67
+ return crypto
68
+ .createHash("sha256")
69
+ .update(path.resolve(projectRoot))
70
+ .digest("hex")
71
+ .slice(0, 12);
72
+ }
73
+ /**
74
+ * Register a new specification
75
+ */
76
+ register(spec) {
77
+ if (this.disposed) {
78
+ throw new types_1.RuntimeError("Runtime has been disposed", {
79
+ runtimeId: this.id,
80
+ namespace: this.namespace,
81
+ });
82
+ }
83
+ // Validate specification
84
+ this.validateSpec(spec);
85
+ // Check for existing spec with same ID
86
+ if (this.specs.has(spec.id)) {
87
+ throw new types_1.SpecRegistrationError(`Specification with ID '${spec.id}' already exists`, {
88
+ specId: spec.id,
89
+ specName: spec.name,
90
+ filePath: spec.filePath,
91
+ });
92
+ }
93
+ // Register specification
94
+ this.specs.set(spec.id, spec);
95
+ }
96
+ /**
97
+ * Get specification by ID
98
+ */
99
+ get(id) {
100
+ if (this.disposed) {
101
+ throw new types_1.RuntimeError("Runtime has been disposed");
102
+ }
103
+ return this.specs.get(id);
104
+ }
105
+ /**
106
+ * List all registered specifications
107
+ */
108
+ list() {
109
+ if (this.disposed) {
110
+ throw new types_1.RuntimeError("Runtime has been disposed");
111
+ }
112
+ return Array.from(this.specs.values());
113
+ }
114
+ /**
115
+ * Find specifications by criteria
116
+ */
117
+ find(criteria) {
118
+ if (this.disposed) {
119
+ throw new types_1.RuntimeError("Runtime has been disposed");
120
+ }
121
+ return Array.from(this.specs.values()).filter((spec) => {
122
+ // Tag filtering
123
+ if (criteria.tags && criteria.tags.length > 0) {
124
+ const specTags = spec.tags || [];
125
+ if (!criteria.tags.some((tag) => specTags.includes(tag))) {
126
+ return false;
127
+ }
128
+ }
129
+ // File filtering
130
+ if (criteria.files && criteria.files.length > 0) {
131
+ if (!criteria.files.includes(spec.filePath)) {
132
+ return false;
133
+ }
134
+ }
135
+ // Name filtering
136
+ if (criteria.names && criteria.names.length > 0) {
137
+ if (!criteria.names.includes(spec.name)) {
138
+ return false;
139
+ }
140
+ }
141
+ // Metadata filtering
142
+ if (criteria.metadata) {
143
+ const specMetadata = spec.metadata || {};
144
+ for (const [key, value] of Object.entries(criteria.metadata)) {
145
+ if (specMetadata[key] !== value) {
146
+ return false;
147
+ }
148
+ }
149
+ }
150
+ return true;
151
+ });
152
+ }
153
+ /**
154
+ * Clear all specifications and dispose runtime
155
+ */
156
+ clear() {
157
+ this.specs.clear();
158
+ this.disposed = true;
159
+ }
160
+ /**
161
+ * Create runtime snapshot for persistence
162
+ */
163
+ snapshot() {
164
+ if (this.disposed) {
165
+ throw new types_1.RuntimeError("Runtime has been disposed");
166
+ }
167
+ const serializedSpecs = Array.from(this.specs.values()).map((spec) => ({
168
+ id: spec.id,
169
+ name: spec.name,
170
+ filePath: spec.filePath,
171
+ position: spec.position,
172
+ description: spec.description,
173
+ tags: spec.tags,
174
+ metadata: spec.metadata,
175
+ config: spec.config,
176
+ executorSerialized: false, // Cannot serialize functions
177
+ }));
178
+ return {
179
+ runtimeId: this.id,
180
+ namespace: this.namespace,
181
+ createdAt: this.createdAt.toISOString(),
182
+ specs: serializedSpecs,
183
+ version: "1.0.0",
184
+ };
185
+ }
186
+ /**
187
+ * Load runtime from snapshot
188
+ * Note: Executors cannot be serialized and must be recreated
189
+ */
190
+ load(snapshot) {
191
+ if (this.disposed) {
192
+ throw new types_1.RuntimeError("Runtime has been disposed");
193
+ }
194
+ if (snapshot.runtimeId !== this.id) {
195
+ throw new types_1.RuntimeError("Snapshot runtime ID does not match current runtime");
196
+ }
197
+ if (snapshot.namespace !== this.namespace) {
198
+ throw new types_1.RuntimeError("Snapshot namespace does not match current runtime");
199
+ }
200
+ // Clear current specs
201
+ this.specs.clear();
202
+ // Load specs from snapshot (without executors)
203
+ for (const serializedSpec of snapshot.specs) {
204
+ // Note: Executors cannot be restored from snapshot
205
+ // Users must recreate the defineEval calls to restore executors
206
+ console.warn(`Cannot restore executor for spec '${serializedSpec.name}' from snapshot`);
207
+ }
208
+ }
209
+ /**
210
+ * Get runtime statistics
211
+ */
212
+ get stats() {
213
+ if (this.disposed) {
214
+ throw new types_1.RuntimeError("Runtime has been disposed");
215
+ }
216
+ const specsByTag = {};
217
+ const specsByFile = {};
218
+ for (const spec of this.specs.values()) {
219
+ // Count by tags
220
+ const tags = spec.tags || [];
221
+ for (const tag of tags) {
222
+ specsByTag[tag] = (specsByTag[tag] || 0) + 1;
223
+ }
224
+ // Count by files
225
+ specsByFile[spec.filePath] = (specsByFile[spec.filePath] || 0) + 1;
226
+ }
227
+ // Estimate memory usage (rough calculation)
228
+ const memoryUsage = this.estimateMemoryUsage();
229
+ return {
230
+ totalSpecs: this.specs.size,
231
+ specsByTag,
232
+ specsByFile,
233
+ memoryUsage,
234
+ lastUpdated: new Date(),
235
+ };
236
+ }
237
+ /**
238
+ * Get runtime health information
239
+ */
240
+ getHealth() {
241
+ if (this.disposed) {
242
+ return {
243
+ status: "error",
244
+ memoryUsage: 0,
245
+ specCount: 0,
246
+ issues: ["Runtime has been disposed"],
247
+ };
248
+ }
249
+ const issues = [];
250
+ const memoryUsage = this.estimateMemoryUsage();
251
+ // Memory usage warnings
252
+ if (memoryUsage > 50 * 1024 * 1024) {
253
+ // 50MB
254
+ issues.push("High memory usage detected");
255
+ }
256
+ // Spec count warnings
257
+ if (this.specs.size > 10000) {
258
+ issues.push("Large number of specifications may impact performance");
259
+ }
260
+ let status = "healthy";
261
+ if (issues.length > 0) {
262
+ status = issues.some((issue) => issue.includes("error"))
263
+ ? "error"
264
+ : "warning";
265
+ }
266
+ return {
267
+ status,
268
+ memoryUsage,
269
+ specCount: this.specs.size,
270
+ issues,
271
+ };
272
+ }
273
+ /**
274
+ * Validate specification before registration
275
+ */
276
+ validateSpec(spec) {
277
+ if (!spec.name || spec.name.trim() === "") {
278
+ throw new types_1.SpecRegistrationError("Specification name is required", {
279
+ spec,
280
+ });
281
+ }
282
+ if (!spec.filePath || spec.filePath.trim() === "") {
283
+ throw new types_1.SpecRegistrationError("Specification file path is required", {
284
+ spec,
285
+ });
286
+ }
287
+ if (!spec.executor || typeof spec.executor !== "function") {
288
+ throw new types_1.SpecRegistrationError("Specification executor is required and must be a function", {
289
+ spec,
290
+ });
291
+ }
292
+ if (!spec.position ||
293
+ typeof spec.position.line !== "number" ||
294
+ typeof spec.position.column !== "number") {
295
+ throw new types_1.SpecRegistrationError("Specification AST position is required", {
296
+ spec,
297
+ });
298
+ }
299
+ // Validate ID format
300
+ if (!spec.id || spec.id.length !== 20) {
301
+ throw new types_1.SpecRegistrationError("Specification ID must be 20 characters long", {
302
+ spec,
303
+ });
304
+ }
305
+ }
306
+ /**
307
+ * Estimate memory usage of the registry
308
+ */
309
+ estimateMemoryUsage() {
310
+ // Rough estimation: each spec ~1KB of data
311
+ return this.specs.size * 1024;
312
+ }
313
+ }
314
+ /**
315
+ * Create a new scoped runtime with lifecycle management. projectRoot defaults to process.cwd().
316
+ * Returns a handle { runtime, defineEval, dispose, snapshot, load } for proper resource management; creating the handle does not change the module-level active runtime.
317
+ */
318
+ function createEvalRuntime(projectRoot = process.cwd()) {
319
+ const runtime = new EvalRuntimeImpl(projectRoot);
320
+ // Create bound defineEval function
321
+ const boundDefineEval = ((nameOrConfig, executor, options) => {
322
+ // Temporarily set this runtime as active
323
+ const previousRuntime = activeRuntime;
324
+ activeRuntime = runtime;
325
+ try {
326
+ // Import and call defineEval (required at call time rather than at module load; presumably to avoid a circular import with ./eval - TODO confirm)
327
+ const { defineEval } = require("./eval");
328
+ return defineEval(nameOrConfig, executor, options);
329
+ }
330
+ finally {
331
+ // Restore previous runtime as soon as defineEval returns or throws (the swap covers the synchronous call only)
332
+ activeRuntime = previousRuntime;
333
+ }
334
+ });
335
+ return {
336
+ runtime,
337
+ defineEval: boundDefineEval,
338
+ dispose: () => {
339
+ runtime.clear(); // empties the registry and marks the runtime disposed
340
+ if (activeRuntime === runtime) { // only reset the module-level slot when it still points at this runtime
341
+ activeRuntime = null;
342
+ }
343
+ },
344
+ snapshot: () => runtime.snapshot(),
345
+ load: (snapshot) => runtime.load(snapshot),
346
+ };
347
+ }
348
+ /**
349
+ * Helper function for safe runtime execution with automatic cleanup: creates a runtime handle for projectRoot, awaits fn(handle) and resolves with its result.
350
+ * Ensures runtime is disposed even if an exception is thrown; an error from fn propagates to the caller after disposal.
351
+ */
352
+ async function withRuntime(projectRoot, fn) {
353
+ const handle = createEvalRuntime(projectRoot);
354
+ try {
355
+ return await fn(handle); // awaited inside the try so disposal waits for async work started by fn to settle
356
+ }
357
+ finally {
358
+ // Always dispose, even on exception
359
+ handle.dispose();
360
+ }
361
+ }
362
+ /**
363
+ * Get the currently active runtime (for backward compatibility). Never returns null: when no runtime is active, one is created for process.cwd() and stored as the active runtime.
364
+ */
365
+ let activeRuntime = null; // module-level slot shared by createEvalRuntime, setActiveRuntime and disposeActiveRuntime
366
+ function getActiveRuntime() {
367
+ if (!activeRuntime) {
368
+ activeRuntime = new EvalRuntimeImpl(process.cwd()); // lazy default; after this call setActiveRuntime() throws until the runtime is disposed
369
+ }
370
+ return activeRuntime;
371
+ }
372
+ /**
373
+ * Set the active runtime (for backward compatibility). Throws RuntimeError when an active runtime already exists, including one created lazily by getActiveRuntime(); call disposeActiveRuntime() first to replace it.
374
+ */
375
+ function setActiveRuntime(runtime) {
376
+ if (activeRuntime) {
377
+ throw new types_1.RuntimeError("Active runtime already exists");
378
+ }
379
+ activeRuntime = runtime;
380
+ }
381
+ /**
382
+ * Dispose the active runtime (for backward compatibility): clears it and resets the active slot to null. Does nothing when no runtime is active, so repeated calls are safe.
383
+ */
384
+ function disposeActiveRuntime() {
385
+ if (activeRuntime) {
386
+ activeRuntime.clear();
387
+ activeRuntime = null;
388
+ }
389
+ }
390
+ /**
391
+ * Runtime cleanup hook for process termination
392
+ */
393
+ process.on("exit", () => {
394
+ disposeActiveRuntime();
395
+ });
396
+ process.on("SIGINT", () => {
397
+ disposeActiveRuntime();
398
+ process.exit(0);
399
+ });
400
+ process.on("SIGTERM", () => {
401
+ disposeActiveRuntime();
402
+ process.exit(0);
403
+ });
@@ -0,0 +1,200 @@
1
+ /**
2
+ * RUNTIME-104: Deterministic Report Serialization (RunReport v1)
3
+ *
4
+ * Stable report format for downstream processing (explain, diff, history).
5
+ * Mirrors CheckReport conventions for consistency.
6
+ */
7
+ import type { EnhancedEvalResult, ExecutionErrorEnvelope } from "./types";
8
+ /**
9
+ * RunReport schema version - increment when breaking changes occur. Declared as the string literal "1" (not a number); RunReport.schemaVersion is typed as string.
10
+ */
11
+ export declare const RUN_REPORT_SCHEMA_VERSION = "1";
12
+ /**
13
+ * Main run report structure: run identity and timing, runtime info, per-test results, failures, summary and configuration.
14
+ * Mirrors CheckReport conventions for consistency
15
+ */
16
+ export interface RunReport {
17
+ /** Schema version for compatibility (presumably RUN_REPORT_SCHEMA_VERSION - confirm against the builder) */
18
+ schemaVersion: string;
19
+ /** Unique run identifier */
20
+ runId: string;
21
+ /** Run start timestamp (string; presumably ISO 8601 - TODO confirm) */
22
+ startedAt: string;
23
+ /** Run completion timestamp (string; presumably ISO 8601 - TODO confirm) */
24
+ finishedAt: string;
25
+ /** Runtime information */
26
+ runtime: {
27
+ /** Runtime ID */
28
+ id: string;
29
+ /** Project namespace */
30
+ namespace: string;
31
+ /** Project root path */
32
+ projectRoot: string;
33
+ };
34
+ /** Execution results (sorted by testId for determinism) */
35
+ results: RunResult[];
36
+ /** Failures and errors (sorted by testId for determinism) */
37
+ failures: RunFailure[];
38
+ /** Execution summary */
39
+ summary: RunSummary;
40
+ /** Execution configuration */
41
+ config: RunConfig;
42
+ /** Serialize to JSON string. NOTE(review): JSON.stringify(report) calls toJSON() and then serializes its return value, so a string return here yields a quoted, escaped JSON string rather than an object - confirm callers invoke toJSON() directly. */
43
+ toJSON(): string;
44
+ }
45
+ /**
46
+ * Individual test result; the element type of RunReport.results.
47
+ */
48
+ export interface RunResult {
49
+ /** Test specification ID */
50
+ testId: string;
51
+ /** Test specification name */
52
+ testName: string;
53
+ /** File path where test is defined */
54
+ filePath: string;
55
+ /** AST position in file (whether line/column are 0- or 1-based is not stated here - TODO confirm) */
56
+ position: {
57
+ line: number;
58
+ column: number;
59
+ };
60
+ /** Test input (always a string in the report) */
61
+ input: string;
62
+ /** Pass/fail determination */
63
+ pass: boolean;
64
+ /** Numeric score (0-100) */
65
+ score: number;
66
+ /** Execution duration in milliseconds */
67
+ durationMs: number;
68
+ /** Test metadata (optional; values are untyped) */
69
+ metadata?: Record<string, unknown>;
70
+ /** Test tags (optional) */
71
+ tags?: string[];
72
+ /** Assertion results if available; message is optional on each entry */
73
+ assertions?: Array<{
74
+ name: string;
75
+ passed: boolean;
76
+ message?: string;
77
+ }>;
78
+ }
79
+ /**
80
+ * Failure or error information; the element type of RunReport.failures.
81
+ */
82
+ export interface RunFailure {
83
+ /** Test specification ID */
84
+ testId: string;
85
+ /** Test specification name */
86
+ testName: string;
87
+ /** File path where test is defined */
88
+ filePath: string;
89
+ /** AST position in file */
90
+ position: {
91
+ line: number;
92
+ column: number;
93
+ };
94
+ /** Failure classification: exactly one of "failed", "error" or "timeout" */
95
+ classification: "failed" | "error" | "timeout";
96
+ /** Error envelope for errors/timeouts (optional; presumably absent when classification is "failed" - TODO confirm) */
97
+ errorEnvelope?: ExecutionErrorEnvelope;
98
+ /** Human-readable error message */
99
+ message: string;
100
+ /** Failure timestamp (string; format not specified here - TODO confirm) */
101
+ timestamp: string;
102
+ }
103
+ /**
104
+ * Execution summary statistics; the type of RunReport.summary.
105
+ */
106
+ export interface RunSummary {
107
+ /** Total number of tests */
108
+ total: number;
109
+ /** Number of passed tests */
110
+ passed: number;
111
+ /** Number of failed tests */
112
+ failed: number;
113
+ /** Number of errors */
114
+ errors: number;
115
+ /** Number of timeouts */
116
+ timeouts: number;
117
+ /** Overall pass rate (0-100) */
118
+ passRate: number;
119
+ /** Average score (0-100) */
120
+ averageScore: number;
121
+ /** Total execution duration (presumably milliseconds, going by the Ms suffix - confirm) */
122
+ totalDurationMs: number;
123
+ /** Execution success (no errors/timeouts). NOTE(review): as described, plain test failures would not affect this flag - confirm against the builder. */
124
+ success: boolean;
125
+ }
126
+ /**
127
+ * Execution configuration; the type of RunReport.config.
128
+ */
129
+ export interface RunConfig {
130
+ /** Executor type (free-form string; allowed values are not declared here) */
131
+ executorType: string;
132
+ /** Maximum parallel workers (optional) */
133
+ maxParallel?: number;
134
+ /** Default timeout in milliseconds */
135
+ defaultTimeout: number;
136
+ /** Environment information (presumably taken from process.version / process.platform / process.arch - TODO confirm) */
137
+ environment: {
138
+ nodeVersion: string;
139
+ platform: string;
140
+ arch: string;
141
+ };
142
+ }
143
+ /**
144
+ * RunReport builder for creating deterministic reports. Private members appear below without types, as is usual in declaration files.
145
+ */
146
+ export declare class RunReportBuilder {
147
+ private report;
148
+ /**
149
+ * Initialize report with basic metadata: the run ID plus the runtime's id, namespace and project root.
150
+ */
151
+ constructor(runId: string, runtimeInfo: {
152
+ id: string;
153
+ namespace: string;
154
+ projectRoot: string;
155
+ });
156
+ /**
157
+ * Add a test result to the report (the only public way to add data; presumably also drives updateSummary/addFailure - confirm in the implementation)
158
+ */
159
+ addResult(testId: string, testName: string, filePath: string, position: {
160
+ line: number;
161
+ column: number;
162
+ }, input: string, result: EnhancedEvalResult): void;
163
+ /**
164
+ * Update summary statistics (private)
165
+ */
166
+ private updateSummary;
167
+ /**
168
+ * Add a failure to the report (private; callers cannot record failures directly)
169
+ */
170
+ private addFailure;
171
+ /**
172
+ * Set execution configuration; accepts any subset of RunConfig fields
173
+ */
174
+ setConfig(config: Partial<RunConfig>): void;
175
+ /**
176
+ * Finalize and return the complete report
177
+ */
178
+ build(): RunReport;
179
+ /**
180
+ * Serialize report to JSON string
181
+ * Ensures deterministic output. NOTE(review): because this returns a string, JSON.stringify(builder) would encode that string a second time - call toJSON() directly.
182
+ */
183
+ toJSON(): string;
184
+ /**
185
+ * Write report to file; resolves with no value once done (encoding and overwrite behavior are not visible here - TODO confirm)
186
+ */
187
+ writeToFile(filePath: string): Promise<void>;
188
+ }
189
+ /**
190
+ * Create a new RunReport builder. Takes the same arguments as the RunReportBuilder constructor and returns a RunReportBuilder.
191
+ */
192
+ export declare function createRunReport(runId: string, runtimeInfo: {
193
+ id: string;
194
+ namespace: string;
195
+ projectRoot: string;
196
+ }): RunReportBuilder;
197
+ /**
198
+ * Parse a RunReport from JSON string. NOTE(review): this declaration does not show whether malformed JSON or a mismatched schemaVersion throws, or whether the result is validated - confirm before passing untrusted input.
199
+ */
200
+ export declare function parseRunReport(json: string): RunReport;