@pauly4010/evalai-sdk 1.9.0 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +136 -23
- package/dist/assertions.js +51 -18
- package/dist/batch.js +8 -2
- package/dist/cli/api.js +3 -1
- package/dist/cli/check.js +19 -6
- package/dist/cli/ci-context.js +3 -1
- package/dist/cli/config.js +28 -8
- package/dist/cli/diff.js +14 -9
- package/dist/cli/discover.js +18 -7
- package/dist/cli/doctor.js +43 -9
- package/dist/cli/explain.js +37 -11
- package/dist/cli/formatters/human.js +4 -1
- package/dist/cli/formatters/pr-comment.js +3 -1
- package/dist/cli/gate.js +6 -2
- package/dist/cli/impact-analysis.js +6 -5
- package/dist/cli/index.js +18 -6
- package/dist/cli/manifest.d.ts +3 -5
- package/dist/cli/manifest.js +21 -14
- package/dist/cli/migrate.js +4 -4
- package/dist/cli/policy-packs.js +8 -2
- package/dist/cli/print-config.js +19 -4
- package/dist/cli/regression-gate.js +8 -2
- package/dist/cli/report/build-check-report.js +8 -2
- package/dist/cli/run.js +11 -5
- package/dist/cli/share.js +3 -1
- package/dist/cli/upgrade.js +2 -1
- package/dist/client.d.ts +16 -19
- package/dist/client.js +60 -43
- package/dist/client.request.test.d.ts +1 -1
- package/dist/client.request.test.js +222 -147
- package/dist/context.js +3 -1
- package/dist/errors.js +11 -4
- package/dist/export.js +3 -1
- package/dist/index.d.ts +8 -8
- package/dist/index.js +19 -19
- package/dist/integrations/anthropic.d.ts +20 -1
- package/dist/integrations/openai-eval.js +4 -2
- package/dist/integrations/openai.d.ts +24 -1
- package/dist/local.js +3 -1
- package/dist/logger.js +6 -2
- package/dist/pagination.js +6 -2
- package/dist/runtime/adapters/config-to-dsl.js +12 -9
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +1 -1
- package/dist/runtime/adapters/testsuite-to-dsl.js +11 -6
- package/dist/runtime/eval.d.ts +1 -1
- package/dist/runtime/eval.js +12 -5
- package/dist/runtime/execution-mode.js +13 -9
- package/dist/runtime/registry.js +8 -21
- package/dist/runtime/run-report.d.ts +0 -2
- package/dist/runtime/run-report.js +12 -10
- package/dist/testing.js +7 -2
- package/dist/types.d.ts +100 -69
- package/dist/utils/input-hash.js +4 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/workflows.js +62 -14
- package/package.json +115 -111
|
@@ -18,6 +18,28 @@
|
|
|
18
18
|
* ```
|
|
19
19
|
*/
|
|
20
20
|
import type { AIEvalClient } from "../client";
|
|
21
|
+
interface OpenAIChatParams {
|
|
22
|
+
model: string;
|
|
23
|
+
messages: unknown[];
|
|
24
|
+
temperature?: number;
|
|
25
|
+
max_tokens?: number;
|
|
26
|
+
[key: string]: unknown;
|
|
27
|
+
}
|
|
28
|
+
interface OpenAIChatCompletion {
|
|
29
|
+
choices: Array<{
|
|
30
|
+
message?: unknown;
|
|
31
|
+
finish_reason?: unknown;
|
|
32
|
+
}>;
|
|
33
|
+
usage?: unknown;
|
|
34
|
+
[key: string]: unknown;
|
|
35
|
+
}
|
|
36
|
+
interface OpenAIClient {
|
|
37
|
+
chat: {
|
|
38
|
+
completions: {
|
|
39
|
+
create: (params: OpenAIChatParams, requestOptions?: Record<string, unknown>) => Promise<OpenAIChatCompletion>;
|
|
40
|
+
};
|
|
41
|
+
};
|
|
42
|
+
}
|
|
21
43
|
export interface OpenAITraceOptions {
|
|
22
44
|
/** Whether to capture input (default: true) */
|
|
23
45
|
captureInput?: boolean;
|
|
@@ -48,7 +70,7 @@ export interface OpenAITraceOptions {
|
|
|
48
70
|
* });
|
|
49
71
|
* ```
|
|
50
72
|
*/
|
|
51
|
-
export declare function traceOpenAI(openai:
|
|
73
|
+
export declare function traceOpenAI(openai: OpenAIClient, evalClient: AIEvalClient, options?: OpenAITraceOptions): OpenAIClient;
|
|
52
74
|
/**
|
|
53
75
|
* Manual trace wrapper for OpenAI calls
|
|
54
76
|
*
|
|
@@ -67,3 +89,4 @@ export declare function traceOpenAI(openai: any, evalClient: AIEvalClient, optio
|
|
|
67
89
|
* ```
|
|
68
90
|
*/
|
|
69
91
|
export declare function traceOpenAICall<T>(evalClient: AIEvalClient, name: string, fn: () => Promise<T>, options?: OpenAITraceOptions): Promise<T>;
|
|
92
|
+
export {};
|
package/dist/local.js
CHANGED
|
@@ -31,7 +31,9 @@ class LocalStorage {
|
|
|
31
31
|
try {
|
|
32
32
|
await promises_1.default.mkdir(this.directory, { recursive: true });
|
|
33
33
|
await promises_1.default.mkdir(node_path_1.default.join(this.directory, "traces"), { recursive: true });
|
|
34
|
-
await promises_1.default.mkdir(node_path_1.default.join(this.directory, "evaluations"), {
|
|
34
|
+
await promises_1.default.mkdir(node_path_1.default.join(this.directory, "evaluations"), {
|
|
35
|
+
recursive: true,
|
|
36
|
+
});
|
|
35
37
|
await promises_1.default.mkdir(node_path_1.default.join(this.directory, "spans"), { recursive: true });
|
|
36
38
|
// Load existing data
|
|
37
39
|
await this.loadAllData();
|
package/dist/logger.js
CHANGED
|
@@ -133,10 +133,14 @@ class Logger {
|
|
|
133
133
|
}
|
|
134
134
|
// Level
|
|
135
135
|
const levelStr = entry.level.toUpperCase().padEnd(5);
|
|
136
|
-
parts.push(this.options.pretty
|
|
136
|
+
parts.push(this.options.pretty
|
|
137
|
+
? `${LOG_COLORS[entry.level]}${levelStr}${COLOR_RESET}`
|
|
138
|
+
: levelStr);
|
|
137
139
|
// Prefix
|
|
138
140
|
if (entry.prefix) {
|
|
139
|
-
parts.push(this.options.pretty
|
|
141
|
+
parts.push(this.options.pretty
|
|
142
|
+
? `\x1b[35m[${entry.prefix}]${COLOR_RESET}`
|
|
143
|
+
: `[${entry.prefix}]`);
|
|
140
144
|
}
|
|
141
145
|
// Message
|
|
142
146
|
parts.push(entry.message);
|
package/dist/pagination.js
CHANGED
|
@@ -113,8 +113,12 @@ function createPaginationMeta(items, limit, offset, total) {
|
|
|
113
113
|
limit,
|
|
114
114
|
offset,
|
|
115
115
|
total,
|
|
116
|
-
nextCursor: hasMore
|
|
117
|
-
|
|
116
|
+
nextCursor: hasMore
|
|
117
|
+
? encodeCursor({ offset: offset + limit, limit })
|
|
118
|
+
: undefined,
|
|
119
|
+
prevCursor: offset > 0
|
|
120
|
+
? encodeCursor({ offset: Math.max(0, offset - limit), limit })
|
|
121
|
+
: undefined,
|
|
118
122
|
};
|
|
119
123
|
}
|
|
120
124
|
/**
|
|
@@ -58,13 +58,13 @@ function migrateTestSuiteToDSL(testSuite, outputPath) {
|
|
|
58
58
|
};
|
|
59
59
|
try {
|
|
60
60
|
// Create isolated runtime for migration
|
|
61
|
-
const
|
|
61
|
+
const _runtime = (0, registry_1.createEvalRuntime)();
|
|
62
62
|
// Use the runtime handle to define specs
|
|
63
|
-
const
|
|
63
|
+
const _boundDefineEval = (nameOrConfig, executor, options) => {
|
|
64
64
|
// The runtime handle manages the active runtime internally
|
|
65
65
|
const { defineEval } = require("../eval");
|
|
66
66
|
return defineEval(nameOrConfig, executor, options);
|
|
67
|
-
}
|
|
67
|
+
};
|
|
68
68
|
// Get test suite data via public methods
|
|
69
69
|
// Note: We need to access the internal data structure for migration
|
|
70
70
|
// This is a limitation of the current TestSuite design
|
|
@@ -88,7 +88,7 @@ function migrateTestSuiteToDSL(testSuite, outputPath) {
|
|
|
88
88
|
* Extract data from TestSuite instance
|
|
89
89
|
* This is a workaround for the private properties
|
|
90
90
|
*/
|
|
91
|
-
function extractTestSuiteData(
|
|
91
|
+
function extractTestSuiteData(_testSuite) {
|
|
92
92
|
// Since TestSuite properties are private, we need to reconstruct from usage
|
|
93
93
|
// This is a limitation that should be addressed in a future version
|
|
94
94
|
// For now, we'll create a basic structure and warn the user
|
|
@@ -124,13 +124,13 @@ function migrateConfigToDSL(configPath, outputPath) {
|
|
|
124
124
|
const configContent = fs.readFileSync(configPath, "utf-8");
|
|
125
125
|
const config = JSON.parse(configContent);
|
|
126
126
|
// Create isolated runtime for migration
|
|
127
|
-
const
|
|
127
|
+
const _runtime = (0, registry_1.createEvalRuntime)();
|
|
128
128
|
// Use the runtime handle to define specs
|
|
129
|
-
const
|
|
129
|
+
const _boundDefineEval = (nameOrConfig, executor, options) => {
|
|
130
130
|
// The runtime handle manages the active runtime internally
|
|
131
131
|
const { defineEval } = require("../eval");
|
|
132
132
|
return defineEval(nameOrConfig, executor, options);
|
|
133
|
-
}
|
|
133
|
+
};
|
|
134
134
|
// Generate basic DSL structure from config
|
|
135
135
|
const dslContent = generateDSLFromConfig(config);
|
|
136
136
|
// Write DSL file
|
|
@@ -333,13 +333,16 @@ function findTestSuiteFiles(projectRoot) {
|
|
|
333
333
|
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
334
334
|
for (const entry of entries) {
|
|
335
335
|
const fullPath = path.join(dir, entry.name);
|
|
336
|
-
if (entry.isDirectory() &&
|
|
336
|
+
if (entry.isDirectory() &&
|
|
337
|
+
!entry.name.startsWith(".") &&
|
|
338
|
+
entry.name !== "node_modules") {
|
|
337
339
|
scanDirectory(fullPath);
|
|
338
340
|
}
|
|
339
341
|
else if (entry.isFile() && /\.(ts|js)$/.test(entry.name)) {
|
|
340
342
|
try {
|
|
341
343
|
const content = fs.readFileSync(fullPath, "utf-8");
|
|
342
|
-
if (content.includes("createTestSuite") ||
|
|
344
|
+
if (content.includes("createTestSuite") ||
|
|
345
|
+
content.includes("TestSuite")) {
|
|
343
346
|
testFiles.push(fullPath);
|
|
344
347
|
}
|
|
345
348
|
}
|
|
@@ -38,7 +38,7 @@ export declare function generateDefineEvalCode(suite: TestSuite, options?: Parti
|
|
|
38
38
|
*/
|
|
39
39
|
export interface TestSuiteConfig {
|
|
40
40
|
/** Test cases to run */
|
|
41
|
-
cases:
|
|
41
|
+
cases: unknown[];
|
|
42
42
|
/** Function that generates output from input */
|
|
43
43
|
executor?: (input: string) => Promise<string>;
|
|
44
44
|
/** Run tests in parallel (default: true) */
|
|
@@ -18,13 +18,13 @@ const registry_1 = require("../registry");
|
|
|
18
18
|
* @returns Array of EvalSpec definitions
|
|
19
19
|
*/
|
|
20
20
|
function adaptTestSuite(suite, options = {}) {
|
|
21
|
-
const { includeProvenance = true, preserveIds = true, generateHelpers = true } = options;
|
|
21
|
+
const { includeProvenance = true, preserveIds = true, generateHelpers = true, } = options;
|
|
22
22
|
// Get test suite data using the new getters
|
|
23
23
|
const tests = suite.getTests();
|
|
24
24
|
const metadata = suite.getMetadata();
|
|
25
25
|
const config = suite.getConfig();
|
|
26
26
|
// Create a temporary runtime for spec generation
|
|
27
|
-
const
|
|
27
|
+
const _runtime = (0, registry_1.createEvalRuntime)();
|
|
28
28
|
const specs = [];
|
|
29
29
|
try {
|
|
30
30
|
// Convert each test case to an EvalSpec
|
|
@@ -182,7 +182,7 @@ function generateDefineEvalCode(suite, options = {}) {
|
|
|
182
182
|
`import { defineEval, createResult } from '@pauly4010/evalai-sdk';`,
|
|
183
183
|
"",
|
|
184
184
|
];
|
|
185
|
-
const specCode = specs.map((spec,
|
|
185
|
+
const specCode = specs.map((spec, _index) => {
|
|
186
186
|
const helperCode = generateHelperFunctions(spec, options);
|
|
187
187
|
return [
|
|
188
188
|
`defineEval("${spec.name}", async (context) => {`,
|
|
@@ -208,7 +208,12 @@ function generateDefineEvalCode(suite, options = {}) {
|
|
|
208
208
|
});
|
|
209
209
|
const helperFunctions = generateHelperFunctionsForSuite(specs, options);
|
|
210
210
|
const evaluationFunction = generateEvaluationFunction();
|
|
211
|
-
return [
|
|
211
|
+
return [
|
|
212
|
+
...imports,
|
|
213
|
+
...helperFunctions,
|
|
214
|
+
...evaluationFunction,
|
|
215
|
+
...specCode,
|
|
216
|
+
].join("\n");
|
|
212
217
|
}
|
|
213
218
|
/**
|
|
214
219
|
* Generate helper functions for a specific spec
|
|
@@ -223,7 +228,7 @@ function generateHelperFunctions(spec, options) {
|
|
|
223
228
|
helpers.push(`function evaluateLegacyAssertion(output: string, expected: string): boolean {`, ` return output === expected;`, `}`);
|
|
224
229
|
}
|
|
225
230
|
// Add helper for test evaluation
|
|
226
|
-
helpers.push(`async function evaluateLegacyTest(input: string, expected?: string): Promise<
|
|
231
|
+
helpers.push(`async function evaluateLegacyTest(input: string, expected?: string): Promise<unknown> {`, ` // This function simulates the legacy test evaluation`, ` const output = await simulateLegacyExecutor(input);`, ` `, ` if (expected !== undefined) {`, ` const passed = evaluateLegacyAssertion(output, expected);`, ` return createResult({`, ` pass: passed,`, ` score: passed ? 100 : 0,`, ` metadata: {`, ` input,`, ` expected,`, ` },`, ` });`, ` }`, ` `, ` return createResult({`, ` pass: output.length > 0,`, ` score: output.length > 0 ? 100 : 0,`, ` metadata: { input },`, ` });`, `}`);
|
|
227
232
|
// Add executor simulation
|
|
228
233
|
helpers.push(`async function simulateLegacyExecutor(input: string): Promise<string> {`, ` // This function simulates the legacy executor`, ` // In a real migration, this would be replaced with the actual executor`, ` return input; // Echo for demonstration`, `}`);
|
|
229
234
|
return helpers.join("\n\n");
|
|
@@ -248,7 +253,7 @@ function generateHelperFunctionsForSuite(specs, options) {
|
|
|
248
253
|
function generateEvaluationFunction() {
|
|
249
254
|
return [
|
|
250
255
|
`// Legacy test evaluation function`,
|
|
251
|
-
`function evaluateLegacyTest(input: string, expected?: string):
|
|
256
|
+
`function evaluateLegacyTest(input: string, expected?: string): unknown {`,
|
|
252
257
|
` // This function evaluates legacy test logic`,
|
|
253
258
|
` // In a real migration, this would contain the actual test logic`,
|
|
254
259
|
` `,
|
package/dist/runtime/eval.d.ts
CHANGED
|
@@ -21,7 +21,7 @@ export declare const evalai: {
|
|
|
21
21
|
* Suite definition for grouping related specifications
|
|
22
22
|
* This will be expanded in Layer 3 for dependency graph support
|
|
23
23
|
*/
|
|
24
|
-
export declare function defineSuite(
|
|
24
|
+
export declare function defineSuite(_name: string, specs: (() => void)[]): void;
|
|
25
25
|
/**
|
|
26
26
|
* Helper function to create specification contexts
|
|
27
27
|
* Useful for testing and manual execution
|
package/dist/runtime/eval.js
CHANGED
|
@@ -43,10 +43,10 @@ exports.evalai = exports.defineEval = void 0;
|
|
|
43
43
|
exports.defineSuite = defineSuite;
|
|
44
44
|
exports.createContext = createContext;
|
|
45
45
|
exports.createResult = createResult;
|
|
46
|
-
const path = __importStar(require("node:path"));
|
|
47
46
|
const crypto = __importStar(require("node:crypto"));
|
|
48
|
-
const
|
|
47
|
+
const path = __importStar(require("node:path"));
|
|
49
48
|
const registry_1 = require("./registry");
|
|
49
|
+
const types_1 = require("./types");
|
|
50
50
|
/**
|
|
51
51
|
* Extract AST position from call stack
|
|
52
52
|
* This provides stable identity that survives renames but changes when logic moves
|
|
@@ -61,7 +61,9 @@ function getCallerPosition() {
|
|
|
61
61
|
// Skip current function and find the actual caller
|
|
62
62
|
for (let i = 3; i < lines.length; i++) {
|
|
63
63
|
const line = lines[i];
|
|
64
|
-
if (!line ||
|
|
64
|
+
if (!line ||
|
|
65
|
+
line.includes("node_modules") ||
|
|
66
|
+
line.includes("internal/modules")) {
|
|
65
67
|
continue;
|
|
66
68
|
}
|
|
67
69
|
// Extract file path, line, and column
|
|
@@ -95,7 +97,12 @@ function generateSpecId(namespace, filePath, name, position) {
|
|
|
95
97
|
const projectRoot = process.cwd();
|
|
96
98
|
const relativePath = path.relative(projectRoot, filePath);
|
|
97
99
|
const canonicalPath = relativePath.split(path.sep).join("/"); // Force POSIX separators
|
|
98
|
-
const components = [
|
|
100
|
+
const components = [
|
|
101
|
+
namespace,
|
|
102
|
+
canonicalPath,
|
|
103
|
+
name,
|
|
104
|
+
`${position.line}:${position.column}`,
|
|
105
|
+
];
|
|
99
106
|
const content = components.join("|");
|
|
100
107
|
return crypto.createHash("sha256").update(content).digest("hex").slice(0, 20);
|
|
101
108
|
}
|
|
@@ -200,7 +207,7 @@ exports.evalai = {
|
|
|
200
207
|
* Suite definition for grouping related specifications
|
|
201
208
|
* This will be expanded in Layer 3 for dependency graph support
|
|
202
209
|
*/
|
|
203
|
-
function defineSuite(
|
|
210
|
+
function defineSuite(_name, specs) {
|
|
204
211
|
// For now, just execute the specs to register them
|
|
205
212
|
// In Layer 3, this will build the dependency graph
|
|
206
213
|
for (const specFn of specs) {
|
|
@@ -123,7 +123,7 @@ async function findSpecFiles(projectRoot) {
|
|
|
123
123
|
const files = await searchFiles(projectRoot, pattern, projectRoot);
|
|
124
124
|
foundFiles.push(...files);
|
|
125
125
|
}
|
|
126
|
-
catch (
|
|
126
|
+
catch (_error) {
|
|
127
127
|
// Ignore errors for non-existent paths
|
|
128
128
|
}
|
|
129
129
|
}
|
|
@@ -136,7 +136,7 @@ async function findSpecFiles(projectRoot) {
|
|
|
136
136
|
specFilesWithDefineEval.push(file);
|
|
137
137
|
}
|
|
138
138
|
}
|
|
139
|
-
catch (
|
|
139
|
+
catch (_error) {
|
|
140
140
|
// Ignore read errors
|
|
141
141
|
}
|
|
142
142
|
}
|
|
@@ -163,7 +163,7 @@ async function searchFiles(dir, pattern, projectRoot) {
|
|
|
163
163
|
}
|
|
164
164
|
}
|
|
165
165
|
}
|
|
166
|
-
catch (
|
|
166
|
+
catch (_error) {
|
|
167
167
|
// Ignore permission errors
|
|
168
168
|
}
|
|
169
169
|
return results;
|
|
@@ -172,9 +172,9 @@ async function searchFiles(dir, pattern, projectRoot) {
|
|
|
172
172
|
* Simple pattern matching (placeholder for proper glob)
|
|
173
173
|
*/
|
|
174
174
|
function matchesPattern(filePath, pattern, projectRoot) {
|
|
175
|
-
const
|
|
176
|
-
const
|
|
177
|
-
const
|
|
175
|
+
const _fileName = path.basename(filePath);
|
|
176
|
+
const _ext = path.extname(filePath);
|
|
177
|
+
const _dir = path.dirname(filePath);
|
|
178
178
|
// Convert glob pattern to regex
|
|
179
179
|
// Handle **/ and * patterns correctly
|
|
180
180
|
let regexPattern = pattern;
|
|
@@ -203,7 +203,7 @@ async function findLegacyConfig(projectRoot) {
|
|
|
203
203
|
await fs.access(fullPath);
|
|
204
204
|
return fullPath;
|
|
205
205
|
}
|
|
206
|
-
catch (
|
|
206
|
+
catch (_error) {
|
|
207
207
|
// File doesn't exist, continue
|
|
208
208
|
}
|
|
209
209
|
}
|
|
@@ -305,12 +305,16 @@ function printExecutionModeInfo(config) {
|
|
|
305
305
|
const validation = validateExecutionMode(config);
|
|
306
306
|
if (validation.warnings.length > 0) {
|
|
307
307
|
console.log(`⚠️ Warnings:`);
|
|
308
|
-
validation.warnings.forEach((warning) =>
|
|
308
|
+
validation.warnings.forEach((warning) => {
|
|
309
|
+
console.log(` ${warning}`);
|
|
310
|
+
});
|
|
309
311
|
console.log(``);
|
|
310
312
|
}
|
|
311
313
|
if (validation.errors.length > 0) {
|
|
312
314
|
console.log(`❌ Errors:`);
|
|
313
|
-
validation.errors.forEach((error) =>
|
|
315
|
+
validation.errors.forEach((error) => {
|
|
316
|
+
console.log(` ${error}`);
|
|
317
|
+
});
|
|
314
318
|
console.log(``);
|
|
315
319
|
}
|
|
316
320
|
const recommended = getRecommendedExecutionMode(config);
|
package/dist/runtime/registry.js
CHANGED
|
@@ -64,26 +64,11 @@ class EvalRuntimeImpl {
|
|
|
64
64
|
* Content-addressable to prevent collisions
|
|
65
65
|
*/
|
|
66
66
|
generateNamespace(projectRoot) {
|
|
67
|
-
return crypto
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
*/
|
|
73
|
-
generateSpecId(identity) {
|
|
74
|
-
// Canonicalize path: relative to project root with POSIX separators
|
|
75
|
-
const projectRoot = process.cwd();
|
|
76
|
-
const relativePath = path.relative(projectRoot, identity.filePath);
|
|
77
|
-
const canonicalPath = relativePath.split(path.sep).join("/"); // Force POSIX separators
|
|
78
|
-
const components = [
|
|
79
|
-
identity.namespace,
|
|
80
|
-
canonicalPath,
|
|
81
|
-
identity.name,
|
|
82
|
-
identity.suitePath || "",
|
|
83
|
-
`${identity.position.line}:${identity.position.column}`,
|
|
84
|
-
];
|
|
85
|
-
const content = components.join("|");
|
|
86
|
-
return crypto.createHash("sha256").update(content).digest("hex").slice(0, 20);
|
|
67
|
+
return crypto
|
|
68
|
+
.createHash("sha256")
|
|
69
|
+
.update(path.resolve(projectRoot))
|
|
70
|
+
.digest("hex")
|
|
71
|
+
.slice(0, 12);
|
|
87
72
|
}
|
|
88
73
|
/**
|
|
89
74
|
* Register a new specification
|
|
@@ -274,7 +259,9 @@ class EvalRuntimeImpl {
|
|
|
274
259
|
}
|
|
275
260
|
let status = "healthy";
|
|
276
261
|
if (issues.length > 0) {
|
|
277
|
-
status = issues.some((issue) => issue.includes("error"))
|
|
262
|
+
status = issues.some((issue) => issue.includes("error"))
|
|
263
|
+
? "error"
|
|
264
|
+
: "warning";
|
|
278
265
|
}
|
|
279
266
|
return {
|
|
280
267
|
status,
|
|
@@ -144,8 +144,6 @@ export interface RunConfig {
|
|
|
144
144
|
* RunReport builder for creating deterministic reports
|
|
145
145
|
*/
|
|
146
146
|
export declare class RunReportBuilder {
|
|
147
|
-
private runId;
|
|
148
|
-
private runtimeInfo;
|
|
149
147
|
private report;
|
|
150
148
|
/**
|
|
151
149
|
* Initialize report with basic metadata
|
|
@@ -54,8 +54,6 @@ class RunReportBuilder {
|
|
|
54
54
|
* Initialize report with basic metadata
|
|
55
55
|
*/
|
|
56
56
|
constructor(runId, runtimeInfo) {
|
|
57
|
-
this.runId = runId;
|
|
58
|
-
this.runtimeInfo = runtimeInfo;
|
|
59
57
|
this.report = {
|
|
60
58
|
schemaVersion: exports.RUN_REPORT_SCHEMA_VERSION,
|
|
61
59
|
results: [],
|
|
@@ -97,11 +95,13 @@ class RunReportBuilder {
|
|
|
97
95
|
message: assertion.message,
|
|
98
96
|
})),
|
|
99
97
|
};
|
|
100
|
-
this.report.results
|
|
98
|
+
this.report.results?.push(runResult);
|
|
101
99
|
// Update summary
|
|
102
100
|
this.updateSummary(result);
|
|
103
101
|
// Add to failures if needed
|
|
104
|
-
if (!result.pass ||
|
|
102
|
+
if (!result.pass ||
|
|
103
|
+
result.classification === "error" ||
|
|
104
|
+
result.classification === "timeout") {
|
|
105
105
|
this.addFailure(testId, testName, filePath, position, result);
|
|
106
106
|
}
|
|
107
107
|
}
|
|
@@ -127,12 +127,14 @@ class RunReportBuilder {
|
|
|
127
127
|
summary.failed++;
|
|
128
128
|
}
|
|
129
129
|
// Calculate rates and averages
|
|
130
|
-
summary.passRate =
|
|
130
|
+
summary.passRate =
|
|
131
|
+
summary.total > 0 ? (summary.passed / summary.total) * 100 : 0;
|
|
131
132
|
// Average score calculation (excluding errors/timeouts)
|
|
132
|
-
const scoredResults = this.report.results
|
|
133
|
+
const scoredResults = this.report.results?.filter((r) => r.score > 0) || [];
|
|
133
134
|
summary.averageScore =
|
|
134
135
|
scoredResults.length > 0
|
|
135
|
-
? scoredResults.reduce((sum, r) => sum + r.score, 0) /
|
|
136
|
+
? scoredResults.reduce((sum, r) => sum + r.score, 0) /
|
|
137
|
+
scoredResults.length
|
|
136
138
|
: 0;
|
|
137
139
|
}
|
|
138
140
|
/**
|
|
@@ -153,7 +155,7 @@ class RunReportBuilder {
|
|
|
153
155
|
message: result.error || "Test failed",
|
|
154
156
|
timestamp: new Date().toISOString(),
|
|
155
157
|
};
|
|
156
|
-
this.report.failures
|
|
158
|
+
this.report.failures?.push(failure);
|
|
157
159
|
}
|
|
158
160
|
/**
|
|
159
161
|
* Set execution configuration
|
|
@@ -175,8 +177,8 @@ class RunReportBuilder {
|
|
|
175
177
|
*/
|
|
176
178
|
build() {
|
|
177
179
|
// Sort results and failures by testId for determinism
|
|
178
|
-
this.report.results
|
|
179
|
-
this.report.failures
|
|
180
|
+
this.report.results?.sort((a, b) => a.testId.localeCompare(b.testId));
|
|
181
|
+
this.report.failures?.sort((a, b) => a.testId.localeCompare(b.testId));
|
|
180
182
|
// Set completion timestamp
|
|
181
183
|
this.report.finishedAt = new Date().toISOString();
|
|
182
184
|
const finalReport = this.report;
|
package/dist/testing.js
CHANGED
|
@@ -59,7 +59,10 @@ class TestSuite {
|
|
|
59
59
|
if (this.config.executor) {
|
|
60
60
|
const timeout = this.config.timeout || 30000;
|
|
61
61
|
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error(`Test timeout after ${timeout}ms`)), timeout));
|
|
62
|
-
actual = await Promise.race([
|
|
62
|
+
actual = await Promise.race([
|
|
63
|
+
this.config.executor(testCase.input),
|
|
64
|
+
timeoutPromise,
|
|
65
|
+
]);
|
|
63
66
|
}
|
|
64
67
|
else if (testCase.expected) {
|
|
65
68
|
actual = testCase.expected; // Use expected as actual if no executor
|
|
@@ -127,7 +130,9 @@ class TestSuite {
|
|
|
127
130
|
const retriedCases = [];
|
|
128
131
|
const retries = this.config.retries ?? 0;
|
|
129
132
|
if (retries > 0 && results.length > 0) {
|
|
130
|
-
const failingIndices = results
|
|
133
|
+
const failingIndices = results
|
|
134
|
+
.map((r, i) => (r.passed ? -1 : i))
|
|
135
|
+
.filter((i) => i >= 0);
|
|
131
136
|
for (let attempt = 0; attempt < retries && failingIndices.length > 0; attempt++) {
|
|
132
137
|
const toRetry = [...failingIndices];
|
|
133
138
|
failingIndices.length = 0;
|