@evalgate/sdk 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +638 -0
- package/README.md +398 -0
- package/dist/assertions.d.ts +189 -0
- package/dist/assertions.js +662 -0
- package/dist/batch.d.ts +68 -0
- package/dist/batch.js +179 -0
- package/dist/cache.d.ts +65 -0
- package/dist/cache.js +131 -0
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +132 -0
- package/dist/cli/baseline.d.ts +10 -0
- package/dist/cli/baseline.js +172 -0
- package/dist/cli/check.d.ts +73 -0
- package/dist/cli/check.js +355 -0
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +112 -0
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +230 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +685 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +419 -0
- package/dist/cli/doctor.d.ts +88 -0
- package/dist/cli/doctor.js +675 -0
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.d.ts +58 -0
- package/dist/cli/explain.js +561 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +135 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +110 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +103 -0
- package/dist/cli/formatters/types.d.ts +103 -0
- package/dist/cli/formatters/types.js +8 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +179 -0
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +252 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.js +332 -0
- package/dist/cli/init.d.ts +16 -0
- package/dist/cli/init.js +292 -0
- package/dist/cli/manifest.d.ts +103 -0
- package/dist/cli/manifest.js +282 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +89 -0
- package/dist/cli/print-config.d.ts +29 -0
- package/dist/cli/print-config.js +270 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/regression-gate.d.ts +15 -0
- package/dist/cli/regression-gate.js +341 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +132 -0
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +395 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +91 -0
- package/dist/cli/upgrade.d.ts +15 -0
- package/dist/cli/upgrade.js +492 -0
- package/dist/cli/workspace.d.ts +31 -0
- package/dist/cli/workspace.js +68 -0
- package/dist/client.d.ts +368 -0
- package/dist/client.js +893 -0
- package/dist/client.request.test.d.ts +1 -0
- package/dist/client.request.test.js +232 -0
- package/dist/context.d.ts +134 -0
- package/dist/context.js +215 -0
- package/dist/errors.d.ts +82 -0
- package/dist/errors.js +298 -0
- package/dist/export.d.ts +195 -0
- package/dist/export.js +344 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.js +153 -0
- package/dist/integrations/anthropic.d.ts +91 -0
- package/dist/integrations/anthropic.js +163 -0
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +232 -0
- package/dist/integrations/openai.d.ts +92 -0
- package/dist/integrations/openai.js +160 -0
- package/dist/local.d.ts +39 -0
- package/dist/local.js +148 -0
- package/dist/logger.d.ts +128 -0
- package/dist/logger.js +227 -0
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +74 -0
- package/dist/pagination.js +139 -0
- package/dist/regression.d.ts +100 -0
- package/dist/regression.js +44 -0
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +400 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +244 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +357 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +403 -0
- package/dist/runtime/run-report.d.ts +200 -0
- package/dist/runtime/run-report.js +222 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/snapshot.d.ts +176 -0
- package/dist/snapshot.js +322 -0
- package/dist/streaming.d.ts +173 -0
- package/dist/streaming.js +268 -0
- package/dist/testing.d.ts +273 -0
- package/dist/testing.js +317 -0
- package/dist/types.d.ts +754 -0
- package/dist/types.js +54 -0
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +41 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +389 -0
- package/dist/workflows.js +671 -0
- package/package.json +117 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* COMPAT-202: Legacy TestSuite → defineEval adapter
|
|
4
|
+
*
|
|
5
|
+
* Converts legacy TestSuite instances to defineEval specifications
|
|
6
|
+
* without forcing migration. The conversion is lossless where possible.
|
|
7
|
+
*/
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.adaptTestSuite = adaptTestSuite;
|
|
10
|
+
exports.generateDefineEvalCode = generateDefineEvalCode;
|
|
11
|
+
const crypto = require("node:crypto");
const eval_1 = require("../eval");
const registry_1 = require("../registry");
|
|
13
|
+
/**
 * Adapt a legacy TestSuite into an array of EvalSpec-shaped objects.
 *
 * One spec is produced per legacy test case. Each spec carries a placeholder
 * source location ("legacy://testsuite", line 1, column 1), the tags
 * "legacy" and "migrated", and an executor built from the test case.
 *
 * @param suite - Legacy TestSuite instance exposing getTests(), getMetadata() and getConfig()
 * @param options - Adapter options: includeProvenance, preserveIds, generateHelpers (each defaults to true)
 * @returns Array of EvalSpec definitions
 */
function adaptTestSuite(suite, options = {}) {
    const {
        includeProvenance = true,
        preserveIds = true,
        generateHelpers = true,
    } = options;
    // Read the suite through its getters.
    const legacyTests = suite.getTests();
    const suiteMeta = suite.getMetadata();
    const suiteConfig = suite.getConfig();
    // A runtime is created for the duration of the conversion and disposed in `finally`.
    (0, registry_1.createEvalRuntime)();
    // Builds the spec for a single legacy test case.
    const toSpec = (legacyTest) => ({
        id: generateSpecId(legacyTest, suiteMeta.suiteName || "legacy-suite", preserveIds),
        name: legacyTest.id,
        filePath: "legacy://testsuite", // placeholder: legacy tests have no source file
        position: { line: 1, column: 1 }, // placeholder position
        description: `Legacy test: ${legacyTest.id}`,
        tags: ["legacy", "migrated"],
        executor: createExecutorFromTestCase(legacyTest, suiteConfig, generateHelpers),
        metadata: {
            ...legacyTest.metadata,
            // Provenance keys are only added when requested.
            ...(includeProvenance
                ? {
                    source: "legacy",
                    legacySuiteName: suiteMeta.suiteName,
                    legacyTestId: legacyTest.id,
                    originalInput: legacyTest.input,
                    originalExpected: legacyTest.expected,
                }
                : {}),
        },
        config: {
            // budget and model are not available on a TestSuite
            timeout: suiteConfig.timeout,
            retries: suiteConfig.retries,
        },
    });
    const converted = [];
    try {
        for (const legacyTest of legacyTests) {
            converted.push(toSpec(legacyTest));
        }
    }
    finally {
        // Always release the runtime, even if a test case failed to convert.
        (0, registry_1.disposeActiveRuntime)();
    }
    return converted;
}
|
|
65
|
+
/**
 * Generate the specification ID for a legacy test.
 *
 * When `preserveIds` is set and the test has an ID, that ID is reused after
 * replacing every character outside [a-zA-Z0-9_-] with "_" and truncating to
 * 20 characters. Otherwise a deterministic ID is derived from the suite name
 * and the test's id, input and expected value.
 *
 * The derived ID is the first 20 hex characters of a SHA-256 digest. The
 * previous implementation took the first 20 characters of the base64 encoding
 * of the content, which only covers its first ~15 bytes (mostly the suite
 * name), so different tests in the same suite received the same ID.
 *
 * @param test - Legacy test case (reads `id`, `input`, `expected`)
 * @param suiteName - Name of the owning suite, mixed into derived IDs
 * @param preserveIds - Reuse the test's own ID when it has one
 * @returns A spec ID of at most 20 characters
 */
function generateSpecId(test, suiteName, preserveIds) {
    // NOTE(review): the original also compared test.id with `case-${test.id}`,
    // which can never be equal; if auto-generated IDs were meant to be
    // excluded here, that rule still needs to be defined.
    if (preserveIds && test.id) {
        // String() keeps numeric IDs from throwing on .replace().
        return String(test.id).replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 20);
    }
    const content = `${suiteName}|${test.id}|${test.input}|${test.expected || ""}`;
    return crypto.createHash("sha256").update(content).digest("hex").slice(0, 20);
}
|
|
82
|
+
/**
 * Build the async executor for a legacy test case.
 *
 * The returned executor resolves to an eval result and never rejects because
 * of the configured executor: errors thrown by it are reported as a failed
 * result. Without a configured executor, the test's own expected value is
 * evaluated as the output; with neither, a failed result is returned.
 *
 * @param test - Legacy test case
 * @param config - Suite config; `config.executor` is used when present
 * @param generateHelpers - Forwarded to evaluateTestCase
 * @returns Async function taking an eval context
 */
function createExecutorFromTestCase(test, config, generateHelpers) {
    // Shared shape for every failing outcome produced here.
    const failedResult = (message) => (0, eval_1.createResult)({
        pass: false,
        score: 0,
        error: message,
    });
    return async (context) => {
        const input = context.input;
        if (!config.executor) {
            if (test.expected === undefined) {
                // Nothing to run and nothing to compare against.
                return failedResult("No executor or expected output available for legacy test");
            }
            // No executor: the expected value stands in for the output.
            return evaluateTestCase(test, test.expected, generateHelpers);
        }
        try {
            const produced = await config.executor(input);
            return evaluateTestCase(test, produced, generateHelpers);
        }
        catch (error) {
            return failedResult(error instanceof Error ? error.message : String(error));
        }
    };
}
|
|
114
|
+
/**
 * Evaluate a legacy test case against an output.
 *
 * Legacy assertion functions are not executed. Instead:
 * - if the test reports assertions and has an expected value, the output must
 *   be strictly equal to it (recorded as a "legacy-equals" assertion);
 * - if the test reports assertions but has no expected value, it passes;
 * - if the test reports no assertions, it passes when `output.length > 0`.
 *
 * Any error raised while evaluating is converted into a failed result.
 *
 * @param test - Legacy test case
 * @param output - Output to evaluate
 * @param generateHelpers - When falsy, the assertions list is omitted from the result
 * @returns Eval result with score 100 on pass and 0 on failure
 */
function evaluateTestCase(test, output, generateHelpers) {
    try {
        const recorded = [];
        const declaresAssertions = test.hasAssertions && test.assertionCount > 0;
        let passed;
        if (!declaresAssertions) {
            // No assertions: any non-empty output counts as a pass.
            passed = output.length > 0;
        }
        else if (test.expected === undefined) {
            // Assertions exist but cannot be run and there is nothing to compare.
            passed = true;
        }
        else {
            passed = output === test.expected;
            recorded.push({
                name: "legacy-equals",
                passed: passed,
                expected: test.expected,
                actual: output,
                message: passed
                    ? "Output matches expected"
                    : `Expected "${test.expected}", got "${output}"`,
            });
        }
        return (0, eval_1.createResult)({
            pass: passed,
            score: passed ? 100 : 0,
            assertions: generateHelpers ? recorded : undefined,
            metadata: {
                testCaseId: test.id,
                originalInput: test.input,
                originalExpected: test.expected,
            },
        });
    }
    catch (error) {
        return (0, eval_1.createResult)({
            pass: false,
            score: 0,
            error: error instanceof Error ? error.message : String(error),
        });
    }
}
|
|
167
|
+
/**
 * Generate defineEval source code from a legacy TestSuite.
 *
 * The output consists of a header comment, the SDK import, the shared helper
 * functions, and one defineEval() call per adapted spec.
 *
 * Fixes relative to the previous implementation:
 * - The helper and evaluation sections are strings; they were spread with
 *   `...`, which split them into single characters (one per output line).
 * - Spec names and descriptions are emitted with JSON.stringify so quotes or
 *   backslashes in a test ID cannot break the generated string literals.
 * - The fallback evaluateLegacyTest() is only emitted when the suite helpers
 *   (which define the same function) are absent, avoiding a duplicate
 *   top-level declaration.
 *
 * @param suite - Legacy TestSuite instance
 * @param options - Code generation options (forwarded to adaptTestSuite and the helper generators)
 * @returns Generated TypeScript code as a single string
 */
function generateDefineEvalCode(suite, options = {}) {
    const specs = adaptTestSuite(suite, options);
    const metadata = suite.getMetadata();
    // Keep the suite name on one line so it cannot escape the header comment.
    const suiteLabel = String(metadata.suiteName || "legacy-suite").replace(/[\r\n]+/g, " ");
    const sections = [
        `// Auto-generated from TestSuite: ${suiteLabel}`,
        `// Generated at: ${new Date().toISOString()}`,
        `// This file replaces the legacy TestSuite with defineEval() specifications`,
        "",
        `import { defineEval, createResult } from '@evalgate/sdk';`,
        "",
    ];
    const helperFunctions = generateHelperFunctionsForSuite(specs, options);
    if (helperFunctions) {
        // Suite helpers already define evaluateLegacyTest().
        sections.push(helperFunctions, "");
    }
    else {
        sections.push(generateEvaluationFunction(), "");
    }
    for (const spec of specs) {
        const helperCode = generateHelperFunctions(spec, options);
        const lines = [
            `defineEval(${JSON.stringify(String(spec.name))}, async (context) => {`,
            `  // Legacy test input: ${JSON.stringify(spec.metadata?.originalInput)}`,
            `  const input = context.input;`,
            "",
        ];
        if (helperCode) {
            lines.push(`  // Legacy test execution`, helperCode, "");
        }
        lines.push(
            `  // Legacy evaluation logic`,
            `  const result = await evaluateLegacyTest(input, ${JSON.stringify(spec.metadata?.originalExpected)});`,
            "",
            `  return result;`,
            `}, {`,
            `  description: ${JSON.stringify(String(spec.description))},`,
            `  tags: ${JSON.stringify(spec.tags)},`,
            `  metadata: ${JSON.stringify(spec.metadata)},`,
            `  timeout: ${spec.config?.timeout || 30000},`,
            `  retries: ${spec.config?.retries || 0},`,
            `});`,
            "",
        );
        sections.push(lines.join("\n"));
    }
    return sections.join("\n");
}
|
|
218
|
+
/**
 * Generate the helper function source emitted alongside a spec.
 *
 * Returns an empty string unless `options.generateHelpers` is truthy.
 * NOTE(review): adaptTestSuite defaults generateHelpers to true, while an
 * absent value is treated as false here; confirm which default is intended.
 *
 * Fixes relative to the previous implementation:
 * - evaluateLegacyAssertion() is always emitted. It used to be emitted only
 *   when the spec had a truthy originalExpected, yet the generated
 *   evaluateLegacyTest() references it unconditionally.
 * - Each helper is rendered as one block; previously every individual source
 *   line was joined with a blank line in between.
 *
 * @param spec - Adapted spec (kept for signature compatibility; the output no longer depends on it)
 * @param options - Generation options; only `generateHelpers` is read
 * @returns Helper source code, or "" when helpers are disabled
 */
function generateHelperFunctions(spec, options) {
    if (!options.generateHelpers)
        return "";
    const assertionHelper = [
        `function evaluateLegacyAssertion(output: string, expected: string): boolean {`,
        `  return output === expected;`,
        `}`,
    ].join("\n");
    const evaluationHelper = [
        `async function evaluateLegacyTest(input: string, expected?: string): Promise<unknown> {`,
        `  // This function simulates the legacy test evaluation`,
        `  const output = await simulateLegacyExecutor(input);`,
        ``,
        `  if (expected !== undefined) {`,
        `    const passed = evaluateLegacyAssertion(output, expected);`,
        `    return createResult({`,
        `      pass: passed,`,
        `      score: passed ? 100 : 0,`,
        `      metadata: {`,
        `        input,`,
        `        expected,`,
        `      },`,
        `    });`,
        `  }`,
        ``,
        `  return createResult({`,
        `    pass: output.length > 0,`,
        `    score: output.length > 0 ? 100 : 0,`,
        `    metadata: { input },`,
        `  });`,
        `}`,
    ].join("\n");
    const executorHelper = [
        `async function simulateLegacyExecutor(input: string): Promise<string> {`,
        `  // This function simulates the legacy executor`,
        `  // In a real migration, this would be replaced with the actual executor`,
        `  return input; // Echo for demonstration`,
        `}`,
    ].join("\n");
    return [assertionHelper, evaluationHelper, executorHelper].join("\n\n");
}
|
|
236
|
+
/**
 * Collect the helper source for every spec in a suite.
 *
 * Each spec's helper text is generated once; identical texts are emitted a
 * single time, in first-seen order, separated by blank lines.
 *
 * @param specs - Adapted specs
 * @param options - Generation options forwarded to generateHelperFunctions
 * @returns Combined helper source, or "" when no spec produced helpers
 */
function generateHelperFunctionsForSuite(specs, options) {
    const seen = new Set();
    for (const spec of specs) {
        const rendered = generateHelperFunctions(spec, options);
        if (!rendered) {
            continue;
        }
        seen.add(rendered);
    }
    return [...seen].join("\n\n");
}
|
|
250
|
+
/**
 * Generate the source of the fallback evaluateLegacyTest() function.
 *
 * The generated function compares its input with the expected value when one
 * is given, and otherwise passes when the input is non-empty.
 *
 * @returns Source code as a newline-joined string
 */
function generateEvaluationFunction() {
    const opening = [
        `// Legacy test evaluation function`,
        `function evaluateLegacyTest(input: string, expected?: string): unknown {`,
        ` // This function evaluates legacy test logic`,
        ` // In a real migration, this would contain the actual test logic`,
        ` `,
    ];
    // Branch taken when an expected value was supplied.
    const comparisonBranch = [
        ` if (expected !== undefined) {`,
        ` const passed = input === expected;`,
        ` return createResult({`,
        ` pass: passed,`,
        ` score: passed ? 100 : 0,`,
        ` metadata: { input, expected },`,
        ` });`,
        ` }`,
        ` `,
    ];
    // Fallback when there is nothing to compare against.
    const fallbackBranch = [
        ` return createResult({`,
        ` pass: input.length > 0,`,
        ` score: input.length > 0 ? 100 : 0,`,
        ` metadata: { input },`,
        ` });`,
        `}`,
    ];
    return opening.concat(comparisonBranch, fallbackBranch).join("\n");
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* EvalGate Runtime Context - Layer 1 Foundation
|
|
3
|
+
*
|
|
4
|
+
* Execution context management for specifications.
|
|
5
|
+
* Provides clean isolation and proper resource management.
|
|
6
|
+
*/
|
|
7
|
+
import type { EvalContext, EvalOptions } from "./types";
|
|
8
|
+
/**
 * Create a new execution context.
 *
 * The implementation returns a plain object holding the three arguments
 * as given; nothing is copied or validated.
 *
 * @param input - Value exposed as `context.input`
 * @param metadata - Optional metadata, stored as-is
 * @param options - Optional evaluation options, stored as-is
 * @returns A context whose `input` is typed as `TInput`
 */
export declare function createContext<TInput = string>(input: TInput, metadata?: Record<string, unknown>, options?: EvalOptions): EvalContext & {
    input: TInput;
};
|
|
14
|
+
/**
 * Merge contexts with proper precedence.
 * Later contexts override earlier ones.
 *
 * `input` is taken from the last override that provides a non-nullish value,
 * `metadata` keys are merged shallowly, and `options` keys are merged
 * shallowly for every override that supplies options.
 *
 * @param base - Starting context; it must carry a usable `input`
 * @param overrides - Partial contexts applied left to right
 * @returns The merged context (`base` itself when no overrides are passed)
 * @throws Error when `base` has no usable `input`
 */
export declare function mergeContexts(base: EvalContext, ...overrides: Partial<EvalContext>[]): EvalContext;
|
|
19
|
+
/**
 * Clone a context for safe modification.
 *
 * The copy is shallow: `metadata` and `options` become new objects with the
 * same keys, while `input` and any nested values are shared with the original.
 *
 * @param context - Context to copy
 * @returns A new context object
 */
export declare function cloneContext(context: EvalContext): EvalContext;
|
|
23
|
+
/**
 * Validate context structure.
 *
 * @param context - Context to check
 * @throws Error when `context` is not an object, when `input` is not a
 * string, or when `metadata` / `options` are present but not objects
 */
export declare function validateContext(context: EvalContext): void;
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* EvalGate Runtime Context - Layer 1 Foundation
|
|
4
|
+
*
|
|
5
|
+
* Execution context management for specifications.
|
|
6
|
+
* Provides clean isolation and proper resource management.
|
|
7
|
+
*/
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.createContext = createContext;
|
|
10
|
+
exports.mergeContexts = mergeContexts;
|
|
11
|
+
exports.cloneContext = cloneContext;
|
|
12
|
+
exports.validateContext = validateContext;
|
|
13
|
+
/**
 * Create a new execution context.
 *
 * The arguments are stored as given; nothing is copied or validated.
 *
 * @param input - Value exposed as `context.input`
 * @param metadata - Optional metadata
 * @param options - Optional evaluation options
 * @returns Object with `input`, `metadata` and `options` properties
 */
function createContext(input, metadata, options) {
    const context = { input, metadata, options };
    return context;
}
|
|
23
|
+
/**
 * Merge contexts with proper precedence.
 * Later contexts override earlier ones.
 *
 * - `input`: the last non-nullish override wins, otherwise the base input.
 * - `metadata`: keys are merged shallowly (always an object once an override is applied).
 * - `options`: keys are merged shallowly for overrides that supply options.
 *
 * The base input check only rejects `undefined` and `null`. The previous
 * truthiness check also rejected valid inputs such as "" and 0.
 *
 * @param base - Starting context
 * @param overrides - Partial contexts applied left to right
 * @returns The merged context (`base` itself when there are no overrides)
 * @throws Error when `base.input` is undefined or null
 */
function mergeContexts(base, ...overrides) {
    if (base.input === undefined || base.input === null) {
        throw new Error("Base context must have a valid input");
    }
    return overrides.reduce((accumulated, override) => ({
        input: override.input ?? accumulated.input,
        metadata: {
            ...accumulated.metadata,
            ...override.metadata,
        },
        options: override.options
            ? {
                ...accumulated.options,
                ...override.options,
            }
            : accumulated.options,
    }), base);
}
|
|
48
|
+
/**
 * Clone a context for safe modification.
 *
 * The copy is shallow: `metadata` and `options` are copied one level deep,
 * and `input` is shared with the original. A missing `metadata` becomes an
 * empty object; a missing `options` stays undefined.
 *
 * @param context - Context to copy
 * @returns A new context object
 */
function cloneContext(context) {
    const { input, metadata, options } = context;
    return {
        input,
        metadata: { ...metadata },
        options: options ? { ...options } : undefined,
    };
}
|
|
58
|
+
/**
 * Validate context structure.
 *
 * Checks run in order and the first failure throws: the context must be an
 * object, `input` must be a string, and `metadata` / `options` must be
 * objects when they are present (truthy).
 *
 * @param context - Context to check
 * @throws Error describing the first failed check
 */
function validateContext(context) {
    const isObject = (value) => typeof value === "object";
    if (!context || !isObject(context)) {
        throw new Error("Context must be an object");
    }
    const { input } = context;
    if (typeof input !== "string") {
        throw new Error("Context input must be a string");
    }
    if (context.metadata && !isObject(context.metadata)) {
        throw new Error("Context metadata must be an object");
    }
    if (context.options && !isObject(context.options)) {
        throw new Error("Context options must be an object");
    }
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* EvalGate defineEval() DSL - Layer 1 Foundation
|
|
3
|
+
*
|
|
4
|
+
* The core DSL function for defining behavioral specifications.
|
|
5
|
+
* Uses content-addressable identity with AST position for stability.
|
|
6
|
+
*/
|
|
7
|
+
import type { DefineEvalFunction, EvalContext, EvalResult } from "./types";
|
|
8
|
+
/**
|
|
9
|
+
* Export the defineEval function with proper typing
|
|
10
|
+
* This is the main DSL entry point
|
|
11
|
+
*/
|
|
12
|
+
export declare const defineEval: DefineEvalFunction;
|
|
13
|
+
/**
|
|
14
|
+
* Convenience export for evalai.test() alias (backward compatibility)
|
|
15
|
+
* Provides alternative naming that matches the original roadmap vision
|
|
16
|
+
*/
|
|
17
|
+
export declare const evalai: {
|
|
18
|
+
test: DefineEvalFunction;
|
|
19
|
+
};
|
|
20
|
+
/**
 * Suite definition for grouping related specifications.
 * This will be expanded in Layer 3 for dependency graph support.
 *
 * The current implementation calls each function in `specs` in order and
 * does not use `_name`.
 *
 * @param _name - Suite name (currently unused)
 * @param specs - Functions that register specifications when called
 */
export declare function defineSuite(_name: string, specs: (() => void)[]): void;
|
|
25
|
+
/**
 * Helper function to create specification contexts.
 * Useful for testing and manual execution.
 *
 * The implementation returns a plain object holding the three arguments
 * as given; nothing is copied or validated.
 *
 * @param input - Value exposed as `context.input`
 * @param metadata - Optional metadata, stored as-is
 * @param options - Optional evaluation options, stored as-is
 * @returns A context whose `input` is typed as `TInput`
 */
export declare function createContext<TInput = string>(input: TInput, metadata?: Record<string, unknown>, options?: EvalContext["options"]): EvalContext & {
    input: TInput;
};
|
|
32
|
+
/**
 * Helper function to create specification results.
 *
 * The implementation copies the given fields into a new object and clamps
 * `score` to the range 0-100.
 *
 * @param config - Result fields: `pass` and `score` are required;
 * `assertions`, `metadata` and `error` are optional
 * @returns The assembled result
 */
export declare function createResult(config: {
    pass: boolean;
    score: number;
    assertions?: EvalResult["assertions"];
    metadata?: Record<string, unknown>;
    error?: string;
}): EvalResult;
|
|
43
|
+
/**
|
|
44
|
+
* Default export for convenience
|
|
45
|
+
*/
|
|
46
|
+
export default defineEval;
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* EvalGate defineEval() DSL - Layer 1 Foundation
|
|
4
|
+
*
|
|
5
|
+
* The core DSL function for defining behavioral specifications.
|
|
6
|
+
* Uses content-addressable identity with AST position for stability.
|
|
7
|
+
*/
|
|
8
|
+
// Module-interop helpers. Each one reuses an implementation already present
// on `this` when available, otherwise defines its own.

// Exposes property `k` of module `m` on object `o` under the name `k2`
// (defaults to `k`). Where Object.create exists, the property is defined
// through a descriptor, falling back to an enumerable getter that reads
// `m[k]` on access; otherwise the value is copied directly.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// Sets `v` as the "default" property of `o`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
// Wraps a required module as a namespace object: modules flagged with
// `__esModule` are returned unchanged; otherwise every own property except
// "default" is bound onto a new object and the module itself becomes its
// "default".
var __importStar = (this && this.__importStar) || (function () {
    // Replaces itself on first call with Object.getOwnPropertyNames or a
    // for-in based fallback.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
|
|
41
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
42
|
+
exports.evalai = exports.defineEval = void 0;
|
|
43
|
+
exports.defineSuite = defineSuite;
|
|
44
|
+
exports.createContext = createContext;
|
|
45
|
+
exports.createResult = createResult;
|
|
46
|
+
const crypto = __importStar(require("node:crypto"));
|
|
47
|
+
const path = __importStar(require("node:path"));
|
|
48
|
+
const registry_1 = require("./registry");
|
|
49
|
+
const types_1 = require("./types");
|
|
50
|
+
/**
 * Determine the source position of the code that invoked the DSL.
 *
 * Captures a stack trace and scans it from the fourth line (index 3) onward,
 * skipping frames that mention "node_modules" or "internal/modules". The
 * first frame matching either the `at fn (file:line:col)` or the
 * `at file:line:col` shape is returned.
 *
 * @returns Object with resolved `filePath`, `line` and `column`
 * @throws SpecRegistrationError when no stack is available or no frame can be parsed
 */
function getCallerPosition() {
    const trace = new Error().stack;
    if (!trace) {
        throw new types_1.SpecRegistrationError("Unable to determine caller position");
    }
    // Converts a regex match of [whole, file, line, column] into a position.
    const toPosition = ([, filePath, lineNum, colNum]) => ({
        filePath: path.resolve(filePath),
        line: parseInt(lineNum, 10),
        column: parseInt(colNum, 10),
    });
    const frames = trace.split("\n");
    for (let index = 3; index < frames.length; index++) {
        const frame = frames[index];
        const skip = !frame ||
            frame.includes("node_modules") ||
            frame.includes("internal/modules");
        if (skip) {
            continue;
        }
        // Prefer the parenthesised form; fall back to the bare form.
        const located = frame.match(/at\s+.*?\((.*?):(\d+):(\d+)\)/) ??
            frame.match(/at\s+(.*?):(\d+):(\d+)/);
        if (located) {
            return toPosition(located);
        }
    }
    throw new types_1.SpecRegistrationError("Unable to parse caller position from stack trace");
}
|
|
92
|
+
/**
 * Generate a content-addressable specification ID.
 *
 * The ID is the first 20 hex characters of the SHA-256 digest of
 * `namespace|path|name|line:column`, where `path` is `filePath` made
 * relative to the current working directory with "/" separators.
 *
 * @param namespace - Runtime namespace
 * @param filePath - Path of the file defining the spec
 * @param name - Specification name
 * @param position - Object with `line` and `column`
 * @returns 20-character hex ID
 */
function generateSpecId(namespace, filePath, name, position) {
    // Relative, POSIX-style path so the ID does not depend on the OS separator.
    const posixRelativePath = path
        .relative(process.cwd(), filePath)
        .split(path.sep)
        .join("/");
    const location = `${position.line}:${position.column}`;
    const identity = [namespace, posixRelativePath, name, location].join("|");
    const digest = crypto.createHash("sha256").update(identity).digest("hex");
    return digest.slice(0, 20);
}
|
|
109
|
+
/**
 * Validate a specification name.
 *
 * A valid name is a non-blank string of at most 100 characters made up of
 * letters, digits, whitespace, hyphens and underscores.
 *
 * @param name - Candidate name
 * @throws SpecRegistrationError describing the first rule the name breaks
 */
function validateSpecName(name) {
    const reject = (message) => {
        throw new types_1.SpecRegistrationError(message);
    };
    if (typeof name !== "string" || !name) {
        reject("Specification name must be a non-empty string");
    }
    if (name.trim().length === 0) {
        reject("Specification name cannot be empty");
    }
    if (name.length > 100) {
        reject("Specification name must be 100 characters or less");
    }
    const allowedCharacters = /^[a-zA-Z0-9\s\-_]+$/;
    if (!allowedCharacters.test(name)) {
        reject("Specification name can only contain letters, numbers, spaces, hyphens, and underscores");
    }
}
|
|
127
|
+
/**
 * Validate an executor function.
 *
 * @param executor - Candidate executor; must be a function declaring at most one parameter
 * @throws SpecRegistrationError when it is not a function or declares more than one parameter
 */
function validateExecutor(executor) {
    const isCallable = typeof executor === "function";
    if (!isCallable) {
        throw new types_1.SpecRegistrationError("Executor must be a function");
    }
    // `length` is the number of declared parameters; only the context is passed.
    const declaredParameters = executor.length;
    if (declaredParameters > 1) {
        throw new types_1.SpecRegistrationError("Executor should accept exactly one parameter (context)");
    }
}
|
|
139
|
+
/**
 * Normalize the two defineEval call forms into one configuration object.
 *
 * - `defineEval(config)`: the config object is returned unchanged.
 * - `defineEval(name, executor, options)`: a new object is built from the
 *   name and executor, with the keys of `options` spread on top.
 *
 * @param nameOrConfig - Spec name, or a complete configuration object
 * @param executor - Executor function (required with the name form)
 * @param options - Extra configuration for the name form
 * @returns Specification configuration
 * @throws SpecRegistrationError when a name is given without an executor
 */
function createSpecConfig(nameOrConfig, executor, options) {
    const usesNameForm = typeof nameOrConfig === "string";
    if (!usesNameForm) {
        return nameOrConfig;
    }
    if (!executor) {
        throw new types_1.SpecRegistrationError("Executor function is required when using name parameter");
    }
    const assembled = {
        name: nameOrConfig,
        executor,
        ...options,
    };
    return assembled;
}
|
|
159
|
+
/**
 * Core defineEval function implementation.
 *
 * Resolves the caller's source position, normalizes the arguments into a
 * configuration, validates the name and executor, derives the spec ID and
 * registers the resulting spec with the active runtime. Nothing is returned.
 *
 * @param nameOrConfig - Spec name, or a complete configuration object
 * @param executor - Executor function (used with the name form)
 * @param options - Extra configuration (used with the name form)
 * @throws SpecRegistrationError from the position lookup or the validators
 */
function defineEvalImpl(nameOrConfig, executor, options) {
    // Get caller position for identity.
    // getCallerPosition() starts scanning at stack line index 3, which appears
    // to assume it is called directly from this function - keep this call here.
    const callerPosition = getCallerPosition();
    // Create specification configuration
    const config = createSpecConfig(nameOrConfig, executor, options);
    // Validate configuration
    validateSpecName(config.name);
    validateExecutor(config.executor);
    // Get active runtime
    const runtime = (0, registry_1.getActiveRuntime)();
    // Generate specification ID from namespace, file, name and position
    const specId = generateSpecId(runtime.namespace, callerPosition.filePath, config.name, callerPosition);
    // Create specification
    const spec = {
        id: specId,
        name: config.name,
        filePath: callerPosition.filePath,
        position: callerPosition,
        description: config.description,
        tags: config.tags,
        executor: config.executor,
        metadata: config.metadata,
        config: {
            timeout: config.timeout,
            retries: config.retries,
            budget: config.budget,
            model: config.model,
        },
    };
    // Register specification
    runtime.register(spec);
}
|
|
194
|
+
/**
|
|
195
|
+
* Export the defineEval function with proper typing
|
|
196
|
+
* This is the main DSL entry point
|
|
197
|
+
*/
|
|
198
|
+
exports.defineEval = defineEvalImpl;
|
|
199
|
+
/**
|
|
200
|
+
* Convenience export for evalai.test() alias (backward compatibility)
|
|
201
|
+
* Provides alternative naming that matches the original roadmap vision
|
|
202
|
+
*/
|
|
203
|
+
exports.evalai = {
|
|
204
|
+
test: exports.defineEval,
|
|
205
|
+
};
|
|
206
|
+
/**
 * Suite definition for grouping related specifications.
 *
 * Currently this only calls each function in `specs`, in order, so that the
 * specifications they define get registered. The suite name is not used yet;
 * Layer 3 is planned to build a dependency graph here.
 *
 * @param _name - Suite name (unused for now)
 * @param specs - Functions that register specifications when called
 */
function defineSuite(_name, specs) {
    for (const registerSpec of specs) {
        registerSpec();
    }
}
|
|
217
|
+
/**
 * Helper function to create specification contexts.
 * Useful for testing and manual execution.
 *
 * The arguments are stored as given; nothing is copied or validated.
 *
 * @param input - Value exposed as `context.input`
 * @param metadata - Optional metadata
 * @param options - Optional evaluation options
 * @returns Object with `input`, `metadata` and `options` properties
 */
function createContext(input, metadata, options) {
    const context = { input, metadata, options };
    return context;
}
|
|
228
|
+
/**
 * Helper function to create specification results.
 *
 * Copies the given fields into a new result object; `score` is clamped to
 * the range 0-100.
 *
 * @param config - Result fields: `pass`, `score`, and optional `assertions`, `metadata`, `error`
 * @returns The assembled result
 */
function createResult(config) {
    const { pass, score, assertions, metadata, error } = config;
    const clampedScore = Math.max(0, Math.min(100, score));
    return {
        pass,
        score: clampedScore,
        assertions,
        metadata,
        error,
    };
}
|
|
241
|
+
/**
|
|
242
|
+
* Default export for convenience
|
|
243
|
+
*/
|
|
244
|
+
exports.default = exports.defineEval;
|