@evalgate/sdk 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +638 -0
  2. package/README.md +398 -0
  3. package/dist/assertions.d.ts +189 -0
  4. package/dist/assertions.js +662 -0
  5. package/dist/batch.d.ts +68 -0
  6. package/dist/batch.js +179 -0
  7. package/dist/cache.d.ts +65 -0
  8. package/dist/cache.js +131 -0
  9. package/dist/cli/api.d.ts +108 -0
  10. package/dist/cli/api.js +132 -0
  11. package/dist/cli/baseline.d.ts +10 -0
  12. package/dist/cli/baseline.js +172 -0
  13. package/dist/cli/check.d.ts +73 -0
  14. package/dist/cli/check.js +355 -0
  15. package/dist/cli/ci-context.d.ts +6 -0
  16. package/dist/cli/ci-context.js +112 -0
  17. package/dist/cli/ci.d.ts +45 -0
  18. package/dist/cli/ci.js +192 -0
  19. package/dist/cli/config.d.ts +30 -0
  20. package/dist/cli/config.js +230 -0
  21. package/dist/cli/constants.d.ts +15 -0
  22. package/dist/cli/constants.js +18 -0
  23. package/dist/cli/diff.d.ts +173 -0
  24. package/dist/cli/diff.js +685 -0
  25. package/dist/cli/discover.d.ts +84 -0
  26. package/dist/cli/discover.js +419 -0
  27. package/dist/cli/doctor.d.ts +88 -0
  28. package/dist/cli/doctor.js +675 -0
  29. package/dist/cli/env.d.ts +21 -0
  30. package/dist/cli/env.js +42 -0
  31. package/dist/cli/explain.d.ts +58 -0
  32. package/dist/cli/explain.js +561 -0
  33. package/dist/cli/formatters/github.d.ts +8 -0
  34. package/dist/cli/formatters/github.js +135 -0
  35. package/dist/cli/formatters/human.d.ts +6 -0
  36. package/dist/cli/formatters/human.js +110 -0
  37. package/dist/cli/formatters/json.d.ts +6 -0
  38. package/dist/cli/formatters/json.js +10 -0
  39. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  40. package/dist/cli/formatters/pr-comment.js +103 -0
  41. package/dist/cli/formatters/types.d.ts +103 -0
  42. package/dist/cli/formatters/types.js +8 -0
  43. package/dist/cli/gate.d.ts +21 -0
  44. package/dist/cli/gate.js +179 -0
  45. package/dist/cli/impact-analysis.d.ts +63 -0
  46. package/dist/cli/impact-analysis.js +252 -0
  47. package/dist/cli/index.d.ts +9 -0
  48. package/dist/cli/index.js +332 -0
  49. package/dist/cli/init.d.ts +16 -0
  50. package/dist/cli/init.js +292 -0
  51. package/dist/cli/manifest.d.ts +103 -0
  52. package/dist/cli/manifest.js +282 -0
  53. package/dist/cli/migrate.d.ts +41 -0
  54. package/dist/cli/migrate.js +349 -0
  55. package/dist/cli/policy-packs.d.ts +23 -0
  56. package/dist/cli/policy-packs.js +89 -0
  57. package/dist/cli/print-config.d.ts +29 -0
  58. package/dist/cli/print-config.js +270 -0
  59. package/dist/cli/profiles.d.ts +28 -0
  60. package/dist/cli/profiles.js +30 -0
  61. package/dist/cli/reason-codes.d.ts +17 -0
  62. package/dist/cli/reason-codes.js +19 -0
  63. package/dist/cli/regression-gate.d.ts +15 -0
  64. package/dist/cli/regression-gate.js +341 -0
  65. package/dist/cli/render/snippet.d.ts +5 -0
  66. package/dist/cli/render/snippet.js +15 -0
  67. package/dist/cli/render/sort.d.ts +10 -0
  68. package/dist/cli/render/sort.js +24 -0
  69. package/dist/cli/report/build-check-report.d.ts +19 -0
  70. package/dist/cli/report/build-check-report.js +132 -0
  71. package/dist/cli/run.d.ts +101 -0
  72. package/dist/cli/run.js +395 -0
  73. package/dist/cli/share.d.ts +17 -0
  74. package/dist/cli/share.js +91 -0
  75. package/dist/cli/upgrade.d.ts +15 -0
  76. package/dist/cli/upgrade.js +492 -0
  77. package/dist/cli/workspace.d.ts +31 -0
  78. package/dist/cli/workspace.js +68 -0
  79. package/dist/client.d.ts +368 -0
  80. package/dist/client.js +893 -0
  81. package/dist/client.request.test.d.ts +1 -0
  82. package/dist/client.request.test.js +232 -0
  83. package/dist/context.d.ts +134 -0
  84. package/dist/context.js +215 -0
  85. package/dist/errors.d.ts +82 -0
  86. package/dist/errors.js +298 -0
  87. package/dist/export.d.ts +195 -0
  88. package/dist/export.js +344 -0
  89. package/dist/index.d.ts +44 -0
  90. package/dist/index.js +153 -0
  91. package/dist/integrations/anthropic.d.ts +91 -0
  92. package/dist/integrations/anthropic.js +163 -0
  93. package/dist/integrations/openai-eval.d.ts +57 -0
  94. package/dist/integrations/openai-eval.js +232 -0
  95. package/dist/integrations/openai.d.ts +92 -0
  96. package/dist/integrations/openai.js +160 -0
  97. package/dist/local.d.ts +39 -0
  98. package/dist/local.js +148 -0
  99. package/dist/logger.d.ts +128 -0
  100. package/dist/logger.js +227 -0
  101. package/dist/matchers/index.d.ts +1 -0
  102. package/dist/matchers/index.js +6 -0
  103. package/dist/matchers/to-pass-gate.d.ts +29 -0
  104. package/dist/matchers/to-pass-gate.js +35 -0
  105. package/dist/pagination.d.ts +74 -0
  106. package/dist/pagination.js +139 -0
  107. package/dist/regression.d.ts +100 -0
  108. package/dist/regression.js +44 -0
  109. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  110. package/dist/runtime/adapters/config-to-dsl.js +400 -0
  111. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  112. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  113. package/dist/runtime/context.d.ts +26 -0
  114. package/dist/runtime/context.js +74 -0
  115. package/dist/runtime/eval.d.ts +46 -0
  116. package/dist/runtime/eval.js +244 -0
  117. package/dist/runtime/execution-mode.d.ts +80 -0
  118. package/dist/runtime/execution-mode.js +357 -0
  119. package/dist/runtime/executor.d.ts +16 -0
  120. package/dist/runtime/executor.js +152 -0
  121. package/dist/runtime/registry.d.ts +78 -0
  122. package/dist/runtime/registry.js +403 -0
  123. package/dist/runtime/run-report.d.ts +200 -0
  124. package/dist/runtime/run-report.js +222 -0
  125. package/dist/runtime/types.d.ts +356 -0
  126. package/dist/runtime/types.js +76 -0
  127. package/dist/snapshot.d.ts +176 -0
  128. package/dist/snapshot.js +322 -0
  129. package/dist/streaming.d.ts +173 -0
  130. package/dist/streaming.js +268 -0
  131. package/dist/testing.d.ts +273 -0
  132. package/dist/testing.js +317 -0
  133. package/dist/types.d.ts +754 -0
  134. package/dist/types.js +54 -0
  135. package/dist/utils/input-hash.d.ts +8 -0
  136. package/dist/utils/input-hash.js +41 -0
  137. package/dist/version.d.ts +7 -0
  138. package/dist/version.js +10 -0
  139. package/dist/workflows.d.ts +389 -0
  140. package/dist/workflows.js +671 -0
  141. package/package.json +117 -0
@@ -0,0 +1,276 @@
1
+ "use strict";
2
+ /**
3
+ * COMPAT-202: Legacy TestSuite → defineEval adapter
4
+ *
5
+ * Converts legacy TestSuite instances to defineEval specifications
6
+ * without forcing migration. Enables lossless where possible.
7
+ */
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.adaptTestSuite = adaptTestSuite;
10
+ exports.generateDefineEvalCode = generateDefineEvalCode;
11
const crypto = require("node:crypto");
const eval_1 = require("../eval");
const registry_1 = require("../registry");
13
/**
 * Adapt a legacy TestSuite into EvalSpec-shaped objects, one per test case.
 *
 * The suite is read through `getTests()`, `getMetadata()` and `getConfig()`.
 * A runtime is created before conversion and `disposeActiveRuntime()` is
 * always called afterwards, whether conversion succeeds or throws.
 *
 * @param suite - Legacy TestSuite instance
 * @param options - Adapter flags `includeProvenance`, `preserveIds` and
 *   `generateHelpers`; each defaults to true
 * @returns Array of EvalSpec definitions, in test order
 */
function adaptTestSuite(suite, options = {}) {
  const { includeProvenance = true, preserveIds = true, generateHelpers = true } = options;
  const tests = suite.getTests();
  const metadata = suite.getMetadata();
  const config = suite.getConfig();
  // The runtime handle itself is never used; only its creation/disposal matters here.
  (0, registry_1.createEvalRuntime)();

  // Builds the spec for one legacy test case.
  const toSpec = (test) => ({
    id: generateSpecId(test, metadata.suiteName || "legacy-suite", preserveIds),
    name: test.id,
    // Legacy tests have no real source location, so placeholders are used.
    filePath: "legacy://testsuite",
    position: { line: 1, column: 1 },
    description: `Legacy test: ${test.id}`,
    tags: ["legacy", "migrated"],
    executor: createExecutorFromTestCase(test, config, generateHelpers),
    metadata: {
      ...test.metadata,
      ...(includeProvenance
        ? {
            source: "legacy",
            legacySuiteName: metadata.suiteName,
            legacyTestId: test.id,
            originalInput: test.input,
            originalExpected: test.expected,
          }
        : {}),
    },
    // budget and model are not available on a TestSuite config.
    config: {
      timeout: config.timeout,
      retries: config.retries,
    },
  });

  const specs = [];
  try {
    for (const test of tests) {
      specs.push(toSpec(test));
    }
  } finally {
    (0, registry_1.disposeActiveRuntime)();
  }
  return specs;
}
65
/**
 * Generate the specification ID for a legacy test case.
 *
 * With `preserveIds` and a non-empty `test.id`, the ID is the original ID
 * with every character outside [a-zA-Z0-9_-] replaced by "_", truncated to
 * 20 characters. Otherwise the ID is the first 20 hex characters of a
 * SHA-256 digest over suite name, test ID, input and expected value, so it
 * is deterministic and depends on the whole content.
 *
 * @param test - Legacy test case; `id`, `input` and `expected` are read
 * @param suiteName - Name of the owning suite, mixed into the digest
 * @param preserveIds - Prefer the sanitized original test ID when true
 * @returns Spec ID of at most 20 characters
 */
function generateSpecId(test, suiteName, preserveIds) {
  // NOTE(review): the previous guard also required test.id !== `case-${test.id}`,
  // which no string can fail, so auto-generated IDs were never filtered out.
  // That effective behavior is kept; confirm the auto-generated ID format
  // before adding a real check.
  if (preserveIds && test.id) {
    return String(test.id).replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 20);
  }
  // Serialize structured values so objects do not all collapse to "[object Object]".
  const serialize = (value) => {
    if (value === undefined) {
      return "";
    }
    if (typeof value === "string") {
      return value;
    }
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      // Circular structures / BigInt: fall back to plain string coercion.
      return String(value);
    }
  };
  const content = [suiteName, test.id, serialize(test.input), serialize(test.expected)].join("|");
  // A truncated base64 of the raw content only covered its first 15 bytes
  // (mostly the suite name), so sibling tests collided; hash everything instead.
  return crypto.createHash("sha256").update(content).digest("hex").slice(0, 20);
}
82
/**
 * Build the async executor for one legacy test case.
 *
 * The returned function runs `config.executor` on `context.input` when one is
 * configured and evaluates its output. Without an executor it evaluates
 * `test.expected` as the output, and with neither it returns a failing result.
 * Errors thrown by the executor are converted into a failing result.
 */
function createExecutorFromTestCase(test, config, generateHelpers) {
  // Shared shape for every failure result produced here.
  const failWith = (message) => (0, eval_1.createResult)({
    pass: false,
    score: 0,
    error: message,
  });
  return async (context) => {
    const input = context.input;
    if (!config.executor) {
      if (test.expected === undefined) {
        return failWith("No executor or expected output available for legacy test");
      }
      // No executor: the expected value stands in for the output.
      return evaluateTestCase(test, test.expected, generateHelpers);
    }
    try {
      const produced = await config.executor(input);
      return evaluateTestCase(test, produced, generateHelpers);
    } catch (error) {
      return failWith(error instanceof Error ? error.message : String(error));
    }
  };
}
114
/**
 * Score an output against a legacy test case.
 *
 * When the test reports assertions (`hasAssertions` and `assertionCount > 0`)
 * the assertion functions themselves are not run; only a strict equality
 * check against `test.expected` is made, if it is defined. Without
 * assertions the test passes when `output.length > 0`. Any error thrown
 * while evaluating becomes a failing result carrying the error message.
 */
function evaluateTestCase(test, output, generateHelpers) {
  try {
    const assertions = [];
    let passed = true;
    const reportsAssertions = test.hasAssertions && test.assertionCount > 0;
    if (!reportsAssertions) {
      // No assertions: a non-empty output counts as a pass.
      passed = output.length > 0;
    } else if (test.expected !== undefined) {
      passed = output === test.expected;
      assertions.push({
        name: "legacy-equals",
        passed,
        expected: test.expected,
        actual: output,
        message: passed
          ? "Output matches expected"
          : `Expected "${test.expected}", got "${output}"`,
      });
    }
    return (0, eval_1.createResult)({
      pass: passed,
      score: passed ? 100 : 0,
      assertions: generateHelpers ? assertions : undefined,
      metadata: {
        testCaseId: test.id,
        originalInput: test.input,
        originalExpected: test.expected,
      },
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return (0, eval_1.createResult)({ pass: false, score: 0, error: message });
  }
}
167
/**
 * Generate defineEval source code from a legacy TestSuite.
 *
 * The output consists of a comment/import header, the supporting helper
 * functions, and one `defineEval(...)` call per test case, joined with
 * newlines.
 *
 * @param suite - Legacy TestSuite instance
 * @param options - Code generation options (passed on to `adaptTestSuite`
 *   and the helper generators)
 * @returns Generated TypeScript code
 */
function generateDefineEvalCode(suite, options = {}) {
  const specs = adaptTestSuite(suite, options);
  const metadata = suite.getMetadata();
  const header = [
    `// Auto-generated from TestSuite: ${metadata.suiteName || "legacy-suite"}`,
    `// Generated at: ${new Date().toISOString()}`,
    `// This file replaces the legacy TestSuite with defineEval() specifications`,
    "",
    `import { defineEval, createResult } from '@evalgate/sdk';`,
    "",
  ];
  const specCode = specs.map((spec) => {
    const helperCode = generateHelperFunctions(spec, options);
    return [
      // JSON.stringify keeps names/descriptions containing quotes or
      // backslashes valid as string literals in the generated file.
      `defineEval(${JSON.stringify(spec.name)}, async (context) => {`,
      ` // Legacy test input: ${JSON.stringify(spec.metadata?.originalInput)}`,
      ` const input = context.input;`,
      ` `,
      ` // Legacy test execution`,
      helperCode,
      ` `,
      ` // Legacy evaluation logic`,
      ` const result = await evaluateLegacyTest(input, ${JSON.stringify(spec.metadata?.originalExpected)});`,
      ` `,
      ` return result;`,
      `}, {`,
      ` description: ${JSON.stringify(spec.description)},`,
      ` tags: ${JSON.stringify(spec.tags)},`,
      ` metadata: ${JSON.stringify(spec.metadata)},`,
      ` timeout: ${spec.config?.timeout || 30000},`,
      ` retries: ${spec.config?.retries || 0},`,
      `});`,
      "",
    ].join("\n");
  });
  const helperFunctions = generateHelperFunctionsForSuite(specs, options);
  // Both generators return strings. Spreading them into the array (as was
  // done before) split them into single characters, one per output line.
  const sections = [...header];
  if (helperFunctions) {
    sections.push(helperFunctions, "");
  }
  // The suite helpers may already declare evaluateLegacyTest; emitting the
  // fallback as well would put two implementations in one module.
  if (!helperFunctions.includes("function evaluateLegacyTest(")) {
    sections.push(generateEvaluationFunction(), "");
  }
  sections.push(...specCode);
  return sections.join("\n");
}
218
/**
 * Generate the TypeScript helper functions used by generated legacy specs.
 *
 * Returns an empty string unless `options.generateHelpers` is truthy.
 * Otherwise returns three helpers (`evaluateLegacyAssertion`,
 * `evaluateLegacyTest`, `simulateLegacyExecutor`) separated by blank lines.
 *
 * @param spec - Spec the helpers are generated for. The bundle does not vary
 *   per spec; the parameter is kept for interface compatibility.
 * @param options - Code generation options; only `generateHelpers` is read
 * @returns Helper source code, or "" when helpers are disabled
 */
function generateHelperFunctions(spec, options) {
  if (!options?.generateHelpers) {
    return "";
  }
  // Always emitted: evaluateLegacyTest below references it unconditionally,
  // so leaving it out for specs without a truthy expected value produced
  // code with an unresolved name, and made per-spec bundles differ.
  const assertionHelper = [
    `function evaluateLegacyAssertion(output: string, expected: string): boolean {`,
    ` return output === expected;`,
    `}`,
  ];
  const evaluationHelper = [
    `async function evaluateLegacyTest(input: string, expected?: string): Promise<unknown> {`,
    ` // This function simulates the legacy test evaluation`,
    ` const output = await simulateLegacyExecutor(input);`,
    ` `,
    ` if (expected !== undefined) {`,
    ` const passed = evaluateLegacyAssertion(output, expected);`,
    ` return createResult({`,
    ` pass: passed,`,
    ` score: passed ? 100 : 0,`,
    ` metadata: {`,
    ` input,`,
    ` expected,`,
    ` },`,
    ` });`,
    ` }`,
    ` `,
    ` return createResult({`,
    ` pass: output.length > 0,`,
    ` score: output.length > 0 ? 100 : 0,`,
    ` metadata: { input },`,
    ` });`,
    `}`,
  ];
  const executorHelper = [
    `async function simulateLegacyExecutor(input: string): Promise<string> {`,
    ` // This function simulates the legacy executor`,
    ` // In a real migration, this would be replaced with the actual executor`,
    ` return input; // Echo for demonstration`,
    `}`,
  ];
  // Lines within a helper are joined by single newlines; only the helpers
  // themselves are separated by a blank line.
  return [assertionHelper, evaluationHelper, executorHelper]
    .map((lines) => lines.join("\n"))
    .join("\n\n");
}
236
/**
 * Collect the helper code of every spec in the suite, dropping empty results
 * and exact duplicates, and join what remains with blank lines. First-seen
 * order is preserved.
 */
function generateHelperFunctionsForSuite(specs, options) {
  const distinct = new Set();
  for (const spec of specs) {
    const code = generateHelperFunctions(spec, options);
    if (!code) {
      continue;
    }
    distinct.add(code);
  }
  return [...distinct].join("\n\n");
}
250
/**
 * Produce the source text of the standalone `evaluateLegacyTest` function
 * that generated specs call: strict equality against `expected` when it is
 * given, otherwise a non-empty-input check.
 */
function generateEvaluationFunction() {
  const signature = [
    "// Legacy test evaluation function",
    "function evaluateLegacyTest(input: string, expected?: string): unknown {",
    " // This function evaluates legacy test logic",
    " // In a real migration, this would contain the actual test logic",
    " ",
  ];
  const expectedBranch = [
    " if (expected !== undefined) {",
    " const passed = input === expected;",
    " return createResult({",
    " pass: passed,",
    " score: passed ? 100 : 0,",
    " metadata: { input, expected },",
    " });",
    " }",
    " ",
  ];
  const fallbackBranch = [
    " return createResult({",
    " pass: input.length > 0,",
    " score: input.length > 0 ? 100 : 0,",
    " metadata: { input },",
    " });",
    "}",
  ];
  return signature.concat(expectedBranch, fallbackBranch).join("\n");
}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * EvalGate Runtime Context - Layer 1 Foundation
3
+ *
4
+ * Execution context management for specifications.
5
+ * Provides clean isolation and proper resource management.
6
+ */
7
+ import type { EvalContext, EvalOptions } from "./types";
8
+ /**
9
+ * Create a new execution context object holding `input`, `metadata` and `options` exactly as given
10
+ */
11
+ export declare function createContext<TInput = string>(input: TInput, metadata?: Record<string, unknown>, options?: EvalOptions): EvalContext & {
12
+ input: TInput;
13
+ };
14
+ /**
15
+ * Merge `overrides` into `base` from left to right; `metadata` and `options` are shallow-merged key by key
16
+ * Later contexts override earlier ones; throws an Error when `base` has no valid input
17
+ */
18
+ export declare function mergeContexts(base: EvalContext, ...overrides: Partial<EvalContext>[]): EvalContext;
19
+ /**
20
+ * Clone a context for safe modification (shallow: `metadata` and `options` get new top-level objects, nested values are shared)
21
+ */
22
+ export declare function cloneContext(context: EvalContext): EvalContext;
23
+ /**
23
+ * Validate context structure; throws an Error if `context` is not an object, `input` is not a string, or `metadata`/`options` are set but not objects
24
+ */
25
+ export declare function validateContext(context: EvalContext): void;
@@ -0,0 +1,74 @@
1
+ "use strict";
2
+ /**
3
+ * EvalGate Runtime Context - Layer 1 Foundation
4
+ *
5
+ * Execution context management for specifications.
6
+ * Provides clean isolation and proper resource management.
7
+ */
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.createContext = createContext;
10
+ exports.mergeContexts = mergeContexts;
11
+ exports.cloneContext = cloneContext;
12
+ exports.validateContext = validateContext;
13
/**
 * Build an execution context from its three parts. The values are stored as
 * given; nothing is copied or validated.
 */
function createContext(input, metadata, options) {
  const context = { input, metadata, options };
  return context;
}
23
/**
 * Merge contexts with proper precedence: later contexts override earlier ones.
 *
 * `input` is taken from the last override that defines it (falling back to
 * the base), `metadata` is shallow-merged key by key, and `options` is
 * shallow-merged whenever an override supplies options. With no overrides
 * the base object itself is returned.
 *
 * @param base - Starting context; its `input` must not be null/undefined
 * @param overrides - Partial contexts applied left to right; null/undefined
 *   entries are ignored
 * @returns The merged context
 * @throws Error when `base` is missing or has no input
 */
function mergeContexts(base, ...overrides) {
  // Only an absent input is invalid. A truthiness check would also reject
  // legitimate falsy inputs such as "" or 0.
  if (base == null || base.input == null) {
    throw new Error("Base context must have a valid input");
  }
  return overrides.reduce((merged, override) => ({
    input: override?.input ?? merged.input,
    metadata: {
      ...merged.metadata,
      ...override?.metadata,
    },
    options: override?.options
      ? {
          ...merged.options,
          ...override.options,
        }
      : merged.options,
  }), base);
}
48
/**
 * Make a shallow copy of a context: `metadata` is copied into a new object
 * (an absent one becomes `{}`), and `options` is copied when truthy,
 * otherwise it becomes undefined. Nested values are shared with the source.
 */
function cloneContext(context) {
  const copy = {
    input: context.input,
    metadata: { ...context.metadata },
    options: undefined,
  };
  if (context.options) {
    copy.options = { ...context.options };
  }
  return copy;
}
58
/**
 * Check the shape of a context and throw an Error on the first problem:
 * the context must be an object, `input` must be a string, and `metadata`
 * and `options`, when truthy, must be objects. Returns nothing on success.
 */
function validateContext(context) {
  const isObjectType = (value) => typeof value === "object";
  if (!context || !isObjectType(context)) {
    throw new Error("Context must be an object");
  }
  if (typeof context.input !== "string") {
    throw new Error("Context input must be a string");
  }
  // Optional parts, checked in this order.
  const optionalParts = [
    ["metadata", "Context metadata must be an object"],
    ["options", "Context options must be an object"],
  ];
  for (const [key, message] of optionalParts) {
    const value = context[key];
    if (value && !isObjectType(value)) {
      throw new Error(message);
    }
  }
}
@@ -0,0 +1,46 @@
1
+ /**
2
+ * EvalGate defineEval() DSL - Layer 1 Foundation
3
+ *
4
+ * The core DSL function for defining behavioral specifications.
5
+ * Uses content-addressable identity with AST position for stability.
6
+ */
7
+ import type { DefineEvalFunction, EvalContext, EvalResult } from "./types";
8
+ /**
9
+ * defineEval: builds a specification from its arguments and registers it with the active runtime
10
+ * This is the main DSL entry point; call it as (name, executor, options) or with a single config object
11
+ */
12
+ export declare const defineEval: DefineEvalFunction;
13
+ /**
14
+ * Convenience export: `evalai.test()` is an alias of `defineEval` (backward compatibility)
15
+ * Provides alternative naming that matches the original roadmap vision
16
+ */
17
+ export declare const evalai: {
18
+ test: DefineEvalFunction;
19
+ };
20
+ /**
21
+ * Suite definition for grouping related specifications; currently calls each function in `specs` in order and ignores `_name`
22
+ * This will be expanded in Layer 3 for dependency graph support
23
+ */
24
+ export declare function defineSuite(_name: string, specs: (() => void)[]): void;
25
+ /**
26
+ * Helper function to create specification contexts from `input`, `metadata` and `options`, stored as given
27
+ * Useful for testing and manual execution
28
+ */
29
+ export declare function createContext<TInput = string>(input: TInput, metadata?: Record<string, unknown>, options?: EvalContext["options"]): EvalContext & {
30
+ input: TInput;
31
+ };
32
+ /**
33
+ * Helper function to create specification results
34
+ * Provides a convenient builder pattern for common result patterns; `score` is clamped to the 0-100 range
35
+ */
36
+ export declare function createResult(config: {
37
+ pass: boolean;
38
+ score: number;
39
+ assertions?: EvalResult["assertions"];
40
+ metadata?: Record<string, unknown>;
41
+ error?: string;
42
+ }): EvalResult;
43
+ /**
44
+ * Default export for convenience
45
+ */
46
+ export default defineEval;
@@ -0,0 +1,244 @@
1
+ "use strict";
2
+ /**
3
+ * EvalGate defineEval() DSL - Layer 1 Foundation
4
+ *
5
+ * The core DSL function for defining behavioral specifications.
6
+ * Uses content-addressable identity with AST position for stability.
7
+ */
8
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
9
+ if (k2 === undefined) k2 = k;
10
+ var desc = Object.getOwnPropertyDescriptor(m, k);
11
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
12
+ desc = { enumerable: true, get: function() { return m[k]; } };
13
+ }
14
+ Object.defineProperty(o, k2, desc);
15
+ }) : (function(o, m, k, k2) {
16
+ if (k2 === undefined) k2 = k;
17
+ o[k2] = m[k];
18
+ }));
19
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
20
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
21
+ }) : function(o, v) {
22
+ o["default"] = v;
23
+ });
24
+ var __importStar = (this && this.__importStar) || (function () {
25
+ var ownKeys = function(o) {
26
+ ownKeys = Object.getOwnPropertyNames || function (o) {
27
+ var ar = [];
28
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
29
+ return ar;
30
+ };
31
+ return ownKeys(o);
32
+ };
33
+ return function (mod) {
34
+ if (mod && mod.__esModule) return mod;
35
+ var result = {};
36
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
37
+ __setModuleDefault(result, mod);
38
+ return result;
39
+ };
40
+ })();
41
+ Object.defineProperty(exports, "__esModule", { value: true });
42
+ exports.evalai = exports.defineEval = void 0;
43
+ exports.defineSuite = defineSuite;
44
+ exports.createContext = createContext;
45
+ exports.createResult = createResult;
46
+ const crypto = __importStar(require("node:crypto"));
47
+ const path = __importStar(require("node:path"));
48
+ const registry_1 = require("./registry");
49
+ const types_1 = require("./types");
50
/**
 * Derive the caller's source position from a freshly captured stack trace.
 *
 * Scanning starts at the fourth stack line and skips empty frames and frames
 * mentioning "node_modules" or "internal/modules". The first remaining frame
 * that matches either the "at fn (file:line:col)" or the bare
 * "at file:line:col" layout wins.
 *
 * @returns `{ filePath, line, column }` with `filePath` passed through `path.resolve`
 * @throws SpecRegistrationError when no stack is available or no frame can be parsed
 */
function getCallerPosition() {
  const { stack } = new Error();
  if (!stack) {
    throw new types_1.SpecRegistrationError("Unable to determine caller position");
  }
  const parenthesized = /at\s+.*?\((.*?):(\d+):(\d+)\)/;
  const bare = /at\s+(.*?):(\d+):(\d+)/;
  // The first three lines are assumed to be the message and this module's own frames.
  const frames = stack.split("\n").slice(3);
  for (const frame of frames) {
    const skip = !frame ||
      frame.includes("node_modules") ||
      frame.includes("internal/modules");
    if (skip) {
      continue;
    }
    // Parenthesized layout first, bare layout as the fallback.
    const hit = frame.match(parenthesized) || frame.match(bare);
    if (hit) {
      return {
        filePath: path.resolve(hit[1]),
        line: parseInt(hit[2], 10),
        column: parseInt(hit[3], 10),
      };
    }
  }
  throw new types_1.SpecRegistrationError("Unable to parse caller position from stack trace");
}
92
/**
 * Compute the content-addressable ID of a specification: the first 20 hex
 * characters of the SHA-256 digest of
 * "namespace|path|name|line:column", where the path is relative to the
 * current working directory and uses "/" separators.
 */
function generateSpecId(namespace, filePath, name, position) {
  const fromRoot = path.relative(process.cwd(), filePath);
  // Normalize platform separators so the ID is the same on every OS.
  const posixPath = fromRoot.split(path.sep).join("/");
  const coordinates = `${position.line}:${position.column}`;
  const identity = [namespace, posixPath, name, coordinates].join("|");
  const hasher = crypto.createHash("sha256");
  hasher.update(identity);
  return hasher.digest("hex").slice(0, 20);
}
109
+ /**
110
+ * Validate specification name
111
+ */
112
+ function validateSpecName(name) {
113
+ if (!name || typeof name !== "string") {
114
+ throw new types_1.SpecRegistrationError("Specification name must be a non-empty string");
115
+ }
116
+ if (name.trim() === "") {
117
+ throw new types_1.SpecRegistrationError("Specification name cannot be empty");
118
+ }
119
+ if (name.length > 100) {
120
+ throw new types_1.SpecRegistrationError("Specification name must be 100 characters or less");
121
+ }
122
+ // Check for invalid characters
123
+ if (!/^[a-zA-Z0-9\s\-_]+$/.test(name)) {
124
+ throw new types_1.SpecRegistrationError("Specification name can only contain letters, numbers, spaces, hyphens, and underscores");
125
+ }
126
+ }
127
/**
 * Check an executor and throw SpecRegistrationError when it is not a
 * function or when it declares more than one parameter.
 */
function validateExecutor(executor) {
  const isFunction = typeof executor === "function";
  if (!isFunction) {
    throw new types_1.SpecRegistrationError("Executor must be a function");
  }
  // Declared parameter count; zero or one (the context) is accepted.
  const declaredParameters = executor.length;
  if (declaredParameters > 1) {
    throw new types_1.SpecRegistrationError("Executor should accept exactly one parameter (context)");
  }
}
139
/**
 * Normalize the two defineEval call forms into one config object.
 *
 * A non-string first argument is treated as a ready config and returned
 * unchanged. A string first argument is a name: the executor is then
 * required, and `options` is spread on top of `{ name, executor }`.
 *
 * @throws SpecRegistrationError when a name is given without an executor
 */
function createSpecConfig(nameOrConfig, executor, options) {
  const isNameForm = typeof nameOrConfig === "string";
  if (!isNameForm) {
    // defineEval(config) form
    return nameOrConfig;
  }
  // defineEval(name, executor, options) form
  if (!executor) {
    throw new types_1.SpecRegistrationError("Executor function is required when using name parameter");
  }
  return { name: nameOrConfig, executor, ...options };
}
159
/**
 * Implementation behind defineEval: resolves the caller position, normalizes
 * and validates the arguments, then builds a specification and registers it
 * with the active runtime. Returns nothing.
 */
function defineEvalImpl(nameOrConfig, executor, options) {
  // Must be called directly from here: the position lookup counts stack frames.
  const callerPosition = getCallerPosition();
  const config = createSpecConfig(nameOrConfig, executor, options);
  validateSpecName(config.name);
  validateExecutor(config.executor);

  const runtime = (0, registry_1.getActiveRuntime)();
  const { filePath } = callerPosition;
  const { name, description, tags, metadata, timeout, retries, budget, model } = config;
  runtime.register({
    id: generateSpecId(runtime.namespace, filePath, name, callerPosition),
    name,
    filePath,
    position: callerPosition,
    description,
    tags,
    executor: config.executor,
    metadata,
    config: { timeout, retries, budget, model },
  });
}
194
+ /**
195
+ * Export the defineEval function with proper typing
196
+ * This is the main DSL entry point
197
+ */
198
+ exports.defineEval = defineEvalImpl;
199
+ /**
200
+ * Convenience export for evalai.test() alias (backward compatibility)
201
+ * Provides alternative naming that matches the original roadmap vision
202
+ */
203
+ exports.evalai = {
204
+ test: exports.defineEval,
205
+ };
206
/**
 * Group related specifications under a suite.
 *
 * For now the suite name is unused and each entry of `specs` is simply
 * invoked, in order, so that it can register itself. Layer 3 is planned to
 * build the dependency graph here.
 */
function defineSuite(_name, specs) {
  for (const registerSpec of specs) {
    registerSpec();
  }
}
217
/**
 * Build a specification context from `input`, `metadata` and `options`.
 * The values are stored as given. Handy for tests and manual execution.
 */
function createContext(input, metadata, options) {
  const context = { input, metadata, options };
  return context;
}
228
/**
 * Build a specification result from a config object. `pass`, `assertions`,
 * `metadata` and `error` are copied through; `score` is clamped to the
 * 0-100 range.
 */
function createResult(config) {
  const { pass, score, assertions, metadata, error } = config;
  const clampedScore = Math.min(100, Math.max(0, score));
  return {
    pass,
    score: clampedScore,
    assertions,
    metadata,
    error,
  };
}
241
+ /**
242
+ * Default export for convenience
243
+ */
244
+ exports.default = exports.defineEval;