@evalgate/sdk 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +638 -0
- package/README.md +398 -0
- package/dist/assertions.d.ts +189 -0
- package/dist/assertions.js +662 -0
- package/dist/batch.d.ts +68 -0
- package/dist/batch.js +179 -0
- package/dist/cache.d.ts +65 -0
- package/dist/cache.js +131 -0
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +132 -0
- package/dist/cli/baseline.d.ts +10 -0
- package/dist/cli/baseline.js +172 -0
- package/dist/cli/check.d.ts +73 -0
- package/dist/cli/check.js +355 -0
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +112 -0
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +230 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +685 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +419 -0
- package/dist/cli/doctor.d.ts +88 -0
- package/dist/cli/doctor.js +675 -0
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.d.ts +58 -0
- package/dist/cli/explain.js +561 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +135 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +110 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +103 -0
- package/dist/cli/formatters/types.d.ts +103 -0
- package/dist/cli/formatters/types.js +8 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +179 -0
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +252 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.js +332 -0
- package/dist/cli/init.d.ts +16 -0
- package/dist/cli/init.js +292 -0
- package/dist/cli/manifest.d.ts +103 -0
- package/dist/cli/manifest.js +282 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +89 -0
- package/dist/cli/print-config.d.ts +29 -0
- package/dist/cli/print-config.js +270 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/regression-gate.d.ts +15 -0
- package/dist/cli/regression-gate.js +341 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +132 -0
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +395 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +91 -0
- package/dist/cli/upgrade.d.ts +15 -0
- package/dist/cli/upgrade.js +492 -0
- package/dist/cli/workspace.d.ts +31 -0
- package/dist/cli/workspace.js +68 -0
- package/dist/client.d.ts +368 -0
- package/dist/client.js +893 -0
- package/dist/client.request.test.d.ts +1 -0
- package/dist/client.request.test.js +232 -0
- package/dist/context.d.ts +134 -0
- package/dist/context.js +215 -0
- package/dist/errors.d.ts +82 -0
- package/dist/errors.js +298 -0
- package/dist/export.d.ts +195 -0
- package/dist/export.js +344 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.js +153 -0
- package/dist/integrations/anthropic.d.ts +91 -0
- package/dist/integrations/anthropic.js +163 -0
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +232 -0
- package/dist/integrations/openai.d.ts +92 -0
- package/dist/integrations/openai.js +160 -0
- package/dist/local.d.ts +39 -0
- package/dist/local.js +148 -0
- package/dist/logger.d.ts +128 -0
- package/dist/logger.js +227 -0
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +74 -0
- package/dist/pagination.js +139 -0
- package/dist/regression.d.ts +100 -0
- package/dist/regression.js +44 -0
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +400 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +244 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +357 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +403 -0
- package/dist/runtime/run-report.d.ts +200 -0
- package/dist/runtime/run-report.js +222 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/snapshot.d.ts +176 -0
- package/dist/snapshot.js +322 -0
- package/dist/streaming.d.ts +173 -0
- package/dist/streaming.js +268 -0
- package/dist/testing.d.ts +273 -0
- package/dist/testing.js +317 -0
- package/dist/types.d.ts +754 -0
- package/dist/types.js +54 -0
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +41 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +389 -0
- package/dist/workflows.js +671 -0
- package/package.json +117 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* RUNTIME-104: Deterministic Report Serialization (RunReport v1)
|
|
4
|
+
*
|
|
5
|
+
* Stable report format for downstream processing (explain, diff, history).
|
|
6
|
+
* Mirrors CheckReport conventions for consistency.
|
|
7
|
+
*/
|
|
8
|
+
// Module-interop helper (looks like the standard TypeScript-emitted `__createBinding`;
// an already-present `this.__createBinding` is reused instead of redefining it).
// Exposes `m[k]` on `o` under the name `k2` (`k2` defaults to `k`):
//  - With `Object.create` available, the property is defined from `m`'s own descriptor
//    for `k`. That descriptor is replaced by an enumerable getter returning `m[k]` when it
//    is missing, when it is an accessor and `m` is not flagged `__esModule`, or when it is
//    a data property that is writable or configurable.
//  - Without `Object.create`, the current value is simply copied (`o[k2] = m[k]`).
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
9
|
+
if (k2 === undefined) k2 = k;
|
|
10
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
11
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
12
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
13
|
+
}
|
|
14
|
+
Object.defineProperty(o, k2, desc);
|
|
15
|
+
}) : (function(o, m, k, k2) {
|
|
16
|
+
if (k2 === undefined) k2 = k;
|
|
17
|
+
o[k2] = m[k];
|
|
18
|
+
}));
|
|
19
|
+
// Module-interop helper (an already-present `this.__setModuleDefault` is reused).
// Stores `v` as the `default` member of `o`: via `Object.defineProperty` as an enumerable
// data property (no `writable`/`configurable` flags are passed) when `Object.create`
// exists, otherwise by plain assignment.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
20
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
21
|
+
}) : function(o, v) {
|
|
22
|
+
o["default"] = v;
|
|
23
|
+
});
|
|
24
|
+
// Module-interop helper (an already-present `this.__importStar` is reused).
// Returns a function that turns a required module into a namespace-style object:
//  - a module flagged `__esModule` is returned untouched;
//  - otherwise every own key except "default" is re-exposed on a fresh object through
//    `__createBinding`, and the module itself is stored as `default` through
//    `__setModuleDefault` (this also happens when `mod` is null/undefined).
var __importStar = (this && this.__importStar) || (function () {
|
|
25
|
+
// On first call `ownKeys` replaces itself with `Object.getOwnPropertyNames`, or with a
// `for...in` + `hasOwnProperty` fallback when that function is unavailable.
var ownKeys = function(o) {
|
|
26
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
27
|
+
var ar = [];
|
|
28
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
29
|
+
return ar;
|
|
30
|
+
};
|
|
31
|
+
return ownKeys(o);
|
|
32
|
+
};
|
|
33
|
+
return function (mod) {
|
|
34
|
+
if (mod && mod.__esModule) return mod;
|
|
35
|
+
var result = {};
|
|
36
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
37
|
+
__setModuleDefault(result, mod);
|
|
38
|
+
return result;
|
|
39
|
+
};
|
|
40
|
+
})();
|
|
41
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
42
|
+
exports.RunReportBuilder = exports.RUN_REPORT_SCHEMA_VERSION = void 0;
|
|
43
|
+
exports.createRunReport = createRunReport;
|
|
44
|
+
exports.parseRunReport = parseRunReport;
|
|
45
|
+
/**
|
|
46
|
+
* RunReport schema version - increment when breaking changes occur
|
|
47
|
+
*/
|
|
48
|
+
exports.RUN_REPORT_SCHEMA_VERSION = "1";
|
|
49
|
+
/**
 * RunReport builder for creating deterministic reports.
 *
 * Accumulates per-test results, keeps the summary counters current after every
 * added result, and produces the final report object through build().
 */
class RunReportBuilder {
    /**
     * Initialize report with basic metadata.
     *
     * @param runId - Stored verbatim as `report.runId`.
     * @param runtimeInfo - Stored verbatim as `report.runtime`.
     */
    constructor(runId, runtimeInfo) {
        this.report = {
            schemaVersion: exports.RUN_REPORT_SCHEMA_VERSION,
            results: [],
            failures: [],
            summary: {
                total: 0,
                passed: 0,
                failed: 0,
                errors: 0,
                timeouts: 0,
                passRate: 0,
                averageScore: 0,
                totalDurationMs: 0,
                success: true,
            },
        };
        this.report.startedAt = new Date().toISOString();
        this.report.runId = runId;
        this.report.runtime = runtimeInfo;
    }
    /**
     * Add a test result to the report.
     *
     * Appends an entry to `results`, updates the summary, and also records a
     * failure entry when the result did not pass or is classified as
     * "error" / "timeout".
     *
     * @param testId - Test identifier (used as the sort key in build()).
     * @param testName - Human-readable test name.
     * @param filePath - File the test is defined in.
     * @param position - Position of the test within that file.
     * @param input - Input the test was executed with.
     * @param result - Execution result; `pass`, `score`, `durationMs`, `metadata`,
     *   `assertions`, `classification`, `errorEnvelope` and `error` are read.
     */
    addResult(testId, testName, filePath, position, input, result) {
        const runResult = {
            testId,
            testName,
            filePath,
            position,
            input,
            pass: result.pass,
            score: result.score,
            durationMs: result.durationMs || 0,
            metadata: result.metadata,
            tags: [], // TODO: Extract from spec
            // Unnamed assertions get a positional fallback name.
            assertions: result.assertions?.map((assertion, index) => ({
                name: assertion.name || `assertion-${index}`,
                passed: assertion.passed,
                message: assertion.message,
            })),
        };
        this.report.results?.push(runResult);
        // Update summary
        this.updateSummary(result);
        // Add to failures if needed
        if (!result.pass ||
            result.classification === "error" ||
            result.classification === "timeout") {
            this.addFailure(testId, testName, filePath, position, result);
        }
    }
    /**
     * Update summary statistics for one newly added result.
     *
     * Must run after the result has been pushed to `report.results`, because the
     * average score is recomputed from that array.
     *
     * @param result - The execution result that was just added.
     */
    updateSummary(result) {
        const summary = this.report.summary;
        summary.total++;
        summary.totalDurationMs += result.durationMs || 0;
        if (result.pass) {
            summary.passed++;
        }
        else if (result.classification === "error") {
            summary.errors++;
            summary.success = false;
        }
        else if (result.classification === "timeout") {
            summary.timeouts++;
            summary.success = false;
        }
        else {
            // NOTE(review): a plain failure does not clear `summary.success`;
            // only errors and timeouts do. Confirm this is intended.
            summary.failed++;
        }
        // Calculate rates and averages
        summary.passRate =
            summary.total > 0 ? (summary.passed / summary.total) * 100 : 0;
        // Average over results with a strictly positive score.
        // NOTE(review): this drops genuine zero scores from the average rather
        // than filtering on error/timeout classification - confirm intent.
        const scoredResults = this.report.results?.filter((r) => r.score > 0) || [];
        summary.averageScore =
            scoredResults.length > 0
                ? scoredResults.reduce((sum, r) => sum + r.score, 0) /
                    scoredResults.length
                : 0;
    }
    /**
     * Add a failure to the report.
     *
     * The failure classification is "error" or "timeout" when the result says so,
     * and "failed" in every other case. The message falls back to "Test failed"
     * when the result carries no error text.
     */
    addFailure(testId, testName, filePath, position, result) {
        const failure = {
            testId,
            testName,
            filePath,
            position,
            classification: result.classification === "error"
                ? "error"
                : result.classification === "timeout"
                    ? "timeout"
                    : "failed",
            errorEnvelope: result.errorEnvelope,
            message: result.error || "Test failed",
            timestamp: new Date().toISOString(),
        };
        this.report.failures?.push(failure);
    }
    /**
     * Set execution configuration.
     *
     * Defaults (local executor, 30000 ms timeout, current process environment)
     * are applied first; any key present in `config` overrides them.
     *
     * @param config - Partial configuration merged over the defaults.
     */
    setConfig(config) {
        this.report.config = {
            executorType: "local",
            defaultTimeout: 30000,
            environment: {
                nodeVersion: process.version,
                platform: process.platform,
                arch: process.arch,
            },
            ...config,
        };
    }
    /**
     * Finalize and return the complete report.
     *
     * Sorts results and failures by testId, stamps `finishedAt`, and attaches a
     * `toJSON()` method that returns the report as a pretty-printed JSON string.
     * The returned object is the builder's own report object, not a copy.
     *
     * @returns The finalized report.
     */
    build() {
        // Sort results and failures by testId for determinism
        this.report.results?.sort((a, b) => a.testId.localeCompare(b.testId));
        this.report.failures?.sort((a, b) => a.testId.localeCompare(b.testId));
        // Set completion timestamp
        this.report.finishedAt = new Date().toISOString();
        const finalReport = this.report;
        // JSON.stringify invokes an object's own toJSON(), so stringifying
        // `finalReport` itself from inside its toJSON() would recurse without end.
        // Serialize a shallow copy that omits the toJSON member instead.
        finalReport.toJSON = () => {
            const { toJSON: _omitted, ...data } = finalReport;
            return JSON.stringify(data, null, 2);
        };
        return finalReport;
    }
    /**
     * Serialize report to JSON string.
     * Ensures deterministic output (results and failures are sorted by build()).
     *
     * @returns Pretty-printed JSON text of the finalized report.
     */
    toJSON() {
        // Delegate to the report's own serializer; passing the report to
        // JSON.stringify here would encode its string result a second time.
        return this.build().toJSON();
    }
    /**
     * Write report to file.
     *
     * @param filePath - Destination path; the file is written as UTF-8 JSON text.
     */
    async writeToFile(filePath) {
        const fs = await Promise.resolve().then(() => __importStar(require("node:fs/promises")));
        await fs.writeFile(filePath, this.toJSON(), "utf-8");
    }
}
|
|
205
|
+
exports.RunReportBuilder = RunReportBuilder;
|
|
206
|
+
/**
 * Factory for RunReportBuilder.
 *
 * @param runId - Run identifier forwarded to the builder.
 * @param runtimeInfo - Runtime description forwarded to the builder.
 * @returns A fresh RunReportBuilder for the given run.
 */
function createRunReport(runId, runtimeInfo) {
    const builder = new RunReportBuilder(runId, runtimeInfo);
    return builder;
}
|
|
212
|
+
/**
 * Parse a RunReport from JSON string.
 *
 * @param json - JSON text of a serialized RunReport.
 * @returns The parsed report object.
 * @throws {SyntaxError} When `json` is not valid JSON (raised by JSON.parse).
 * @throws {Error} When the parsed value is not a JSON object, or its
 *   `schemaVersion` differs from RUN_REPORT_SCHEMA_VERSION.
 */
function parseRunReport(json) {
    const report = JSON.parse(json);
    // Valid JSON such as `null`, `42` or `[]` is not a report; reject it with a
    // clear message instead of failing on the property access below.
    if (report === null || typeof report !== "object" || Array.isArray(report)) {
        throw new Error("Invalid RunReport: expected a JSON object");
    }
    // Validate schema version
    if (report.schemaVersion !== exports.RUN_REPORT_SCHEMA_VERSION) {
        throw new Error(`Unsupported RunReport schema version: ${report.schemaVersion}`);
    }
    return report;
}
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* EvalGate Runtime Types - Layer 1 Foundation
|
|
3
|
+
*
|
|
4
|
+
* Core types for the evaluation specification programming model.
|
|
5
|
+
* Everything revolves around the Evaluation Specification primitive.
|
|
6
|
+
*/
|
|
7
|
+
import type { AssertionResult } from "../assertions";
|
|
8
|
+
/**
|
|
9
|
+
* Core evaluation specification - the single primitive
|
|
10
|
+
* Represents a behavioral specification test
|
|
11
|
+
*/
|
|
12
|
+
export interface EvalSpec {
|
|
13
|
+
/** Content-addressable unique identifier */
|
|
14
|
+
id: string;
|
|
15
|
+
/** Human-readable name */
|
|
16
|
+
name: string;
|
|
17
|
+
/** File path where this spec is defined */
|
|
18
|
+
filePath: string;
|
|
19
|
+
/** AST position for identity stability */
|
|
20
|
+
position: {
|
|
21
|
+
line: number;
|
|
22
|
+
column: number;
|
|
23
|
+
};
|
|
24
|
+
/** Specification description */
|
|
25
|
+
description?: string;
|
|
26
|
+
/** Categorization tags */
|
|
27
|
+
tags?: string[];
|
|
28
|
+
/** Specification execution function */
|
|
29
|
+
executor: EvalExecutor;
|
|
30
|
+
/** Specification metadata */
|
|
31
|
+
metadata?: Record<string, unknown>;
|
|
32
|
+
/** Execution configuration */
|
|
33
|
+
config?: {
|
|
34
|
+
timeout?: number;
|
|
35
|
+
retries?: number;
|
|
36
|
+
budget?: string;
|
|
37
|
+
model?: string | "auto";
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Specification execution context
|
|
42
|
+
*/
|
|
43
|
+
export interface EvalContext {
|
|
44
|
+
/** Test input data */
|
|
45
|
+
input: string;
|
|
46
|
+
/** Context metadata */
|
|
47
|
+
metadata?: Record<string, unknown>;
|
|
48
|
+
/** Execution options */
|
|
49
|
+
options?: EvalOptions;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Execution options
|
|
53
|
+
*/
|
|
54
|
+
export interface EvalOptions {
|
|
55
|
+
/** Model selection */
|
|
56
|
+
model?: string | "auto";
|
|
57
|
+
/** Budget constraint */
|
|
58
|
+
budget?: string;
|
|
59
|
+
/** Timeout override */
|
|
60
|
+
timeout?: number;
|
|
61
|
+
/** Custom execution parameters */
|
|
62
|
+
[key: string]: unknown;
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* Specification executor function
|
|
66
|
+
*/
|
|
67
|
+
export type EvalExecutor = (context: EvalContext) => Promise<EvalResult>;
|
|
68
|
+
/**
|
|
69
|
+
* Specification execution result
|
|
70
|
+
*/
|
|
71
|
+
export interface EvalResult {
|
|
72
|
+
/** Pass/fail determination */
|
|
73
|
+
pass: boolean;
|
|
74
|
+
/** Numeric score (0-100) */
|
|
75
|
+
score: number;
|
|
76
|
+
/** Assertion results */
|
|
77
|
+
assertions?: AssertionResult[];
|
|
78
|
+
/** Result metadata */
|
|
79
|
+
metadata?: Record<string, unknown>;
|
|
80
|
+
/** Execution duration */
|
|
81
|
+
durationMs?: number;
|
|
82
|
+
/** Execution error if failed */
|
|
83
|
+
error?: string;
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Scoped runtime context - prevents cross-run contamination
|
|
87
|
+
*/
|
|
88
|
+
export interface EvalRuntime {
|
|
89
|
+
/** Runtime unique identifier */
|
|
90
|
+
id: string;
|
|
91
|
+
/** Project namespace */
|
|
92
|
+
namespace: string;
|
|
93
|
+
/** Runtime creation timestamp */
|
|
94
|
+
createdAt: Date;
|
|
95
|
+
/** Registered specifications */
|
|
96
|
+
specs: Map<string, EvalSpec>;
|
|
97
|
+
/** Runtime statistics */
|
|
98
|
+
stats: RuntimeStats;
|
|
99
|
+
/** Register a new specification */
|
|
100
|
+
register(spec: EvalSpec): void;
|
|
101
|
+
/** Get specification by ID */
|
|
102
|
+
get(id: string): EvalSpec | undefined;
|
|
103
|
+
/** List all specifications */
|
|
104
|
+
list(): EvalSpec[];
|
|
105
|
+
/** Find specifications by criteria */
|
|
106
|
+
find(criteria: SpecSearchCriteria): EvalSpec[];
|
|
107
|
+
/** Clear all specifications (dispose) */
|
|
108
|
+
clear(): void;
|
|
109
|
+
/** Get runtime health metrics */
|
|
110
|
+
getHealth(): RuntimeHealth;
|
|
111
|
+
}
|
|
112
|
+
/**
|
|
113
|
+
* Specification search criteria
|
|
114
|
+
*/
|
|
115
|
+
export interface SpecSearchCriteria {
|
|
116
|
+
/** Tags to search for - presumably compared with EvalSpec.tags; TODO confirm any/all matching */
tags?: string[];
|
|
117
|
+
/** Files to search in - presumably compared with EvalSpec.filePath; TODO confirm */
files?: string[];
|
|
118
|
+
/** Names to search for - presumably compared with EvalSpec.name; TODO confirm exact vs. partial match */
names?: string[];
|
|
119
|
+
/** Metadata entries to search for - presumably compared with EvalSpec.metadata; TODO confirm */
metadata?: Record<string, unknown>;
|
|
120
|
+
}
|
|
121
|
+
/**
|
|
122
|
+
* Runtime statistics
|
|
123
|
+
*/
|
|
124
|
+
export interface RuntimeStats {
|
|
125
|
+
/** Total registered specs */
|
|
126
|
+
totalSpecs: number;
|
|
127
|
+
/** Specs by tag */
|
|
128
|
+
specsByTag: Record<string, number>;
|
|
129
|
+
/** Specs by file */
|
|
130
|
+
specsByFile: Record<string, number>;
|
|
131
|
+
/** Memory usage estimate */
|
|
132
|
+
memoryUsage: number;
|
|
133
|
+
/** Last updated timestamp */
|
|
134
|
+
lastUpdated: Date;
|
|
135
|
+
}
|
|
136
|
+
/**
|
|
137
|
+
* Runtime health information
|
|
138
|
+
*/
|
|
139
|
+
export interface RuntimeHealth {
|
|
140
|
+
/** Runtime status */
|
|
141
|
+
status: "healthy" | "warning" | "error";
|
|
142
|
+
/** Memory usage in bytes */
|
|
143
|
+
memoryUsage: number;
|
|
144
|
+
/** Number of registered specs */
|
|
145
|
+
specCount: number;
|
|
146
|
+
/** Health issues if any */
|
|
147
|
+
issues: string[];
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Content-addressable identity components
|
|
151
|
+
*/
|
|
152
|
+
export interface SpecIdentity {
|
|
153
|
+
/** Project namespace hash */
|
|
154
|
+
namespace: string;
|
|
155
|
+
/** Relative file path */
|
|
156
|
+
filePath: string;
|
|
157
|
+
/** Specification name */
|
|
158
|
+
name: string;
|
|
159
|
+
/** Suite path if nested */
|
|
160
|
+
suitePath?: string;
|
|
161
|
+
/** AST position */
|
|
162
|
+
position: {
|
|
163
|
+
line: number;
|
|
164
|
+
column: number;
|
|
165
|
+
};
|
|
166
|
+
}
|
|
167
|
+
/**
|
|
168
|
+
* Specification definition function signature
|
|
169
|
+
* This is the defineEval() DSL function signature
|
|
170
|
+
*/
|
|
171
|
+
export interface DefineEvalFunction {
|
|
172
|
+
/**
|
|
173
|
+
* Define a behavioral specification
|
|
174
|
+
* @param name - Human-readable specification name
|
|
175
|
+
* @param executor - Specification execution function
|
|
176
|
+
* @param options - Optional configuration
|
|
177
|
+
*/
|
|
178
|
+
<TInput = string>(name: string, executor: (context: EvalContext & {
|
|
179
|
+
input: TInput;
|
|
180
|
+
}) => Promise<EvalResult>, options?: SpecOptions): void;
|
|
181
|
+
/**
|
|
182
|
+
* Define a specification with full configuration
|
|
183
|
+
* @param config - Complete specification configuration
|
|
184
|
+
*/
|
|
185
|
+
(config: SpecConfig): void;
|
|
186
|
+
}
|
|
187
|
+
/**
|
|
188
|
+
* Specification definition options
|
|
189
|
+
*/
|
|
190
|
+
export interface SpecOptions {
|
|
191
|
+
/** Specification description */
|
|
192
|
+
description?: string;
|
|
193
|
+
/** Categorization tags */
|
|
194
|
+
tags?: string[];
|
|
195
|
+
/** Execution timeout */
|
|
196
|
+
timeout?: number;
|
|
197
|
+
/** Retry attempts */
|
|
198
|
+
retries?: number;
|
|
199
|
+
/** Budget constraint */
|
|
200
|
+
budget?: string;
|
|
201
|
+
/** Model selection */
|
|
202
|
+
model?: string | "auto";
|
|
203
|
+
/** Specification metadata */
|
|
204
|
+
metadata?: Record<string, unknown>;
|
|
205
|
+
/** Dependencies for impact analysis */
|
|
206
|
+
dependsOn?: {
|
|
207
|
+
prompts?: string[];
|
|
208
|
+
datasets?: string[];
|
|
209
|
+
tools?: string[];
|
|
210
|
+
code?: string[];
|
|
211
|
+
};
|
|
212
|
+
}
|
|
213
|
+
/**
|
|
214
|
+
* Complete specification configuration
|
|
215
|
+
*/
|
|
216
|
+
export interface SpecConfig extends SpecOptions {
|
|
217
|
+
/** Specification name (required) */
|
|
218
|
+
name: string;
|
|
219
|
+
/** Specification executor (required) */
|
|
220
|
+
executor: EvalExecutor;
|
|
221
|
+
}
|
|
222
|
+
/**
|
|
223
|
+
* Execution interface - abstraction layer
|
|
224
|
+
* Prevents worker coupling and enables multiple backends
|
|
225
|
+
*/
|
|
226
|
+
export interface EvalExecutorInterface {
|
|
227
|
+
/** Execute a single specification */
|
|
228
|
+
executeSpec(spec: EvalSpec, input: string): Promise<EvalResult>;
|
|
229
|
+
/** Execute multiple specifications */
|
|
230
|
+
executeSpecs(specs: EvalSpec[], inputs: string[]): Promise<EvalResult[]>;
|
|
231
|
+
/** Get executor capabilities */
|
|
232
|
+
getCapabilities(): ExecutorCapabilities;
|
|
233
|
+
}
|
|
234
|
+
/**
|
|
235
|
+
* Executor capabilities
|
|
236
|
+
*/
|
|
237
|
+
export interface ExecutorCapabilities {
|
|
238
|
+
/** Executor type */
|
|
239
|
+
type: "local" | "cloud" | "worker";
|
|
240
|
+
/** Parallel execution support */
|
|
241
|
+
parallel: boolean;
|
|
242
|
+
/** Maximum parallel workers */
|
|
243
|
+
maxParallel?: number;
|
|
244
|
+
/** Supported models */
|
|
245
|
+
supportedModels: string[];
|
|
246
|
+
/** Cost tracking support */
|
|
247
|
+
costTracking: boolean;
|
|
248
|
+
/** Streaming support */
|
|
249
|
+
streaming: boolean;
|
|
250
|
+
}
|
|
251
|
+
/**
|
|
252
|
+
* Local executor interface (an executor whose `type` is "local")
|
|
253
|
+
*/
|
|
254
|
+
export interface LocalExecutor extends EvalExecutorInterface {
|
|
255
|
+
type: "local";
|
|
256
|
+
}
|
|
257
|
+
/**
|
|
258
|
+
* Cloud executor interface (an executor whose `type` is "cloud")
|
|
259
|
+
*/
|
|
260
|
+
export interface CloudExecutor extends EvalExecutorInterface {
|
|
261
|
+
type: "cloud";
|
|
262
|
+
/** Cloud provider */
|
|
263
|
+
provider: string;
|
|
264
|
+
/** Region */
|
|
265
|
+
region?: string;
|
|
266
|
+
}
|
|
267
|
+
/**
|
|
268
|
+
* Worker executor interface (an executor whose `type` is "worker")
|
|
269
|
+
*/
|
|
270
|
+
export interface WorkerExecutor extends EvalExecutorInterface {
|
|
271
|
+
type: "worker";
|
|
272
|
+
/** Worker pool configuration */
|
|
273
|
+
workerPool: {
|
|
274
|
+
size: number;
|
|
275
|
+
maxMemory: number;
|
|
276
|
+
timeout: number;
|
|
277
|
+
};
|
|
278
|
+
}
|
|
279
|
+
/**
|
|
280
|
+
* Runtime errors
|
|
281
|
+
*/
|
|
282
|
+
export declare class EvalRuntimeError extends Error {
|
|
283
|
+
code: string;
|
|
284
|
+
details?: Record<string, unknown> | undefined;
|
|
285
|
+
constructor(message: string, code: string, details?: Record<string, unknown> | undefined);
|
|
286
|
+
}
|
|
287
|
+
export declare class SpecRegistrationError extends EvalRuntimeError {
|
|
288
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
289
|
+
}
|
|
290
|
+
export declare class SpecExecutionError extends EvalRuntimeError {
|
|
291
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
292
|
+
}
|
|
293
|
+
export declare class RuntimeError extends EvalRuntimeError {
|
|
294
|
+
constructor(message: string, details?: Record<string, unknown>);
|
|
295
|
+
}
|
|
296
|
+
/**
|
|
297
|
+
* EvalExecutionError wrapper for safe error boundaries
|
|
298
|
+
*/
|
|
299
|
+
export declare class EvalExecutionError extends Error {
|
|
300
|
+
readonly name = "EvalExecutionError";
|
|
301
|
+
readonly code: string;
|
|
302
|
+
readonly testId: string;
|
|
303
|
+
readonly filePath: string;
|
|
304
|
+
readonly position: {
|
|
305
|
+
line: number;
|
|
306
|
+
column: number;
|
|
307
|
+
};
|
|
308
|
+
readonly originalError?: Error;
|
|
309
|
+
constructor(message: string, context: {
|
|
310
|
+
testId: string;
|
|
311
|
+
filePath: string;
|
|
312
|
+
position: {
|
|
313
|
+
line: number;
|
|
314
|
+
column: number;
|
|
315
|
+
};
|
|
316
|
+
code?: string;
|
|
317
|
+
originalError?: Error;
|
|
318
|
+
});
|
|
319
|
+
/**
|
|
320
|
+
* Convert to normalized error envelope for reporting
|
|
321
|
+
*/
|
|
322
|
+
toEnvelope(): ExecutionErrorEnvelope;
|
|
323
|
+
}
|
|
324
|
+
/**
|
|
325
|
+
* Normalized error envelope for reporting
|
|
326
|
+
*/
|
|
327
|
+
export interface ExecutionErrorEnvelope {
|
|
328
|
+
/** Error classification */
|
|
329
|
+
classification: "execution_error" | "timeout_error" | "assertion_error" | "system_error";
|
|
330
|
+
/** Error code for programmatic handling */
|
|
331
|
+
code: string;
|
|
332
|
+
/** Human-readable error message */
|
|
333
|
+
message: string;
|
|
334
|
+
/** Stack trace if available */
|
|
335
|
+
stack?: string;
|
|
336
|
+
/** Test specification ID */
|
|
337
|
+
testId: string;
|
|
338
|
+
/** File where error occurred */
|
|
339
|
+
filePath: string;
|
|
340
|
+
/** Position in file where error occurred */
|
|
341
|
+
position: {
|
|
342
|
+
line: number;
|
|
343
|
+
column: number;
|
|
344
|
+
};
|
|
345
|
+
/** Error timestamp */
|
|
346
|
+
timestamp: string;
|
|
347
|
+
}
|
|
348
|
+
/**
|
|
349
|
+
* Enhanced execution result with error classification
|
|
350
|
+
*/
|
|
351
|
+
export interface EnhancedEvalResult extends EvalResult {
|
|
352
|
+
/** Error envelope if execution failed */
|
|
353
|
+
errorEnvelope?: ExecutionErrorEnvelope;
|
|
354
|
+
/** Execution classification */
|
|
355
|
+
classification: "passed" | "failed" | "error" | "timeout";
|
|
356
|
+
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* EvalGate Runtime Types - Layer 1 Foundation
|
|
4
|
+
*
|
|
5
|
+
* Core types for the evaluation specification programming model.
|
|
6
|
+
* Everything revolves around the Evaluation Specification primitive.
|
|
7
|
+
*/
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.EvalExecutionError = exports.RuntimeError = exports.SpecExecutionError = exports.SpecRegistrationError = exports.EvalRuntimeError = void 0;
|
|
10
|
+
/**
|
|
11
|
+
* Runtime errors
|
|
12
|
+
*/
|
|
13
|
+
class EvalRuntimeError extends Error {
    /**
     * Base class for runtime errors.
     *
     * @param message - Human-readable error message.
     * @param code - Machine-readable error code stored on `code`.
     * @param details - Optional extra context stored on `details`.
     */
    constructor(message, code, details) {
        super(message);
        // Same assignment order as before: code, details, then name.
        Object.assign(this, { code, details, name: "EvalRuntimeError" });
    }
}
|
|
21
|
+
exports.EvalRuntimeError = EvalRuntimeError;
|
|
22
|
+
class SpecRegistrationError extends EvalRuntimeError {
    /**
     * EvalRuntimeError whose code is fixed to "SPEC_REGISTRATION_ERROR".
     *
     * @param message - Human-readable error message.
     * @param details - Optional extra context forwarded to the base class.
     */
    constructor(message, details) {
        const code = "SPEC_REGISTRATION_ERROR";
        super(message, code, details);
        this.name = "SpecRegistrationError";
    }
}
|
|
28
|
+
exports.SpecRegistrationError = SpecRegistrationError;
|
|
29
|
+
class SpecExecutionError extends EvalRuntimeError {
    /**
     * EvalRuntimeError whose code is fixed to "SPEC_EXECUTION_ERROR".
     *
     * @param message - Human-readable error message.
     * @param details - Optional extra context forwarded to the base class.
     */
    constructor(message, details) {
        const code = "SPEC_EXECUTION_ERROR";
        super(message, code, details);
        this.name = "SpecExecutionError";
    }
}
|
|
35
|
+
exports.SpecExecutionError = SpecExecutionError;
|
|
36
|
+
class RuntimeError extends EvalRuntimeError {
    /**
     * EvalRuntimeError whose code is fixed to "RUNTIME_ERROR".
     *
     * @param message - Human-readable error message.
     * @param details - Optional extra context forwarded to the base class.
     */
    constructor(message, details) {
        const code = "RUNTIME_ERROR";
        super(message, code, details);
        this.name = "RuntimeError";
    }
}
|
|
42
|
+
exports.RuntimeError = RuntimeError;
|
|
43
|
+
/**
 * EvalExecutionError wrapper for safe error boundaries.
 *
 * Carries the identity of the failing test (id, file, position) together with
 * the error that triggered it, and can be flattened into a reporting envelope.
 */
class EvalExecutionError extends Error {
    /**
     * @param message - Human-readable error message.
     * @param context - Failure context: `testId`, `filePath`, `position`, and
     *   optionally `code` (defaults to "EXECUTION_ERROR") and `originalError`.
     */
    constructor(message, context) {
        // Link the wrapped error as the standard `cause` so tooling that walks
        // cause chains still sees the underlying stack. No `cause` property is
        // created when there is no original error.
        super(message, context.originalError !== undefined
            ? { cause: context.originalError }
            : undefined);
        this.name = "EvalExecutionError";
        this.code = context.code || "EXECUTION_ERROR";
        this.testId = context.testId;
        this.filePath = context.filePath;
        this.position = context.position;
        this.originalError = context.originalError;
        // Maintain proper stack trace
        if (Error.captureStackTrace) {
            Error.captureStackTrace(this, EvalExecutionError);
        }
    }
    /**
     * Convert to normalized error envelope for reporting.
     *
     * The classification is always "execution_error" and the timestamp is taken
     * at the moment of the call, not when the error was created.
     *
     * @returns Plain object with classification, code, message, stack, testId,
     *   filePath, position and an ISO-8601 timestamp.
     */
    toEnvelope() {
        return {
            classification: "execution_error",
            code: this.code,
            message: this.message,
            stack: this.stack,
            testId: this.testId,
            filePath: this.filePath,
            position: this.position,
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
76
|
+
exports.EvalExecutionError = EvalExecutionError;
|