@evalgate/sdk 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +638 -0
- package/README.md +398 -0
- package/dist/assertions.d.ts +189 -0
- package/dist/assertions.js +662 -0
- package/dist/batch.d.ts +68 -0
- package/dist/batch.js +179 -0
- package/dist/cache.d.ts +65 -0
- package/dist/cache.js +131 -0
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +132 -0
- package/dist/cli/baseline.d.ts +10 -0
- package/dist/cli/baseline.js +172 -0
- package/dist/cli/check.d.ts +73 -0
- package/dist/cli/check.js +355 -0
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +112 -0
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +230 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +685 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +419 -0
- package/dist/cli/doctor.d.ts +88 -0
- package/dist/cli/doctor.js +675 -0
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.d.ts +58 -0
- package/dist/cli/explain.js +561 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +135 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +110 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +103 -0
- package/dist/cli/formatters/types.d.ts +103 -0
- package/dist/cli/formatters/types.js +8 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +179 -0
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +252 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.js +332 -0
- package/dist/cli/init.d.ts +16 -0
- package/dist/cli/init.js +292 -0
- package/dist/cli/manifest.d.ts +103 -0
- package/dist/cli/manifest.js +282 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +89 -0
- package/dist/cli/print-config.d.ts +29 -0
- package/dist/cli/print-config.js +270 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/regression-gate.d.ts +15 -0
- package/dist/cli/regression-gate.js +341 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +132 -0
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +395 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +91 -0
- package/dist/cli/upgrade.d.ts +15 -0
- package/dist/cli/upgrade.js +492 -0
- package/dist/cli/workspace.d.ts +31 -0
- package/dist/cli/workspace.js +68 -0
- package/dist/client.d.ts +368 -0
- package/dist/client.js +893 -0
- package/dist/client.request.test.d.ts +1 -0
- package/dist/client.request.test.js +232 -0
- package/dist/context.d.ts +134 -0
- package/dist/context.js +215 -0
- package/dist/errors.d.ts +82 -0
- package/dist/errors.js +298 -0
- package/dist/export.d.ts +195 -0
- package/dist/export.js +344 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.js +153 -0
- package/dist/integrations/anthropic.d.ts +91 -0
- package/dist/integrations/anthropic.js +163 -0
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +232 -0
- package/dist/integrations/openai.d.ts +92 -0
- package/dist/integrations/openai.js +160 -0
- package/dist/local.d.ts +39 -0
- package/dist/local.js +148 -0
- package/dist/logger.d.ts +128 -0
- package/dist/logger.js +227 -0
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +74 -0
- package/dist/pagination.js +139 -0
- package/dist/regression.d.ts +100 -0
- package/dist/regression.js +44 -0
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +400 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +244 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +357 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +403 -0
- package/dist/runtime/run-report.d.ts +200 -0
- package/dist/runtime/run-report.js +222 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/snapshot.d.ts +176 -0
- package/dist/snapshot.js +322 -0
- package/dist/streaming.d.ts +173 -0
- package/dist/streaming.js +268 -0
- package/dist/testing.d.ts +273 -0
- package/dist/testing.js +317 -0
- package/dist/types.d.ts +754 -0
- package/dist/types.js +54 -0
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +41 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +389 -0
- package/dist/workflows.js +671 -0
- package/package.json +117 -0
package/dist/testing.js
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Test Suite Builder
|
|
4
|
+
* Tier 2.7: Declarative test definitions
|
|
5
|
+
*
|
|
6
|
+
* @example
|
|
7
|
+
* ```typescript
|
|
8
|
+
* import { createTestSuite, expect } from '@evalgate/sdk';
|
|
9
|
+
*
|
|
10
|
+
* const suite = createTestSuite('chatbot-responses', {
|
|
11
|
+
* cases: [
|
|
12
|
+
* {
|
|
13
|
+
* input: 'Hello',
|
|
14
|
+
* assertions: [
|
|
15
|
+
* (output) => expect(output).toContain('greeting'),
|
|
16
|
+
* (output) => expect(output).toHaveSentiment('positive')
|
|
17
|
+
* ]
|
|
18
|
+
* }
|
|
19
|
+
* ]
|
|
20
|
+
* });
|
|
21
|
+
*
|
|
22
|
+
* const results = await suite.run();
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
+
exports.TestSuite = void 0;
|
|
27
|
+
exports.createTestSuite = createTestSuite;
|
|
28
|
+
exports.containsKeywords = containsKeywords;
|
|
29
|
+
exports.matchesPattern = matchesPattern;
|
|
30
|
+
exports.hasSentiment = hasSentiment;
|
|
31
|
+
exports.hasLength = hasLength;
|
|
32
|
+
const assertions_1 = require("./assertions");
|
|
33
|
+
/**
 * Test Suite for declarative evaluation testing.
 *
 * A suite is a name plus a config object. The fields this class reads from the
 * config are: `cases` (array of test cases), `executor` (optional function
 * called with each case's `input`), `timeout` (ms, falls back to 30000 when
 * falsy), `parallel`, `stopOnFailure` and `retries`.
 *
 * The fields read from each test case are: `id`, `input`, `expected`,
 * `assertions` and `metadata`.
 */
class TestSuite {
    /**
     * @param {string} name - Suite name, echoed back in run results and metadata.
     * @param {object} config - Suite configuration; stored by reference (not copied).
     */
    constructor(name, config) {
        this.name = name;
        this.config = config;
    }
    /**
     * Run all test cases.
     *
     * For each case the output is produced by `config.executor(input)` (raced
     * against a timeout) or, when no executor is configured, taken from the
     * case's `expected` value. Custom assertions are then applied; when a case
     * has `expected` but no `assertions`, a default `expect(actual).toEqual(expected)`
     * check is used instead. Any error thrown while producing or asserting the
     * output is captured on the case result (`error`) rather than propagated.
     *
     * Cases run concurrently when `config.parallel` is truthy, otherwise one at
     * a time (optionally stopping at the first failure via `config.stopOnFailure`).
     * Afterwards, cases that failed are re-run sequentially up to `config.retries`
     * times; a passing retry replaces the stored result and its id is reported
     * in `retriedCases`.
     *
     * @returns {Promise<object>} `{ name, total, passed, failed, durationMs, results }`
     *   plus `retriedCases` (array of ids) only when at least one retry passed.
     *
     * @example
     * ```typescript
     * const results = await suite.run();
     * console.log(`${results.passed}/${results.total} tests passed`);
     * ```
     */
    async run() {
        const startTime = Date.now();
        const results = [];
        const runTestCase = async (testCase, index) => {
            const caseStartTime = Date.now();
            const id = testCase.id || `case-${index}`;
            // Presence check rather than truthiness: "" (and other falsy values)
            // are legitimate expected outputs and must not be treated as "missing".
            const hasExpected = testCase.expected !== undefined && testCase.expected !== null;
            try {
                // Execute to get output
                let actual;
                if (this.config.executor) {
                    const timeout = this.config.timeout || 30000;
                    let timer;
                    const timeoutPromise = new Promise((_, reject) => {
                        timer = setTimeout(() => reject(new Error(`Test timeout after ${timeout}ms`)), timeout);
                    });
                    try {
                        actual = await Promise.race([
                            this.config.executor(testCase.input),
                            timeoutPromise,
                        ]);
                    }
                    finally {
                        // Always cancel the timer: otherwise it keeps the event loop
                        // alive for the full timeout, and if the executor threw
                        // synchronously the never-raced promise would later reject
                        // with nobody listening.
                        clearTimeout(timer);
                    }
                }
                else if (hasExpected) {
                    actual = testCase.expected; // Use expected as actual if no executor
                }
                else {
                    throw new Error("No executor provided and no expected output");
                }
                // Run assertions
                const assertions = [];
                let allPassed = true;
                // Run custom assertions
                if (testCase.assertions) {
                    // Accept a single assertion function as shorthand for a
                    // one-element list.
                    const customAssertions = typeof testCase.assertions === "function"
                        ? [testCase.assertions]
                        : testCase.assertions;
                    for (const assertion of customAssertions) {
                        const result = assertion(actual);
                        assertions.push(result);
                        if (!result.passed)
                            allPassed = false;
                    }
                }
                // Default equality check if expected provided
                if (hasExpected && !testCase.assertions) {
                    const result = (0, assertions_1.expect)(actual).toEqual(testCase.expected);
                    assertions.push(result);
                    if (!result.passed)
                        allPassed = false;
                }
                const durationMs = Date.now() - caseStartTime;
                return {
                    id,
                    input: testCase.input,
                    expected: testCase.expected,
                    actual,
                    passed: allPassed,
                    assertions,
                    durationMs,
                };
            }
            catch (error) {
                // Failures to execute or assert are reported as a failed case,
                // never thrown out of run().
                const durationMs = Date.now() - caseStartTime;
                return {
                    id,
                    input: testCase.input,
                    expected: testCase.expected,
                    actual: "",
                    passed: false,
                    assertions: [],
                    durationMs,
                    error: error instanceof Error ? error.message : String(error),
                };
            }
        };
        // Run tests
        if (this.config.parallel) {
            results.push(...(await Promise.all(this.config.cases.map((tc, i) => runTestCase(tc, i)))));
        }
        else {
            for (let i = 0; i < this.config.cases.length; i++) {
                const result = await runTestCase(this.config.cases[i], i);
                results.push(result);
                if (this.config.stopOnFailure && !result.passed) {
                    break;
                }
            }
        }
        // Retry pass: results[i] always corresponds to config.cases[i] because
        // results are appended in case order (an early stop only shortens the list).
        const retriedCases = [];
        const retries = this.config.retries ?? 0;
        if (retries > 0 && results.length > 0) {
            const failingIndices = results
                .map((r, i) => (r.passed ? -1 : i))
                .filter((i) => i >= 0);
            for (let attempt = 0; attempt < retries && failingIndices.length > 0; attempt++) {
                const toRetry = [...failingIndices];
                failingIndices.length = 0;
                for (const i of toRetry) {
                    const tc = this.config.cases[i];
                    const retryResult = await runTestCase(tc, i);
                    if (retryResult.passed) {
                        results[i] = retryResult;
                        retriedCases.push(retryResult.id);
                    }
                    else {
                        // Still failing: keep the original result and queue it
                        // for the next attempt.
                        failingIndices.push(i);
                    }
                }
            }
        }
        const durationMs = Date.now() - startTime;
        const passed = results.filter((r) => r.passed).length;
        const failed = results.filter((r) => !r.passed).length;
        return {
            name: this.name,
            total: results.length,
            passed,
            failed,
            durationMs,
            results,
            ...(retriedCases.length > 0 && { retriedCases }),
        };
    }
    /**
     * Add a test case to the suite.
     *
     * Appends to the `cases` array of the config object passed to the constructor.
     *
     * @param {object} testCase - Case to append.
     */
    addCase(testCase) {
        this.config.cases.push(testCase);
    }
    /**
     * Get suite configuration.
     *
     * @returns {object} A shallow copy of the config; nested values such as the
     *   `cases` array are shared with the suite.
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Get test definitions for introspection.
     * COMPAT-201: Public TestSuite introspection (minimal getters)
     *
     * @returns {object[]} One entry per case with `id` (explicit or `case-<index>`),
     *   `input`, `expected`, `metadata`, `hasAssertions` and `assertionCount`.
     */
    getTests() {
        return this.config.cases.map((testCase, index) => {
            // A single assertion function counts as one assertion, mirroring run().
            const assertionCount = typeof testCase.assertions === "function"
                ? 1
                : testCase.assertions?.length || 0;
            return {
                id: testCase.id || `case-${index}`,
                input: testCase.input,
                expected: testCase.expected,
                metadata: testCase.metadata,
                hasAssertions: assertionCount > 0,
                assertionCount,
            };
        });
    }
    /**
     * Get suite metadata for introspection.
     * COMPAT-201: Public TestSuite introspection (minimal getters)
     *
     * @returns {object} `{ suiteName, tags, defaults }` where `defaults` mirrors the
     *   configured `timeout`, `parallel`, `stopOnFailure` and `retries` as given
     *   (unset values stay `undefined`).
     */
    getMetadata() {
        return {
            suiteName: this.name,
            tags: [], // TestSuite doesn't have tags, but include for future compatibility
            defaults: {
                timeout: this.config.timeout,
                parallel: this.config.parallel,
                stopOnFailure: this.config.stopOnFailure,
                retries: this.config.retries,
            },
        };
    }
    /**
     * Convert to portable suite representation.
     * COMPAT-201: Public TestSuite introspection (minimal getters)
     *
     * @returns {object} `{ name, config, tests, metadata }` assembled from
     *   `getConfig()`, `getTests()` and `getMetadata()`. Function-valued config
     *   entries (e.g. the executor) are omitted when this is passed through
     *   `JSON.stringify`.
     */
    toJSON() {
        return {
            name: this.name,
            config: this.getConfig(),
            tests: this.getTests(),
            metadata: this.getMetadata(),
        };
    }
}
|
|
220
|
+
exports.TestSuite = TestSuite;
|
|
221
|
+
/**
 * Factory for a TestSuite: equivalent to `new TestSuite(name, config)`.
 *
 * @param name - Suite name.
 * @param config - Suite configuration, handed to the TestSuite constructor as-is.
 * @returns The newly constructed TestSuite.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('my-tests', {
 *   cases: [
 *     {
 *       input: 'Hello',
 *       assertions: [
 *         (output) => expect(output).toContain('hi'),
 *         (output) => expect(output).toHaveSentiment('positive')
 *       ]
 *     }
 *   ],
 *   executor: async (input) => {
 *     // Your LLM call here
 *     return callLLM(input);
 *   }
 * });
 * ```
 */
function createTestSuite(name, config) {
    const suite = new TestSuite(name, config);
    return suite;
}
|
|
246
|
+
/**
 * Builds an assertion function for a keyword list.
 *
 * The returned function takes an output and forwards it to
 * `expect(output).toContainKeywords(keywords)`, returning that call's result.
 *
 * @param keywords - Keywords passed through to `toContainKeywords`.
 * @returns An assertion function `(output) => result`.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('tests', {
 *   cases: [
 *     {
 *       input: 'refund policy',
 *       assertions: [containsKeywords(['refund', 'return', 'policy'])]
 *     }
 *   ]
 * });
 * ```
 */
function containsKeywords(keywords) {
    return (output) => {
        const expectation = (0, assertions_1.expect)(output);
        return expectation.toContainKeywords(keywords);
    };
}
|
|
264
|
+
/**
 * Builds an assertion function for a pattern.
 *
 * The returned function takes an output and forwards it to
 * `expect(output).toMatchPattern(pattern)`, returning that call's result.
 *
 * @param pattern - Pattern passed through to `toMatchPattern`.
 * @returns An assertion function `(output) => result`.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('tests', {
 *   cases: [
 *     {
 *       input: 'What time is it?',
 *       assertions: [matchesPattern(/\d{1,2}:\d{2}/)]
 *     }
 *   ]
 * });
 * ```
 */
function matchesPattern(pattern) {
    return (output) => {
        const expectation = (0, assertions_1.expect)(output);
        return expectation.toMatchPattern(pattern);
    };
}
|
|
282
|
+
/**
 * Builds an assertion function for a sentiment.
 *
 * The returned function takes an output and forwards it to
 * `expect(output).toHaveSentiment(sentiment)`, returning that call's result.
 *
 * @param sentiment - Sentiment passed through to `toHaveSentiment`.
 * @returns An assertion function `(output) => result`.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('tests', {
 *   cases: [
 *     {
 *       input: 'Thank you!',
 *       assertions: [hasSentiment('positive')]
 *     }
 *   ]
 * });
 * ```
 */
function hasSentiment(sentiment) {
    return (output) => {
        const expectation = (0, assertions_1.expect)(output);
        return expectation.toHaveSentiment(sentiment);
    };
}
|
|
300
|
+
/**
 * Builds an assertion function for a length range.
 *
 * The returned function takes an output and forwards it to
 * `expect(output).toHaveLength(range)`, returning that call's result.
 *
 * @param range - Range object passed through to `toHaveLength`.
 * @returns An assertion function `(output) => result`.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('tests', {
 *   cases: [
 *     {
 *       input: 'Summarize this',
 *       assertions: [hasLength({ min: 50, max: 200 })]
 *     }
 *   ]
 * });
 * ```
 */
function hasLength(range) {
    return (output) => {
        const expectation = (0, assertions_1.expect)(output);
        return expectation.toHaveLength(range);
    };
}
|