@evalgate/sdk 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +638 -0
- package/README.md +398 -0
- package/dist/assertions.d.ts +189 -0
- package/dist/assertions.js +662 -0
- package/dist/batch.d.ts +68 -0
- package/dist/batch.js +179 -0
- package/dist/cache.d.ts +65 -0
- package/dist/cache.js +131 -0
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +132 -0
- package/dist/cli/baseline.d.ts +10 -0
- package/dist/cli/baseline.js +172 -0
- package/dist/cli/check.d.ts +73 -0
- package/dist/cli/check.js +355 -0
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +112 -0
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +230 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +685 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +419 -0
- package/dist/cli/doctor.d.ts +88 -0
- package/dist/cli/doctor.js +675 -0
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.d.ts +58 -0
- package/dist/cli/explain.js +561 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +135 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +110 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +103 -0
- package/dist/cli/formatters/types.d.ts +103 -0
- package/dist/cli/formatters/types.js +8 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +179 -0
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +252 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.js +332 -0
- package/dist/cli/init.d.ts +16 -0
- package/dist/cli/init.js +292 -0
- package/dist/cli/manifest.d.ts +103 -0
- package/dist/cli/manifest.js +282 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +89 -0
- package/dist/cli/print-config.d.ts +29 -0
- package/dist/cli/print-config.js +270 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/regression-gate.d.ts +15 -0
- package/dist/cli/regression-gate.js +341 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +132 -0
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +395 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +91 -0
- package/dist/cli/upgrade.d.ts +15 -0
- package/dist/cli/upgrade.js +492 -0
- package/dist/cli/workspace.d.ts +31 -0
- package/dist/cli/workspace.js +68 -0
- package/dist/client.d.ts +368 -0
- package/dist/client.js +893 -0
- package/dist/client.request.test.d.ts +1 -0
- package/dist/client.request.test.js +232 -0
- package/dist/context.d.ts +134 -0
- package/dist/context.js +215 -0
- package/dist/errors.d.ts +82 -0
- package/dist/errors.js +298 -0
- package/dist/export.d.ts +195 -0
- package/dist/export.js +344 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.js +153 -0
- package/dist/integrations/anthropic.d.ts +91 -0
- package/dist/integrations/anthropic.js +163 -0
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +232 -0
- package/dist/integrations/openai.d.ts +92 -0
- package/dist/integrations/openai.js +160 -0
- package/dist/local.d.ts +39 -0
- package/dist/local.js +148 -0
- package/dist/logger.d.ts +128 -0
- package/dist/logger.js +227 -0
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +74 -0
- package/dist/pagination.js +139 -0
- package/dist/regression.d.ts +100 -0
- package/dist/regression.js +44 -0
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +400 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +244 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +357 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +403 -0
- package/dist/runtime/run-report.d.ts +200 -0
- package/dist/runtime/run-report.js +222 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/snapshot.d.ts +176 -0
- package/dist/snapshot.js +322 -0
- package/dist/streaming.d.ts +173 -0
- package/dist/streaming.js +268 -0
- package/dist/testing.d.ts +273 -0
- package/dist/testing.js +317 -0
- package/dist/types.d.ts +754 -0
- package/dist/types.js +54 -0
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +41 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +389 -0
- package/dist/workflows.js +671 -0
- package/package.json +117 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Streaming & Batch Operations
|
|
4
|
+
* Tier 2.8: Handle large datasets efficiently
|
|
5
|
+
*
|
|
6
|
+
* @example
|
|
7
|
+
* ```typescript
|
|
8
|
+
* import { streamEvaluation, batchProcess } from '@evalgate/sdk';
|
|
9
|
+
*
|
|
10
|
+
* // Stream large evaluation results
|
|
11
|
+
* for await (const result of streamEvaluation(config)) {
|
|
12
|
+
* console.log(`Progress: ${result.completed}/${result.total}`);
|
|
13
|
+
* }
|
|
14
|
+
*
|
|
15
|
+
* // Batch create traces
|
|
16
|
+
* await batchProcess((item) => client.traces.create(item), traces, { batchSize: 100 });
|
|
17
|
+
* ```
|
|
18
|
+
*/
|
|
19
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
20
|
+
exports.RateLimiter = void 0;
|
|
21
|
+
exports.batchProcess = batchProcess;
|
|
22
|
+
exports.streamEvaluation = streamEvaluation;
|
|
23
|
+
exports.batchRead = batchRead;
|
|
24
|
+
exports.chunk = chunk;
|
|
25
|
+
/**
 * Run `processor` over `items` in fixed-size batches and collect the outcomes.
 *
 * Batches always run one after another. Within a batch, items run concurrently
 * when `parallel` is true (the default) and strictly one at a time, in input
 * order, when `parallel` is false.
 *
 * @param processor - Called once per item; may return a value or a promise.
 * @param items - Items to process. An empty array resolves immediately.
 * @param options - Optional settings:
 *   - `batchSize` (default 100): items per batch; must be greater than 0.
 *   - `parallel` (default true): run the items of a batch concurrently.
 *   - `delayMs` (default 0): pause between batches (not after the last one).
 *   - `onProgress`: called after every batch with running totals.
 *   - `onError`: called for every failed item with `{ batch, index, error, item }`.
 *   - `continueOnError` (default true): when false, the first failure rejects
 *     the returned promise with the original error.
 * @returns `{ successful, failed, summary }`. In parallel mode `successful` is
 *   filled in completion order, which may differ from input order.
 * @throws RangeError if `items` is non-empty and `batchSize` is not > 0.
 *
 * @example
 * ```typescript
 * const traces = [
 *   { name: 'trace-1', traceId: 'id-1' },
 *   { name: 'trace-2', traceId: 'id-2' },
 *   // ... 1000 more
 * ];
 *
 * const result = await batchProcess(
 *   (item) => client.traces.create(item),
 *   traces,
 *   {
 *     batchSize: 100,
 *     onProgress: (p) => console.log(`${p.completed}/${p.total}`)
 *   }
 * );
 * ```
 */
async function batchProcess(processor, items, options = {}) {
    const { batchSize = 100, parallel = true, delayMs = 0, onProgress, onError, continueOnError = true, } = options;
    const result = {
        successful: [],
        failed: [],
        summary: {
            total: items.length,
            successful: 0,
            failed: 0,
        },
    };
    // Nothing to do: return before validating so a computed batchSize of 0
    // for an empty input keeps working.
    if (items.length === 0) {
        return result;
    }
    // A zero, negative or NaN batch size would never advance the split loop.
    if (!(batchSize > 0)) {
        throw new RangeError(`batchSize must be greater than 0, got ${batchSize}`);
    }
    // Split into batches
    const batches = [];
    for (let i = 0; i < items.length; i += batchSize) {
        batches.push(items.slice(i, i + batchSize));
    }
    // Process one item and record its outcome; rethrows only when the caller
    // asked to stop on the first error.
    const processItem = async (item, itemIndex, batchIndex) => {
        try {
            const output = await processor(item);
            result.successful.push(output);
            result.summary.successful++;
        }
        catch (error) {
            const normalized = error instanceof Error ? error : new Error(String(error));
            result.failed.push({ item, error: normalized });
            result.summary.failed++;
            if (onError) {
                onError({ batch: batchIndex, index: itemIndex, error: normalized, item });
            }
            if (!continueOnError) {
                throw error;
            }
        }
    };
    // Process batches
    for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) {
        const batch = batches[batchIndex];
        if (parallel) {
            await Promise.all(batch.map((item, itemIndex) => processItem(item, itemIndex, batchIndex)));
        }
        else {
            // Start each item only after the previous one has settled, so
            // sequential mode really is sequential.
            for (let itemIndex = 0; itemIndex < batch.length; itemIndex++) {
                await processItem(batch[itemIndex], itemIndex, batchIndex);
            }
        }
        // Progress callback
        if (onProgress) {
            onProgress({
                total: items.length,
                completed: result.summary.successful + result.summary.failed,
                failed: result.summary.failed,
                batch: batchIndex + 1,
                totalBatches: batches.length,
            });
        }
        // Delay between batches
        if (delayMs > 0 && batchIndex < batches.length - 1) {
            await new Promise((resolve) => setTimeout(resolve, delayMs));
        }
    }
    return result;
}
|
|
120
|
+
/**
 * Stream evaluation results one case at a time.
 *
 * Cases are executed sequentially in array order. After each case settles, one
 * record is yielded with:
 *   - `caseId`: `case-<index>` (zero-based position in `config.cases`)
 *   - `case`: the case that was executed
 *   - `result`: the executor's resolved value, or the thrown value on failure
 *   - `passed`: true if the executor resolved, false if it threw/rejected
 *   - `completed` / `total`: running count and `config.cases.length`
 *
 * Executor failures never terminate the stream. Errors raised by the consumer
 * (for example via `generator.throw()`) are not treated as case failures; they
 * propagate out of the generator.
 *
 * @param config - `{ cases, executor }` where `executor` is called with each case.
 *
 * @example
 * ```typescript
 * const config = {
 *   cases: [...],
 *   executor: async (input) => callLLM(input)
 * };
 *
 * for await (const result of streamEvaluation(config)) {
 *   console.log(`Case ${result.caseId}: ${result.passed ? 'PASS' : 'FAIL'}`);
 *   console.log(`Progress: ${result.completed}/${result.total}`);
 * }
 * ```
 */
async function* streamEvaluation(config) {
    const { cases, executor } = config;
    let completed = 0;
    for (const [index, testCase] of cases.entries()) {
        let result;
        let passed = true;
        // Only the executor call is guarded: keeping the yield outside the try
        // prevents consumer-side errors from being reported as a second,
        // failed result for the same case.
        try {
            result = await executor(testCase);
        }
        catch (error) {
            result = error;
            passed = false;
        }
        completed++;
        yield {
            caseId: `case-${index}`,
            case: testCase,
            result,
            passed,
            completed,
            total: cases.length,
        };
    }
}
|
|
165
|
+
/**
 * Read every item from an offset/limit paginated source.
 *
 * Calls `fetcher({ limit, offset })` for successive pages and concatenates the
 * returned arrays. Reading stops when a page is empty, when a page is shorter
 * than `pageSize`, or when `maxPages` pages have been collected.
 *
 * @param fetcher - Receives `{ limit: pageSize, offset: page * pageSize }` and
 *   resolves to an array of items.
 * @param options - Optional settings:
 *   - `pageSize` (default 100): requested items per page.
 *   - `maxPages`: upper bound on pages read; a falsy value (undefined or 0)
 *     means no limit.
 *   - `onProgress(page, totalItems)`: called after each non-empty page.
 * @returns All collected items in fetch order.
 *
 * @example
 * ```typescript
 * const allTraces = await batchRead(
 *   (params) => client.traces.list(params),
 *   { pageSize: 100 }
 * );
 * ```
 */
async function batchRead(fetcher, options = {}) {
    const { pageSize = 100, maxPages, onProgress } = options;
    const allItems = [];
    let page = 0;
    while (!maxPages || page < maxPages) {
        const items = await fetcher({
            limit: pageSize,
            offset: page * pageSize,
        });
        if (items.length === 0) {
            break;
        }
        // Append one by one: spreading a very large page into push() passes
        // every element as a call argument and can overflow the call stack.
        for (const item of items) {
            allItems.push(item);
        }
        page++;
        if (onProgress) {
            onProgress(page, allItems.length);
        }
        // A short page means the source is exhausted.
        if (items.length < pageSize) {
            break;
        }
    }
    return allItems;
}
|
|
202
|
+
/**
 * Rate-limited task runner.
 *
 * Queued functions run one at a time in submission order. After each function
 * settles, the limiter waits `1000 / requestsPerSecond` milliseconds before
 * starting the next one.
 *
 * @example
 * ```typescript
 * const limiter = new RateLimiter({ requestsPerSecond: 10 });
 *
 * for (const item of items) {
 *   await limiter.throttle(() => client.traces.create(item));
 * }
 * ```
 */
class RateLimiter {
    /**
     * @param options - `{ requestsPerSecond }`, a positive finite number.
     * @throws RangeError if `requestsPerSecond` is not a positive finite
     *   number; such a value would otherwise yield an infinite, negative or
     *   NaN delay and silently disable throttling.
     */
    constructor(options) {
        const { requestsPerSecond } = options;
        if (typeof requestsPerSecond !== "number" ||
            !Number.isFinite(requestsPerSecond) ||
            requestsPerSecond <= 0) {
            throw new RangeError(`requestsPerSecond must be a positive finite number, got ${requestsPerSecond}`);
        }
        this.queue = [];
        this.processing = false;
        this.requestsPerSecond = requestsPerSecond;
        // Pause, in milliseconds, inserted after each queued call.
        this.interval = 1000 / requestsPerSecond;
    }
    /**
     * Throttle a function call.
     *
     * @param fn - Function to run; may return a value or a promise.
     * @returns A promise that settles with the outcome of `fn` once its turn
     *   in the queue has come.
     */
    async throttle(fn) {
        return new Promise((resolve, reject) => {
            // The queued wrapper never throws: the outcome of `fn` is routed
            // to this call's promise so the drain loop keeps going.
            this.queue.push(async () => {
                try {
                    resolve(await fn());
                }
                catch (error) {
                    reject(error);
                }
            });
            if (!this.processing) {
                this.process();
            }
        });
    }
    /**
     * Drain the queue, pausing `interval` ms after every task. Intended to be
     * started by `throttle` only while no other drain loop is running.
     */
    async process() {
        this.processing = true;
        while (this.queue.length > 0) {
            const task = this.queue.shift();
            if (task) {
                await task();
                await new Promise((resolve) => setTimeout(resolve, this.interval));
            }
        }
        this.processing = false;
    }
}
|
|
252
|
+
exports.RateLimiter = RateLimiter;
|
|
253
|
+
/**
 * Chunk array into smaller arrays.
 *
 * @param array - Source array; it is not modified.
 * @param size - Maximum length of each chunk; must be greater than 0.
 * @returns Consecutive slices of `array`; the last one may be shorter.
 *   An empty input always yields `[]`.
 * @throws RangeError if `array` is non-empty and `size` is not > 0, since such
 *   a size would never advance through the array.
 *
 * @example
 * ```typescript
 * const chunks = chunk([1, 2, 3, 4, 5], 2);
 * // [[1, 2], [3, 4], [5]]
 * ```
 */
function chunk(array, size) {
    const chunks = [];
    if (array.length === 0) {
        return chunks;
    }
    if (!(size > 0)) {
        throw new RangeError(`chunk size must be greater than 0, got ${size}`);
    }
    for (let start = 0; start < array.length; start += size) {
        chunks.push(array.slice(start, start + size));
    }
    return chunks;
}
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test Suite Builder
|
|
3
|
+
* Tier 2.7: Declarative test definitions
|
|
4
|
+
*
|
|
5
|
+
* @example
|
|
6
|
+
* ```typescript
|
|
7
|
+
* import { createTestSuite, expect } from '@evalgate/sdk';
|
|
8
|
+
*
|
|
9
|
+
* const suite = createTestSuite('chatbot-responses', {
|
|
10
|
+
* cases: [
|
|
11
|
+
* {
|
|
12
|
+
* input: 'Hello',
|
|
13
|
+
* assertions: [
|
|
14
|
+
* (output) => expect(output).toContain('greeting'),
|
|
15
|
+
* (output) => expect(output).toHaveSentiment('positive')
|
|
16
|
+
* ]
|
|
17
|
+
* }
|
|
18
|
+
* ]
|
|
19
|
+
* });
|
|
20
|
+
*
|
|
21
|
+
* const results = await suite.run();
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
import { type AssertionResult } from "./assertions";
|
|
25
|
+
/**
|
|
26
|
+
* Test suite case definition (different from API TestCase type)
|
|
27
|
+
* Use this for defining test cases in test suites with assertions
|
|
28
|
+
*/
|
|
29
|
+
export interface TestSuiteCase {
|
|
30
|
+
/** Unique identifier for the test case */
|
|
31
|
+
id?: string;
|
|
32
|
+
/** Input to the LLM */
|
|
33
|
+
input: string;
|
|
34
|
+
/** Expected output (optional) */
|
|
35
|
+
expected?: string;
|
|
36
|
+
/** Metadata for the test case */
|
|
37
|
+
metadata?: Record<string, unknown>;
|
|
38
|
+
/** Assertion functions to run */
|
|
39
|
+
assertions?: ((output: string) => AssertionResult)[];
|
|
40
|
+
}
|
|
41
|
+
/** @deprecated Use TestSuiteCase instead to avoid confusion with API TestCase type */
|
|
42
|
+
export type TestCase = TestSuiteCase;
|
|
43
|
+
export interface TestSuiteConfig {
|
|
44
|
+
/** Test cases to run */
|
|
45
|
+
cases: TestSuiteCase[];
|
|
46
|
+
/** Function that generates output from input */
|
|
47
|
+
executor?: (input: string) => Promise<string>;
|
|
48
|
+
/** Run tests in parallel (default: true) */
|
|
49
|
+
parallel?: boolean;
|
|
50
|
+
/** Stop on first failure (default: false) */
|
|
51
|
+
stopOnFailure?: boolean;
|
|
52
|
+
/** Timeout per test case in ms (default: 30000) */
|
|
53
|
+
timeout?: number;
|
|
54
|
+
/** Retry failing cases N times (default: 0). Only failing cases are retried. */
|
|
55
|
+
retries?: number;
|
|
56
|
+
}
|
|
57
|
+
export interface TestSuiteCaseResult {
|
|
58
|
+
/** Test case ID */
|
|
59
|
+
id: string;
|
|
60
|
+
/** Input that was tested */
|
|
61
|
+
input: string;
|
|
62
|
+
/** Expected output */
|
|
63
|
+
expected?: string;
|
|
64
|
+
/** Actual output */
|
|
65
|
+
actual: string;
|
|
66
|
+
/** Whether test passed */
|
|
67
|
+
passed: boolean;
|
|
68
|
+
/** Assertion results */
|
|
69
|
+
assertions: AssertionResult[];
|
|
70
|
+
/** Duration in milliseconds */
|
|
71
|
+
durationMs: number;
|
|
72
|
+
/** Error if test failed to execute */
|
|
73
|
+
error?: string;
|
|
74
|
+
}
|
|
75
|
+
/** @deprecated Use TestSuiteCaseResult instead */
|
|
76
|
+
export type TestCaseResult = TestSuiteCaseResult;
|
|
77
|
+
export interface TestSuiteResult {
|
|
78
|
+
/** Suite name */
|
|
79
|
+
name: string;
|
|
80
|
+
/** Total number of test cases */
|
|
81
|
+
total: number;
|
|
82
|
+
/** Number of passed tests */
|
|
83
|
+
passed: number;
|
|
84
|
+
/** Number of failed tests */
|
|
85
|
+
failed: number;
|
|
86
|
+
/** Total duration in milliseconds */
|
|
87
|
+
durationMs: number;
|
|
88
|
+
/** Individual test results */
|
|
89
|
+
results: TestSuiteCaseResult[];
|
|
90
|
+
/** Case IDs that were retried (flaky recovery) */
|
|
91
|
+
retriedCases?: string[];
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Test definition for introspection
|
|
95
|
+
* COMPAT-201: Public TestSuite introspection (minimal getters)
|
|
96
|
+
*/
|
|
97
|
+
export interface TestDefinition {
|
|
98
|
+
/** Test case ID */
|
|
99
|
+
id: string;
|
|
100
|
+
/** Test input */
|
|
101
|
+
input: string;
|
|
102
|
+
/** Expected output */
|
|
103
|
+
expected?: string;
|
|
104
|
+
/** Test metadata */
|
|
105
|
+
metadata?: Record<string, unknown>;
|
|
106
|
+
/** Whether test has assertions */
|
|
107
|
+
hasAssertions: boolean;
|
|
108
|
+
/** Number of assertions */
|
|
109
|
+
assertionCount: number;
|
|
110
|
+
}
|
|
111
|
+
/**
|
|
112
|
+
* Portable suite representation
|
|
113
|
+
* COMPAT-201: Public TestSuite introspection (minimal getters)
|
|
114
|
+
*/
|
|
115
|
+
export interface PortableSuite {
|
|
116
|
+
/** Suite name */
|
|
117
|
+
name: string;
|
|
118
|
+
/** Suite configuration */
|
|
119
|
+
config: TestSuiteConfig;
|
|
120
|
+
/** Test definitions */
|
|
121
|
+
tests: TestDefinition[];
|
|
122
|
+
/** Suite metadata */
|
|
123
|
+
metadata: {
|
|
124
|
+
suiteName?: string;
|
|
125
|
+
tags?: string[];
|
|
126
|
+
defaults?: {
|
|
127
|
+
timeout?: number;
|
|
128
|
+
parallel?: boolean;
|
|
129
|
+
stopOnFailure?: boolean;
|
|
130
|
+
retries?: number;
|
|
131
|
+
};
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
/**
|
|
135
|
+
* Test Suite for declarative evaluation testing
|
|
136
|
+
*/
|
|
137
|
+
export declare class TestSuite {
|
|
138
|
+
private name;
|
|
139
|
+
private config;
|
|
140
|
+
constructor(name: string, config: TestSuiteConfig);
|
|
141
|
+
/**
|
|
142
|
+
* Run all test cases
|
|
143
|
+
*
|
|
144
|
+
* @example
|
|
145
|
+
* ```typescript
|
|
146
|
+
* const results = await suite.run();
|
|
147
|
+
* console.log(`${results.passed}/${results.total} tests passed`);
|
|
148
|
+
* ```
|
|
149
|
+
*/
|
|
150
|
+
run(): Promise<TestSuiteResult>;
|
|
151
|
+
/**
|
|
152
|
+
* Add a test case to the suite
|
|
153
|
+
*/
|
|
154
|
+
addCase(testCase: TestSuiteCase): void;
|
|
155
|
+
/**
|
|
156
|
+
* Get suite configuration
|
|
157
|
+
*/
|
|
158
|
+
getConfig(): TestSuiteConfig;
|
|
159
|
+
/**
|
|
160
|
+
* Get test definitions for introspection
|
|
161
|
+
* COMPAT-201: Public TestSuite introspection (minimal getters)
|
|
162
|
+
*/
|
|
163
|
+
getTests(): TestDefinition[];
|
|
164
|
+
/**
|
|
165
|
+
* Get suite metadata for introspection
|
|
166
|
+
* COMPAT-201: Public TestSuite introspection (minimal getters)
|
|
167
|
+
*/
|
|
168
|
+
getMetadata(): {
|
|
169
|
+
suiteName?: string;
|
|
170
|
+
tags?: string[];
|
|
171
|
+
defaults?: {
|
|
172
|
+
timeout?: number;
|
|
173
|
+
parallel?: boolean;
|
|
174
|
+
stopOnFailure?: boolean;
|
|
175
|
+
retries?: number;
|
|
176
|
+
};
|
|
177
|
+
};
|
|
178
|
+
/**
|
|
179
|
+
* Convert to portable suite representation
|
|
180
|
+
* COMPAT-201: Public TestSuite introspection (minimal getters)
|
|
181
|
+
*/
|
|
182
|
+
toJSON(): PortableSuite;
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Create a test suite
|
|
186
|
+
*
|
|
187
|
+
* @example
|
|
188
|
+
* ```typescript
|
|
189
|
+
* const suite = createTestSuite('my-tests', {
|
|
190
|
+
* cases: [
|
|
191
|
+
* {
|
|
192
|
+
* input: 'Hello',
|
|
193
|
+
* assertions: [
|
|
194
|
+
* (output) => expect(output).toContain('hi'),
|
|
195
|
+
* (output) => expect(output).toHaveSentiment('positive')
|
|
196
|
+
* ]
|
|
197
|
+
* }
|
|
198
|
+
* ],
|
|
199
|
+
* executor: async (input) => {
|
|
200
|
+
* // Your LLM call here
|
|
201
|
+
* return callLLM(input);
|
|
202
|
+
* }
|
|
203
|
+
* });
|
|
204
|
+
* ```
|
|
205
|
+
*/
|
|
206
|
+
export declare function createTestSuite(name: string, config: TestSuiteConfig): TestSuite;
|
|
207
|
+
/**
|
|
208
|
+
* Helper to create assertions from expected keywords
|
|
209
|
+
*
|
|
210
|
+
* @example
|
|
211
|
+
* ```typescript
|
|
212
|
+
* const suite = createTestSuite('tests', {
|
|
213
|
+
* cases: [
|
|
214
|
+
* {
|
|
215
|
+
* input: 'refund policy',
|
|
216
|
+
* assertions: [containsKeywords(['refund', 'return', 'policy'])]
|
|
217
|
+
* }
|
|
218
|
+
* ]
|
|
219
|
+
* });
|
|
220
|
+
* ```
|
|
221
|
+
*/
|
|
222
|
+
export declare function containsKeywords(keywords: string[]): (output: string) => AssertionResult;
|
|
223
|
+
/**
|
|
224
|
+
* Helper to create pattern matching assertion
|
|
225
|
+
*
|
|
226
|
+
* @example
|
|
227
|
+
* ```typescript
|
|
228
|
+
* const suite = createTestSuite('tests', {
|
|
229
|
+
* cases: [
|
|
230
|
+
* {
|
|
231
|
+
* input: 'What time is it?',
|
|
232
|
+
* assertions: [matchesPattern(/\d{1,2}:\d{2}/)]
|
|
233
|
+
* }
|
|
234
|
+
* ]
|
|
235
|
+
* });
|
|
236
|
+
* ```
|
|
237
|
+
*/
|
|
238
|
+
export declare function matchesPattern(pattern: RegExp): (output: string) => AssertionResult;
|
|
239
|
+
/**
|
|
240
|
+
* Helper to create sentiment assertion
|
|
241
|
+
*
|
|
242
|
+
* @example
|
|
243
|
+
* ```typescript
|
|
244
|
+
* const suite = createTestSuite('tests', {
|
|
245
|
+
* cases: [
|
|
246
|
+
* {
|
|
247
|
+
* input: 'Thank you!',
|
|
248
|
+
* assertions: [hasSentiment('positive')]
|
|
249
|
+
* }
|
|
250
|
+
* ]
|
|
251
|
+
* });
|
|
252
|
+
* ```
|
|
253
|
+
*/
|
|
254
|
+
export declare function hasSentiment(sentiment: "positive" | "negative" | "neutral"): (output: string) => AssertionResult;
|
|
255
|
+
/**
|
|
256
|
+
* Helper to create length range assertion
|
|
257
|
+
*
|
|
258
|
+
* @example
|
|
259
|
+
* ```typescript
|
|
260
|
+
* const suite = createTestSuite('tests', {
|
|
261
|
+
* cases: [
|
|
262
|
+
* {
|
|
263
|
+
* input: 'Summarize this',
|
|
264
|
+
* assertions: [hasLength({ min: 50, max: 200 })]
|
|
265
|
+
* }
|
|
266
|
+
* ]
|
|
267
|
+
* });
|
|
268
|
+
* ```
|
|
269
|
+
*/
|
|
270
|
+
export declare function hasLength(range: {
|
|
271
|
+
min?: number;
|
|
272
|
+
max?: number;
|
|
273
|
+
}): (output: string) => AssertionResult;
|