@pauly4010/evalai-sdk 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,268 @@
1
+ "use strict";
2
+ /**
3
+ * Streaming & Batch Operations
4
+ * Tier 2.8: Handle large datasets efficiently
5
+ *
6
+ * @example
7
+ * ```typescript
8
+ * import { streamEvaluation, batchProcess } from '@ai-eval-platform/sdk';
9
+ *
10
+ * // Stream large evaluation results
11
+ * for await (const result of streamEvaluation(config)) {
12
+ * console.log(`Progress: ${result.completed}/${result.total}`);
13
+ * }
14
+ *
15
+ * // Batch create traces
16
+ * await batchProcess((trace) => client.traces.create(trace), traces, { batchSize: 100 });
17
+ * ```
18
+ */
19
+ Object.defineProperty(exports, "__esModule", { value: true });
20
+ exports.RateLimiter = void 0;
21
+ exports.batchProcess = batchProcess;
22
+ exports.streamEvaluation = streamEvaluation;
23
+ exports.batchRead = batchRead;
24
+ exports.chunk = chunk;
25
/**
 * Run `processor` over every item, one batch at a time.
 *
 * Items are split into consecutive batches of `batchSize`. Batches always run
 * one after another. Within a batch, items run concurrently when `parallel`
 * is true and strictly one at a time (in input order) when it is false.
 *
 * @param {(item: any) => any} processor - Called once per item; may return a promise.
 * @param {any[]} items - Items to process.
 * @param {object} [options]
 * @param {number} [options.batchSize=100] - Items per batch; must be greater than 0.
 * @param {boolean} [options.parallel=true] - Process the items of a batch concurrently.
 * @param {number} [options.delayMs=0] - Pause between batches (skipped after the last one).
 * @param {Function} [options.onProgress] - Called after each batch with
 *   `{ total, completed, failed, batch, totalBatches }` (`batch` is 1-based).
 * @param {Function} [options.onError] - Called for each failed item with
 *   `{ batch, index, error, item }` (`batch` and `index` are 0-based).
 * @param {boolean} [options.continueOnError=true] - When false, the first failure
 *   rejects the returned promise with the original error.
 * @returns {Promise<object>} `{ successful, failed, summary }`, where `successful`
 *   holds processor outputs in completion order, `failed` holds `{ item, error }`
 *   entries and `summary` holds the `total` / `successful` / `failed` counts.
 * @throws {RangeError} If `batchSize` is not greater than 0.
 *
 * @example
 * ```typescript
 * const traces = [
 *   { name: 'trace-1', traceId: 'id-1' },
 *   { name: 'trace-2', traceId: 'id-2' },
 *   // ... 1000 more
 * ];
 *
 * const result = await batchProcess(
 *   (item) => client.traces.create(item),
 *   traces,
 *   {
 *     batchSize: 100,
 *     onProgress: (p) => console.log(`${p.completed}/${p.total}`)
 *   }
 * );
 * ```
 */
async function batchProcess(processor, items, options = {}) {
    const { batchSize = 100, parallel = true, delayMs = 0, onProgress, onError, continueOnError = true } = options;
    // A zero or negative step would never advance the batching loop, and NaN
    // would silently process nothing, so reject those up front.
    if (!(batchSize > 0)) {
        throw new RangeError(`batchSize must be greater than 0, received ${batchSize}`);
    }
    const result = {
        successful: [],
        failed: [],
        summary: {
            total: items.length,
            successful: 0,
            failed: 0
        }
    };
    // Split into batches
    const batches = [];
    for (let start = 0; start < items.length; start += batchSize) {
        batches.push(items.slice(start, start + batchSize));
    }
    // Process a single item and record its outcome on `result`.
    const processItem = async (item, itemIndex, batchIndex) => {
        try {
            const output = await processor(item);
            result.successful.push(output);
            result.summary.successful++;
        }
        catch (error) {
            const batchError = {
                batch: batchIndex,
                index: itemIndex,
                error: error instanceof Error ? error : new Error(String(error)),
                item
            };
            result.failed.push({ item, error: batchError.error });
            result.summary.failed++;
            if (onError) {
                onError(batchError);
            }
            if (!continueOnError) {
                throw error;
            }
        }
    };
    for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) {
        const batch = batches[batchIndex];
        if (parallel) {
            await Promise.all(batch.map((item, itemIndex) => processItem(item, itemIndex, batchIndex)));
        }
        else {
            // Start each item only after the previous one has settled; creating
            // all the promises first would run them concurrently anyway.
            for (let itemIndex = 0; itemIndex < batch.length; itemIndex++) {
                await processItem(batch[itemIndex], itemIndex, batchIndex);
            }
        }
        // Progress callback
        if (onProgress) {
            onProgress({
                total: items.length,
                completed: result.summary.successful + result.summary.failed,
                failed: result.summary.failed,
                batch: batchIndex + 1,
                totalBatches: batches.length
            });
        }
        // Delay between batches
        if (delayMs > 0 && batchIndex < batches.length - 1) {
            await new Promise((resolve) => setTimeout(resolve, delayMs));
        }
    }
    return result;
}
120
/**
 * Stream evaluation results
 *
 * Runs `config.executor` on each entry of `config.cases`, one at a time and in
 * order, yielding one record per case as soon as it settles.
 *
 * Each yielded record has the shape
 * `{ caseId, case, result, passed, completed, total }`:
 * - `caseId` is `case-<index>`;
 * - `passed` is true when the executor returned and false when it threw;
 * - `result` is the executor's return value, or the thrown value on failure;
 * - `completed` counts the cases settled so far and `total` is `cases.length`.
 *
 * Only errors thrown by the executor are turned into failed records. An error
 * injected by the consumer through the iterator's `throw()` propagates
 * instead of being reported as a case failure.
 *
 * @param {object} config
 * @param {any[]} config.cases - Test cases, passed to the executor as-is.
 * @param {(testCase: any) => any} config.executor - May return a promise.
 * @returns {AsyncGenerator<object>} One record per case.
 *
 * @example
 * ```typescript
 * const config = {
 *   cases: [...],
 *   executor: async (input) => callLLM(input)
 * };
 *
 * for await (const result of streamEvaluation(config)) {
 *   console.log(`Case ${result.caseId}: ${result.passed ? 'PASS' : 'FAIL'}`);
 *   console.log(`Progress: ${result.completed}/${result.total}`);
 * }
 * ```
 */
async function* streamEvaluation(config) {
    const { cases, executor } = config;
    let completed = 0;
    for (const [index, testCase] of cases.entries()) {
        let result;
        let passed = true;
        // Keep the yield outside the try block so that only executor failures
        // are caught; otherwise an error thrown into the generator at the
        // yield point would count the same case twice.
        try {
            result = await executor(testCase);
        }
        catch (error) {
            result = error;
            passed = false;
        }
        completed++;
        yield {
            caseId: `case-${index}`,
            case: testCase,
            result,
            passed,
            completed,
            total: cases.length
        };
    }
}
165
/**
 * Batch read with pagination
 *
 * Calls `fetcher({ limit, offset })` repeatedly, with `limit` set to
 * `pageSize` and `offset` advancing by `pageSize` per page, and concatenates
 * the returned pages. Reading stops when a page is empty, when a page is
 * shorter than `pageSize`, or when `maxPages` pages have been read.
 *
 * @param {(params: {limit: number, offset: number}) => any} fetcher - Returns
 *   (or resolves to) the array of items for one page.
 * @param {object} [options]
 * @param {number} [options.pageSize=100] - Items requested per page.
 * @param {number} [options.maxPages] - Maximum number of pages to read. A
 *   falsy value (including 0) means no limit.
 * @param {(page: number, totalItems: number) => void} [options.onProgress] -
 *   Called after each non-empty page with the number of pages read and the
 *   number of items collected so far.
 * @returns {Promise<any[]>} All collected items, in page order.
 *
 * @example
 * ```typescript
 * const allTraces = await batchRead(
 *   (params) => client.traces.list(params),
 *   { pageSize: 100 }
 * );
 * ```
 */
async function batchRead(fetcher, options = {}) {
    const { pageSize = 100, maxPages, onProgress } = options;
    const allItems = [];
    let page = 0;
    while (!maxPages || page < maxPages) {
        const items = await fetcher({
            limit: pageSize,
            offset: page * pageSize
        });
        if (items.length === 0) {
            break;
        }
        // Append one by one: `push(...items)` passes every element as a call
        // argument and can throw a RangeError on very large pages.
        for (const item of items) {
            allItems.push(item);
        }
        page++;
        if (onProgress) {
            onProgress(page, allItems.length);
        }
        // A short page means the source is exhausted.
        if (items.length < pageSize) {
            break;
        }
    }
    return allItems;
}
202
/**
 * Rate-limited batch processor
 *
 * Queues calls and runs them one at a time, waiting
 * `1000 / requestsPerSecond` milliseconds after each call finishes before
 * starting the next one.
 *
 * @example
 * ```typescript
 * const limiter = new RateLimiter({ requestsPerSecond: 10 });
 *
 * for (const item of items) {
 *   await limiter.throttle(() => client.traces.create(item));
 * }
 * ```
 */
class RateLimiter {
    /**
     * @param {object} options
     * @param {number} options.requestsPerSecond - Maximum call rate; must be greater than 0.
     * @throws {RangeError} If `requestsPerSecond` is not greater than 0. Such
     *   values would produce an infinite or NaN delay and silently disable throttling.
     */
    constructor(options) {
        const { requestsPerSecond } = options;
        if (!(requestsPerSecond > 0)) {
            throw new RangeError(`requestsPerSecond must be greater than 0, received ${requestsPerSecond}`);
        }
        this.queue = [];
        this.processing = false;
        this.requestsPerSecond = requestsPerSecond;
        this.interval = 1000 / requestsPerSecond;
    }
    /**
     * Throttle a function call
     *
     * Queues `fn` and returns a promise that settles with its outcome once the
     * limiter has run it.
     *
     * @param {() => any} fn - Function to call; may return a promise.
     * @returns {Promise<any>} Resolves with `fn`'s result or rejects with its error.
     */
    async throttle(fn) {
        return new Promise((resolve, reject) => {
            // The queued task never rejects: failures are routed to this
            // caller's promise so the drain loop keeps going.
            this.queue.push(async () => {
                try {
                    resolve(await fn());
                }
                catch (error) {
                    reject(error);
                }
            });
            if (!this.processing) {
                // Deliberately not awaited; the drain loop runs in the background.
                void this.process();
            }
        });
    }
    /**
     * Drain the queue sequentially, pausing `interval` ms after every task
     * (including the last) before the limiter is marked idle again.
     */
    async process() {
        this.processing = true;
        while (this.queue.length > 0) {
            const task = this.queue.shift();
            if (task) {
                await task();
                await new Promise((resolve) => setTimeout(resolve, this.interval));
            }
        }
        this.processing = false;
    }
}
252
+ exports.RateLimiter = RateLimiter;
253
/**
 * Chunk array into smaller arrays
 *
 * Splits `array` into consecutive slices of at most `size` elements; the last
 * chunk may be shorter. The input array is not modified.
 *
 * @param {any[]} array - Array to split.
 * @param {number} size - Maximum chunk length; must be greater than 0.
 * @returns {any[][]} The chunks, in order. Empty when `array` is empty.
 * @throws {RangeError} If `size` is not greater than 0. A zero or negative
 *   step would never advance the loop.
 *
 * @example
 * ```typescript
 * const chunks = chunk([1, 2, 3, 4, 5], 2);
 * // [[1, 2], [3, 4], [5]]
 * ```
 */
function chunk(array, size) {
    if (!(size > 0)) {
        throw new RangeError(`chunk size must be greater than 0, received ${size}`);
    }
    const chunks = [];
    for (let start = 0; start < array.length; start += size) {
        chunks.push(array.slice(start, start + size));
    }
    return chunks;
}
@@ -0,0 +1,204 @@
1
+ /**
2
+ * Test Suite Builder
3
+ * Tier 2.7: Declarative test definitions
4
+ *
5
+ * @example
6
+ * ```typescript
7
+ * import { createTestSuite, expect } from '@ai-eval-platform/sdk';
8
+ *
9
+ * const suite = createTestSuite('chatbot-responses', {
10
+ * cases: [
11
+ * {
12
+ * input: 'Hello',
13
+ * assertions: [
14
+ * (output) => expect(output).toContain('greeting'),
15
+ * (output) => expect(output).toHaveSentiment('positive')
16
+ * ]
17
+ * }
18
+ * ]
19
+ * });
20
+ *
21
+ * const results = await suite.run();
22
+ * ```
23
+ */
24
+ import { AssertionResult } from './assertions';
25
+ /**
26
+ * Test suite case definition (different from API TestCase type)
27
+ * Use this for defining test cases in test suites with assertions
28
+ */
29
+ export interface TestSuiteCase {
30
+ /** Unique identifier for the test case; TestSuite.run() falls back to `case-<index>` when omitted or empty */
31
+ id?: string;
32
+ /** Input to the LLM (passed to the suite's executor) */
33
+ input: string;
34
+ /** Expected output (optional); used as the actual output when the suite has no executor, and checked with `toEqual` when the case has no assertions */
35
+ expected?: string;
36
+ /** Metadata for the test case (not read by TestSuite.run()) */
37
+ metadata?: Record<string, any>;
38
+ /** Assertion functions to run, in order, against the actual output; must be an array */
39
+ assertions?: ((output: string) => AssertionResult)[];
40
+ }
41
+ /** @deprecated Use TestSuiteCase instead to avoid confusion with API TestCase type; this is a plain alias of TestSuiteCase */
42
+ export type TestCase = TestSuiteCase;
43
+ export interface TestSuiteConfig {
44
+ /** Test cases to run */
45
+ cases: TestSuiteCase[];
46
+ /** Function that generates output from input; when omitted, each case's `expected` is used as its actual output */
47
+ executor?: (input: string) => Promise<string>;
48
+ /** Run all cases concurrently when true; TestSuite.run() only checks truthiness, so cases run sequentially when this is omitted */
49
+ parallel?: boolean;
50
+ /** Stop on first failure (default: false); only honored when cases run sequentially */
51
+ stopOnFailure?: boolean;
52
+ /** Timeout per executor call in ms (default: 30000; 0 also falls back to the default) */
53
+ timeout?: number;
54
+ }
55
+ export interface TestSuiteCaseResult {
56
+ /** Test case ID (the case's own `id`, or `case-<index>` when it had none) */
57
+ id: string;
58
+ /** Input that was tested */
59
+ input: string;
60
+ /** Expected output, copied from the test case */
61
+ expected?: string;
62
+ /** Actual output; empty string when the case failed to execute */
63
+ actual: string;
64
+ /** Whether test passed (every assertion passed and nothing threw) */
65
+ passed: boolean;
66
+ /** Assertion results; empty when the case failed to execute */
67
+ assertions: AssertionResult[];
68
+ /** Duration in milliseconds */
69
+ durationMs: number;
70
+ /** Error message if test failed to execute (executor threw or timed out, or an assertion function threw) */
71
+ error?: string;
72
+ }
73
+ /** @deprecated Use TestSuiteCaseResult instead; this is a plain alias of TestSuiteCaseResult */
74
+ export type TestCaseResult = TestSuiteCaseResult;
75
+ export interface TestSuiteResult {
76
+ /** Suite name */
77
+ name: string;
78
+ /** Number of test cases that were run (can be fewer than configured when stopOnFailure ends the run early) */
79
+ total: number;
80
+ /** Number of passed tests */
81
+ passed: number;
82
+ /** Number of failed tests */
83
+ failed: number;
84
+ /** Total duration in milliseconds */
85
+ durationMs: number;
86
+ /** Individual test results, one per case that was run */
87
+ results: TestSuiteCaseResult[];
88
+ }
89
+ /**
90
+ * Test Suite for declarative evaluation testing
91
+ */
92
+ export declare class TestSuite {
93
+ private name;
94
+ private config;
95
+ constructor(name: string, config: TestSuiteConfig);
96
+ /**
97
+ * Run all test cases; per-case failures are reported in the result instead of rejecting
98
+ *
99
+ * @example
100
+ * ```typescript
101
+ * const results = await suite.run();
102
+ * console.log(`${results.passed}/${results.total} tests passed`);
103
+ * ```
104
+ */
105
+ run(): Promise<TestSuiteResult>;
106
+ /**
107
+ * Add a test case to the suite (appends to the `cases` array of the config passed to the constructor)
108
+ */
109
+ addCase(testCase: TestSuiteCase): void;
110
+ /**
111
+ * Get suite configuration (a shallow copy: the `cases` array is shared with the suite)
112
+ */
113
+ getConfig(): TestSuiteConfig;
114
+ }
115
+ /**
116
+ * Create a test suite (equivalent to `new TestSuite(name, config)`)
117
+ *
118
+ * @example
119
+ * ```typescript
120
+ * const suite = createTestSuite('my-tests', {
121
+ * cases: [
122
+ * {
123
+ * input: 'Hello',
124
+ * assertions: [
125
+ * (output) => expect(output).toContain('hi'),
126
+ * (output) => expect(output).toHaveSentiment('positive')
127
+ * ]
128
+ * }
129
+ * ],
130
+ * executor: async (input) => {
131
+ * // Your LLM call here
132
+ * return callLLM(input);
133
+ * }
134
+ * });
135
+ * ```
136
+ */
137
+ export declare function createTestSuite(name: string, config: TestSuiteConfig): TestSuite;
138
+ /**
139
+ * Helper to create an assertion from expected keywords (wrap it in the case's `assertions` array)
140
+ *
141
+ * @example
142
+ * ```typescript
143
+ * const suite = createTestSuite('tests', {
144
+ * cases: [
145
+ * {
146
+ * input: 'refund policy',
147
+ * assertions: [containsKeywords(['refund', 'return', 'policy'])]
148
+ * }
149
+ * ]
150
+ * });
151
+ * ```
152
+ */
153
+ export declare function containsKeywords(keywords: string[]): (output: string) => AssertionResult;
154
+ /**
155
+ * Helper to create pattern matching assertion (wrap it in the case's `assertions` array)
156
+ *
157
+ * @example
158
+ * ```typescript
159
+ * const suite = createTestSuite('tests', {
160
+ * cases: [
161
+ * {
162
+ * input: 'What time is it?',
163
+ * assertions: [matchesPattern(/\d{1,2}:\d{2}/)]
164
+ * }
165
+ * ]
166
+ * });
167
+ * ```
168
+ */
169
+ export declare function matchesPattern(pattern: RegExp): (output: string) => AssertionResult;
170
+ /**
171
+ * Helper to create sentiment assertion (wrap it in the case's `assertions` array)
172
+ *
173
+ * @example
174
+ * ```typescript
175
+ * const suite = createTestSuite('tests', {
176
+ * cases: [
177
+ * {
178
+ * input: 'Thank you!',
179
+ * assertions: [hasSentiment('positive')]
180
+ * }
181
+ * ]
182
+ * });
183
+ * ```
184
+ */
185
+ export declare function hasSentiment(sentiment: 'positive' | 'negative' | 'neutral'): (output: string) => AssertionResult;
186
+ /**
187
+ * Helper to create length range assertion (wrap it in the case's `assertions` array)
188
+ *
189
+ * @example
190
+ * ```typescript
191
+ * const suite = createTestSuite('tests', {
192
+ * cases: [
193
+ * {
194
+ * input: 'Summarize this',
195
+ * assertions: [hasLength({ min: 50, max: 200 })]
196
+ * }
197
+ * ]
198
+ * });
199
+ * ```
200
+ */
201
+ export declare function hasLength(range: {
202
+ min?: number;
203
+ max?: number;
204
+ }): (output: string) => AssertionResult;
@@ -0,0 +1,252 @@
1
+ "use strict";
2
+ /**
3
+ * Test Suite Builder
4
+ * Tier 2.7: Declarative test definitions
5
+ *
6
+ * @example
7
+ * ```typescript
8
+ * import { createTestSuite, expect } from '@ai-eval-platform/sdk';
9
+ *
10
+ * const suite = createTestSuite('chatbot-responses', {
11
+ * cases: [
12
+ * {
13
+ * input: 'Hello',
14
+ * assertions: [
15
+ * (output) => expect(output).toContain('greeting'),
16
+ * (output) => expect(output).toHaveSentiment('positive')
17
+ * ]
18
+ * }
19
+ * ]
20
+ * });
21
+ *
22
+ * const results = await suite.run();
23
+ * ```
24
+ */
25
+ Object.defineProperty(exports, "__esModule", { value: true });
26
+ exports.TestSuite = void 0;
27
+ exports.createTestSuite = createTestSuite;
28
+ exports.containsKeywords = containsKeywords;
29
+ exports.matchesPattern = matchesPattern;
30
+ exports.hasSentiment = hasSentiment;
31
+ exports.hasLength = hasLength;
32
+ const assertions_1 = require("./assertions");
33
/**
 * Await `task()`, rejecting with a timeout error if it has not settled within
 * `timeoutMs`. The timer is cleared as soon as the race settles so it cannot
 * keep the process alive after the task has finished.
 */
async function runWithTimeout(task, timeoutMs) {
    let timer;
    const timeoutPromise = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error(`Test timeout after ${timeoutMs}ms`)), timeoutMs);
    });
    try {
        return await Promise.race([task(), timeoutPromise]);
    }
    finally {
        clearTimeout(timer);
    }
}
/**
 * Test Suite for declarative evaluation testing
 */
class TestSuite {
    /**
     * @param {string} name - Suite name, echoed in the run result.
     * @param {object} config - Suite configuration. It is stored by reference,
     *   so `addCase` appends to the caller's `cases` array.
     */
    constructor(name, config) {
        this.name = name;
        this.config = config;
    }
    /**
     * Run all test cases
     *
     * For each case the actual output comes from `config.executor(input)`,
     * raced against `config.timeout` (30000 ms when unset or 0). Without an
     * executor the case's `expected` value is used as the actual output. The
     * case's assertions are then run in order; a case with `expected` but no
     * assertions gets a default `toEqual(expected)` check.
     *
     * Cases run concurrently only when `config.parallel` is truthy. Otherwise
     * they run sequentially, and `config.stopOnFailure` ends the run after the
     * first failing case. Per-case errors (executor failure, timeout, a
     * throwing assertion) are captured in that case's `error` field rather
     * than rejecting the returned promise.
     *
     * @returns {Promise<object>} `{ name, total, passed, failed, durationMs, results }`,
     *   where `total` counts the cases that were actually run.
     *
     * @example
     * ```typescript
     * const results = await suite.run();
     * console.log(`${results.passed}/${results.total} tests passed`);
     * ```
     */
    async run() {
        const startTime = Date.now();
        const results = [];
        const runTestCase = async (testCase, index) => {
            const caseStartTime = Date.now();
            const id = testCase.id || `case-${index}`;
            // An empty string is a legitimate expected value, so test for
            // presence rather than truthiness.
            const hasExpected = testCase.expected != null;
            try {
                // Execute to get output
                let actual;
                if (this.config.executor) {
                    const timeout = this.config.timeout || 30000;
                    actual = await runWithTimeout(() => this.config.executor(testCase.input), timeout);
                }
                else if (hasExpected) {
                    actual = testCase.expected; // Use expected as actual if no executor
                }
                else {
                    throw new Error('No executor provided and no expected output');
                }
                // Run assertions
                const assertions = [];
                let allPassed = true;
                const record = (assertionResult) => {
                    assertions.push(assertionResult);
                    if (!assertionResult.passed) {
                        allPassed = false;
                    }
                };
                // Run custom assertions
                if (testCase.assertions) {
                    for (const assertion of testCase.assertions) {
                        record(assertion(actual));
                    }
                }
                // Default equality check if expected provided
                if (hasExpected && !testCase.assertions) {
                    record((0, assertions_1.expect)(actual).toEqual(testCase.expected));
                }
                return {
                    id,
                    input: testCase.input,
                    expected: testCase.expected,
                    actual,
                    passed: allPassed,
                    assertions,
                    durationMs: Date.now() - caseStartTime
                };
            }
            catch (error) {
                return {
                    id,
                    input: testCase.input,
                    expected: testCase.expected,
                    actual: '',
                    passed: false,
                    assertions: [],
                    durationMs: Date.now() - caseStartTime,
                    error: error instanceof Error ? error.message : String(error)
                };
            }
        };
        // Run tests
        if (this.config.parallel) {
            results.push(...await Promise.all(this.config.cases.map((tc, i) => runTestCase(tc, i))));
        }
        else {
            for (let i = 0; i < this.config.cases.length; i++) {
                const result = await runTestCase(this.config.cases[i], i);
                results.push(result);
                if (this.config.stopOnFailure && !result.passed) {
                    break;
                }
            }
        }
        const durationMs = Date.now() - startTime;
        const passed = results.filter((r) => r.passed).length;
        const failed = results.length - passed;
        return {
            name: this.name,
            total: results.length,
            passed,
            failed,
            durationMs,
            results
        };
    }
    /**
     * Add a test case to the suite
     *
     * @param {object} testCase - Appended to `config.cases`.
     */
    addCase(testCase) {
        this.config.cases.push(testCase);
    }
    /**
     * Get suite configuration
     *
     * @returns {object} A shallow copy of the configuration; the `cases` array
     *   is shared with the suite.
     */
    getConfig() {
        return { ...this.config };
    }
}
155
+ exports.TestSuite = TestSuite;
156
/**
 * Factory for a `TestSuite`: builds a new suite from a name and a
 * configuration object and returns it.
 *
 * @param {string} name - Suite name.
 * @param {object} config - Suite configuration (cases, executor, options).
 * @returns {TestSuite} The newly constructed suite.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('my-tests', {
 *   cases: [
 *     {
 *       input: 'Hello',
 *       assertions: [
 *         (output) => expect(output).toContain('hi'),
 *         (output) => expect(output).toHaveSentiment('positive')
 *       ]
 *     }
 *   ],
 *   executor: async (input) => {
 *     // Your LLM call here
 *     return callLLM(input);
 *   }
 * });
 * ```
 */
function createTestSuite(name, config) {
    const suite = new TestSuite(name, config);
    return suite;
}
181
/**
 * Build an assertion function that delegates to
 * `expect(output).toContainKeywords(keywords)`.
 *
 * @param {string[]} keywords - Keywords handed to `toContainKeywords`.
 * @returns {(output: string) => object} Assertion function; place it inside a
 *   test case's `assertions` array.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('tests', {
 *   cases: [
 *     {
 *       input: 'refund policy',
 *       assertions: [containsKeywords(['refund', 'return', 'policy'])]
 *     }
 *   ]
 * });
 * ```
 */
function containsKeywords(keywords) {
    const assertKeywords = (output) => {
        const expectation = (0, assertions_1.expect)(output);
        return expectation.toContainKeywords(keywords);
    };
    return assertKeywords;
}
199
/**
 * Build an assertion function that delegates to
 * `expect(output).toMatchPattern(pattern)`.
 *
 * @param {RegExp} pattern - Pattern handed to `toMatchPattern`.
 * @returns {(output: string) => object} Assertion function; place it inside a
 *   test case's `assertions` array.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('tests', {
 *   cases: [
 *     {
 *       input: 'What time is it?',
 *       assertions: [matchesPattern(/\d{1,2}:\d{2}/)]
 *     }
 *   ]
 * });
 * ```
 */
function matchesPattern(pattern) {
    const assertPattern = (output) => {
        const expectation = (0, assertions_1.expect)(output);
        return expectation.toMatchPattern(pattern);
    };
    return assertPattern;
}
217
/**
 * Build an assertion function that delegates to
 * `expect(output).toHaveSentiment(sentiment)`.
 *
 * @param {'positive' | 'negative' | 'neutral'} sentiment - Sentiment handed to
 *   `toHaveSentiment`.
 * @returns {(output: string) => object} Assertion function; place it inside a
 *   test case's `assertions` array.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('tests', {
 *   cases: [
 *     {
 *       input: 'Thank you!',
 *       assertions: [hasSentiment('positive')]
 *     }
 *   ]
 * });
 * ```
 */
function hasSentiment(sentiment) {
    const assertSentiment = (output) => {
        const expectation = (0, assertions_1.expect)(output);
        return expectation.toHaveSentiment(sentiment);
    };
    return assertSentiment;
}
235
/**
 * Build an assertion function that delegates to
 * `expect(output).toHaveLength(range)`.
 *
 * @param {{min?: number, max?: number}} range - Range handed to `toHaveLength`.
 * @returns {(output: string) => object} Assertion function; place it inside a
 *   test case's `assertions` array.
 *
 * @example
 * ```typescript
 * const suite = createTestSuite('tests', {
 *   cases: [
 *     {
 *       input: 'Summarize this',
 *       assertions: [hasLength({ min: 50, max: 200 })]
 *     }
 *   ]
 * });
 * ```
 */
function hasLength(range) {
    const assertLength = (output) => {
        const expectation = (0, assertions_1.expect)(output);
        return expectation.toHaveLength(range);
    };
    return assertLength;
}