@pauly4010/evalai-sdk 1.4.1 → 1.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +85 -0
- package/README.md +205 -543
- package/dist/assertions.d.ts +2 -2
- package/dist/assertions.js +104 -71
- package/dist/batch.js +12 -17
- package/dist/cache.js +7 -11
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +130 -0
- package/dist/cli/check.d.ts +28 -13
- package/dist/cli/check.js +249 -142
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +110 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +207 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/doctor.d.ts +11 -0
- package/dist/cli/doctor.js +82 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +130 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +107 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +101 -0
- package/dist/cli/formatters/types.d.ts +100 -0
- package/dist/cli/formatters/types.js +5 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +175 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +67 -23
- package/dist/cli/init.d.ts +7 -0
- package/dist/cli/init.js +69 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +83 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +124 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +83 -0
- package/dist/client.d.ts +2 -2
- package/dist/client.js +144 -132
- package/dist/context.d.ts +1 -1
- package/dist/context.js +4 -6
- package/dist/errors.d.ts +2 -0
- package/dist/errors.js +116 -107
- package/dist/export.d.ts +6 -6
- package/dist/export.js +39 -33
- package/dist/index.d.ts +25 -24
- package/dist/index.js +62 -56
- package/dist/integrations/anthropic.d.ts +1 -1
- package/dist/integrations/anthropic.js +23 -19
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +230 -0
- package/dist/integrations/openai.d.ts +1 -1
- package/dist/integrations/openai.js +23 -19
- package/dist/local.d.ts +2 -2
- package/dist/local.js +25 -25
- package/dist/logger.d.ts +1 -1
- package/dist/logger.js +24 -28
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +1 -1
- package/dist/pagination.js +6 -6
- package/dist/snapshot.js +24 -24
- package/dist/streaming.js +11 -11
- package/dist/testing.d.ts +6 -2
- package/dist/testing.js +30 -12
- package/dist/types.d.ts +22 -22
- package/dist/types.js +13 -13
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +38 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +7 -7
- package/dist/workflows.js +44 -44
- package/package.json +102 -90
- package/dist/__tests__/assertions.test.d.ts +0 -1
- package/dist/__tests__/assertions.test.js +0 -288
- package/dist/__tests__/client.test.d.ts +0 -1
- package/dist/__tests__/client.test.js +0 -185
- package/dist/__tests__/testing.test.d.ts +0 -1
- package/dist/__tests__/testing.test.js +0 -230
- package/dist/__tests__/workflows.test.d.ts +0 -1
- package/dist/__tests__/workflows.test.js +0 -222
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vitest/Jest matcher: expect(result).toPassGate()
|
|
3
|
+
* Use with openAIChatEval: expect(await openAIChatEval(...)).toPassGate()
|
|
4
|
+
*
|
|
5
|
+
* @example
|
|
6
|
+
* ```ts
|
|
7
|
+
* import { openAIChatEval } from '@pauly4010/evalai-sdk';
|
|
8
|
+
* import { expect } from 'vitest';
|
|
9
|
+
* import { extendExpectWithToPassGate } from '@pauly4010/evalai-sdk/matchers';
|
|
10
|
+
*
|
|
11
|
+
* extendExpectWithToPassGate(expect);
|
|
12
|
+
*
|
|
13
|
+
* it('passes gate', async () => {
|
|
14
|
+
* const result = await openAIChatEval({ name: 'test', cases: [...] });
|
|
15
|
+
* expect(result).toPassGate();
|
|
16
|
+
* });
|
|
17
|
+
* ```
|
|
18
|
+
*/
|
|
19
|
+
import type { OpenAIChatEvalResult } from "../integrations/openai-eval";
|
|
20
|
+
/**
 * Matcher body behind `expect(result).toPassGate()`.
 *
 * @param this Matcher context supplied by the test runner; `isNot` is the
 *   runner's flag for a negated (`.not`) assertion.
 * @param received Result of an `openAIChatEval` run.
 * @returns The matcher outcome: `pass`, plus a lazily evaluated `message`
 *   describing the failure.
 */
export declare function toPassGate(
    this: { isNot?: boolean },
    received: OpenAIChatEvalResult,
): { pass: boolean; message: () => string };
|
|
26
|
+
/**
 * Registers the `toPassGate` matcher on an `expect` implementation.
 * Call it in test setup, before any assertion uses `.toPassGate()`.
 *
 * @param expect Any object exposing `extend(matchers)`, such as the `expect`
 *   exported by Vitest.
 */
export declare function extendExpectWithToPassGate(expect: { extend: (matchers: object) => void }): void;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Vitest/Jest matcher: expect(result).toPassGate()
|
|
4
|
+
* Use with openAIChatEval: expect(await openAIChatEval(...)).toPassGate()
|
|
5
|
+
*
|
|
6
|
+
* @example
|
|
7
|
+
* ```ts
|
|
8
|
+
* import { openAIChatEval } from '@pauly4010/evalai-sdk';
|
|
9
|
+
* import { expect } from 'vitest';
|
|
10
|
+
* import { extendExpectWithToPassGate } from '@pauly4010/evalai-sdk/matchers';
|
|
11
|
+
*
|
|
12
|
+
* extendExpectWithToPassGate(expect);
|
|
13
|
+
*
|
|
14
|
+
* it('passes gate', async () => {
|
|
15
|
+
* const result = await openAIChatEval({ name: 'test', cases: [...] });
|
|
16
|
+
* expect(result).toPassGate();
|
|
17
|
+
* });
|
|
18
|
+
* ```
|
|
19
|
+
*/
|
|
20
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
21
|
+
exports.toPassGate = toPassGate;
|
|
22
|
+
exports.extendExpectWithToPassGate = extendExpectWithToPassGate;
|
|
23
|
+
/**
 * Jest/Vitest matcher body for `expect(result).toPassGate()`.
 *
 * The gate passes only when every case passed and at least one case ran
 * (`passed === total && total > 0`).
 *
 * `pass` reports the raw gate outcome and is deliberately NOT flipped for
 * negated assertions: Jest and Vitest invert `pass` themselves when the
 * matcher is reached through `.not`. Flipping it here as well would cancel
 * that inversion and make `.not.toPassGate()` behave like `.toPassGate()`.
 *
 * @param received Eval result exposing `passed` and `total` counts, plus
 *   `score` (used only in the failure message).
 * @returns `{ pass, message }` where `message` is evaluated lazily by the
 *   test runner when the assertion fails.
 */
function toPassGate(received) {
    const { passed, total, score } = received;
    const gatePassed = total > 0 && passed === total;
    // The runner only asks for the message on failure: a failure with
    // pass === true can only come from `.not`, and vice versa.
    const message = () => gatePassed
        ? `Expected result not to pass gate (${passed}/${total} passed, score ${score})`
        : `Expected result to pass gate but ${passed}/${total} passed (score ${score})`;
    return { pass: gatePassed, message };
}
|
|
32
|
+
/**
 * Installs the `toPassGate` matcher on the supplied `expect` object by
 * handing it to `expect.extend`. Intended to be called in test setup.
 *
 * @param expect Object exposing an `extend(matchers)` method.
 */
function extendExpectWithToPassGate(expect) {
    const matchers = { toPassGate };
    expect.extend(matchers);
}
|
package/dist/pagination.d.ts
CHANGED
|
@@ -64,7 +64,7 @@ export declare function decodeCursor(cursor: string): any;
|
|
|
64
64
|
/**
|
|
65
65
|
* Create pagination metadata from response
|
|
66
66
|
*/
|
|
67
|
-
export declare function createPaginationMeta<T>(items: T[], limit: number, offset: number, total?: number): PaginatedResponse<T>[
|
|
67
|
+
export declare function createPaginationMeta<T>(items: T[], limit: number, offset: number, total?: number): PaginatedResponse<T>["pagination"];
|
|
68
68
|
/**
|
|
69
69
|
* Parse pagination params from cursor or offset
|
|
70
70
|
*/
|
package/dist/pagination.js
CHANGED
|
@@ -78,11 +78,11 @@ async function* autoPaginate(fetchFn, limit = 50) {
|
|
|
78
78
|
*/
|
|
79
79
|
function encodeCursor(data) {
|
|
80
80
|
const json = JSON.stringify(data);
|
|
81
|
-
if (typeof globalThis !==
|
|
81
|
+
if (typeof globalThis !== "undefined" && "btoa" in globalThis) {
|
|
82
82
|
return globalThis.btoa(json);
|
|
83
83
|
}
|
|
84
84
|
else {
|
|
85
|
-
return Buffer.from(json).toString(
|
|
85
|
+
return Buffer.from(json).toString("base64");
|
|
86
86
|
}
|
|
87
87
|
}
|
|
88
88
|
/**
|
|
@@ -91,16 +91,16 @@ function encodeCursor(data) {
|
|
|
91
91
|
function decodeCursor(cursor) {
|
|
92
92
|
try {
|
|
93
93
|
let json;
|
|
94
|
-
if (typeof globalThis !==
|
|
94
|
+
if (typeof globalThis !== "undefined" && "atob" in globalThis) {
|
|
95
95
|
json = globalThis.atob(cursor);
|
|
96
96
|
}
|
|
97
97
|
else {
|
|
98
|
-
json = Buffer.from(cursor,
|
|
98
|
+
json = Buffer.from(cursor, "base64").toString("utf-8");
|
|
99
99
|
}
|
|
100
100
|
return JSON.parse(json);
|
|
101
101
|
}
|
|
102
|
-
catch (
|
|
103
|
-
throw new Error(
|
|
102
|
+
catch (_error) {
|
|
103
|
+
throw new Error("Invalid cursor format");
|
|
104
104
|
}
|
|
105
105
|
}
|
|
106
106
|
/**
|
package/dist/snapshot.js
CHANGED
|
@@ -58,19 +58,19 @@ exports.compareWithSnapshot = compareWithSnapshot;
|
|
|
58
58
|
exports.deleteSnapshot = deleteSnapshot;
|
|
59
59
|
exports.listSnapshots = listSnapshots;
|
|
60
60
|
// Environment check
|
|
61
|
-
const isNode = typeof process !==
|
|
61
|
+
const isNode = typeof process !== "undefined" && process.versions?.node;
|
|
62
62
|
if (!isNode) {
|
|
63
|
-
throw new Error(
|
|
64
|
-
|
|
63
|
+
throw new Error("Snapshot testing requires Node.js and cannot run in browsers. " +
|
|
64
|
+
"This feature uses the filesystem for storing snapshots.");
|
|
65
65
|
}
|
|
66
|
-
const
|
|
67
|
-
const
|
|
68
|
-
const
|
|
66
|
+
const crypto = __importStar(require("node:crypto"));
|
|
67
|
+
const fs = __importStar(require("node:fs"));
|
|
68
|
+
const path = __importStar(require("node:path"));
|
|
69
69
|
/**
|
|
70
70
|
* Snapshot manager
|
|
71
71
|
*/
|
|
72
72
|
class SnapshotManager {
|
|
73
|
-
constructor(snapshotDir =
|
|
73
|
+
constructor(snapshotDir = "./.snapshots") {
|
|
74
74
|
this.snapshotDir = snapshotDir;
|
|
75
75
|
this.ensureSnapshotDir();
|
|
76
76
|
}
|
|
@@ -88,24 +88,24 @@ class SnapshotManager {
|
|
|
88
88
|
getSnapshotPath(name) {
|
|
89
89
|
// Security: prevent empty names
|
|
90
90
|
if (!name || name.trim().length === 0) {
|
|
91
|
-
throw new Error(
|
|
91
|
+
throw new Error("Snapshot name cannot be empty");
|
|
92
92
|
}
|
|
93
93
|
// Security: prevent path traversal
|
|
94
|
-
if (name.includes(
|
|
94
|
+
if (name.includes("..") || name.includes("/") || name.includes("\\")) {
|
|
95
95
|
throw new Error('Snapshot name cannot contain path separators or ".."');
|
|
96
96
|
}
|
|
97
97
|
// Sanitize to alphanumeric, hyphens, and underscores
|
|
98
|
-
const sanitized = name.replace(/[^a-zA-Z0-9-_]/g,
|
|
98
|
+
const sanitized = name.replace(/[^a-zA-Z0-9-_]/g, "-");
|
|
99
99
|
// Security: ensure sanitized name is not empty
|
|
100
100
|
if (sanitized.length === 0) {
|
|
101
|
-
throw new Error(
|
|
101
|
+
throw new Error("Snapshot name must contain at least one alphanumeric character");
|
|
102
102
|
}
|
|
103
103
|
// Security: prevent absolute paths
|
|
104
104
|
const filePath = path.join(this.snapshotDir, `${sanitized}.json`);
|
|
105
105
|
const resolvedPath = path.resolve(filePath);
|
|
106
106
|
const resolvedDir = path.resolve(this.snapshotDir);
|
|
107
107
|
if (!resolvedPath.startsWith(resolvedDir)) {
|
|
108
|
-
throw new Error(
|
|
108
|
+
throw new Error("Invalid snapshot path: path traversal detected");
|
|
109
109
|
}
|
|
110
110
|
return filePath;
|
|
111
111
|
}
|
|
@@ -113,7 +113,7 @@ class SnapshotManager {
|
|
|
113
113
|
* Generate content hash
|
|
114
114
|
*/
|
|
115
115
|
generateHash(content) {
|
|
116
|
-
return crypto.createHash(
|
|
116
|
+
return crypto.createHash("sha256").update(content).digest("hex");
|
|
117
117
|
}
|
|
118
118
|
/**
|
|
119
119
|
* Save a snapshot
|
|
@@ -137,8 +137,8 @@ class SnapshotManager {
|
|
|
137
137
|
createdAt: new Date().toISOString(),
|
|
138
138
|
hash: this.generateHash(output),
|
|
139
139
|
tags: options?.tags,
|
|
140
|
-
metadata: options?.metadata
|
|
141
|
-
}
|
|
140
|
+
metadata: options?.metadata,
|
|
141
|
+
},
|
|
142
142
|
};
|
|
143
143
|
fs.writeFileSync(filePath, JSON.stringify(snapshotData, null, 2));
|
|
144
144
|
return snapshotData;
|
|
@@ -157,7 +157,7 @@ class SnapshotManager {
|
|
|
157
157
|
if (!fs.existsSync(filePath)) {
|
|
158
158
|
throw new Error(`Snapshot '${name}' not found`);
|
|
159
159
|
}
|
|
160
|
-
const content = fs.readFileSync(filePath,
|
|
160
|
+
const content = fs.readFileSync(filePath, "utf-8");
|
|
161
161
|
return JSON.parse(content);
|
|
162
162
|
}
|
|
163
163
|
/**
|
|
@@ -177,14 +177,14 @@ class SnapshotManager {
|
|
|
177
177
|
// Exact match check
|
|
178
178
|
const exactMatch = original === currentOutput;
|
|
179
179
|
// Calculate similarity (simple line-based diff)
|
|
180
|
-
const originalLines = original.split(
|
|
181
|
-
const currentLines = currentOutput.split(
|
|
180
|
+
const originalLines = original.split("\n");
|
|
181
|
+
const currentLines = currentOutput.split("\n");
|
|
182
182
|
const differences = [];
|
|
183
183
|
const maxLines = Math.max(originalLines.length, currentLines.length);
|
|
184
184
|
let matchingLines = 0;
|
|
185
185
|
for (let i = 0; i < maxLines; i++) {
|
|
186
|
-
const origLine = originalLines[i] ||
|
|
187
|
-
const currLine = currentLines[i] ||
|
|
186
|
+
const origLine = originalLines[i] || "";
|
|
187
|
+
const currLine = currentLines[i] || "";
|
|
188
188
|
if (origLine === currLine) {
|
|
189
189
|
matchingLines++;
|
|
190
190
|
}
|
|
@@ -198,7 +198,7 @@ class SnapshotManager {
|
|
|
198
198
|
similarity,
|
|
199
199
|
differences,
|
|
200
200
|
original,
|
|
201
|
-
current: currentOutput
|
|
201
|
+
current: currentOutput,
|
|
202
202
|
};
|
|
203
203
|
}
|
|
204
204
|
/**
|
|
@@ -214,8 +214,8 @@ class SnapshotManager {
|
|
|
214
214
|
const files = fs.readdirSync(this.snapshotDir);
|
|
215
215
|
const snapshots = [];
|
|
216
216
|
for (const file of files) {
|
|
217
|
-
if (file.endsWith(
|
|
218
|
-
const content = fs.readFileSync(path.join(this.snapshotDir, file),
|
|
217
|
+
if (file.endsWith(".json")) {
|
|
218
|
+
const content = fs.readFileSync(path.join(this.snapshotDir, file), "utf-8");
|
|
219
219
|
snapshots.push(JSON.parse(content));
|
|
220
220
|
}
|
|
221
221
|
}
|
|
@@ -249,7 +249,7 @@ class SnapshotManager {
|
|
|
249
249
|
return this.save(name, output, {
|
|
250
250
|
tags: existing.metadata.tags,
|
|
251
251
|
metadata: existing.metadata.metadata,
|
|
252
|
-
overwrite: true
|
|
252
|
+
overwrite: true,
|
|
253
253
|
});
|
|
254
254
|
}
|
|
255
255
|
}
|
package/dist/streaming.js
CHANGED
|
@@ -44,15 +44,15 @@ exports.chunk = chunk;
|
|
|
44
44
|
* ```
|
|
45
45
|
*/
|
|
46
46
|
async function batchProcess(processor, items, options = {}) {
|
|
47
|
-
const { batchSize = 100, parallel = true, delayMs = 0, onProgress, onError, continueOnError = true } = options;
|
|
47
|
+
const { batchSize = 100, parallel = true, delayMs = 0, onProgress, onError, continueOnError = true, } = options;
|
|
48
48
|
const result = {
|
|
49
49
|
successful: [],
|
|
50
50
|
failed: [],
|
|
51
51
|
summary: {
|
|
52
52
|
total: items.length,
|
|
53
53
|
successful: 0,
|
|
54
|
-
failed: 0
|
|
55
|
-
}
|
|
54
|
+
failed: 0,
|
|
55
|
+
},
|
|
56
56
|
};
|
|
57
57
|
// Split into batches
|
|
58
58
|
const batches = [];
|
|
@@ -75,11 +75,11 @@ async function batchProcess(processor, items, options = {}) {
|
|
|
75
75
|
batch: batchIndex,
|
|
76
76
|
index: itemIndex,
|
|
77
77
|
error: error instanceof Error ? error : new Error(String(error)),
|
|
78
|
-
item
|
|
78
|
+
item,
|
|
79
79
|
};
|
|
80
80
|
result.failed.push({
|
|
81
81
|
item,
|
|
82
|
-
error: batchError.error
|
|
82
|
+
error: batchError.error,
|
|
83
83
|
});
|
|
84
84
|
result.summary.failed++;
|
|
85
85
|
if (onError)
|
|
@@ -107,12 +107,12 @@ async function batchProcess(processor, items, options = {}) {
|
|
|
107
107
|
completed: result.summary.successful + result.summary.failed,
|
|
108
108
|
failed: result.summary.failed,
|
|
109
109
|
batch: batchIndex + 1,
|
|
110
|
-
totalBatches: batches.length
|
|
110
|
+
totalBatches: batches.length,
|
|
111
111
|
});
|
|
112
112
|
}
|
|
113
113
|
// Delay between batches
|
|
114
114
|
if (delayMs > 0 && batchIndex < batches.length - 1) {
|
|
115
|
-
await new Promise(resolve => setTimeout(resolve, delayMs));
|
|
115
|
+
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
116
116
|
}
|
|
117
117
|
}
|
|
118
118
|
return result;
|
|
@@ -146,7 +146,7 @@ async function* streamEvaluation(config) {
|
|
|
146
146
|
result,
|
|
147
147
|
passed: true,
|
|
148
148
|
completed,
|
|
149
|
-
total: cases.length
|
|
149
|
+
total: cases.length,
|
|
150
150
|
};
|
|
151
151
|
}
|
|
152
152
|
catch (error) {
|
|
@@ -157,7 +157,7 @@ async function* streamEvaluation(config) {
|
|
|
157
157
|
result: error,
|
|
158
158
|
passed: false,
|
|
159
159
|
completed,
|
|
160
|
-
total: cases.length
|
|
160
|
+
total: cases.length,
|
|
161
161
|
};
|
|
162
162
|
}
|
|
163
163
|
}
|
|
@@ -181,7 +181,7 @@ async function batchRead(fetcher, options = {}) {
|
|
|
181
181
|
while (hasMore && (!maxPages || page < maxPages)) {
|
|
182
182
|
const items = await fetcher({
|
|
183
183
|
limit: pageSize,
|
|
184
|
-
offset: page * pageSize
|
|
184
|
+
offset: page * pageSize,
|
|
185
185
|
});
|
|
186
186
|
if (items.length === 0) {
|
|
187
187
|
hasMore = false;
|
|
@@ -243,7 +243,7 @@ class RateLimiter {
|
|
|
243
243
|
const fn = this.queue.shift();
|
|
244
244
|
if (fn) {
|
|
245
245
|
await fn();
|
|
246
|
-
await new Promise(resolve => setTimeout(resolve, this.interval));
|
|
246
|
+
await new Promise((resolve) => setTimeout(resolve, this.interval));
|
|
247
247
|
}
|
|
248
248
|
}
|
|
249
249
|
this.processing = false;
|
package/dist/testing.d.ts
CHANGED
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
* const results = await suite.run();
|
|
22
22
|
* ```
|
|
23
23
|
*/
|
|
24
|
-
import { AssertionResult } from
|
|
24
|
+
import { type AssertionResult } from "./assertions";
|
|
25
25
|
/**
|
|
26
26
|
* Test suite case definition (different from API TestCase type)
|
|
27
27
|
* Use this for defining test cases in test suites with assertions
|
|
@@ -51,6 +51,8 @@ export interface TestSuiteConfig {
|
|
|
51
51
|
stopOnFailure?: boolean;
|
|
52
52
|
/** Timeout per test case in ms (default: 30000) */
|
|
53
53
|
timeout?: number;
|
|
54
|
+
/** Retry failing cases N times (default: 0). Only failing cases are retried. */
|
|
55
|
+
retries?: number;
|
|
54
56
|
}
|
|
55
57
|
export interface TestSuiteCaseResult {
|
|
56
58
|
/** Test case ID */
|
|
@@ -85,6 +87,8 @@ export interface TestSuiteResult {
|
|
|
85
87
|
durationMs: number;
|
|
86
88
|
/** Individual test results */
|
|
87
89
|
results: TestSuiteCaseResult[];
|
|
90
|
+
/** Case IDs that were retried (flaky recovery) */
|
|
91
|
+
retriedCases?: string[];
|
|
88
92
|
}
|
|
89
93
|
/**
|
|
90
94
|
* Test Suite for declarative evaluation testing
|
|
@@ -182,7 +186,7 @@ export declare function matchesPattern(pattern: RegExp): (output: string) => Ass
|
|
|
182
186
|
* });
|
|
183
187
|
* ```
|
|
184
188
|
*/
|
|
185
|
-
export declare function hasSentiment(sentiment:
|
|
189
|
+
export declare function hasSentiment(sentiment: "positive" | "negative" | "neutral"): (output: string) => AssertionResult;
|
|
186
190
|
/**
|
|
187
191
|
* Helper to create length range assertion
|
|
188
192
|
*
|
package/dist/testing.js
CHANGED
|
@@ -59,16 +59,13 @@ class TestSuite {
|
|
|
59
59
|
if (this.config.executor) {
|
|
60
60
|
const timeout = this.config.timeout || 30000;
|
|
61
61
|
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error(`Test timeout after ${timeout}ms`)), timeout));
|
|
62
|
-
actual = await Promise.race([
|
|
63
|
-
this.config.executor(testCase.input),
|
|
64
|
-
timeoutPromise
|
|
65
|
-
]);
|
|
62
|
+
actual = await Promise.race([this.config.executor(testCase.input), timeoutPromise]);
|
|
66
63
|
}
|
|
67
64
|
else if (testCase.expected) {
|
|
68
65
|
actual = testCase.expected; // Use expected as actual if no executor
|
|
69
66
|
}
|
|
70
67
|
else {
|
|
71
|
-
throw new Error(
|
|
68
|
+
throw new Error("No executor provided and no expected output");
|
|
72
69
|
}
|
|
73
70
|
// Run assertions
|
|
74
71
|
const assertions = [];
|
|
@@ -97,7 +94,7 @@ class TestSuite {
|
|
|
97
94
|
actual,
|
|
98
95
|
passed: allPassed,
|
|
99
96
|
assertions,
|
|
100
|
-
durationMs
|
|
97
|
+
durationMs,
|
|
101
98
|
};
|
|
102
99
|
}
|
|
103
100
|
catch (error) {
|
|
@@ -106,17 +103,17 @@ class TestSuite {
|
|
|
106
103
|
id,
|
|
107
104
|
input: testCase.input,
|
|
108
105
|
expected: testCase.expected,
|
|
109
|
-
actual:
|
|
106
|
+
actual: "",
|
|
110
107
|
passed: false,
|
|
111
108
|
assertions: [],
|
|
112
109
|
durationMs,
|
|
113
|
-
error: error instanceof Error ? error.message : String(error)
|
|
110
|
+
error: error instanceof Error ? error.message : String(error),
|
|
114
111
|
};
|
|
115
112
|
}
|
|
116
113
|
};
|
|
117
114
|
// Run tests
|
|
118
115
|
if (this.config.parallel) {
|
|
119
|
-
results.push(...await Promise.all(this.config.cases.map((tc, i) => runTestCase(tc, i))));
|
|
116
|
+
results.push(...(await Promise.all(this.config.cases.map((tc, i) => runTestCase(tc, i)))));
|
|
120
117
|
}
|
|
121
118
|
else {
|
|
122
119
|
for (let i = 0; i < this.config.cases.length; i++) {
|
|
@@ -127,16 +124,37 @@ class TestSuite {
|
|
|
127
124
|
}
|
|
128
125
|
}
|
|
129
126
|
}
|
|
127
|
+
const retriedCases = [];
|
|
128
|
+
const retries = this.config.retries ?? 0;
|
|
129
|
+
if (retries > 0 && results.length > 0) {
|
|
130
|
+
const failingIndices = results.map((r, i) => (r.passed ? -1 : i)).filter((i) => i >= 0);
|
|
131
|
+
for (let attempt = 0; attempt < retries && failingIndices.length > 0; attempt++) {
|
|
132
|
+
const toRetry = [...failingIndices];
|
|
133
|
+
failingIndices.length = 0;
|
|
134
|
+
for (const i of toRetry) {
|
|
135
|
+
const tc = this.config.cases[i];
|
|
136
|
+
const retryResult = await runTestCase(tc, i);
|
|
137
|
+
if (retryResult.passed) {
|
|
138
|
+
results[i] = retryResult;
|
|
139
|
+
retriedCases.push(retryResult.id);
|
|
140
|
+
}
|
|
141
|
+
else {
|
|
142
|
+
failingIndices.push(i);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
130
147
|
const durationMs = Date.now() - startTime;
|
|
131
|
-
const passed = results.filter(r => r.passed).length;
|
|
132
|
-
const failed = results.filter(r => !r.passed).length;
|
|
148
|
+
const passed = results.filter((r) => r.passed).length;
|
|
149
|
+
const failed = results.filter((r) => !r.passed).length;
|
|
133
150
|
return {
|
|
134
151
|
name: this.name,
|
|
135
152
|
total: results.length,
|
|
136
153
|
passed,
|
|
137
154
|
failed,
|
|
138
155
|
durationMs,
|
|
139
|
-
results
|
|
156
|
+
results,
|
|
157
|
+
...(retriedCases.length > 0 && { retriedCases }),
|
|
140
158
|
};
|
|
141
159
|
}
|
|
142
160
|
/**
|
package/dist/types.d.ts
CHANGED
|
@@ -14,13 +14,13 @@ export interface ClientConfig {
|
|
|
14
14
|
/** Debug mode - enables request/response logging (default: false) */
|
|
15
15
|
debug?: boolean;
|
|
16
16
|
/** Log level for debug mode (default: 'info') */
|
|
17
|
-
logLevel?:
|
|
17
|
+
logLevel?: "trace" | "debug" | "info" | "warn" | "error";
|
|
18
18
|
/** Retry configuration */
|
|
19
19
|
retry?: {
|
|
20
20
|
/** Maximum retry attempts (default: 3) */
|
|
21
21
|
maxAttempts?: number;
|
|
22
22
|
/** Backoff strategy (default: 'exponential') */
|
|
23
|
-
backoff?:
|
|
23
|
+
backoff?: "exponential" | "linear" | "fixed";
|
|
24
24
|
/** Retryable error codes */
|
|
25
25
|
retryableErrors?: string[];
|
|
26
26
|
};
|
|
@@ -55,7 +55,7 @@ export declare const EvaluationTemplates: {
|
|
|
55
55
|
readonly CODE_GENERATION: "code-generation";
|
|
56
56
|
readonly SUMMARIZATION: "summarization";
|
|
57
57
|
};
|
|
58
|
-
export type EvaluationTemplateType = typeof EvaluationTemplates[keyof typeof EvaluationTemplates];
|
|
58
|
+
export type EvaluationTemplateType = (typeof EvaluationTemplates)[keyof typeof EvaluationTemplates];
|
|
59
59
|
/**
|
|
60
60
|
* Feature usage limits for per-organization quotas
|
|
61
61
|
*/
|
|
@@ -95,7 +95,7 @@ export interface Trace<TMetadata = Record<string, any>> {
|
|
|
95
95
|
name: string;
|
|
96
96
|
traceId: string;
|
|
97
97
|
organizationId: number;
|
|
98
|
-
status:
|
|
98
|
+
status: "pending" | "success" | "error";
|
|
99
99
|
durationMs: number | null;
|
|
100
100
|
metadata: TMetadata | null;
|
|
101
101
|
createdAt: string;
|
|
@@ -107,7 +107,7 @@ export interface CreateTraceParams<TMetadata = Record<string, any>> {
|
|
|
107
107
|
name: string;
|
|
108
108
|
traceId: string;
|
|
109
109
|
organizationId?: number;
|
|
110
|
-
status?:
|
|
110
|
+
status?: "pending" | "success" | "error";
|
|
111
111
|
durationMs?: number;
|
|
112
112
|
metadata?: TMetadata;
|
|
113
113
|
}
|
|
@@ -115,7 +115,7 @@ export interface CreateTraceParams<TMetadata = Record<string, any>> {
|
|
|
115
115
|
* Parameters for updating an existing trace
|
|
116
116
|
*/
|
|
117
117
|
export interface UpdateTraceParams<TMetadata = Record<string, any>> {
|
|
118
|
-
status?:
|
|
118
|
+
status?: "pending" | "success" | "error";
|
|
119
119
|
durationMs?: number;
|
|
120
120
|
metadata?: TMetadata;
|
|
121
121
|
}
|
|
@@ -126,7 +126,7 @@ export interface ListTracesParams {
|
|
|
126
126
|
limit?: number;
|
|
127
127
|
offset?: number;
|
|
128
128
|
organizationId?: number;
|
|
129
|
-
status?:
|
|
129
|
+
status?: "pending" | "success" | "error";
|
|
130
130
|
search?: string;
|
|
131
131
|
}
|
|
132
132
|
/**
|
|
@@ -164,7 +164,7 @@ export interface Evaluation<TMetadata = Record<string, any>> {
|
|
|
164
164
|
name: string;
|
|
165
165
|
description: string | null;
|
|
166
166
|
type: string;
|
|
167
|
-
status:
|
|
167
|
+
status: "draft" | "active" | "archived";
|
|
168
168
|
organizationId: number;
|
|
169
169
|
createdBy: number;
|
|
170
170
|
createdAt: string;
|
|
@@ -180,7 +180,7 @@ export interface CreateEvaluationParams {
|
|
|
180
180
|
type: string;
|
|
181
181
|
organizationId?: number;
|
|
182
182
|
createdBy: number;
|
|
183
|
-
status?:
|
|
183
|
+
status?: "draft" | "active" | "archived";
|
|
184
184
|
}
|
|
185
185
|
/**
|
|
186
186
|
* Parameters for updating an evaluation
|
|
@@ -189,7 +189,7 @@ export interface UpdateEvaluationParams {
|
|
|
189
189
|
name?: string;
|
|
190
190
|
description?: string;
|
|
191
191
|
type?: string;
|
|
192
|
-
status?:
|
|
192
|
+
status?: "draft" | "active" | "archived";
|
|
193
193
|
}
|
|
194
194
|
/**
|
|
195
195
|
* Parameters for listing evaluations
|
|
@@ -199,7 +199,7 @@ export interface ListEvaluationsParams {
|
|
|
199
199
|
offset?: number;
|
|
200
200
|
organizationId?: number;
|
|
201
201
|
type?: string;
|
|
202
|
-
status?:
|
|
202
|
+
status?: "draft" | "active" | "archived";
|
|
203
203
|
search?: string;
|
|
204
204
|
}
|
|
205
205
|
/**
|
|
@@ -227,7 +227,7 @@ export interface CreateTestCaseParams {
|
|
|
227
227
|
export interface EvaluationRun {
|
|
228
228
|
id: number;
|
|
229
229
|
evaluationId: number;
|
|
230
|
-
status:
|
|
230
|
+
status: "pending" | "running" | "completed" | "failed";
|
|
231
231
|
results: Record<string, any> | null;
|
|
232
232
|
createdAt: string;
|
|
233
233
|
completedAt: string | null;
|
|
@@ -236,7 +236,7 @@ export interface EvaluationRun {
|
|
|
236
236
|
* Parameters for creating an evaluation run
|
|
237
237
|
*/
|
|
238
238
|
export interface CreateRunParams {
|
|
239
|
-
status?:
|
|
239
|
+
status?: "pending" | "running" | "completed" | "failed";
|
|
240
240
|
results?: Record<string, any>;
|
|
241
241
|
}
|
|
242
242
|
/**
|
|
@@ -300,7 +300,7 @@ export type LLMJudgeData = LLMJudgeResult;
|
|
|
300
300
|
export type AnnotationData = any;
|
|
301
301
|
export interface RetryConfig {
|
|
302
302
|
maxAttempts?: number;
|
|
303
|
-
backoff?:
|
|
303
|
+
backoff?: "exponential" | "linear" | "fixed";
|
|
304
304
|
retryableErrors?: string[];
|
|
305
305
|
}
|
|
306
306
|
export interface GenericMetadata {
|
|
@@ -327,7 +327,7 @@ export interface SnapshotData {
|
|
|
327
327
|
updatedAt: string;
|
|
328
328
|
}
|
|
329
329
|
export interface ExportOptions {
|
|
330
|
-
format:
|
|
330
|
+
format: "json" | "csv" | "jsonl";
|
|
331
331
|
includeTraces?: boolean;
|
|
332
332
|
includeEvaluations?: boolean;
|
|
333
333
|
includeTestCases?: boolean;
|
|
@@ -360,7 +360,7 @@ export interface BatchOptions {
|
|
|
360
360
|
}) => void;
|
|
361
361
|
signal?: AbortSignal;
|
|
362
362
|
}
|
|
363
|
-
export type ExportFormat =
|
|
363
|
+
export type ExportFormat = "json" | "csv" | "jsonl";
|
|
364
364
|
/**
|
|
365
365
|
* Annotation object representing human feedback
|
|
366
366
|
*/
|
|
@@ -412,7 +412,7 @@ export interface AnnotationTask {
|
|
|
412
412
|
description: string | null;
|
|
413
413
|
instructions: string | null;
|
|
414
414
|
type: string;
|
|
415
|
-
status:
|
|
415
|
+
status: "pending" | "in_progress" | "completed" | "archived";
|
|
416
416
|
organizationId: number;
|
|
417
417
|
annotationSettings: Record<string, any>;
|
|
418
418
|
createdAt: string;
|
|
@@ -434,7 +434,7 @@ export interface CreateAnnotationTaskParams {
|
|
|
434
434
|
*/
|
|
435
435
|
export interface ListAnnotationTasksParams {
|
|
436
436
|
organizationId?: number;
|
|
437
|
-
status?:
|
|
437
|
+
status?: "pending" | "in_progress" | "completed" | "archived";
|
|
438
438
|
limit?: number;
|
|
439
439
|
offset?: number;
|
|
440
440
|
}
|
|
@@ -536,7 +536,7 @@ export interface Webhook {
|
|
|
536
536
|
url: string;
|
|
537
537
|
events: string[];
|
|
538
538
|
secret: string;
|
|
539
|
-
status:
|
|
539
|
+
status: "active" | "inactive";
|
|
540
540
|
lastTriggeredAt: string | null;
|
|
541
541
|
createdAt: string;
|
|
542
542
|
updatedAt: string;
|
|
@@ -555,14 +555,14 @@ export interface CreateWebhookParams {
|
|
|
555
555
|
export interface UpdateWebhookParams {
|
|
556
556
|
url?: string;
|
|
557
557
|
events?: string[];
|
|
558
|
-
status?:
|
|
558
|
+
status?: "active" | "inactive";
|
|
559
559
|
}
|
|
560
560
|
/**
|
|
561
561
|
* Parameters for listing webhooks
|
|
562
562
|
*/
|
|
563
563
|
export interface ListWebhooksParams {
|
|
564
564
|
organizationId: number;
|
|
565
|
-
status?:
|
|
565
|
+
status?: "active" | "inactive";
|
|
566
566
|
limit?: number;
|
|
567
567
|
offset?: number;
|
|
568
568
|
}
|
|
@@ -716,7 +716,7 @@ export interface Organization {
|
|
|
716
716
|
name: string;
|
|
717
717
|
slug: string;
|
|
718
718
|
plan: string;
|
|
719
|
-
status:
|
|
719
|
+
status: "active" | "suspended" | "cancelled";
|
|
720
720
|
createdAt: string;
|
|
721
721
|
updatedAt: string;
|
|
722
722
|
metadata?: Record<string, any>;
|