@pauly4010/evalai-sdk 1.5.7 → 1.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -1
- package/README.md +12 -3
- package/dist/assertions.d.ts +11 -11
- package/dist/assertions.js +1 -1
- package/dist/batch.d.ts +3 -3
- package/dist/batch.js +1 -1
- package/dist/cache.d.ts +3 -3
- package/dist/cache.js +1 -1
- package/dist/cli/formatters/github.js +1 -1
- package/dist/cli/formatters/human.js +1 -1
- package/dist/cli/formatters/pr-comment.js +1 -1
- package/dist/client.d.ts +3 -3
- package/dist/client.js +3 -2
- package/dist/client.request.test.d.ts +1 -0
- package/dist/client.request.test.js +157 -0
- package/dist/context.d.ts +4 -4
- package/dist/context.js +1 -1
- package/dist/errors.d.ts +5 -5
- package/dist/errors.js +21 -24
- package/dist/export.d.ts +1 -1
- package/dist/export.js +4 -2
- package/dist/integrations/openai-eval.js +1 -1
- package/dist/logger.d.ts +10 -10
- package/dist/pagination.d.ts +2 -2
- package/dist/snapshot.d.ts +3 -3
- package/dist/streaming.d.ts +4 -4
- package/dist/testing.d.ts +1 -1
- package/dist/types.d.ts +33 -33
- package/dist/workflows.d.ts +29 -18
- package/package.json +2 -3
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,23 @@ All notable changes to the @pauly4010/evalai-sdk package will be documented in t
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.5.8] - 2026-02-22
|
|
9
|
+
|
|
10
|
+
### 🐛 Fixed
|
|
11
|
+
|
|
12
|
+
- **secureRoute TypeScript overload compatibility** — Fixed implementation signature to use `ctx: any` for proper overload compatibility
|
|
13
|
+
- **Test infrastructure fixes** — Replaced invalid `expect.unknown()` with `expect.any()` across test files
|
|
14
|
+
- **NextRequest constructor** — Fixed test mocks using incorrect `(NextRequest as any)()` syntax
|
|
15
|
+
- **304 response handling** — Fixed exports API returning invalid 304 response with body
|
|
16
|
+
- **Error catalog tests** — Updated test expectations to match actual EvalAIError behavior
|
|
17
|
+
- **Redis cache timeout** — Added explicit timeout to prevent test hangs
|
|
18
|
+
|
|
19
|
+
### 🔧 Changed
|
|
20
|
+
|
|
21
|
+
- **Biome formatting** — Applied consistent line endings across 199 files
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
8
25
|
## [1.5.7] - 2026-02-20
|
|
9
26
|
|
|
10
27
|
### 📚 Documentation
|
|
@@ -32,7 +49,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
32
49
|
- **`--warnDrop <n>`** — Introduce a WARN band when score drops > `warnDrop` but < `maxDrop`
|
|
33
50
|
- **Gate verdicts:** PASS, WARN, FAIL
|
|
34
51
|
- **Profiles:** `strict` (warnDrop: 0), `balanced` (warnDrop: 1), `fast` (warnDrop: 2)
|
|
35
|
-
- **`--fail-on-flake`** — Fail the gate if
|
|
52
|
+
- **`--fail-on-flake`** — Fail the gate if any case is flagged as flaky (partial pass rate across determinism runs)
|
|
36
53
|
|
|
37
54
|
#### Determinism & flake intelligence
|
|
38
55
|
|
package/README.md
CHANGED
|
@@ -99,7 +99,7 @@ Key flags
|
|
|
99
99
|
|
|
100
100
|
--maxDrop → hard regression fail
|
|
101
101
|
|
|
102
|
-
--fail-on-flake → fail if
|
|
102
|
+
--fail-on-flake → fail if any test is unstable
|
|
103
103
|
|
|
104
104
|
This lets teams tune signal vs noise in CI.
|
|
105
105
|
|
|
@@ -190,7 +190,7 @@ Option Description
|
|
|
190
190
|
--allowWeakEvidence Permit weak evidence
|
|
191
191
|
--policy <name> HIPAA, SOC2, GDPR, PCI_DSS, FINRA_4511
|
|
192
192
|
--baseline <mode> published, previous, production
|
|
193
|
-
--fail-on-flake Fail if
|
|
193
|
+
--fail-on-flake Fail if any case is flaky
|
|
194
194
|
--baseUrl <url> Override API base URL
|
|
195
195
|
|
|
196
196
|
Exit codes
|
|
@@ -257,7 +257,16 @@ await openai.chat.completions.create({
|
|
|
257
257
|
|
|
258
258
|
|
|
259
259
|
🧭 Changelog
|
|
260
|
-
v1.5.
|
|
260
|
+
v1.5.8 (Latest)
|
|
261
|
+
Fixed secureRoute TypeScript overload compatibility
|
|
262
|
+
|
|
263
|
+
Fixed test infrastructure (expect.any, NextRequest constructor)
|
|
264
|
+
|
|
265
|
+
Fixed 304 response handling in exports API
|
|
266
|
+
|
|
267
|
+
Improved error catalog test coverage
|
|
268
|
+
|
|
269
|
+
v1.5.7
|
|
261
270
|
Documentation updates for CJS compatibility
|
|
262
271
|
|
|
263
272
|
Version alignment across README and changelog
|
package/dist/assertions.d.ts
CHANGED
|
@@ -17,26 +17,26 @@
|
|
|
17
17
|
export interface AssertionResult {
|
|
18
18
|
name: string;
|
|
19
19
|
passed: boolean;
|
|
20
|
-
expected:
|
|
21
|
-
actual:
|
|
20
|
+
expected: unknown;
|
|
21
|
+
actual: unknown;
|
|
22
22
|
message?: string;
|
|
23
23
|
}
|
|
24
24
|
export declare class AssertionError extends Error {
|
|
25
|
-
expected:
|
|
26
|
-
actual:
|
|
27
|
-
constructor(message: string, expected:
|
|
25
|
+
expected: unknown;
|
|
26
|
+
actual: unknown;
|
|
27
|
+
constructor(message: string, expected: unknown, actual: unknown);
|
|
28
28
|
}
|
|
29
29
|
/**
|
|
30
30
|
* Fluent assertion builder
|
|
31
31
|
*/
|
|
32
32
|
export declare class Expectation {
|
|
33
33
|
private value;
|
|
34
|
-
constructor(value:
|
|
34
|
+
constructor(value: unknown);
|
|
35
35
|
/**
|
|
36
36
|
* Assert value equals expected
|
|
37
37
|
* @example expect(output).toEqual("Hello")
|
|
38
38
|
*/
|
|
39
|
-
toEqual(expected:
|
|
39
|
+
toEqual(expected: unknown, message?: string): AssertionResult;
|
|
40
40
|
/**
|
|
41
41
|
* Assert value contains substring
|
|
42
42
|
* @example expect(output).toContain("help")
|
|
@@ -71,7 +71,7 @@ export declare class Expectation {
|
|
|
71
71
|
* Assert JSON matches schema
|
|
72
72
|
* @example expect(output).toMatchJSON({ status: 'success' })
|
|
73
73
|
*/
|
|
74
|
-
toMatchJSON(schema: Record<string,
|
|
74
|
+
toMatchJSON(schema: Record<string, unknown>, message?: string): AssertionResult;
|
|
75
75
|
/**
|
|
76
76
|
* Assert value has expected sentiment
|
|
77
77
|
* @example expect(output).toHaveSentiment('positive')
|
|
@@ -148,7 +148,7 @@ export declare class Expectation {
|
|
|
148
148
|
* expect(output).toHaveLength({ min: 10, max: 100 });
|
|
149
149
|
* ```
|
|
150
150
|
*/
|
|
151
|
-
export declare function expect(value:
|
|
151
|
+
export declare function expect(value: unknown): Expectation;
|
|
152
152
|
/**
|
|
153
153
|
* Run multiple assertions and collect results
|
|
154
154
|
*
|
|
@@ -178,12 +178,12 @@ export declare function withinRange(value: number, min: number, max: number): bo
|
|
|
178
178
|
export declare function isValidEmail(email: string): boolean;
|
|
179
179
|
export declare function isValidURL(url: string): boolean;
|
|
180
180
|
export declare function hasNoHallucinations(text: string, groundTruth: string[]): boolean;
|
|
181
|
-
export declare function matchesSchema(value:
|
|
181
|
+
export declare function matchesSchema(value: unknown, schema: Record<string, unknown>): boolean;
|
|
182
182
|
export declare function hasReadabilityScore(text: string, minScore: number): boolean;
|
|
183
183
|
export declare function containsLanguage(text: string, language: string): boolean;
|
|
184
184
|
export declare function hasFactualAccuracy(text: string, facts: string[]): boolean;
|
|
185
185
|
export declare function respondedWithinTime(startTime: number, maxMs: number): boolean;
|
|
186
186
|
export declare function hasNoToxicity(text: string): boolean;
|
|
187
187
|
export declare function followsInstructions(text: string, instructions: string[]): boolean;
|
|
188
|
-
export declare function containsAllRequiredFields(obj:
|
|
188
|
+
export declare function containsAllRequiredFields(obj: unknown, requiredFields: string[]): boolean;
|
|
189
189
|
export declare function hasValidCodeSyntax(code: string, language: string): boolean;
|
package/dist/assertions.js
CHANGED
|
@@ -612,7 +612,7 @@ function followsInstructions(text, instructions) {
|
|
|
612
612
|
});
|
|
613
613
|
}
|
|
614
614
|
function containsAllRequiredFields(obj, requiredFields) {
|
|
615
|
-
return requiredFields.every((field) => field in obj);
|
|
615
|
+
return requiredFields.every((field) => obj && typeof obj === "object" && field in obj);
|
|
616
616
|
}
|
|
617
617
|
function hasValidCodeSyntax(code, language) {
|
|
618
618
|
// This is a simplified implementation
|
package/dist/batch.d.ts
CHANGED
|
@@ -6,13 +6,13 @@ export interface BatchRequest {
|
|
|
6
6
|
id: string;
|
|
7
7
|
method: string;
|
|
8
8
|
endpoint: string;
|
|
9
|
-
body?:
|
|
9
|
+
body?: unknown;
|
|
10
10
|
headers?: Record<string, string>;
|
|
11
11
|
}
|
|
12
12
|
export interface BatchResponse {
|
|
13
13
|
id: string;
|
|
14
14
|
status: number;
|
|
15
|
-
data?:
|
|
15
|
+
data?: unknown;
|
|
16
16
|
error?: string;
|
|
17
17
|
}
|
|
18
18
|
/**
|
|
@@ -32,7 +32,7 @@ export declare class RequestBatcher {
|
|
|
32
32
|
/**
|
|
33
33
|
* Add request to batch queue
|
|
34
34
|
*/
|
|
35
|
-
enqueue(method: string, endpoint: string, body?:
|
|
35
|
+
enqueue(method: string, endpoint: string, body?: unknown, headers?: Record<string, string>): Promise<unknown>;
|
|
36
36
|
/**
|
|
37
37
|
* Schedule batch processing after delay
|
|
38
38
|
*/
|
package/dist/batch.js
CHANGED
|
@@ -85,7 +85,7 @@ class RequestBatcher {
|
|
|
85
85
|
}
|
|
86
86
|
}
|
|
87
87
|
}
|
|
88
|
-
// Handle
|
|
88
|
+
// Handle any requests that didn't get a response
|
|
89
89
|
for (const item of batch) {
|
|
90
90
|
if (!responses.find((r) => r.id === item.id)) {
|
|
91
91
|
item.reject(new Error("No response received for request"));
|
package/dist/cache.d.ts
CHANGED
|
@@ -17,15 +17,15 @@ export declare class RequestCache {
|
|
|
17
17
|
/**
|
|
18
18
|
* Get cached response if valid
|
|
19
19
|
*/
|
|
20
|
-
get<T>(method: string, url: string, params?:
|
|
20
|
+
get<T>(method: string, url: string, params?: unknown): T | null;
|
|
21
21
|
/**
|
|
22
22
|
* Store response in cache
|
|
23
23
|
*/
|
|
24
|
-
set<T>(method: string, url: string, data: T, ttl: number, params?:
|
|
24
|
+
set<T>(method: string, url: string, data: T, ttl: number, params?: unknown): void;
|
|
25
25
|
/**
|
|
26
26
|
* Invalidate specific cache entry
|
|
27
27
|
*/
|
|
28
|
-
invalidate(method: string, url: string, params?:
|
|
28
|
+
invalidate(method: string, url: string, params?: unknown): void;
|
|
29
29
|
/**
|
|
30
30
|
* Invalidate all cache entries matching a pattern
|
|
31
31
|
*/
|
package/dist/cache.js
CHANGED
|
@@ -69,7 +69,7 @@ class RequestCache {
|
|
|
69
69
|
* Invalidate all cache entries matching a pattern
|
|
70
70
|
*/
|
|
71
71
|
invalidatePattern(pattern) {
|
|
72
|
-
for (const key of this.cache.keys()) {
|
|
72
|
+
for (const key of Array.from(this.cache.keys())) {
|
|
73
73
|
if (key.includes(pattern)) {
|
|
74
74
|
this.cache.delete(key);
|
|
75
75
|
}
|
|
@@ -81,7 +81,7 @@ function appendStepSummary(report) {
|
|
|
81
81
|
const exp = (0, snippet_1.truncateSnippet)(fc.expectedOutput ?? fc.expectedSnippet, 80);
|
|
82
82
|
const out = (0, snippet_1.truncateSnippet)(fc.output ?? fc.outputSnippet, 80);
|
|
83
83
|
const reason = out ? `got "${out}"` : "no output";
|
|
84
|
-
lines.push(`- **${(0, snippet_1.truncateSnippet)(label, 60)}** — expected: ${exp || "(
|
|
84
|
+
lines.push(`- **${(0, snippet_1.truncateSnippet)(label, 60)}** — expected: ${exp || "(unknown)"}, ${reason}`);
|
|
85
85
|
}
|
|
86
86
|
if (failedCases.length > 10) {
|
|
87
87
|
lines.push(`- _+ ${failedCases.length - 10} more_`);
|
|
@@ -30,7 +30,7 @@ function formatHuman(report) {
|
|
|
30
30
|
const exp = (0, snippet_1.truncateSnippet)(fc.expectedOutput ?? fc.expectedSnippet, 50);
|
|
31
31
|
const out = (0, snippet_1.truncateSnippet)(fc.output ?? fc.outputSnippet, 50);
|
|
32
32
|
const reason = out ? `got "${out}"` : "no output";
|
|
33
|
-
lines.push(` - "${(0, snippet_1.truncateSnippet)(label, 50)}" → expected: ${exp || "(
|
|
33
|
+
lines.push(` - "${(0, snippet_1.truncateSnippet)(label, 50)}" → expected: ${exp || "(unknown)"}, ${reason}`);
|
|
34
34
|
}
|
|
35
35
|
if (failedCases.length > toShow.length) {
|
|
36
36
|
lines.push(` + ${failedCases.length - toShow.length} more`);
|
package/dist/client.d.ts
CHANGED
|
@@ -111,7 +111,7 @@ declare class TraceAPI {
|
|
|
111
111
|
* });
|
|
112
112
|
* ```
|
|
113
113
|
*/
|
|
114
|
-
create<TMetadata = Record<string,
|
|
114
|
+
create<TMetadata = Record<string, unknown>>(params: CreateTraceParams<TMetadata>): Promise<Trace<TMetadata>>;
|
|
115
115
|
/**
|
|
116
116
|
* List traces with optional filtering
|
|
117
117
|
*/
|
|
@@ -138,7 +138,7 @@ declare class TraceAPI {
|
|
|
138
138
|
* });
|
|
139
139
|
* ```
|
|
140
140
|
*/
|
|
141
|
-
update<TMetadata = Record<string,
|
|
141
|
+
update<TMetadata = Record<string, unknown>>(id: number, params: UpdateTraceParams<TMetadata>): Promise<Trace<TMetadata>>;
|
|
142
142
|
/**
|
|
143
143
|
* Create a span for a trace
|
|
144
144
|
*/
|
|
@@ -208,7 +208,7 @@ declare class LLMJudgeAPI {
|
|
|
208
208
|
*/
|
|
209
209
|
evaluate(params: RunLLMJudgeParams): Promise<{
|
|
210
210
|
result: LLMJudgeResult;
|
|
211
|
-
config:
|
|
211
|
+
config: unknown;
|
|
212
212
|
}>;
|
|
213
213
|
/**
|
|
214
214
|
* Create an LLM judge configuration
|
package/dist/client.js
CHANGED
|
@@ -94,11 +94,12 @@ class AIEvalClient {
|
|
|
94
94
|
results.push({ id: req.id, status: 200, data });
|
|
95
95
|
}
|
|
96
96
|
catch (err) {
|
|
97
|
+
const errorObj = err;
|
|
97
98
|
results.push({
|
|
98
99
|
id: req.id,
|
|
99
|
-
status:
|
|
100
|
+
status: errorObj?.statusCode || 500,
|
|
100
101
|
data: null,
|
|
101
|
-
error:
|
|
102
|
+
error: errorObj?.message || "Unknown error",
|
|
102
103
|
});
|
|
103
104
|
}
|
|
104
105
|
})();
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
const vitest_1 = require("vitest");
|
|
37
|
+
const client_1 = require("./client");
|
|
38
|
+
const errorsModule = __importStar(require("./errors"));
|
|
39
|
+
vitest_1.vi.mock("./cache", () => {
|
|
40
|
+
const cacheTracker = { invalidatedPatterns: [] };
|
|
41
|
+
const shouldCache = vitest_1.vi.fn().mockReturnValue(true);
|
|
42
|
+
const getTTL = vitest_1.vi.fn().mockReturnValue(1000);
|
|
43
|
+
const makeKey = (method, url, params) => `${method}:${url}:${JSON.stringify(params ?? null)}`;
|
|
44
|
+
return {
|
|
45
|
+
__esModule: true,
|
|
46
|
+
shouldCache,
|
|
47
|
+
getTTL,
|
|
48
|
+
cacheTracker,
|
|
49
|
+
RequestCache: class RequestCache {
|
|
50
|
+
constructor() {
|
|
51
|
+
this.store = new Map();
|
|
52
|
+
}
|
|
53
|
+
get(method, url, params) {
|
|
54
|
+
const key = makeKey(method, url, params);
|
|
55
|
+
return this.store.get(key) ?? null;
|
|
56
|
+
}
|
|
57
|
+
set(method, url, data, _ttl, params) {
|
|
58
|
+
const key = makeKey(method, url, params);
|
|
59
|
+
this.store.set(key, data);
|
|
60
|
+
}
|
|
61
|
+
invalidatePattern(pattern) {
|
|
62
|
+
cacheTracker.invalidatedPatterns.push(pattern);
|
|
63
|
+
}
|
|
64
|
+
invalidate(_method, _url, _params) {
|
|
65
|
+
// no-op for tests
|
|
66
|
+
}
|
|
67
|
+
clear() {
|
|
68
|
+
this.store.clear();
|
|
69
|
+
}
|
|
70
|
+
},
|
|
71
|
+
};
|
|
72
|
+
});
|
|
73
|
+
const cache_1 = require("./cache");
|
|
74
|
+
(0, vitest_1.describe)("AIEvalClient.request", () => {
|
|
75
|
+
(0, vitest_1.beforeEach)(() => {
|
|
76
|
+
process.env.EVALAI_API_KEY = "test";
|
|
77
|
+
cache_1.shouldCache.mockReset().mockReturnValue(true);
|
|
78
|
+
cache_1.getTTL.mockReset().mockReturnValue(1000);
|
|
79
|
+
cache_1.cacheTracker.invalidatedPatterns.length = 0;
|
|
80
|
+
});
|
|
81
|
+
(0, vitest_1.it)("caches GET responses and reuses data without re-fetching", async () => {
|
|
82
|
+
const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
|
|
83
|
+
const payload = { items: [1, 2, 3] };
|
|
84
|
+
const fetchMock = vitest_1.vi.fn().mockResolvedValue({
|
|
85
|
+
ok: true,
|
|
86
|
+
status: 200,
|
|
87
|
+
json: async () => payload,
|
|
88
|
+
});
|
|
89
|
+
globalThis.fetch = fetchMock;
|
|
90
|
+
const first = await client.request("/api/traces", { method: "GET" });
|
|
91
|
+
const second = await client.request("/api/traces", { method: "GET" });
|
|
92
|
+
(0, vitest_1.expect)(first).toEqual(payload);
|
|
93
|
+
(0, vitest_1.expect)(second).toEqual(payload);
|
|
94
|
+
(0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(1);
|
|
95
|
+
});
|
|
96
|
+
(0, vitest_1.it)("propagates non-ok responses as SDK errors", async () => {
|
|
97
|
+
const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost" });
|
|
98
|
+
const fetchMock = vitest_1.vi.fn().mockResolvedValue({
|
|
99
|
+
ok: false,
|
|
100
|
+
status: 429,
|
|
101
|
+
json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
|
|
102
|
+
});
|
|
103
|
+
globalThis.fetch = fetchMock;
|
|
104
|
+
const createErrorSpy = vitest_1.vi
|
|
105
|
+
.spyOn(errorsModule, "createErrorFromResponse")
|
|
106
|
+
.mockReturnValue(new errorsModule.EvalAIError("rate limited", "RATE_LIMIT_EXCEEDED", 429));
|
|
107
|
+
await (0, vitest_1.expect)(client.request("/api/fail", { method: "GET" })).rejects.toHaveProperty("code", "RATE_LIMIT_EXCEEDED");
|
|
108
|
+
createErrorSpy.mockRestore();
|
|
109
|
+
});
|
|
110
|
+
(0, vitest_1.it)("retries on retryable SDK errors and eventually succeeds", async () => {
|
|
111
|
+
const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
|
|
112
|
+
vitest_1.vi.spyOn(client, "calculateBackoff").mockReturnValue(0);
|
|
113
|
+
const failureResponse = {
|
|
114
|
+
ok: false,
|
|
115
|
+
status: 429,
|
|
116
|
+
json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
|
|
117
|
+
};
|
|
118
|
+
const successResponse = {
|
|
119
|
+
ok: true,
|
|
120
|
+
status: 200,
|
|
121
|
+
json: async () => ({ ok: true }),
|
|
122
|
+
};
|
|
123
|
+
const createErrorSpy = vitest_1.vi
|
|
124
|
+
.spyOn(errorsModule, "createErrorFromResponse")
|
|
125
|
+
.mockReturnValue(new errorsModule.EvalAIError("rate limited", "RATE_LIMIT_EXCEEDED", 429));
|
|
126
|
+
const fetchMock = vitest_1.vi
|
|
127
|
+
.fn()
|
|
128
|
+
.mockResolvedValueOnce(failureResponse)
|
|
129
|
+
.mockResolvedValueOnce(successResponse);
|
|
130
|
+
globalThis.fetch = fetchMock;
|
|
131
|
+
const result = await client.request("/api/retry", { method: "GET" });
|
|
132
|
+
(0, vitest_1.expect)(result).toEqual({ ok: true });
|
|
133
|
+
(0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(2);
|
|
134
|
+
createErrorSpy.mockRestore();
|
|
135
|
+
});
|
|
136
|
+
(0, vitest_1.it)("throws a TIMEOUT SDK error when fetch aborts", async () => {
|
|
137
|
+
const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
|
|
138
|
+
const abortError = Object.assign(new Error("aborted"), { name: "AbortError" });
|
|
139
|
+
const fetchMock = vitest_1.vi.fn().mockRejectedValue(abortError);
|
|
140
|
+
globalThis.fetch = fetchMock;
|
|
141
|
+
await (0, vitest_1.expect)(client.request("/api/timeout", { method: "GET" })).rejects.toMatchObject({
|
|
142
|
+
code: "TIMEOUT",
|
|
143
|
+
});
|
|
144
|
+
});
|
|
145
|
+
(0, vitest_1.it)("invalidates related cache entries for mutation requests", async () => {
|
|
146
|
+
const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
|
|
147
|
+
cache_1.shouldCache.mockReturnValue(false);
|
|
148
|
+
const fetchMock = vitest_1.vi.fn().mockResolvedValue({
|
|
149
|
+
ok: true,
|
|
150
|
+
status: 201,
|
|
151
|
+
json: async () => ({ result: "ok" }),
|
|
152
|
+
});
|
|
153
|
+
globalThis.fetch = fetchMock;
|
|
154
|
+
await client.request("/api/evaluations", { method: "POST", body: JSON.stringify({}) });
|
|
155
|
+
(0, vitest_1.expect)(cache_1.cacheTracker.invalidatedPatterns).toContain("evaluations");
|
|
156
|
+
});
|
|
157
|
+
});
|
package/dist/context.d.ts
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
* Context metadata that will be automatically injected
|
|
23
23
|
*/
|
|
24
24
|
export interface ContextMetadata {
|
|
25
|
-
[key: string]:
|
|
25
|
+
[key: string]: unknown;
|
|
26
26
|
}
|
|
27
27
|
/**
|
|
28
28
|
* Context manager for automatic metadata propagation
|
|
@@ -75,7 +75,7 @@ export declare class EvalContext {
|
|
|
75
75
|
*/
|
|
76
76
|
export declare function createContext(metadata: ContextMetadata): EvalContext;
|
|
77
77
|
/**
|
|
78
|
-
* Get the current context metadata (if
|
|
78
|
+
* Get the current context metadata (if any)
|
|
79
79
|
*
|
|
80
80
|
* @example
|
|
81
81
|
* ```typescript
|
|
@@ -98,7 +98,7 @@ export declare function getCurrentContext(): ContextMetadata | undefined;
|
|
|
98
98
|
* };
|
|
99
99
|
* ```
|
|
100
100
|
*/
|
|
101
|
-
export declare function mergeWithContext(metadata?: Record<string,
|
|
101
|
+
export declare function mergeWithContext(metadata?: Record<string, unknown>): Record<string, unknown>;
|
|
102
102
|
/**
|
|
103
103
|
* Run with nested context (merges parent context)
|
|
104
104
|
*
|
|
@@ -131,4 +131,4 @@ export declare function withContextSync<T>(metadata: ContextMetadata, fn: () =>
|
|
|
131
131
|
* }
|
|
132
132
|
* ```
|
|
133
133
|
*/
|
|
134
|
-
export declare function WithContext(metadata: ContextMetadata): (_target:
|
|
134
|
+
export declare function WithContext(metadata: ContextMetadata): (_target: unknown, _propertyKey: string, descriptor: PropertyDescriptor) => PropertyDescriptor;
|
package/dist/context.js
CHANGED
package/dist/errors.d.ts
CHANGED
|
@@ -42,14 +42,14 @@ export declare class EvalAIError extends Error {
|
|
|
42
42
|
/** Whether this error is retryable */
|
|
43
43
|
retryable: boolean;
|
|
44
44
|
/** Additional error details from the API */
|
|
45
|
-
details?:
|
|
45
|
+
details?: unknown;
|
|
46
46
|
/** When to retry (for rate limit errors) in seconds */
|
|
47
47
|
retryAfter?: number;
|
|
48
48
|
/** When the limit resets (for feature limit errors) */
|
|
49
49
|
resetAt?: Date;
|
|
50
50
|
/** Request ID from API (for correlation/debugging) */
|
|
51
51
|
requestId?: string;
|
|
52
|
-
constructor(message: string, code: string, statusCode: number, details?:
|
|
52
|
+
constructor(message: string, code: string, statusCode: number, details?: unknown);
|
|
53
53
|
/**
|
|
54
54
|
* Get formatted error message with solutions
|
|
55
55
|
*/
|
|
@@ -61,12 +61,12 @@ export declare class EvalAIError extends Error {
|
|
|
61
61
|
/**
|
|
62
62
|
* Convert to JSON for logging
|
|
63
63
|
*/
|
|
64
|
-
toJSON(): Record<string,
|
|
64
|
+
toJSON(): Record<string, unknown>;
|
|
65
65
|
}
|
|
66
66
|
/**
|
|
67
67
|
* Create an error from an HTTP response
|
|
68
68
|
*/
|
|
69
|
-
export declare function createErrorFromResponse(response: Response, data:
|
|
69
|
+
export declare function createErrorFromResponse(response: Response, data: unknown): EvalAIError;
|
|
70
70
|
export declare class RateLimitError extends EvalAIError {
|
|
71
71
|
constructor(message: string, retryAfter?: number);
|
|
72
72
|
}
|
|
@@ -74,7 +74,7 @@ export declare class AuthenticationError extends EvalAIError {
|
|
|
74
74
|
constructor(message?: string);
|
|
75
75
|
}
|
|
76
76
|
export declare class ValidationError extends EvalAIError {
|
|
77
|
-
constructor(message?: string, details?:
|
|
77
|
+
constructor(message?: string, details?: unknown);
|
|
78
78
|
}
|
|
79
79
|
export declare class NetworkError extends EvalAIError {
|
|
80
80
|
constructor(message?: string);
|
package/dist/errors.js
CHANGED
|
@@ -160,27 +160,21 @@ class EvalAIError extends Error {
|
|
|
160
160
|
this.code = code;
|
|
161
161
|
this.statusCode = statusCode;
|
|
162
162
|
this.details = details;
|
|
163
|
-
//
|
|
164
|
-
const
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
this.retryable = errorDoc.retryable;
|
|
169
|
-
}
|
|
170
|
-
else {
|
|
171
|
-
this.documentation = "https://docs.ai-eval-platform.com/errors";
|
|
172
|
-
this.solutions = ["Check the API documentation for more information"];
|
|
173
|
-
this.retryable = false;
|
|
174
|
-
}
|
|
163
|
+
// Initialize required properties from ERROR_DOCS
|
|
164
|
+
const doc = ERROR_DOCS[code];
|
|
165
|
+
this.documentation = doc?.documentation ?? `https://docs.ai-eval-platform.com/errors/${code}`;
|
|
166
|
+
this.solutions = doc?.solutions ?? ["Check the error details for more information"];
|
|
167
|
+
this.retryable = doc?.retryable ?? false;
|
|
175
168
|
// Extract retry-after for rate limits
|
|
176
|
-
|
|
177
|
-
|
|
169
|
+
const errorDetails = details;
|
|
170
|
+
if (code === "RATE_LIMIT_EXCEEDED" && errorDetails?.retryAfter) {
|
|
171
|
+
this.retryAfter = errorDetails.retryAfter;
|
|
178
172
|
}
|
|
179
173
|
// Extract reset time for feature limits
|
|
180
|
-
if (code === "FEATURE_LIMIT_REACHED" &&
|
|
181
|
-
this.resetAt = new Date(
|
|
174
|
+
if (code === "FEATURE_LIMIT_REACHED" && errorDetails?.resetAt) {
|
|
175
|
+
this.resetAt = new Date(errorDetails.resetAt);
|
|
182
176
|
}
|
|
183
|
-
this.requestId =
|
|
177
|
+
this.requestId = errorDetails?.error?.requestId ?? errorDetails?.requestId;
|
|
184
178
|
// Ensure proper prototype chain
|
|
185
179
|
Object.setPrototypeOf(this, EvalAIError.prototype);
|
|
186
180
|
}
|
|
@@ -234,14 +228,17 @@ exports.SDKError = EvalAIError;
|
|
|
234
228
|
*/
|
|
235
229
|
function createErrorFromResponse(response, data) {
|
|
236
230
|
const status = response.status;
|
|
237
|
-
const
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
const
|
|
231
|
+
const errorData = data;
|
|
232
|
+
const errObj = errorData?.error && typeof errorData.error === "object"
|
|
233
|
+
? errorData.error
|
|
234
|
+
: errorData;
|
|
235
|
+
let code = errObj?.code ?? errorData?.code ?? "UNKNOWN_ERROR";
|
|
236
|
+
const message = typeof errorData?.error === "string"
|
|
237
|
+
? errorData.error
|
|
238
|
+
: (errObj?.message ?? errorData?.message ?? response.statusText);
|
|
239
|
+
const requestId = errObj?.requestId ?? errorData?.requestId ?? response.headers.get("x-request-id") ?? undefined;
|
|
243
240
|
// Map HTTP status to error codes when code not in response
|
|
244
|
-
if (!errObj?.code && !
|
|
241
|
+
if (!errObj?.code && !errorData?.code) {
|
|
245
242
|
if (status === 401)
|
|
246
243
|
code = "UNAUTHORIZED";
|
|
247
244
|
else if (status === 403)
|
package/dist/export.d.ts
CHANGED
|
@@ -181,7 +181,7 @@ export declare function importFromFile(client: AIEvalClient, filePath: string, o
|
|
|
181
181
|
* });
|
|
182
182
|
* ```
|
|
183
183
|
*/
|
|
184
|
-
export declare function importFromLangSmith(client: AIEvalClient, langsmithData:
|
|
184
|
+
export declare function importFromLangSmith(client: AIEvalClient, langsmithData: unknown, options: ImportOptions): Promise<ImportResult>;
|
|
185
185
|
/**
|
|
186
186
|
* Convert export data to CSV format
|
|
187
187
|
*
|
package/dist/export.js
CHANGED
|
@@ -292,8 +292,10 @@ async function importFromLangSmith(client, langsmithData, options) {
|
|
|
292
292
|
traces: [],
|
|
293
293
|
};
|
|
294
294
|
// Transform runs to traces
|
|
295
|
-
|
|
296
|
-
|
|
295
|
+
const lsData = langsmithData;
|
|
296
|
+
if (lsData.runs && Array.isArray(lsData.runs)) {
|
|
297
|
+
transformedData.traces = lsData.runs.map((run) => ({
|
|
298
|
+
id: run.id || 0,
|
|
297
299
|
name: run.name || "Imported Trace",
|
|
298
300
|
traceId: run.id || `langsmith-${Date.now()}-${Math.random()}`,
|
|
299
301
|
organizationId: options.organizationId,
|
|
@@ -141,7 +141,7 @@ async function openAIChatEval(options) {
|
|
|
141
141
|
for (let i = 0; i < result.results.length; i++) {
|
|
142
142
|
const tcId = cases[i]?.testCaseId;
|
|
143
143
|
if (tcId == null) {
|
|
144
|
-
console.log("reportToEvalAI: All cases must have testCaseId when
|
|
144
|
+
console.log("reportToEvalAI: All cases must have testCaseId when unknown has it.");
|
|
145
145
|
return evalResult;
|
|
146
146
|
}
|
|
147
147
|
importResults.push({
|
package/dist/logger.d.ts
CHANGED
|
@@ -29,7 +29,7 @@ export interface LogEntry {
|
|
|
29
29
|
level: LogLevel;
|
|
30
30
|
message: string;
|
|
31
31
|
timestamp: string;
|
|
32
|
-
data?:
|
|
32
|
+
data?: unknown;
|
|
33
33
|
prefix?: string;
|
|
34
34
|
}
|
|
35
35
|
/**
|
|
@@ -41,31 +41,31 @@ export declare class Logger {
|
|
|
41
41
|
/**
|
|
42
42
|
* Log a trace message
|
|
43
43
|
*/
|
|
44
|
-
trace(message: string, data?:
|
|
44
|
+
trace(message: string, data?: unknown): void;
|
|
45
45
|
/**
|
|
46
46
|
* Log a debug message
|
|
47
47
|
*/
|
|
48
|
-
debug(message: string, data?:
|
|
48
|
+
debug(message: string, data?: unknown): void;
|
|
49
49
|
/**
|
|
50
50
|
* Log an info message
|
|
51
51
|
*/
|
|
52
|
-
info(message: string, data?:
|
|
52
|
+
info(message: string, data?: unknown): void;
|
|
53
53
|
/**
|
|
54
54
|
* Log a warning message
|
|
55
55
|
*/
|
|
56
|
-
warn(message: string, data?:
|
|
56
|
+
warn(message: string, data?: unknown): void;
|
|
57
57
|
/**
|
|
58
58
|
* Log an error message
|
|
59
59
|
*/
|
|
60
|
-
error(message: string, data?:
|
|
60
|
+
error(message: string, data?: unknown): void;
|
|
61
61
|
/**
|
|
62
62
|
* Log HTTP request
|
|
63
63
|
*/
|
|
64
|
-
logRequest(method: string, url: string, data?:
|
|
64
|
+
logRequest(method: string, url: string, data?: unknown): void;
|
|
65
65
|
/**
|
|
66
66
|
* Log HTTP response
|
|
67
67
|
*/
|
|
68
|
-
logResponse(method: string, url: string, status: number, duration: number, data?:
|
|
68
|
+
logResponse(method: string, url: string, status: number, duration: number, data?: unknown): void;
|
|
69
69
|
/**
|
|
70
70
|
* Create child logger with prefix
|
|
71
71
|
*/
|
|
@@ -112,7 +112,7 @@ export declare class RequestLogger {
|
|
|
112
112
|
method: string;
|
|
113
113
|
url: string;
|
|
114
114
|
headers?: Record<string, string>;
|
|
115
|
-
body?:
|
|
115
|
+
body?: unknown;
|
|
116
116
|
}): void;
|
|
117
117
|
/**
|
|
118
118
|
* Log response after receiving
|
|
@@ -123,6 +123,6 @@ export declare class RequestLogger {
|
|
|
123
123
|
status: number;
|
|
124
124
|
duration: number;
|
|
125
125
|
headers?: Record<string, string>;
|
|
126
|
-
body?:
|
|
126
|
+
body?: unknown;
|
|
127
127
|
}): void;
|
|
128
128
|
}
|
package/dist/pagination.d.ts
CHANGED
|
@@ -56,11 +56,11 @@ export declare function autoPaginate<T>(fetchFn: (offset: number, limit: number)
|
|
|
56
56
|
/**
|
|
57
57
|
* Encode cursor for pagination (base64)
|
|
58
58
|
*/
|
|
59
|
-
export declare function encodeCursor(data:
|
|
59
|
+
export declare function encodeCursor(data: unknown): string;
|
|
60
60
|
/**
|
|
61
61
|
* Decode cursor from base64
|
|
62
62
|
*/
|
|
63
|
-
export declare function decodeCursor(cursor: string):
|
|
63
|
+
export declare function decodeCursor(cursor: string): unknown;
|
|
64
64
|
/**
|
|
65
65
|
* Create pagination metadata from response
|
|
66
66
|
*/
|
package/dist/snapshot.d.ts
CHANGED
|
@@ -26,7 +26,7 @@ export interface SnapshotMetadata {
|
|
|
26
26
|
/** Optional tags for organization */
|
|
27
27
|
tags?: string[];
|
|
28
28
|
/** Additional metadata */
|
|
29
|
-
metadata?: Record<string,
|
|
29
|
+
metadata?: Record<string, unknown>;
|
|
30
30
|
}
|
|
31
31
|
export interface SnapshotData {
|
|
32
32
|
/** The actual output that was snapshotted */
|
|
@@ -75,7 +75,7 @@ export declare class SnapshotManager {
|
|
|
75
75
|
*/
|
|
76
76
|
save(name: string, output: string, options?: {
|
|
77
77
|
tags?: string[];
|
|
78
|
-
metadata?: Record<string,
|
|
78
|
+
metadata?: Record<string, unknown>;
|
|
79
79
|
overwrite?: boolean;
|
|
80
80
|
}): Promise<SnapshotData>;
|
|
81
81
|
/**
|
|
@@ -140,7 +140,7 @@ export declare class SnapshotManager {
|
|
|
140
140
|
*/
|
|
141
141
|
export declare function snapshot(output: string, name: string, options?: {
|
|
142
142
|
tags?: string[];
|
|
143
|
-
metadata?: Record<string,
|
|
143
|
+
metadata?: Record<string, unknown>;
|
|
144
144
|
overwrite?: boolean;
|
|
145
145
|
dir?: string;
|
|
146
146
|
}): Promise<SnapshotData>;
|
package/dist/streaming.d.ts
CHANGED
|
@@ -49,14 +49,14 @@ export interface BatchError {
|
|
|
49
49
|
/** The error */
|
|
50
50
|
error: Error;
|
|
51
51
|
/** The item that failed */
|
|
52
|
-
item:
|
|
52
|
+
item: unknown;
|
|
53
53
|
}
|
|
54
54
|
export interface BatchResult<T> {
|
|
55
55
|
/** Successfully processed items */
|
|
56
56
|
successful: T[];
|
|
57
57
|
/** Failed items */
|
|
58
58
|
failed: Array<{
|
|
59
|
-
item:
|
|
59
|
+
item: unknown;
|
|
60
60
|
error: Error;
|
|
61
61
|
}>;
|
|
62
62
|
/** Summary */
|
|
@@ -106,12 +106,12 @@ export declare function batchProcess<TInput, TOutput>(processor: (item: TInput)
|
|
|
106
106
|
*/
|
|
107
107
|
export declare function streamEvaluation<T>(config: {
|
|
108
108
|
cases: T[];
|
|
109
|
-
executor: (testCase: T) => Promise<
|
|
109
|
+
executor: (testCase: T) => Promise<unknown>;
|
|
110
110
|
onProgress?: (progress: BatchProgress) => void;
|
|
111
111
|
}): AsyncGenerator<{
|
|
112
112
|
caseId: string;
|
|
113
113
|
case: T;
|
|
114
|
-
result:
|
|
114
|
+
result: unknown;
|
|
115
115
|
passed: boolean;
|
|
116
116
|
completed: number;
|
|
117
117
|
total: number;
|
package/dist/testing.d.ts
CHANGED
|
@@ -34,7 +34,7 @@ export interface TestSuiteCase {
|
|
|
34
34
|
/** Expected output (optional) */
|
|
35
35
|
expected?: string;
|
|
36
36
|
/** Metadata for the test case */
|
|
37
|
-
metadata?: Record<string,
|
|
37
|
+
metadata?: Record<string, unknown>;
|
|
38
38
|
/** Assertion functions to run */
|
|
39
39
|
assertions?: ((output: string) => AssertionResult)[];
|
|
40
40
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -90,7 +90,7 @@ export interface OrganizationLimits {
|
|
|
90
90
|
* Trace object representing a single execution trace
|
|
91
91
|
* Generic metadata support for type safety
|
|
92
92
|
*/
|
|
93
|
-
export interface Trace<TMetadata = Record<string, any>> {
|
|
93
|
+
export interface Trace<TMetadata = Record<string, unknown>> {
|
|
94
94
|
id: number;
|
|
95
95
|
name: string;
|
|
96
96
|
traceId: string;
|
|
@@ -103,7 +103,7 @@ export interface Trace<TMetadata = Record<string, any>> {
|
|
|
103
103
|
/**
|
|
104
104
|
* Parameters for creating a new trace
|
|
105
105
|
*/
|
|
106
|
-
export interface CreateTraceParams<TMetadata = Record<string, any>> {
|
|
106
|
+
export interface CreateTraceParams<TMetadata = Record<string, unknown>> {
|
|
107
107
|
name: string;
|
|
108
108
|
traceId: string;
|
|
109
109
|
organizationId?: number;
|
|
@@ -114,7 +114,7 @@ export interface CreateTraceParams<TMetadata = Record<string, any>> {
|
|
|
114
114
|
/**
|
|
115
115
|
* Parameters for updating an existing trace
|
|
116
116
|
*/
|
|
117
|
-
export interface UpdateTraceParams<TMetadata = Record<string,
|
|
117
|
+
export interface UpdateTraceParams<TMetadata = Record<string, unknown>> {
|
|
118
118
|
status?: "pending" | "success" | "error";
|
|
119
119
|
durationMs?: number;
|
|
120
120
|
metadata?: TMetadata;
|
|
@@ -132,7 +132,7 @@ export interface ListTracesParams {
|
|
|
132
132
|
/**
|
|
133
133
|
* Span object representing a sub-operation within a trace
|
|
134
134
|
*/
|
|
135
|
-
export interface Span<TMetadata = Record<string, any>> {
|
|
135
|
+
export interface Span<TMetadata = Record<string, unknown>> {
|
|
136
136
|
id: number;
|
|
137
137
|
traceId: number;
|
|
138
138
|
name: string;
|
|
@@ -147,7 +147,7 @@ export interface Span<TMetadata = Record<string, any>> {
|
|
|
147
147
|
/**
|
|
148
148
|
* Parameters for creating a span
|
|
149
149
|
*/
|
|
150
|
-
export interface CreateSpanParams<TMetadata = Record<string, any>> {
|
|
150
|
+
export interface CreateSpanParams<TMetadata = Record<string, unknown>> {
|
|
151
151
|
name: string;
|
|
152
152
|
spanId: string;
|
|
153
153
|
parentSpanId?: string;
|
|
@@ -159,7 +159,7 @@ export interface CreateSpanParams<TMetadata = Record<string, any>> {
|
|
|
159
159
|
/**
|
|
160
160
|
* Evaluation object representing a test evaluation
|
|
161
161
|
*/
|
|
162
|
-
export interface Evaluation<TMetadata = Record<string,
|
|
162
|
+
export interface Evaluation<TMetadata = Record<string, unknown>> {
|
|
163
163
|
id: number;
|
|
164
164
|
name: string;
|
|
165
165
|
description: string | null;
|
|
@@ -210,7 +210,7 @@ export interface TestCase {
|
|
|
210
210
|
evaluationId: number;
|
|
211
211
|
input: string;
|
|
212
212
|
expectedOutput: string | null;
|
|
213
|
-
metadata: Record<string,
|
|
213
|
+
metadata: Record<string, unknown> | null;
|
|
214
214
|
createdAt: string;
|
|
215
215
|
}
|
|
216
216
|
/**
|
|
@@ -219,7 +219,7 @@ export interface TestCase {
|
|
|
219
219
|
export interface CreateTestCaseParams {
|
|
220
220
|
input: string;
|
|
221
221
|
expectedOutput?: string;
|
|
222
|
-
metadata?: Record<string,
|
|
222
|
+
metadata?: Record<string, unknown>;
|
|
223
223
|
}
|
|
224
224
|
/**
|
|
225
225
|
* Evaluation run
|
|
@@ -228,7 +228,7 @@ export interface EvaluationRun {
|
|
|
228
228
|
id: number;
|
|
229
229
|
evaluationId: number;
|
|
230
230
|
status: "pending" | "running" | "completed" | "failed";
|
|
231
|
-
results: Record<string,
|
|
231
|
+
results: Record<string, unknown> | null;
|
|
232
232
|
createdAt: string;
|
|
233
233
|
completedAt: string | null;
|
|
234
234
|
}
|
|
@@ -237,7 +237,7 @@ export interface EvaluationRun {
|
|
|
237
237
|
*/
|
|
238
238
|
export interface CreateRunParams {
|
|
239
239
|
status?: "pending" | "running" | "completed" | "failed";
|
|
240
|
-
results?: Record<string,
|
|
240
|
+
results?: Record<string, unknown>;
|
|
241
241
|
}
|
|
242
242
|
/**
|
|
243
243
|
* LLM Judge evaluation result
|
|
@@ -249,7 +249,7 @@ export interface LLMJudgeResult {
|
|
|
249
249
|
output: string;
|
|
250
250
|
score: number | null;
|
|
251
251
|
reasoning: string | null;
|
|
252
|
-
metadata: Record<string,
|
|
252
|
+
metadata: Record<string, unknown> | null;
|
|
253
253
|
createdAt: string;
|
|
254
254
|
}
|
|
255
255
|
/**
|
|
@@ -261,7 +261,7 @@ export interface RunLLMJudgeParams {
|
|
|
261
261
|
output: string;
|
|
262
262
|
score?: number;
|
|
263
263
|
reasoning?: string;
|
|
264
|
-
metadata?: Record<string,
|
|
264
|
+
metadata?: Record<string, unknown>;
|
|
265
265
|
}
|
|
266
266
|
/**
|
|
267
267
|
* SDK Error class with additional error details
|
|
@@ -285,26 +285,26 @@ export interface RunLLMJudgeParams {
|
|
|
285
285
|
export declare class SDKError extends Error {
|
|
286
286
|
code: string;
|
|
287
287
|
statusCode: number;
|
|
288
|
-
details?:
|
|
288
|
+
details?: unknown;
|
|
289
289
|
documentation?: string;
|
|
290
290
|
solutions?: string[];
|
|
291
291
|
retryable?: boolean;
|
|
292
292
|
retryAfter?: number;
|
|
293
|
-
constructor(message: string, code: string, statusCode: number, details?:
|
|
293
|
+
constructor(message: string, code: string, statusCode: number, details?: unknown);
|
|
294
294
|
}
|
|
295
295
|
export type AIEvalConfig = ClientConfig;
|
|
296
|
-
export type TraceData<TMetadata =
|
|
297
|
-
export type SpanData<TMetadata =
|
|
298
|
-
export type EvaluationData<TMetadata =
|
|
296
|
+
export type TraceData<TMetadata = unknown> = Trace<TMetadata>;
|
|
297
|
+
export type SpanData<TMetadata = unknown> = Span<TMetadata>;
|
|
298
|
+
export type EvaluationData<TMetadata = unknown> = Evaluation<TMetadata>;
|
|
299
299
|
export type LLMJudgeData = LLMJudgeResult;
|
|
300
|
-
export type AnnotationData =
|
|
300
|
+
export type AnnotationData = unknown;
|
|
301
301
|
export interface RetryConfig {
|
|
302
302
|
maxAttempts?: number;
|
|
303
303
|
backoff?: "exponential" | "linear" | "fixed";
|
|
304
304
|
retryableErrors?: string[];
|
|
305
305
|
}
|
|
306
306
|
export interface GenericMetadata {
|
|
307
|
-
[key: string]:
|
|
307
|
+
[key: string]: unknown;
|
|
308
308
|
}
|
|
309
309
|
export interface TracedResponse<T> {
|
|
310
310
|
data: T;
|
|
@@ -314,14 +314,14 @@ export interface TracedResponse<T> {
|
|
|
314
314
|
export interface TestResult {
|
|
315
315
|
passed: boolean;
|
|
316
316
|
message?: string;
|
|
317
|
-
expected?:
|
|
318
|
-
actual?:
|
|
317
|
+
expected?: unknown;
|
|
318
|
+
actual?: unknown;
|
|
319
319
|
metadata?: GenericMetadata;
|
|
320
320
|
}
|
|
321
321
|
export interface SnapshotData {
|
|
322
322
|
id: string;
|
|
323
323
|
name: string;
|
|
324
|
-
data:
|
|
324
|
+
data: unknown;
|
|
325
325
|
metadata?: GenericMetadata;
|
|
326
326
|
createdAt: string;
|
|
327
327
|
updatedAt: string;
|
|
@@ -346,7 +346,7 @@ export interface ImportOptions {
|
|
|
346
346
|
dryRun?: boolean;
|
|
347
347
|
}
|
|
348
348
|
export interface StreamOptions {
|
|
349
|
-
onData: (data:
|
|
349
|
+
onData: (data: unknown) => void;
|
|
350
350
|
onError?: (error: Error) => void;
|
|
351
351
|
onComplete?: () => void;
|
|
352
352
|
signal?: AbortSignal;
|
|
@@ -371,8 +371,8 @@ export interface Annotation {
|
|
|
371
371
|
annotatorId: string;
|
|
372
372
|
rating: number | null;
|
|
373
373
|
feedback: string | null;
|
|
374
|
-
labels: Record<string,
|
|
375
|
-
metadata: Record<string,
|
|
374
|
+
labels: Record<string, unknown>;
|
|
375
|
+
metadata: Record<string, unknown>;
|
|
376
376
|
createdAt: string;
|
|
377
377
|
annotator?: {
|
|
378
378
|
id: string;
|
|
@@ -391,8 +391,8 @@ export interface CreateAnnotationParams {
|
|
|
391
391
|
testCaseId: number;
|
|
392
392
|
rating?: number;
|
|
393
393
|
feedback?: string;
|
|
394
|
-
labels?: Record<string,
|
|
395
|
-
metadata?: Record<string,
|
|
394
|
+
labels?: Record<string, unknown>;
|
|
395
|
+
metadata?: Record<string, unknown>;
|
|
396
396
|
}
|
|
397
397
|
/**
|
|
398
398
|
* Parameters for listing annotations
|
|
@@ -414,7 +414,7 @@ export interface AnnotationTask {
|
|
|
414
414
|
type: string;
|
|
415
415
|
status: "pending" | "in_progress" | "completed" | "archived";
|
|
416
416
|
organizationId: number;
|
|
417
|
-
annotationSettings: Record<string,
|
|
417
|
+
annotationSettings: Record<string, unknown>;
|
|
418
418
|
createdAt: string;
|
|
419
419
|
updatedAt: string;
|
|
420
420
|
}
|
|
@@ -427,7 +427,7 @@ export interface CreateAnnotationTaskParams {
|
|
|
427
427
|
instructions?: string;
|
|
428
428
|
type: string;
|
|
429
429
|
organizationId: number;
|
|
430
|
-
annotationSettings?: Record<string,
|
|
430
|
+
annotationSettings?: Record<string, unknown>;
|
|
431
431
|
}
|
|
432
432
|
/**
|
|
433
433
|
* Parameters for listing annotation tasks
|
|
@@ -445,7 +445,7 @@ export interface AnnotationItem {
|
|
|
445
445
|
id: number;
|
|
446
446
|
taskId: number;
|
|
447
447
|
content: string;
|
|
448
|
-
annotation:
|
|
448
|
+
annotation: unknown | null;
|
|
449
449
|
annotatedBy: string | null;
|
|
450
450
|
annotatedAt: string | null;
|
|
451
451
|
createdAt: string;
|
|
@@ -455,7 +455,7 @@ export interface AnnotationItem {
|
|
|
455
455
|
*/
|
|
456
456
|
export interface CreateAnnotationItemParams {
|
|
457
457
|
content: string;
|
|
458
|
-
annotation?:
|
|
458
|
+
annotation?: unknown;
|
|
459
459
|
annotatedBy?: string;
|
|
460
460
|
annotatedAt?: string;
|
|
461
461
|
}
|
|
@@ -573,7 +573,7 @@ export interface WebhookDelivery {
|
|
|
573
573
|
id: number;
|
|
574
574
|
webhookId: number;
|
|
575
575
|
event: string;
|
|
576
|
-
payload: Record<string,
|
|
576
|
+
payload: Record<string, unknown>;
|
|
577
577
|
response: string | null;
|
|
578
578
|
statusCode: number | null;
|
|
579
579
|
success: boolean;
|
|
@@ -719,5 +719,5 @@ export interface Organization {
|
|
|
719
719
|
status: "active" | "suspended" | "cancelled";
|
|
720
720
|
createdAt: string;
|
|
721
721
|
updatedAt: string;
|
|
722
|
-
metadata?: Record<string,
|
|
722
|
+
metadata?: Record<string, unknown>;
|
|
723
723
|
}
|
package/dist/workflows.d.ts
CHANGED
|
@@ -34,7 +34,7 @@ export interface WorkflowNode {
|
|
|
34
34
|
id: string;
|
|
35
35
|
type: "agent" | "tool" | "decision" | "parallel" | "human" | "llm";
|
|
36
36
|
name: string;
|
|
37
|
-
config?: Record<string,
|
|
37
|
+
config?: Record<string, unknown>;
|
|
38
38
|
}
|
|
39
39
|
/**
|
|
40
40
|
* Edge connecting nodes in a workflow DAG
|
|
@@ -52,7 +52,17 @@ export interface WorkflowDefinition {
|
|
|
52
52
|
nodes: WorkflowNode[];
|
|
53
53
|
edges: WorkflowEdge[];
|
|
54
54
|
entrypoint: string;
|
|
55
|
-
metadata?: Record<string,
|
|
55
|
+
metadata?: Record<string, unknown>;
|
|
56
|
+
}
|
|
57
|
+
interface LangChainExecutor {
|
|
58
|
+
invoke?: (input: unknown, config?: unknown) => Promise<unknown>;
|
|
59
|
+
call?: (input: unknown, config?: unknown) => Promise<unknown>;
|
|
60
|
+
}
|
|
61
|
+
interface CrewAI {
|
|
62
|
+
kickoff?: (input?: unknown) => Promise<unknown>;
|
|
63
|
+
}
|
|
64
|
+
interface LangChainConversation {
|
|
65
|
+
initiate_chat?: (...args: unknown[]) => Promise<unknown>;
|
|
56
66
|
}
|
|
57
67
|
/**
|
|
58
68
|
* Active workflow context
|
|
@@ -63,7 +73,7 @@ export interface WorkflowContext {
|
|
|
63
73
|
name: string;
|
|
64
74
|
startedAt: string;
|
|
65
75
|
definition?: WorkflowDefinition;
|
|
66
|
-
metadata?: Record<string,
|
|
76
|
+
metadata?: Record<string, unknown>;
|
|
67
77
|
}
|
|
68
78
|
/**
|
|
69
79
|
* Workflow run status
|
|
@@ -80,7 +90,7 @@ export interface AgentHandoff {
|
|
|
80
90
|
fromAgent?: string;
|
|
81
91
|
toAgent: string;
|
|
82
92
|
handoffType: HandoffType;
|
|
83
|
-
context?: Record<string,
|
|
93
|
+
context?: Record<string, unknown>;
|
|
84
94
|
timestamp: string;
|
|
85
95
|
}
|
|
86
96
|
/**
|
|
@@ -115,7 +125,7 @@ export interface RecordDecisionParams {
|
|
|
115
125
|
/** Factors that influenced the decision */
|
|
116
126
|
contextFactors?: string[];
|
|
117
127
|
/** Input context at decision time */
|
|
118
|
-
inputContext?: Record<string,
|
|
128
|
+
inputContext?: Record<string, unknown>;
|
|
119
129
|
}
|
|
120
130
|
/**
|
|
121
131
|
* LLM provider names
|
|
@@ -169,7 +179,7 @@ export interface AgentSpanContext {
|
|
|
169
179
|
agentName: string;
|
|
170
180
|
startTime: string;
|
|
171
181
|
parentSpanId?: string;
|
|
172
|
-
metadata?: Record<string,
|
|
182
|
+
metadata?: Record<string, unknown>;
|
|
173
183
|
}
|
|
174
184
|
/**
|
|
175
185
|
* WorkflowTracer - Instrument multi-agent workflows with tracing, decision auditing, and cost tracking
|
|
@@ -213,11 +223,11 @@ export declare class WorkflowTracer {
|
|
|
213
223
|
* });
|
|
214
224
|
* ```
|
|
215
225
|
*/
|
|
216
|
-
startWorkflow(name: string, definition?: WorkflowDefinition, metadata?: Record<string,
|
|
226
|
+
startWorkflow(name: string, definition?: WorkflowDefinition, metadata?: Record<string, unknown>): Promise<WorkflowContext>;
|
|
217
227
|
/**
|
|
218
228
|
* End the current workflow
|
|
219
229
|
*/
|
|
220
|
-
endWorkflow(output?: Record<string,
|
|
230
|
+
endWorkflow(output?: Record<string, unknown>, status?: WorkflowStatus): Promise<void>;
|
|
221
231
|
/**
|
|
222
232
|
* Start an agent span within the workflow
|
|
223
233
|
*
|
|
@@ -228,11 +238,11 @@ export declare class WorkflowTracer {
|
|
|
228
238
|
* });
|
|
229
239
|
* ```
|
|
230
240
|
*/
|
|
231
|
-
startAgentSpan(agentName: string, input?: Record<string,
|
|
241
|
+
startAgentSpan(agentName: string, input?: Record<string, unknown>, parentSpanId?: string): Promise<AgentSpanContext>;
|
|
232
242
|
/**
|
|
233
243
|
* End an agent span
|
|
234
244
|
*/
|
|
235
|
-
endAgentSpan(span: AgentSpanContext, output?: Record<string,
|
|
245
|
+
endAgentSpan(span: AgentSpanContext, output?: Record<string, unknown>, error?: string): Promise<void>;
|
|
236
246
|
/**
|
|
237
247
|
* Record a handoff between agents
|
|
238
248
|
*
|
|
@@ -246,7 +256,7 @@ export declare class WorkflowTracer {
|
|
|
246
256
|
* );
|
|
247
257
|
* ```
|
|
248
258
|
*/
|
|
249
|
-
recordHandoff(fromAgent: string | undefined, toAgent: string, context?: Record<string,
|
|
259
|
+
recordHandoff(fromAgent: string | undefined, toAgent: string, context?: Record<string, unknown>, handoffType?: HandoffType): Promise<void>;
|
|
250
260
|
/**
|
|
251
261
|
* Record a decision made by an agent
|
|
252
262
|
*
|
|
@@ -337,9 +347,9 @@ export declare class WorkflowTracer {
|
|
|
337
347
|
* const result = await tracedExecutor.invoke({ input: 'Hello' });
|
|
338
348
|
* ```
|
|
339
349
|
*/
|
|
340
|
-
export declare function traceLangChainAgent(executor: any, tracer: WorkflowTracer, options?: {
|
|
350
|
+
export declare function traceLangChainAgent(executor: LangChainExecutor, tracer: WorkflowTracer, options?: {
|
|
341
351
|
agentName?: string;
|
|
342
|
-
}):
|
|
352
|
+
}): LangChainExecutor;
|
|
343
353
|
/**
|
|
344
354
|
* Create a traced wrapper for CrewAI crews
|
|
345
355
|
*
|
|
@@ -352,9 +362,9 @@ export declare function traceLangChainAgent(executor: any, tracer: WorkflowTrace
|
|
|
352
362
|
* const result = await tracedCrew.kickoff({ topic: 'AI Safety' });
|
|
353
363
|
* ```
|
|
354
364
|
*/
|
|
355
|
-
export declare function traceCrewAI(crew: any, tracer: WorkflowTracer, options?: {
|
|
365
|
+
export declare function traceCrewAI(crew: CrewAI, tracer: WorkflowTracer, options?: {
|
|
356
366
|
crewName?: string;
|
|
357
|
-
}):
|
|
367
|
+
}): CrewAI;
|
|
358
368
|
/**
|
|
359
369
|
* Create a traced wrapper for AutoGen conversations
|
|
360
370
|
*
|
|
@@ -365,9 +375,9 @@ export declare function traceCrewAI(crew: any, tracer: WorkflowTracer, options?:
|
|
|
365
375
|
* });
|
|
366
376
|
* ```
|
|
367
377
|
*/
|
|
368
|
-
export declare function traceAutoGen(conversation:
|
|
378
|
+
export declare function traceAutoGen(conversation: LangChainConversation, tracer: WorkflowTracer, options?: {
|
|
369
379
|
conversationName?: string;
|
|
370
|
-
}):
|
|
380
|
+
}): LangChainConversation;
|
|
371
381
|
/**
|
|
372
382
|
* Create a workflow tracer from an existing client
|
|
373
383
|
*/
|
|
@@ -375,4 +385,5 @@ export declare function createWorkflowTracer(client: AIEvalClient, options?: Wor
|
|
|
375
385
|
/**
|
|
376
386
|
* Helper to trace an async function as a workflow step
|
|
377
387
|
*/
|
|
378
|
-
export declare function traceWorkflowStep<T>(tracer: WorkflowTracer, agentName: string, fn: () => Promise<T>, input?: Record<string,
|
|
388
|
+
export declare function traceWorkflowStep<T>(tracer: WorkflowTracer, agentName: string, fn: () => Promise<T>, input?: Record<string, unknown>): Promise<T>;
|
|
389
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pauly4010/evalai-sdk",
|
|
3
|
-
"version": "1.5.7",
|
|
3
|
+
"version": "1.5.8",
|
|
4
4
|
"description": "AI Evaluation Platform SDK - Complete API Coverage with Performance Optimizations",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -20,8 +20,7 @@
|
|
|
20
20
|
"scripts": {
|
|
21
21
|
"build": "tsc",
|
|
22
22
|
"dev": "tsc --watch",
|
|
23
|
-
"test": "vitest"
|
|
24
|
-
"prepublishOnly": "npm run build"
|
|
23
|
+
"test": "vitest"
|
|
25
24
|
},
|
|
26
25
|
"keywords": [
|
|
27
26
|
"ai",
|