@pauly4010/evalai-sdk 1.9.0 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +136 -23
- package/dist/assertions.js +51 -18
- package/dist/batch.js +8 -2
- package/dist/cli/api.js +3 -1
- package/dist/cli/check.js +19 -6
- package/dist/cli/ci-context.js +3 -1
- package/dist/cli/config.js +28 -8
- package/dist/cli/diff.js +14 -9
- package/dist/cli/discover.js +18 -7
- package/dist/cli/doctor.js +43 -9
- package/dist/cli/explain.js +37 -11
- package/dist/cli/formatters/human.js +4 -1
- package/dist/cli/formatters/pr-comment.js +3 -1
- package/dist/cli/gate.js +6 -2
- package/dist/cli/impact-analysis.js +6 -5
- package/dist/cli/index.js +18 -6
- package/dist/cli/manifest.d.ts +3 -5
- package/dist/cli/manifest.js +21 -14
- package/dist/cli/migrate.js +4 -4
- package/dist/cli/policy-packs.js +8 -2
- package/dist/cli/print-config.js +19 -4
- package/dist/cli/regression-gate.js +8 -2
- package/dist/cli/report/build-check-report.js +8 -2
- package/dist/cli/run.js +11 -5
- package/dist/cli/share.js +3 -1
- package/dist/cli/upgrade.js +2 -1
- package/dist/client.d.ts +16 -19
- package/dist/client.js +60 -43
- package/dist/client.request.test.d.ts +1 -1
- package/dist/client.request.test.js +222 -147
- package/dist/context.js +3 -1
- package/dist/errors.js +11 -4
- package/dist/export.js +3 -1
- package/dist/index.d.ts +8 -8
- package/dist/index.js +19 -19
- package/dist/integrations/anthropic.d.ts +20 -1
- package/dist/integrations/openai-eval.js +4 -2
- package/dist/integrations/openai.d.ts +24 -1
- package/dist/local.js +3 -1
- package/dist/logger.js +6 -2
- package/dist/pagination.js +6 -2
- package/dist/runtime/adapters/config-to-dsl.js +12 -9
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +1 -1
- package/dist/runtime/adapters/testsuite-to-dsl.js +11 -6
- package/dist/runtime/eval.d.ts +1 -1
- package/dist/runtime/eval.js +12 -5
- package/dist/runtime/execution-mode.js +13 -9
- package/dist/runtime/registry.js +8 -21
- package/dist/runtime/run-report.d.ts +0 -2
- package/dist/runtime/run-report.js +12 -10
- package/dist/testing.js +7 -2
- package/dist/types.d.ts +100 -69
- package/dist/utils/input-hash.js +4 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/workflows.js +62 -14
- package/package.json +115 -111
|
@@ -1,157 +1,232 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __createBinding =
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
2
|
+
var __createBinding =
|
|
3
|
+
(this && this.__createBinding) ||
|
|
4
|
+
(Object.create
|
|
5
|
+
? function (o, m, k, k2) {
|
|
6
|
+
if (k2 === undefined) k2 = k;
|
|
7
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
8
|
+
if (
|
|
9
|
+
!desc ||
|
|
10
|
+
("get" in desc ? !m.__esModule : desc.writable || desc.configurable)
|
|
11
|
+
) {
|
|
12
|
+
desc = {
|
|
13
|
+
enumerable: true,
|
|
14
|
+
get: function () {
|
|
15
|
+
return m[k];
|
|
16
|
+
},
|
|
17
|
+
};
|
|
18
|
+
}
|
|
19
|
+
Object.defineProperty(o, k2, desc);
|
|
20
|
+
}
|
|
21
|
+
: function (o, m, k, k2) {
|
|
22
|
+
if (k2 === undefined) k2 = k;
|
|
23
|
+
o[k2] = m[k];
|
|
24
|
+
});
|
|
25
|
+
var __setModuleDefault =
|
|
26
|
+
(this && this.__setModuleDefault) ||
|
|
27
|
+
(Object.create
|
|
28
|
+
? function (o, v) {
|
|
29
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
30
|
+
}
|
|
31
|
+
: function (o, v) {
|
|
32
|
+
o["default"] = v;
|
|
33
|
+
});
|
|
34
|
+
var __importStar =
|
|
35
|
+
(this && this.__importStar) ||
|
|
36
|
+
(function () {
|
|
37
|
+
var ownKeys = function (o) {
|
|
38
|
+
ownKeys =
|
|
39
|
+
Object.getOwnPropertyNames ||
|
|
40
|
+
function (o) {
|
|
41
|
+
var ar = [];
|
|
42
|
+
for (var k in o)
|
|
43
|
+
if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
44
|
+
return ar;
|
|
45
|
+
};
|
|
46
|
+
return ownKeys(o);
|
|
47
|
+
};
|
|
48
|
+
return function (mod) {
|
|
49
|
+
if (mod && mod.__esModule) return mod;
|
|
50
|
+
var result = {};
|
|
51
|
+
if (mod != null)
|
|
52
|
+
for (var k = ownKeys(mod), i = 0; i < k.length; i++)
|
|
53
|
+
if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
54
|
+
__setModuleDefault(result, mod);
|
|
55
|
+
return result;
|
|
56
|
+
};
|
|
57
|
+
})();
|
|
35
58
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
59
|
const vitest_1 = require("vitest");
|
|
37
60
|
const client_1 = require("./client");
|
|
38
61
|
const errorsModule = __importStar(require("./errors"));
|
|
39
62
|
vitest_1.vi.mock("./cache", () => {
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
63
|
+
const cacheTracker = { invalidatedPatterns: [] };
|
|
64
|
+
const shouldCache = vitest_1.vi.fn().mockReturnValue(true);
|
|
65
|
+
const getTTL = vitest_1.vi.fn().mockReturnValue(1000);
|
|
66
|
+
const makeKey = (method, url, params) =>
|
|
67
|
+
`${method}:${url}:${JSON.stringify(params ?? null)}`;
|
|
68
|
+
return {
|
|
69
|
+
__esModule: true,
|
|
70
|
+
shouldCache,
|
|
71
|
+
getTTL,
|
|
72
|
+
cacheTracker,
|
|
73
|
+
RequestCache: class RequestCache {
|
|
74
|
+
constructor() {
|
|
75
|
+
this.store = new Map();
|
|
76
|
+
}
|
|
77
|
+
get(method, url, params) {
|
|
78
|
+
const key = makeKey(method, url, params);
|
|
79
|
+
return this.store.get(key) ?? null;
|
|
80
|
+
}
|
|
81
|
+
set(method, url, data, _ttl, params) {
|
|
82
|
+
const key = makeKey(method, url, params);
|
|
83
|
+
this.store.set(key, data);
|
|
84
|
+
}
|
|
85
|
+
invalidatePattern(pattern) {
|
|
86
|
+
cacheTracker.invalidatedPatterns.push(pattern);
|
|
87
|
+
}
|
|
88
|
+
invalidate(_method, _url, _params) {
|
|
89
|
+
// no-op for tests
|
|
90
|
+
}
|
|
91
|
+
clear() {
|
|
92
|
+
this.store.clear();
|
|
93
|
+
}
|
|
94
|
+
},
|
|
95
|
+
};
|
|
72
96
|
});
|
|
73
97
|
const cache_1 = require("./cache");
|
|
74
98
|
(0, vitest_1.describe)("AIEvalClient.request", () => {
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
99
|
+
(0, vitest_1.beforeEach)(() => {
|
|
100
|
+
process.env.EVALAI_API_KEY = "test";
|
|
101
|
+
cache_1.shouldCache.mockReset().mockReturnValue(true);
|
|
102
|
+
cache_1.getTTL.mockReset().mockReturnValue(1000);
|
|
103
|
+
cache_1.cacheTracker.invalidatedPatterns.length = 0;
|
|
104
|
+
});
|
|
105
|
+
(0, vitest_1.it)(
|
|
106
|
+
"caches GET responses and reuses data without re-fetching",
|
|
107
|
+
async () => {
|
|
108
|
+
const client = new client_1.AIEvalClient({
|
|
109
|
+
apiKey: "test",
|
|
110
|
+
baseUrl: "http://localhost",
|
|
111
|
+
timeout: 1000,
|
|
112
|
+
});
|
|
113
|
+
const payload = { items: [1, 2, 3] };
|
|
114
|
+
const fetchMock = vitest_1.vi.fn().mockResolvedValue({
|
|
115
|
+
ok: true,
|
|
116
|
+
status: 200,
|
|
117
|
+
json: async () => payload,
|
|
118
|
+
});
|
|
119
|
+
globalThis.fetch = fetchMock;
|
|
120
|
+
const first = await client.request("/api/traces", { method: "GET" });
|
|
121
|
+
const second = await client.request("/api/traces", { method: "GET" });
|
|
122
|
+
(0, vitest_1.expect)(first).toEqual(payload);
|
|
123
|
+
(0, vitest_1.expect)(second).toEqual(payload);
|
|
124
|
+
(0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(1);
|
|
125
|
+
},
|
|
126
|
+
);
|
|
127
|
+
(0, vitest_1.it)("propagates non-ok responses as SDK errors", async () => {
|
|
128
|
+
const client = new client_1.AIEvalClient({
|
|
129
|
+
apiKey: "test",
|
|
130
|
+
baseUrl: "http://localhost",
|
|
131
|
+
});
|
|
132
|
+
const fetchMock = vitest_1.vi.fn().mockResolvedValue({
|
|
133
|
+
ok: false,
|
|
134
|
+
status: 429,
|
|
135
|
+
json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
|
|
136
|
+
});
|
|
137
|
+
globalThis.fetch = fetchMock;
|
|
138
|
+
const createErrorSpy = vitest_1.vi
|
|
139
|
+
.spyOn(errorsModule, "createErrorFromResponse")
|
|
140
|
+
.mockReturnValue(
|
|
141
|
+
new errorsModule.EvalAIError(
|
|
142
|
+
"rate limited",
|
|
143
|
+
"RATE_LIMIT_EXCEEDED",
|
|
144
|
+
429,
|
|
145
|
+
),
|
|
146
|
+
);
|
|
147
|
+
await (0, vitest_1.expect)(
|
|
148
|
+
client.request("/api/fail", { method: "GET" }),
|
|
149
|
+
).rejects.toHaveProperty("code", "RATE_LIMIT_EXCEEDED");
|
|
150
|
+
createErrorSpy.mockRestore();
|
|
151
|
+
});
|
|
152
|
+
(0, vitest_1.it)(
|
|
153
|
+
"retries on retryable SDK errors and eventually succeeds",
|
|
154
|
+
async () => {
|
|
155
|
+
const client = new client_1.AIEvalClient({
|
|
156
|
+
apiKey: "test",
|
|
157
|
+
baseUrl: "http://localhost",
|
|
158
|
+
timeout: 1000,
|
|
159
|
+
});
|
|
160
|
+
vitest_1.vi.spyOn(client, "calculateBackoff").mockReturnValue(0);
|
|
161
|
+
const failureResponse = {
|
|
162
|
+
ok: false,
|
|
163
|
+
status: 429,
|
|
164
|
+
json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
|
|
165
|
+
};
|
|
166
|
+
const successResponse = {
|
|
167
|
+
ok: true,
|
|
168
|
+
status: 200,
|
|
169
|
+
json: async () => ({ ok: true }),
|
|
170
|
+
};
|
|
171
|
+
const createErrorSpy = vitest_1.vi
|
|
172
|
+
.spyOn(errorsModule, "createErrorFromResponse")
|
|
173
|
+
.mockReturnValue(
|
|
174
|
+
new errorsModule.EvalAIError(
|
|
175
|
+
"rate limited",
|
|
176
|
+
"RATE_LIMIT_EXCEEDED",
|
|
177
|
+
429,
|
|
178
|
+
),
|
|
179
|
+
);
|
|
180
|
+
const fetchMock = vitest_1.vi
|
|
181
|
+
.fn()
|
|
182
|
+
.mockResolvedValueOnce(failureResponse)
|
|
183
|
+
.mockResolvedValueOnce(successResponse);
|
|
184
|
+
globalThis.fetch = fetchMock;
|
|
185
|
+
const result = await client.request("/api/retry", { method: "GET" });
|
|
186
|
+
(0, vitest_1.expect)(result).toEqual({ ok: true });
|
|
187
|
+
(0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(2);
|
|
188
|
+
createErrorSpy.mockRestore();
|
|
189
|
+
},
|
|
190
|
+
);
|
|
191
|
+
(0, vitest_1.it)("throws a TIMEOUT SDK error when fetch aborts", async () => {
|
|
192
|
+
const client = new client_1.AIEvalClient({
|
|
193
|
+
apiKey: "test",
|
|
194
|
+
baseUrl: "http://localhost",
|
|
195
|
+
timeout: 1000,
|
|
196
|
+
});
|
|
197
|
+
const abortError = Object.assign(new Error("aborted"), {
|
|
198
|
+
name: "AbortError",
|
|
199
|
+
});
|
|
200
|
+
const fetchMock = vitest_1.vi.fn().mockRejectedValue(abortError);
|
|
201
|
+
globalThis.fetch = fetchMock;
|
|
202
|
+
await (0, vitest_1.expect)(
|
|
203
|
+
client.request("/api/timeout", { method: "GET" }),
|
|
204
|
+
).rejects.toMatchObject({
|
|
205
|
+
code: "TIMEOUT",
|
|
206
|
+
});
|
|
207
|
+
});
|
|
208
|
+
(0, vitest_1.it)(
|
|
209
|
+
"invalidates related cache entries for mutation requests",
|
|
210
|
+
async () => {
|
|
211
|
+
const client = new client_1.AIEvalClient({
|
|
212
|
+
apiKey: "test",
|
|
213
|
+
baseUrl: "http://localhost",
|
|
214
|
+
timeout: 1000,
|
|
215
|
+
});
|
|
216
|
+
cache_1.shouldCache.mockReturnValue(false);
|
|
217
|
+
const fetchMock = vitest_1.vi.fn().mockResolvedValue({
|
|
218
|
+
ok: true,
|
|
219
|
+
status: 201,
|
|
220
|
+
json: async () => ({ result: "ok" }),
|
|
221
|
+
});
|
|
222
|
+
globalThis.fetch = fetchMock;
|
|
223
|
+
await client.request("/api/evaluations", {
|
|
224
|
+
method: "POST",
|
|
225
|
+
body: JSON.stringify({}),
|
|
226
|
+
});
|
|
227
|
+
(0, vitest_1.expect)(cache_1.cacheTracker.invalidatedPatterns).toContain(
|
|
228
|
+
"evaluations",
|
|
229
|
+
);
|
|
230
|
+
},
|
|
231
|
+
);
|
|
157
232
|
});
|
package/dist/context.js
CHANGED
|
@@ -28,7 +28,9 @@ exports.withContext = withContext;
|
|
|
28
28
|
exports.withContextSync = withContextSync;
|
|
29
29
|
exports.WithContext = WithContext;
|
|
30
30
|
// Detect environment
|
|
31
|
-
const isNode = typeof process !== "undefined" &&
|
|
31
|
+
const isNode = typeof process !== "undefined" &&
|
|
32
|
+
process.versions?.node &&
|
|
33
|
+
typeof require !== "undefined";
|
|
32
34
|
// Browser fallback: simple context stack
|
|
33
35
|
class BrowserContextStorage {
|
|
34
36
|
constructor() {
|
package/dist/errors.js
CHANGED
|
@@ -162,8 +162,11 @@ class EvalAIError extends Error {
|
|
|
162
162
|
this.details = details;
|
|
163
163
|
// Initialize required properties from ERROR_DOCS
|
|
164
164
|
const doc = ERROR_DOCS[code];
|
|
165
|
-
this.documentation =
|
|
166
|
-
|
|
165
|
+
this.documentation =
|
|
166
|
+
doc?.documentation ?? `https://docs.ai-eval-platform.com/errors/${code}`;
|
|
167
|
+
this.solutions = doc?.solutions ?? [
|
|
168
|
+
"Check the error details for more information",
|
|
169
|
+
];
|
|
167
170
|
this.retryable = doc?.retryable ?? false;
|
|
168
171
|
// Extract retry-after for rate limits
|
|
169
172
|
const errorDetails = details;
|
|
@@ -174,7 +177,8 @@ class EvalAIError extends Error {
|
|
|
174
177
|
if (code === "FEATURE_LIMIT_REACHED" && errorDetails?.resetAt) {
|
|
175
178
|
this.resetAt = new Date(errorDetails.resetAt);
|
|
176
179
|
}
|
|
177
|
-
this.requestId =
|
|
180
|
+
this.requestId =
|
|
181
|
+
errorDetails?.error?.requestId ?? errorDetails?.requestId;
|
|
178
182
|
// Ensure proper prototype chain
|
|
179
183
|
Object.setPrototypeOf(this, EvalAIError.prototype);
|
|
180
184
|
}
|
|
@@ -236,7 +240,10 @@ function createErrorFromResponse(response, data) {
|
|
|
236
240
|
const message = typeof errorData?.error === "string"
|
|
237
241
|
? errorData.error
|
|
238
242
|
: (errObj?.message ?? errorData?.message ?? response.statusText);
|
|
239
|
-
const requestId = errObj?.requestId ??
|
|
243
|
+
const requestId = errObj?.requestId ??
|
|
244
|
+
errorData?.requestId ??
|
|
245
|
+
response.headers.get("x-request-id") ??
|
|
246
|
+
undefined;
|
|
240
247
|
// Map HTTP status to error codes when code not in response
|
|
241
248
|
if (!errObj?.code && !errorData?.code) {
|
|
242
249
|
if (status === 401)
|
package/dist/export.js
CHANGED
|
@@ -300,7 +300,9 @@ async function importFromLangSmith(client, langsmithData, options) {
|
|
|
300
300
|
traceId: run.id || `langsmith-${Date.now()}-${Math.random()}`,
|
|
301
301
|
organizationId: options.organizationId,
|
|
302
302
|
status: run.error ? "error" : "success",
|
|
303
|
-
durationMs: run.execution_time
|
|
303
|
+
durationMs: run.execution_time
|
|
304
|
+
? Math.round(run.execution_time * 1000)
|
|
305
|
+
: null,
|
|
304
306
|
metadata: {
|
|
305
307
|
source: "langsmith",
|
|
306
308
|
original_id: run.id,
|
package/dist/index.d.ts
CHANGED
|
@@ -13,13 +13,13 @@ NetworkError, };
|
|
|
13
13
|
export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
|
|
14
14
|
import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
|
|
15
15
|
export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
|
|
16
|
-
export {
|
|
17
|
-
export {
|
|
18
|
-
export { createEvalRuntime, getActiveRuntime, setActiveRuntime, disposeActiveRuntime, } from "./runtime/registry";
|
|
16
|
+
export { cloneContext, mergeContexts, validateContext, } from "./runtime/context";
|
|
17
|
+
export { createContext as createEvalContext, createResult, defineEval, defineSuite, evalai, } from "./runtime/eval";
|
|
19
18
|
export { createLocalExecutor, defaultLocalExecutor, } from "./runtime/executor";
|
|
20
|
-
export {
|
|
21
|
-
export type {
|
|
22
|
-
export { EvalRuntimeError,
|
|
19
|
+
export { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRuntime, } from "./runtime/registry";
|
|
20
|
+
export type { CloudExecutor, DefineEvalFunction, EvalContext, EvalExecutor, EvalExecutorInterface, EvalOptions, EvalResult, EvalRuntime, EvalSpec, ExecutorCapabilities, LocalExecutor, SpecConfig, SpecOptions, WorkerExecutor, } from "./runtime/types";
|
|
21
|
+
export { EvalRuntimeError, RuntimeError, SpecExecutionError, SpecRegistrationError, } from "./runtime/types";
|
|
22
|
+
export { createTestSuite, type TestCaseResult, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteConfig, TestSuiteResult, } from "./testing";
|
|
23
23
|
import { compareWithSnapshot, snapshot } from "./snapshot";
|
|
24
24
|
export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots, };
|
|
25
25
|
import type { ExportFormat } from "./export";
|
|
@@ -36,8 +36,8 @@ export { Logger } from "./logger";
|
|
|
36
36
|
export { extendExpectWithToPassGate } from "./matchers";
|
|
37
37
|
export { autoPaginate, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
|
|
38
38
|
export { ARTIFACTS, type Baseline, type BaselineTolerance, GATE_CATEGORY, GATE_EXIT, type GateCategory, type GateExitCode, REPORT_SCHEMA_VERSION, type RegressionDelta, type RegressionReport, } from "./regression";
|
|
39
|
-
export { batchProcess, batchRead, RateLimiter, streamEvaluation } from "./streaming";
|
|
40
|
-
export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";
|
|
39
|
+
export { batchProcess, batchRead, RateLimiter, streamEvaluation, } from "./streaming";
|
|
40
|
+
export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, EvaluationRun, EvaluationRunDetail, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeEvaluateResult, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TraceDetail, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";
|
|
41
41
|
export { EvaluationTemplates, type EvaluationTemplateType, type FeatureUsage, type OrganizationLimits, } from "./types";
|
|
42
42
|
export { type AgentHandoff, type AgentSpanContext, type CostCategory, type CostRecord, createWorkflowTracer, type DecisionAlternative, type DecisionType, type HandoffType, type LLMProvider, type RecordCostParams, type RecordDecisionParams, traceAutoGen, traceCrewAI, traceLangChainAgent, traceWorkflowStep, type WorkflowContext, type WorkflowDefinition, type WorkflowEdge, type WorkflowNode, type WorkflowStatus, WorkflowTracer, type WorkflowTracerOptions, } from "./workflows";
|
|
43
43
|
import { AIEvalClient } from "./client";
|
package/dist/index.js
CHANGED
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
* @packageDocumentation
|
|
9
9
|
*/
|
|
10
10
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
-
exports.
|
|
12
|
-
exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.
|
|
11
|
+
exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
|
|
12
|
+
exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = void 0;
|
|
13
13
|
// Main SDK exports
|
|
14
14
|
var client_1 = require("./client");
|
|
15
15
|
Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
|
|
@@ -49,35 +49,35 @@ Object.defineProperty(exports, "createContext", { enumerable: true, get: functio
|
|
|
49
49
|
Object.defineProperty(exports, "ContextManager", { enumerable: true, get: function () { return context_1.EvalContext; } });
|
|
50
50
|
Object.defineProperty(exports, "getContext", { enumerable: true, get: function () { return context_1.getCurrentContext; } });
|
|
51
51
|
Object.defineProperty(exports, "withContext", { enumerable: true, get: function () { return context_1.withContext; } });
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
Object.defineProperty(exports, "
|
|
55
|
-
Object.defineProperty(exports, "
|
|
52
|
+
var context_2 = require("./runtime/context");
|
|
53
|
+
Object.defineProperty(exports, "cloneContext", { enumerable: true, get: function () { return context_2.cloneContext; } });
|
|
54
|
+
Object.defineProperty(exports, "mergeContexts", { enumerable: true, get: function () { return context_2.mergeContexts; } });
|
|
55
|
+
Object.defineProperty(exports, "validateContext", { enumerable: true, get: function () { return context_2.validateContext; } });
|
|
56
56
|
// LAYER 1: Runtime Foundation - NEW PROGRAMMING MODEL
|
|
57
57
|
var eval_1 = require("./runtime/eval");
|
|
58
|
-
Object.defineProperty(exports, "defineEval", { enumerable: true, get: function () { return eval_1.defineEval; } });
|
|
59
|
-
Object.defineProperty(exports, "evalai", { enumerable: true, get: function () { return eval_1.evalai; } });
|
|
60
|
-
Object.defineProperty(exports, "defineSuite", { enumerable: true, get: function () { return eval_1.defineSuite; } });
|
|
61
58
|
Object.defineProperty(exports, "createEvalContext", { enumerable: true, get: function () { return eval_1.createContext; } });
|
|
62
59
|
Object.defineProperty(exports, "createResult", { enumerable: true, get: function () { return eval_1.createResult; } });
|
|
60
|
+
Object.defineProperty(exports, "defineEval", { enumerable: true, get: function () { return eval_1.defineEval; } });
|
|
61
|
+
Object.defineProperty(exports, "defineSuite", { enumerable: true, get: function () { return eval_1.defineSuite; } });
|
|
62
|
+
Object.defineProperty(exports, "evalai", { enumerable: true, get: function () { return eval_1.evalai; } });
|
|
63
|
+
var executor_1 = require("./runtime/executor");
|
|
64
|
+
Object.defineProperty(exports, "createLocalExecutor", { enumerable: true, get: function () { return executor_1.createLocalExecutor; } });
|
|
65
|
+
Object.defineProperty(exports, "defaultLocalExecutor", { enumerable: true, get: function () { return executor_1.defaultLocalExecutor; } });
|
|
63
66
|
var registry_1 = require("./runtime/registry");
|
|
64
67
|
Object.defineProperty(exports, "createEvalRuntime", { enumerable: true, get: function () { return registry_1.createEvalRuntime; } });
|
|
68
|
+
Object.defineProperty(exports, "disposeActiveRuntime", { enumerable: true, get: function () { return registry_1.disposeActiveRuntime; } });
|
|
65
69
|
Object.defineProperty(exports, "getActiveRuntime", { enumerable: true, get: function () { return registry_1.getActiveRuntime; } });
|
|
66
70
|
Object.defineProperty(exports, "setActiveRuntime", { enumerable: true, get: function () { return registry_1.setActiveRuntime; } });
|
|
67
|
-
Object.defineProperty(exports, "disposeActiveRuntime", { enumerable: true, get: function () { return registry_1.disposeActiveRuntime; } });
|
|
68
|
-
var executor_1 = require("./runtime/executor");
|
|
69
|
-
Object.defineProperty(exports, "createLocalExecutor", { enumerable: true, get: function () { return executor_1.createLocalExecutor; } });
|
|
70
|
-
Object.defineProperty(exports, "defaultLocalExecutor", { enumerable: true, get: function () { return executor_1.defaultLocalExecutor; } });
|
|
71
|
-
var context_2 = require("./runtime/context");
|
|
72
|
-
Object.defineProperty(exports, "mergeContexts", { enumerable: true, get: function () { return context_2.mergeContexts; } });
|
|
73
|
-
Object.defineProperty(exports, "cloneContext", { enumerable: true, get: function () { return context_2.cloneContext; } });
|
|
74
|
-
Object.defineProperty(exports, "validateContext", { enumerable: true, get: function () { return context_2.validateContext; } });
|
|
75
71
|
// Runtime errors
|
|
76
72
|
var types_1 = require("./runtime/types");
|
|
77
73
|
Object.defineProperty(exports, "EvalRuntimeError", { enumerable: true, get: function () { return types_1.EvalRuntimeError; } });
|
|
78
|
-
Object.defineProperty(exports, "SpecRegistrationError", { enumerable: true, get: function () { return types_1.SpecRegistrationError; } });
|
|
79
|
-
Object.defineProperty(exports, "SpecExecutionError", { enumerable: true, get: function () { return types_1.SpecExecutionError; } });
|
|
80
74
|
Object.defineProperty(exports, "RuntimeError", { enumerable: true, get: function () { return types_1.RuntimeError; } });
|
|
75
|
+
Object.defineProperty(exports, "SpecExecutionError", { enumerable: true, get: function () { return types_1.SpecExecutionError; } });
|
|
76
|
+
Object.defineProperty(exports, "SpecRegistrationError", { enumerable: true, get: function () { return types_1.SpecRegistrationError; } });
|
|
77
|
+
// Test suite builder (Tier 2.7) - BACKWARD COMPATIBILITY LAYER
|
|
78
|
+
var testing_1 = require("./testing");
|
|
79
|
+
Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: function () { return testing_1.createTestSuite; } });
|
|
80
|
+
Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
|
|
81
81
|
// Snapshot testing (Tier 2.8)
|
|
82
82
|
const snapshot_1 = require("./snapshot");
|
|
83
83
|
Object.defineProperty(exports, "compareWithSnapshot", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
|
|
@@ -19,6 +19,24 @@
|
|
|
19
19
|
* ```
|
|
20
20
|
*/
|
|
21
21
|
import type { AIEvalClient } from "../client";
|
|
22
|
+
interface AnthropicMessageParams {
|
|
23
|
+
model: string;
|
|
24
|
+
messages: unknown[];
|
|
25
|
+
temperature?: number;
|
|
26
|
+
max_tokens?: number;
|
|
27
|
+
[key: string]: unknown;
|
|
28
|
+
}
|
|
29
|
+
interface AnthropicMessage {
|
|
30
|
+
content: unknown;
|
|
31
|
+
usage?: unknown;
|
|
32
|
+
stop_reason?: unknown;
|
|
33
|
+
[key: string]: unknown;
|
|
34
|
+
}
|
|
35
|
+
interface AnthropicClient {
|
|
36
|
+
messages: {
|
|
37
|
+
create: (params: AnthropicMessageParams, requestOptions?: Record<string, unknown>) => Promise<AnthropicMessage>;
|
|
38
|
+
};
|
|
39
|
+
}
|
|
22
40
|
export interface AnthropicTraceOptions {
|
|
23
41
|
/** Whether to capture input (default: true) */
|
|
24
42
|
captureInput?: boolean;
|
|
@@ -50,7 +68,7 @@ export interface AnthropicTraceOptions {
|
|
|
50
68
|
* });
|
|
51
69
|
* ```
|
|
52
70
|
*/
|
|
53
|
-
export declare function traceAnthropic(anthropic:
|
|
71
|
+
export declare function traceAnthropic(anthropic: AnthropicClient, evalClient: AIEvalClient, options?: AnthropicTraceOptions): AnthropicClient;
|
|
54
72
|
/**
|
|
55
73
|
* Manual trace wrapper for Anthropic calls
|
|
56
74
|
*
|
|
@@ -70,3 +88,4 @@ export declare function traceAnthropic(anthropic: any, evalClient: AIEvalClient,
|
|
|
70
88
|
* ```
|
|
71
89
|
*/
|
|
72
90
|
export declare function traceAnthropicCall<T>(evalClient: AIEvalClient, name: string, fn: () => Promise<T>, options?: AnthropicTraceOptions): Promise<T>;
|
|
91
|
+
export {};
|
|
@@ -87,7 +87,7 @@ async function openAIChatEval(options) {
|
|
|
87
87
|
? [...c.assertions]
|
|
88
88
|
: c.expectedOutput
|
|
89
89
|
? [
|
|
90
|
-
(output) => (0, assertions_1.expect)(output).toContainKeywords(c.expectedOutput
|
|
90
|
+
(output) => (0, assertions_1.expect)(output).toContainKeywords(c.expectedOutput?.split(/\s+/).filter(Boolean) || []),
|
|
91
91
|
]
|
|
92
92
|
: undefined;
|
|
93
93
|
return {
|
|
@@ -116,7 +116,9 @@ async function openAIChatEval(options) {
|
|
|
116
116
|
printSummary(evalResult);
|
|
117
117
|
// v1.5: Optional report to EvalAI platform
|
|
118
118
|
if (options.reportToEvalAI) {
|
|
119
|
-
const config = typeof process !== "undefined" && process.cwd
|
|
119
|
+
const config = typeof process !== "undefined" && process.cwd
|
|
120
|
+
? (0, config_1.loadConfig)(process.cwd())
|
|
121
|
+
: null;
|
|
120
122
|
const evalId = options.evaluationId || config?.evaluationId;
|
|
121
123
|
if (!evalId || String(evalId).trim() === "") {
|
|
122
124
|
console.log("Run evalai init and set evaluationId to upload results.");
|