@pauly4010/evalai-sdk 1.9.0 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/README.md +136 -23
  2. package/dist/assertions.js +51 -18
  3. package/dist/batch.js +8 -2
  4. package/dist/cli/api.js +3 -1
  5. package/dist/cli/check.js +19 -6
  6. package/dist/cli/ci-context.js +3 -1
  7. package/dist/cli/config.js +28 -8
  8. package/dist/cli/diff.js +14 -9
  9. package/dist/cli/discover.js +18 -7
  10. package/dist/cli/doctor.js +43 -9
  11. package/dist/cli/explain.js +37 -11
  12. package/dist/cli/formatters/human.js +4 -1
  13. package/dist/cli/formatters/pr-comment.js +3 -1
  14. package/dist/cli/gate.js +6 -2
  15. package/dist/cli/impact-analysis.js +6 -5
  16. package/dist/cli/index.js +18 -6
  17. package/dist/cli/manifest.d.ts +3 -5
  18. package/dist/cli/manifest.js +21 -14
  19. package/dist/cli/migrate.js +4 -4
  20. package/dist/cli/policy-packs.js +8 -2
  21. package/dist/cli/print-config.js +19 -4
  22. package/dist/cli/regression-gate.js +8 -2
  23. package/dist/cli/report/build-check-report.js +8 -2
  24. package/dist/cli/run.js +11 -5
  25. package/dist/cli/share.js +3 -1
  26. package/dist/cli/upgrade.js +2 -1
  27. package/dist/client.d.ts +16 -19
  28. package/dist/client.js +60 -43
  29. package/dist/client.request.test.d.ts +1 -1
  30. package/dist/client.request.test.js +222 -147
  31. package/dist/context.js +3 -1
  32. package/dist/errors.js +11 -4
  33. package/dist/export.js +3 -1
  34. package/dist/index.d.ts +8 -8
  35. package/dist/index.js +19 -19
  36. package/dist/integrations/anthropic.d.ts +20 -1
  37. package/dist/integrations/openai-eval.js +4 -2
  38. package/dist/integrations/openai.d.ts +24 -1
  39. package/dist/local.js +3 -1
  40. package/dist/logger.js +6 -2
  41. package/dist/pagination.js +6 -2
  42. package/dist/runtime/adapters/config-to-dsl.js +12 -9
  43. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +1 -1
  44. package/dist/runtime/adapters/testsuite-to-dsl.js +11 -6
  45. package/dist/runtime/eval.d.ts +1 -1
  46. package/dist/runtime/eval.js +12 -5
  47. package/dist/runtime/execution-mode.js +13 -9
  48. package/dist/runtime/registry.js +8 -21
  49. package/dist/runtime/run-report.d.ts +0 -2
  50. package/dist/runtime/run-report.js +12 -10
  51. package/dist/testing.js +7 -2
  52. package/dist/types.d.ts +100 -69
  53. package/dist/utils/input-hash.js +4 -1
  54. package/dist/version.d.ts +1 -1
  55. package/dist/version.js +1 -1
  56. package/dist/workflows.js +62 -14
  57. package/package.json +115 -111
@@ -1,157 +1,232 @@
1
1
  "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
2
+ var __createBinding =
3
+ (this && this.__createBinding) ||
4
+ (Object.create
5
+ ? function (o, m, k, k2) {
6
+ if (k2 === undefined) k2 = k;
7
+ var desc = Object.getOwnPropertyDescriptor(m, k);
8
+ if (
9
+ !desc ||
10
+ ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)
11
+ ) {
12
+ desc = {
13
+ enumerable: true,
14
+ get: function () {
15
+ return m[k];
16
+ },
17
+ };
18
+ }
19
+ Object.defineProperty(o, k2, desc);
20
+ }
21
+ : function (o, m, k, k2) {
22
+ if (k2 === undefined) k2 = k;
23
+ o[k2] = m[k];
24
+ });
25
+ var __setModuleDefault =
26
+ (this && this.__setModuleDefault) ||
27
+ (Object.create
28
+ ? function (o, v) {
29
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
30
+ }
31
+ : function (o, v) {
32
+ o["default"] = v;
33
+ });
34
+ var __importStar =
35
+ (this && this.__importStar) ||
36
+ (function () {
37
+ var ownKeys = function (o) {
38
+ ownKeys =
39
+ Object.getOwnPropertyNames ||
40
+ function (o) {
41
+ var ar = [];
42
+ for (var k in o)
43
+ if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
44
+ return ar;
45
+ };
46
+ return ownKeys(o);
47
+ };
48
+ return function (mod) {
49
+ if (mod && mod.__esModule) return mod;
50
+ var result = {};
51
+ if (mod != null)
52
+ for (var k = ownKeys(mod), i = 0; i < k.length; i++)
53
+ if (k[i] !== "default") __createBinding(result, mod, k[i]);
54
+ __setModuleDefault(result, mod);
55
+ return result;
56
+ };
57
+ })();
35
58
  Object.defineProperty(exports, "__esModule", { value: true });
36
59
  const vitest_1 = require("vitest");
37
60
  const client_1 = require("./client");
38
61
  const errorsModule = __importStar(require("./errors"));
39
62
  vitest_1.vi.mock("./cache", () => {
40
- const cacheTracker = { invalidatedPatterns: [] };
41
- const shouldCache = vitest_1.vi.fn().mockReturnValue(true);
42
- const getTTL = vitest_1.vi.fn().mockReturnValue(1000);
43
- const makeKey = (method, url, params) => `${method}:${url}:${JSON.stringify(params ?? null)}`;
44
- return {
45
- __esModule: true,
46
- shouldCache,
47
- getTTL,
48
- cacheTracker,
49
- RequestCache: class RequestCache {
50
- constructor() {
51
- this.store = new Map();
52
- }
53
- get(method, url, params) {
54
- const key = makeKey(method, url, params);
55
- return this.store.get(key) ?? null;
56
- }
57
- set(method, url, data, _ttl, params) {
58
- const key = makeKey(method, url, params);
59
- this.store.set(key, data);
60
- }
61
- invalidatePattern(pattern) {
62
- cacheTracker.invalidatedPatterns.push(pattern);
63
- }
64
- invalidate(_method, _url, _params) {
65
- // no-op for tests
66
- }
67
- clear() {
68
- this.store.clear();
69
- }
70
- },
71
- };
63
+ const cacheTracker = { invalidatedPatterns: [] };
64
+ const shouldCache = vitest_1.vi.fn().mockReturnValue(true);
65
+ const getTTL = vitest_1.vi.fn().mockReturnValue(1000);
66
+ const makeKey = (method, url, params) =>
67
+ `${method}:${url}:${JSON.stringify(params ?? null)}`;
68
+ return {
69
+ __esModule: true,
70
+ shouldCache,
71
+ getTTL,
72
+ cacheTracker,
73
+ RequestCache: class RequestCache {
74
+ constructor() {
75
+ this.store = new Map();
76
+ }
77
+ get(method, url, params) {
78
+ const key = makeKey(method, url, params);
79
+ return this.store.get(key) ?? null;
80
+ }
81
+ set(method, url, data, _ttl, params) {
82
+ const key = makeKey(method, url, params);
83
+ this.store.set(key, data);
84
+ }
85
+ invalidatePattern(pattern) {
86
+ cacheTracker.invalidatedPatterns.push(pattern);
87
+ }
88
+ invalidate(_method, _url, _params) {
89
+ // no-op for tests
90
+ }
91
+ clear() {
92
+ this.store.clear();
93
+ }
94
+ },
95
+ };
72
96
  });
73
97
  const cache_1 = require("./cache");
74
98
  (0, vitest_1.describe)("AIEvalClient.request", () => {
75
- (0, vitest_1.beforeEach)(() => {
76
- process.env.EVALAI_API_KEY = "test";
77
- cache_1.shouldCache.mockReset().mockReturnValue(true);
78
- cache_1.getTTL.mockReset().mockReturnValue(1000);
79
- cache_1.cacheTracker.invalidatedPatterns.length = 0;
80
- });
81
- (0, vitest_1.it)("caches GET responses and reuses data without re-fetching", async () => {
82
- const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
83
- const payload = { items: [1, 2, 3] };
84
- const fetchMock = vitest_1.vi.fn().mockResolvedValue({
85
- ok: true,
86
- status: 200,
87
- json: async () => payload,
88
- });
89
- globalThis.fetch = fetchMock;
90
- const first = await client.request("/api/traces", { method: "GET" });
91
- const second = await client.request("/api/traces", { method: "GET" });
92
- (0, vitest_1.expect)(first).toEqual(payload);
93
- (0, vitest_1.expect)(second).toEqual(payload);
94
- (0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(1);
95
- });
96
- (0, vitest_1.it)("propagates non-ok responses as SDK errors", async () => {
97
- const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost" });
98
- const fetchMock = vitest_1.vi.fn().mockResolvedValue({
99
- ok: false,
100
- status: 429,
101
- json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
102
- });
103
- globalThis.fetch = fetchMock;
104
- const createErrorSpy = vitest_1.vi
105
- .spyOn(errorsModule, "createErrorFromResponse")
106
- .mockReturnValue(new errorsModule.EvalAIError("rate limited", "RATE_LIMIT_EXCEEDED", 429));
107
- await (0, vitest_1.expect)(client.request("/api/fail", { method: "GET" })).rejects.toHaveProperty("code", "RATE_LIMIT_EXCEEDED");
108
- createErrorSpy.mockRestore();
109
- });
110
- (0, vitest_1.it)("retries on retryable SDK errors and eventually succeeds", async () => {
111
- const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
112
- vitest_1.vi.spyOn(client, "calculateBackoff").mockReturnValue(0);
113
- const failureResponse = {
114
- ok: false,
115
- status: 429,
116
- json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
117
- };
118
- const successResponse = {
119
- ok: true,
120
- status: 200,
121
- json: async () => ({ ok: true }),
122
- };
123
- const createErrorSpy = vitest_1.vi
124
- .spyOn(errorsModule, "createErrorFromResponse")
125
- .mockReturnValue(new errorsModule.EvalAIError("rate limited", "RATE_LIMIT_EXCEEDED", 429));
126
- const fetchMock = vitest_1.vi
127
- .fn()
128
- .mockResolvedValueOnce(failureResponse)
129
- .mockResolvedValueOnce(successResponse);
130
- globalThis.fetch = fetchMock;
131
- const result = await client.request("/api/retry", { method: "GET" });
132
- (0, vitest_1.expect)(result).toEqual({ ok: true });
133
- (0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(2);
134
- createErrorSpy.mockRestore();
135
- });
136
- (0, vitest_1.it)("throws a TIMEOUT SDK error when fetch aborts", async () => {
137
- const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
138
- const abortError = Object.assign(new Error("aborted"), { name: "AbortError" });
139
- const fetchMock = vitest_1.vi.fn().mockRejectedValue(abortError);
140
- globalThis.fetch = fetchMock;
141
- await (0, vitest_1.expect)(client.request("/api/timeout", { method: "GET" })).rejects.toMatchObject({
142
- code: "TIMEOUT",
143
- });
144
- });
145
- (0, vitest_1.it)("invalidates related cache entries for mutation requests", async () => {
146
- const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
147
- cache_1.shouldCache.mockReturnValue(false);
148
- const fetchMock = vitest_1.vi.fn().mockResolvedValue({
149
- ok: true,
150
- status: 201,
151
- json: async () => ({ result: "ok" }),
152
- });
153
- globalThis.fetch = fetchMock;
154
- await client.request("/api/evaluations", { method: "POST", body: JSON.stringify({}) });
155
- (0, vitest_1.expect)(cache_1.cacheTracker.invalidatedPatterns).toContain("evaluations");
156
- });
99
+ (0, vitest_1.beforeEach)(() => {
100
+ process.env.EVALAI_API_KEY = "test";
101
+ cache_1.shouldCache.mockReset().mockReturnValue(true);
102
+ cache_1.getTTL.mockReset().mockReturnValue(1000);
103
+ cache_1.cacheTracker.invalidatedPatterns.length = 0;
104
+ });
105
+ (0, vitest_1.it)(
106
+ "caches GET responses and reuses data without re-fetching",
107
+ async () => {
108
+ const client = new client_1.AIEvalClient({
109
+ apiKey: "test",
110
+ baseUrl: "http://localhost",
111
+ timeout: 1000,
112
+ });
113
+ const payload = { items: [1, 2, 3] };
114
+ const fetchMock = vitest_1.vi.fn().mockResolvedValue({
115
+ ok: true,
116
+ status: 200,
117
+ json: async () => payload,
118
+ });
119
+ globalThis.fetch = fetchMock;
120
+ const first = await client.request("/api/traces", { method: "GET" });
121
+ const second = await client.request("/api/traces", { method: "GET" });
122
+ (0, vitest_1.expect)(first).toEqual(payload);
123
+ (0, vitest_1.expect)(second).toEqual(payload);
124
+ (0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(1);
125
+ },
126
+ );
127
+ (0, vitest_1.it)("propagates non-ok responses as SDK errors", async () => {
128
+ const client = new client_1.AIEvalClient({
129
+ apiKey: "test",
130
+ baseUrl: "http://localhost",
131
+ });
132
+ const fetchMock = vitest_1.vi.fn().mockResolvedValue({
133
+ ok: false,
134
+ status: 429,
135
+ json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
136
+ });
137
+ globalThis.fetch = fetchMock;
138
+ const createErrorSpy = vitest_1.vi
139
+ .spyOn(errorsModule, "createErrorFromResponse")
140
+ .mockReturnValue(
141
+ new errorsModule.EvalAIError(
142
+ "rate limited",
143
+ "RATE_LIMIT_EXCEEDED",
144
+ 429,
145
+ ),
146
+ );
147
+ await (0, vitest_1.expect)(
148
+ client.request("/api/fail", { method: "GET" }),
149
+ ).rejects.toHaveProperty("code", "RATE_LIMIT_EXCEEDED");
150
+ createErrorSpy.mockRestore();
151
+ });
152
+ (0, vitest_1.it)(
153
+ "retries on retryable SDK errors and eventually succeeds",
154
+ async () => {
155
+ const client = new client_1.AIEvalClient({
156
+ apiKey: "test",
157
+ baseUrl: "http://localhost",
158
+ timeout: 1000,
159
+ });
160
+ vitest_1.vi.spyOn(client, "calculateBackoff").mockReturnValue(0);
161
+ const failureResponse = {
162
+ ok: false,
163
+ status: 429,
164
+ json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
165
+ };
166
+ const successResponse = {
167
+ ok: true,
168
+ status: 200,
169
+ json: async () => ({ ok: true }),
170
+ };
171
+ const createErrorSpy = vitest_1.vi
172
+ .spyOn(errorsModule, "createErrorFromResponse")
173
+ .mockReturnValue(
174
+ new errorsModule.EvalAIError(
175
+ "rate limited",
176
+ "RATE_LIMIT_EXCEEDED",
177
+ 429,
178
+ ),
179
+ );
180
+ const fetchMock = vitest_1.vi
181
+ .fn()
182
+ .mockResolvedValueOnce(failureResponse)
183
+ .mockResolvedValueOnce(successResponse);
184
+ globalThis.fetch = fetchMock;
185
+ const result = await client.request("/api/retry", { method: "GET" });
186
+ (0, vitest_1.expect)(result).toEqual({ ok: true });
187
+ (0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(2);
188
+ createErrorSpy.mockRestore();
189
+ },
190
+ );
191
+ (0, vitest_1.it)("throws a TIMEOUT SDK error when fetch aborts", async () => {
192
+ const client = new client_1.AIEvalClient({
193
+ apiKey: "test",
194
+ baseUrl: "http://localhost",
195
+ timeout: 1000,
196
+ });
197
+ const abortError = Object.assign(new Error("aborted"), {
198
+ name: "AbortError",
199
+ });
200
+ const fetchMock = vitest_1.vi.fn().mockRejectedValue(abortError);
201
+ globalThis.fetch = fetchMock;
202
+ await (0, vitest_1.expect)(
203
+ client.request("/api/timeout", { method: "GET" }),
204
+ ).rejects.toMatchObject({
205
+ code: "TIMEOUT",
206
+ });
207
+ });
208
+ (0, vitest_1.it)(
209
+ "invalidates related cache entries for mutation requests",
210
+ async () => {
211
+ const client = new client_1.AIEvalClient({
212
+ apiKey: "test",
213
+ baseUrl: "http://localhost",
214
+ timeout: 1000,
215
+ });
216
+ cache_1.shouldCache.mockReturnValue(false);
217
+ const fetchMock = vitest_1.vi.fn().mockResolvedValue({
218
+ ok: true,
219
+ status: 201,
220
+ json: async () => ({ result: "ok" }),
221
+ });
222
+ globalThis.fetch = fetchMock;
223
+ await client.request("/api/evaluations", {
224
+ method: "POST",
225
+ body: JSON.stringify({}),
226
+ });
227
+ (0, vitest_1.expect)(cache_1.cacheTracker.invalidatedPatterns).toContain(
228
+ "evaluations",
229
+ );
230
+ },
231
+ );
157
232
  });
package/dist/context.js CHANGED
@@ -28,7 +28,9 @@ exports.withContext = withContext;
28
28
  exports.withContextSync = withContextSync;
29
29
  exports.WithContext = WithContext;
30
30
  // Detect environment
31
- const isNode = typeof process !== "undefined" && process.versions?.node && typeof require !== "undefined";
31
+ const isNode = typeof process !== "undefined" &&
32
+ process.versions?.node &&
33
+ typeof require !== "undefined";
32
34
  // Browser fallback: simple context stack
33
35
  class BrowserContextStorage {
34
36
  constructor() {
package/dist/errors.js CHANGED
@@ -162,8 +162,11 @@ class EvalAIError extends Error {
162
162
  this.details = details;
163
163
  // Initialize required properties from ERROR_DOCS
164
164
  const doc = ERROR_DOCS[code];
165
- this.documentation = doc?.documentation ?? `https://docs.ai-eval-platform.com/errors/${code}`;
166
- this.solutions = doc?.solutions ?? ["Check the error details for more information"];
165
+ this.documentation =
166
+ doc?.documentation ?? `https://docs.ai-eval-platform.com/errors/${code}`;
167
+ this.solutions = doc?.solutions ?? [
168
+ "Check the error details for more information",
169
+ ];
167
170
  this.retryable = doc?.retryable ?? false;
168
171
  // Extract retry-after for rate limits
169
172
  const errorDetails = details;
@@ -174,7 +177,8 @@ class EvalAIError extends Error {
174
177
  if (code === "FEATURE_LIMIT_REACHED" && errorDetails?.resetAt) {
175
178
  this.resetAt = new Date(errorDetails.resetAt);
176
179
  }
177
- this.requestId = errorDetails?.error?.requestId ?? errorDetails?.requestId;
180
+ this.requestId =
181
+ errorDetails?.error?.requestId ?? errorDetails?.requestId;
178
182
  // Ensure proper prototype chain
179
183
  Object.setPrototypeOf(this, EvalAIError.prototype);
180
184
  }
@@ -236,7 +240,10 @@ function createErrorFromResponse(response, data) {
236
240
  const message = typeof errorData?.error === "string"
237
241
  ? errorData.error
238
242
  : (errObj?.message ?? errorData?.message ?? response.statusText);
239
- const requestId = errObj?.requestId ?? errorData?.requestId ?? response.headers.get("x-request-id") ?? undefined;
243
+ const requestId = errObj?.requestId ??
244
+ errorData?.requestId ??
245
+ response.headers.get("x-request-id") ??
246
+ undefined;
240
247
  // Map HTTP status to error codes when code not in response
241
248
  if (!errObj?.code && !errorData?.code) {
242
249
  if (status === 401)
package/dist/export.js CHANGED
@@ -300,7 +300,9 @@ async function importFromLangSmith(client, langsmithData, options) {
300
300
  traceId: run.id || `langsmith-${Date.now()}-${Math.random()}`,
301
301
  organizationId: options.organizationId,
302
302
  status: run.error ? "error" : "success",
303
- durationMs: run.execution_time ? Math.round(run.execution_time * 1000) : null,
303
+ durationMs: run.execution_time
304
+ ? Math.round(run.execution_time * 1000)
305
+ : null,
304
306
  metadata: {
305
307
  source: "langsmith",
306
308
  original_id: run.id,
package/dist/index.d.ts CHANGED
@@ -13,13 +13,13 @@ NetworkError, };
13
13
  export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
14
14
  import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
15
15
  export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
16
- export { createTestSuite, type TestCaseResult, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteConfig, TestSuiteResult, } from "./testing";
17
- export { defineEval, evalai, defineSuite, createContext as createEvalContext, createResult, } from "./runtime/eval";
18
- export { createEvalRuntime, getActiveRuntime, setActiveRuntime, disposeActiveRuntime, } from "./runtime/registry";
16
+ export { cloneContext, mergeContexts, validateContext, } from "./runtime/context";
17
+ export { createContext as createEvalContext, createResult, defineEval, defineSuite, evalai, } from "./runtime/eval";
19
18
  export { createLocalExecutor, defaultLocalExecutor, } from "./runtime/executor";
20
- export { mergeContexts, cloneContext, validateContext, } from "./runtime/context";
21
- export type { EvalSpec, EvalContext, EvalResult, EvalOptions, EvalRuntime, EvalExecutor, EvalExecutorInterface, LocalExecutor, CloudExecutor, WorkerExecutor, SpecConfig, SpecOptions, DefineEvalFunction, ExecutorCapabilities, } from "./runtime/types";
22
- export { EvalRuntimeError, SpecRegistrationError, SpecExecutionError, RuntimeError, } from "./runtime/types";
19
+ export { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRuntime, } from "./runtime/registry";
20
+ export type { CloudExecutor, DefineEvalFunction, EvalContext, EvalExecutor, EvalExecutorInterface, EvalOptions, EvalResult, EvalRuntime, EvalSpec, ExecutorCapabilities, LocalExecutor, SpecConfig, SpecOptions, WorkerExecutor, } from "./runtime/types";
21
+ export { EvalRuntimeError, RuntimeError, SpecExecutionError, SpecRegistrationError, } from "./runtime/types";
22
+ export { createTestSuite, type TestCaseResult, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteConfig, TestSuiteResult, } from "./testing";
23
23
  import { compareWithSnapshot, snapshot } from "./snapshot";
24
24
  export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots, };
25
25
  import type { ExportFormat } from "./export";
@@ -36,8 +36,8 @@ export { Logger } from "./logger";
36
36
  export { extendExpectWithToPassGate } from "./matchers";
37
37
  export { autoPaginate, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
38
38
  export { ARTIFACTS, type Baseline, type BaselineTolerance, GATE_CATEGORY, GATE_EXIT, type GateCategory, type GateExitCode, REPORT_SCHEMA_VERSION, type RegressionDelta, type RegressionReport, } from "./regression";
39
- export { batchProcess, batchRead, RateLimiter, streamEvaluation } from "./streaming";
40
- export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";
39
+ export { batchProcess, batchRead, RateLimiter, streamEvaluation, } from "./streaming";
40
+ export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, EvaluationRun, EvaluationRunDetail, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeEvaluateResult, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TraceDetail, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";
41
41
  export { EvaluationTemplates, type EvaluationTemplateType, type FeatureUsage, type OrganizationLimits, } from "./types";
42
42
  export { type AgentHandoff, type AgentSpanContext, type CostCategory, type CostRecord, createWorkflowTracer, type DecisionAlternative, type DecisionType, type HandoffType, type LLMProvider, type RecordCostParams, type RecordDecisionParams, traceAutoGen, traceCrewAI, traceLangChainAgent, traceWorkflowStep, type WorkflowContext, type WorkflowDefinition, type WorkflowEdge, type WorkflowNode, type WorkflowStatus, WorkflowTracer, type WorkflowTracerOptions, } from "./workflows";
43
43
  import { AIEvalClient } from "./client";
package/dist/index.js CHANGED
@@ -8,8 +8,8 @@
8
8
  * @packageDocumentation
9
9
  */
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
- exports.SpecExecutionError = exports.SpecRegistrationError = exports.EvalRuntimeError = exports.validateContext = exports.cloneContext = exports.mergeContexts = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.disposeActiveRuntime = exports.setActiveRuntime = exports.getActiveRuntime = exports.createEvalRuntime = exports.createResult = exports.createEvalContext = exports.defineSuite = exports.evalai = exports.defineEval = exports.TestSuite = exports.createTestSuite = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
12
- exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.RuntimeError = void 0;
11
+ exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
12
+ exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = void 0;
13
13
  // Main SDK exports
14
14
  var client_1 = require("./client");
15
15
  Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
@@ -49,35 +49,35 @@ Object.defineProperty(exports, "createContext", { enumerable: true, get: functio
49
49
  Object.defineProperty(exports, "ContextManager", { enumerable: true, get: function () { return context_1.EvalContext; } });
50
50
  Object.defineProperty(exports, "getContext", { enumerable: true, get: function () { return context_1.getCurrentContext; } });
51
51
  Object.defineProperty(exports, "withContext", { enumerable: true, get: function () { return context_1.withContext; } });
52
- // Test suite builder (Tier 2.7) - BACKWARD COMPATIBILITY LAYER
53
- var testing_1 = require("./testing");
54
- Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: function () { return testing_1.createTestSuite; } });
55
- Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
52
+ var context_2 = require("./runtime/context");
53
+ Object.defineProperty(exports, "cloneContext", { enumerable: true, get: function () { return context_2.cloneContext; } });
54
+ Object.defineProperty(exports, "mergeContexts", { enumerable: true, get: function () { return context_2.mergeContexts; } });
55
+ Object.defineProperty(exports, "validateContext", { enumerable: true, get: function () { return context_2.validateContext; } });
56
56
  // LAYER 1: Runtime Foundation - NEW PROGRAMMING MODEL
57
57
  var eval_1 = require("./runtime/eval");
58
- Object.defineProperty(exports, "defineEval", { enumerable: true, get: function () { return eval_1.defineEval; } });
59
- Object.defineProperty(exports, "evalai", { enumerable: true, get: function () { return eval_1.evalai; } });
60
- Object.defineProperty(exports, "defineSuite", { enumerable: true, get: function () { return eval_1.defineSuite; } });
61
58
  Object.defineProperty(exports, "createEvalContext", { enumerable: true, get: function () { return eval_1.createContext; } });
62
59
  Object.defineProperty(exports, "createResult", { enumerable: true, get: function () { return eval_1.createResult; } });
60
+ Object.defineProperty(exports, "defineEval", { enumerable: true, get: function () { return eval_1.defineEval; } });
61
+ Object.defineProperty(exports, "defineSuite", { enumerable: true, get: function () { return eval_1.defineSuite; } });
62
+ Object.defineProperty(exports, "evalai", { enumerable: true, get: function () { return eval_1.evalai; } });
63
+ var executor_1 = require("./runtime/executor");
64
+ Object.defineProperty(exports, "createLocalExecutor", { enumerable: true, get: function () { return executor_1.createLocalExecutor; } });
65
+ Object.defineProperty(exports, "defaultLocalExecutor", { enumerable: true, get: function () { return executor_1.defaultLocalExecutor; } });
63
66
  var registry_1 = require("./runtime/registry");
64
67
  Object.defineProperty(exports, "createEvalRuntime", { enumerable: true, get: function () { return registry_1.createEvalRuntime; } });
68
+ Object.defineProperty(exports, "disposeActiveRuntime", { enumerable: true, get: function () { return registry_1.disposeActiveRuntime; } });
65
69
  Object.defineProperty(exports, "getActiveRuntime", { enumerable: true, get: function () { return registry_1.getActiveRuntime; } });
66
70
  Object.defineProperty(exports, "setActiveRuntime", { enumerable: true, get: function () { return registry_1.setActiveRuntime; } });
67
- Object.defineProperty(exports, "disposeActiveRuntime", { enumerable: true, get: function () { return registry_1.disposeActiveRuntime; } });
68
- var executor_1 = require("./runtime/executor");
69
- Object.defineProperty(exports, "createLocalExecutor", { enumerable: true, get: function () { return executor_1.createLocalExecutor; } });
70
- Object.defineProperty(exports, "defaultLocalExecutor", { enumerable: true, get: function () { return executor_1.defaultLocalExecutor; } });
71
- var context_2 = require("./runtime/context");
72
- Object.defineProperty(exports, "mergeContexts", { enumerable: true, get: function () { return context_2.mergeContexts; } });
73
- Object.defineProperty(exports, "cloneContext", { enumerable: true, get: function () { return context_2.cloneContext; } });
74
- Object.defineProperty(exports, "validateContext", { enumerable: true, get: function () { return context_2.validateContext; } });
75
71
  // Runtime errors
76
72
  var types_1 = require("./runtime/types");
77
73
  Object.defineProperty(exports, "EvalRuntimeError", { enumerable: true, get: function () { return types_1.EvalRuntimeError; } });
78
- Object.defineProperty(exports, "SpecRegistrationError", { enumerable: true, get: function () { return types_1.SpecRegistrationError; } });
79
- Object.defineProperty(exports, "SpecExecutionError", { enumerable: true, get: function () { return types_1.SpecExecutionError; } });
80
74
  Object.defineProperty(exports, "RuntimeError", { enumerable: true, get: function () { return types_1.RuntimeError; } });
75
+ Object.defineProperty(exports, "SpecExecutionError", { enumerable: true, get: function () { return types_1.SpecExecutionError; } });
76
+ Object.defineProperty(exports, "SpecRegistrationError", { enumerable: true, get: function () { return types_1.SpecRegistrationError; } });
77
+ // Test suite builder (Tier 2.7) - BACKWARD COMPATIBILITY LAYER
78
+ var testing_1 = require("./testing");
79
+ Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: function () { return testing_1.createTestSuite; } });
80
+ Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
81
81
  // Snapshot testing (Tier 2.8)
82
82
  const snapshot_1 = require("./snapshot");
83
83
  Object.defineProperty(exports, "compareWithSnapshot", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
@@ -19,6 +19,24 @@
19
19
  * ```
20
20
  */
21
21
  import type { AIEvalClient } from "../client";
22
+ interface AnthropicMessageParams {
23
+ model: string;
24
+ messages: unknown[];
25
+ temperature?: number;
26
+ max_tokens?: number;
27
+ [key: string]: unknown;
28
+ }
29
+ interface AnthropicMessage {
30
+ content: unknown;
31
+ usage?: unknown;
32
+ stop_reason?: unknown;
33
+ [key: string]: unknown;
34
+ }
35
+ interface AnthropicClient {
36
+ messages: {
37
+ create: (params: AnthropicMessageParams, requestOptions?: Record<string, unknown>) => Promise<AnthropicMessage>;
38
+ };
39
+ }
22
40
  export interface AnthropicTraceOptions {
23
41
  /** Whether to capture input (default: true) */
24
42
  captureInput?: boolean;
@@ -50,7 +68,7 @@ export interface AnthropicTraceOptions {
50
68
  * });
51
69
  * ```
52
70
  */
53
- export declare function traceAnthropic(anthropic: any, evalClient: AIEvalClient, options?: AnthropicTraceOptions): any;
71
+ export declare function traceAnthropic(anthropic: AnthropicClient, evalClient: AIEvalClient, options?: AnthropicTraceOptions): AnthropicClient;
54
72
  /**
55
73
  * Manual trace wrapper for Anthropic calls
56
74
  *
@@ -70,3 +88,4 @@ export declare function traceAnthropic(anthropic: any, evalClient: AIEvalClient,
70
88
  * ```
71
89
  */
72
90
  export declare function traceAnthropicCall<T>(evalClient: AIEvalClient, name: string, fn: () => Promise<T>, options?: AnthropicTraceOptions): Promise<T>;
91
+ export {};
@@ -87,7 +87,7 @@ async function openAIChatEval(options) {
87
87
  ? [...c.assertions]
88
88
  : c.expectedOutput
89
89
  ? [
90
- (output) => (0, assertions_1.expect)(output).toContainKeywords(c.expectedOutput.split(/\s+/).filter(Boolean)),
90
+ (output) => (0, assertions_1.expect)(output).toContainKeywords(c.expectedOutput?.split(/\s+/).filter(Boolean) || []),
91
91
  ]
92
92
  : undefined;
93
93
  return {
@@ -116,7 +116,9 @@ async function openAIChatEval(options) {
116
116
  printSummary(evalResult);
117
117
  // v1.5: Optional report to EvalAI platform
118
118
  if (options.reportToEvalAI) {
119
- const config = typeof process !== "undefined" && process.cwd ? (0, config_1.loadConfig)(process.cwd()) : null;
119
+ const config = typeof process !== "undefined" && process.cwd
120
+ ? (0, config_1.loadConfig)(process.cwd())
121
+ : null;
120
122
  const evalId = options.evaluationId || config?.evaluationId;
121
123
  if (!evalId || String(evalId).trim() === "") {
122
124
  console.log("Run evalai init and set evaluationId to upload results.");