@pauly4010/evalai-sdk 1.5.7 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/client.d.ts CHANGED
@@ -111,7 +111,7 @@ declare class TraceAPI {
111
111
  * });
112
112
  * ```
113
113
  */
114
- create<TMetadata = Record<string, any>>(params: CreateTraceParams<TMetadata>): Promise<Trace<TMetadata>>;
114
+ create<TMetadata = Record<string, unknown>>(params: CreateTraceParams<TMetadata>): Promise<Trace<TMetadata>>;
115
115
  /**
116
116
  * List traces with optional filtering
117
117
  */
@@ -138,7 +138,7 @@ declare class TraceAPI {
138
138
  * });
139
139
  * ```
140
140
  */
141
- update<TMetadata = Record<string, any>>(id: number, params: UpdateTraceParams<TMetadata>): Promise<Trace<TMetadata>>;
141
+ update<TMetadata = Record<string, unknown>>(id: number, params: UpdateTraceParams<TMetadata>): Promise<Trace<TMetadata>>;
142
142
  /**
143
143
  * Create a span for a trace
144
144
  */
@@ -208,7 +208,7 @@ declare class LLMJudgeAPI {
208
208
  */
209
209
  evaluate(params: RunLLMJudgeParams): Promise<{
210
210
  result: LLMJudgeResult;
211
- config: any;
211
+ config: unknown;
212
212
  }>;
213
213
  /**
214
214
  * Create an LLM judge configuration
package/dist/client.js CHANGED
@@ -94,11 +94,12 @@ class AIEvalClient {
94
94
  results.push({ id: req.id, status: 200, data });
95
95
  }
96
96
  catch (err) {
97
+ const errorObj = err;
97
98
  results.push({
98
99
  id: req.id,
99
- status: err?.statusCode || 500,
100
+ status: errorObj?.statusCode || 500,
100
101
  data: null,
101
- error: err?.message || "Unknown error",
102
+ error: errorObj?.message || "Unknown error",
102
103
  });
103
104
  }
104
105
  })();
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,157 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ const vitest_1 = require("vitest");
37
+ const client_1 = require("./client");
38
+ const errorsModule = __importStar(require("./errors"));
39
+ vitest_1.vi.mock("./cache", () => {
40
+ const cacheTracker = { invalidatedPatterns: [] };
41
+ const shouldCache = vitest_1.vi.fn().mockReturnValue(true);
42
+ const getTTL = vitest_1.vi.fn().mockReturnValue(1000);
43
+ const makeKey = (method, url, params) => `${method}:${url}:${JSON.stringify(params ?? null)}`;
44
+ return {
45
+ __esModule: true,
46
+ shouldCache,
47
+ getTTL,
48
+ cacheTracker,
49
+ RequestCache: class RequestCache {
50
+ constructor() {
51
+ this.store = new Map();
52
+ }
53
+ get(method, url, params) {
54
+ const key = makeKey(method, url, params);
55
+ return this.store.get(key) ?? null;
56
+ }
57
+ set(method, url, data, _ttl, params) {
58
+ const key = makeKey(method, url, params);
59
+ this.store.set(key, data);
60
+ }
61
+ invalidatePattern(pattern) {
62
+ cacheTracker.invalidatedPatterns.push(pattern);
63
+ }
64
+ invalidate(_method, _url, _params) {
65
+ // no-op for tests
66
+ }
67
+ clear() {
68
+ this.store.clear();
69
+ }
70
+ },
71
+ };
72
+ });
73
+ const cache_1 = require("./cache");
74
+ (0, vitest_1.describe)("AIEvalClient.request", () => {
75
+ (0, vitest_1.beforeEach)(() => {
76
+ process.env.EVALAI_API_KEY = "test";
77
+ cache_1.shouldCache.mockReset().mockReturnValue(true);
78
+ cache_1.getTTL.mockReset().mockReturnValue(1000);
79
+ cache_1.cacheTracker.invalidatedPatterns.length = 0;
80
+ });
81
+ (0, vitest_1.it)("caches GET responses and reuses data without re-fetching", async () => {
82
+ const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
83
+ const payload = { items: [1, 2, 3] };
84
+ const fetchMock = vitest_1.vi.fn().mockResolvedValue({
85
+ ok: true,
86
+ status: 200,
87
+ json: async () => payload,
88
+ });
89
+ globalThis.fetch = fetchMock;
90
+ const first = await client.request("/api/traces", { method: "GET" });
91
+ const second = await client.request("/api/traces", { method: "GET" });
92
+ (0, vitest_1.expect)(first).toEqual(payload);
93
+ (0, vitest_1.expect)(second).toEqual(payload);
94
+ (0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(1);
95
+ });
96
+ (0, vitest_1.it)("propagates non-ok responses as SDK errors", async () => {
97
+ const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost" });
98
+ const fetchMock = vitest_1.vi.fn().mockResolvedValue({
99
+ ok: false,
100
+ status: 429,
101
+ json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
102
+ });
103
+ globalThis.fetch = fetchMock;
104
+ const createErrorSpy = vitest_1.vi
105
+ .spyOn(errorsModule, "createErrorFromResponse")
106
+ .mockReturnValue(new errorsModule.EvalAIError("rate limited", "RATE_LIMIT_EXCEEDED", 429));
107
+ await (0, vitest_1.expect)(client.request("/api/fail", { method: "GET" })).rejects.toHaveProperty("code", "RATE_LIMIT_EXCEEDED");
108
+ createErrorSpy.mockRestore();
109
+ });
110
+ (0, vitest_1.it)("retries on retryable SDK errors and eventually succeeds", async () => {
111
+ const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
112
+ vitest_1.vi.spyOn(client, "calculateBackoff").mockReturnValue(0);
113
+ const failureResponse = {
114
+ ok: false,
115
+ status: 429,
116
+ json: async () => ({ error: { code: "RATE_LIMIT_EXCEEDED" } }),
117
+ };
118
+ const successResponse = {
119
+ ok: true,
120
+ status: 200,
121
+ json: async () => ({ ok: true }),
122
+ };
123
+ const createErrorSpy = vitest_1.vi
124
+ .spyOn(errorsModule, "createErrorFromResponse")
125
+ .mockReturnValue(new errorsModule.EvalAIError("rate limited", "RATE_LIMIT_EXCEEDED", 429));
126
+ const fetchMock = vitest_1.vi
127
+ .fn()
128
+ .mockResolvedValueOnce(failureResponse)
129
+ .mockResolvedValueOnce(successResponse);
130
+ globalThis.fetch = fetchMock;
131
+ const result = await client.request("/api/retry", { method: "GET" });
132
+ (0, vitest_1.expect)(result).toEqual({ ok: true });
133
+ (0, vitest_1.expect)(fetchMock).toHaveBeenCalledTimes(2);
134
+ createErrorSpy.mockRestore();
135
+ });
136
+ (0, vitest_1.it)("throws a TIMEOUT SDK error when fetch aborts", async () => {
137
+ const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
138
+ const abortError = Object.assign(new Error("aborted"), { name: "AbortError" });
139
+ const fetchMock = vitest_1.vi.fn().mockRejectedValue(abortError);
140
+ globalThis.fetch = fetchMock;
141
+ await (0, vitest_1.expect)(client.request("/api/timeout", { method: "GET" })).rejects.toMatchObject({
142
+ code: "TIMEOUT",
143
+ });
144
+ });
145
+ (0, vitest_1.it)("invalidates related cache entries for mutation requests", async () => {
146
+ const client = new client_1.AIEvalClient({ apiKey: "test", baseUrl: "http://localhost", timeout: 1000 });
147
+ cache_1.shouldCache.mockReturnValue(false);
148
+ const fetchMock = vitest_1.vi.fn().mockResolvedValue({
149
+ ok: true,
150
+ status: 201,
151
+ json: async () => ({ result: "ok" }),
152
+ });
153
+ globalThis.fetch = fetchMock;
154
+ await client.request("/api/evaluations", { method: "POST", body: JSON.stringify({}) });
155
+ (0, vitest_1.expect)(cache_1.cacheTracker.invalidatedPatterns).toContain("evaluations");
156
+ });
157
+ });
package/dist/context.d.ts CHANGED
@@ -22,7 +22,7 @@
22
22
  * Context metadata that will be automatically injected
23
23
  */
24
24
  export interface ContextMetadata {
25
- [key: string]: any;
25
+ [key: string]: unknown;
26
26
  }
27
27
  /**
28
28
  * Context manager for automatic metadata propagation
@@ -75,7 +75,7 @@ export declare class EvalContext {
75
75
  */
76
76
  export declare function createContext(metadata: ContextMetadata): EvalContext;
77
77
  /**
78
- * Get the current context metadata (if any)
78
+ * Get the current context metadata (if any)
79
79
  *
80
80
  * @example
81
81
  * ```typescript
@@ -98,7 +98,7 @@ export declare function getCurrentContext(): ContextMetadata | undefined;
98
98
  * };
99
99
  * ```
100
100
  */
101
- export declare function mergeWithContext(metadata?: Record<string, any>): Record<string, any>;
101
+ export declare function mergeWithContext(metadata?: Record<string, unknown>): Record<string, unknown>;
102
102
  /**
103
103
  * Run with nested context (merges parent context)
104
104
  *
@@ -131,4 +131,4 @@ export declare function withContextSync<T>(metadata: ContextMetadata, fn: () =>
131
131
  * }
132
132
  * ```
133
133
  */
134
- export declare function WithContext(metadata: ContextMetadata): (_target: any, _propertyKey: string, descriptor: PropertyDescriptor) => PropertyDescriptor;
134
+ export declare function WithContext(metadata: ContextMetadata): (_target: unknown, _propertyKey: string, descriptor: PropertyDescriptor) => PropertyDescriptor;
package/dist/context.js CHANGED
@@ -131,7 +131,7 @@ function createContext(metadata) {
131
131
  return new EvalContext(metadata);
132
132
  }
133
133
  /**
134
- * Get the current context metadata (if any)
134
+ * Get the current context metadata (if any)
135
135
  *
136
136
  * @example
137
137
  * ```typescript
package/dist/errors.d.ts CHANGED
@@ -42,14 +42,14 @@ export declare class EvalAIError extends Error {
42
42
  /** Whether this error is retryable */
43
43
  retryable: boolean;
44
44
  /** Additional error details from the API */
45
- details?: any;
45
+ details?: unknown;
46
46
  /** When to retry (for rate limit errors) in seconds */
47
47
  retryAfter?: number;
48
48
  /** When the limit resets (for feature limit errors) */
49
49
  resetAt?: Date;
50
50
  /** Request ID from API (for correlation/debugging) */
51
51
  requestId?: string;
52
- constructor(message: string, code: string, statusCode: number, details?: any);
52
+ constructor(message: string, code: string, statusCode: number, details?: unknown);
53
53
  /**
54
54
  * Get formatted error message with solutions
55
55
  */
@@ -61,12 +61,12 @@ export declare class EvalAIError extends Error {
61
61
  /**
62
62
  * Convert to JSON for logging
63
63
  */
64
- toJSON(): Record<string, any>;
64
+ toJSON(): Record<string, unknown>;
65
65
  }
66
66
  /**
67
67
  * Create an error from an HTTP response
68
68
  */
69
- export declare function createErrorFromResponse(response: Response, data: any): EvalAIError;
69
+ export declare function createErrorFromResponse(response: Response, data: unknown): EvalAIError;
70
70
  export declare class RateLimitError extends EvalAIError {
71
71
  constructor(message: string, retryAfter?: number);
72
72
  }
@@ -74,7 +74,7 @@ export declare class AuthenticationError extends EvalAIError {
74
74
  constructor(message?: string);
75
75
  }
76
76
  export declare class ValidationError extends EvalAIError {
77
- constructor(message?: string, details?: any);
77
+ constructor(message?: string, details?: unknown);
78
78
  }
79
79
  export declare class NetworkError extends EvalAIError {
80
80
  constructor(message?: string);
package/dist/errors.js CHANGED
@@ -160,27 +160,21 @@ class EvalAIError extends Error {
160
160
  this.code = code;
161
161
  this.statusCode = statusCode;
162
162
  this.details = details;
163
- // Get documentation and solutions
164
- const errorDoc = ERROR_DOCS[code];
165
- if (errorDoc) {
166
- this.documentation = errorDoc.documentation;
167
- this.solutions = errorDoc.solutions;
168
- this.retryable = errorDoc.retryable;
169
- }
170
- else {
171
- this.documentation = "https://docs.ai-eval-platform.com/errors";
172
- this.solutions = ["Check the API documentation for more information"];
173
- this.retryable = false;
174
- }
163
+ // Initialize required properties from ERROR_DOCS
164
+ const doc = ERROR_DOCS[code];
165
+ this.documentation = doc?.documentation ?? `https://docs.ai-eval-platform.com/errors/${code}`;
166
+ this.solutions = doc?.solutions ?? ["Check the error details for more information"];
167
+ this.retryable = doc?.retryable ?? false;
175
168
  // Extract retry-after for rate limits
176
- if (code === "RATE_LIMIT_EXCEEDED" && details?.retryAfter) {
177
- this.retryAfter = details.retryAfter;
169
+ const errorDetails = details;
170
+ if (code === "RATE_LIMIT_EXCEEDED" && errorDetails?.retryAfter) {
171
+ this.retryAfter = errorDetails.retryAfter;
178
172
  }
179
173
  // Extract reset time for feature limits
180
- if (code === "FEATURE_LIMIT_REACHED" && details?.resetAt) {
181
- this.resetAt = new Date(details.resetAt);
174
+ if (code === "FEATURE_LIMIT_REACHED" && errorDetails?.resetAt) {
175
+ this.resetAt = new Date(errorDetails.resetAt);
182
176
  }
183
- this.requestId = details?.error?.requestId ?? details?.requestId;
177
+ this.requestId = errorDetails?.error?.requestId ?? errorDetails?.requestId;
184
178
  // Ensure proper prototype chain
185
179
  Object.setPrototypeOf(this, EvalAIError.prototype);
186
180
  }
@@ -234,14 +228,17 @@ exports.SDKError = EvalAIError;
234
228
  */
235
229
  function createErrorFromResponse(response, data) {
236
230
  const status = response.status;
237
- const errObj = data?.error && typeof data.error === "object" ? data.error : data;
238
- let code = errObj?.code ?? data?.code ?? "UNKNOWN_ERROR";
239
- const message = typeof data?.error === "string"
240
- ? data.error
241
- : (errObj?.message ?? data?.message ?? response.statusText);
242
- const requestId = errObj?.requestId ?? data?.requestId ?? response.headers.get("x-request-id") ?? undefined;
231
+ const errorData = data;
232
+ const errObj = errorData?.error && typeof errorData.error === "object"
233
+ ? errorData.error
234
+ : errorData;
235
+ let code = errObj?.code ?? errorData?.code ?? "UNKNOWN_ERROR";
236
+ const message = typeof errorData?.error === "string"
237
+ ? errorData.error
238
+ : (errObj?.message ?? errorData?.message ?? response.statusText);
239
+ const requestId = errObj?.requestId ?? errorData?.requestId ?? response.headers.get("x-request-id") ?? undefined;
243
240
  // Map HTTP status to error codes when code not in response
244
- if (!errObj?.code && !data?.code) {
241
+ if (!errObj?.code && !errorData?.code) {
245
242
  if (status === 401)
246
243
  code = "UNAUTHORIZED";
247
244
  else if (status === 403)
package/dist/export.d.ts CHANGED
@@ -181,7 +181,7 @@ export declare function importFromFile(client: AIEvalClient, filePath: string, o
181
181
  * });
182
182
  * ```
183
183
  */
184
- export declare function importFromLangSmith(client: AIEvalClient, langsmithData: any, options: ImportOptions): Promise<ImportResult>;
184
+ export declare function importFromLangSmith(client: AIEvalClient, langsmithData: unknown, options: ImportOptions): Promise<ImportResult>;
185
185
  /**
186
186
  * Convert export data to CSV format
187
187
  *
package/dist/export.js CHANGED
@@ -292,8 +292,10 @@ async function importFromLangSmith(client, langsmithData, options) {
292
292
  traces: [],
293
293
  };
294
294
  // Transform runs to traces
295
- if (langsmithData.runs && Array.isArray(langsmithData.runs)) {
296
- transformedData.traces = langsmithData.runs.map((run) => ({
295
+ const lsData = langsmithData;
296
+ if (lsData.runs && Array.isArray(lsData.runs)) {
297
+ transformedData.traces = lsData.runs.map((run) => ({
298
+ id: run.id || 0,
297
299
  name: run.name || "Imported Trace",
298
300
  traceId: run.id || `langsmith-${Date.now()}-${Math.random()}`,
299
301
  organizationId: options.organizationId,
package/dist/index.d.ts CHANGED
@@ -33,5 +33,6 @@ export { batchProcess, batchRead, RateLimiter, streamEvaluation } from "./stream
33
33
  export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";
34
34
  export { EvaluationTemplates, type EvaluationTemplateType, type FeatureUsage, type OrganizationLimits, } from "./types";
35
35
  export { type AgentHandoff, type AgentSpanContext, type CostCategory, type CostRecord, createWorkflowTracer, type DecisionAlternative, type DecisionType, type HandoffType, type LLMProvider, type RecordCostParams, type RecordDecisionParams, traceAutoGen, traceCrewAI, traceLangChainAgent, traceWorkflowStep, type WorkflowContext, type WorkflowDefinition, type WorkflowEdge, type WorkflowNode, type WorkflowStatus, WorkflowTracer, type WorkflowTracerOptions, } from "./workflows";
36
+ export { ARTIFACTS, type Baseline, type BaselineTolerance, GATE_CATEGORY, GATE_EXIT, type GateCategory, type GateExitCode, type RegressionDelta, type RegressionReport, REPORT_SCHEMA_VERSION, } from "./regression";
36
37
  import { AIEvalClient } from "./client";
37
38
  export default AIEvalClient;
package/dist/index.js CHANGED
@@ -9,7 +9,7 @@
9
9
  */
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
11
  exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
12
- exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = void 0;
12
+ exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = void 0;
13
13
  // Main SDK exports
14
14
  var client_1 = require("./client");
15
15
  Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
@@ -117,6 +117,12 @@ Object.defineProperty(exports, "traceCrewAI", { enumerable: true, get: function
117
117
  Object.defineProperty(exports, "traceLangChainAgent", { enumerable: true, get: function () { return workflows_1.traceLangChainAgent; } });
118
118
  Object.defineProperty(exports, "traceWorkflowStep", { enumerable: true, get: function () { return workflows_1.traceWorkflowStep; } });
119
119
  Object.defineProperty(exports, "WorkflowTracer", { enumerable: true, get: function () { return workflows_1.WorkflowTracer; } });
120
+ // Regression gate constants & types (v1.6.0)
121
+ var regression_1 = require("./regression");
122
+ Object.defineProperty(exports, "ARTIFACTS", { enumerable: true, get: function () { return regression_1.ARTIFACTS; } });
123
+ Object.defineProperty(exports, "GATE_CATEGORY", { enumerable: true, get: function () { return regression_1.GATE_CATEGORY; } });
124
+ Object.defineProperty(exports, "GATE_EXIT", { enumerable: true, get: function () { return regression_1.GATE_EXIT; } });
125
+ Object.defineProperty(exports, "REPORT_SCHEMA_VERSION", { enumerable: true, get: function () { return regression_1.REPORT_SCHEMA_VERSION; } });
120
126
  // Default export for convenience
121
127
  const client_2 = require("./client");
122
128
  exports.default = client_2.AIEvalClient;
@@ -141,7 +141,7 @@ async function openAIChatEval(options) {
141
141
  for (let i = 0; i < result.results.length; i++) {
142
142
  const tcId = cases[i]?.testCaseId;
143
143
  if (tcId == null) {
144
- console.log("reportToEvalAI: All cases must have testCaseId when any has it.");
144
+ console.log("reportToEvalAI: All cases must have testCaseId when any has it."); // this case has no testCaseId (tcId == null)
145
145
  return evalResult;
146
146
  }
147
147
  importResults.push({
package/dist/logger.d.ts CHANGED
@@ -29,7 +29,7 @@ export interface LogEntry {
29
29
  level: LogLevel;
30
30
  message: string;
31
31
  timestamp: string;
32
- data?: any;
32
+ data?: unknown;
33
33
  prefix?: string;
34
34
  }
35
35
  /**
@@ -41,31 +41,31 @@ export declare class Logger {
41
41
  /**
42
42
  * Log a trace message
43
43
  */
44
- trace(message: string, data?: any): void;
44
+ trace(message: string, data?: unknown): void;
45
45
  /**
46
46
  * Log a debug message
47
47
  */
48
- debug(message: string, data?: any): void;
48
+ debug(message: string, data?: unknown): void;
49
49
  /**
50
50
  * Log an info message
51
51
  */
52
- info(message: string, data?: any): void;
52
+ info(message: string, data?: unknown): void;
53
53
  /**
54
54
  * Log a warning message
55
55
  */
56
- warn(message: string, data?: any): void;
56
+ warn(message: string, data?: unknown): void;
57
57
  /**
58
58
  * Log an error message
59
59
  */
60
- error(message: string, data?: any): void;
60
+ error(message: string, data?: unknown): void;
61
61
  /**
62
62
  * Log HTTP request
63
63
  */
64
- logRequest(method: string, url: string, data?: any): void;
64
+ logRequest(method: string, url: string, data?: unknown): void;
65
65
  /**
66
66
  * Log HTTP response
67
67
  */
68
- logResponse(method: string, url: string, status: number, duration: number, data?: any): void;
68
+ logResponse(method: string, url: string, status: number, duration: number, data?: unknown): void;
69
69
  /**
70
70
  * Create child logger with prefix
71
71
  */
@@ -112,7 +112,7 @@ export declare class RequestLogger {
112
112
  method: string;
113
113
  url: string;
114
114
  headers?: Record<string, string>;
115
- body?: any;
115
+ body?: unknown;
116
116
  }): void;
117
117
  /**
118
118
  * Log response after receiving
@@ -123,6 +123,6 @@ export declare class RequestLogger {
123
123
  status: number;
124
124
  duration: number;
125
125
  headers?: Record<string, string>;
126
- body?: any;
126
+ body?: unknown;
127
127
  }): void;
128
128
  }
@@ -56,11 +56,11 @@ export declare function autoPaginate<T>(fetchFn: (offset: number, limit: number)
56
56
  /**
57
57
  * Encode cursor for pagination (base64)
58
58
  */
59
- export declare function encodeCursor(data: any): string;
59
+ export declare function encodeCursor(data: unknown): string;
60
60
  /**
61
61
  * Decode cursor from base64
62
62
  */
63
- export declare function decodeCursor(cursor: string): any;
63
+ export declare function decodeCursor(cursor: string): unknown;
64
64
  /**
65
65
  * Create pagination metadata from response
66
66
  */
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Regression gate constants and types.
3
+ *
4
+ * These mirror the contracts defined in scripts/regression-gate.ts
5
+ * and evals/schemas/regression-report.schema.json so that SDK consumers
6
+ * can programmatically inspect gate results without parsing strings.
7
+ *
8
+ * @packageDocumentation
9
+ */
10
+ /** Exit codes emitted by `evalai gate` / `scripts/regression-gate.ts`. */
11
+ export declare const GATE_EXIT: {
12
+ /** Gate passed — no regressions detected */
13
+ readonly PASS: 0;
14
+ /** One or more regression thresholds exceeded */
15
+ readonly REGRESSION: 1;
16
+ /** Infrastructure error (baseline missing, summary missing, etc.) */
17
+ readonly INFRA_ERROR: 2;
18
+ /** Confidence tests failed (test suite red) */
19
+ readonly CONFIDENCE_FAILED: 3;
20
+ /** Confidence summary file missing (test infra crashed) */
21
+ readonly CONFIDENCE_MISSING: 4;
22
+ };
23
+ export type GateExitCode = (typeof GATE_EXIT)[keyof typeof GATE_EXIT];
24
+ /** Categories written to regression-report.json `category` field. */
25
+ export declare const GATE_CATEGORY: {
26
+ readonly PASS: "pass";
27
+ readonly REGRESSION: "regression";
28
+ readonly INFRA_ERROR: "infra_error";
29
+ };
30
+ export type GateCategory = (typeof GATE_CATEGORY)[keyof typeof GATE_CATEGORY];
31
+ /** Current schema version for regression-report.json. */
32
+ export declare const REPORT_SCHEMA_VERSION = 1;
33
+ export interface RegressionDelta {
34
+ metric: string;
35
+ baseline: number | string;
36
+ current: number | string;
37
+ delta: string;
38
+ status: "pass" | "fail";
39
+ }
40
+ export interface RegressionReport {
41
+ schemaVersion: number;
42
+ timestamp: string;
43
+ exitCode: GateExitCode;
44
+ category: GateCategory;
45
+ passed: boolean;
46
+ failures: string[];
47
+ deltas: RegressionDelta[];
48
+ }
49
+ export interface BaselineTolerance {
50
+ scoreDrop: number;
51
+ passRateDrop: number;
52
+ maxLatencyIncreaseMs: number;
53
+ maxCostIncreaseUsd: number;
54
+ }
55
+ export interface Baseline {
56
+ schemaVersion: number;
57
+ description: string;
58
+ generatedAt: string;
59
+ generatedBy: string;
60
+ commitSha: string;
61
+ updatedAt: string;
62
+ updatedBy: string;
63
+ tolerance: BaselineTolerance;
64
+ goldenEval: {
65
+ score: number;
66
+ passRate: number;
67
+ totalCases: number;
68
+ passedCases: number;
69
+ };
70
+ qualityScore: {
71
+ overall: number;
72
+ grade: string;
73
+ accuracy: number;
74
+ safety: number;
75
+ latency: number;
76
+ cost: number;
77
+ consistency: number;
78
+ };
79
+ confidenceTests: {
80
+ unitPassed: boolean;
81
+ unitTotal: number;
82
+ dbPassed: boolean;
83
+ dbTotal: number;
84
+ };
85
+ productMetrics: {
86
+ p95ApiLatencyMs?: number;
87
+ goldenCostUsd?: number;
88
+ };
89
+ qualityMetrics?: {
90
+ unitLaneDurationMs?: number;
91
+ dbLaneDurationMs?: number;
92
+ };
93
+ }
94
+ /** Well-known artifact paths relative to project root. */
95
+ export declare const ARTIFACTS: {
96
+ readonly BASELINE: "evals/baseline.json";
97
+ readonly REGRESSION_REPORT: "evals/regression-report.json";
98
+ readonly CONFIDENCE_SUMMARY: "evals/confidence-summary.json";
99
+ readonly LATENCY_BENCHMARK: "evals/latency-benchmark.json";
100
+ };
@@ -0,0 +1,44 @@
1
+ "use strict";
2
+ /**
3
+ * Regression gate constants and types.
4
+ *
5
+ * These mirror the contracts defined in scripts/regression-gate.ts
6
+ * and evals/schemas/regression-report.schema.json so that SDK consumers
7
+ * can programmatically inspect gate results without parsing strings.
8
+ *
9
+ * @packageDocumentation
10
+ */
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.ARTIFACTS = exports.REPORT_SCHEMA_VERSION = exports.GATE_CATEGORY = exports.GATE_EXIT = void 0;
13
+ // ── Exit codes ──
14
+ /** Exit codes emitted by `evalai gate` / `scripts/regression-gate.ts`. */
15
+ exports.GATE_EXIT = {
16
+ /** Gate passed — no regressions detected */
17
+ PASS: 0,
18
+ /** One or more regression thresholds exceeded */
19
+ REGRESSION: 1,
20
+ /** Infrastructure error (baseline missing, summary missing, etc.) */
21
+ INFRA_ERROR: 2,
22
+ /** Confidence tests failed (test suite red) */
23
+ CONFIDENCE_FAILED: 3,
24
+ /** Confidence summary file missing (test infra crashed) */
25
+ CONFIDENCE_MISSING: 4,
26
+ };
27
+ // ── Report categories ──
28
+ /** Categories written to regression-report.json `category` field. */
29
+ exports.GATE_CATEGORY = {
30
+ PASS: "pass",
31
+ REGRESSION: "regression",
32
+ INFRA_ERROR: "infra_error",
33
+ };
34
+ // ── Schema version ──
35
+ /** Current schema version for regression-report.json. */
36
+ exports.REPORT_SCHEMA_VERSION = 1;
37
+ // ── Artifact paths ──
38
+ /** Well-known artifact paths relative to project root. */
39
+ exports.ARTIFACTS = {
40
+ BASELINE: "evals/baseline.json",
41
+ REGRESSION_REPORT: "evals/regression-report.json",
42
+ CONFIDENCE_SUMMARY: "evals/confidence-summary.json",
43
+ LATENCY_BENCHMARK: "evals/latency-benchmark.json",
44
+ };