@evalgate/sdk 2.2.1 → 2.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cache.d.ts CHANGED
@@ -21,7 +21,7 @@ export declare class RequestCache {
21
21
  /**
22
22
  * Store response in cache
23
23
  */
24
- set<T>(method: string, url: string, data: T, ttl: number, params?: unknown): void;
24
+ set<T>(method: string, url: string, data: T, ttl?: number, params?: unknown): void;
25
25
  /**
26
26
  * Invalidate specific cache entry
27
27
  */
package/dist/cache.js CHANGED
@@ -43,7 +43,7 @@ class RequestCache {
43
43
  /**
44
44
  * Store response in cache
45
45
  */
46
- set(method, url, data, ttl, params) {
46
+ set(method, url, data, ttl = exports.CacheTTL.MEDIUM, params) {
47
47
  // Enforce cache size limit (LRU-style)
48
48
  if (this.cache.size >= this.maxSize) {
49
49
  const firstKey = this.cache.keys().next().value;
@@ -480,7 +480,12 @@ After upgrading:
480
480
  console.log(" - package.json eval:regression-gate + eval:baseline-update");
481
481
  console.log(" - .github/workflows/ Gate + governance workflows");
482
482
  console.log(" - .github/CODEOWNERS Baseline requires approval\n");
483
+ console.log(" ⚠️ IMPORTANT — Reset your baseline before pushing:");
484
+ console.log(" The gate compares against your existing Tier 1 baseline.");
485
+ console.log(" If your test script changed, run this first to avoid an immediate regression:");
486
+ console.log(" npx evalgate baseline update (or: pnpm eval:baseline-update)\n");
483
487
  console.log(" Next:");
488
+ console.log(" npx evalgate baseline update");
484
489
  console.log(" git add -A");
485
490
  console.log(" git commit -m 'chore: upgrade EvalGate gate to Tier 2'");
486
491
  console.log(" git push\n");
package/dist/client.js CHANGED
@@ -72,7 +72,7 @@ class AIEvalClient {
72
72
  this.baseUrl =
73
73
  config.baseUrl ||
74
74
  getEnvVar("EVALGATE_BASE_URL", "EVALAI_BASE_URL") ||
75
- (isBrowser ? "" : "http://localhost:3000");
75
+ (isBrowser ? "" : "https://api.evalgate.com");
76
76
  this.timeout = config.timeout || 30000;
77
77
  // Tier 4.17: Debug mode with request logging
78
78
  const logLevel = config.logLevel || (config.debug ? "debug" : "info");
package/dist/errors.js CHANGED
@@ -271,6 +271,10 @@ class RateLimitError extends EvalGateError {
271
271
  constructor(message, retryAfter) {
272
272
  super(message, "RATE_LIMIT_EXCEEDED", 429, { retryAfter });
273
273
  this.name = "RateLimitError";
274
+ if (retryAfter !== undefined) {
275
+ this.retryAfter = retryAfter;
276
+ }
277
+ Object.setPrototypeOf(this, RateLimitError.prototype);
274
278
  }
275
279
  }
276
280
  exports.RateLimitError = RateLimitError;
@@ -278,6 +282,7 @@ class AuthenticationError extends EvalGateError {
278
282
  constructor(message = "Authentication failed") {
279
283
  super(message, "AUTHENTICATION_ERROR", 401);
280
284
  this.name = "AuthenticationError";
285
+ Object.setPrototypeOf(this, AuthenticationError.prototype);
281
286
  }
282
287
  }
283
288
  exports.AuthenticationError = AuthenticationError;
@@ -285,6 +290,7 @@ class ValidationError extends EvalGateError {
285
290
  constructor(message = "Validation failed", details) {
286
291
  super(message, "VALIDATION_ERROR", 400, details);
287
292
  this.name = "ValidationError";
293
+ Object.setPrototypeOf(this, ValidationError.prototype);
288
294
  }
289
295
  }
290
296
  exports.ValidationError = ValidationError;
@@ -293,6 +299,7 @@ class NetworkError extends EvalGateError {
293
299
  super(message, "NETWORK_ERROR", 0);
294
300
  this.name = "NetworkError";
295
301
  this.retryable = true;
302
+ Object.setPrototypeOf(this, NetworkError.prototype);
296
303
  }
297
304
  }
298
305
  exports.NetworkError = NetworkError;
package/dist/export.d.ts CHANGED
@@ -140,7 +140,7 @@ export declare function exportData(client: AIEvalClient, options: ExportOptions)
140
140
  * console.log(`Imported ${result.summary.imported} items`);
141
141
  * ```
142
142
  */
143
- export declare function importData(client: AIEvalClient, data: ExportData, options: ImportOptions): Promise<ImportResult>;
143
+ export declare function importData(client: AIEvalClient, data: ExportData, options?: ImportOptions): Promise<ImportResult>;
144
144
  /**
145
145
  * Export data to JSON file
146
146
  *
package/dist/export.js CHANGED
@@ -136,7 +136,7 @@ async function exportData(client, options) {
136
136
  * console.log(`Imported ${result.summary.imported} items`);
137
137
  * ```
138
138
  */
139
- async function importData(client, data, options) {
139
+ async function importData(client, data, options = {}) {
140
140
  const result = {
141
141
  summary: { total: 0, imported: 0, skipped: 0, failed: 0 },
142
142
  details: {},
@@ -155,7 +155,7 @@ async function importData(client, data, options) {
155
155
  return result;
156
156
  }
157
157
  // Import traces
158
- if (data.traces) {
158
+ if (data.traces && client?.traces) {
159
159
  const traceResults = { imported: 0, skipped: 0, failed: 0 };
160
160
  for (const trace of data.traces) {
161
161
  try {
@@ -191,7 +191,7 @@ async function importData(client, data, options) {
191
191
  result.summary.total += data.traces.length;
192
192
  }
193
193
  // Import evaluations
194
- if (data.evaluations) {
194
+ if (data.evaluations && client?.evaluations) {
195
195
  const evalResults = { imported: 0, skipped: 0, failed: 0 };
196
196
  for (const evaluation of data.evaluations) {
197
197
  try {
package/dist/index.d.ts CHANGED
@@ -10,7 +10,7 @@ export { AIEvalClient } from "./client";
10
10
  import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, SDKError } from "./errors";
11
11
  export { EvalGateError, RateLimitError, AuthenticationError, SDKError as ValidationError, // Using SDKError as ValidationError for backward compatibility
12
12
  NetworkError, };
13
- export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasPII, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
13
+ export { type AssertionLLMConfig, configureAssertions, containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, containsLanguageAsync, expect, followsInstructions, getAssertionConfig, hasFactualAccuracy, hasFactualAccuracyAsync, hasLength, hasNoHallucinations, hasNoHallucinationsAsync, hasNoToxicity, hasNoToxicityAsync, hasPII, hasReadabilityScore, hasSentiment, hasSentimentAsync, hasValidCodeSyntax, hasValidCodeSyntaxAsync, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
14
14
  import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
15
15
  export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
16
16
  export { cloneContext, mergeContexts, validateContext, } from "./runtime/context";
@@ -20,8 +20,8 @@ export { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRun
20
20
  export type { CloudExecutor, DefineEvalFunction, EvalContext, EvalExecutor, EvalExecutorInterface, EvalOptions, EvalResult, EvalRuntime, EvalSpec, ExecutorCapabilities, LocalExecutor, SpecConfig, SpecOptions, WorkerExecutor, } from "./runtime/types";
21
21
  export { EvalRuntimeError, RuntimeError, SpecExecutionError, SpecRegistrationError, } from "./runtime/types";
22
22
  export { createTestSuite, type TestCaseResult, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteConfig, TestSuiteResult, } from "./testing";
23
- import { compareWithSnapshot, snapshot } from "./snapshot";
24
- export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots, };
23
+ import { compareSnapshots, compareWithSnapshot, snapshot } from "./snapshot";
24
+ export { snapshot, compareWithSnapshot, compareSnapshots, snapshot as saveSnapshot, };
25
25
  import type { ExportFormat } from "./export";
26
26
  import { exportData, importData } from "./export";
27
27
  export { exportData, importData };
@@ -34,7 +34,7 @@ export { traceOpenAI } from "./integrations/openai";
34
34
  export { type OpenAIChatEvalCase, type OpenAIChatEvalOptions, type OpenAIChatEvalResult, openAIChatEval, } from "./integrations/openai-eval";
35
35
  export { Logger } from "./logger";
36
36
  export { extendExpectWithToPassGate } from "./matchers";
37
- export { autoPaginate, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
37
+ export { autoPaginate, autoPaginateGenerator, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
38
38
  export { ARTIFACTS, type Baseline, type BaselineTolerance, GATE_CATEGORY, GATE_EXIT, type GateCategory, type GateExitCode, REPORT_SCHEMA_VERSION, type RegressionDelta, type RegressionReport, } from "./regression";
39
39
  export { batchProcess, batchRead, RateLimiter, streamEvaluation, } from "./streaming";
40
40
  export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, EvaluationRun, EvaluationRunDetail, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeEvaluateResult, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TraceDetail, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";
package/dist/index.js CHANGED
@@ -8,8 +8,8 @@
8
8
  * @packageDocumentation
9
9
  */
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
- exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
12
- exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = void 0;
11
+ exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntaxAsync = exports.hasValidCodeSyntax = exports.hasSentimentAsync = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicityAsync = exports.hasNoToxicity = exports.hasNoHallucinationsAsync = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracyAsync = exports.hasFactualAccuracy = exports.getAssertionConfig = exports.followsInstructions = exports.expect = exports.containsLanguageAsync = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.configureAssertions = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
12
+ exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginateGenerator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.saveSnapshot = exports.compareSnapshots = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = void 0;
13
13
  // Main SDK exports
14
14
  var client_1 = require("./client");
15
15
  Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
@@ -22,20 +22,30 @@ Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: functi
22
22
  Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.SDKError; } });
23
23
  // Enhanced assertions (Tier 1.3)
24
24
  var assertions_1 = require("./assertions");
25
+ // LLM config
26
+ Object.defineProperty(exports, "configureAssertions", { enumerable: true, get: function () { return assertions_1.configureAssertions; } });
25
27
  Object.defineProperty(exports, "containsAllRequiredFields", { enumerable: true, get: function () { return assertions_1.containsAllRequiredFields; } });
26
28
  Object.defineProperty(exports, "containsJSON", { enumerable: true, get: function () { return assertions_1.containsJSON; } });
27
29
  Object.defineProperty(exports, "containsKeywords", { enumerable: true, get: function () { return assertions_1.containsKeywords; } });
28
30
  Object.defineProperty(exports, "containsLanguage", { enumerable: true, get: function () { return assertions_1.containsLanguage; } });
31
+ // LLM-backed async variants
32
+ Object.defineProperty(exports, "containsLanguageAsync", { enumerable: true, get: function () { return assertions_1.containsLanguageAsync; } });
29
33
  Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
30
34
  Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
35
+ Object.defineProperty(exports, "getAssertionConfig", { enumerable: true, get: function () { return assertions_1.getAssertionConfig; } });
31
36
  Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
37
+ Object.defineProperty(exports, "hasFactualAccuracyAsync", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracyAsync; } });
32
38
  Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
33
39
  Object.defineProperty(exports, "hasNoHallucinations", { enumerable: true, get: function () { return assertions_1.hasNoHallucinations; } });
40
+ Object.defineProperty(exports, "hasNoHallucinationsAsync", { enumerable: true, get: function () { return assertions_1.hasNoHallucinationsAsync; } });
34
41
  Object.defineProperty(exports, "hasNoToxicity", { enumerable: true, get: function () { return assertions_1.hasNoToxicity; } });
42
+ Object.defineProperty(exports, "hasNoToxicityAsync", { enumerable: true, get: function () { return assertions_1.hasNoToxicityAsync; } });
35
43
  Object.defineProperty(exports, "hasPII", { enumerable: true, get: function () { return assertions_1.hasPII; } });
36
44
  Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
37
45
  Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
46
+ Object.defineProperty(exports, "hasSentimentAsync", { enumerable: true, get: function () { return assertions_1.hasSentimentAsync; } });
38
47
  Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
48
+ Object.defineProperty(exports, "hasValidCodeSyntaxAsync", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntaxAsync; } });
39
49
  Object.defineProperty(exports, "isValidEmail", { enumerable: true, get: function () { return assertions_1.isValidEmail; } });
40
50
  Object.defineProperty(exports, "isValidURL", { enumerable: true, get: function () { return assertions_1.isValidURL; } });
41
51
  Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
@@ -81,8 +91,8 @@ Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: funct
81
91
  Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
82
92
  // Snapshot testing (Tier 2.8)
83
93
  const snapshot_1 = require("./snapshot");
94
+ Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareSnapshots; } });
84
95
  Object.defineProperty(exports, "compareWithSnapshot", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
85
- Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
86
96
  Object.defineProperty(exports, "snapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
87
97
  Object.defineProperty(exports, "saveSnapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
88
98
  // Export/Import utilities (Tier 4.18)
@@ -120,6 +130,7 @@ var matchers_1 = require("./matchers");
120
130
  Object.defineProperty(exports, "extendExpectWithToPassGate", { enumerable: true, get: function () { return matchers_1.extendExpectWithToPassGate; } });
121
131
  var pagination_1 = require("./pagination");
122
132
  Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
133
+ Object.defineProperty(exports, "autoPaginateGenerator", { enumerable: true, get: function () { return pagination_1.autoPaginateGenerator; } });
123
134
  Object.defineProperty(exports, "createPaginatedIterator", { enumerable: true, get: function () { return pagination_1.createPaginatedIterator; } });
124
135
  Object.defineProperty(exports, "decodeCursor", { enumerable: true, get: function () { return pagination_1.decodeCursor; } });
125
136
  Object.defineProperty(exports, "encodeCursor", { enumerable: true, get: function () { return pagination_1.encodeCursor; } });
@@ -67,7 +67,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
67
67
  }
68
68
  : {}),
69
69
  });
70
- await evalClient.traces.create({
70
+ await evalClient.traces?.create({
71
71
  name: `Anthropic: ${params.model}`,
72
72
  traceId,
73
73
  organizationId: organizationId || evalClient.getOrganizationId(),
@@ -89,7 +89,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
89
89
  error: error instanceof Error ? error.message : String(error),
90
90
  });
91
91
  await evalClient.traces
92
- .create({
92
+ ?.create({
93
93
  name: `Anthropic: ${params.model}`,
94
94
  traceId,
95
95
  organizationId: organizationId || evalClient.getOrganizationId(),
@@ -97,7 +97,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
97
97
  durationMs,
98
98
  metadata: errorMetadata,
99
99
  })
100
- .catch(() => {
100
+ ?.catch(() => {
101
101
  // Ignore errors in trace creation to avoid masking the original error
102
102
  });
103
103
  throw error;
@@ -127,7 +127,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
127
127
  const startTime = Date.now();
128
128
  const traceId = `anthropic-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
129
129
  try {
130
- await evalClient.traces.create({
130
+ await evalClient.traces?.create({
131
131
  name,
132
132
  traceId,
133
133
  organizationId: options.organizationId || evalClient.getOrganizationId(),
@@ -136,7 +136,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
136
136
  });
137
137
  const result = await fn();
138
138
  const durationMs = Date.now() - startTime;
139
- await evalClient.traces.create({
139
+ await evalClient.traces?.create({
140
140
  name,
141
141
  traceId,
142
142
  organizationId: options.organizationId || evalClient.getOrganizationId(),
@@ -148,7 +148,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
148
148
  }
149
149
  catch (error) {
150
150
  const durationMs = Date.now() - startTime;
151
- await evalClient.traces.create({
151
+ await evalClient.traces?.create({
152
152
  name,
153
153
  traceId,
154
154
  organizationId: options.organizationId || evalClient.getOrganizationId(),
@@ -65,7 +65,7 @@ function traceOpenAI(openai, evalClient, options = {}) {
65
65
  }
66
66
  : {}),
67
67
  });
68
- await evalClient.traces.create({
68
+ await evalClient.traces?.create({
69
69
  name: `OpenAI: ${params.model}`,
70
70
  traceId,
71
71
  organizationId: organizationId || evalClient.getOrganizationId(),
@@ -87,7 +87,7 @@ function traceOpenAI(openai, evalClient, options = {}) {
87
87
  error: error instanceof Error ? error.message : String(error),
88
88
  });
89
89
  await evalClient.traces
90
- .create({
90
+ ?.create({
91
91
  name: `OpenAI: ${params.model}`,
92
92
  traceId,
93
93
  organizationId: organizationId || evalClient.getOrganizationId(),
@@ -95,7 +95,7 @@ function traceOpenAI(openai, evalClient, options = {}) {
95
95
  durationMs,
96
96
  metadata: errorMetadata,
97
97
  })
98
- .catch(() => {
98
+ ?.catch(() => {
99
99
  // Ignore errors in trace creation to avoid masking the original error
100
100
  });
101
101
  throw error;
@@ -124,7 +124,7 @@ async function traceOpenAICall(evalClient, name, fn, options = {}) {
124
124
  const startTime = Date.now();
125
125
  const traceId = `openai-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
126
126
  try {
127
- await evalClient.traces.create({
127
+ await evalClient.traces?.create({
128
128
  name,
129
129
  traceId,
130
130
  organizationId: options.organizationId || evalClient.getOrganizationId(),
@@ -133,7 +133,7 @@ async function traceOpenAICall(evalClient, name, fn, options = {}) {
133
133
  });
134
134
  const result = await fn();
135
135
  const durationMs = Date.now() - startTime;
136
- await evalClient.traces.create({
136
+ await evalClient.traces?.create({
137
137
  name,
138
138
  traceId,
139
139
  organizationId: options.organizationId || evalClient.getOrganizationId(),
@@ -145,7 +145,7 @@ async function traceOpenAICall(evalClient, name, fn, options = {}) {
145
145
  }
146
146
  catch (error) {
147
147
  const durationMs = Date.now() - startTime;
148
- await evalClient.traces.create({
148
+ await evalClient.traces?.create({
149
149
  name,
150
150
  traceId,
151
151
  organizationId: options.organizationId || evalClient.getOrganizationId(),
@@ -50,9 +50,20 @@ export declare function createPaginatedIterator<T>(fetchFn: (offset: number, lim
50
50
  hasMore: boolean;
51
51
  }>, limit?: number): PaginatedIterator<T>;
52
52
  /**
53
- * Auto-paginate helper that fetches all pages automatically
53
+ * Auto-paginate helper that fetches all pages and returns a flat array.
54
+ * @example
55
+ * ```typescript
56
+ * const allItems = await autoPaginate(
57
+ * (offset, limit) => client.traces.list({ offset, limit }),
58
+ * );
59
+ * ```
54
60
  */
55
- export declare function autoPaginate<T>(fetchFn: (offset: number, limit: number) => Promise<T[]>, limit?: number): AsyncGenerator<T, void, unknown>;
61
+ export declare function autoPaginate<T>(fetchFn: (offset: number, limit: number) => Promise<T[]>, limit?: number): Promise<T[]>;
62
+ /**
63
+ * Streaming auto-paginate generator — yields individual items one at a time.
64
+ * Use this when you want to process items as they arrive rather than waiting for all pages.
65
+ */
66
+ export declare function autoPaginateGenerator<T>(fetchFn: (offset: number, limit: number) => Promise<T[]>, limit?: number): AsyncGenerator<T, void, unknown>;
56
67
  /**
57
68
  * Encode cursor for pagination (base64)
58
69
  */
@@ -6,6 +6,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.PaginatedIterator = void 0;
7
7
  exports.createPaginatedIterator = createPaginatedIterator;
8
8
  exports.autoPaginate = autoPaginate;
9
+ exports.autoPaginateGenerator = autoPaginateGenerator;
9
10
  exports.encodeCursor = encodeCursor;
10
11
  exports.decodeCursor = decodeCursor;
11
12
  exports.createPaginationMeta = createPaginationMeta;
@@ -56,9 +57,34 @@ function createPaginatedIterator(fetchFn, limit = 50) {
56
57
  return new PaginatedIterator(fetchFn, limit);
57
58
  }
58
59
  /**
59
- * Auto-paginate helper that fetches all pages automatically
60
+ * Auto-paginate helper that fetches all pages and returns a flat array.
61
+ * @example
62
+ * ```typescript
63
+ * const allItems = await autoPaginate(
64
+ * (offset, limit) => client.traces.list({ offset, limit }),
65
+ * );
66
+ * ```
60
67
  */
61
- async function* autoPaginate(fetchFn, limit = 50) {
68
+ async function autoPaginate(fetchFn, limit = 50) {
69
+ const result = [];
70
+ let offset = 0;
71
+ let hasMore = true;
72
+ while (hasMore) {
73
+ const items = await fetchFn(offset, limit);
74
+ if (items.length === 0) {
75
+ break;
76
+ }
77
+ result.push(...items);
78
+ hasMore = items.length === limit;
79
+ offset += limit;
80
+ }
81
+ return result;
82
+ }
83
+ /**
84
+ * Streaming auto-paginate generator — yields individual items one at a time.
85
+ * Use this when you want to process items as they arrive rather than waiting for all pages.
86
+ */
87
+ async function* autoPaginateGenerator(fetchFn, limit = 50) {
62
88
  let offset = 0;
63
89
  let hasMore = true;
64
90
  while (hasMore) {
@@ -208,12 +208,7 @@ function generateDefineEvalCode(suite, options = {}) {
208
208
  });
209
209
  const helperFunctions = generateHelperFunctionsForSuite(specs, options);
210
210
  const evaluationFunction = generateEvaluationFunction();
211
- return [
212
- ...imports,
213
- ...helperFunctions,
214
- ...evaluationFunction,
215
- ...specCode,
216
- ].join("\n");
211
+ return [...imports, helperFunctions, evaluationFunction, ...specCode].join("\n");
217
212
  }
218
213
  /**
219
214
  * Generate helper functions for a specific spec
@@ -10,7 +10,8 @@ import type { LocalExecutor } from "./types";
10
10
  */
11
11
  export declare function createLocalExecutor(): LocalExecutor;
12
12
  /**
13
- * Default local executor instance
13
+ * Default local executor factory
14
+ * Call as defaultLocalExecutor() to get a new executor instance.
14
15
  * For convenience in simple use cases
15
16
  */
16
- export declare const defaultLocalExecutor: LocalExecutor;
17
+ export declare const defaultLocalExecutor: typeof createLocalExecutor;
@@ -146,7 +146,8 @@ function createLocalExecutor() {
146
146
  return new LocalExecutorImpl();
147
147
  }
148
148
  /**
149
- * Default local executor instance
149
+ * Default local executor factory
150
+ * Call as defaultLocalExecutor() to get a new executor instance.
150
151
  * For convenience in simple use cases
151
152
  */
152
- exports.defaultLocalExecutor = createLocalExecutor();
153
+ exports.defaultLocalExecutor = createLocalExecutor;
@@ -61,7 +61,10 @@ export interface SerializedSpec {
61
61
  * Create a new scoped runtime with lifecycle management
62
62
  * Returns a handle for proper resource management
63
63
  */
64
- export declare function createEvalRuntime(projectRoot?: string): RuntimeHandle;
64
+ export declare function createEvalRuntime(projectRootOrConfig?: string | {
65
+ name?: string;
66
+ projectRoot?: string;
67
+ }): RuntimeHandle;
65
68
  /**
66
69
  * Helper function for safe runtime execution with automatic cleanup
67
70
  * Ensures runtime is disposed even if an exception is thrown
@@ -315,7 +315,10 @@ class EvalRuntimeImpl {
315
315
  * Create a new scoped runtime with lifecycle management
316
316
  * Returns a handle for proper resource management
317
317
  */
318
- function createEvalRuntime(projectRoot = process.cwd()) {
318
+ function createEvalRuntime(projectRootOrConfig = process.cwd()) {
319
+ const projectRoot = typeof projectRootOrConfig === "string"
320
+ ? projectRootOrConfig
321
+ : (projectRootOrConfig.projectRoot ?? process.cwd());
319
322
  const runtime = new EvalRuntimeImpl(projectRoot);
320
323
  // Create bound defineEval function
321
324
  const boundDefineEval = ((nameOrConfig, executor, options) => {
@@ -99,7 +99,7 @@ export declare class SnapshotManager {
99
99
  * }
100
100
  * ```
101
101
  */
102
- compare(name: string, currentOutput: string): Promise<SnapshotComparison>;
102
+ compare(name: string, currentOutput: unknown): Promise<SnapshotComparison>;
103
103
  /**
104
104
  * List all snapshots
105
105
  *
@@ -165,7 +165,19 @@ export declare function loadSnapshot(name: string, dir?: string): Promise<Snapsh
165
165
  * }
166
166
  * ```
167
167
  */
168
- export declare function compareWithSnapshot(name: string, currentOutput: string, dir?: string): Promise<SnapshotComparison>;
168
+ export declare function compareWithSnapshot(name: string, currentOutput: unknown, dir?: string): Promise<SnapshotComparison>;
169
+ /**
170
+ * Compare two saved snapshots by name (convenience function)
171
+ *
172
+ * @example
173
+ * ```typescript
174
+ * const comparison = await compareSnapshots('baseline', 'current');
175
+ * if (!comparison.matches) {
176
+ * console.log('Snapshots differ!', comparison.differences);
177
+ * }
178
+ * ```
179
+ */
180
+ export declare function compareSnapshots(nameA: string, nameB: string, dir?: string): Promise<SnapshotComparison>;
169
181
  /**
170
182
  * Delete a snapshot (convenience function)
171
183
  */
package/dist/snapshot.js CHANGED
@@ -55,6 +55,7 @@ exports.SnapshotManager = void 0;
55
55
  exports.snapshot = snapshot;
56
56
  exports.loadSnapshot = loadSnapshot;
57
57
  exports.compareWithSnapshot = compareWithSnapshot;
58
+ exports.compareSnapshots = compareSnapshots;
58
59
  exports.deleteSnapshot = deleteSnapshot;
59
60
  exports.listSnapshots = listSnapshots;
60
61
  // Environment check
@@ -130,7 +131,13 @@ class SnapshotManager {
130
131
  if (!options?.overwrite && fs.existsSync(filePath)) {
131
132
  throw new Error(`Snapshot '${name}' already exists. Use overwrite: true to update.`);
132
133
  }
133
- const serialized = typeof output === "string" ? output : JSON.stringify(output);
134
+ const serialized = output === undefined
135
+ ? "undefined"
136
+ : output === null
137
+ ? "null"
138
+ : typeof output === "string"
139
+ ? output
140
+ : JSON.stringify(output);
134
141
  const snapshotData = {
135
142
  output: serialized,
136
143
  metadata: {
@@ -175,11 +182,14 @@ class SnapshotManager {
175
182
  async compare(name, currentOutput) {
176
183
  const snapshot = await this.load(name);
177
184
  const original = snapshot.output;
185
+ const currentOutputStr = typeof currentOutput === "string"
186
+ ? currentOutput
187
+ : JSON.stringify(currentOutput);
178
188
  // Exact match check
179
- const exactMatch = original === currentOutput;
189
+ const exactMatch = original === currentOutputStr;
180
190
  // Calculate similarity (simple line-based diff)
181
191
  const originalLines = original.split("\n");
182
- const currentLines = currentOutput.split("\n");
192
+ const currentLines = currentOutputStr.split("\n");
183
193
  const differences = [];
184
194
  const maxLines = Math.max(originalLines.length, currentLines.length);
185
195
  let matchingLines = 0;
@@ -199,7 +209,7 @@ class SnapshotManager {
199
209
  similarity,
200
210
  differences,
201
211
  original,
202
- current: currentOutput,
212
+ current: currentOutputStr,
203
213
  };
204
214
  }
205
215
  /**
@@ -307,6 +317,22 @@ async function compareWithSnapshot(name, currentOutput, dir) {
307
317
  const manager = getSnapshotManager(dir);
308
318
  return manager.compare(name, currentOutput);
309
319
  }
320
+ /**
321
+ * Compare two saved snapshots by name (convenience function)
322
+ *
323
+ * @example
324
+ * ```typescript
325
+ * const comparison = await compareSnapshots('baseline', 'current');
326
+ * if (!comparison.matches) {
327
+ * console.log('Snapshots differ!', comparison.differences);
328
+ * }
329
+ * ```
330
+ */
331
+ async function compareSnapshots(nameA, nameB, dir) {
332
+ const manager = getSnapshotManager(dir);
333
+ const snapshotB = await manager.load(nameB);
334
+ return manager.compare(nameA, snapshotB.output);
335
+ }
310
336
  /**
311
337
  * Delete a snapshot (convenience function)
312
338
  */
package/dist/types.d.ts CHANGED
@@ -38,8 +38,13 @@ export interface ClientConfig {
38
38
  keepAlive?: boolean;
39
39
  }
40
40
  /**
41
- * Evaluation template categories
42
- * Updated with new template types for comprehensive LLM testing
41
+ * Evaluation template identifier constants for use with the EvalAI platform API.
42
+ *
43
+ * These are **string identifiers** (e.g. `"unit-testing"`) that reference
44
+ * pre-built templates on the platform — not template definition objects.
45
+ * Pass these values to `evaluations.create({ templateId: EvaluationTemplates.UNIT_TESTING })`
46
+ * to spin up a pre-configured evaluation. For custom criteria, thresholds, and
47
+ * test cases, build your own evaluation config instead.
43
48
  */
44
49
  export declare const EvaluationTemplates: {
45
50
  readonly UNIT_TESTING: "unit-testing";
package/dist/types.js CHANGED
@@ -2,8 +2,13 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.SDKError = exports.EvaluationTemplates = void 0;
4
4
  /**
5
- * Evaluation template categories
6
- * Updated with new template types for comprehensive LLM testing
5
+ * Evaluation template identifier constants for use with the EvalAI platform API.
6
+ *
7
+ * These are **string identifiers** (e.g. `"unit-testing"`) that reference
8
+ * pre-built templates on the platform — not template definition objects.
9
+ * Pass these values to `evaluations.create({ templateId: EvaluationTemplates.UNIT_TESTING })`
10
+ * to spin up a pre-configured evaluation. For custom criteria, thresholds, and
11
+ * test cases, build your own evaluation config instead.
7
12
  */
8
13
  exports.EvaluationTemplates = {
9
14
  // Core Testing
package/dist/version.d.ts CHANGED
@@ -3,5 +3,5 @@
3
3
  * X-EvalGate-SDK-Version: SDK package version
4
4
  * X-EvalGate-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
5
5
  */
6
- export declare const SDK_VERSION = "2.2.1";
7
- export declare const SPEC_VERSION = "2.2.1";
6
+ export declare const SDK_VERSION = "2.2.3";
7
+ export declare const SPEC_VERSION = "2.2.3";
package/dist/version.js CHANGED
@@ -6,5 +6,5 @@ exports.SPEC_VERSION = exports.SDK_VERSION = void 0;
6
6
  * X-EvalGate-SDK-Version: SDK package version
7
7
  * X-EvalGate-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
8
8
  */
9
- exports.SDK_VERSION = "2.2.1";
10
- exports.SPEC_VERSION = "2.2.1";
9
+ exports.SDK_VERSION = "2.2.3";
10
+ exports.SPEC_VERSION = "2.2.3";
package/dist/workflows.js CHANGED
@@ -64,8 +64,13 @@ class WorkflowTracer {
64
64
  this.costs = [];
65
65
  this.spanCounter = 0;
66
66
  this.client = client;
67
+ const resolvedOrgId = options.organizationId ??
68
+ (typeof client?.getOrganizationId === "function"
69
+ ? client.getOrganizationId()
70
+ : undefined) ??
71
+ 0;
67
72
  this.options = {
68
- organizationId: options.organizationId || client.getOrganizationId() || 0,
73
+ organizationId: resolvedOrgId,
69
74
  autoCalculateCost: options.autoCalculateCost ?? true,
70
75
  tracePrefix: options.tracePrefix || "workflow",
71
76
  captureFullPayloads: options.captureFullPayloads ?? true,