npm - @evalgate/sdk - Versions diffs - 2.2.1 → 2.2.3 - Mend

@evalgate/sdk 2.2.1 → 2.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/CHANGELOG.md +70 -1
package/README.md +36 -7
package/dist/assertions.d.ts +67 -5
package/dist/assertions.js +733 -45
package/dist/cache.d.ts +1 -1
package/dist/cache.js +1 -1
package/dist/cli/upgrade.js +5 -0
package/dist/client.js +1 -1
package/dist/errors.js +7 -0
package/dist/export.d.ts +1 -1
package/dist/export.js +3 -3
package/dist/index.d.ts +4 -4
package/dist/index.js +14 -3
package/dist/integrations/anthropic.js +6 -6
package/dist/integrations/openai.js +6 -6
package/dist/pagination.d.ts +13 -2
package/dist/pagination.js +28 -2
package/dist/runtime/adapters/testsuite-to-dsl.js +1 -6
package/dist/runtime/executor.d.ts +3 -2
package/dist/runtime/executor.js +3 -2
package/dist/runtime/registry.d.ts +4 -1
package/dist/runtime/registry.js +4 -1
package/dist/snapshot.d.ts +14 -2
package/dist/snapshot.js +30 -4
package/dist/types.d.ts +7 -2
package/dist/types.js +7 -2
package/dist/version.d.ts +2 -2
package/dist/version.js +2 -2
package/dist/workflows.js +6 -1
package/package.json +2 -2

package/dist/cache.d.ts CHANGED Viewed

@@ -21,7 +21,7 @@ export declare class RequestCache {
     /**
      * Store response in cache
      */
-    set<T>(method: string, url: string, data: T, ttl: number, params?: unknown): void;
+    set<T>(method: string, url: string, data: T, ttl?: number, params?: unknown): void;
     /**
      * Invalidate specific cache entry
      */

package/dist/cache.js CHANGED Viewed

@@ -43,7 +43,7 @@ class RequestCache {
     /**
      * Store response in cache
      */
-    set(method, url, data, ttl, params) {
+    set(method, url, data, ttl = exports.CacheTTL.MEDIUM, params) {
         // Enforce cache size limit (LRU-style)
         if (this.cache.size >= this.maxSize) {
             const firstKey = this.cache.keys().next().value;

package/dist/cli/upgrade.js CHANGED Viewed

@@ -480,7 +480,12 @@ After upgrading:
     console.log("    - package.json                  eval:regression-gate + eval:baseline-update");
     console.log("    - .github/workflows/            Gate + governance workflows");
     console.log("    - .github/CODEOWNERS            Baseline requires approval\n");
+    console.log("  ⚠️  IMPORTANT — Reset your baseline before pushing:");
+    console.log("    The gate compares against your existing Tier 1 baseline.");
+    console.log("    If your test script changed, run this first to avoid an immediate regression:");
+    console.log("    npx evalgate baseline update    (or: pnpm eval:baseline-update)\n");
     console.log("  Next:");
+    console.log("    npx evalgate baseline update");
     console.log("    git add -A");
     console.log("    git commit -m 'chore: upgrade EvalGate gate to Tier 2'");
     console.log("    git push\n");

package/dist/client.js CHANGED Viewed

@@ -72,7 +72,7 @@ class AIEvalClient {
         this.baseUrl =
             config.baseUrl ||
                 getEnvVar("EVALGATE_BASE_URL", "EVALAI_BASE_URL") ||
-                (isBrowser ? "" : "http://localhost:3000");
+                (isBrowser ? "" : "https://api.evalgate.com");
         this.timeout = config.timeout || 30000;
         // Tier 4.17: Debug mode with request logging
         const logLevel = config.logLevel || (config.debug ? "debug" : "info");

package/dist/errors.js CHANGED Viewed

@@ -271,6 +271,10 @@ class RateLimitError extends EvalGateError {
     constructor(message, retryAfter) {
         super(message, "RATE_LIMIT_EXCEEDED", 429, { retryAfter });
         this.name = "RateLimitError";
+        if (retryAfter !== undefined) {
+            this.retryAfter = retryAfter;
+        }
+        Object.setPrototypeOf(this, RateLimitError.prototype);
     }
 }
 exports.RateLimitError = RateLimitError;
@@ -278,6 +282,7 @@ class AuthenticationError extends EvalGateError {
     constructor(message = "Authentication failed") {
         super(message, "AUTHENTICATION_ERROR", 401);
         this.name = "AuthenticationError";
+        Object.setPrototypeOf(this, AuthenticationError.prototype);
     }
 }
 exports.AuthenticationError = AuthenticationError;
@@ -285,6 +290,7 @@ class ValidationError extends EvalGateError {
     constructor(message = "Validation failed", details) {
         super(message, "VALIDATION_ERROR", 400, details);
         this.name = "ValidationError";
+        Object.setPrototypeOf(this, ValidationError.prototype);
     }
 }
 exports.ValidationError = ValidationError;
@@ -293,6 +299,7 @@ class NetworkError extends EvalGateError {
         super(message, "NETWORK_ERROR", 0);
         this.name = "NetworkError";
         this.retryable = true;
+        Object.setPrototypeOf(this, NetworkError.prototype);
     }
 }
 exports.NetworkError = NetworkError;

package/dist/export.d.ts CHANGED Viewed

@@ -140,7 +140,7 @@ export declare function exportData(client: AIEvalClient, options: ExportOptions)
  * console.log(`Imported ${result.summary.imported} items`);
  * ```
  */
-export declare function importData(client: AIEvalClient, data: ExportData, options: ImportOptions): Promise<ImportResult>;
+export declare function importData(client: AIEvalClient, data: ExportData, options?: ImportOptions): Promise<ImportResult>;
 /**
  * Export data to JSON file
  *

package/dist/export.js CHANGED Viewed

@@ -136,7 +136,7 @@ async function exportData(client, options) {
  * console.log(`Imported ${result.summary.imported} items`);
  * ```
  */
-async function importData(client, data, options) {
+async function importData(client, data, options = {}) {
     const result = {
         summary: { total: 0, imported: 0, skipped: 0, failed: 0 },
         details: {},
@@ -155,7 +155,7 @@ async function importData(client, data, options) {
         return result;
     }
     // Import traces
-    if (data.traces) {
+    if (data.traces && client?.traces) {
         const traceResults = { imported: 0, skipped: 0, failed: 0 };
         for (const trace of data.traces) {
             try {
@@ -191,7 +191,7 @@ async function importData(client, data, options) {
         result.summary.total += data.traces.length;
     }
     // Import evaluations
-    if (data.evaluations) {
+    if (data.evaluations && client?.evaluations) {
         const evalResults = { imported: 0, skipped: 0, failed: 0 };
         for (const evaluation of data.evaluations) {
             try {

package/dist/index.d.ts CHANGED Viewed

@@ -10,7 +10,7 @@ export { AIEvalClient } from "./client";
 import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, SDKError } from "./errors";
 export { EvalGateError, RateLimitError, AuthenticationError, SDKError as ValidationError, // Using SDKError as ValidationError for backward compatibility
 NetworkError, };
-export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasPII, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
+export { type AssertionLLMConfig, configureAssertions, containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, containsLanguageAsync, expect, followsInstructions, getAssertionConfig, hasFactualAccuracy, hasFactualAccuracyAsync, hasLength, hasNoHallucinations, hasNoHallucinationsAsync, hasNoToxicity, hasNoToxicityAsync, hasPII, hasReadabilityScore, hasSentiment, hasSentimentAsync, hasValidCodeSyntax, hasValidCodeSyntaxAsync, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
 import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
 export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
 export { cloneContext, mergeContexts, validateContext, } from "./runtime/context";
@@ -20,8 +20,8 @@ export { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRun
 export type { CloudExecutor, DefineEvalFunction, EvalContext, EvalExecutor, EvalExecutorInterface, EvalOptions, EvalResult, EvalRuntime, EvalSpec, ExecutorCapabilities, LocalExecutor, SpecConfig, SpecOptions, WorkerExecutor, } from "./runtime/types";
 export { EvalRuntimeError, RuntimeError, SpecExecutionError, SpecRegistrationError, } from "./runtime/types";
 export { createTestSuite, type TestCaseResult, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteConfig, TestSuiteResult, } from "./testing";
-import { compareWithSnapshot, snapshot } from "./snapshot";
-export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots, };
+import { compareSnapshots, compareWithSnapshot, snapshot } from "./snapshot";
+export { snapshot, compareWithSnapshot, compareSnapshots, snapshot as saveSnapshot, };
 import type { ExportFormat } from "./export";
 import { exportData, importData } from "./export";
 export { exportData, importData };
@@ -34,7 +34,7 @@ export { traceOpenAI } from "./integrations/openai";
 export { type OpenAIChatEvalCase, type OpenAIChatEvalOptions, type OpenAIChatEvalResult, openAIChatEval, } from "./integrations/openai-eval";
 export { Logger } from "./logger";
 export { extendExpectWithToPassGate } from "./matchers";
-export { autoPaginate, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
+export { autoPaginate, autoPaginateGenerator, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
 export { ARTIFACTS, type Baseline, type BaselineTolerance, GATE_CATEGORY, GATE_EXIT, type GateCategory, type GateExitCode, REPORT_SCHEMA_VERSION, type RegressionDelta, type RegressionReport, } from "./regression";
 export { batchProcess, batchRead, RateLimiter, streamEvaluation, } from "./streaming";
 export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, EvaluationRun, EvaluationRunDetail, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeEvaluateResult, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TraceDetail, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";

package/dist/index.js CHANGED Viewed

@@ -8,8 +8,8 @@
  * @packageDocumentation
  */
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
-exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = void 0;
+exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntaxAsync = exports.hasValidCodeSyntax = exports.hasSentimentAsync = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicityAsync = exports.hasNoToxicity = exports.hasNoHallucinationsAsync = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracyAsync = exports.hasFactualAccuracy = exports.getAssertionConfig = exports.followsInstructions = exports.expect = exports.containsLanguageAsync = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.configureAssertions = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
+exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginateGenerator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.saveSnapshot = exports.compareSnapshots = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = void 0;
 // Main SDK exports
 var client_1 = require("./client");
 Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
@@ -22,20 +22,30 @@ Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: functi
 Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.SDKError; } });
 // Enhanced assertions (Tier 1.3)
 var assertions_1 = require("./assertions");
+// LLM config
+Object.defineProperty(exports, "configureAssertions", { enumerable: true, get: function () { return assertions_1.configureAssertions; } });
 Object.defineProperty(exports, "containsAllRequiredFields", { enumerable: true, get: function () { return assertions_1.containsAllRequiredFields; } });
 Object.defineProperty(exports, "containsJSON", { enumerable: true, get: function () { return assertions_1.containsJSON; } });
 Object.defineProperty(exports, "containsKeywords", { enumerable: true, get: function () { return assertions_1.containsKeywords; } });
 Object.defineProperty(exports, "containsLanguage", { enumerable: true, get: function () { return assertions_1.containsLanguage; } });
+// LLM-backed async variants
+Object.defineProperty(exports, "containsLanguageAsync", { enumerable: true, get: function () { return assertions_1.containsLanguageAsync; } });
 Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
 Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
+Object.defineProperty(exports, "getAssertionConfig", { enumerable: true, get: function () { return assertions_1.getAssertionConfig; } });
 Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
+Object.defineProperty(exports, "hasFactualAccuracyAsync", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracyAsync; } });
 Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
 Object.defineProperty(exports, "hasNoHallucinations", { enumerable: true, get: function () { return assertions_1.hasNoHallucinations; } });
+Object.defineProperty(exports, "hasNoHallucinationsAsync", { enumerable: true, get: function () { return assertions_1.hasNoHallucinationsAsync; } });
 Object.defineProperty(exports, "hasNoToxicity", { enumerable: true, get: function () { return assertions_1.hasNoToxicity; } });
+Object.defineProperty(exports, "hasNoToxicityAsync", { enumerable: true, get: function () { return assertions_1.hasNoToxicityAsync; } });
 Object.defineProperty(exports, "hasPII", { enumerable: true, get: function () { return assertions_1.hasPII; } });
 Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
 Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
+Object.defineProperty(exports, "hasSentimentAsync", { enumerable: true, get: function () { return assertions_1.hasSentimentAsync; } });
 Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
+Object.defineProperty(exports, "hasValidCodeSyntaxAsync", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntaxAsync; } });
 Object.defineProperty(exports, "isValidEmail", { enumerable: true, get: function () { return assertions_1.isValidEmail; } });
 Object.defineProperty(exports, "isValidURL", { enumerable: true, get: function () { return assertions_1.isValidURL; } });
 Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
@@ -81,8 +91,8 @@ Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: funct
 Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
 // Snapshot testing (Tier 2.8)
 const snapshot_1 = require("./snapshot");
+Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareSnapshots; } });
 Object.defineProperty(exports, "compareWithSnapshot", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
-Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
 Object.defineProperty(exports, "snapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
 Object.defineProperty(exports, "saveSnapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
 // Export/Import utilities (Tier 4.18)
@@ -120,6 +130,7 @@ var matchers_1 = require("./matchers");
 Object.defineProperty(exports, "extendExpectWithToPassGate", { enumerable: true, get: function () { return matchers_1.extendExpectWithToPassGate; } });
 var pagination_1 = require("./pagination");
 Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
+Object.defineProperty(exports, "autoPaginateGenerator", { enumerable: true, get: function () { return pagination_1.autoPaginateGenerator; } });
 Object.defineProperty(exports, "createPaginatedIterator", { enumerable: true, get: function () { return pagination_1.createPaginatedIterator; } });
 Object.defineProperty(exports, "decodeCursor", { enumerable: true, get: function () { return pagination_1.decodeCursor; } });
 Object.defineProperty(exports, "encodeCursor", { enumerable: true, get: function () { return pagination_1.encodeCursor; } });

package/dist/integrations/anthropic.js CHANGED Viewed

@@ -67,7 +67,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
                     }
                     : {}),
             });
-            await evalClient.traces.create({
+            await evalClient.traces?.create({
                 name: `Anthropic: ${params.model}`,
                 traceId,
                 organizationId: organizationId || evalClient.getOrganizationId(),
@@ -89,7 +89,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
                 error: error instanceof Error ? error.message : String(error),
             });
             await evalClient.traces
-                .create({
+                ?.create({
                 name: `Anthropic: ${params.model}`,
                 traceId,
                 organizationId: organizationId || evalClient.getOrganizationId(),
@@ -97,7 +97,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
                 durationMs,
                 metadata: errorMetadata,
             })
-                .catch(() => {
+                ?.catch(() => {
                 // Ignore errors in trace creation to avoid masking the original error
             });
             throw error;
@@ -127,7 +127,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
     const startTime = Date.now();
     const traceId = `anthropic-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
     try {
-        await evalClient.traces.create({
+        await evalClient.traces?.create({
             name,
             traceId,
             organizationId: options.organizationId || evalClient.getOrganizationId(),
@@ -136,7 +136,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
         });
         const result = await fn();
         const durationMs = Date.now() - startTime;
-        await evalClient.traces.create({
+        await evalClient.traces?.create({
             name,
             traceId,
             organizationId: options.organizationId || evalClient.getOrganizationId(),
@@ -148,7 +148,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
     }
     catch (error) {
         const durationMs = Date.now() - startTime;
-        await evalClient.traces.create({
+        await evalClient.traces?.create({
             name,
             traceId,
             organizationId: options.organizationId || evalClient.getOrganizationId(),

package/dist/integrations/openai.js CHANGED Viewed

@@ -65,7 +65,7 @@ function traceOpenAI(openai, evalClient, options = {}) {
                     }
                     : {}),
             });
-            await evalClient.traces.create({
+            await evalClient.traces?.create({
                 name: `OpenAI: ${params.model}`,
                 traceId,
                 organizationId: organizationId || evalClient.getOrganizationId(),
@@ -87,7 +87,7 @@ function traceOpenAI(openai, evalClient, options = {}) {
                 error: error instanceof Error ? error.message : String(error),
             });
             await evalClient.traces
-                .create({
+                ?.create({
                 name: `OpenAI: ${params.model}`,
                 traceId,
                 organizationId: organizationId || evalClient.getOrganizationId(),
@@ -95,7 +95,7 @@ function traceOpenAI(openai, evalClient, options = {}) {
                 durationMs,
                 metadata: errorMetadata,
             })
-                .catch(() => {
+                ?.catch(() => {
                 // Ignore errors in trace creation to avoid masking the original error
             });
             throw error;
@@ -124,7 +124,7 @@ async function traceOpenAICall(evalClient, name, fn, options = {}) {
     const startTime = Date.now();
     const traceId = `openai-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
     try {
-        await evalClient.traces.create({
+        await evalClient.traces?.create({
             name,
             traceId,
             organizationId: options.organizationId || evalClient.getOrganizationId(),
@@ -133,7 +133,7 @@ async function traceOpenAICall(evalClient, name, fn, options = {}) {
         });
         const result = await fn();
         const durationMs = Date.now() - startTime;
-        await evalClient.traces.create({
+        await evalClient.traces?.create({
             name,
             traceId,
             organizationId: options.organizationId || evalClient.getOrganizationId(),
@@ -145,7 +145,7 @@ async function traceOpenAICall(evalClient, name, fn, options = {}) {
     }
     catch (error) {
         const durationMs = Date.now() - startTime;
-        await evalClient.traces.create({
+        await evalClient.traces?.create({
             name,
             traceId,
             organizationId: options.organizationId || evalClient.getOrganizationId(),

package/dist/pagination.d.ts CHANGED Viewed

@@ -50,9 +50,20 @@ export declare function createPaginatedIterator<T>(fetchFn: (offset: number, lim
     hasMore: boolean;
 }>, limit?: number): PaginatedIterator<T>;
 /**
- * Auto-paginate helper that fetches all pages automatically
+ * Auto-paginate helper that fetches all pages and returns a flat array.
+ * @example
+ * ```typescript
+ * const allItems = await autoPaginate(
+ *   (offset, limit) => client.traces.list({ offset, limit }),
+ * );
+ * ```
  */
-export declare function autoPaginate<T>(fetchFn: (offset: number, limit: number) => Promise<T[]>, limit?: number): AsyncGenerator<T, void, unknown>;
+export declare function autoPaginate<T>(fetchFn: (offset: number, limit: number) => Promise<T[]>, limit?: number): Promise<T[]>;
+/**
+ * Streaming auto-paginate generator — yields individual items one at a time.
+ * Use this when you want to process items as they arrive rather than waiting for all pages.
+ */
+export declare function autoPaginateGenerator<T>(fetchFn: (offset: number, limit: number) => Promise<T[]>, limit?: number): AsyncGenerator<T, void, unknown>;
 /**
  * Encode cursor for pagination (base64)
  */

package/dist/pagination.js CHANGED Viewed

@@ -6,6 +6,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.PaginatedIterator = void 0;
 exports.createPaginatedIterator = createPaginatedIterator;
 exports.autoPaginate = autoPaginate;
+exports.autoPaginateGenerator = autoPaginateGenerator;
 exports.encodeCursor = encodeCursor;
 exports.decodeCursor = decodeCursor;
 exports.createPaginationMeta = createPaginationMeta;
@@ -56,9 +57,34 @@ function createPaginatedIterator(fetchFn, limit = 50) {
     return new PaginatedIterator(fetchFn, limit);
 }
 /**
- * Auto-paginate helper that fetches all pages automatically
+ * Auto-paginate helper that fetches all pages and returns a flat array.
+ * @example
+ * ```typescript
+ * const allItems = await autoPaginate(
+ *   (offset, limit) => client.traces.list({ offset, limit }),
+ * );
+ * ```
  */
-async function* autoPaginate(fetchFn, limit = 50) {
+async function autoPaginate(fetchFn, limit = 50) {
+    const result = [];
+    let offset = 0;
+    let hasMore = true;
+    while (hasMore) {
+        const items = await fetchFn(offset, limit);
+        if (items.length === 0) {
+            break;
+        }
+        result.push(...items);
+        hasMore = items.length === limit;
+        offset += limit;
+    }
+    return result;
+}
+/**
+ * Streaming auto-paginate generator — yields individual items one at a time.
+ * Use this when you want to process items as they arrive rather than waiting for all pages.
+ */
+async function* autoPaginateGenerator(fetchFn, limit = 50) {
     let offset = 0;
     let hasMore = true;
     while (hasMore) {

package/dist/runtime/adapters/testsuite-to-dsl.js CHANGED Viewed

@@ -208,12 +208,7 @@ function generateDefineEvalCode(suite, options = {}) {
     });
     const helperFunctions = generateHelperFunctionsForSuite(specs, options);
     const evaluationFunction = generateEvaluationFunction();
-    return [
-        ...imports,
-        ...helperFunctions,
-        ...evaluationFunction,
-        ...specCode,
-    ].join("\n");
+    return [...imports, helperFunctions, evaluationFunction, ...specCode].join("\n");
 }
 /**
  * Generate helper functions for a specific spec

package/dist/runtime/executor.d.ts CHANGED Viewed

@@ -10,7 +10,8 @@ import type { LocalExecutor } from "./types";
  */
 export declare function createLocalExecutor(): LocalExecutor;
 /**
- * Default local executor instance
+ * Default local executor factory
+ * Call as defaultLocalExecutor() to get a new executor instance.
  * For convenience in simple use cases
  */
-export declare const defaultLocalExecutor: LocalExecutor;
+export declare const defaultLocalExecutor: typeof createLocalExecutor;

package/dist/runtime/executor.js CHANGED Viewed

@@ -146,7 +146,8 @@ function createLocalExecutor() {
     return new LocalExecutorImpl();
 }
 /**
- * Default local executor instance
+ * Default local executor factory
+ * Call as defaultLocalExecutor() to get a new executor instance.
  * For convenience in simple use cases
  */
-exports.defaultLocalExecutor = createLocalExecutor();
+exports.defaultLocalExecutor = createLocalExecutor;

package/dist/runtime/registry.d.ts CHANGED Viewed

@@ -61,7 +61,10 @@ export interface SerializedSpec {
  * Create a new scoped runtime with lifecycle management
  * Returns a handle for proper resource management
  */
-export declare function createEvalRuntime(projectRoot?: string): RuntimeHandle;
+export declare function createEvalRuntime(projectRootOrConfig?: string | {
+    name?: string;
+    projectRoot?: string;
+}): RuntimeHandle;
 /**
  * Helper function for safe runtime execution with automatic cleanup
  * Ensures runtime is disposed even if an exception is thrown

package/dist/runtime/registry.js CHANGED Viewed

@@ -315,7 +315,10 @@ class EvalRuntimeImpl {
  * Create a new scoped runtime with lifecycle management
  * Returns a handle for proper resource management
  */
-function createEvalRuntime(projectRoot = process.cwd()) {
+function createEvalRuntime(projectRootOrConfig = process.cwd()) {
+    const projectRoot = typeof projectRootOrConfig === "string"
+        ? projectRootOrConfig
+        : (projectRootOrConfig.projectRoot ?? process.cwd());
     const runtime = new EvalRuntimeImpl(projectRoot);
     // Create bound defineEval function
     const boundDefineEval = ((nameOrConfig, executor, options) => {

package/dist/snapshot.d.ts CHANGED Viewed

@@ -99,7 +99,7 @@ export declare class SnapshotManager {
      * }
      * ```
      */
-    compare(name: string, currentOutput: string): Promise<SnapshotComparison>;
+    compare(name: string, currentOutput: unknown): Promise<SnapshotComparison>;
     /**
      * List all snapshots
      *
@@ -165,7 +165,19 @@ export declare function loadSnapshot(name: string, dir?: string): Promise<Snapsh
  * }
  * ```
  */
-export declare function compareWithSnapshot(name: string, currentOutput: string, dir?: string): Promise<SnapshotComparison>;
+export declare function compareWithSnapshot(name: string, currentOutput: unknown, dir?: string): Promise<SnapshotComparison>;
+/**
+ * Compare two saved snapshots by name (convenience function)
+ *
+ * @example
+ * ```typescript
+ * const comparison = await compareSnapshots('baseline', 'current');
+ * if (!comparison.matches) {
+ *   console.log('Snapshots differ!', comparison.differences);
+ * }
+ * ```
+ */
+export declare function compareSnapshots(nameA: string, nameB: string, dir?: string): Promise<SnapshotComparison>;
 /**
  * Delete a snapshot (convenience function)
  */

package/dist/snapshot.js CHANGED Viewed

@@ -55,6 +55,7 @@ exports.SnapshotManager = void 0;
 exports.snapshot = snapshot;
 exports.loadSnapshot = loadSnapshot;
 exports.compareWithSnapshot = compareWithSnapshot;
+exports.compareSnapshots = compareSnapshots;
 exports.deleteSnapshot = deleteSnapshot;
 exports.listSnapshots = listSnapshots;
 // Environment check
@@ -130,7 +131,13 @@ class SnapshotManager {
         if (!options?.overwrite && fs.existsSync(filePath)) {
             throw new Error(`Snapshot '${name}' already exists. Use overwrite: true to update.`);
         }
-        const serialized = typeof output === "string" ? output : JSON.stringify(output);
+        const serialized = output === undefined
+            ? "undefined"
+            : output === null
+                ? "null"
+                : typeof output === "string"
+                    ? output
+                    : JSON.stringify(output);
         const snapshotData = {
             output: serialized,
             metadata: {
@@ -175,11 +182,14 @@ class SnapshotManager {
     async compare(name, currentOutput) {
         const snapshot = await this.load(name);
         const original = snapshot.output;
+        const currentOutputStr = typeof currentOutput === "string"
+            ? currentOutput
+            : JSON.stringify(currentOutput);
         // Exact match check
-        const exactMatch = original === currentOutput;
+        const exactMatch = original === currentOutputStr;
         // Calculate similarity (simple line-based diff)
         const originalLines = original.split("\n");
-        const currentLines = currentOutput.split("\n");
+        const currentLines = currentOutputStr.split("\n");
         const differences = [];
         const maxLines = Math.max(originalLines.length, currentLines.length);
         let matchingLines = 0;
@@ -199,7 +209,7 @@ class SnapshotManager {
             similarity,
             differences,
             original,
-            current: currentOutput,
+            current: currentOutputStr,
         };
     }
     /**
@@ -307,6 +317,22 @@ async function compareWithSnapshot(name, currentOutput, dir) {
     const manager = getSnapshotManager(dir);
     return manager.compare(name, currentOutput);
 }
+/**
+ * Compare two saved snapshots by name (convenience function)
+ *
+ * @example
+ * ```typescript
+ * const comparison = await compareSnapshots('baseline', 'current');
+ * if (!comparison.matches) {
+ *   console.log('Snapshots differ!', comparison.differences);
+ * }
+ * ```
+ */
+async function compareSnapshots(nameA, nameB, dir) {
+    const manager = getSnapshotManager(dir);
+    const snapshotB = await manager.load(nameB);
+    return manager.compare(nameA, snapshotB.output);
+}
 /**
  * Delete a snapshot (convenience function)
  */

package/dist/types.d.ts CHANGED Viewed

@@ -38,8 +38,13 @@ export interface ClientConfig {
     keepAlive?: boolean;
 }
 /**
- * Evaluation template categories
- * Updated with new template types for comprehensive LLM testing
+ * Evaluation template identifier constants for use with the EvalAI platform API.
+ *
+ * These are **string identifiers** (e.g. `"unit-testing"`) that reference
+ * pre-built templates on the platform — not template definition objects.
+ * Pass these values to `evaluations.create({ templateId: EvaluationTemplates.UNIT_TESTING })`
+ * to spin up a pre-configured evaluation. For custom criteria, thresholds, and
+ * test cases, build your own evaluation config instead.
  */
 export declare const EvaluationTemplates: {
     readonly UNIT_TESTING: "unit-testing";

package/dist/types.js CHANGED Viewed

@@ -2,8 +2,13 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.SDKError = exports.EvaluationTemplates = void 0;
 /**
- * Evaluation template categories
- * Updated with new template types for comprehensive LLM testing
+ * Evaluation template identifier constants for use with the EvalAI platform API.
+ *
+ * These are **string identifiers** (e.g. `"unit-testing"`) that reference
+ * pre-built templates on the platform — not template definition objects.
+ * Pass these values to `evaluations.create({ templateId: EvaluationTemplates.UNIT_TESTING })`
+ * to spin up a pre-configured evaluation. For custom criteria, thresholds, and
+ * test cases, build your own evaluation config instead.
  */
 exports.EvaluationTemplates = {
     // Core Testing

package/dist/version.d.ts CHANGED Viewed

@@ -3,5 +3,5 @@
  * X-EvalGate-SDK-Version: SDK package version
  * X-EvalGate-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
  */
-export declare const SDK_VERSION = "2.2.1";
-export declare const SPEC_VERSION = "2.2.1";
+export declare const SDK_VERSION = "2.2.3";
+export declare const SPEC_VERSION = "2.2.3";

package/dist/version.js CHANGED Viewed

@@ -6,5 +6,5 @@ exports.SPEC_VERSION = exports.SDK_VERSION = void 0;
  * X-EvalGate-SDK-Version: SDK package version
  * X-EvalGate-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
  */
-exports.SDK_VERSION = "2.2.1";
-exports.SPEC_VERSION = "2.2.1";
+exports.SDK_VERSION = "2.2.3";
+exports.SPEC_VERSION = "2.2.3";

package/dist/workflows.js CHANGED Viewed

@@ -64,8 +64,13 @@ class WorkflowTracer {
         this.costs = [];
         this.spanCounter = 0;
         this.client = client;
+        const resolvedOrgId = options.organizationId ??
+            (typeof client?.getOrganizationId === "function"
+                ? client.getOrganizationId()
+                : undefined) ??
+            0;
         this.options = {
-            organizationId: options.organizationId || client.getOrganizationId() || 0,
+            organizationId: resolvedOrgId,
             autoCalculateCost: options.autoCalculateCost ?? true,
             tracePrefix: options.tracePrefix || "workflow",
             captureFullPayloads: options.captureFullPayloads ?? true,