npm - @pauly4010/evalai-sdk - Versions diffs - 1.4.1 → 1.5.5 - Mend

@pauly4010/evalai-sdk 1.4.1 → 1.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95)

package/CHANGELOG.md +85 -0
package/README.md +205 -543
package/dist/assertions.d.ts +2 -2
package/dist/assertions.js +104 -71
package/dist/batch.js +12 -17
package/dist/cache.js +7 -11
package/dist/cli/api.d.ts +108 -0
package/dist/cli/api.js +130 -0
package/dist/cli/check.d.ts +28 -13
package/dist/cli/check.js +249 -142
package/dist/cli/ci-context.d.ts +6 -0
package/dist/cli/ci-context.js +110 -0
package/dist/cli/config.d.ts +30 -0
package/dist/cli/config.js +207 -0
package/dist/cli/constants.d.ts +15 -0
package/dist/cli/constants.js +18 -0
package/dist/cli/doctor.d.ts +11 -0
package/dist/cli/doctor.js +82 -0
package/dist/cli/formatters/github.d.ts +8 -0
package/dist/cli/formatters/github.js +130 -0
package/dist/cli/formatters/human.d.ts +6 -0
package/dist/cli/formatters/human.js +107 -0
package/dist/cli/formatters/json.d.ts +6 -0
package/dist/cli/formatters/json.js +10 -0
package/dist/cli/formatters/pr-comment.d.ts +12 -0
package/dist/cli/formatters/pr-comment.js +101 -0
package/dist/cli/formatters/types.d.ts +100 -0
package/dist/cli/formatters/types.js +5 -0
package/dist/cli/gate.d.ts +21 -0
package/dist/cli/gate.js +175 -0
package/dist/cli/index.d.ts +1 -0
package/dist/cli/index.js +67 -23
package/dist/cli/init.d.ts +7 -0
package/dist/cli/init.js +69 -0
package/dist/cli/policy-packs.d.ts +23 -0
package/dist/cli/policy-packs.js +83 -0
package/dist/cli/profiles.d.ts +28 -0
package/dist/cli/profiles.js +30 -0
package/dist/cli/reason-codes.d.ts +17 -0
package/dist/cli/reason-codes.js +19 -0
package/dist/cli/render/snippet.d.ts +5 -0
package/dist/cli/render/snippet.js +15 -0
package/dist/cli/render/sort.d.ts +10 -0
package/dist/cli/render/sort.js +24 -0
package/dist/cli/report/build-check-report.d.ts +19 -0
package/dist/cli/report/build-check-report.js +124 -0
package/dist/cli/share.d.ts +17 -0
package/dist/cli/share.js +83 -0
package/dist/client.d.ts +2 -2
package/dist/client.js +144 -132
package/dist/context.d.ts +1 -1
package/dist/context.js +4 -6
package/dist/errors.d.ts +2 -0
package/dist/errors.js +116 -107
package/dist/export.d.ts +6 -6
package/dist/export.js +39 -33
package/dist/index.d.ts +25 -24
package/dist/index.js +62 -56
package/dist/integrations/anthropic.d.ts +1 -1
package/dist/integrations/anthropic.js +23 -19
package/dist/integrations/openai-eval.d.ts +57 -0
package/dist/integrations/openai-eval.js +230 -0
package/dist/integrations/openai.d.ts +1 -1
package/dist/integrations/openai.js +23 -19
package/dist/local.d.ts +2 -2
package/dist/local.js +25 -25
package/dist/logger.d.ts +1 -1
package/dist/logger.js +24 -28
package/dist/matchers/index.d.ts +1 -0
package/dist/matchers/index.js +6 -0
package/dist/matchers/to-pass-gate.d.ts +29 -0
package/dist/matchers/to-pass-gate.js +35 -0
package/dist/pagination.d.ts +1 -1
package/dist/pagination.js +6 -6
package/dist/snapshot.js +24 -24
package/dist/streaming.js +11 -11
package/dist/testing.d.ts +6 -2
package/dist/testing.js +30 -12
package/dist/types.d.ts +22 -22
package/dist/types.js +13 -13
package/dist/utils/input-hash.d.ts +8 -0
package/dist/utils/input-hash.js +38 -0
package/dist/version.d.ts +7 -0
package/dist/version.js +10 -0
package/dist/workflows.d.ts +7 -7
package/dist/workflows.js +44 -44
package/package.json +102 -90
package/dist/__tests__/assertions.test.d.ts +0 -1
package/dist/__tests__/assertions.test.js +0 -288
package/dist/__tests__/client.test.d.ts +0 -1
package/dist/__tests__/client.test.js +0 -185
package/dist/__tests__/testing.test.d.ts +0 -1
package/dist/__tests__/testing.test.js +0 -230
package/dist/__tests__/workflows.test.d.ts +0 -1
package/dist/__tests__/workflows.test.js +0 -222

package/dist/index.d.ts CHANGED Viewed

@@ -6,31 +6,32 @@
  *
  * @packageDocumentation
  */
-export { AIEvalClient } from './client';
-import { EvalAIError, RateLimitError, AuthenticationError, NetworkError, SDKError } from './errors';
+export { AIEvalClient } from "./client";
+import { AuthenticationError, EvalAIError, NetworkError, RateLimitError, SDKError } from "./errors";
 export { EvalAIError, RateLimitError, AuthenticationError, SDKError as ValidationError, // Using SDKError as ValidationError for backward compatibility
-NetworkError };
-export { expect, containsKeywords, matchesPattern, hasLength, containsJSON, notContainsPII, hasSentiment, similarTo, withinRange, isValidEmail, isValidURL, hasNoHallucinations, matchesSchema, hasReadabilityScore, containsLanguage, hasFactualAccuracy, respondedWithinTime, hasNoToxicity, followsInstructions, containsAllRequiredFields, hasValidCodeSyntax } from './assertions';
-import { createContext, getCurrentContext, withContext, EvalContext } from './context';
-export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager };
-export { createTestSuite, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteResult, TestSuiteConfig, type TestCaseResult } from './testing';
-import { snapshot, compareWithSnapshot } from './snapshot';
-export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots };
-import { exportData, importData } from './export';
-import type { ExportFormat } from './export';
+NetworkError, };
+export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
+import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
+export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
+export { createTestSuite, type TestCaseResult, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteConfig, TestSuiteResult, } from "./testing";
+import { compareWithSnapshot, snapshot } from "./snapshot";
+export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots, };
+import type { ExportFormat } from "./export";
+import { exportData, importData } from "./export";
 export { exportData, importData };
 export type { ExportFormat, ExportFormat as ExportType };
-export { batchProcess, streamEvaluation, batchRead, RateLimiter } from './streaming';
-export { RequestCache, CacheTTL } from './cache';
-export { PaginatedIterator, createPaginatedIterator, autoPaginate, encodeCursor, decodeCursor, type PaginatedResponse, type PaginationParams } from './pagination';
-export { RequestBatcher } from './batch';
-export { Logger } from './logger';
-export { traceOpenAI } from './integrations/openai';
-export { traceAnthropic } from './integrations/anthropic';
-export { WorkflowTracer, createWorkflowTracer, traceWorkflowStep, traceLangChainAgent, traceCrewAI, traceAutoGen, type WorkflowNode, type WorkflowEdge, type WorkflowDefinition, type WorkflowContext, type WorkflowStatus, type HandoffType, type AgentHandoff, type DecisionAlternative, type DecisionType, type RecordDecisionParams, type LLMProvider, type CostCategory, type RecordCostParams, type CostRecord, type WorkflowTracerOptions, type AgentSpanContext, } from './workflows';
-export type { ClientConfig as AIEvalConfig, Trace as TraceData, Span as SpanData, Evaluation as EvaluationData, LLMJudgeResult as LLMJudgeData, RetryConfig, GenericMetadata as AnnotationData, TracedResponse, TestCase, TestResult, SnapshotData, ExportOptions, ImportOptions, StreamOptions, BatchOptions } from './types';
-export { EvaluationTemplates, type EvaluationTemplateType, type FeatureUsage, type OrganizationLimits } from './types';
-export type { Annotation, CreateAnnotationParams, ListAnnotationsParams, AnnotationTask, CreateAnnotationTaskParams, ListAnnotationTasksParams, AnnotationItem, CreateAnnotationItemParams, ListAnnotationItemsParams, APIKey, APIKeyWithSecret, CreateAPIKeyParams, UpdateAPIKeyParams, ListAPIKeysParams, APIKeyUsage, Webhook, CreateWebhookParams, UpdateWebhookParams, ListWebhooksParams, WebhookDelivery, ListWebhookDeliveriesParams, UsageStats, GetUsageParams, UsageSummary, LLMJudgeConfig, CreateLLMJudgeConfigParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, LLMJudgeAlignment, GetLLMJudgeAlignmentParams, Organization, } from './types';
-export { parseArgs, runCheck, EXIT, type CheckArgs } from './cli/check';
-import { AIEvalClient } from './client';
+export { RequestBatcher } from "./batch";
+export { CacheTTL, RequestCache } from "./cache";
+export { type CheckArgs, EXIT, parseArgs, runCheck } from "./cli/check";
+export { traceAnthropic } from "./integrations/anthropic";
+export { traceOpenAI } from "./integrations/openai";
+export { type OpenAIChatEvalCase, type OpenAIChatEvalOptions, type OpenAIChatEvalResult, openAIChatEval, } from "./integrations/openai-eval";
+export { Logger } from "./logger";
+export { extendExpectWithToPassGate } from "./matchers";
+export { autoPaginate, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
+export { batchProcess, batchRead, RateLimiter, streamEvaluation } from "./streaming";
+export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";
+export { EvaluationTemplates, type EvaluationTemplateType, type FeatureUsage, type OrganizationLimits, } from "./types";
+export { type AgentHandoff, type AgentSpanContext, type CostCategory, type CostRecord, createWorkflowTracer, type DecisionAlternative, type DecisionType, type HandoffType, type LLMProvider, type RecordCostParams, type RecordDecisionParams, traceAutoGen, traceCrewAI, traceLangChainAgent, traceWorkflowStep, type WorkflowContext, type WorkflowDefinition, type WorkflowEdge, type WorkflowNode, type WorkflowStatus, WorkflowTracer, type WorkflowTracerOptions, } from "./workflows";
+import { AIEvalClient } from "./client";
 export default AIEvalClient;

package/dist/index.js CHANGED Viewed

@@ -8,109 +8,115 @@
  * @packageDocumentation
  */
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.decodeCursor = exports.encodeCursor = exports.autoPaginate = exports.createPaginatedIterator = exports.PaginatedIterator = exports.CacheTTL = exports.RequestCache = exports.RateLimiter = exports.batchRead = exports.streamEvaluation = exports.batchProcess = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.hasValidCodeSyntax = exports.containsAllRequiredFields = exports.followsInstructions = exports.hasNoToxicity = exports.respondedWithinTime = exports.hasFactualAccuracy = exports.containsLanguage = exports.hasReadabilityScore = exports.matchesSchema = exports.hasNoHallucinations = exports.isValidURL = exports.isValidEmail = exports.withinRange = exports.similarTo = exports.hasSentiment = exports.notContainsPII = exports.containsJSON = exports.hasLength = exports.matchesPattern = exports.containsKeywords = exports.expect = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
-exports.EXIT = exports.runCheck = exports.parseArgs = exports.EvaluationTemplates = exports.traceAutoGen = exports.traceCrewAI = exports.traceLangChainAgent = exports.traceWorkflowStep = exports.createWorkflowTracer = exports.WorkflowTracer = exports.traceAnthropic = exports.traceOpenAI = exports.Logger = exports.RequestBatcher = void 0;
+exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
+exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = void 0;
 // Main SDK exports
 var client_1 = require("./client");
 Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
 // Enhanced error handling (Tier 1.5)
 const errors_1 = require("./errors");
-Object.defineProperty(exports, "EvalAIError", { enumerable: true, get: function () { return errors_1.EvalAIError; } });
-Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: function () { return errors_1.RateLimitError; } });
 Object.defineProperty(exports, "AuthenticationError", { enumerable: true, get: function () { return errors_1.AuthenticationError; } });
+Object.defineProperty(exports, "EvalAIError", { enumerable: true, get: function () { return errors_1.EvalAIError; } });
 Object.defineProperty(exports, "NetworkError", { enumerable: true, get: function () { return errors_1.NetworkError; } });
+Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: function () { return errors_1.RateLimitError; } });
 Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.SDKError; } });
 // Enhanced assertions (Tier 1.3)
 var assertions_1 = require("./assertions");
-Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
+Object.defineProperty(exports, "containsAllRequiredFields", { enumerable: true, get: function () { return assertions_1.containsAllRequiredFields; } });
+Object.defineProperty(exports, "containsJSON", { enumerable: true, get: function () { return assertions_1.containsJSON; } });
 Object.defineProperty(exports, "containsKeywords", { enumerable: true, get: function () { return assertions_1.containsKeywords; } });
-Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
+Object.defineProperty(exports, "containsLanguage", { enumerable: true, get: function () { return assertions_1.containsLanguage; } });
+Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
+Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
+Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
 Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
-Object.defineProperty(exports, "containsJSON", { enumerable: true, get: function () { return assertions_1.containsJSON; } });
-Object.defineProperty(exports, "notContainsPII", { enumerable: true, get: function () { return assertions_1.notContainsPII; } });
+Object.defineProperty(exports, "hasNoHallucinations", { enumerable: true, get: function () { return assertions_1.hasNoHallucinations; } });
+Object.defineProperty(exports, "hasNoToxicity", { enumerable: true, get: function () { return assertions_1.hasNoToxicity; } });
+Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
 Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
-Object.defineProperty(exports, "similarTo", { enumerable: true, get: function () { return assertions_1.similarTo; } });
-Object.defineProperty(exports, "withinRange", { enumerable: true, get: function () { return assertions_1.withinRange; } });
+Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
 Object.defineProperty(exports, "isValidEmail", { enumerable: true, get: function () { return assertions_1.isValidEmail; } });
 Object.defineProperty(exports, "isValidURL", { enumerable: true, get: function () { return assertions_1.isValidURL; } });
-Object.defineProperty(exports, "hasNoHallucinations", { enumerable: true, get: function () { return assertions_1.hasNoHallucinations; } });
+Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
 Object.defineProperty(exports, "matchesSchema", { enumerable: true, get: function () { return assertions_1.matchesSchema; } });
-Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
-Object.defineProperty(exports, "containsLanguage", { enumerable: true, get: function () { return assertions_1.containsLanguage; } });
-Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
+Object.defineProperty(exports, "notContainsPII", { enumerable: true, get: function () { return assertions_1.notContainsPII; } });
 Object.defineProperty(exports, "respondedWithinTime", { enumerable: true, get: function () { return assertions_1.respondedWithinTime; } });
-Object.defineProperty(exports, "hasNoToxicity", { enumerable: true, get: function () { return assertions_1.hasNoToxicity; } });
-Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
-Object.defineProperty(exports, "containsAllRequiredFields", { enumerable: true, get: function () { return assertions_1.containsAllRequiredFields; } });
-Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
+Object.defineProperty(exports, "similarTo", { enumerable: true, get: function () { return assertions_1.similarTo; } });
+Object.defineProperty(exports, "withinRange", { enumerable: true, get: function () { return assertions_1.withinRange; } });
 // Context propagation (Tier 2.9)
 const context_1 = require("./context");
 Object.defineProperty(exports, "createContext", { enumerable: true, get: function () { return context_1.createContext; } });
+Object.defineProperty(exports, "ContextManager", { enumerable: true, get: function () { return context_1.EvalContext; } });
 Object.defineProperty(exports, "getContext", { enumerable: true, get: function () { return context_1.getCurrentContext; } });
 Object.defineProperty(exports, "withContext", { enumerable: true, get: function () { return context_1.withContext; } });
-Object.defineProperty(exports, "ContextManager", { enumerable: true, get: function () { return context_1.EvalContext; } });
 // Test suite builder (Tier 2.7)
 var testing_1 = require("./testing");
 Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: function () { return testing_1.createTestSuite; } });
 Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
 // Snapshot testing (Tier 2.8)
 const snapshot_1 = require("./snapshot");
-Object.defineProperty(exports, "snapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
-Object.defineProperty(exports, "saveSnapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
 Object.defineProperty(exports, "compareWithSnapshot", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
 Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
+Object.defineProperty(exports, "snapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
+Object.defineProperty(exports, "saveSnapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
 // Export/Import utilities (Tier 4.18)
 const export_1 = require("./export");
 Object.defineProperty(exports, "exportData", { enumerable: true, get: function () { return export_1.exportData; } });
 Object.defineProperty(exports, "importData", { enumerable: true, get: function () { return export_1.importData; } });
-// Streaming and batch processing (Tier 3.3)
-// Use functions from ./streaming module instead of these deprecated exports
-var streaming_1 = require("./streaming");
-Object.defineProperty(exports, "batchProcess", { enumerable: true, get: function () { return streaming_1.batchProcess; } });
-Object.defineProperty(exports, "streamEvaluation", { enumerable: true, get: function () { return streaming_1.streamEvaluation; } });
-Object.defineProperty(exports, "batchRead", { enumerable: true, get: function () { return streaming_1.batchRead; } });
-Object.defineProperty(exports, "RateLimiter", { enumerable: true, get: function () { return streaming_1.RateLimiter; } });
+// Note: RequestBatcher is for advanced users only
+// Most users don't need this - batching is automatic
+var batch_1 = require("./batch");
+Object.defineProperty(exports, "RequestBatcher", { enumerable: true, get: function () { return batch_1.RequestBatcher; } });
 // Performance optimization utilities (v1.3.0)
 // Note: RequestCache and CacheTTL are for advanced users only
 // Most users don't need these - caching is automatic
 var cache_1 = require("./cache");
-Object.defineProperty(exports, "RequestCache", { enumerable: true, get: function () { return cache_1.RequestCache; } });
 Object.defineProperty(exports, "CacheTTL", { enumerable: true, get: function () { return cache_1.CacheTTL; } });
-var pagination_1 = require("./pagination");
-Object.defineProperty(exports, "PaginatedIterator", { enumerable: true, get: function () { return pagination_1.PaginatedIterator; } });
-Object.defineProperty(exports, "createPaginatedIterator", { enumerable: true, get: function () { return pagination_1.createPaginatedIterator; } });
-Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
-Object.defineProperty(exports, "encodeCursor", { enumerable: true, get: function () { return pagination_1.encodeCursor; } });
-Object.defineProperty(exports, "decodeCursor", { enumerable: true, get: function () { return pagination_1.decodeCursor; } });
-// Note: RequestBatcher is for advanced users only
-// Most users don't need this - batching is automatic
-var batch_1 = require("./batch");
-Object.defineProperty(exports, "RequestBatcher", { enumerable: true, get: function () { return batch_1.RequestBatcher; } });
-// Debug logger (Tier 4.17)
-var logger_1 = require("./logger");
-Object.defineProperty(exports, "Logger", { enumerable: true, get: function () { return logger_1.Logger; } });
+Object.defineProperty(exports, "RequestCache", { enumerable: true, get: function () { return cache_1.RequestCache; } });
+// CLI (programmatic use)
+var check_1 = require("./cli/check");
+Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
+Object.defineProperty(exports, "parseArgs", { enumerable: true, get: function () { return check_1.parseArgs; } });
+Object.defineProperty(exports, "runCheck", { enumerable: true, get: function () { return check_1.runCheck; } });
+var anthropic_1 = require("./integrations/anthropic");
+Object.defineProperty(exports, "traceAnthropic", { enumerable: true, get: function () { return anthropic_1.traceAnthropic; } });
 // Framework integrations (Tier 1.2)
 var openai_1 = require("./integrations/openai");
 Object.defineProperty(exports, "traceOpenAI", { enumerable: true, get: function () { return openai_1.traceOpenAI; } });
-var anthropic_1 = require("./integrations/anthropic");
-Object.defineProperty(exports, "traceAnthropic", { enumerable: true, get: function () { return anthropic_1.traceAnthropic; } });
+// OpenAI regression eval (local-first, no account required)
+var openai_eval_1 = require("./integrations/openai-eval");
+Object.defineProperty(exports, "openAIChatEval", { enumerable: true, get: function () { return openai_eval_1.openAIChatEval; } });
+// Debug logger (Tier 4.17)
+var logger_1 = require("./logger");
+Object.defineProperty(exports, "Logger", { enumerable: true, get: function () { return logger_1.Logger; } });
+// Vitest matcher: expect(await openAIChatEval(...)).toPassGate()
+var matchers_1 = require("./matchers");
+Object.defineProperty(exports, "extendExpectWithToPassGate", { enumerable: true, get: function () { return matchers_1.extendExpectWithToPassGate; } });
+var pagination_1 = require("./pagination");
+Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
+Object.defineProperty(exports, "createPaginatedIterator", { enumerable: true, get: function () { return pagination_1.createPaginatedIterator; } });
+Object.defineProperty(exports, "decodeCursor", { enumerable: true, get: function () { return pagination_1.decodeCursor; } });
+Object.defineProperty(exports, "encodeCursor", { enumerable: true, get: function () { return pagination_1.encodeCursor; } });
+Object.defineProperty(exports, "PaginatedIterator", { enumerable: true, get: function () { return pagination_1.PaginatedIterator; } });
+// Streaming and batch processing (Tier 3.3)
+// Use functions from ./streaming module instead of these deprecated exports
+var streaming_1 = require("./streaming");
+Object.defineProperty(exports, "batchProcess", { enumerable: true, get: function () { return streaming_1.batchProcess; } });
+Object.defineProperty(exports, "batchRead", { enumerable: true, get: function () { return streaming_1.batchRead; } });
+Object.defineProperty(exports, "RateLimiter", { enumerable: true, get: function () { return streaming_1.RateLimiter; } });
+Object.defineProperty(exports, "streamEvaluation", { enumerable: true, get: function () { return streaming_1.streamEvaluation; } });
+// New exports for v1.1.0
+var types_1 = require("./types");
+Object.defineProperty(exports, "EvaluationTemplates", { enumerable: true, get: function () { return types_1.EvaluationTemplates; } });
 // Workflow tracing (Orchestration Layer)
 var workflows_1 = require("./workflows");
-Object.defineProperty(exports, "WorkflowTracer", { enumerable: true, get: function () { return workflows_1.WorkflowTracer; } });
 Object.defineProperty(exports, "createWorkflowTracer", { enumerable: true, get: function () { return workflows_1.createWorkflowTracer; } });
-Object.defineProperty(exports, "traceWorkflowStep", { enumerable: true, get: function () { return workflows_1.traceWorkflowStep; } });
+Object.defineProperty(exports, "traceAutoGen", { enumerable: true, get: function () { return workflows_1.traceAutoGen; } });
+Object.defineProperty(exports, "traceCrewAI", { enumerable: true, get: function () { return workflows_1.traceCrewAI; } });
 // Framework integrations
 Object.defineProperty(exports, "traceLangChainAgent", { enumerable: true, get: function () { return workflows_1.traceLangChainAgent; } });
-Object.defineProperty(exports, "traceCrewAI", { enumerable: true, get: function () { return workflows_1.traceCrewAI; } });
-Object.defineProperty(exports, "traceAutoGen", { enumerable: true, get: function () { return workflows_1.traceAutoGen; } });
-// New exports for v1.1.0
-var types_1 = require("./types");
-Object.defineProperty(exports, "EvaluationTemplates", { enumerable: true, get: function () { return types_1.EvaluationTemplates; } });
-// CLI (programmatic use)
-var check_1 = require("./cli/check");
-Object.defineProperty(exports, "parseArgs", { enumerable: true, get: function () { return check_1.parseArgs; } });
-Object.defineProperty(exports, "runCheck", { enumerable: true, get: function () { return check_1.runCheck; } });
-Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
+Object.defineProperty(exports, "traceWorkflowStep", { enumerable: true, get: function () { return workflows_1.traceWorkflowStep; } });
+Object.defineProperty(exports, "WorkflowTracer", { enumerable: true, get: function () { return workflows_1.WorkflowTracer; } });
 // Default export for convenience
 const client_2 = require("./client");
 exports.default = client_2.AIEvalClient;

package/dist/integrations/anthropic.d.ts CHANGED Viewed

@@ -18,7 +18,7 @@
  * });
  * ```
  */
-import type { AIEvalClient } from '../client';
+import type { AIEvalClient } from "../client";
 export interface AnthropicTraceOptions {
     /** Whether to capture input (default: true) */
     captureInput?: boolean;

package/dist/integrations/anthropic.js CHANGED Viewed

@@ -43,7 +43,7 @@ const context_1 = require("../context");
  * ```
  */
 function traceAnthropic(anthropic, evalClient, options = {}) {
-    const { captureInput = true, captureOutput = true, captureMetadata = true, organizationId, tracePrefix = 'anthropic' } = options;
+    const { captureInput = true, captureOutput = true, captureMetadata = true, organizationId, tracePrefix = "anthropic", } = options;
     // Create proxy for messages.create
     const originalCreate = anthropic.messages.create.bind(anthropic.messages);
     anthropic.messages.create = async (params, requestOptions) => {
@@ -60,18 +60,20 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
                 max_tokens: params.max_tokens,
                 ...(captureInput ? { input: params.messages } : {}),
                 ...(captureOutput ? { output: message.content } : {}),
-                ...(captureMetadata ? {
-                    usage: message.usage,
-                    stop_reason: message.stop_reason
-                } : {})
+                ...(captureMetadata
+                    ? {
+                        usage: message.usage,
+                        stop_reason: message.stop_reason,
+                    }
+                    : {}),
             });
             await evalClient.traces.create({
                 name: `Anthropic: ${params.model}`,
                 traceId,
                 organizationId: organizationId || evalClient.getOrganizationId(),
-                status: 'success',
+                status: "success",
                 durationMs,
-                metadata: traceMetadata
+                metadata: traceMetadata,
             });
             return message;
         }
@@ -84,16 +86,18 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
                 max_tokens: params.max_tokens,
                 ...(captureInput ? { input: params.messages } : {}),
                 ...(captureMetadata ? { params } : {}),
-                error: error instanceof Error ? error.message : String(error)
+                error: error instanceof Error ? error.message : String(error),
             });
-            await evalClient.traces.create({
+            await evalClient.traces
+                .create({
                 name: `Anthropic: ${params.model}`,
                 traceId,
                 organizationId: organizationId || evalClient.getOrganizationId(),
-                status: 'error',
+                status: "error",
                 durationMs,
-                metadata: errorMetadata
-            }).catch(() => {
+                metadata: errorMetadata,
+            })
+                .catch(() => {
                 // Ignore errors in trace creation to avoid masking the original error
             });
             throw error;
@@ -127,8 +131,8 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
             name,
             traceId,
             organizationId: options.organizationId || evalClient.getOrganizationId(),
-            status: 'pending',
-            metadata: (0, context_1.mergeWithContext)({})
+            status: "pending",
+            metadata: (0, context_1.mergeWithContext)({}),
         });
         const result = await fn();
         const durationMs = Date.now() - startTime;
@@ -136,9 +140,9 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
             name,
             traceId,
             organizationId: options.organizationId || evalClient.getOrganizationId(),
-            status: 'success',
+            status: "success",
             durationMs,
-            metadata: (0, context_1.mergeWithContext)({})
+            metadata: (0, context_1.mergeWithContext)({}),
         });
         return result;
     }
@@ -148,11 +152,11 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
             name,
             traceId,
             organizationId: options.organizationId || evalClient.getOrganizationId(),
-            status: 'error',
+            status: "error",
             durationMs,
             metadata: (0, context_1.mergeWithContext)({
-                error: error instanceof Error ? error.message : String(error)
-            })
+                error: error instanceof Error ? error.message : String(error),
+            }),
         });
         throw error;
     }

package/dist/integrations/openai-eval.d.ts ADDED Viewed

@@ -0,0 +1,57 @@
+/**
+ * openAIChatEval — One-function OpenAI chat regression testing
+ *
+ * Run local regression tests with OpenAI. No EvalAI account required.
+ * CI-friendly output. Optional reportToEvalAI in v1.5.
+ *
+ * @example
+ * ```typescript
+ * import { openAIChatEval } from '@pauly4010/evalai-sdk';
+ *
+ * await openAIChatEval({
+ *   name: 'chat-regression',
+ *   cases: [
+ *     { input: 'Hello', expectedOutput: 'greeting' },
+ *     { input: '2 + 2 = ?', expectedOutput: '4' }
+ *   ]
+ * });
+ * ```
+ */
+import type { TestSuiteCaseResult } from "../testing";
+export interface OpenAIChatEvalCase {
+    input: string;
+    expectedOutput?: string;
+    /** Platform test case ID. When provided, used directly for reportToEvalAI (no input matching). */
+    testCaseId?: number;
+    assertions?: ((output: string) => import("../assertions").AssertionResult)[];
+}
+export interface OpenAIChatEvalOptions {
+    name: string;
+    model?: string;
+    apiKey?: string;
+    cases: OpenAIChatEvalCase[];
+    /** Retry failing cases N times (default: 0). Only failing cases are retried. */
+    retries?: number;
+    /** v1.5: Upload results to EvalAI platform for an existing evaluation. Requires evaluationId and EVALAI_API_KEY. */
+    reportToEvalAI?: boolean;
+    /** Evaluation ID (from config or arg). Required when reportToEvalAI is true. */
+    evaluationId?: string;
+    /** EvalAI API base URL. Default: EVALAI_BASE_URL or http://localhost:3000 */
+    baseUrl?: string;
+    /** Idempotency key for import (e.g. CI run ID). Prevents duplicate runs on retry. */
+    idempotencyKey?: string;
+}
+export interface OpenAIChatEvalResult {
+    passed: number;
+    total: number;
+    score: number;
+    results: TestSuiteCaseResult[];
+    durationMs: number;
+    /** Case IDs that were retried (flaky recovery) */
+    retriedCases?: string[];
+}
+/**
+ * Run OpenAI chat regression tests locally.
+ * No EvalAI account required. Returns score and prints CI-friendly summary.
+ */
+export declare function openAIChatEval(options: OpenAIChatEvalOptions): Promise<OpenAIChatEvalResult>;