@pauly4010/evalai-sdk 1.4.1 → 1.5.5

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/CHANGELOG.md +85 -0
  2. package/README.md +205 -543
  3. package/dist/assertions.d.ts +2 -2
  4. package/dist/assertions.js +104 -71
  5. package/dist/batch.js +12 -17
  6. package/dist/cache.js +7 -11
  7. package/dist/cli/api.d.ts +108 -0
  8. package/dist/cli/api.js +130 -0
  9. package/dist/cli/check.d.ts +28 -13
  10. package/dist/cli/check.js +249 -142
  11. package/dist/cli/ci-context.d.ts +6 -0
  12. package/dist/cli/ci-context.js +110 -0
  13. package/dist/cli/config.d.ts +30 -0
  14. package/dist/cli/config.js +207 -0
  15. package/dist/cli/constants.d.ts +15 -0
  16. package/dist/cli/constants.js +18 -0
  17. package/dist/cli/doctor.d.ts +11 -0
  18. package/dist/cli/doctor.js +82 -0
  19. package/dist/cli/formatters/github.d.ts +8 -0
  20. package/dist/cli/formatters/github.js +130 -0
  21. package/dist/cli/formatters/human.d.ts +6 -0
  22. package/dist/cli/formatters/human.js +107 -0
  23. package/dist/cli/formatters/json.d.ts +6 -0
  24. package/dist/cli/formatters/json.js +10 -0
  25. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  26. package/dist/cli/formatters/pr-comment.js +101 -0
  27. package/dist/cli/formatters/types.d.ts +100 -0
  28. package/dist/cli/formatters/types.js +5 -0
  29. package/dist/cli/gate.d.ts +21 -0
  30. package/dist/cli/gate.js +175 -0
  31. package/dist/cli/index.d.ts +1 -0
  32. package/dist/cli/index.js +67 -23
  33. package/dist/cli/init.d.ts +7 -0
  34. package/dist/cli/init.js +69 -0
  35. package/dist/cli/policy-packs.d.ts +23 -0
  36. package/dist/cli/policy-packs.js +83 -0
  37. package/dist/cli/profiles.d.ts +28 -0
  38. package/dist/cli/profiles.js +30 -0
  39. package/dist/cli/reason-codes.d.ts +17 -0
  40. package/dist/cli/reason-codes.js +19 -0
  41. package/dist/cli/render/snippet.d.ts +5 -0
  42. package/dist/cli/render/snippet.js +15 -0
  43. package/dist/cli/render/sort.d.ts +10 -0
  44. package/dist/cli/render/sort.js +24 -0
  45. package/dist/cli/report/build-check-report.d.ts +19 -0
  46. package/dist/cli/report/build-check-report.js +124 -0
  47. package/dist/cli/share.d.ts +17 -0
  48. package/dist/cli/share.js +83 -0
  49. package/dist/client.d.ts +2 -2
  50. package/dist/client.js +144 -132
  51. package/dist/context.d.ts +1 -1
  52. package/dist/context.js +4 -6
  53. package/dist/errors.d.ts +2 -0
  54. package/dist/errors.js +116 -107
  55. package/dist/export.d.ts +6 -6
  56. package/dist/export.js +39 -33
  57. package/dist/index.d.ts +25 -24
  58. package/dist/index.js +62 -56
  59. package/dist/integrations/anthropic.d.ts +1 -1
  60. package/dist/integrations/anthropic.js +23 -19
  61. package/dist/integrations/openai-eval.d.ts +57 -0
  62. package/dist/integrations/openai-eval.js +230 -0
  63. package/dist/integrations/openai.d.ts +1 -1
  64. package/dist/integrations/openai.js +23 -19
  65. package/dist/local.d.ts +2 -2
  66. package/dist/local.js +25 -25
  67. package/dist/logger.d.ts +1 -1
  68. package/dist/logger.js +24 -28
  69. package/dist/matchers/index.d.ts +1 -0
  70. package/dist/matchers/index.js +6 -0
  71. package/dist/matchers/to-pass-gate.d.ts +29 -0
  72. package/dist/matchers/to-pass-gate.js +35 -0
  73. package/dist/pagination.d.ts +1 -1
  74. package/dist/pagination.js +6 -6
  75. package/dist/snapshot.js +24 -24
  76. package/dist/streaming.js +11 -11
  77. package/dist/testing.d.ts +6 -2
  78. package/dist/testing.js +30 -12
  79. package/dist/types.d.ts +22 -22
  80. package/dist/types.js +13 -13
  81. package/dist/utils/input-hash.d.ts +8 -0
  82. package/dist/utils/input-hash.js +38 -0
  83. package/dist/version.d.ts +7 -0
  84. package/dist/version.js +10 -0
  85. package/dist/workflows.d.ts +7 -7
  86. package/dist/workflows.js +44 -44
  87. package/package.json +102 -90
  88. package/dist/__tests__/assertions.test.d.ts +0 -1
  89. package/dist/__tests__/assertions.test.js +0 -288
  90. package/dist/__tests__/client.test.d.ts +0 -1
  91. package/dist/__tests__/client.test.js +0 -185
  92. package/dist/__tests__/testing.test.d.ts +0 -1
  93. package/dist/__tests__/testing.test.js +0 -230
  94. package/dist/__tests__/workflows.test.d.ts +0 -1
  95. package/dist/__tests__/workflows.test.js +0 -222
package/dist/index.d.ts CHANGED
@@ -6,31 +6,32 @@
6
6
  *
7
7
  * @packageDocumentation
8
8
  */
9
- export { AIEvalClient } from './client';
10
- import { EvalAIError, RateLimitError, AuthenticationError, NetworkError, SDKError } from './errors';
9
+ export { AIEvalClient } from "./client";
10
+ import { AuthenticationError, EvalAIError, NetworkError, RateLimitError, SDKError } from "./errors";
11
11
  export { EvalAIError, RateLimitError, AuthenticationError, SDKError as ValidationError, // Using SDKError as ValidationError for backward compatibility
12
- NetworkError };
13
- export { expect, containsKeywords, matchesPattern, hasLength, containsJSON, notContainsPII, hasSentiment, similarTo, withinRange, isValidEmail, isValidURL, hasNoHallucinations, matchesSchema, hasReadabilityScore, containsLanguage, hasFactualAccuracy, respondedWithinTime, hasNoToxicity, followsInstructions, containsAllRequiredFields, hasValidCodeSyntax } from './assertions';
14
- import { createContext, getCurrentContext, withContext, EvalContext } from './context';
15
- export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager };
16
- export { createTestSuite, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteResult, TestSuiteConfig, type TestCaseResult } from './testing';
17
- import { snapshot, compareWithSnapshot } from './snapshot';
18
- export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots };
19
- import { exportData, importData } from './export';
20
- import type { ExportFormat } from './export';
12
+ NetworkError, };
13
+ export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
14
+ import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
15
+ export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
16
+ export { createTestSuite, type TestCaseResult, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteConfig, TestSuiteResult, } from "./testing";
17
+ import { compareWithSnapshot, snapshot } from "./snapshot";
18
+ export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots, };
19
+ import type { ExportFormat } from "./export";
20
+ import { exportData, importData } from "./export";
21
21
  export { exportData, importData };
22
22
  export type { ExportFormat, ExportFormat as ExportType };
23
- export { batchProcess, streamEvaluation, batchRead, RateLimiter } from './streaming';
24
- export { RequestCache, CacheTTL } from './cache';
25
- export { PaginatedIterator, createPaginatedIterator, autoPaginate, encodeCursor, decodeCursor, type PaginatedResponse, type PaginationParams } from './pagination';
26
- export { RequestBatcher } from './batch';
27
- export { Logger } from './logger';
28
- export { traceOpenAI } from './integrations/openai';
29
- export { traceAnthropic } from './integrations/anthropic';
30
- export { WorkflowTracer, createWorkflowTracer, traceWorkflowStep, traceLangChainAgent, traceCrewAI, traceAutoGen, type WorkflowNode, type WorkflowEdge, type WorkflowDefinition, type WorkflowContext, type WorkflowStatus, type HandoffType, type AgentHandoff, type DecisionAlternative, type DecisionType, type RecordDecisionParams, type LLMProvider, type CostCategory, type RecordCostParams, type CostRecord, type WorkflowTracerOptions, type AgentSpanContext, } from './workflows';
31
- export type { ClientConfig as AIEvalConfig, Trace as TraceData, Span as SpanData, Evaluation as EvaluationData, LLMJudgeResult as LLMJudgeData, RetryConfig, GenericMetadata as AnnotationData, TracedResponse, TestCase, TestResult, SnapshotData, ExportOptions, ImportOptions, StreamOptions, BatchOptions } from './types';
32
- export { EvaluationTemplates, type EvaluationTemplateType, type FeatureUsage, type OrganizationLimits } from './types';
33
- export type { Annotation, CreateAnnotationParams, ListAnnotationsParams, AnnotationTask, CreateAnnotationTaskParams, ListAnnotationTasksParams, AnnotationItem, CreateAnnotationItemParams, ListAnnotationItemsParams, APIKey, APIKeyWithSecret, CreateAPIKeyParams, UpdateAPIKeyParams, ListAPIKeysParams, APIKeyUsage, Webhook, CreateWebhookParams, UpdateWebhookParams, ListWebhooksParams, WebhookDelivery, ListWebhookDeliveriesParams, UsageStats, GetUsageParams, UsageSummary, LLMJudgeConfig, CreateLLMJudgeConfigParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, LLMJudgeAlignment, GetLLMJudgeAlignmentParams, Organization, } from './types';
34
- export { parseArgs, runCheck, EXIT, type CheckArgs } from './cli/check';
35
- import { AIEvalClient } from './client';
23
+ export { RequestBatcher } from "./batch";
24
+ export { CacheTTL, RequestCache } from "./cache";
25
+ export { type CheckArgs, EXIT, parseArgs, runCheck } from "./cli/check";
26
+ export { traceAnthropic } from "./integrations/anthropic";
27
+ export { traceOpenAI } from "./integrations/openai";
28
+ export { type OpenAIChatEvalCase, type OpenAIChatEvalOptions, type OpenAIChatEvalResult, openAIChatEval, } from "./integrations/openai-eval";
29
+ export { Logger } from "./logger";
30
+ export { extendExpectWithToPassGate } from "./matchers";
31
+ export { autoPaginate, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
32
+ export { batchProcess, batchRead, RateLimiter, streamEvaluation } from "./streaming";
33
+ export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";
34
+ export { EvaluationTemplates, type EvaluationTemplateType, type FeatureUsage, type OrganizationLimits, } from "./types";
35
+ export { type AgentHandoff, type AgentSpanContext, type CostCategory, type CostRecord, createWorkflowTracer, type DecisionAlternative, type DecisionType, type HandoffType, type LLMProvider, type RecordCostParams, type RecordDecisionParams, traceAutoGen, traceCrewAI, traceLangChainAgent, traceWorkflowStep, type WorkflowContext, type WorkflowDefinition, type WorkflowEdge, type WorkflowNode, type WorkflowStatus, WorkflowTracer, type WorkflowTracerOptions, } from "./workflows";
36
+ import { AIEvalClient } from "./client";
36
37
  export default AIEvalClient;
package/dist/index.js CHANGED
@@ -8,109 +8,115 @@
8
8
  * @packageDocumentation
9
9
  */
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
- exports.decodeCursor = exports.encodeCursor = exports.autoPaginate = exports.createPaginatedIterator = exports.PaginatedIterator = exports.CacheTTL = exports.RequestCache = exports.RateLimiter = exports.batchRead = exports.streamEvaluation = exports.batchProcess = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.hasValidCodeSyntax = exports.containsAllRequiredFields = exports.followsInstructions = exports.hasNoToxicity = exports.respondedWithinTime = exports.hasFactualAccuracy = exports.containsLanguage = exports.hasReadabilityScore = exports.matchesSchema = exports.hasNoHallucinations = exports.isValidURL = exports.isValidEmail = exports.withinRange = exports.similarTo = exports.hasSentiment = exports.notContainsPII = exports.containsJSON = exports.hasLength = exports.matchesPattern = exports.containsKeywords = exports.expect = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
12
- exports.EXIT = exports.runCheck = exports.parseArgs = exports.EvaluationTemplates = exports.traceAutoGen = exports.traceCrewAI = exports.traceLangChainAgent = exports.traceWorkflowStep = exports.createWorkflowTracer = exports.WorkflowTracer = exports.traceAnthropic = exports.traceOpenAI = exports.Logger = exports.RequestBatcher = void 0;
11
+ exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
12
+ exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = void 0;
13
13
  // Main SDK exports
14
14
  var client_1 = require("./client");
15
15
  Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
16
16
  // Enhanced error handling (Tier 1.5)
17
17
  const errors_1 = require("./errors");
18
- Object.defineProperty(exports, "EvalAIError", { enumerable: true, get: function () { return errors_1.EvalAIError; } });
19
- Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: function () { return errors_1.RateLimitError; } });
20
18
  Object.defineProperty(exports, "AuthenticationError", { enumerable: true, get: function () { return errors_1.AuthenticationError; } });
19
+ Object.defineProperty(exports, "EvalAIError", { enumerable: true, get: function () { return errors_1.EvalAIError; } });
21
20
  Object.defineProperty(exports, "NetworkError", { enumerable: true, get: function () { return errors_1.NetworkError; } });
21
+ Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: function () { return errors_1.RateLimitError; } });
22
22
  Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.SDKError; } });
23
23
  // Enhanced assertions (Tier 1.3)
24
24
  var assertions_1 = require("./assertions");
25
- Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
25
+ Object.defineProperty(exports, "containsAllRequiredFields", { enumerable: true, get: function () { return assertions_1.containsAllRequiredFields; } });
26
+ Object.defineProperty(exports, "containsJSON", { enumerable: true, get: function () { return assertions_1.containsJSON; } });
26
27
  Object.defineProperty(exports, "containsKeywords", { enumerable: true, get: function () { return assertions_1.containsKeywords; } });
27
- Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
28
+ Object.defineProperty(exports, "containsLanguage", { enumerable: true, get: function () { return assertions_1.containsLanguage; } });
29
+ Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
30
+ Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
31
+ Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
28
32
  Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
29
- Object.defineProperty(exports, "containsJSON", { enumerable: true, get: function () { return assertions_1.containsJSON; } });
30
- Object.defineProperty(exports, "notContainsPII", { enumerable: true, get: function () { return assertions_1.notContainsPII; } });
33
+ Object.defineProperty(exports, "hasNoHallucinations", { enumerable: true, get: function () { return assertions_1.hasNoHallucinations; } });
34
+ Object.defineProperty(exports, "hasNoToxicity", { enumerable: true, get: function () { return assertions_1.hasNoToxicity; } });
35
+ Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
31
36
  Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
32
- Object.defineProperty(exports, "similarTo", { enumerable: true, get: function () { return assertions_1.similarTo; } });
33
- Object.defineProperty(exports, "withinRange", { enumerable: true, get: function () { return assertions_1.withinRange; } });
37
+ Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
34
38
  Object.defineProperty(exports, "isValidEmail", { enumerable: true, get: function () { return assertions_1.isValidEmail; } });
35
39
  Object.defineProperty(exports, "isValidURL", { enumerable: true, get: function () { return assertions_1.isValidURL; } });
36
- Object.defineProperty(exports, "hasNoHallucinations", { enumerable: true, get: function () { return assertions_1.hasNoHallucinations; } });
40
+ Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
37
41
  Object.defineProperty(exports, "matchesSchema", { enumerable: true, get: function () { return assertions_1.matchesSchema; } });
38
- Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
39
- Object.defineProperty(exports, "containsLanguage", { enumerable: true, get: function () { return assertions_1.containsLanguage; } });
40
- Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
42
+ Object.defineProperty(exports, "notContainsPII", { enumerable: true, get: function () { return assertions_1.notContainsPII; } });
41
43
  Object.defineProperty(exports, "respondedWithinTime", { enumerable: true, get: function () { return assertions_1.respondedWithinTime; } });
42
- Object.defineProperty(exports, "hasNoToxicity", { enumerable: true, get: function () { return assertions_1.hasNoToxicity; } });
43
- Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
44
- Object.defineProperty(exports, "containsAllRequiredFields", { enumerable: true, get: function () { return assertions_1.containsAllRequiredFields; } });
45
- Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
44
+ Object.defineProperty(exports, "similarTo", { enumerable: true, get: function () { return assertions_1.similarTo; } });
45
+ Object.defineProperty(exports, "withinRange", { enumerable: true, get: function () { return assertions_1.withinRange; } });
46
46
  // Context propagation (Tier 2.9)
47
47
  const context_1 = require("./context");
48
48
  Object.defineProperty(exports, "createContext", { enumerable: true, get: function () { return context_1.createContext; } });
49
+ Object.defineProperty(exports, "ContextManager", { enumerable: true, get: function () { return context_1.EvalContext; } });
49
50
  Object.defineProperty(exports, "getContext", { enumerable: true, get: function () { return context_1.getCurrentContext; } });
50
51
  Object.defineProperty(exports, "withContext", { enumerable: true, get: function () { return context_1.withContext; } });
51
- Object.defineProperty(exports, "ContextManager", { enumerable: true, get: function () { return context_1.EvalContext; } });
52
52
  // Test suite builder (Tier 2.7)
53
53
  var testing_1 = require("./testing");
54
54
  Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: function () { return testing_1.createTestSuite; } });
55
55
  Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
56
56
  // Snapshot testing (Tier 2.8)
57
57
  const snapshot_1 = require("./snapshot");
58
- Object.defineProperty(exports, "snapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
59
- Object.defineProperty(exports, "saveSnapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
60
58
  Object.defineProperty(exports, "compareWithSnapshot", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
61
59
  Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
60
+ Object.defineProperty(exports, "snapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
61
+ Object.defineProperty(exports, "saveSnapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
62
62
  // Export/Import utilities (Tier 4.18)
63
63
  const export_1 = require("./export");
64
64
  Object.defineProperty(exports, "exportData", { enumerable: true, get: function () { return export_1.exportData; } });
65
65
  Object.defineProperty(exports, "importData", { enumerable: true, get: function () { return export_1.importData; } });
66
- // Streaming and batch processing (Tier 3.3)
67
- // Use functions from ./streaming module instead of these deprecated exports
68
- var streaming_1 = require("./streaming");
69
- Object.defineProperty(exports, "batchProcess", { enumerable: true, get: function () { return streaming_1.batchProcess; } });
70
- Object.defineProperty(exports, "streamEvaluation", { enumerable: true, get: function () { return streaming_1.streamEvaluation; } });
71
- Object.defineProperty(exports, "batchRead", { enumerable: true, get: function () { return streaming_1.batchRead; } });
72
- Object.defineProperty(exports, "RateLimiter", { enumerable: true, get: function () { return streaming_1.RateLimiter; } });
66
+ // Note: RequestBatcher is for advanced users only
67
+ // Most users don't need this - batching is automatic
68
+ var batch_1 = require("./batch");
69
+ Object.defineProperty(exports, "RequestBatcher", { enumerable: true, get: function () { return batch_1.RequestBatcher; } });
73
70
  // Performance optimization utilities (v1.3.0)
74
71
  // Note: RequestCache and CacheTTL are for advanced users only
75
72
  // Most users don't need these - caching is automatic
76
73
  var cache_1 = require("./cache");
77
- Object.defineProperty(exports, "RequestCache", { enumerable: true, get: function () { return cache_1.RequestCache; } });
78
74
  Object.defineProperty(exports, "CacheTTL", { enumerable: true, get: function () { return cache_1.CacheTTL; } });
79
- var pagination_1 = require("./pagination");
80
- Object.defineProperty(exports, "PaginatedIterator", { enumerable: true, get: function () { return pagination_1.PaginatedIterator; } });
81
- Object.defineProperty(exports, "createPaginatedIterator", { enumerable: true, get: function () { return pagination_1.createPaginatedIterator; } });
82
- Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
83
- Object.defineProperty(exports, "encodeCursor", { enumerable: true, get: function () { return pagination_1.encodeCursor; } });
84
- Object.defineProperty(exports, "decodeCursor", { enumerable: true, get: function () { return pagination_1.decodeCursor; } });
85
- // Note: RequestBatcher is for advanced users only
86
- // Most users don't need this - batching is automatic
87
- var batch_1 = require("./batch");
88
- Object.defineProperty(exports, "RequestBatcher", { enumerable: true, get: function () { return batch_1.RequestBatcher; } });
89
- // Debug logger (Tier 4.17)
90
- var logger_1 = require("./logger");
91
- Object.defineProperty(exports, "Logger", { enumerable: true, get: function () { return logger_1.Logger; } });
75
+ Object.defineProperty(exports, "RequestCache", { enumerable: true, get: function () { return cache_1.RequestCache; } });
76
+ // CLI (programmatic use)
77
+ var check_1 = require("./cli/check");
78
+ Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
79
+ Object.defineProperty(exports, "parseArgs", { enumerable: true, get: function () { return check_1.parseArgs; } });
80
+ Object.defineProperty(exports, "runCheck", { enumerable: true, get: function () { return check_1.runCheck; } });
81
+ var anthropic_1 = require("./integrations/anthropic");
82
+ Object.defineProperty(exports, "traceAnthropic", { enumerable: true, get: function () { return anthropic_1.traceAnthropic; } });
92
83
  // Framework integrations (Tier 1.2)
93
84
  var openai_1 = require("./integrations/openai");
94
85
  Object.defineProperty(exports, "traceOpenAI", { enumerable: true, get: function () { return openai_1.traceOpenAI; } });
95
- var anthropic_1 = require("./integrations/anthropic");
96
- Object.defineProperty(exports, "traceAnthropic", { enumerable: true, get: function () { return anthropic_1.traceAnthropic; } });
86
+ // OpenAI regression eval (local-first, no account required)
87
+ var openai_eval_1 = require("./integrations/openai-eval");
88
+ Object.defineProperty(exports, "openAIChatEval", { enumerable: true, get: function () { return openai_eval_1.openAIChatEval; } });
89
+ // Debug logger (Tier 4.17)
90
+ var logger_1 = require("./logger");
91
+ Object.defineProperty(exports, "Logger", { enumerable: true, get: function () { return logger_1.Logger; } });
92
+ // Vitest matcher: expect(await openAIChatEval(...)).toPassGate()
93
+ var matchers_1 = require("./matchers");
94
+ Object.defineProperty(exports, "extendExpectWithToPassGate", { enumerable: true, get: function () { return matchers_1.extendExpectWithToPassGate; } });
95
+ var pagination_1 = require("./pagination");
96
+ Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
97
+ Object.defineProperty(exports, "createPaginatedIterator", { enumerable: true, get: function () { return pagination_1.createPaginatedIterator; } });
98
+ Object.defineProperty(exports, "decodeCursor", { enumerable: true, get: function () { return pagination_1.decodeCursor; } });
99
+ Object.defineProperty(exports, "encodeCursor", { enumerable: true, get: function () { return pagination_1.encodeCursor; } });
100
+ Object.defineProperty(exports, "PaginatedIterator", { enumerable: true, get: function () { return pagination_1.PaginatedIterator; } });
101
+ // Streaming and batch processing (Tier 3.3)
102
+ // Use functions from ./streaming module instead of these deprecated exports
103
+ var streaming_1 = require("./streaming");
104
+ Object.defineProperty(exports, "batchProcess", { enumerable: true, get: function () { return streaming_1.batchProcess; } });
105
+ Object.defineProperty(exports, "batchRead", { enumerable: true, get: function () { return streaming_1.batchRead; } });
106
+ Object.defineProperty(exports, "RateLimiter", { enumerable: true, get: function () { return streaming_1.RateLimiter; } });
107
+ Object.defineProperty(exports, "streamEvaluation", { enumerable: true, get: function () { return streaming_1.streamEvaluation; } });
108
+ // New exports for v1.1.0
109
+ var types_1 = require("./types");
110
+ Object.defineProperty(exports, "EvaluationTemplates", { enumerable: true, get: function () { return types_1.EvaluationTemplates; } });
97
111
  // Workflow tracing (Orchestration Layer)
98
112
  var workflows_1 = require("./workflows");
99
- Object.defineProperty(exports, "WorkflowTracer", { enumerable: true, get: function () { return workflows_1.WorkflowTracer; } });
100
113
  Object.defineProperty(exports, "createWorkflowTracer", { enumerable: true, get: function () { return workflows_1.createWorkflowTracer; } });
101
- Object.defineProperty(exports, "traceWorkflowStep", { enumerable: true, get: function () { return workflows_1.traceWorkflowStep; } });
114
+ Object.defineProperty(exports, "traceAutoGen", { enumerable: true, get: function () { return workflows_1.traceAutoGen; } });
115
+ Object.defineProperty(exports, "traceCrewAI", { enumerable: true, get: function () { return workflows_1.traceCrewAI; } });
102
116
  // Framework integrations
103
117
  Object.defineProperty(exports, "traceLangChainAgent", { enumerable: true, get: function () { return workflows_1.traceLangChainAgent; } });
104
- Object.defineProperty(exports, "traceCrewAI", { enumerable: true, get: function () { return workflows_1.traceCrewAI; } });
105
- Object.defineProperty(exports, "traceAutoGen", { enumerable: true, get: function () { return workflows_1.traceAutoGen; } });
106
- // New exports for v1.1.0
107
- var types_1 = require("./types");
108
- Object.defineProperty(exports, "EvaluationTemplates", { enumerable: true, get: function () { return types_1.EvaluationTemplates; } });
109
- // CLI (programmatic use)
110
- var check_1 = require("./cli/check");
111
- Object.defineProperty(exports, "parseArgs", { enumerable: true, get: function () { return check_1.parseArgs; } });
112
- Object.defineProperty(exports, "runCheck", { enumerable: true, get: function () { return check_1.runCheck; } });
113
- Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
118
+ Object.defineProperty(exports, "traceWorkflowStep", { enumerable: true, get: function () { return workflows_1.traceWorkflowStep; } });
119
+ Object.defineProperty(exports, "WorkflowTracer", { enumerable: true, get: function () { return workflows_1.WorkflowTracer; } });
114
120
  // Default export for convenience
115
121
  const client_2 = require("./client");
116
122
  exports.default = client_2.AIEvalClient;
@@ -18,7 +18,7 @@
18
18
  * });
19
19
  * ```
20
20
  */
21
- import type { AIEvalClient } from '../client';
21
+ import type { AIEvalClient } from "../client";
22
22
  export interface AnthropicTraceOptions {
23
23
  /** Whether to capture input (default: true) */
24
24
  captureInput?: boolean;
@@ -43,7 +43,7 @@ const context_1 = require("../context");
43
43
  * ```
44
44
  */
45
45
  function traceAnthropic(anthropic, evalClient, options = {}) {
46
- const { captureInput = true, captureOutput = true, captureMetadata = true, organizationId, tracePrefix = 'anthropic' } = options;
46
+ const { captureInput = true, captureOutput = true, captureMetadata = true, organizationId, tracePrefix = "anthropic", } = options;
47
47
  // Create proxy for messages.create
48
48
  const originalCreate = anthropic.messages.create.bind(anthropic.messages);
49
49
  anthropic.messages.create = async (params, requestOptions) => {
@@ -60,18 +60,20 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
60
60
  max_tokens: params.max_tokens,
61
61
  ...(captureInput ? { input: params.messages } : {}),
62
62
  ...(captureOutput ? { output: message.content } : {}),
63
- ...(captureMetadata ? {
64
- usage: message.usage,
65
- stop_reason: message.stop_reason
66
- } : {})
63
+ ...(captureMetadata
64
+ ? {
65
+ usage: message.usage,
66
+ stop_reason: message.stop_reason,
67
+ }
68
+ : {}),
67
69
  });
68
70
  await evalClient.traces.create({
69
71
  name: `Anthropic: ${params.model}`,
70
72
  traceId,
71
73
  organizationId: organizationId || evalClient.getOrganizationId(),
72
- status: 'success',
74
+ status: "success",
73
75
  durationMs,
74
- metadata: traceMetadata
76
+ metadata: traceMetadata,
75
77
  });
76
78
  return message;
77
79
  }
@@ -84,16 +86,18 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
84
86
  max_tokens: params.max_tokens,
85
87
  ...(captureInput ? { input: params.messages } : {}),
86
88
  ...(captureMetadata ? { params } : {}),
87
- error: error instanceof Error ? error.message : String(error)
89
+ error: error instanceof Error ? error.message : String(error),
88
90
  });
89
- await evalClient.traces.create({
91
+ await evalClient.traces
92
+ .create({
90
93
  name: `Anthropic: ${params.model}`,
91
94
  traceId,
92
95
  organizationId: organizationId || evalClient.getOrganizationId(),
93
- status: 'error',
96
+ status: "error",
94
97
  durationMs,
95
- metadata: errorMetadata
96
- }).catch(() => {
98
+ metadata: errorMetadata,
99
+ })
100
+ .catch(() => {
97
101
  // Ignore errors in trace creation to avoid masking the original error
98
102
  });
99
103
  throw error;
@@ -127,8 +131,8 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
127
131
  name,
128
132
  traceId,
129
133
  organizationId: options.organizationId || evalClient.getOrganizationId(),
130
- status: 'pending',
131
- metadata: (0, context_1.mergeWithContext)({})
134
+ status: "pending",
135
+ metadata: (0, context_1.mergeWithContext)({}),
132
136
  });
133
137
  const result = await fn();
134
138
  const durationMs = Date.now() - startTime;
@@ -136,9 +140,9 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
136
140
  name,
137
141
  traceId,
138
142
  organizationId: options.organizationId || evalClient.getOrganizationId(),
139
- status: 'success',
143
+ status: "success",
140
144
  durationMs,
141
- metadata: (0, context_1.mergeWithContext)({})
145
+ metadata: (0, context_1.mergeWithContext)({}),
142
146
  });
143
147
  return result;
144
148
  }
@@ -148,11 +152,11 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
148
152
  name,
149
153
  traceId,
150
154
  organizationId: options.organizationId || evalClient.getOrganizationId(),
151
- status: 'error',
155
+ status: "error",
152
156
  durationMs,
153
157
  metadata: (0, context_1.mergeWithContext)({
154
- error: error instanceof Error ? error.message : String(error)
155
- })
158
+ error: error instanceof Error ? error.message : String(error),
159
+ }),
156
160
  });
157
161
  throw error;
158
162
  }
@@ -0,0 +1,57 @@
1
+ /**
2
+ * openAIChatEval — One-function OpenAI chat regression testing
3
+ *
4
+ * Run local regression tests with OpenAI. No EvalAI account required.
5
+ * CI-friendly output. Optional reportToEvalAI in v1.5.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import { openAIChatEval } from '@pauly4010/evalai-sdk';
10
+ *
11
+ * await openAIChatEval({
12
+ * name: 'chat-regression',
13
+ * cases: [
14
+ * { input: 'Hello', expectedOutput: 'greeting' },
15
+ * { input: '2 + 2 = ?', expectedOutput: '4' }
16
+ * ]
17
+ * });
18
+ * ```
19
+ */
20
+ import type { TestSuiteCaseResult } from "../testing";
21
+ export interface OpenAIChatEvalCase {
22
+ input: string;
23
+ expectedOutput?: string;
24
+ /** Platform test case ID. When provided, used directly for reportToEvalAI (no input matching). */
25
+ testCaseId?: number;
26
+ assertions?: ((output: string) => import("../assertions").AssertionResult)[];
27
+ }
28
+ export interface OpenAIChatEvalOptions {
29
+ name: string;
30
+ model?: string;
31
+ apiKey?: string;
32
+ cases: OpenAIChatEvalCase[];
33
+ /** Retry failing cases N times (default: 0). Only failing cases are retried. */
34
+ retries?: number;
35
+ /** v1.5: Upload results to EvalAI platform for an existing evaluation. Requires evaluationId and EVALAI_API_KEY. */
36
+ reportToEvalAI?: boolean;
37
+ /** Evaluation ID (from config or arg). Required when reportToEvalAI is true. */
38
+ evaluationId?: string;
39
+ /** EvalAI API base URL. Default: EVALAI_BASE_URL or http://localhost:3000 */
40
+ baseUrl?: string;
41
+ /** Idempotency key for import (e.g. CI run ID). Prevents duplicate runs on retry. */
42
+ idempotencyKey?: string;
43
+ }
44
+ export interface OpenAIChatEvalResult {
45
+ passed: number;
46
+ total: number;
47
+ score: number;
48
+ results: TestSuiteCaseResult[];
49
+ durationMs: number;
50
+ /** Case IDs that were retried (flaky recovery) */
51
+ retriedCases?: string[];
52
+ }
53
+ /**
54
+ * Run OpenAI chat regression tests locally.
55
+ * No EvalAI account required. Returns score and prints CI-friendly summary.
56
+ */
57
+ export declare function openAIChatEval(options: OpenAIChatEvalOptions): Promise<OpenAIChatEvalResult>;