@pauly4010/evalai-sdk 1.4.1 → 1.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +85 -0
- package/README.md +205 -543
- package/dist/assertions.d.ts +2 -2
- package/dist/assertions.js +104 -71
- package/dist/batch.js +12 -17
- package/dist/cache.js +7 -11
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +130 -0
- package/dist/cli/check.d.ts +28 -13
- package/dist/cli/check.js +249 -142
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +110 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +207 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/doctor.d.ts +11 -0
- package/dist/cli/doctor.js +82 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +130 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +107 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +101 -0
- package/dist/cli/formatters/types.d.ts +100 -0
- package/dist/cli/formatters/types.js +5 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +175 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +67 -23
- package/dist/cli/init.d.ts +7 -0
- package/dist/cli/init.js +69 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +83 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +124 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +83 -0
- package/dist/client.d.ts +2 -2
- package/dist/client.js +144 -132
- package/dist/context.d.ts +1 -1
- package/dist/context.js +4 -6
- package/dist/errors.d.ts +2 -0
- package/dist/errors.js +116 -107
- package/dist/export.d.ts +6 -6
- package/dist/export.js +39 -33
- package/dist/index.d.ts +25 -24
- package/dist/index.js +62 -56
- package/dist/integrations/anthropic.d.ts +1 -1
- package/dist/integrations/anthropic.js +23 -19
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +230 -0
- package/dist/integrations/openai.d.ts +1 -1
- package/dist/integrations/openai.js +23 -19
- package/dist/local.d.ts +2 -2
- package/dist/local.js +25 -25
- package/dist/logger.d.ts +1 -1
- package/dist/logger.js +24 -28
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +1 -1
- package/dist/pagination.js +6 -6
- package/dist/snapshot.js +24 -24
- package/dist/streaming.js +11 -11
- package/dist/testing.d.ts +6 -2
- package/dist/testing.js +30 -12
- package/dist/types.d.ts +22 -22
- package/dist/types.js +13 -13
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +38 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +7 -7
- package/dist/workflows.js +44 -44
- package/package.json +102 -90
- package/dist/__tests__/assertions.test.d.ts +0 -1
- package/dist/__tests__/assertions.test.js +0 -288
- package/dist/__tests__/client.test.d.ts +0 -1
- package/dist/__tests__/client.test.js +0 -185
- package/dist/__tests__/testing.test.d.ts +0 -1
- package/dist/__tests__/testing.test.js +0 -230
- package/dist/__tests__/workflows.test.d.ts +0 -1
- package/dist/__tests__/workflows.test.js +0 -222
package/dist/index.d.ts
CHANGED
|
@@ -6,31 +6,32 @@
|
|
|
6
6
|
*
|
|
7
7
|
* @packageDocumentation
|
|
8
8
|
*/
|
|
9
|
-
export { AIEvalClient } from
|
|
10
|
-
import {
|
|
9
|
+
export { AIEvalClient } from "./client";
|
|
10
|
+
import { AuthenticationError, EvalAIError, NetworkError, RateLimitError, SDKError } from "./errors";
|
|
11
11
|
export { EvalAIError, RateLimitError, AuthenticationError, SDKError as ValidationError, // Using SDKError as ValidationError for backward compatibility
|
|
12
|
-
NetworkError };
|
|
13
|
-
export {
|
|
14
|
-
import { createContext, getCurrentContext, withContext
|
|
15
|
-
export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager };
|
|
16
|
-
export { createTestSuite, TestSuite, TestSuiteCase, TestSuiteCaseResult,
|
|
17
|
-
import {
|
|
18
|
-
export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots };
|
|
19
|
-
import {
|
|
20
|
-
import
|
|
12
|
+
NetworkError, };
|
|
13
|
+
export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
|
|
14
|
+
import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
|
|
15
|
+
export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
|
|
16
|
+
export { createTestSuite, type TestCaseResult, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteConfig, TestSuiteResult, } from "./testing";
|
|
17
|
+
import { compareWithSnapshot, snapshot } from "./snapshot";
|
|
18
|
+
export { snapshot, compareWithSnapshot, snapshot as saveSnapshot, compareWithSnapshot as compareSnapshots, };
|
|
19
|
+
import type { ExportFormat } from "./export";
|
|
20
|
+
import { exportData, importData } from "./export";
|
|
21
21
|
export { exportData, importData };
|
|
22
22
|
export type { ExportFormat, ExportFormat as ExportType };
|
|
23
|
-
export {
|
|
24
|
-
export {
|
|
25
|
-
export {
|
|
26
|
-
export {
|
|
27
|
-
export {
|
|
28
|
-
export {
|
|
29
|
-
export {
|
|
30
|
-
export {
|
|
31
|
-
export
|
|
32
|
-
export {
|
|
33
|
-
export type { Annotation,
|
|
34
|
-
export {
|
|
35
|
-
|
|
23
|
+
export { RequestBatcher } from "./batch";
|
|
24
|
+
export { CacheTTL, RequestCache } from "./cache";
|
|
25
|
+
export { type CheckArgs, EXIT, parseArgs, runCheck } from "./cli/check";
|
|
26
|
+
export { traceAnthropic } from "./integrations/anthropic";
|
|
27
|
+
export { traceOpenAI } from "./integrations/openai";
|
|
28
|
+
export { type OpenAIChatEvalCase, type OpenAIChatEvalOptions, type OpenAIChatEvalResult, openAIChatEval, } from "./integrations/openai-eval";
|
|
29
|
+
export { Logger } from "./logger";
|
|
30
|
+
export { extendExpectWithToPassGate } from "./matchers";
|
|
31
|
+
export { autoPaginate, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
|
|
32
|
+
export { batchProcess, batchRead, RateLimiter, streamEvaluation } from "./streaming";
|
|
33
|
+
export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";
|
|
34
|
+
export { EvaluationTemplates, type EvaluationTemplateType, type FeatureUsage, type OrganizationLimits, } from "./types";
|
|
35
|
+
export { type AgentHandoff, type AgentSpanContext, type CostCategory, type CostRecord, createWorkflowTracer, type DecisionAlternative, type DecisionType, type HandoffType, type LLMProvider, type RecordCostParams, type RecordDecisionParams, traceAutoGen, traceCrewAI, traceLangChainAgent, traceWorkflowStep, type WorkflowContext, type WorkflowDefinition, type WorkflowEdge, type WorkflowNode, type WorkflowStatus, WorkflowTracer, type WorkflowTracerOptions, } from "./workflows";
|
|
36
|
+
import { AIEvalClient } from "./client";
|
|
36
37
|
export default AIEvalClient;
|
package/dist/index.js
CHANGED
|
@@ -8,109 +8,115 @@
|
|
|
8
8
|
* @packageDocumentation
|
|
9
9
|
*/
|
|
10
10
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
-
exports.
|
|
12
|
-
exports.
|
|
11
|
+
exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
|
|
12
|
+
exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = void 0;
|
|
13
13
|
// Main SDK exports
|
|
14
14
|
var client_1 = require("./client");
|
|
15
15
|
Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
|
|
16
16
|
// Enhanced error handling (Tier 1.5)
|
|
17
17
|
const errors_1 = require("./errors");
|
|
18
|
-
Object.defineProperty(exports, "EvalAIError", { enumerable: true, get: function () { return errors_1.EvalAIError; } });
|
|
19
|
-
Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: function () { return errors_1.RateLimitError; } });
|
|
20
18
|
Object.defineProperty(exports, "AuthenticationError", { enumerable: true, get: function () { return errors_1.AuthenticationError; } });
|
|
19
|
+
Object.defineProperty(exports, "EvalAIError", { enumerable: true, get: function () { return errors_1.EvalAIError; } });
|
|
21
20
|
Object.defineProperty(exports, "NetworkError", { enumerable: true, get: function () { return errors_1.NetworkError; } });
|
|
21
|
+
Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: function () { return errors_1.RateLimitError; } });
|
|
22
22
|
Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.SDKError; } });
|
|
23
23
|
// Enhanced assertions (Tier 1.3)
|
|
24
24
|
var assertions_1 = require("./assertions");
|
|
25
|
-
Object.defineProperty(exports, "
|
|
25
|
+
Object.defineProperty(exports, "containsAllRequiredFields", { enumerable: true, get: function () { return assertions_1.containsAllRequiredFields; } });
|
|
26
|
+
Object.defineProperty(exports, "containsJSON", { enumerable: true, get: function () { return assertions_1.containsJSON; } });
|
|
26
27
|
Object.defineProperty(exports, "containsKeywords", { enumerable: true, get: function () { return assertions_1.containsKeywords; } });
|
|
27
|
-
Object.defineProperty(exports, "
|
|
28
|
+
Object.defineProperty(exports, "containsLanguage", { enumerable: true, get: function () { return assertions_1.containsLanguage; } });
|
|
29
|
+
Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
|
|
30
|
+
Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
|
|
31
|
+
Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
|
|
28
32
|
Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
|
|
29
|
-
Object.defineProperty(exports, "
|
|
30
|
-
Object.defineProperty(exports, "
|
|
33
|
+
Object.defineProperty(exports, "hasNoHallucinations", { enumerable: true, get: function () { return assertions_1.hasNoHallucinations; } });
|
|
34
|
+
Object.defineProperty(exports, "hasNoToxicity", { enumerable: true, get: function () { return assertions_1.hasNoToxicity; } });
|
|
35
|
+
Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
|
|
31
36
|
Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
|
|
32
|
-
Object.defineProperty(exports, "
|
|
33
|
-
Object.defineProperty(exports, "withinRange", { enumerable: true, get: function () { return assertions_1.withinRange; } });
|
|
37
|
+
Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
|
|
34
38
|
Object.defineProperty(exports, "isValidEmail", { enumerable: true, get: function () { return assertions_1.isValidEmail; } });
|
|
35
39
|
Object.defineProperty(exports, "isValidURL", { enumerable: true, get: function () { return assertions_1.isValidURL; } });
|
|
36
|
-
Object.defineProperty(exports, "
|
|
40
|
+
Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
|
|
37
41
|
Object.defineProperty(exports, "matchesSchema", { enumerable: true, get: function () { return assertions_1.matchesSchema; } });
|
|
38
|
-
Object.defineProperty(exports, "
|
|
39
|
-
Object.defineProperty(exports, "containsLanguage", { enumerable: true, get: function () { return assertions_1.containsLanguage; } });
|
|
40
|
-
Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
|
|
42
|
+
Object.defineProperty(exports, "notContainsPII", { enumerable: true, get: function () { return assertions_1.notContainsPII; } });
|
|
41
43
|
Object.defineProperty(exports, "respondedWithinTime", { enumerable: true, get: function () { return assertions_1.respondedWithinTime; } });
|
|
42
|
-
Object.defineProperty(exports, "
|
|
43
|
-
Object.defineProperty(exports, "
|
|
44
|
-
Object.defineProperty(exports, "containsAllRequiredFields", { enumerable: true, get: function () { return assertions_1.containsAllRequiredFields; } });
|
|
45
|
-
Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
|
|
44
|
+
Object.defineProperty(exports, "similarTo", { enumerable: true, get: function () { return assertions_1.similarTo; } });
|
|
45
|
+
Object.defineProperty(exports, "withinRange", { enumerable: true, get: function () { return assertions_1.withinRange; } });
|
|
46
46
|
// Context propagation (Tier 2.9)
|
|
47
47
|
const context_1 = require("./context");
|
|
48
48
|
Object.defineProperty(exports, "createContext", { enumerable: true, get: function () { return context_1.createContext; } });
|
|
49
|
+
Object.defineProperty(exports, "ContextManager", { enumerable: true, get: function () { return context_1.EvalContext; } });
|
|
49
50
|
Object.defineProperty(exports, "getContext", { enumerable: true, get: function () { return context_1.getCurrentContext; } });
|
|
50
51
|
Object.defineProperty(exports, "withContext", { enumerable: true, get: function () { return context_1.withContext; } });
|
|
51
|
-
Object.defineProperty(exports, "ContextManager", { enumerable: true, get: function () { return context_1.EvalContext; } });
|
|
52
52
|
// Test suite builder (Tier 2.7)
|
|
53
53
|
var testing_1 = require("./testing");
|
|
54
54
|
Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: function () { return testing_1.createTestSuite; } });
|
|
55
55
|
Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
|
|
56
56
|
// Snapshot testing (Tier 2.8)
|
|
57
57
|
const snapshot_1 = require("./snapshot");
|
|
58
|
-
Object.defineProperty(exports, "snapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
|
|
59
|
-
Object.defineProperty(exports, "saveSnapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
|
|
60
58
|
Object.defineProperty(exports, "compareWithSnapshot", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
|
|
61
59
|
Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
|
|
60
|
+
Object.defineProperty(exports, "snapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
|
|
61
|
+
Object.defineProperty(exports, "saveSnapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
|
|
62
62
|
// Export/Import utilities (Tier 4.18)
|
|
63
63
|
const export_1 = require("./export");
|
|
64
64
|
Object.defineProperty(exports, "exportData", { enumerable: true, get: function () { return export_1.exportData; } });
|
|
65
65
|
Object.defineProperty(exports, "importData", { enumerable: true, get: function () { return export_1.importData; } });
|
|
66
|
-
//
|
|
67
|
-
//
|
|
68
|
-
var
|
|
69
|
-
Object.defineProperty(exports, "
|
|
70
|
-
Object.defineProperty(exports, "streamEvaluation", { enumerable: true, get: function () { return streaming_1.streamEvaluation; } });
|
|
71
|
-
Object.defineProperty(exports, "batchRead", { enumerable: true, get: function () { return streaming_1.batchRead; } });
|
|
72
|
-
Object.defineProperty(exports, "RateLimiter", { enumerable: true, get: function () { return streaming_1.RateLimiter; } });
|
|
66
|
+
// Note: RequestBatcher is for advanced users only
|
|
67
|
+
// Most users don't need this - batching is automatic
|
|
68
|
+
var batch_1 = require("./batch");
|
|
69
|
+
Object.defineProperty(exports, "RequestBatcher", { enumerable: true, get: function () { return batch_1.RequestBatcher; } });
|
|
73
70
|
// Performance optimization utilities (v1.3.0)
|
|
74
71
|
// Note: RequestCache and CacheTTL are for advanced users only
|
|
75
72
|
// Most users don't need these - caching is automatic
|
|
76
73
|
var cache_1 = require("./cache");
|
|
77
|
-
Object.defineProperty(exports, "RequestCache", { enumerable: true, get: function () { return cache_1.RequestCache; } });
|
|
78
74
|
Object.defineProperty(exports, "CacheTTL", { enumerable: true, get: function () { return cache_1.CacheTTL; } });
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
Object.defineProperty(exports, "
|
|
83
|
-
Object.defineProperty(exports, "
|
|
84
|
-
Object.defineProperty(exports, "
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
var batch_1 = require("./batch");
|
|
88
|
-
Object.defineProperty(exports, "RequestBatcher", { enumerable: true, get: function () { return batch_1.RequestBatcher; } });
|
|
89
|
-
// Debug logger (Tier 4.17)
|
|
90
|
-
var logger_1 = require("./logger");
|
|
91
|
-
Object.defineProperty(exports, "Logger", { enumerable: true, get: function () { return logger_1.Logger; } });
|
|
75
|
+
Object.defineProperty(exports, "RequestCache", { enumerable: true, get: function () { return cache_1.RequestCache; } });
|
|
76
|
+
// CLI (programmatic use)
|
|
77
|
+
var check_1 = require("./cli/check");
|
|
78
|
+
Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
|
|
79
|
+
Object.defineProperty(exports, "parseArgs", { enumerable: true, get: function () { return check_1.parseArgs; } });
|
|
80
|
+
Object.defineProperty(exports, "runCheck", { enumerable: true, get: function () { return check_1.runCheck; } });
|
|
81
|
+
var anthropic_1 = require("./integrations/anthropic");
|
|
82
|
+
Object.defineProperty(exports, "traceAnthropic", { enumerable: true, get: function () { return anthropic_1.traceAnthropic; } });
|
|
92
83
|
// Framework integrations (Tier 1.2)
|
|
93
84
|
var openai_1 = require("./integrations/openai");
|
|
94
85
|
Object.defineProperty(exports, "traceOpenAI", { enumerable: true, get: function () { return openai_1.traceOpenAI; } });
|
|
95
|
-
|
|
96
|
-
|
|
86
|
+
// OpenAI regression eval (local-first, no account required)
|
|
87
|
+
var openai_eval_1 = require("./integrations/openai-eval");
|
|
88
|
+
Object.defineProperty(exports, "openAIChatEval", { enumerable: true, get: function () { return openai_eval_1.openAIChatEval; } });
|
|
89
|
+
// Debug logger (Tier 4.17)
|
|
90
|
+
var logger_1 = require("./logger");
|
|
91
|
+
Object.defineProperty(exports, "Logger", { enumerable: true, get: function () { return logger_1.Logger; } });
|
|
92
|
+
// Vitest matcher: expect(await openAIChatEval(...)).toPassGate()
|
|
93
|
+
var matchers_1 = require("./matchers");
|
|
94
|
+
Object.defineProperty(exports, "extendExpectWithToPassGate", { enumerable: true, get: function () { return matchers_1.extendExpectWithToPassGate; } });
|
|
95
|
+
var pagination_1 = require("./pagination");
|
|
96
|
+
Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
|
|
97
|
+
Object.defineProperty(exports, "createPaginatedIterator", { enumerable: true, get: function () { return pagination_1.createPaginatedIterator; } });
|
|
98
|
+
Object.defineProperty(exports, "decodeCursor", { enumerable: true, get: function () { return pagination_1.decodeCursor; } });
|
|
99
|
+
Object.defineProperty(exports, "encodeCursor", { enumerable: true, get: function () { return pagination_1.encodeCursor; } });
|
|
100
|
+
Object.defineProperty(exports, "PaginatedIterator", { enumerable: true, get: function () { return pagination_1.PaginatedIterator; } });
|
|
101
|
+
// Streaming and batch processing (Tier 3.3)
|
|
102
|
+
// Use functions from ./streaming module instead of these deprecated exports
|
|
103
|
+
var streaming_1 = require("./streaming");
|
|
104
|
+
Object.defineProperty(exports, "batchProcess", { enumerable: true, get: function () { return streaming_1.batchProcess; } });
|
|
105
|
+
Object.defineProperty(exports, "batchRead", { enumerable: true, get: function () { return streaming_1.batchRead; } });
|
|
106
|
+
Object.defineProperty(exports, "RateLimiter", { enumerable: true, get: function () { return streaming_1.RateLimiter; } });
|
|
107
|
+
Object.defineProperty(exports, "streamEvaluation", { enumerable: true, get: function () { return streaming_1.streamEvaluation; } });
|
|
108
|
+
// New exports for v1.1.0
|
|
109
|
+
var types_1 = require("./types");
|
|
110
|
+
Object.defineProperty(exports, "EvaluationTemplates", { enumerable: true, get: function () { return types_1.EvaluationTemplates; } });
|
|
97
111
|
// Workflow tracing (Orchestration Layer)
|
|
98
112
|
var workflows_1 = require("./workflows");
|
|
99
|
-
Object.defineProperty(exports, "WorkflowTracer", { enumerable: true, get: function () { return workflows_1.WorkflowTracer; } });
|
|
100
113
|
Object.defineProperty(exports, "createWorkflowTracer", { enumerable: true, get: function () { return workflows_1.createWorkflowTracer; } });
|
|
101
|
-
Object.defineProperty(exports, "
|
|
114
|
+
Object.defineProperty(exports, "traceAutoGen", { enumerable: true, get: function () { return workflows_1.traceAutoGen; } });
|
|
115
|
+
Object.defineProperty(exports, "traceCrewAI", { enumerable: true, get: function () { return workflows_1.traceCrewAI; } });
|
|
102
116
|
// Framework integrations
|
|
103
117
|
Object.defineProperty(exports, "traceLangChainAgent", { enumerable: true, get: function () { return workflows_1.traceLangChainAgent; } });
|
|
104
|
-
Object.defineProperty(exports, "
|
|
105
|
-
Object.defineProperty(exports, "
|
|
106
|
-
// New exports for v1.1.0
|
|
107
|
-
var types_1 = require("./types");
|
|
108
|
-
Object.defineProperty(exports, "EvaluationTemplates", { enumerable: true, get: function () { return types_1.EvaluationTemplates; } });
|
|
109
|
-
// CLI (programmatic use)
|
|
110
|
-
var check_1 = require("./cli/check");
|
|
111
|
-
Object.defineProperty(exports, "parseArgs", { enumerable: true, get: function () { return check_1.parseArgs; } });
|
|
112
|
-
Object.defineProperty(exports, "runCheck", { enumerable: true, get: function () { return check_1.runCheck; } });
|
|
113
|
-
Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
|
|
118
|
+
Object.defineProperty(exports, "traceWorkflowStep", { enumerable: true, get: function () { return workflows_1.traceWorkflowStep; } });
|
|
119
|
+
Object.defineProperty(exports, "WorkflowTracer", { enumerable: true, get: function () { return workflows_1.WorkflowTracer; } });
|
|
114
120
|
// Default export for convenience
|
|
115
121
|
const client_2 = require("./client");
|
|
116
122
|
exports.default = client_2.AIEvalClient;
|
|
@@ -43,7 +43,7 @@ const context_1 = require("../context");
|
|
|
43
43
|
* ```
|
|
44
44
|
*/
|
|
45
45
|
function traceAnthropic(anthropic, evalClient, options = {}) {
|
|
46
|
-
const { captureInput = true, captureOutput = true, captureMetadata = true, organizationId, tracePrefix =
|
|
46
|
+
const { captureInput = true, captureOutput = true, captureMetadata = true, organizationId, tracePrefix = "anthropic", } = options;
|
|
47
47
|
// Create proxy for messages.create
|
|
48
48
|
const originalCreate = anthropic.messages.create.bind(anthropic.messages);
|
|
49
49
|
anthropic.messages.create = async (params, requestOptions) => {
|
|
@@ -60,18 +60,20 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
|
|
|
60
60
|
max_tokens: params.max_tokens,
|
|
61
61
|
...(captureInput ? { input: params.messages } : {}),
|
|
62
62
|
...(captureOutput ? { output: message.content } : {}),
|
|
63
|
-
...(captureMetadata
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
63
|
+
...(captureMetadata
|
|
64
|
+
? {
|
|
65
|
+
usage: message.usage,
|
|
66
|
+
stop_reason: message.stop_reason,
|
|
67
|
+
}
|
|
68
|
+
: {}),
|
|
67
69
|
});
|
|
68
70
|
await evalClient.traces.create({
|
|
69
71
|
name: `Anthropic: ${params.model}`,
|
|
70
72
|
traceId,
|
|
71
73
|
organizationId: organizationId || evalClient.getOrganizationId(),
|
|
72
|
-
status:
|
|
74
|
+
status: "success",
|
|
73
75
|
durationMs,
|
|
74
|
-
metadata: traceMetadata
|
|
76
|
+
metadata: traceMetadata,
|
|
75
77
|
});
|
|
76
78
|
return message;
|
|
77
79
|
}
|
|
@@ -84,16 +86,18 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
|
|
|
84
86
|
max_tokens: params.max_tokens,
|
|
85
87
|
...(captureInput ? { input: params.messages } : {}),
|
|
86
88
|
...(captureMetadata ? { params } : {}),
|
|
87
|
-
error: error instanceof Error ? error.message : String(error)
|
|
89
|
+
error: error instanceof Error ? error.message : String(error),
|
|
88
90
|
});
|
|
89
|
-
await evalClient.traces
|
|
91
|
+
await evalClient.traces
|
|
92
|
+
.create({
|
|
90
93
|
name: `Anthropic: ${params.model}`,
|
|
91
94
|
traceId,
|
|
92
95
|
organizationId: organizationId || evalClient.getOrganizationId(),
|
|
93
|
-
status:
|
|
96
|
+
status: "error",
|
|
94
97
|
durationMs,
|
|
95
|
-
metadata: errorMetadata
|
|
96
|
-
})
|
|
98
|
+
metadata: errorMetadata,
|
|
99
|
+
})
|
|
100
|
+
.catch(() => {
|
|
97
101
|
// Ignore errors in trace creation to avoid masking the original error
|
|
98
102
|
});
|
|
99
103
|
throw error;
|
|
@@ -127,8 +131,8 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
|
|
|
127
131
|
name,
|
|
128
132
|
traceId,
|
|
129
133
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
130
|
-
status:
|
|
131
|
-
metadata: (0, context_1.mergeWithContext)({})
|
|
134
|
+
status: "pending",
|
|
135
|
+
metadata: (0, context_1.mergeWithContext)({}),
|
|
132
136
|
});
|
|
133
137
|
const result = await fn();
|
|
134
138
|
const durationMs = Date.now() - startTime;
|
|
@@ -136,9 +140,9 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
|
|
|
136
140
|
name,
|
|
137
141
|
traceId,
|
|
138
142
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
139
|
-
status:
|
|
143
|
+
status: "success",
|
|
140
144
|
durationMs,
|
|
141
|
-
metadata: (0, context_1.mergeWithContext)({})
|
|
145
|
+
metadata: (0, context_1.mergeWithContext)({}),
|
|
142
146
|
});
|
|
143
147
|
return result;
|
|
144
148
|
}
|
|
@@ -148,11 +152,11 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
|
|
|
148
152
|
name,
|
|
149
153
|
traceId,
|
|
150
154
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
151
|
-
status:
|
|
155
|
+
status: "error",
|
|
152
156
|
durationMs,
|
|
153
157
|
metadata: (0, context_1.mergeWithContext)({
|
|
154
|
-
error: error instanceof Error ? error.message : String(error)
|
|
155
|
-
})
|
|
158
|
+
error: error instanceof Error ? error.message : String(error),
|
|
159
|
+
}),
|
|
156
160
|
});
|
|
157
161
|
throw error;
|
|
158
162
|
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* openAIChatEval — One-function OpenAI chat regression testing
|
|
3
|
+
*
|
|
4
|
+
* Run local regression tests with OpenAI. No EvalAI account required.
|
|
5
|
+
* CI-friendly output. Optional reportToEvalAI in v1.5.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* import { openAIChatEval } from '@pauly4010/evalai-sdk';
|
|
10
|
+
*
|
|
11
|
+
* await openAIChatEval({
|
|
12
|
+
* name: 'chat-regression',
|
|
13
|
+
* cases: [
|
|
14
|
+
* { input: 'Hello', expectedOutput: 'greeting' },
|
|
15
|
+
* { input: '2 + 2 = ?', expectedOutput: '4' }
|
|
16
|
+
* ]
|
|
17
|
+
* });
|
|
18
|
+
* ```
|
|
19
|
+
*/
|
|
20
|
+
import type { TestSuiteCaseResult } from "../testing";
|
|
21
|
+
export interface OpenAIChatEvalCase {
|
|
22
|
+
input: string;
|
|
23
|
+
expectedOutput?: string;
|
|
24
|
+
/** Platform test case ID. When provided, used directly for reportToEvalAI (no input matching). */
|
|
25
|
+
testCaseId?: number;
|
|
26
|
+
assertions?: ((output: string) => import("../assertions").AssertionResult)[];
|
|
27
|
+
}
|
|
28
|
+
export interface OpenAIChatEvalOptions {
|
|
29
|
+
name: string;
|
|
30
|
+
model?: string;
|
|
31
|
+
apiKey?: string;
|
|
32
|
+
cases: OpenAIChatEvalCase[];
|
|
33
|
+
/** Retry failing cases N times (default: 0). Only failing cases are retried. */
|
|
34
|
+
retries?: number;
|
|
35
|
+
/** v1.5: Upload results to EvalAI platform for an existing evaluation. Requires evaluationId and EVALAI_API_KEY. */
|
|
36
|
+
reportToEvalAI?: boolean;
|
|
37
|
+
/** Evaluation ID (from config or arg). Required when reportToEvalAI is true. */
|
|
38
|
+
evaluationId?: string;
|
|
39
|
+
/** EvalAI API base URL. Default: EVALAI_BASE_URL or http://localhost:3000 */
|
|
40
|
+
baseUrl?: string;
|
|
41
|
+
/** Idempotency key for import (e.g. CI run ID). Prevents duplicate runs on retry. */
|
|
42
|
+
idempotencyKey?: string;
|
|
43
|
+
}
|
|
44
|
+
export interface OpenAIChatEvalResult {
|
|
45
|
+
passed: number;
|
|
46
|
+
total: number;
|
|
47
|
+
score: number;
|
|
48
|
+
results: TestSuiteCaseResult[];
|
|
49
|
+
durationMs: number;
|
|
50
|
+
/** Case IDs that were retried (flaky recovery) */
|
|
51
|
+
retriedCases?: string[];
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Run OpenAI chat regression tests locally.
|
|
55
|
+
* No EvalAI account required. Returns score and prints CI-friendly summary.
|
|
56
|
+
*/
|
|
57
|
+
export declare function openAIChatEval(options: OpenAIChatEvalOptions): Promise<OpenAIChatEvalResult>;
|