@evalgate/sdk 2.2.1 → 2.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +70 -1
- package/README.md +36 -7
- package/dist/assertions.d.ts +67 -5
- package/dist/assertions.js +733 -45
- package/dist/cache.d.ts +1 -1
- package/dist/cache.js +1 -1
- package/dist/cli/upgrade.js +5 -0
- package/dist/client.js +1 -1
- package/dist/errors.js +7 -0
- package/dist/export.d.ts +1 -1
- package/dist/export.js +3 -3
- package/dist/index.d.ts +4 -4
- package/dist/index.js +14 -3
- package/dist/integrations/anthropic.js +6 -6
- package/dist/integrations/openai.js +6 -6
- package/dist/pagination.d.ts +13 -2
- package/dist/pagination.js +28 -2
- package/dist/runtime/adapters/testsuite-to-dsl.js +1 -6
- package/dist/runtime/executor.d.ts +3 -2
- package/dist/runtime/executor.js +3 -2
- package/dist/runtime/registry.d.ts +4 -1
- package/dist/runtime/registry.js +4 -1
- package/dist/snapshot.d.ts +14 -2
- package/dist/snapshot.js +30 -4
- package/dist/types.d.ts +7 -2
- package/dist/types.js +7 -2
- package/dist/version.d.ts +2 -2
- package/dist/version.js +2 -2
- package/dist/workflows.js +6 -1
- package/package.json +2 -2
package/dist/cache.d.ts
CHANGED
|
@@ -21,7 +21,7 @@ export declare class RequestCache {
|
|
|
21
21
|
/**
|
|
22
22
|
* Store response in cache
|
|
23
23
|
*/
|
|
24
|
-
set<T>(method: string, url: string, data: T, ttl
|
|
24
|
+
set<T>(method: string, url: string, data: T, ttl?: number, params?: unknown): void;
|
|
25
25
|
/**
|
|
26
26
|
* Invalidate specific cache entry
|
|
27
27
|
*/
|
package/dist/cache.js
CHANGED
|
@@ -43,7 +43,7 @@ class RequestCache {
|
|
|
43
43
|
/**
|
|
44
44
|
* Store response in cache
|
|
45
45
|
*/
|
|
46
|
-
set(method, url, data, ttl, params) {
|
|
46
|
+
set(method, url, data, ttl = exports.CacheTTL.MEDIUM, params) {
|
|
47
47
|
// Enforce cache size limit (LRU-style)
|
|
48
48
|
if (this.cache.size >= this.maxSize) {
|
|
49
49
|
const firstKey = this.cache.keys().next().value;
|
package/dist/cli/upgrade.js
CHANGED
|
@@ -480,7 +480,12 @@ After upgrading:
|
|
|
480
480
|
console.log(" - package.json eval:regression-gate + eval:baseline-update");
|
|
481
481
|
console.log(" - .github/workflows/ Gate + governance workflows");
|
|
482
482
|
console.log(" - .github/CODEOWNERS Baseline requires approval\n");
|
|
483
|
+
console.log(" ⚠️ IMPORTANT — Reset your baseline before pushing:");
|
|
484
|
+
console.log(" The gate compares against your existing Tier 1 baseline.");
|
|
485
|
+
console.log(" If your test script changed, run this first to avoid an immediate regression:");
|
|
486
|
+
console.log(" npx evalgate baseline update (or: pnpm eval:baseline-update)\n");
|
|
483
487
|
console.log(" Next:");
|
|
488
|
+
console.log(" npx evalgate baseline update");
|
|
484
489
|
console.log(" git add -A");
|
|
485
490
|
console.log(" git commit -m 'chore: upgrade EvalGate gate to Tier 2'");
|
|
486
491
|
console.log(" git push\n");
|
package/dist/client.js
CHANGED
|
@@ -72,7 +72,7 @@ class AIEvalClient {
|
|
|
72
72
|
this.baseUrl =
|
|
73
73
|
config.baseUrl ||
|
|
74
74
|
getEnvVar("EVALGATE_BASE_URL", "EVALAI_BASE_URL") ||
|
|
75
|
-
(isBrowser ? "" : "
|
|
75
|
+
(isBrowser ? "" : "https://api.evalgate.com");
|
|
76
76
|
this.timeout = config.timeout || 30000;
|
|
77
77
|
// Tier 4.17: Debug mode with request logging
|
|
78
78
|
const logLevel = config.logLevel || (config.debug ? "debug" : "info");
|
package/dist/errors.js
CHANGED
|
@@ -271,6 +271,10 @@ class RateLimitError extends EvalGateError {
|
|
|
271
271
|
constructor(message, retryAfter) {
|
|
272
272
|
super(message, "RATE_LIMIT_EXCEEDED", 429, { retryAfter });
|
|
273
273
|
this.name = "RateLimitError";
|
|
274
|
+
if (retryAfter !== undefined) {
|
|
275
|
+
this.retryAfter = retryAfter;
|
|
276
|
+
}
|
|
277
|
+
Object.setPrototypeOf(this, RateLimitError.prototype);
|
|
274
278
|
}
|
|
275
279
|
}
|
|
276
280
|
exports.RateLimitError = RateLimitError;
|
|
@@ -278,6 +282,7 @@ class AuthenticationError extends EvalGateError {
|
|
|
278
282
|
constructor(message = "Authentication failed") {
|
|
279
283
|
super(message, "AUTHENTICATION_ERROR", 401);
|
|
280
284
|
this.name = "AuthenticationError";
|
|
285
|
+
Object.setPrototypeOf(this, AuthenticationError.prototype);
|
|
281
286
|
}
|
|
282
287
|
}
|
|
283
288
|
exports.AuthenticationError = AuthenticationError;
|
|
@@ -285,6 +290,7 @@ class ValidationError extends EvalGateError {
|
|
|
285
290
|
constructor(message = "Validation failed", details) {
|
|
286
291
|
super(message, "VALIDATION_ERROR", 400, details);
|
|
287
292
|
this.name = "ValidationError";
|
|
293
|
+
Object.setPrototypeOf(this, ValidationError.prototype);
|
|
288
294
|
}
|
|
289
295
|
}
|
|
290
296
|
exports.ValidationError = ValidationError;
|
|
@@ -293,6 +299,7 @@ class NetworkError extends EvalGateError {
|
|
|
293
299
|
super(message, "NETWORK_ERROR", 0);
|
|
294
300
|
this.name = "NetworkError";
|
|
295
301
|
this.retryable = true;
|
|
302
|
+
Object.setPrototypeOf(this, NetworkError.prototype);
|
|
296
303
|
}
|
|
297
304
|
}
|
|
298
305
|
exports.NetworkError = NetworkError;
|
package/dist/export.d.ts
CHANGED
|
@@ -140,7 +140,7 @@ export declare function exportData(client: AIEvalClient, options: ExportOptions)
|
|
|
140
140
|
* console.log(`Imported ${result.summary.imported} items`);
|
|
141
141
|
* ```
|
|
142
142
|
*/
|
|
143
|
-
export declare function importData(client: AIEvalClient, data: ExportData, options
|
|
143
|
+
export declare function importData(client: AIEvalClient, data: ExportData, options?: ImportOptions): Promise<ImportResult>;
|
|
144
144
|
/**
|
|
145
145
|
* Export data to JSON file
|
|
146
146
|
*
|
package/dist/export.js
CHANGED
|
@@ -136,7 +136,7 @@ async function exportData(client, options) {
|
|
|
136
136
|
* console.log(`Imported ${result.summary.imported} items`);
|
|
137
137
|
* ```
|
|
138
138
|
*/
|
|
139
|
-
async function importData(client, data, options) {
|
|
139
|
+
async function importData(client, data, options = {}) {
|
|
140
140
|
const result = {
|
|
141
141
|
summary: { total: 0, imported: 0, skipped: 0, failed: 0 },
|
|
142
142
|
details: {},
|
|
@@ -155,7 +155,7 @@ async function importData(client, data, options) {
|
|
|
155
155
|
return result;
|
|
156
156
|
}
|
|
157
157
|
// Import traces
|
|
158
|
-
if (data.traces) {
|
|
158
|
+
if (data.traces && client?.traces) {
|
|
159
159
|
const traceResults = { imported: 0, skipped: 0, failed: 0 };
|
|
160
160
|
for (const trace of data.traces) {
|
|
161
161
|
try {
|
|
@@ -191,7 +191,7 @@ async function importData(client, data, options) {
|
|
|
191
191
|
result.summary.total += data.traces.length;
|
|
192
192
|
}
|
|
193
193
|
// Import evaluations
|
|
194
|
-
if (data.evaluations) {
|
|
194
|
+
if (data.evaluations && client?.evaluations) {
|
|
195
195
|
const evalResults = { imported: 0, skipped: 0, failed: 0 };
|
|
196
196
|
for (const evaluation of data.evaluations) {
|
|
197
197
|
try {
|
package/dist/index.d.ts
CHANGED
|
@@ -10,7 +10,7 @@ export { AIEvalClient } from "./client";
|
|
|
10
10
|
import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, SDKError } from "./errors";
|
|
11
11
|
export { EvalGateError, RateLimitError, AuthenticationError, SDKError as ValidationError, // Using SDKError as ValidationError for backward compatibility
|
|
12
12
|
NetworkError, };
|
|
13
|
-
export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasPII, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
|
|
13
|
+
export { type AssertionLLMConfig, configureAssertions, containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, containsLanguageAsync, expect, followsInstructions, getAssertionConfig, hasFactualAccuracy, hasFactualAccuracyAsync, hasLength, hasNoHallucinations, hasNoHallucinationsAsync, hasNoToxicity, hasNoToxicityAsync, hasPII, hasReadabilityScore, hasSentiment, hasSentimentAsync, hasValidCodeSyntax, hasValidCodeSyntaxAsync, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
|
|
14
14
|
import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
|
|
15
15
|
export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
|
|
16
16
|
export { cloneContext, mergeContexts, validateContext, } from "./runtime/context";
|
|
@@ -20,8 +20,8 @@ export { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRun
|
|
|
20
20
|
export type { CloudExecutor, DefineEvalFunction, EvalContext, EvalExecutor, EvalExecutorInterface, EvalOptions, EvalResult, EvalRuntime, EvalSpec, ExecutorCapabilities, LocalExecutor, SpecConfig, SpecOptions, WorkerExecutor, } from "./runtime/types";
|
|
21
21
|
export { EvalRuntimeError, RuntimeError, SpecExecutionError, SpecRegistrationError, } from "./runtime/types";
|
|
22
22
|
export { createTestSuite, type TestCaseResult, TestSuite, TestSuiteCase, TestSuiteCaseResult, TestSuiteConfig, TestSuiteResult, } from "./testing";
|
|
23
|
-
import { compareWithSnapshot, snapshot } from "./snapshot";
|
|
24
|
-
export { snapshot, compareWithSnapshot, snapshot as saveSnapshot,
|
|
23
|
+
import { compareSnapshots, compareWithSnapshot, snapshot } from "./snapshot";
|
|
24
|
+
export { snapshot, compareWithSnapshot, compareSnapshots, snapshot as saveSnapshot, };
|
|
25
25
|
import type { ExportFormat } from "./export";
|
|
26
26
|
import { exportData, importData } from "./export";
|
|
27
27
|
export { exportData, importData };
|
|
@@ -34,7 +34,7 @@ export { traceOpenAI } from "./integrations/openai";
|
|
|
34
34
|
export { type OpenAIChatEvalCase, type OpenAIChatEvalOptions, type OpenAIChatEvalResult, openAIChatEval, } from "./integrations/openai-eval";
|
|
35
35
|
export { Logger } from "./logger";
|
|
36
36
|
export { extendExpectWithToPassGate } from "./matchers";
|
|
37
|
-
export { autoPaginate, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
|
|
37
|
+
export { autoPaginate, autoPaginateGenerator, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
|
|
38
38
|
export { ARTIFACTS, type Baseline, type BaselineTolerance, GATE_CATEGORY, GATE_EXIT, type GateCategory, type GateExitCode, REPORT_SCHEMA_VERSION, type RegressionDelta, type RegressionReport, } from "./regression";
|
|
39
39
|
export { batchProcess, batchRead, RateLimiter, streamEvaluation, } from "./streaming";
|
|
40
40
|
export type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, BatchOptions, ClientConfig as AIEvalConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateLLMJudgeConfigParams, CreateWebhookParams, Evaluation as EvaluationData, EvaluationRun, EvaluationRunDetail, ExportOptions, GenericMetadata as AnnotationData, GetLLMJudgeAlignmentParams, GetUsageParams, ImportOptions, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeEvaluateResult, LLMJudgeResult as LLMJudgeData, Organization, RetryConfig, SnapshotData, Span as SpanData, StreamOptions, TestCase, TestResult, Trace as TraceData, TraceDetail, TracedResponse, UpdateAPIKeyParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery, } from "./types";
|
package/dist/index.js
CHANGED
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
* @packageDocumentation
|
|
9
9
|
*/
|
|
10
10
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
-
exports.
|
|
12
|
-
exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.
|
|
11
|
+
exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntaxAsync = exports.hasValidCodeSyntax = exports.hasSentimentAsync = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicityAsync = exports.hasNoToxicity = exports.hasNoHallucinationsAsync = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracyAsync = exports.hasFactualAccuracy = exports.getAssertionConfig = exports.followsInstructions = exports.expect = exports.containsLanguageAsync = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.configureAssertions = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
|
|
12
|
+
exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginateGenerator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.saveSnapshot = exports.compareSnapshots = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = void 0;
|
|
13
13
|
// Main SDK exports
|
|
14
14
|
var client_1 = require("./client");
|
|
15
15
|
Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
|
|
@@ -22,20 +22,30 @@ Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: functi
|
|
|
22
22
|
Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.SDKError; } });
|
|
23
23
|
// Enhanced assertions (Tier 1.3)
|
|
24
24
|
var assertions_1 = require("./assertions");
|
|
25
|
+
// LLM config
|
|
26
|
+
Object.defineProperty(exports, "configureAssertions", { enumerable: true, get: function () { return assertions_1.configureAssertions; } });
|
|
25
27
|
Object.defineProperty(exports, "containsAllRequiredFields", { enumerable: true, get: function () { return assertions_1.containsAllRequiredFields; } });
|
|
26
28
|
Object.defineProperty(exports, "containsJSON", { enumerable: true, get: function () { return assertions_1.containsJSON; } });
|
|
27
29
|
Object.defineProperty(exports, "containsKeywords", { enumerable: true, get: function () { return assertions_1.containsKeywords; } });
|
|
28
30
|
Object.defineProperty(exports, "containsLanguage", { enumerable: true, get: function () { return assertions_1.containsLanguage; } });
|
|
31
|
+
// LLM-backed async variants
|
|
32
|
+
Object.defineProperty(exports, "containsLanguageAsync", { enumerable: true, get: function () { return assertions_1.containsLanguageAsync; } });
|
|
29
33
|
Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
|
|
30
34
|
Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
|
|
35
|
+
Object.defineProperty(exports, "getAssertionConfig", { enumerable: true, get: function () { return assertions_1.getAssertionConfig; } });
|
|
31
36
|
Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
|
|
37
|
+
Object.defineProperty(exports, "hasFactualAccuracyAsync", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracyAsync; } });
|
|
32
38
|
Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
|
|
33
39
|
Object.defineProperty(exports, "hasNoHallucinations", { enumerable: true, get: function () { return assertions_1.hasNoHallucinations; } });
|
|
40
|
+
Object.defineProperty(exports, "hasNoHallucinationsAsync", { enumerable: true, get: function () { return assertions_1.hasNoHallucinationsAsync; } });
|
|
34
41
|
Object.defineProperty(exports, "hasNoToxicity", { enumerable: true, get: function () { return assertions_1.hasNoToxicity; } });
|
|
42
|
+
Object.defineProperty(exports, "hasNoToxicityAsync", { enumerable: true, get: function () { return assertions_1.hasNoToxicityAsync; } });
|
|
35
43
|
Object.defineProperty(exports, "hasPII", { enumerable: true, get: function () { return assertions_1.hasPII; } });
|
|
36
44
|
Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
|
|
37
45
|
Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
|
|
46
|
+
Object.defineProperty(exports, "hasSentimentAsync", { enumerable: true, get: function () { return assertions_1.hasSentimentAsync; } });
|
|
38
47
|
Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
|
|
48
|
+
Object.defineProperty(exports, "hasValidCodeSyntaxAsync", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntaxAsync; } });
|
|
39
49
|
Object.defineProperty(exports, "isValidEmail", { enumerable: true, get: function () { return assertions_1.isValidEmail; } });
|
|
40
50
|
Object.defineProperty(exports, "isValidURL", { enumerable: true, get: function () { return assertions_1.isValidURL; } });
|
|
41
51
|
Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
|
|
@@ -81,8 +91,8 @@ Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: funct
|
|
|
81
91
|
Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
|
|
82
92
|
// Snapshot testing (Tier 2.8)
|
|
83
93
|
const snapshot_1 = require("./snapshot");
|
|
94
|
+
Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareSnapshots; } });
|
|
84
95
|
Object.defineProperty(exports, "compareWithSnapshot", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
|
|
85
|
-
Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
|
|
86
96
|
Object.defineProperty(exports, "snapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
|
|
87
97
|
Object.defineProperty(exports, "saveSnapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
|
|
88
98
|
// Export/Import utilities (Tier 4.18)
|
|
@@ -120,6 +130,7 @@ var matchers_1 = require("./matchers");
|
|
|
120
130
|
Object.defineProperty(exports, "extendExpectWithToPassGate", { enumerable: true, get: function () { return matchers_1.extendExpectWithToPassGate; } });
|
|
121
131
|
var pagination_1 = require("./pagination");
|
|
122
132
|
Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
|
|
133
|
+
Object.defineProperty(exports, "autoPaginateGenerator", { enumerable: true, get: function () { return pagination_1.autoPaginateGenerator; } });
|
|
123
134
|
Object.defineProperty(exports, "createPaginatedIterator", { enumerable: true, get: function () { return pagination_1.createPaginatedIterator; } });
|
|
124
135
|
Object.defineProperty(exports, "decodeCursor", { enumerable: true, get: function () { return pagination_1.decodeCursor; } });
|
|
125
136
|
Object.defineProperty(exports, "encodeCursor", { enumerable: true, get: function () { return pagination_1.encodeCursor; } });
|
package/dist/integrations/anthropic.js
CHANGED
|
@@ -67,7 +67,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
|
|
|
67
67
|
}
|
|
68
68
|
: {}),
|
|
69
69
|
});
|
|
70
|
-
await evalClient.traces
|
|
70
|
+
await evalClient.traces?.create({
|
|
71
71
|
name: `Anthropic: ${params.model}`,
|
|
72
72
|
traceId,
|
|
73
73
|
organizationId: organizationId || evalClient.getOrganizationId(),
|
|
@@ -89,7 +89,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
|
|
|
89
89
|
error: error instanceof Error ? error.message : String(error),
|
|
90
90
|
});
|
|
91
91
|
await evalClient.traces
|
|
92
|
-
|
|
92
|
+
?.create({
|
|
93
93
|
name: `Anthropic: ${params.model}`,
|
|
94
94
|
traceId,
|
|
95
95
|
organizationId: organizationId || evalClient.getOrganizationId(),
|
|
@@ -97,7 +97,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
|
|
|
97
97
|
durationMs,
|
|
98
98
|
metadata: errorMetadata,
|
|
99
99
|
})
|
|
100
|
-
|
|
100
|
+
?.catch(() => {
|
|
101
101
|
// Ignore errors in trace creation to avoid masking the original error
|
|
102
102
|
});
|
|
103
103
|
throw error;
|
|
@@ -127,7 +127,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
|
|
|
127
127
|
const startTime = Date.now();
|
|
128
128
|
const traceId = `anthropic-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
|
|
129
129
|
try {
|
|
130
|
-
await evalClient.traces
|
|
130
|
+
await evalClient.traces?.create({
|
|
131
131
|
name,
|
|
132
132
|
traceId,
|
|
133
133
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
@@ -136,7 +136,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
|
|
|
136
136
|
});
|
|
137
137
|
const result = await fn();
|
|
138
138
|
const durationMs = Date.now() - startTime;
|
|
139
|
-
await evalClient.traces
|
|
139
|
+
await evalClient.traces?.create({
|
|
140
140
|
name,
|
|
141
141
|
traceId,
|
|
142
142
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
@@ -148,7 +148,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
|
|
|
148
148
|
}
|
|
149
149
|
catch (error) {
|
|
150
150
|
const durationMs = Date.now() - startTime;
|
|
151
|
-
await evalClient.traces
|
|
151
|
+
await evalClient.traces?.create({
|
|
152
152
|
name,
|
|
153
153
|
traceId,
|
|
154
154
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
package/dist/integrations/openai.js
CHANGED
|
@@ -65,7 +65,7 @@ function traceOpenAI(openai, evalClient, options = {}) {
|
|
|
65
65
|
}
|
|
66
66
|
: {}),
|
|
67
67
|
});
|
|
68
|
-
await evalClient.traces
|
|
68
|
+
await evalClient.traces?.create({
|
|
69
69
|
name: `OpenAI: ${params.model}`,
|
|
70
70
|
traceId,
|
|
71
71
|
organizationId: organizationId || evalClient.getOrganizationId(),
|
|
@@ -87,7 +87,7 @@ function traceOpenAI(openai, evalClient, options = {}) {
|
|
|
87
87
|
error: error instanceof Error ? error.message : String(error),
|
|
88
88
|
});
|
|
89
89
|
await evalClient.traces
|
|
90
|
-
|
|
90
|
+
?.create({
|
|
91
91
|
name: `OpenAI: ${params.model}`,
|
|
92
92
|
traceId,
|
|
93
93
|
organizationId: organizationId || evalClient.getOrganizationId(),
|
|
@@ -95,7 +95,7 @@ function traceOpenAI(openai, evalClient, options = {}) {
|
|
|
95
95
|
durationMs,
|
|
96
96
|
metadata: errorMetadata,
|
|
97
97
|
})
|
|
98
|
-
|
|
98
|
+
?.catch(() => {
|
|
99
99
|
// Ignore errors in trace creation to avoid masking the original error
|
|
100
100
|
});
|
|
101
101
|
throw error;
|
|
@@ -124,7 +124,7 @@ async function traceOpenAICall(evalClient, name, fn, options = {}) {
|
|
|
124
124
|
const startTime = Date.now();
|
|
125
125
|
const traceId = `openai-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
|
|
126
126
|
try {
|
|
127
|
-
await evalClient.traces
|
|
127
|
+
await evalClient.traces?.create({
|
|
128
128
|
name,
|
|
129
129
|
traceId,
|
|
130
130
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
@@ -133,7 +133,7 @@ async function traceOpenAICall(evalClient, name, fn, options = {}) {
|
|
|
133
133
|
});
|
|
134
134
|
const result = await fn();
|
|
135
135
|
const durationMs = Date.now() - startTime;
|
|
136
|
-
await evalClient.traces
|
|
136
|
+
await evalClient.traces?.create({
|
|
137
137
|
name,
|
|
138
138
|
traceId,
|
|
139
139
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
@@ -145,7 +145,7 @@ async function traceOpenAICall(evalClient, name, fn, options = {}) {
|
|
|
145
145
|
}
|
|
146
146
|
catch (error) {
|
|
147
147
|
const durationMs = Date.now() - startTime;
|
|
148
|
-
await evalClient.traces
|
|
148
|
+
await evalClient.traces?.create({
|
|
149
149
|
name,
|
|
150
150
|
traceId,
|
|
151
151
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
package/dist/pagination.d.ts
CHANGED
|
@@ -50,9 +50,20 @@ export declare function createPaginatedIterator<T>(fetchFn: (offset: number, lim
|
|
|
50
50
|
hasMore: boolean;
|
|
51
51
|
}>, limit?: number): PaginatedIterator<T>;
|
|
52
52
|
/**
|
|
53
|
-
* Auto-paginate helper that fetches all pages
|
|
53
|
+
* Auto-paginate helper that fetches all pages and returns a flat array.
|
|
54
|
+
* @example
|
|
55
|
+
* ```typescript
|
|
56
|
+
* const allItems = await autoPaginate(
|
|
57
|
+
* (offset, limit) => client.traces.list({ offset, limit }),
|
|
58
|
+
* );
|
|
59
|
+
* ```
|
|
54
60
|
*/
|
|
55
|
-
export declare function autoPaginate<T>(fetchFn: (offset: number, limit: number) => Promise<T[]>, limit?: number):
|
|
61
|
+
export declare function autoPaginate<T>(fetchFn: (offset: number, limit: number) => Promise<T[]>, limit?: number): Promise<T[]>;
|
|
62
|
+
/**
|
|
63
|
+
* Streaming auto-paginate generator — yields individual items one at a time.
|
|
64
|
+
* Use this when you want to process items as they arrive rather than waiting for all pages.
|
|
65
|
+
*/
|
|
66
|
+
export declare function autoPaginateGenerator<T>(fetchFn: (offset: number, limit: number) => Promise<T[]>, limit?: number): AsyncGenerator<T, void, unknown>;
|
|
56
67
|
/**
|
|
57
68
|
* Encode cursor for pagination (base64)
|
|
58
69
|
*/
|
package/dist/pagination.js
CHANGED
|
@@ -6,6 +6,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
6
6
|
exports.PaginatedIterator = void 0;
|
|
7
7
|
exports.createPaginatedIterator = createPaginatedIterator;
|
|
8
8
|
exports.autoPaginate = autoPaginate;
|
|
9
|
+
exports.autoPaginateGenerator = autoPaginateGenerator;
|
|
9
10
|
exports.encodeCursor = encodeCursor;
|
|
10
11
|
exports.decodeCursor = decodeCursor;
|
|
11
12
|
exports.createPaginationMeta = createPaginationMeta;
|
|
@@ -56,9 +57,34 @@ function createPaginatedIterator(fetchFn, limit = 50) {
|
|
|
56
57
|
return new PaginatedIterator(fetchFn, limit);
|
|
57
58
|
}
|
|
58
59
|
/**
|
|
59
|
-
* Auto-paginate helper that fetches all pages
|
|
60
|
+
* Auto-paginate helper that fetches all pages and returns a flat array.
|
|
61
|
+
* @example
|
|
62
|
+
* ```typescript
|
|
63
|
+
* const allItems = await autoPaginate(
|
|
64
|
+
* (offset, limit) => client.traces.list({ offset, limit }),
|
|
65
|
+
* );
|
|
66
|
+
* ```
|
|
60
67
|
*/
|
|
61
|
-
async function
|
|
68
|
+
async function autoPaginate(fetchFn, limit = 50) {
|
|
69
|
+
const result = [];
|
|
70
|
+
let offset = 0;
|
|
71
|
+
let hasMore = true;
|
|
72
|
+
while (hasMore) {
|
|
73
|
+
const items = await fetchFn(offset, limit);
|
|
74
|
+
if (items.length === 0) {
|
|
75
|
+
break;
|
|
76
|
+
}
|
|
77
|
+
result.push(...items);
|
|
78
|
+
hasMore = items.length === limit;
|
|
79
|
+
offset += limit;
|
|
80
|
+
}
|
|
81
|
+
return result;
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* Streaming auto-paginate generator — yields individual items one at a time.
|
|
85
|
+
* Use this when you want to process items as they arrive rather than waiting for all pages.
|
|
86
|
+
*/
|
|
87
|
+
async function* autoPaginateGenerator(fetchFn, limit = 50) {
|
|
62
88
|
let offset = 0;
|
|
63
89
|
let hasMore = true;
|
|
64
90
|
while (hasMore) {
|
package/dist/runtime/adapters/testsuite-to-dsl.js
CHANGED
|
@@ -208,12 +208,7 @@ function generateDefineEvalCode(suite, options = {}) {
|
|
|
208
208
|
});
|
|
209
209
|
const helperFunctions = generateHelperFunctionsForSuite(specs, options);
|
|
210
210
|
const evaluationFunction = generateEvaluationFunction();
|
|
211
|
-
return [
|
|
212
|
-
...imports,
|
|
213
|
-
...helperFunctions,
|
|
214
|
-
...evaluationFunction,
|
|
215
|
-
...specCode,
|
|
216
|
-
].join("\n");
|
|
211
|
+
return [...imports, helperFunctions, evaluationFunction, ...specCode].join("\n");
|
|
217
212
|
}
|
|
218
213
|
/**
|
|
219
214
|
* Generate helper functions for a specific spec
|
package/dist/runtime/executor.d.ts
CHANGED
|
@@ -10,7 +10,8 @@ import type { LocalExecutor } from "./types";
|
|
|
10
10
|
*/
|
|
11
11
|
export declare function createLocalExecutor(): LocalExecutor;
|
|
12
12
|
/**
|
|
13
|
-
* Default local executor
|
|
13
|
+
* Default local executor factory
|
|
14
|
+
* Call as defaultLocalExecutor() to get a new executor instance.
|
|
14
15
|
* For convenience in simple use cases
|
|
15
16
|
*/
|
|
16
|
-
export declare const defaultLocalExecutor:
|
|
17
|
+
export declare const defaultLocalExecutor: typeof createLocalExecutor;
|
package/dist/runtime/executor.js
CHANGED
|
@@ -146,7 +146,8 @@ function createLocalExecutor() {
|
|
|
146
146
|
return new LocalExecutorImpl();
|
|
147
147
|
}
|
|
148
148
|
/**
|
|
149
|
-
* Default local executor
|
|
149
|
+
* Default local executor factory
|
|
150
|
+
* Call as defaultLocalExecutor() to get a new executor instance.
|
|
150
151
|
* For convenience in simple use cases
|
|
151
152
|
*/
|
|
152
|
-
exports.defaultLocalExecutor = createLocalExecutor
|
|
153
|
+
exports.defaultLocalExecutor = createLocalExecutor;
|
package/dist/runtime/registry.d.ts
CHANGED
|
@@ -61,7 +61,10 @@ export interface SerializedSpec {
|
|
|
61
61
|
* Create a new scoped runtime with lifecycle management
|
|
62
62
|
* Returns a handle for proper resource management
|
|
63
63
|
*/
|
|
64
|
-
export declare function createEvalRuntime(projectRoot?: string): RuntimeHandle;
|
|
64
|
+
export declare function createEvalRuntime(projectRootOrConfig?: string | {
|
|
65
|
+
name?: string;
|
|
66
|
+
projectRoot?: string;
|
|
67
|
+
}): RuntimeHandle;
|
|
65
68
|
/**
|
|
66
69
|
* Helper function for safe runtime execution with automatic cleanup
|
|
67
70
|
* Ensures runtime is disposed even if an exception is thrown
|
package/dist/runtime/registry.js
CHANGED
|
@@ -315,7 +315,10 @@ class EvalRuntimeImpl {
|
|
|
315
315
|
* Create a new scoped runtime with lifecycle management
|
|
316
316
|
* Returns a handle for proper resource management
|
|
317
317
|
*/
|
|
318
|
-
function createEvalRuntime(projectRoot = process.cwd()) {
|
|
318
|
+
function createEvalRuntime(projectRootOrConfig = process.cwd()) {
|
|
319
|
+
const projectRoot = typeof projectRootOrConfig === "string"
|
|
320
|
+
? projectRootOrConfig
|
|
321
|
+
: (projectRootOrConfig.projectRoot ?? process.cwd());
|
|
319
322
|
const runtime = new EvalRuntimeImpl(projectRoot);
|
|
320
323
|
// Create bound defineEval function
|
|
321
324
|
const boundDefineEval = ((nameOrConfig, executor, options) => {
|
package/dist/snapshot.d.ts
CHANGED
|
@@ -99,7 +99,7 @@ export declare class SnapshotManager {
|
|
|
99
99
|
* }
|
|
100
100
|
* ```
|
|
101
101
|
*/
|
|
102
|
-
compare(name: string, currentOutput: string): Promise<SnapshotComparison>;
|
|
102
|
+
compare(name: string, currentOutput: unknown): Promise<SnapshotComparison>;
|
|
103
103
|
/**
|
|
104
104
|
* List all snapshots
|
|
105
105
|
*
|
|
@@ -165,7 +165,19 @@ export declare function loadSnapshot(name: string, dir?: string): Promise<Snapsh
|
|
|
165
165
|
* }
|
|
166
166
|
* ```
|
|
167
167
|
*/
|
|
168
|
-
export declare function compareWithSnapshot(name: string, currentOutput: string, dir?: string): Promise<SnapshotComparison>;
|
|
168
|
+
export declare function compareWithSnapshot(name: string, currentOutput: unknown, dir?: string): Promise<SnapshotComparison>;
|
|
169
|
+
/**
|
|
170
|
+
* Compare two saved snapshots by name (convenience function)
|
|
171
|
+
*
|
|
172
|
+
* @example
|
|
173
|
+
* ```typescript
|
|
174
|
+
* const comparison = await compareSnapshots('baseline', 'current');
|
|
175
|
+
* if (!comparison.matches) {
|
|
176
|
+
* console.log('Snapshots differ!', comparison.differences);
|
|
177
|
+
* }
|
|
178
|
+
* ```
|
|
179
|
+
*/
|
|
180
|
+
export declare function compareSnapshots(nameA: string, nameB: string, dir?: string): Promise<SnapshotComparison>;
|
|
169
181
|
/**
|
|
170
182
|
* Delete a snapshot (convenience function)
|
|
171
183
|
*/
|
package/dist/snapshot.js
CHANGED
|
@@ -55,6 +55,7 @@ exports.SnapshotManager = void 0;
|
|
|
55
55
|
exports.snapshot = snapshot;
|
|
56
56
|
exports.loadSnapshot = loadSnapshot;
|
|
57
57
|
exports.compareWithSnapshot = compareWithSnapshot;
|
|
58
|
+
exports.compareSnapshots = compareSnapshots;
|
|
58
59
|
exports.deleteSnapshot = deleteSnapshot;
|
|
59
60
|
exports.listSnapshots = listSnapshots;
|
|
60
61
|
// Environment check
|
|
@@ -130,7 +131,13 @@ class SnapshotManager {
|
|
|
130
131
|
if (!options?.overwrite && fs.existsSync(filePath)) {
|
|
131
132
|
throw new Error(`Snapshot '${name}' already exists. Use overwrite: true to update.`);
|
|
132
133
|
}
|
|
133
|
-
const serialized =
|
|
134
|
+
const serialized = output === undefined
|
|
135
|
+
? "undefined"
|
|
136
|
+
: output === null
|
|
137
|
+
? "null"
|
|
138
|
+
: typeof output === "string"
|
|
139
|
+
? output
|
|
140
|
+
: JSON.stringify(output);
|
|
134
141
|
const snapshotData = {
|
|
135
142
|
output: serialized,
|
|
136
143
|
metadata: {
|
|
@@ -175,11 +182,14 @@ class SnapshotManager {
|
|
|
175
182
|
async compare(name, currentOutput) {
|
|
176
183
|
const snapshot = await this.load(name);
|
|
177
184
|
const original = snapshot.output;
|
|
185
|
+
const currentOutputStr = typeof currentOutput === "string"
|
|
186
|
+
? currentOutput
|
|
187
|
+
: JSON.stringify(currentOutput);
|
|
178
188
|
// Exact match check
|
|
179
|
-
const exactMatch = original === currentOutput;
|
|
189
|
+
const exactMatch = original === currentOutputStr;
|
|
180
190
|
// Calculate similarity (simple line-based diff)
|
|
181
191
|
const originalLines = original.split("\n");
|
|
182
|
-
const currentLines = currentOutput.split("\n");
|
|
192
|
+
const currentLines = currentOutputStr.split("\n");
|
|
183
193
|
const differences = [];
|
|
184
194
|
const maxLines = Math.max(originalLines.length, currentLines.length);
|
|
185
195
|
let matchingLines = 0;
|
|
@@ -199,7 +209,7 @@ class SnapshotManager {
|
|
|
199
209
|
similarity,
|
|
200
210
|
differences,
|
|
201
211
|
original,
|
|
202
|
-
current: currentOutput,
|
|
212
|
+
current: currentOutputStr,
|
|
203
213
|
};
|
|
204
214
|
}
|
|
205
215
|
/**
|
|
@@ -307,6 +317,22 @@ async function compareWithSnapshot(name, currentOutput, dir) {
|
|
|
307
317
|
const manager = getSnapshotManager(dir);
|
|
308
318
|
return manager.compare(name, currentOutput);
|
|
309
319
|
}
|
|
320
|
+
/**
|
|
321
|
+
* Compare two saved snapshots by name (convenience function)
|
|
322
|
+
*
|
|
323
|
+
* @example
|
|
324
|
+
* ```typescript
|
|
325
|
+
* const comparison = await compareSnapshots('baseline', 'current');
|
|
326
|
+
* if (!comparison.matches) {
|
|
327
|
+
* console.log('Snapshots differ!', comparison.differences);
|
|
328
|
+
* }
|
|
329
|
+
* ```
|
|
330
|
+
*/
|
|
331
|
+
async function compareSnapshots(nameA, nameB, dir) {
|
|
332
|
+
const manager = getSnapshotManager(dir);
|
|
333
|
+
const snapshotB = await manager.load(nameB);
|
|
334
|
+
return manager.compare(nameA, snapshotB.output);
|
|
335
|
+
}
|
|
310
336
|
/**
|
|
311
337
|
* Delete a snapshot (convenience function)
|
|
312
338
|
*/
|
package/dist/types.d.ts
CHANGED
|
@@ -38,8 +38,13 @@ export interface ClientConfig {
|
|
|
38
38
|
keepAlive?: boolean;
|
|
39
39
|
}
|
|
40
40
|
/**
|
|
41
|
-
* Evaluation template
|
|
42
|
-
*
|
|
41
|
+
* Evaluation template identifier constants for use with the EvalAI platform API.
|
|
42
|
+
*
|
|
43
|
+
* These are **string identifiers** (e.g. `"unit-testing"`) that reference
|
|
44
|
+
* pre-built templates on the platform — not template definition objects.
|
|
45
|
+
* Pass these values to `evaluations.create({ templateId: EvaluationTemplates.UNIT_TESTING })`
|
|
46
|
+
* to spin up a pre-configured evaluation. For custom criteria, thresholds, and
|
|
47
|
+
* test cases, build your own evaluation config instead.
|
|
43
48
|
*/
|
|
44
49
|
export declare const EvaluationTemplates: {
|
|
45
50
|
readonly UNIT_TESTING: "unit-testing";
|
package/dist/types.js
CHANGED
|
@@ -2,8 +2,13 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.SDKError = exports.EvaluationTemplates = void 0;
|
|
4
4
|
/**
|
|
5
|
-
* Evaluation template
|
|
6
|
-
*
|
|
5
|
+
* Evaluation template identifier constants for use with the EvalAI platform API.
|
|
6
|
+
*
|
|
7
|
+
* These are **string identifiers** (e.g. `"unit-testing"`) that reference
|
|
8
|
+
* pre-built templates on the platform — not template definition objects.
|
|
9
|
+
* Pass these values to `evaluations.create({ templateId: EvaluationTemplates.UNIT_TESTING })`
|
|
10
|
+
* to spin up a pre-configured evaluation. For custom criteria, thresholds, and
|
|
11
|
+
* test cases, build your own evaluation config instead.
|
|
7
12
|
*/
|
|
8
13
|
exports.EvaluationTemplates = {
|
|
9
14
|
// Core Testing
|
package/dist/version.d.ts
CHANGED
|
@@ -3,5 +3,5 @@
|
|
|
3
3
|
* X-EvalGate-SDK-Version: SDK package version
|
|
4
4
|
* X-EvalGate-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
|
|
5
5
|
*/
|
|
6
|
-
export declare const SDK_VERSION = "2.2.1";
|
|
7
|
-
export declare const SPEC_VERSION = "2.2.1";
|
|
6
|
+
export declare const SDK_VERSION = "2.2.3";
|
|
7
|
+
export declare const SPEC_VERSION = "2.2.3";
|
package/dist/version.js
CHANGED
|
@@ -6,5 +6,5 @@ exports.SPEC_VERSION = exports.SDK_VERSION = void 0;
|
|
|
6
6
|
* X-EvalGate-SDK-Version: SDK package version
|
|
7
7
|
* X-EvalGate-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
|
|
8
8
|
*/
|
|
9
|
-
exports.SDK_VERSION = "2.2.1";
|
|
10
|
-
exports.SPEC_VERSION = "2.2.1";
|
|
9
|
+
exports.SDK_VERSION = "2.2.3";
|
|
10
|
+
exports.SPEC_VERSION = "2.2.3";
|
package/dist/workflows.js
CHANGED
|
@@ -64,8 +64,13 @@ class WorkflowTracer {
|
|
|
64
64
|
this.costs = [];
|
|
65
65
|
this.spanCounter = 0;
|
|
66
66
|
this.client = client;
|
|
67
|
+
const resolvedOrgId = options.organizationId ??
|
|
68
|
+
(typeof client?.getOrganizationId === "function"
|
|
69
|
+
? client.getOrganizationId()
|
|
70
|
+
: undefined) ??
|
|
71
|
+
0;
|
|
67
72
|
this.options = {
|
|
68
|
-
organizationId:
|
|
73
|
+
organizationId: resolvedOrgId,
|
|
69
74
|
autoCalculateCost: options.autoCalculateCost ?? true,
|
|
70
75
|
tracePrefix: options.tracePrefix || "workflow",
|
|
71
76
|
captureFullPayloads: options.captureFullPayloads ?? true,
|