@evalgate/sdk 2.2.2 → 2.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +40 -1
- package/dist/assertions.d.ts +194 -10
- package/dist/assertions.js +525 -73
- package/dist/batch.js +4 -4
- package/dist/cache.d.ts +5 -1
- package/dist/cache.js +5 -1
- package/dist/cli/baseline.d.ts +14 -0
- package/dist/cli/baseline.js +43 -3
- package/dist/cli/check.d.ts +5 -2
- package/dist/cli/check.js +20 -12
- package/dist/cli/compare.d.ts +80 -0
- package/dist/cli/compare.js +266 -0
- package/dist/cli/index.js +244 -101
- package/dist/cli/regression-gate.js +23 -0
- package/dist/cli/run.js +22 -0
- package/dist/cli/start.d.ts +26 -0
- package/dist/cli/start.js +130 -0
- package/dist/cli/templates.d.ts +24 -0
- package/dist/cli/templates.js +314 -0
- package/dist/cli/traces.d.ts +109 -0
- package/dist/cli/traces.js +152 -0
- package/dist/cli/upgrade.js +5 -0
- package/dist/cli/validate.d.ts +37 -0
- package/dist/cli/validate.js +252 -0
- package/dist/cli/watch.d.ts +19 -0
- package/dist/cli/watch.js +175 -0
- package/dist/client.js +6 -13
- package/dist/constants.d.ts +2 -0
- package/dist/constants.js +5 -0
- package/dist/errors.js +7 -0
- package/dist/export.js +2 -2
- package/dist/index.d.ts +10 -9
- package/dist/index.js +24 -7
- package/dist/integrations/anthropic.js +6 -6
- package/dist/integrations/openai.js +84 -61
- package/dist/logger.d.ts +3 -1
- package/dist/logger.js +2 -1
- package/dist/otel.d.ts +130 -0
- package/dist/otel.js +309 -0
- package/dist/pagination.d.ts +13 -2
- package/dist/pagination.js +28 -2
- package/dist/runtime/adapters/testsuite-to-dsl.js +1 -6
- package/dist/runtime/eval.d.ts +14 -4
- package/dist/runtime/eval.js +127 -2
- package/dist/runtime/executor.d.ts +3 -2
- package/dist/runtime/executor.js +3 -2
- package/dist/runtime/registry.d.ts +8 -3
- package/dist/runtime/registry.js +15 -4
- package/dist/runtime/run-report.d.ts +1 -1
- package/dist/runtime/run-report.js +7 -4
- package/dist/runtime/types.d.ts +38 -0
- package/dist/snapshot.d.ts +12 -0
- package/dist/snapshot.js +24 -1
- package/dist/testing.d.ts +8 -0
- package/dist/testing.js +45 -10
- package/dist/version.d.ts +2 -2
- package/dist/version.js +2 -2
- package/dist/workflows.d.ts +2 -0
- package/dist/workflows.js +184 -102
- package/package.json +8 -1
package/dist/index.js
CHANGED
|
@@ -8,8 +8,9 @@
|
|
|
8
8
|
* @packageDocumentation
|
|
9
9
|
*/
|
|
10
10
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
-
exports.
|
|
12
|
-
exports.
|
|
11
|
+
exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.SDKError = exports.withinRange = exports.toSemanticallyContain = exports.similarTo = exports.respondedWithinTimeSince = exports.respondedWithinTime = exports.respondedWithinDuration = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntaxAsync = exports.hasValidCodeSyntax = exports.hasSentimentWithScore = exports.hasSentimentAsync = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicityAsync = exports.hasNoToxicity = exports.hasNoHallucinationsAsync = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracyAsync = exports.hasFactualAccuracy = exports.hasConsistencyAsync = exports.hasConsistency = exports.getAssertionConfig = exports.followsInstructions = exports.expect = exports.containsLanguageAsync = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.configureAssertions = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
|
|
12
|
+
exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginateGenerator = exports.autoPaginate = exports.OTelExporter = exports.createOTelExporter = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.saveSnapshot = exports.compareSnapshots = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.getFilteredSpecs = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = void 0;
|
|
13
|
+
exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = void 0;
|
|
13
14
|
// Main SDK exports
|
|
14
15
|
var client_1 = require("./client");
|
|
15
16
|
Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
|
|
@@ -19,7 +20,7 @@ Object.defineProperty(exports, "AuthenticationError", { enumerable: true, get: f
|
|
|
19
20
|
Object.defineProperty(exports, "EvalGateError", { enumerable: true, get: function () { return errors_1.EvalGateError; } });
|
|
20
21
|
Object.defineProperty(exports, "NetworkError", { enumerable: true, get: function () { return errors_1.NetworkError; } });
|
|
21
22
|
Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: function () { return errors_1.RateLimitError; } });
|
|
22
|
-
Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.
|
|
23
|
+
Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.ValidationError; } });
|
|
23
24
|
// Enhanced assertions (Tier 1.3)
|
|
24
25
|
var assertions_1 = require("./assertions");
|
|
25
26
|
// LLM config
|
|
@@ -33,6 +34,8 @@ Object.defineProperty(exports, "containsLanguageAsync", { enumerable: true, get:
|
|
|
33
34
|
Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
|
|
34
35
|
Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
|
|
35
36
|
Object.defineProperty(exports, "getAssertionConfig", { enumerable: true, get: function () { return assertions_1.getAssertionConfig; } });
|
|
37
|
+
Object.defineProperty(exports, "hasConsistency", { enumerable: true, get: function () { return assertions_1.hasConsistency; } });
|
|
38
|
+
Object.defineProperty(exports, "hasConsistencyAsync", { enumerable: true, get: function () { return assertions_1.hasConsistencyAsync; } });
|
|
36
39
|
Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
|
|
37
40
|
Object.defineProperty(exports, "hasFactualAccuracyAsync", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracyAsync; } });
|
|
38
41
|
Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
|
|
@@ -44,6 +47,7 @@ Object.defineProperty(exports, "hasPII", { enumerable: true, get: function () {
|
|
|
44
47
|
Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
|
|
45
48
|
Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
|
|
46
49
|
Object.defineProperty(exports, "hasSentimentAsync", { enumerable: true, get: function () { return assertions_1.hasSentimentAsync; } });
|
|
50
|
+
Object.defineProperty(exports, "hasSentimentWithScore", { enumerable: true, get: function () { return assertions_1.hasSentimentWithScore; } });
|
|
47
51
|
Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
|
|
48
52
|
Object.defineProperty(exports, "hasValidCodeSyntaxAsync", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntaxAsync; } });
|
|
49
53
|
Object.defineProperty(exports, "isValidEmail", { enumerable: true, get: function () { return assertions_1.isValidEmail; } });
|
|
@@ -51,9 +55,15 @@ Object.defineProperty(exports, "isValidURL", { enumerable: true, get: function (
|
|
|
51
55
|
Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
|
|
52
56
|
Object.defineProperty(exports, "matchesSchema", { enumerable: true, get: function () { return assertions_1.matchesSchema; } });
|
|
53
57
|
Object.defineProperty(exports, "notContainsPII", { enumerable: true, get: function () { return assertions_1.notContainsPII; } });
|
|
58
|
+
Object.defineProperty(exports, "respondedWithinDuration", { enumerable: true, get: function () { return assertions_1.respondedWithinDuration; } });
|
|
54
59
|
Object.defineProperty(exports, "respondedWithinTime", { enumerable: true, get: function () { return assertions_1.respondedWithinTime; } });
|
|
60
|
+
Object.defineProperty(exports, "respondedWithinTimeSince", { enumerable: true, get: function () { return assertions_1.respondedWithinTimeSince; } });
|
|
55
61
|
Object.defineProperty(exports, "similarTo", { enumerable: true, get: function () { return assertions_1.similarTo; } });
|
|
62
|
+
Object.defineProperty(exports, "toSemanticallyContain", { enumerable: true, get: function () { return assertions_1.toSemanticallyContain; } });
|
|
56
63
|
Object.defineProperty(exports, "withinRange", { enumerable: true, get: function () { return assertions_1.withinRange; } });
|
|
64
|
+
// Legacy backward compat — SDKError is the old name for EvalGateError
|
|
65
|
+
var errors_2 = require("./errors");
|
|
66
|
+
Object.defineProperty(exports, "SDKError", { enumerable: true, get: function () { return errors_2.EvalGateError; } });
|
|
57
67
|
// Context propagation (Tier 2.9)
|
|
58
68
|
const context_1 = require("./context");
|
|
59
69
|
Object.defineProperty(exports, "createContext", { enumerable: true, get: function () { return context_1.createContext; } });
|
|
@@ -71,6 +81,7 @@ Object.defineProperty(exports, "createResult", { enumerable: true, get: function
|
|
|
71
81
|
Object.defineProperty(exports, "defineEval", { enumerable: true, get: function () { return eval_1.defineEval; } });
|
|
72
82
|
Object.defineProperty(exports, "defineSuite", { enumerable: true, get: function () { return eval_1.defineSuite; } });
|
|
73
83
|
Object.defineProperty(exports, "evalai", { enumerable: true, get: function () { return eval_1.evalai; } });
|
|
84
|
+
Object.defineProperty(exports, "getFilteredSpecs", { enumerable: true, get: function () { return eval_1.getFilteredSpecs; } });
|
|
74
85
|
var executor_1 = require("./runtime/executor");
|
|
75
86
|
Object.defineProperty(exports, "createLocalExecutor", { enumerable: true, get: function () { return executor_1.createLocalExecutor; } });
|
|
76
87
|
Object.defineProperty(exports, "defaultLocalExecutor", { enumerable: true, get: function () { return executor_1.defaultLocalExecutor; } });
|
|
@@ -91,8 +102,8 @@ Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: funct
|
|
|
91
102
|
Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
|
|
92
103
|
// Snapshot testing (Tier 2.8)
|
|
93
104
|
const snapshot_1 = require("./snapshot");
|
|
105
|
+
Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareSnapshots; } });
|
|
94
106
|
Object.defineProperty(exports, "compareWithSnapshot", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
|
|
95
|
-
Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
|
|
96
107
|
Object.defineProperty(exports, "snapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
|
|
97
108
|
Object.defineProperty(exports, "saveSnapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
|
|
98
109
|
// Export/Import utilities (Tier 4.18)
|
|
@@ -104,11 +115,12 @@ Object.defineProperty(exports, "importData", { enumerable: true, get: function (
|
|
|
104
115
|
var batch_1 = require("./batch");
|
|
105
116
|
Object.defineProperty(exports, "RequestBatcher", { enumerable: true, get: function () { return batch_1.RequestBatcher; } });
|
|
106
117
|
// Performance optimization utilities (v1.3.0)
|
|
107
|
-
// Note:
|
|
108
|
-
// Most users don't need
|
|
118
|
+
// Note: CacheTTL is for advanced users only
|
|
119
|
+
// Most users don't need this - caching is automatic
|
|
120
|
+
// RequestCache is intentionally NOT exported — it's an internal HTTP cache.
|
|
121
|
+
// Use CacheTTL to configure cache durations via client options.
|
|
109
122
|
var cache_1 = require("./cache");
|
|
110
123
|
Object.defineProperty(exports, "CacheTTL", { enumerable: true, get: function () { return cache_1.CacheTTL; } });
|
|
111
|
-
Object.defineProperty(exports, "RequestCache", { enumerable: true, get: function () { return cache_1.RequestCache; } });
|
|
112
124
|
// CLI (programmatic use)
|
|
113
125
|
var check_1 = require("./cli/check");
|
|
114
126
|
Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
|
|
@@ -128,8 +140,13 @@ Object.defineProperty(exports, "Logger", { enumerable: true, get: function () {
|
|
|
128
140
|
// Vitest matcher: expect(await openAIChatEval(...)).toPassGate()
|
|
129
141
|
var matchers_1 = require("./matchers");
|
|
130
142
|
Object.defineProperty(exports, "extendExpectWithToPassGate", { enumerable: true, get: function () { return matchers_1.extendExpectWithToPassGate; } });
|
|
143
|
+
// OpenTelemetry export
|
|
144
|
+
var otel_1 = require("./otel");
|
|
145
|
+
Object.defineProperty(exports, "createOTelExporter", { enumerable: true, get: function () { return otel_1.createOTelExporter; } });
|
|
146
|
+
Object.defineProperty(exports, "OTelExporter", { enumerable: true, get: function () { return otel_1.OTelExporter; } });
|
|
131
147
|
var pagination_1 = require("./pagination");
|
|
132
148
|
Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
|
|
149
|
+
Object.defineProperty(exports, "autoPaginateGenerator", { enumerable: true, get: function () { return pagination_1.autoPaginateGenerator; } });
|
|
133
150
|
Object.defineProperty(exports, "createPaginatedIterator", { enumerable: true, get: function () { return pagination_1.createPaginatedIterator; } });
|
|
134
151
|
Object.defineProperty(exports, "decodeCursor", { enumerable: true, get: function () { return pagination_1.decodeCursor; } });
|
|
135
152
|
Object.defineProperty(exports, "encodeCursor", { enumerable: true, get: function () { return pagination_1.encodeCursor; } });
|
|
@@ -67,7 +67,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
|
|
|
67
67
|
}
|
|
68
68
|
: {}),
|
|
69
69
|
});
|
|
70
|
-
await evalClient.traces
|
|
70
|
+
await evalClient.traces?.create({
|
|
71
71
|
name: `Anthropic: ${params.model}`,
|
|
72
72
|
traceId,
|
|
73
73
|
organizationId: organizationId || evalClient.getOrganizationId(),
|
|
@@ -89,7 +89,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
|
|
|
89
89
|
error: error instanceof Error ? error.message : String(error),
|
|
90
90
|
});
|
|
91
91
|
await evalClient.traces
|
|
92
|
-
|
|
92
|
+
?.create({
|
|
93
93
|
name: `Anthropic: ${params.model}`,
|
|
94
94
|
traceId,
|
|
95
95
|
organizationId: organizationId || evalClient.getOrganizationId(),
|
|
@@ -97,7 +97,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
|
|
|
97
97
|
durationMs,
|
|
98
98
|
metadata: errorMetadata,
|
|
99
99
|
})
|
|
100
|
-
|
|
100
|
+
?.catch(() => {
|
|
101
101
|
// Ignore errors in trace creation to avoid masking the original error
|
|
102
102
|
});
|
|
103
103
|
throw error;
|
|
@@ -127,7 +127,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
|
|
|
127
127
|
const startTime = Date.now();
|
|
128
128
|
const traceId = `anthropic-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
|
|
129
129
|
try {
|
|
130
|
-
await evalClient.traces
|
|
130
|
+
await evalClient.traces?.create({
|
|
131
131
|
name,
|
|
132
132
|
traceId,
|
|
133
133
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
@@ -136,7 +136,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
|
|
|
136
136
|
});
|
|
137
137
|
const result = await fn();
|
|
138
138
|
const durationMs = Date.now() - startTime;
|
|
139
|
-
await evalClient.traces
|
|
139
|
+
await evalClient.traces?.create({
|
|
140
140
|
name,
|
|
141
141
|
traceId,
|
|
142
142
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
@@ -148,7 +148,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
|
|
|
148
148
|
}
|
|
149
149
|
catch (error) {
|
|
150
150
|
const durationMs = Date.now() - startTime;
|
|
151
|
-
await evalClient.traces
|
|
151
|
+
await evalClient.traces?.create({
|
|
152
152
|
name,
|
|
153
153
|
traceId,
|
|
154
154
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
@@ -52,52 +52,59 @@ function traceOpenAI(openai, evalClient, options = {}) {
|
|
|
52
52
|
const response = await originalCreate(params, requestOptions);
|
|
53
53
|
const durationMs = Date.now() - startTime;
|
|
54
54
|
// Create trace with success status and complete metadata
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
? {
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
55
|
+
// Trace creation is non-fatal — never lose the OpenAI result due to tracing issues
|
|
56
|
+
try {
|
|
57
|
+
const traceMetadata = (0, context_1.mergeWithContext)({
|
|
58
|
+
model: params.model,
|
|
59
|
+
temperature: params.temperature,
|
|
60
|
+
max_tokens: params.max_tokens,
|
|
61
|
+
...(captureInput ? { input: params.messages } : {}),
|
|
62
|
+
...(captureOutput ? { output: response.choices[0]?.message } : {}),
|
|
63
|
+
...(captureMetadata
|
|
64
|
+
? {
|
|
65
|
+
usage: response.usage,
|
|
66
|
+
finish_reason: response.choices[0]?.finish_reason,
|
|
67
|
+
}
|
|
68
|
+
: {}),
|
|
69
|
+
});
|
|
70
|
+
await evalClient.traces?.create({
|
|
71
|
+
name: `OpenAI: ${params.model}`,
|
|
72
|
+
traceId,
|
|
73
|
+
organizationId: organizationId || evalClient.getOrganizationId(),
|
|
74
|
+
status: "success",
|
|
75
|
+
durationMs,
|
|
76
|
+
metadata: traceMetadata,
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
catch {
|
|
80
|
+
/* trace failure is non-fatal */
|
|
81
|
+
}
|
|
76
82
|
return response;
|
|
77
83
|
}
|
|
78
84
|
catch (error) {
|
|
79
85
|
const durationMs = Date.now() - startTime;
|
|
80
|
-
// Create trace with error status
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
.create({
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
86
|
+
// Create trace with error status — non-fatal
|
|
87
|
+
try {
|
|
88
|
+
const errorMetadata = (0, context_1.mergeWithContext)({
|
|
89
|
+
model: params.model,
|
|
90
|
+
temperature: params.temperature,
|
|
91
|
+
max_tokens: params.max_tokens,
|
|
92
|
+
...(captureInput ? { input: params.messages } : {}),
|
|
93
|
+
...(captureMetadata ? { params } : {}),
|
|
94
|
+
error: error instanceof Error ? error.message : String(error),
|
|
95
|
+
});
|
|
96
|
+
await evalClient.traces?.create({
|
|
97
|
+
name: `OpenAI: ${params.model}`,
|
|
98
|
+
traceId,
|
|
99
|
+
organizationId: organizationId || evalClient.getOrganizationId(),
|
|
100
|
+
status: "error",
|
|
101
|
+
durationMs,
|
|
102
|
+
metadata: errorMetadata,
|
|
103
|
+
});
|
|
104
|
+
}
|
|
105
|
+
catch {
|
|
106
|
+
/* trace failure is non-fatal */
|
|
107
|
+
}
|
|
101
108
|
throw error;
|
|
102
109
|
}
|
|
103
110
|
};
|
|
@@ -123,38 +130,54 @@ function traceOpenAI(openai, evalClient, options = {}) {
|
|
|
123
130
|
async function traceOpenAICall(evalClient, name, fn, options = {}) {
|
|
124
131
|
const startTime = Date.now();
|
|
125
132
|
const traceId = `openai-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
|
|
133
|
+
// Trace creation is non-fatal — never lose the fn() result due to tracing issues
|
|
126
134
|
try {
|
|
127
|
-
await evalClient.traces
|
|
135
|
+
await evalClient.traces?.create({
|
|
128
136
|
name,
|
|
129
137
|
traceId,
|
|
130
138
|
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
131
139
|
status: "pending",
|
|
132
140
|
metadata: (0, context_1.mergeWithContext)({}),
|
|
133
141
|
});
|
|
142
|
+
}
|
|
143
|
+
catch {
|
|
144
|
+
/* trace failure is non-fatal */
|
|
145
|
+
}
|
|
146
|
+
try {
|
|
134
147
|
const result = await fn();
|
|
135
148
|
const durationMs = Date.now() - startTime;
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
149
|
+
try {
|
|
150
|
+
await evalClient.traces?.create({
|
|
151
|
+
name,
|
|
152
|
+
traceId,
|
|
153
|
+
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
154
|
+
status: "success",
|
|
155
|
+
durationMs,
|
|
156
|
+
metadata: (0, context_1.mergeWithContext)({}),
|
|
157
|
+
});
|
|
158
|
+
}
|
|
159
|
+
catch {
|
|
160
|
+
/* trace failure is non-fatal */
|
|
161
|
+
}
|
|
144
162
|
return result;
|
|
145
163
|
}
|
|
146
164
|
catch (error) {
|
|
147
165
|
const durationMs = Date.now() - startTime;
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
166
|
+
try {
|
|
167
|
+
await evalClient.traces?.create({
|
|
168
|
+
name,
|
|
169
|
+
traceId,
|
|
170
|
+
organizationId: options.organizationId || evalClient.getOrganizationId(),
|
|
171
|
+
status: "error",
|
|
172
|
+
durationMs,
|
|
173
|
+
metadata: (0, context_1.mergeWithContext)({
|
|
174
|
+
error: error instanceof Error ? error.message : String(error),
|
|
175
|
+
}),
|
|
176
|
+
});
|
|
177
|
+
}
|
|
178
|
+
catch {
|
|
179
|
+
/* trace failure is non-fatal */
|
|
180
|
+
}
|
|
158
181
|
throw error;
|
|
159
182
|
}
|
|
160
183
|
}
|
package/dist/logger.d.ts
CHANGED
package/dist/logger.js
CHANGED
|
@@ -93,9 +93,10 @@ class Logger {
|
|
|
93
93
|
* Create child logger with prefix
|
|
94
94
|
*/
|
|
95
95
|
child(prefix) {
|
|
96
|
+
const resolvedPrefix = typeof prefix === "string" ? prefix : prefix.prefix;
|
|
96
97
|
return new Logger({
|
|
97
98
|
...this.options,
|
|
98
|
-
prefix: `${this.options.prefix}:${
|
|
99
|
+
prefix: `${this.options.prefix}:${resolvedPrefix}`,
|
|
99
100
|
});
|
|
100
101
|
}
|
|
101
102
|
/**
|
package/dist/otel.d.ts
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenTelemetry Export for WorkflowTracer
|
|
3
|
+
*
|
|
4
|
+
* Converts WorkflowTracer spans, decisions, and costs into
|
|
5
|
+
* OpenTelemetry-compatible span data for export to any OTEL collector.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* import { OTelExporter } from "@evalgate/sdk/otel";
|
|
9
|
+
*
|
|
10
|
+
* const exporter = new OTelExporter({ endpoint: "http://localhost:4318" });
|
|
11
|
+
* const tracer = new WorkflowTracer(client, { debug: true });
|
|
12
|
+
* // ... run workflow ...
|
|
13
|
+
* await exporter.exportFromTracer(tracer);
|
|
14
|
+
*/
|
|
15
|
+
import type { WorkflowTracer } from "./workflows";
|
|
16
|
+
/**
|
|
17
|
+
* OTEL-compatible span representation
|
|
18
|
+
* Follows the OpenTelemetry Trace specification
|
|
19
|
+
*/
|
|
20
|
+
export interface OTelSpan {
|
|
21
|
+
traceId: string;
|
|
22
|
+
spanId: string;
|
|
23
|
+
parentSpanId?: string;
|
|
24
|
+
name: string;
|
|
25
|
+
/** OTLP SpanKind: 0=UNSPECIFIED, 1=INTERNAL, 2=SERVER, 3=CLIENT, 4=PRODUCER, 5=CONSUMER */
|
|
26
|
+
kind: 0 | 1 | 2 | 3 | 4 | 5;
|
|
27
|
+
startTimeUnixNano: string;
|
|
28
|
+
endTimeUnixNano: string;
|
|
29
|
+
attributes: OTelAttribute[];
|
|
30
|
+
/** OTLP StatusCode: 0=STATUS_CODE_UNSET, 1=STATUS_CODE_OK, 2=STATUS_CODE_ERROR */
|
|
31
|
+
status: {
|
|
32
|
+
code: 0 | 1 | 2;
|
|
33
|
+
message?: string;
|
|
34
|
+
};
|
|
35
|
+
events: OTelEvent[];
|
|
36
|
+
}
|
|
37
|
+
export interface OTelAttribute {
|
|
38
|
+
key: string;
|
|
39
|
+
value: {
|
|
40
|
+
stringValue?: string;
|
|
41
|
+
intValue?: string;
|
|
42
|
+
doubleValue?: number;
|
|
43
|
+
boolValue?: boolean;
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
export interface OTelEvent {
|
|
47
|
+
name: string;
|
|
48
|
+
timeUnixNano: string;
|
|
49
|
+
attributes: OTelAttribute[];
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* OTEL export payload (OTLP JSON format)
|
|
53
|
+
*/
|
|
54
|
+
export interface OTelExportPayload {
|
|
55
|
+
resourceSpans: Array<{
|
|
56
|
+
resource: {
|
|
57
|
+
attributes: OTelAttribute[];
|
|
58
|
+
};
|
|
59
|
+
scopeSpans: Array<{
|
|
60
|
+
scope: {
|
|
61
|
+
name: string;
|
|
62
|
+
version: string;
|
|
63
|
+
};
|
|
64
|
+
spans: OTelSpan[];
|
|
65
|
+
}>;
|
|
66
|
+
}>;
|
|
67
|
+
}
|
|
68
|
+
export interface OTelExporterOptions {
|
|
69
|
+
/** OTEL collector endpoint (default: http://localhost:4318/v1/traces) */
|
|
70
|
+
endpoint?: string;
|
|
71
|
+
/** Service name for resource attributes */
|
|
72
|
+
serviceName?: string;
|
|
73
|
+
/** Additional resource attributes */
|
|
74
|
+
resourceAttributes?: Record<string, string>;
|
|
75
|
+
/** SDK version */
|
|
76
|
+
sdkVersion?: string;
|
|
77
|
+
/** Headers for the export request */
|
|
78
|
+
headers?: Record<string, string>;
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* OpenTelemetry Exporter for EvalGate WorkflowTracer
|
|
82
|
+
*/
|
|
83
|
+
export declare class OTelExporter {
|
|
84
|
+
private options;
|
|
85
|
+
constructor(options?: OTelExporterOptions);
|
|
86
|
+
/**
|
|
87
|
+
* Export workflow data from a WorkflowTracer instance
|
|
88
|
+
*/
|
|
89
|
+
exportFromTracer(tracer: WorkflowTracer): OTelExportPayload;
|
|
90
|
+
/**
|
|
91
|
+
* Export a run result as OTEL spans
|
|
92
|
+
*/
|
|
93
|
+
exportRunResult(runResult: {
|
|
94
|
+
runId: string;
|
|
95
|
+
metadata: {
|
|
96
|
+
startedAt: number;
|
|
97
|
+
completedAt: number;
|
|
98
|
+
duration: number;
|
|
99
|
+
mode: string;
|
|
100
|
+
};
|
|
101
|
+
results: Array<{
|
|
102
|
+
specId: string;
|
|
103
|
+
name: string;
|
|
104
|
+
filePath: string;
|
|
105
|
+
result: {
|
|
106
|
+
status: string;
|
|
107
|
+
score?: number;
|
|
108
|
+
duration: number;
|
|
109
|
+
error?: string;
|
|
110
|
+
};
|
|
111
|
+
}>;
|
|
112
|
+
summary: {
|
|
113
|
+
passed: number;
|
|
114
|
+
failed: number;
|
|
115
|
+
passRate: number;
|
|
116
|
+
};
|
|
117
|
+
}): OTelExportPayload;
|
|
118
|
+
/**
|
|
119
|
+
* Send payload to OTEL collector via HTTP
|
|
120
|
+
*/
|
|
121
|
+
send(payload: OTelExportPayload): Promise<boolean>;
|
|
122
|
+
private decisionToSpan;
|
|
123
|
+
private handoffToSpan;
|
|
124
|
+
private costToSpan;
|
|
125
|
+
private buildPayload;
|
|
126
|
+
}
|
|
127
|
+
/**
|
|
128
|
+
* Convenience factory
|
|
129
|
+
*/
|
|
130
|
+
export declare function createOTelExporter(options?: OTelExporterOptions): OTelExporter;
|