@evalgate/sdk 2.2.2 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/CHANGELOG.md +32 -0
  2. package/README.md +40 -1
  3. package/dist/assertions.d.ts +194 -10
  4. package/dist/assertions.js +525 -73
  5. package/dist/batch.js +4 -4
  6. package/dist/cache.d.ts +5 -1
  7. package/dist/cache.js +5 -1
  8. package/dist/cli/baseline.d.ts +14 -0
  9. package/dist/cli/baseline.js +43 -3
  10. package/dist/cli/check.d.ts +5 -2
  11. package/dist/cli/check.js +20 -12
  12. package/dist/cli/compare.d.ts +80 -0
  13. package/dist/cli/compare.js +266 -0
  14. package/dist/cli/index.js +244 -101
  15. package/dist/cli/regression-gate.js +23 -0
  16. package/dist/cli/run.js +22 -0
  17. package/dist/cli/start.d.ts +26 -0
  18. package/dist/cli/start.js +130 -0
  19. package/dist/cli/templates.d.ts +24 -0
  20. package/dist/cli/templates.js +314 -0
  21. package/dist/cli/traces.d.ts +109 -0
  22. package/dist/cli/traces.js +152 -0
  23. package/dist/cli/upgrade.js +5 -0
  24. package/dist/cli/validate.d.ts +37 -0
  25. package/dist/cli/validate.js +252 -0
  26. package/dist/cli/watch.d.ts +19 -0
  27. package/dist/cli/watch.js +175 -0
  28. package/dist/client.js +6 -13
  29. package/dist/constants.d.ts +2 -0
  30. package/dist/constants.js +5 -0
  31. package/dist/errors.js +7 -0
  32. package/dist/export.js +2 -2
  33. package/dist/index.d.ts +10 -9
  34. package/dist/index.js +24 -7
  35. package/dist/integrations/anthropic.js +6 -6
  36. package/dist/integrations/openai.js +84 -61
  37. package/dist/logger.d.ts +3 -1
  38. package/dist/logger.js +2 -1
  39. package/dist/otel.d.ts +130 -0
  40. package/dist/otel.js +309 -0
  41. package/dist/pagination.d.ts +13 -2
  42. package/dist/pagination.js +28 -2
  43. package/dist/runtime/adapters/testsuite-to-dsl.js +1 -6
  44. package/dist/runtime/eval.d.ts +14 -4
  45. package/dist/runtime/eval.js +127 -2
  46. package/dist/runtime/executor.d.ts +3 -2
  47. package/dist/runtime/executor.js +3 -2
  48. package/dist/runtime/registry.d.ts +8 -3
  49. package/dist/runtime/registry.js +15 -4
  50. package/dist/runtime/run-report.d.ts +1 -1
  51. package/dist/runtime/run-report.js +7 -4
  52. package/dist/runtime/types.d.ts +38 -0
  53. package/dist/snapshot.d.ts +12 -0
  54. package/dist/snapshot.js +24 -1
  55. package/dist/testing.d.ts +8 -0
  56. package/dist/testing.js +45 -10
  57. package/dist/version.d.ts +2 -2
  58. package/dist/version.js +2 -2
  59. package/dist/workflows.d.ts +2 -0
  60. package/dist/workflows.js +184 -102
  61. package/package.json +8 -1
package/dist/index.js CHANGED
@@ -8,8 +8,9 @@
8
8
  * @packageDocumentation
9
9
  */
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
- exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntaxAsync = exports.hasValidCodeSyntax = exports.hasSentimentAsync = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicityAsync = exports.hasNoToxicity = exports.hasNoHallucinationsAsync = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracyAsync = exports.hasFactualAccuracy = exports.getAssertionConfig = exports.followsInstructions = exports.expect = exports.containsLanguageAsync = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.configureAssertions = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
12
- exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = void 0;
11
+ exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.SDKError = exports.withinRange = exports.toSemanticallyContain = exports.similarTo = exports.respondedWithinTimeSince = exports.respondedWithinTime = exports.respondedWithinDuration = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntaxAsync = exports.hasValidCodeSyntax = exports.hasSentimentWithScore = exports.hasSentimentAsync = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicityAsync = exports.hasNoToxicity = exports.hasNoHallucinationsAsync = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracyAsync = exports.hasFactualAccuracy = exports.hasConsistencyAsync = exports.hasConsistency = exports.getAssertionConfig = exports.followsInstructions = exports.expect = exports.containsLanguageAsync = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.configureAssertions = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
12
+ exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginateGenerator = exports.autoPaginate = exports.OTelExporter = exports.createOTelExporter = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.saveSnapshot = exports.compareSnapshots = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.getFilteredSpecs = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = void 0;
13
+ exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = void 0;
13
14
  // Main SDK exports
14
15
  var client_1 = require("./client");
15
16
  Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
@@ -19,7 +20,7 @@ Object.defineProperty(exports, "AuthenticationError", { enumerable: true, get: f
19
20
  Object.defineProperty(exports, "EvalGateError", { enumerable: true, get: function () { return errors_1.EvalGateError; } });
20
21
  Object.defineProperty(exports, "NetworkError", { enumerable: true, get: function () { return errors_1.NetworkError; } });
21
22
  Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: function () { return errors_1.RateLimitError; } });
22
- Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.SDKError; } });
23
+ Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.ValidationError; } });
23
24
  // Enhanced assertions (Tier 1.3)
24
25
  var assertions_1 = require("./assertions");
25
26
  // LLM config
@@ -33,6 +34,8 @@ Object.defineProperty(exports, "containsLanguageAsync", { enumerable: true, get:
33
34
  Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
34
35
  Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
35
36
  Object.defineProperty(exports, "getAssertionConfig", { enumerable: true, get: function () { return assertions_1.getAssertionConfig; } });
37
+ Object.defineProperty(exports, "hasConsistency", { enumerable: true, get: function () { return assertions_1.hasConsistency; } });
38
+ Object.defineProperty(exports, "hasConsistencyAsync", { enumerable: true, get: function () { return assertions_1.hasConsistencyAsync; } });
36
39
  Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
37
40
  Object.defineProperty(exports, "hasFactualAccuracyAsync", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracyAsync; } });
38
41
  Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
@@ -44,6 +47,7 @@ Object.defineProperty(exports, "hasPII", { enumerable: true, get: function () {
44
47
  Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
45
48
  Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
46
49
  Object.defineProperty(exports, "hasSentimentAsync", { enumerable: true, get: function () { return assertions_1.hasSentimentAsync; } });
50
+ Object.defineProperty(exports, "hasSentimentWithScore", { enumerable: true, get: function () { return assertions_1.hasSentimentWithScore; } });
47
51
  Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
48
52
  Object.defineProperty(exports, "hasValidCodeSyntaxAsync", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntaxAsync; } });
49
53
  Object.defineProperty(exports, "isValidEmail", { enumerable: true, get: function () { return assertions_1.isValidEmail; } });
@@ -51,9 +55,15 @@ Object.defineProperty(exports, "isValidURL", { enumerable: true, get: function (
51
55
  Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
52
56
  Object.defineProperty(exports, "matchesSchema", { enumerable: true, get: function () { return assertions_1.matchesSchema; } });
53
57
  Object.defineProperty(exports, "notContainsPII", { enumerable: true, get: function () { return assertions_1.notContainsPII; } });
58
+ Object.defineProperty(exports, "respondedWithinDuration", { enumerable: true, get: function () { return assertions_1.respondedWithinDuration; } });
54
59
  Object.defineProperty(exports, "respondedWithinTime", { enumerable: true, get: function () { return assertions_1.respondedWithinTime; } });
60
+ Object.defineProperty(exports, "respondedWithinTimeSince", { enumerable: true, get: function () { return assertions_1.respondedWithinTimeSince; } });
55
61
  Object.defineProperty(exports, "similarTo", { enumerable: true, get: function () { return assertions_1.similarTo; } });
62
+ Object.defineProperty(exports, "toSemanticallyContain", { enumerable: true, get: function () { return assertions_1.toSemanticallyContain; } });
56
63
  Object.defineProperty(exports, "withinRange", { enumerable: true, get: function () { return assertions_1.withinRange; } });
64
+ // Legacy backward compat — SDKError is the old name for EvalGateError
65
+ var errors_2 = require("./errors");
66
+ Object.defineProperty(exports, "SDKError", { enumerable: true, get: function () { return errors_2.EvalGateError; } });
57
67
  // Context propagation (Tier 2.9)
58
68
  const context_1 = require("./context");
59
69
  Object.defineProperty(exports, "createContext", { enumerable: true, get: function () { return context_1.createContext; } });
@@ -71,6 +81,7 @@ Object.defineProperty(exports, "createResult", { enumerable: true, get: function
71
81
  Object.defineProperty(exports, "defineEval", { enumerable: true, get: function () { return eval_1.defineEval; } });
72
82
  Object.defineProperty(exports, "defineSuite", { enumerable: true, get: function () { return eval_1.defineSuite; } });
73
83
  Object.defineProperty(exports, "evalai", { enumerable: true, get: function () { return eval_1.evalai; } });
84
+ Object.defineProperty(exports, "getFilteredSpecs", { enumerable: true, get: function () { return eval_1.getFilteredSpecs; } });
74
85
  var executor_1 = require("./runtime/executor");
75
86
  Object.defineProperty(exports, "createLocalExecutor", { enumerable: true, get: function () { return executor_1.createLocalExecutor; } });
76
87
  Object.defineProperty(exports, "defaultLocalExecutor", { enumerable: true, get: function () { return executor_1.defaultLocalExecutor; } });
@@ -91,8 +102,8 @@ Object.defineProperty(exports, "createTestSuite", { enumerable: true, get: funct
91
102
  Object.defineProperty(exports, "TestSuite", { enumerable: true, get: function () { return testing_1.TestSuite; } });
92
103
  // Snapshot testing (Tier 2.8)
93
104
  const snapshot_1 = require("./snapshot");
105
+ Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareSnapshots; } });
94
106
  Object.defineProperty(exports, "compareWithSnapshot", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
95
- Object.defineProperty(exports, "compareSnapshots", { enumerable: true, get: function () { return snapshot_1.compareWithSnapshot; } });
96
107
  Object.defineProperty(exports, "snapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
97
108
  Object.defineProperty(exports, "saveSnapshot", { enumerable: true, get: function () { return snapshot_1.snapshot; } });
98
109
  // Export/Import utilities (Tier 4.18)
@@ -104,11 +115,12 @@ Object.defineProperty(exports, "importData", { enumerable: true, get: function (
104
115
  var batch_1 = require("./batch");
105
116
  Object.defineProperty(exports, "RequestBatcher", { enumerable: true, get: function () { return batch_1.RequestBatcher; } });
106
117
  // Performance optimization utilities (v1.3.0)
107
- // Note: RequestCache and CacheTTL are for advanced users only
108
- // Most users don't need these - caching is automatic
118
+ // Note: CacheTTL is for advanced users only
119
+ // Most users don't need this - caching is automatic
120
+ // RequestCache is intentionally NOT exported — it's an internal HTTP cache.
121
+ // Use CacheTTL to configure cache durations via client options.
109
122
  var cache_1 = require("./cache");
110
123
  Object.defineProperty(exports, "CacheTTL", { enumerable: true, get: function () { return cache_1.CacheTTL; } });
111
- Object.defineProperty(exports, "RequestCache", { enumerable: true, get: function () { return cache_1.RequestCache; } });
112
124
  // CLI (programmatic use)
113
125
  var check_1 = require("./cli/check");
114
126
  Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
@@ -128,8 +140,13 @@ Object.defineProperty(exports, "Logger", { enumerable: true, get: function () {
128
140
  // Vitest matcher: expect(await openAIChatEval(...)).toPassGate()
129
141
  var matchers_1 = require("./matchers");
130
142
  Object.defineProperty(exports, "extendExpectWithToPassGate", { enumerable: true, get: function () { return matchers_1.extendExpectWithToPassGate; } });
143
+ // OpenTelemetry export
144
+ var otel_1 = require("./otel");
145
+ Object.defineProperty(exports, "createOTelExporter", { enumerable: true, get: function () { return otel_1.createOTelExporter; } });
146
+ Object.defineProperty(exports, "OTelExporter", { enumerable: true, get: function () { return otel_1.OTelExporter; } });
131
147
  var pagination_1 = require("./pagination");
132
148
  Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
149
+ Object.defineProperty(exports, "autoPaginateGenerator", { enumerable: true, get: function () { return pagination_1.autoPaginateGenerator; } });
133
150
  Object.defineProperty(exports, "createPaginatedIterator", { enumerable: true, get: function () { return pagination_1.createPaginatedIterator; } });
134
151
  Object.defineProperty(exports, "decodeCursor", { enumerable: true, get: function () { return pagination_1.decodeCursor; } });
135
152
  Object.defineProperty(exports, "encodeCursor", { enumerable: true, get: function () { return pagination_1.encodeCursor; } });
package/dist/integrations/anthropic.js CHANGED
@@ -67,7 +67,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
67
67
  }
68
68
  : {}),
69
69
  });
70
- await evalClient.traces.create({
70
+ await evalClient.traces?.create({
71
71
  name: `Anthropic: ${params.model}`,
72
72
  traceId,
73
73
  organizationId: organizationId || evalClient.getOrganizationId(),
@@ -89,7 +89,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
89
89
  error: error instanceof Error ? error.message : String(error),
90
90
  });
91
91
  await evalClient.traces
92
- .create({
92
+ ?.create({
93
93
  name: `Anthropic: ${params.model}`,
94
94
  traceId,
95
95
  organizationId: organizationId || evalClient.getOrganizationId(),
@@ -97,7 +97,7 @@ function traceAnthropic(anthropic, evalClient, options = {}) {
97
97
  durationMs,
98
98
  metadata: errorMetadata,
99
99
  })
100
- .catch(() => {
100
+ ?.catch(() => {
101
101
  // Ignore errors in trace creation to avoid masking the original error
102
102
  });
103
103
  throw error;
@@ -127,7 +127,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
127
127
  const startTime = Date.now();
128
128
  const traceId = `anthropic-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
129
129
  try {
130
- await evalClient.traces.create({
130
+ await evalClient.traces?.create({
131
131
  name,
132
132
  traceId,
133
133
  organizationId: options.organizationId || evalClient.getOrganizationId(),
@@ -136,7 +136,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
136
136
  });
137
137
  const result = await fn();
138
138
  const durationMs = Date.now() - startTime;
139
- await evalClient.traces.create({
139
+ await evalClient.traces?.create({
140
140
  name,
141
141
  traceId,
142
142
  organizationId: options.organizationId || evalClient.getOrganizationId(),
@@ -148,7 +148,7 @@ async function traceAnthropicCall(evalClient, name, fn, options = {}) {
148
148
  }
149
149
  catch (error) {
150
150
  const durationMs = Date.now() - startTime;
151
- await evalClient.traces.create({
151
+ await evalClient.traces?.create({
152
152
  name,
153
153
  traceId,
154
154
  organizationId: options.organizationId || evalClient.getOrganizationId(),
package/dist/integrations/openai.js CHANGED
@@ -52,52 +52,59 @@ function traceOpenAI(openai, evalClient, options = {}) {
52
52
  const response = await originalCreate(params, requestOptions);
53
53
  const durationMs = Date.now() - startTime;
54
54
  // Create trace with success status and complete metadata
55
- const traceMetadata = (0, context_1.mergeWithContext)({
56
- model: params.model,
57
- temperature: params.temperature,
58
- max_tokens: params.max_tokens,
59
- ...(captureInput ? { input: params.messages } : {}),
60
- ...(captureOutput ? { output: response.choices[0]?.message } : {}),
61
- ...(captureMetadata
62
- ? {
63
- usage: response.usage,
64
- finish_reason: response.choices[0]?.finish_reason,
65
- }
66
- : {}),
67
- });
68
- await evalClient.traces.create({
69
- name: `OpenAI: ${params.model}`,
70
- traceId,
71
- organizationId: organizationId || evalClient.getOrganizationId(),
72
- status: "success",
73
- durationMs,
74
- metadata: traceMetadata,
75
- });
55
+ // Trace creation is non-fatal — never lose the OpenAI result due to tracing issues
56
+ try {
57
+ const traceMetadata = (0, context_1.mergeWithContext)({
58
+ model: params.model,
59
+ temperature: params.temperature,
60
+ max_tokens: params.max_tokens,
61
+ ...(captureInput ? { input: params.messages } : {}),
62
+ ...(captureOutput ? { output: response.choices[0]?.message } : {}),
63
+ ...(captureMetadata
64
+ ? {
65
+ usage: response.usage,
66
+ finish_reason: response.choices[0]?.finish_reason,
67
+ }
68
+ : {}),
69
+ });
70
+ await evalClient.traces?.create({
71
+ name: `OpenAI: ${params.model}`,
72
+ traceId,
73
+ organizationId: organizationId || evalClient.getOrganizationId(),
74
+ status: "success",
75
+ durationMs,
76
+ metadata: traceMetadata,
77
+ });
78
+ }
79
+ catch {
80
+ /* trace failure is non-fatal */
81
+ }
76
82
  return response;
77
83
  }
78
84
  catch (error) {
79
85
  const durationMs = Date.now() - startTime;
80
- // Create trace with error status
81
- const errorMetadata = (0, context_1.mergeWithContext)({
82
- model: params.model,
83
- temperature: params.temperature,
84
- max_tokens: params.max_tokens,
85
- ...(captureInput ? { input: params.messages } : {}),
86
- ...(captureMetadata ? { params } : {}),
87
- error: error instanceof Error ? error.message : String(error),
88
- });
89
- await evalClient.traces
90
- .create({
91
- name: `OpenAI: ${params.model}`,
92
- traceId,
93
- organizationId: organizationId || evalClient.getOrganizationId(),
94
- status: "error",
95
- durationMs,
96
- metadata: errorMetadata,
97
- })
98
- .catch(() => {
99
- // Ignore errors in trace creation to avoid masking the original error
100
- });
86
+ // Create trace with error status — non-fatal
87
+ try {
88
+ const errorMetadata = (0, context_1.mergeWithContext)({
89
+ model: params.model,
90
+ temperature: params.temperature,
91
+ max_tokens: params.max_tokens,
92
+ ...(captureInput ? { input: params.messages } : {}),
93
+ ...(captureMetadata ? { params } : {}),
94
+ error: error instanceof Error ? error.message : String(error),
95
+ });
96
+ await evalClient.traces?.create({
97
+ name: `OpenAI: ${params.model}`,
98
+ traceId,
99
+ organizationId: organizationId || evalClient.getOrganizationId(),
100
+ status: "error",
101
+ durationMs,
102
+ metadata: errorMetadata,
103
+ });
104
+ }
105
+ catch {
106
+ /* trace failure is non-fatal */
107
+ }
101
108
  throw error;
102
109
  }
103
110
  };
@@ -123,38 +130,54 @@ function traceOpenAI(openai, evalClient, options = {}) {
123
130
  async function traceOpenAICall(evalClient, name, fn, options = {}) {
124
131
  const startTime = Date.now();
125
132
  const traceId = `openai-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
133
+ // Trace creation is non-fatal — never lose the fn() result due to tracing issues
126
134
  try {
127
- await evalClient.traces.create({
135
+ await evalClient.traces?.create({
128
136
  name,
129
137
  traceId,
130
138
  organizationId: options.organizationId || evalClient.getOrganizationId(),
131
139
  status: "pending",
132
140
  metadata: (0, context_1.mergeWithContext)({}),
133
141
  });
142
+ }
143
+ catch {
144
+ /* trace failure is non-fatal */
145
+ }
146
+ try {
134
147
  const result = await fn();
135
148
  const durationMs = Date.now() - startTime;
136
- await evalClient.traces.create({
137
- name,
138
- traceId,
139
- organizationId: options.organizationId || evalClient.getOrganizationId(),
140
- status: "success",
141
- durationMs,
142
- metadata: (0, context_1.mergeWithContext)({}),
143
- });
149
+ try {
150
+ await evalClient.traces?.create({
151
+ name,
152
+ traceId,
153
+ organizationId: options.organizationId || evalClient.getOrganizationId(),
154
+ status: "success",
155
+ durationMs,
156
+ metadata: (0, context_1.mergeWithContext)({}),
157
+ });
158
+ }
159
+ catch {
160
+ /* trace failure is non-fatal */
161
+ }
144
162
  return result;
145
163
  }
146
164
  catch (error) {
147
165
  const durationMs = Date.now() - startTime;
148
- await evalClient.traces.create({
149
- name,
150
- traceId,
151
- organizationId: options.organizationId || evalClient.getOrganizationId(),
152
- status: "error",
153
- durationMs,
154
- metadata: (0, context_1.mergeWithContext)({
155
- error: error instanceof Error ? error.message : String(error),
156
- }),
157
- });
166
+ try {
167
+ await evalClient.traces?.create({
168
+ name,
169
+ traceId,
170
+ organizationId: options.organizationId || evalClient.getOrganizationId(),
171
+ status: "error",
172
+ durationMs,
173
+ metadata: (0, context_1.mergeWithContext)({
174
+ error: error instanceof Error ? error.message : String(error),
175
+ }),
176
+ });
177
+ }
178
+ catch {
179
+ /* trace failure is non-fatal */
180
+ }
158
181
  throw error;
159
182
  }
160
183
  }
package/dist/logger.d.ts CHANGED
@@ -69,7 +69,9 @@ export declare class Logger {
69
69
  /**
70
70
  * Create child logger with prefix
71
71
  */
72
- child(prefix: string): Logger;
72
+ child(prefix: string | {
73
+ prefix: string;
74
+ }): Logger;
73
75
  /**
74
76
  * Set log level
75
77
  */
package/dist/logger.js CHANGED
@@ -93,9 +93,10 @@ class Logger {
93
93
  * Create child logger with prefix
94
94
  */
95
95
  child(prefix) {
96
+ const resolvedPrefix = typeof prefix === "string" ? prefix : prefix.prefix;
96
97
  return new Logger({
97
98
  ...this.options,
98
- prefix: `${this.options.prefix}:${prefix}`,
99
+ prefix: `${this.options.prefix}:${resolvedPrefix}`,
99
100
  });
100
101
  }
101
102
  /**
package/dist/otel.d.ts ADDED
@@ -0,0 +1,130 @@
1
+ /**
2
+ * OpenTelemetry Export for WorkflowTracer
3
+ *
4
+ * Converts WorkflowTracer spans, decisions, and costs into
5
+ * OpenTelemetry-compatible span data for export to any OTEL collector.
6
+ *
7
+ * Usage:
8
+ * import { OTelExporter } from "@evalgate/sdk/otel";
9
+ *
10
+ * const exporter = new OTelExporter({ endpoint: "http://localhost:4318" });
11
+ * const tracer = new WorkflowTracer(client, { debug: true });
12
+ * // ... run workflow ...
13
+ * await exporter.exportFromTracer(tracer);
14
+ */
15
+ import type { WorkflowTracer } from "./workflows";
16
+ /**
17
+ * OTEL-compatible span representation
18
+ * Follows the OpenTelemetry Trace specification
19
+ */
20
+ export interface OTelSpan {
21
+ traceId: string;
22
+ spanId: string;
23
+ parentSpanId?: string;
24
+ name: string;
25
+ /** OTLP SpanKind: 0=UNSPECIFIED, 1=INTERNAL, 2=SERVER, 3=CLIENT, 4=PRODUCER, 5=CONSUMER */
26
+ kind: 0 | 1 | 2 | 3 | 4 | 5;
27
+ startTimeUnixNano: string;
28
+ endTimeUnixNano: string;
29
+ attributes: OTelAttribute[];
30
+ /** OTLP StatusCode: 0=STATUS_CODE_UNSET, 1=STATUS_CODE_OK, 2=STATUS_CODE_ERROR */
31
+ status: {
32
+ code: 0 | 1 | 2;
33
+ message?: string;
34
+ };
35
+ events: OTelEvent[];
36
+ }
37
+ export interface OTelAttribute {
38
+ key: string;
39
+ value: {
40
+ stringValue?: string;
41
+ intValue?: string;
42
+ doubleValue?: number;
43
+ boolValue?: boolean;
44
+ };
45
+ }
46
+ export interface OTelEvent {
47
+ name: string;
48
+ timeUnixNano: string;
49
+ attributes: OTelAttribute[];
50
+ }
51
+ /**
52
+ * OTEL export payload (OTLP JSON format)
53
+ */
54
+ export interface OTelExportPayload {
55
+ resourceSpans: Array<{
56
+ resource: {
57
+ attributes: OTelAttribute[];
58
+ };
59
+ scopeSpans: Array<{
60
+ scope: {
61
+ name: string;
62
+ version: string;
63
+ };
64
+ spans: OTelSpan[];
65
+ }>;
66
+ }>;
67
+ }
68
+ export interface OTelExporterOptions {
69
+ /** OTEL collector endpoint (default: http://localhost:4318/v1/traces) */
70
+ endpoint?: string;
71
+ /** Service name for resource attributes */
72
+ serviceName?: string;
73
+ /** Additional resource attributes */
74
+ resourceAttributes?: Record<string, string>;
75
+ /** SDK version */
76
+ sdkVersion?: string;
77
+ /** Headers for the export request */
78
+ headers?: Record<string, string>;
79
+ }
80
+ /**
81
+ * OpenTelemetry Exporter for EvalGate WorkflowTracer
82
+ */
83
+ export declare class OTelExporter {
84
+ private options;
85
+ constructor(options?: OTelExporterOptions);
86
+ /**
87
+ * Export workflow data from a WorkflowTracer instance
88
+ */
89
+ exportFromTracer(tracer: WorkflowTracer): OTelExportPayload;
90
+ /**
91
+ * Export a run result as OTEL spans
92
+ */
93
+ exportRunResult(runResult: {
94
+ runId: string;
95
+ metadata: {
96
+ startedAt: number;
97
+ completedAt: number;
98
+ duration: number;
99
+ mode: string;
100
+ };
101
+ results: Array<{
102
+ specId: string;
103
+ name: string;
104
+ filePath: string;
105
+ result: {
106
+ status: string;
107
+ score?: number;
108
+ duration: number;
109
+ error?: string;
110
+ };
111
+ }>;
112
+ summary: {
113
+ passed: number;
114
+ failed: number;
115
+ passRate: number;
116
+ };
117
+ }): OTelExportPayload;
118
+ /**
119
+ * Send payload to OTEL collector via HTTP
120
+ */
121
+ send(payload: OTelExportPayload): Promise<boolean>;
122
+ private decisionToSpan;
123
+ private handoffToSpan;
124
+ private costToSpan;
125
+ private buildPayload;
126
+ }
127
+ /**
128
+ * Convenience factory
129
+ */
130
+ export declare function createOTelExporter(options?: OTelExporterOptions): OTelExporter;