@evalgate/sdk 2.2.3 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/CHANGELOG.md +5 -0
  2. package/README.md +38 -1
  3. package/dist/assertions.d.ts +185 -5
  4. package/dist/assertions.js +496 -61
  5. package/dist/batch.js +4 -4
  6. package/dist/cache.d.ts +4 -0
  7. package/dist/cache.js +4 -0
  8. package/dist/cli/baseline.d.ts +14 -0
  9. package/dist/cli/baseline.js +43 -3
  10. package/dist/cli/check.d.ts +5 -2
  11. package/dist/cli/check.js +20 -12
  12. package/dist/cli/compare.d.ts +80 -0
  13. package/dist/cli/compare.js +266 -0
  14. package/dist/cli/index.js +244 -101
  15. package/dist/cli/regression-gate.js +23 -0
  16. package/dist/cli/run.js +22 -0
  17. package/dist/cli/start.d.ts +26 -0
  18. package/dist/cli/start.js +130 -0
  19. package/dist/cli/templates.d.ts +24 -0
  20. package/dist/cli/templates.js +314 -0
  21. package/dist/cli/traces.d.ts +109 -0
  22. package/dist/cli/traces.js +152 -0
  23. package/dist/cli/validate.d.ts +37 -0
  24. package/dist/cli/validate.js +252 -0
  25. package/dist/cli/watch.d.ts +19 -0
  26. package/dist/cli/watch.js +175 -0
  27. package/dist/client.js +6 -13
  28. package/dist/constants.d.ts +2 -0
  29. package/dist/constants.js +5 -0
  30. package/dist/index.d.ts +7 -6
  31. package/dist/index.js +22 -6
  32. package/dist/integrations/openai.js +83 -60
  33. package/dist/logger.d.ts +3 -1
  34. package/dist/logger.js +2 -1
  35. package/dist/otel.d.ts +130 -0
  36. package/dist/otel.js +309 -0
  37. package/dist/runtime/eval.d.ts +14 -4
  38. package/dist/runtime/eval.js +127 -2
  39. package/dist/runtime/registry.d.ts +4 -2
  40. package/dist/runtime/registry.js +11 -3
  41. package/dist/runtime/run-report.d.ts +1 -1
  42. package/dist/runtime/run-report.js +7 -4
  43. package/dist/runtime/types.d.ts +38 -0
  44. package/dist/testing.d.ts +8 -0
  45. package/dist/testing.js +45 -10
  46. package/dist/version.d.ts +1 -1
  47. package/dist/version.js +1 -1
  48. package/dist/workflows.d.ts +2 -0
  49. package/dist/workflows.js +184 -102
  50. package/package.json +8 -1
@@ -52,52 +52,59 @@ function traceOpenAI(openai, evalClient, options = {}) {
52
52
  const response = await originalCreate(params, requestOptions);
53
53
  const durationMs = Date.now() - startTime;
54
54
  // Create trace with success status and complete metadata
55
- const traceMetadata = (0, context_1.mergeWithContext)({
56
- model: params.model,
57
- temperature: params.temperature,
58
- max_tokens: params.max_tokens,
59
- ...(captureInput ? { input: params.messages } : {}),
60
- ...(captureOutput ? { output: response.choices[0]?.message } : {}),
61
- ...(captureMetadata
62
- ? {
63
- usage: response.usage,
64
- finish_reason: response.choices[0]?.finish_reason,
65
- }
66
- : {}),
67
- });
68
- await evalClient.traces?.create({
69
- name: `OpenAI: ${params.model}`,
70
- traceId,
71
- organizationId: organizationId || evalClient.getOrganizationId(),
72
- status: "success",
73
- durationMs,
74
- metadata: traceMetadata,
75
- });
55
+ // Trace creation is non-fatal — never lose the OpenAI result due to tracing issues
56
+ try {
57
+ const traceMetadata = (0, context_1.mergeWithContext)({
58
+ model: params.model,
59
+ temperature: params.temperature,
60
+ max_tokens: params.max_tokens,
61
+ ...(captureInput ? { input: params.messages } : {}),
62
+ ...(captureOutput ? { output: response.choices[0]?.message } : {}),
63
+ ...(captureMetadata
64
+ ? {
65
+ usage: response.usage,
66
+ finish_reason: response.choices[0]?.finish_reason,
67
+ }
68
+ : {}),
69
+ });
70
+ await evalClient.traces?.create({
71
+ name: `OpenAI: ${params.model}`,
72
+ traceId,
73
+ organizationId: organizationId || evalClient.getOrganizationId(),
74
+ status: "success",
75
+ durationMs,
76
+ metadata: traceMetadata,
77
+ });
78
+ }
79
+ catch {
80
+ /* trace failure is non-fatal */
81
+ }
76
82
  return response;
77
83
  }
78
84
  catch (error) {
79
85
  const durationMs = Date.now() - startTime;
80
- // Create trace with error status
81
- const errorMetadata = (0, context_1.mergeWithContext)({
82
- model: params.model,
83
- temperature: params.temperature,
84
- max_tokens: params.max_tokens,
85
- ...(captureInput ? { input: params.messages } : {}),
86
- ...(captureMetadata ? { params } : {}),
87
- error: error instanceof Error ? error.message : String(error),
88
- });
89
- await evalClient.traces
90
- ?.create({
91
- name: `OpenAI: ${params.model}`,
92
- traceId,
93
- organizationId: organizationId || evalClient.getOrganizationId(),
94
- status: "error",
95
- durationMs,
96
- metadata: errorMetadata,
97
- })
98
- ?.catch(() => {
99
- // Ignore errors in trace creation to avoid masking the original error
100
- });
86
+ // Create trace with error status — non-fatal
87
+ try {
88
+ const errorMetadata = (0, context_1.mergeWithContext)({
89
+ model: params.model,
90
+ temperature: params.temperature,
91
+ max_tokens: params.max_tokens,
92
+ ...(captureInput ? { input: params.messages } : {}),
93
+ ...(captureMetadata ? { params } : {}),
94
+ error: error instanceof Error ? error.message : String(error),
95
+ });
96
+ await evalClient.traces?.create({
97
+ name: `OpenAI: ${params.model}`,
98
+ traceId,
99
+ organizationId: organizationId || evalClient.getOrganizationId(),
100
+ status: "error",
101
+ durationMs,
102
+ metadata: errorMetadata,
103
+ });
104
+ }
105
+ catch {
106
+ /* trace failure is non-fatal */
107
+ }
101
108
  throw error;
102
109
  }
103
110
  };
@@ -123,6 +130,7 @@ function traceOpenAI(openai, evalClient, options = {}) {
123
130
  async function traceOpenAICall(evalClient, name, fn, options = {}) {
124
131
  const startTime = Date.now();
125
132
  const traceId = `openai-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
133
+ // Trace creation is non-fatal — never lose the fn() result due to tracing issues
126
134
  try {
127
135
  await evalClient.traces?.create({
128
136
  name,
@@ -131,30 +139,45 @@ async function traceOpenAICall(evalClient, name, fn, options = {}) {
131
139
  status: "pending",
132
140
  metadata: (0, context_1.mergeWithContext)({}),
133
141
  });
142
+ }
143
+ catch {
144
+ /* trace failure is non-fatal */
145
+ }
146
+ try {
134
147
  const result = await fn();
135
148
  const durationMs = Date.now() - startTime;
136
- await evalClient.traces?.create({
137
- name,
138
- traceId,
139
- organizationId: options.organizationId || evalClient.getOrganizationId(),
140
- status: "success",
141
- durationMs,
142
- metadata: (0, context_1.mergeWithContext)({}),
143
- });
149
+ try {
150
+ await evalClient.traces?.create({
151
+ name,
152
+ traceId,
153
+ organizationId: options.organizationId || evalClient.getOrganizationId(),
154
+ status: "success",
155
+ durationMs,
156
+ metadata: (0, context_1.mergeWithContext)({}),
157
+ });
158
+ }
159
+ catch {
160
+ /* trace failure is non-fatal */
161
+ }
144
162
  return result;
145
163
  }
146
164
  catch (error) {
147
165
  const durationMs = Date.now() - startTime;
148
- await evalClient.traces?.create({
149
- name,
150
- traceId,
151
- organizationId: options.organizationId || evalClient.getOrganizationId(),
152
- status: "error",
153
- durationMs,
154
- metadata: (0, context_1.mergeWithContext)({
155
- error: error instanceof Error ? error.message : String(error),
156
- }),
157
- });
166
+ try {
167
+ await evalClient.traces?.create({
168
+ name,
169
+ traceId,
170
+ organizationId: options.organizationId || evalClient.getOrganizationId(),
171
+ status: "error",
172
+ durationMs,
173
+ metadata: (0, context_1.mergeWithContext)({
174
+ error: error instanceof Error ? error.message : String(error),
175
+ }),
176
+ });
177
+ }
178
+ catch {
179
+ /* trace failure is non-fatal */
180
+ }
158
181
  throw error;
159
182
  }
160
183
  }
package/dist/logger.d.ts CHANGED
@@ -69,7 +69,9 @@ export declare class Logger {
69
69
  /**
70
70
  * Create child logger with prefix
71
71
  */
72
- child(prefix: string): Logger;
72
+ child(prefix: string | {
73
+ prefix: string;
74
+ }): Logger;
73
75
  /**
74
76
  * Set log level
75
77
  */
package/dist/logger.js CHANGED
@@ -93,9 +93,10 @@ class Logger {
93
93
  * Create child logger with prefix
94
94
  */
95
95
  child(prefix) {
96
+ const resolvedPrefix = typeof prefix === "string" ? prefix : prefix.prefix;
96
97
  return new Logger({
97
98
  ...this.options,
98
- prefix: `${this.options.prefix}:${prefix}`,
99
+ prefix: `${this.options.prefix}:${resolvedPrefix}`,
99
100
  });
100
101
  }
101
102
  /**
package/dist/otel.d.ts ADDED
@@ -0,0 +1,130 @@
1
+ /**
2
+ * OpenTelemetry Export for WorkflowTracer
3
+ *
4
+ * Converts WorkflowTracer spans, decisions, and costs into
5
+ * OpenTelemetry-compatible span data for export to any OTEL collector.
6
+ *
7
+ * Usage:
8
+ * import { OTelExporter } from "@evalgate/sdk/otel";
9
+ *
10
+ * const exporter = new OTelExporter({ endpoint: "http://localhost:4318/v1/traces" });
11
+ * const tracer = new WorkflowTracer(client, { debug: true });
12
+ * // ... run workflow ...
13
+ * await exporter.send(exporter.exportFromTracer(tracer));
14
+ */
15
+ import type { WorkflowTracer } from "./workflows";
16
+ /**
17
+ * OTEL-compatible span representation
18
+ * Follows the OpenTelemetry Trace specification
19
+ */
20
+ export interface OTelSpan {
21
+ traceId: string;
22
+ spanId: string;
23
+ parentSpanId?: string;
24
+ name: string;
25
+ /** OTLP SpanKind: 0=UNSPECIFIED, 1=INTERNAL, 2=SERVER, 3=CLIENT, 4=PRODUCER, 5=CONSUMER */
26
+ kind: 0 | 1 | 2 | 3 | 4 | 5;
27
+ startTimeUnixNano: string;
28
+ endTimeUnixNano: string;
29
+ attributes: OTelAttribute[];
30
+ /** OTLP StatusCode: 0=STATUS_CODE_UNSET, 1=STATUS_CODE_OK, 2=STATUS_CODE_ERROR */
31
+ status: {
32
+ code: 0 | 1 | 2;
33
+ message?: string;
34
+ };
35
+ events: OTelEvent[];
36
+ }
37
+ export interface OTelAttribute {
38
+ key: string;
39
+ value: {
40
+ stringValue?: string;
41
+ intValue?: string;
42
+ doubleValue?: number;
43
+ boolValue?: boolean;
44
+ };
45
+ }
46
+ export interface OTelEvent {
47
+ name: string;
48
+ timeUnixNano: string;
49
+ attributes: OTelAttribute[];
50
+ }
51
+ /**
52
+ * OTEL export payload (OTLP JSON format)
53
+ */
54
+ export interface OTelExportPayload {
55
+ resourceSpans: Array<{
56
+ resource: {
57
+ attributes: OTelAttribute[];
58
+ };
59
+ scopeSpans: Array<{
60
+ scope: {
61
+ name: string;
62
+ version: string;
63
+ };
64
+ spans: OTelSpan[];
65
+ }>;
66
+ }>;
67
+ }
68
+ export interface OTelExporterOptions {
69
+ /** OTEL collector endpoint (default: http://localhost:4318/v1/traces) */
70
+ endpoint?: string;
71
+ /** Service name for resource attributes */
72
+ serviceName?: string;
73
+ /** Additional resource attributes */
74
+ resourceAttributes?: Record<string, string>;
75
+ /** SDK version */
76
+ sdkVersion?: string;
77
+ /** Headers for the export request */
78
+ headers?: Record<string, string>;
79
+ }
80
+ /**
81
+ * OpenTelemetry Exporter for EvalGate WorkflowTracer
82
+ */
83
+ export declare class OTelExporter {
84
+ private options;
85
+ constructor(options?: OTelExporterOptions);
86
+ /**
87
+ * Export workflow data from a WorkflowTracer instance
88
+ */
89
+ exportFromTracer(tracer: WorkflowTracer): OTelExportPayload;
90
+ /**
91
+ * Export a run result as OTEL spans
92
+ */
93
+ exportRunResult(runResult: {
94
+ runId: string;
95
+ metadata: {
96
+ startedAt: number;
97
+ completedAt: number;
98
+ duration: number;
99
+ mode: string;
100
+ };
101
+ results: Array<{
102
+ specId: string;
103
+ name: string;
104
+ filePath: string;
105
+ result: {
106
+ status: string;
107
+ score?: number;
108
+ duration: number;
109
+ error?: string;
110
+ };
111
+ }>;
112
+ summary: {
113
+ passed: number;
114
+ failed: number;
115
+ passRate: number;
116
+ };
117
+ }): OTelExportPayload;
118
+ /**
119
+ * Send payload to OTEL collector via HTTP
120
+ */
121
+ send(payload: OTelExportPayload): Promise<boolean>;
122
+ private decisionToSpan;
123
+ private handoffToSpan;
124
+ private costToSpan;
125
+ private buildPayload;
126
+ }
127
+ /**
128
+ * Convenience factory
129
+ */
130
+ export declare function createOTelExporter(options?: OTelExporterOptions): OTelExporter;
package/dist/otel.js ADDED
@@ -0,0 +1,309 @@
1
+ "use strict";
2
+ /**
3
+ * OpenTelemetry Export for WorkflowTracer
4
+ *
5
+ * Converts WorkflowTracer spans, decisions, and costs into
6
+ * OpenTelemetry-compatible span data for export to any OTEL collector.
7
+ *
8
+ * Usage:
9
+ * import { OTelExporter } from "@evalgate/sdk/otel";
10
+ *
11
+ * const exporter = new OTelExporter({ endpoint: "http://localhost:4318/v1/traces" });
12
+ * const tracer = new WorkflowTracer(client, { debug: true });
13
+ * // ... run workflow ...
14
+ * await exporter.send(exporter.exportFromTracer(tracer));
15
+ */
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.OTelExporter = void 0;
18
+ exports.createOTelExporter = createOTelExporter;
19
/**
 * Produce `byteCount` random bytes rendered as a lowercase hex string.
 *
 * Prefers the Web Crypto generator when the runtime exposes one and falls
 * back to Math.random otherwise. An all-zero result is re-drawn because
 * OpenTelemetry treats all-zero trace/span IDs as invalid.
 */
function randomHexId(byteCount) {
    const bytes = new Uint8Array(byteCount);
    // Element access keeps this working on runtimes without a global `crypto`.
    const webCrypto = globalThis["crypto"];
    const hasSecureSource = Boolean(webCrypto) && typeof webCrypto.getRandomValues === "function";
    do {
        if (hasSecureSource) {
            webCrypto.getRandomValues(bytes);
        }
        else {
            for (let i = 0; i < byteCount; i++) {
                bytes[i] = Math.floor(Math.random() * 256);
            }
        }
    } while (byteCount > 0 && bytes.every((b) => b === 0));
    return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
}
/**
 * Generate a random 16-byte trace ID as 32 lowercase hex characters.
 */
function generateTraceId() {
    return randomHexId(16);
}
/**
 * Generate a random 8-byte span ID as 16 lowercase hex characters.
 */
function generateSpanId() {
    return randomHexId(8);
}
43
/**
 * Convert a millisecond value to a nanosecond count rendered as a decimal
 * string (the form used for the `*UnixNano` span fields).
 *
 * Fractional milliseconds are supported: the whole and fractional parts are
 * converted separately because BigInt() rejects non-integer numbers.
 *
 * @param ms Milliseconds (may be fractional).
 * @returns Nanoseconds as a base-10 string.
 * @throws RangeError when `ms` is NaN or infinite.
 */
function msToNano(ms) {
    const value = Number(ms);
    if (!Number.isFinite(value)) {
        throw new RangeError(`msToNano: expected a finite millisecond value, got ${String(ms)}`);
    }
    const wholeMs = Math.trunc(value);
    // Sub-millisecond remainder, rounded to the nearest nanosecond.
    const remainderNanos = Math.round((value - wholeMs) * 1000000);
    return `${BigInt(wholeMs) * BigInt(1000000) + BigInt(remainderNanos)}`;
}
49
/**
 * Create an OTEL attribute (`{ key, value: { <kind>Value } }`).
 *
 * Mapping:
 * - string          -> stringValue
 * - boolean         -> boolValue
 * - integer number  -> intValue (as a decimal string)
 * - finite fraction -> doubleValue
 * - NaN / Infinity  -> stringValue (JSON.stringify would turn them into null)
 * - null/undefined  -> empty stringValue
 * - anything else   -> stringValue holding its JSON (or String()) form
 *
 * @param key Attribute name.
 * @param value Attribute value.
 */
function attr(key, value) {
    switch (typeof value) {
        case "string":
            return { key, value: { stringValue: value } };
        case "boolean":
            return { key, value: { boolValue: value } };
        case "number":
            if (Number.isInteger(value)) {
                return { key, value: { intValue: String(value) } };
            }
            if (Number.isFinite(value)) {
                return { key, value: { doubleValue: value } };
            }
            return { key, value: { stringValue: String(value) } };
        case "bigint":
            return { key, value: { intValue: String(value) } };
        default:
            break;
    }
    if (value === null || value === undefined) {
        return { key, value: { stringValue: "" } };
    }
    // Objects, arrays, etc.: never label them as booleans; serialize instead.
    let text;
    try {
        text = JSON.stringify(value);
    }
    catch {
        text = undefined;
    }
    return { key, value: { stringValue: text ?? String(value) } };
}
64
/**
 * OpenTelemetry Exporter for EvalGate WorkflowTracer
 *
 * Builds OTLP/JSON-shaped payloads (`resourceSpans` -> `scopeSpans` -> `spans`)
 * from a tracer or from a run result, and can POST a payload to a collector.
 * Building a payload performs no I/O; only `send()` touches the network.
 */
class OTelExporter {
    /**
     * @param options Optional overrides; every field falls back to a default
     *   (endpoint `http://localhost:4318/v1/traces`, service name `evalgate`).
     */
    constructor(options = {}) {
        this.options = {
            endpoint: options.endpoint ?? "http://localhost:4318/v1/traces",
            serviceName: options.serviceName ?? "evalgate",
            resourceAttributes: options.resourceAttributes ?? {},
            // NOTE(review): hard-coded default; keep in sync with the package version.
            sdkVersion: options.sdkVersion ?? "2.2.4",
            headers: options.headers ?? {},
        };
    }
    /**
     * Export workflow data from a WorkflowTracer instance
     *
     * Emits one root span for the current workflow (when there is one) plus
     * one span per decision, handoff and cost record. Synchronous: returns
     * the payload, it does not send it.
     */
    exportFromTracer(tracer) {
        const workflow = tracer.getCurrentWorkflow();
        const handoffs = tracer.getHandoffs();
        const decisions = tracer.getDecisions();
        const costs = tracer.getCosts();
        const traceId = generateTraceId();
        const rootSpanId = generateSpanId();
        const now = Date.now();
        const spans = [];
        // Children may only reference the root span when it is actually
        // emitted; otherwise they would point at a parent that does not exist.
        const parentSpanId = workflow ? rootSpanId : undefined;
        // Root workflow span
        if (workflow) {
            const startedMs = new Date(workflow.startedAt).getTime();
            spans.push({
                traceId,
                spanId: rootSpanId,
                name: `workflow.${workflow.name}`,
                kind: 1,
                // An unparseable start time yields NaN; fall back to "now"
                // instead of letting the nanosecond conversion throw.
                startTimeUnixNano: msToNano(Number.isNaN(startedMs) ? now : startedMs),
                endTimeUnixNano: msToNano(now),
                attributes: [
                    attr("evalgate.workflow.name", workflow.name),
                    attr("evalgate.workflow.id", workflow.id),
                    attr("evalgate.workflow.trace_id", workflow.traceId),
                ],
                status: { code: 1 },
                events: [],
            });
        }
        // Decision spans: timestamps are synthesized, one millisecond apart,
        // ending just before "now".
        for (let i = 0; i < decisions.length; i++) {
            const decision = decisions[i];
            const spanId = generateSpanId();
            spans.push(this.decisionToSpan(traceId, spanId, parentSpanId, decision, now - decisions.length + i));
        }
        // Handoff spans: carry their own recorded timestamp.
        for (let i = 0; i < handoffs.length; i++) {
            const handoff = handoffs[i];
            const spanId = generateSpanId();
            spans.push(this.handoffToSpan(traceId, spanId, parentSpanId, handoff));
        }
        // Cost spans: synthesized timestamps, same scheme as decisions.
        for (let i = 0; i < costs.length; i++) {
            const cost = costs[i];
            const spanId = generateSpanId();
            spans.push(this.costToSpan(traceId, spanId, parentSpanId, cost, now - costs.length + i));
        }
        return this.buildPayload(spans);
    }
    /**
     * Export a run result as OTEL spans
     *
     * Emits a root span for the run and one child span per spec result.
     * Spec start times are synthesized by laying the spec durations end to
     * end, beginning at the run's `startedAt`.
     */
    exportRunResult(runResult) {
        const traceId = generateTraceId();
        const rootSpanId = generateSpanId();
        const spans = [];
        // Root run span: marked as error as soon as any spec failed.
        spans.push({
            traceId,
            spanId: rootSpanId,
            name: `evalgate.run.${runResult.runId}`,
            kind: 1,
            startTimeUnixNano: msToNano(runResult.metadata.startedAt),
            endTimeUnixNano: msToNano(runResult.metadata.completedAt),
            attributes: [
                attr("evalgate.run.id", runResult.runId),
                attr("evalgate.run.mode", runResult.metadata.mode),
                attr("evalgate.run.duration_ms", runResult.metadata.duration),
                attr("evalgate.run.pass_rate", runResult.summary.passRate),
                attr("evalgate.run.passed", runResult.summary.passed),
                attr("evalgate.run.failed", runResult.summary.failed),
            ],
            status: {
                code: runResult.summary.failed > 0 ? 2 : 1,
            },
            events: [],
        });
        // Per-spec child spans
        let offset = 0;
        for (const spec of runResult.results) {
            const spanId = generateSpanId();
            const specStart = runResult.metadata.startedAt + offset;
            const specEnd = specStart + spec.result.duration;
            offset += spec.result.duration;
            const attributes = [
                attr("evalgate.spec.id", spec.specId),
                attr("evalgate.spec.name", spec.name),
                attr("evalgate.spec.file", spec.filePath),
                attr("evalgate.spec.status", spec.result.status),
                attr("evalgate.spec.duration_ms", spec.result.duration),
            ];
            if (spec.result.score !== undefined) {
                attributes.push(attr("evalgate.spec.score", spec.result.score));
            }
            spans.push({
                traceId,
                spanId,
                parentSpanId: rootSpanId,
                name: `evalgate.spec.${spec.name}`,
                kind: 1,
                startTimeUnixNano: msToNano(specStart),
                endTimeUnixNano: msToNano(specEnd),
                attributes,
                status: {
                    // Anything other than "passed" is reported as an error.
                    code: spec.result.status === "passed" ? 1 : 2,
                    message: spec.result.error,
                },
                events: [],
            });
        }
        return this.buildPayload(spans);
    }
    /**
     * Send payload to OTEL collector via HTTP
     *
     * POSTs the payload as JSON to the configured endpoint (used verbatim).
     *
     * @returns `true` when the response status is OK; `false` on a non-OK
     *   response or when the request itself fails (a warning is logged).
     */
    async send(payload) {
        try {
            const response = await fetch(this.options.endpoint, {
                method: "POST",
                headers: {
                    "Content-Type": "application/json",
                    ...this.options.headers,
                },
                body: JSON.stringify(payload),
            });
            return response.ok;
        }
        catch (err) {
            // Export is best-effort: report and signal failure, never throw.
            console.warn(`[OTelExporter] Failed to send: ${err instanceof Error ? err.message : String(err)}`);
            return false;
        }
    }
    /** Build a 1 ms span describing a single agent decision. */
    decisionToSpan(traceId, spanId, parentSpanId, decision, timestampMs) {
        return {
            traceId,
            spanId,
            parentSpanId,
            name: `decision.${decision.agent}.${decision.chosen}`,
            kind: 1,
            startTimeUnixNano: msToNano(timestampMs),
            endTimeUnixNano: msToNano(timestampMs + 1),
            attributes: [
                attr("evalgate.decision.agent", decision.agent),
                attr("evalgate.decision.type", decision.type),
                attr("evalgate.decision.chosen", decision.chosen),
                // Only the number of alternatives is exported, not their content.
                attr("evalgate.decision.alternatives", decision.alternatives.length),
                ...(decision.confidence !== undefined
                    ? [attr("evalgate.decision.confidence", decision.confidence)]
                    : []),
                ...(decision.reasoning
                    ? [attr("evalgate.decision.reasoning", decision.reasoning)]
                    : []),
            ],
            status: { code: 1 },
            events: [],
        };
    }
    /** Build a 1 ms span describing a handoff between agents. */
    handoffToSpan(traceId, spanId, parentSpanId, handoff) {
        const parsedMs = new Date(handoff.timestamp).getTime();
        // An unparseable timestamp yields NaN; use the current time instead of
        // letting the nanosecond conversion throw.
        const ts = Number.isNaN(parsedMs) ? Date.now() : parsedMs;
        return {
            traceId,
            spanId,
            parentSpanId,
            name: `handoff.${handoff.fromAgent ?? "start"}.${handoff.toAgent}`,
            kind: 1,
            startTimeUnixNano: msToNano(ts),
            endTimeUnixNano: msToNano(ts + 1),
            attributes: [
                attr("evalgate.handoff.from", handoff.fromAgent ?? "start"),
                attr("evalgate.handoff.to", handoff.toAgent),
                attr("evalgate.handoff.type", handoff.handoffType),
            ],
            status: { code: 1 },
            events: [],
        };
    }
    /** Build a 1 ms span describing one cost record. */
    costToSpan(traceId, spanId, parentSpanId, cost, timestampMs) {
        return {
            traceId,
            spanId,
            parentSpanId,
            name: `cost.${cost.provider}.${cost.model}`,
            kind: 1,
            startTimeUnixNano: msToNano(timestampMs),
            endTimeUnixNano: msToNano(timestampMs + 1),
            attributes: [
                attr("evalgate.cost.provider", cost.provider),
                attr("evalgate.cost.model", cost.model),
                attr("evalgate.cost.input_tokens", cost.inputTokens),
                attr("evalgate.cost.output_tokens", cost.outputTokens),
                attr("evalgate.cost.total_tokens", cost.totalTokens),
                attr("evalgate.cost.total_usd", cost.totalCost),
            ],
            status: { code: 1 },
            events: [],
        };
    }
    /**
     * Wrap spans in the export envelope: a single resource (service name,
     * SDK name/version/language plus any configured resource attributes)
     * holding a single scope.
     */
    buildPayload(spans) {
        const resourceAttrs = [
            attr("service.name", this.options.serviceName),
            attr("telemetry.sdk.name", "evalgate"),
            attr("telemetry.sdk.version", this.options.sdkVersion),
            attr("telemetry.sdk.language", "nodejs"),
        ];
        for (const [key, value] of Object.entries(this.options.resourceAttributes)) {
            resourceAttrs.push(attr(key, value));
        }
        return {
            resourceSpans: [
                {
                    resource: { attributes: resourceAttrs },
                    scopeSpans: [
                        {
                            scope: {
                                name: "evalgate",
                                version: this.options.sdkVersion,
                            },
                            spans,
                        },
                    ],
                },
            ],
        };
    }
}
303
+ exports.OTelExporter = OTelExporter;
304
/**
 * Convenience factory: builds an OTelExporter from the given options.
 *
 * @param options Forwarded unchanged to the OTelExporter constructor.
 * @returns The newly constructed exporter.
 */
function createOTelExporter(options) {
    const exporter = new OTelExporter(options);
    return exporter;
}
@@ -4,12 +4,19 @@
4
4
  * The core DSL function for defining behavioral specifications.
5
5
  * Uses content-addressable identity with AST position for stability.
6
6
  */
7
- import type { DefineEvalFunction, EvalContext, EvalResult } from "./types";
7
+ import { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRuntime, withRuntime } from "./registry";
8
+ import type { DefineEvalFunction, EvalContext, EvalResult, EvalSpec } from "./types";
8
9
  /**
9
10
  * Export the defineEval function with proper typing
10
11
  * This is the main DSL entry point
11
12
  */
12
13
  export declare const defineEval: DefineEvalFunction;
14
+ /**
15
+ * Filter a list of specs according to skip/only semantics:
16
+ * - If any spec has mode === "only", return only those specs
17
+ * - Otherwise, return all specs except those with mode === "skip"
18
+ */
19
+ export declare function getFilteredSpecs(specs: EvalSpec[]): EvalSpec[];
13
20
  /**
14
21
  * Convenience export for evalai.test() alias (backward compatibility)
15
22
  * Provides alternative naming that matches the original roadmap vision
@@ -48,8 +55,11 @@ export declare function createResult(config: {
48
55
  assertions?: EvalResult["assertions"];
49
56
  metadata?: Record<string, unknown>;
50
57
  error?: string;
58
+ output?: string;
59
+ durationMs?: number;
60
+ tokens?: number;
51
61
  }): EvalResult;
52
- /**
53
- * Default export for convenience
54
- */
62
+ export { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRuntime, withRuntime, };
63
+ export { createContext as createEvalContext };
64
+ export { createLocalExecutor } from "./executor";
55
65
  export default defineEval;