@evalgate/sdk 2.2.2 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/CHANGELOG.md +32 -0
  2. package/README.md +40 -1
  3. package/dist/assertions.d.ts +194 -10
  4. package/dist/assertions.js +525 -73
  5. package/dist/batch.js +4 -4
  6. package/dist/cache.d.ts +5 -1
  7. package/dist/cache.js +5 -1
  8. package/dist/cli/baseline.d.ts +14 -0
  9. package/dist/cli/baseline.js +43 -3
  10. package/dist/cli/check.d.ts +5 -2
  11. package/dist/cli/check.js +20 -12
  12. package/dist/cli/compare.d.ts +80 -0
  13. package/dist/cli/compare.js +266 -0
  14. package/dist/cli/index.js +244 -101
  15. package/dist/cli/regression-gate.js +23 -0
  16. package/dist/cli/run.js +22 -0
  17. package/dist/cli/start.d.ts +26 -0
  18. package/dist/cli/start.js +130 -0
  19. package/dist/cli/templates.d.ts +24 -0
  20. package/dist/cli/templates.js +314 -0
  21. package/dist/cli/traces.d.ts +109 -0
  22. package/dist/cli/traces.js +152 -0
  23. package/dist/cli/upgrade.js +5 -0
  24. package/dist/cli/validate.d.ts +37 -0
  25. package/dist/cli/validate.js +252 -0
  26. package/dist/cli/watch.d.ts +19 -0
  27. package/dist/cli/watch.js +175 -0
  28. package/dist/client.js +6 -13
  29. package/dist/constants.d.ts +2 -0
  30. package/dist/constants.js +5 -0
  31. package/dist/errors.js +7 -0
  32. package/dist/export.js +2 -2
  33. package/dist/index.d.ts +10 -9
  34. package/dist/index.js +24 -7
  35. package/dist/integrations/anthropic.js +6 -6
  36. package/dist/integrations/openai.js +84 -61
  37. package/dist/logger.d.ts +3 -1
  38. package/dist/logger.js +2 -1
  39. package/dist/otel.d.ts +130 -0
  40. package/dist/otel.js +309 -0
  41. package/dist/pagination.d.ts +13 -2
  42. package/dist/pagination.js +28 -2
  43. package/dist/runtime/adapters/testsuite-to-dsl.js +1 -6
  44. package/dist/runtime/eval.d.ts +14 -4
  45. package/dist/runtime/eval.js +127 -2
  46. package/dist/runtime/executor.d.ts +3 -2
  47. package/dist/runtime/executor.js +3 -2
  48. package/dist/runtime/registry.d.ts +8 -3
  49. package/dist/runtime/registry.js +15 -4
  50. package/dist/runtime/run-report.d.ts +1 -1
  51. package/dist/runtime/run-report.js +7 -4
  52. package/dist/runtime/types.d.ts +38 -0
  53. package/dist/snapshot.d.ts +12 -0
  54. package/dist/snapshot.js +24 -1
  55. package/dist/testing.d.ts +8 -0
  56. package/dist/testing.js +45 -10
  57. package/dist/version.d.ts +2 -2
  58. package/dist/version.js +2 -2
  59. package/dist/workflows.d.ts +2 -0
  60. package/dist/workflows.js +184 -102
  61. package/package.json +8 -1
package/dist/workflows.js CHANGED
@@ -27,6 +27,39 @@
27
27
  * await tracer.endWorkflow({ resolution: 'Issue resolved' });
28
28
  * ```
29
29
  */
30
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
31
+ if (k2 === undefined) k2 = k;
32
+ var desc = Object.getOwnPropertyDescriptor(m, k);
33
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
34
+ desc = { enumerable: true, get: function() { return m[k]; } };
35
+ }
36
+ Object.defineProperty(o, k2, desc);
37
+ }) : (function(o, m, k, k2) {
38
+ if (k2 === undefined) k2 = k;
39
+ o[k2] = m[k];
40
+ }));
41
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
42
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
43
+ }) : function(o, v) {
44
+ o["default"] = v;
45
+ });
46
+ var __importStar = (this && this.__importStar) || (function () {
47
+ var ownKeys = function(o) {
48
+ ownKeys = Object.getOwnPropertyNames || function (o) {
49
+ var ar = [];
50
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
51
+ return ar;
52
+ };
53
+ return ownKeys(o);
54
+ };
55
+ return function (mod) {
56
+ if (mod && mod.__esModule) return mod;
57
+ var result = {};
58
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
59
+ __setModuleDefault(result, mod);
60
+ return result;
61
+ };
62
+ })();
30
63
  Object.defineProperty(exports, "__esModule", { value: true });
31
64
  exports.WorkflowTracer = void 0;
32
65
  exports.traceLangChainAgent = traceLangChainAgent;
@@ -34,6 +67,8 @@ exports.traceCrewAI = traceCrewAI;
34
67
  exports.traceAutoGen = traceAutoGen;
35
68
  exports.createWorkflowTracer = createWorkflowTracer;
36
69
  exports.traceWorkflowStep = traceWorkflowStep;
70
+ const fs = __importStar(require("node:fs"));
71
+ const nodePath = __importStar(require("node:path"));
37
72
  const context_1 = require("./context");
38
73
  // ============================================================================
39
74
  // MAIN CLASS - WorkflowTracer
@@ -75,6 +110,7 @@ class WorkflowTracer {
75
110
  tracePrefix: options.tracePrefix || "workflow",
76
111
  captureFullPayloads: options.captureFullPayloads ?? true,
77
112
  debug: options.debug ?? false,
113
+ offline: options.offline ?? false,
78
114
  };
79
115
  }
80
116
  // ==========================================================================
@@ -101,34 +137,39 @@ class WorkflowTracer {
101
137
  }
102
138
  const traceId = `${this.options.tracePrefix}-${Date.now()}-${this.generateId()}`;
103
139
  const startedAt = new Date().toISOString();
104
- // Create the trace
105
- const trace = await this.client.traces.create({
106
- name: `Workflow: ${name}`,
107
- traceId,
108
- organizationId: this.options.organizationId,
109
- status: "pending",
110
- metadata: (0, context_1.mergeWithContext)({
111
- workflowName: name,
112
- definition,
113
- ...metadata,
114
- }),
115
- });
116
- this.currentWorkflow = {
117
- id: 0, // Will be set after API call returns
118
- traceId: trace.id,
140
+ // Create the trace (skip in offline mode)
141
+ let traceResultId = 0;
142
+ if (!this.options.offline) {
143
+ const trace = await this.client.traces.create({
144
+ name: `Workflow: ${name}`,
145
+ traceId,
146
+ organizationId: this.options.organizationId,
147
+ status: "pending",
148
+ metadata: (0, context_1.mergeWithContext)({
149
+ workflowName: name,
150
+ definition,
151
+ ...metadata,
152
+ }),
153
+ });
154
+ traceResultId = trace.id;
155
+ }
156
+ const workflow = {
157
+ id: 0,
158
+ traceId: traceResultId,
119
159
  name,
120
160
  startedAt,
121
161
  definition,
122
162
  metadata,
123
163
  };
164
+ this.currentWorkflow = workflow;
124
165
  // Reset state
125
166
  this.handoffs = [];
126
167
  this.decisions = [];
127
168
  this.costs = [];
128
169
  this.activeSpans.clear();
129
170
  this.spanCounter = 0;
130
- this.log("Started workflow", { name, traceId: trace.id });
131
- return this.currentWorkflow;
171
+ this.log("Started workflow", { name, traceId: traceResultId });
172
+ return workflow;
132
173
  }
133
174
  /**
134
175
  * End the current workflow
@@ -140,24 +181,57 @@ class WorkflowTracer {
140
181
  const durationMs = Date.now() - new Date(this.currentWorkflow.startedAt).getTime();
141
182
  // Calculate total cost
142
183
  const totalCost = this.costs.reduce((sum, cost) => sum + parseFloat(cost.totalCost), 0);
143
- // Update the original trace with completion data
144
- await this.client.traces.update(this.currentWorkflow.traceId, {
145
- status: status === "completed" ? "success" : "error",
146
- durationMs,
147
- metadata: (0, context_1.mergeWithContext)({
148
- workflowName: this.currentWorkflow.name,
149
- output,
150
- status,
151
- totalCost: totalCost.toFixed(6),
152
- handoffCount: this.handoffs.length,
153
- decisionCount: this.decisions.length,
154
- agentCount: new Set(this.handoffs.map((h) => h.toAgent)).size + 1,
155
- retryCount: this.costs.filter((c) => c.isRetry).length,
156
- handoffs: this.handoffs,
157
- decisions: this.decisions,
158
- costs: this.costs,
159
- }),
160
- });
184
+ // Update the original trace with completion data (skip in offline mode)
185
+ if (!this.options.offline) {
186
+ await this.client.traces.update(this.currentWorkflow.traceId, {
187
+ status: status === "completed" ? "success" : "error",
188
+ durationMs,
189
+ metadata: (0, context_1.mergeWithContext)({
190
+ workflowName: this.currentWorkflow.name,
191
+ output,
192
+ status,
193
+ totalCost: totalCost.toFixed(6),
194
+ handoffCount: this.handoffs.length,
195
+ decisionCount: this.decisions.length,
196
+ agentCount: new Set(this.handoffs.map((h) => h.toAgent)).size + 1,
197
+ retryCount: this.costs.filter((c) => c.isRetry).length,
198
+ handoffs: this.handoffs,
199
+ decisions: this.decisions,
200
+ costs: this.costs,
201
+ }),
202
+ });
203
+ }
204
+ // In offline mode, persist workflow data to local filesystem
205
+ if (this.options.offline) {
206
+ try {
207
+ const dataDir = nodePath.resolve(".evalgate-data", "workflows");
208
+ fs.mkdirSync(dataDir, { recursive: true });
209
+ const fileName = `${this.currentWorkflow.name.replace(/[^a-zA-Z0-9_-]/g, "_")}-${Date.now()}.json`;
210
+ const workflowData = {
211
+ name: this.currentWorkflow.name,
212
+ startedAt: this.currentWorkflow.startedAt,
213
+ endedAt: new Date().toISOString(),
214
+ status,
215
+ durationMs,
216
+ totalCost: totalCost.toFixed(6),
217
+ handoffs: this.handoffs,
218
+ decisions: this.decisions,
219
+ costs: this.costs,
220
+ output,
221
+ metadata: this.currentWorkflow.metadata,
222
+ definition: this.currentWorkflow.definition,
223
+ };
224
+ fs.writeFileSync(nodePath.join(dataDir, fileName), JSON.stringify(workflowData, null, 2));
225
+ this.log("Saved workflow to local filesystem", {
226
+ path: nodePath.join(dataDir, fileName),
227
+ });
228
+ }
229
+ catch (err) {
230
+ this.log("Failed to save workflow to local filesystem", {
231
+ error: err instanceof Error ? err.message : String(err),
232
+ });
233
+ }
234
+ }
161
235
  this.log("Ended workflow", {
162
236
  name: this.currentWorkflow.name,
163
237
  status,
@@ -193,18 +267,20 @@ class WorkflowTracer {
193
267
  metadata: input,
194
268
  };
195
269
  this.activeSpans.set(spanId, spanContext);
196
- // Create span via API
197
- await this.client.traces.createSpan(this.currentWorkflow.traceId, {
198
- name: `Agent: ${agentName}`,
199
- spanId,
200
- type: "agent",
201
- parentSpanId,
202
- startTime,
203
- metadata: (0, context_1.mergeWithContext)({
204
- agentName,
205
- ...(this.options.captureFullPayloads ? { input } : {}),
206
- }),
207
- });
270
+ // Create span via API (skip in offline mode)
271
+ if (!this.options.offline) {
272
+ await this.client.traces.createSpan(this.currentWorkflow.traceId, {
273
+ name: `Agent: ${agentName}`,
274
+ spanId,
275
+ type: "agent",
276
+ parentSpanId,
277
+ startTime,
278
+ metadata: (0, context_1.mergeWithContext)({
279
+ agentName,
280
+ ...(this.options.captureFullPayloads ? { input } : {}),
281
+ }),
282
+ });
283
+ }
208
284
  this.log("Started agent span", { agentName, spanId });
209
285
  return spanContext;
210
286
  }
@@ -217,21 +293,23 @@ class WorkflowTracer {
217
293
  }
218
294
  const endTime = new Date().toISOString();
219
295
  const durationMs = new Date(endTime).getTime() - new Date(span.startTime).getTime();
220
- // Update span via API (create completion record)
221
- await this.client.traces.createSpan(this.currentWorkflow.traceId, {
222
- name: `Agent: ${span.agentName}`,
223
- spanId: `${span.spanId}-end`,
224
- type: "agent",
225
- parentSpanId: span.spanId,
226
- startTime: span.startTime,
227
- endTime,
228
- durationMs,
229
- metadata: (0, context_1.mergeWithContext)({
230
- agentName: span.agentName,
231
- ...(this.options.captureFullPayloads ? { output } : {}),
232
- ...(error ? { error } : {}),
233
- }),
234
- });
296
+ // Update span via API (skip in offline mode)
297
+ if (!this.options.offline) {
298
+ await this.client.traces.createSpan(this.currentWorkflow.traceId, {
299
+ name: `Agent: ${span.agentName}`,
300
+ spanId: `${span.spanId}-end`,
301
+ type: "agent",
302
+ parentSpanId: span.spanId,
303
+ startTime: span.startTime,
304
+ endTime,
305
+ durationMs,
306
+ metadata: (0, context_1.mergeWithContext)({
307
+ agentName: span.agentName,
308
+ ...(this.options.captureFullPayloads ? { output } : {}),
309
+ ...(error ? { error } : {}),
310
+ }),
311
+ });
312
+ }
235
313
  this.activeSpans.delete(span.spanId);
236
314
  this.log("Ended agent span", {
237
315
  agentName: span.agentName,
@@ -267,22 +345,24 @@ class WorkflowTracer {
267
345
  timestamp: new Date().toISOString(),
268
346
  };
269
347
  this.handoffs.push(handoff);
270
- // Also create a span for the handoff
271
- const spanId = `handoff-${this.handoffs.length}-${this.generateId()}`;
272
- await this.client.traces.createSpan(this.currentWorkflow.traceId, {
273
- name: `Handoff: ${fromAgent || "start"} → ${toAgent}`,
274
- spanId,
275
- type: "handoff",
276
- startTime: handoff.timestamp,
277
- endTime: handoff.timestamp,
278
- durationMs: 0,
279
- metadata: (0, context_1.mergeWithContext)({
280
- handoffType,
281
- fromAgent,
282
- toAgent,
283
- context,
284
- }),
285
- });
348
+ // Also create a span for the handoff (skip in offline mode)
349
+ if (!this.options.offline) {
350
+ const spanId = `handoff-${this.handoffs.length}-${this.generateId()}`;
351
+ await this.client.traces.createSpan(this.currentWorkflow.traceId, {
352
+ name: `Handoff: ${fromAgent || "start"} → ${toAgent}`,
353
+ spanId,
354
+ type: "handoff",
355
+ startTime: handoff.timestamp,
356
+ endTime: handoff.timestamp,
357
+ durationMs: 0,
358
+ metadata: (0, context_1.mergeWithContext)({
359
+ handoffType,
360
+ fromAgent,
361
+ toAgent,
362
+ context,
363
+ }),
364
+ });
365
+ }
286
366
  this.log("Recorded handoff", { fromAgent, toAgent, handoffType });
287
367
  }
288
368
  // ==========================================================================
@@ -312,28 +392,30 @@ class WorkflowTracer {
312
392
  throw new Error("No active workflow. Call startWorkflow() first.");
313
393
  }
314
394
  this.decisions.push(params);
315
- // Create a span for the decision
316
- const spanId = `decision-${this.decisions.length}-${this.generateId()}`;
317
- const timestamp = new Date().toISOString();
318
- await this.client.traces.createSpan(this.currentWorkflow.traceId, {
319
- name: `Decision: ${params.agent} chose ${params.chosen}`,
320
- spanId,
321
- type: "decision",
322
- startTime: timestamp,
323
- endTime: timestamp,
324
- durationMs: 0,
325
- metadata: (0, context_1.mergeWithContext)({
326
- isDecisionPoint: true,
327
- agentName: params.agent,
328
- decisionType: params.type,
329
- chosen: params.chosen,
330
- alternatives: params.alternatives,
331
- reasoning: params.reasoning,
332
- confidence: params.confidence,
333
- contextFactors: params.contextFactors,
334
- inputContext: params.inputContext,
335
- }),
336
- });
395
+ // Create a span for the decision (skip in offline mode)
396
+ if (!this.options.offline) {
397
+ const spanId = `decision-${this.decisions.length}-${this.generateId()}`;
398
+ const timestamp = new Date().toISOString();
399
+ await this.client.traces.createSpan(this.currentWorkflow.traceId, {
400
+ name: `Decision: ${params.agent} chose ${params.chosen}`,
401
+ spanId,
402
+ type: "decision",
403
+ startTime: timestamp,
404
+ endTime: timestamp,
405
+ durationMs: 0,
406
+ metadata: (0, context_1.mergeWithContext)({
407
+ isDecisionPoint: true,
408
+ agentName: params.agent,
409
+ decisionType: params.type,
410
+ chosen: params.chosen,
411
+ alternatives: params.alternatives,
412
+ reasoning: params.reasoning,
413
+ confidence: params.confidence,
414
+ contextFactors: params.contextFactors,
415
+ inputContext: params.inputContext,
416
+ }),
417
+ });
418
+ }
337
419
  this.log("Recorded decision", {
338
420
  agent: params.agent,
339
421
  type: params.type,
@@ -375,8 +457,8 @@ class WorkflowTracer {
375
457
  totalCost: totalCost.toFixed(6),
376
458
  };
377
459
  this.costs.push(costRecord);
378
- // Also record as a span if in an active workflow
379
- if (this.currentWorkflow) {
460
+ // Also record as a span if in an active workflow (skip in offline mode)
461
+ if (this.currentWorkflow && !this.options.offline) {
380
462
  const spanId = `cost-${this.costs.length}-${this.generateId()}`;
381
463
  const timestamp = new Date().toISOString();
382
464
  await this.client.traces.createSpan(this.currentWorkflow.traceId, {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@evalgate/sdk",
3
- "version": "2.2.2",
3
+ "version": "2.2.4",
4
4
  "publishConfig": {
5
5
  "access": "public",
6
6
  "registry": "https://registry.npmjs.org/"
@@ -25,6 +25,8 @@
25
25
  "build": "tsc",
26
26
  "dev": "tsc --watch",
27
27
  "test": "vitest run",
28
+ "test:dist": "tsc && vitest run src/__tests__/dist-smoke.test.ts",
29
+ "otel:test": "bash otel-integration/run-test.sh",
28
30
  "test:watch": "vitest"
29
31
  },
30
32
  "keywords": [
@@ -113,6 +115,11 @@
113
115
  "import": "./dist/regression.js",
114
116
  "require": "./dist/regression.js",
115
117
  "types": "./dist/regression.d.ts"
118
+ },
119
+ "./otel": {
120
+ "import": "./dist/otel.js",
121
+ "require": "./dist/otel.js",
122
+ "types": "./dist/otel.d.ts"
116
123
  }
117
124
  }
118
125
  }