@pauly4010/evalai-sdk 1.4.1 → 1.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/CHANGELOG.md +85 -0
  2. package/README.md +205 -543
  3. package/dist/assertions.d.ts +2 -2
  4. package/dist/assertions.js +104 -71
  5. package/dist/batch.js +12 -17
  6. package/dist/cache.js +7 -11
  7. package/dist/cli/api.d.ts +108 -0
  8. package/dist/cli/api.js +130 -0
  9. package/dist/cli/check.d.ts +28 -13
  10. package/dist/cli/check.js +249 -142
  11. package/dist/cli/ci-context.d.ts +6 -0
  12. package/dist/cli/ci-context.js +110 -0
  13. package/dist/cli/config.d.ts +30 -0
  14. package/dist/cli/config.js +207 -0
  15. package/dist/cli/constants.d.ts +15 -0
  16. package/dist/cli/constants.js +18 -0
  17. package/dist/cli/doctor.d.ts +11 -0
  18. package/dist/cli/doctor.js +82 -0
  19. package/dist/cli/formatters/github.d.ts +8 -0
  20. package/dist/cli/formatters/github.js +130 -0
  21. package/dist/cli/formatters/human.d.ts +6 -0
  22. package/dist/cli/formatters/human.js +107 -0
  23. package/dist/cli/formatters/json.d.ts +6 -0
  24. package/dist/cli/formatters/json.js +10 -0
  25. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  26. package/dist/cli/formatters/pr-comment.js +101 -0
  27. package/dist/cli/formatters/types.d.ts +100 -0
  28. package/dist/cli/formatters/types.js +5 -0
  29. package/dist/cli/gate.d.ts +21 -0
  30. package/dist/cli/gate.js +175 -0
  31. package/dist/cli/index.d.ts +1 -0
  32. package/dist/cli/index.js +67 -23
  33. package/dist/cli/init.d.ts +7 -0
  34. package/dist/cli/init.js +69 -0
  35. package/dist/cli/policy-packs.d.ts +23 -0
  36. package/dist/cli/policy-packs.js +83 -0
  37. package/dist/cli/profiles.d.ts +28 -0
  38. package/dist/cli/profiles.js +30 -0
  39. package/dist/cli/reason-codes.d.ts +17 -0
  40. package/dist/cli/reason-codes.js +19 -0
  41. package/dist/cli/render/snippet.d.ts +5 -0
  42. package/dist/cli/render/snippet.js +15 -0
  43. package/dist/cli/render/sort.d.ts +10 -0
  44. package/dist/cli/render/sort.js +24 -0
  45. package/dist/cli/report/build-check-report.d.ts +19 -0
  46. package/dist/cli/report/build-check-report.js +124 -0
  47. package/dist/cli/share.d.ts +17 -0
  48. package/dist/cli/share.js +83 -0
  49. package/dist/client.d.ts +2 -2
  50. package/dist/client.js +144 -132
  51. package/dist/context.d.ts +1 -1
  52. package/dist/context.js +4 -6
  53. package/dist/errors.d.ts +2 -0
  54. package/dist/errors.js +116 -107
  55. package/dist/export.d.ts +6 -6
  56. package/dist/export.js +39 -33
  57. package/dist/index.d.ts +25 -24
  58. package/dist/index.js +62 -56
  59. package/dist/integrations/anthropic.d.ts +1 -1
  60. package/dist/integrations/anthropic.js +23 -19
  61. package/dist/integrations/openai-eval.d.ts +57 -0
  62. package/dist/integrations/openai-eval.js +230 -0
  63. package/dist/integrations/openai.d.ts +1 -1
  64. package/dist/integrations/openai.js +23 -19
  65. package/dist/local.d.ts +2 -2
  66. package/dist/local.js +25 -25
  67. package/dist/logger.d.ts +1 -1
  68. package/dist/logger.js +24 -28
  69. package/dist/matchers/index.d.ts +1 -0
  70. package/dist/matchers/index.js +6 -0
  71. package/dist/matchers/to-pass-gate.d.ts +29 -0
  72. package/dist/matchers/to-pass-gate.js +35 -0
  73. package/dist/pagination.d.ts +1 -1
  74. package/dist/pagination.js +6 -6
  75. package/dist/snapshot.js +24 -24
  76. package/dist/streaming.js +11 -11
  77. package/dist/testing.d.ts +6 -2
  78. package/dist/testing.js +30 -12
  79. package/dist/types.d.ts +22 -22
  80. package/dist/types.js +13 -13
  81. package/dist/utils/input-hash.d.ts +8 -0
  82. package/dist/utils/input-hash.js +38 -0
  83. package/dist/version.d.ts +7 -0
  84. package/dist/version.js +10 -0
  85. package/dist/workflows.d.ts +7 -7
  86. package/dist/workflows.js +44 -44
  87. package/package.json +102 -90
  88. package/dist/__tests__/assertions.test.d.ts +0 -1
  89. package/dist/__tests__/assertions.test.js +0 -288
  90. package/dist/__tests__/client.test.d.ts +0 -1
  91. package/dist/__tests__/client.test.js +0 -185
  92. package/dist/__tests__/testing.test.d.ts +0 -1
  93. package/dist/__tests__/testing.test.js +0 -230
  94. package/dist/__tests__/workflows.test.d.ts +0 -1
  95. package/dist/__tests__/workflows.test.js +0 -222
package/dist/types.js CHANGED
@@ -7,21 +7,21 @@ exports.SDKError = exports.EvaluationTemplates = void 0;
7
7
  */
8
8
  exports.EvaluationTemplates = {
9
9
  // Core Testing
10
- UNIT_TESTING: 'unit-testing',
11
- OUTPUT_QUALITY: 'output-quality',
10
+ UNIT_TESTING: "unit-testing",
11
+ OUTPUT_QUALITY: "output-quality",
12
12
  // Advanced Evaluation
13
- PROMPT_OPTIMIZATION: 'prompt-optimization',
14
- CHAIN_OF_THOUGHT: 'chain-of-thought',
15
- LONG_CONTEXT_TESTING: 'long-context-testing',
16
- MODEL_STEERING: 'model-steering',
17
- REGRESSION_TESTING: 'regression-testing',
18
- CONFIDENCE_CALIBRATION: 'confidence-calibration',
13
+ PROMPT_OPTIMIZATION: "prompt-optimization",
14
+ CHAIN_OF_THOUGHT: "chain-of-thought",
15
+ LONG_CONTEXT_TESTING: "long-context-testing",
16
+ MODEL_STEERING: "model-steering",
17
+ REGRESSION_TESTING: "regression-testing",
18
+ CONFIDENCE_CALIBRATION: "confidence-calibration",
19
19
  // Safety & Compliance
20
- SAFETY_COMPLIANCE: 'safety-compliance',
20
+ SAFETY_COMPLIANCE: "safety-compliance",
21
21
  // Domain-Specific
22
- RAG_EVALUATION: 'rag-evaluation',
23
- CODE_GENERATION: 'code-generation',
24
- SUMMARIZATION: 'summarization',
22
+ RAG_EVALUATION: "rag-evaluation",
23
+ CODE_GENERATION: "code-generation",
24
+ SUMMARIZATION: "summarization",
25
25
  };
26
26
  /**
27
27
  * SDK Error class with additional error details
@@ -45,7 +45,7 @@ exports.EvaluationTemplates = {
45
45
  class SDKError extends Error {
46
46
  constructor(message, code, statusCode, details) {
47
47
  super(message);
48
- this.name = 'SDKError';
48
+ this.name = "SDKError";
49
49
  this.code = code;
50
50
  this.statusCode = statusCode;
51
51
  this.details = details;
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Input normalization and hashing for deterministic matching.
3
+ * Must match platform's @/lib/utils/input-hash.ts for reportToEvalAI.
4
+ */
5
+ /** Normalize input for stable matching (whitespace, JSON key order). */
6
+ export declare function normalizeInput(input: string): string;
7
+ /** SHA-256 hash of normalized input. */
8
+ export declare function sha256Input(s: string): string;
@@ -0,0 +1,38 @@
1
+ "use strict";
2
+ /**
3
+ * Input normalization and hashing for deterministic matching.
4
+ * Must match platform's @/lib/utils/input-hash.ts for reportToEvalAI.
5
+ */
6
+ var __importDefault = (this && this.__importDefault) || function (mod) {
7
+ return (mod && mod.__esModule) ? mod : { "default": mod };
8
+ };
9
+ Object.defineProperty(exports, "__esModule", { value: true });
10
+ exports.normalizeInput = normalizeInput;
11
+ exports.sha256Input = sha256Input;
12
+ const node_crypto_1 = __importDefault(require("node:crypto"));
13
+ function sortKeys(obj) {
14
+ const sorted = {};
15
+ for (const k of Object.keys(obj).sort()) {
16
+ const v = obj[k];
17
+ sorted[k] =
18
+ v != null && typeof v === "object" && !Array.isArray(v)
19
+ ? sortKeys(v)
20
+ : v;
21
+ }
22
+ return sorted;
23
+ }
24
+ /** Normalize input for stable matching (whitespace, JSON key order). */
25
+ function normalizeInput(input) {
26
+ const s = input.trim();
27
+ try {
28
+ const obj = JSON.parse(s);
29
+ return JSON.stringify(sortKeys(obj));
30
+ }
31
+ catch {
32
+ return s.replace(/\s+/g, " ");
33
+ }
34
+ }
35
+ /** SHA-256 hash of normalized input. */
36
+ function sha256Input(s) {
37
+ return node_crypto_1.default.createHash("sha256").update(normalizeInput(s), "utf8").digest("hex");
38
+ }
@@ -0,0 +1,7 @@
1
+ /**
2
+ * SDK and API spec versions for request headers.
3
+ * X-EvalAI-SDK-Version: SDK package version
4
+ * X-EvalAI-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
5
+ */
6
+ export declare const SDK_VERSION = "1.5.0";
7
+ export declare const SPEC_VERSION = "1.0.0";
@@ -0,0 +1,10 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.SPEC_VERSION = exports.SDK_VERSION = void 0;
4
+ /**
5
+ * SDK and API spec versions for request headers.
6
+ * X-EvalAI-SDK-Version: SDK package version
7
+ * X-EvalAI-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
8
+ */
9
+ exports.SDK_VERSION = "1.5.0";
10
+ exports.SPEC_VERSION = "1.0.0";
@@ -26,13 +26,13 @@
26
26
  * await tracer.endWorkflow({ resolution: 'Issue resolved' });
27
27
  * ```
28
28
  */
29
- import type { AIEvalClient } from './client';
29
+ import type { AIEvalClient } from "./client";
30
30
  /**
31
31
  * Node in a workflow DAG
32
32
  */
33
33
  export interface WorkflowNode {
34
34
  id: string;
35
- type: 'agent' | 'tool' | 'decision' | 'parallel' | 'human' | 'llm';
35
+ type: "agent" | "tool" | "decision" | "parallel" | "human" | "llm";
36
36
  name: string;
37
37
  config?: Record<string, any>;
38
38
  }
@@ -68,11 +68,11 @@ export interface WorkflowContext {
68
68
  /**
69
69
  * Workflow run status
70
70
  */
71
- export type WorkflowStatus = 'running' | 'completed' | 'failed' | 'cancelled';
71
+ export type WorkflowStatus = "running" | "completed" | "failed" | "cancelled";
72
72
  /**
73
73
  * Handoff types between agents
74
74
  */
75
- export type HandoffType = 'delegation' | 'escalation' | 'parallel' | 'fallback';
75
+ export type HandoffType = "delegation" | "escalation" | "parallel" | "fallback";
76
76
  /**
77
77
  * Agent handoff record
78
78
  */
@@ -95,7 +95,7 @@ export interface DecisionAlternative {
95
95
  /**
96
96
  * Decision types made by agents
97
97
  */
98
- export type DecisionType = 'action' | 'tool' | 'delegate' | 'respond' | 'route';
98
+ export type DecisionType = "action" | "tool" | "delegate" | "respond" | "route";
99
99
  /**
100
100
  * Parameters for recording a decision
101
101
  */
@@ -120,11 +120,11 @@ export interface RecordDecisionParams {
120
120
  /**
121
121
  * LLM provider names
122
122
  */
123
- export type LLMProvider = 'openai' | 'anthropic' | 'google' | 'cohere' | 'mistral' | 'custom';
123
+ export type LLMProvider = "openai" | "anthropic" | "google" | "cohere" | "mistral" | "custom";
124
124
  /**
125
125
  * Cost categories for tracking
126
126
  */
127
- export type CostCategory = 'llm' | 'tool' | 'embedding' | 'other';
127
+ export type CostCategory = "llm" | "tool" | "embedding" | "other";
128
128
  /**
129
129
  * Parameters for recording cost
130
130
  */
package/dist/workflows.js CHANGED
@@ -67,7 +67,7 @@ class WorkflowTracer {
67
67
  this.options = {
68
68
  organizationId: options.organizationId || client.getOrganizationId() || 0,
69
69
  autoCalculateCost: options.autoCalculateCost ?? true,
70
- tracePrefix: options.tracePrefix || 'workflow',
70
+ tracePrefix: options.tracePrefix || "workflow",
71
71
  captureFullPayloads: options.captureFullPayloads ?? true,
72
72
  debug: options.debug ?? false,
73
73
  };
@@ -92,7 +92,7 @@ class WorkflowTracer {
92
92
  */
93
93
  async startWorkflow(name, definition, metadata) {
94
94
  if (this.currentWorkflow) {
95
- throw new Error('A workflow is already active. Call endWorkflow() first.');
95
+ throw new Error("A workflow is already active. Call endWorkflow() first.");
96
96
  }
97
97
  const traceId = `${this.options.tracePrefix}-${Date.now()}-${this.generateId()}`;
98
98
  const startedAt = new Date().toISOString();
@@ -101,7 +101,7 @@ class WorkflowTracer {
101
101
  name: `Workflow: ${name}`,
102
102
  traceId,
103
103
  organizationId: this.options.organizationId,
104
- status: 'pending',
104
+ status: "pending",
105
105
  metadata: (0, context_1.mergeWithContext)({
106
106
  workflowName: name,
107
107
  definition,
@@ -122,22 +122,22 @@ class WorkflowTracer {
122
122
  this.costs = [];
123
123
  this.activeSpans.clear();
124
124
  this.spanCounter = 0;
125
- this.log('Started workflow', { name, traceId: trace.id });
125
+ this.log("Started workflow", { name, traceId: trace.id });
126
126
  return this.currentWorkflow;
127
127
  }
128
128
  /**
129
129
  * End the current workflow
130
130
  */
131
- async endWorkflow(output, status = 'completed') {
131
+ async endWorkflow(output, status = "completed") {
132
132
  if (!this.currentWorkflow) {
133
- throw new Error('No active workflow. Call startWorkflow() first.');
133
+ throw new Error("No active workflow. Call startWorkflow() first.");
134
134
  }
135
135
  const durationMs = Date.now() - new Date(this.currentWorkflow.startedAt).getTime();
136
136
  // Calculate total cost
137
137
  const totalCost = this.costs.reduce((sum, cost) => sum + parseFloat(cost.totalCost), 0);
138
138
  // Update the original trace with completion data
139
139
  await this.client.traces.update(this.currentWorkflow.traceId, {
140
- status: status === 'completed' ? 'success' : 'error',
140
+ status: status === "completed" ? "success" : "error",
141
141
  durationMs,
142
142
  metadata: (0, context_1.mergeWithContext)({
143
143
  workflowName: this.currentWorkflow.name,
@@ -146,14 +146,14 @@ class WorkflowTracer {
146
146
  totalCost: totalCost.toFixed(6),
147
147
  handoffCount: this.handoffs.length,
148
148
  decisionCount: this.decisions.length,
149
- agentCount: new Set(this.handoffs.map(h => h.toAgent)).size + 1,
150
- retryCount: this.costs.filter(c => c.isRetry).length,
149
+ agentCount: new Set(this.handoffs.map((h) => h.toAgent)).size + 1,
150
+ retryCount: this.costs.filter((c) => c.isRetry).length,
151
151
  handoffs: this.handoffs,
152
152
  decisions: this.decisions,
153
153
  costs: this.costs,
154
154
  }),
155
155
  });
156
- this.log('Ended workflow', {
156
+ this.log("Ended workflow", {
157
157
  name: this.currentWorkflow.name,
158
158
  status,
159
159
  durationMs,
@@ -176,7 +176,7 @@ class WorkflowTracer {
176
176
  */
177
177
  async startAgentSpan(agentName, input, parentSpanId) {
178
178
  if (!this.currentWorkflow) {
179
- throw new Error('No active workflow. Call startWorkflow() first.');
179
+ throw new Error("No active workflow. Call startWorkflow() first.");
180
180
  }
181
181
  const spanId = `span-${++this.spanCounter}-${this.generateId()}`;
182
182
  const startTime = new Date().toISOString();
@@ -199,7 +199,7 @@ class WorkflowTracer {
199
199
  ...(this.options.captureFullPayloads ? { input } : {}),
200
200
  }),
201
201
  });
202
- this.log('Started agent span', { agentName, spanId });
202
+ this.log("Started agent span", { agentName, spanId });
203
203
  return spanContext;
204
204
  }
205
205
  /**
@@ -207,7 +207,7 @@ class WorkflowTracer {
207
207
  */
208
208
  async endAgentSpan(span, output, error) {
209
209
  if (!this.currentWorkflow) {
210
- throw new Error('No active workflow.');
210
+ throw new Error("No active workflow.");
211
211
  }
212
212
  const endTime = new Date().toISOString();
213
213
  const durationMs = new Date(endTime).getTime() - new Date(span.startTime).getTime();
@@ -226,7 +226,7 @@ class WorkflowTracer {
226
226
  }),
227
227
  });
228
228
  this.activeSpans.delete(span.spanId);
229
- this.log('Ended agent span', { agentName: span.agentName, spanId: span.spanId, durationMs });
229
+ this.log("Ended agent span", { agentName: span.agentName, spanId: span.spanId, durationMs });
230
230
  }
231
231
  // ==========================================================================
232
232
  // HANDOFFS
@@ -244,9 +244,9 @@ class WorkflowTracer {
244
244
  * );
245
245
  * ```
246
246
  */
247
- async recordHandoff(fromAgent, toAgent, context, handoffType = 'delegation') {
247
+ async recordHandoff(fromAgent, toAgent, context, handoffType = "delegation") {
248
248
  if (!this.currentWorkflow) {
249
- throw new Error('No active workflow. Call startWorkflow() first.');
249
+ throw new Error("No active workflow. Call startWorkflow() first.");
250
250
  }
251
251
  const handoff = {
252
252
  fromAgent,
@@ -259,7 +259,7 @@ class WorkflowTracer {
259
259
  // Also create a span for the handoff
260
260
  const spanId = `handoff-${this.handoffs.length}-${this.generateId()}`;
261
261
  await this.client.traces.createSpan(this.currentWorkflow.traceId, {
262
- name: `Handoff: ${fromAgent || 'start'} → ${toAgent}`,
262
+ name: `Handoff: ${fromAgent || "start"} → ${toAgent}`,
263
263
  spanId,
264
264
  startTime: handoff.timestamp,
265
265
  endTime: handoff.timestamp,
@@ -271,7 +271,7 @@ class WorkflowTracer {
271
271
  context,
272
272
  }),
273
273
  });
274
- this.log('Recorded handoff', { fromAgent, toAgent, handoffType });
274
+ this.log("Recorded handoff", { fromAgent, toAgent, handoffType });
275
275
  }
276
276
  // ==========================================================================
277
277
  // DECISION AUDITING
@@ -297,7 +297,7 @@ class WorkflowTracer {
297
297
  */
298
298
  async recordDecision(params) {
299
299
  if (!this.currentWorkflow) {
300
- throw new Error('No active workflow. Call startWorkflow() first.');
300
+ throw new Error("No active workflow. Call startWorkflow() first.");
301
301
  }
302
302
  this.decisions.push(params);
303
303
  // Create a span for the decision
@@ -321,7 +321,7 @@ class WorkflowTracer {
321
321
  inputContext: params.inputContext,
322
322
  }),
323
323
  });
324
- this.log('Recorded decision', {
324
+ this.log("Recorded decision", {
325
325
  agent: params.agent,
326
326
  type: params.type,
327
327
  chosen: params.chosen,
@@ -356,7 +356,7 @@ class WorkflowTracer {
356
356
  const costRecord = {
357
357
  ...params,
358
358
  totalTokens,
359
- category: params.category || 'llm',
359
+ category: params.category || "llm",
360
360
  inputCost: inputCost.toFixed(6),
361
361
  outputCost: outputCost.toFixed(6),
362
362
  totalCost: totalCost.toFixed(6),
@@ -377,7 +377,7 @@ class WorkflowTracer {
377
377
  }),
378
378
  });
379
379
  }
380
- this.log('Recorded cost', {
380
+ this.log("Recorded cost", {
381
381
  provider: params.provider,
382
382
  model: params.model,
383
383
  totalTokens,
@@ -402,7 +402,7 @@ class WorkflowTracer {
402
402
  other: 0,
403
403
  };
404
404
  for (const cost of this.costs) {
405
- const category = cost.category || 'other';
405
+ const category = cost.category || "other";
406
406
  breakdown[category] += parseFloat(cost.totalCost);
407
407
  }
408
408
  return breakdown;
@@ -417,23 +417,23 @@ class WorkflowTracer {
417
417
  // Default pricing (can be extended with API lookup)
418
418
  const knownPricing = {
419
419
  // OpenAI
420
- 'openai/gpt-4': { inputPricePerMillion: 30.00, outputPricePerMillion: 60.00 },
421
- 'openai/gpt-4-turbo': { inputPricePerMillion: 10.00, outputPricePerMillion: 30.00 },
422
- 'openai/gpt-4o': { inputPricePerMillion: 5.00, outputPricePerMillion: 15.00 },
423
- 'openai/gpt-4o-mini': { inputPricePerMillion: 0.15, outputPricePerMillion: 0.60 },
424
- 'openai/gpt-3.5-turbo': { inputPricePerMillion: 0.50, outputPricePerMillion: 1.50 },
420
+ "openai/gpt-4": { inputPricePerMillion: 30.0, outputPricePerMillion: 60.0 },
421
+ "openai/gpt-4-turbo": { inputPricePerMillion: 10.0, outputPricePerMillion: 30.0 },
422
+ "openai/gpt-4o": { inputPricePerMillion: 5.0, outputPricePerMillion: 15.0 },
423
+ "openai/gpt-4o-mini": { inputPricePerMillion: 0.15, outputPricePerMillion: 0.6 },
424
+ "openai/gpt-3.5-turbo": { inputPricePerMillion: 0.5, outputPricePerMillion: 1.5 },
425
425
  // Anthropic
426
- 'anthropic/claude-3-opus': { inputPricePerMillion: 15.00, outputPricePerMillion: 75.00 },
427
- 'anthropic/claude-3-sonnet': { inputPricePerMillion: 3.00, outputPricePerMillion: 15.00 },
428
- 'anthropic/claude-3-haiku': { inputPricePerMillion: 0.25, outputPricePerMillion: 1.25 },
429
- 'anthropic/claude-3.5-sonnet': { inputPricePerMillion: 3.00, outputPricePerMillion: 15.00 },
426
+ "anthropic/claude-3-opus": { inputPricePerMillion: 15.0, outputPricePerMillion: 75.0 },
427
+ "anthropic/claude-3-sonnet": { inputPricePerMillion: 3.0, outputPricePerMillion: 15.0 },
428
+ "anthropic/claude-3-haiku": { inputPricePerMillion: 0.25, outputPricePerMillion: 1.25 },
429
+ "anthropic/claude-3.5-sonnet": { inputPricePerMillion: 3.0, outputPricePerMillion: 15.0 },
430
430
  // Google
431
- 'google/gemini-pro': { inputPricePerMillion: 0.50, outputPricePerMillion: 1.50 },
432
- 'google/gemini-1.5-pro': { inputPricePerMillion: 3.50, outputPricePerMillion: 10.50 },
433
- 'google/gemini-1.5-flash': { inputPricePerMillion: 0.075, outputPricePerMillion: 0.30 },
431
+ "google/gemini-pro": { inputPricePerMillion: 0.5, outputPricePerMillion: 1.5 },
432
+ "google/gemini-1.5-pro": { inputPricePerMillion: 3.5, outputPricePerMillion: 10.5 },
433
+ "google/gemini-1.5-flash": { inputPricePerMillion: 0.075, outputPricePerMillion: 0.3 },
434
434
  };
435
435
  const key = `${provider}/${model}`;
436
- return knownPricing[key] || { inputPricePerMillion: 1.00, outputPricePerMillion: 3.00 };
436
+ return knownPricing[key] || { inputPricePerMillion: 1.0, outputPricePerMillion: 3.0 };
437
437
  }
438
438
  /**
439
439
  * Generate a unique ID
@@ -446,7 +446,7 @@ class WorkflowTracer {
446
446
  */
447
447
  log(message, data) {
448
448
  if (this.options.debug) {
449
- console.log(`[WorkflowTracer] ${message}`, data || '');
449
+ console.log(`[WorkflowTracer] ${message}`, data || "");
450
450
  }
451
451
  }
452
452
  /**
@@ -498,7 +498,7 @@ exports.WorkflowTracer = WorkflowTracer;
498
498
  * ```
499
499
  */
500
500
  function traceLangChainAgent(executor, tracer, options = {}) {
501
- const agentName = options.agentName || 'LangChainAgent';
501
+ const agentName = options.agentName || "LangChainAgent";
502
502
  const originalInvoke = executor.invoke?.bind(executor);
503
503
  const originalCall = executor.call?.bind(executor);
504
504
  if (originalInvoke) {
@@ -544,7 +544,7 @@ function traceLangChainAgent(executor, tracer, options = {}) {
544
544
  * ```
545
545
  */
546
546
  function traceCrewAI(crew, tracer, options = {}) {
547
- const crewName = options.crewName || 'CrewAI';
547
+ const crewName = options.crewName || "CrewAI";
548
548
  const originalKickoff = crew.kickoff?.bind(crew);
549
549
  if (originalKickoff) {
550
550
  crew.kickoff = async (input) => {
@@ -553,12 +553,12 @@ function traceCrewAI(crew, tracer, options = {}) {
553
553
  try {
554
554
  const result = await originalKickoff(input);
555
555
  await tracer.endAgentSpan(span, { output: result });
556
- await tracer.endWorkflow({ result }, 'completed');
556
+ await tracer.endWorkflow({ result }, "completed");
557
557
  return result;
558
558
  }
559
559
  catch (error) {
560
560
  await tracer.endAgentSpan(span, undefined, error instanceof Error ? error.message : String(error));
561
- await tracer.endWorkflow({ error: error instanceof Error ? error.message : String(error) }, 'failed');
561
+ await tracer.endWorkflow({ error: error instanceof Error ? error.message : String(error) }, "failed");
562
562
  throw error;
563
563
  }
564
564
  };
@@ -576,7 +576,7 @@ function traceCrewAI(crew, tracer, options = {}) {
576
576
  * ```
577
577
  */
578
578
  function traceAutoGen(conversation, tracer, options = {}) {
579
- const conversationName = options.conversationName || 'AutoGenConversation';
579
+ const conversationName = options.conversationName || "AutoGenConversation";
580
580
  const originalInitiateChat = conversation.initiate_chat?.bind(conversation);
581
581
  if (originalInitiateChat) {
582
582
  conversation.initiate_chat = async (...args) => {
@@ -585,12 +585,12 @@ function traceAutoGen(conversation, tracer, options = {}) {
585
585
  try {
586
586
  const result = await originalInitiateChat(...args);
587
587
  await tracer.endAgentSpan(span, { output: result });
588
- await tracer.endWorkflow({ result }, 'completed');
588
+ await tracer.endWorkflow({ result }, "completed");
589
589
  return result;
590
590
  }
591
591
  catch (error) {
592
592
  await tracer.endAgentSpan(span, undefined, error instanceof Error ? error.message : String(error));
593
- await tracer.endWorkflow({ error: error instanceof Error ? error.message : String(error) }, 'failed');
593
+ await tracer.endWorkflow({ error: error instanceof Error ? error.message : String(error) }, "failed");
594
594
  throw error;
595
595
  }
596
596
  };
package/package.json CHANGED
@@ -1,90 +1,102 @@
1
- {
2
- "name": "@pauly4010/evalai-sdk",
3
- "version": "1.4.1",
4
- "description": "AI Evaluation Platform SDK - Complete API Coverage with Performance Optimizations",
5
- "main": "dist/index.js",
6
- "module": "dist/index.js",
7
- "types": "dist/index.d.ts",
8
- "sideEffects": false,
9
- "files": ["dist", "README.md", "CHANGELOG.md"],
10
- "bin": {
11
- "evalai": "./dist/cli/index.js"
12
- },
13
- "engines": {
14
- "node": ">=16.0.0"
15
- },
16
- "scripts": {
17
- "build": "tsc",
18
- "dev": "tsc --watch",
19
- "test": "vitest",
20
- "prepublishOnly": "npm run build"
21
- },
22
- "keywords": [
23
- "ai",
24
- "evaluation",
25
- "llm",
26
- "testing",
27
- "observability",
28
- "tracing",
29
- "monitoring",
30
- "annotations",
31
- "webhooks",
32
- "developer-tools",
33
- "openai",
34
- "anthropic"
35
- ],
36
- "author": "EvalAI Team",
37
- "license": "MIT",
38
- "repository": {
39
- "type": "git",
40
- "url": "git+https://github.com/pauly7610/ai-evaluation-platform.git",
41
- "directory": "src/packages/sdk"
42
- },
43
- "homepage": "https://v0-ai-evaluation-platform-nu.vercel.app",
44
- "bugs": {
45
- "url": "https://github.com/pauly7610/ai-evaluation-platform/issues"
46
- },
47
- "dependencies": {
48
- "commander": "^14.0.0"
49
- },
50
- "peerDependencies": {
51
- "openai": "^4.0.0",
52
- "@anthropic-ai/sdk": "^0.20.0"
53
- },
54
- "peerDependenciesMeta": {
55
- "openai": {
56
- "optional": true
57
- },
58
- "@anthropic-ai/sdk": {
59
- "optional": true
60
- }
61
- },
62
- "devDependencies": {
63
- "@types/node": "^20.0.0",
64
- "typescript": "^5.0.0",
65
- "vitest": "^1.0.0"
66
- },
67
- "exports": {
68
- ".": {
69
- "import": "./dist/index.js",
70
- "require": "./dist/index.js",
71
- "types": "./dist/index.d.ts"
72
- },
73
- "./assertions": {
74
- "import": "./dist/assertions.js",
75
- "types": "./dist/assertions.d.ts"
76
- },
77
- "./testing": {
78
- "import": "./dist/testing.js",
79
- "types": "./dist/testing.d.ts"
80
- },
81
- "./integrations/openai": {
82
- "import": "./dist/integrations/openai.js",
83
- "types": "./dist/integrations/openai.d.ts"
84
- },
85
- "./integrations/anthropic": {
86
- "import": "./dist/integrations/anthropic.js",
87
- "types": "./dist/integrations/anthropic.d.ts"
88
- }
89
- }
90
- }
1
+ {
2
+ "name": "@pauly4010/evalai-sdk",
3
+ "version": "1.5.5",
4
+ "description": "AI Evaluation Platform SDK - Complete API Coverage with Performance Optimizations",
5
+ "main": "dist/index.js",
6
+ "module": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "sideEffects": false,
9
+ "files": [
10
+ "dist",
11
+ "README.md",
12
+ "CHANGELOG.md"
13
+ ],
14
+ "bin": {
15
+ "evalai": "./dist/cli/index.js"
16
+ },
17
+ "engines": {
18
+ "node": ">=16.0.0"
19
+ },
20
+ "scripts": {
21
+ "build": "tsc",
22
+ "dev": "tsc --watch",
23
+ "test": "vitest",
24
+ "prepublishOnly": "npm run build"
25
+ },
26
+ "keywords": [
27
+ "ai",
28
+ "evaluation",
29
+ "llm",
30
+ "testing",
31
+ "observability",
32
+ "tracing",
33
+ "monitoring",
34
+ "annotations",
35
+ "webhooks",
36
+ "developer-tools",
37
+ "openai",
38
+ "anthropic"
39
+ ],
40
+ "author": "EvalAI Team",
41
+ "license": "MIT",
42
+ "repository": {
43
+ "type": "git",
44
+ "url": "git+https://github.com/pauly7610/ai-evaluation-platform.git",
45
+ "directory": "src/packages/sdk"
46
+ },
47
+ "homepage": "https://v0-ai-evaluation-platform-nu.vercel.app",
48
+ "bugs": {
49
+ "url": "https://github.com/pauly7610/ai-evaluation-platform/issues"
50
+ },
51
+ "dependencies": {
52
+ "commander": "^14.0.0"
53
+ },
54
+ "peerDependencies": {
55
+ "openai": "^4.0.0",
56
+ "@anthropic-ai/sdk": "^0.20.0"
57
+ },
58
+ "peerDependenciesMeta": {
59
+ "openai": {
60
+ "optional": true
61
+ },
62
+ "@anthropic-ai/sdk": {
63
+ "optional": true
64
+ }
65
+ },
66
+ "devDependencies": {
67
+ "@types/node": "^20.0.0",
68
+ "typescript": "^5.0.0",
69
+ "vitest": "^1.0.0"
70
+ },
71
+ "exports": {
72
+ ".": {
73
+ "import": "./dist/index.js",
74
+ "require": "./dist/index.js",
75
+ "types": "./dist/index.d.ts"
76
+ },
77
+ "./assertions": {
78
+ "import": "./dist/assertions.js",
79
+ "types": "./dist/assertions.d.ts"
80
+ },
81
+ "./testing": {
82
+ "import": "./dist/testing.js",
83
+ "types": "./dist/testing.d.ts"
84
+ },
85
+ "./integrations/openai": {
86
+ "import": "./dist/integrations/openai.js",
87
+ "types": "./dist/integrations/openai.d.ts"
88
+ },
89
+ "./integrations/anthropic": {
90
+ "import": "./dist/integrations/anthropic.js",
91
+ "types": "./dist/integrations/anthropic.d.ts"
92
+ },
93
+ "./integrations/openai-eval": {
94
+ "import": "./dist/integrations/openai-eval.js",
95
+ "types": "./dist/integrations/openai-eval.d.ts"
96
+ },
97
+ "./matchers": {
98
+ "import": "./dist/matchers/index.js",
99
+ "types": "./dist/matchers/index.d.ts"
100
+ }
101
+ }
102
+ }
@@ -1 +0,0 @@
1
- export {};