@pauly4010/evalai-sdk 1.4.1 → 1.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +85 -0
- package/README.md +205 -543
- package/dist/assertions.d.ts +2 -2
- package/dist/assertions.js +104 -71
- package/dist/batch.js +12 -17
- package/dist/cache.js +7 -11
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +130 -0
- package/dist/cli/check.d.ts +28 -13
- package/dist/cli/check.js +249 -142
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +110 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +207 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/doctor.d.ts +11 -0
- package/dist/cli/doctor.js +82 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +130 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +107 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +101 -0
- package/dist/cli/formatters/types.d.ts +100 -0
- package/dist/cli/formatters/types.js +5 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +175 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +67 -23
- package/dist/cli/init.d.ts +7 -0
- package/dist/cli/init.js +69 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +83 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +124 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +83 -0
- package/dist/client.d.ts +2 -2
- package/dist/client.js +144 -132
- package/dist/context.d.ts +1 -1
- package/dist/context.js +4 -6
- package/dist/errors.d.ts +2 -0
- package/dist/errors.js +116 -107
- package/dist/export.d.ts +6 -6
- package/dist/export.js +39 -33
- package/dist/index.d.ts +25 -24
- package/dist/index.js +62 -56
- package/dist/integrations/anthropic.d.ts +1 -1
- package/dist/integrations/anthropic.js +23 -19
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +230 -0
- package/dist/integrations/openai.d.ts +1 -1
- package/dist/integrations/openai.js +23 -19
- package/dist/local.d.ts +2 -2
- package/dist/local.js +25 -25
- package/dist/logger.d.ts +1 -1
- package/dist/logger.js +24 -28
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +1 -1
- package/dist/pagination.js +6 -6
- package/dist/snapshot.js +24 -24
- package/dist/streaming.js +11 -11
- package/dist/testing.d.ts +6 -2
- package/dist/testing.js +30 -12
- package/dist/types.d.ts +22 -22
- package/dist/types.js +13 -13
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +38 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +7 -7
- package/dist/workflows.js +44 -44
- package/package.json +102 -90
- package/dist/__tests__/assertions.test.d.ts +0 -1
- package/dist/__tests__/assertions.test.js +0 -288
- package/dist/__tests__/client.test.d.ts +0 -1
- package/dist/__tests__/client.test.js +0 -185
- package/dist/__tests__/testing.test.d.ts +0 -1
- package/dist/__tests__/testing.test.js +0 -230
- package/dist/__tests__/workflows.test.d.ts +0 -1
- package/dist/__tests__/workflows.test.js +0 -222
package/dist/types.js
CHANGED
|
@@ -7,21 +7,21 @@ exports.SDKError = exports.EvaluationTemplates = void 0;
|
|
|
7
7
|
*/
|
|
8
8
|
exports.EvaluationTemplates = {
|
|
9
9
|
// Core Testing
|
|
10
|
-
UNIT_TESTING:
|
|
11
|
-
OUTPUT_QUALITY:
|
|
10
|
+
UNIT_TESTING: "unit-testing",
|
|
11
|
+
OUTPUT_QUALITY: "output-quality",
|
|
12
12
|
// Advanced Evaluation
|
|
13
|
-
PROMPT_OPTIMIZATION:
|
|
14
|
-
CHAIN_OF_THOUGHT:
|
|
15
|
-
LONG_CONTEXT_TESTING:
|
|
16
|
-
MODEL_STEERING:
|
|
17
|
-
REGRESSION_TESTING:
|
|
18
|
-
CONFIDENCE_CALIBRATION:
|
|
13
|
+
PROMPT_OPTIMIZATION: "prompt-optimization",
|
|
14
|
+
CHAIN_OF_THOUGHT: "chain-of-thought",
|
|
15
|
+
LONG_CONTEXT_TESTING: "long-context-testing",
|
|
16
|
+
MODEL_STEERING: "model-steering",
|
|
17
|
+
REGRESSION_TESTING: "regression-testing",
|
|
18
|
+
CONFIDENCE_CALIBRATION: "confidence-calibration",
|
|
19
19
|
// Safety & Compliance
|
|
20
|
-
SAFETY_COMPLIANCE:
|
|
20
|
+
SAFETY_COMPLIANCE: "safety-compliance",
|
|
21
21
|
// Domain-Specific
|
|
22
|
-
RAG_EVALUATION:
|
|
23
|
-
CODE_GENERATION:
|
|
24
|
-
SUMMARIZATION:
|
|
22
|
+
RAG_EVALUATION: "rag-evaluation",
|
|
23
|
+
CODE_GENERATION: "code-generation",
|
|
24
|
+
SUMMARIZATION: "summarization",
|
|
25
25
|
};
|
|
26
26
|
/**
|
|
27
27
|
* SDK Error class with additional error details
|
|
@@ -45,7 +45,7 @@ exports.EvaluationTemplates = {
|
|
|
45
45
|
class SDKError extends Error {
|
|
46
46
|
constructor(message, code, statusCode, details) {
|
|
47
47
|
super(message);
|
|
48
|
-
this.name =
|
|
48
|
+
this.name = "SDKError";
|
|
49
49
|
this.code = code;
|
|
50
50
|
this.statusCode = statusCode;
|
|
51
51
|
this.details = details;
|
package/dist/utils/input-hash.d.ts
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Input normalization and hashing for deterministic matching.
|
|
3
|
+
* Must match platform's @/lib/utils/input-hash.ts for reportToEvalAI.
|
|
4
|
+
*/
|
|
5
|
+
/** Normalize input for stable matching (whitespace, JSON key order). */
|
|
6
|
+
export declare function normalizeInput(input: string): string;
|
|
7
|
+
/** SHA-256 hash of normalized input. */
|
|
8
|
+
export declare function sha256Input(s: string): string;
|
package/dist/utils/input-hash.js
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Input normalization and hashing for deterministic matching.
|
|
4
|
+
* Must match platform's @/lib/utils/input-hash.ts for reportToEvalAI.
|
|
5
|
+
*/
|
|
6
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
7
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
8
|
+
};
|
|
9
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
|
+
exports.normalizeInput = normalizeInput;
|
|
11
|
+
exports.sha256Input = sha256Input;
|
|
12
|
+
const node_crypto_1 = __importDefault(require("node:crypto"));
|
|
13
|
+
function sortKeys(obj) {
|
|
14
|
+
const sorted = {};
|
|
15
|
+
for (const k of Object.keys(obj).sort()) {
|
|
16
|
+
const v = obj[k];
|
|
17
|
+
sorted[k] =
|
|
18
|
+
v != null && typeof v === "object" && !Array.isArray(v)
|
|
19
|
+
? sortKeys(v)
|
|
20
|
+
: v;
|
|
21
|
+
}
|
|
22
|
+
return sorted;
|
|
23
|
+
}
|
|
24
|
+
/** Normalize input for stable matching (whitespace, JSON key order). */
|
|
25
|
+
function normalizeInput(input) {
|
|
26
|
+
const s = input.trim();
|
|
27
|
+
try {
|
|
28
|
+
const obj = JSON.parse(s);
|
|
29
|
+
return JSON.stringify(sortKeys(obj));
|
|
30
|
+
}
|
|
31
|
+
catch {
|
|
32
|
+
return s.replace(/\s+/g, " ");
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
/** SHA-256 hash of normalized input. */
|
|
36
|
+
function sha256Input(s) {
|
|
37
|
+
return node_crypto_1.default.createHash("sha256").update(normalizeInput(s), "utf8").digest("hex");
|
|
38
|
+
}
|
package/dist/version.d.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SDK and API spec versions for request headers.
|
|
3
|
+
* X-EvalAI-SDK-Version: SDK package version
|
|
4
|
+
* X-EvalAI-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
|
|
5
|
+
*/
|
|
6
|
+
export declare const SDK_VERSION = "1.5.0";
|
|
7
|
+
export declare const SPEC_VERSION = "1.0.0";
|
package/dist/version.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.SPEC_VERSION = exports.SDK_VERSION = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* SDK and API spec versions for request headers.
|
|
6
|
+
* X-EvalAI-SDK-Version: SDK package version
|
|
7
|
+
* X-EvalAI-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
|
|
8
|
+
*/
|
|
9
|
+
exports.SDK_VERSION = "1.5.0";
|
|
10
|
+
exports.SPEC_VERSION = "1.0.0";
|
package/dist/workflows.d.ts
CHANGED
|
@@ -26,13 +26,13 @@
|
|
|
26
26
|
* await tracer.endWorkflow({ resolution: 'Issue resolved' });
|
|
27
27
|
* ```
|
|
28
28
|
*/
|
|
29
|
-
import type { AIEvalClient } from
|
|
29
|
+
import type { AIEvalClient } from "./client";
|
|
30
30
|
/**
|
|
31
31
|
* Node in a workflow DAG
|
|
32
32
|
*/
|
|
33
33
|
export interface WorkflowNode {
|
|
34
34
|
id: string;
|
|
35
|
-
type:
|
|
35
|
+
type: "agent" | "tool" | "decision" | "parallel" | "human" | "llm";
|
|
36
36
|
name: string;
|
|
37
37
|
config?: Record<string, any>;
|
|
38
38
|
}
|
|
@@ -68,11 +68,11 @@ export interface WorkflowContext {
|
|
|
68
68
|
/**
|
|
69
69
|
* Workflow run status
|
|
70
70
|
*/
|
|
71
|
-
export type WorkflowStatus =
|
|
71
|
+
export type WorkflowStatus = "running" | "completed" | "failed" | "cancelled";
|
|
72
72
|
/**
|
|
73
73
|
* Handoff types between agents
|
|
74
74
|
*/
|
|
75
|
-
export type HandoffType =
|
|
75
|
+
export type HandoffType = "delegation" | "escalation" | "parallel" | "fallback";
|
|
76
76
|
/**
|
|
77
77
|
* Agent handoff record
|
|
78
78
|
*/
|
|
@@ -95,7 +95,7 @@ export interface DecisionAlternative {
|
|
|
95
95
|
/**
|
|
96
96
|
* Decision types made by agents
|
|
97
97
|
*/
|
|
98
|
-
export type DecisionType =
|
|
98
|
+
export type DecisionType = "action" | "tool" | "delegate" | "respond" | "route";
|
|
99
99
|
/**
|
|
100
100
|
* Parameters for recording a decision
|
|
101
101
|
*/
|
|
@@ -120,11 +120,11 @@ export interface RecordDecisionParams {
|
|
|
120
120
|
/**
|
|
121
121
|
* LLM provider names
|
|
122
122
|
*/
|
|
123
|
-
export type LLMProvider =
|
|
123
|
+
export type LLMProvider = "openai" | "anthropic" | "google" | "cohere" | "mistral" | "custom";
|
|
124
124
|
/**
|
|
125
125
|
* Cost categories for tracking
|
|
126
126
|
*/
|
|
127
|
-
export type CostCategory =
|
|
127
|
+
export type CostCategory = "llm" | "tool" | "embedding" | "other";
|
|
128
128
|
/**
|
|
129
129
|
* Parameters for recording cost
|
|
130
130
|
*/
|
package/dist/workflows.js
CHANGED
|
@@ -67,7 +67,7 @@ class WorkflowTracer {
|
|
|
67
67
|
this.options = {
|
|
68
68
|
organizationId: options.organizationId || client.getOrganizationId() || 0,
|
|
69
69
|
autoCalculateCost: options.autoCalculateCost ?? true,
|
|
70
|
-
tracePrefix: options.tracePrefix ||
|
|
70
|
+
tracePrefix: options.tracePrefix || "workflow",
|
|
71
71
|
captureFullPayloads: options.captureFullPayloads ?? true,
|
|
72
72
|
debug: options.debug ?? false,
|
|
73
73
|
};
|
|
@@ -92,7 +92,7 @@ class WorkflowTracer {
|
|
|
92
92
|
*/
|
|
93
93
|
async startWorkflow(name, definition, metadata) {
|
|
94
94
|
if (this.currentWorkflow) {
|
|
95
|
-
throw new Error(
|
|
95
|
+
throw new Error("A workflow is already active. Call endWorkflow() first.");
|
|
96
96
|
}
|
|
97
97
|
const traceId = `${this.options.tracePrefix}-${Date.now()}-${this.generateId()}`;
|
|
98
98
|
const startedAt = new Date().toISOString();
|
|
@@ -101,7 +101,7 @@ class WorkflowTracer {
|
|
|
101
101
|
name: `Workflow: ${name}`,
|
|
102
102
|
traceId,
|
|
103
103
|
organizationId: this.options.organizationId,
|
|
104
|
-
status:
|
|
104
|
+
status: "pending",
|
|
105
105
|
metadata: (0, context_1.mergeWithContext)({
|
|
106
106
|
workflowName: name,
|
|
107
107
|
definition,
|
|
@@ -122,22 +122,22 @@ class WorkflowTracer {
|
|
|
122
122
|
this.costs = [];
|
|
123
123
|
this.activeSpans.clear();
|
|
124
124
|
this.spanCounter = 0;
|
|
125
|
-
this.log(
|
|
125
|
+
this.log("Started workflow", { name, traceId: trace.id });
|
|
126
126
|
return this.currentWorkflow;
|
|
127
127
|
}
|
|
128
128
|
/**
|
|
129
129
|
* End the current workflow
|
|
130
130
|
*/
|
|
131
|
-
async endWorkflow(output, status =
|
|
131
|
+
async endWorkflow(output, status = "completed") {
|
|
132
132
|
if (!this.currentWorkflow) {
|
|
133
|
-
throw new Error(
|
|
133
|
+
throw new Error("No active workflow. Call startWorkflow() first.");
|
|
134
134
|
}
|
|
135
135
|
const durationMs = Date.now() - new Date(this.currentWorkflow.startedAt).getTime();
|
|
136
136
|
// Calculate total cost
|
|
137
137
|
const totalCost = this.costs.reduce((sum, cost) => sum + parseFloat(cost.totalCost), 0);
|
|
138
138
|
// Update the original trace with completion data
|
|
139
139
|
await this.client.traces.update(this.currentWorkflow.traceId, {
|
|
140
|
-
status: status ===
|
|
140
|
+
status: status === "completed" ? "success" : "error",
|
|
141
141
|
durationMs,
|
|
142
142
|
metadata: (0, context_1.mergeWithContext)({
|
|
143
143
|
workflowName: this.currentWorkflow.name,
|
|
@@ -146,14 +146,14 @@ class WorkflowTracer {
|
|
|
146
146
|
totalCost: totalCost.toFixed(6),
|
|
147
147
|
handoffCount: this.handoffs.length,
|
|
148
148
|
decisionCount: this.decisions.length,
|
|
149
|
-
agentCount: new Set(this.handoffs.map(h => h.toAgent)).size + 1,
|
|
150
|
-
retryCount: this.costs.filter(c => c.isRetry).length,
|
|
149
|
+
agentCount: new Set(this.handoffs.map((h) => h.toAgent)).size + 1,
|
|
150
|
+
retryCount: this.costs.filter((c) => c.isRetry).length,
|
|
151
151
|
handoffs: this.handoffs,
|
|
152
152
|
decisions: this.decisions,
|
|
153
153
|
costs: this.costs,
|
|
154
154
|
}),
|
|
155
155
|
});
|
|
156
|
-
this.log(
|
|
156
|
+
this.log("Ended workflow", {
|
|
157
157
|
name: this.currentWorkflow.name,
|
|
158
158
|
status,
|
|
159
159
|
durationMs,
|
|
@@ -176,7 +176,7 @@ class WorkflowTracer {
|
|
|
176
176
|
*/
|
|
177
177
|
async startAgentSpan(agentName, input, parentSpanId) {
|
|
178
178
|
if (!this.currentWorkflow) {
|
|
179
|
-
throw new Error(
|
|
179
|
+
throw new Error("No active workflow. Call startWorkflow() first.");
|
|
180
180
|
}
|
|
181
181
|
const spanId = `span-${++this.spanCounter}-${this.generateId()}`;
|
|
182
182
|
const startTime = new Date().toISOString();
|
|
@@ -199,7 +199,7 @@ class WorkflowTracer {
|
|
|
199
199
|
...(this.options.captureFullPayloads ? { input } : {}),
|
|
200
200
|
}),
|
|
201
201
|
});
|
|
202
|
-
this.log(
|
|
202
|
+
this.log("Started agent span", { agentName, spanId });
|
|
203
203
|
return spanContext;
|
|
204
204
|
}
|
|
205
205
|
/**
|
|
@@ -207,7 +207,7 @@ class WorkflowTracer {
|
|
|
207
207
|
*/
|
|
208
208
|
async endAgentSpan(span, output, error) {
|
|
209
209
|
if (!this.currentWorkflow) {
|
|
210
|
-
throw new Error(
|
|
210
|
+
throw new Error("No active workflow.");
|
|
211
211
|
}
|
|
212
212
|
const endTime = new Date().toISOString();
|
|
213
213
|
const durationMs = new Date(endTime).getTime() - new Date(span.startTime).getTime();
|
|
@@ -226,7 +226,7 @@ class WorkflowTracer {
|
|
|
226
226
|
}),
|
|
227
227
|
});
|
|
228
228
|
this.activeSpans.delete(span.spanId);
|
|
229
|
-
this.log(
|
|
229
|
+
this.log("Ended agent span", { agentName: span.agentName, spanId: span.spanId, durationMs });
|
|
230
230
|
}
|
|
231
231
|
// ==========================================================================
|
|
232
232
|
// HANDOFFS
|
|
@@ -244,9 +244,9 @@ class WorkflowTracer {
|
|
|
244
244
|
* );
|
|
245
245
|
* ```
|
|
246
246
|
*/
|
|
247
|
-
async recordHandoff(fromAgent, toAgent, context, handoffType =
|
|
247
|
+
async recordHandoff(fromAgent, toAgent, context, handoffType = "delegation") {
|
|
248
248
|
if (!this.currentWorkflow) {
|
|
249
|
-
throw new Error(
|
|
249
|
+
throw new Error("No active workflow. Call startWorkflow() first.");
|
|
250
250
|
}
|
|
251
251
|
const handoff = {
|
|
252
252
|
fromAgent,
|
|
@@ -259,7 +259,7 @@ class WorkflowTracer {
|
|
|
259
259
|
// Also create a span for the handoff
|
|
260
260
|
const spanId = `handoff-${this.handoffs.length}-${this.generateId()}`;
|
|
261
261
|
await this.client.traces.createSpan(this.currentWorkflow.traceId, {
|
|
262
|
-
name: `Handoff: ${fromAgent ||
|
|
262
|
+
name: `Handoff: ${fromAgent || "start"} → ${toAgent}`,
|
|
263
263
|
spanId,
|
|
264
264
|
startTime: handoff.timestamp,
|
|
265
265
|
endTime: handoff.timestamp,
|
|
@@ -271,7 +271,7 @@ class WorkflowTracer {
|
|
|
271
271
|
context,
|
|
272
272
|
}),
|
|
273
273
|
});
|
|
274
|
-
this.log(
|
|
274
|
+
this.log("Recorded handoff", { fromAgent, toAgent, handoffType });
|
|
275
275
|
}
|
|
276
276
|
// ==========================================================================
|
|
277
277
|
// DECISION AUDITING
|
|
@@ -297,7 +297,7 @@ class WorkflowTracer {
|
|
|
297
297
|
*/
|
|
298
298
|
async recordDecision(params) {
|
|
299
299
|
if (!this.currentWorkflow) {
|
|
300
|
-
throw new Error(
|
|
300
|
+
throw new Error("No active workflow. Call startWorkflow() first.");
|
|
301
301
|
}
|
|
302
302
|
this.decisions.push(params);
|
|
303
303
|
// Create a span for the decision
|
|
@@ -321,7 +321,7 @@ class WorkflowTracer {
|
|
|
321
321
|
inputContext: params.inputContext,
|
|
322
322
|
}),
|
|
323
323
|
});
|
|
324
|
-
this.log(
|
|
324
|
+
this.log("Recorded decision", {
|
|
325
325
|
agent: params.agent,
|
|
326
326
|
type: params.type,
|
|
327
327
|
chosen: params.chosen,
|
|
@@ -356,7 +356,7 @@ class WorkflowTracer {
|
|
|
356
356
|
const costRecord = {
|
|
357
357
|
...params,
|
|
358
358
|
totalTokens,
|
|
359
|
-
category: params.category ||
|
|
359
|
+
category: params.category || "llm",
|
|
360
360
|
inputCost: inputCost.toFixed(6),
|
|
361
361
|
outputCost: outputCost.toFixed(6),
|
|
362
362
|
totalCost: totalCost.toFixed(6),
|
|
@@ -377,7 +377,7 @@ class WorkflowTracer {
|
|
|
377
377
|
}),
|
|
378
378
|
});
|
|
379
379
|
}
|
|
380
|
-
this.log(
|
|
380
|
+
this.log("Recorded cost", {
|
|
381
381
|
provider: params.provider,
|
|
382
382
|
model: params.model,
|
|
383
383
|
totalTokens,
|
|
@@ -402,7 +402,7 @@ class WorkflowTracer {
|
|
|
402
402
|
other: 0,
|
|
403
403
|
};
|
|
404
404
|
for (const cost of this.costs) {
|
|
405
|
-
const category = cost.category ||
|
|
405
|
+
const category = cost.category || "other";
|
|
406
406
|
breakdown[category] += parseFloat(cost.totalCost);
|
|
407
407
|
}
|
|
408
408
|
return breakdown;
|
|
@@ -417,23 +417,23 @@ class WorkflowTracer {
|
|
|
417
417
|
// Default pricing (can be extended with API lookup)
|
|
418
418
|
const knownPricing = {
|
|
419
419
|
// OpenAI
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
420
|
+
"openai/gpt-4": { inputPricePerMillion: 30.0, outputPricePerMillion: 60.0 },
|
|
421
|
+
"openai/gpt-4-turbo": { inputPricePerMillion: 10.0, outputPricePerMillion: 30.0 },
|
|
422
|
+
"openai/gpt-4o": { inputPricePerMillion: 5.0, outputPricePerMillion: 15.0 },
|
|
423
|
+
"openai/gpt-4o-mini": { inputPricePerMillion: 0.15, outputPricePerMillion: 0.6 },
|
|
424
|
+
"openai/gpt-3.5-turbo": { inputPricePerMillion: 0.5, outputPricePerMillion: 1.5 },
|
|
425
425
|
// Anthropic
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
426
|
+
"anthropic/claude-3-opus": { inputPricePerMillion: 15.0, outputPricePerMillion: 75.0 },
|
|
427
|
+
"anthropic/claude-3-sonnet": { inputPricePerMillion: 3.0, outputPricePerMillion: 15.0 },
|
|
428
|
+
"anthropic/claude-3-haiku": { inputPricePerMillion: 0.25, outputPricePerMillion: 1.25 },
|
|
429
|
+
"anthropic/claude-3.5-sonnet": { inputPricePerMillion: 3.0, outputPricePerMillion: 15.0 },
|
|
430
430
|
// Google
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
431
|
+
"google/gemini-pro": { inputPricePerMillion: 0.5, outputPricePerMillion: 1.5 },
|
|
432
|
+
"google/gemini-1.5-pro": { inputPricePerMillion: 3.5, outputPricePerMillion: 10.5 },
|
|
433
|
+
"google/gemini-1.5-flash": { inputPricePerMillion: 0.075, outputPricePerMillion: 0.3 },
|
|
434
434
|
};
|
|
435
435
|
const key = `${provider}/${model}`;
|
|
436
|
-
return knownPricing[key] || { inputPricePerMillion: 1.
|
|
436
|
+
return knownPricing[key] || { inputPricePerMillion: 1.0, outputPricePerMillion: 3.0 };
|
|
437
437
|
}
|
|
438
438
|
/**
|
|
439
439
|
* Generate a unique ID
|
|
@@ -446,7 +446,7 @@ class WorkflowTracer {
|
|
|
446
446
|
*/
|
|
447
447
|
log(message, data) {
|
|
448
448
|
if (this.options.debug) {
|
|
449
|
-
console.log(`[WorkflowTracer] ${message}`, data ||
|
|
449
|
+
console.log(`[WorkflowTracer] ${message}`, data || "");
|
|
450
450
|
}
|
|
451
451
|
}
|
|
452
452
|
/**
|
|
@@ -498,7 +498,7 @@ exports.WorkflowTracer = WorkflowTracer;
|
|
|
498
498
|
* ```
|
|
499
499
|
*/
|
|
500
500
|
function traceLangChainAgent(executor, tracer, options = {}) {
|
|
501
|
-
const agentName = options.agentName ||
|
|
501
|
+
const agentName = options.agentName || "LangChainAgent";
|
|
502
502
|
const originalInvoke = executor.invoke?.bind(executor);
|
|
503
503
|
const originalCall = executor.call?.bind(executor);
|
|
504
504
|
if (originalInvoke) {
|
|
@@ -544,7 +544,7 @@ function traceLangChainAgent(executor, tracer, options = {}) {
|
|
|
544
544
|
* ```
|
|
545
545
|
*/
|
|
546
546
|
function traceCrewAI(crew, tracer, options = {}) {
|
|
547
|
-
const crewName = options.crewName ||
|
|
547
|
+
const crewName = options.crewName || "CrewAI";
|
|
548
548
|
const originalKickoff = crew.kickoff?.bind(crew);
|
|
549
549
|
if (originalKickoff) {
|
|
550
550
|
crew.kickoff = async (input) => {
|
|
@@ -553,12 +553,12 @@ function traceCrewAI(crew, tracer, options = {}) {
|
|
|
553
553
|
try {
|
|
554
554
|
const result = await originalKickoff(input);
|
|
555
555
|
await tracer.endAgentSpan(span, { output: result });
|
|
556
|
-
await tracer.endWorkflow({ result },
|
|
556
|
+
await tracer.endWorkflow({ result }, "completed");
|
|
557
557
|
return result;
|
|
558
558
|
}
|
|
559
559
|
catch (error) {
|
|
560
560
|
await tracer.endAgentSpan(span, undefined, error instanceof Error ? error.message : String(error));
|
|
561
|
-
await tracer.endWorkflow({ error: error instanceof Error ? error.message : String(error) },
|
|
561
|
+
await tracer.endWorkflow({ error: error instanceof Error ? error.message : String(error) }, "failed");
|
|
562
562
|
throw error;
|
|
563
563
|
}
|
|
564
564
|
};
|
|
@@ -576,7 +576,7 @@ function traceCrewAI(crew, tracer, options = {}) {
|
|
|
576
576
|
* ```
|
|
577
577
|
*/
|
|
578
578
|
function traceAutoGen(conversation, tracer, options = {}) {
|
|
579
|
-
const conversationName = options.conversationName ||
|
|
579
|
+
const conversationName = options.conversationName || "AutoGenConversation";
|
|
580
580
|
const originalInitiateChat = conversation.initiate_chat?.bind(conversation);
|
|
581
581
|
if (originalInitiateChat) {
|
|
582
582
|
conversation.initiate_chat = async (...args) => {
|
|
@@ -585,12 +585,12 @@ function traceAutoGen(conversation, tracer, options = {}) {
|
|
|
585
585
|
try {
|
|
586
586
|
const result = await originalInitiateChat(...args);
|
|
587
587
|
await tracer.endAgentSpan(span, { output: result });
|
|
588
|
-
await tracer.endWorkflow({ result },
|
|
588
|
+
await tracer.endWorkflow({ result }, "completed");
|
|
589
589
|
return result;
|
|
590
590
|
}
|
|
591
591
|
catch (error) {
|
|
592
592
|
await tracer.endAgentSpan(span, undefined, error instanceof Error ? error.message : String(error));
|
|
593
|
-
await tracer.endWorkflow({ error: error instanceof Error ? error.message : String(error) },
|
|
593
|
+
await tracer.endWorkflow({ error: error instanceof Error ? error.message : String(error) }, "failed");
|
|
594
594
|
throw error;
|
|
595
595
|
}
|
|
596
596
|
};
|
package/package.json
CHANGED
|
@@ -1,90 +1,102 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@pauly4010/evalai-sdk",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"description": "AI Evaluation Platform SDK - Complete API Coverage with Performance Optimizations",
|
|
5
|
-
"main": "dist/index.js",
|
|
6
|
-
"module": "dist/index.js",
|
|
7
|
-
"types": "dist/index.d.ts",
|
|
8
|
-
"sideEffects": false,
|
|
9
|
-
"files": [
|
|
10
|
-
|
|
11
|
-
"
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
"
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
"
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
"
|
|
46
|
-
},
|
|
47
|
-
"
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
"
|
|
53
|
-
},
|
|
54
|
-
"
|
|
55
|
-
"openai":
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
"
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
"
|
|
75
|
-
"types": "./dist/
|
|
76
|
-
},
|
|
77
|
-
"./
|
|
78
|
-
"import": "./dist/
|
|
79
|
-
"types": "./dist/
|
|
80
|
-
},
|
|
81
|
-
"./
|
|
82
|
-
"import": "./dist/
|
|
83
|
-
"types": "./dist/
|
|
84
|
-
},
|
|
85
|
-
"./integrations/
|
|
86
|
-
"import": "./dist/integrations/
|
|
87
|
-
"types": "./dist/integrations/
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
|
|
1
|
+
{
|
|
2
|
+
"name": "@pauly4010/evalai-sdk",
|
|
3
|
+
"version": "1.5.5",
|
|
4
|
+
"description": "AI Evaluation Platform SDK - Complete API Coverage with Performance Optimizations",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"module": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"sideEffects": false,
|
|
9
|
+
"files": [
|
|
10
|
+
"dist",
|
|
11
|
+
"README.md",
|
|
12
|
+
"CHANGELOG.md"
|
|
13
|
+
],
|
|
14
|
+
"bin": {
|
|
15
|
+
"evalai": "./dist/cli/index.js"
|
|
16
|
+
},
|
|
17
|
+
"engines": {
|
|
18
|
+
"node": ">=16.0.0"
|
|
19
|
+
},
|
|
20
|
+
"scripts": {
|
|
21
|
+
"build": "tsc",
|
|
22
|
+
"dev": "tsc --watch",
|
|
23
|
+
"test": "vitest",
|
|
24
|
+
"prepublishOnly": "npm run build"
|
|
25
|
+
},
|
|
26
|
+
"keywords": [
|
|
27
|
+
"ai",
|
|
28
|
+
"evaluation",
|
|
29
|
+
"llm",
|
|
30
|
+
"testing",
|
|
31
|
+
"observability",
|
|
32
|
+
"tracing",
|
|
33
|
+
"monitoring",
|
|
34
|
+
"annotations",
|
|
35
|
+
"webhooks",
|
|
36
|
+
"developer-tools",
|
|
37
|
+
"openai",
|
|
38
|
+
"anthropic"
|
|
39
|
+
],
|
|
40
|
+
"author": "EvalAI Team",
|
|
41
|
+
"license": "MIT",
|
|
42
|
+
"repository": {
|
|
43
|
+
"type": "git",
|
|
44
|
+
"url": "git+https://github.com/pauly7610/ai-evaluation-platform.git",
|
|
45
|
+
"directory": "src/packages/sdk"
|
|
46
|
+
},
|
|
47
|
+
"homepage": "https://v0-ai-evaluation-platform-nu.vercel.app",
|
|
48
|
+
"bugs": {
|
|
49
|
+
"url": "https://github.com/pauly7610/ai-evaluation-platform/issues"
|
|
50
|
+
},
|
|
51
|
+
"dependencies": {
|
|
52
|
+
"commander": "^14.0.0"
|
|
53
|
+
},
|
|
54
|
+
"peerDependencies": {
|
|
55
|
+
"openai": "^4.0.0",
|
|
56
|
+
"@anthropic-ai/sdk": "^0.20.0"
|
|
57
|
+
},
|
|
58
|
+
"peerDependenciesMeta": {
|
|
59
|
+
"openai": {
|
|
60
|
+
"optional": true
|
|
61
|
+
},
|
|
62
|
+
"@anthropic-ai/sdk": {
|
|
63
|
+
"optional": true
|
|
64
|
+
}
|
|
65
|
+
},
|
|
66
|
+
"devDependencies": {
|
|
67
|
+
"@types/node": "^20.0.0",
|
|
68
|
+
"typescript": "^5.0.0",
|
|
69
|
+
"vitest": "^1.0.0"
|
|
70
|
+
},
|
|
71
|
+
"exports": {
|
|
72
|
+
".": {
|
|
73
|
+
"import": "./dist/index.js",
|
|
74
|
+
"require": "./dist/index.js",
|
|
75
|
+
"types": "./dist/index.d.ts"
|
|
76
|
+
},
|
|
77
|
+
"./assertions": {
|
|
78
|
+
"import": "./dist/assertions.js",
|
|
79
|
+
"types": "./dist/assertions.d.ts"
|
|
80
|
+
},
|
|
81
|
+
"./testing": {
|
|
82
|
+
"import": "./dist/testing.js",
|
|
83
|
+
"types": "./dist/testing.d.ts"
|
|
84
|
+
},
|
|
85
|
+
"./integrations/openai": {
|
|
86
|
+
"import": "./dist/integrations/openai.js",
|
|
87
|
+
"types": "./dist/integrations/openai.d.ts"
|
|
88
|
+
},
|
|
89
|
+
"./integrations/anthropic": {
|
|
90
|
+
"import": "./dist/integrations/anthropic.js",
|
|
91
|
+
"types": "./dist/integrations/anthropic.d.ts"
|
|
92
|
+
},
|
|
93
|
+
"./integrations/openai-eval": {
|
|
94
|
+
"import": "./dist/integrations/openai-eval.js",
|
|
95
|
+
"types": "./dist/integrations/openai-eval.d.ts"
|
|
96
|
+
},
|
|
97
|
+
"./matchers": {
|
|
98
|
+
"import": "./dist/matchers/index.js",
|
|
99
|
+
"types": "./dist/matchers/index.d.ts"
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|