@pauly4010/evalai-sdk 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/types.js ADDED
@@ -0,0 +1,54 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.SDKError = exports.EvaluationTemplates = void 0;
4
+ /**
5
+ * Evaluation template categories (constant name -> template identifier string); each value is the lower-case, hyphenated form of its key
6
+ * Updated with new template types for comprehensive LLM testing
7
+ */
8
+ exports.EvaluationTemplates = {
9
+ // Core Testing
10
+ UNIT_TESTING: 'unit-testing',
11
+ OUTPUT_QUALITY: 'output-quality',
12
+ // Advanced Evaluation
13
+ PROMPT_OPTIMIZATION: 'prompt-optimization',
14
+ CHAIN_OF_THOUGHT: 'chain-of-thought',
15
+ LONG_CONTEXT_TESTING: 'long-context-testing',
16
+ MODEL_STEERING: 'model-steering',
17
+ REGRESSION_TESTING: 'regression-testing',
18
+ CONFIDENCE_CALIBRATION: 'confidence-calibration',
19
+ // Safety & Compliance
20
+ SAFETY_COMPLIANCE: 'safety-compliance',
21
+ // Domain-Specific
22
+ RAG_EVALUATION: 'rag-evaluation',
23
+ CODE_GENERATION: 'code-generation',
24
+ SUMMARIZATION: 'summarization',
25
+ };
26
+ /**
27
+ * SDK Error class: an Error subclass that carries `code`, `statusCode` and `details` alongside the message
28
+ *
29
+ * Common error codes:
30
+ * - MISSING_API_KEY: API key not provided
31
+ * - MISSING_ORGANIZATION_ID: Organization ID not provided
32
+ * - MISSING_REQUIRED_FIELDS: Required parameters missing
33
+ * - INVALID_ID: Invalid ID format
34
+ * - NOT_FOUND: Resource not found
35
+ * - UNAUTHORIZED: Authentication required
36
+ * - FORBIDDEN: Access forbidden
37
+ * - RATE_LIMIT_EXCEEDED: Rate limit exceeded
38
+ * - TIMEOUT: Request timed out
39
+ * - NETWORK_ERROR: Network connectivity issue
40
+ * - VALIDATION_ERROR: Request validation failed
41
+ * - INTERNAL_SERVER_ERROR: Server error
42
+ * - FEATURE_LIMIT_REACHED: Feature usage limit reached
43
+ * - UNKNOWN_ERROR: Unknown error occurred
44
+ */
45
+ class SDKError extends Error {
46
+ constructor(message, code, statusCode, details) {
47
+ super(message);
48
+ this.name = 'SDKError'; // replaces the name inherited from Error
49
+ this.code = code; // stored as given; the list above names the common values
50
+ this.statusCode = statusCode; // NOTE(review): presumably an HTTP status code - confirm against callers
51
+ this.details = details; // stored as given; no shape is enforced here
52
+ }
53
+ }
54
+ exports.SDKError = SDKError;
@@ -0,0 +1,378 @@
1
+ /**
2
+ * Workflow Tracer SDK
3
+ * Multi-agent workflow instrumentation, decision tracking, and cost capture
4
+ *
5
+ * @example
6
+ * ```typescript
7
+ * import { WorkflowTracer } from '@pauly4010/evalai-sdk';
8
+ *
9
+ * const tracer = new WorkflowTracer(client, { organizationId: 123 });
10
+ *
11
+ * // Start a workflow
12
+ * const workflow = await tracer.startWorkflow('Customer Support Pipeline');
13
+ *
14
+ * // Record agent spans and handoffs
15
+ * const span1 = await tracer.startAgentSpan('RouterAgent', { input: query });
16
+ * await tracer.recordDecision({
17
+ * agent: 'RouterAgent', type: 'route',
18
+ * chosen: 'delegate_to_technical',
19
+ * alternatives: [{ action: 'delegate_to_billing', confidence: 0.3 }],
20
+ * reasoning: 'Query contains technical keywords'
21
+ * });
22
+ * await tracer.recordHandoff('RouterAgent', 'TechnicalAgent', { issue: 'API error' });
23
+ * await tracer.endAgentSpan(span1, { result: 'delegated' });
24
+ *
25
+ * // End workflow with final output
26
+ * await tracer.endWorkflow({ resolution: 'Issue resolved' });
27
+ * ```
28
+ */
29
+ import type { AIEvalClient } from './client';
30
+ /**
31
+ * A single node in a workflow DAG (one entry of `WorkflowDefinition.nodes`)
32
+ */
33
+ export interface WorkflowNode {
34
+ id: string; // the startWorkflow example uses these ids in edge `from`/`to` and in `entrypoint`
35
+ type: 'agent' | 'tool' | 'decision' | 'parallel' | 'human' | 'llm';
36
+ name: string;
37
+ config?: Record<string, any>; // optional free-form settings; shape is not specified in these declarations
38
+ }
39
+ /**
40
+ * Edge connecting nodes in a workflow DAG (one entry of `WorkflowDefinition.edges`)
41
+ */
42
+ export interface WorkflowEdge {
43
+ from: string; // a node id in the startWorkflow example ('router')
44
+ to: string; // a node id in the startWorkflow example ('technical')
45
+ condition?: string; // optional, e.g. 'is_technical' in the startWorkflow example; how it is interpreted is not shown here
46
+ label?: string;
47
+ }
48
+ /**
49
+ * Complete workflow definition (DAG structure); the optional second argument of `WorkflowTracer.startWorkflow()`
50
+ */
51
+ export interface WorkflowDefinition {
52
+ nodes: WorkflowNode[];
53
+ edges: WorkflowEdge[];
54
+ entrypoint: string; // a node id in the startWorkflow example ('router')
55
+ metadata?: Record<string, any>;
56
+ }
57
+ /**
58
+ * Active workflow context, as returned by `WorkflowTracer.startWorkflow()` and `WorkflowTracer.getCurrentWorkflow()`
59
+ */
60
+ export interface WorkflowContext {
61
+ id: number;
62
+ traceId: number;
63
+ name: string;
64
+ startedAt: string; // NOTE(review): timestamp carried as a string; exact format is not shown here - confirm
65
+ definition?: WorkflowDefinition;
66
+ metadata?: Record<string, any>;
67
+ }
68
+ /**
69
+ * Workflow run status; accepted as the optional second argument of `WorkflowTracer.endWorkflow()`
70
+ */
71
+ export type WorkflowStatus = 'running' | 'completed' | 'failed' | 'cancelled';
72
+ /**
73
+ * Handoff types between agents; accepted as the optional last argument of `WorkflowTracer.recordHandoff()`
74
+ */
75
+ export type HandoffType = 'delegation' | 'escalation' | 'parallel' | 'fallback';
76
+ /**
77
+ * Agent handoff record, as returned by `WorkflowTracer.getHandoffs()`
78
+ */
79
+ export interface AgentHandoff {
80
+ fromAgent?: string; // optional, mirroring recordHandoff's `fromAgent: string | undefined` parameter
81
+ toAgent: string;
82
+ handoffType: HandoffType;
83
+ context?: Record<string, any>;
84
+ timestamp: string; // NOTE(review): timestamp carried as a string; exact format is not shown here - confirm
85
+ }
86
+ /**
87
+ * Alternative action that was considered but not chosen (one entry of `RecordDecisionParams.alternatives`)
88
+ */
89
+ export interface DecisionAlternative {
90
+ action: string;
91
+ confidence: number; // NOTE(review): examples in this file pass 0.3 / 0.1 here, while RecordDecisionParams.confidence is documented as 0-100 - confirm the intended scale
92
+ reasoning?: string;
93
+ rejectedReason?: string;
94
+ }
95
+ /**
96
+ * Decision types made by agents; the required `type` field of `RecordDecisionParams`
97
+ */
98
+ export type DecisionType = 'action' | 'tool' | 'delegate' | 'respond' | 'route';
99
+ /**
100
+ * Parameters for recording a decision (`agent`, `type`, `chosen` and `alternatives` are required; the rest are optional)
101
+ */
102
+ export interface RecordDecisionParams {
103
+ /** Name of the agent making the decision */
104
+ agent: string;
105
+ /** Type of decision */
106
+ type: DecisionType;
107
+ /** The action/tool/response that was chosen */
108
+ chosen: string;
109
+ /** Alternative options that were considered */
110
+ alternatives: DecisionAlternative[];
111
+ /** Reasoning for the choice */
112
+ reasoning?: string;
113
+ /** Confidence score 0-100 (the recordDecision example passes 85; the alternatives in that example use fractions such as 0.3) */
114
+ confidence?: number;
115
+ /** Factors that influenced the decision */
116
+ contextFactors?: string[];
117
+ /** Input context at decision time */
118
+ inputContext?: Record<string, any>;
119
+ }
120
+ /**
121
+ * Known LLM provider names (`RecordCostParams.provider` additionally accepts any other string)
122
+ */
123
+ export type LLMProvider = 'openai' | 'anthropic' | 'google' | 'cohere' | 'mistral' | 'custom';
124
+ /**
125
+ * Cost categories for tracking; also the keys of the `WorkflowTracer.getCostBreakdown()` result
126
+ */
127
+ export type CostCategory = 'llm' | 'tool' | 'embedding' | 'other';
128
+ /**
129
+ * Parameters for recording cost (the argument of `WorkflowTracer.recordCost()`)
130
+ */
131
+ export interface RecordCostParams {
132
+ provider: LLMProvider | string; // the union with string means any provider name type-checks
133
+ model: string;
134
+ inputTokens: number;
135
+ outputTokens: number;
136
+ category?: CostCategory;
137
+ isRetry?: boolean;
138
+ retryNumber?: number; // NOTE(review): presumably which retry attempt this call is - confirm numbering base
139
+ }
140
+ /**
141
+ * Cost record with calculated values; returned by `WorkflowTracer.recordCost()` and `WorkflowTracer.getCosts()`
142
+ */
143
+ export interface CostRecord extends RecordCostParams {
144
+ totalTokens: number; // NOTE(review): presumably inputTokens + outputTokens - confirm
145
+ inputCost: string; // cost fields are typed as strings, whereas getTotalCost() returns a number
146
+ outputCost: string;
147
+ totalCost: string;
148
+ }
149
+ /**
150
+ * Options for WorkflowTracer (every field is optional, and the options argument itself is optional)
151
+ */
152
+ export interface WorkflowTracerOptions {
153
+ /** Organization ID for traces */
154
+ organizationId?: number;
155
+ /** Whether to auto-calculate costs (requires provider pricing) */
156
+ autoCalculateCost?: boolean;
157
+ /** Custom trace name prefix */
158
+ tracePrefix?: string;
159
+ /** Whether to capture full input/output (may be large) */
160
+ captureFullPayloads?: boolean;
161
+ /** Debug mode (the class declares a private `log` member documented as logging only when this is enabled) */
162
+ debug?: boolean;
163
+ }
164
+ /**
165
+ * Agent span context: returned by `WorkflowTracer.startAgentSpan()` and passed back to `WorkflowTracer.endAgentSpan()`
166
+ */
167
+ export interface AgentSpanContext {
168
+ spanId: string;
169
+ agentName: string;
170
+ startTime: string; // NOTE(review): timestamp carried as a string; exact format is not shown here - confirm
171
+ parentSpanId?: string; // matches startAgentSpan's optional `parentSpanId` parameter
172
+ metadata?: Record<string, any>;
173
+ }
174
+ /**
175
+ * WorkflowTracer - Instrument multi-agent workflows with tracing, decision auditing, and cost tracking
176
+ *
177
+ * @example
178
+ * ```typescript
179
+ * const tracer = new WorkflowTracer(client, { organizationId: 123 });
180
+ *
181
+ * // Simple workflow
182
+ * await tracer.startWorkflow('Data Processing Pipeline');
183
+ *
184
+ * const agentSpan = await tracer.startAgentSpan('DataAgent', { source: 'api' });
185
+ * // ... agent work ...
186
+ * await tracer.endAgentSpan(agentSpan, { processed: 100 });
187
+ *
188
+ * await tracer.endWorkflow({ status: 'success' });
189
+ * ```
190
+ */
191
+ export declare class WorkflowTracer {
192
+ private client;
193
+ private options;
194
+ private currentWorkflow;
195
+ private activeSpans;
196
+ private handoffs;
197
+ private decisions;
198
+ private costs;
199
+ private spanCounter;
200
+ constructor(client: AIEvalClient, options?: WorkflowTracerOptions);
201
+ /**
202
+ * Start a new workflow (`definition` and `metadata` are optional)
203
+ *
204
+ * @example
205
+ * ```typescript
206
+ * const workflow = await tracer.startWorkflow('Customer Support Flow', {
207
+ * nodes: [
208
+ * { id: 'router', type: 'agent', name: 'RouterAgent' },
209
+ * { id: 'technical', type: 'agent', name: 'TechnicalAgent' },
210
+ * ],
211
+ * edges: [{ from: 'router', to: 'technical', condition: 'is_technical' }],
212
+ * entrypoint: 'router'
213
+ * });
214
+ * ```
215
+ */
216
+ startWorkflow(name: string, definition?: WorkflowDefinition, metadata?: Record<string, any>): Promise<WorkflowContext>;
217
+ /**
218
+ * End the current workflow; `output` is the optional final output and `status` an optional WorkflowStatus
219
+ */
220
+ endWorkflow(output?: Record<string, any>, status?: WorkflowStatus): Promise<void>;
221
+ /**
222
+ * Start an agent span within the workflow (`input` and `parentSpanId` are optional)
223
+ *
224
+ * @example
225
+ * ```typescript
226
+ * const span = await tracer.startAgentSpan('RouterAgent', {
227
+ * input: userQuery
228
+ * });
229
+ * ```
230
+ */
231
+ startAgentSpan(agentName: string, input?: Record<string, any>, parentSpanId?: string): Promise<AgentSpanContext>;
232
+ /**
233
+ * End an agent span obtained from `startAgentSpan`; `output` and `error` are optional
234
+ */
235
+ endAgentSpan(span: AgentSpanContext, output?: Record<string, any>, error?: string): Promise<void>;
236
+ /**
237
+ * Record a handoff between agents (`fromAgent` may be `undefined`; `context` and `handoffType` are optional)
238
+ *
239
+ * @example
240
+ * ```typescript
241
+ * await tracer.recordHandoff(
242
+ * 'RouterAgent',
243
+ * 'TechnicalAgent',
244
+ * { issueType: 'api_error', priority: 'high' },
245
+ * 'delegation'
246
+ * );
247
+ * ```
248
+ */
249
+ recordHandoff(fromAgent: string | undefined, toAgent: string, context?: Record<string, any>, handoffType?: HandoffType): Promise<void>;
250
+ /**
251
+ * Record a decision made by an agent
252
+ *
253
+ * @example
254
+ * ```typescript
255
+ * await tracer.recordDecision({
256
+ * agent: 'RouterAgent',
257
+ * type: 'route',
258
+ * chosen: 'technical_support',
259
+ * alternatives: [
260
+ * { action: 'billing_support', confidence: 0.3, reasoning: 'No billing keywords' },
261
+ * { action: 'general_support', confidence: 0.1, reasoning: 'Fallback option' }
262
+ * ],
263
+ * reasoning: 'Query contains technical terms like "API", "error", "endpoint"',
264
+ * confidence: 85,
265
+ * contextFactors: ['keyword_match', 'user_history']
266
+ * });
267
+ * ```
268
+ */
269
+ recordDecision(params: RecordDecisionParams): Promise<void>;
270
+ /**
271
+ * Record cost for an LLM call or operation; resolves to a CostRecord
272
+ *
273
+ * @example
274
+ * ```typescript
275
+ * await tracer.recordCost({
276
+ * provider: 'openai',
277
+ * model: 'gpt-4',
278
+ * inputTokens: 500,
279
+ * outputTokens: 200,
280
+ * category: 'llm',
281
+ * isRetry: false
282
+ * });
283
+ * ```
284
+ */
285
+ recordCost(params: RecordCostParams): Promise<CostRecord>;
286
+ /**
287
+ * Get total cost for the current workflow (returned as a number, whereas CostRecord's cost fields are strings)
288
+ */
289
+ getTotalCost(): number;
290
+ /**
291
+ * Get cost breakdown by category (one numeric entry per CostCategory: 'llm' | 'tool' | 'embedding' | 'other')
292
+ */
293
+ getCostBreakdown(): Record<CostCategory, number>;
294
+ /**
295
+ * Get known pricing for a model (can be extended or fetched from API)
296
+ */
297
+ private getModelPricing;
298
+ /**
299
+ * Generate a unique ID
300
+ */
301
+ private generateId;
302
+ /**
303
+ * Log if debug mode is enabled
304
+ */
305
+ private log;
306
+ /**
307
+ * Get current workflow context, or `null` (presumably when no workflow is active - confirm)
308
+ */
309
+ getCurrentWorkflow(): WorkflowContext | null;
310
+ /**
311
+ * Check if a workflow is active
312
+ */
313
+ isWorkflowActive(): boolean;
314
+ /**
315
+ * Get all recorded handoffs
316
+ */
317
+ getHandoffs(): AgentHandoff[];
318
+ /**
319
+ * Get all recorded decisions (typed as the same RecordDecisionParams objects passed to `recordDecision`)
320
+ */
321
+ getDecisions(): RecordDecisionParams[];
322
+ /**
323
+ * Get all recorded costs
324
+ */
325
+ getCosts(): CostRecord[];
326
+ }
327
+ /**
328
+ * Wrap a LangChain agent for automatic workflow tracing (the executor and the returned wrapper are both typed `any`)
329
+ *
330
+ * @example
331
+ * ```typescript
332
+ * import { AgentExecutor } from 'langchain/agents';
333
+ *
334
+ * const executor = new AgentExecutor({ ... });
335
+ * const tracedExecutor = traceLangChainAgent(executor, tracer);
336
+ *
337
+ * const result = await tracedExecutor.invoke({ input: 'Hello' });
338
+ * ```
339
+ */
340
+ export declare function traceLangChainAgent(executor: any, tracer: WorkflowTracer, options?: {
341
+ agentName?: string; // optional; the name used when omitted is not visible in these declarations
342
+ }): any;
343
+ /**
344
+ * Create a traced wrapper for CrewAI crews (the crew and the returned wrapper are both typed `any`)
345
+ *
346
+ * @example
347
+ * ```typescript
348
+ * const tracedCrew = traceCrewAI(crew, tracer, {
349
+ * crewName: 'ResearchCrew'
350
+ * });
351
+ *
352
+ * const result = await tracedCrew.kickoff({ topic: 'AI Safety' });
353
+ * ```
354
+ */
355
+ export declare function traceCrewAI(crew: any, tracer: WorkflowTracer, options?: {
356
+ crewName?: string; // optional; the name used when omitted is not visible in these declarations
357
+ }): any;
358
+ /**
359
+ * Create a traced wrapper for AutoGen conversations (the conversation and the returned wrapper are both typed `any`)
360
+ *
361
+ * @example
362
+ * ```typescript
363
+ * const tracedConversation = traceAutoGen(conversation, tracer, {
364
+ * conversationName: 'CodeReview'
365
+ * });
366
+ * ```
367
+ */
368
+ export declare function traceAutoGen(conversation: any, tracer: WorkflowTracer, options?: {
369
+ conversationName?: string; // optional; the name used when omitted is not visible in these declarations
370
+ }): any;
371
+ /**
372
+ * Create a workflow tracer from an existing client (takes the same arguments as the WorkflowTracer constructor)
373
+ */
374
+ export declare function createWorkflowTracer(client: AIEvalClient, options?: WorkflowTracerOptions): WorkflowTracer;
375
+ /**
376
+ * Helper to trace an async function as a workflow step; returns a Promise<T> matching the result type of `fn` (`input` is optional)
377
+ */
378
+ export declare function traceWorkflowStep<T>(tracer: WorkflowTracer, agentName: string, fn: () => Promise<T>, input?: Record<string, any>): Promise<T>;