@pauly4010/evalai-sdk 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,715 @@
1
+ /**
2
+ * Configuration options for the AI Evaluation Platform SDK client
3
+ * Tier 1.4: TypeScript-First with Generics
4
+ */
5
+ export interface ClientConfig {
6
+ /** Your API key from the AI Evaluation Platform dashboard */
7
+ apiKey?: string;
8
+ /** Base URL for the API (default: relative URLs in browser, http://localhost:3000 in Node.js) */
9
+ baseUrl?: string;
10
+ /** Organization ID for multi-tenant setups */
11
+ organizationId?: number;
12
+ /** Request timeout in milliseconds (default: 30000) */
13
+ timeout?: number;
14
+ /** Debug mode - enables request/response logging (default: false) */
15
+ debug?: boolean;
16
+ /** Log level for debug mode (default: 'info') */
17
+ logLevel?: 'trace' | 'debug' | 'info' | 'warn' | 'error';
18
+ /** Retry configuration */
19
+ retry?: {
20
+ /** Maximum retry attempts (default: 3) */
21
+ maxAttempts?: number;
22
+ /** Backoff strategy (default: 'exponential') */
23
+ backoff?: 'exponential' | 'linear' | 'fixed';
24
+ /** Retryable error codes */
25
+ retryableErrors?: string[];
26
+ };
27
+ /** Enable request caching for GET requests (default: true) */
28
+ enableCaching?: boolean;
29
+ /** Maximum cache size in entries (default: 1000) */
30
+ cacheSize?: number;
31
+ /** Enable request batching (default: true) */
32
+ enableBatching?: boolean;
33
+ /** Maximum batch size (default: 10) */
34
+ batchSize?: number;
35
+ /** Batch delay in milliseconds (default: 50) */
36
+ batchDelay?: number;
37
+ /** Enable HTTP keep-alive for connection pooling (default: true) */
38
+ keepAlive?: boolean;
39
+ }
40
+ /**
41
+ * Evaluation template categories
42
+ * Updated with new template types for comprehensive LLM testing
43
+ */
44
+ export declare const EvaluationTemplates: {
45
+ readonly UNIT_TESTING: "unit-testing";
46
+ readonly OUTPUT_QUALITY: "output-quality";
47
+ readonly PROMPT_OPTIMIZATION: "prompt-optimization";
48
+ readonly CHAIN_OF_THOUGHT: "chain-of-thought";
49
+ readonly LONG_CONTEXT_TESTING: "long-context-testing";
50
+ readonly MODEL_STEERING: "model-steering";
51
+ readonly REGRESSION_TESTING: "regression-testing";
52
+ readonly CONFIDENCE_CALIBRATION: "confidence-calibration";
53
+ readonly SAFETY_COMPLIANCE: "safety-compliance";
54
+ readonly RAG_EVALUATION: "rag-evaluation";
55
+ readonly CODE_GENERATION: "code-generation";
56
+ readonly SUMMARIZATION: "summarization";
57
+ };
58
+ export type EvaluationTemplateType = typeof EvaluationTemplates[keyof typeof EvaluationTemplates];
59
+ /**
60
+ * Feature usage limits for per-organization quotas
61
+ */
62
+ export interface FeatureUsage {
63
+ /** Feature ID (e.g., 'traces_per_project', 'evals_per_project'). NOTE(review): the OrganizationLimits keys in this file use a '_per_organization' suffix instead; confirm which IDs the API actually returns. */
64
+ feature_id: string;
65
+ /** Whether the feature has unlimited usage */
66
+ unlimited: boolean;
67
+ /** Billing interval (month, year, etc.); typed as a free-form string, not a literal union */
68
+ interval: string;
69
+ /** Remaining balance */
70
+ balance: number;
71
+ /** Current usage amount */
72
+ usage: number;
73
+ /** Total included usage allowance */
74
+ included_usage: number;
75
+ /** When the usage resets. Typed as a number; presumably an epoch timestamp, but the unit (seconds vs. milliseconds) is not stated here. TODO confirm. */
76
+ next_reset_at: number;
77
+ }
78
+ /**
79
+ * Organization resource limits
80
+ */
81
+ export interface OrganizationLimits {
82
+ /** Traces per organization limit */
83
+ traces_per_organization?: FeatureUsage;
84
+ /** Evaluations per organization limit */
85
+ evals_per_organization?: FeatureUsage;
86
+ /** Annotations per organization limit */
87
+ annotations_per_organization?: FeatureUsage;
88
+ }
89
+ /**
90
+ * Trace object representing a single execution trace
91
+ * Generic metadata support for type safety
92
+ */
93
+ export interface Trace<TMetadata = Record<string, any>> { // TMetadata types the metadata field below
94
+ id: number; // numeric identifier; a separate field from the string traceId below
95
+ name: string;
96
+ traceId: string; // string identifier; CreateTraceParams declares it as a required input
97
+ organizationId: number;
98
+ status: 'pending' | 'success' | 'error'; // same literal union used by CreateTraceParams and ListTracesParams
99
+ durationMs: number | null; // nullable; presumably unset until the trace finishes (TODO confirm)
100
+ metadata: TMetadata | null; // always present on the object, but may be null
101
+ createdAt: string; // string timestamp; format is not specified in this declaration (presumably ISO 8601, TODO confirm)
102
+ }
103
+ /**
104
+ * Parameters for creating a new trace
105
+ */
106
+ export interface CreateTraceParams<TMetadata = Record<string, any>> {
107
+ name: string;
108
+ traceId: string;
109
+ organizationId?: number;
110
+ status?: 'pending' | 'success' | 'error';
111
+ durationMs?: number;
112
+ metadata?: TMetadata;
113
+ }
114
+ /**
115
+ * Parameters for listing traces
116
+ */
117
+ export interface ListTracesParams {
118
+ limit?: number;
119
+ offset?: number;
120
+ organizationId?: number;
121
+ status?: 'pending' | 'success' | 'error';
122
+ search?: string;
123
+ }
124
+ /**
125
+ * Span object representing a sub-operation within a trace
126
+ */
127
+ export interface Span<TMetadata = Record<string, any>> { // TMetadata types the metadata field below
128
+ id: number;
129
+ traceId: number; // NOTE(review): a number here, whereas Trace.traceId is a string; presumably this references the numeric Trace.id (TODO confirm)
130
+ name: string;
131
+ spanId: string;
132
+ parentSpanId: string | null; // nullable; presumably null for a span with no parent (TODO confirm)
133
+ startTime: string; // string timestamp; format is not specified in this declaration
134
+ endTime: string | null; // nullable, unlike startTime
135
+ durationMs: number | null;
136
+ metadata: TMetadata | null; // always present on the object, but may be null
137
+ createdAt: string;
138
+ }
139
+ /**
140
+ * Parameters for creating a span
141
+ */
142
+ export interface CreateSpanParams<TMetadata = Record<string, any>> {
143
+ name: string;
144
+ spanId: string;
145
+ parentSpanId?: string;
146
+ startTime: string;
147
+ endTime?: string;
148
+ durationMs?: number;
149
+ metadata?: TMetadata;
150
+ }
151
+ /**
152
+ * Evaluation object representing a test evaluation
153
+ */
154
+ export interface Evaluation<TMetadata = Record<string, any>> { // TMetadata types the metadata field below
155
+ id: number;
156
+ name: string;
157
+ description: string | null;
158
+ type: string; // plain string, not narrowed to EvaluationTemplateType; presumably holds one of those template values (TODO confirm)
159
+ status: 'draft' | 'active' | 'archived'; // same literal union used by the create/update/list evaluation params
160
+ organizationId: number;
161
+ createdBy: number; // numeric, whereas APIKey.userId and Annotation.annotatorId are strings
162
+ createdAt: string;
163
+ updatedAt: string;
164
+ metadata?: TMetadata | null; // optional as well as nullable, unlike Trace and Span where metadata is always present
165
+ }
166
+ /**
167
+ * Parameters for creating a new evaluation
168
+ */
169
+ export interface CreateEvaluationParams {
170
+ name: string;
171
+ description?: string;
172
+ type: string;
173
+ organizationId?: number;
174
+ createdBy: number;
175
+ status?: 'draft' | 'active' | 'archived';
176
+ }
177
+ /**
178
+ * Parameters for updating an evaluation
179
+ */
180
+ export interface UpdateEvaluationParams {
181
+ name?: string;
182
+ description?: string;
183
+ type?: string;
184
+ status?: 'draft' | 'active' | 'archived';
185
+ }
186
+ /**
187
+ * Parameters for listing evaluations
188
+ */
189
+ export interface ListEvaluationsParams {
190
+ limit?: number;
191
+ offset?: number;
192
+ organizationId?: number;
193
+ type?: string;
194
+ status?: 'draft' | 'active' | 'archived';
195
+ search?: string;
196
+ }
197
+ /**
198
+ * Test case for an evaluation
199
+ */
200
+ export interface TestCase {
201
+ id: number;
202
+ evaluationId: number;
203
+ input: string;
204
+ expectedOutput: string | null;
205
+ metadata: Record<string, any> | null;
206
+ createdAt: string;
207
+ }
208
+ /**
209
+ * Parameters for creating a test case
210
+ */
211
+ export interface CreateTestCaseParams {
212
+ input: string;
213
+ expectedOutput?: string;
214
+ metadata?: Record<string, any>;
215
+ }
216
+ /**
217
+ * Evaluation run
218
+ */
219
+ export interface EvaluationRun {
220
+ id: number;
221
+ evaluationId: number;
222
+ status: 'pending' | 'running' | 'completed' | 'failed';
223
+ results: Record<string, any> | null;
224
+ createdAt: string;
225
+ completedAt: string | null;
226
+ }
227
+ /**
228
+ * Parameters for creating an evaluation run
229
+ */
230
+ export interface CreateRunParams {
231
+ status?: 'pending' | 'running' | 'completed' | 'failed';
232
+ results?: Record<string, any>;
233
+ }
234
+ /**
235
+ * LLM Judge evaluation result
236
+ */
237
+ export interface LLMJudgeResult {
238
+ id: number;
239
+ configId: number;
240
+ input: string;
241
+ output: string;
242
+ score: number | null;
243
+ reasoning: string | null;
244
+ metadata: Record<string, any> | null;
245
+ createdAt: string;
246
+ }
247
+ /**
248
+ * Parameters for running an LLM judge evaluation
249
+ */
250
+ export interface RunLLMJudgeParams {
251
+ configId: number;
252
+ input: string;
253
+ output: string;
254
+ score?: number;
255
+ reasoning?: string;
256
+ metadata?: Record<string, any>;
257
+ }
258
+ /**
259
+ * SDK Error class with additional error details
260
+ *
261
+ * Common error codes:
262
+ * - MISSING_API_KEY: API key not provided
263
+ * - MISSING_ORGANIZATION_ID: Organization ID not provided
264
+ * - MISSING_REQUIRED_FIELDS: Required parameters missing
265
+ * - INVALID_ID: Invalid ID format
266
+ * - NOT_FOUND: Resource not found
267
+ * - UNAUTHORIZED: Authentication required
268
+ * - FORBIDDEN: Access forbidden
269
+ * - RATE_LIMIT_EXCEEDED: Rate limit exceeded
270
+ * - TIMEOUT: Request timed out
271
+ * - NETWORK_ERROR: Network connectivity issue
272
+ * - VALIDATION_ERROR: Request validation failed
273
+ * - INTERNAL_SERVER_ERROR: Server error
274
+ * - FEATURE_LIMIT_REACHED: Feature usage limit reached
275
+ * - UNKNOWN_ERROR: Unknown error occurred
276
+ */
277
+ export declare class SDKError extends Error {
278
+ code: string; // error code string; the doc comment on this class lists common values
279
+ statusCode: number; // presumably the HTTP status of the failed response (TODO confirm)
280
+ details?: any; // optional; the only optional field that can be passed to the constructor
281
+ documentation?: string; // not a constructor parameter; how it is populated is not visible in this declaration
282
+ solutions?: string[]; // not a constructor parameter; how it is populated is not visible in this declaration
283
+ retryable?: boolean; // not a constructor parameter; how it is populated is not visible in this declaration
284
+ retryAfter?: number; // not a constructor parameter; unit (seconds vs. milliseconds) is not stated here (TODO confirm)
285
+ constructor(message: string, code: string, statusCode: number, details?: any); // message, code and statusCode are required
286
+ }
287
+ export type AIEvalConfig = ClientConfig; // plain alias of ClientConfig
287
+ export type TraceData<TMetadata = any> = Trace<TMetadata>; // NOTE: default is any here, while Trace itself defaults TMetadata to Record<string, any>
288
+ export type SpanData<TMetadata = any> = Span<TMetadata>; // NOTE: default is any here, while Span itself defaults TMetadata to Record<string, any>
289
+ export type EvaluationData<TMetadata = any> = Evaluation<TMetadata>; // NOTE: default is any here, while Evaluation itself defaults TMetadata to Record<string, any>
290
+ export type LLMJudgeData = LLMJudgeResult; // plain alias of LLMJudgeResult
291
+ export type AnnotationData = any; // NOTE(review): untyped, not tied to the Annotation interface declared in this file; confirm whether that is intentional
293
+ export interface RetryConfig { // declares the same three optional fields as the inline ClientConfig.retry shape
294
+ maxAttempts?: number; // ClientConfig.retry.maxAttempts documents a default of 3
295
+ backoff?: 'exponential' | 'linear' | 'fixed'; // ClientConfig.retry.backoff documents a default of 'exponential'
296
+ retryableErrors?: string[]; // described on ClientConfig.retry as retryable error codes; presumably SDKError code values (TODO confirm)
297
+ }
298
+ export interface GenericMetadata {
299
+ [key: string]: any;
300
+ }
301
+ export interface TracedResponse<T> {
302
+ data: T;
303
+ traceId?: string;
304
+ metadata?: GenericMetadata;
305
+ }
306
+ export interface TestResult {
307
+ passed: boolean;
308
+ message?: string;
309
+ expected?: any;
310
+ actual?: any;
311
+ metadata?: GenericMetadata;
312
+ }
313
+ export interface SnapshotData {
314
+ id: string;
315
+ name: string;
316
+ data: any;
317
+ metadata?: GenericMetadata;
318
+ createdAt: string;
319
+ updatedAt: string;
320
+ }
321
+ export interface ExportOptions {
322
+ format: 'json' | 'csv' | 'jsonl'; // the only required field; same literal union as the ExportFormat alias in this file
323
+ includeTraces?: boolean;
324
+ includeEvaluations?: boolean;
325
+ includeTestCases?: boolean;
326
+ includeRuns?: boolean;
327
+ dateRange?: { // optional, but when given both bounds below are required
328
+ from: string; // string date; format is not specified in this declaration
329
+ to: string; // string date; format is not specified in this declaration
330
+ };
331
+ organizationId?: number;
332
+ limit?: number;
333
+ }
334
+ export interface ImportOptions {
335
+ organizationId?: number;
336
+ createdBy?: number;
337
+ skipDuplicates?: boolean;
338
+ dryRun?: boolean;
339
+ }
340
+ export interface StreamOptions {
341
+ onData: (data: any) => void;
342
+ onError?: (error: Error) => void;
343
+ onComplete?: () => void;
344
+ signal?: AbortSignal;
345
+ }
346
+ export interface BatchOptions {
347
+ batchSize?: number;
348
+ concurrency?: number;
349
+ onProgress?: (progress: {
350
+ processed: number;
351
+ total: number;
352
+ }) => void;
353
+ signal?: AbortSignal;
354
+ }
355
+ export type ExportFormat = 'json' | 'csv' | 'jsonl';
356
+ /**
357
+ * Annotation object representing human feedback
358
+ */
359
+ export interface Annotation {
360
+ id: number;
361
+ evaluationRunId: number;
362
+ testCaseId: number;
363
+ annotatorId: string;
364
+ rating: number | null;
365
+ feedback: string | null;
366
+ labels: Record<string, any>;
367
+ metadata: Record<string, any>;
368
+ createdAt: string;
369
+ annotator?: {
370
+ id: string;
371
+ name: string;
372
+ email: string;
373
+ };
374
+ testCase?: {
375
+ name: string;
376
+ };
377
+ }
378
+ /**
379
+ * Parameters for creating an annotation
380
+ */
381
+ export interface CreateAnnotationParams {
382
+ evaluationRunId: number;
383
+ testCaseId: number;
384
+ rating?: number;
385
+ feedback?: string;
386
+ labels?: Record<string, any>;
387
+ metadata?: Record<string, any>;
388
+ }
389
+ /**
390
+ * Parameters for listing annotations
391
+ */
392
+ export interface ListAnnotationsParams {
393
+ evaluationRunId?: number;
394
+ testCaseId?: number;
395
+ limit?: number;
396
+ offset?: number;
397
+ }
398
+ /**
399
+ * Annotation task object
400
+ */
401
+ export interface AnnotationTask {
402
+ id: number;
403
+ name: string;
404
+ description: string | null;
405
+ instructions: string | null;
406
+ type: string;
407
+ status: 'pending' | 'in_progress' | 'completed' | 'archived';
408
+ organizationId: number;
409
+ annotationSettings: Record<string, any>;
410
+ createdAt: string;
411
+ updatedAt: string;
412
+ }
413
+ /**
414
+ * Parameters for creating an annotation task
415
+ */
416
+ export interface CreateAnnotationTaskParams {
417
+ name: string;
418
+ description?: string;
419
+ instructions?: string;
420
+ type: string;
421
+ organizationId: number;
422
+ annotationSettings?: Record<string, any>;
423
+ }
424
+ /**
425
+ * Parameters for listing annotation tasks
426
+ */
427
+ export interface ListAnnotationTasksParams {
428
+ organizationId?: number;
429
+ status?: 'pending' | 'in_progress' | 'completed' | 'archived';
430
+ limit?: number;
431
+ offset?: number;
432
+ }
433
+ /**
434
+ * Annotation item object
435
+ */
436
+ export interface AnnotationItem {
437
+ id: number;
438
+ taskId: number;
439
+ content: string;
440
+ annotation: any | null;
441
+ annotatedBy: string | null;
442
+ annotatedAt: string | null;
443
+ createdAt: string;
444
+ }
445
+ /**
446
+ * Parameters for creating an annotation item
447
+ */
448
+ export interface CreateAnnotationItemParams {
449
+ content: string;
450
+ annotation?: any;
451
+ annotatedBy?: string;
452
+ annotatedAt?: string;
453
+ }
454
+ /**
455
+ * Parameters for listing annotation items
456
+ */
457
+ export interface ListAnnotationItemsParams {
458
+ limit?: number;
459
+ offset?: number;
460
+ }
461
+ /**
462
+ * API Key object
463
+ */
464
+ export interface APIKey {
465
+ id: number;
466
+ userId: string;
467
+ organizationId: number;
468
+ keyPrefix: string;
469
+ name: string;
470
+ scopes: string[];
471
+ lastUsedAt: string | null;
472
+ expiresAt: string | null;
473
+ revokedAt: string | null;
474
+ createdAt: string;
475
+ }
476
+ /**
477
+ * API Key with full key (only returned on creation)
478
+ */
479
+ export interface APIKeyWithSecret extends APIKey {
480
+ apiKey: string;
481
+ }
482
+ /**
483
+ * Parameters for creating an API key
484
+ */
485
+ export interface CreateAPIKeyParams {
486
+ name: string;
487
+ organizationId: number;
488
+ scopes: string[];
489
+ expiresAt?: string;
490
+ }
491
+ /**
492
+ * Parameters for updating an API key
493
+ */
494
+ export interface UpdateAPIKeyParams {
495
+ name?: string;
496
+ scopes?: string[];
497
+ expiresAt?: string;
498
+ }
499
+ /**
500
+ * Parameters for listing API keys
501
+ */
502
+ export interface ListAPIKeysParams {
503
+ organizationId?: number;
504
+ limit?: number;
505
+ offset?: number;
506
+ }
507
+ /**
508
+ * API Key usage statistics
509
+ */
510
+ export interface APIKeyUsage {
511
+ keyId: number;
512
+ totalRequests: number;
513
+ successfulRequests: number;
514
+ failedRequests: number;
515
+ lastUsedAt: string | null;
516
+ usageByEndpoint: Record<string, number>;
517
+ usageByDay: Array<{
518
+ date: string;
519
+ requests: number;
520
+ }>;
521
+ }
522
+ /**
523
+ * Webhook object
524
+ */
525
+ export interface Webhook {
526
+ id: number;
527
+ organizationId: number;
528
+ url: string;
529
+ events: string[];
530
+ secret: string;
531
+ status: 'active' | 'inactive';
532
+ lastTriggeredAt: string | null;
533
+ createdAt: string;
534
+ updatedAt: string;
535
+ }
536
+ /**
537
+ * Parameters for creating a webhook
538
+ */
539
+ export interface CreateWebhookParams {
540
+ organizationId: number;
541
+ url: string;
542
+ events: string[];
543
+ }
544
+ /**
545
+ * Parameters for updating a webhook
546
+ */
547
+ export interface UpdateWebhookParams {
548
+ url?: string;
549
+ events?: string[];
550
+ status?: 'active' | 'inactive';
551
+ }
552
+ /**
553
+ * Parameters for listing webhooks
554
+ */
555
+ export interface ListWebhooksParams {
556
+ organizationId: number;
557
+ status?: 'active' | 'inactive';
558
+ limit?: number;
559
+ offset?: number;
560
+ }
561
+ /**
562
+ * Webhook delivery object
563
+ */
564
+ export interface WebhookDelivery {
565
+ id: number;
566
+ webhookId: number;
567
+ event: string;
568
+ payload: Record<string, any>;
569
+ response: string | null;
570
+ statusCode: number | null;
571
+ success: boolean;
572
+ attempt: number;
573
+ createdAt: string;
574
+ }
575
+ /**
576
+ * Parameters for listing webhook deliveries
577
+ */
578
+ export interface ListWebhookDeliveriesParams {
579
+ limit?: number;
580
+ offset?: number;
581
+ success?: boolean;
582
+ }
583
+ /**
584
+ * Usage statistics
585
+ */
586
+ export interface UsageStats {
587
+ organizationId: number;
588
+ period: {
589
+ start: string;
590
+ end: string;
591
+ };
592
+ traces: {
593
+ total: number;
594
+ byStatus: Record<string, number>;
595
+ };
596
+ evaluations: {
597
+ total: number;
598
+ byType: Record<string, number>;
599
+ };
600
+ apiCalls: {
601
+ total: number;
602
+ byEndpoint: Record<string, number>;
603
+ };
604
+ }
605
+ /**
606
+ * Parameters for getting usage stats
607
+ */
608
+ export interface GetUsageParams {
609
+ organizationId: number;
610
+ startDate?: string;
611
+ endDate?: string;
612
+ }
613
+ /**
614
+ * Usage summary
615
+ */
616
+ export interface UsageSummary {
617
+ organizationId: number;
618
+ currentPeriod: {
619
+ traces: number;
620
+ evaluations: number;
621
+ annotations: number;
622
+ apiCalls: number;
623
+ };
624
+ limits: OrganizationLimits;
625
+ billingPeriod: {
626
+ start: string;
627
+ end: string;
628
+ };
629
+ }
630
+ /**
631
+ * LLM Judge configuration object
632
+ */
633
+ export interface LLMJudgeConfig {
634
+ id: number;
635
+ name: string;
636
+ description: string | null;
637
+ model: string;
638
+ rubric: string;
639
+ temperature: number;
640
+ maxTokens: number;
641
+ organizationId: number;
642
+ createdBy: number;
643
+ createdAt: string;
644
+ updatedAt: string;
645
+ }
646
+ /**
647
+ * Parameters for creating an LLM judge config
648
+ */
649
+ export interface CreateLLMJudgeConfigParams {
650
+ name: string;
651
+ description?: string;
652
+ model: string;
653
+ rubric: string;
654
+ temperature?: number;
655
+ maxTokens?: number;
656
+ organizationId: number;
657
+ createdBy: number;
658
+ }
659
+ /**
660
+ * Parameters for listing LLM judge configs
661
+ */
662
+ export interface ListLLMJudgeConfigsParams {
663
+ organizationId?: number;
664
+ limit?: number;
665
+ offset?: number;
666
+ }
667
+ /**
668
+ * Parameters for listing LLM judge results
669
+ */
670
+ export interface ListLLMJudgeResultsParams {
671
+ configId?: number;
672
+ evaluationId?: number;
673
+ limit?: number;
674
+ offset?: number;
675
+ }
676
+ /**
677
+ * LLM Judge alignment analysis
678
+ */
679
+ export interface LLMJudgeAlignment {
680
+ configId: number;
681
+ totalEvaluations: number;
682
+ averageScore: number;
683
+ alignmentMetrics: {
684
+ accuracy: number;
685
+ precision: number;
686
+ recall: number;
687
+ f1Score: number;
688
+ };
689
+ scoreDistribution: Record<string, number>;
690
+ comparisonWithHuman?: {
691
+ agreement: number;
692
+ correlation: number;
693
+ };
694
+ }
695
+ /**
696
+ * Parameters for getting alignment analysis
697
+ */
698
+ export interface GetLLMJudgeAlignmentParams {
699
+ configId: number;
700
+ startDate?: string;
701
+ endDate?: string;
702
+ }
703
+ /**
704
+ * Organization object
705
+ */
706
+ export interface Organization {
707
+ id: number;
708
+ name: string;
709
+ slug: string;
710
+ plan: string;
711
+ status: 'active' | 'suspended' | 'cancelled';
712
+ createdAt: string;
713
+ updatedAt: string;
714
+ metadata?: Record<string, any>;
715
+ }