@pauly4010/evalai-sdk 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,215 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ /**
4
+ * evalai check — CI/CD evaluation gate
5
+ *
6
+ * Usage:
7
+ * evalai check --minScore 92 --evaluationId 42
8
+ * evalai check --minScore 90 --maxDrop 5 --evaluationId 42
9
+ * evalai check --policy HIPAA --evaluationId 42
10
+ * evalai check --baseline published --evaluationId 42
11
+ *
12
+ * Flags:
13
+ * --minScore <n> Fail if quality score < n (0-100)
14
+ * --maxDrop <n> Fail if score dropped > n points from baseline
15
+ * --minN <n> Fail if total test cases < n (low sample size)
16
+ * --allowWeakEvidence If false (default), fail when evidenceLevel is 'weak'
17
+ * --policy <name> Enforce a compliance policy (e.g. HIPAA, SOC2, GDPR)
18
+ * --baseline <mode> Baseline comparison mode: "published" (default), "previous", or "production"
19
+ * --evaluationId <id> Required. The evaluation to gate on.
20
+ * --baseUrl <url> API base URL (default: EVALAI_BASE_URL or http://localhost:3000)
21
+ * --apiKey <key> API key (default: EVALAI_API_KEY env var)
22
+ *
23
+ * Exit codes:
24
+ * 0 — Gate passed
25
+ * 1 — Gate failed: score below threshold
26
+ * 2 — Gate failed: regression exceeded maxDrop
27
+ * 3 — Gate failed: policy violation
28
+ * 4 — API error / network failure
29
+ * 5 — Invalid arguments
30
+ * 6 — Gate failed: total test cases < minN
31
+ * 7 — Gate failed: weak evidence (evidenceLevel === 'weak')
32
+ *
33
+ * Environment:
34
+ * EVALAI_BASE_URL — API base URL (default: http://localhost:3000)
35
+ * EVALAI_API_KEY — API key for authentication
36
+ */
37
+ Object.defineProperty(exports, "__esModule", { value: true });
38
+ exports.EXIT = void 0;
39
+ exports.parseArgs = parseArgs;
40
+ exports.runCheck = runCheck;
41
+ // Standardized exit codes
42
+ exports.EXIT = {
43
+ PASS: 0,
44
+ SCORE_BELOW: 1,
45
+ REGRESSION: 2,
46
+ POLICY_VIOLATION: 3,
47
+ API_ERROR: 4,
48
+ BAD_ARGS: 5,
49
+ LOW_N: 6,
50
+ WEAK_EVIDENCE: 7,
51
+ };
52
/**
 * Parse the arguments of `evalai check` into a normalized options object.
 *
 * Accepts `--flag value`, `--flag=value` and bare `--flag` (stored as 'true').
 * Tokens that do not start with `--` and are not consumed as a value are ignored.
 *
 * On invalid input this prints a message to stderr and calls
 * `process.exit(EXIT.BAD_ARGS)`; it does not throw.
 *
 * @param {string[]} argv - Arguments after the `check` subcommand.
 * @returns {{baseUrl: string, apiKey: string, minScore: number,
 *            maxDrop: (number|undefined), minN: (number|undefined),
 *            allowWeakEvidence: boolean, evaluationId: string,
 *            policy: (string|undefined), baseline: string}}
 */
function parseArgs(argv) {
    // Null-prototype map: a flag name can never resolve to an inherited Object member.
    const args = Object.create(null);
    for (let i = 0; i < argv.length; i++) {
        const arg = argv[i];
        if (!arg.startsWith('--')) {
            continue;
        }
        // `--key=value` form. Previously this was stored as a flag literally named
        // "key=value", so e.g. `--minScore=92` silently left the gate disabled.
        const eq = arg.indexOf('=');
        if (eq !== -1) {
            args[arg.slice(2, eq)] = arg.slice(eq + 1);
            continue;
        }
        const key = arg.slice(2);
        const next = argv[i + 1];
        if (next !== undefined && !next.startsWith('--')) {
            args[key] = next;
            i++;
        }
        else {
            args[key] = 'true'; // bare flag
        }
    }
    const toInt = (raw) => Number.parseInt(raw, 10);
    const fail = (message) => {
        console.error(message);
        process.exit(exports.EXIT.BAD_ARGS);
    };
    const baseUrl = args.baseUrl || process.env.EVALAI_BASE_URL || 'http://localhost:3000';
    const apiKey = args.apiKey || process.env.EVALAI_API_KEY || '';
    const minScore = toInt(args.minScore || '0');
    const maxDrop = args.maxDrop ? toInt(args.maxDrop) : undefined;
    const minN = args.minN ? toInt(args.minN) : undefined;
    const allowWeakEvidence = args.allowWeakEvidence === 'true' || args.allowWeakEvidence === '1';
    const evaluationId = args.evaluationId || '';
    const policy = args.policy || undefined;
    const knownBaselines = ['published', 'previous', 'production'];
    let baseline = 'published';
    if (knownBaselines.includes(args.baseline)) {
        baseline = args.baseline;
    }
    else if (args.baseline !== undefined) {
        // Keep the historical fallback, but make a typo visible in the CI log.
        console.error(`Warning: unknown --baseline "${args.baseline}", using "published"`);
    }
    if (!apiKey) {
        fail('Error: --apiKey or EVALAI_API_KEY is required');
    }
    if (!evaluationId) {
        fail('Error: --evaluationId is required');
    }
    if (Number.isNaN(minScore) || minScore < 0 || minScore > 100) {
        fail('Error: --minScore must be 0-100');
    }
    // A NaN maxDrop makes `delta < -maxDrop` always false (gate silently off);
    // a negative one inverts the comparison. Reject both up front.
    if (maxDrop !== undefined && (Number.isNaN(maxDrop) || maxDrop < 0)) {
        fail('Error: --maxDrop must be a non-negative number');
    }
    if (minN !== undefined && (Number.isNaN(minN) || minN < 1)) {
        fail('Error: --minN must be a positive number');
    }
    return { baseUrl, apiKey, minScore, maxDrop, minN, allowWeakEvidence, evaluationId, policy, baseline };
}
99
/**
 * Compliance policies understood by `--policy`.
 * `requiredSafetyRate` is the minimum `breakdown.safety` (0-1); any flag listed
 * in `maxFlags` that appears in the API response is a violation.
 */
const POLICY_CHECKS = Object.freeze({
    HIPAA: { requiredSafetyRate: 0.99, maxFlags: ['SAFETY_RISK'] },
    SOC2: { requiredSafetyRate: 0.95, maxFlags: ['SAFETY_RISK', 'LOW_PASS_RATE'] },
    GDPR: { requiredSafetyRate: 0.95, maxFlags: ['SAFETY_RISK'] },
    PCI_DSS: { requiredSafetyRate: 0.99, maxFlags: ['SAFETY_RISK', 'LOW_PASS_RATE'] },
    FINRA_4511: { requiredSafetyRate: 0.95, maxFlags: ['SAFETY_RISK'] },
});

/** Turn any thrown value into a printable message. */
function describeGateError(err) {
    return err instanceof Error ? err.message : String(err);
}

/** Build the quality endpoint URL with properly encoded query parameters. */
function buildQualityUrl(args) {
    let base = String(args.baseUrl);
    // Avoid "host//api/quality" when the configured base URL ends with a slash.
    while (base.endsWith('/')) {
        base = base.slice(0, -1);
    }
    // URLSearchParams encodes values, so an id such as "42&action=x" cannot
    // inject extra query parameters.
    const query = new URLSearchParams({
        evaluationId: String(args.evaluationId),
        action: 'latest',
        baseline: String(args.baseline),
    });
    return `${base}/api/quality?${query}`;
}

/** Print the boxed score summary to stdout. */
function printScoreSummary({ score, baselineScore, regressionDelta, breakdown, flags }) {
    console.log('┌─────────────────────────────────────────┐');
    console.log(`│ EvalAI Quality Score: ${String(score).padStart(3)}/100 │`);
    console.log('├─────────────────────────────────────────┤');
    if (baselineScore !== null) {
        const delta = regressionDelta ?? 0;
        const arrow = delta >= 0 ? '▲' : '▼';
        console.log(`│ Baseline: ${baselineScore} ${arrow} ${Math.abs(delta)} pts │`);
    }
    const pct = (v) => `${Math.round((v ?? 0) * 100)}%`;
    console.log(`│ Pass: ${pct(breakdown.passRate)} Safety: ${pct(breakdown.safety)} Judge: ${pct(breakdown.judge)} │`);
    if (flags.length > 0) {
        console.log(`│ Flags: ${flags.join(', ').padEnd(30)} │`);
    }
    console.log('└─────────────────────────────────────────┘');
}

/**
 * Evaluate the requested compliance policy against the fetched result.
 * @returns {number|null} An exit code on failure, or null when compliant.
 */
function evaluatePolicy(policy, breakdown, flags) {
    const policyName = policy.toUpperCase();
    const check = Object.hasOwn(POLICY_CHECKS, policyName) ? POLICY_CHECKS[policyName] : undefined;
    if (!check) {
        console.error(`\n✗ Unknown policy: ${policy}. Available: ${Object.keys(POLICY_CHECKS).join(', ')}`);
        return exports.EXIT.BAD_ARGS;
    }
    const safetyRate = breakdown?.safety ?? 0;
    if (safetyRate < check.requiredSafetyRate) {
        console.error(`\n✗ POLICY VIOLATION (${policyName}): safety rate ${Math.round(safetyRate * 100)}% < ` +
            `required ${Math.round(check.requiredSafetyRate * 100)}%`);
        return exports.EXIT.POLICY_VIOLATION;
    }
    const violations = flags.filter((f) => check.maxFlags.includes(f));
    if (violations.length > 0) {
        console.error(`\n✗ POLICY VIOLATION (${policyName}): ${violations.join(', ')}`);
        return exports.EXIT.POLICY_VIOLATION;
    }
    console.log(`\n✓ Policy ${policyName}: COMPLIANT`);
    return null;
}

/**
 * Run the CI/CD evaluation gate.
 *
 * Fetches the latest quality score for `args.evaluationId` and applies, in
 * order: baseline-missing, minN, weak-evidence, minScore, maxDrop and policy
 * gates. Progress and failures are written to stdout/stderr.
 *
 * @param {object} args - Options as produced by `parseArgs`.
 * @returns {Promise<number>} One of the `EXIT` codes. Network failures, non-2xx
 *   responses and unparseable response bodies all resolve to `EXIT.API_ERROR`
 *   rather than rejecting.
 */
async function runCheck(args) {
    const headers = { Authorization: `Bearer ${args.apiKey}` };
    // ── 1. Fetch latest quality score ──
    const scoreUrl = buildQualityUrl(args);
    let scoreRes;
    try {
        scoreRes = await fetch(scoreUrl, { headers });
    }
    catch (err) {
        console.error(`EvalAI gate ERROR: Network failure — ${describeGateError(err)}`);
        return exports.EXIT.API_ERROR;
    }
    if (!scoreRes.ok) {
        // Reading the error body is best-effort; the status code is what matters.
        const body = await scoreRes.text().catch(() => '');
        console.error(`EvalAI gate ERROR: API returned ${scoreRes.status} — ${body}`);
        return exports.EXIT.API_ERROR;
    }
    let data;
    try {
        data = await scoreRes.json();
    }
    catch (err) {
        // A 2xx response with a non-JSON body (proxy page, truncated payload)
        // is an API failure, not an unhandled rejection for the caller.
        console.error(`EvalAI gate ERROR: API returned an invalid JSON response — ${describeGateError(err)}`);
        return exports.EXIT.API_ERROR;
    }
    const score = data?.score ?? 0;
    const total = data?.total ?? null;
    const evidenceLevel = data?.evidenceLevel ?? null;
    const baselineScore = data?.baselineScore ?? null;
    const regressionDelta = data?.regressionDelta ?? null;
    const baselineMissing = data?.baselineMissing === true;
    const breakdown = data?.breakdown ?? {};
    const flags = Array.isArray(data?.flags) ? data.flags : [];
    // ── Gate: baseline missing (when baseline comparison requested) ──
    if (baselineMissing && (args.baseline !== 'published' || args.maxDrop !== undefined)) {
        console.error(`\n✗ FAILED: baseline (${args.baseline}) not found. ` +
            `Ensure a baseline run exists (e.g. published run, previous run, or prod-tagged run).`);
        return exports.EXIT.API_ERROR;
    }
    // ── Gate: minN (low sample size) ──
    if (args.minN !== undefined && total !== null && total < args.minN) {
        console.error(`\n✗ FAILED: total test cases (${total}) < minN (${args.minN})`);
        return exports.EXIT.LOW_N;
    }
    // ── Gate: allowWeakEvidence ──
    if (!args.allowWeakEvidence && evidenceLevel === 'weak') {
        console.error(`\n✗ FAILED: evidence level is 'weak' (use --allowWeakEvidence to permit)`);
        return exports.EXIT.WEAK_EVIDENCE;
    }
    // ── Print summary ──
    printScoreSummary({ score, baselineScore, regressionDelta, breakdown, flags });
    // ── 2. Gate: minimum score ──
    if (args.minScore > 0 && score < args.minScore) {
        console.error(`\n✗ FAILED: score=${score} < minScore=${args.minScore}`);
        return exports.EXIT.SCORE_BELOW;
    }
    // ── 3. Gate: maximum drop from baseline ──
    if (args.maxDrop !== undefined && regressionDelta !== null && regressionDelta < -(args.maxDrop)) {
        console.error(`\n✗ FAILED: score dropped ${Math.abs(regressionDelta)} pts from baseline ` +
            `(max allowed: ${args.maxDrop})`);
        return exports.EXIT.REGRESSION;
    }
    // ── 4. Gate: policy compliance ──
    if (args.policy) {
        const policyResult = evaluatePolicy(args.policy, breakdown, flags);
        if (policyResult !== null) {
            return policyResult;
        }
    }
    console.log('\n✓ EvalAI gate PASSED');
    return exports.EXIT.PASS;
}
205
// Main entry point: only runs the gate when this file is executed directly
// (`node check.js ...`), not when it is required for programmatic use.
const isDirectRun = typeof require !== 'undefined' && require.main === module;
if (isDirectRun) {
    const args = parseArgs(process.argv.slice(2));
    runCheck(args).then((code) => {
        process.exit(code);
    }).catch((err) => {
        // A rejection is not guaranteed to be an Error instance; avoid printing "undefined".
        console.error(`EvalAI gate ERROR: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(exports.EXIT.API_ERROR);
    });
}
@@ -1,6 +1,8 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * CLI for AI Evaluation Platform SDK
4
- * Tier 2.6: CLI for Everything
3
+ * evalai EvalAI CLI
4
+ *
5
+ * Commands:
6
+ * evalai check — CI/CD evaluation gate (see evalai check --help)
5
7
  */
6
8
  export {};
package/dist/cli/index.js CHANGED
@@ -1,181 +1,44 @@
1
1
  #!/usr/bin/env node
2
2
  "use strict";
3
3
  /**
4
- * CLI for AI Evaluation Platform SDK
5
- * Tier 2.6: CLI for Everything
4
+ * evalai EvalAI CLI
5
+ *
6
+ * Commands:
7
+ * evalai check — CI/CD evaluation gate (see evalai check --help)
6
8
  */
7
- var __importDefault = (this && this.__importDefault) || function (mod) {
8
- return (mod && mod.__esModule) ? mod : { "default": mod };
9
- };
10
9
  Object.defineProperty(exports, "__esModule", { value: true });
11
- const commander_1 = require("commander");
12
- const promises_1 = __importDefault(require("fs/promises"));
13
- const path_1 = __importDefault(require("path"));
14
- const client_1 = require("../client");
15
- const export_1 = require("../export");
16
- const program = new commander_1.Command();
17
- program
18
- .name('evalai')
19
- .description('AI Evaluation Platform CLI')
20
- .version('1.0.0');
21
- // Initialize project
22
- program
23
- .command('init')
24
- .description('Initialize a new evaluation project')
25
- .option('-d, --dir <directory>', 'Project directory', '.')
26
- .action(async (options) => {
27
- const dir = path_1.default.resolve(options.dir);
28
- console.log('🚀 Initializing EvalAI project...');
29
- // Create directory structure
30
- await promises_1.default.mkdir(path_1.default.join(dir, '.evalai'), { recursive: true });
31
- await promises_1.default.mkdir(path_1.default.join(dir, '.evalai', 'snapshots'), { recursive: true });
32
- await promises_1.default.mkdir(path_1.default.join(dir, 'evaluations'), { recursive: true });
33
- // Create config file
34
- const config = {
35
- apiKey: process.env.EVALAI_API_KEY || '',
36
- projectId: process.env.EVALAI_PROJECT_ID || '',
37
- baseUrl: 'http://localhost:3000/api',
38
- debug: false,
39
- retry: {
40
- maxAttempts: 3,
41
- backoff: 'exponential'
42
- }
43
- };
44
- await promises_1.default.writeFile(path_1.default.join(dir, 'evalai.config.json'), JSON.stringify(config, null, 2));
45
- // Create example evaluation file
46
- const exampleEval = `import { AIEvalClient, createTestSuite, expect } from '@pauly4010/evalai-sdk'
47
-
48
- const client = AIEvalClient.init()
49
-
50
- const suite = createTestSuite('example-evaluation', {
51
- cases: [
52
- {
53
- input: 'What is 2+2?',
54
- expected: '4',
55
- name: 'simple-math'
56
- },
57
- {
58
- input: 'Explain AI in simple terms',
59
- expected: (output) => {
60
- expect(output).toContainKeywords(['artificial', 'intelligence'])
61
- expect(output).toHaveLength({ min: 50, max: 500 })
62
- return true
63
- },
64
- name: 'ai-explanation'
65
- }
66
- ]
67
- })
68
-
69
- // Run the test suite
70
- suite.run().then(results => {
71
- console.log('Test Results:', results)
72
- console.log(\`Passed: \${results.passed}/\${results.total}\`)
73
- })
74
- `;
75
- await promises_1.default.writeFile(path_1.default.join(dir, 'evaluations', 'example.ts'), exampleEval);
76
- console.log('✅ Project initialized successfully!');
77
- console.log('\nNext steps:');
78
- console.log('1. Set your API key: export EVALAI_API_KEY=your-key');
79
- console.log('2. Set your project ID: export EVALAI_PROJECT_ID=your-project');
80
- console.log('3. Run evaluations: npx evalai eval:run');
81
- });
82
- // Run evaluations
83
- program
84
- .command('eval:run')
85
- .description('Run evaluation tests')
86
- .option('-c, --config <path>', 'Config file path', './evalai.config.json')
87
- .option('-f, --file <path>', 'Evaluation file to run')
88
- .action(async (options) => {
89
- console.log('🧪 Running evaluations...');
90
- // Load config
91
- const configPath = path_1.default.resolve(options.config);
92
- let config;
93
- try {
94
- const configContent = await promises_1.default.readFile(configPath, 'utf-8');
95
- config = JSON.parse(configContent);
96
- }
97
- catch (error) {
98
- console.error('❌ Config file not found. Run "evalai init" first.');
99
- process.exit(1);
100
- }
101
- const client = client_1.AIEvalClient.init(config);
102
- // If file specified, run that file
103
- if (options.file) {
104
- console.log(`Running ${options.file}...`);
105
- // Dynamic import of evaluation file would go here
106
- // This requires compilation step for TS files
107
- }
108
- else {
109
- // Run all evaluations in the evaluations directory
110
- console.log('Running all evaluations...');
111
- }
112
- console.log('✅ Evaluations completed!');
113
- });
114
- // List traces
115
- program
116
- .command('traces')
117
- .description('List and filter traces')
118
- .option('-l, --limit <number>', 'Number of traces to show', '10')
119
- .option('--failed', 'Show only failed traces')
120
- .option('--slow', 'Show slow traces (>5s)')
121
- .action(async (options) => {
122
- const configPath = path_1.default.resolve('./evalai.config.json');
123
- let config;
124
- try {
125
- const configContent = await promises_1.default.readFile(configPath, 'utf-8');
126
- config = JSON.parse(configContent);
127
- }
128
- catch (error) {
129
- console.error('❌ Config file not found. Run "evalai init" first.');
130
- process.exit(1);
131
- }
132
- const client = client_1.AIEvalClient.init(config);
133
- console.log('📊 Fetching traces...');
134
- // API call to get traces would go here
135
- console.log(`Showing ${options.limit} traces`);
136
- });
137
- // Export data
138
- program
139
- .command('export')
140
- .description('Export data from EvalAI')
141
- .option('-f, --format <format>', 'Export format (json, csv, xlsx)', 'json')
142
- .option('-o, --output <path>', 'Output file path', './export')
143
- .option('-t, --type <type>', 'Data type (traces, evaluations, all)', 'all')
144
- .action(async (options) => {
145
- const configPath = path_1.default.resolve('./evalai.config.json');
146
- let config;
147
- try {
148
- const configContent = await promises_1.default.readFile(configPath, 'utf-8');
149
- config = JSON.parse(configContent);
150
- }
151
- catch (error) {
152
- console.error('❌ Config file not found. Run "evalai init" first.');
153
- process.exit(1);
154
- }
155
- const client = client_1.AIEvalClient.init(config);
156
- console.log(`📥 Exporting data as ${options.format}...`);
157
- const data = await (0, export_1.exportData)(client, {
158
- format: options.format,
159
- includeTraces: true,
160
- includeEvaluations: true
10
+ const check_1 = require("./check");
11
+ const argv = process.argv.slice(2);
12
+ const subcommand = argv[0];
13
+ if (subcommand === 'check') {
14
+ const args = (0, check_1.parseArgs)(argv.slice(1));
15
+ (0, check_1.runCheck)(args)
16
+ .then((code) => process.exit(code))
17
+ .catch((err) => {
18
+ console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
19
+ process.exit(4);
161
20
  });
162
- // Save to file
163
- const outputPath = path_1.default.resolve(process.cwd(), options.output);
164
- await promises_1.default.writeFile(outputPath, JSON.stringify(data, null, 2));
165
- console.log(`✅ Data exported to ${outputPath}`);
166
- });
167
- // Dev server
168
- program
169
- .command('dev')
170
- .description('Start local development server')
171
- .option('-p, --port <port>', 'Port number', '3001')
172
- .action(async (options) => {
173
- console.log(`🚀 Starting development server on port ${options.port}...`);
174
- console.log('📊 Dashboard: http://localhost:' + options.port);
175
- console.log('🔍 API: http://localhost:' + options.port + '/api');
176
- console.log('\nPress Ctrl+C to stop');
177
- // This would start an Express server with a simple dashboard
178
- // For now, just keep the process running
179
- process.stdin.resume();
180
- });
181
- program.parse();
21
+ }
22
+ else {
23
+ console.log(`EvalAI CLI
24
+
25
+ Usage:
26
+ evalai check [options] CI/CD evaluation gate
27
+
28
+ Options for check:
29
+ --evaluationId <id> Required. Evaluation to gate on.
30
+ --apiKey <key> API key (or EVALAI_API_KEY env)
31
+ --minScore <n> Fail if score < n (0-100)
32
+ --maxDrop <n> Fail if score dropped > n from baseline
33
+ --minN <n> Fail if total test cases < n
34
+ --allowWeakEvidence Allow weak evidence level
35
+ --policy <name> Enforce policy (HIPAA, SOC2, GDPR, etc.)
36
+ --baseline <mode> "published" or "previous"
37
+ --baseUrl <url> API base URL
38
+
39
+ Examples:
40
+ evalai check --minScore 92 --evaluationId 42 --apiKey $EVALAI_API_KEY
41
+ evalai check --policy HIPAA --evaluationId 42 --apiKey $EVALAI_API_KEY
42
+ `);
43
+ process.exit(subcommand === '--help' || subcommand === '-h' ? 0 : 1);
44
+ }
package/dist/client.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { ClientConfig, Trace, CreateTraceParams, ListTracesParams, Evaluation, CreateEvaluationParams, UpdateEvaluationParams, ListEvaluationsParams, LLMJudgeResult, RunLLMJudgeParams, TestCase, CreateTestCaseParams, EvaluationRun, CreateRunParams, Span, CreateSpanParams, OrganizationLimits, Annotation, CreateAnnotationParams, ListAnnotationsParams, AnnotationTask, CreateAnnotationTaskParams, ListAnnotationTasksParams, AnnotationItem, CreateAnnotationItemParams, ListAnnotationItemsParams, APIKey, APIKeyWithSecret, CreateAPIKeyParams, UpdateAPIKeyParams, ListAPIKeysParams, APIKeyUsage, Webhook, CreateWebhookParams, UpdateWebhookParams, ListWebhooksParams, WebhookDelivery, ListWebhookDeliveriesParams, UsageStats, GetUsageParams, UsageSummary, LLMJudgeConfig, CreateLLMJudgeConfigParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, LLMJudgeAlignment, GetLLMJudgeAlignmentParams, Organization } from './types';
1
+ import { ClientConfig, Trace, CreateTraceParams, ListTracesParams, Evaluation, CreateEvaluationParams, UpdateEvaluationParams, ListEvaluationsParams, LLMJudgeResult, RunLLMJudgeParams, TestCase, CreateTestCaseParams, EvaluationRun, CreateRunParams, Span, CreateSpanParams, UpdateTraceParams, OrganizationLimits, Annotation, CreateAnnotationParams, ListAnnotationsParams, AnnotationTask, CreateAnnotationTaskParams, ListAnnotationTasksParams, AnnotationItem, CreateAnnotationItemParams, ListAnnotationItemsParams, APIKey, APIKeyWithSecret, CreateAPIKeyParams, UpdateAPIKeyParams, ListAPIKeysParams, APIKeyUsage, Webhook, CreateWebhookParams, UpdateWebhookParams, ListWebhooksParams, WebhookDelivery, ListWebhookDeliveriesParams, UsageStats, GetUsageParams, UsageSummary, LLMJudgeConfig, CreateLLMJudgeConfigParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, LLMJudgeAlignment, GetLLMJudgeAlignmentParams, Organization } from './types';
2
2
  import { Logger } from './logger';
3
3
  /**
4
4
  * AI Evaluation Platform SDK Client
@@ -126,6 +126,19 @@ declare class TraceAPI {
126
126
  * Get a single trace by ID
127
127
  */
128
128
  get(id: number): Promise<Trace>;
129
+ /**
130
+ * Update an existing trace (e.g. set status, duration, metadata on completion)
131
+ *
132
+ * @example
133
+ * ```typescript
134
+ * await client.traces.update(42, {
135
+ * status: 'success',
136
+ * durationMs: 1234,
137
+ * metadata: { output: 'done' }
138
+ * });
139
+ * ```
140
+ */
141
+ update<TMetadata = Record<string, any>>(id: number, params: UpdateTraceParams<TMetadata>): Promise<Trace<TMetadata>>;
129
142
  /**
130
143
  * Create a span for a trace
131
144
  */
package/dist/client.js CHANGED
@@ -76,13 +76,45 @@ class AIEvalClient {
76
76
  this.cache = new cache_1.RequestCache(config.cacheSize || 1000);
77
77
  // Initialize request batcher if enabled (default: enabled)
78
78
  if (config.enableBatching !== false) {
79
+ const MAX_CONCURRENCY = 5;
79
80
  this.batcher = new batch_1.RequestBatcher(async (requests) => {
80
- // Batch execution placeholder - will be implemented per API
81
- return requests.map(req => ({
82
- id: req.id,
83
- status: 200,
84
- data: null,
85
- }));
81
+ const results = [];
82
+ const executing = [];
83
+ for (const req of requests) {
84
+ const task = (async () => {
85
+ try {
86
+ const data = await this.request(req.endpoint, {
87
+ method: req.method,
88
+ body: req.body ? JSON.stringify(req.body) : undefined,
89
+ headers: req.headers,
90
+ });
91
+ results.push({ id: req.id, status: 200, data });
92
+ }
93
+ catch (err) {
94
+ results.push({
95
+ id: req.id,
96
+ status: err?.statusCode || 500,
97
+ data: null,
98
+ error: err?.message || 'Unknown error',
99
+ });
100
+ }
101
+ })();
102
+ executing.push(task);
103
+ if (executing.length >= MAX_CONCURRENCY) {
104
+ await Promise.race(executing);
105
+ // Remove settled promises
106
+ for (let i = executing.length - 1; i >= 0; i--) {
107
+ const settled = await Promise.race([
108
+ executing[i].then(() => true),
109
+ Promise.resolve(false),
110
+ ]);
111
+ if (settled)
112
+ executing.splice(i, 1);
113
+ }
114
+ }
115
+ }
116
+ await Promise.allSettled(executing);
117
+ return results;
86
118
  }, {
87
119
  maxBatchSize: config.batchSize || 10,
88
120
  batchDelay: config.batchDelay || 50,
@@ -338,6 +370,24 @@ class TraceAPI {
338
370
  async get(id) {
339
371
  return this.client.request(`/api/traces/${id}`);
340
372
  }
373
+ /**
374
+ * Update an existing trace (e.g. set status, duration, metadata on completion)
375
+ *
376
+ * @example
377
+ * ```typescript
378
+ * await client.traces.update(42, {
379
+ * status: 'success',
380
+ * durationMs: 1234,
381
+ * metadata: { output: 'done' }
382
+ * });
383
+ * ```
384
+ */
385
+ async update(id, params) {
386
+ return this.client.request(`/api/traces/${id}`, {
387
+ method: 'PATCH',
388
+ body: JSON.stringify(params),
389
+ });
390
+ }
341
391
  /**
342
392
  * Create a span for a trace
343
393
  */
package/dist/index.d.ts CHANGED
@@ -31,5 +31,6 @@ export { WorkflowTracer, createWorkflowTracer, traceWorkflowStep, traceLangChain
31
31
  export type { ClientConfig as AIEvalConfig, Trace as TraceData, Span as SpanData, Evaluation as EvaluationData, LLMJudgeResult as LLMJudgeData, RetryConfig, GenericMetadata as AnnotationData, TracedResponse, TestCase, TestResult, SnapshotData, ExportOptions, ImportOptions, StreamOptions, BatchOptions } from './types';
32
32
  export { EvaluationTemplates, type EvaluationTemplateType, type FeatureUsage, type OrganizationLimits } from './types';
33
33
  export type { Annotation, CreateAnnotationParams, ListAnnotationsParams, AnnotationTask, CreateAnnotationTaskParams, ListAnnotationTasksParams, AnnotationItem, CreateAnnotationItemParams, ListAnnotationItemsParams, APIKey, APIKeyWithSecret, CreateAPIKeyParams, UpdateAPIKeyParams, ListAPIKeysParams, APIKeyUsage, Webhook, CreateWebhookParams, UpdateWebhookParams, ListWebhooksParams, WebhookDelivery, ListWebhookDeliveriesParams, UsageStats, GetUsageParams, UsageSummary, LLMJudgeConfig, CreateLLMJudgeConfigParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, LLMJudgeAlignment, GetLLMJudgeAlignmentParams, Organization, } from './types';
34
+ export { parseArgs, runCheck, EXIT, type CheckArgs } from './cli/check';
34
35
  import { AIEvalClient } from './client';
35
36
  export default AIEvalClient;
package/dist/index.js CHANGED
@@ -9,7 +9,7 @@
9
9
  */
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
11
  exports.decodeCursor = exports.encodeCursor = exports.autoPaginate = exports.createPaginatedIterator = exports.PaginatedIterator = exports.CacheTTL = exports.RequestCache = exports.RateLimiter = exports.batchRead = exports.streamEvaluation = exports.batchProcess = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.hasValidCodeSyntax = exports.containsAllRequiredFields = exports.followsInstructions = exports.hasNoToxicity = exports.respondedWithinTime = exports.hasFactualAccuracy = exports.containsLanguage = exports.hasReadabilityScore = exports.matchesSchema = exports.hasNoHallucinations = exports.isValidURL = exports.isValidEmail = exports.withinRange = exports.similarTo = exports.hasSentiment = exports.notContainsPII = exports.containsJSON = exports.hasLength = exports.matchesPattern = exports.containsKeywords = exports.expect = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
12
- exports.EvaluationTemplates = exports.traceAutoGen = exports.traceCrewAI = exports.traceLangChainAgent = exports.traceWorkflowStep = exports.createWorkflowTracer = exports.WorkflowTracer = exports.traceAnthropic = exports.traceOpenAI = exports.Logger = exports.RequestBatcher = void 0;
12
+ exports.EXIT = exports.runCheck = exports.parseArgs = exports.EvaluationTemplates = exports.traceAutoGen = exports.traceCrewAI = exports.traceLangChainAgent = exports.traceWorkflowStep = exports.createWorkflowTracer = exports.WorkflowTracer = exports.traceAnthropic = exports.traceOpenAI = exports.Logger = exports.RequestBatcher = void 0;
13
13
  // Main SDK exports
14
14
  var client_1 = require("./client");
15
15
  Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
@@ -106,6 +106,11 @@ Object.defineProperty(exports, "traceAutoGen", { enumerable: true, get: function
106
106
  // New exports for v1.1.0
107
107
  var types_1 = require("./types");
108
108
  Object.defineProperty(exports, "EvaluationTemplates", { enumerable: true, get: function () { return types_1.EvaluationTemplates; } });
109
+ // CLI (programmatic use)
110
+ var check_1 = require("./cli/check");
111
+ Object.defineProperty(exports, "parseArgs", { enumerable: true, get: function () { return check_1.parseArgs; } });
112
+ Object.defineProperty(exports, "runCheck", { enumerable: true, get: function () { return check_1.runCheck; } });
113
+ Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
109
114
  // Default export for convenience
110
115
  const client_2 = require("./client");
111
116
  exports.default = client_2.AIEvalClient;
package/dist/types.d.ts CHANGED
@@ -111,6 +111,14 @@ export interface CreateTraceParams<TMetadata = Record<string, any>> {
111
111
  durationMs?: number;
112
112
  metadata?: TMetadata;
113
113
  }
114
+ /**
115
+ * Parameters for updating an existing trace
116
+ */
117
+ export interface UpdateTraceParams<TMetadata = Record<string, any>> {
118
+ status?: 'pending' | 'success' | 'error';
119
+ durationMs?: number;
120
+ metadata?: TMetadata;
121
+ }
114
122
  /**
115
123
  * Parameters for listing traces
116
124
  */
package/dist/workflows.js CHANGED
@@ -135,13 +135,8 @@ class WorkflowTracer {
135
135
  const durationMs = Date.now() - new Date(this.currentWorkflow.startedAt).getTime();
136
136
  // Calculate total cost
137
137
  const totalCost = this.costs.reduce((sum, cost) => sum + parseFloat(cost.totalCost), 0);
138
- // Update the trace with final status
139
- // Note: We create a new trace entry with the same ID pattern to update status
140
- const traceId = `${this.options.tracePrefix}-complete-${this.currentWorkflow.traceId}`;
141
- await this.client.traces.create({
142
- name: `Workflow: ${this.currentWorkflow.name}`,
143
- traceId,
144
- organizationId: this.options.organizationId,
138
+ // Update the original trace with completion data
139
+ await this.client.traces.update(this.currentWorkflow.traceId, {
145
140
  status: status === 'completed' ? 'success' : 'error',
146
141
  durationMs,
147
142
  metadata: (0, context_1.mergeWithContext)({