npm - @pauly4010/evalai-sdk - Versions diffs - 1.3.0 → 1.4.1 - Mend

@pauly4010/evalai-sdk 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/CHANGELOG.md +12 -0
package/README.md +80 -4
package/dist/__tests__/assertions.test.d.ts +1 -0
package/dist/__tests__/assertions.test.js +288 -0
package/dist/__tests__/client.test.d.ts +1 -0
package/dist/__tests__/client.test.js +185 -0
package/dist/__tests__/testing.test.d.ts +1 -0
package/dist/__tests__/testing.test.js +230 -0
package/dist/__tests__/workflows.test.d.ts +1 -0
package/dist/__tests__/workflows.test.js +222 -0
package/dist/cli/check.d.ts +58 -0
package/dist/cli/check.js +215 -0
package/dist/cli/index.d.ts +4 -2
package/dist/cli/index.js +38 -175
package/dist/client.d.ts +14 -1
package/dist/client.js +56 -6
package/dist/index.d.ts +1 -0
package/dist/index.js +6 -1
package/dist/types.d.ts +8 -0
package/dist/workflows.js +2 -7
package/package.json +17 -29
package/LICENSE +0 -21

package/dist/cli/check.js ADDED Viewed

@@ -0,0 +1,215 @@
+#!/usr/bin/env node
+"use strict";
+/**
+ * evalai check — CI/CD evaluation gate
+ *
+ * Usage:
+ *   evalai check --minScore 92 --evaluationId 42
+ *   evalai check --minScore 90 --maxDrop 5 --evaluationId 42
+ *   evalai check --policy HIPAA --evaluationId 42
+ *   evalai check --baseline published --evaluationId 42
+ *
+ * Flags:
+ *   --minScore <n>       Fail if quality score < n (0-100)
+ *   --maxDrop <n>        Fail if score dropped > n points from baseline
+ *   --minN <n>           Fail if total test cases < n (low sample size)
+ *   --allowWeakEvidence  If false (default), fail when evidenceLevel is 'weak'
+ *   --policy <name>      Enforce a compliance policy (e.g. HIPAA, SOC2, GDPR)
+ *   --baseline <mode>    Baseline comparison mode: "published" (default), "previous", or "production"
+ *   --evaluationId <id>  Required. The evaluation to gate on.
+ *   --baseUrl <url>      API base URL (default: EVALAI_BASE_URL or http://localhost:3000)
+ *   --apiKey <key>       API key (default: EVALAI_API_KEY env var)
+ *
+ * Exit codes:
+ *   0  — Gate passed
+ *   1  — Gate failed: score below threshold
+ *   2  — Gate failed: regression exceeded maxDrop
+ *   3  — Gate failed: policy violation
+ *   4  — API error / network failure
+ *   5  — Invalid arguments
+ *   6  — Gate failed: total test cases < minN
+ *   7  — Gate failed: weak evidence (evidenceLevel === 'weak')
+ *
+ * Environment:
+ *   EVALAI_BASE_URL  — API base URL (default: http://localhost:3000)
+ *   EVALAI_API_KEY   — API key for authentication
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.EXIT = void 0;
+exports.parseArgs = parseArgs;
+exports.runCheck = runCheck;
+// Standardized exit codes returned by runCheck() and passed to process.exit()
+// by parseArgs() when it rejects its input. A CI job observes these values as
+// the process exit status.
+exports.EXIT = {
+    // Every requested gate passed.
+    PASS: 0,
+    // Score is below --minScore.
+    SCORE_BELOW: 1,
+    // Score dropped more than --maxDrop points below the baseline.
+    REGRESSION: 2,
+    // --policy safety-rate or disqualifying-flag check failed.
+    POLICY_VIOLATION: 3,
+    // Network failure, non-OK API response, or the requested baseline is missing.
+    API_ERROR: 4,
+    // Missing/invalid CLI arguments, or an unknown --policy name.
+    BAD_ARGS: 5,
+    // Total test cases is below --minN.
+    LOW_N: 6,
+    // evidenceLevel is 'weak' and --allowWeakEvidence was not given.
+    WEAK_EVIDENCE: 7,
+};
+/**
+ * Parse `evalai check` command-line flags into a normalized options object.
+ *
+ * Flags are `--name value` pairs. A `--name` that is not followed by a value
+ * (end of argv, or the next token also starts with `--`) is stored as the
+ * string 'true'. Tokens that neither start with `--` nor get consumed as a
+ * value are ignored.
+ *
+ * Fallbacks: baseUrl and apiKey fall back to EVALAI_BASE_URL / EVALAI_API_KEY;
+ * an unrecognized --baseline value falls back to 'published'.
+ *
+ * Side effect: on missing or invalid input this prints a message to stderr and
+ * calls process.exit(EXIT.BAD_ARGS) rather than throwing.
+ *
+ * @param {string[]} argv - CLI tokens to parse (without the node/script entries).
+ * @returns {{baseUrl: string, apiKey: string, minScore: number,
+ *   maxDrop: (number|undefined), minN: (number|undefined),
+ *   allowWeakEvidence: boolean, evaluationId: string,
+ *   policy: (string|undefined),
+ *   baseline: ('published'|'previous'|'production')}}
+ */
+function parseArgs(argv) {
+    const args = {};
+    for (let i = 0; i < argv.length; i++) {
+        const arg = argv[i];
+        if (arg.startsWith('--')) {
+            const key = arg.slice(2);
+            const next = argv[i + 1];
+            if (next !== undefined && !next.startsWith('--')) {
+                args[key] = next;
+                i++;
+            }
+            else {
+                args[key] = 'true'; // bare flag
+            }
+        }
+    }
+    // Always parse as base 10 so the result never depends on a numeric prefix in the input.
+    const toInt = (raw) => Number.parseInt(raw, 10);
+    const baseUrl = args.baseUrl || process.env.EVALAI_BASE_URL || 'http://localhost:3000';
+    const apiKey = args.apiKey || process.env.EVALAI_API_KEY || '';
+    const minScore = toInt(args.minScore || '0');
+    const maxDrop = args.maxDrop ? toInt(args.maxDrop) : undefined;
+    const minN = args.minN ? toInt(args.minN) : undefined;
+    const allowWeakEvidence = args.allowWeakEvidence === 'true' || args.allowWeakEvidence === '1';
+    const evaluationId = args.evaluationId || '';
+    const policy = args.policy || undefined;
+    const baseline = (args.baseline === 'previous'
+        ? 'previous'
+        : args.baseline === 'production'
+            ? 'production'
+            : 'published');
+    if (!apiKey) {
+        console.error('Error: --apiKey or EVALAI_API_KEY is required');
+        process.exit(exports.EXIT.BAD_ARGS);
+    }
+    if (!evaluationId) {
+        console.error('Error: --evaluationId is required');
+        process.exit(exports.EXIT.BAD_ARGS);
+    }
+    if (Number.isNaN(minScore) || minScore < 0 || minScore > 100) {
+        console.error('Error: --minScore must be 0-100');
+        process.exit(exports.EXIT.BAD_ARGS);
+    }
+    // A non-numeric (or bare) --maxDrop parses to NaN. Every comparison against
+    // NaN is false, so without this check the regression gate would silently
+    // never fire and the build would pass.
+    if (maxDrop !== undefined && Number.isNaN(maxDrop)) {
+        console.error('Error: --maxDrop must be a number');
+        process.exit(exports.EXIT.BAD_ARGS);
+    }
+    if (minN !== undefined && (Number.isNaN(minN) || minN < 1)) {
+        console.error('Error: --minN must be a positive number');
+        process.exit(exports.EXIT.BAD_ARGS);
+    }
+    return { baseUrl, apiKey, minScore, maxDrop, minN, allowWeakEvidence, evaluationId, policy, baseline };
+}
+/**
+ * Run the CI gate: fetch the latest quality score for an evaluation and apply
+ * the requested gates.
+ *
+ * Gates are evaluated in this order, and the first failure decides the result:
+ *   1. baseline missing (only when a non-default baseline or --maxDrop is used)
+ *   2. minN (sample size)
+ *   3. weak evidence
+ *   4. minScore
+ *   5. maxDrop
+ *   6. policy (unknown policy name -> BAD_ARGS)
+ * A summary box is printed to stdout after gates 1-3 and before gates 4-6.
+ *
+ * API failures (network error, non-OK status, unparseable JSON body) are
+ * reported on stderr and returned as EXIT.API_ERROR instead of being thrown.
+ *
+ * @param {object} args - Options as produced by parseArgs().
+ * @returns {Promise<number>} One of the EXIT codes.
+ */
+async function runCheck(args) {
+    // Thrown/rejected values are not guaranteed to be Error instances.
+    const describe = (err) => (err instanceof Error ? err.message : String(err));
+    const headers = { Authorization: `Bearer ${args.apiKey}` };
+    // ── 1. Fetch latest quality score ──
+    // Build the query with URLSearchParams so CLI-supplied values are
+    // percent-encoded rather than spliced raw into the URL, and strip trailing
+    // slashes from baseUrl so "http://host/" does not produce "//api/quality".
+    const baseUrl = String(args.baseUrl).replace(/\/+$/, '');
+    const query = new URLSearchParams({
+        evaluationId: args.evaluationId,
+        action: 'latest',
+        baseline: args.baseline,
+    });
+    const scoreUrl = `${baseUrl}/api/quality?${query}`;
+    let scoreRes;
+    try {
+        scoreRes = await fetch(scoreUrl, { headers });
+    }
+    catch (err) {
+        console.error(`EvalAI gate ERROR: Network failure — ${describe(err)}`);
+        return exports.EXIT.API_ERROR;
+    }
+    if (!scoreRes.ok) {
+        const body = await scoreRes.text();
+        console.error(`EvalAI gate ERROR: API returned ${scoreRes.status} — ${body}`);
+        return exports.EXIT.API_ERROR;
+    }
+    // A 2xx response with a non-JSON body is still an API failure; report it the
+    // same way as the other API errors instead of letting the promise reject.
+    let data;
+    try {
+        data = (await scoreRes.json());
+    }
+    catch (err) {
+        console.error(`EvalAI gate ERROR: Invalid JSON in API response — ${describe(err)}`);
+        return exports.EXIT.API_ERROR;
+    }
+    const score = data?.score ?? 0;
+    const total = data?.total ?? null;
+    const evidenceLevel = data?.evidenceLevel ?? null;
+    const baselineScore = data?.baselineScore ?? null;
+    const regressionDelta = data?.regressionDelta ?? null;
+    const baselineMissing = data?.baselineMissing === true;
+    const breakdown = data?.breakdown ?? {};
+    // Only treat `flags` as a list when it really is one; .join/.filter below
+    // would throw on any other shape.
+    const flags = Array.isArray(data?.flags) ? data.flags : [];
+    // ── Gate: baseline missing (when baseline comparison requested) ──
+    if (baselineMissing && (args.baseline !== 'published' || args.maxDrop !== undefined)) {
+        console.error(`\n✗ FAILED: baseline (${args.baseline}) not found. ` +
+            `Ensure a baseline run exists (e.g. published run, previous run, or prod-tagged run).`);
+        return exports.EXIT.API_ERROR;
+    }
+    // ── Gate: minN (low sample size) ──
+    if (args.minN !== undefined && total !== null && total < args.minN) {
+        console.error(`\n✗ FAILED: total test cases (${total}) < minN (${args.minN})`);
+        return exports.EXIT.LOW_N;
+    }
+    // ── Gate: allowWeakEvidence ──
+    if (!args.allowWeakEvidence && evidenceLevel === 'weak') {
+        console.error(`\n✗ FAILED: evidence level is 'weak' (use --allowWeakEvidence to permit)`);
+        return exports.EXIT.WEAK_EVIDENCE;
+    }
+    // ── Print summary ──
+    console.log('┌─────────────────────────────────────────┐');
+    console.log(`│  EvalAI Quality Score: ${String(score).padStart(3)}/100            │`);
+    console.log('├─────────────────────────────────────────┤');
+    if (baselineScore !== null) {
+        const delta = regressionDelta ?? 0;
+        const arrow = delta >= 0 ? '▲' : '▼';
+        console.log(`│  Baseline: ${baselineScore}  ${arrow} ${Math.abs(delta)} pts          │`);
+    }
+    // Missing breakdown fields are shown as 0%.
+    const pct = (v) => `${Math.round((v ?? 0) * 100)}%`;
+    console.log(`│  Pass: ${pct(breakdown.passRate)}  Safety: ${pct(breakdown.safety)}  Judge: ${pct(breakdown.judge)} │`);
+    if (flags.length > 0) {
+        console.log(`│  Flags: ${flags.join(', ').padEnd(30)} │`);
+    }
+    console.log('└─────────────────────────────────────────┘');
+    // ── 2. Gate: minimum score ──
+    if (args.minScore > 0 && score < args.minScore) {
+        console.error(`\n✗ FAILED: score=${score} < minScore=${args.minScore}`);
+        return exports.EXIT.SCORE_BELOW;
+    }
+    // ── 3. Gate: maximum drop from baseline ──
+    // regressionDelta is negative when the score fell below the baseline.
+    if (args.maxDrop !== undefined && regressionDelta !== null && regressionDelta < -(args.maxDrop)) {
+        console.error(`\n✗ FAILED: score dropped ${Math.abs(regressionDelta)} pts from baseline ` +
+            `(max allowed: ${args.maxDrop})`);
+        return exports.EXIT.REGRESSION;
+    }
+    // ── 4. Gate: policy compliance ──
+    // Evaluated against the response already fetched above; no second request is made.
+    if (args.policy) {
+        // Policy mapping: minimum safety rate plus the flags that disqualify a run.
+        const policyChecks = {
+            HIPAA: { requiredSafetyRate: 0.99, maxFlags: ['SAFETY_RISK'] },
+            SOC2: { requiredSafetyRate: 0.95, maxFlags: ['SAFETY_RISK', 'LOW_PASS_RATE'] },
+            GDPR: { requiredSafetyRate: 0.95, maxFlags: ['SAFETY_RISK'] },
+            PCI_DSS: { requiredSafetyRate: 0.99, maxFlags: ['SAFETY_RISK', 'LOW_PASS_RATE'] },
+            FINRA_4511: { requiredSafetyRate: 0.95, maxFlags: ['SAFETY_RISK'] },
+        };
+        // Policy names are matched case-insensitively.
+        const policyName = args.policy.toUpperCase();
+        const check = policyChecks[policyName];
+        if (!check) {
+            console.error(`\n✗ Unknown policy: ${args.policy}. Available: ${Object.keys(policyChecks).join(', ')}`);
+            return exports.EXIT.BAD_ARGS;
+        }
+        // Check safety rate (a missing value counts as 0 and therefore fails).
+        const safetyRate = breakdown?.safety ?? 0;
+        if (safetyRate < check.requiredSafetyRate) {
+            console.error(`\n✗ POLICY VIOLATION (${policyName}): safety rate ${Math.round(safetyRate * 100)}% < ` +
+                `required ${Math.round(check.requiredSafetyRate * 100)}%`);
+            return exports.EXIT.POLICY_VIOLATION;
+        }
+        // Check for disqualifying flags
+        const violations = flags.filter(f => check.maxFlags.includes(f));
+        if (violations.length > 0) {
+            console.error(`\n✗ POLICY VIOLATION (${policyName}): ${violations.join(', ')}`);
+            return exports.EXIT.POLICY_VIOLATION;
+        }
+        console.log(`\n✓ Policy ${policyName}: COMPLIANT`);
+    }
+    console.log('\n✓ EvalAI gate PASSED');
+    return exports.EXIT.PASS;
+}
+// Main entry point. The guard keeps this from running when the module is
+// loaded via require() (as cli/index.js and the package index do); it only
+// fires when this file is the script Node was started with.
+const isDirectRun = typeof require !== 'undefined' && require.main === module;
+if (isDirectRun) {
+    const args = parseArgs(process.argv.slice(2));
+    runCheck(args).then((code) => {
+        process.exit(code);
+    }).catch((err) => {
+        // A rejection is not guaranteed to be an Error; avoid printing "undefined".
+        console.error(`EvalAI gate ERROR: ${err instanceof Error ? err.message : String(err)}`);
+        process.exit(exports.EXIT.API_ERROR);
+    });
+}

package/dist/cli/index.d.ts CHANGED Viewed

@@ -1,6 +1,8 @@
 #!/usr/bin/env node
 /**
- * CLI for AI Evaluation Platform SDK
- * Tier 2.6: CLI for Everything
+ * evalai — EvalAI CLI
+ *
+ * Commands:
+ *   evalai check  — CI/CD evaluation gate (see evalai check --help)
  */
 export {};

package/dist/cli/index.js CHANGED Viewed

@@ -1,181 +1,44 @@
 #!/usr/bin/env node
 "use strict";
 /**
- * CLI for AI Evaluation Platform SDK
- * Tier 2.6: CLI for Everything
+ * evalai — EvalAI CLI
+ *
+ * Commands:
+ *   evalai check  — CI/CD evaluation gate (see evalai check --help)
  */
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
 Object.defineProperty(exports, "__esModule", { value: true });
-const commander_1 = require("commander");
-const promises_1 = __importDefault(require("fs/promises"));
-const path_1 = __importDefault(require("path"));
-const client_1 = require("../client");
-const export_1 = require("../export");
-const program = new commander_1.Command();
-program
-    .name('evalai')
-    .description('AI Evaluation Platform CLI')
-    .version('1.0.0');
-// Initialize project
-program
-    .command('init')
-    .description('Initialize a new evaluation project')
-    .option('-d, --dir <directory>', 'Project directory', '.')
-    .action(async (options) => {
-    const dir = path_1.default.resolve(options.dir);
-    console.log('🚀 Initializing EvalAI project...');
-    // Create directory structure
-    await promises_1.default.mkdir(path_1.default.join(dir, '.evalai'), { recursive: true });
-    await promises_1.default.mkdir(path_1.default.join(dir, '.evalai', 'snapshots'), { recursive: true });
-    await promises_1.default.mkdir(path_1.default.join(dir, 'evaluations'), { recursive: true });
-    // Create config file
-    const config = {
-        apiKey: process.env.EVALAI_API_KEY || '',
-        projectId: process.env.EVALAI_PROJECT_ID || '',
-        baseUrl: 'http://localhost:3000/api',
-        debug: false,
-        retry: {
-            maxAttempts: 3,
-            backoff: 'exponential'
-        }
-    };
-    await promises_1.default.writeFile(path_1.default.join(dir, 'evalai.config.json'), JSON.stringify(config, null, 2));
-    // Create example evaluation file
-    const exampleEval = `import { AIEvalClient, createTestSuite, expect } from '@pauly4010/evalai-sdk'
-const client = AIEvalClient.init()
-const suite = createTestSuite('example-evaluation', {
-  cases: [
-    {
-      input: 'What is 2+2?',
-      expected: '4',
-      name: 'simple-math'
-    },
-    {
-      input: 'Explain AI in simple terms',
-      expected: (output) => {
-        expect(output).toContainKeywords(['artificial', 'intelligence'])
-        expect(output).toHaveLength({ min: 50, max: 500 })
-        return true
-      },
-      name: 'ai-explanation'
-    }
-  ]
-})
-// Run the test suite
-suite.run().then(results => {
-  console.log('Test Results:', results)
-  console.log(\`Passed: \${results.passed}/\${results.total}\`)
-})
-`;
-    await promises_1.default.writeFile(path_1.default.join(dir, 'evaluations', 'example.ts'), exampleEval);
-    console.log('✅ Project initialized successfully!');
-    console.log('\nNext steps:');
-    console.log('1. Set your API key: export EVALAI_API_KEY=your-key');
-    console.log('2. Set your project ID: export EVALAI_PROJECT_ID=your-project');
-    console.log('3. Run evaluations: npx evalai eval:run');
-});
-// Run evaluations
-program
-    .command('eval:run')
-    .description('Run evaluation tests')
-    .option('-c, --config <path>', 'Config file path', './evalai.config.json')
-    .option('-f, --file <path>', 'Evaluation file to run')
-    .action(async (options) => {
-    console.log('🧪 Running evaluations...');
-    // Load config
-    const configPath = path_1.default.resolve(options.config);
-    let config;
-    try {
-        const configContent = await promises_1.default.readFile(configPath, 'utf-8');
-        config = JSON.parse(configContent);
-    }
-    catch (error) {
-        console.error('❌ Config file not found. Run "evalai init" first.');
-        process.exit(1);
-    }
-    const client = client_1.AIEvalClient.init(config);
-    // If file specified, run that file
-    if (options.file) {
-        console.log(`Running ${options.file}...`);
-        // Dynamic import of evaluation file would go here
-        // This requires compilation step for TS files
-    }
-    else {
-        // Run all evaluations in the evaluations directory
-        console.log('Running all evaluations...');
-    }
-    console.log('✅ Evaluations completed!');
-});
-// List traces
-program
-    .command('traces')
-    .description('List and filter traces')
-    .option('-l, --limit <number>', 'Number of traces to show', '10')
-    .option('--failed', 'Show only failed traces')
-    .option('--slow', 'Show slow traces (>5s)')
-    .action(async (options) => {
-    const configPath = path_1.default.resolve('./evalai.config.json');
-    let config;
-    try {
-        const configContent = await promises_1.default.readFile(configPath, 'utf-8');
-        config = JSON.parse(configContent);
-    }
-    catch (error) {
-        console.error('❌ Config file not found. Run "evalai init" first.');
-        process.exit(1);
-    }
-    const client = client_1.AIEvalClient.init(config);
-    console.log('📊 Fetching traces...');
-    // API call to get traces would go here
-    console.log(`Showing ${options.limit} traces`);
-});
-// Export data
-program
-    .command('export')
-    .description('Export data from EvalAI')
-    .option('-f, --format <format>', 'Export format (json, csv, xlsx)', 'json')
-    .option('-o, --output <path>', 'Output file path', './export')
-    .option('-t, --type <type>', 'Data type (traces, evaluations, all)', 'all')
-    .action(async (options) => {
-    const configPath = path_1.default.resolve('./evalai.config.json');
-    let config;
-    try {
-        const configContent = await promises_1.default.readFile(configPath, 'utf-8');
-        config = JSON.parse(configContent);
-    }
-    catch (error) {
-        console.error('❌ Config file not found. Run "evalai init" first.');
-        process.exit(1);
-    }
-    const client = client_1.AIEvalClient.init(config);
-    console.log(`📥 Exporting data as ${options.format}...`);
-    const data = await (0, export_1.exportData)(client, {
-        format: options.format,
-        includeTraces: true,
-        includeEvaluations: true
+const check_1 = require("./check");
+const argv = process.argv.slice(2);
+const subcommand = argv[0];
+// Usage text shared by the top-level help and `evalai check --help`.
+const USAGE = `EvalAI CLI
+Usage:
+  evalai check [options]   CI/CD evaluation gate
+Options for check:
+  --evaluationId <id>  Required. Evaluation to gate on.
+  --apiKey <key>      API key (or EVALAI_API_KEY env)
+  --minScore <n>      Fail if score < n (0-100)
+  --maxDrop <n>       Fail if score dropped > n from baseline
+  --minN <n>          Fail if total test cases < n
+  --allowWeakEvidence Allow weak evidence level
+  --policy <name>     Enforce policy (HIPAA, SOC2, GDPR, etc.)
+  --baseline <mode>   "published", "previous", or "production"
+  --baseUrl <url>     API base URL
+Examples:
+  evalai check --minScore 92 --evaluationId 42 --apiKey $EVALAI_API_KEY
+  evalai check --policy HIPAA --evaluationId 42 --apiKey $EVALAI_API_KEY
+`;
+const isHelpFlag = (token) => token === '--help' || token === '-h';
+if (subcommand === 'check') {
+    const checkArgv = argv.slice(1);
+    // Answer `evalai check --help` before parseArgs() runs; parseArgs() would
+    // otherwise exit with "apiKey is required" instead of showing usage.
+    if (checkArgv.some(isHelpFlag)) {
+        console.log(USAGE);
+        process.exit(0);
+    }
+    const args = (0, check_1.parseArgs)(checkArgv);
+    (0, check_1.runCheck)(args)
+        .then((code) => process.exit(code))
+        .catch((err) => {
+        console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
+        // Use the shared exit-code table rather than a bare 4.
+        process.exit(check_1.EXIT.API_ERROR);
     });
-    // Save to file
-    const outputPath = path_1.default.resolve(process.cwd(), options.output);
-    await promises_1.default.writeFile(outputPath, JSON.stringify(data, null, 2));
-    console.log(`✅ Data exported to ${outputPath}`);
-});
-// Dev server
-program
-    .command('dev')
-    .description('Start local development server')
-    .option('-p, --port <port>', 'Port number', '3001')
-    .action(async (options) => {
-    console.log(`🚀 Starting development server on port ${options.port}...`);
-    console.log('📊 Dashboard: http://localhost:' + options.port);
-    console.log('🔍 API: http://localhost:' + options.port + '/api');
-    console.log('\nPress Ctrl+C to stop');
-    // This would start an Express server with a simple dashboard
-    // For now, just keep the process running
-    process.stdin.resume();
-});
-program.parse();
+}
+else {
+    // No/unknown subcommand: print usage. Exit 0 only when help was asked for
+    // explicitly, so a mistyped command still fails a CI step.
+    console.log(USAGE);
+    process.exit(isHelpFlag(subcommand) ? 0 : 1);
+}

package/dist/client.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ClientConfig, Trace, CreateTraceParams, ListTracesParams, Evaluation, CreateEvaluationParams, UpdateEvaluationParams, ListEvaluationsParams, LLMJudgeResult, RunLLMJudgeParams, TestCase, CreateTestCaseParams, EvaluationRun, CreateRunParams, Span, CreateSpanParams, OrganizationLimits, Annotation, CreateAnnotationParams, ListAnnotationsParams, AnnotationTask, CreateAnnotationTaskParams, ListAnnotationTasksParams, AnnotationItem, CreateAnnotationItemParams, ListAnnotationItemsParams, APIKey, APIKeyWithSecret, CreateAPIKeyParams, UpdateAPIKeyParams, ListAPIKeysParams, APIKeyUsage, Webhook, CreateWebhookParams, UpdateWebhookParams, ListWebhooksParams, WebhookDelivery, ListWebhookDeliveriesParams, UsageStats, GetUsageParams, UsageSummary, LLMJudgeConfig, CreateLLMJudgeConfigParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, LLMJudgeAlignment, GetLLMJudgeAlignmentParams, Organization } from './types';
+import { ClientConfig, Trace, CreateTraceParams, ListTracesParams, Evaluation, CreateEvaluationParams, UpdateEvaluationParams, ListEvaluationsParams, LLMJudgeResult, RunLLMJudgeParams, TestCase, CreateTestCaseParams, EvaluationRun, CreateRunParams, Span, CreateSpanParams, UpdateTraceParams, OrganizationLimits, Annotation, CreateAnnotationParams, ListAnnotationsParams, AnnotationTask, CreateAnnotationTaskParams, ListAnnotationTasksParams, AnnotationItem, CreateAnnotationItemParams, ListAnnotationItemsParams, APIKey, APIKeyWithSecret, CreateAPIKeyParams, UpdateAPIKeyParams, ListAPIKeysParams, APIKeyUsage, Webhook, CreateWebhookParams, UpdateWebhookParams, ListWebhooksParams, WebhookDelivery, ListWebhookDeliveriesParams, UsageStats, GetUsageParams, UsageSummary, LLMJudgeConfig, CreateLLMJudgeConfigParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, LLMJudgeAlignment, GetLLMJudgeAlignmentParams, Organization } from './types';
 import { Logger } from './logger';
 /**
  * AI Evaluation Platform SDK Client
@@ -126,6 +126,19 @@ declare class TraceAPI {
      * Get a single trace by ID
      */
     get(id: number): Promise<Trace>;
+    /**
+     * Update an existing trace (e.g. set status, duration, metadata on completion)
+     *
+     * @example
+     * ```typescript
+     * await client.traces.update(42, {
+     *   status: 'success',
+     *   durationMs: 1234,
+     *   metadata: { output: 'done' }
+     * });
+     * ```
+     */
+    update<TMetadata = Record<string, any>>(id: number, params: UpdateTraceParams<TMetadata>): Promise<Trace<TMetadata>>;
     /**
      * Create a span for a trace
      */

package/dist/client.js CHANGED Viewed

@@ -76,13 +76,45 @@ class AIEvalClient {
         this.cache = new cache_1.RequestCache(config.cacheSize || 1000);
         // Initialize request batcher if enabled (default: enabled)
         if (config.enableBatching !== false) {
+            const MAX_CONCURRENCY = 5;
             this.batcher = new batch_1.RequestBatcher(async (requests) => {
-                // Batch execution placeholder - will be implemented per API
-                return requests.map(req => ({
-                    id: req.id,
-                    status: 200,
-                    data: null,
-                }));
+                // Run the batch through a fixed pool of workers so that at most
+                // MAX_CONCURRENCY requests are in flight at once. Each worker claims
+                // the next unprocessed index; the claim is synchronous (no await
+                // between the check and the increment), so two workers never take
+                // the same request.
+                //
+                // This replaces a "race, then probe each promise" scheme whose probe
+                // Promise.race([p.then(() => true), Promise.resolve(false)]) always
+                // resolved to false: the already-resolved promise wins the race even
+                // when p has settled. Nothing was ever removed from the in-flight
+                // list, so the concurrency cap stopped being enforced once the first
+                // request completed.
+                const results = new Array(requests.length);
+                let nextIndex = 0;
+                const runWorker = async () => {
+                    while (nextIndex < requests.length) {
+                        const index = nextIndex++;
+                        const req = requests[index];
+                        try {
+                            const data = await this.request(req.endpoint, {
+                                method: req.method,
+                                body: req.body ? JSON.stringify(req.body) : undefined,
+                                headers: req.headers,
+                            });
+                            // Store by index so results line up with the incoming requests.
+                            results[index] = { id: req.id, status: 200, data };
+                        }
+                        catch (err) {
+                            // A failed request becomes an error entry; it must not
+                            // reject the whole batch or stop this worker.
+                            results[index] = {
+                                id: req.id,
+                                status: err?.statusCode || 500,
+                                data: null,
+                                error: err?.message || 'Unknown error',
+                            };
+                        }
+                    }
+                };
+                const workerCount = Math.min(MAX_CONCURRENCY, requests.length);
+                await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
+                return results;
             }, {
                 maxBatchSize: config.batchSize || 10,
                 batchDelay: config.batchDelay || 50,
@@ -338,6 +370,24 @@ class TraceAPI {
     async get(id) {
         return this.client.request(`/api/traces/${id}`);
     }
+    /**
+     * Patch an already-created trace, e.g. to record its final status,
+     * duration, or metadata once the traced work has finished.
+     *
+     * Issues a PATCH to `/api/traces/{id}` with `params` serialized as JSON
+     * and resolves with whatever the client's request() call resolves to.
+     *
+     * @example
+     * ```typescript
+     * await client.traces.update(42, {
+     *   status: 'success',
+     *   durationMs: 1234,
+     *   metadata: { output: 'done' }
+     * });
+     * ```
+     */
+    async update(id, params) {
+        const endpoint = `/api/traces/${id}`;
+        const requestOptions = { method: 'PATCH', body: JSON.stringify(params) };
+        return this.client.request(endpoint, requestOptions);
+    }
     /**
      * Create a span for a trace
      */

package/dist/index.d.ts CHANGED Viewed

@@ -31,5 +31,6 @@ export { WorkflowTracer, createWorkflowTracer, traceWorkflowStep, traceLangChain
 export type { ClientConfig as AIEvalConfig, Trace as TraceData, Span as SpanData, Evaluation as EvaluationData, LLMJudgeResult as LLMJudgeData, RetryConfig, GenericMetadata as AnnotationData, TracedResponse, TestCase, TestResult, SnapshotData, ExportOptions, ImportOptions, StreamOptions, BatchOptions } from './types';
 export { EvaluationTemplates, type EvaluationTemplateType, type FeatureUsage, type OrganizationLimits } from './types';
 export type { Annotation, CreateAnnotationParams, ListAnnotationsParams, AnnotationTask, CreateAnnotationTaskParams, ListAnnotationTasksParams, AnnotationItem, CreateAnnotationItemParams, ListAnnotationItemsParams, APIKey, APIKeyWithSecret, CreateAPIKeyParams, UpdateAPIKeyParams, ListAPIKeysParams, APIKeyUsage, Webhook, CreateWebhookParams, UpdateWebhookParams, ListWebhooksParams, WebhookDelivery, ListWebhookDeliveriesParams, UsageStats, GetUsageParams, UsageSummary, LLMJudgeConfig, CreateLLMJudgeConfigParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, LLMJudgeAlignment, GetLLMJudgeAlignmentParams, Organization, } from './types';
+export { parseArgs, runCheck, EXIT, type CheckArgs } from './cli/check';
 import { AIEvalClient } from './client';
 export default AIEvalClient;

package/dist/index.js CHANGED Viewed

@@ -9,7 +9,7 @@
  */
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.decodeCursor = exports.encodeCursor = exports.autoPaginate = exports.createPaginatedIterator = exports.PaginatedIterator = exports.CacheTTL = exports.RequestCache = exports.RateLimiter = exports.batchRead = exports.streamEvaluation = exports.batchProcess = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.hasValidCodeSyntax = exports.containsAllRequiredFields = exports.followsInstructions = exports.hasNoToxicity = exports.respondedWithinTime = exports.hasFactualAccuracy = exports.containsLanguage = exports.hasReadabilityScore = exports.matchesSchema = exports.hasNoHallucinations = exports.isValidURL = exports.isValidEmail = exports.withinRange = exports.similarTo = exports.hasSentiment = exports.notContainsPII = exports.containsJSON = exports.hasLength = exports.matchesPattern = exports.containsKeywords = exports.expect = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalAIError = exports.AIEvalClient = void 0;
-exports.EvaluationTemplates = exports.traceAutoGen = exports.traceCrewAI = exports.traceLangChainAgent = exports.traceWorkflowStep = exports.createWorkflowTracer = exports.WorkflowTracer = exports.traceAnthropic = exports.traceOpenAI = exports.Logger = exports.RequestBatcher = void 0;
+exports.EXIT = exports.runCheck = exports.parseArgs = exports.EvaluationTemplates = exports.traceAutoGen = exports.traceCrewAI = exports.traceLangChainAgent = exports.traceWorkflowStep = exports.createWorkflowTracer = exports.WorkflowTracer = exports.traceAnthropic = exports.traceOpenAI = exports.Logger = exports.RequestBatcher = void 0;
 // Main SDK exports
 var client_1 = require("./client");
 Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
@@ -106,6 +106,11 @@ Object.defineProperty(exports, "traceAutoGen", { enumerable: true, get: function
 // New exports for v1.1.0
 var types_1 = require("./types");
 Object.defineProperty(exports, "EvaluationTemplates", { enumerable: true, get: function () { return types_1.EvaluationTemplates; } });
+// CLI (programmatic use)
+var check_1 = require("./cli/check");
+Object.defineProperty(exports, "parseArgs", { enumerable: true, get: function () { return check_1.parseArgs; } });
+Object.defineProperty(exports, "runCheck", { enumerable: true, get: function () { return check_1.runCheck; } });
+Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
 // Default export for convenience
 const client_2 = require("./client");
 exports.default = client_2.AIEvalClient;

package/dist/types.d.ts CHANGED Viewed

@@ -111,6 +111,14 @@ export interface CreateTraceParams<TMetadata = Record<string, any>> {
     durationMs?: number;
     metadata?: TMetadata;
 }
+/**
+ * Parameters for updating an existing trace.
+ *
+ * Every field is optional.
+ * NOTE(review): presumably fields that are omitted are left unchanged by the
+ * API — confirm against the PATCH /api/traces/:id handler.
+ */
+export interface UpdateTraceParams<TMetadata = Record<string, any>> {
+    /** New status for the trace. */
+    status?: 'pending' | 'success' | 'error';
+    /** Trace duration. NOTE(review): the name suggests milliseconds — confirm. */
+    durationMs?: number;
+    /** Metadata payload, typed by the TMetadata parameter. */
+    metadata?: TMetadata;
+}
 /**
  * Parameters for listing traces
  */

package/dist/workflows.js CHANGED Viewed

@@ -135,13 +135,8 @@ class WorkflowTracer {
         const durationMs = Date.now() - new Date(this.currentWorkflow.startedAt).getTime();
         // Calculate total cost
         const totalCost = this.costs.reduce((sum, cost) => sum + parseFloat(cost.totalCost), 0);
-        // Update the trace with final status
-        // Note: We create a new trace entry with the same ID pattern to update status
-        const traceId = `${this.options.tracePrefix}-complete-${this.currentWorkflow.traceId}`;
-        await this.client.traces.create({
-            name: `Workflow: ${this.currentWorkflow.name}`,
-            traceId,
-            organizationId: this.options.organizationId,
+        // Update the original trace with completion data
+        await this.client.traces.update(this.currentWorkflow.traceId, {
             status: status === 'completed' ? 'success' : 'error',
             durationMs,
             metadata: (0, context_1.mergeWithContext)({