npm - @evalgate/sdk - Versions diffs - 2.2.3 → 2.2.4 - Mend

@evalgate/sdk 2.2.3 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/CHANGELOG.md +5 -0
package/README.md +38 -1
package/dist/assertions.d.ts +185 -5
package/dist/assertions.js +496 -61
package/dist/batch.js +4 -4
package/dist/cache.d.ts +4 -0
package/dist/cache.js +4 -0
package/dist/cli/baseline.d.ts +14 -0
package/dist/cli/baseline.js +43 -3
package/dist/cli/check.d.ts +5 -2
package/dist/cli/check.js +20 -12
package/dist/cli/compare.d.ts +80 -0
package/dist/cli/compare.js +266 -0
package/dist/cli/index.js +244 -101
package/dist/cli/regression-gate.js +23 -0
package/dist/cli/run.js +22 -0
package/dist/cli/start.d.ts +26 -0
package/dist/cli/start.js +130 -0
package/dist/cli/templates.d.ts +24 -0
package/dist/cli/templates.js +314 -0
package/dist/cli/traces.d.ts +109 -0
package/dist/cli/traces.js +152 -0
package/dist/cli/validate.d.ts +37 -0
package/dist/cli/validate.js +252 -0
package/dist/cli/watch.d.ts +19 -0
package/dist/cli/watch.js +175 -0
package/dist/client.js +6 -13
package/dist/constants.d.ts +2 -0
package/dist/constants.js +5 -0
package/dist/index.d.ts +7 -6
package/dist/index.js +22 -6
package/dist/integrations/openai.js +83 -60
package/dist/logger.d.ts +3 -1
package/dist/logger.js +2 -1
package/dist/otel.d.ts +130 -0
package/dist/otel.js +309 -0
package/dist/runtime/eval.d.ts +14 -4
package/dist/runtime/eval.js +127 -2
package/dist/runtime/registry.d.ts +4 -2
package/dist/runtime/registry.js +11 -3
package/dist/runtime/run-report.d.ts +1 -1
package/dist/runtime/run-report.js +7 -4
package/dist/runtime/types.d.ts +38 -0
package/dist/testing.d.ts +8 -0
package/dist/testing.js +45 -10
package/dist/version.d.ts +1 -1
package/dist/version.js +1 -1
package/dist/workflows.d.ts +2 -0
package/dist/workflows.js +184 -102
package/package.json +8 -1

package/dist/cli/validate.js ADDED Viewed

@@ -0,0 +1,252 @@
+"use strict";
+/**
+ * evalgate validate — static validation of spec files without execution
+ *
+ * The equivalent of `tsc --noEmit` for eval specs. Catches:
+ * - Missing or malformed defineEval calls
+ * - Executor functions that don't return EvalResult shape
+ * - Invalid spec names (characters, length)
+ * - Empty spec files
+ * - Missing required fields in config-form defineEval
+ *
+ * Usage:
+ *   evalgate validate
+ *   evalgate validate --format json
+ */
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // interop helper (reuses this.__createBinding if present): exposes m[k] on o under the key k2
+    if (k2 === undefined) k2 = k; // k2 defaults to the source key
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // descriptor missing or not reusable as-is: replace it with an enumerable getter
+      desc = { enumerable: true, get: function() { return m[k]; } }; // the getter reads m[k] on every access
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) { // variant used when Object.create is falsy: copies the current value by plain assignment
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // interop helper: sets v as the enumerable "default" property of o
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) { // variant used when Object.create is falsy
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () { // interop helper applied to the require() results for node:fs and node:path below
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) { // on first call, ownKeys rebinds itself to Object.getOwnPropertyNames or this for-in fallback
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod; // modules flagged __esModule are returned unchanged
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // bind every own key except "default"
+        __setModuleDefault(result, mod); // the original module itself becomes result.default
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.runValidate = runValidate;
+const fs = __importStar(require("node:fs"));
+const path = __importStar(require("node:path"));
+const execution_mode_1 = require("../runtime/execution-mode");
+/**
+ * Name validation regex — must match the runtime's validateSpecName.
+ * Accepts one or more letters, digits, whitespace characters, hyphens or underscores.
+ */
+const VALID_NAME_RE = /^[a-zA-Z0-9\s\-_]+$/;
+const MAX_NAME_LENGTH = 100; // names longer than this are reported as NAME_TOO_LONG
+/**
+ * Static patterns we look for in spec files.
+ *
+ * Every pattern below carries the global (g) flag, so RegExp.prototype.test()
+ * and exec() on them are stateful: each call resumes from the regex's lastIndex.
+ */
+const DEFINE_EVAL_RE = /defineEval\s*[.(]/g; // defineEval followed by "(" or "." — so it also matches the .skip/.only/.fromDataset forms
+const DEFINE_EVAL_NAME_RE = /defineEval\s*\(\s*["'`]([^"'`]*)["'`]/g; // name-first form; group 1 captures the quoted spec name
+const DEFINE_EVAL_CONFIG_RE = /defineEval\s*\(\s*\{/g; // config-object form: defineEval({ ... })
+const DEFINE_EVAL_SKIP_RE = /defineEval\.skip\s*\(/g;
+const DEFINE_EVAL_ONLY_RE = /defineEval\.only\s*\(/g;
+const DEFINE_EVAL_FROM_DATASET_RE = /defineEval\.fromDataset\s*\(/g;
+const EXECUTOR_RETURN_RE = /return\s*\{[^}]*pass\s*:/g; // a returned object literal that contains a "pass:" key before its first "}"
+const CREATE_RESULT_RE = /createResult\s*\(/g;
+function analyzeFile(filePath) {
+    const issues = [];
+    const relPath = path.relative(process.cwd(), filePath);
+    let content;
+    try {
+        content = fs.readFileSync(filePath, "utf8");
+    }
+    catch {
+        issues.push({
+            severity: "error",
+            file: relPath,
+            code: "FILE_UNREADABLE",
+            message: `Cannot read file: ${relPath}`,
+        });
+        return issues;
+    }
+    if (content.trim().length === 0) {
+        issues.push({
+            severity: "error",
+            file: relPath,
+            code: "EMPTY_FILE",
+            message: "Spec file is empty",
+        });
+        return issues;
+    }
+    const lines = content.split("\n");
+    // Check for defineEval calls
+    const defineEvalMatches = content.match(DEFINE_EVAL_RE);
+    const skipMatches = content.match(DEFINE_EVAL_SKIP_RE);
+    const onlyMatches = content.match(DEFINE_EVAL_ONLY_RE);
+    const fromDatasetMatches = content.match(DEFINE_EVAL_FROM_DATASET_RE);
+    const totalCalls = (defineEvalMatches?.length ?? 0) +
+        (skipMatches?.length ?? 0) +
+        (onlyMatches?.length ?? 0) +
+        (fromDatasetMatches?.length ?? 0);
+    if (totalCalls === 0) {
+        issues.push({
+            severity: "warn",
+            file: relPath,
+            code: "NO_DEFINE_EVAL",
+            message: "No defineEval() calls found. File may not define any specs.",
+        });
+    }
+    // Validate spec names
+    const nameMatches = [...content.matchAll(DEFINE_EVAL_NAME_RE)];
+    for (const match of nameMatches) {
+        const name = match[1];
+        const matchIndex = match.index ?? 0;
+        const lineNum = content.substring(0, matchIndex).split("\n").length;
+        if (!name || name.trim() === "") {
+            issues.push({
+                severity: "error",
+                file: relPath,
+                line: lineNum,
+                code: "EMPTY_NAME",
+                message: "Spec name is empty",
+            });
+            continue;
+        }
+        if (name.length > MAX_NAME_LENGTH) {
+            issues.push({
+                severity: "error",
+                file: relPath,
+                line: lineNum,
+                code: "NAME_TOO_LONG",
+                message: `Spec name "${name.slice(0, 30)}..." exceeds ${MAX_NAME_LENGTH} characters`,
+            });
+        }
+        if (!VALID_NAME_RE.test(name)) {
+            issues.push({
+                severity: "error",
+                file: relPath,
+                line: lineNum,
+                code: "INVALID_NAME",
+                message: `Spec name "${name}" contains invalid characters (only letters, numbers, spaces, hyphens, underscores allowed)`,
+            });
+        }
+    }
+    // Check config-form defineEval calls have required fields
+    const configMatches = [...content.matchAll(DEFINE_EVAL_CONFIG_RE)];
+    for (const match of configMatches) {
+        const matchIndex = match.index ?? 0;
+        const lineNum = content.substring(0, matchIndex).split("\n").length;
+        // Simple heuristic: look for 'name:' and 'executor:' in the next ~20 lines
+        const contextLines = lines.slice(lineNum - 1, lineNum + 19).join("\n");
+        if (!contextLines.includes("name:") && !contextLines.includes("name :")) {
+            issues.push({
+                severity: "error",
+                file: relPath,
+                line: lineNum,
+                code: "MISSING_NAME",
+                message: "Config-form defineEval() missing required 'name' field",
+            });
+        }
+        if (!contextLines.includes("executor:") &&
+            !contextLines.includes("executor :")) {
+            issues.push({
+                severity: "error",
+                file: relPath,
+                line: lineNum,
+                code: "MISSING_EXECUTOR",
+                message: "Config-form defineEval() missing required 'executor' field",
+            });
+        }
+    }
+    // Check that executors return EvalResult shape
+    const hasCreateResult = CREATE_RESULT_RE.test(content);
+    const hasReturnPass = EXECUTOR_RETURN_RE.test(content);
+    if (totalCalls > 0 && !hasCreateResult && !hasReturnPass) {
+        issues.push({
+            severity: "warn",
+            file: relPath,
+            code: "NO_RESULT_SHAPE",
+            message: "No createResult() or return { pass: ... } found. Executors may not return the required EvalResult shape.",
+        });
+    }
+    return issues;
+}
+async function runValidate(args = []) {
+    const formatIndex = args.indexOf("--format");
+    const format = formatIndex !== -1 ? args[formatIndex + 1] : "human";
+    const projectRoot = process.cwd();
+    const executionMode = await (0, execution_mode_1.getExecutionMode)(projectRoot);
+    const specFiles = executionMode.specFiles;
+    if (specFiles.length === 0) {
+        const result = {
+            filesScanned: 0,
+            filesWithIssues: 0,
+            issues: [],
+            passed: true,
+        };
+        if (format === "json") {
+            console.log(JSON.stringify(result, null, 2));
+        }
+        else {
+            console.log("\n✨ No spec files found. Nothing to validate.");
+            console.log("💡 Create files with defineEval() calls to get started.");
+        }
+        return result;
+    }
+    const allIssues = [];
+    const filesWithIssues = new Set();
+    for (const file of specFiles) {
+        const issues = analyzeFile(file);
+        for (const issue of issues) {
+            allIssues.push(issue);
+            filesWithIssues.add(issue.file);
+        }
+    }
+    const errors = allIssues.filter((i) => i.severity === "error");
+    const warnings = allIssues.filter((i) => i.severity === "warn");
+    const passed = errors.length === 0;
+    const result = {
+        filesScanned: specFiles.length,
+        filesWithIssues: filesWithIssues.size,
+        issues: allIssues,
+        passed,
+    };
+    if (format === "json") {
+        console.log(JSON.stringify(result, null, 2));
+    }
+    else {
+        console.log(`\n🔍 Validated ${specFiles.length} spec file${specFiles.length === 1 ? "" : "s"}`);
+        if (allIssues.length === 0) {
+            console.log("✅ All spec files are valid.\n");
+        }
+        else {
+            for (const issue of allIssues) {
+                const loc = issue.line ? `:${issue.line}` : "";
+                const icon = issue.severity === "error" ? "❌" : "⚠️";
+                console.log(`  ${icon} ${issue.file}${loc} [${issue.code}] ${issue.message}`);
+            }
+            console.log(`\n${errors.length} error${errors.length === 1 ? "" : "s"}, ${warnings.length} warning${warnings.length === 1 ? "" : "s"}`);
+            if (passed) {
+                console.log("✅ Validation passed (warnings only).\n");
+            }
+            else {
+                console.log("❌ Validation failed.\n");
+            }
+        }
+    }
+    return result;
+}

package/dist/cli/watch.d.ts ADDED Viewed

@@ -0,0 +1,19 @@
+/**
+ * Watch mode for evalgate run
+ *
+ * Re-executes evaluation specs when source files change.
+ * Uses Node.js fs.watch with debouncing to avoid rapid re-runs.
+ */
+import type { RunOptions } from "./run";
+export interface WatchOptions extends RunOptions {
+    /** Debounce interval in milliseconds between a detected change and the re-run (default: 300) */
+    debounceMs?: number;
+    /** Additional directories to watch beyond the project's eval/, evals/ and src/ directories; relative paths are resolved against the project root and paths that do not exist are ignored */
+    extraWatchDirs?: string[];
+    /** Clear the terminal before each re-run; the initial run never clears (default: true) */
+    clearScreen?: boolean;
+}
+/**
+ * Start watch mode — runs evaluations and re-runs on file changes
+ *
+ * `projectRoot` defaults to `process.cwd()`. The returned promise never
+ * resolves: watch mode keeps running until the process receives SIGINT or
+ * SIGTERM, at which point the watchers are closed and the process exits.
+ */
+export declare function runWatch(options: WatchOptions, projectRoot?: string): Promise<void>;

package/dist/cli/watch.js ADDED Viewed

@@ -0,0 +1,175 @@
+"use strict";
+/**
+ * Watch mode for evalgate run
+ *
+ * Re-executes evaluation specs when source files change.
+ * Uses Node.js fs.watch with debouncing to avoid rapid re-runs.
+ */
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // interop helper (reuses this.__createBinding if present): exposes m[k] on o under the key k2
+    if (k2 === undefined) k2 = k; // k2 defaults to the source key
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // descriptor missing or not reusable as-is: replace it with an enumerable getter
+      desc = { enumerable: true, get: function() { return m[k]; } }; // the getter reads m[k] on every access
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) { // variant used when Object.create is falsy: copies the current value by plain assignment
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // interop helper: sets v as the enumerable "default" property of o
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) { // variant used when Object.create is falsy
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () { // interop helper applied to the require() results for node:fs and node:path below
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) { // on first call, ownKeys rebinds itself to Object.getOwnPropertyNames or this for-in fallback
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod; // modules flagged __esModule are returned unchanged
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // bind every own key except "default"
+        __setModuleDefault(result, mod); // the original module itself becomes result.default
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.runWatch = runWatch;
+const fs = __importStar(require("node:fs"));
+const path = __importStar(require("node:path"));
+const run_1 = require("./run");
+/**
+ * Start watch mode — runs evaluations and re-runs on file changes
+ */
+async function runWatch(options, projectRoot = process.cwd()) {
+    const debounceMs = options.debounceMs ?? 300;
+    const clearScreen = options.clearScreen ?? true;
+    // Directories to watch
+    const watchDirs = new Set();
+    // Always watch the eval/ directory if it exists
+    const evalDir = path.join(projectRoot, "eval");
+    if (fs.existsSync(evalDir))
+        watchDirs.add(evalDir);
+    // Watch evals/ directory too
+    const evalsDir = path.join(projectRoot, "evals");
+    if (fs.existsSync(evalsDir))
+        watchDirs.add(evalsDir);
+    // Watch src/ for code changes that may affect evals
+    const srcDir = path.join(projectRoot, "src");
+    if (fs.existsSync(srcDir))
+        watchDirs.add(srcDir);
+    // Add extra watch dirs
+    if (options.extraWatchDirs) {
+        for (const dir of options.extraWatchDirs) {
+            const resolved = path.isAbsolute(dir) ? dir : path.join(projectRoot, dir);
+            if (fs.existsSync(resolved))
+                watchDirs.add(resolved);
+        }
+    }
+    if (watchDirs.size === 0) {
+        console.error("❌ No directories to watch. Create eval/, evals/, or src/ directory.");
+        process.exit(1);
+    }
+    console.log("👁️  Watch mode enabled");
+    console.log(`   Watching: ${[...watchDirs].map((d) => path.relative(projectRoot, d) || ".").join(", ")}`);
+    console.log(`   Debounce: ${debounceMs}ms`);
+    console.log("   Press Ctrl+C to stop\n");
+    // Initial run
+    await executeRun(options, projectRoot, clearScreen, false);
+    // Set up watchers with debouncing
+    let debounceTimer = null;
+    let isRunning = false;
+    const triggerRun = () => {
+        if (debounceTimer)
+            clearTimeout(debounceTimer);
+        debounceTimer = setTimeout(async () => {
+            if (isRunning)
+                return;
+            isRunning = true;
+            try {
+                await executeRun(options, projectRoot, clearScreen, true);
+            }
+            finally {
+                isRunning = false;
+            }
+        }, debounceMs);
+    };
+    const watchers = [];
+    for (const dir of watchDirs) {
+        try {
+            const watcher = fs.watch(dir, { recursive: true }, (eventType, filename) => {
+                if (!filename)
+                    return;
+                // Skip hidden files and node_modules
+                if (filename.startsWith(".") || filename.includes("node_modules"))
+                    return;
+                // Only watch relevant file types
+                const ext = path.extname(filename).toLowerCase();
+                if ([".ts", ".tsx", ".js", ".jsx", ".json", ".jsonl", ".csv"].includes(ext)) {
+                    console.log(`\n🔄 Change detected: ${filename} (${eventType})`);
+                    triggerRun();
+                }
+            });
+            watchers.push(watcher);
+        }
+        catch (err) {
+            console.warn(`⚠️  Could not watch ${path.relative(projectRoot, dir)}: ${err instanceof Error ? err.message : String(err)}`);
+        }
+    }
+    // Handle graceful shutdown
+    const cleanup = () => {
+        console.log("\n\n👋 Watch mode stopped.");
+        for (const watcher of watchers) {
+            watcher.close();
+        }
+        if (debounceTimer)
+            clearTimeout(debounceTimer);
+        process.exit(0);
+    };
+    process.on("SIGINT", cleanup);
+    process.on("SIGTERM", cleanup);
+    // Keep process alive
+    await new Promise(() => {
+        // Never resolves — watch runs until interrupted
+    });
+}
+/**
+ * Execute a single run and print results (without process.exit)
+ */
+async function executeRun(options, projectRoot, clearScreen, isRerun) {
+    if (clearScreen && isRerun) {
+        // Clear screen using ANSI escape
+        process.stdout.write("\x1B[2J\x1B[0f");
+    }
+    const timestamp = new Date().toLocaleTimeString();
+    console.log(`${isRerun ? "🔄 Re-running" : "▶️  Running"} evaluations... (${timestamp})`);
+    try {
+        const result = await (0, run_1.runEvaluations)({
+            specIds: options.specIds,
+            impactedOnly: options.impactedOnly,
+            baseBranch: options.baseBranch,
+            format: options.format,
+            writeResults: options.writeResults,
+        }, projectRoot);
+        if (options.format === "json") {
+            (0, run_1.printJsonResults)(result);
+        }
+        else {
+            (0, run_1.printHumanResults)(result);
+        }
+        // Print watch-specific summary
+        const statusIcon = result.summary.failed > 0 ? "❌" : "✅";
+        console.log(`\n${statusIcon} ${result.summary.passed}/${result.results.length} passed | Waiting for changes...`);
+        return result;
+    }
+    catch (error) {
+        console.error("❌ Run failed:", error instanceof Error ? error.message : String(error));
+        console.log("\n⏳ Waiting for changes...");
+        return null;
+    }
+}

package/dist/client.js CHANGED Viewed

@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.AIEvalClient = void 0;
 const batch_1 = require("./batch");
 const cache_1 = require("./cache");
+const constants_1 = require("./constants");
 const context_1 = require("./context");
 const errors_1 = require("./errors");
 const logger_1 = require("./logger");
@@ -72,7 +73,7 @@ class AIEvalClient {
         this.baseUrl =
             config.baseUrl ||
                 getEnvVar("EVALGATE_BASE_URL", "EVALAI_BASE_URL") ||
-                (isBrowser ? "" : "https://api.evalgate.com");
+                (isBrowser ? "" : constants_1.DEFAULT_BASE_URL);
         this.timeout = config.timeout || 30000;
         // Tier 4.17: Debug mode with request logging
         const logLevel = config.logLevel || (config.debug ? "debug" : "info");
@@ -100,7 +101,7 @@ class AIEvalClient {
             const MAX_CONCURRENCY = 5;
             this.batcher = new batch_1.RequestBatcher(async (requests) => {
                 const results = [];
-                const executing = [];
+                const executing = new Set();
                 for (const req of requests) {
                     const task = (async () => {
                         try {
@@ -121,18 +122,10 @@ class AIEvalClient {
                             });
                         }
                     })();
-                    executing.push(task);
-                    if (executing.length >= MAX_CONCURRENCY) {
+                    const tracked = task.finally(() => executing.delete(tracked));
+                    executing.add(tracked);
+                    if (executing.size >= MAX_CONCURRENCY) {
                         await Promise.race(executing);
-                        // Remove settled promises
-                        for (let i = executing.length - 1; i >= 0; i--) {
-                            const settled = await Promise.race([
-                                executing[i].then(() => true),
-                                Promise.resolve(false),
-                            ]);
-                            if (settled)
-                                executing.splice(i, 1);
-                        }
                     }
                 }
                 await Promise.allSettled(executing);

package/dist/constants.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ /** Default API base URL for the EvalGate platform. */
2	+ export declare const DEFAULT_BASE_URL = "https://api.evalgate.com";

package/dist/constants.js ADDED Viewed

@@ -0,0 +1,5 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.DEFAULT_BASE_URL = void 0;
+/** Default API base URL for the EvalGate platform. */
+exports.DEFAULT_BASE_URL = "https://api.evalgate.com";

package/dist/index.d.ts CHANGED Viewed

@@ -7,14 +7,14 @@
  * @packageDocumentation
  */
 export { AIEvalClient } from "./client";
-import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, SDKError } from "./errors";
-export { EvalGateError, RateLimitError, AuthenticationError, SDKError as ValidationError, // Using SDKError as ValidationError for backward compatibility
-NetworkError, };
-export { type AssertionLLMConfig, configureAssertions, containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, containsLanguageAsync, expect, followsInstructions, getAssertionConfig, hasFactualAccuracy, hasFactualAccuracyAsync, hasLength, hasNoHallucinations, hasNoHallucinationsAsync, hasNoToxicity, hasNoToxicityAsync, hasPII, hasReadabilityScore, hasSentiment, hasSentimentAsync, hasValidCodeSyntax, hasValidCodeSyntaxAsync, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
+import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, ValidationError } from "./errors";
+export { EvalGateError, RateLimitError, AuthenticationError, ValidationError, NetworkError, };
+export { type AssertionLLMConfig, configureAssertions, containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, containsLanguageAsync, expect, followsInstructions, getAssertionConfig, hasConsistency, hasConsistencyAsync, hasFactualAccuracy, hasFactualAccuracyAsync, hasLength, hasNoHallucinations, hasNoHallucinationsAsync, hasNoToxicity, hasNoToxicityAsync, hasPII, hasReadabilityScore, hasSentiment, hasSentimentAsync, hasSentimentWithScore, hasValidCodeSyntax, hasValidCodeSyntaxAsync, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinDuration, respondedWithinTime, respondedWithinTimeSince, type SentimentAsyncResult, similarTo, toSemanticallyContain, withinRange, } from "./assertions";
+export { EvalGateError as SDKError } from "./errors";
 import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
 export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
 export { cloneContext, mergeContexts, validateContext, } from "./runtime/context";
-export { createContext as createEvalContext, createResult, defineEval, defineSuite, evalai, } from "./runtime/eval";
+export { createContext as createEvalContext, createResult, defineEval, defineSuite, evalai, getFilteredSpecs, } from "./runtime/eval";
 export { createLocalExecutor, defaultLocalExecutor, } from "./runtime/executor";
 export { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRuntime, } from "./runtime/registry";
 export type { CloudExecutor, DefineEvalFunction, EvalContext, EvalExecutor, EvalExecutorInterface, EvalOptions, EvalResult, EvalRuntime, EvalSpec, ExecutorCapabilities, LocalExecutor, SpecConfig, SpecOptions, WorkerExecutor, } from "./runtime/types";
@@ -27,13 +27,14 @@ import { exportData, importData } from "./export";
 export { exportData, importData };
 export type { ExportFormat, ExportFormat as ExportType };
 export { RequestBatcher } from "./batch";
-export { CacheTTL, RequestCache } from "./cache";
+export { CacheTTL } from "./cache";
 export { type CheckArgs, EXIT, parseArgs, runCheck } from "./cli/check";
 export { traceAnthropic } from "./integrations/anthropic";
 export { traceOpenAI } from "./integrations/openai";
 export { type OpenAIChatEvalCase, type OpenAIChatEvalOptions, type OpenAIChatEvalResult, openAIChatEval, } from "./integrations/openai-eval";
 export { Logger } from "./logger";
 export { extendExpectWithToPassGate } from "./matchers";
+export { createOTelExporter, type OTelAttribute, type OTelEvent, OTelExporter, type OTelExporterOptions, type OTelExportPayload, type OTelSpan, } from "./otel";
 export { autoPaginate, autoPaginateGenerator, createPaginatedIterator, decodeCursor, encodeCursor, PaginatedIterator, type PaginatedResponse, type PaginationParams, } from "./pagination";
 export { ARTIFACTS, type Baseline, type BaselineTolerance, GATE_CATEGORY, GATE_EXIT, type GateCategory, type GateExitCode, REPORT_SCHEMA_VERSION, type RegressionDelta, type RegressionReport, } from "./regression";
 export { batchProcess, batchRead, RateLimiter, streamEvaluation, } from "./streaming";

package/dist/index.js CHANGED Viewed

@@ -8,8 +8,9 @@
  * @packageDocumentation
  */
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntaxAsync = exports.hasValidCodeSyntax = exports.hasSentimentAsync = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicityAsync = exports.hasNoToxicity = exports.hasNoHallucinationsAsync = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracyAsync = exports.hasFactualAccuracy = exports.getAssertionConfig = exports.followsInstructions = exports.expect = exports.containsLanguageAsync = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.configureAssertions = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
-exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginateGenerator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.saveSnapshot = exports.compareSnapshots = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = void 0;
+exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.SDKError = exports.withinRange = exports.toSemanticallyContain = exports.similarTo = exports.respondedWithinTimeSince = exports.respondedWithinTime = exports.respondedWithinDuration = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntaxAsync = exports.hasValidCodeSyntax = exports.hasSentimentWithScore = exports.hasSentimentAsync = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicityAsync = exports.hasNoToxicity = exports.hasNoHallucinationsAsync = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracyAsync = exports.hasFactualAccuracy = exports.hasConsistencyAsync = exports.hasConsistency = exports.getAssertionConfig = exports.followsInstructions = exports.expect = exports.containsLanguageAsync = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.configureAssertions = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
+exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginateGenerator = exports.autoPaginate = exports.OTelExporter = exports.createOTelExporter = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.saveSnapshot = exports.compareSnapshots = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.getFilteredSpecs = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = void 0;
+exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = void 0;
 // Main SDK exports
 var client_1 = require("./client");
 Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
@@ -19,7 +20,7 @@ Object.defineProperty(exports, "AuthenticationError", { enumerable: true, get: f
 Object.defineProperty(exports, "EvalGateError", { enumerable: true, get: function () { return errors_1.EvalGateError; } });
 Object.defineProperty(exports, "NetworkError", { enumerable: true, get: function () { return errors_1.NetworkError; } });
 Object.defineProperty(exports, "RateLimitError", { enumerable: true, get: function () { return errors_1.RateLimitError; } });
-Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.SDKError; } });
+Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_1.ValidationError; } });
 // Enhanced assertions (Tier 1.3)
 var assertions_1 = require("./assertions");
 // LLM config
@@ -33,6 +34,8 @@ Object.defineProperty(exports, "containsLanguageAsync", { enumerable: true, get:
 Object.defineProperty(exports, "expect", { enumerable: true, get: function () { return assertions_1.expect; } });
 Object.defineProperty(exports, "followsInstructions", { enumerable: true, get: function () { return assertions_1.followsInstructions; } });
 Object.defineProperty(exports, "getAssertionConfig", { enumerable: true, get: function () { return assertions_1.getAssertionConfig; } });
+Object.defineProperty(exports, "hasConsistency", { enumerable: true, get: function () { return assertions_1.hasConsistency; } });
+Object.defineProperty(exports, "hasConsistencyAsync", { enumerable: true, get: function () { return assertions_1.hasConsistencyAsync; } });
 Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracy; } });
 Object.defineProperty(exports, "hasFactualAccuracyAsync", { enumerable: true, get: function () { return assertions_1.hasFactualAccuracyAsync; } });
 Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
@@ -44,6 +47,7 @@ Object.defineProperty(exports, "hasPII", { enumerable: true, get: function () {
 Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
 Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
 Object.defineProperty(exports, "hasSentimentAsync", { enumerable: true, get: function () { return assertions_1.hasSentimentAsync; } });
+Object.defineProperty(exports, "hasSentimentWithScore", { enumerable: true, get: function () { return assertions_1.hasSentimentWithScore; } });
 Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
 Object.defineProperty(exports, "hasValidCodeSyntaxAsync", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntaxAsync; } });
 Object.defineProperty(exports, "isValidEmail", { enumerable: true, get: function () { return assertions_1.isValidEmail; } });
@@ -51,9 +55,15 @@ Object.defineProperty(exports, "isValidURL", { enumerable: true, get: function (
 Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
 Object.defineProperty(exports, "matchesSchema", { enumerable: true, get: function () { return assertions_1.matchesSchema; } });
 Object.defineProperty(exports, "notContainsPII", { enumerable: true, get: function () { return assertions_1.notContainsPII; } });
+Object.defineProperty(exports, "respondedWithinDuration", { enumerable: true, get: function () { return assertions_1.respondedWithinDuration; } });
 Object.defineProperty(exports, "respondedWithinTime", { enumerable: true, get: function () { return assertions_1.respondedWithinTime; } });
+Object.defineProperty(exports, "respondedWithinTimeSince", { enumerable: true, get: function () { return assertions_1.respondedWithinTimeSince; } });
 Object.defineProperty(exports, "similarTo", { enumerable: true, get: function () { return assertions_1.similarTo; } });
+Object.defineProperty(exports, "toSemanticallyContain", { enumerable: true, get: function () { return assertions_1.toSemanticallyContain; } });
 Object.defineProperty(exports, "withinRange", { enumerable: true, get: function () { return assertions_1.withinRange; } });
+// Legacy backward compat — SDKError is the old name for EvalGateError
+var errors_2 = require("./errors");
+Object.defineProperty(exports, "SDKError", { enumerable: true, get: function () { return errors_2.EvalGateError; } });
 // Context propagation (Tier 2.9)
 const context_1 = require("./context");
 Object.defineProperty(exports, "createContext", { enumerable: true, get: function () { return context_1.createContext; } });
@@ -71,6 +81,7 @@ Object.defineProperty(exports, "createResult", { enumerable: true, get: function
 Object.defineProperty(exports, "defineEval", { enumerable: true, get: function () { return eval_1.defineEval; } });
 Object.defineProperty(exports, "defineSuite", { enumerable: true, get: function () { return eval_1.defineSuite; } });
 Object.defineProperty(exports, "evalai", { enumerable: true, get: function () { return eval_1.evalai; } });
+Object.defineProperty(exports, "getFilteredSpecs", { enumerable: true, get: function () { return eval_1.getFilteredSpecs; } });
 var executor_1 = require("./runtime/executor");
 Object.defineProperty(exports, "createLocalExecutor", { enumerable: true, get: function () { return executor_1.createLocalExecutor; } });
 Object.defineProperty(exports, "defaultLocalExecutor", { enumerable: true, get: function () { return executor_1.defaultLocalExecutor; } });
@@ -104,11 +115,12 @@ Object.defineProperty(exports, "importData", { enumerable: true, get: function (
 var batch_1 = require("./batch");
 Object.defineProperty(exports, "RequestBatcher", { enumerable: true, get: function () { return batch_1.RequestBatcher; } });
 // Performance optimization utilities (v1.3.0)
-// Note: RequestCache and CacheTTL are for advanced users only
-// Most users don't need these - caching is automatic
+// Note: CacheTTL is for advanced users only
+// Most users don't need this - caching is automatic
+// RequestCache is intentionally NOT exported — it's an internal HTTP cache.
+// Use CacheTTL to configure cache durations via client options.
 var cache_1 = require("./cache");
 Object.defineProperty(exports, "CacheTTL", { enumerable: true, get: function () { return cache_1.CacheTTL; } });
-Object.defineProperty(exports, "RequestCache", { enumerable: true, get: function () { return cache_1.RequestCache; } });
 // CLI (programmatic use)
 var check_1 = require("./cli/check");
 Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
@@ -128,6 +140,10 @@ Object.defineProperty(exports, "Logger", { enumerable: true, get: function () {
 // Vitest matcher: expect(await openAIChatEval(...)).toPassGate()
 var matchers_1 = require("./matchers");
 Object.defineProperty(exports, "extendExpectWithToPassGate", { enumerable: true, get: function () { return matchers_1.extendExpectWithToPassGate; } });
+// OpenTelemetry export
+var otel_1 = require("./otel");
+Object.defineProperty(exports, "createOTelExporter", { enumerable: true, get: function () { return otel_1.createOTelExporter; } });
+Object.defineProperty(exports, "OTelExporter", { enumerable: true, get: function () { return otel_1.OTelExporter; } });
 var pagination_1 = require("./pagination");
 Object.defineProperty(exports, "autoPaginate", { enumerable: true, get: function () { return pagination_1.autoPaginate; } });
 Object.defineProperty(exports, "autoPaginateGenerator", { enumerable: true, get: function () { return pagination_1.autoPaginateGenerator; } });