npm - @pauly4010/evalai-sdk - Versions diffs - 1.9.0 → 1.9.1 - Mend

@pauly4010/evalai-sdk 1.9.0 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

package/README.md +136 -23
package/dist/assertions.js +51 -18
package/dist/batch.js +8 -2
package/dist/cli/api.js +3 -1
package/dist/cli/check.js +19 -6
package/dist/cli/ci-context.js +3 -1
package/dist/cli/config.js +28 -8
package/dist/cli/diff.js +14 -9
package/dist/cli/discover.js +18 -7
package/dist/cli/doctor.js +43 -9
package/dist/cli/explain.js +37 -11
package/dist/cli/formatters/human.js +4 -1
package/dist/cli/formatters/pr-comment.js +3 -1
package/dist/cli/gate.js +6 -2
package/dist/cli/impact-analysis.js +6 -5
package/dist/cli/index.js +18 -6
package/dist/cli/manifest.d.ts +3 -5
package/dist/cli/manifest.js +21 -14
package/dist/cli/migrate.js +4 -4
package/dist/cli/policy-packs.js +8 -2
package/dist/cli/print-config.js +19 -4
package/dist/cli/regression-gate.js +8 -2
package/dist/cli/report/build-check-report.js +8 -2
package/dist/cli/run.js +11 -5
package/dist/cli/share.js +3 -1
package/dist/cli/upgrade.js +2 -1
package/dist/client.d.ts +16 -19
package/dist/client.js +60 -43
package/dist/client.request.test.d.ts +1 -1
package/dist/client.request.test.js +222 -147
package/dist/context.js +3 -1
package/dist/errors.js +11 -4
package/dist/export.js +3 -1
package/dist/index.d.ts +8 -8
package/dist/index.js +19 -19
package/dist/integrations/anthropic.d.ts +20 -1
package/dist/integrations/openai-eval.js +4 -2
package/dist/integrations/openai.d.ts +24 -1
package/dist/local.js +3 -1
package/dist/logger.js +6 -2
package/dist/pagination.js +6 -2
package/dist/runtime/adapters/config-to-dsl.js +12 -9
package/dist/runtime/adapters/testsuite-to-dsl.d.ts +1 -1
package/dist/runtime/adapters/testsuite-to-dsl.js +11 -6
package/dist/runtime/eval.d.ts +1 -1
package/dist/runtime/eval.js +12 -5
package/dist/runtime/execution-mode.js +13 -9
package/dist/runtime/registry.js +8 -21
package/dist/runtime/run-report.d.ts +0 -2
package/dist/runtime/run-report.js +12 -10
package/dist/testing.js +7 -2
package/dist/types.d.ts +100 -69
package/dist/utils/input-hash.js +4 -1
package/dist/version.d.ts +1 -1
package/dist/version.js +1 -1
package/dist/workflows.js +62 -14
package/package.json +115 -111

package/dist/integrations/openai.d.ts CHANGED Viewed

@@ -18,6 +18,28 @@
  * ```
  */
 import type { AIEvalClient } from "../client";
+interface OpenAIChatParams {
+    model: string;
+    messages: unknown[];
+    temperature?: number;
+    max_tokens?: number;
+    [key: string]: unknown;
+}
+interface OpenAIChatCompletion {
+    choices: Array<{
+        message?: unknown;
+        finish_reason?: unknown;
+    }>;
+    usage?: unknown;
+    [key: string]: unknown;
+}
+interface OpenAIClient {
+    chat: {
+        completions: {
+            create: (params: OpenAIChatParams, requestOptions?: Record<string, unknown>) => Promise<OpenAIChatCompletion>;
+        };
+    };
+}
 export interface OpenAITraceOptions {
     /** Whether to capture input (default: true) */
     captureInput?: boolean;
@@ -48,7 +70,7 @@ export interface OpenAITraceOptions {
  * });
  * ```
  */
-export declare function traceOpenAI(openai: any, evalClient: AIEvalClient, options?: OpenAITraceOptions): any;
+export declare function traceOpenAI(openai: OpenAIClient, evalClient: AIEvalClient, options?: OpenAITraceOptions): OpenAIClient;
 /**
  * Manual trace wrapper for OpenAI calls
  *
@@ -67,3 +89,4 @@ export declare function traceOpenAI(openai: any, evalClient: AIEvalClient, optio
  * ```
  */
 export declare function traceOpenAICall<T>(evalClient: AIEvalClient, name: string, fn: () => Promise<T>, options?: OpenAITraceOptions): Promise<T>;
+export {};

package/dist/local.js CHANGED Viewed

@@ -31,7 +31,9 @@ class LocalStorage {
         try {
             await promises_1.default.mkdir(this.directory, { recursive: true });
             await promises_1.default.mkdir(node_path_1.default.join(this.directory, "traces"), { recursive: true });
-            await promises_1.default.mkdir(node_path_1.default.join(this.directory, "evaluations"), { recursive: true });
+            await promises_1.default.mkdir(node_path_1.default.join(this.directory, "evaluations"), {
+                recursive: true,
+            });
             await promises_1.default.mkdir(node_path_1.default.join(this.directory, "spans"), { recursive: true });
             // Load existing data
             await this.loadAllData();

package/dist/logger.js CHANGED Viewed

@@ -133,10 +133,14 @@ class Logger {
         }
         // Level
         const levelStr = entry.level.toUpperCase().padEnd(5);
-        parts.push(this.options.pretty ? `${LOG_COLORS[entry.level]}${levelStr}${COLOR_RESET}` : levelStr);
+        parts.push(this.options.pretty
+            ? `${LOG_COLORS[entry.level]}${levelStr}${COLOR_RESET}`
+            : levelStr);
         // Prefix
         if (entry.prefix) {
-            parts.push(this.options.pretty ? `\x1b[35m[${entry.prefix}]${COLOR_RESET}` : `[${entry.prefix}]`);
+            parts.push(this.options.pretty
+                ? `\x1b[35m[${entry.prefix}]${COLOR_RESET}`
+                : `[${entry.prefix}]`);
         }
         // Message
         parts.push(entry.message);

package/dist/pagination.js CHANGED Viewed

@@ -113,8 +113,12 @@ function createPaginationMeta(items, limit, offset, total) {
         limit,
         offset,
         total,
-        nextCursor: hasMore ? encodeCursor({ offset: offset + limit, limit }) : undefined,
-        prevCursor: offset > 0 ? encodeCursor({ offset: Math.max(0, offset - limit), limit }) : undefined,
+        nextCursor: hasMore
+            ? encodeCursor({ offset: offset + limit, limit })
+            : undefined,
+        prevCursor: offset > 0
+            ? encodeCursor({ offset: Math.max(0, offset - limit), limit })
+            : undefined,
     };
 }
 /**

package/dist/runtime/adapters/config-to-dsl.js CHANGED Viewed

@@ -58,13 +58,13 @@ function migrateTestSuiteToDSL(testSuite, outputPath) {
     };
     try {
         // Create isolated runtime for migration
-        const runtime = (0, registry_1.createEvalRuntime)();
+        const _runtime = (0, registry_1.createEvalRuntime)();
         // Use the runtime handle to define specs
-        const boundDefineEval = ((nameOrConfig, executor, options) => {
+        const _boundDefineEval = (nameOrConfig, executor, options) => {
             // The runtime handle manages the active runtime internally
             const { defineEval } = require("../eval");
             return defineEval(nameOrConfig, executor, options);
-        });
+        };
         // Get test suite data via public methods
         // Note: We need to access the internal data structure for migration
         // This is a limitation of the current TestSuite design
@@ -88,7 +88,7 @@ function migrateTestSuiteToDSL(testSuite, outputPath) {
  * Extract data from TestSuite instance
  * This is a workaround for the private properties
  */
-function extractTestSuiteData(testSuite) {
+function extractTestSuiteData(_testSuite) {
     // Since TestSuite properties are private, we need to reconstruct from usage
     // This is a limitation that should be addressed in a future version
     // For now, we'll create a basic structure and warn the user
@@ -124,13 +124,13 @@ function migrateConfigToDSL(configPath, outputPath) {
         const configContent = fs.readFileSync(configPath, "utf-8");
         const config = JSON.parse(configContent);
         // Create isolated runtime for migration
-        const runtime = (0, registry_1.createEvalRuntime)();
+        const _runtime = (0, registry_1.createEvalRuntime)();
         // Use the runtime handle to define specs
-        const boundDefineEval = ((nameOrConfig, executor, options) => {
+        const _boundDefineEval = (nameOrConfig, executor, options) => {
             // The runtime handle manages the active runtime internally
             const { defineEval } = require("../eval");
             return defineEval(nameOrConfig, executor, options);
-        });
+        };
         // Generate basic DSL structure from config
         const dslContent = generateDSLFromConfig(config);
         // Write DSL file
@@ -333,13 +333,16 @@ function findTestSuiteFiles(projectRoot) {
         const entries = fs.readdirSync(dir, { withFileTypes: true });
         for (const entry of entries) {
             const fullPath = path.join(dir, entry.name);
-            if (entry.isDirectory() && !entry.name.startsWith(".") && entry.name !== "node_modules") {
+            if (entry.isDirectory() &&
+                !entry.name.startsWith(".") &&
+                entry.name !== "node_modules") {
                 scanDirectory(fullPath);
             }
             else if (entry.isFile() && /\.(ts|js)$/.test(entry.name)) {
                 try {
                     const content = fs.readFileSync(fullPath, "utf-8");
-                    if (content.includes("createTestSuite") || content.includes("TestSuite")) {
+                    if (content.includes("createTestSuite") ||
+                        content.includes("TestSuite")) {
                         testFiles.push(fullPath);
                     }
                 }

package/dist/runtime/adapters/testsuite-to-dsl.d.ts CHANGED Viewed

@@ -38,7 +38,7 @@ export declare function generateDefineEvalCode(suite: TestSuite, options?: Parti
  */
 export interface TestSuiteConfig {
     /** Test cases to run */
-    cases: any[];
+    cases: unknown[];
     /** Function that generates output from input */
     executor?: (input: string) => Promise<string>;
     /** Run tests in parallel (default: true) */

package/dist/runtime/adapters/testsuite-to-dsl.js CHANGED Viewed

@@ -18,13 +18,13 @@ const registry_1 = require("../registry");
  * @returns Array of EvalSpec definitions
  */
 function adaptTestSuite(suite, options = {}) {
-    const { includeProvenance = true, preserveIds = true, generateHelpers = true } = options;
+    const { includeProvenance = true, preserveIds = true, generateHelpers = true, } = options;
     // Get test suite data using the new getters
     const tests = suite.getTests();
     const metadata = suite.getMetadata();
     const config = suite.getConfig();
     // Create a temporary runtime for spec generation
-    const runtime = (0, registry_1.createEvalRuntime)();
+    const _runtime = (0, registry_1.createEvalRuntime)();
     const specs = [];
     try {
         // Convert each test case to an EvalSpec
@@ -182,7 +182,7 @@ function generateDefineEvalCode(suite, options = {}) {
         `import { defineEval, createResult } from '@pauly4010/evalai-sdk';`,
         "",
     ];
-    const specCode = specs.map((spec, index) => {
+    const specCode = specs.map((spec, _index) => {
         const helperCode = generateHelperFunctions(spec, options);
         return [
             `defineEval("${spec.name}", async (context) => {`,
@@ -208,7 +208,12 @@ function generateDefineEvalCode(suite, options = {}) {
     });
     const helperFunctions = generateHelperFunctionsForSuite(specs, options);
     const evaluationFunction = generateEvaluationFunction();
-    return [...imports, ...helperFunctions, ...evaluationFunction, ...specCode].join("\n");
+    return [
+        ...imports,
+        ...helperFunctions,
+        ...evaluationFunction,
+        ...specCode,
+    ].join("\n");
 }
 /**
  * Generate helper functions for a specific spec
@@ -223,7 +228,7 @@ function generateHelperFunctions(spec, options) {
         helpers.push(`function evaluateLegacyAssertion(output: string, expected: string): boolean {`, `  return output === expected;`, `}`);
     }
     // Add helper for test evaluation
-    helpers.push(`async function evaluateLegacyTest(input: string, expected?: string): Promise<any> {`, `  // This function simulates the legacy test evaluation`, `  const output = await simulateLegacyExecutor(input);`, `  `, `  if (expected !== undefined) {`, `    const passed = evaluateLegacyAssertion(output, expected);`, `    return createResult({`, `      pass: passed,`, `      score: passed ? 100 : 0,`, `      metadata: {`, `        input,`, `        expected,`, `      },`, `    });`, `  }`, `  `, `  return createResult({`, `    pass: output.length > 0,`, `    score: output.length > 0 ? 100 : 0,`, `    metadata: { input },`, `  });`, `}`);
+    helpers.push(`async function evaluateLegacyTest(input: string, expected?: string): Promise<unknown> {`, `  // This function simulates the legacy test evaluation`, `  const output = await simulateLegacyExecutor(input);`, `  `, `  if (expected !== undefined) {`, `    const passed = evaluateLegacyAssertion(output, expected);`, `    return createResult({`, `      pass: passed,`, `      score: passed ? 100 : 0,`, `      metadata: {`, `        input,`, `        expected,`, `      },`, `    });`, `  }`, `  `, `  return createResult({`, `    pass: output.length > 0,`, `    score: output.length > 0 ? 100 : 0,`, `    metadata: { input },`, `  });`, `}`);
     // Add executor simulation
     helpers.push(`async function simulateLegacyExecutor(input: string): Promise<string> {`, `  // This function simulates the legacy executor`, `  // In a real migration, this would be replaced with the actual executor`, `  return input; // Echo for demonstration`, `}`);
     return helpers.join("\n\n");
@@ -248,7 +253,7 @@ function generateHelperFunctionsForSuite(specs, options) {
 function generateEvaluationFunction() {
     return [
         `// Legacy test evaluation function`,
-        `function evaluateLegacyTest(input: string, expected?: string): any {`,
+        `function evaluateLegacyTest(input: string, expected?: string): unknown {`,
         `  // This function evaluates legacy test logic`,
         `  // In a real migration, this would contain the actual test logic`,
         `  `,

package/dist/runtime/eval.d.ts CHANGED Viewed

@@ -21,7 +21,7 @@ export declare const evalai: {
  * Suite definition for grouping related specifications
  * This will be expanded in Layer 3 for dependency graph support
  */
-export declare function defineSuite(name: string, specs: (() => void)[]): void;
+export declare function defineSuite(_name: string, specs: (() => void)[]): void;
 /**
  * Helper function to create specification contexts
  * Useful for testing and manual execution

package/dist/runtime/eval.js CHANGED Viewed

@@ -43,10 +43,10 @@ exports.evalai = exports.defineEval = void 0;
 exports.defineSuite = defineSuite;
 exports.createContext = createContext;
 exports.createResult = createResult;
-const path = __importStar(require("node:path"));
 const crypto = __importStar(require("node:crypto"));
-const types_1 = require("./types");
+const path = __importStar(require("node:path"));
 const registry_1 = require("./registry");
+const types_1 = require("./types");
 /**
  * Extract AST position from call stack
  * This provides stable identity that survives renames but changes when logic moves
@@ -61,7 +61,9 @@ function getCallerPosition() {
     // Skip current function and find the actual caller
     for (let i = 3; i < lines.length; i++) {
         const line = lines[i];
-        if (!line || line.includes("node_modules") || line.includes("internal/modules")) {
+        if (!line ||
+            line.includes("node_modules") ||
+            line.includes("internal/modules")) {
             continue;
         }
         // Extract file path, line, and column
@@ -95,7 +97,12 @@ function generateSpecId(namespace, filePath, name, position) {
     const projectRoot = process.cwd();
     const relativePath = path.relative(projectRoot, filePath);
     const canonicalPath = relativePath.split(path.sep).join("/"); // Force POSIX separators
-    const components = [namespace, canonicalPath, name, `${position.line}:${position.column}`];
+    const components = [
+        namespace,
+        canonicalPath,
+        name,
+        `${position.line}:${position.column}`,
+    ];
     const content = components.join("|");
     return crypto.createHash("sha256").update(content).digest("hex").slice(0, 20);
 }
@@ -200,7 +207,7 @@ exports.evalai = {
  * Suite definition for grouping related specifications
  * This will be expanded in Layer 3 for dependency graph support
  */
-function defineSuite(name, specs) {
+function defineSuite(_name, specs) {
     // For now, just execute the specs to register them
     // In Layer 3, this will build the dependency graph
     for (const specFn of specs) {

package/dist/runtime/execution-mode.js CHANGED Viewed

@@ -123,7 +123,7 @@ async function findSpecFiles(projectRoot) {
             const files = await searchFiles(projectRoot, pattern, projectRoot);
             foundFiles.push(...files);
         }
-        catch (error) {
+        catch (_error) {
             // Ignore errors for non-existent paths
         }
     }
@@ -136,7 +136,7 @@ async function findSpecFiles(projectRoot) {
                 specFilesWithDefineEval.push(file);
             }
         }
-        catch (error) {
+        catch (_error) {
             // Ignore read errors
         }
     }
@@ -163,7 +163,7 @@ async function searchFiles(dir, pattern, projectRoot) {
             }
         }
     }
-    catch (error) {
+    catch (_error) {
         // Ignore permission errors
     }
     return results;
@@ -172,9 +172,9 @@ async function searchFiles(dir, pattern, projectRoot) {
  * Simple pattern matching (placeholder for proper glob)
  */
 function matchesPattern(filePath, pattern, projectRoot) {
-    const fileName = path.basename(filePath);
-    const ext = path.extname(filePath);
-    const dir = path.dirname(filePath);
+    const _fileName = path.basename(filePath);
+    const _ext = path.extname(filePath);
+    const _dir = path.dirname(filePath);
     // Convert glob pattern to regex
     // Handle **/ and * patterns correctly
     let regexPattern = pattern;
@@ -203,7 +203,7 @@ async function findLegacyConfig(projectRoot) {
             await fs.access(fullPath);
             return fullPath;
         }
-        catch (error) {
+        catch (_error) {
             // File doesn't exist, continue
         }
     }
@@ -305,12 +305,16 @@ function printExecutionModeInfo(config) {
     const validation = validateExecutionMode(config);
     if (validation.warnings.length > 0) {
         console.log(`⚠️  Warnings:`);
-        validation.warnings.forEach((warning) => console.log(`   ${warning}`));
+        validation.warnings.forEach((warning) => {
+            console.log(`   ${warning}`);
+        });
         console.log(``);
     }
     if (validation.errors.length > 0) {
         console.log(`❌ Errors:`);
-        validation.errors.forEach((error) => console.log(`   ${error}`));
+        validation.errors.forEach((error) => {
+            console.log(`   ${error}`);
+        });
         console.log(``);
     }
     const recommended = getRecommendedExecutionMode(config);

package/dist/runtime/registry.js CHANGED Viewed

@@ -64,26 +64,11 @@ class EvalRuntimeImpl {
      * Content-addressable to prevent collisions
      */
     generateNamespace(projectRoot) {
-        return crypto.createHash("sha256").update(path.resolve(projectRoot)).digest("hex").slice(0, 12);
-    }
-    /**
-     * Generate content-addressable specification ID
-     * Uses AST position for identity stability with canonical paths
-     */
-    generateSpecId(identity) {
-        // Canonicalize path: relative to project root with POSIX separators
-        const projectRoot = process.cwd();
-        const relativePath = path.relative(projectRoot, identity.filePath);
-        const canonicalPath = relativePath.split(path.sep).join("/"); // Force POSIX separators
-        const components = [
-            identity.namespace,
-            canonicalPath,
-            identity.name,
-            identity.suitePath || "",
-            `${identity.position.line}:${identity.position.column}`,
-        ];
-        const content = components.join("|");
-        return crypto.createHash("sha256").update(content).digest("hex").slice(0, 20);
+        return crypto
+            .createHash("sha256")
+            .update(path.resolve(projectRoot))
+            .digest("hex")
+            .slice(0, 12);
     }
     /**
      * Register a new specification
@@ -274,7 +259,9 @@ class EvalRuntimeImpl {
         }
         let status = "healthy";
         if (issues.length > 0) {
-            status = issues.some((issue) => issue.includes("error")) ? "error" : "warning";
+            status = issues.some((issue) => issue.includes("error"))
+                ? "error"
+                : "warning";
         }
         return {
             status,

package/dist/runtime/run-report.d.ts CHANGED Viewed

@@ -144,8 +144,6 @@ export interface RunConfig {
  * RunReport builder for creating deterministic reports
  */
 export declare class RunReportBuilder {
-    private runId;
-    private runtimeInfo;
     private report;
     /**
      * Initialize report with basic metadata

package/dist/runtime/run-report.js CHANGED Viewed

@@ -54,8 +54,6 @@ class RunReportBuilder {
      * Initialize report with basic metadata
      */
     constructor(runId, runtimeInfo) {
-        this.runId = runId;
-        this.runtimeInfo = runtimeInfo;
         this.report = {
             schemaVersion: exports.RUN_REPORT_SCHEMA_VERSION,
             results: [],
@@ -97,11 +95,13 @@ class RunReportBuilder {
                 message: assertion.message,
             })),
         };
-        this.report.results.push(runResult);
+        this.report.results?.push(runResult);
         // Update summary
         this.updateSummary(result);
         // Add to failures if needed
-        if (!result.pass || result.classification === "error" || result.classification === "timeout") {
+        if (!result.pass ||
+            result.classification === "error" ||
+            result.classification === "timeout") {
             this.addFailure(testId, testName, filePath, position, result);
         }
     }
@@ -127,12 +127,14 @@ class RunReportBuilder {
             summary.failed++;
         }
         // Calculate rates and averages
-        summary.passRate = summary.total > 0 ? (summary.passed / summary.total) * 100 : 0;
+        summary.passRate =
+            summary.total > 0 ? (summary.passed / summary.total) * 100 : 0;
         // Average score calculation (excluding errors/timeouts)
-        const scoredResults = this.report.results.filter((r) => r.score > 0);
+        const scoredResults = this.report.results?.filter((r) => r.score > 0) || [];
         summary.averageScore =
             scoredResults.length > 0
-                ? scoredResults.reduce((sum, r) => sum + r.score, 0) / scoredResults.length
+                ? scoredResults.reduce((sum, r) => sum + r.score, 0) /
+                    scoredResults.length
                 : 0;
     }
     /**
@@ -153,7 +155,7 @@ class RunReportBuilder {
             message: result.error || "Test failed",
             timestamp: new Date().toISOString(),
         };
-        this.report.failures.push(failure);
+        this.report.failures?.push(failure);
     }
     /**
      * Set execution configuration
@@ -175,8 +177,8 @@ class RunReportBuilder {
      */
     build() {
         // Sort results and failures by testId for determinism
-        this.report.results.sort((a, b) => a.testId.localeCompare(b.testId));
-        this.report.failures.sort((a, b) => a.testId.localeCompare(b.testId));
+        this.report.results?.sort((a, b) => a.testId.localeCompare(b.testId));
+        this.report.failures?.sort((a, b) => a.testId.localeCompare(b.testId));
         // Set completion timestamp
         this.report.finishedAt = new Date().toISOString();
         const finalReport = this.report;

package/dist/testing.js CHANGED Viewed

@@ -59,7 +59,10 @@ class TestSuite {
                 if (this.config.executor) {
                     const timeout = this.config.timeout || 30000;
                     const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error(`Test timeout after ${timeout}ms`)), timeout));
-                    actual = await Promise.race([this.config.executor(testCase.input), timeoutPromise]);
+                    actual = await Promise.race([
+                        this.config.executor(testCase.input),
+                        timeoutPromise,
+                    ]);
                 }
                 else if (testCase.expected) {
                     actual = testCase.expected; // Use expected as actual if no executor
@@ -127,7 +130,9 @@ class TestSuite {
         const retriedCases = [];
         const retries = this.config.retries ?? 0;
         if (retries > 0 && results.length > 0) {
-            const failingIndices = results.map((r, i) => (r.passed ? -1 : i)).filter((i) => i >= 0);
+            const failingIndices = results
+                .map((r, i) => (r.passed ? -1 : i))
+                .filter((i) => i >= 0);
             for (let attempt = 0; attempt < retries && failingIndices.length > 0; attempt++) {
                 const toRetry = [...failingIndices];
                 failingIndices.length = 0;