npm - @pauly4010/evalai-sdk - Versions diffs - 1.8.0 → 1.9.0 - Mend

@pauly4010/evalai-sdk 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

package/CHANGELOG.md +54 -0
package/dist/cli/ci.d.ts +45 -0
package/dist/cli/ci.js +192 -0
package/dist/cli/diff.d.ts +173 -0
package/dist/cli/diff.js +680 -0
package/dist/cli/discover.d.ts +84 -0
package/dist/cli/discover.js +408 -0
package/dist/cli/doctor.js +19 -10
package/dist/cli/env.d.ts +21 -0
package/dist/cli/env.js +42 -0
package/dist/cli/explain.js +143 -37
package/dist/cli/impact-analysis.d.ts +63 -0
package/dist/cli/impact-analysis.js +251 -0
package/dist/cli/index.js +173 -0
package/dist/cli/manifest.d.ts +105 -0
package/dist/cli/manifest.js +275 -0
package/dist/cli/migrate.d.ts +41 -0
package/dist/cli/migrate.js +349 -0
package/dist/cli/print-config.js +18 -14
package/dist/cli/run.d.ts +101 -0
package/dist/cli/run.js +389 -0
package/dist/cli/workspace.d.ts +28 -0
package/dist/cli/workspace.js +58 -0
package/dist/index.d.ts +6 -0
package/dist/index.js +30 -5
package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
package/dist/runtime/adapters/config-to-dsl.js +391 -0
package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
package/dist/runtime/adapters/testsuite-to-dsl.js +271 -0
package/dist/runtime/context.d.ts +26 -0
package/dist/runtime/context.js +74 -0
package/dist/runtime/eval.d.ts +46 -0
package/dist/runtime/eval.js +237 -0
package/dist/runtime/execution-mode.d.ts +80 -0
package/dist/runtime/execution-mode.js +353 -0
package/dist/runtime/executor.d.ts +16 -0
package/dist/runtime/executor.js +152 -0
package/dist/runtime/registry.d.ts +78 -0
package/dist/runtime/registry.js +416 -0
package/dist/runtime/run-report.d.ts +202 -0
package/dist/runtime/run-report.js +220 -0
package/dist/runtime/types.d.ts +356 -0
package/dist/runtime/types.js +76 -0
package/dist/testing.d.ts +65 -0
package/dist/testing.js +42 -0
package/dist/version.d.ts +1 -1
package/dist/version.js +1 -1
package/package.json +4 -3

package/dist/cli/migrate.js ADDED Viewed

@@ -0,0 +1,349 @@
+"use strict";
+/**
+ * COMPAT-203: Config → DSL migration generator (file-based)
+ *
+ * CLI command: evalai migrate config --in evalai.config.json --out eval/legacy.spec.ts
+ * Generates defineEval() calls with comments and TODOs for manual completion
+ */
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // reuses a helper already present on `this`; otherwise defines one that re-exports m[k] on o under k2
+    if (k2 === undefined) k2 = k; // target key defaults to the source key
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // true for: no descriptor, an accessor on a non-__esModule source, or a writable/configurable data property
+      desc = { enumerable: true, get: function() { return m[k]; } }; // swap in a getter so o[k2] always reads the current m[k]
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) { // fallback when Object.create is unavailable: plain one-time copy of the value
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // defines an enumerable `default` property on o holding v
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () { // builds a namespace-style object from a require()d module
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) { // on first call, rebinds ownKeys to the native lister or a for-in/hasOwnProperty fallback
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod; // modules flagged __esModule are returned untouched
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // copy every own key except "default"
+        __setModuleDefault(result, mod); // the original module object itself becomes result.default
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.migrateConfig = migrateConfig;
+exports.createMigrateCommand = createMigrateCommand;
+exports.validateConfigFile = validateConfigFile;
+exports.previewMigration = previewMigration;
+const commander_1 = require("commander");
+const fs = __importStar(require("node:fs/promises"));
+const path = __importStar(require("node:path"));
+const testsuite_to_dsl_1 = require("../runtime/adapters/testsuite-to-dsl");
+const testing_1 = require("../testing");
+/**
+ * Load a JSON config from disk.
+ *
+ * Reads `filePath` as UTF-8 text and hands it to JSON.parse. Any failure
+ * (read or parse) is rethrown as a new Error whose message starts with
+ * "Failed to read config file <filePath>: " followed by the original reason.
+ */
+async function readConfigFile(filePath) {
+    let raw;
+    try {
+        raw = await fs.readFile(filePath, "utf-8");
+        return JSON.parse(raw);
+    }
+    catch (err) {
+        const reason = err instanceof Error ? err.message : String(err);
+        throw new Error(`Failed to read config file ${filePath}: ${reason}`);
+    }
+}
+/**
+ * Collect every test suite described by a parsed config.
+ *
+ * Three shapes are recognised, in this order:
+ *  - `config.tests`      -> one suite named "config-tests" built from the
+ *                           top-level tests plus the top-level run settings
+ *  - `config.suites`     -> one suite per key
+ *  - `config.testSuites` -> one suite per key (alternative property name)
+ *
+ * Returns an array of `{ name, suite }` pairs; empty when none are present.
+ */
+function extractTestSuitesFromConfig(config) {
+    const collected = [];
+    if (config.tests) {
+        const { tests: cases, executor, timeout, parallel, stopOnFailure, retries } = config;
+        const suite = (0, testing_1.createTestSuite)("config-tests", {
+            cases,
+            executor,
+            timeout,
+            parallel,
+            stopOnFailure,
+            retries,
+        });
+        collected.push({ name: "config-tests", suite });
+    }
+    // Both property names hold a map of suite name -> suite config.
+    for (const key of ["suites", "testSuites"]) {
+        const group = config[key];
+        if (!group) {
+            continue;
+        }
+        for (const [suiteName, suiteConfig] of Object.entries(group)) {
+            const suite = (0, testing_1.createTestSuite)(suiteName, suiteConfig);
+            collected.push({ name: suiteName, suite });
+        }
+    }
+    return collected;
+}
+/**
+ * Build the banner comment and import line that open the generated file.
+ *
+ * The banner records the generation time (ISO-8601) and the resolved
+ * absolute input/output paths taken from `options.input` / `options.output`.
+ * `config` is accepted but not read.
+ */
+function generateFileHeader(config, options) {
+    const generatedAt = new Date().toISOString();
+    const sourcePath = path.resolve(options.input);
+    const targetPath = path.resolve(options.output);
+    // Each entry becomes one " * <text>" line inside the banner comment.
+    const bannerText = [
+        "Auto-generated EvalAI DSL from configuration",
+        "",
+        `Generated at: ${generatedAt}`,
+        `Source config: ${sourcePath}`,
+        `Output file: ${targetPath}`,
+        "",
+        "This file contains defineEval() specifications migrated from evalai.config.json",
+        "",
+        "⚠️  IMPORTANT: This is a best-effort migration. Manual review and completion required.",
+        "",
+        "Migration notes:",
+        "- Executors have been converted to async functions",
+        "- Assertions have been converted where possible",
+        "- Complex logic may need manual adaptation",
+        "- Review TODO comments for items requiring attention",
+    ];
+    const lines = ["/**"];
+    for (const text of bannerText) {
+        lines.push(` * ${text}`);
+    }
+    lines.push(" */", "", "import { defineEval, createResult } from '@pauly4010/evalai-sdk';", "");
+    return lines.join("\n");
+}
+/**
+ * Generate the helper functions shared by every migrated suite.
+ *
+ * Emits, in order: a `legacyExecutor` stub, `evaluateAssertions`, and
+ * `evaluateLegacyTest` (which calls the other two).
+ *
+ * The `legacyExecutor` stub is always emitted because the generated
+ * `evaluateLegacyTest` calls it unconditionally; leaving it out when the
+ * config has no executor would make the output reference an undefined
+ * function. When the config does define an executor, its description is
+ * collapsed onto one line before being embedded in a `//` comment, so a
+ * multi-line value cannot spill out of the comment into generated code.
+ * Non-string executors are rendered with JSON.stringify rather than
+ * `[object Object]`.
+ *
+ * @param config  Parsed config object; only `config.executor` is read.
+ * @param options Migration options (not read by this function).
+ * @returns Source text of the helper section.
+ */
+function generateGlobalHelpers(config, options) {
+    const helpers = [];
+    const hasExecutor = Boolean(config.executor);
+    // Render the executor as a single line that is safe inside a `//` comment.
+    const describeExecutor = (value) => {
+        const text = typeof value === "string" ? value : String(JSON.stringify(value));
+        return text.replace(/\s*(?:\r\n|[\r\n\u2028\u2029])\s*/g, " ");
+    };
+    const executorLines = [
+        `/**`,
+        hasExecutor
+            ? ` * Legacy executor function from config`
+            : ` * Placeholder executor (the source config did not define one)`,
+        ` * TODO: Replace with actual executor implementation`,
+        ` */`,
+        `async function legacyExecutor(input: string): Promise<string> {`,
+    ];
+    if (hasExecutor) {
+        executorLines.push(`  // Original executor was: ${describeExecutor(config.executor)}`);
+    }
+    executorLines.push(`  // TODO: Implement actual executor logic here`, `  return input; // Placeholder`, `}`, ``);
+    helpers.push(executorLines.join("\n"));
+    // Add assertion helpers
+    helpers.push([
+        `/**`,
+        ` * Helper function for legacy assertion evaluation`,
+        ` * TODO: Implement actual assertion logic based on original config`,
+        ` */`,
+        `function evaluateAssertions(output: string, expected?: string): boolean {`,
+        `  if (expected !== undefined) {`,
+        `    return output === expected;`,
+        `  }`,
+        `  return output.length > 0;`,
+        `}`,
+        ``,
+    ].join("\n"));
+    // Add evaluation helper
+    helpers.push([
+        `/**`,
+        ` * Legacy test evaluation function`,
+        ` * TODO: Adapt based on your original test logic`,
+        ` */`,
+        `async function evaluateLegacyTest(input: string, expected?: string): Promise<any> {`,
+        `  const output = await legacyExecutor(input);`,
+        `  const passed = evaluateAssertions(output, expected);`,
+        `  `,
+        `  return createResult({`,
+        `    pass: passed,`,
+        `    score: passed ? 100 : 0,`,
+        `    metadata: { input, expected },`,
+        `  });`,
+        `}`,
+        ``,
+    ].join("\n"));
+    return helpers.join("\n");
+}
+/**
+ * Render one suite as DSL source, preceded by a per-suite banner comment.
+ *
+ * The DSL body comes from generateDefineEvalCode(); the `helpers`,
+ * `preserveIds` and `provenance` options are forwarded to it as
+ * `generateHelpers`, `preserveIds` and `includeProvenance`.
+ */
+function generateSuiteDSL(suiteName, suite, options) {
+    const { helpers: generateHelpers, preserveIds, provenance: includeProvenance } = options;
+    const body = (0, testsuite_to_dsl_1.generateDefineEvalCode)(suite, {
+        generateHelpers,
+        preserveIds,
+        includeProvenance,
+    });
+    // Banner with the generic follow-up checklist for this suite.
+    const bannerLines = [
+        "/**",
+        ` * Test suite: ${suiteName}`,
+        " * Migrated from evalai.config.json",
+        " * ",
+        " * TODO items for this suite:",
+        " * - Review executor implementation",
+        " * - Verify assertion logic",
+        " * - Test with actual data",
+        " */",
+        "",
+    ];
+    return bannerLines.join("\n") + body;
+}
+/**
+ * Build the trailing summary comment for the generated file.
+ *
+ * Reports how many suites and tests were migrated (tests are counted via
+ * each suite's getTests()), echoes the `helpers`, `preserveIds` and
+ * `provenance` options, and lists the manual follow-up steps.
+ */
+function generateSummary(suites, options) {
+    let testCount = 0;
+    for (const entry of suites) {
+        testCount += entry.suite.getTests().length;
+    }
+    const suiteCount = suites.length;
+    // Each entry becomes one " * <text>" line inside the summary comment.
+    const summaryText = [
+        "Migration Summary",
+        "=================",
+        "",
+        `Total suites migrated: ${suiteCount}`,
+        `Total tests migrated: ${testCount}`,
+        "",
+        "Migration options used:",
+        `- Include helpers: ${options.helpers}`,
+        `- Preserve IDs: ${options.preserveIds}`,
+        `- Include provenance: ${options.provenance}`,
+        "",
+        "Next steps:",
+        "1. Review all TODO comments in this file",
+        "2. Implement actual executor logic",
+        "3. Adapt complex assertions",
+        "4. Test with real data",
+        "5. Remove evalai.config.json when satisfied",
+        "",
+        "For help with migration, see: https://github.com/pauly7610/ai-evaluation-platform/docs/MIGRATION.md",
+    ];
+    const lines = ["/**"];
+    for (const text of summaryText) {
+        lines.push(` * ${text}`);
+    }
+    lines.push(" */", "");
+    return lines.join("\n");
+}
+/**
+ * Run the whole migration: read the config, build the DSL text, write it.
+ *
+ * Steps: load `options.input`, extract its suites (failing when there are
+ * none), assemble header + global helpers + one section per suite + summary,
+ * create the output directory if needed, and write `options.output`.
+ *
+ * Any error is reported on stderr and terminates the process with exit
+ * code 1; the returned promise never rejects.
+ */
+async function migrateConfig(options) {
+    try {
+        const config = await readConfigFile(options.input);
+        const suites = extractTestSuitesFromConfig(config);
+        if (suites.length === 0) {
+            throw new Error("No test suites found in config file. Check config structure.");
+        }
+        // Assemble the file section by section, in output order.
+        const sections = [generateFileHeader(config, options), generateGlobalHelpers(config, options)];
+        for (const { name, suite } of suites) {
+            sections.push(generateSuiteDSL(name, suite, options));
+        }
+        sections.push(generateSummary(suites, options));
+        const content = sections.join("\n");
+        // The target directory may not exist yet.
+        await fs.mkdir(path.dirname(options.output), { recursive: true });
+        await fs.writeFile(options.output, content, "utf-8");
+        console.log(`✅ Migration complete!`);
+        console.log(`📁 Output written to: ${path.resolve(options.output)}`);
+        let testCount = 0;
+        for (const { suite } of suites) {
+            testCount += suite.getTests().length;
+        }
+        console.log(`📊 Migrated ${suites.length} suites with ${testCount} tests`);
+        console.log(`\n⚠️  Remember to review TODO comments and test the migration!`);
+    }
+    catch (error) {
+        const reason = error instanceof Error ? error.message : String(error);
+        console.error(`❌ Migration failed: ${reason}`);
+        process.exit(1);
+    }
+}
+/**
+ * Build the `migrate` CLI command with its `config` subcommand.
+ *
+ * Usage once registered: evalai migrate config --in <path> --out <path>
+ *
+ * The parent `migrate` command is kept in its own variable and returned.
+ * Commander's `.command("config")` returns the newly created subcommand, so
+ * chaining everything in one expression would hand back `config` instead of
+ * `migrate`, and the documented `migrate config` invocation would not exist.
+ *
+ * The `--no-*` flags are negatable options; each one maps to a migration
+ * option that stays enabled unless the flag is passed.
+ *
+ * @returns The `migrate` command, ready to be added to a parent program.
+ */
+function createMigrateCommand() {
+    const migrate = new commander_1.Command("migrate")
+        .description("Migrate legacy configuration to new DSL format");
+    migrate
+        .command("config")
+        .description("Migrate evalai.config.json to defineEval() specifications")
+        .requiredOption("-i, --in <path>", "Input config file path")
+        .requiredOption("-o, --out <path>", "Output DSL file path")
+        .option("-v, --verbose", "Include detailed comments and logging", false)
+        .option("--no-helpers", "Don't generate helper functions")
+        .option("--no-preserve-ids", "Don't preserve original test IDs")
+        .option("--no-provenance", "Don't include provenance metadata")
+        .action(async (options) => {
+        const migrateOptions = {
+            input: options.in,
+            output: options.out,
+            verbose: options.verbose,
+            helpers: options.helpers !== false,
+            preserveIds: options.preserveIds !== false,
+            provenance: options.provenance !== false,
+        };
+        await migrateConfig(migrateOptions);
+    });
+    return migrate;
+}
+/**
+ * Check whether a config file looks migratable.
+ *
+ * The file must parse to a non-null object and carry at least one truthy
+ * `tests`, `suites` or `testSuites` property. The outcome is logged, and
+ * the function resolves to `true` on success or `false` on any failure;
+ * it does not throw.
+ */
+async function validateConfigFile(filePath) {
+    try {
+        const config = await readConfigFile(filePath);
+        const isObject = Boolean(config) && typeof config === "object";
+        if (!isObject) {
+            throw new Error("Config file must contain a valid JSON object");
+        }
+        // At least one of the recognised test containers must be present.
+        const hasTests = ["tests", "suites", "testSuites"].some((key) => config[key]);
+        if (!hasTests) {
+            throw new Error("Config file must contain 'tests', 'suites', or 'testSuites' property");
+        }
+        console.log(`✅ Config file ${filePath} appears valid for migration`);
+        return true;
+    }
+    catch (error) {
+        const reason = error instanceof Error ? error.message : String(error);
+        console.error(`❌ Config validation failed: ${reason}`);
+        return false;
+    }
+}
+/**
+ * Print what a migration of `filePath` would cover, without writing files.
+ *
+ * Lists every suite found with its test count and the ids of up to three
+ * of its tests, then the overall test total and the command to run the
+ * real migration. Failures are logged to stderr and swallowed.
+ */
+async function previewMigration(filePath) {
+    try {
+        const config = await readConfigFile(filePath);
+        const suites = extractTestSuitesFromConfig(config);
+        console.log(`📋 Migration preview for: ${filePath}`);
+        console.log("");
+        console.log(`Found ${suites.length} test suites:`);
+        console.log("");
+        for (const entry of suites) {
+            const tests = entry.suite.getTests();
+            console.log(`  📁 ${entry.name}: ${tests.length} tests`);
+            if (!(tests.length > 0)) {
+                continue;
+            }
+            // Show at most three ids, with an ellipsis when more exist.
+            const sampleIds = tests
+                .slice(0, 3)
+                .map((t) => t.id)
+                .join(", ");
+            const ellipsis = tests.length > 3 ? "..." : "";
+            console.log(`     Tests: ${sampleIds}${ellipsis}`);
+        }
+        console.log("");
+        let total = 0;
+        for (const { suite } of suites) {
+            total += suite.getTests().length;
+        }
+        console.log(`Total tests to migrate: ${total}`);
+        console.log("");
+        console.log(`To migrate, run: evalai migrate config --in ${filePath} --out eval/migrated.spec.ts`);
+    }
+    catch (error) {
+        const reason = error instanceof Error ? error.message : String(error);
+        console.error(`❌ Preview failed: ${reason}`);
+    }
+}

package/dist/cli/print-config.js CHANGED Viewed

@@ -114,8 +114,10 @@ function buildResolvedConfig(cwd, flags) {
     // Determine source of each field
     const fields = [];
     // evaluationId
-    const evalIdSource = flags.evaluationId ? "arg"
-        : fileConfig?.evaluationId ? "file"
+    const evalIdSource = flags.evaluationId
+        ? "arg"
+        : fileConfig?.evaluationId
+            ? "file"
             : "default";
     fields.push({
         key: "evaluationId",
@@ -124,9 +126,12 @@ function buildResolvedConfig(cwd, flags) {
     });
     // baseUrl
     const envBaseUrl = process.env.EVALAI_BASE_URL;
-    const baseUrlSource = flags.baseUrl ? "arg"
-        : envBaseUrl ? "env"
-            : fileConfig?.baseUrl ? "file"
+    const baseUrlSource = flags.baseUrl
+        ? "arg"
+        : envBaseUrl
+            ? "env"
+            : fileConfig?.baseUrl
+                ? "file"
                 : "default";
     fields.push({
         key: "baseUrl",
@@ -136,9 +141,7 @@ function buildResolvedConfig(cwd, flags) {
     // apiKey (always redacted)
     const envApiKey = process.env.EVALAI_API_KEY;
     const rawApiKey = flags.apiKey || envApiKey || "";
-    const apiKeySource = flags.apiKey ? "arg"
-        : envApiKey ? "env"
-            : "default";
+    const apiKeySource = flags.apiKey ? "arg" : envApiKey ? "env" : "default";
     fields.push({
         key: "apiKey",
         value: redact(rawApiKey) ?? "(not set)",
@@ -167,9 +170,12 @@ function buildResolvedConfig(cwd, flags) {
         const profileVal = profileName && profileName in profiles_1.PROFILES
             ? profiles_1.PROFILES[profileName][key]
             : undefined;
-        const source = argVal !== undefined ? "arg"
-            : fileVal !== undefined ? "file"
-                : profileVal !== undefined ? "profile"
+        const source = argVal !== undefined
+            ? "arg"
+            : fileVal !== undefined
+                ? "file"
+                : profileVal !== undefined
+                    ? "profile"
                     : "default";
         fields.push({
             key,
@@ -178,9 +184,7 @@ function buildResolvedConfig(cwd, flags) {
         });
     }
     // baseline
-    const baselineSource = flags.baseline ? "arg"
-        : fileConfig?.baseline ? "file"
-            : "default";
+    const baselineSource = flags.baseline ? "arg" : fileConfig?.baseline ? "file" : "default";
     fields.push({
         key: "baseline",
         value: merged.baseline ?? "published",

package/dist/cli/run.d.ts ADDED Viewed

@@ -0,0 +1,101 @@
+/**
+ * TICKET 4 — Unified evalai run CLI Command
+ *
+ * Goal: Consolidated execution interface that consumes manifest
+ *
+ * Features:
+ * - Manifest loading and spec filtering
+ * - --impacted-only integration with impact analysis
+ * - Local executor integration
+ * - .evalai/last-run.json output
+ * - Legacy mode compatibility
+ */
+/**
+ * Options accepted by runEvaluations() and runEvaluationsCLI().
+ * Every field is optional.
+ */
+export interface RunOptions {
+    /** Restrict the run to these spec IDs */
+    specIds?: string[];
+    /** Run only impacted specs (requires a base branch for the impact analysis) */
+    impactedOnly?: boolean;
+    /** Base branch the impact analysis compares against */
+    baseBranch?: string;
+    /**
+     * Output format.
+     * NOTE(review): presumably selects between printHumanResults and
+     * printJsonResults — confirm against the implementation.
+     */
+    format?: "human" | "json";
+    /**
+     * Write run results to file.
+     * NOTE(review): the file header mentions .evalai/last-run.json; confirm
+     * that this flag controls that output.
+     */
+    writeResults?: boolean;
+}
+/**
+ * Outcome of one run, as resolved by runEvaluations() and consumed by
+ * printHumanResults() / printJsonResults().
+ */
+export interface RunResult {
+    /** Schema version for compatibility checking */
+    schemaVersion: number;
+    /** Unique run identifier */
+    runId: string;
+    /**
+     * Execution metadata:
+     * - startedAt / completedAt / duration: numeric timing values
+     *   (NOTE(review): presumably epoch timestamps and a millisecond
+     *   duration — units are not visible in this declaration, confirm)
+     * - totalSpecs / executedSpecs: spec counts
+     * - mode: "spec" or "legacy" execution
+     */
+    metadata: {
+        startedAt: number;
+        completedAt: number;
+        duration: number;
+        totalSpecs: number;
+        executedSpecs: number;
+        mode: "spec" | "legacy";
+    };
+    /** Individual spec results, one SpecResult per entry */
+    results: SpecResult[];
+    /**
+     * Summary statistics: counts per status plus the pass rate.
+     * NOTE(review): the scale of passRate (0–1 vs 0–100) is not visible in
+     * this declaration — confirm before relying on it.
+     */
+    summary: {
+        passed: number;
+        failed: number;
+        skipped: number;
+        passRate: number;
+    };
+}
+/**
+ * Result of a single spec; the element type of RunResult.results.
+ */
+export interface SpecResult {
+    /** Spec identifier */
+    specId: string;
+    /** Spec name */
+    name: string;
+    /** File path (NOTE(review): presumably of the spec's source file — confirm) */
+    filePath: string;
+    /**
+     * Execution result:
+     * - status: "passed", "failed" or "skipped"
+     * - score: optional numeric score
+     * - error: optional error text
+     * - duration: numeric duration (NOTE(review): unit not visible here,
+     *   presumably milliseconds — confirm)
+     */
+    result: {
+        status: "passed" | "failed" | "skipped";
+        score?: number;
+        error?: string;
+        duration: number;
+    };
+}
+/**
+ * Run evaluation specifications.
+ *
+ * @param options     Filters and output settings for the run.
+ * @param projectRoot Optional project root; the default used when omitted is
+ *                    not visible from this declaration.
+ * @returns A promise resolving to the RunResult for the run.
+ */
+export declare function runEvaluations(options: RunOptions, projectRoot?: string): Promise<RunResult>;
+/**
+ * Run index entry: a compact per-run record.
+ *
+ * Fields: the run's id and creation time, optional git SHA and branch, the
+ * execution mode ("spec" or "legacy"), the number of specs, and the pass
+ * rate and average score.
+ * NOTE(review): the unit of createdAt and the scale of passRate / avgScore
+ * are not visible in this declaration — confirm against the writer.
+ */
+export interface RunIndexEntry {
+    runId: string;
+    createdAt: number;
+    gitSha?: string;
+    branch?: string;
+    mode: "spec" | "legacy";
+    specCount: number;
+    passRate: number;
+    avgScore: number;
+}
+/**
+ * Print human-readable results.
+ *
+ * @param result The run result to print.
+ */
+export declare function printHumanResults(result: RunResult): void;
+/**
+ * Print JSON results.
+ *
+ * @param result The run result to print.
+ */
+export declare function printJsonResults(result: RunResult): void;
+/**
+ * CLI entry point.
+ *
+ * @param options Run options, same shape as for runEvaluations().
+ * @returns A promise that resolves with no value.
+ */
+export declare function runEvaluationsCLI(options: RunOptions): Promise<void>;