npm - @pauly4010/evalai-sdk - Versions diffs - 1.6.0 → 1.8.0 - Mend

@pauly4010/evalai-sdk 1.6.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/CHANGELOG.md +114 -0
package/README.md +198 -236
package/dist/cli/baseline.js +1 -1
package/dist/cli/check.js +15 -0
package/dist/cli/doctor.d.ts +80 -3
package/dist/cli/doctor.js +576 -43
package/dist/cli/explain.d.ts +58 -0
package/dist/cli/explain.js +429 -0
package/dist/cli/formatters/github.js +5 -0
package/dist/cli/formatters/types.d.ts +3 -0
package/dist/cli/formatters/types.js +3 -0
package/dist/cli/index.js +47 -4
package/dist/cli/init.d.ts +11 -2
package/dist/cli/init.js +239 -16
package/dist/cli/print-config.d.ts +29 -0
package/dist/cli/print-config.js +251 -0
package/dist/cli/regression-gate.d.ts +6 -2
package/dist/cli/regression-gate.js +246 -61
package/dist/cli/report/build-check-report.d.ts +1 -1
package/dist/cli/report/build-check-report.js +2 -0
package/dist/cli/upgrade.d.ts +15 -0
package/dist/cli/upgrade.js +491 -0
package/dist/index.d.ts +1 -1
package/dist/index.js +7 -7
package/dist/version.d.ts +1 -1
package/dist/version.js +1 -1
package/package.json +1 -1

package/dist/cli/init.js CHANGED Viewed

@@ -1,9 +1,18 @@
 #!/usr/bin/env node
 "use strict";
 /**
- * evalai init — Create evalai.config.json
+ * evalai init — Full project scaffolder
  *
- * Creates the smallest possible config file. Defaults belong in code.
+ * Zero-to-gate in under 5 minutes:
+ *   npx evalai init
+ *   git push
+ *   …CI starts blocking regressions.
+ *
+ * What it does:
+ *   1. Detects Node repo + package manager
+ *   2. Creates evals/ directory + baseline.json
+ *   3. Installs .github/workflows/evalai-gate.yml
+ *   4. Prints next steps (no docs required)
  */
 var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
     if (k2 === undefined) k2 = k;
@@ -40,30 +49,244 @@ var __importStar = (this && this.__importStar) || (function () {
 })();
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.runInit = runInit;
+const node_child_process_1 = require("node:child_process");
 const fs = __importStar(require("node:fs"));
 const path = __importStar(require("node:path"));
-const CONFIG_CONTENT = `{
-  "evaluationId": ""
+function detectProject(cwd) {
+    const pkgPath = path.join(cwd, "package.json");
+    if (!fs.existsSync(pkgPath))
+        return null;
+    let pkg;
+    try {
+        pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
+    }
+    catch {
+        return null;
+    }
+    let pm = "npm";
+    if (fs.existsSync(path.join(cwd, "pnpm-lock.yaml")))
+        pm = "pnpm";
+    else if (fs.existsSync(path.join(cwd, "yarn.lock")))
+        pm = "yarn";
+    const testScript = pkg.scripts?.test ?? "";
+    const hasTestScript = !!testScript && testScript !== 'echo "Error: no test specified" && exit 1';
+    return {
+        cwd,
+        pm,
+        hasTestScript,
+        testScript,
+        name: pkg.name ?? path.basename(cwd),
+    };
+}
+// ── Step helpers ──
+function ok(msg) {
+    console.log(`  ✔ ${msg}`);
+}
+function skip(msg) {
+    console.log(`  – ${msg}`);
+}
+// ── 1. Create evals/ + baseline.json ──
+function createBaseline(cwd, project) {
+    const evalsDir = path.join(cwd, "evals");
+    const baselinePath = path.join(evalsDir, "baseline.json");
+    if (fs.existsSync(baselinePath)) {
+        skip("evals/baseline.json already exists");
+        return true;
+    }
+    if (!fs.existsSync(evalsDir)) {
+        fs.mkdirSync(evalsDir, { recursive: true });
+    }
+    const user = process.env.USER || process.env.USERNAME || "unknown";
+    const now = new Date().toISOString();
+    // Run tests to capture real count if possible
+    let testTotal = 0;
+    let testsPassed = true;
+    if (project.hasTestScript) {
+        const isWin = process.platform === "win32";
+        const result = (0, node_child_process_1.spawnSync)(project.pm, ["test"], {
+            cwd,
+            stdio: "pipe",
+            shell: isWin,
+            timeout: 120000,
+        });
+        testsPassed = result.status === 0;
+        // Try to extract test count from output
+        const output = (result.stdout?.toString() ?? "") + (result.stderr?.toString() ?? "");
+        const countMatch = output.match(/(\d+)\s+(?:tests?|specs?)\s+(?:passed|completed)/i) ??
+            output.match(/Tests:\s+(\d+)\s+passed/i) ??
+            output.match(/(\d+)\s+passing/i);
+        if (countMatch)
+            testTotal = parseInt(countMatch[1], 10);
+    }
+    const baseline = {
+        schemaVersion: 1,
+        description: `Regression gate baseline for ${project.name}`,
+        generatedAt: now,
+        generatedBy: user,
+        commitSha: getHeadSha(cwd),
+        updatedAt: now,
+        updatedBy: user,
+        tolerance: {
+            scoreDrop: 5,
+            passRateDrop: 5,
+            maxLatencyIncreaseMs: 200,
+            maxCostIncreaseUsd: 0.05,
+        },
+        goldenEval: {
+            score: 100,
+            passRate: 100,
+            totalCases: 3,
+            passedCases: 3,
+        },
+        confidenceTests: {
+            passed: testsPassed,
+            total: testTotal,
+        },
+        productMetrics: {},
+    };
+    fs.writeFileSync(baselinePath, `${JSON.stringify(baseline, null, 2)}\n`);
+    ok("Created evals/baseline.json");
+    return true;
 }
+function getHeadSha(cwd) {
+    try {
+        const result = (0, node_child_process_1.spawnSync)("git", ["rev-parse", "--short", "HEAD"], {
+            cwd,
+            stdio: "pipe",
+        });
+        return result.stdout?.toString().trim() || "0000000";
+    }
+    catch {
+        return "0000000";
+    }
+}
+// ── 2. Install GitHub Actions workflow ──
+function installWorkflow(cwd, project) {
+    const workflowDir = path.join(cwd, ".github", "workflows");
+    const workflowPath = path.join(workflowDir, "evalai-gate.yml");
+    if (fs.existsSync(workflowPath)) {
+        skip(".github/workflows/evalai-gate.yml already exists");
+        return true;
+    }
+    if (!fs.existsSync(workflowDir)) {
+        fs.mkdirSync(workflowDir, { recursive: true });
+    }
+    const installCmd = project.pm === "pnpm"
+        ? "pnpm install --frozen-lockfile"
+        : project.pm === "yarn"
+            ? "yarn install --frozen-lockfile"
+            : "npm ci";
+    const setupSteps = project.pm === "pnpm"
+        ? `      - uses: pnpm/action-setup@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+          cache: pnpm
+      - run: ${installCmd}`
+        : `      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+          cache: ${project.pm}
+      - run: ${installCmd}`;
+    const workflow = `# EvalAI Regression Gate
+# Auto-generated by: npx evalai init
+# Blocks PRs that regress test health.
+name: EvalAI Gate
+on:
+  pull_request:
+    branches: [main]
+concurrency:
+  group: evalai-\${{ github.ref }}
+  cancel-in-progress: true
+jobs:
+  regression-gate:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+${setupSteps}
+      - name: EvalAI Doctor (preflight)
+        continue-on-error: true  # Strict: set to false, or use: evalai doctor --strict
+        run: npx -y @pauly4010/evalai-sdk@^1 doctor
+      - name: EvalAI Regression Gate
+        run: npx -y @pauly4010/evalai-sdk@^1 gate --format github
+      - name: Upload report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: evalai-report
+          path: |
+            evals/regression-report.json
+            .evalai/last-report.json
+          if-no-files-found: ignore
 `;
-function runInit(cwd = process.cwd()) {
+    fs.writeFileSync(workflowPath, workflow);
+    ok("Created .github/workflows/evalai-gate.yml");
+    return true;
+}
+// ── 3. Create evalai.config.json ──
+function createConfig(cwd) {
     const configPath = path.join(cwd, "evalai.config.json");
     if (fs.existsSync(configPath)) {
-        console.log(`evalai.config.json already exists at ${path.resolve(configPath)}`);
+        skip("evalai.config.json already exists");
+        return true;
+    }
+    const config = {
+        evaluationId: "",
+        gate: {
+            baseline: "evals/baseline.json",
+            report: "evals/regression-report.json",
+        },
+    };
+    fs.writeFileSync(configPath, `${JSON.stringify(config, null, 2)}\n`);
+    ok("Created evalai.config.json");
+    return true;
+}
+// ── Main ──
+function runInit(cwd = process.cwd()) {
+    console.log("");
+    console.log("  evalai init — setting up regression gate\n");
+    // Detect
+    const project = detectProject(cwd);
+    if (!project) {
+        console.error("  ✖ No package.json found. Run this from a Node.js project root.");
         return false;
     }
-    fs.writeFileSync(configPath, CONFIG_CONTENT, "utf-8");
-    const resolvedPath = path.resolve(configPath);
-    console.log(`Wrote evalai.config.json at ${resolvedPath}`);
+    ok(`Detected ${project.pm} project: ${project.name}`);
+    if (!project.hasTestScript) {
+        console.log(`  ⚠ No test script found in package.json`);
+        console.log(`    The gate will still work — add a "test" script later for full coverage.\n`);
+    }
+    // Scaffold
+    createBaseline(cwd, project);
+    installWorkflow(cwd, project);
+    createConfig(cwd);
+    // Next steps
+    console.log("");
+    console.log("  Done! Next:");
+    console.log("");
+    console.log("    npx evalai doctor             Verify your setup is complete");
+    console.log("");
+    console.log("  Then commit:");
+    console.log("");
+    console.log("    git add evals/ .github/workflows/evalai-gate.yml evalai.config.json");
+    console.log("    git commit -m 'chore: add EvalAI regression gate'");
+    console.log("    git push");
+    console.log("");
+    console.log("  That's it. Open a PR and the gate runs automatically.");
     console.log("");
-    console.log("Next: paste evaluationId into evalai.config.json, then run npx -y @pauly4010/evalai-sdk@^1 check --format github --onFail import");
+    console.log("  Commands:");
+    console.log("    npx evalai doctor             Preflight check — verify config, baseline, CI");
+    console.log("    npx evalai gate               Run regression gate locally");
+    console.log("    npx evalai check              API-based gate (requires account)");
+    console.log("    npx evalai explain            Explain last failure with root causes + fixes");
+    console.log("    npx evalai baseline update    Update baseline after intentional changes");
     console.log("");
-    console.log("GitHub Actions snippet (add to your workflow):");
-    console.log("  - name: EvalAI gate");
-    console.log("    env:");
-    console.log("      EVALAI_API_KEY: ${{ secrets.EVALAI_API_KEY }}");
-    console.log("    run: npx -y @pauly4010/evalai-sdk@^1 check --format github --onFail import");
+    console.log("  To remove: delete evals/, evalai.config.json, and .github/workflows/evalai-gate.yml");
     console.log("");
-    console.log("To uninstall: delete evalai.config.json.");
     return true;
 }

package/dist/cli/print-config.d.ts ADDED Viewed

@@ -0,0 +1,29 @@
+/**
+ * evalai print-config — Show resolved configuration with source-of-truth annotations.
+ *
+ * Prints every config field, where it came from (file, env, default, CLI arg),
+ * and redacts secrets. Useful for debugging "why is it using this baseUrl?"
+ *
+ * Usage:
+ *   evalai print-config
+ *   evalai print-config --format json
+ *
+ * Exit codes:
+ *   0 — Always (informational only)
+ */
+type Source = "file" | "env" | "default" | "profile" | "arg";
+interface ResolvedField {
+    key: string;
+    value: string | number | boolean | null;
+    source: Source;
+    raw?: string;
+}
+export interface PrintConfigOutput {
+    cliVersion: string;
+    configFile: string | null;
+    cwd: string;
+    resolved: ResolvedField[];
+    env: Record<string, string | null>;
+}
+export declare function runPrintConfig(argv: string[]): number;
+export {};

package/dist/cli/print-config.js ADDED Viewed

@@ -0,0 +1,251 @@
+"use strict";
+/**
+ * evalai print-config — Show resolved configuration with source-of-truth annotations.
+ *
+ * Prints every config field, where it came from (file, env, default, CLI arg),
+ * and redacts secrets. Useful for debugging "why is it using this baseUrl?"
+ *
+ * Usage:
+ *   evalai print-config
+ *   evalai print-config --format json
+ *
+ * Exit codes:
+ *   0 — Always (informational only)
+ */
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () {
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.runPrintConfig = runPrintConfig;
+const path = __importStar(require("node:path"));
+const version_1 = require("../version");
+const config_1 = require("./config");
+const profiles_1 = require("./profiles");
+function parseFlags(argv) {
+    const raw = {};
+    for (let i = 0; i < argv.length; i++) {
+        const arg = argv[i];
+        if (arg.startsWith("--")) {
+            const key = arg.slice(2);
+            const next = argv[i + 1];
+            if (next !== undefined && !next.startsWith("--")) {
+                raw[key] = next;
+                i++;
+            }
+            else {
+                raw[key] = "true";
+            }
+        }
+    }
+    return {
+        format: raw.format === "json" ? "json" : "human",
+        evaluationId: raw.evaluationId,
+        baseUrl: raw.baseUrl,
+        apiKey: raw.apiKey,
+        baseline: raw.baseline,
+        profile: raw.profile,
+        minScore: raw.minScore,
+        maxDrop: raw.maxDrop,
+        warnDrop: raw.warnDrop,
+        minN: raw.minN,
+    };
+}
+// ── Helpers ──
+function redact(value) {
+    if (!value)
+        return null;
+    if (value.length > 8)
+        return `${value.slice(0, 4)}...${value.slice(-4)}`;
+    return "****";
+}
+// ── Build resolved config ──
+function buildResolvedConfig(cwd, flags) {
+    const configPath = (0, config_1.findConfigPath)(cwd);
+    const fileConfig = (0, config_1.loadConfig)(cwd);
+    // Build CLI args object (only what was explicitly passed)
+    const cliArgs = {};
+    if (flags.evaluationId)
+        cliArgs.evaluationId = flags.evaluationId;
+    if (flags.baseUrl)
+        cliArgs.baseUrl = flags.baseUrl;
+    if (flags.baseline)
+        cliArgs.baseline = flags.baseline;
+    if (flags.profile)
+        cliArgs.profile = flags.profile;
+    if (flags.minScore)
+        cliArgs.minScore = flags.minScore;
+    if (flags.maxDrop)
+        cliArgs.maxDrop = flags.maxDrop;
+    if (flags.warnDrop)
+        cliArgs.warnDrop = flags.warnDrop;
+    if (flags.minN)
+        cliArgs.minN = flags.minN;
+    const merged = (0, config_1.mergeConfigWithArgs)(fileConfig, cliArgs);
+    // Determine source of each field
+    const fields = [];
+    // evaluationId
+    const evalIdSource = flags.evaluationId ? "arg"
+        : fileConfig?.evaluationId ? "file"
+            : "default";
+    fields.push({
+        key: "evaluationId",
+        value: merged.evaluationId ?? null,
+        source: evalIdSource,
+    });
+    // baseUrl
+    const envBaseUrl = process.env.EVALAI_BASE_URL;
+    const baseUrlSource = flags.baseUrl ? "arg"
+        : envBaseUrl ? "env"
+            : fileConfig?.baseUrl ? "file"
+                : "default";
+    fields.push({
+        key: "baseUrl",
+        value: flags.baseUrl || envBaseUrl || fileConfig?.baseUrl || "http://localhost:3000",
+        source: baseUrlSource,
+    });
+    // apiKey (always redacted)
+    const envApiKey = process.env.EVALAI_API_KEY;
+    const rawApiKey = flags.apiKey || envApiKey || "";
+    const apiKeySource = flags.apiKey ? "arg"
+        : envApiKey ? "env"
+            : "default";
+    fields.push({
+        key: "apiKey",
+        value: redact(rawApiKey) ?? "(not set)",
+        source: apiKeySource,
+        raw: rawApiKey ? "(redacted)" : undefined,
+    });
+    // profile
+    const profileName = (flags.profile || fileConfig?.profile);
+    const profileSource = flags.profile ? "arg" : fileConfig?.profile ? "file" : "default";
+    fields.push({
+        key: "profile",
+        value: profileName ?? null,
+        source: profileSource,
+    });
+    // Numeric gate fields: minScore, maxDrop, warnDrop, minN, allowWeakEvidence
+    const numericFields = [
+        { key: "minScore" },
+        { key: "maxDrop" },
+        { key: "warnDrop" },
+        { key: "minN" },
+        { key: "allowWeakEvidence" },
+    ];
+    for (const { key } of numericFields) {
+        const argVal = cliArgs[key];
+        const fileVal = fileConfig?.[key];
+        const profileVal = profileName && profileName in profiles_1.PROFILES
+            ? profiles_1.PROFILES[profileName][key]
+            : undefined;
+        const source = argVal !== undefined ? "arg"
+            : fileVal !== undefined ? "file"
+                : profileVal !== undefined ? "profile"
+                    : "default";
+        fields.push({
+            key,
+            value: merged[key] ?? null,
+            source,
+        });
+    }
+    // baseline
+    const baselineSource = flags.baseline ? "arg"
+        : fileConfig?.baseline ? "file"
+            : "default";
+    fields.push({
+        key: "baseline",
+        value: merged.baseline ?? "published",
+        source: baselineSource,
+    });
+    // Environment variables summary
+    const envVars = {
+        EVALAI_API_KEY: redact(envApiKey),
+        EVALAI_BASE_URL: envBaseUrl ?? null,
+        OPENAI_API_KEY: redact(process.env.OPENAI_API_KEY),
+        ANTHROPIC_API_KEY: redact(process.env.ANTHROPIC_API_KEY),
+        AZURE_OPENAI_API_KEY: redact(process.env.AZURE_OPENAI_API_KEY),
+        GITHUB_ACTIONS: process.env.GITHUB_ACTIONS ?? null,
+        CI: process.env.CI ?? null,
+    };
+    return {
+        cliVersion: version_1.SDK_VERSION,
+        configFile: configPath ? path.relative(cwd, configPath) : null,
+        cwd,
+        resolved: fields,
+        env: envVars,
+    };
+}
+// ── Output formatting ──
+function printHuman(output) {
+    console.log("\n  evalai print-config\n");
+    console.log(`  CLI version: ${output.cliVersion}`);
+    console.log(`  Config file: ${output.configFile ?? "(none found)"}`);
+    console.log(`  Working dir: ${output.cwd}`);
+    console.log("");
+    console.log("  Resolved configuration:");
+    console.log("");
+    const maxKeyLen = Math.max(...output.resolved.map((f) => f.key.length));
+    for (const field of output.resolved) {
+        const val = field.value === null ? "(not set)" : String(field.value);
+        const pad = " ".repeat(maxKeyLen - field.key.length);
+        const sourceTag = `[${field.source}]`;
+        console.log(`    ${field.key}${pad}  ${val}  ${sourceTag}`);
+    }
+    console.log("");
+    console.log("  Environment variables:");
+    console.log("");
+    for (const [key, val] of Object.entries(output.env)) {
+        if (val !== null) {
+            console.log(`    ${key} = ${val}`);
+        }
+    }
+    const unsetEnv = Object.entries(output.env)
+        .filter(([, v]) => v === null)
+        .map(([k]) => k);
+    if (unsetEnv.length > 0) {
+        console.log(`    (not set: ${unsetEnv.join(", ")})`);
+    }
+    console.log("");
+}
+// ── Main ──
+function runPrintConfig(argv) {
+    const flags = parseFlags(argv);
+    const cwd = process.cwd();
+    const output = buildResolvedConfig(cwd, flags);
+    if (flags.format === "json") {
+        console.log(JSON.stringify(output, null, 2));
+    }
+    else {
+        printHuman(output);
+    }
+    return 0;
+}

package/dist/cli/regression-gate.d.ts CHANGED Viewed

@@ -1,8 +1,12 @@
 /**
  * evalai gate — Run the regression gate
  *
- * Delegates to the project's eval:regression-gate npm script.
- * Supports --format json to output the regression-report.json contents.
+ * Two modes:
+ *   1. Project mode: delegates to eval:regression-gate npm script (full gate)
+ *   2. Built-in mode: runs `npm test`, compares against evals/baseline.json
+ *
+ * Built-in mode activates when no eval:regression-gate script is defined,
+ * making `npx evalai gate` work for any project after `npx evalai init`.
  */
 export interface GateArgs {
     format: "human" | "json" | "github";