npm - @pauly4010/evalai-sdk - Versions diffs - 1.5.8 → 1.7.0 - Mend

@pauly4010/evalai-sdk 1.5.8 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/CHANGELOG.md +72 -0
package/README.md +172 -251
package/dist/cli/baseline.d.ts +10 -0
package/dist/cli/baseline.js +172 -0
package/dist/cli/index.js +26 -4
package/dist/cli/init.d.ts +11 -2
package/dist/cli/init.js +227 -16
package/dist/cli/regression-gate.d.ts +15 -0
package/dist/cli/regression-gate.js +335 -0
package/dist/cli/upgrade.d.ts +15 -0
package/dist/cli/upgrade.js +491 -0
package/dist/client.request.test.d.ts +1 -1
package/dist/client.request.test.js +157 -157
package/dist/index.d.ts +1 -0
package/dist/index.js +7 -1
package/dist/regression.d.ts +100 -0
package/dist/regression.js +44 -0
package/dist/version.d.ts +1 -1
package/dist/version.js +1 -1
package/package.json +6 -1

package/dist/cli/upgrade.js ADDED Viewed

@@ -0,0 +1,491 @@
+"use strict";
+/**
+ * evalai upgrade --full — Upgrade from Tier 1 (built-in gate) to Tier 2 (full gate)
+ *
+ * What it does:
+ *   1. Adds full regression gate script (scripts/regression-gate.ts)
+ *   2. Adds baseline governance workflow (.github/workflows/baseline-governance.yml)
+ *   3. Updates package.json with eval:regression-gate + eval:baseline-update scripts
+ *   4. Updates .github/workflows/evalai-gate.yml to use project mode
+ *   5. Adds an evals/baseline.json entry to .github/CODEOWNERS
+ *   6. Prints next steps
+ */
+// Compiler-emitted CommonJS interop helpers. Each one reuses an implementation already
+// present on `this` and only falls back to the local definition when none is found.
+//
+// __createBinding(o, m, k, k2): exposes property `k` of module `m` on `o` under the name
+// `k2` (defaults to `k`). When the source descriptor is missing, is an accessor on a
+// non-ES module, or is a writable/configurable data property, an enumerable getter that
+// reads `m[k]` is defined instead. Without Object.create it degrades to plain assignment.
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+// __setModuleDefault(o, v): stores `v` as the enumerable `default` property of `o`.
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+// __importStar(mod): returns `mod` untouched when it is flagged `__esModule`; otherwise
+// builds a namespace object that binds every own property except "default" and sets
+// `default` to the module itself.
+var __importStar = (this && this.__importStar) || (function () {
+    // Replaces itself on first call with Object.getOwnPropertyNames, or with a
+    // for-in + hasOwnProperty fallback when that function is unavailable.
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.parseUpgradeArgs = parseUpgradeArgs;
+exports.runUpgrade = runUpgrade;
+const fs = __importStar(require("node:fs"));
+const path = __importStar(require("node:path"));
+// ── Detect environment ──
+function detectPackageManager(cwd) {
+    if (fs.existsSync(path.join(cwd, "pnpm-lock.yaml")))
+        return "pnpm";
+    if (fs.existsSync(path.join(cwd, "yarn.lock")))
+        return "yarn";
+    return "npm";
+}
+function ok(msg) {
+    console.log(`  ✔ ${msg}`);
+}
+function skip(msg) {
+    console.log(`  – ${msg}`);
+}
+// ── 1. Create scripts/regression-gate.ts ──
+function createGateScript(cwd) {
+    const scriptPath = path.join(cwd, "scripts", "regression-gate.ts");
+    if (fs.existsSync(scriptPath)) {
+        skip("scripts/regression-gate.ts already exists");
+        return true;
+    }
+    const scriptsDir = path.join(cwd, "scripts");
+    if (!fs.existsSync(scriptsDir)) {
+        fs.mkdirSync(scriptsDir, { recursive: true });
+    }
+    const content = `#!/usr/bin/env npx tsx
+/**
+ * Full regression gate — compares current test results against baseline.
+ *
+ * Usage:
+ *   npx tsx scripts/regression-gate.ts                  # run gate
+ *   npx tsx scripts/regression-gate.ts --update-baseline # update baseline with current values
+ *
+ * Generated by: npx evalai upgrade --full
+ */
+import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
+import { execSync, spawnSync } from "node:child_process";
+import { resolve } from "node:path";
+const BASELINE_PATH = resolve("evals/baseline.json");
+const REPORT_PATH = resolve("evals/regression-report.json");
+const CONFIDENCE_PATH = resolve("evals/confidence-summary.json");
+const isUpdateBaseline = process.argv.includes("--update-baseline");
+// ── Helpers ──
+function loadJSON(p: string): Record<string, unknown> | null {
+  try {
+    return JSON.parse(readFileSync(p, "utf-8"));
+  } catch {
+    return null;
+  }
+}
+function getHeadSha(): string {
+  try {
+    return execSync("git rev-parse --short HEAD").toString().trim();
+  } catch {
+    return "0000000";
+  }
+}
+function writeReport(report: Record<string, unknown>): void {
+  const dir = resolve("evals");
+  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
+  writeFileSync(REPORT_PATH, JSON.stringify(report, null, 2) + "\\n");
+}
+// ── Run tests ──
+function runTests(): { passed: boolean; total: number; durationMs: number } {
+  const t0 = Date.now();
+  const result = spawnSync("npm", ["test"], {
+    stdio: "pipe",
+    shell: process.platform === "win32",
+    timeout: 300_000,
+  });
+  const durationMs = Date.now() - t0;
+  const passed = result.status === 0;
+  const output = (result.stdout?.toString() ?? "") + (result.stderr?.toString() ?? "");
+  let total = 0;
+  const m =
+    output.match(/(\\d+)\\s+(?:tests?|specs?)\\s+(?:passed|completed)/i) ??
+    output.match(/Tests:\\s+(\\d+)\\s+passed/i) ??
+    output.match(/(\\d+)\\s+passing/i);
+  if (m) total = parseInt(m[1], 10);
+  return { passed, total, durationMs };
+}
+// ── Main ──
+const baseline = loadJSON(BASELINE_PATH);
+if (!baseline) {
+  console.error("❌ Baseline not found. Run: npx evalai init");
+  const report = {
+    schemaVersion: 1,
+    timestamp: new Date().toISOString(),
+    exitCode: 2,
+    category: "infra_error",
+    passed: false,
+    failures: ["Baseline file not found"],
+    deltas: [],
+    baseline: null,
+    durationMs: 0,
+    command: "npm test",
+    runner: "unknown",
+  };
+  writeReport(report);
+  process.exit(2);
+}
+const tests = runTests();
+if (isUpdateBaseline) {
+  const user = process.env.USER || process.env.USERNAME || "unknown";
+  const now = new Date().toISOString();
+  const updated = {
+    ...baseline,
+    updatedAt: now,
+    updatedBy: user,
+    commitSha: getHeadSha(),
+    confidenceTests: {
+      ...(baseline.confidenceTests as Record<string, unknown> ?? {}),
+      passed: tests.passed,
+      total: tests.total,
+    },
+  };
+  writeFileSync(BASELINE_PATH, JSON.stringify(updated, null, 2) + "\\n");
+  console.log("✅ Baseline updated with current test results");
+  console.log(\`   Tests: \${tests.total} (\${tests.passed ? "passing" : "FAILING"})\`);
+  process.exit(0);
+}
+// ── Compare ──
+const bConf = baseline.confidenceTests as { passed?: boolean; total?: number } | undefined;
+const baselinePassed = bConf?.passed ?? true;
+const baselineTotal = bConf?.total ?? 0;
+const failures: string[] = [];
+const deltas: Array<Record<string, unknown>> = [];
+deltas.push({
+  metric: "tests_passing",
+  baseline: baselinePassed,
+  current: tests.passed,
+  delta: tests.passed === baselinePassed ? "0" : tests.passed ? "+1" : "-1",
+  status: tests.passed ? "pass" : "fail",
+});
+if (!tests.passed && baselinePassed) {
+  failures.push("Tests were passing in baseline but are now failing");
+}
+if (tests.total > 0 || baselineTotal > 0) {
+  const d = tests.total - baselineTotal;
+  deltas.push({
+    metric: "test_count",
+    baseline: baselineTotal,
+    current: tests.total,
+    delta: d >= 0 ? \`+\${d}\` : \`\${d}\`,
+    status: tests.total >= baselineTotal ? "pass" : "fail",
+  });
+  if (tests.total < baselineTotal) {
+    failures.push(\`Test count dropped from \${baselineTotal} to \${tests.total} (\${d})\`);
+  }
+}
+const hasRegression = failures.length > 0;
+const report = {
+  schemaVersion: 1,
+  timestamp: new Date().toISOString(),
+  exitCode: hasRegression ? 1 : 0,
+  category: hasRegression ? "regression" : "pass",
+  passed: !hasRegression,
+  failures,
+  deltas,
+  baseline: {
+    updatedAt: (baseline.updatedAt as string) ?? "unknown",
+    updatedBy: (baseline.updatedBy as string) ?? "unknown",
+  },
+  durationMs: tests.durationMs,
+  command: "npm test",
+  runner: "unknown",
+};
+writeReport(report);
+if (hasRegression) {
+  console.error("❌ REGRESSION DETECTED");
+  for (const f of failures) console.error(\`   \${f}\`);
+} else {
+  console.log("✅ NO REGRESSION — gate passed");
+}
+for (const d of deltas) {
+  const icon = d.status === "pass" ? "✔" : "✖";
+  console.log(\`  \${icon} \${d.metric}: \${d.baseline} → \${d.current} (\${d.delta})\`);
+}
+process.exit(report.exitCode);
+`;
+    fs.writeFileSync(scriptPath, content);
+    ok("Created scripts/regression-gate.ts");
+    return true;
+}
+// ── 2. Add npm scripts to package.json ──
+function addNpmScripts(cwd) {
+    const pkgPath = path.join(cwd, "package.json");
+    if (!fs.existsSync(pkgPath))
+        return false;
+    let pkg;
+    try {
+        pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
+    }
+    catch {
+        return false;
+    }
+    const scripts = (pkg.scripts ?? {});
+    let changed = false;
+    if (!scripts["eval:regression-gate"]) {
+        scripts["eval:regression-gate"] = "npx tsx scripts/regression-gate.ts";
+        changed = true;
+    }
+    if (!scripts["eval:baseline-update"]) {
+        scripts["eval:baseline-update"] = "npx tsx scripts/regression-gate.ts --update-baseline";
+        changed = true;
+    }
+    if (changed) {
+        pkg.scripts = scripts;
+        fs.writeFileSync(pkgPath, `${JSON.stringify(pkg, null, 2)}\n`);
+        ok("Added eval:regression-gate and eval:baseline-update scripts to package.json");
+    }
+    else {
+        skip("eval:regression-gate and eval:baseline-update scripts already exist");
+    }
+    return true;
+}
+// ── 3. Create baseline governance workflow ──
+function createGovernanceWorkflow(cwd) {
+    const workflowDir = path.join(cwd, ".github", "workflows");
+    const workflowPath = path.join(workflowDir, "baseline-governance.yml");
+    if (fs.existsSync(workflowPath)) {
+        skip(".github/workflows/baseline-governance.yml already exists");
+        return true;
+    }
+    if (!fs.existsSync(workflowDir)) {
+        fs.mkdirSync(workflowDir, { recursive: true });
+    }
+    const workflow = `# Baseline Governance — requires label + approval for baseline changes
+# Auto-generated by: npx evalai upgrade --full
+name: Baseline Governance
+on:
+  pull_request:
+    paths:
+      - 'evals/baseline.json'
+jobs:
+  governance:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Check label
+        run: |
+          LABELS=\${{ toJSON(github.event.pull_request.labels.*.name) }}
+          if echo "$LABELS" | grep -q "baseline-update"; then
+            echo "✅ baseline-update label found"
+          elif echo "$LABELS" | grep -q "baseline-exception"; then
+            echo "⚠️ baseline-exception label found — bypassing delta checks"
+          else
+            echo "❌ Missing 'baseline-update' label"
+            echo "Add the 'baseline-update' label to this PR to update the baseline."
+            exit 1
+          fi
+      - name: Show baseline diff
+        run: |
+          echo "## Baseline Changes" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "\\\`\\\`\\\`diff" >> "$GITHUB_STEP_SUMMARY"
+          git diff HEAD~1 -- evals/baseline.json >> "$GITHUB_STEP_SUMMARY" || echo "No previous baseline" >> "$GITHUB_STEP_SUMMARY"
+          echo "\\\`\\\`\\\`" >> "$GITHUB_STEP_SUMMARY"
+`;
+    fs.writeFileSync(workflowPath, workflow);
+    ok("Created .github/workflows/baseline-governance.yml");
+    return true;
+}
+// ── 4. Upgrade evalai-gate.yml to project mode ──
+function upgradeGateWorkflow(cwd) {
+    const pm = detectPackageManager(cwd);
+    const workflowPath = path.join(cwd, ".github", "workflows", "evalai-gate.yml");
+    if (!fs.existsSync(workflowPath)) {
+        skip("No .github/workflows/evalai-gate.yml found — run evalai init first");
+        return false;
+    }
+    const content = fs.readFileSync(workflowPath, "utf-8");
+    // Already upgraded?
+    if (content.includes("eval:regression-gate")) {
+        skip("evalai-gate.yml already uses project mode");
+        return true;
+    }
+    const installCmd = pm === "pnpm"
+        ? "pnpm install --frozen-lockfile"
+        : pm === "yarn"
+            ? "yarn install --frozen-lockfile"
+            : "npm ci";
+    const setupSteps = pm === "pnpm"
+        ? `      - uses: pnpm/action-setup@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+          cache: pnpm
+      - run: ${installCmd}`
+        : `      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+          cache: ${pm}
+      - run: ${installCmd}`;
+    const workflow = `# EvalAI Regression Gate (Full / Tier 2)
+# Upgraded by: npx evalai upgrade --full
+name: EvalAI Gate
+on:
+  pull_request:
+    branches: [main]
+concurrency:
+  group: evalai-\${{ github.ref }}
+  cancel-in-progress: true
+jobs:
+  regression-gate:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+${setupSteps}
+      - name: Run regression gate
+        run: ${pm} run eval:regression-gate
+      - name: Gate summary
+        if: always()
+        run: npx -y @pauly4010/evalai-sdk@^1 gate --format github
+      - name: Upload report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: regression-report
+          path: evals/regression-report.json
+          if-no-files-found: ignore
+`;
+    fs.writeFileSync(workflowPath, workflow);
+    ok("Upgraded .github/workflows/evalai-gate.yml to project mode (Tier 2)");
+    return true;
+}
+// ── 5. Add CODEOWNERS entry ──
+function addCodeowners(cwd) {
+    const codeownersPath = path.join(cwd, ".github", "CODEOWNERS");
+    const entry = "evals/baseline.json";
+    if (fs.existsSync(codeownersPath)) {
+        const content = fs.readFileSync(codeownersPath, "utf-8");
+        if (content.includes(entry)) {
+            skip("CODEOWNERS already has evals/baseline.json entry");
+            return true;
+        }
+        fs.appendFileSync(codeownersPath, `\n# EvalAI baseline — requires approval\n${entry} @YOUR_TEAM\n`);
+    }
+    else {
+        const dir = path.join(cwd, ".github");
+        if (!fs.existsSync(dir))
+            fs.mkdirSync(dir, { recursive: true });
+        fs.writeFileSync(codeownersPath, `# EvalAI baseline — requires approval\n${entry} @YOUR_TEAM\n`);
+    }
+    ok("Added evals/baseline.json to .github/CODEOWNERS (edit @YOUR_TEAM)");
+    return true;
+}
+function parseUpgradeArgs(argv) {
+    return { full: argv.includes("--full") };
+}
+// ── Main ──
+function runUpgrade(argv) {
+    const args = parseUpgradeArgs(argv);
+    const cwd = process.cwd();
+    if (!args.full) {
+        console.log(`evalai upgrade — Upgrade regression gate
+Usage:
+  evalai upgrade --full    Upgrade from Tier 1 (built-in) to Tier 2 (full gate)
+What --full does:
+  1. Creates scripts/regression-gate.ts (full gate script)
+  2. Adds eval:regression-gate + eval:baseline-update npm scripts
+  3. Creates baseline governance workflow
+  4. Upgrades CI workflow to project mode
+  5. Adds CODEOWNERS entry for baseline
+After upgrading:
+  - evalai gate delegates to your eval:regression-gate script
+  - Baseline changes require PR label + approval
+  - Full metric comparison: golden eval, confidence, latency, cost
+`);
+        return argv.includes("--help") || argv.includes("-h") ? 0 : 1;
+    }
+    console.log("");
+    console.log("  evalai upgrade --full — upgrading to Tier 2\n");
+    // Check preconditions
+    const pkgPath = path.join(cwd, "package.json");
+    if (!fs.existsSync(pkgPath)) {
+        console.error("  ✖ No package.json found. Run this from a Node.js project root.");
+        return 1;
+    }
+    if (!fs.existsSync(path.join(cwd, "evals", "baseline.json"))) {
+        console.error("  ✖ No evals/baseline.json found. Run 'npx evalai init' first.");
+        return 1;
+    }
+    createGateScript(cwd);
+    addNpmScripts(cwd);
+    createGovernanceWorkflow(cwd);
+    upgradeGateWorkflow(cwd);
+    addCodeowners(cwd);
+    console.log("");
+    console.log("  Done! Your repo is now Tier 2.\n");
+    console.log("  What changed:");
+    console.log("    - scripts/regression-gate.ts    Full gate script");
+    console.log("    - package.json                  eval:regression-gate + eval:baseline-update");
+    console.log("    - .github/workflows/            Gate + governance workflows");
+    console.log("    - .github/CODEOWNERS            Baseline requires approval\n");
+    console.log("  Next:");
+    console.log("    git add -A");
+    console.log("    git commit -m 'chore: upgrade EvalAI gate to Tier 2'");
+    console.log("    git push\n");
+    console.log("  Commands:");
+    console.log("    npx evalai gate                 Run full gate locally");
+    console.log("    npx evalai baseline update      Update baseline with real scores");
+    console.log("");
+    return 0;
+}

package/dist/client.request.test.d.ts CHANGED Viewed

	@@ -1 +1 @@
1	- export {};
1	+ export {};