npm - @pauly4010/evalai-sdk - Versions diffs - 1.9.0 → 1.9.1 - Mend

@pauly4010/evalai-sdk 1.9.0 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57)

package/README.md +136 -23
package/dist/assertions.js +51 -18
package/dist/batch.js +8 -2
package/dist/cli/api.js +3 -1
package/dist/cli/check.js +19 -6
package/dist/cli/ci-context.js +3 -1
package/dist/cli/config.js +28 -8
package/dist/cli/diff.js +14 -9
package/dist/cli/discover.js +18 -7
package/dist/cli/doctor.js +43 -9
package/dist/cli/explain.js +37 -11
package/dist/cli/formatters/human.js +4 -1
package/dist/cli/formatters/pr-comment.js +3 -1
package/dist/cli/gate.js +6 -2
package/dist/cli/impact-analysis.js +6 -5
package/dist/cli/index.js +18 -6
package/dist/cli/manifest.d.ts +3 -5
package/dist/cli/manifest.js +21 -14
package/dist/cli/migrate.js +4 -4
package/dist/cli/policy-packs.js +8 -2
package/dist/cli/print-config.js +19 -4
package/dist/cli/regression-gate.js +8 -2
package/dist/cli/report/build-check-report.js +8 -2
package/dist/cli/run.js +11 -5
package/dist/cli/share.js +3 -1
package/dist/cli/upgrade.js +2 -1
package/dist/client.d.ts +16 -19
package/dist/client.js +60 -43
package/dist/client.request.test.d.ts +1 -1
package/dist/client.request.test.js +222 -147
package/dist/context.js +3 -1
package/dist/errors.js +11 -4
package/dist/export.js +3 -1
package/dist/index.d.ts +8 -8
package/dist/index.js +19 -19
package/dist/integrations/anthropic.d.ts +20 -1
package/dist/integrations/openai-eval.js +4 -2
package/dist/integrations/openai.d.ts +24 -1
package/dist/local.js +3 -1
package/dist/logger.js +6 -2
package/dist/pagination.js +6 -2
package/dist/runtime/adapters/config-to-dsl.js +12 -9
package/dist/runtime/adapters/testsuite-to-dsl.d.ts +1 -1
package/dist/runtime/adapters/testsuite-to-dsl.js +11 -6
package/dist/runtime/eval.d.ts +1 -1
package/dist/runtime/eval.js +12 -5
package/dist/runtime/execution-mode.js +13 -9
package/dist/runtime/registry.js +8 -21
package/dist/runtime/run-report.d.ts +0 -2
package/dist/runtime/run-report.js +12 -10
package/dist/testing.js +7 -2
package/dist/types.d.ts +100 -69
package/dist/utils/input-hash.js +4 -1
package/dist/version.d.ts +1 -1
package/dist/version.js +1 -1
package/dist/workflows.js +62 -14
package/package.json +115 -111

package/README.md CHANGED Viewed

@@ -7,41 +7,150 @@
 [![Contract Version](https://img.shields.io/badge/report%20schema-v1-blue.svg)](#)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-**Stop LLM regressions in CI in minutes.**
+**One-command CI for AI evaluation. Complete pipeline: discover → manifest → impact → run → diff → PR summary.**
-Zero to gate in under 5 minutes. No infra. No lock-in. Remove anytime.
+Zero to production CI in 60 seconds. No infra. No lock-in. Remove anytime.
 ---
-## Quick Start (2 minutes)
+## Quick Start (60 seconds)
+Add this to your `.github/workflows/evalai.yml`:
+```yaml
+name: EvalAI CI
+on: [push, pull_request]
+jobs:
+  evalai:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+      - run: npm ci
+      - run: npx @pauly4010/evalai-sdk ci --format github --write-results --base main
+      - uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: evalai-results
+          path: .evalai/
+```
+Create `eval/your-spec.spec.ts`:
+```typescript
+import { defineEval } from "@pauly4010/evalai-sdk";
+defineEval({
+  name: "Basic Math Operations",
+  description: "Test fundamental arithmetic",
+  prompt: "Test: 1+1=2, string concatenation, array includes",
+  expected: "All tests should pass",
+  tags: ["basic", "math"],
+  category: "unit-test"
+});
+```
 ```bash
-npx @pauly4010/evalai-sdk init
-git add evals/ .github/workflows/evalai-gate.yml evalai.config.json
-git commit -m "chore: add EvalAI regression gate"
+git add .github/workflows/evalai.yml eval/
+git commit -m "feat: add EvalAI CI pipeline"
 git push
 ```
-That's it. Open a PR and CI blocks regressions automatically.
-`evalai init` detects your project, creates a baseline from your current tests, and installs a GitHub Actions workflow. No manual config needed.
+That's it! Your CI now:
+- ✅ Discovers evaluation specs automatically
+- ✅ Runs only impacted specs (smart caching)
+- ✅ Compares results against base branch
+- ✅ Posts rich summary in PR with regressions
+- ✅ Exits with proper codes (0=clean, 1=regressions, 2=config)
 ---
-## What `evalai init` does
+## 🚀 New in v1.9.0: One-Command CI
+### `evalai ci` - Complete CI Pipeline
+```bash
+npx @pauly4010/evalai-sdk ci --format github --write-results --base main
+```
-1. **Detects** your Node repo and package manager (npm/yarn/pnpm)
-2. **Runs your tests** to capture a real baseline (pass/fail + test count)
-3. **Creates `evals/baseline.json`** with provenance metadata
-4. **Installs `.github/workflows/evalai-gate.yml`** (package-manager aware)
-5. **Creates `evalai.config.json`**
-6. **Prints next steps** — just commit and push
+**What it does:**
+1. **Discover** - Finds all evaluation specs automatically
+2. **Manifest** - Builds stable manifest if missing
+3. **Impact Analysis** - Runs only specs impacted by changes (optional)
+4. **Run** - Executes evaluations with artifact retention
+5. **Diff** - Compares results against base branch
+6. **PR Summary** - Posts rich markdown summary to GitHub
+7. **Debug Flow** - Prints copy/paste next step on failure
+**Advanced Options:**
+```bash
+npx @pauly4010/evalai-sdk ci --base main --impacted-only    # Run only impacted specs
+npx @pauly4010/evalai-sdk ci --format json --write-results   # JSON output for automation
+npx @pauly4010/evalai-sdk ci --base develop                  # Custom base branch
+```
+### Smart Diffing & GitHub Integration
+```bash
+npx @pauly4010/evalai-sdk diff --base main --head last --format github
+```
+**Features:**
+- 📊 Pass rate delta and score changes
+- 🚨 Regression detection with classifications
+- 📈 Improvements and new specs
+- 📁 Artifact links and technical details
+- 🎯 Exit codes: 0=clean, 1=regressions, 2=config
+### Self-Documenting Failures
+Every failure prints a clear next step:
+```
+🔧 Next step for debugging:
+   Download base artifact and run: evalai diff --base .evalai/base-run.json --head .evalai/last-run.json
+   Artifacts: .evalai/runs/
+```
 ---
 ## CLI Commands
-### Regression Gate (local, no account needed)
+### 🚀 One-Command CI (v1.9.0)
+| Command | Description |
+|---------|-------------|
+| `npx evalai ci` | Complete CI pipeline: discover → manifest → impact → run → diff → PR summary |
+| `npx evalai ci --base main` | Run CI with diff against main branch |
+| `npx evalai ci --impacted-only` | Run only specs impacted by changes |
+| `npx evalai ci --format github` | GitHub Step Summary with rich markdown |
+| `npx evalai ci --format json` | JSON output for automation |
+### Discovery & Manifest
+| Command | Description |
+|---------|-------------|
+| `npx evalai discover` | Find and analyze evaluation specs |
+| `npx evalai discover --manifest` | Generate stable manifest for incremental analysis |
+### Impact Analysis
+| Command | Description |
+|---------|-------------|
+| `npx evalai impact-analysis --base main` | Analyze impact of changes |
+| `npx evalai impact-analysis --changed-files file1.ts,file2.ts` | Analyze specific changed files |
+### Run & Diff
+| Command | Description |
+|---------|-------------|
+| `npx evalai run` | Run evaluation specifications |
+| `npx evalai run --write-results` | Run with artifact retention |
+| `npx evalai diff --base main` | Compare results against base branch |
+| `npx evalai diff --base last --head last` | Compare last two runs |
+| `npx evalai diff --format github` | GitHub Step Summary with regressions |
+### Legacy Regression Gate (local, no account needed)
 | Command | Description |
 |---------|-------------|
@@ -68,25 +177,29 @@ That's it. Open a PR and CI blocks regressions automatically.
 | `npx evalai explain` | Offline report explainer — top failures, root cause classification, suggested fixes |
 | `npx evalai print-config` | Show resolved config with source-of-truth annotations (file/env/default/arg) |
+### Migration Tools
+| Command | Description |
+|---------|-------------|
+| `npx evalai migrate config --in evalai.config.json --out eval/migrated.spec.ts` | Convert legacy config to DSL |
 **Guided failure flow:**
 ```
-evalai check  →  fails  →  "Next: evalai explain"
+evalai ci  →  fails  →  "Next: evalai explain --report .evalai/last-run.json"
                               ↓
                    evalai explain  →  root causes + fixes
 ```
-**GitHub Actions step summary** — gate result at a glance:
+**GitHub Actions step summary** — CI result at a glance with regressions and artifacts:
-![GitHub Actions step summary showing gate pass/fail with delta table](../../docs/images/evalai-gate-step-summary.svg)
+![GitHub Actions step summary showing CI pass/fail with delta table](../../docs/images/evalai-gate-step-summary.svg)
 **`evalai explain` terminal output** — root causes + fix commands:
 ![Terminal output of evalai explain showing top failures and suggested fixes](../../docs/images/evalai-explain-terminal.svg)
-`check` automatically writes `.evalai/last-report.json` so `explain` works with zero flags.
-`doctor` uses exit codes: **0** = ready, **2** = not ready, **3** = infra error. Use `--report` for a JSON diagnostic bundle.
+All commands automatically write artifacts so `explain` works with zero flags.
 ### Gate Exit Codes

package/dist/assertions.js CHANGED Viewed

@@ -86,7 +86,9 @@ class Expectation {
             expected: substring,
             actual: text,
             message: message ||
-                (passed ? `Text contains "${substring}"` : `Text does not contain "${substring}"`),
+                (passed
+                    ? `Text contains "${substring}"`
+                    : `Text does not contain "${substring}"`),
         };
     }
     /**
@@ -103,7 +105,9 @@ class Expectation {
             expected: keywords,
             actual: text,
             message: message ||
-                (passed ? `Contains all keywords` : `Missing keywords: ${missingKeywords.join(", ")}`),
+                (passed
+                    ? `Contains all keywords`
+                    : `Missing keywords: ${missingKeywords.join(", ")}`),
         };
     }
     /**
@@ -119,7 +123,9 @@ class Expectation {
             expected: `not containing "${substring}"`,
             actual: text,
             message: message ||
-                (passed ? `Text does not contain "${substring}"` : `Text contains "${substring}"`),
+                (passed
+                    ? `Text does not contain "${substring}"`
+                    : `Text contains "${substring}"`),
         };
     }
     /**
@@ -144,7 +150,8 @@ class Expectation {
             passed,
             expected: "no PII",
             actual: foundPII.length > 0 ? `Found: ${foundPII.join(", ")}` : "no PII",
-            message: message || (passed ? "No PII detected" : `PII detected: ${foundPII.join(", ")}`),
+            message: message ||
+                (passed ? "No PII detected" : `PII detected: ${foundPII.join(", ")}`),
         };
     }
     /**
@@ -159,7 +166,10 @@ class Expectation {
             passed,
             expected: pattern.toString(),
             actual: text,
-            message: message || (passed ? `Matches pattern ${pattern}` : `Does not match pattern ${pattern}`),
+            message: message ||
+                (passed
+                    ? `Matches pattern ${pattern}`
+                    : `Does not match pattern ${pattern}`),
         };
     }
     /**
@@ -205,7 +215,8 @@ class Expectation {
             passed,
             expected: schema,
             actual: parsedJson,
-            message: message || (passed ? "JSON matches schema" : "JSON does not match schema"),
+            message: message ||
+                (passed ? "JSON matches schema" : "JSON does not match schema"),
         };
     }
     /**
@@ -253,7 +264,10 @@ class Expectation {
             passed,
             expected,
             actual,
-            message: message || (passed ? `Sentiment is ${expected}` : `Expected ${expected}, got ${actual}`),
+            message: message ||
+                (passed
+                    ? `Sentiment is ${expected}`
+                    : `Expected ${expected}, got ${actual}`),
         };
     }
     /**
@@ -269,7 +283,10 @@ class Expectation {
             passed,
             expected: range,
             actual: length,
-            message: message || (passed ? `Length ${length} is within range` : `Length ${length} not in range`),
+            message: message ||
+                (passed
+                    ? `Length ${length} is within range`
+                    : `Length ${length} not in range`),
         };
     }
     /**
@@ -284,9 +301,13 @@ class Expectation {
             name: "toNotHallucinate",
             passed,
             expected: "all ground truth facts",
-            actual: missingFacts.length > 0 ? `Missing: ${missingFacts.join(", ")}` : "all facts present",
+            actual: missingFacts.length > 0
+                ? `Missing: ${missingFacts.join(", ")}`
+                : "all facts present",
             message: message ||
-                (passed ? "No hallucinations detected" : `Missing facts: ${missingFacts.join(", ")}`),
+                (passed
+                    ? "No hallucinations detected"
+                    : `Missing facts: ${missingFacts.join(", ")}`),
         };
     }
     /**
@@ -301,7 +322,10 @@ class Expectation {
             passed,
             expected: `<= ${maxMs}ms`,
             actual: `${duration}ms`,
-            message: message || (passed ? `${duration}ms within limit` : `${duration}ms exceeds ${maxMs}ms`),
+            message: message ||
+                (passed
+                    ? `${duration}ms within limit`
+                    : `${duration}ms exceeds ${maxMs}ms`),
         };
     }
     /**
@@ -344,7 +368,8 @@ class Expectation {
             passed,
             expected: `> ${expected}`,
             actual: value,
-            message: message || (passed ? `${value} > ${expected}` : `${value} <= ${expected}`),
+            message: message ||
+                (passed ? `${value} > ${expected}` : `${value} <= ${expected}`),
         };
     }
     /**
@@ -359,7 +384,8 @@ class Expectation {
             passed,
             expected: `< ${expected}`,
             actual: value,
-            message: message || (passed ? `${value} < ${expected}` : `${value} >= ${expected}`),
+            message: message ||
+                (passed ? `${value} < ${expected}` : `${value} >= ${expected}`),
         };
     }
     /**
@@ -374,7 +400,8 @@ class Expectation {
             passed,
             expected: `between ${min} and ${max}`,
             actual: value,
-            message: message || (passed ? `${value} is within range` : `${value} is outside range`),
+            message: message ||
+                (passed ? `${value} is within range` : `${value} is outside range`),
         };
     }
     /**
@@ -389,7 +416,8 @@ class Expectation {
             passed: hasCodeBlock,
             expected: "code block",
             actual: text,
-            message: message || (hasCodeBlock ? "Contains code block" : "No code block found"),
+            message: message ||
+                (hasCodeBlock ? "Contains code block" : "No code block found"),
         };
     }
     /**
@@ -405,9 +433,13 @@ class Expectation {
             name: "toBeProfessional",
             passed,
             expected: "professional tone",
-            actual: foundProfanity.length > 0 ? `Found: ${foundProfanity.join(", ")}` : "professional",
+            actual: foundProfanity.length > 0
+                ? `Found: ${foundProfanity.join(", ")}`
+                : "professional",
             message: message ||
-                (passed ? "Professional tone" : `Unprofessional language: ${foundProfanity.join(", ")}`),
+                (passed
+                    ? "Professional tone"
+                    : `Unprofessional language: ${foundProfanity.join(", ")}`),
         };
     }
     /**
@@ -432,7 +464,8 @@ class Expectation {
             passed,
             expected: "proper grammar",
             actual: issues.length > 0 ? `Issues: ${issues.join(", ")}` : "proper grammar",
-            message: message || (passed ? "Proper grammar" : `Grammar issues: ${issues.join(", ")}`),
+            message: message ||
+                (passed ? "Proper grammar" : `Grammar issues: ${issues.join(", ")}`),
         };
     }
 }

package/dist/batch.js CHANGED Viewed

@@ -81,7 +81,8 @@ class RequestBatcher {
                         pendingRequest.resolve(response.data);
                     }
                     else {
-                        pendingRequest.reject(new Error(response.error || `Request failed with status ${response.status}`));
+                        pendingRequest.reject(new Error(response.error ||
+                            `Request failed with status ${response.status}`));
                     }
                 }
             }
@@ -149,7 +150,12 @@ function canBatch(method, endpoint) {
     if (method !== "GET") {
         return false;
     }
-    const batchableEndpoints = ["/traces", "/evaluations", "/annotations", "/results"];
+    const batchableEndpoints = [
+        "/traces",
+        "/evaluations",
+        "/annotations",
+        "/results",
+    ];
     return batchableEndpoints.some((pattern) => endpoint.includes(pattern));
 }
 /**

package/dist/cli/api.js CHANGED Viewed

@@ -73,7 +73,9 @@ async function publishShare(baseUrl, apiKey, evaluationId, exportData, evaluatio
         exportData,
         shareScope: "run",
         evaluationRunId,
-        ...(options?.expiresInDays != null && { expiresInDays: options.expiresInDays }),
+        ...(options?.expiresInDays != null && {
+            expiresInDays: options.expiresInDays,
+        }),
     };
     const url = `${baseUrl.replace(/\/$/, "")}/api/evaluations/${evaluationId}/publish`;
     try {

package/dist/cli/check.js CHANGED Viewed

@@ -183,7 +183,11 @@ function parseArgs(argv) {
         };
     }
     if (Number.isNaN(minScore) || minScore < 0 || minScore > 100) {
-        return { ok: false, exitCode: constants_1.EXIT.BAD_ARGS, message: "Error: --minScore must be 0-100" };
+        return {
+            ok: false,
+            exitCode: constants_1.EXIT.BAD_ARGS,
+            message: "Error: --minScore must be 0-100",
+        };
     }
     if (minN !== undefined && (Number.isNaN(minN) || minN < 1)) {
         return {
@@ -210,9 +214,15 @@ function parseArgs(argv) {
             onFail,
             share,
             prCommentOut,
-            maxCostUsd: maxCostUsd != null && !Number.isNaN(maxCostUsd) ? maxCostUsd : undefined,
-            maxLatencyMs: maxLatencyMs != null && !Number.isNaN(maxLatencyMs) ? maxLatencyMs : undefined,
-            maxCostDeltaUsd: maxCostDeltaUsd != null && !Number.isNaN(maxCostDeltaUsd) ? maxCostDeltaUsd : undefined,
+            maxCostUsd: maxCostUsd != null && !Number.isNaN(maxCostUsd)
+                ? maxCostUsd
+                : undefined,
+            maxLatencyMs: maxLatencyMs != null && !Number.isNaN(maxLatencyMs)
+                ? maxLatencyMs
+                : undefined,
+            maxCostDeltaUsd: maxCostDeltaUsd != null && !Number.isNaN(maxCostDeltaUsd)
+                ? maxCostDeltaUsd
+                : undefined,
         },
     };
 }
@@ -297,7 +307,8 @@ async function runCheck(args) {
         runDetails?.results &&
         quality?.evaluationRunId) {
         const importResults = runDetails.results
-            .filter((r) => r.testCaseId != null && (r.status === "passed" || r.status === "failed"))
+            .filter((r) => r.testCaseId != null &&
+            (r.status === "passed" || r.status === "failed"))
             .map((r) => ({
             testCaseId: r.testCaseId,
             status: r.status,
@@ -306,7 +317,9 @@ async function runCheck(args) {
             assertionsJson: r.assertionsJson,
         }));
         if (importResults.length > 0) {
-            const idempotencyKey = ci ? (0, ci_context_1.computeIdempotencyKey)(args.evaluationId, ci) : undefined;
+            const idempotencyKey = ci
+                ? (0, ci_context_1.computeIdempotencyKey)(args.evaluationId, ci)
+                : undefined;
             const importRes = await (0, api_1.importRunOnFail)(args.baseUrl, args.apiKey, args.evaluationId, importResults, {
                 idempotencyKey,
                 ci,

package/dist/cli/ci-context.js CHANGED Viewed

@@ -89,7 +89,9 @@ function captureCiContext() {
         provider,
         repo,
         sha,
-        branch: ref?.startsWith("refs/heads/") ? ref.slice("refs/heads/".length) : ref,
+        branch: ref?.startsWith("refs/heads/")
+            ? ref.slice("refs/heads/".length)
+            : ref,
         runUrl,
         actor,
         pr,

package/dist/cli/config.js CHANGED Viewed

@@ -43,7 +43,11 @@ exports.mergeConfigWithArgs = mergeConfigWithArgs;
 const fs = __importStar(require("node:fs"));
 const path = __importStar(require("node:path"));
 const profiles_1 = require("./profiles");
-const CONFIG_FILES = ["evalai.config.json", "evalai.config.js", "evalai.config.cjs"];
+const CONFIG_FILES = [
+    "evalai.config.json",
+    "evalai.config.js",
+    "evalai.config.cjs",
+];
 /**
  * Find config file path in directory, walking up to root
  */
@@ -113,7 +117,11 @@ function loadConfig(cwd = process.cwd()) {
             }
             for (const key of Object.keys(config.packages)) {
                 if (relNorm === key || relNorm.startsWith(`${key}/`)) {
-                    return { ...config, ...config.packages[key], packages: config.packages };
+                    return {
+                        ...config,
+                        ...config.packages[key],
+                        packages: config.packages,
+                    };
                 }
             }
         }
@@ -156,11 +164,14 @@ function mergeConfigWithArgs(config, args) {
             merged.minScore = profile.minScore;
         if (merged.maxDrop === undefined && args.maxDrop === undefined)
             merged.maxDrop = profile.maxDrop;
-        if (merged.warnDrop === undefined && args.warnDrop === undefined && "warnDrop" in profile)
+        if (merged.warnDrop === undefined &&
+            args.warnDrop === undefined &&
+            "warnDrop" in profile)
             merged.warnDrop = profile.warnDrop;
         if (merged.minN === undefined && args.minN === undefined)
             merged.minN = profile.minN;
-        if (merged.allowWeakEvidence === undefined && args.allowWeakEvidence === undefined)
+        if (merged.allowWeakEvidence === undefined &&
+            args.allowWeakEvidence === undefined)
             merged.allowWeakEvidence = profile.allowWeakEvidence;
     }
     // Args override
@@ -172,18 +183,27 @@ function mergeConfigWithArgs(config, args) {
     }
     if (args.minScore !== undefined) {
         merged.minScore =
-            typeof args.minScore === "number" ? args.minScore : parseInt(String(args.minScore), 10);
+            typeof args.minScore === "number"
+                ? args.minScore
+                : parseInt(String(args.minScore), 10);
     }
     if (args.maxDrop !== undefined) {
         merged.maxDrop =
-            typeof args.maxDrop === "number" ? args.maxDrop : parseInt(String(args.maxDrop), 10);
+            typeof args.maxDrop === "number"
+                ? args.maxDrop
+                : parseInt(String(args.maxDrop), 10);
     }
     if (args.warnDrop !== undefined) {
         merged.warnDrop =
-            typeof args.warnDrop === "number" ? args.warnDrop : parseInt(String(args.warnDrop), 10);
+            typeof args.warnDrop === "number"
+                ? args.warnDrop
+                : parseInt(String(args.warnDrop), 10);
     }
     if (args.minN !== undefined) {
-        merged.minN = typeof args.minN === "number" ? args.minN : parseInt(String(args.minN), 10);
+        merged.minN =
+            typeof args.minN === "number"
+                ? args.minN
+                : parseInt(String(args.minN), 10);
     }
     if (args.allowWeakEvidence !== undefined) {
         merged.allowWeakEvidence =

package/dist/cli/diff.js CHANGED Viewed

@@ -186,7 +186,7 @@ async function findBaselineRun() {
         const content = await fs.readFile(workspace.baselinePath, "utf-8");
         return JSON.parse(content);
     }
-    catch (error) {
+    catch (_error) {
         // Baseline file doesn't exist, try index
     }
     // Check index for baseline runId
@@ -200,7 +200,7 @@ async function findBaselineRun() {
             return await loadRunReport(`.evalai/runs/${oldestRunId}.json`);
         }
     }
-    catch (error) {
+    catch (_error) {
         // Index doesn't exist
     }
     throw new Error("No baseline run found. Set a baseline with 'evalai diff --base <runId> --head last --set-baseline' or create .evalai/baseline-run.json.");
@@ -239,7 +239,8 @@ async function findPreviousRun() {
         return await loadRunReport(`.evalai/runs/${previousRunId}.json`);
     }
     catch (error) {
-        if (error instanceof Error && error.message.includes("Need at least 2 runs")) {
+        if (error instanceof Error &&
+            error.message.includes("Need at least 2 runs")) {
             throw error;
         }
         throw new Error("No run history found. Run 'evalai run --write-results' first.");
@@ -261,7 +262,7 @@ async function isBranchName(name) {
 /**
  * Find last run for a branch
  */
-async function findLastRunForBranch(branch) {
+async function findLastRunForBranch(_branch) {
     // For now, just look for .evalai/last-run.json
     // In a real implementation, this would:
     // 1. Check CI artifacts for the branch
@@ -272,7 +273,7 @@ async function findLastRunForBranch(branch) {
         const content = await fs.readFile(lastRunPath, "utf-8");
         return JSON.parse(content);
     }
-    catch (error) {
+    catch (_error) {
         return null;
     }
 }
@@ -284,7 +285,7 @@ async function loadRunReport(filePath) {
         const content = await fs.readFile(path.resolve(filePath), "utf-8");
         return JSON.parse(content);
     }
-    catch (error) {
+    catch (_error) {
         return null;
     }
 }
@@ -457,8 +458,12 @@ function calculateDiffSummary(base, head, changedSpecs) {
     const headScores = head.results
         .filter((r) => r.result.score !== undefined)
         .map((r) => r.result.score);
-    const baseAvgScore = baseScores.length > 0 ? baseScores.reduce((a, b) => a + b, 0) / baseScores.length : 0;
-    const headAvgScore = headScores.length > 0 ? headScores.reduce((a, b) => a + b, 0) / headScores.length : 0;
+    const baseAvgScore = baseScores.length > 0
+        ? baseScores.reduce((a, b) => a + b, 0) / baseScores.length
+        : 0;
+    const headAvgScore = headScores.length > 0
+        ? headScores.reduce((a, b) => a + b, 0) / headScores.length
+        : 0;
     const scoreDelta = round(headAvgScore - baseAvgScore, 4);
     // Count classifications
     const regressions = changedSpecs.filter((s) => ["new_failure", "score_drop", "execution_error"].includes(s.classification)).length;
@@ -542,7 +547,7 @@ async function writeGitHubStepSummary(result) {
     const summaryPath = process.env.GITHUB_STEP_SUMMARY;
     try {
         const summary = generateGitHubSummary(result);
-        await fs.appendFile(summaryPath, summary + "\n", "utf-8");
+        await fs.appendFile(summaryPath, `${summary}\n`, "utf-8");
     }
     catch (error) {
         console.warn("Warning: Could not write GitHub Step Summary:", error);