npm - @runchr/gstack-antigravity - Versions diffs - 0.1.0 - Mend

@runchr/gstack-antigravity 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (297) hide show

package/.agents/rules/ETHOS.md +129 -0
package/.agents/rules/global-gstack.md +117 -0
package/.agents/rules/persona-gstack-autoplan.md +14 -0
package/.agents/rules/persona-gstack-benchmark.md +14 -0
package/.agents/rules/persona-gstack-browse.md +14 -0
package/.agents/rules/persona-gstack-canary.md +14 -0
package/.agents/rules/persona-gstack-careful.md +14 -0
package/.agents/rules/persona-gstack-codex.md +14 -0
package/.agents/rules/persona-gstack-cso.md +14 -0
package/.agents/rules/persona-gstack-design-consultation.md +14 -0
package/.agents/rules/persona-gstack-design-review.md +14 -0
package/.agents/rules/persona-gstack-document-release.md +14 -0
package/.agents/rules/persona-gstack-freeze.md +14 -0
package/.agents/rules/persona-gstack-gstack-upgrade.md +14 -0
package/.agents/rules/persona-gstack-guard.md +14 -0
package/.agents/rules/persona-gstack-investigate.md +14 -0
package/.agents/rules/persona-gstack-land-and-deploy.md +14 -0
package/.agents/rules/persona-gstack-office-hours.md +14 -0
package/.agents/rules/persona-gstack-plan-ceo-review.md +14 -0
package/.agents/rules/persona-gstack-plan-design-review.md +14 -0
package/.agents/rules/persona-gstack-plan-eng-review.md +14 -0
package/.agents/rules/persona-gstack-qa-only.md +14 -0
package/.agents/rules/persona-gstack-qa.md +14 -0
package/.agents/rules/persona-gstack-retro.md +14 -0
package/.agents/rules/persona-gstack-review.md +14 -0
package/.agents/rules/persona-gstack-setup-browser-cookies.md +14 -0
package/.agents/rules/persona-gstack-setup-deploy.md +14 -0
package/.agents/rules/persona-gstack-ship.md +14 -0
package/.agents/rules/persona-gstack-unfreeze.md +14 -0
package/.agents/rules/persona-gstack.md +40 -0
package/.agents/rules/recursive-identities.md +22 -0
package/.agents/workflows/autoplan.md +30 -0
package/.agents/workflows/benchmark.md +31 -0
package/.agents/workflows/browse.md +26 -0
package/.agents/workflows/canary.md +33 -0
package/.agents/workflows/careful.md +22 -0
package/.agents/workflows/codex.md +36 -0
package/.agents/workflows/cso.md +29 -0
package/.agents/workflows/design-consultation.md +28 -0
package/.agents/workflows/design-review.md +28 -0
package/.agents/workflows/document-release.md +32 -0
package/.agents/workflows/freeze.md +17 -0
package/.agents/workflows/gstack-upgrade.md +54 -0
package/.agents/workflows/gstack.md +56 -0
package/.agents/workflows/guard.md +18 -0
package/.agents/workflows/investigate.md +37 -0
package/.agents/workflows/land-and-deploy.md +35 -0
package/.agents/workflows/office-hours.md +27 -0
package/.agents/workflows/plan-ceo-review.md +34 -0
package/.agents/workflows/plan-design-review.md +31 -0
package/.agents/workflows/plan-eng-review.md +28 -0
package/.agents/workflows/qa-only.md +28 -0
package/.agents/workflows/qa.md +73 -0
package/.agents/workflows/retro.md +34 -0
package/.agents/workflows/review.md +30 -0
package/.agents/workflows/setup-browser-cookies.md +15 -0
package/.agents/workflows/setup-cookies.md +8 -0
package/.agents/workflows/setup-deploy.md +21 -0
package/.agents/workflows/ship.md +93 -0
package/.agents/workflows/unfreeze.md +12 -0
package/LICENSE +22 -0
package/README.md +189 -0
package/README_KO.md +191 -0
package/bin/install.js +105 -0
package/gstack-origin/.agents/skills/gstack/SKILL.md +651 -0
package/gstack-origin/.agents/skills/gstack-autoplan/SKILL.md +678 -0
package/gstack-origin/.agents/skills/gstack-benchmark/SKILL.md +482 -0
package/gstack-origin/.agents/skills/gstack-browse/SKILL.md +511 -0
package/gstack-origin/.agents/skills/gstack-canary/SKILL.md +486 -0
package/gstack-origin/.agents/skills/gstack-careful/SKILL.md +50 -0
package/gstack-origin/.agents/skills/gstack-cso/SKILL.md +607 -0
package/gstack-origin/.agents/skills/gstack-design-consultation/SKILL.md +615 -0
package/gstack-origin/.agents/skills/gstack-design-review/SKILL.md +988 -0
package/gstack-origin/.agents/skills/gstack-document-release/SKILL.md +604 -0
package/gstack-origin/.agents/skills/gstack-freeze/SKILL.md +67 -0
package/gstack-origin/.agents/skills/gstack-guard/SKILL.md +62 -0
package/gstack-origin/.agents/skills/gstack-investigate/SKILL.md +415 -0
package/gstack-origin/.agents/skills/gstack-land-and-deploy/SKILL.md +873 -0
package/gstack-origin/.agents/skills/gstack-office-hours/SKILL.md +986 -0
package/gstack-origin/.agents/skills/gstack-plan-ceo-review/SKILL.md +1268 -0
package/gstack-origin/.agents/skills/gstack-plan-design-review/SKILL.md +668 -0
package/gstack-origin/.agents/skills/gstack-plan-eng-review/SKILL.md +826 -0
package/gstack-origin/.agents/skills/gstack-qa/SKILL.md +1006 -0
package/gstack-origin/.agents/skills/gstack-qa-only/SKILL.md +626 -0
package/gstack-origin/.agents/skills/gstack-retro/SKILL.md +1065 -0
package/gstack-origin/.agents/skills/gstack-review/SKILL.md +704 -0
package/gstack-origin/.agents/skills/gstack-setup-browser-cookies/SKILL.md +325 -0
package/gstack-origin/.agents/skills/gstack-setup-deploy/SKILL.md +450 -0
package/gstack-origin/.agents/skills/gstack-ship/SKILL.md +1312 -0
package/gstack-origin/.agents/skills/gstack-unfreeze/SKILL.md +36 -0
package/gstack-origin/.agents/skills/gstack-upgrade/SKILL.md +220 -0
package/gstack-origin/.env.example +5 -0
package/gstack-origin/.github/workflows/skill-docs.yml +17 -0
package/gstack-origin/AGENTS.md +49 -0
package/gstack-origin/ARCHITECTURE.md +359 -0
package/gstack-origin/BROWSER.md +271 -0
package/gstack-origin/CHANGELOG.md +800 -0
package/gstack-origin/CLAUDE.md +284 -0
package/gstack-origin/CONTRIBUTING.md +370 -0
package/gstack-origin/ETHOS.md +129 -0
package/gstack-origin/LICENSE +21 -0
package/gstack-origin/README.md +228 -0
package/gstack-origin/SKILL.md +657 -0
package/gstack-origin/SKILL.md.tmpl +281 -0
package/gstack-origin/TODOS.md +564 -0
package/gstack-origin/VERSION +1 -0
package/gstack-origin/autoplan/SKILL.md +689 -0
package/gstack-origin/autoplan/SKILL.md.tmpl +416 -0
package/gstack-origin/benchmark/SKILL.md +489 -0
package/gstack-origin/benchmark/SKILL.md.tmpl +233 -0
package/gstack-origin/bin/dev-setup +68 -0
package/gstack-origin/bin/dev-teardown +56 -0
package/gstack-origin/bin/gstack-analytics +191 -0
package/gstack-origin/bin/gstack-community-dashboard +113 -0
package/gstack-origin/bin/gstack-config +38 -0
package/gstack-origin/bin/gstack-diff-scope +71 -0
package/gstack-origin/bin/gstack-global-discover.ts +591 -0
package/gstack-origin/bin/gstack-repo-mode +93 -0
package/gstack-origin/bin/gstack-review-log +9 -0
package/gstack-origin/bin/gstack-review-read +12 -0
package/gstack-origin/bin/gstack-slug +15 -0
package/gstack-origin/bin/gstack-telemetry-log +158 -0
package/gstack-origin/bin/gstack-telemetry-sync +127 -0
package/gstack-origin/bin/gstack-update-check +196 -0
package/gstack-origin/browse/SKILL.md +517 -0
package/gstack-origin/browse/SKILL.md.tmpl +141 -0
package/gstack-origin/browse/bin/find-browse +21 -0
package/gstack-origin/browse/bin/remote-slug +14 -0
package/gstack-origin/browse/scripts/build-node-server.sh +48 -0
package/gstack-origin/browse/src/browser-manager.ts +634 -0
package/gstack-origin/browse/src/buffers.ts +137 -0
package/gstack-origin/browse/src/bun-polyfill.cjs +109 -0
package/gstack-origin/browse/src/cli.ts +420 -0
package/gstack-origin/browse/src/commands.ts +111 -0
package/gstack-origin/browse/src/config.ts +150 -0
package/gstack-origin/browse/src/cookie-import-browser.ts +417 -0
package/gstack-origin/browse/src/cookie-picker-routes.ts +207 -0
package/gstack-origin/browse/src/cookie-picker-ui.ts +541 -0
package/gstack-origin/browse/src/find-browse.ts +61 -0
package/gstack-origin/browse/src/meta-commands.ts +269 -0
package/gstack-origin/browse/src/platform.ts +17 -0
package/gstack-origin/browse/src/read-commands.ts +335 -0
package/gstack-origin/browse/src/server.ts +369 -0
package/gstack-origin/browse/src/snapshot.ts +398 -0
package/gstack-origin/browse/src/url-validation.ts +91 -0
package/gstack-origin/browse/src/write-commands.ts +352 -0
package/gstack-origin/browse/test/bun-polyfill.test.ts +72 -0
package/gstack-origin/browse/test/commands.test.ts +1836 -0
package/gstack-origin/browse/test/config.test.ts +250 -0
package/gstack-origin/browse/test/cookie-import-browser.test.ts +397 -0
package/gstack-origin/browse/test/cookie-picker-routes.test.ts +205 -0
package/gstack-origin/browse/test/find-browse.test.ts +50 -0
package/gstack-origin/browse/test/fixtures/basic.html +33 -0
package/gstack-origin/browse/test/fixtures/cursor-interactive.html +22 -0
package/gstack-origin/browse/test/fixtures/dialog.html +15 -0
package/gstack-origin/browse/test/fixtures/empty.html +2 -0
package/gstack-origin/browse/test/fixtures/forms.html +55 -0
package/gstack-origin/browse/test/fixtures/qa-eval-checkout.html +108 -0
package/gstack-origin/browse/test/fixtures/qa-eval-spa.html +98 -0
package/gstack-origin/browse/test/fixtures/qa-eval.html +51 -0
package/gstack-origin/browse/test/fixtures/responsive.html +49 -0
package/gstack-origin/browse/test/fixtures/snapshot.html +55 -0
package/gstack-origin/browse/test/fixtures/spa.html +24 -0
package/gstack-origin/browse/test/fixtures/states.html +17 -0
package/gstack-origin/browse/test/fixtures/upload.html +25 -0
package/gstack-origin/browse/test/gstack-config.test.ts +125 -0
package/gstack-origin/browse/test/gstack-update-check.test.ts +467 -0
package/gstack-origin/browse/test/handoff.test.ts +235 -0
package/gstack-origin/browse/test/path-validation.test.ts +63 -0
package/gstack-origin/browse/test/platform.test.ts +37 -0
package/gstack-origin/browse/test/snapshot.test.ts +467 -0
package/gstack-origin/browse/test/test-server.ts +57 -0
package/gstack-origin/browse/test/url-validation.test.ts +72 -0
package/gstack-origin/canary/SKILL.md +493 -0
package/gstack-origin/canary/SKILL.md.tmpl +220 -0
package/gstack-origin/careful/SKILL.md +59 -0
package/gstack-origin/careful/SKILL.md.tmpl +57 -0
package/gstack-origin/careful/bin/check-careful.sh +112 -0
package/gstack-origin/codex/SKILL.md +677 -0
package/gstack-origin/codex/SKILL.md.tmpl +356 -0
package/gstack-origin/conductor.json +6 -0
package/gstack-origin/cso/SKILL.md +615 -0
package/gstack-origin/cso/SKILL.md.tmpl +376 -0
package/gstack-origin/design-consultation/SKILL.md +625 -0
package/gstack-origin/design-consultation/SKILL.md.tmpl +369 -0
package/gstack-origin/design-review/SKILL.md +998 -0
package/gstack-origin/design-review/SKILL.md.tmpl +262 -0
package/gstack-origin/docs/images/github-2013.png +0 -0
package/gstack-origin/docs/images/github-2026.png +0 -0
package/gstack-origin/docs/skills.md +877 -0
package/gstack-origin/document-release/SKILL.md +613 -0
package/gstack-origin/document-release/SKILL.md.tmpl +357 -0
package/gstack-origin/freeze/SKILL.md +82 -0
package/gstack-origin/freeze/SKILL.md.tmpl +80 -0
package/gstack-origin/freeze/bin/check-freeze.sh +68 -0
package/gstack-origin/gstack-upgrade/SKILL.md +226 -0
package/gstack-origin/gstack-upgrade/SKILL.md.tmpl +224 -0
package/gstack-origin/guard/SKILL.md +82 -0
package/gstack-origin/guard/SKILL.md.tmpl +80 -0
package/gstack-origin/investigate/SKILL.md +435 -0
package/gstack-origin/investigate/SKILL.md.tmpl +196 -0
package/gstack-origin/land-and-deploy/SKILL.md +880 -0
package/gstack-origin/land-and-deploy/SKILL.md.tmpl +575 -0
package/gstack-origin/office-hours/SKILL.md +996 -0
package/gstack-origin/office-hours/SKILL.md.tmpl +624 -0
package/gstack-origin/package.json +55 -0
package/gstack-origin/plan-ceo-review/SKILL.md +1277 -0
package/gstack-origin/plan-ceo-review/SKILL.md.tmpl +838 -0
package/gstack-origin/plan-design-review/SKILL.md +676 -0
package/gstack-origin/plan-design-review/SKILL.md.tmpl +314 -0
package/gstack-origin/plan-eng-review/SKILL.md +836 -0
package/gstack-origin/plan-eng-review/SKILL.md.tmpl +279 -0
package/gstack-origin/qa/SKILL.md +1016 -0
package/gstack-origin/qa/SKILL.md.tmpl +316 -0
package/gstack-origin/qa/references/issue-taxonomy.md +85 -0
package/gstack-origin/qa/templates/qa-report-template.md +126 -0
package/gstack-origin/qa-only/SKILL.md +633 -0
package/gstack-origin/qa-only/SKILL.md.tmpl +101 -0
package/gstack-origin/retro/SKILL.md +1072 -0
package/gstack-origin/retro/SKILL.md.tmpl +833 -0
package/gstack-origin/review/SKILL.md +849 -0
package/gstack-origin/review/SKILL.md.tmpl +259 -0
package/gstack-origin/review/TODOS-format.md +62 -0
package/gstack-origin/review/checklist.md +190 -0
package/gstack-origin/review/design-checklist.md +132 -0
package/gstack-origin/review/greptile-triage.md +220 -0
package/gstack-origin/scripts/analytics.ts +190 -0
package/gstack-origin/scripts/dev-skill.ts +82 -0
package/gstack-origin/scripts/eval-compare.ts +96 -0
package/gstack-origin/scripts/eval-list.ts +116 -0
package/gstack-origin/scripts/eval-select.ts +86 -0
package/gstack-origin/scripts/eval-summary.ts +187 -0
package/gstack-origin/scripts/eval-watch.ts +172 -0
package/gstack-origin/scripts/gen-skill-docs.ts +2414 -0
package/gstack-origin/scripts/skill-check.ts +167 -0
package/gstack-origin/setup +269 -0
package/gstack-origin/setup-browser-cookies/SKILL.md +330 -0
package/gstack-origin/setup-browser-cookies/SKILL.md.tmpl +74 -0
package/gstack-origin/setup-deploy/SKILL.md +459 -0
package/gstack-origin/setup-deploy/SKILL.md.tmpl +220 -0
package/gstack-origin/ship/SKILL.md +1457 -0
package/gstack-origin/ship/SKILL.md.tmpl +528 -0
package/gstack-origin/supabase/config.sh +10 -0
package/gstack-origin/supabase/functions/community-pulse/index.ts +59 -0
package/gstack-origin/supabase/functions/telemetry-ingest/index.ts +135 -0
package/gstack-origin/supabase/functions/update-check/index.ts +37 -0
package/gstack-origin/supabase/migrations/001_telemetry.sql +89 -0
package/gstack-origin/test/analytics.test.ts +277 -0
package/gstack-origin/test/codex-e2e.test.ts +197 -0
package/gstack-origin/test/fixtures/coverage-audit-fixture.ts +76 -0
package/gstack-origin/test/fixtures/eval-baselines.json +7 -0
package/gstack-origin/test/fixtures/qa-eval-checkout-ground-truth.json +43 -0
package/gstack-origin/test/fixtures/qa-eval-ground-truth.json +43 -0
package/gstack-origin/test/fixtures/qa-eval-spa-ground-truth.json +43 -0
package/gstack-origin/test/fixtures/review-eval-design-slop.css +86 -0
package/gstack-origin/test/fixtures/review-eval-design-slop.html +41 -0
package/gstack-origin/test/fixtures/review-eval-enum-diff.rb +30 -0
package/gstack-origin/test/fixtures/review-eval-enum.rb +27 -0
package/gstack-origin/test/fixtures/review-eval-vuln.rb +14 -0
package/gstack-origin/test/gemini-e2e.test.ts +173 -0
package/gstack-origin/test/gen-skill-docs.test.ts +1049 -0
package/gstack-origin/test/global-discover.test.ts +187 -0
package/gstack-origin/test/helpers/codex-session-runner.ts +282 -0
package/gstack-origin/test/helpers/e2e-helpers.ts +239 -0
package/gstack-origin/test/helpers/eval-store.test.ts +548 -0
package/gstack-origin/test/helpers/eval-store.ts +689 -0
package/gstack-origin/test/helpers/gemini-session-runner.test.ts +104 -0
package/gstack-origin/test/helpers/gemini-session-runner.ts +201 -0
package/gstack-origin/test/helpers/llm-judge.ts +130 -0
package/gstack-origin/test/helpers/observability.test.ts +283 -0
package/gstack-origin/test/helpers/session-runner.test.ts +96 -0
package/gstack-origin/test/helpers/session-runner.ts +357 -0
package/gstack-origin/test/helpers/skill-parser.ts +206 -0
package/gstack-origin/test/helpers/touchfiles.ts +260 -0
package/gstack-origin/test/hook-scripts.test.ts +373 -0
package/gstack-origin/test/skill-e2e-browse.test.ts +293 -0
package/gstack-origin/test/skill-e2e-deploy.test.ts +279 -0
package/gstack-origin/test/skill-e2e-design.test.ts +614 -0
package/gstack-origin/test/skill-e2e-plan.test.ts +538 -0
package/gstack-origin/test/skill-e2e-qa-bugs.test.ts +194 -0
package/gstack-origin/test/skill-e2e-qa-workflow.test.ts +412 -0
package/gstack-origin/test/skill-e2e-review.test.ts +535 -0
package/gstack-origin/test/skill-e2e-workflow.test.ts +586 -0
package/gstack-origin/test/skill-e2e.test.ts +3325 -0
package/gstack-origin/test/skill-llm-eval.test.ts +787 -0
package/gstack-origin/test/skill-parser.test.ts +179 -0
package/gstack-origin/test/skill-routing-e2e.test.ts +605 -0
package/gstack-origin/test/skill-validation.test.ts +1520 -0
package/gstack-origin/test/telemetry.test.ts +278 -0
package/gstack-origin/test/touchfiles.test.ts +262 -0
package/gstack-origin/unfreeze/SKILL.md +40 -0
package/gstack-origin/unfreeze/SKILL.md.tmpl +38 -0
package/package.json +38 -0
package/scripts/install-antigravity-skill.ps1 +33 -0
package/scripts/install-antigravity-skill.sh +41 -0
package/scripts/sync-gstack-origin.ps1 +37 -0
package/scripts/sync-gstack-origin.sh +35 -0

package/gstack-origin/test/helpers/eval-store.test.ts ADDED Viewed

@@ -0,0 +1,548 @@
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import {
+  EvalCollector,
+  extractToolSummary,
+  findPreviousRun,
+  compareEvalResults,
+  formatComparison,
+  generateCommentary,
+  judgePassed,
+} from './eval-store';
+import type { EvalResult, EvalTestEntry, ComparisonResult } from './eval-store';
+// Per-test scratch directory: created fresh before each test and removed after it.
+let tmpDir: string;
+beforeEach(() => {
+  const prefix = path.join(os.tmpdir(), 'eval-store-test-');
+  tmpDir = fs.mkdtempSync(prefix);
+});
+afterEach(() => {
+  try {
+    fs.rmSync(tmpDir, { recursive: true, force: true });
+  } catch {
+    // Cleanup is best-effort: a failed removal is deliberately ignored.
+  }
+});
+/**
+ * Builds an EvalTestEntry fixture for the tests in this file.
+ *
+ * Starts from a passing 'e2e' entry named 'test-1' and applies `overrides`
+ * on top, so callers only spell out the fields they care about.
+ */
+function makeEntry(overrides?: Partial<EvalTestEntry>): EvalTestEntry {
+  const defaults = {
+    name: 'test-1',
+    suite: 'suite-1',
+    tier: 'e2e',
+    passed: true,
+    duration_ms: 1000,
+    cost_usd: 0.05,
+  };
+  return { ...defaults, ...overrides };
+}
+/**
+ * Builds an EvalResult fixture for the tests in this file.
+ *
+ * The defaults describe a single passing 'e2e' run on branch 'main' that
+ * contains one default entry from makeEntry(); `overrides` replaces any of
+ * those fields.
+ */
+function makeResult(overrides?: Partial<EvalResult>): EvalResult {
+  const defaults = {
+    schema_version: 1,
+    version: '0.3.6',
+    branch: 'main',
+    git_sha: 'abc1234',
+    timestamp: '2026-03-14T12:00:00.000Z',
+    hostname: 'test-host',
+    tier: 'e2e',
+    total_tests: 1,
+    passed: 1,
+    failed: 0,
+    total_cost_usd: 0.05,
+    total_duration_ms: 1000,
+    tests: [makeEntry()],
+  };
+  return { ...defaults, ...overrides };
+}
+// --- EvalCollector tests ---
+// Each test builds a collector that writes into the per-test tmpDir, then
+// checks the JSON file that finalize() reports having written.
+describe('EvalCollector', () => {
+  test('addTest accumulates entries', async () => {
+    const collector = new EvalCollector('e2e', tmpDir);
+    collector.addTest(makeEntry({ name: 'a' }));
+    collector.addTest(makeEntry({ name: 'b' }));
+    collector.addTest(makeEntry({ name: 'c' }));
+    // We can't inspect tests directly, so verify them through the file that
+    // finalize() writes. Without this the test would assert nothing.
+    const filepath = await collector.finalize();
+    const data: EvalResult = JSON.parse(fs.readFileSync(filepath, 'utf-8'));
+    expect(data.tests).toHaveLength(3);
+    // Sort the names so the check does not depend on the order entries are stored in.
+    expect(data.tests.map(t => t.name).sort()).toEqual(['a', 'b', 'c']);
+  });
+  test('finalize writes JSON file to eval dir', async () => {
+    const collector = new EvalCollector('e2e', tmpDir);
+    collector.addTest(makeEntry());
+    const filepath = await collector.finalize();
+    expect(filepath).toBeTruthy();
+    expect(fs.existsSync(filepath)).toBe(true);
+    const data = JSON.parse(fs.readFileSync(filepath, 'utf-8'));
+    expect(data.tests).toHaveLength(1);
+    expect(data.tests[0].name).toBe('test-1');
+  });
+  test('written JSON has correct schema fields', async () => {
+    const collector = new EvalCollector('e2e', tmpDir);
+    // One passing and one failing entry, so every aggregate field has a distinct expected value.
+    collector.addTest(makeEntry({ passed: true, cost_usd: 0.10, duration_ms: 2000 }));
+    collector.addTest(makeEntry({ name: 'test-2', passed: false, cost_usd: 0.05, duration_ms: 1000 }));
+    const filepath = await collector.finalize();
+    const data: EvalResult = JSON.parse(fs.readFileSync(filepath, 'utf-8'));
+    expect(data.schema_version).toBe(1);
+    expect(data.tier).toBe('e2e');
+    expect(data.total_tests).toBe(2);
+    expect(data.passed).toBe(1);
+    expect(data.failed).toBe(1);
+    expect(data.total_cost_usd).toBe(0.15);
+    expect(data.total_duration_ms).toBe(3000);
+    expect(data.timestamp).toBeTruthy();
+    expect(data.hostname).toBeTruthy();
+  });
+  test('finalize creates directory if missing', async () => {
+    // None of these nested directories exist when the collector is created.
+    const nestedDir = path.join(tmpDir, 'nested', 'deep', 'evals');
+    const collector = new EvalCollector('e2e', nestedDir);
+    collector.addTest(makeEntry());
+    const filepath = await collector.finalize();
+    expect(fs.existsSync(filepath)).toBe(true);
+  });
+  test('double finalize does not write twice', async () => {
+    const collector = new EvalCollector('e2e', tmpDir);
+    collector.addTest(makeEntry());
+    const filepath1 = await collector.finalize();
+    const filepath2 = await collector.finalize();
+    expect(filepath1).toBeTruthy();
+    expect(filepath2).toBe(''); // second call returns empty
+    // Files whose names start with '_partial' are excluded from the count.
+    expect(fs.readdirSync(tmpDir).filter(f => f.endsWith('.json') && !f.startsWith('_partial'))).toHaveLength(1);
+  });
+  test('empty collector writes valid file', async () => {
+    const collector = new EvalCollector('llm-judge', tmpDir);
+    const filepath = await collector.finalize();
+    const data: EvalResult = JSON.parse(fs.readFileSync(filepath, 'utf-8'));
+    expect(data.total_tests).toBe(0);
+    expect(data.passed).toBe(0);
+    expect(data.tests).toHaveLength(0);
+    expect(data.tier).toBe('llm-judge');
+  });
+});
+// --- judgePassed tests ---
+// Table-driven: every case is judged against the same limits and differs only in its scores.
+describe('judgePassed', () => {
+  // Returns a fresh limits object per call so cases never share one.
+  const limits = () => ({ minimum_detection: 2, max_false_positives: 2 });
+  const cases = [
+    {
+      title: 'passes when all thresholds met',
+      scores: { detection_rate: 3, false_positives: 1, evidence_quality: 3 },
+      expected: true,
+    },
+    {
+      title: 'fails when detection rate below minimum',
+      scores: { detection_rate: 1, false_positives: 0, evidence_quality: 3 },
+      expected: false,
+    },
+    {
+      title: 'fails when too many false positives',
+      scores: { detection_rate: 3, false_positives: 3, evidence_quality: 3 },
+      expected: false,
+    },
+    {
+      title: 'fails when evidence quality below 2',
+      scores: { detection_rate: 3, false_positives: 0, evidence_quality: 1 },
+      expected: false,
+    },
+    {
+      title: 'passes at exact thresholds',
+      scores: { detection_rate: 2, false_positives: 2, evidence_quality: 2 },
+      expected: true,
+    },
+  ];
+  for (const { title, scores, expected } of cases) {
+    test(title, () => {
+      expect(judgePassed(scores, limits())).toBe(expected);
+    });
+  }
+});
+// --- extractToolSummary tests ---
+describe('extractToolSummary', () => {
+  test('counts tool types from transcript events', () => {
+    // tool_use blocks under test: Bash appears twice, Read and Write once each.
+    const firstBash = { type: 'tool_use', name: 'Bash', input: {} };
+    const read = { type: 'tool_use', name: 'Read', input: {} };
+    const secondBash = { type: 'tool_use', name: 'Bash', input: {} };
+    const write = { type: 'tool_use', name: 'Write', input: {} };
+    // Non-assistant events and a text block are mixed in alongside the tool uses.
+    const events = [
+      { type: 'system', subtype: 'init' },
+      { type: 'assistant', message: { content: [firstBash] } },
+      { type: 'user', tool_use_result: { stdout: '' } },
+      { type: 'assistant', message: { content: [{ type: 'text', text: 'ok' }, read] } },
+      { type: 'assistant', message: { content: [secondBash, write] } },
+    ];
+    const counts = extractToolSummary(events);
+    expect(counts).toEqual({ Bash: 2, Read: 1, Write: 1 });
+  });
+  test('returns empty object for empty transcript', () => {
+    const counts = extractToolSummary([]);
+    expect(counts).toEqual({});
+  });
+  test('handles events with no content array', () => {
+    // One event has a message without content; the other has no message at all.
+    const events = [
+      { type: 'assistant', message: {} },
+      { type: 'assistant' },
+    ];
+    const counts = extractToolSummary(events);
+    expect(counts).toEqual({});
+  });
+});
+// --- findPreviousRun tests ---
+describe('findPreviousRun', () => {
+  // Writes makeResult(overrides) as JSON to tmpDir/<filename> and returns the full path.
+  const writeRun = (filename: string, overrides: Partial<EvalResult>): string => {
+    const target = path.join(tmpDir, filename);
+    fs.writeFileSync(target, JSON.stringify(makeResult(overrides)));
+    return target;
+  };
+  test('finds correct file — same branch preferred, most recent', () => {
+    // Three prior runs: one on main, two on feature.
+    writeRun('0.3.5-main-e2e-20260312-100000.json', { branch: 'main', timestamp: '2026-03-12T10:00:00Z' });
+    writeRun('0.3.5-feature-e2e-20260313-100000.json', { branch: 'feature', timestamp: '2026-03-13T10:00:00Z' });
+    writeRun('0.3.6-feature-e2e-20260314-100000.json', { branch: 'feature', timestamp: '2026-03-14T10:00:00Z' });
+    // Should prefer feature branch (most recent on same branch)
+    const current = path.join(tmpDir, 'current.json');
+    const found = findPreviousRun(tmpDir, 'e2e', 'feature', current);
+    expect(found).toContain('0.3.6-feature-e2e-20260314');
+  });
+  test('falls back to different branch when no same-branch match', () => {
+    writeRun('0.3.5-main-e2e-20260312-100000.json', { branch: 'main', timestamp: '2026-03-12T10:00:00Z' });
+    const current = path.join(tmpDir, 'current.json');
+    const found = findPreviousRun(tmpDir, 'e2e', 'new-branch', current);
+    expect(found).toContain('0.3.5-main-e2e');
+  });
+  test('returns null when no prior runs exist', () => {
+    const current = path.join(tmpDir, 'current.json');
+    const found = findPreviousRun(tmpDir, 'e2e', 'main', current);
+    expect(found).toBeNull();
+  });
+  test('returns null when directory does not exist', () => {
+    const found = findPreviousRun('/nonexistent/path', 'e2e', 'main', 'current.json');
+    expect(found).toBeNull();
+  });
+  test('excludes the current file from results', () => {
+    const current = writeRun('0.3.6-main-e2e-20260314-100000.json', {
+      branch: 'main',
+      timestamp: '2026-03-14T10:00:00Z',
+    });
+    const found = findPreviousRun(tmpDir, 'e2e', 'main', current);
+    expect(found).toBeNull(); // only file is excluded
+  });
+  test('filters by tier', () => {
+    writeRun('0.3.6-main-llm-judge-20260314-100000.json', {
+      tier: 'llm-judge',
+      branch: 'main',
+      timestamp: '2026-03-14T10:00:00Z',
+    });
+    const found = findPreviousRun(tmpDir, 'e2e', 'main', 'current.json');
+    expect(found).toBeNull(); // only llm-judge file, looking for e2e
+  });
+});
+// --- compareEvalResults tests ---
+describe('compareEvalResults', () => {
+  // Builds an EvalResult whose tests are given as [name, passed] pairs; `extra` sets further fields.
+  const runOf = (outcomes: Array<[string, boolean]>, extra: Partial<EvalResult> = {}): EvalResult =>
+    makeResult({
+      tests: outcomes.map(([name, passed]) => makeEntry({ name, passed })),
+      ...extra,
+    });
+  test('detects improved/regressed/unchanged per test', () => {
+    const totals = { total_tests: 3, passed: 2, failed: 1 };
+    const before = runOf([['test-a', false], ['test-b', true], ['test-c', true]], totals);
+    const after = runOf(
+      [
+        ['test-a', true],  // improved
+        ['test-b', false], // regressed
+        ['test-c', true],  // unchanged
+      ],
+      totals,
+    );
+    const comparison = compareEvalResults(before, after, 'before.json', 'after.json');
+    const statusOf = (name: string) => comparison.deltas.find(d => d.name === name)?.status_change;
+    expect(comparison.improved).toBe(1);
+    expect(comparison.regressed).toBe(1);
+    expect(comparison.unchanged).toBe(1);
+    expect(statusOf('test-a')).toBe('improved');
+    expect(statusOf('test-b')).toBe('regressed');
+    expect(statusOf('test-c')).toBe('unchanged');
+  });
+  test('handles tests present in one run but not the other', () => {
+    const before = runOf([['old-test', true], ['shared', true]]);
+    const after = runOf([['shared', true], ['new-test', true]]);
+    const comparison = compareEvalResults(before, after, 'before.json', 'after.json');
+    expect(comparison.deltas).toHaveLength(3); // shared + new-test + old-test (removed)
+    const removedDelta = comparison.deltas.find(d => d.name.includes('old-test'));
+    expect(removedDelta?.name).toContain('removed');
+  });
+  test('computes cost and duration deltas', () => {
+    // The "after" run is cheaper and faster, so both deltas are negative.
+    const before = makeResult({ total_cost_usd: 2.00, total_duration_ms: 60000 });
+    const after = makeResult({ total_cost_usd: 1.50, total_duration_ms: 45000 });
+    const comparison = compareEvalResults(before, after, 'a.json', 'b.json');
+    expect(comparison.total_cost_delta).toBe(-0.50);
+    expect(comparison.total_duration_delta).toBe(-15000);
+  });
+});
+// --- formatComparison tests ---
+describe('formatComparison', () => {
+  test('produces readable output with status arrows', () => {
+    const comparison: ComparisonResult = {
+      before_file: 'before.json',
+      after_file: 'after.json',
+      before_branch: 'main',
+      after_branch: 'feature',
+      before_timestamp: '2026-03-13T14:30:00Z',
+      after_timestamp: '2026-03-14T14:30:00Z',
+      deltas: [
+        {
+          name: 'browse basic',
+          before: { passed: true, cost_usd: 0.07, turns_used: 6, duration_ms: 24000, tool_summary: { Bash: 3 } },
+          after: { passed: true, cost_usd: 0.06, turns_used: 5, duration_ms: 19000, tool_summary: { Bash: 4 } },
+          status_change: 'unchanged',
+        },
+        {
+          name: 'planted bugs static',
+          before: { passed: false, cost_usd: 1.00, detection_rate: 3, tool_summary: {} },
+          after: { passed: true, cost_usd: 0.95, detection_rate: 4, tool_summary: {} },
+          status_change: 'improved',
+        },
+      ],
+      total_cost_delta: -0.06,
+      total_duration_delta: -5000,
+      improved: 1,
+      regressed: 0,
+      unchanged: 1,
+      tool_count_before: 3,
+      tool_count_after: 4,
+    };
+    const rendered = formatComparison(comparison);
+    const expectedFragments = [
+      'vs previous',
+      'main',
+      '1 improved',
+      '1 unchanged',
+      '↑', // improved arrow
+      '=', // unchanged arrow
+      '6→5t', // turns delta
+      '24→19s', // duration delta
+    ];
+    for (const fragment of expectedFragments) {
+      expect(rendered).toContain(fragment);
+    }
+  });
+  test('includes commentary section', () => {
+    // Metrics for the two tests whose numbers are identical before and after; a fresh object per call.
+    const steady = () => ({ passed: true, cost_usd: 0.10, turns_used: 5, duration_ms: 20000 });
+    const comparison: ComparisonResult = {
+      before_file: 'a.json',
+      after_file: 'b.json',
+      before_branch: 'main',
+      after_branch: 'main',
+      before_timestamp: '2026-03-13T14:30:00Z',
+      after_timestamp: '2026-03-14T14:30:00Z',
+      deltas: [
+        {
+          // The only test whose turns, cost and duration change: all three are halved or lower.
+          name: 'test-a',
+          before: { passed: true, cost_usd: 0.50, turns_used: 20, duration_ms: 120000 },
+          after: { passed: true, cost_usd: 0.30, turns_used: 10, duration_ms: 60000 },
+          status_change: 'unchanged',
+        },
+        { name: 'test-b', before: steady(), after: steady(), status_change: 'unchanged' },
+        { name: 'test-c', before: steady(), after: steady(), status_change: 'unchanged' },
+      ],
+      total_cost_delta: -0.20,
+      total_duration_delta: -60000,
+      improved: 0,
+      regressed: 0,
+      unchanged: 3,
+      tool_count_before: 30,
+      tool_count_after: 20,
+    };
+    const rendered = formatComparison(comparison);
+    for (const fragment of ['Takeaway', 'fewer turns', 'faster']) {
+      expect(rendered).toContain(fragment);
+    }
+  });
+});
+// --- generateCommentary tests ---
+describe('generateCommentary', () => {
+  /**
+   * Builds a ComparisonResult fixture around `deltas`.
+   *
+   * The fields every test in this suite shares (file names, branches, empty
+   * timestamps) are fixed here; totals, counters and tool counts default to 0
+   * and are set per test through `overrides`.
+   *
+   * @param deltas - per-test before/after entries for the comparison
+   * @param overrides - any top-level fields that should differ from the defaults
+   * @returns a complete ComparisonResult with the same key order the tests
+   *          previously spelled out by hand
+   */
+  const comparisonOf = (
+    deltas: ComparisonResult['deltas'],
+    overrides: Partial<ComparisonResult> = {},
+  ): ComparisonResult => ({
+    before_file: 'a.json', after_file: 'b.json',
+    before_branch: 'main', after_branch: 'main',
+    before_timestamp: '', after_timestamp: '',
+    deltas,
+    total_cost_delta: 0, total_duration_delta: 0,
+    improved: 0, regressed: 0, unchanged: 0,
+    tool_count_before: 0, tool_count_after: 0,
+    ...overrides,
+  });
+  /** True when at least one commentary note contains `fragment`. */
+  const mentions = (notes: readonly string[], fragment: string): boolean =>
+    notes.some(n => n.includes(fragment));
+  // A pass -> fail transition must yield a note containing 'REGRESSION' and
+  // the name of the affected test.
+  test('flags regressions prominently', () => {
+    const notes = generateCommentary(comparisonOf(
+      [{
+        name: 'critical-test',
+        before: { passed: true, cost_usd: 0.10 },
+        after: { passed: false, cost_usd: 0.10 },
+        status_change: 'regressed',
+      }],
+      { regressed: 1 },
+    ));
+    expect(mentions(notes, 'REGRESSION')).toBe(true);
+    expect(mentions(notes, 'critical-test')).toBe(true);
+  });
+  // A fail -> pass transition must yield a note containing 'Fixed' and the
+  // name of the affected test.
+  test('notes improvements', () => {
+    const notes = generateCommentary(comparisonOf(
+      [{
+        name: 'fixed-test',
+        before: { passed: false, cost_usd: 0.10 },
+        after: { passed: true, cost_usd: 0.10 },
+        status_change: 'improved',
+      }],
+      { improved: 1 },
+    ));
+    expect(mentions(notes, 'Fixed')).toBe(true);
+    expect(mentions(notes, 'fixed-test')).toBe(true);
+  });
+  // A test that still passes but halves its turns, duration and cost should be
+  // described as using fewer turns, being faster and being cheaper.
+  test('reports efficiency gains for stable tests', () => {
+    const notes = generateCommentary(comparisonOf(
+      [{
+        name: 'fast-test',
+        before: { passed: true, cost_usd: 0.50, turns_used: 20, duration_ms: 120000 },
+        after: { passed: true, cost_usd: 0.25, turns_used: 10, duration_ms: 60000 },
+        status_change: 'unchanged',
+      }],
+      { total_cost_delta: -0.25, total_duration_delta: -60000, unchanged: 1 },
+    ));
+    expect(mentions(notes, 'fewer turns')).toBe(true);
+    expect(mentions(notes, 'faster')).toBe(true);
+    expect(mentions(notes, 'cheaper')).toBe(true);
+  });
+  // detection_rate going from 3 to 5 should be reported as 2 more bugs detected.
+  test('reports detection rate changes', () => {
+    const notes = generateCommentary(comparisonOf(
+      [{
+        name: 'detection-test',
+        before: { passed: true, cost_usd: 0.50, detection_rate: 3 },
+        after: { passed: true, cost_usd: 0.50, detection_rate: 5 },
+        status_change: 'unchanged',
+      }],
+      { unchanged: 1 },
+    ));
+    expect(mentions(notes, 'detecting 2 more bugs')).toBe(true);
+  });
+  // With three passing tests and zero regressions, the notes should contain an
+  // 'Overall' line and a 'No regressions' line.
+  test('produces overall summary for 3+ tests with no regressions', () => {
+    const notes = generateCommentary(comparisonOf(
+      [
+        { name: 'a', before: { passed: true, cost_usd: 0.50, turns_used: 10, duration_ms: 60000 },
+          after: { passed: true, cost_usd: 0.30, turns_used: 6, duration_ms: 40000 }, status_change: 'unchanged' },
+        { name: 'b', before: { passed: true, cost_usd: 0.20, turns_used: 5, duration_ms: 30000 },
+          after: { passed: true, cost_usd: 0.15, turns_used: 4, duration_ms: 25000 }, status_change: 'unchanged' },
+        { name: 'c', before: { passed: true, cost_usd: 0.10, turns_used: 3, duration_ms: 20000 },
+          after: { passed: true, cost_usd: 0.08, turns_used: 3, duration_ms: 18000 }, status_change: 'unchanged' },
+      ],
+      { total_cost_delta: -0.27, total_duration_delta: -27000, unchanged: 3 },
+    ));
+    expect(mentions(notes, 'Overall')).toBe(true);
+    expect(mentions(notes, 'No regressions')).toBe(true);
+  });
+  // The only difference between the two runs is one extra second on test 'a'.
+  // The commentary is expected to contain a 'Stable run' note, i.e. it is NOT
+  // empty; the title previously said "returns empty", contradicting the assertion.
+  test('reports a stable run when there are no significant changes', () => {
+    const notes = generateCommentary(comparisonOf(
+      [
+        { name: 'a', before: { passed: true, cost_usd: 0.10, turns_used: 5, duration_ms: 20000 },
+          after: { passed: true, cost_usd: 0.10, turns_used: 5, duration_ms: 21000 }, status_change: 'unchanged' },
+        { name: 'b', before: { passed: true, cost_usd: 0.10, turns_used: 5, duration_ms: 20000 },
+          after: { passed: true, cost_usd: 0.10, turns_used: 5, duration_ms: 20000 }, status_change: 'unchanged' },
+        { name: 'c', before: { passed: true, cost_usd: 0.10, turns_used: 5, duration_ms: 20000 },
+          after: { passed: true, cost_usd: 0.10, turns_used: 5, duration_ms: 20000 }, status_change: 'unchanged' },
+      ],
+      {
+        total_duration_delta: 1000,
+        unchanged: 3,
+        tool_count_before: 15, tool_count_after: 15,
+      },
+    ));
+    expect(mentions(notes, 'Stable run')).toBe(true);
+  });
+});