npm - opengstack - Versions diffs - 0.13.10 → 0.14.2 - Mend

opengstack 0.13.10 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (189) hide show

package/AGENTS.md +4 -4
package/CLAUDE.md +127 -110
package/README.md +10 -5
package/SKILL.md +500 -70
package/bin/opengstack.js +69 -69
package/{skills/land-and-deploy/SKILL.md → commands/autoplan.md} +7 -25
package/{skills/benchmark/SKILL.md → commands/benchmark.md} +84 -108
package/{skills/browse/SKILL.md → commands/browse.md} +60 -81
package/{skills/ship/SKILL.md → commands/canary.md} +7 -27
package/{skills/careful/SKILL.md → commands/careful.md} +2 -22
package/{skills/canary/SKILL.md → commands/codex.md} +7 -26
package/{skills/connect-chrome/SKILL.md → commands/connect-chrome.md} +7 -24
package/commands/cso.md +70 -0
package/commands/design-consultation.md +70 -0
package/commands/design-review.md +70 -0
package/commands/design-shotgun.md +70 -0
package/commands/document-release.md +70 -0
package/{skills/freeze/SKILL.md → commands/freeze.md} +3 -29
package/{skills/guard/SKILL.md → commands/guard.md} +4 -35
package/commands/investigate.md +70 -0
package/commands/land-and-deploy.md +70 -0
package/commands/office-hours.md +70 -0
package/{skills/gstack-upgrade/SKILL.md → commands/opengstack-upgrade.md} +64 -79
package/commands/plan-ceo-review.md +70 -0
package/commands/plan-design-review.md +70 -0
package/commands/plan-eng-review.md +70 -0
package/commands/qa-only.md +70 -0
package/commands/qa.md +70 -0
package/commands/retro.md +70 -0
package/commands/review.md +70 -0
package/{skills/setup-browser-cookies/SKILL.md → commands/setup-browser-cookies.md} +22 -40
package/commands/setup-deploy.md +70 -0
package/commands/ship.md +70 -0
package/commands/unfreeze.md +25 -0
package/docs/designs/CHROME_VS_CHROMIUM_EXPLORATION.md +9 -9
package/docs/designs/CONDUCTOR_CHROME_SIDEBAR_INTEGRATION.md +2 -2
package/docs/designs/CONDUCTOR_SESSION_API.md +16 -16
package/docs/designs/DESIGN_SHOTGUN.md +74 -74
package/docs/designs/DESIGN_TOOLS_V1.md +111 -111
package/docs/skills.md +483 -202
package/package.json +42 -43
package/scripts/analytics.ts +188 -0
package/scripts/dev-skill.ts +83 -0
package/scripts/discover-skills.ts +39 -0
package/scripts/eval-compare.ts +97 -0
package/scripts/eval-list.ts +117 -0
package/scripts/eval-select.ts +86 -0
package/scripts/eval-summary.ts +188 -0
package/scripts/eval-watch.ts +172 -0
package/scripts/gen-skill-docs.ts +473 -0
package/scripts/resolvers/browse.ts +129 -0
package/scripts/resolvers/codex-helpers.ts +133 -0
package/scripts/resolvers/composition.ts +48 -0
package/scripts/resolvers/confidence.ts +37 -0
package/scripts/resolvers/constants.ts +50 -0
package/scripts/resolvers/design.ts +950 -0
package/scripts/resolvers/index.ts +59 -0
package/scripts/resolvers/learnings.ts +96 -0
package/scripts/resolvers/preamble.ts +505 -0
package/scripts/resolvers/review.ts +884 -0
package/scripts/resolvers/testing.ts +573 -0
package/scripts/resolvers/types.ts +45 -0
package/scripts/resolvers/utility.ts +421 -0
package/scripts/skill-check.ts +190 -0
package/scripts/cleanup.py +0 -100
package/scripts/filter-skills.sh +0 -114
package/scripts/filter_skills.py +0 -164
package/scripts/install-skills.js +0 -60
package/skills/autoplan/SKILL.md +0 -96
package/skills/autoplan/SKILL.md.tmpl +0 -694
package/skills/benchmark/SKILL.md.tmpl +0 -222
package/skills/browse/SKILL.md.tmpl +0 -131
package/skills/browse/bin/find-browse +0 -21
package/skills/browse/bin/remote-slug +0 -14
package/skills/browse/scripts/build-node-server.sh +0 -48
package/skills/browse/src/activity.ts +0 -208
package/skills/browse/src/browser-manager.ts +0 -959
package/skills/browse/src/buffers.ts +0 -137
package/skills/browse/src/bun-polyfill.cjs +0 -109
package/skills/browse/src/cli.ts +0 -678
package/skills/browse/src/commands.ts +0 -128
package/skills/browse/src/config.ts +0 -150
package/skills/browse/src/cookie-import-browser.ts +0 -625
package/skills/browse/src/cookie-picker-routes.ts +0 -230
package/skills/browse/src/cookie-picker-ui.ts +0 -688
package/skills/browse/src/find-browse.ts +0 -61
package/skills/browse/src/meta-commands.ts +0 -550
package/skills/browse/src/platform.ts +0 -17
package/skills/browse/src/read-commands.ts +0 -358
package/skills/browse/src/server.ts +0 -1192
package/skills/browse/src/sidebar-agent.ts +0 -280
package/skills/browse/src/sidebar-utils.ts +0 -21
package/skills/browse/src/snapshot.ts +0 -407
package/skills/browse/src/url-validation.ts +0 -95
package/skills/browse/src/write-commands.ts +0 -364
package/skills/browse/test/activity.test.ts +0 -120
package/skills/browse/test/adversarial-security.test.ts +0 -32
package/skills/browse/test/browser-manager-unit.test.ts +0 -17
package/skills/browse/test/bun-polyfill.test.ts +0 -72
package/skills/browse/test/commands.test.ts +0 -2075
package/skills/browse/test/compare-board.test.ts +0 -342
package/skills/browse/test/config.test.ts +0 -316
package/skills/browse/test/cookie-import-browser.test.ts +0 -519
package/skills/browse/test/cookie-picker-routes.test.ts +0 -260
package/skills/browse/test/file-drop.test.ts +0 -271
package/skills/browse/test/find-browse.test.ts +0 -50
package/skills/browse/test/findport.test.ts +0 -191
package/skills/browse/test/fixtures/basic.html +0 -33
package/skills/browse/test/fixtures/cursor-interactive.html +0 -22
package/skills/browse/test/fixtures/dialog.html +0 -15
package/skills/browse/test/fixtures/empty.html +0 -2
package/skills/browse/test/fixtures/forms.html +0 -55
package/skills/browse/test/fixtures/iframe.html +0 -30
package/skills/browse/test/fixtures/network-idle.html +0 -30
package/skills/browse/test/fixtures/qa-eval-checkout.html +0 -108
package/skills/browse/test/fixtures/qa-eval-spa.html +0 -98
package/skills/browse/test/fixtures/qa-eval.html +0 -51
package/skills/browse/test/fixtures/responsive.html +0 -49
package/skills/browse/test/fixtures/snapshot.html +0 -55
package/skills/browse/test/fixtures/spa.html +0 -24
package/skills/browse/test/fixtures/states.html +0 -17
package/skills/browse/test/fixtures/upload.html +0 -25
package/skills/browse/test/gstack-config.test.ts +0 -138
package/skills/browse/test/gstack-update-check.test.ts +0 -514
package/skills/browse/test/handoff.test.ts +0 -235
package/skills/browse/test/path-validation.test.ts +0 -91
package/skills/browse/test/platform.test.ts +0 -37
package/skills/browse/test/server-auth.test.ts +0 -65
package/skills/browse/test/sidebar-agent-roundtrip.test.ts +0 -226
package/skills/browse/test/sidebar-agent.test.ts +0 -199
package/skills/browse/test/sidebar-integration.test.ts +0 -320
package/skills/browse/test/sidebar-unit.test.ts +0 -96
package/skills/browse/test/snapshot.test.ts +0 -467
package/skills/browse/test/state-ttl.test.ts +0 -35
package/skills/browse/test/test-server.ts +0 -57
package/skills/browse/test/url-validation.test.ts +0 -72
package/skills/browse/test/watch.test.ts +0 -129
package/skills/canary/SKILL.md.tmpl +0 -212
package/skills/careful/SKILL.md.tmpl +0 -56
package/skills/careful/bin/check-careful.sh +0 -112
package/skills/codex/SKILL.md +0 -90
package/skills/codex/SKILL.md.tmpl +0 -417
package/skills/connect-chrome/SKILL.md.tmpl +0 -195
package/skills/cso/ACKNOWLEDGEMENTS.md +0 -14
package/skills/cso/SKILL.md +0 -93
package/skills/cso/SKILL.md.tmpl +0 -606
package/skills/design-consultation/SKILL.md +0 -94
package/skills/design-consultation/SKILL.md.tmpl +0 -415
package/skills/design-review/SKILL.md +0 -94
package/skills/design-review/SKILL.md.tmpl +0 -290
package/skills/design-shotgun/SKILL.md +0 -91
package/skills/design-shotgun/SKILL.md.tmpl +0 -285
package/skills/document-release/SKILL.md +0 -91
package/skills/document-release/SKILL.md.tmpl +0 -359
package/skills/freeze/SKILL.md.tmpl +0 -77
package/skills/freeze/bin/check-freeze.sh +0 -79
package/skills/gstack-upgrade/SKILL.md.tmpl +0 -222
package/skills/guard/SKILL.md.tmpl +0 -77
package/skills/investigate/SKILL.md +0 -105
package/skills/investigate/SKILL.md.tmpl +0 -194
package/skills/land-and-deploy/SKILL.md.tmpl +0 -881
package/skills/office-hours/SKILL.md +0 -96
package/skills/office-hours/SKILL.md.tmpl +0 -645
package/skills/plan-ceo-review/SKILL.md +0 -94
package/skills/plan-ceo-review/SKILL.md.tmpl +0 -811
package/skills/plan-design-review/SKILL.md +0 -92
package/skills/plan-design-review/SKILL.md.tmpl +0 -446
package/skills/plan-eng-review/SKILL.md +0 -93
package/skills/plan-eng-review/SKILL.md.tmpl +0 -303
package/skills/qa/SKILL.md +0 -95
package/skills/qa/SKILL.md.tmpl +0 -316
package/skills/qa/references/issue-taxonomy.md +0 -85
package/skills/qa/templates/qa-report-template.md +0 -126
package/skills/qa-only/SKILL.md +0 -89
package/skills/qa-only/SKILL.md.tmpl +0 -101
package/skills/retro/SKILL.md +0 -89
package/skills/retro/SKILL.md.tmpl +0 -820
package/skills/review/SKILL.md +0 -92
package/skills/review/SKILL.md.tmpl +0 -281
package/skills/review/TODOS-format.md +0 -62
package/skills/review/checklist.md +0 -220
package/skills/review/design-checklist.md +0 -132
package/skills/review/greptile-triage.md +0 -220
package/skills/setup-browser-cookies/SKILL.md.tmpl +0 -81
package/skills/setup-deploy/SKILL.md +0 -92
package/skills/setup-deploy/SKILL.md.tmpl +0 -215
package/skills/ship/SKILL.md.tmpl +0 -636
package/skills/unfreeze/SKILL.md +0 -37
package/skills/unfreeze/SKILL.md.tmpl +0 -36

package/scripts/eval-select.ts ADDED Viewed

@@ -0,0 +1,86 @@
+#!/usr/bin/env bun
+/**
+ * Show which E2E and LLM-judge tests would run based on the current git diff.
+ *
+ * Usage:
+ * bun run eval:select # human-readable output
+ * bun run eval:select --json # machine-readable JSON
+ * bun run eval:select --base main # override base branch
+ */
+import * as path from 'path';
+import {
+ selectTests,
+ detectBaseBranch,
+ getChangedFiles,
+ E2E_TOUCHFILES,
+ LLM_JUDGE_TOUCHFILES,
+ GLOBAL_TOUCHFILES,
+} from '../test/helpers/touchfiles';
+// Directory one level above the directory containing this script.
+const ROOT = path.resolve(import.meta.dir, '..');
+const args = process.argv.slice(2);
+const jsonMode = args.includes('--json');
+const baseIdx = args.indexOf('--base');
+const baseOverride = baseIdx < 0 ? undefined : args[baseIdx + 1];
+// Base branch precedence: --base value, then detectBaseBranch(), then 'main'.
+const baseBranch = baseOverride || detectBaseBranch(ROOT) || 'main';
+const changedFiles = getChangedFiles(baseBranch, ROOT);
+// An empty diff gives nothing to select on: report that and exit successfully.
+if (changedFiles.length === 0) {
+  if (!jsonMode) {
+    console.log(`Base: ${baseBranch}`);
+    console.log('No changed files detected — all tests would run.');
+  } else {
+    const payload = { base: baseBranch, changed_files: 0, e2e: 'all', llm_judge: 'all', reason: 'no diff — would run all tests' };
+    console.log(JSON.stringify(payload));
+  }
+  process.exit(0);
+}
+/** Build the JSON report entry for one tier: selection fields plus a "selected/total" count string. */
+function toJsonEntry(selection: ReturnType<typeof selectTests>, total: number) {
+  const { selected, skipped, reason } = selection;
+  return { selected, skipped, reason, count: `${selected.length}/${total}` };
+}
+/** Print the human-readable report section for one tier (`label` is "E2E" or "LLM-judge"). */
+function printTier(label: string, selection: ReturnType<typeof selectTests>, total: number): void {
+  const { selected, skipped, reason } = selection;
+  console.log(`${label} (${reason}): ${selected.length}/${total} tests`);
+  if (selected.length === 0) {
+    console.log(` No ${label} tests affected.`);
+  } else if (selected.length < total) {
+    console.log(` Selected: ${selected.join(', ')}`);
+    console.log(` Skipped: ${skipped.join(', ')}`);
+  } else {
+    console.log(` All ${label} tests selected.`);
+  }
+}
+const e2eSelection = selectTests(changedFiles, E2E_TOUCHFILES, GLOBAL_TOUCHFILES);
+const llmSelection = selectTests(changedFiles, LLM_JUDGE_TOUCHFILES, GLOBAL_TOUCHFILES);
+const e2eTotal = Object.keys(E2E_TOUCHFILES).length;
+const llmTotal = Object.keys(LLM_JUDGE_TOUCHFILES).length;
+if (jsonMode) {
+  const report = {
+    base: baseBranch,
+    changed_files: changedFiles,
+    e2e: toJsonEntry(e2eSelection, e2eTotal),
+    llm_judge: toJsonEntry(llmSelection, llmTotal),
+  };
+  console.log(JSON.stringify(report, null, 2));
+} else {
+  console.log(`Base: ${baseBranch}`);
+  console.log(`Changed files: ${changedFiles.length}`);
+  console.log();
+  printTier('E2E', e2eSelection, e2eTotal);
+  console.log();
+  printTier('LLM-judge', llmSelection, llmTotal);
+}

package/scripts/eval-summary.ts ADDED Viewed

@@ -0,0 +1,188 @@
+#!/usr/bin/env bun
+/**
+ * Aggregate summary of all eval runs from ~/.opengstack-dev/evals/
+ *
+ * Usage: bun run eval:summary
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import type { EvalResult } from '../test/helpers/eval-store';
+import { getProjectEvalDir } from '../test/helpers/eval-store';
+const EVAL_DIR = getProjectEvalDir();
+/** Tell the user how to produce eval runs, then exit with status 0. */
+function exitNoRuns(): never {
+  console.log('No eval runs yet. Run: EVALS=1 bun run test:evals');
+  process.exit(0);
+}
+// Collect the *.json entries of the eval directory; an unreadable directory is treated like an empty one.
+let files: string[];
+try {
+  files = fs.readdirSync(EVAL_DIR).filter(entry => entry.endsWith('.json'));
+} catch {
+  exitNoRuns();
+}
+if (files.length === 0) exitNoRuns();
+// Load all results, skipping any file that cannot be read or parsed
+const results: EvalResult[] = [];
+for (const file of files) {
+  let parsed: EvalResult;
+  try {
+    const text = fs.readFileSync(path.join(EVAL_DIR, file), 'utf-8');
+    parsed = JSON.parse(text);
+  } catch {
+    continue;
+  }
+  results.push(parsed);
+}
+// Aggregate stats
+const e2eRuns = results.filter(r => r.tier === 'e2e');
+const judgeRuns = results.filter(r => r.tier === 'llm-judge');
+// Sum run costs, counting a missing total_cost_usd as 0 so that a single
+// incomplete record cannot turn the total or either average into NaN.
+const sumRunCost = (runs: EvalResult[]): number =>
+  runs.reduce((s, r) => s + (r.total_cost_usd || 0), 0);
+const totalCost = sumRunCost(results);
+const avgE2ECost = e2eRuns.length > 0 ? sumRunCost(e2eRuns) / e2eRuns.length : 0;
+const avgJudgeCost = judgeRuns.length > 0 ? sumRunCost(judgeRuns) / judgeRuns.length : 0;
+// Duration + turns from E2E runs
+const avgE2EDuration = e2eRuns.length > 0
+  ? e2eRuns.reduce((s, r) => s + (r.total_duration_ms || 0), 0) / e2eRuns.length
+  : 0;
+// Total turns per run; runs whose tests recorded no turns are left out of the average.
+const e2eTurns: number[] = [];
+for (const r of e2eRuns) {
+  const runTurns = r.tests.reduce((s, t) => s + (t.turns_used || 0), 0);
+  if (runTurns > 0) e2eTurns.push(runTurns);
+}
+const avgE2ETurns = e2eTurns.length > 0
+  ? e2eTurns.reduce((a, b) => a + b, 0) / e2eTurns.length
+  : 0;
+// Per-test efficiency samples (turns, durations, costs) gathered across E2E runs
+const testEfficiency = new Map<string, { turns: number[]; durations: number[]; costs: number[] }>();
+for (const run of e2eRuns) {
+  for (const test of run.tests) {
+    let entry = testEfficiency.get(test.name);
+    if (!entry) {
+      entry = { turns: [], durations: [], costs: [] };
+      testEfficiency.set(test.name, entry);
+    }
+    if (test.turns_used !== undefined) entry.turns.push(test.turns_used);
+    if (test.duration_ms > 0) entry.durations.push(test.duration_ms);
+    if (test.cost_usd > 0) entry.costs.push(test.cost_usd);
+  }
+}
+// Detection rates reported by E2E tests; tests without one are ignored
+const detectionRates: number[] = [];
+for (const run of e2eRuns) {
+  for (const test of run.tests) {
+    if (test.detection_rate === undefined) continue;
+    detectionRates.push(test.detection_rate);
+  }
+}
+let detectionSum = 0;
+for (const rate of detectionRates) detectionSum += rate;
+const avgDetection = detectionRates.length === 0 ? null : detectionSum / detectionRates.length;
+// Flaky tests: keyed by "tier:name", with at least two outcomes that include both a pass and a fail
+const testResults = new Map<string, boolean[]>();
+for (const run of results) {
+  for (const test of run.tests) {
+    const key = `${run.tier}:${test.name}`;
+    let outcomes = testResults.get(key);
+    if (!outcomes) {
+      outcomes = [];
+      testResults.set(key, outcomes);
+    }
+    outcomes.push(test.passed);
+  }
+}
+const flakyTests: string[] = [...testResults]
+  .filter(([, outcomes]) => outcomes.length >= 2 && outcomes.some(o => o) && outcomes.some(o => !o))
+  .map(([key]) => key);
+// Branch stats: E2E run count and detection-rate samples per branch
+const branchStats = new Map<string, { runs: number; avgDetection: number; detections: number[] }>();
+for (const run of e2eRuns) {
+  let entry = branchStats.get(run.branch);
+  if (!entry) {
+    entry = { runs: 0, avgDetection: 0, detections: [] };
+    branchStats.set(run.branch, entry);
+  }
+  entry.runs += 1;
+  for (const test of run.tests) {
+    if (test.detection_rate === undefined) continue;
+    entry.detections.push(test.detection_rate);
+  }
+}
+branchStats.forEach(entry => {
+  const sampleCount = entry.detections.length;
+  entry.avgDetection = sampleCount > 0
+    ? entry.detections.reduce((a, b) => a + b, 0) / sampleCount
+    : 0;
+});
+// Print summary
+/** Arithmetic mean of `values`; 0 for an empty list so sorting and formatting never see NaN. */
+const meanOrZero = (values: number[]): number =>
+  values.length > 0 ? values.reduce((a, b) => a + b, 0) / values.length : 0;
+console.log('');
+console.log('Eval Summary');
+console.log('═'.repeat(70));
+console.log(` Total runs: ${results.length} (${e2eRuns.length} e2e, ${judgeRuns.length} llm-judge)`);
+console.log(` Total spend: $${totalCost.toFixed(2)}`);
+console.log(` Avg cost/e2e: $${avgE2ECost.toFixed(2)}`);
+console.log(` Avg cost/judge: $${avgJudgeCost.toFixed(2)}`);
+if (avgE2EDuration > 0) {
+  console.log(` Avg duration/e2e: ${Math.round(avgE2EDuration / 1000)}s`);
+}
+if (avgE2ETurns > 0) {
+  console.log(` Avg turns/e2e: ${Math.round(avgE2ETurns)}`);
+}
+if (avgDetection !== null) {
+  console.log(` Avg detection: ${avgDetection.toFixed(1)} bugs`);
+}
+console.log('─'.repeat(70));
+// Per-test efficiency averages (only if we have enough data)
+if (testEfficiency.size > 0 && e2eRuns.length >= 2) {
+  console.log(' Per-test efficiency (averages across runs):');
+  // Rows qualify on turn samples alone; their cost/duration lists may be empty
+  // (only positive values are recorded), hence meanOrZero rather than a bare division.
+  const sorted = [...testEfficiency.entries()]
+    .filter(([, s]) => s.turns.length >= 2)
+    .sort((a, b) => meanOrZero(b[1].costs) - meanOrZero(a[1].costs));
+  for (const [name, stats] of sorted) {
+    const avgT = Math.round(meanOrZero(stats.turns));
+    const avgD = Math.round(meanOrZero(stats.durations) / 1000);
+    const avgC = meanOrZero(stats.costs).toFixed(2);
+    const label = name.length > 30 ? name.slice(0, 27) + '...' : name.padEnd(30);
+    console.log(` ${label} $${avgC} ${avgT}t ${avgD}s (${stats.turns.length} runs)`);
+  }
+  console.log('─'.repeat(70));
+}
+if (flakyTests.length > 0) {
+  console.log(` Flaky tests (${flakyTests.length}):`);
+  for (const name of flakyTests) {
+    console.log(` - ${name}`);
+  }
+  console.log('─'.repeat(70));
+}
+if (branchStats.size > 0) {
+  console.log(' Branches:');
+  // Highest average detection first.
+  const sorted = [...branchStats.entries()].sort((a, b) => b[1].avgDetection - a[1].avgDetection);
+  for (const [branch, stats] of sorted) {
+    const det = stats.detections.length > 0 ? ` avg det: ${stats.avgDetection.toFixed(1)}` : '';
+    console.log(` ${branch.padEnd(30)} ${stats.runs} runs${det}`);
+  }
+  console.log('─'.repeat(70));
+}
+// Date range: earliest and latest timestamp, trimmed to "YYYY-MM-DD HH:MM" form
+const timestamps = results.map(r => r.timestamp).filter(Boolean).sort();
+if (timestamps.length > 0) {
+  const first = timestamps[0].replace('T', ' ').slice(0, 16);
+  const last = timestamps[timestamps.length - 1].replace('T', ' ').slice(0, 16);
+  console.log(` Date range: ${first} → ${last}`);
+}
+console.log(` Dir: ${EVAL_DIR}`);
+console.log('');

package/scripts/eval-watch.ts ADDED Viewed

@@ -0,0 +1,172 @@
+/**
+ * Live E2E test watcher dashboard.
+ *
+ * Reads heartbeat (e2e-live.json) for current test status and
+ * partial eval results (_partial-e2e.json) for completed tests.
+ * Renders a terminal dashboard every 1s.
+ *
+ * Usage: bun run eval:watch [--tail]
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+const OpenGStack_DEV_DIR = path.join(os.homedir(), '.opengstack-dev'); // directory under the user's home that holds the watcher's input files
+const HEARTBEAT_PATH = path.join(OpenGStack_DEV_DIR, 'e2e-live.json'); // heartbeat file: status of the test currently running
+const PARTIAL_PATH = path.join(OpenGStack_DEV_DIR, 'evals', '_partial-e2e.json'); // partial eval results: tests already completed
+const STALE_THRESHOLD_SEC = 600; // 10 minutes since the last tool call before the dashboard shows a STALE warning
+export interface HeartbeatData { // shape of the parsed heartbeat file (e2e-live.json)
+  runId: string; // shown in the dashboard header; also names the e2e-runs/<runId>/progress.log path
+  pid?: number; // when present, the main loop checks this PID and deletes the heartbeat if it is dead
+  startedAt: string; // not read by this file
+  currentTest: string; // name displayed on the in-progress row
+  status: string; // the in-progress row is rendered only when this equals 'running'
+  turn: number; // displayed as "turn N" on the in-progress row
+  toolCount: number; // not read by this file
+  lastTool: string; // displayed after "last:" on the in-progress row
+  lastToolAt: string; // parsed with new Date(...) to measure time since the last tool call
+  elapsedSec: number; // formatted with formatDuration for the header and summary lines
+}
+export interface PartialData { // shape of the parsed partial-results file (_partial-e2e.json)
+  tests: Array<{ // one dashboard row is rendered per entry
+    name: string; // truncated or padded to 30 characters for display
+    passed: boolean; // selects the check mark or cross icon
+    cost_usd: number; // displayed with two decimals after a "$"
+    duration_ms: number; // displayed rounded to whole seconds
+    turns_used?: number; // displayed as "N turns" when defined
+    exit_reason?: string; // not read by this file
+  }>;
+  total_cost_usd: number; // displayed in the summary line
+  _partial?: boolean; // not read by this file
+}
+/**
+ * Load `filePath` as UTF-8 text and parse it as JSON.
+ * Returns the parsed value, or null when reading or parsing throws.
+ */
+function readJSON<T>(filePath: string): T | null {
+  let parsed: T | null;
+  try {
+    const text = fs.readFileSync(filePath, 'utf-8');
+    parsed = JSON.parse(text);
+  } catch {
+    parsed = null;
+  }
+  return parsed;
+}
+/**
+ * Check whether a process with the given PID exists.
+ *
+ * Sends signal 0, which performs the existence check without delivering a signal.
+ * An EPERM failure means the process exists but this user may not signal it, so it
+ * still counts as alive; any other failure is reported as not alive.
+ */
+function isProcessAlive(pid: number): boolean {
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch (err) {
+    return (err as { code?: string } | null)?.code === 'EPERM';
+  }
+}
+/**
+ * Format a number of seconds for display: "Ns" below one minute,
+ * otherwise "Xm Ys" with whole minutes and the remaining seconds.
+ */
+function formatDuration(sec: number): string {
+  const minutes = Math.floor(sec / 60);
+  return sec < 60 ? `${sec}s` : `${minutes}m ${sec % 60}s`;
+}
+/**
+ * Build the dashboard text from the heartbeat and partial-results snapshots.
+ *
+ * Performs no file or terminal I/O; it does read the current time for the stale check.
+ *
+ * @param heartbeat parsed heartbeat data, or null when none is available
+ * @param partial parsed partial results, or null when none are available
+ * @returns the dashboard as newline-joined lines (no trailing newline)
+ */
+export function renderDashboard(heartbeat: HeartbeatData | null, partial: PartialData | null): string {
+  // Neither input available: show where the watcher looked and how to start a run.
+  if (!heartbeat && !partial) {
+    return [
+      'E2E Watch — No active run detected',
+      '',
+      `Heartbeat: ${HEARTBEAT_PATH} (not found)`,
+      `Partial: ${PARTIAL_PATH} (not found)`,
+      '',
+      'Start a run with: EVALS=1 bun test test/skill-e2e-*.test.ts',
+    ].join('\n');
+  }
+  // Names longer than 30 characters are cut to 27 plus "..."; shorter ones are padded to 30.
+  const fitName = (text: string): string =>
+    text.length > 30 ? text.slice(0, 27) + '...' : text.padEnd(30);
+  const isRunning = heartbeat?.status === 'running';
+  const elapsed = heartbeat?.elapsedSec || 0;
+  const out: string[] = [];
+  out.push(`E2E Watch \u2014 Run ${heartbeat?.runId || 'unknown'} \u2014 ${formatDuration(elapsed)}`);
+  out.push('\u2550'.repeat(55));
+  // One row per completed test from the partial results
+  for (const t of partial?.tests || []) {
+    const mark = t.passed ? '\u2713' : '\u2717';
+    const cost = `$${t.cost_usd.toFixed(2)}`.padStart(6);
+    const secs = `${Math.round(t.duration_ms / 1000)}s`.padStart(5);
+    const turns = t.turns_used === undefined ? '' : `${t.turns_used} turns`;
+    out.push(` ${mark} ${fitName(t.name)} ${cost} ${secs} ${turns}`);
+  }
+  // Row for the test in progress, plus a warning when no tool call has happened for too long
+  if (heartbeat && isRunning) {
+    out.push(` \u29D6 ${fitName(heartbeat.currentTest)} ${formatDuration(heartbeat.elapsedSec).padStart(6)} turn ${heartbeat.turn} last: ${heartbeat.lastTool}`);
+    const idleSec = Math.round((Date.now() - new Date(heartbeat.lastToolAt).getTime()) / 1000);
+    if (idleSec > STALE_THRESHOLD_SEC) {
+      out.push(` \u26A0 STALE: last tool call was ${formatDuration(idleSec)} ago \u2014 run may have crashed`);
+    }
+  }
+  out.push('\u2500'.repeat(55));
+  // Summary line and, when the run id is known, the progress log location
+  const completedCount = partial?.tests?.length || 0;
+  const totalCost = partial?.total_cost_usd || 0;
+  out.push(` Completed: ${completedCount} Running: ${isRunning ? 1 : 0} Cost: $${totalCost.toFixed(2)} Elapsed: ${formatDuration(elapsed)}`);
+  if (heartbeat?.runId) {
+    out.push(` Logs: ${path.join(OpenGStack_DEV_DIR, 'e2e-runs', heartbeat.runId, 'progress.log')}`);
+  }
+  return out.join('\n');
+}
+// --- Main ---
+// Runs only when this file is the entry point, so importing renderDashboard has no side effects.
+if (import.meta.main) {
+  const showTail = process.argv.includes('--tail');
+  /** Re-read both input files and redraw the whole screen. */
+  const render = () => {
+    let heartbeat = readJSON<HeartbeatData>(HEARTBEAT_PATH);
+    const partial = readJSON<PartialData>(PARTIAL_PATH);
+    // Auto-clear heartbeat if the process is dead. The notice is only recorded here
+    // and written after the screen clear below; writing it before the clear would
+    // erase it immediately.
+    let notice = '';
+    if (heartbeat?.pid && !isProcessAlive(heartbeat.pid)) {
+      try { fs.unlinkSync(HEARTBEAT_PATH); } catch { /* already gone */ }
+      notice = `Cleared stale heartbeat — PID ${heartbeat.pid} is no longer running.\n\n`;
+      heartbeat = null;
+    }
+    // Clear screen and move the cursor to the top-left corner
+    process.stdout.write('\x1B[2J\x1B[H');
+    if (notice) process.stdout.write(notice);
+    process.stdout.write(renderDashboard(heartbeat, partial) + '\n');
+    // --tail: show last 10 non-blank lines of progress.log
+    if (showTail && heartbeat?.runId) {
+      const logPath = path.join(OpenGStack_DEV_DIR, 'e2e-runs', heartbeat.runId, 'progress.log');
+      try {
+        const content = fs.readFileSync(logPath, 'utf-8');
+        const tail = content.split('\n').filter(l => l.trim()).slice(-10);
+        process.stdout.write('\nRecent progress:\n');
+        for (const line of tail) {
+          process.stdout.write(line + '\n');
+        }
+      } catch { /* log file may not exist yet */ }
+    }
+  };
+  render();
+  setInterval(render, 1000);
+}