npm - @aion0/forge - Versions diffs - 0.10.79 → 0.10.81 - Mend

@aion0/forge 0.10.79 → 0.10.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/RELEASE_NOTES.md +4 -5
package/app/api/tasks/[id]/hook/stop/route.ts +15 -0
package/app/api/tasks/route.ts +2 -1
package/cli/mw.mjs +7 -5
package/cli/mw.ts +8 -6
package/components/Dashboard.tsx +61 -28
package/components/InlinePipelineView.tsx +22 -5
package/components/PipelineHistory.tsx +306 -0
package/components/TaskDetail.tsx +28 -1
package/components/TmuxTaskTerminal.tsx +105 -0
package/components/WebTerminal.tsx +7 -0
package/docs/design_automation_records/Automation Redesign.dc.html +2019 -0
package/docs/design_automation_records/README.md +232 -0
package/lib/chat/agent-loop.ts +6 -0
package/lib/chat/tool-dispatcher.ts +110 -9
package/lib/help-docs/05-pipelines.md +31 -0
package/lib/help-docs/25-chat-tools.md +23 -0
package/lib/pipeline.ts +27 -3
package/lib/task-manager.ts +73 -3
package/lib/task-tmux-backend.ts +625 -0
package/lib/workspace/skill-installer.ts +18 -8
package/package.json +1 -1
package/proxy.ts +5 -4
package/src/core/db/database.ts +1 -0
package/src/types/index.ts +3 -0

package/lib/task-tmux-backend.ts ADDED Viewed

@@ -0,0 +1,625 @@
+/**
+ * Tmux task backend — runs `claude` (interactive mode, no -p) inside a dedicated
+ * tmux session, injects the prompt via paste-buffer, and captures the response.
+ *
+ * Why interactive (no -p)?
+ * Interactive mode uses ~/.claude/ OAuth credentials (subscription billing).
+ * `-p` / headless mode uses ANTHROPIC_API_KEY (API billing). By starting claude
+ * the same way a user would in a terminal, the session runs under the logged-in
+ * subscription account.
+ *
+ * Completion detection: Claude Code's Stop hook fires when a turn finishes
+ * (including all tool calls). We write a task-context.json file to the project
+ * directory so the hook script knows which task just completed. The hook POSTs
+ * to /api/tasks/{id}/hook/stop; that endpoint resolves the awaited promise here.
+ * Pane polling runs in parallel as a second completion signal, and a 2-hour
+ * timeout acts as the last-resort fallback if neither fires.
+ *
+ * Flow:
+ *   1. Write task-context.json to {projectPath}/.forge/ (hook routing info)
+ *   2. Create tmux session fgt-{id} in project directory
+ *   3. Inject secret env vars via `tmux set-environment` (out-of-band)
+ *   4. Run launch script: eval env, then exec `claude` (interactive, no -p)
+ *   5. Wait up to 120s for claude's TUI input prompt to appear
+ *   6. Inject prompt via paste-buffer (handles any length / special chars)
+ *   7. Send Enter; await Stop hook POST (or 2h timeout / cancellation)
+ *   8. Capture full pane history, strip ANSI, store as result
+ *   9. Send `/exit`; remove task-context.json
+ *  10. Session stays alive until deleteTask (for post-mortem debugging)
+ *
+ * Run dir: <dataDir>/tmux/sessions/{taskId}/
+ *   launch.sh   — env-sourcing wrapper that execs claude
+ *   prompt.txt  — prompt text (newlines joined to avoid multi-submit)
+ *   output.txt  — final stripped pane capture written after completion
+ */
+import { execSync, spawn } from 'node:child_process';
+import * as fs from 'node:fs';
+import { join } from 'node:path';
+import type { Task, TaskLogEntry } from '../src/types';
+import { resolveTerminalLaunch } from './agents/index';
+import { connectorEnv } from './task-manager';
+import { getDataDir } from './dirs';
+// ─── Hook waiter registry ────────────────────────────────────
+// One pending resolver per waiting tmux task, looked up by taskId. An entry is
+// consumed when /api/tasks/{id}/hook/stop fires or when the session dies.
+type HookWaiter = (outcome: 'done' | 'session_died') => void;
+const _hookWaiters: Map<string, HookWaiter> = new Map();
+/**
+ * Resolve the pending waiter for `taskId` with outcome 'done'.
+ * @returns true when a waiter was registered (it is removed and resolved);
+ *          false when none exists (e.g. after a server restart).
+ */
+export function fireTmuxHook(taskId: string): boolean {
+  const waiter = _hookWaiters.get(taskId);
+  if (!waiter) return false;
+  _hookWaiters.delete(taskId);
+  waiter('done');
+  return true;
+}
+/** Tell the pending waiter for `taskId`, if any, that its tmux session exited unexpectedly. */
+function fireSessionDied(taskId: string): void {
+  const waiter = _hookWaiters.get(taskId);
+  if (waiter === undefined) return;
+  _hookWaiters.delete(taskId);
+  waiter('session_died');
+}
+/**
+ * Fallback completion for a tmux task whose in-memory waiter was lost
+ * (e.g. the server restarted mid-task).
+ *
+ * Captures the pane, writes the ANSI-stripped capture to output.txt and marks
+ * the task finished through `finishTmuxTask` from task-manager. The result
+ * handed to `finishTmuxTask` goes through `normalizeTmuxResult` — the same
+ * normalization `executeTmuxTask` applies before storing its result — so both
+ * completion paths store the same kind of text (no TUI chrome).
+ *
+ * @returns true if the task was completed here; false if the task does not
+ *          exist, is not a tmux-backend task, or is not in status 'running'.
+ */
+export function completeStaleTmuxTask(taskId: string): boolean {
+  // Resolved at call time rather than through the top-level import.
+  // NOTE(review): presumably avoids a circular-import ordering problem with task-manager — confirm.
+  const { getTask, finishTmuxTask } = require('./task-manager') as typeof import('./task-manager');
+  const task = getTask(taskId);
+  if (!task || (task as any).backend !== 'tmux' || task.status !== 'running') return false;
+  const rawPane = stripAnsi(capturePane(tmuxSessionName(taskId)));
+  // Best-effort forensic copy of the full pane; a write failure must not block completion.
+  try { fs.writeFileSync(join(taskRunDir(taskId), 'output.txt'), rawPane, 'utf8'); } catch {}
+  finishTmuxTask(taskId, normalizeTmuxResult(rawPane));
+  return true;
+}
+/**
+ * Register a waiter for `taskId` and resolve with whichever happens first:
+ * the waiter being fired ('done' / 'session_died'), `isCancelled()` returning
+ * true on a 2-second poll ('cancelled'), or `maxWaitMs` elapsing ('timeout').
+ * The registry entry and both timers are released before the promise resolves.
+ */
+function waitForHookOrTimeout(
+  taskId: string,
+  isCancelled: () => boolean,
+  maxWaitMs: number,
+): Promise<'done' | 'cancelled' | 'timeout' | 'session_died'> {
+  type Outcome = 'done' | 'cancelled' | 'timeout' | 'session_died';
+  return new Promise<Outcome>((settle) => {
+    let pollHandle: ReturnType<typeof setInterval> | undefined;
+    let deadlineHandle: ReturnType<typeof setTimeout> | undefined;
+    const finish = (result: Outcome): void => {
+      _hookWaiters.delete(taskId);
+      clearInterval(pollHandle);
+      clearTimeout(deadlineHandle);
+      settle(result);
+    };
+    _hookWaiters.set(taskId, (outcome) => finish(outcome));
+    pollHandle = setInterval(() => {
+      if (isCancelled()) finish('cancelled');
+    }, 2000);
+    deadlineHandle = setTimeout(() => finish('timeout'), maxWaitMs);
+  });
+}
+// ─── Pane completion detection (mirrors workspace Smith logic) ──────────────
+// A tail line that consists of nothing but a prompt glyph.
+const DONE_PROMPT_PATTERNS: RegExp[] = [
+  /^❯\s*$/,       // Claude Code idle prompt — the whole line is just ❯
+  /^>\s*$/,        // generic / Codex prompt
+  /^\$\s*$/,       // plain shell prompt (fallback)
+];
+// In Claude Code v2 the `❯`/`>` input box stays on screen permanently, even in
+// the middle of a turn, so an empty prompt line alone does not prove the agent
+// is idle. The trustworthy "still busy" indicator is the live activity footer:
+// when any pattern below matches the tail, the agent is mid-turn and must be
+// treated as neither done nor waiting for a reply.
+const WORKING_PATTERNS: RegExp[] = [
+  /esc to inter/i,              // active-turn footer, possibly cut off ("esc to inte…")
+  /\([0-9hms .]+·/,            // running timer, e.g. "(2m 16s ·" or "(12s ·"
+  /↓\s*[\d.]+k?\s*tokens/i,    // running token counter, e.g. "↓ 2.2k tokens"
+  /to run in background/i,      // only displayed while a turn is executing
+  /Press up to edit queued/i,   // queued message → a turn is pending or running
+  /^[✻✽✶✦✳✺∗⋆]\s+\w+…/,        // spinner verb in progress, e.g. "✽ Booping…"
+];
+// TUI chrome rather than assistant output — skipped when searching for the
+// last "real" content line during question detection.
+const CHROME_PATTERNS: RegExp[] = [
+  /bypass permissions/i,
+  /shift\+tab to cycle/i,
+  /Press up to edit/i,
+  /for agents\s*$/i,
+  /esc to interrupt/i,
+  /^[─━—_]{3,}$/,               // horizontal separator rules
+  /^⏵+/,
+];
+const DONE_CONFIRM_CHECKS = 2;    // idle must be observed on this many consecutive polls
+const DONE_CHECK_INTERVAL = 5000; // polling period in milliseconds
+/**
+ * Cheap tail capture of a tmux pane: asks tmux for the capture starting 30
+ * lines back (`-S -30`) instead of the full scrollback, removes CSI escape
+ * sequences, trims each line, drops empty ones and returns the final
+ * `tailLines` entries of what is left.
+ * execSync errors are deliberately not caught — a vanished session makes this
+ * throw, which callers rely on to detect session death.
+ */
+function captureTail(sessionName: string, tailLines = 5): string[] {
+  const output = execSync(`tmux capture-pane -t ${sessionName} -p -S -30`, { timeout: 5000, encoding: 'utf-8' });
+  const meaningful: string[] = [];
+  for (const rawLine of output.replace(/\x1b\[[0-9;]*[a-zA-Z]/g, '').split('\n')) {
+    const line = rawLine.trim();
+    if (line) meaningful.push(line);
+  }
+  return meaningful.slice(-tailLines);
+}
+/** True when at least one tail line matches an entry of DONE_PROMPT_PATTERNS. */
+function isDonePrompt(tailLines: string[]): boolean {
+  for (const line of tailLines) {
+    for (const pattern of DONE_PROMPT_PATTERNS) {
+      if (pattern.test(line)) return true;
+    }
+  }
+  return false;
+}
+/** True when at least one tail line matches an entry of WORKING_PATTERNS (agent is mid-turn). */
+function isWorking(tailLines: string[]): boolean {
+  for (const line of tailLines) {
+    for (const pattern of WORKING_PATTERNS) {
+      if (pattern.test(line)) return true;
+    }
+  }
+  return false;
+}
+// Lines belonging to the welcome banner / startup output; never task content.
+const BANNER_PATTERNS: RegExp[] = [
+  /Claude Code v\d/i,
+  /setup issues/i,
+  /·\s*Claude (API|Pro|Max)/i,
+  /^[▐▝▘▖▗▛▜▙▟█▌▐\s]+$/,        // rows made only of logo block glyphs
+  /^[▐▝▘▖▗▛▜▙▟█▌]{2,}/,         // logo row followed by text (e.g. "▘▘ ▝▝  ~/path")
+  /Auto-update/i,
+];
+/**
+ * Turn a tmux screen-scrape into text that resembles headless `claude -p`
+ * stdout, so workflow consumers (anchored `^ACTION:` greps, structured-block
+ * extraction) see the same shape under tmux as under headless mode.
+ *
+ * Removes TUI chrome lines (separator rules, footer, input box / empty prompt /
+ * stray reply, spinner lines, activity hints, welcome banner), strips up to two
+ * leading spaces from every kept line (claude's TUI margin) so markers return
+ * to column 0, collapses runs of 3+ newlines to a single blank line, and trims.
+ *
+ * @param raw pane text; must already be ANSI-stripped.
+ */
+function normalizeTmuxResult(raw: string): string {
+  const output: string[] = [];
+  for (const line of raw.split('\n')) {
+    const text = line.trim();
+    // Blank lines are never chrome; they are kept here and collapsed at the end.
+    const isChrome = text !== '' && (
+      /^[─━—_]{3,}$/.test(text) ||                                   // separator rules
+      /^⏵+/.test(text) || /bypass permissions/i.test(text) ||        // footer
+      /^❯/.test(text) || /^>\s*$/.test(text) ||                      // input box / empty prompt / stray reply
+      /^[✻✽✶✦✳✺∗⋆]\s/.test(text) ||                                  // spinner / "Cooked for Xs"
+      /esc to inter|Press up to edit|for agents\s*$/i.test(text) ||
+      BANNER_PATTERNS.some(p => p.test(text))                        // welcome banner
+    );
+    if (isChrome) continue;
+    output.push(line.replace(/^ {1,2}/, ''));                        // drop the TUI margin
+  }
+  return output.join('\n').replace(/\n{3,}/g, '\n\n').trim();
+}
+/**
+ * Detects pasted text that is still sitting in the input box, i.e. a paste
+ * whose Enter never submitted. A line counts when, after trimming, it is
+ * "❯ <text>" and the text is neither the placeholder ("Try …") nor the
+ * queued-messages hint ("Press up to edit…"). An empty box does not count.
+ */
+function inputBoxHasPendingText(pane: string): boolean {
+  for (const rawLine of pane.split('\n')) {
+    const line = rawLine.trim();
+    if (!/^❯\s+\S/.test(line)) continue;
+    if (/^❯\s+(Try |Press up to edit)/.test(line)) continue;
+    return true;
+  }
+  return false;
+}
+/**
+ * Returns the last tail line that is assistant content: scanning from the end,
+ * skips bare prompt lines (DONE_PROMPT_PATTERNS), TUI chrome (CHROME_PATTERNS)
+ * and empty strings. Returns '' when nothing qualifies.
+ */
+function lastContentLine(tailLines: string[]): string {
+  const isSkippable = (line: string): boolean =>
+    DONE_PROMPT_PATTERNS.some(p => p.test(line)) ||
+    CHROME_PATTERNS.some(p => p.test(line)) ||
+    !line;
+  const candidate = [...tailLines].reverse().find(line => !isSkippable(line));
+  return candidate ?? '';
+}
+/**
+ * Does the idle pane actually show a question / interactive choice waiting for
+ * input? This is a necessary precondition for any auto-reply — a finished task
+ * that merely printed a summary must NOT qualify.
+ *
+ * Only the last assistant-content line (see lastContentLine) is inspected. It
+ * qualifies when it ends with a question mark (ASCII or fullwidth), starts with
+ * a numbered menu entry, or contains a y/n style choice marker.
+ */
+function looksLikeQuestion(tailLines: string[]): boolean {
+  const last = lastContentLine(tailLines);
+  if (!last) return false;
+  // ASCII "?" or fullwidth "？" (U+FF1F). The fullwidth mark is written as an
+  // escape so the two operands cannot silently become the same character.
+  if (last.endsWith('?') || last.endsWith('\uFF1F')) return true;
+  // Claude Code permission / choice menus: "❯ 1. Yes", "1. Yes  2. No", "(y/n)".
+  if (/^❯?\s*\d+\.\s/.test(last)) return true;
+  if (/\(y\/n\)|\[y\/n\]|yes\/no/i.test(last)) return true;
+  return false;
+}
+// ─── Helpers ────────────────────────────────────────────────
+/** Name of the tmux session that belongs to a task: the task id prefixed with "fgt-". */
+export function tmuxSessionName(taskId: string): string {
+  return 'fgt-' + taskId;
+}
+/** Per-task run directory: <dataDir>/tmux/sessions/<taskId>. */
+function taskRunDir(taskId: string): string {
+  const sessionsRoot = join(getDataDir(), 'tmux', 'sessions');
+  return join(sessionsRoot, taskId);
+}
+/**
+ * Kill the task's tmux session and delete its run directory.
+ * Both steps are best-effort: failures (session already gone, directory
+ * missing, …) are swallowed.
+ */
+export function killTmuxTaskSession(taskId: string): void {
+  try {
+    execSync(`tmux kill-session -t ${tmuxSessionName(taskId)} 2>/dev/null`, { timeout: 5000 });
+  } catch {
+    // nothing to kill, or tmux unavailable
+  }
+  try {
+    fs.rmSync(taskRunDir(taskId), { recursive: true, force: true });
+  } catch {
+    // leftover files are not worth failing over
+  }
+}
+/**
+ * Capture the pane including its entire scrollback (`-S -`).
+ * Unlike captureTail this never throws: any execSync failure yields ''.
+ */
+function capturePane(sessionName: string): string {
+  let captured = '';
+  try {
+    const buf = execSync(`tmux capture-pane -t ${sessionName} -p -S - 2>/dev/null`, { timeout: 5000 });
+    captured = buf.toString();
+  } catch {
+    captured = '';
+  }
+  return captured;
+}
+/**
+ * Remove terminal control noise from `s`, applying the patterns in order:
+ * CSI sequences, OSC sequences (BEL- or ST-terminated), charset selection,
+ * keypad-mode switches, carriage returns and stray BEL characters.
+ */
+function stripAnsi(s: string): string {
+  const noise: RegExp[] = [
+    /\x1b\[[0-9;?]*[a-zA-Z]/g,
+    /\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)/g,
+    /\x1b[()][0-9A-B]/g,
+    /\x1b[=>]/g,
+    /\r/g,
+    /\x07/g,
+  ];
+  let cleaned = s;
+  for (const pattern of noise) cleaned = cleaned.replace(pattern, '');
+  return cleaned;
+}
+/** Promise that resolves (with no value) after `ms` milliseconds. */
+function delay(ms: number): Promise<void> {
+  return new Promise<void>((wake) => {
+    setTimeout(() => wake(), ms);
+  });
+}
+// ─── LLM-based question classification ────────────────────────
+// shouldAutoReply runs `claude -p` with a haiku model to decide whether Claude
+// paused to ask for permission/clarification (→ CONTINUE) or finished the task
+// (→ DONE). It reuses the claude binary already launched for the task, so no
+// API key is needed. classifyWithPatterns is the fallback when that subprocess
+// fails.
+/**
+ * Pattern-only fallback classifier. Returns true when the last line (trimmed)
+ * ends with "?" AND the last three lines, joined by spaces, contain one of the
+ * known "may I continue" phrasings.
+ */
+function classifyWithPatterns(contentLines: string[]): boolean {
+  const finalLine = contentLines[contentLines.length - 1]?.trim() ?? '';
+  if (!finalLine.endsWith('?')) return false;
+  const recent = contentLines.slice(-3).join(' ');
+  return /\b(want me to|should i|shall i|would you like me to|do you want me to|can i\b|may i\b|should we|want to proceed|want to continue|like me to)\b/i
+    .test(recent);
+}
+/**
+ * Ask a short-lived `claude -p` (haiku) subprocess whether the idle agent is
+ * blocked on a "may I continue?" style question.
+ *
+ * @param tailLines recent pane lines; the last 20 are sent as context.
+ * @param claudeBin path/name of the claude executable to spawn.
+ * @returns true only when the classifier's output starts with CONTINUE.
+ *          Falls back to classifyWithPatterns(tailLines) when the subprocess
+ *          fails to start, exits non-zero, or does not finish within 30s.
+ *          Returns false immediately when there is no context at all.
+ */
+async function shouldAutoReply(tailLines: string[], claudeBin: string): Promise<boolean> {
+  const context = tailLines.slice(-20).join('\n').trim();
+  if (!context) return false;
+  const prompt = `You monitor an AI coding assistant running a background task. Its terminal is now idle. Decide whether it is genuinely BLOCKED waiting for the user to approve continuing, or whether it has FINISHED.
+Reply CONTINUE only if the assistant is clearly asking permission to keep going / proposing a next step and waiting for a yes (e.g. "Should I proceed?", "Want me to apply the fix?", a numbered Yes/No choice).
+Reply DONE if it has delivered its result, is asking a substantive question that needs a real human decision (not a yes/no continue), or you are at all unsure. When in doubt, reply DONE.
+Reply with exactly one word: CONTINUE or DONE.
+Last output:
+${context}`;
+  return new Promise((resolve) => {
+    let settled = false;
+    let safetyTimer: ReturnType<typeof setTimeout> | undefined;
+    // First caller wins; also releases the safety timer so a finished
+    // classification does not leave a live 30s timer behind.
+    const done = (v: boolean) => {
+      if (settled) return;
+      settled = true;
+      clearTimeout(safetyTimer);
+      resolve(v);
+    };
+    const child = spawn(claudeBin, ['-p', '--model', 'claude-haiku-4-5-20251001'], {
+      stdio: ['pipe', 'pipe', 'ignore'],
+      // Clear the markers that make claude believe it runs inside Claude Code.
+      env: { ...process.env, CLAUDECODE: undefined, CLAUDE_CODE_ENTRYPOINT: undefined, CLAUDE_CODE_SSE_PORT: undefined },
+    });
+    // If the child dies before reading its stdin the write fails (EPIPE); without a
+    // listener that would be an unhandled stream 'error'. The outcome is still
+    // decided by the 'close' / 'error' handlers below.
+    child.stdin.on('error', () => {});
+    child.stdin.write(prompt);
+    child.stdin.end();
+    let output = '';
+    child.stdout.on('data', (d: Buffer) => { output += d.toString(); });
+    child.on('close', (code: number | null) => {
+      if (code !== 0) { done(classifyWithPatterns(tailLines)); return; }
+      done(output.trim().toUpperCase().startsWith('CONTINUE'));
+    });
+    child.on('error', () => done(classifyWithPatterns(tailLines)));
+    // 30s safety timeout — kill the classifier and fall back to pattern matching
+    safetyTimer = setTimeout(() => { try { child.kill(); } catch {} done(classifyWithPatterns(tailLines)); }, 30_000);
+  });
+}
+// ─── Callbacks contract ───────────────────────────────────── (hooks executeTmuxTask uses to report back to its caller)
+export interface TmuxTaskCallbacks {
+  appendLog(entry: TaskLogEntry): void; // receives every log entry the executor emits (system / assistant / result)
+  setStatus(status: 'done' | 'failed' | 'cancelled', detail?: { resultSummary?: string; costUSD?: number; error?: string }): void; // reports the final outcome; executeTmuxTask passes `error` with 'failed' and `resultSummary` with 'done'
+  isCancelled(): boolean; // polled by the executor; once it returns true the task ends as 'cancelled'
+}
+// ─── Main executor ──────────────────────────────────────────
+// Readiness markers: the pane is considered initialized as soon as any of
+// these matches. Claude Code v2 draws horizontal rule borders and a bare `> `
+// prompt line; older builds used box-drawing chars (╭─╮, │ >), which v2 dropped.
+const READY_PATTERNS: RegExp[] = [
+  /^>\s/m,          // input prompt line beginning with "> "
+  /─{20,}/,         // horizontal separator of 20+ dashes
+  /╭─+╮/,           // legacy box top (pre-v2)
+  /│\s*>/,          // legacy input box (pre-v2)
+  /❯\s*$/m,         // legacy arrow prompt (pre-v2)
+];
+const MAX_WAIT_MS = 7_200_000; // 2 hours in ms — timeout fallback
+/**
+ * Run one task inside a dedicated tmux session and report the outcome via `cbs`.
+ *
+ * Steps: write `.forge/task-context.json` into the project, create the tmux
+ * session, push env vars into it, start the agent CLI through a generated
+ * launch.sh, wait up to 120s for a ready pattern, paste the prompt and press
+ * Enter, then wait for completion — signalled by fireTmuxHook() (from the Stop
+ * hook endpoint or from the pane-polling timer below), by cancellation, by
+ * session death, or by the 2h timeout. On success the pane is captured,
+ * written to output.txt, normalized and reported with setStatus('done').
+ *
+ * Shell safety: every dynamic value interpolated into a shell command line
+ * (project path, launch script path, env values) is single-quoted via `shq`,
+ * so `$`, backticks, quotes and spaces reach tmux verbatim instead of being
+ * expanded by /bin/sh. Only env keys that are valid shell identifiers are used.
+ *
+ * tmux, launch and prompt-injection failures are reported through
+ * setStatus('failed') rather than thrown.
+ *
+ * @param task task to execute (`id`, `prompt`, `projectPath` and optional `agent` are read).
+ * @param cbs  logging / status / cancellation callbacks.
+ */
+export async function executeTmuxTask(task: Task, cbs: TmuxTaskCallbacks): Promise<void> {
+  const { appendLog, setStatus, isCancelled } = cbs;
+  // POSIX single-quote escaping: nothing is expanded inside '...'; an embedded
+  // single quote is written as '\'' (close, escaped quote, reopen).
+  const shq = (s: string): string => "'" + s.replace(/'/g, "'\\''") + "'";
+  const agentId = (task as any).agent || 'claude';
+  const launch = resolveTerminalLaunch(agentId, 'task');
+  const envVars: Record<string, string> = {
+    ...connectorEnv(),
+    ...(launch.env || {}),
+  };
+  delete envVars.CLAUDECODE;
+  const sessionName = tmuxSessionName(task.id);
+  const runDir = taskRunDir(task.id);
+  fs.mkdirSync(runDir, { recursive: true });
+  const launchScriptPath = join(runDir, 'launch.sh');
+  const promptPath = join(runDir, 'prompt.txt');
+  const outputPath = join(runDir, 'output.txt');
+  // ── 1. Write task-context.json for the Stop hook ──────────
+  const forgePort = Number(process.env.PORT) || 8403;
+  const taskContextDir = join(task.projectPath, '.forge');
+  const taskContextPath = join(taskContextDir, 'task-context.json');
+  try {
+    fs.mkdirSync(taskContextDir, { recursive: true });
+    fs.writeFileSync(taskContextPath, JSON.stringify({ taskId: task.id, port: forgePort }));
+  } catch (e: any) {
+    // Non-fatal: pane polling can still detect completion without the hook.
+    appendLog({ type: 'system', subtype: 'warn', content: `task-context.json write failed: ${e?.message}`, timestamp: new Date().toISOString() });
+  }
+  // Prompt: replace internal newlines with space so paste-buffer doesn't multi-submit
+  const promptText = task.prompt.replace(/\r?\n+/g, ' ').trim();
+  fs.writeFileSync(promptPath, promptText, 'utf8');
+  const modelFlag = launch.model && launch.model !== 'default' ? `--model ${launch.model}` : '';
+  const skipFlag = launch.skipPermissionsFlag || '--dangerously-skip-permissions';
+  const claudeBin = launch.cliCmd;
+  // Only identifier-shaped keys are safe to place in a grep alternation / command line.
+  const envKeys = Object.keys(envVars).filter(k => /^[A-Za-z_][A-Za-z0-9_]*$/.test(k));
+  const evalLine = envKeys.length
+    ? `eval "$(tmux show-environment -s 2>/dev/null | grep -E '^(${envKeys.join('|')})=')"`
+    : '';
+  fs.writeFileSync(launchScriptPath, [
+    '#!/bin/bash',
+    // Unset vars that make claude think it's running inside Claude Code —
+    // inherited from the Forge/Node process environment through the tmux session.
+    'unset CLAUDECODE CLAUDE_CODE_ENTRYPOINT CLAUDE_CODE_SSE_PORT',
+    evalLine,
+    `exec "${claudeBin}" ${skipFlag} ${modelFlag}`,
+  ].filter(Boolean).join('\n') + '\n', { mode: 0o700 });
+  // ── 2. Create tmux session (working dir = project path) ───
+  try {
+    execSync(`tmux new-session -d -s ${sessionName} -c ${shq(task.projectPath)} -x 220 -y 50 2>/dev/null || true`, { timeout: 10000 });
+    // `|| true` tolerates an already-existing session but also hides real
+    // failures, so confirm the session is actually there before going on.
+    execSync(`tmux has-session -t ${sessionName} 2>/dev/null`, { timeout: 5000 });
+  } catch (e: any) {
+    try { fs.unlinkSync(taskContextPath); } catch {}
+    setStatus('failed', { error: `Failed to create tmux session: ${e?.message}` });
+    return;
+  }
+  // ── 3. Inject secret env vars out-of-band ────────────────
+  for (const [k, v] of Object.entries(envVars)) {
+    // Keys outside envKeys are never eval'd by launch.sh, so setting them is pointless
+    // (and an arbitrary key must not be interpolated into a shell command).
+    if (!envKeys.includes(k)) continue;
+    try { execSync(`tmux set-environment -t ${sessionName} ${k} ${shq(String(v))}`, { timeout: 5000 }); } catch {}
+  }
+  // ── 4. Start claude (interactive, no -p) ─────────────────
+  try {
+    // Outer quoting protects the text from this shell; inner quoting protects
+    // the script path from the shell running inside the pane.
+    execSync(`tmux send-keys -t ${sessionName} ${shq(`bash ${shq(launchScriptPath)}`)} Enter`, { timeout: 5000 });
+  } catch (e: any) {
+    try { fs.unlinkSync(taskContextPath); } catch {}
+    setStatus('failed', { error: `Failed to launch claude: ${e?.message}` });
+    return;
+  }
+  appendLog({ type: 'system', subtype: 'init', content: `tmux session: ${sessionName} | interactive mode | completion via Stop hook`, timestamp: new Date().toISOString() });
+  // ── 5. Wait for claude's TUI input prompt ────────────────
+  appendLog({ type: 'system', subtype: 'info', content: 'Waiting for claude to initialize...', timestamp: new Date().toISOString() });
+  let lastPane = '';
+  const ready = await new Promise<boolean>((resolve) => {
+    const deadline = Date.now() + 120_000;
+    const poll = () => {
+      if (isCancelled()) { resolve(false); return; }
+      if (Date.now() > deadline) { resolve(false); return; }
+      lastPane = stripAnsi(capturePane(sessionName));
+      if (READY_PATTERNS.some(p => p.test(lastPane))) { resolve(true); return; }
+      setTimeout(poll, 500);
+    };
+    setTimeout(poll, 500);
+  });
+  if (!ready) {
+    try { fs.unlinkSync(taskContextPath); } catch {}
+    if (isCancelled()) {
+      // Same cleanup as a cancellation during the main wait: don't leave the agent running.
+      killTmuxTaskSession(task.id);
+      setStatus('cancelled');
+    } else {
+      // Dump last pane snapshot so user can see what claude printed (pattern mismatch vs slow start)
+      const paneSnippet = lastPane.slice(-800).trim();
+      appendLog({ type: 'system', subtype: 'warn', content: `[pane at timeout]\n${paneSnippet}`, timestamp: new Date().toISOString() });
+      setStatus('failed', { error: 'claude did not initialize within 120s' });
+    }
+    return;
+  }
+  // ── 6 & 7. Inject prompt via paste-buffer, send Enter ────
+  appendLog({ type: 'system', subtype: 'info', content: 'Injecting prompt...', timestamp: new Date().toISOString() });
+  try {
+    execSync(`tmux load-buffer ${shq(promptPath)}`, { timeout: 5000 });
+    execSync(`tmux paste-buffer -t ${sessionName}`, { timeout: 5000 });
+    // Large/multi-line pastes (e.g. a 9KB triage prompt) can race claude's init:
+    // the welcome banner trips the readiness check, the paste lands but the first
+    // Enter is swallowed, and the prompt sits in the box un-submitted forever.
+    // Send Enter, then VERIFY the box cleared / work started; retry Enter if not.
+    await delay(400);
+    execSync(`tmux send-keys -t ${sessionName} Enter`, { timeout: 5000 });
+    for (let attempt = 0; attempt < 4; attempt++) {
+      await delay(2000);
+      const pane = stripAnsi(capturePane(sessionName));
+      const tail = pane.split('\n').map(l => l.trim()).filter(Boolean).slice(-8);
+      if (isWorking(tail) || !inputBoxHasPendingText(pane)) break; // submitted (working) or box cleared
+      // Still sitting in the box — Enter was lost. Re-submit.
+      appendLog({ type: 'system', subtype: 'info', content: `Prompt not submitted yet — re-sending Enter (attempt ${attempt + 1})`, timestamp: new Date().toISOString() });
+      execSync(`tmux send-keys -t ${sessionName} Enter`, { timeout: 5000 });
+    }
+  } catch (e: any) {
+    try { fs.unlinkSync(taskContextPath); } catch {}
+    setStatus('failed', { error: `Failed to inject prompt: ${e?.message}` });
+    return;
+  }
+  appendLog({ type: 'assistant', subtype: 'prompt', content: task.prompt, timestamp: new Date().toISOString() });
+  appendLog({ type: 'system', subtype: 'info', content: 'Awaiting completion (Stop hook or pane polling)...', timestamp: new Date().toISOString() });
+  // ── 8. Two parallel mechanisms watch for completion ───────────────────────
+  // Stop hook: fast path — fires via /api/tasks/{id}/hook/stop when claude's Stop event fires.
+  // Pane polling (doneTimer): reliable fallback — mirrors Smith's monitorTerminalCompletion logic.
+  // Brief pause so the ❯ prompt is cleared before we snapshot the baseline tail.
+  await delay(1500);
+  // ── Streaming: capture full scrollback every 3s for log output ──────────
+  let streamedLines = stripAnsi(capturePane(sessionName)).split('\n').length;
+  const streamTimer = setInterval(() => {
+    const current = stripAnsi(capturePane(sessionName));
+    const lines = current.split('\n');
+    if (lines.length > streamedLines) {
+      // Only lines beyond the previously seen count are forwarded to the log.
+      const newContent = lines.slice(streamedLines).join('\n').trim();
+      if (newContent) appendLog({ type: 'system', subtype: 'info', content: newContent, timestamp: new Date().toISOString() });
+      streamedLines = lines.length;
+    }
+  }, 3000);
+  // ── Completion detection ──────────────────────────────────────────────────
+  // The `❯`/`>` input box is ALWAYS on screen in Claude Code v2, so it cannot
+  // mean "idle". Priority of signals each poll:
+  //   1. isWorking(tail) → mid-turn (spinner / "esc to interrupt" / live timer).
+  //      Reset confirmCount, do nothing. This is the guard that stops the old
+  //      "auto-reply fired while Claude was still working" bug.
+  //   2. started gate: wait for the post-inject baseline to change once.
+  //   3. idle prompt visible + NOT working for DONE_CONFIRM_CHECKS polls →
+  //        - pane shows a real question/choice (looksLikeQuestion) → haiku
+  //          confirms, then auto-reply "Yes, please continue."
+  //        - otherwise → the task is done (fire hook). No reply.
+  //   Session death (execSync throws) → fail immediately.
+  let baselineTail = captureTail(sessionName).join('|');
+  let started = false;
+  let confirmCount = 0;
+  let autoReplyCount = 0;
+  let llmCheckInProgress = false;
+  const MAX_AUTO_REPLIES = 10; // hard cap to prevent infinite loops
+  const doneTimer = setInterval(() => {
+    if (llmCheckInProgress) return; // wait for pending LLM classification to settle
+    try {
+      const tail = captureTail(sessionName, 8);
+      // (1) Mid-turn → never done, never auto-reply.
+      if (isWorking(tail)) { started = true; confirmCount = 0; return; }
+      // (2) Wait until the agent has actually started working at least once.
+      if (!started) {
+        if (tail.join('|') !== baselineTail) started = true;
+        return;
+      }
+      // (3) Idle prompt must be visible and stable for N consecutive polls.
+      if (!isDonePrompt(tail)) { confirmCount = 0; return; }
+      confirmCount++;
+      if (confirmCount < DONE_CONFIRM_CHECKS) return;
+      // Idle + stable. If there's no actual question waiting, the task is done.
+      if (!looksLikeQuestion(tail) || autoReplyCount >= MAX_AUTO_REPLIES) {
+        appendLog({ type: 'system', subtype: 'info', content: '[pane polling detected completion]', timestamp: new Date().toISOString() });
+        fireTmuxHook(task.id);
+        return;
+      }
+      // A question/choice is on screen — let haiku confirm it's a "continue" ask
+      // before injecting. Wider context for the classifier.
+      const contextTail = captureTail(sessionName, 20);
+      llmCheckInProgress = true;
+      shouldAutoReply(contextTail, claudeBin).then((isContinue) => {
+        llmCheckInProgress = false;
+        if (isContinue && autoReplyCount < MAX_AUTO_REPLIES) {
+          autoReplyCount++;
+          const reply = 'Yes, please continue.';
+          const askedLine = lastContentLine(contextTail);
+          appendLog({ type: 'system', subtype: 'info', content: `[auto-reply ${autoReplyCount}/${MAX_AUTO_REPLIES}] "${reply}" ← in response to: ${askedLine.slice(0, 200)}`, timestamp: new Date().toISOString() });
+          try {
+            const replyPath = join(runDir, `reply-${autoReplyCount}.txt`);
+            fs.writeFileSync(replyPath, reply, 'utf8');
+            execSync(`tmux load-buffer ${shq(replyPath)}`, { timeout: 5000 });
+            execSync(`tmux paste-buffer -t ${sessionName}`, { timeout: 5000 });
+            execSync(`tmux send-keys -t ${sessionName} Enter`, { timeout: 5000 });
+          } catch {}
+          // Re-arm the detector: a fresh turn must start before "done" can be seen again.
+          confirmCount = 0;
+          started = false;
+          baselineTail = captureTail(sessionName).join('|');
+        } else {
+          // haiku says it's actually finished (the `?` was rhetorical / part of a report).
+          appendLog({ type: 'system', subtype: 'info', content: '[pane polling detected completion]', timestamp: new Date().toISOString() });
+          fireTmuxHook(task.id);
+        }
+      }).catch(() => {
+        llmCheckInProgress = false;
+        appendLog({ type: 'system', subtype: 'info', content: '[pane polling detected completion]', timestamp: new Date().toISOString() });
+        fireTmuxHook(task.id);
+      });
+    } catch {
+      // captureTail throws when the session no longer exists.
+      appendLog({ type: 'system', subtype: 'warn', content: '[tmux session died]', timestamp: new Date().toISOString() });
+      fireSessionDied(task.id);
+    }
+  }, DONE_CHECK_INTERVAL);
+  doneTimer.unref();
+  const outcome = await waitForHookOrTimeout(task.id, isCancelled, MAX_WAIT_MS);
+  clearInterval(streamTimer);
+  clearInterval(doneTimer);
+  // Clean up context file regardless of outcome
+  try { fs.unlinkSync(taskContextPath); } catch {}
+  if (outcome === 'cancelled') {
+    killTmuxTaskSession(task.id);
+    setStatus('cancelled');
+    return;
+  }
+  if (outcome === 'timeout') {
+    killTmuxTaskSession(task.id);
+    setStatus('failed', { error: 'Tmux task timed out after 2 hours' });
+    return;
+  }
+  if (outcome === 'session_died') {
+    setStatus('failed', { error: 'tmux session died unexpectedly' });
+    return;
+  }
+  // ── 9. Capture and store result ───────────────────────────
+  // output.txt keeps the full ANSI-stripped pane (forensics). The result that
+  // feeds pipeline node outputs + chat is normalized to headless-like stdout so
+  // anchored greps / structured extraction behave identically to `claude -p`.
+  const rawPane = stripAnsi(capturePane(sessionName));
+  fs.writeFileSync(outputPath, rawPane, 'utf8');
+  const response = normalizeTmuxResult(rawPane);
+  appendLog({ type: 'result', content: response, timestamp: new Date().toISOString() });
+  // ── 10. Exit claude gracefully ────────────────────────────
+  try {
+    execSync(`tmux send-keys -t ${sessionName} "/exit" Enter`, { timeout: 5000 });
+  } catch {}
+  setStatus('done', { resultSummary: response.slice(0, 2048) });
+}