@aion0/forge 0.10.79 → 0.10.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,625 @@
1
+ /**
2
+ * Tmux task backend — runs `claude` (interactive mode, no -p) inside a dedicated
3
+ * tmux session, injects the prompt via paste-buffer, and captures the response.
4
+ *
5
+ * Why interactive (no -p)?
6
+ * Interactive mode uses ~/.claude/ OAuth credentials (subscription billing).
7
+ * `-p` / headless mode uses ANTHROPIC_API_KEY (API billing). By starting claude
8
+ * the same way a user would in a terminal, the session runs under the logged-in
9
+ * subscription account.
10
+ *
11
+ * Completion detection: Claude Code's Stop hook fires when a turn finishes
12
+ * (including all tool calls). We write a task-context.json file to the project
13
+ * directory so the hook script knows which task just completed. The hook POSTs
14
+ * to /api/tasks/{id}/hook/stop; that endpoint resolves the awaited promise here.
15
+ * A 2-hour timeout acts as fallback if the hook never fires.
16
+ *
17
+ * Flow:
18
+ * 1. Write task-context.json to {projectPath}/.forge/ (hook routing info)
19
+ * 2. Create tmux session fgt-{id} in project directory
20
+ * 3. Inject secret env vars via `tmux set-environment` (out-of-band)
21
+ * 4. Run launch script: eval env, then exec `claude` (interactive, no -p)
22
+ * 5. Wait up to 120s for claude's TUI input prompt to appear
23
+ * 6. Inject prompt via paste-buffer (handles any length / special chars)
24
+ * 7. Send Enter; await Stop hook POST (or 2h timeout / cancellation)
25
+ * 8. Capture full pane history, strip ANSI, store as result
26
+ * 9. Send `/exit`; remove task-context.json
27
+ * 10. Session stays alive until deleteTask (for post-mortem debugging)
28
+ *
29
+ * Run dir: <dataDir>/tmux/sessions/{taskId}/
30
+ * launch.sh — env-sourcing wrapper that execs claude
31
+ * prompt.txt — prompt text (newlines joined to avoid multi-submit)
32
+ * output.txt — final stripped pane capture written after completion
33
+ */
34
+
35
+ import { execSync, spawn } from 'node:child_process';
36
+ import * as fs from 'node:fs';
37
+ import { join } from 'node:path';
38
+ import type { Task, TaskLogEntry } from '../src/types';
39
+ import { resolveTerminalLaunch } from './agents/index';
40
+ import { connectorEnv } from './task-manager';
41
+ import { getDataDir } from './dirs';
42
+
43
+ // ─── Hook waiter registry ────────────────────────────────────
44
+
45
// Pending completion callbacks, one per in-flight tmux task (keyed by taskId).
// An entry is invoked with 'done' when /api/tasks/{id}/hook/stop fires, or with
// 'session_died' when the tmux session goes away.
const _hookWaiters = new Map<string, (outcome: 'done' | 'session_died') => void>();

/**
 * Resolve the waiter registered for `taskId` with the 'done' outcome.
 *
 * @param taskId Task whose Stop hook just fired.
 * @returns `true` when a live waiter existed and was resolved; `false` when no
 *          waiter is registered (e.g. after a server restart).
 */
export function fireTmuxHook(taskId: string): boolean {
  const waiter = _hookWaiters.get(taskId);
  if (!waiter) return false;

  // Unregister first so the same waiter can never be resolved twice.
  _hookWaiters.delete(taskId);
  waiter('done');
  return true;
}
58
+
59
/**
 * Notify the waiter for `taskId` (if any) that its tmux session exited
 * unexpectedly. Does nothing when no waiter is registered.
 */
function fireSessionDied(taskId: string): void {
  const waiter = _hookWaiters.get(taskId);
  if (!waiter) return;

  _hookWaiters.delete(taskId);
  waiter('session_died');
}
67
+
68
/**
 * Fallback completion for a tmux task whose in-memory waiter was lost
 * (e.g. the server restarted mid-task).
 *
 * Captures the task's pane, writes the ANSI-stripped text to `output.txt` in
 * the task run dir (best effort), and hands the text to `finishTmuxTask` from
 * task-manager.
 *
 * @returns `false` when the task does not exist, is not a tmux task, or is not
 *          currently `running`; `true` once `finishTmuxTask` has been called.
 */
export function completeStaleTmuxTask(taskId: string): boolean {
  // task-manager is loaded at call time via require rather than through the static import.
  const { getTask, finishTmuxTask } = require('./task-manager') as typeof import('./task-manager');

  const task = getTask(taskId);
  if (!task) return false;
  if ((task as any).backend !== 'tmux') return false;
  if (task.status !== 'running') return false;

  const response = stripAnsi(capturePane(tmuxSessionName(taskId)));

  // Persisting the capture is best-effort; a write failure must not block completion.
  try {
    fs.writeFileSync(join(taskRunDir(taskId), 'output.txt'), response, 'utf8');
  } catch {}

  finishTmuxTask(taskId, response);
  return true;
}
88
+
89
/**
 * Wait until one of the following happens for `taskId`:
 *  - its registered waiter is resolved ('done' or 'session_died'),
 *  - `isCancelled()` returns true (polled every 2s) → 'cancelled',
 *  - `maxWaitMs` elapses → 'timeout'.
 *
 * Whichever fires first unregisters the waiter and stops both timers.
 */
function waitForHookOrTimeout(
  taskId: string,
  isCancelled: () => boolean,
  maxWaitMs: number,
): Promise<'done' | 'cancelled' | 'timeout' | 'session_died'> {
  type Outcome = 'done' | 'cancelled' | 'timeout' | 'session_died';

  return new Promise<Outcome>((resolve) => {
    let cancelPoll: ReturnType<typeof setInterval> | undefined;
    let timeoutTimer: ReturnType<typeof setTimeout> | undefined;

    const settle = (result: Outcome): void => {
      _hookWaiters.delete(taskId);
      clearInterval(cancelPoll);
      clearTimeout(timeoutTimer);
      resolve(result);
    };

    _hookWaiters.set(taskId, (outcome) => settle(outcome));

    cancelPoll = setInterval(() => {
      if (isCancelled()) settle('cancelled');
    }, 2000);

    timeoutTimer = setTimeout(() => settle('timeout'), maxWaitMs);
  });
}
111
+
112
+ // ─── Pane completion detection (mirrors workspace Smith logic) ──────────────
113
+
114
// A trimmed pane line that consists solely of a prompt glyph.
const DONE_PROMPT_PATTERNS: readonly RegExp[] = [
  /^❯\s*$/,   // Claude Code idle prompt (anchored — whole line is just ❯)
  /^>\s*$/,   // Generic / Codex prompt
  /^\$\s*$/,  // Shell fallback
];

// Claude Code v2 keeps the `❯`/`>` input box on screen at all times, even
// mid-turn, so an empty prompt line alone does not mean idle. The live activity
// footer is the "still working" signal: if any of these match a tail line the
// agent is mid-turn and must be treated as neither done nor awaiting a reply.
const WORKING_PATTERNS: readonly RegExp[] = [
  /esc to inter/i,               // active-turn footer (may be truncated: "esc to inte…")
  /\([0-9hms .]+·/,              // live timer "(2m 16s ·" / "(12s ·"
  /↓\s*[\d.]+k?\s*tokens/i,      // live token counter "↓ 2.2k tokens"
  /to run in background/i,       // shown only while a turn is actively running
  /Press up to edit queued/i,    // a queued message = a turn is pending/running
  /^[✻✽✶✦✳✺∗⋆]\s+\w+…/,          // spinner verb still in progress ("✽ Booping…")
];

// TUI chrome rather than assistant content — skipped when looking for the last
// "real" content line during question detection.
const CHROME_PATTERNS: readonly RegExp[] = [
  /bypass permissions/i,
  /shift\+tab to cycle/i,
  /Press up to edit/i,
  /for agents\s*$/i,
  /esc to interrupt/i,
  /^[─━—_]{3,}$/,                // separator rules
  /^⏵+/,
];

const DONE_CONFIRM_CHECKS = 2;    // consecutive idle detections required
const DONE_CHECK_INTERVAL = 5000; // ms between pane checks
147
+
148
/**
 * Cheap tail capture of a tmux pane for pattern matching.
 *
 * Runs `tmux capture-pane -p -S -30` (a short window, not the whole
 * scrollback), removes CSI escape sequences, trims each line, discards empty
 * lines and returns the last `tailLines` of what remains.
 *
 * @throws whatever `execSync` throws when the command fails — callers rely on
 *         this to detect that the session no longer exists.
 */
function captureTail(sessionName: string, tailLines = 5): string[] {
  const raw = execSync(`tmux capture-pane -t ${sessionName} -p -S -30`, { timeout: 5000, encoding: 'utf-8' });

  const nonEmpty: string[] = [];
  for (const line of raw.replace(/\x1b\[[0-9;]*[a-zA-Z]/g, '').split('\n')) {
    const trimmed = line.trim();
    if (trimmed) nonEmpty.push(trimmed);
  }
  return nonEmpty.slice(-tailLines);
}
162
+
163
/** True when at least one tail line is a bare prompt (see DONE_PROMPT_PATTERNS). */
function isDonePrompt(tailLines: string[]): boolean {
  for (const line of tailLines) {
    for (const pattern of DONE_PROMPT_PATTERNS) {
      if (pattern.test(line)) return true;
    }
  }
  return false;
}
166
+
167
/** True when at least one tail line shows a live-activity marker (see WORKING_PATTERNS). */
function isWorking(tailLines: string[]): boolean {
  for (const line of tailLines) {
    for (const pattern of WORKING_PATTERNS) {
      if (pattern.test(line)) return true;
    }
  }
  return false;
}
170
+
171
// Lines belonging to the welcome banner / startup output; never task content.
const BANNER_PATTERNS = [
  /Claude Code v\d/i,
  /setup issues/i,
  /·\s*Claude (API|Pro|Max)/i,
  /^[▐▝▘▖▗▛▜▙▟█▌▐\s]+$/,        // full logo block-glyph rows
  /^[▐▝▘▖▗▛▜▙▟█▌]{2,}/,         // logo row with trailing text (e.g. "▘▘ ▝▝ ~/path")
  /Auto-update/i,
];

/**
 * Turn an (already ANSI-stripped) tmux pane scrape into text that resembles
 * headless `claude -p` stdout, so anchored `^ACTION:` greps and
 * structured-block extraction see the same shape under tmux as headless.
 *
 * Removes TUI chrome (banner, separator rules, input box, footer, spinner,
 * stray auto-reply lines), strips up to two leading spaces from every kept
 * line (claude's TUI margin), collapses runs of 3+ newlines into one blank
 * line, and trims the result.
 */
function normalizeTmuxResult(raw: string): string {
  const isChrome = (trimmed: string): boolean =>
    /^[─━—_]{3,}$/.test(trimmed) ||                                   // separator rules
    /^⏵+/.test(trimmed) || /bypass permissions/i.test(trimmed) ||     // footer
    /^❯/.test(trimmed) || /^>\s*$/.test(trimmed) ||                   // input box / empty prompt / stray reply
    /^[✻✽✶✦✳✺∗⋆]\s/.test(trimmed) ||                                  // spinner / "Cooked for Xs"
    /esc to inter|Press up to edit|for agents\s*$/i.test(trimmed) ||
    BANNER_PATTERNS.some(p => p.test(trimmed));                       // welcome banner

  const out: string[] = [];
  for (const line of raw.split('\n')) {
    const trimmed = line.trim();
    // Blank lines are kept here and collapsed below.
    if (trimmed && isChrome(trimmed)) continue;
    out.push(line.replace(/^ {1,2}/, '')); // strip the 2-space TUI margin
  }
  return out.join('\n').replace(/\n{3,}/g, '\n\n').trim();
}
203
+
204
/**
 * Detect a paste whose Enter never submitted: returns true when some pane line
 * (after trimming) is "❯ <text>" and that text is neither the placeholder
 * ("Try …") nor the queued-messages hint ("Press up to edit…"). An empty input
 * box does not count.
 */
function inputBoxHasPendingText(pane: string): boolean {
  for (const rawLine of pane.split('\n')) {
    const line = rawLine.trim();
    if (!/^❯\s+\S/.test(line)) continue;
    if (/^❯\s+(Try |Press up to edit)/.test(line)) continue;
    return true;
  }
  return false;
}
211
+
212
/**
 * Return the last tail line that is assistant content, i.e. not a bare prompt
 * (DONE_PROMPT_PATTERNS), not TUI chrome (CHROME_PATTERNS) and not empty.
 * Returns '' when no such line exists.
 */
function lastContentLine(tailLines: string[]): string {
  const isNoise = (line: string): boolean =>
    DONE_PROMPT_PATTERNS.some(p => p.test(line)) || CHROME_PATTERNS.some(p => p.test(line));

  const found = [...tailLines].reverse().find(line => !isNoise(line) && Boolean(line));
  return found ?? '';
}
222
+
223
/**
 * Does the idle pane actually show a question / interactive choice waiting for
 * input? This is a necessary precondition for any auto-reply — a finished task
 * that merely printed a summary must not qualify.
 *
 * The decision looks only at the last content line of `tailLines`:
 *  - it ends with an ASCII `?` or a fullwidth `？` (U+FF1F), or
 *  - it looks like a numbered choice ("❯ 1. Yes", "1. Yes 2. No"), or
 *  - it contains a yes/no marker ("(y/n)", "[y/n]", "yes/no").
 *
 * @returns false when there is no content line at all.
 */
function looksLikeQuestion(tailLines: string[]): boolean {
  const last = lastContentLine(tailLines);
  if (!last) return false;
  // The second operand was a duplicate ASCII '?', so questions ending in the
  // fullwidth form were never detected; U+FF1F is written as an escape so it
  // cannot be silently normalised back to ASCII.
  if (last.endsWith('?') || last.endsWith('\uFF1F')) return true;
  // Claude Code permission / choice menus: "❯ 1. Yes", "1. Yes 2. No", "(y/n)".
  if (/^❯?\s*\d+\.\s/.test(last)) return true;
  if (/\(y\/n\)|\[y\/n\]|yes\/no/i.test(last)) return true;
  return false;
}
235
+
236
+
237
+ // ─── Helpers ────────────────────────────────────────────────
238
+
239
/** Name of the dedicated tmux session for a task: `fgt-` followed by the task id. */
export function tmuxSessionName(taskId: string): string {
  const prefix = 'fgt-';
  return prefix + taskId;
}
242
+
243
/** Per-task run directory: <dataDir>/tmux/sessions/<taskId>. */
function taskRunDir(taskId: string): string {
  const sessionsRoot = join(getDataDir(), 'tmux', 'sessions');
  return join(sessionsRoot, taskId);
}
246
+
247
/**
 * Tear down everything belonging to a tmux task: kill its tmux session and
 * delete its run directory. Both steps are best-effort; failures are ignored.
 */
export function killTmuxTaskSession(taskId: string): void {
  const session = tmuxSessionName(taskId);

  try {
    execSync(`tmux kill-session -t ${session} 2>/dev/null`, { timeout: 5000 });
  } catch {}

  try {
    fs.rmSync(taskRunDir(taskId), { recursive: true, force: true });
  } catch {}
}
252
+
253
/**
 * Capture a pane including its scrollback (`capture-pane -p -S -`).
 * Returns '' when the command fails or times out, so this function never throws.
 */
function capturePane(sessionName: string): string {
  let captured = '';
  try {
    const buf = execSync(`tmux capture-pane -t ${sessionName} -p -S - 2>/dev/null`, { timeout: 5000 });
    captured = buf.toString();
  } catch {
    captured = '';
  }
  return captured;
}
258
+
259
/**
 * Remove terminal control noise from captured text: CSI sequences, OSC
 * sequences (terminated by BEL or ESC \), charset selectors, keypad-mode
 * switches, carriage returns and stray BEL characters — applied in that order.
 */
function stripAnsi(s: string): string {
  const filters: RegExp[] = [
    /\x1b\[[0-9;?]*[a-zA-Z]/g,                 // CSI
    /\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)/g,      // OSC
    /\x1b[()][0-9A-B]/g,                       // charset selection
    /\x1b[=>]/g,                               // keypad mode
    /\r/g,                                     // carriage returns
    /\x07/g,                                   // leftover BEL
  ];
  return filters.reduce((text, pattern) => text.replace(pattern, ''), s);
}
268
+
269
/** Promise that resolves (with no value) after `ms` milliseconds. */
function delay(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
272
+
273
+ // ─── LLM-based question classification ────────────────────────
274
+ // Runs `claude -p --model haiku` to classify whether Claude paused to ask for
275
+ // permission/clarification (→ CONTINUE) or finished the task (→ DONE).
276
+ // Uses the same claude binary already launched for the task — no API key needed.
277
+ // Falls back to pattern matching if the subprocess fails.
278
+
279
/**
 * Pattern-based fallback classifier. Returns true ("auto-reply is appropriate")
 * only when the final line ends with `?` AND the last three lines, joined with
 * spaces, contain a permission-seeking phrase such as "should I" or
 * "want me to".
 */
function classifyWithPatterns(contentLines: string[]): boolean {
  const finalLine = contentLines[contentLines.length - 1]?.trim() ?? '';
  if (!finalLine.endsWith('?')) return false;

  const recent = contentLines.slice(-3).join(' ');
  const asksPermission =
    /\b(want me to|should i|shall i|would you like me to|do you want me to|can i\b|may i\b|should we|want to proceed|want to continue|like me to)\b/i;
  return asksPermission.test(recent);
}
285
+
286
/**
 * Decide whether an idle pane is blocked on a "may I continue?" style question
 * (→ true, auto-reply) or has finished (→ false).
 *
 * Spawns `claudeBin -p --model claude-haiku-4-5-20251001`, feeds it the last
 * 20 lines of `tailLines` on stdin and treats an answer that starts with
 * CONTINUE (case-insensitive) as true. When the subprocess cannot be spawned,
 * exits non-zero, or takes longer than 30s, the result of
 * `classifyWithPatterns(tailLines)` is used instead.
 *
 * @param tailLines Recent pane lines (already trimmed / ANSI-stripped).
 * @param claudeBin Path or name of the claude CLI binary.
 * @returns false immediately when there is no context to classify.
 */
async function shouldAutoReply(tailLines: string[], claudeBin: string): Promise<boolean> {
  const context = tailLines.slice(-20).join('\n').trim();
  if (!context) return false;

  const prompt = `You monitor an AI coding assistant running a background task. Its terminal is now idle. Decide whether it is genuinely BLOCKED waiting for the user to approve continuing, or whether it has FINISHED.

Reply CONTINUE only if the assistant is clearly asking permission to keep going / proposing a next step and waiting for a yes (e.g. "Should I proceed?", "Want me to apply the fix?", a numbered Yes/No choice).
Reply DONE if it has delivered its result, is asking a substantive question that needs a real human decision (not a yes/no continue), or you are at all unsure. When in doubt, reply DONE.

Reply with exactly one word: CONTINUE or DONE.

Last output:
${context}`;

  return new Promise<boolean>((resolve) => {
    let settled = false;
    let watchdog: ReturnType<typeof setTimeout> | undefined;

    // Settle exactly once and always release the watchdog, so a finished
    // classification does not leave a 30s timer pending (which would also
    // call kill() on a child that already exited).
    const done = (v: boolean): void => {
      if (settled) return;
      settled = true;
      if (watchdog !== undefined) clearTimeout(watchdog);
      resolve(v);
    };
    const fallback = (): void => done(classifyWithPatterns(tailLines));

    const child = spawn(claudeBin, ['-p', '--model', 'claude-haiku-4-5-20251001'], {
      stdio: ['pipe', 'pipe', 'ignore'],
      // undefined values are omitted from the child's environment.
      env: { ...process.env, CLAUDECODE: undefined, CLAUDE_CODE_ENTRYPOINT: undefined, CLAUDE_CODE_SSE_PORT: undefined },
    });

    // Register listeners before writing so a failed spawn is always observed.
    child.on('error', fallback);
    // A child that fails to start or exits early makes the stdin write fail
    // (EPIPE); without a listener that would be an unhandled 'error' event.
    // The 'error'/'close' handlers above and below decide the outcome.
    child.stdin.on('error', () => {});

    let output = '';
    child.stdout.on('data', (d: Buffer) => { output += d.toString(); });
    child.on('close', (code: number | null) => {
      if (code !== 0) { fallback(); return; }
      done(output.trim().toUpperCase().startsWith('CONTINUE'));
    });

    child.stdin.write(prompt);
    child.stdin.end();

    // 30s safety timeout — kill the classifier and fall back to pattern matching.
    watchdog = setTimeout(() => {
      try { child.kill(); } catch {}
      fallback();
    }, 30_000);
  });
}
324
+
325
+ // ─── Callbacks contract ─────────────────────────────────────
326
+
327
/** Hooks the task manager hands to the tmux backend for reporting progress. */
export interface TmuxTaskCallbacks {
  /** Append one entry to the task's log. */
  appendLog(entry: TaskLogEntry): void;

  /** Record the task's terminal status, optionally with result / cost / error details. */
  setStatus(
    status: 'done' | 'failed' | 'cancelled',
    detail?: { resultSummary?: string; costUSD?: number; error?: string },
  ): void;

  /** True once cancellation of the task has been requested. */
  isCancelled(): boolean;
}
332
+
333
+ // ─── Main executor ──────────────────────────────────────────
334
+
335
// Readiness markers searched in the whole (ANSI-stripped) pane text.
// Claude Code v2 draws horizontal rule borders and a bare `> ` prompt line;
// older builds used box-drawing characters (╭─╮, │ >), which are still accepted.
const READY_PATTERNS: readonly RegExp[] = [
  /^>\s/m,     // the input prompt line: "> " at start
  /─{20,}/,    // the horizontal separator (20+ dashes)
  /╭─+╮/,      // legacy box top (pre-v2)
  /│\s*>/,     // legacy input box (pre-v2)
  /❯\s*$/m,    // legacy arrow prompt (pre-v2)
];

const MAX_WAIT_MS = 2 * 60 * 60 * 1000; // 2h timeout fallback
346
+
347
/** How long to wait for claude's TUI input prompt before giving up. */
const READY_TIMEOUT_MS = 120_000;

/** Names that are safe to use as shell / tmux environment variable identifiers. */
const ENV_NAME_PATTERN = /^[A-Za-z_][A-Za-z0-9_]*$/;

/** Quote `s` as one POSIX-shell word (single-quoted, embedded quotes escaped). */
function shellQuote(s: string): string {
  return `'${s.replace(/'/g, `'\\''`)}'`;
}

/** Human-readable message for a caught value of unknown type. */
function errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Run a task by driving an interactive `claude` inside a dedicated tmux session.
 *
 * Steps:
 *  1. write `.forge/task-context.json` in the project (routing info for the Stop hook);
 *  2. create the tmux session in the project directory;
 *  3. push env vars into the session with `tmux set-environment`;
 *  4. run the generated `launch.sh`, which execs claude;
 *  5. wait (up to READY_TIMEOUT_MS) for the TUI prompt;
 *  6-7. paste the prompt and press Enter, re-sending Enter if it was swallowed;
 *  8. wait for completion — Stop hook, pane polling, cancellation, or the 2h timeout;
 *  9. capture the pane, write `output.txt`, log the normalized result;
 *  10. send `/exit` and report `done`.
 *
 * The final status is always reported through `cbs.setStatus`. The session is
 * killed here only on cancellation or timeout.
 *
 * @param task Task to execute; `id`, `prompt` and `projectPath` are read, plus an optional `agent`.
 * @param cbs  Logging / status / cancellation callbacks supplied by the task manager.
 */
export async function executeTmuxTask(task: Task, cbs: TmuxTaskCallbacks): Promise<void> {
  const { appendLog, setStatus, isCancelled } = cbs;
  const now = (): string => new Date().toISOString();
  const note = (subtype: 'init' | 'info' | 'warn', content: string): void => {
    appendLog({ type: 'system', subtype, content, timestamp: now() });
  };

  const agentId = (task as any).agent || 'claude';
  const launch = resolveTerminalLaunch(agentId, 'task');

  const envVars: Record<string, string> = {
    ...connectorEnv(),
    ...(launch.env || {}),
  };
  delete envVars.CLAUDECODE;

  const sessionName = tmuxSessionName(task.id);
  // Shell-quoted form for every command line built in this function.
  const target = shellQuote(sessionName);
  const runDir = taskRunDir(task.id);
  fs.mkdirSync(runDir, { recursive: true });

  const launchScriptPath = join(runDir, 'launch.sh');
  const promptPath = join(runDir, 'prompt.txt');
  const outputPath = join(runDir, 'output.txt');

  // ── 1. Write task-context.json for the Stop hook ──────────
  const forgePort = Number(process.env.PORT) || 8403;
  const taskContextDir = join(task.projectPath, '.forge');
  const taskContextPath = join(taskContextDir, 'task-context.json');
  const removeTaskContext = (): void => {
    try { fs.unlinkSync(taskContextPath); } catch {}
  };
  try {
    fs.mkdirSync(taskContextDir, { recursive: true });
    fs.writeFileSync(taskContextPath, JSON.stringify({ taskId: task.id, port: forgePort }));
  } catch (e: unknown) {
    // Not fatal: pane polling can still detect completion without the hook.
    note('warn', `task-context.json write failed: ${errorMessage(e)}`);
  }

  // Prompt: replace internal newlines with space so paste-buffer doesn't multi-submit
  const promptText = task.prompt.replace(/\r?\n+/g, ' ').trim();
  fs.writeFileSync(promptPath, promptText, 'utf8');

  const modelFlag = launch.model && launch.model !== 'default' ? `--model ${launch.model}` : '';
  const skipFlag = launch.skipPermissionsFlag || '--dangerously-skip-permissions';
  const claudeBin = launch.cliCmd;

  // Only identifier-shaped names can be re-imported by the launch script, and
  // only those are safe to place on a command line.
  const envEntries = Object.entries(envVars).filter(([k]) => ENV_NAME_PATTERN.test(k));
  const envKeys = envEntries.map(([k]) => k);
  const evalLine = envKeys.length
    ? `eval "$(tmux show-environment -s 2>/dev/null | grep -E '^(${envKeys.join('|')})=')"`
    : '';

  fs.writeFileSync(launchScriptPath, [
    '#!/bin/bash',
    // Unset vars that make claude think it's running inside Claude Code —
    // inherited from the Forge/Node process environment through the tmux session.
    'unset CLAUDECODE CLAUDE_CODE_ENTRYPOINT CLAUDE_CODE_SSE_PORT',
    evalLine,
    `exec "${claudeBin}" ${skipFlag} ${modelFlag}`,
  ].filter(Boolean).join('\n') + '\n', { mode: 0o700 });

  // ── 2. Create tmux session (working dir = project path) ───
  try {
    // `|| true` tolerates an already-existing session, but it also hides real
    // failures, so the session's existence is verified explicitly afterwards.
    execSync(`tmux new-session -d -s ${target} -c ${shellQuote(task.projectPath)} -x 220 -y 50 2>/dev/null || true`, { timeout: 10000 });
    execSync(`tmux has-session -t ${target} 2>/dev/null`, { timeout: 5000 });
  } catch (e: unknown) {
    removeTaskContext();
    setStatus('failed', { error: `Failed to create tmux session: ${errorMessage(e)}` });
    return;
  }

  // ── 3. Inject secret env vars out-of-band ────────────────
  for (const [k, v] of envEntries) {
    try {
      // Single-quote the value: JSON-style double quotes would let the shell
      // expand `$` and backticks inside secrets.
      execSync(`tmux set-environment -t ${target} ${k} ${shellQuote(String(v))}`, { timeout: 5000 });
    } catch {
      // Best effort; log the variable name only, never the value.
      note('warn', `tmux set-environment failed for ${k}`);
    }
  }

  // ── 4. Start claude (interactive, no -p) ─────────────────
  try {
    // Inner quoting protects the path inside the pane's shell; outer quoting
    // passes the whole command line to tmux as a single argument.
    const typed = `bash ${shellQuote(launchScriptPath)}`;
    execSync(`tmux send-keys -t ${target} ${shellQuote(typed)} Enter`, { timeout: 5000 });
  } catch (e: unknown) {
    removeTaskContext();
    setStatus('failed', { error: `Failed to launch claude: ${errorMessage(e)}` });
    return;
  }

  note('init', `tmux session: ${sessionName} | interactive mode | completion via Stop hook`);

  // ── 5. Wait for claude's TUI input prompt ────────────────
  note('info', 'Waiting for claude to initialize...');

  let lastPane = '';
  const ready = await new Promise<boolean>((resolve) => {
    const deadline = Date.now() + READY_TIMEOUT_MS;
    const poll = (): void => {
      if (isCancelled()) { resolve(false); return; }
      if (Date.now() > deadline) { resolve(false); return; }
      lastPane = stripAnsi(capturePane(sessionName));
      if (READY_PATTERNS.some(p => p.test(lastPane))) { resolve(true); return; }
      setTimeout(poll, 500);
    };
    setTimeout(poll, 500);
  });

  if (!ready) {
    removeTaskContext();
    if (isCancelled()) {
      setStatus('cancelled');
    } else {
      // Dump last pane snapshot so user can see what claude printed (pattern mismatch vs slow start)
      const paneSnippet = lastPane.slice(-800).trim();
      note('warn', `[pane at timeout]\n${paneSnippet}`);
      setStatus('failed', { error: `claude did not initialize within ${READY_TIMEOUT_MS / 1000}s` });
    }
    return;
  }

  // ── 6 & 7. Inject prompt via paste-buffer, send Enter ────
  note('info', 'Injecting prompt...');

  try {
    execSync(`tmux load-buffer ${shellQuote(promptPath)}`, { timeout: 5000 });
    execSync(`tmux paste-buffer -t ${target}`, { timeout: 5000 });
    // Large/multi-line pastes (e.g. a 9KB triage prompt) can race claude's init:
    // the welcome banner trips the readiness check, the paste lands but the first
    // Enter is swallowed, and the prompt sits in the box un-submitted forever.
    // Send Enter, then VERIFY the box cleared / work started; retry Enter if not.
    await delay(400);
    execSync(`tmux send-keys -t ${target} Enter`, { timeout: 5000 });
    for (let attempt = 0; attempt < 4; attempt++) {
      await delay(2000);
      const pane = stripAnsi(capturePane(sessionName));
      const tail = pane.split('\n').map(l => l.trim()).filter(Boolean).slice(-8);
      if (isWorking(tail) || !inputBoxHasPendingText(pane)) break; // submitted (working) or box cleared
      // Still sitting in the box — Enter was lost. Re-submit.
      note('info', `Prompt not submitted yet — re-sending Enter (attempt ${attempt + 1})`);
      execSync(`tmux send-keys -t ${target} Enter`, { timeout: 5000 });
    }
  } catch (e: unknown) {
    removeTaskContext();
    setStatus('failed', { error: `Failed to inject prompt: ${errorMessage(e)}` });
    return;
  }

  appendLog({ type: 'assistant', subtype: 'prompt', content: task.prompt, timestamp: now() });
  note('info', 'Awaiting completion (Stop hook or pane polling)...');

  // ── 8. Two parallel mechanisms watch for completion ───────────────────────
  // Stop hook: fast path — fires via /api/tasks/{id}/hook/stop when claude's Stop event fires.
  // Pane polling (doneTimer): fallback that watches the pane itself.

  // Brief pause so the ❯ prompt is cleared before we snapshot the baseline tail.
  await delay(1500);

  // ── Streaming: capture full scrollback every 3s for log output ──────────
  let streamedLines = stripAnsi(capturePane(sessionName)).split('\n').length;

  const streamTimer = setInterval(() => {
    const current = stripAnsi(capturePane(sessionName));
    const lines = current.split('\n');
    if (lines.length > streamedLines) {
      const newContent = lines.slice(streamedLines).join('\n').trim();
      if (newContent) note('info', newContent);
      streamedLines = lines.length;
    }
  }, 3000);

  // ── Completion detection ──────────────────────────────────────────────────
  // The `❯`/`>` input box is ALWAYS on screen in Claude Code v2, so it cannot
  // mean "idle". Priority of signals each poll:
  //   1. isWorking(tail) → mid-turn (spinner / "esc to interrupt" / live timer).
  //      Reset confirmCount, do nothing. This guard prevents an auto-reply from
  //      firing while Claude is still working.
  //   2. started gate: wait for the post-inject baseline to change once.
  //   3. idle prompt visible + NOT working for DONE_CONFIRM_CHECKS polls →
  //        - pane shows a real question/choice (looksLikeQuestion) → haiku
  //          confirms, then auto-reply "Yes, please continue."
  //        - otherwise → the task is done (fire hook). No reply.
  //   Session death (execSync throws) → fail immediately.
  //
  // NOTE(review): the baseline is captured with captureTail's default tail size
  // while each poll compares an 8-line tail, so the "started" comparison differs
  // on the first poll in most cases. Left as-is because tightening it could
  // stall very fast tasks when the Stop hook does not fire — confirm intent.
  let baselineTail = captureTail(sessionName).join('|');
  let started = false;
  let confirmCount = 0;
  let autoReplyCount = 0;
  let llmCheckInProgress = false;
  // Set once the wait below has settled; stops a late classifier result from
  // typing an auto-reply into a session whose task has already finished.
  let finished = false;
  const MAX_AUTO_REPLIES = 10; // hard cap to prevent infinite loops

  const reportPolledCompletion = (): void => {
    note('info', '[pane polling detected completion]');
    fireTmuxHook(task.id);
  };

  const doneTimer = setInterval(() => {
    if (llmCheckInProgress) return; // wait for pending LLM classification to settle
    try {
      const tail = captureTail(sessionName, 8);

      // (1) Mid-turn → never done, never auto-reply.
      if (isWorking(tail)) { started = true; confirmCount = 0; return; }

      // (2) Wait until the agent has actually started working at least once.
      if (!started) {
        if (tail.join('|') !== baselineTail) started = true;
        return;
      }

      // (3) Idle prompt must be visible and stable for N consecutive polls.
      if (!isDonePrompt(tail)) { confirmCount = 0; return; }
      confirmCount++;
      if (confirmCount < DONE_CONFIRM_CHECKS) return;

      // Idle + stable. If there's no actual question waiting, the task is done.
      if (!looksLikeQuestion(tail) || autoReplyCount >= MAX_AUTO_REPLIES) {
        reportPolledCompletion();
        return;
      }

      // A question/choice is on screen — let haiku confirm it's a "continue" ask
      // before injecting. Wider context for the classifier.
      const contextTail = captureTail(sessionName, 20);
      llmCheckInProgress = true;
      shouldAutoReply(contextTail, claudeBin).then((isContinue) => {
        llmCheckInProgress = false;
        if (finished) return; // task already settled while the classifier ran
        if (isContinue && autoReplyCount < MAX_AUTO_REPLIES) {
          autoReplyCount++;
          const reply = 'Yes, please continue.';
          const askedLine = lastContentLine(contextTail);
          note('info', `[auto-reply ${autoReplyCount}/${MAX_AUTO_REPLIES}] "${reply}" ← in response to: ${askedLine.slice(0, 200)}`);
          try {
            const replyPath = join(runDir, `reply-${autoReplyCount}.txt`);
            fs.writeFileSync(replyPath, reply, 'utf8');
            execSync(`tmux load-buffer ${shellQuote(replyPath)}`, { timeout: 5000 });
            execSync(`tmux paste-buffer -t ${target}`, { timeout: 5000 });
            execSync(`tmux send-keys -t ${target} Enter`, { timeout: 5000 });
          } catch {}
          // Start a fresh detection cycle for the turn the reply just triggered.
          confirmCount = 0;
          started = false;
          baselineTail = captureTail(sessionName).join('|');
        } else {
          // haiku says it's actually finished (the `?` was rhetorical / part of a report).
          reportPolledCompletion();
        }
      }).catch(() => {
        llmCheckInProgress = false;
        if (finished) return;
        reportPolledCompletion();
      });
    } catch {
      note('warn', '[tmux session died]');
      fireSessionDied(task.id);
    }
  }, DONE_CHECK_INTERVAL);
  doneTimer.unref();

  const outcome = await waitForHookOrTimeout(task.id, isCancelled, MAX_WAIT_MS);
  finished = true;
  clearInterval(streamTimer);
  clearInterval(doneTimer);

  // Clean up context file regardless of outcome
  removeTaskContext();

  if (outcome === 'cancelled') {
    killTmuxTaskSession(task.id);
    setStatus('cancelled');
    return;
  }
  if (outcome === 'timeout') {
    killTmuxTaskSession(task.id);
    setStatus('failed', { error: 'Tmux task timed out after 2 hours' });
    return;
  }
  if (outcome === 'session_died') {
    setStatus('failed', { error: 'tmux session died unexpectedly' });
    return;
  }

  // ── 9. Capture and store result ───────────────────────────
  // output.txt keeps the full ANSI-stripped pane (forensics). The result that
  // feeds pipeline node outputs + chat is normalized to headless-like stdout so
  // anchored greps / structured extraction behave identically to `claude -p`.
  const rawPane = stripAnsi(capturePane(sessionName));
  try {
    fs.writeFileSync(outputPath, rawPane, 'utf8');
  } catch (e: unknown) {
    // The forensic copy is optional; losing it must not leave a completed task
    // without a final status.
    note('warn', `output.txt write failed: ${errorMessage(e)}`);
  }

  const response = normalizeTmuxResult(rawPane);
  appendLog({ type: 'result', content: response, timestamp: now() });

  // ── 10. Exit claude gracefully ────────────────────────────
  try {
    execSync(`tmux send-keys -t ${target} "/exit" Enter`, { timeout: 5000 });
  } catch {}

  setStatus('done', { resultSummary: response.slice(0, 2048) });
}