@bastani/atomic 0.5.21-1 → 0.5.22-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,24 +24,14 @@ import {
24
24
  type SDKUserMessage,
25
25
  type Options as SDKOptions,
26
26
  } from "@anthropic-ai/claude-agent-sdk";
27
- import {
28
- sendViaPasteBuffer,
29
- sendSpecialKey,
30
- sendKeysAndSubmit,
31
- capturePaneVisible,
32
- capturePaneScrollback,
33
- normalizeTmuxCapture,
34
- normalizeTmuxLines,
35
- paneLooksReady,
36
- paneHasActiveTask,
37
- waitForPaneReady,
38
- attemptSubmitRounds,
39
- } from "../runtime/tmux.ts";
40
- import { watch, unlink, mkdir } from "node:fs/promises";
27
+ import { sendKeysAndSubmit } from "../runtime/tmux.ts";
28
+ import { escBash } from "../runtime/executor.ts";
29
+ import { watch, unlink, mkdir, writeFile } from "node:fs/promises";
41
30
  import { existsSync, writeFileSync } from "node:fs";
42
31
  import { join } from "node:path";
43
32
  import { randomUUID } from "node:crypto";
44
33
  import os from "node:os";
34
+ import { claudeHookDirs } from "../../commands/cli/claude-stop-hook.ts";
45
35
 
46
36
  // ---------------------------------------------------------------------------
47
37
  // Session tracking — ensures createClaudeSession is called before claudeQuery
@@ -61,21 +51,27 @@ interface PaneState {
61
51
  chatFlags: string[];
62
52
  /** Timeout in ms waiting for Claude TUI / JSONL file on first spawn. */
63
53
  readyTimeoutMs: number;
64
- /**
65
- * Workflow session directory (`~/.atomic/sessions/<runId>/<name>-<sid>`).
66
- * The first prompt is persisted here as `prompt.txt` so it appears in the
67
- * session log alongside `messages.json`, `metadata.json`, etc.
68
- */
69
- sessionDir: string;
70
54
  }
71
55
 
72
56
  const initializedPanes = new Map<string, PaneState>();
73
57
 
74
58
  /**
75
- * Remove a pane from the initialized map, freeing memory.
76
- * Call when a Claude session is killed or no longer needed.
59
+ * Remove a pane from the initialized map and signal the currently-blocked
60
+ * Stop hook that the session is over, so Claude stops promptly instead of
61
+ * waiting out the hook's safety timeout.
62
+ *
63
+ * Called by the runtime when a Claude stage is being torn down. Idempotent.
77
64
  */
78
- export function clearClaudeSession(paneId: string): void {
65
+ export async function clearClaudeSession(paneId: string): Promise<void> {
66
+ const state = initializedPanes.get(paneId);
67
+ if (state) {
68
+ try {
69
+ await releaseClaudeSession(state.claudeSessionId);
70
+ } catch {
71
+ // Best-effort — if release fails the hook will still exit on its
72
+ // own safety timeout.
73
+ }
74
+ }
79
75
  initializedPanes.delete(paneId);
80
76
  }
81
77
 
@@ -85,6 +81,54 @@ const DEFAULT_CHAT_FLAGS = [
85
81
  "--dangerously-skip-permissions",
86
82
  ];
87
83
 
84
+ /**
85
+ * Build the shell command Claude Code runs from the injected Stop hook.
86
+ *
87
+ * - **Published install** (`import.meta.dir` under `node_modules`): resolve
88
+ * `atomic` via the user's PATH. That's the binary they installed, and
89
+ * relying on PATH is robust across shells and platforms.
90
+ * - **Dev** (source checkout): re-invoke THIS repo's `src/cli.ts` using the
91
+ * same Bun runtime that's executing us, so edits to the hook logic are
92
+ * picked up without rebuilding or re-linking. Mirrors the
93
+ * `spawnAttachedFooter` pattern in `src/sdk/runtime/executor.ts:293-303`.
94
+ *
95
+ * The dev-detection heuristic (`node_modules` in `import.meta.dir`) is the
96
+ * same one used by `src/services/system/auto-sync.ts:50`.
97
+ */
98
+ function buildWorkflowStopHookCommand(): string {
99
+ if (import.meta.dir.includes("node_modules")) {
100
+ return "atomic _claude-stop-hook";
101
+ }
102
+ const runtime = process.execPath;
103
+ const cliPath = join(import.meta.dir, "..", "..", "cli.ts");
104
+ return `"${escBash(runtime)}" "${escBash(cliPath)}" _claude-stop-hook`;
105
+ }
106
+
107
+ /**
108
+ * Inline settings injected via `claude --settings <json>` on every workflow
109
+ * spawn. Registers the workflow Stop hook that delivers follow-up prompts
110
+ * without relying on `.claude/settings.json` — so the hook fires only for
111
+ * workflow-spawned Claude sessions, not when a user runs `claude` manually.
112
+ *
113
+ * Built once at module load. Assumed to contain no single quotes (JSON
114
+ * syntax doesn't produce them; the embedded dev-mode paths could, but rarely
115
+ * do), so POSIX single-quoting at the spawn site is sufficient shell escaping.
116
+ */
117
+ const WORKFLOW_STOP_HOOK_SETTINGS = JSON.stringify({
118
+ hooks: {
119
+ Stop: [
120
+ {
121
+ hooks: [
122
+ {
123
+ type: "command",
124
+ command: buildWorkflowStopHookCommand(),
125
+ },
126
+ ],
127
+ },
128
+ ],
129
+ },
130
+ });
131
+
88
132
  // ---------------------------------------------------------------------------
89
133
  // createClaudeSession
90
134
  // ---------------------------------------------------------------------------
@@ -92,11 +136,6 @@ const DEFAULT_CHAT_FLAGS = [
92
136
  export interface ClaudeSessionOptions {
93
137
  /** tmux pane ID where Claude should be started */
94
138
  paneId: string;
95
- /**
96
- * Workflow session directory. The first prompt is written here as
97
- * `prompt.txt` and Claude is told to read from that path.
98
- */
99
- sessionDir: string;
100
139
  /** CLI flags to pass to the `claude` command (default: ["--allow-dangerously-skip-permissions", "--dangerously-skip-permissions"]) */
101
140
  chatFlags?: string[];
102
141
  /** Timeout in ms waiting for Claude TUI to be ready (default: 30s) */
@@ -132,51 +171,62 @@ export interface ClaudeSessionOptions {
132
171
  * });
133
172
  * ```
134
173
  */
135
- export async function createClaudeSession(options: ClaudeSessionOptions): Promise<void> {
174
+ export async function createClaudeSession(options: ClaudeSessionOptions): Promise<string> {
136
175
  const {
137
176
  paneId,
138
- sessionDir,
139
177
  chatFlags = DEFAULT_CHAT_FLAGS,
140
178
  readyTimeoutMs = 30_000,
141
179
  } = options;
142
180
 
181
+ const claudeSessionId = randomUUID();
143
182
  initializedPanes.set(paneId, {
144
- claudeSessionId: randomUUID(),
183
+ claudeSessionId,
145
184
  claudeStarted: false,
146
185
  chatFlags,
147
186
  readyTimeoutMs,
148
- sessionDir,
149
187
  });
188
+ return claudeSessionId;
189
+ }
190
+
191
+ /**
192
+ * Build the short, single-line natural-language prompt we send to Claude
193
+ * (either as spawn argv or as a follow-up message). Claude's first action
194
+ * is then a Read tool call against `promptFile` — which sidesteps shell
195
+ * escaping, ARG_MAX, and tmux paste-buffer flakiness for large prompts.
196
+ *
197
+ * The session dir and filename are slug-based (`prompt-<N>.txt` under
198
+ * `~/.atomic/sessions/...`), so they never contain shell-special characters.
199
+ */
200
+ function readPromptInstruction(promptFile: string): string {
201
+ return `Read ${promptFile} and follow the instructions inside.`;
150
202
  }
151
203
 
152
204
  /**
153
205
  * Spawn `claude` in the pane with the prompt baked in via the Read tool.
154
206
  *
155
- * The prompt is written to `${sessionDir}/prompt.txt` so it persists in the
156
- * workflow's session log alongside `messages.json`, `metadata.json`, etc.
157
- * The argv prompt is `Read the prompt in <path>`, so Claude's first action
158
- * is a Read tool call against that file. This sidesteps shell-escaping and
159
- * ARG_MAX entirely — the prompt bytes never traverse the shell parser or
160
- * the kernel argv cap.
207
+ * The prompt is already written to `promptFile` by the caller. The spawn
208
+ * argv is `'Read <path> and follow the instructions inside.'`, so Claude's
209
+ * first action is a Read tool call against that file.
161
210
  */
162
211
  async function spawnClaudeWithPrompt(
163
212
  paneId: string,
164
- prompt: string,
213
+ promptFile: string,
165
214
  chatFlags: string[],
166
215
  sessionId: string,
167
- sessionDir: string,
168
216
  readyTimeoutMs: number,
169
217
  ): Promise<void> {
170
- const promptFile = join(sessionDir, "prompt.txt");
171
- writeFileSync(promptFile, prompt, "utf-8");
172
-
173
218
  // promptFile is expected to live in the workflow's `${name}-${sessionId}` directory under
174
219
  // ~/.atomic/sessions — slug-based, so single-quoting is sufficient on
175
220
  // POSIX and PowerShell alike.
176
- const argvPrompt = `'Read the prompt in ${promptFile}'`;
221
+ const argvPrompt = `'${readPromptInstruction(promptFile)}'`;
177
222
  const cmd = [
178
223
  "claude",
179
224
  ...chatFlags,
225
+ // Workflow-owned Stop hook. Placed AFTER chatFlags so commander's
226
+ // last-wins semantics shadow any user-provided --settings, making this
227
+ // non-overridable by `.atomic/settings.json` chatFlags overrides.
228
+ "--settings",
229
+ `'${WORKFLOW_STOP_HOOK_SETTINGS}'`,
180
230
  "--session-id",
181
231
  sessionId,
182
232
  argvPrompt,
@@ -308,6 +358,36 @@ export function _hasUnresolvedHILTool(messages: SessionMessage[]): boolean {
308
358
  return false;
309
359
  }
310
360
 
361
+ /**
362
+ * Returns true when the most recent assistant message in the transcript
363
+ * ended with `stop_reason: "tool_use"` — i.e. the agent stopped the current
364
+ * API response to call a tool but has not yet produced its post-tool answer.
365
+ *
366
+ * Claude Code's Stop hook fires each time Claude "finishes responding",
367
+ * which includes intermediate tool-use responses in a multi-step agent
368
+ * loop (not just the final `end_turn`). If we return from `waitForIdle`
369
+ * on the first Stop event, we capture the transcript mid-loop — the
370
+ * final assistant text block is still being generated and won't be on
371
+ * disk yet, so `inbox.md` drops the actual answer.
372
+ *
373
+ * Callers keep waiting until the latest assistant message has a terminal
374
+ * stop_reason (e.g. `end_turn`, `max_tokens`, `stop_sequence`, `refusal`; any
375
+ * value other than `tool_use` counts), which is the real end of the turn.
376
+ *
377
+ * Exported as `_isMidAgentLoop` for unit testing.
378
+ */
379
+ export function _isMidAgentLoop(messages: SessionMessage[]): boolean {
380
+ for (let i = messages.length - 1; i >= 0; i--) {
381
+ const msg = messages[i];
382
+ if (msg?.type !== "assistant") continue;
383
+ const inner = msg.message as { stop_reason?: unknown } | null;
384
+ const stopReason = inner?.stop_reason;
385
+ return stopReason === "tool_use";
386
+ }
387
+ // No assistant message yet — treat as mid-loop so we wait for one.
388
+ return true;
389
+ }
390
+
311
391
  /**
312
392
  * Core HIL watcher loop — pure logic, dependency-injected for testability.
313
393
  *
@@ -343,19 +423,105 @@ export async function _runHILWatcher(
343
423
  }
344
424
  }
345
425
 
426
+ /**
427
+ * Path helpers for the transcript JSONL written by Claude Code.
428
+ * @internal Exported for tests.
429
+ */
430
+ export function transcriptDir(): string {
431
+ return resolveSessionDir(process.cwd());
432
+ }
433
+
434
+ /** @internal Exported for tests. */
435
+ export function transcriptPath(claudeSessionId: string): string {
436
+ return join(transcriptDir(), `${claudeSessionId}.jsonl`);
437
+ }
438
+
439
+ /**
440
+ * Watch this session's transcript JSONL and call `onHIL` on every HIL-state
441
+ * transition — independently of the Stop hook.
442
+ *
443
+ * Why not piggyback on the Stop hook? `AskUserQuestion` is a deferred tool
444
+ * (`shouldDefer: true`, see Claude Code's
445
+ * `src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx`). While the question
446
+ * is pending, Claude's agent loop blocks on the tool with
447
+ * `needsFollowUp === true`, so `handleStopHooks` never runs
448
+ * (`src/query.ts`: `if (!needsFollowUp)`). A watcher tied to the Stop-hook
449
+ * marker would sleep through the entire HIL window and only wake up after
450
+ * the user has already answered.
451
+ *
452
+ * Watches the parent session directory rather than the file itself so the
453
+ * attach is safe before Claude has created the JSONL on first query. Events
454
+ * are not filtered by filename; each one re-reads this session's transcript. Returns when `signal` is aborted.
455
+ *
456
+ * @internal Exported for tests.
457
+ */
458
+ export async function watchTranscriptForHIL(
459
+ claudeSessionId: string,
460
+ onHIL: (waiting: boolean) => void,
461
+ signal: AbortSignal,
462
+ ): Promise<void> {
463
+ const dir = transcriptDir();
464
+
465
+ const readMessages = async (): Promise<SessionMessage[]> => {
466
+ try {
467
+ return await getSessionMessages(claudeSessionId, {
468
+ dir: process.cwd(),
469
+ includeSystemMessages: true,
470
+ });
471
+ } catch {
472
+ return [];
473
+ }
474
+ };
475
+
476
+ let wasHIL = false;
477
+ const check = async (): Promise<void> => {
478
+ const msgs = await readMessages();
479
+ const isHIL = _hasUnresolvedHILTool(msgs);
480
+ if (isHIL !== wasHIL) {
481
+ onHIL(isHIL);
482
+ wasHIL = isHIL;
483
+ }
484
+ };
485
+
486
+ await mkdir(dir, { recursive: true });
487
+
488
+ // Attach the watcher BEFORE the initial check so any events that arrive
489
+ // during the check are buffered by the iterator instead of being lost.
490
+ const watcher = watch(dir, { signal });
491
+
492
+ // Initial check: closes the race where the JSONL already contains an
493
+ // unresolved AskUserQuestion by the time this watcher attaches (resumed
494
+ // session, slow attach, etc.).
495
+ await check();
496
+
497
+ try {
498
+ for await (const _event of watcher) {
499
+ // We intentionally don't filter by `_event.filename`. On Linux, writes
500
+ // can deliver events with unrelated or `.tmp` basenames, and Bun's
501
+ // fs.watch behavior varies across OSes; `getSessionMessages` is keyed
502
+ // by `claudeSessionId` so a cheap re-read is authoritative.
503
+ await check();
504
+ }
505
+ } catch (e: unknown) {
506
+ if (!(e instanceof Error && e.name === "AbortError")) {
507
+ throw e;
508
+ }
509
+ }
510
+ }
511
+
346
512
  // ---------------------------------------------------------------------------
347
513
  // Helpers
348
514
  // ---------------------------------------------------------------------------
349
515
 
350
516
  /**
351
517
  * Path of the directory where the claude-stop-hook writes marker files.
352
- * Each Claude turn creates `~/.atomic/claude-stop/<session_id>` atomically
353
- * via rename, which triggers the `fs.watch` event in `waitForIdle`.
518
+ * Each Claude turn creates `~/.atomic/claude-stop/<session_id>` which
519
+ * triggers the `fs.watch` event in `waitForIdle`.
354
520
  *
355
521
  * @internal Exported for unit tests.
356
522
  */
357
523
  export function markerDir(): string {
358
- return join(os.homedir(), ".atomic", "claude-stop");
524
+ return claudeHookDirs().marker;
359
525
  }
360
526
 
361
527
  /**
@@ -367,6 +533,35 @@ export function markerPath(claudeSessionId: string): string {
367
533
  return join(markerDir(), claudeSessionId);
368
534
  }
369
535
 
536
+ /**
537
+ * Directory where the workflow runtime writes queued follow-up prompts that
538
+ * `atomic _claude-stop-hook` picks up and feeds back to Claude as
539
+ * `{decision:"block", reason:<prompt>}`. @internal Exported for unit tests.
540
+ */
541
+ export function queueDir(): string {
542
+ return claudeHookDirs().queue;
543
+ }
544
+
545
+ /** Return the queue file path for a given Claude session ID. @internal */
546
+ export function queuePath(claudeSessionId: string): string {
547
+ return join(queueDir(), claudeSessionId);
548
+ }
549
+
550
+ /**
551
+ * Directory where the runtime writes session-release signals. When the Stop
552
+ * hook sees `~/.atomic/claude-release/<session_id>` it exits 0 without
553
+ * emitting a block decision — the signal used by `clearClaudeSession` to
554
+ * tell Claude it's safe to actually stop. @internal Exported for unit tests.
555
+ */
556
+ export function releaseDir(): string {
557
+ return claudeHookDirs().release;
558
+ }
559
+
560
+ /** Return the release file path for a given Claude session ID. @internal */
561
+ export function releasePath(claudeSessionId: string): string {
562
+ return join(releaseDir(), claudeSessionId);
563
+ }
564
+
370
565
  /**
371
566
  * Ensure the marker directory exists and remove any stale marker left from a
372
567
  * previous turn of this session. Call this BEFORE submitting the prompt so
@@ -386,6 +581,42 @@ async function clearStaleMarker(claudeSessionId: string): Promise<void> {
386
581
  }
387
582
  }
388
583
 
584
+ /**
585
+ * Ensure the queue directory exists and remove any stale entry from a prior
586
+ * turn so the Stop hook doesn't race on it. Ignores ENOENT.
587
+ */
588
+ async function clearStaleQueue(claudeSessionId: string): Promise<void> {
589
+ await mkdir(queueDir(), { recursive: true });
590
+ try {
591
+ await unlink(queuePath(claudeSessionId));
592
+ } catch (e: unknown) {
593
+ if (!(e instanceof Error && "code" in e && (e as NodeJS.ErrnoException).code === "ENOENT")) {
594
+ throw e;
595
+ }
596
+ }
597
+ }
598
+
599
+ /**
600
+ * Write the next prompt to the session queue file. The currently-running
601
+ * Stop hook process (blocked on poll from the previous turn) picks it up,
602
+ * emits `{decision:"block", reason:<prompt>}` on stdout, and Claude feeds
603
+ * it back as the next user message — no tmux keystrokes required.
604
+ */
605
+ async function enqueuePrompt(claudeSessionId: string, prompt: string): Promise<void> {
606
+ await mkdir(queueDir(), { recursive: true });
607
+ await writeFile(queuePath(claudeSessionId), prompt, "utf-8");
608
+ }
609
+
610
+ /**
611
+ * Signal the Stop hook's blocking wait that this session is done. Called
612
+ * during session teardown so the final hook invocation exits 0 promptly.
613
+ * Safe to call more than once.
614
+ */
615
+ export async function releaseClaudeSession(claudeSessionId: string): Promise<void> {
616
+ await mkdir(releaseDir(), { recursive: true });
617
+ await writeFile(releasePath(claudeSessionId), "");
618
+ }
619
+
389
620
  // ---------------------------------------------------------------------------
390
621
  // Idle detection via marker file watch
391
622
  // ---------------------------------------------------------------------------
@@ -395,110 +626,138 @@ async function clearStaleMarker(claudeSessionId: string): Promise<void> {
395
626
  * `~/.atomic/claude-stop/` marker directory.
396
627
  *
397
628
  * When Claude finishes a turn, the `atomic _claude-stop-hook` Stop hook writes
398
- * `~/.atomic/claude-stop/<session_id>` atomically (tmp file + rename). The
399
- * rename triggers an OS-native `fs.watch` event on the parent directory —
400
- * far more reliable than polling tmux pane glyphs, which vary between Claude
401
- * Code versions.
629
+ * `~/.atomic/claude-stop/<session_id>`. The write triggers an OS-native
630
+ * `fs.watch` event on the parent directory — far more reliable than polling
631
+ * tmux pane glyphs, which vary between Claude Code versions.
632
+ *
633
+ * This function is strictly about *idle detection*. HIL is detected separately
634
+ * by {@link watchTranscriptForHIL}; the Stop hook does not fire while
635
+ * `AskUserQuestion` is pending (the agent loop blocks on deferred tools), so
636
+ * mixing the two would silently miss the HIL window.
402
637
  *
403
638
  * Algorithm:
404
- * 1. Watch the marker directory for events whose `filename` matches
405
- * `claudeSessionId`.
406
- * 2. On a matching event, read the session transcript via
407
- * `getSessionMessages` and test `_hasUnresolvedHILTool`.
408
- * - If unresolved HIL: call `onHIL(true)`, unlink the marker (so the next
409
- * turn's hook can fire again), and continue watching.
410
- * - If no unresolved HIL after a prior HIL: call `onHIL(false)`.
411
- * - If truly idle: slice messages from `transcriptBeforeCount` and return.
412
- * 3. Clean up the `fs.watch` watcher on any exit path via AbortController.
413
- *
414
- * The function signature is intentionally identical to the previous polling
415
- * implementation so all callers remain unchanged.
416
- *
417
- * @param paneId - tmux pane (kept in signature for caller compat; not used here)
418
- * @param claudeSessionId - Claude's session UUID (used to identify marker file)
639
+ * 1. Attach the directory watcher, then check for the marker file on disk —
640
+ * this closes the race where the Stop hook fires between prompt submission
641
+ * and watcher attach.
642
+ * 2. On any event, re-check the marker file on disk (we intentionally do NOT
643
+ * filter by `event.filename`, because on Linux a write can deliver multiple
644
+ * events with varying filenames and editor tools may race us).
645
+ * 3. Read the session transcript via `getSessionMessages` and slice messages
646
+ * from `transcriptBeforeCount`.
647
+ * 4. Clean up the `fs.watch` watcher on any exit path via AbortController.
648
+ *
649
+ * @param claudeSessionId - Claude's session UUID (used to identify marker file)
419
650
  * @param transcriptBeforeCount - number of messages in transcript before this turn
420
- * @param beforeContent - (unused) pane content before send; kept for compat
421
- * @param pollIntervalMs - (unused) kept for compat; watch is event-driven
422
- * @param onHIL - optional callback for HIL state changes
423
651
  */
652
+ /** Safety timeout so the workflow's next stage still fires if the Stop hook
653
+ * never runs (misconfigured settings, killed Claude process, etc.). 15 min
654
+ * covers any reasonable single-turn run without hanging forever. */
655
+ const IDLE_TIMEOUT_MS = 15 * 60 * 1000;
656
+
424
657
  /**
425
658
  * @internal Exported for unit tests.
426
659
  */
427
660
  export async function waitForIdle(
428
- _paneId: string,
429
- claudeSessionId: string | undefined,
661
+ claudeSessionId: string,
430
662
  transcriptBeforeCount: number,
431
- _beforeContent: string,
432
- _pollIntervalMs: number,
433
- onHIL?: (waiting: boolean) => void,
434
663
  ): Promise<SessionMessage[]> {
435
- // Without a session ID we cannot watch the marker directory — return empty.
436
- if (!claudeSessionId) {
437
- return [];
438
- }
439
664
 
440
665
  const dir = markerDir();
441
666
  const sessionId = claudeSessionId;
667
+ const target = markerPath(sessionId);
442
668
  const ac = new AbortController();
669
+ const timeout = setTimeout(() => ac.abort(), IDLE_TIMEOUT_MS);
443
670
 
444
- let hilActive = false;
671
+ // readMessages loads the transcript (null on failure); handleMarker, below, processes a marker that has appeared on disk. It returns a tuple:
672
+ // [resolved, result] — when resolved=true, waitForIdle should return.
673
+ const readMessages = async (): Promise<SessionMessage[] | null> => {
674
+ try {
675
+ return await getSessionMessages(sessionId, {
676
+ dir: process.cwd(),
677
+ includeSystemMessages: true,
678
+ });
679
+ } catch {
680
+ return null;
681
+ }
682
+ };
445
683
 
446
- try {
447
- for await (const event of watch(dir, { signal: ac.signal })) {
448
- // Filter: only care about events for our session's marker file
449
- if (event.filename !== sessionId) continue;
684
+ const handleMarker = async (): Promise<[boolean, SessionMessage[]]> => {
685
+ let msgs = await readMessages();
686
+ if (msgs === null) {
687
+ // Transcript read failed — keep watching; the next event will retry.
688
+ return [false, []];
689
+ }
450
690
 
451
- // Marker appeared — read transcript
452
- let msgs: SessionMessage[];
453
- try {
454
- msgs = await getSessionMessages(sessionId, {
455
- dir: process.cwd(),
456
- includeSystemMessages: true,
457
- });
458
- } catch {
459
- // Transcript read failed — wait for the next marker event
460
- continue;
691
+ // The Stop hook fires only once per agent loop completion (when there
692
+ // are no more tool_use blocks to resolve — see Claude Code's
693
+ // `src/query/stopHooks.ts` / `query.ts`: `if (!needsFollowUp)`). But
694
+ // Claude Code writes to the JSONL transcript asynchronously via
695
+ // `enqueueWrite()` with a batched ~100ms flush, so the final
696
+ // `assistant[text]` message can still be in the page-cache when our
697
+ // marker watcher fires. Reading the transcript at that moment races
698
+ // the writer and returns a prefix ending at `user[tool_result]`.
699
+ //
700
+ // Because no further marker events are coming, we can't just "keep
701
+ // watching the marker dir". Instead, poll the transcript file directly
702
+ // until it either settles on a terminal stop_reason or the poll budget
703
+ // expires. The budget covers Claude Code's flush interval plus headroom
704
+ // for slow disks and buffered `fs/promises` writes.
705
+ if (_isMidAgentLoop(msgs)) {
706
+ const pollIntervalMs = 50;
707
+ const pollBudgetMs = 3_000;
708
+ const start = Date.now();
709
+ while (_isMidAgentLoop(msgs) && Date.now() - start < pollBudgetMs) {
710
+ await Bun.sleep(pollIntervalMs);
711
+ const next = await readMessages();
712
+ if (next) msgs = next;
461
713
  }
714
+ // Whether we recovered or ran out of budget, fall through — returning
715
+ // what we have beats hanging forever if the writer really did drop a
716
+ // message (e.g. max-tokens collapse, abort mid-stream).
717
+ }
462
718
 
463
- if (_hasUnresolvedHILTool(msgs)) {
464
- // Agent is blocked on user input (HIL).
465
- if (!hilActive) {
466
- onHIL?.(true);
467
- hilActive = true;
468
- }
469
- // Remove the marker so the Stop hook can write a new one after the
470
- // user responds and Claude finishes its next turn.
471
- try {
472
- await unlink(markerPath(sessionId));
473
- } catch {
474
- // ENOENT is fine — ignore
475
- }
476
- // Continue watching for the next marker event
477
- continue;
478
- }
719
+ const sliced = msgs.length > transcriptBeforeCount
720
+ ? msgs.slice(transcriptBeforeCount)
721
+ : [];
722
+ return [true, sliced];
723
+ };
479
724
 
480
- // No unresolved HIL — if we were in HIL state, signal resolution.
481
- if (hilActive) {
482
- onHIL?.(false);
483
- hilActive = false;
725
+ try {
726
+ // Attach the watcher FIRST; fs.watch returns an iterable whose underlying
727
+ // inotify/FSEvent subscription is live from this point on.
728
+ const watcher = watch(dir, { signal: ac.signal });
729
+
730
+ // Close the race: if the Stop hook fired between clearStaleMarker() and
731
+ // the watcher attach above, the marker is already on disk and no further
732
+ // events will be emitted. Handle it synchronously.
733
+ if (existsSync(target)) {
734
+ const [done, result] = await handleMarker();
735
+ if (done) {
736
+ ac.abort();
737
+ return result;
484
738
  }
739
+ }
485
740
 
486
- // Truly idle — return transcript messages produced during this turn.
487
- const result = msgs.length > transcriptBeforeCount
488
- ? msgs.slice(transcriptBeforeCount)
489
- : [];
741
+ for await (const _event of watcher) {
742
+ // We don't trust event.filename: on Linux, a tmp+rename write emits
743
+ // events with the `.tmp` basename, and other files in the marker dir
744
+ // can race us. The marker file's existence on disk is authoritative.
745
+ if (!existsSync(target)) continue;
490
746
 
491
- ac.abort();
492
- return result;
747
+ const [done, result] = await handleMarker();
748
+ if (done) {
749
+ ac.abort();
750
+ return result;
751
+ }
493
752
  }
494
753
  } catch (e: unknown) {
495
- // AbortError is expected when we call ac.abort() to stop watching.
496
- if (e instanceof Error && e.name === "AbortError") {
497
- // Normal exit — return value was already set and returned above.
498
- // If we somehow reach here without returning, fall through to [].
499
- } else {
754
+ // AbortError is expected when we call ac.abort() to stop watching, or
755
+ // when the safety timeout fires.
756
+ if (!(e instanceof Error && e.name === "AbortError")) {
500
757
  throw e;
501
758
  }
759
+ } finally {
760
+ clearTimeout(timeout);
502
761
  }
503
762
 
504
763
  return [];
@@ -513,14 +772,6 @@ export interface ClaudeQueryOptions {
513
772
  paneId: string;
514
773
  /** The prompt to send */
515
774
  prompt: string;
516
- /** Polling interval in ms (default: 2000) */
517
- pollIntervalMs?: number;
518
- /** Number of C-m presses per submit round (default: 1 for Claude) */
519
- submitPresses?: number;
520
- /** Max submit rounds if text isn't consumed (default: 6) */
521
- maxSubmitRounds?: number;
522
- /** Timeout in ms waiting for pane to be ready before sending (default: 30s) */
523
- readyTimeoutMs?: number;
524
775
  /**
525
776
  * Called when the agent's human-in-the-loop state changes.
526
777
  * `waiting=true` → AskUserQuestion is pending (agent blocked on user input).
@@ -567,14 +818,23 @@ export function extractAssistantText(
567
818
  /**
568
819
  * Send a prompt to a Claude Code interactive session running in a tmux pane.
569
820
  *
570
- * Flow (hardened from OMX's sendToWorker):
571
- * 1. Wait for pane readiness with exponential backoff
572
- * 2. Capture pane content before sending
573
- * 3. Send literal text via `send-keys -l --`
574
- * 4. Submit with C-m rounds and per-round capture verification
575
- * 5. Adaptive retry: clear line (C-u), re-type, re-submit
576
- * 6. Post-submit verification via active-task detection
577
- * 7. Wait for response by polling for output stabilization + prompt return
821
+ * First query and follow-up queries use different delivery channels:
822
+ *
823
+ * - **First query**: stages the prompt in a tmp file and spawns
824
+ * `claude --session-id <UUID> 'Read <path> and follow the instructions inside.'` into the
825
+ * empty pane. Claude's first action is a Read tool call, which
826
+ * sidesteps ARG_MAX on the spawn argv.
827
+ *
828
+ * - **Follow-up query**: writes the prompt to
829
+ * `~/.atomic/claude-queue/<session_id>`. The Stop hook from the
830
+ * previous turn is blocked in a poll loop there; it reads the queue
831
+ * entry and emits `{"decision":"block","reason":<prompt>}` on stdout,
832
+ * which Claude Code feeds back as the next user message. No tmux
833
+ * keystrokes, no paste-buffer dance, no pane-state polling — the
834
+ * whole delivery rides Claude's own continuation API.
835
+ *
836
+ * Both paths converge on `waitForIdle`, which watches the Stop-hook marker
837
+ * file for this session and returns the transcript slice for the turn.
578
838
  *
579
839
  * @example
580
840
  * ```typescript
@@ -588,15 +848,7 @@ export function extractAssistantText(
588
848
  * ```
589
849
  */
590
850
  export async function claudeQuery(options: ClaudeQueryOptions): Promise<SessionMessage[]> {
591
- const {
592
- paneId,
593
- prompt,
594
- pollIntervalMs = 2_000,
595
- submitPresses = 1,
596
- maxSubmitRounds = 6,
597
- readyTimeoutMs = 30_000,
598
- onHIL,
599
- } = options;
851
+ const { paneId, prompt, onHIL } = options;
600
852
 
601
853
  const paneState = initializedPanes.get(paneId);
602
854
  if (!paneState) {
@@ -609,134 +861,91 @@ export async function claudeQuery(options: ClaudeQueryOptions): Promise<SessionM
609
861
  const dir = process.cwd();
610
862
  const claudeSessionId = paneState.claudeSessionId;
611
863
 
612
- // ── Clear any stale marker left from a previous turn before submitting. ──
613
- // This ensures `waitForIdle`'s watch loop doesn't fire on the marker written
614
- // by the Stop hook at the end of the LAST turn instead of the current one.
864
+ // Clear stale marker AND stale queue entry before submitting so the
865
+ // Stop-hook for the previous turn (if any) cannot race this one.
615
866
  await clearStaleMarker(claudeSessionId);
867
+ await clearStaleQueue(claudeSessionId);
616
868
 
617
- // ── First query: spawn `claude --session-id <UUID> 'Read the prompt in <path>'`.
618
- // The prompt is delivered via Claude's Read tool on its first turn — no
619
- // paste-buffer, no submit retries. Subsequent queries fall through to the
620
- // existing paste-buffer flow against the now-running TUI.
621
- if (!paneState.claudeStarted) {
622
- await spawnClaudeWithPrompt(
623
- paneId,
624
- prompt,
625
- paneState.chatFlags,
626
- claudeSessionId,
627
- paneState.sessionDir,
628
- paneState.readyTimeoutMs,
629
- );
630
- paneState.claudeStarted = true;
631
- } else {
632
- // ── Transcript snapshot (before send) ──
633
- // Taken BEFORE sending so we get an accurate baseline for slicing the
634
- // returned messages to just this turn.
635
- let transcriptBeforeCount = 0;
636
- try {
637
- const msgs = await getSessionMessages(claudeSessionId, {
638
- dir,
639
- includeSystemMessages: true,
640
- });
641
- transcriptBeforeCount = msgs.length;
642
- } catch {
643
- // Best-effort — 0 means we scan all messages (correct, slightly less efficient)
644
- }
645
-
646
- const beforeContent = normalizeTmuxLines(capturePaneScrollback(paneId));
647
- const normalizedPrompt = normalizeTmuxCapture(prompt).slice(0, 100);
648
-
649
- // Step 1: Wait for pane readiness before sending
650
- await waitForPaneReady(paneId, readyTimeoutMs);
651
-
652
- // Step 2: Send text via paste buffer (atomic, handles large prompts)
653
- sendViaPasteBuffer(paneId, prompt);
654
- await Bun.sleep(150);
655
-
656
- // Step 3: Submit with per-round capture verification
657
- let delivered = await attemptSubmitRounds(paneId, normalizedPrompt, maxSubmitRounds, submitPresses);
869
+ let transcriptBeforeCount = 0;
870
+ let spawnPromptFile: string | undefined;
658
871
 
659
- // Step 4: Adaptive retry — clear line, re-type, re-submit
660
- if (!delivered) {
661
- const visibleCapture = capturePaneVisible(paneId);
662
- const visibleNorm = normalizeTmuxCapture(visibleCapture);
663
-
664
- // Only retry if text is still visible and pane is idle (not mid-task)
665
- if (visibleNorm.includes(normalizedPrompt) && !paneHasActiveTask(visibleCapture) && paneLooksReady(visibleCapture)) {
666
- sendSpecialKey(paneId, "C-u");
667
- await Bun.sleep(80);
668
- sendViaPasteBuffer(paneId, prompt);
669
- await Bun.sleep(120);
670
- delivered = await attemptSubmitRounds(paneId, normalizedPrompt, maxSubmitRounds, submitPresses);
872
+ try {
873
+ if (paneState.claudeStarted) {
874
+ // Follow-up query: snapshot the transcript length so waitForIdle can
875
+ // slice out the messages produced by THIS turn, then enqueue the
876
+ // prompt for the Stop hook to pick up.
877
+ try {
878
+ const msgs = await getSessionMessages(claudeSessionId, {
879
+ dir,
880
+ includeSystemMessages: true,
881
+ });
882
+ transcriptBeforeCount = msgs.length;
883
+ } catch {
884
+ // Best-effort — 0 means we scan all messages (correct, slightly less efficient)
671
885
  }
886
+
887
+ await enqueuePrompt(claudeSessionId, prompt);
888
+ } else {
889
+ // First query: spawn claude with the prompt baked into argv via the
890
+ // Read-tool indirection. The tmp file only has to live long enough
891
+ // for Claude's first Read tool call, so we delete it once waitForIdle
892
+ // returns (the turn is complete by then).
893
+ spawnPromptFile = join(
894
+ os.tmpdir(),
895
+ `atomic-claude-prompt-${claudeSessionId}-${randomUUID()}.txt`,
896
+ );
897
+ writeFileSync(spawnPromptFile, prompt, "utf-8");
898
+
899
+ await spawnClaudeWithPrompt(
900
+ paneId,
901
+ spawnPromptFile,
902
+ paneState.chatFlags,
903
+ claudeSessionId,
904
+ paneState.readyTimeoutMs,
905
+ );
906
+ paneState.claudeStarted = true;
672
907
  }
673
908
 
674
- // Step 5: Final fallback double C-m nudge + post-submit verification
675
- if (!delivered) {
676
- sendSpecialKey(paneId, "C-m");
677
- await Bun.sleep(120);
678
- sendSpecialKey(paneId, "C-m");
679
- await Bun.sleep(300);
680
-
681
- const verifyCapture = capturePaneVisible(paneId);
682
- if (paneHasActiveTask(verifyCapture)) {
683
- delivered = true;
684
- } else {
685
- delivered = !normalizeTmuxCapture(verifyCapture).includes(normalizedPrompt);
686
- }
909
+ // HIL detection runs in parallel with idle detection. The Stop hook
910
+ // (which drives waitForIdle) doesn't fire while `AskUserQuestion` is
911
+ // pending, so we watch the transcript JSONL directly for HIL transitions.
912
+ const hilAc = new AbortController();
913
+ if (onHIL) {
914
+ void watchTranscriptForHIL(claudeSessionId, onHIL, hilAc.signal).catch(
915
+ () => {
916
+ // Best-effort — never fail the query over HIL detection.
917
+ },
918
+ );
919
+ }
687
920
 
688
- // One more attempt if text is still stuck
689
- if (!delivered) {
690
- sendSpecialKey(paneId, "C-m");
691
- await Bun.sleep(150);
692
- sendSpecialKey(paneId, "C-m");
921
+ try {
922
+ return await waitForIdle(claudeSessionId, transcriptBeforeCount);
923
+ } finally {
924
+ hilAc.abort();
925
+ // Safety: waitForIdle only returns at true turn-idle (no unresolved
926
+ // AskUserQuestion by Claude's own `!needsFollowUp` gate). If the
927
+ // transcript watcher missed the final tool_result flush due to
928
+ // Claude's batched JSONL writes, the UI could be stuck on
929
+ // awaiting_input. `resumeSession` in the panel store is idempotent
930
+ // (no-op when the session isn't in awaiting_input), so this is
931
+ // always safe.
932
+ onHIL?.(false);
933
+ }
934
+ } finally {
935
+ if (spawnPromptFile) {
936
+ try {
937
+ await unlink(spawnPromptFile);
938
+ } catch {
939
+ // ENOENT / already removed is fine.
693
940
  }
694
941
  }
695
-
696
- // Wait for response completion via pane idle + transcript read.
697
- // HIL detection is integrated into waitForIdle.
698
- return await waitForIdle(
699
- paneId,
700
- claudeSessionId,
701
- transcriptBeforeCount,
702
- beforeContent,
703
- pollIntervalMs,
704
- onHIL,
705
- );
706
942
  }
707
-
708
- // First-query path: wait for Claude to finish the response. The prompt
709
- // file lives in the workflow's session dir as `prompt.txt` and is kept
710
- // as part of the session log — no cleanup needed.
711
- return await waitForIdle(
712
- paneId,
713
- claudeSessionId,
714
- 0,
715
- "",
716
- pollIntervalMs,
717
- onHIL,
718
- );
719
943
  }
720
944
 
721
945
  // ---------------------------------------------------------------------------
722
946
  // Synthetic wrappers — uniform s.client / s.session API for Claude stages
723
947
  // ---------------------------------------------------------------------------
724
948
 
725
- /**
726
- * Default query options the user can set per-stage via the `sessionOpts` arg.
727
- * These become defaults for every `s.session.query()` call within that stage.
728
- */
729
- export interface ClaudeQueryDefaults {
730
- /** Polling interval in ms (default: 2000) */
731
- pollIntervalMs?: number;
732
- /** Number of C-m presses per submit round (default: 1) */
733
- submitPresses?: number;
734
- /** Max submit rounds if text isn't consumed (default: 6) */
735
- maxSubmitRounds?: number;
736
- /** Timeout in ms waiting for pane to be ready before sending (default: 30s) */
737
- readyTimeoutMs?: number;
738
- }
739
-
740
949
  /**
741
950
  * Synthetic client wrapper for Claude stages.
742
951
  * Auto-starts the Claude CLI in the tmux pane during `start()`.
@@ -744,23 +953,26 @@ export interface ClaudeQueryDefaults {
744
953
  export class ClaudeClientWrapper {
745
954
  readonly paneId: string;
746
955
  private readonly opts: { chatFlags?: string[]; readyTimeoutMs?: number };
747
- private readonly sessionDir: string;
748
956
 
749
957
  constructor(
750
958
  paneId: string,
751
959
  opts: { chatFlags?: string[]; readyTimeoutMs?: number } = {},
752
- sessionDir: string,
753
960
  ) {
754
961
  this.paneId = paneId;
755
962
  this.opts = opts;
756
- this.sessionDir = sessionDir;
757
963
  }
758
964
 
759
- /** Start the Claude CLI in the tmux pane. Called by the runtime during init. */
760
- async start(): Promise<void> {
761
- await createClaudeSession({
965
+ /**
966
+ * Start the Claude CLI in the tmux pane. Returns the Claude session UUID
967
+ * so the caller can pass it to `ClaudeSessionWrapper` (and thus expose it
968
+ * as `s.sessionId` to workflows). This is the UUID used by Claude Code to
969
+ * name its JSONL transcript file and to key the Stop-hook marker — workflows
970
+ * pass it to `s.save(s.sessionId)` so the save path reads the correct
971
+ * transcript even when many Claude sessions run in parallel.
972
+ */
973
+ async start(): Promise<string> {
974
+ return await createClaudeSession({
762
975
  paneId: this.paneId,
763
- sessionDir: this.sessionDir,
764
976
  chatFlags: this.opts.chatFlags,
765
977
  readyTimeoutMs: this.opts.readyTimeoutMs,
766
978
  });
@@ -777,31 +989,34 @@ export class ClaudeClientWrapper {
777
989
  export class ClaudeSessionWrapper {
778
990
  readonly paneId: string;
779
991
  readonly sessionId: string;
780
- private readonly defaults: ClaudeQueryDefaults;
781
992
  private readonly onHIL: ((waiting: boolean) => void) | undefined;
782
993
 
783
994
  constructor(
784
995
  paneId: string,
785
996
  sessionId: string,
786
- defaults: ClaudeQueryDefaults = {},
787
997
  onHIL?: (waiting: boolean) => void,
788
998
  ) {
789
999
  this.paneId = paneId;
790
1000
  this.sessionId = sessionId;
791
- this.defaults = defaults;
792
1001
  this.onHIL = onHIL;
793
1002
  }
794
1003
 
795
- /** Send a prompt to Claude and wait for the response. */
1004
+ /**
1005
+ * Send a prompt to Claude and wait for the response.
1006
+ *
1007
+ * The `_options` parameter exists for signature compatibility with
1008
+ * {@link HeadlessClaudeSessionWrapper.query} (which forwards SDK options
1009
+ * like `agent`, `permissionMode`, etc. to the Agent SDK). In the
1010
+ * interactive pane path these options don't apply — we're driving the
1011
+ * `claude` CLI binary, not the SDK — so they are silently ignored.
1012
+ */
796
1013
  async query(
797
1014
  prompt: string,
798
- opts?: Partial<ClaudeQueryDefaults & SDKOptions>,
1015
+ _options?: Partial<SDKOptions>,
799
1016
  ): Promise<SessionMessage[]> {
800
1017
  return claudeQuery({
801
1018
  paneId: this.paneId,
802
1019
  prompt,
803
- ...this.defaults,
804
- ...opts,
805
1020
  onHIL: this.onHIL,
806
1021
  });
807
1022
  }
@@ -819,7 +1034,15 @@ export class ClaudeSessionWrapper {
819
1034
  * Used when `options.headless` is true in `ctx.stage()`.
820
1035
  */
821
1036
  export class HeadlessClaudeClientWrapper {
822
- async start(): Promise<void> {}
1037
+ /**
1038
+ * Headless Claude stages don't pre-allocate a session — each `query()` call
1039
+ * to {@link HeadlessClaudeSessionWrapper} spawns a fresh Agent SDK run that
1040
+ * emits its own `session_id`. We still return an empty string here so the
1041
+ * method signature matches {@link ClaudeClientWrapper.start}.
1042
+ */
1043
+ async start(): Promise<string> {
1044
+ return "";
1045
+ }
823
1046
  async stop(): Promise<void> {}
824
1047
  }
825
1048
 
@@ -836,33 +1059,31 @@ export class HeadlessClaudeClientWrapper {
836
1059
  */
837
1060
  export class HeadlessClaudeSessionWrapper {
838
1061
  readonly paneId = "";
839
- readonly sessionId: string;
1062
+ /**
1063
+ * The Claude session UUID of the most recently completed `query()`. Exposed
1064
+ * via `s.sessionId` so workflows can pass it to `s.save(s.sessionId)` and
1065
+ * have the save path read the correct transcript, even when several headless
1066
+ * Claude stages run in parallel (each call gets its own SDK-assigned UUID).
1067
+ */
1068
+ private _lastSessionId: string = "";
840
1069
 
841
- constructor(sessionId: string) {
842
- this.sessionId = sessionId;
1070
+ get sessionId(): string {
1071
+ return this._lastSessionId;
843
1072
  }
844
1073
 
845
1074
  async query(
846
1075
  prompt: string | AsyncIterable<SDKUserMessage>,
847
- options?: Partial<ClaudeQueryDefaults & SDKOptions>,
1076
+ options?: Partial<SDKOptions>,
848
1077
  ): Promise<SessionMessage[]> {
849
- // Strip query-defaults fields; the rest are SDK options
850
- const {
851
- pollIntervalMs: _a,
852
- submitPresses: _b,
853
- maxSubmitRounds: _c,
854
- readyTimeoutMs: _d,
855
- ...sdkOpts
856
- } = options ?? {};
857
-
858
1078
  let sdkSessionId = "";
859
- for await (const msg of sdkQuery({ prompt, options: sdkOpts })) {
1079
+ for await (const msg of sdkQuery({ prompt, options: options ?? {} })) {
860
1080
  if (msg.type === "result") {
861
1081
  sdkSessionId = String((msg as Record<string, unknown>).session_id ?? "");
862
1082
  }
863
1083
  }
864
1084
  // Read the transcript to return native SessionMessage[]
865
1085
  if (sdkSessionId) {
1086
+ this._lastSessionId = sdkSessionId;
866
1087
  return getSessionMessages(sdkSessionId, { dir: process.cwd() });
867
1088
  }
868
1089
  return [];