@bastani/atomic 0.5.27 → 0.5.28-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,13 +45,24 @@ export declare function claudeHookDirs(): {
45
45
  queue: string;
46
46
  release: string;
47
47
  hil: string;
48
+ pid: string;
48
49
  };
49
50
  /** Options for {@link claudeStopHookCommand}. Primarily used by tests to shrink the wait budget. */
50
51
  export interface ClaudeStopHookOptions {
51
52
  /** Maximum time the hook waits for a queued follow-up prompt before letting Claude stop. */
52
53
  waitTimeoutMs?: number;
53
- /** Polling interval for queue/release detection. */
54
+ /**
55
+ * Interval for the polling fallback that runs alongside the `fs.watch`
56
+ * watchers in case an inotify/FSEvent notification gets dropped. In the
57
+ * happy path, watcher events fire on create and the poll never matches.
58
+ */
54
59
  pollIntervalMs?: number;
60
+ /**
61
+ * Interval at which the hook checks whether the atomic workflow process
62
+ * that owns this session is still alive. Coarser than `pollIntervalMs`
63
+ * because atomic crashing is rare and `process.kill(pid, 0)` is a syscall.
64
+ */
65
+ livenessIntervalMs?: number;
55
66
  }
56
67
  /**
57
68
  * Handler for the hidden `_claude-stop-hook` subcommand.
@@ -1 +1 @@
1
- {"version":3,"file":"claude-stop-hook.d.ts","sourceRoot":"","sources":["../../../src/commands/cli/claude-stop-hook.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAOH,yEAAyE;AACzE,MAAM,WAAW,qBAAqB;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAeD;;;;;GAKG;AACH,wBAAgB,cAAc,IAAI;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CAQhG;AAED,oGAAoG;AACpG,MAAM,WAAW,qBAAqB;IACpC,4FAA4F;IAC5F,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,oDAAoD;IACpD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAKD;;;;;;;;GAQG;AACH,wBAAsB,qBAAqB,CACzC,OAAO,GAAE,qBAA0B,GAClC,OAAO,CAAC,MAAM,CAAC,CAsGjB"}
1
+ {"version":3,"file":"claude-stop-hook.d.ts","sourceRoot":"","sources":["../../../src/commands/cli/claude-stop-hook.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAQH,yEAAyE;AACzE,MAAM,WAAW,qBAAqB;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAeD;;;;;GAKG;AACH,wBAAgB,cAAc,IAAI;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;CACb,CAcA;AAED,oGAAoG;AACpG,MAAM,WAAW,qBAAqB;IACpC,4FAA4F;IAC5F,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAsFD;;;;;;;;GAQG;AACH,wBAAsB,qBAAqB,CACzC,OAAO,GAAE,qBAA0B,GAClC,OAAO,CAAC,MAAM,CAAC,CAyMjB"}
@@ -133,6 +133,34 @@ export declare function releasePath(claudeSessionId: string): string;
133
133
  * Safe to call more than once.
134
134
  */
135
135
  export declare function releaseClaudeSession(claudeSessionId: string): Promise<void>;
136
+ /**
137
+ * Wait for the Claude session to become idle using `fs.watch` on the
138
+ * `~/.atomic/claude-stop/` marker directory.
139
+ *
140
+ * When Claude finishes a turn, the `atomic _claude-stop-hook` Stop hook writes
141
+ * `~/.atomic/claude-stop/<session_id>`. The write triggers an OS-native
142
+ * `fs.watch` event on the parent directory — far more reliable than polling
143
+ * tmux pane glyphs, which vary between Claude Code versions.
144
+ *
145
+ * This function is strictly about *idle detection*. HIL is detected separately
146
+ * by {@link watchHILMarker}; the Stop hook does not fire while
147
+ * `AskUserQuestion` is pending (the agent loop blocks on deferred tools), so
148
+ * mixing the two would silently miss the HIL window.
149
+ *
150
+ * Algorithm:
151
+ * 1. Attach the directory watcher, then check for the marker file on disk —
152
+ * this closes the race where the Stop hook fires between prompt submission
153
+ * and watcher attach.
154
+ * 2. On any event, re-check the marker file on disk (we intentionally do NOT
155
+ * filter by `event.filename`, because on Linux a write can deliver multiple
156
+ * events with varying filenames and editor tools may race us).
157
+ * 3. Read the session transcript via `getSessionMessages` and slice messages
158
+ * from `transcriptBeforeCount`.
159
+ * 4. Clean up the `fs.watch` watcher on any exit path via AbortController.
160
+ *
161
+ * @param claudeSessionId - Claude's session UUID (used to identify marker file)
162
+ * @param transcriptBeforeCount - number of messages in transcript before this turn
163
+ */
136
164
  /**
137
165
  * @internal Exported for unit tests.
138
166
  */
@@ -1 +1 @@
1
- {"version":3,"file":"claude.d.ts","sourceRoot":"","sources":["../../../src/sdk/providers/claude.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EAGL,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,OAAO,IAAI,UAAU,EAC3B,MAAM,gCAAgC,CAAC;AAgCxC;;;;;;GAMG;AACH,wBAAsB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAWtE;AAqID,MAAM,WAAW,oBAAoB;IACnC,kDAAkD;IAClD,MAAM,EAAE,MAAM,CAAC;IACf,sIAAsI;IACtI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,sEAAsE;IACtE,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,mBAAmB,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,MAAM,CAAC,CAexF;AAsID;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,cAAc,EAAE,GAAG,OAAO,CAUnE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,cAAc,CAClC,eAAe,EAAE,MAAM,EACvB,KAAK,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,EACjC,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,IAAI,CAAC,CAyCf;AAMD;;;;;;GAMG;AACH,wBAAgB,SAAS,IAAI,MAAM,CAElC;AAED;;;;GAIG;AACH,wBAAgB,UAAU,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAE1D;AAED;;;;GAIG;AACH,wBAAgB,QAAQ,IAAI,MAAM,CAEjC;AAED,0EAA0E;AAC1E,wBAAgB,SAAS,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAEzD;AAED;;;;;GAKG;AACH,wBAAgB,UAAU,IAAI,MAAM,CAEnC;AAED,4EAA4E;AAC5E,wBAAgB,WAAW,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAE3D;AAiED;;;;GAIG;AACH,wBAAsB,oBAAoB,CAAC,eAAe,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGjF;AAuCD;;GAEG;AACH,wBAAsB,WAAW,CAC/B,eAAe,EAAE,MAAM,EACvB,qBAAqB,EAAE,MAAM,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC,CAqG3B;AAMD,MAAM,WAAW,kBAAkB;IACjC,2CAA2C;IAC3C,MAAM,EAAE,MAAM,CAAC;IACf,yBAAyB;IACzB,MAAM,EAAE,MAAM,CAAC;IACf;;;;OAIG;IACH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,CAAC;CACpC;AAED;;;;;;;;;GASG;AACH,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,aAAa,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CAAC,EACvD,UAAU,EAAE,MAAM,GACjB,MAAM,CAoBR;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAsB,WAAW,CAAC,OAAO,EAAE,kBAAkB,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CA8FxF;AAMD;;;GAGG;AACH,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,MAAM,EAAE,GAAG,SAAS,EAC9B,MAAM,EAAE,MAAM,EAAE,GACf,MAAM,EAAE,CAMV;AAED;;;GAGG;AACH,qBAAa,mBAAmB;IAC9B,QAAQ,CAAC,MAAM,EAAE,MA
AM,CAAC;IACxB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAoD;gBAGvE,MAAM,EAAE,MAAM,EACd,IAAI,GAAE;QAAE,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;QAAC,cAAc,CAAC,EAAE,MAAM,CAAA;KAAO;IAM9D;;;;;;;OAOG;IACG,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;IAQ9B,yEAAyE;IACnE,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAC5B;AAED;;;GAGG;AACH,qBAAa,oBAAoB;IAC/B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA2C;gBAG/D,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;IAOpC;;;;;;;;OAQG;IACG,KAAK,CACT,MAAM,EAAE,MAAM,EACd,QAAQ,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC7B,OAAO,CAAC,cAAc,EAAE,CAAC;IAQ5B,gEAAgE;IAC1D,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;CAClC;AAMD;;;GAGG;AACH,qBAAa,2BAA2B;IACtC;;;;;OAKG;IACG,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;IAGxB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAC5B;AAED;;;;;;;;;;GAUG;AACH,qBAAa,4BAA4B;IACvC,QAAQ,CAAC,MAAM,MAAM;IACrB;;;;;OAKG;IACH,OAAO,CAAC,cAAc,CAAc;IAEpC,IAAI,SAAS,IAAI,MAAM,CAEtB;IAEK,KAAK,CACT,MAAM,EAAE,MAAM,GAAG,aAAa,CAAC,cAAc,CAAC,EAC9C,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IAqCtB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;CAClC;AAQD;;;;;GAKG;AACH,eAAO,MAAM,sBAAsB,+DAejC,CAAC"}
1
+ {"version":3,"file":"claude.d.ts","sourceRoot":"","sources":["../../../src/sdk/providers/claude.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EAGL,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,OAAO,IAAI,UAAU,EAC3B,MAAM,gCAAgC,CAAC;AAgCxC;;;;;;GAMG;AACH,wBAAsB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAiBtE;AAqID,MAAM,WAAW,oBAAoB;IACnC,kDAAkD;IAClD,MAAM,EAAE,MAAM,CAAC;IACf,sIAAsI;IACtI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,sEAAsE;IACtE,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,mBAAmB,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,MAAM,CAAC,CAqBxF;AAsID;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,cAAc,EAAE,GAAG,OAAO,CAUnE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,cAAc,CAClC,eAAe,EAAE,MAAM,EACvB,KAAK,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,EACjC,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,IAAI,CAAC,CAyCf;AAMD;;;;;;GAMG;AACH,wBAAgB,SAAS,IAAI,MAAM,CAElC;AAED;;;;GAIG;AACH,wBAAgB,UAAU,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAE1D;AAED;;;;GAIG;AACH,wBAAgB,QAAQ,IAAI,MAAM,CAEjC;AAED,0EAA0E;AAC1E,wBAAgB,SAAS,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAEzD;AAED;;;;;GAKG;AACH,wBAAgB,UAAU,IAAI,MAAM,CAEnC;AAED,4EAA4E;AAC5E,wBAAgB,WAAW,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAE3D;AAiED;;;;GAIG;AACH,wBAAsB,oBAAoB,CAAC,eAAe,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGjF;AAsCD;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH;;GAEG;AACH,wBAAsB,WAAW,CAC/B,eAAe,EAAE,MAAM,EACvB,qBAAqB,EAAE,MAAM,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC,CAiG3B;AAMD,MAAM,WAAW,kBAAkB;IACjC,2CAA2C;IAC3C,MAAM,EAAE,MAAM,CAAC;IACf,yBAAyB;IACzB,MAAM,EAAE,MAAM,CAAC;IACf;;;;OAIG;IACH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,CAAC;CACpC;AAED;;;;;;;;;GASG;AACH,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,aAAa,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CAAC,EACvD,UAAU,EAAE,MAAM,GACjB,MAAM,CAoBR;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAsB,WAAW,CAAC,OAAO,EAAE,kBAAkB,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CA8FxF;AAMD;;;GAGG;AACH,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,MAAM,EAAE,GAAG,SAAS,EAC9B,MAAM,EAAE,MAAM,EAAE,GACf,MAAM,EAAE,CAMV;AAED;;;GAGG;AACH,qB
AAa,mBAAmB;IAC9B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAoD;gBAGvE,MAAM,EAAE,MAAM,EACd,IAAI,GAAE;QAAE,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;QAAC,cAAc,CAAC,EAAE,MAAM,CAAA;KAAO;IAM9D;;;;;;;OAOG;IACG,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;IAQ9B,yEAAyE;IACnE,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAC5B;AAED;;;GAGG;AACH,qBAAa,oBAAoB;IAC/B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA2C;gBAG/D,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;IAOpC;;;;;;;;OAQG;IACG,KAAK,CACT,MAAM,EAAE,MAAM,EACd,QAAQ,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC7B,OAAO,CAAC,cAAc,EAAE,CAAC;IAQ5B,gEAAgE;IAC1D,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;CAClC;AAMD;;;GAGG;AACH,qBAAa,2BAA2B;IACtC;;;;;OAKG;IACG,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;IAGxB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAC5B;AAED;;;;;;;;;;GAUG;AACH,qBAAa,4BAA4B;IACvC,QAAQ,CAAC,MAAM,MAAM;IACrB;;;;;OAKG;IACH,OAAO,CAAC,cAAc,CAAc;IAEpC,IAAI,SAAS,IAAI,MAAM,CAEtB;IAEK,KAAK,CACT,MAAM,EAAE,MAAM,GAAG,aAAa,CAAC,cAAc,CAAC,EAC9C,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IAqCtB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;CAClC;AAQD;;;;;GAKG;AACH,eAAO,MAAM,sBAAsB,+DAejC,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bastani/atomic",
3
- "version": "0.5.27",
3
+ "version": "0.5.28-1",
4
4
  "description": "Configuration management CLI and SDK for coding agents",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -9,10 +9,11 @@
9
9
  * and clean up in `afterEach` so test runs never collide with each other
10
10
  * or with real marker/queue/release files.
11
11
  *
12
- * The hook's default wait for a queued follow-up prompt is 15 minutes.
13
- * Every test here passes a short `waitTimeoutMs` so the hook exits quickly
14
- * when no queue entry is present — we are testing the branching logic,
15
- * not the real-world wait budget.
12
+ * The hook's default wait for a queued follow-up prompt is effectively
13
+ * unbounded (~24 days) so the workflow can take as long as it needs between
14
+ * turns. Every test here passes a short `waitTimeoutMs` so the hook exits
15
+ * quickly when no queue entry is present — we are testing the branching
16
+ * logic, not the real-world wait budget.
16
17
  */
17
18
 
18
19
  import { describe, test, expect, afterEach, spyOn } from "bun:test";
@@ -20,7 +21,7 @@ import { access, rm, writeFile, mkdir } from "node:fs/promises";
20
21
  import { join } from "node:path";
21
22
  import { claudeStopHookCommand, claudeHookDirs } from "./claude-stop-hook.ts";
22
23
 
23
- const { marker: markerDir, queue: queueDir, release: releaseDir } = claudeHookDirs();
24
+ const { marker: markerDir, queue: queueDir, release: releaseDir, pid: pidDir } = claudeHookDirs();
24
25
 
25
26
  const SHORT_TIMEOUT_MS = 300;
26
27
 
@@ -52,6 +53,7 @@ afterEach(async () => {
52
53
  rm(join(markerDir, id), { force: true }),
53
54
  rm(join(queueDir, id), { force: true }),
54
55
  rm(join(releaseDir, id), { force: true }),
56
+ rm(join(pidDir, id), { force: true }),
55
57
  ]);
56
58
  }
57
59
  sessionIdsToClean.length = 0;
@@ -268,4 +270,48 @@ describe("claudeStopHookCommand", () => {
268
270
  // No block decision emitted.
269
271
  expect(stdoutChunks.join("")).toBe("");
270
272
  });
273
+
274
+ // 9. Dead atomic PID → hook exits without waiting out the full timeout.
275
+ //
276
+ // Simulates the case where the atomic workflow was SIGKILL'd between
277
+ // turns: the pid file on disk points at a process that no longer exists,
278
+ // so the liveness check should fire and let the hook bail. We pick a
279
+ // deliberately-bogus PID (2^22 - 1) that is almost certainly unused.
280
+ test("dead atomic pid triggers liveness exit before the wait timeout", async () => {
281
+ const sessionId = crypto.randomUUID();
282
+ sessionIdsToClean.push(sessionId);
283
+
284
+ // Find a PID that doesn't currently exist. `process.kill(pid, 0)` throws
285
+ // ESRCH for free PIDs; we scan from a high number downward to dodge
286
+ // system-reserved low PIDs.
287
+ let deadPid = 4_194_303;
288
+ while (deadPid > 1) {
289
+ try {
290
+ process.kill(deadPid, 0);
291
+ deadPid -= 1;
292
+ } catch (e: unknown) {
293
+ if (e instanceof Error && "code" in e && (e as NodeJS.ErrnoException).code === "ESRCH") break;
294
+ deadPid -= 1;
295
+ }
296
+ }
297
+
298
+ await mkdir(pidDir, { recursive: true });
299
+ await writeFile(join(pidDir, sessionId), String(deadPid), "utf-8");
300
+
301
+ mockStdin(JSON.stringify({ session_id: sessionId }));
302
+
303
+ // Use a long wait timeout so the test only passes if the liveness check
304
+ // short-circuits the wait. livenessIntervalMs is short so the test runs fast.
305
+ const started = Date.now();
306
+ const code = await claudeStopHookCommand({
307
+ waitTimeoutMs: 30_000,
308
+ pollIntervalMs: 10_000,
309
+ livenessIntervalMs: 50,
310
+ });
311
+ const elapsed = Date.now() - started;
312
+
313
+ expect(code).toBe(0);
314
+ expect(elapsed).toBeLessThan(5_000);
315
+ expect(await fileExists(join(markerDir, sessionId))).toBe(true);
316
+ });
271
317
  });
@@ -29,6 +29,7 @@
29
29
  */
30
30
 
31
31
  import fs from "node:fs/promises";
32
+ import { watch as watchDir } from "node:fs/promises";
32
33
  import { existsSync } from "node:fs";
33
34
  import path from "node:path";
34
35
  import os from "node:os";
@@ -60,13 +61,25 @@ function isClaudeStopHookPayload(value: unknown): value is ClaudeStopHookPayload
60
61
  *
61
62
  * Exported so tests and `src/sdk/providers/claude.ts` share one source of truth.
62
63
  */
63
- export function claudeHookDirs(): { marker: string; queue: string; release: string; hil: string } {
64
+ export function claudeHookDirs(): {
65
+ marker: string;
66
+ queue: string;
67
+ release: string;
68
+ hil: string;
69
+ pid: string;
70
+ } {
64
71
  const base = path.join(os.homedir(), ".atomic");
65
72
  return {
66
73
  marker: path.join(base, "claude-stop"),
67
74
  queue: path.join(base, "claude-queue"),
68
75
  release: path.join(base, "claude-release"),
69
76
  hil: path.join(base, "claude-hil"),
77
+ // Holds the PID of the atomic workflow process that owns each session.
78
+ // The Stop hook polls `process.kill(pid, 0)` against this value so that
79
+ // if atomic is SIGKILL'd (no chance to write a release marker), the hook
80
+ // can detect the orphaned session and self-exit instead of sitting in
81
+ // its wait loop for ~24 days.
82
+ pid: path.join(base, "claude-pid"),
70
83
  };
71
84
  }
72
85
 
@@ -74,12 +87,103 @@ export function claudeHookDirs(): { marker: string; queue: string; release: stri
74
87
  export interface ClaudeStopHookOptions {
75
88
  /** Maximum time the hook waits for a queued follow-up prompt before letting Claude stop. */
76
89
  waitTimeoutMs?: number;
77
- /** Polling interval for queue/release detection. */
90
+ /**
91
+ * Interval for the polling fallback that runs alongside the `fs.watch`
92
+ * watchers in case an inotify/FSEvent notification gets dropped. In the
93
+ * happy path, watcher events fire on create and the poll never matches.
94
+ */
78
95
  pollIntervalMs?: number;
96
+ /**
97
+ * Interval at which the hook checks whether the atomic workflow process
98
+ * that owns this session is still alive. Coarser than `pollIntervalMs`
99
+ * because atomic crashing is rare and `process.kill(pid, 0)` is a syscall.
100
+ */
101
+ livenessIntervalMs?: number;
79
102
  }
80
103
 
81
- const DEFAULT_WAIT_TIMEOUT_MS = 15 * 60 * 1000;
104
+ /**
105
+ * Effectively-unbounded default wait budget for the queue/release poll loop.
106
+ *
107
+ * The hook holds Claude Code in the Stop phase while the workflow runtime
108
+ * decides what to do next — either enqueueing a follow-up prompt (delivered
109
+ * back to Claude as `{decision:"block", reason:...}`) or writing a release
110
+ * marker on teardown. Any finite default here caps the time the workflow has
111
+ * between turns: when it expires, the hook exits 0, Claude stops, and the
112
+ * next `enqueuePrompt` writes to a file nobody's reading — the workflow
113
+ * hangs on `waitForIdle` for a turn that will never come.
114
+ *
115
+ * The Claude-side hook timeout (see `STOP_HOOK_TIMEOUT_SECONDS` in
116
+ * `src/sdk/providers/claude.ts`) is already set to ~24 days, so matching it
117
+ * here keeps the two bounds aligned — the hook either runs until the
118
+ * workflow releases it or until Claude Code itself gives up. Tests override
119
+ * `waitTimeoutMs` via options to keep runs fast.
120
+ *
121
+ * Expressed in ms: 2_147_483 s × 1000 = 2_147_483_000 ms, just under the
122
+ * max safe `setTimeout` value (2^31 - 1).
123
+ */
124
+ const DEFAULT_WAIT_TIMEOUT_MS = 2_147_483_000;
82
125
  const DEFAULT_POLL_INTERVAL_MS = 100;
126
+ const DEFAULT_LIVENESS_INTERVAL_MS = 5_000;
127
+
128
+ /**
129
+ * Read the atomic PID that owns this session from `~/.atomic/claude-pid/<id>`,
130
+ * or return null if the file is missing / malformed. Missing is fine: older
131
+ * runtimes didn't write one, and we just skip the liveness check in that case.
132
+ */
133
+ async function readAtomicPid(pidFilePath: string): Promise<number | null> {
134
+ let raw: string;
135
+ try {
136
+ raw = await fs.readFile(pidFilePath, "utf-8");
137
+ } catch {
138
+ return null;
139
+ }
140
+ const parsed = Number.parseInt(raw.trim(), 10);
141
+ return Number.isInteger(parsed) && parsed > 0 ? parsed : null;
142
+ }
143
+
144
+ /**
145
+ * Sleep that resolves early when `signal` is aborted. Used by the hook's
146
+ * wait loops so `ac.abort()` unblocks everything immediately instead of
147
+ * waiting for the next wake-up tick — otherwise a task that detects a hit
148
+ * (e.g. liveness check) can't meaningfully cancel its siblings.
149
+ */
150
+ function abortableSleep(ms: number, signal: AbortSignal): Promise<void> {
151
+ return new Promise<void>((resolve) => {
152
+ if (signal.aborted) {
153
+ resolve();
154
+ return;
155
+ }
156
+ const timer = setTimeout(() => {
157
+ signal.removeEventListener("abort", onAbort);
158
+ resolve();
159
+ }, ms);
160
+ const onAbort = (): void => {
161
+ clearTimeout(timer);
162
+ resolve();
163
+ };
164
+ signal.addEventListener("abort", onAbort, { once: true });
165
+ });
166
+ }
167
+
168
+ /**
169
+ * True when a process with `pid` exists. Uses signal `0`, which performs the
170
+ * permission/existence check without delivering a signal. ESRCH means gone,
171
+ * EPERM means alive-but-not-ours (still alive for our purposes).
172
+ */
173
+ function isProcessAlive(pid: number): boolean {
174
+ try {
175
+ process.kill(pid, 0);
176
+ return true;
177
+ } catch (e: unknown) {
178
+ if (e instanceof Error && "code" in e) {
179
+ const code = (e as NodeJS.ErrnoException).code;
180
+ if (code === "EPERM") return true;
181
+ if (code === "ESRCH") return false;
182
+ }
183
+ // Unknown error — assume alive to avoid false-positive teardown.
184
+ return true;
185
+ }
186
+ }
83
187
 
84
188
  /**
85
189
  * Handler for the hidden `_claude-stop-hook` subcommand.
@@ -95,6 +199,8 @@ export async function claudeStopHookCommand(
95
199
  ): Promise<number> {
96
200
  const waitTimeoutMs = options.waitTimeoutMs ?? DEFAULT_WAIT_TIMEOUT_MS;
97
201
  const pollIntervalMs = options.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
202
+ const livenessIntervalMs =
203
+ options.livenessIntervalMs ?? DEFAULT_LIVENESS_INTERVAL_MS;
98
204
 
99
205
  // 1. Read stdin
100
206
  const raw = await Bun.stdin.text();
@@ -121,9 +227,9 @@ export async function claudeStopHookCommand(
121
227
  // `src/query.ts` → `transition: { reason: 'stop_hook_blocking' }`). In a
122
228
  // multi-turn workflow, every follow-up turn after the first is therefore
123
229
  // invoked with `stop_hook_active=true`. Returning early here would skip the
124
- // marker write, leaving `waitForIdle` hanging until its 15-minute safety
125
- // timeout, and would skip the queue poll so the workflow's next
126
- // `s.session.query(...)` would never reach Claude.
230
+ // marker write, leaving `waitForIdle` hanging forever, and would skip the
231
+ // queue poll so the workflow's next `s.session.query(...)` would never
232
+ // reach Claude.
127
233
  //
128
234
  // Our design doesn't need the generic loop guard: the hook only emits a
129
235
  // `block` decision when the workflow runtime has written a prompt to the
@@ -135,6 +241,7 @@ export async function claudeStopHookCommand(
135
241
  fs.mkdir(dirs.marker, { recursive: true }),
136
242
  fs.mkdir(dirs.queue, { recursive: true }),
137
243
  fs.mkdir(dirs.release, { recursive: true }),
244
+ fs.mkdir(dirs.pid, { recursive: true }),
138
245
  ]);
139
246
 
140
247
  // 4. Write the marker file directly.
@@ -148,7 +255,7 @@ export async function claudeStopHookCommand(
148
255
  const markerPath = path.join(dirs.marker, payload.session_id);
149
256
  await Bun.write(markerPath, raw);
150
257
 
151
- // 5. Block-poll for either a queued follow-up prompt or a release signal.
258
+ // 5. Wait for either a queued follow-up prompt or a release signal.
152
259
  //
153
260
  // The workflow's `waitForIdle` has already been unblocked by the marker
154
261
  // write above and is now returning control to the user's stage callback.
@@ -164,34 +271,130 @@ export async function claudeStopHookCommand(
164
271
  // `~/.atomic/claude-release/<session_id>`. We exit 0 with no stdout
165
272
  // payload and Claude stops as usual.
166
273
  //
167
- // c. Neither happens within `waitTimeoutMs`. We exit 0 on timeout as a
168
- // safety net — Claude stops rather than hanging its Stop hook forever.
274
+ // c. Neither happens within `waitTimeoutMs`. We exit 0 so Claude Code
275
+ // doesn't hang past its own per-hook timeout. The production default
276
+ // for `waitTimeoutMs` is aligned with the Claude-side hook timeout
277
+ // (~24 days), so this path is effectively unreachable in real runs —
278
+ // it only fires in tests that pass a short override.
279
+ //
280
+ // Delivery uses `fs.watch` on the queue and release dirs for ~0-latency
281
+ // wake-up on create events, with a slower `existsSync` polling fallback
282
+ // in case a watcher notification gets dropped under fs load (same pattern
283
+ // as `watchHILMarker` in `src/sdk/providers/claude.ts`).
169
284
  const queuePath = path.join(dirs.queue, payload.session_id);
170
285
  const releasePath = path.join(dirs.release, payload.session_id);
171
286
 
172
- const deadline = Date.now() + waitTimeoutMs;
173
- while (Date.now() <= deadline) {
287
+ type Hit = { kind: "release" } | { kind: "queue"; prompt: string };
288
+
289
+ const check = async (): Promise<Hit | null> => {
174
290
  if (existsSync(releasePath)) {
175
291
  try { await fs.unlink(releasePath); } catch { /* ENOENT is fine */ }
176
- return 0;
292
+ return { kind: "release" };
177
293
  }
178
294
  if (existsSync(queuePath)) {
179
295
  let prompt: string;
180
296
  try {
181
297
  prompt = await fs.readFile(queuePath, "utf-8");
182
298
  } catch {
183
- return 0;
299
+ // Treat a failed read as a graceful release so the hook still exits.
300
+ return { kind: "release" };
184
301
  }
185
302
  try { await fs.unlink(queuePath); } catch { /* ENOENT is fine */ }
303
+ return { kind: "queue", prompt };
304
+ }
305
+ return null;
306
+ };
307
+
308
+ const emit = (hit: Hit): number => {
309
+ if (hit.kind === "queue") {
186
310
  process.stdout.write(JSON.stringify({
187
311
  decision: "block",
188
- reason: prompt,
312
+ reason: hit.prompt,
189
313
  }));
190
- return 0;
191
314
  }
192
- await Bun.sleep(pollIntervalMs);
315
+ return 0;
316
+ };
317
+
318
+ // Initial synchronous check — the runtime may have enqueued/released before
319
+ // we attached watchers, and without this the hook could hang until the
320
+ // polling fallback fires.
321
+ const early = await check();
322
+ if (early) return emit(early);
323
+
324
+ const ac = new AbortController();
325
+ const overallTimer = setTimeout(() => ac.abort(), waitTimeoutMs);
326
+ let hit: Hit | null = null;
327
+
328
+ // Read the atomic workflow's PID (if the runtime wrote one for this
329
+ // session). Used by the liveness task below to detect an atomic crash.
330
+ const atomicPid = await readAtomicPid(
331
+ path.join(dirs.pid, payload.session_id),
332
+ );
333
+
334
+ // Watch a single directory for change events and resolve `hit` on the
335
+ // first one that matches. `event.filename` is unreliable across OSes
336
+ // (see the comment in `watchHILMarker`), so disk state is authoritative.
337
+ const runWatcher = async (dir: string): Promise<void> => {
338
+ try {
339
+ for await (const _event of watchDir(dir, { signal: ac.signal })) {
340
+ const result = await check();
341
+ if (result) {
342
+ hit = result;
343
+ ac.abort();
344
+ return;
345
+ }
346
+ }
347
+ } catch (e: unknown) {
348
+ if (!(e instanceof Error && e.name === "AbortError")) throw e;
349
+ }
350
+ };
351
+
352
+ // Polling fallback — catches the rare dropped inotify/FSEvent event.
353
+ // Only runs while the watchers are live; `ac.abort()` shuts it down.
354
+ const runPollFallback = async (): Promise<void> => {
355
+ while (!ac.signal.aborted) {
356
+ await abortableSleep(pollIntervalMs, ac.signal);
357
+ if (ac.signal.aborted) return;
358
+ const result = await check();
359
+ if (result) {
360
+ hit = result;
361
+ ac.abort();
362
+ return;
363
+ }
364
+ }
365
+ };
366
+
367
+ // Liveness check — if the atomic workflow process died without writing a
368
+ // release marker (e.g. SIGKILL), this task abandons the wait and lets
369
+ // Claude stop. No-op when there's no pid file (older sessions or non-
370
+ // runtime spawns) so the hook still functions standalone.
371
+ const runLivenessCheck = async (): Promise<void> => {
372
+ if (atomicPid === null) return;
373
+ while (!ac.signal.aborted) {
374
+ await abortableSleep(livenessIntervalMs, ac.signal);
375
+ if (ac.signal.aborted) return;
376
+ if (!isProcessAlive(atomicPid)) {
377
+ // hit stays null → the hook exits 0 without emitting a block decision.
378
+ ac.abort();
379
+ return;
380
+ }
381
+ }
382
+ };
383
+
384
+ try {
385
+ await Promise.all([
386
+ runWatcher(dirs.queue),
387
+ runWatcher(dirs.release),
388
+ runPollFallback(),
389
+ runLivenessCheck(),
390
+ ]);
391
+ } finally {
392
+ clearTimeout(overallTimer);
393
+ ac.abort();
193
394
  }
194
395
 
396
+ if (hit) return emit(hit);
397
+
195
398
  // Timeout — no queued prompt arrived. Let Claude stop normally.
196
399
  return 0;
197
400
  }
@@ -71,6 +71,12 @@ export async function clearClaudeSession(paneId: string): Promise<void> {
71
71
  // Best-effort — if release fails the hook will still exit on its
72
72
  // own safety timeout.
73
73
  }
74
+ try {
75
+ await unlinkAtomicPidFile(state.claudeSessionId);
76
+ } catch {
77
+ // Best-effort — stale pid file is inert; the next session writes a
78
+ // fresh one under its own UUID.
79
+ }
74
80
  }
75
81
  initializedPanes.delete(paneId);
76
82
  }
@@ -258,6 +264,12 @@ export async function createClaudeSession(options: ClaudeSessionOptions): Promis
258
264
  chatFlags,
259
265
  readyTimeoutMs,
260
266
  });
267
+
268
+ // Write our PID so the Stop hook can detect an orphaned session if we
269
+ // crash/get SIGKILL'd without running teardown. Best-effort; failures just
270
+ // mean the hook falls back to waiting out Claude's own hook timeout.
271
+ await writeAtomicPidFile(claudeSessionId);
272
+
261
273
  return claudeSessionId;
262
274
  }
263
275
 
@@ -609,6 +621,38 @@ export async function releaseClaudeSession(claudeSessionId: string): Promise<voi
609
621
  await writeFile(releasePath(claudeSessionId), "");
610
622
  }
611
623
 
624
+ /** @internal */
625
+ function pidDir(): string {
626
+ return claudeHookDirs().pid;
627
+ }
628
+
629
+ /** @internal */
630
+ function pidFilePath(claudeSessionId: string): string {
631
+ return join(pidDir(), claudeSessionId);
632
+ }
633
+
634
+ /**
635
+ * Write `process.pid` to `~/.atomic/claude-pid/<session_id>` so the Stop hook
636
+ * can use it as a liveness signal. If atomic is SIGKILL'd (no chance to run
637
+ * `clearClaudeSession`), the hook detects the dead PID via `process.kill(..,0)`
638
+ * and self-exits instead of parking Claude for the full 24-day timeout.
639
+ */
640
+ async function writeAtomicPidFile(claudeSessionId: string): Promise<void> {
641
+ await mkdir(pidDir(), { recursive: true });
642
+ await writeFile(pidFilePath(claudeSessionId), String(process.pid), "utf-8");
643
+ }
644
+
645
+ /** Remove the pid file for a session. Idempotent — ENOENT is swallowed. */
646
+ async function unlinkAtomicPidFile(claudeSessionId: string): Promise<void> {
647
+ try {
648
+ await unlink(pidFilePath(claudeSessionId));
649
+ } catch (e: unknown) {
650
+ if (!(e instanceof Error && "code" in e && (e as NodeJS.ErrnoException).code === "ENOENT")) {
651
+ throw e;
652
+ }
653
+ }
654
+ }
655
+
612
656
  // ---------------------------------------------------------------------------
613
657
  // Idle detection via marker file watch
614
658
  // ---------------------------------------------------------------------------
@@ -641,11 +685,6 @@ export async function releaseClaudeSession(claudeSessionId: string): Promise<voi
641
685
  * @param claudeSessionId - Claude's session UUID (used to identify marker file)
642
686
  * @param transcriptBeforeCount - number of messages in transcript before this turn
643
687
  */
644
- /** Safety timeout so the workflow's next stage still fires if the Stop hook
645
- * never runs (misconfigured settings, killed Claude process, etc.). 15 min
646
- * covers any reasonable single-turn run without hanging forever. */
647
- const IDLE_TIMEOUT_MS = 15 * 60 * 1000;
648
-
649
688
  /**
650
689
  * @internal Exported for unit tests.
651
690
  */
@@ -658,7 +697,6 @@ export async function waitForIdle(
658
697
  const sessionId = claudeSessionId;
659
698
  const target = markerPath(sessionId);
660
699
  const ac = new AbortController();
661
- const timeout = setTimeout(() => ac.abort(), IDLE_TIMEOUT_MS);
662
700
 
663
701
  // Process a marker that has appeared on disk. Returns a tuple:
664
702
  // [resolved, result] — when resolved=true, waitForIdle should return.
@@ -743,13 +781,10 @@ export async function waitForIdle(
743
781
  }
744
782
  }
745
783
  } catch (e: unknown) {
746
- // AbortError is expected when we call ac.abort() to stop watching, or
747
- // when the safety timeout fires.
784
+ // AbortError is expected when we call ac.abort() to stop watching.
748
785
  if (!(e instanceof Error && e.name === "AbortError")) {
749
786
  throw e;
750
787
  }
751
- } finally {
752
- clearTimeout(timeout);
753
788
  }
754
789
 
755
790
  return [];