npm - @bastani/atomic - Versions diffs - 0.5.27 → 0.5.28-1 - Mend

@bastani/atomic 0.5.27 → 0.5.28-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/commands/cli/claude-stop-hook.d.ts +12 -1
package/dist/commands/cli/claude-stop-hook.d.ts.map +1 -1
package/dist/sdk/providers/claude.d.ts +28 -0
package/dist/sdk/providers/claude.d.ts.map +1 -1
package/package.json +1 -1
package/src/commands/cli/claude-stop-hook.test.ts +51 -5
package/src/commands/cli/claude-stop-hook.ts +219 -16
package/src/sdk/providers/claude.ts +45 -10

package/dist/commands/cli/claude-stop-hook.d.ts CHANGED Viewed

@@ -45,13 +45,24 @@ export declare function claudeHookDirs(): {
     queue: string;
     release: string;
     hil: string;
+    pid: string;
 };
 /** Options for {@link claudeStopHookCommand}. Primarily used by tests to shrink the wait budget. */
 export interface ClaudeStopHookOptions {
     /** Maximum time the hook waits for a queued follow-up prompt before letting Claude stop. */
     waitTimeoutMs?: number;
-    /** Polling interval for queue/release detection. */
+    /**
+     * Interval for the polling fallback that runs alongside the `fs.watch`
+     * watchers in case an inotify/FSEvent notification gets dropped. In the
+     * happy path, watcher events fire on create and the poll never matches.
+     */
     pollIntervalMs?: number;
+    /**
+     * Interval at which the hook checks whether the atomic workflow process
+     * that owns this session is still alive. Coarser than `pollIntervalMs`
+     * because atomic crashing is rare and `process.kill(pid, 0)` is a syscall.
+     */
+    livenessIntervalMs?: number;
 }
 /**
  * Handler for the hidden `_claude-stop-hook` subcommand.

package/dist/commands/cli/claude-stop-hook.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"claude-stop-hook.d.ts","sourceRoot":"","sources":["../../../src/commands/cli/claude-stop-hook.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;~~AAOH~~,yEAAyE;AACzE,MAAM,WAAW,qBAAqB;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAeD;;;;;GAKG;AACH,wBAAgB,cAAc,IAAI;~~IAAE~~,MAAM,EAAE,MAAM,CAAC;~~IAAC~~,KAAK,EAAE,MAAM,CAAC;~~IAAC~~,OAAO,EAAE,MAAM,CAAC;~~IAAC~~,GAAG,EAAE,MAAM,~~CAAA~~;~~CAAE~~,~~CAQhG~~;AAED,oGAAoG;AACpG,MAAM,WAAW,qBAAqB;IACpC,4FAA4F;IAC5F,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB~~,oDAAoD~~;~~IACpD~~,cAAc,CAAC,EAAE,MAAM,CAAC;~~CACzB~~;~~AAKD~~;;;;;;;;GAQG;AACH,wBAAsB,qBAAqB,CACzC,OAAO,GAAE,qBAA0B,GAClC,OAAO,CAAC,MAAM,CAAC,~~CAsGjB~~"}
1	+ {"version":3,"file":"claude-stop-hook.d.ts","sourceRoot":"","sources":["../../../src/commands/cli/claude-stop-hook.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAQH,yEAAyE;AACzE,MAAM,WAAW,qBAAqB;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAeD;;;;;GAKG;AACH,wBAAgB,cAAc,IAAI;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;CACb,CAcA;AAED,oGAAoG;AACpG,MAAM,WAAW,qBAAqB;IACpC,4FAA4F;IAC5F,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAsFD;;;;;;;;GAQG;AACH,wBAAsB,qBAAqB,CACzC,OAAO,GAAE,qBAA0B,GAClC,OAAO,CAAC,MAAM,CAAC,CAyMjB"}

package/dist/sdk/providers/claude.d.ts CHANGED Viewed

@@ -133,6 +133,34 @@ export declare function releasePath(claudeSessionId: string): string;
  * Safe to call more than once.
  */
 export declare function releaseClaudeSession(claudeSessionId: string): Promise<void>;
+/**
+ * Wait for the Claude session to become idle using `fs.watch` on the
+ * `~/.atomic/claude-stop/` marker directory.
+ *
+ * When Claude finishes a turn, the `atomic _claude-stop-hook` Stop hook writes
+ * `~/.atomic/claude-stop/<session_id>`. The write triggers an OS-native
+ * `fs.watch` event on the parent directory — far more reliable than polling
+ * tmux pane glyphs, which vary between Claude Code versions.
+ *
+ * This function is strictly about *idle detection*. HIL is detected separately
+ * by {@link watchHILMarker}; the Stop hook does not fire while
+ * `AskUserQuestion` is pending (the agent loop blocks on deferred tools), so
+ * mixing the two would silently miss the HIL window.
+ *
+ * Algorithm:
+ * 1. Attach the directory watcher, then check for the marker file on disk —
+ *    this closes the race where the Stop hook fires between prompt submission
+ *    and watcher attach.
+ * 2. On any event, re-check the marker file on disk (we intentionally do NOT
+ *    filter by `event.filename`, because on Linux a write can deliver multiple
+ *    events with varying filenames and editor tools may race us).
+ * 3. Read the session transcript via `getSessionMessages` and slice messages
+ *    from `transcriptBeforeCount`.
+ * 4. Clean up the `fs.watch` watcher on any exit path via AbortController.
+ *
+ * @param claudeSessionId       - Claude's session UUID (used to identify marker file)
+ * @param transcriptBeforeCount - number of messages in transcript before this turn
+ */
 /**
  * @internal Exported for unit tests.
  */

package/dist/sdk/providers/claude.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"claude.d.ts","sourceRoot":"","sources":["../../../src/sdk/providers/claude.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EAGL,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,OAAO,IAAI,UAAU,EAC3B,MAAM,gCAAgC,CAAC;AAgCxC;;;;;;GAMG;AACH,wBAAsB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,~~CAWtE~~;AAqID,MAAM,WAAW,oBAAoB;IACnC,kDAAkD;IAClD,MAAM,EAAE,MAAM,CAAC;IACf,sIAAsI;IACtI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,sEAAsE;IACtE,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,mBAAmB,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,MAAM,CAAC,~~CAexF~~;AAsID;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,cAAc,EAAE,GAAG,OAAO,CAUnE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,cAAc,CAClC,eAAe,EAAE,MAAM,EACvB,KAAK,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,EACjC,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,IAAI,CAAC,CAyCf;AAMD;;;;;;GAMG;AACH,wBAAgB,SAAS,IAAI,MAAM,CAElC;AAED;;;;GAIG;AACH,wBAAgB,UAAU,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAE1D;AAED;;;;GAIG;AACH,wBAAgB,QAAQ,IAAI,MAAM,CAEjC;AAED,0EAA0E;AAC1E,wBAAgB,SAAS,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAEzD;AAED;;;;;GAKG;AACH,wBAAgB,UAAU,IAAI,MAAM,CAEnC;AAED,4EAA4E;AAC5E,wBAAgB,WAAW,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAE3D;AAiED;;;;GAIG;AACH,wBAAsB,oBAAoB,CAAC,eAAe,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGjF;~~AAuCD~~;;GAEG;AACH,wBAAsB,WAAW,CAC/B,eAAe,EAAE,MAAM,EACvB,qBAAqB,EAAE,MAAM,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC,~~CAqG3B~~;AAMD,MAAM,WAAW,kBAAkB;IACjC,2CAA2C;IAC3C,MAAM,EAAE,MAAM,CAAC;IACf,yBAAyB;IACzB,MAAM,EAAE,MAAM,CAAC;IACf;;;;OAIG;IACH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,CAAC;CACpC;AAED;;;;;;;;;GASG;AACH,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,aAAa,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CAAC,EACvD,UAAU,EAAE,MAAM,GACjB,MAAM,CAoBR;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAsB,WAAW,CAAC,OAAO,EAAE,kBAAkB,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CA8FxF;AAMD;;;GAGG;AACH,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,MAAM,EAAE,GAAG,SAAS,EAC9B,MAAM,EAAE,MAAM,EAAE,GACf,MAAM,EAAE,CAMV;AAED;;;GAGG;AACH,qBAAa,mBAAmB;IAC9B,QAAQ
,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAoD;gBAGvE,MAAM,EAAE,MAAM,EACd,IAAI,GAAE;QAAE,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;QAAC,cAAc,CAAC,EAAE,MAAM,CAAA;KAAO;IAM9D;;;;;;;OAOG;IACG,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;IAQ9B,yEAAyE;IACnE,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAC5B;AAED;;;GAGG;AACH,qBAAa,oBAAoB;IAC/B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA2C;gBAG/D,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;IAOpC;;;;;;;;OAQG;IACG,KAAK,CACT,MAAM,EAAE,MAAM,EACd,QAAQ,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC7B,OAAO,CAAC,cAAc,EAAE,CAAC;IAQ5B,gEAAgE;IAC1D,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;CAClC;AAMD;;;GAGG;AACH,qBAAa,2BAA2B;IACtC;;;;;OAKG;IACG,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;IAGxB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAC5B;AAED;;;;;;;;;;GAUG;AACH,qBAAa,4BAA4B;IACvC,QAAQ,CAAC,MAAM,MAAM;IACrB;;;;;OAKG;IACH,OAAO,CAAC,cAAc,CAAc;IAEpC,IAAI,SAAS,IAAI,MAAM,CAEtB;IAEK,KAAK,CACT,MAAM,EAAE,MAAM,GAAG,aAAa,CAAC,cAAc,CAAC,EAC9C,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IAqCtB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;CAClC;AAQD;;;;;GAKG;AACH,eAAO,MAAM,sBAAsB,+DAejC,CAAC"}
1	+ {"version":3,"file":"claude.d.ts","sourceRoot":"","sources":["../../../src/sdk/providers/claude.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EAGL,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,OAAO,IAAI,UAAU,EAC3B,MAAM,gCAAgC,CAAC;AAgCxC;;;;;;GAMG;AACH,wBAAsB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAiBtE;AAqID,MAAM,WAAW,oBAAoB;IACnC,kDAAkD;IAClD,MAAM,EAAE,MAAM,CAAC;IACf,sIAAsI;IACtI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,sEAAsE;IACtE,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,mBAAmB,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,MAAM,CAAC,CAqBxF;AAsID;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,cAAc,EAAE,GAAG,OAAO,CAUnE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,cAAc,CAClC,eAAe,EAAE,MAAM,EACvB,KAAK,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,EACjC,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,IAAI,CAAC,CAyCf;AAMD;;;;;;GAMG;AACH,wBAAgB,SAAS,IAAI,MAAM,CAElC;AAED;;;;GAIG;AACH,wBAAgB,UAAU,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAE1D;AAED;;;;GAIG;AACH,wBAAgB,QAAQ,IAAI,MAAM,CAEjC;AAED,0EAA0E;AAC1E,wBAAgB,SAAS,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAEzD;AAED;;;;;GAKG;AACH,wBAAgB,UAAU,IAAI,MAAM,CAEnC;AAED,4EAA4E;AAC5E,wBAAgB,WAAW,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAE3D;AAiED;;;;GAIG;AACH,wBAAsB,oBAAoB,CAAC,eAAe,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGjF;AAsCD;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH;;GAEG;AACH,wBAAsB,WAAW,CAC/B,eAAe,EAAE,MAAM,EACvB,qBAAqB,EAAE,MAAM,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC,CAiG3B;AAMD,MAAM,WAAW,kBAAkB;IACjC,2CAA2C;IAC3C,MAAM,EAAE,MAAM,CAAC;IACf,yBAAyB;IACzB,MAAM,EAAE,MAAM,CAAC;IACf;;;;OAIG;IACH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,CAAC;CACpC;AAED;;;;;;;;;GASG;AACH,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,aAAa,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CAAC,EACvD,UAAU,EAAE,MAAM,GACjB,MAAM,CAoBR;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAsB,WAAW,CAAC,OAAO,EAAE,kBAAkB,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CA8FxF;AAMD;;;GAGG;AACH,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,MAAM,EAAE,GAAG,SAAS,EAC9B,MAAM,EAAE,MAAM,EAAE,GACf,MAAM,EAAE,CAMV;AAED;;;GAGG;AACH,
qBAAa,mBAAmB;IAC9B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAoD;gBAGvE,MAAM,EAAE,MAAM,EACd,IAAI,GAAE;QAAE,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;QAAC,cAAc,CAAC,EAAE,MAAM,CAAA;KAAO;IAM9D;;;;;;;OAOG;IACG,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;IAQ9B,yEAAyE;IACnE,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAC5B;AAED;;;GAGG;AACH,qBAAa,oBAAoB;IAC/B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA2C;gBAG/D,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;IAOpC;;;;;;;;OAQG;IACG,KAAK,CACT,MAAM,EAAE,MAAM,EACd,QAAQ,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC7B,OAAO,CAAC,cAAc,EAAE,CAAC;IAQ5B,gEAAgE;IAC1D,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;CAClC;AAMD;;;GAGG;AACH,qBAAa,2BAA2B;IACtC;;;;;OAKG;IACG,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;IAGxB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAC5B;AAED;;;;;;;;;;GAUG;AACH,qBAAa,4BAA4B;IACvC,QAAQ,CAAC,MAAM,MAAM;IACrB;;;;;OAKG;IACH,OAAO,CAAC,cAAc,CAAc;IAEpC,IAAI,SAAS,IAAI,MAAM,CAEtB;IAEK,KAAK,CACT,MAAM,EAAE,MAAM,GAAG,aAAa,CAAC,cAAc,CAAC,EAC9C,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IAqCtB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;CAClC;AAQD;;;;;GAKG;AACH,eAAO,MAAM,sBAAsB,+DAejC,CAAC"}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bastani/atomic",
-  "version": "0.5.27",
+  "version": "0.5.28-1",
   "description": "Configuration management CLI and SDK for coding agents",
   "type": "module",
   "license": "MIT",

package/src/commands/cli/claude-stop-hook.test.ts CHANGED Viewed

@@ -9,10 +9,11 @@
  * and clean up in `afterEach` so test runs never collide with each other
  * or with real marker/queue/release files.
  *
- * The hook's default wait for a queued follow-up prompt is 15 minutes.
- * Every test here passes a short `waitTimeoutMs` so the hook exits quickly
- * when no queue entry is present — we are testing the branching logic,
- * not the real-world wait budget.
+ * The hook's default wait for a queued follow-up prompt is effectively
+ * unbounded (~24 days) so the workflow can take as long as it needs between
+ * turns. Every test here passes a short `waitTimeoutMs` so the hook exits
+ * quickly when no queue entry is present — we are testing the branching
+ * logic, not the real-world wait budget.
  */
 import { describe, test, expect, afterEach, spyOn } from "bun:test";
@@ -20,7 +21,7 @@ import { access, rm, writeFile, mkdir } from "node:fs/promises";
 import { join } from "node:path";
 import { claudeStopHookCommand, claudeHookDirs } from "./claude-stop-hook.ts";
-const { marker: markerDir, queue: queueDir, release: releaseDir } = claudeHookDirs();
+const { marker: markerDir, queue: queueDir, release: releaseDir, pid: pidDir } = claudeHookDirs();
 const SHORT_TIMEOUT_MS = 300;
@@ -52,6 +53,7 @@ afterEach(async () => {
       rm(join(markerDir, id), { force: true }),
       rm(join(queueDir, id), { force: true }),
       rm(join(releaseDir, id), { force: true }),
+      rm(join(pidDir, id), { force: true }),
     ]);
   }
   sessionIdsToClean.length = 0;
@@ -268,4 +270,48 @@ describe("claudeStopHookCommand", () => {
     // No block decision emitted.
     expect(stdoutChunks.join("")).toBe("");
   });
+  // 9. Dead atomic PID → hook exits without waiting out the full timeout.
+  //
+  // Simulates the case where the atomic workflow was SIGKILL'd between
+  // turns: the pid file on disk points at a process that no longer exists,
+  // so the liveness check should fire and let the hook bail. We pick a
+  // deliberately-bogus PID (2^22 - 1) that is almost certainly unused.
+  test("dead atomic pid triggers liveness exit before the wait timeout", async () => {
+    const sessionId = crypto.randomUUID();
+    sessionIdsToClean.push(sessionId);
+    // Find a PID that doesn't currently exist. `process.kill(pid, 0)` throws
+    // ESRCH for free PIDs; we scan from a high number downward to dodge
+    // system-reserved low PIDs.
+    let deadPid = 4_194_303;
+    while (deadPid > 1) {
+      try {
+        process.kill(deadPid, 0);
+        deadPid -= 1;
+      } catch (e: unknown) {
+        if (e instanceof Error && "code" in e && (e as NodeJS.ErrnoException).code === "ESRCH") break;
+        deadPid -= 1;
+      }
+    }
+    await mkdir(pidDir, { recursive: true });
+    await writeFile(join(pidDir, sessionId), String(deadPid), "utf-8");
+    mockStdin(JSON.stringify({ session_id: sessionId }));
+    // Use a long wait timeout so the test only passes if the liveness check
+    // short-circuits the wait. livenessIntervalMs is short so the test runs fast.
+    const started = Date.now();
+    const code = await claudeStopHookCommand({
+      waitTimeoutMs: 30_000,
+      pollIntervalMs: 10_000,
+      livenessIntervalMs: 50,
+    });
+    const elapsed = Date.now() - started;
+    expect(code).toBe(0);
+    expect(elapsed).toBeLessThan(5_000);
+    expect(await fileExists(join(markerDir, sessionId))).toBe(true);
+  });
 });

package/src/commands/cli/claude-stop-hook.ts CHANGED Viewed

@@ -29,6 +29,7 @@
  */
 import fs from "node:fs/promises";
+import { watch as watchDir } from "node:fs/promises";
 import { existsSync } from "node:fs";
 import path from "node:path";
 import os from "node:os";
@@ -60,13 +61,25 @@ function isClaudeStopHookPayload(value: unknown): value is ClaudeStopHookPayload
  *
  * Exported so tests and `src/sdk/providers/claude.ts` share one source of truth.
  */
-export function claudeHookDirs(): { marker: string; queue: string; release: string; hil: string } {
+export function claudeHookDirs(): {
+  marker: string;
+  queue: string;
+  release: string;
+  hil: string;
+  pid: string;
+} {
   const base = path.join(os.homedir(), ".atomic");
   return {
     marker: path.join(base, "claude-stop"),
     queue: path.join(base, "claude-queue"),
     release: path.join(base, "claude-release"),
     hil: path.join(base, "claude-hil"),
+    // Holds the PID of the atomic workflow process that owns each session.
+    // The Stop hook polls `process.kill(pid, 0)` against this value so that
+    // if atomic is SIGKILL'd (no chance to write a release marker), the hook
+    // can detect the orphaned session and self-exit instead of sitting in
+    // its wait loop for ~24 days.
+    pid: path.join(base, "claude-pid"),
   };
 }
@@ -74,12 +87,103 @@ export function claudeHookDirs(): { marker: string; queue: string; release: stri
 export interface ClaudeStopHookOptions {
   /** Maximum time the hook waits for a queued follow-up prompt before letting Claude stop. */
   waitTimeoutMs?: number;
-  /** Polling interval for queue/release detection. */
+  /**
+   * Interval for the polling fallback that runs alongside the `fs.watch`
+   * watchers in case an inotify/FSEvent notification gets dropped. In the
+   * happy path, watcher events fire on create and the poll never matches.
+   */
   pollIntervalMs?: number;
+  /**
+   * Interval at which the hook checks whether the atomic workflow process
+   * that owns this session is still alive. Coarser than `pollIntervalMs`
+   * because atomic crashing is rare and `process.kill(pid, 0)` is a syscall.
+   */
+  livenessIntervalMs?: number;
 }
-const DEFAULT_WAIT_TIMEOUT_MS = 15 * 60 * 1000;
+/**
+ * Effectively-unbounded default wait budget for the queue/release poll loop.
+ *
+ * The hook holds Claude Code in the Stop phase while the workflow runtime
+ * decides what to do next — either enqueueing a follow-up prompt (delivered
+ * back to Claude as `{decision:"block", reason:...}`) or writing a release
+ * marker on teardown. Any finite default here caps the time the workflow has
+ * between turns: when it expires, the hook exits 0, Claude stops, and the
+ * next `enqueuePrompt` writes to a file nobody's reading — the workflow
+ * hangs on `waitForIdle` for a turn that will never come.
+ *
+ * The Claude-side hook timeout (see `STOP_HOOK_TIMEOUT_SECONDS` in
+ * `src/sdk/providers/claude.ts`) is already set to ~24 days, so matching it
+ * here keeps the two bounds aligned — the hook either runs until the
+ * workflow releases it or until Claude Code itself gives up. Tests override
+ * `waitTimeoutMs` via options to keep runs fast.
+ *
+ * Expressed in ms: 2_147_483 s × 1000 = 2_147_483_000 ms, just under the
+ * max safe `setTimeout` value (2^31 - 1).
+ */
+const DEFAULT_WAIT_TIMEOUT_MS = 2_147_483_000;
 const DEFAULT_POLL_INTERVAL_MS = 100;
+const DEFAULT_LIVENESS_INTERVAL_MS = 5_000;
+/**
+ * Read the atomic PID that owns this session from `~/.atomic/claude-pid/<id>`,
+ * or return null if the file is missing / malformed. Missing is fine: older
+ * runtimes didn't write one, and we just skip the liveness check in that case.
+ */
+async function readAtomicPid(pidFilePath: string): Promise<number | null> {
+  let raw: string;
+  try {
+    raw = await fs.readFile(pidFilePath, "utf-8");
+  } catch {
+    return null;
+  }
+  const parsed = Number.parseInt(raw.trim(), 10);
+  return Number.isInteger(parsed) && parsed > 0 ? parsed : null;
+}
+/**
+ * Sleep that resolves early when `signal` is aborted. Used by the hook's
+ * wait loops so `ac.abort()` unblocks everything immediately instead of
+ * waiting for the next wake-up tick — otherwise a task that detects a hit
+ * (e.g. liveness check) can't meaningfully cancel its siblings.
+ */
+function abortableSleep(ms: number, signal: AbortSignal): Promise<void> {
+  return new Promise<void>((resolve) => {
+    if (signal.aborted) {
+      resolve();
+      return;
+    }
+    const timer = setTimeout(() => {
+      signal.removeEventListener("abort", onAbort);
+      resolve();
+    }, ms);
+    const onAbort = (): void => {
+      clearTimeout(timer);
+      resolve();
+    };
+    signal.addEventListener("abort", onAbort, { once: true });
+  });
+}
+/**
+ * True when a process with `pid` exists. Uses signal `0`, which performs the
+ * permission/existence check without delivering a signal. ESRCH means gone,
+ * EPERM means alive-but-not-ours (still alive for our purposes).
+ */
+function isProcessAlive(pid: number): boolean {
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch (e: unknown) {
+    if (e instanceof Error && "code" in e) {
+      const code = (e as NodeJS.ErrnoException).code;
+      if (code === "EPERM") return true;
+      if (code === "ESRCH") return false;
+    }
+    // Unknown error — assume alive to avoid false-positive teardown.
+    return true;
+  }
+}
 /**
  * Handler for the hidden `_claude-stop-hook` subcommand.
@@ -95,6 +199,8 @@ export async function claudeStopHookCommand(
 ): Promise<number> {
   const waitTimeoutMs = options.waitTimeoutMs ?? DEFAULT_WAIT_TIMEOUT_MS;
   const pollIntervalMs = options.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
+  const livenessIntervalMs =
+    options.livenessIntervalMs ?? DEFAULT_LIVENESS_INTERVAL_MS;
   // 1. Read stdin
   const raw = await Bun.stdin.text();
@@ -121,9 +227,9 @@ export async function claudeStopHookCommand(
   // `src/query.ts` → `transition: { reason: 'stop_hook_blocking' }`). In a
   // multi-turn workflow, every follow-up turn after the first is therefore
   // invoked with `stop_hook_active=true`. Returning early here would skip the
-  // marker write, leaving `waitForIdle` hanging until its 15-minute safety
-  // timeout, and would skip the queue poll so the workflow's next
-  // `s.session.query(...)` would never reach Claude.
+  // marker write, leaving `waitForIdle` hanging forever, and would skip the
+  // queue poll so the workflow's next `s.session.query(...)` would never
+  // reach Claude.
   //
   // Our design doesn't need the generic loop guard: the hook only emits a
   // `block` decision when the workflow runtime has written a prompt to the
@@ -135,6 +241,7 @@ export async function claudeStopHookCommand(
     fs.mkdir(dirs.marker, { recursive: true }),
     fs.mkdir(dirs.queue, { recursive: true }),
     fs.mkdir(dirs.release, { recursive: true }),
+    fs.mkdir(dirs.pid, { recursive: true }),
   ]);
   // 4. Write the marker file directly.
@@ -148,7 +255,7 @@ export async function claudeStopHookCommand(
   const markerPath = path.join(dirs.marker, payload.session_id);
   await Bun.write(markerPath, raw);
-  // 5. Block-poll for either a queued follow-up prompt or a release signal.
+  // 5. Wait for either a queued follow-up prompt or a release signal.
   //
   // The workflow's `waitForIdle` has already been unblocked by the marker
   // write above and is now returning control to the user's stage callback.
@@ -164,34 +271,130 @@ export async function claudeStopHookCommand(
   //      `~/.atomic/claude-release/<session_id>`. We exit 0 with no stdout
   //      payload and Claude stops as usual.
   //
-  //   c. Neither happens within `waitTimeoutMs`. We exit 0 on timeout as a
-  //      safety net — Claude stops rather than hanging its Stop hook forever.
+  //   c. Neither happens within `waitTimeoutMs`. We exit 0 so Claude Code
+  //      doesn't hang past its own per-hook timeout. The production default
+  //      for `waitTimeoutMs` is aligned with the Claude-side hook timeout
+  //      (~24 days), so this path is effectively unreachable in real runs —
+  //      it only fires in tests that pass a short override.
+  //
+  // Delivery uses `fs.watch` on the queue and release dirs for ~0-latency
+  // wake-up on create events, with a slower `existsSync` polling fallback
+  // in case a watcher notification gets dropped under fs load (same pattern
+  // as `watchHILMarker` in `src/sdk/providers/claude.ts`).
   const queuePath = path.join(dirs.queue, payload.session_id);
   const releasePath = path.join(dirs.release, payload.session_id);
-  const deadline = Date.now() + waitTimeoutMs;
-  while (Date.now() <= deadline) {
+  type Hit = { kind: "release" } | { kind: "queue"; prompt: string };
+  const check = async (): Promise<Hit | null> => {
     if (existsSync(releasePath)) {
       try { await fs.unlink(releasePath); } catch { /* ENOENT is fine */ }
-      return 0;
+      return { kind: "release" };
     }
     if (existsSync(queuePath)) {
       let prompt: string;
       try {
         prompt = await fs.readFile(queuePath, "utf-8");
       } catch {
-        return 0;
+        // Treat a failed read as a graceful release so the hook still exits.
+        return { kind: "release" };
       }
       try { await fs.unlink(queuePath); } catch { /* ENOENT is fine */ }
+      return { kind: "queue", prompt };
+    }
+    return null;
+  };
+  const emit = (hit: Hit): number => {
+    if (hit.kind === "queue") {
       process.stdout.write(JSON.stringify({
         decision: "block",
-        reason: prompt,
+        reason: hit.prompt,
       }));
-      return 0;
     }
-    await Bun.sleep(pollIntervalMs);
+    return 0;
+  };
+  // Initial synchronous check — the runtime may have enqueued/released before
+  // we attached watchers, and without this the hook could hang until the
+  // polling fallback fires.
+  const early = await check();
+  if (early) return emit(early);
+  const ac = new AbortController();
+  const overallTimer = setTimeout(() => ac.abort(), waitTimeoutMs);
+  let hit: Hit | null = null;
+  // Read the atomic workflow's PID (if the runtime wrote one for this
+  // session). Used by the liveness task below to detect an atomic crash.
+  const atomicPid = await readAtomicPid(
+    path.join(dirs.pid, payload.session_id),
+  );
+  // Watch a single directory for change events and resolve `hit` on the
+  // first one that matches. `event.filename` is unreliable across OSes
+  // (see the comment in `watchHILMarker`), so disk state is authoritative.
+  const runWatcher = async (dir: string): Promise<void> => {
+    try {
+      for await (const _event of watchDir(dir, { signal: ac.signal })) {
+        const result = await check();
+        if (result) {
+          hit = result;
+          ac.abort();
+          return;
+        }
+      }
+    } catch (e: unknown) {
+      if (!(e instanceof Error && e.name === "AbortError")) throw e;
+    }
+  };
+  // Polling fallback — catches the rare dropped inotify/FSEvent event.
+  // Only runs while the watchers are live; `ac.abort()` shuts it down.
+  const runPollFallback = async (): Promise<void> => {
+    while (!ac.signal.aborted) {
+      await abortableSleep(pollIntervalMs, ac.signal);
+      if (ac.signal.aborted) return;
+      const result = await check();
+      if (result) {
+        hit = result;
+        ac.abort();
+        return;
+      }
+    }
+  };
+  // Liveness check — if the atomic workflow process died without writing a
+  // release marker (e.g. SIGKILL), this task abandons the wait and lets
+  // Claude stop. No-op when there's no pid file (older sessions or non-
+  // runtime spawns) so the hook still functions standalone.
+  const runLivenessCheck = async (): Promise<void> => {
+    if (atomicPid === null) return;
+    while (!ac.signal.aborted) {
+      await abortableSleep(livenessIntervalMs, ac.signal);
+      if (ac.signal.aborted) return;
+      if (!isProcessAlive(atomicPid)) {
+        // hit stays null → the hook exits 0 without emitting a block decision.
+        ac.abort();
+        return;
+      }
+    }
+  };
+  try {
+    await Promise.all([
+      runWatcher(dirs.queue),
+      runWatcher(dirs.release),
+      runPollFallback(),
+      runLivenessCheck(),
+    ]);
+  } finally {
+    clearTimeout(overallTimer);
+    ac.abort();
   }
+  if (hit) return emit(hit);
   // Timeout — no queued prompt arrived. Let Claude stop normally.
   return 0;
 }

package/src/sdk/providers/claude.ts CHANGED Viewed

@@ -71,6 +71,12 @@ export async function clearClaudeSession(paneId: string): Promise<void> {
       // Best-effort — if release fails the hook will still exit on its
       // own safety timeout.
     }
+    try {
+      await unlinkAtomicPidFile(state.claudeSessionId);
+    } catch {
+      // Best-effort — stale pid file is inert; the next session writes a
+      // fresh one under its own UUID.
+    }
   }
   initializedPanes.delete(paneId);
 }
@@ -258,6 +264,12 @@ export async function createClaudeSession(options: ClaudeSessionOptions): Promis
     chatFlags,
     readyTimeoutMs,
   });
+  // Write our PID so the Stop hook can detect an orphaned session if we
+  // crash/get SIGKILL'd without running teardown. Best-effort; failures just
+  // mean the hook falls back to waiting out Claude's own hook timeout.
+  await writeAtomicPidFile(claudeSessionId);
   return claudeSessionId;
 }
@@ -609,6 +621,38 @@ export async function releaseClaudeSession(claudeSessionId: string): Promise<voi
   await writeFile(releasePath(claudeSessionId), "");
 }
+/** @internal */
+function pidDir(): string {
+  return claudeHookDirs().pid;
+}
+/** @internal */
+function pidFilePath(claudeSessionId: string): string {
+  return join(pidDir(), claudeSessionId);
+}
+/**
+ * Write `process.pid` to `~/.atomic/claude-pid/<session_id>` so the Stop hook
+ * can use it as a liveness signal. If atomic is SIGKILL'd (no chance to run
+ * `clearClaudeSession`), the hook detects the dead PID via `process.kill(..,0)`
+ * and self-exits instead of parking Claude for the full 24-day timeout.
+ */
+async function writeAtomicPidFile(claudeSessionId: string): Promise<void> {
+  await mkdir(pidDir(), { recursive: true });
+  await writeFile(pidFilePath(claudeSessionId), String(process.pid), "utf-8");
+}
+/** Remove the pid file for a session. Idempotent — ENOENT is swallowed. */
+async function unlinkAtomicPidFile(claudeSessionId: string): Promise<void> {
+  try {
+    await unlink(pidFilePath(claudeSessionId));
+  } catch (e: unknown) {
+    if (!(e instanceof Error && "code" in e && (e as NodeJS.ErrnoException).code === "ENOENT")) {
+      throw e;
+    }
+  }
+}
 // ---------------------------------------------------------------------------
 // Idle detection via marker file watch
 // ---------------------------------------------------------------------------
@@ -641,11 +685,6 @@ export async function releaseClaudeSession(claudeSessionId: string): Promise<voi
  * @param claudeSessionId       - Claude's session UUID (used to identify marker file)
  * @param transcriptBeforeCount - number of messages in transcript before this turn
  */
-/** Safety timeout so the workflow's next stage still fires if the Stop hook
- * never runs (misconfigured settings, killed Claude process, etc.). 15 min
- * covers any reasonable single-turn run without hanging forever. */
-const IDLE_TIMEOUT_MS = 15 * 60 * 1000;
 /**
  * @internal Exported for unit tests.
  */
@@ -658,7 +697,6 @@ export async function waitForIdle(
   const sessionId = claudeSessionId;
   const target = markerPath(sessionId);
   const ac = new AbortController();
-  const timeout = setTimeout(() => ac.abort(), IDLE_TIMEOUT_MS);
   // Process a marker that has appeared on disk. Returns a tuple:
   //   [resolved, result] — when resolved=true, waitForIdle should return.
@@ -743,13 +781,10 @@ export async function waitForIdle(
       }
     }
   } catch (e: unknown) {
-    // AbortError is expected when we call ac.abort() to stop watching, or
-    // when the safety timeout fires.
+    // AbortError is expected when we call ac.abort() to stop watching.
     if (!(e instanceof Error && e.name === "AbortError")) {
       throw e;
     }
-  } finally {
-    clearTimeout(timeout);
   }
   return [];