@bastani/atomic 0.5.27 → 0.5.28-1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/cli/claude-stop-hook.d.ts +12 -1
- package/dist/commands/cli/claude-stop-hook.d.ts.map +1 -1
- package/dist/sdk/providers/claude.d.ts +28 -0
- package/dist/sdk/providers/claude.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/commands/cli/claude-stop-hook.test.ts +51 -5
- package/src/commands/cli/claude-stop-hook.ts +219 -16
- package/src/sdk/providers/claude.ts +45 -10
|
@@ -45,13 +45,24 @@ export declare function claudeHookDirs(): {
|
|
|
45
45
|
queue: string;
|
|
46
46
|
release: string;
|
|
47
47
|
hil: string;
|
|
48
|
+
pid: string;
|
|
48
49
|
};
|
|
49
50
|
/** Options for {@link claudeStopHookCommand}. Primarily used by tests to shrink the wait budget. */
|
|
50
51
|
export interface ClaudeStopHookOptions {
|
|
51
52
|
/** Maximum time the hook waits for a queued follow-up prompt before letting Claude stop. */
|
|
52
53
|
waitTimeoutMs?: number;
|
|
53
|
-
/**
|
|
54
|
+
/**
|
|
55
|
+
* Interval for the polling fallback that runs alongside the `fs.watch`
|
|
56
|
+
* watchers in case an inotify/FSEvent notification gets dropped. In the
|
|
57
|
+
* happy path, watcher events fire on create and the poll never matches.
|
|
58
|
+
*/
|
|
54
59
|
pollIntervalMs?: number;
|
|
60
|
+
/**
|
|
61
|
+
* Interval at which the hook checks whether the atomic workflow process
|
|
62
|
+
* that owns this session is still alive. Coarser than `pollIntervalMs`
|
|
63
|
+
* because atomic crashing is rare and `process.kill(pid, 0)` is a syscall.
|
|
64
|
+
*/
|
|
65
|
+
livenessIntervalMs?: number;
|
|
55
66
|
}
|
|
56
67
|
/**
|
|
57
68
|
* Handler for the hidden `_claude-stop-hook` subcommand.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"claude-stop-hook.d.ts","sourceRoot":"","sources":["../../../src/commands/cli/claude-stop-hook.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;
|
|
1
|
+
{"version":3,"file":"claude-stop-hook.d.ts","sourceRoot":"","sources":["../../../src/commands/cli/claude-stop-hook.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAQH,yEAAyE;AACzE,MAAM,WAAW,qBAAqB;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAeD;;;;;GAKG;AACH,wBAAgB,cAAc,IAAI;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;CACb,CAcA;AAED,oGAAoG;AACpG,MAAM,WAAW,qBAAqB;IACpC,4FAA4F;IAC5F,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAsFD;;;;;;;;GAQG;AACH,wBAAsB,qBAAqB,CACzC,OAAO,GAAE,qBAA0B,GAClC,OAAO,CAAC,MAAM,CAAC,CAyMjB"}
|
|
@@ -133,6 +133,34 @@ export declare function releasePath(claudeSessionId: string): string;
|
|
|
133
133
|
* Safe to call more than once.
|
|
134
134
|
*/
|
|
135
135
|
export declare function releaseClaudeSession(claudeSessionId: string): Promise<void>;
|
|
136
|
+
/**
|
|
137
|
+
* Wait for the Claude session to become idle using `fs.watch` on the
|
|
138
|
+
* `~/.atomic/claude-stop/` marker directory.
|
|
139
|
+
*
|
|
140
|
+
* When Claude finishes a turn, the `atomic _claude-stop-hook` Stop hook writes
|
|
141
|
+
* `~/.atomic/claude-stop/<session_id>`. The write triggers an OS-native
|
|
142
|
+
* `fs.watch` event on the parent directory — far more reliable than polling
|
|
143
|
+
* tmux pane glyphs, which vary between Claude Code versions.
|
|
144
|
+
*
|
|
145
|
+
* This function is strictly about *idle detection*. HIL is detected separately
|
|
146
|
+
* by {@link watchHILMarker}; the Stop hook does not fire while
|
|
147
|
+
* `AskUserQuestion` is pending (the agent loop blocks on deferred tools), so
|
|
148
|
+
* mixing the two would silently miss the HIL window.
|
|
149
|
+
*
|
|
150
|
+
* Algorithm:
|
|
151
|
+
* 1. Attach the directory watcher, then check for the marker file on disk —
|
|
152
|
+
* this closes the race where the Stop hook fires between prompt submission
|
|
153
|
+
* and watcher attach.
|
|
154
|
+
* 2. On any event, re-check the marker file on disk (we intentionally do NOT
|
|
155
|
+
* filter by `event.filename`, because on Linux a write can deliver multiple
|
|
156
|
+
* events with varying filenames and editor tools may race us).
|
|
157
|
+
* 3. Read the session transcript via `getSessionMessages` and slice messages
|
|
158
|
+
* from `transcriptBeforeCount`.
|
|
159
|
+
* 4. Clean up the `fs.watch` watcher on any exit path via AbortController.
|
|
160
|
+
*
|
|
161
|
+
* @param claudeSessionId - Claude's session UUID (used to identify marker file)
|
|
162
|
+
* @param transcriptBeforeCount - number of messages in transcript before this turn
|
|
163
|
+
*/
|
|
136
164
|
/**
|
|
137
165
|
* @internal Exported for unit tests.
|
|
138
166
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"claude.d.ts","sourceRoot":"","sources":["../../../src/sdk/providers/claude.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EAGL,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,OAAO,IAAI,UAAU,EAC3B,MAAM,gCAAgC,CAAC;AAgCxC;;;;;;GAMG;AACH,wBAAsB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,
|
|
1
|
+
{"version":3,"file":"claude.d.ts","sourceRoot":"","sources":["../../../src/sdk/providers/claude.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EAGL,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,OAAO,IAAI,UAAU,EAC3B,MAAM,gCAAgC,CAAC;AAgCxC;;;;;;GAMG;AACH,wBAAsB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAiBtE;AAqID,MAAM,WAAW,oBAAoB;IACnC,kDAAkD;IAClD,MAAM,EAAE,MAAM,CAAC;IACf,sIAAsI;IACtI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,sEAAsE;IACtE,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,mBAAmB,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,MAAM,CAAC,CAqBxF;AAsID;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,cAAc,EAAE,GAAG,OAAO,CAUnE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,cAAc,CAClC,eAAe,EAAE,MAAM,EACvB,KAAK,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,EACjC,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,IAAI,CAAC,CAyCf;AAMD;;;;;;GAMG;AACH,wBAAgB,SAAS,IAAI,MAAM,CAElC;AAED;;;;GAIG;AACH,wBAAgB,UAAU,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAE1D;AAED;;;;GAIG;AACH,wBAAgB,QAAQ,IAAI,MAAM,CAEjC;AAED,0EAA0E;AAC1E,wBAAgB,SAAS,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAEzD;AAED;;;;;GAKG;AACH,wBAAgB,UAAU,IAAI,MAAM,CAEnC;AAED,4EAA4E;AAC5E,wBAAgB,WAAW,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAE3D;AAiED;;;;GAIG;AACH,wBAAsB,oBAAoB,CAAC,eAAe,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGjF;AAsCD;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH;;GAEG;AACH,wBAAsB,WAAW,CAC/B,eAAe,EAAE,MAAM,EACvB,qBAAqB,EAAE,MAAM,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC,CAiG3B;AAMD,MAAM,WAAW,kBAAkB;IACjC,2CAA2C;IAC3C,MAAM,EAAE,MAAM,CAAC;IACf,yBAAyB;IACzB,MAAM,EAAE,MAAM,CAAC;IACf;;;;OAIG;IACH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,CAAC;CACpC;AAED;;;;;;;;;GASG;AACH,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,aAAa,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CAAC,EACvD,UAAU,EAAE,MAAM,GACjB,MAAM,CAoBR;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAsB,WAAW,CAAC,OAAO,EAAE,kBAAkB,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CA8FxF;AAMD;;;GAGG;AACH,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,MAAM,EAAE,GAAG,SAAS,EAC9B,MAAM,EAAE,MAAM,EAAE,GACf,MAAM,EAAE,CAMV;AAED;;;GAGG;AACH,qBAA
a,mBAAmB;IAC9B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAoD;gBAGvE,MAAM,EAAE,MAAM,EACd,IAAI,GAAE;QAAE,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;QAAC,cAAc,CAAC,EAAE,MAAM,CAAA;KAAO;IAM9D;;;;;;;OAOG;IACG,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;IAQ9B,yEAAyE;IACnE,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAC5B;AAED;;;GAGG;AACH,qBAAa,oBAAoB;IAC/B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA2C;gBAG/D,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;IAOpC;;;;;;;;OAQG;IACG,KAAK,CACT,MAAM,EAAE,MAAM,EACd,QAAQ,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC7B,OAAO,CAAC,cAAc,EAAE,CAAC;IAQ5B,gEAAgE;IAC1D,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;CAClC;AAMD;;;GAGG;AACH,qBAAa,2BAA2B;IACtC;;;;;OAKG;IACG,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;IAGxB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAC5B;AAED;;;;;;;;;;GAUG;AACH,qBAAa,4BAA4B;IACvC,QAAQ,CAAC,MAAM,MAAM;IACrB;;;;;OAKG;IACH,OAAO,CAAC,cAAc,CAAc;IAEpC,IAAI,SAAS,IAAI,MAAM,CAEtB;IAEK,KAAK,CACT,MAAM,EAAE,MAAM,GAAG,aAAa,CAAC,cAAc,CAAC,EAC9C,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC5B,OAAO,CAAC,cAAc,EAAE,CAAC;IAqCtB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;CAClC;AAQD;;;;;GAKG;AACH,eAAO,MAAM,sBAAsB,+DAejC,CAAC"}
|
package/package.json
CHANGED
|
@@ -9,10 +9,11 @@
|
|
|
9
9
|
* and clean up in `afterEach` so test runs never collide with each other
|
|
10
10
|
* or with real marker/queue/release files.
|
|
11
11
|
*
|
|
12
|
-
* The hook's default wait for a queued follow-up prompt is
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
12
|
+
* The hook's default wait for a queued follow-up prompt is effectively
|
|
13
|
+
* unbounded (~24 days) so the workflow can take as long as it needs between
|
|
14
|
+
* turns. Every test here passes a short `waitTimeoutMs` so the hook exits
|
|
15
|
+
* quickly when no queue entry is present — we are testing the branching
|
|
16
|
+
* logic, not the real-world wait budget.
|
|
16
17
|
*/
|
|
17
18
|
|
|
18
19
|
import { describe, test, expect, afterEach, spyOn } from "bun:test";
|
|
@@ -20,7 +21,7 @@ import { access, rm, writeFile, mkdir } from "node:fs/promises";
|
|
|
20
21
|
import { join } from "node:path";
|
|
21
22
|
import { claudeStopHookCommand, claudeHookDirs } from "./claude-stop-hook.ts";
|
|
22
23
|
|
|
23
|
-
const { marker: markerDir, queue: queueDir, release: releaseDir } = claudeHookDirs();
|
|
24
|
+
const { marker: markerDir, queue: queueDir, release: releaseDir, pid: pidDir } = claudeHookDirs();
|
|
24
25
|
|
|
25
26
|
const SHORT_TIMEOUT_MS = 300;
|
|
26
27
|
|
|
@@ -52,6 +53,7 @@ afterEach(async () => {
|
|
|
52
53
|
rm(join(markerDir, id), { force: true }),
|
|
53
54
|
rm(join(queueDir, id), { force: true }),
|
|
54
55
|
rm(join(releaseDir, id), { force: true }),
|
|
56
|
+
rm(join(pidDir, id), { force: true }),
|
|
55
57
|
]);
|
|
56
58
|
}
|
|
57
59
|
sessionIdsToClean.length = 0;
|
|
@@ -268,4 +270,48 @@ describe("claudeStopHookCommand", () => {
|
|
|
268
270
|
// No block decision emitted.
|
|
269
271
|
expect(stdoutChunks.join("")).toBe("");
|
|
270
272
|
});
|
|
273
|
+
|
|
274
|
+
// 9. Dead atomic PID → hook exits without waiting out the full timeout.
|
|
275
|
+
//
|
|
276
|
+
// Simulates the case where the atomic workflow was SIGKILL'd between
|
|
277
|
+
// turns: the pid file on disk points at a process that no longer exists,
|
|
278
|
+
// so the liveness check should fire and let the hook bail. We pick a
|
|
279
|
+
// deliberately-bogus PID (2^22 - 1) that is almost certainly unused.
|
|
280
|
+
test("dead atomic pid triggers liveness exit before the wait timeout", async () => {
|
|
281
|
+
const sessionId = crypto.randomUUID();
|
|
282
|
+
sessionIdsToClean.push(sessionId);
|
|
283
|
+
|
|
284
|
+
// Find a PID that doesn't currently exist. `process.kill(pid, 0)` throws
|
|
285
|
+
// ESRCH for free PIDs; we scan from a high number downward to dodge
|
|
286
|
+
// system-reserved low PIDs.
|
|
287
|
+
let deadPid = 4_194_303;
|
|
288
|
+
while (deadPid > 1) {
|
|
289
|
+
try {
|
|
290
|
+
process.kill(deadPid, 0);
|
|
291
|
+
deadPid -= 1;
|
|
292
|
+
} catch (e: unknown) {
|
|
293
|
+
if (e instanceof Error && "code" in e && (e as NodeJS.ErrnoException).code === "ESRCH") break;
|
|
294
|
+
deadPid -= 1;
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
await mkdir(pidDir, { recursive: true });
|
|
299
|
+
await writeFile(join(pidDir, sessionId), String(deadPid), "utf-8");
|
|
300
|
+
|
|
301
|
+
mockStdin(JSON.stringify({ session_id: sessionId }));
|
|
302
|
+
|
|
303
|
+
// Use a long wait timeout so the test only passes if the liveness check
|
|
304
|
+
// short-circuits the wait. livenessIntervalMs is short so the test runs fast.
|
|
305
|
+
const started = Date.now();
|
|
306
|
+
const code = await claudeStopHookCommand({
|
|
307
|
+
waitTimeoutMs: 30_000,
|
|
308
|
+
pollIntervalMs: 10_000,
|
|
309
|
+
livenessIntervalMs: 50,
|
|
310
|
+
});
|
|
311
|
+
const elapsed = Date.now() - started;
|
|
312
|
+
|
|
313
|
+
expect(code).toBe(0);
|
|
314
|
+
expect(elapsed).toBeLessThan(5_000);
|
|
315
|
+
expect(await fileExists(join(markerDir, sessionId))).toBe(true);
|
|
316
|
+
});
|
|
271
317
|
});
|
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
*/
|
|
30
30
|
|
|
31
31
|
import fs from "node:fs/promises";
|
|
32
|
+
import { watch as watchDir } from "node:fs/promises";
|
|
32
33
|
import { existsSync } from "node:fs";
|
|
33
34
|
import path from "node:path";
|
|
34
35
|
import os from "node:os";
|
|
@@ -60,13 +61,25 @@ function isClaudeStopHookPayload(value: unknown): value is ClaudeStopHookPayload
|
|
|
60
61
|
*
|
|
61
62
|
* Exported so tests and `src/sdk/providers/claude.ts` share one source of truth.
|
|
62
63
|
*/
|
|
63
|
-
export function claudeHookDirs(): {
|
|
64
|
+
export function claudeHookDirs(): {
  marker: string;
  queue: string;
  release: string;
  hil: string;
  pid: string;
} {
  // Every hook directory is a direct child of `~/.atomic`.
  const atomicHome = path.join(os.homedir(), ".atomic");
  const under = (name: string): string => path.join(atomicHome, name);

  return {
    marker: under("claude-stop"),
    queue: under("claude-queue"),
    release: under("claude-release"),
    hil: under("claude-hil"),
    // One file per session containing the PID of the atomic workflow
    // process that owns it. The Stop hook probes that PID with
    // `process.kill(pid, 0)`; when atomic was SIGKILL'd and never wrote a
    // release marker, the dead PID lets the hook notice the orphaned session
    // and exit rather than waiting out its ~24-day budget.
    pid: under("claude-pid"),
  };
}
|
|
72
85
|
|
|
@@ -74,12 +87,103 @@ export function claudeHookDirs(): { marker: string; queue: string; release: stri
|
|
|
74
87
|
export interface ClaudeStopHookOptions {
|
|
75
88
|
/** Maximum time the hook waits for a queued follow-up prompt before letting Claude stop. */
|
|
76
89
|
waitTimeoutMs?: number;
|
|
77
|
-
/**
|
|
90
|
+
/**
|
|
91
|
+
* Interval for the polling fallback that runs alongside the `fs.watch`
|
|
92
|
+
* watchers in case an inotify/FSEvent notification gets dropped. In the
|
|
93
|
+
* happy path, watcher events fire on create and the poll never matches.
|
|
94
|
+
*/
|
|
78
95
|
pollIntervalMs?: number;
|
|
96
|
+
/**
|
|
97
|
+
* Interval at which the hook checks whether the atomic workflow process
|
|
98
|
+
* that owns this session is still alive. Coarser than `pollIntervalMs`
|
|
99
|
+
* because atomic crashing is rare and `process.kill(pid, 0)` is a syscall.
|
|
100
|
+
*/
|
|
101
|
+
livenessIntervalMs?: number;
|
|
79
102
|
}
|
|
80
103
|
|
|
81
|
-
|
|
104
|
+
/**
|
|
105
|
+
* Effectively-unbounded default wait budget for the queue/release poll loop.
|
|
106
|
+
*
|
|
107
|
+
* The hook holds Claude Code in the Stop phase while the workflow runtime
|
|
108
|
+
* decides what to do next — either enqueueing a follow-up prompt (delivered
|
|
109
|
+
* back to Claude as `{decision:"block", reason:...}`) or writing a release
|
|
110
|
+
* marker on teardown. Any finite default here caps the time the workflow has
|
|
111
|
+
* between turns: when it expires, the hook exits 0, Claude stops, and the
|
|
112
|
+
* next `enqueuePrompt` writes to a file nobody's reading — the workflow
|
|
113
|
+
* hangs on `waitForIdle` for a turn that will never come.
|
|
114
|
+
*
|
|
115
|
+
* The Claude-side hook timeout (see `STOP_HOOK_TIMEOUT_SECONDS` in
|
|
116
|
+
* `src/sdk/providers/claude.ts`) is already set to ~24 days, so matching it
|
|
117
|
+
* here keeps the two bounds aligned — the hook either runs until the
|
|
118
|
+
* workflow releases it or until Claude Code itself gives up. Tests override
|
|
119
|
+
* `waitTimeoutMs` via options to keep runs fast.
|
|
120
|
+
*
|
|
121
|
+
* Expressed in ms: 2_147_483 s × 1000 = 2_147_483_000 ms, just under the
|
|
122
|
+
* max safe `setTimeout` value (2^31 - 1).
|
|
123
|
+
*/
|
|
124
|
+
const DEFAULT_WAIT_TIMEOUT_MS = 2_147_483_000;
|
|
82
125
|
const DEFAULT_POLL_INTERVAL_MS = 100;
|
|
126
|
+
const DEFAULT_LIVENESS_INTERVAL_MS = 5_000;
|
|
127
|
+
|
|
128
|
+
/**
 * Read the atomic PID recorded in a session's pid file.
 *
 * Returns `null` when the file cannot be read (e.g. it does not exist) or
 * when its trimmed content is not a plain positive decimal integer. A `null`
 * result means "no liveness information"; callers skip the liveness check.
 *
 * @param pidFilePath - path of the pid file to read
 * @returns the recorded PID, or `null` if missing / malformed
 */
async function readAtomicPid(pidFilePath: string): Promise<number | null> {
  let raw: string;
  try {
    raw = await fs.readFile(pidFilePath, "utf-8");
  } catch {
    return null;
  }
  const text = raw.trim();
  // `Number.parseInt` stops at the first non-digit, so content such as
  // "123abc" or "12.5" would be silently accepted as a PID. Require the whole
  // (trimmed) file to be decimal digits before converting.
  if (!/^\d+$/.test(text)) return null;
  const parsed = Number(text);
  // Rejects 0 and values too large to be represented exactly.
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : null;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Sleep that resolves early when `signal` is aborted. Used by the hook's
 * wait loops so `ac.abort()` unblocks everything immediately instead of
 * waiting for the next wake-up tick.
 *
 * The promise never rejects: it resolves on timer expiry, on abort, or
 * immediately if `signal` is already aborted. Callers distinguish the cases
 * by inspecting `signal.aborted` afterwards.
 *
 * Delays above the largest value timers accept (2^31 - 1 ms) are clamped to
 * that maximum; without the clamp the runtime coerces such a delay to ~1 ms,
 * turning a "very long" sleep into a near-immediate wake-up.
 *
 * @param ms - requested sleep duration in milliseconds
 * @param signal - abort signal that cuts the sleep short
 */
function abortableSleep(ms: number, signal: AbortSignal): Promise<void> {
  const maxTimerDelayMs = 2_147_483_647;
  return new Promise<void>((resolve) => {
    if (signal.aborted) {
      resolve();
      return;
    }
    const delay = Math.min(ms, maxTimerDelayMs);
    const timer = setTimeout(() => {
      // Timer won the race — drop the abort listener so it does not linger
      // on a long-lived signal.
      signal.removeEventListener("abort", onAbort);
      resolve();
    }, delay);
    const onAbort = (): void => {
      clearTimeout(timer);
      resolve();
    };
    signal.addEventListener("abort", onAbort, { once: true });
  });
}
|
|
167
|
+
|
|
168
|
+
/**
 * True when a process with `pid` exists. Uses signal `0`, which performs the
 * permission/existence check without delivering a signal.
 *
 * - ESRCH → the process is gone (`false`).
 * - EPERM → the process exists but belongs to someone else (`true`).
 * - Any other failure → assumed alive, to avoid a false-positive teardown.
 *
 * Values that cannot name a single process (non-integers, zero, negatives)
 * return `false` without probing: `kill` interprets zero and negative values
 * as process-group / broadcast targets rather than a process id, so a probe
 * would report "alive" for something that is not a process.
 *
 * @param pid - process id to probe
 * @returns whether a process with that id currently exists
 */
function isProcessAlive(pid: number): boolean {
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (e: unknown) {
    if (e instanceof Error && "code" in e) {
      const code = (e as NodeJS.ErrnoException).code;
      if (code === "EPERM") return true;
      if (code === "ESRCH") return false;
    }
    // Unknown error — assume alive to avoid false-positive teardown.
    return true;
  }
}
|
|
83
187
|
|
|
84
188
|
/**
|
|
85
189
|
* Handler for the hidden `_claude-stop-hook` subcommand.
|
|
@@ -95,6 +199,8 @@ export async function claudeStopHookCommand(
|
|
|
95
199
|
): Promise<number> {
|
|
96
200
|
const waitTimeoutMs = options.waitTimeoutMs ?? DEFAULT_WAIT_TIMEOUT_MS;
|
|
97
201
|
const pollIntervalMs = options.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
|
|
202
|
+
const livenessIntervalMs =
|
|
203
|
+
options.livenessIntervalMs ?? DEFAULT_LIVENESS_INTERVAL_MS;
|
|
98
204
|
|
|
99
205
|
// 1. Read stdin
|
|
100
206
|
const raw = await Bun.stdin.text();
|
|
@@ -121,9 +227,9 @@ export async function claudeStopHookCommand(
|
|
|
121
227
|
// `src/query.ts` → `transition: { reason: 'stop_hook_blocking' }`). In a
|
|
122
228
|
// multi-turn workflow, every follow-up turn after the first is therefore
|
|
123
229
|
// invoked with `stop_hook_active=true`. Returning early here would skip the
|
|
124
|
-
// marker write, leaving `waitForIdle` hanging
|
|
125
|
-
//
|
|
126
|
-
//
|
|
230
|
+
// marker write, leaving `waitForIdle` hanging forever, and would skip the
|
|
231
|
+
// queue poll so the workflow's next `s.session.query(...)` would never
|
|
232
|
+
// reach Claude.
|
|
127
233
|
//
|
|
128
234
|
// Our design doesn't need the generic loop guard: the hook only emits a
|
|
129
235
|
// `block` decision when the workflow runtime has written a prompt to the
|
|
@@ -135,6 +241,7 @@ export async function claudeStopHookCommand(
|
|
|
135
241
|
fs.mkdir(dirs.marker, { recursive: true }),
|
|
136
242
|
fs.mkdir(dirs.queue, { recursive: true }),
|
|
137
243
|
fs.mkdir(dirs.release, { recursive: true }),
|
|
244
|
+
fs.mkdir(dirs.pid, { recursive: true }),
|
|
138
245
|
]);
|
|
139
246
|
|
|
140
247
|
// 4. Write the marker file directly.
|
|
@@ -148,7 +255,7 @@ export async function claudeStopHookCommand(
|
|
|
148
255
|
const markerPath = path.join(dirs.marker, payload.session_id);
|
|
149
256
|
await Bun.write(markerPath, raw);
|
|
150
257
|
|
|
151
|
-
// 5.
|
|
258
|
+
// 5. Wait for either a queued follow-up prompt or a release signal.
|
|
152
259
|
//
|
|
153
260
|
// The workflow's `waitForIdle` has already been unblocked by the marker
|
|
154
261
|
// write above and is now returning control to the user's stage callback.
|
|
@@ -164,34 +271,130 @@ export async function claudeStopHookCommand(
|
|
|
164
271
|
// `~/.atomic/claude-release/<session_id>`. We exit 0 with no stdout
|
|
165
272
|
// payload and Claude stops as usual.
|
|
166
273
|
//
|
|
167
|
-
// c. Neither happens within `waitTimeoutMs`. We exit 0
|
|
168
|
-
//
|
|
274
|
+
// c. Neither happens within `waitTimeoutMs`. We exit 0 so Claude Code
|
|
275
|
+
// doesn't hang past its own per-hook timeout. The production default
|
|
276
|
+
// for `waitTimeoutMs` is aligned with the Claude-side hook timeout
|
|
277
|
+
// (~24 days), so this path is effectively unreachable in real runs —
|
|
278
|
+
// it only fires in tests that pass a short override.
|
|
279
|
+
//
|
|
280
|
+
// Delivery uses `fs.watch` on the queue and release dirs for ~0-latency
|
|
281
|
+
// wake-up on create events, with a slower `existsSync` polling fallback
|
|
282
|
+
// in case a watcher notification gets dropped under fs load (same pattern
|
|
283
|
+
// as `watchHILMarker` in `src/sdk/providers/claude.ts`).
|
|
169
284
|
const queuePath = path.join(dirs.queue, payload.session_id);
|
|
170
285
|
const releasePath = path.join(dirs.release, payload.session_id);
|
|
171
286
|
|
|
172
|
-
|
|
173
|
-
|
|
287
|
+
type Hit = { kind: "release" } | { kind: "queue"; prompt: string };
|
|
288
|
+
|
|
289
|
+
const check = async (): Promise<Hit | null> => {
|
|
174
290
|
if (existsSync(releasePath)) {
|
|
175
291
|
try { await fs.unlink(releasePath); } catch { /* ENOENT is fine */ }
|
|
176
|
-
return
|
|
292
|
+
return { kind: "release" };
|
|
177
293
|
}
|
|
178
294
|
if (existsSync(queuePath)) {
|
|
179
295
|
let prompt: string;
|
|
180
296
|
try {
|
|
181
297
|
prompt = await fs.readFile(queuePath, "utf-8");
|
|
182
298
|
} catch {
|
|
183
|
-
|
|
299
|
+
// Treat a failed read as a graceful release so the hook still exits.
|
|
300
|
+
return { kind: "release" };
|
|
184
301
|
}
|
|
185
302
|
try { await fs.unlink(queuePath); } catch { /* ENOENT is fine */ }
|
|
303
|
+
return { kind: "queue", prompt };
|
|
304
|
+
}
|
|
305
|
+
return null;
|
|
306
|
+
};
|
|
307
|
+
|
|
308
|
+
const emit = (hit: Hit): number => {
|
|
309
|
+
if (hit.kind === "queue") {
|
|
186
310
|
process.stdout.write(JSON.stringify({
|
|
187
311
|
decision: "block",
|
|
188
|
-
reason: prompt,
|
|
312
|
+
reason: hit.prompt,
|
|
189
313
|
}));
|
|
190
|
-
return 0;
|
|
191
314
|
}
|
|
192
|
-
|
|
315
|
+
return 0;
|
|
316
|
+
};
|
|
317
|
+
|
|
318
|
+
// Initial synchronous check — the runtime may have enqueued/released before
|
|
319
|
+
// we attached watchers, and without this the hook could hang until the
|
|
320
|
+
// polling fallback fires.
|
|
321
|
+
const early = await check();
|
|
322
|
+
if (early) return emit(early);
|
|
323
|
+
|
|
324
|
+
const ac = new AbortController();
|
|
325
|
+
const overallTimer = setTimeout(() => ac.abort(), waitTimeoutMs);
|
|
326
|
+
let hit: Hit | null = null;
|
|
327
|
+
|
|
328
|
+
// Read the atomic workflow's PID (if the runtime wrote one for this
|
|
329
|
+
// session). Used by the liveness task below to detect an atomic crash.
|
|
330
|
+
const atomicPid = await readAtomicPid(
|
|
331
|
+
path.join(dirs.pid, payload.session_id),
|
|
332
|
+
);
|
|
333
|
+
|
|
334
|
+
// Watch a single directory for change events and resolve `hit` on the
|
|
335
|
+
// first one that matches. `event.filename` is unreliable across OSes
|
|
336
|
+
// (see the comment in `watchHILMarker`), so disk state is authoritative.
|
|
337
|
+
const runWatcher = async (dir: string): Promise<void> => {
|
|
338
|
+
try {
|
|
339
|
+
for await (const _event of watchDir(dir, { signal: ac.signal })) {
|
|
340
|
+
const result = await check();
|
|
341
|
+
if (result) {
|
|
342
|
+
hit = result;
|
|
343
|
+
ac.abort();
|
|
344
|
+
return;
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
} catch (e: unknown) {
|
|
348
|
+
if (!(e instanceof Error && e.name === "AbortError")) throw e;
|
|
349
|
+
}
|
|
350
|
+
};
|
|
351
|
+
|
|
352
|
+
// Polling fallback — catches the rare dropped inotify/FSEvent event.
|
|
353
|
+
// Only runs while the watchers are live; `ac.abort()` shuts it down.
|
|
354
|
+
const runPollFallback = async (): Promise<void> => {
|
|
355
|
+
while (!ac.signal.aborted) {
|
|
356
|
+
await abortableSleep(pollIntervalMs, ac.signal);
|
|
357
|
+
if (ac.signal.aborted) return;
|
|
358
|
+
const result = await check();
|
|
359
|
+
if (result) {
|
|
360
|
+
hit = result;
|
|
361
|
+
ac.abort();
|
|
362
|
+
return;
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
};
|
|
366
|
+
|
|
367
|
+
// Liveness check — if the atomic workflow process died without writing a
|
|
368
|
+
// release marker (e.g. SIGKILL), this task abandons the wait and lets
|
|
369
|
+
// Claude stop. No-op when there's no pid file (older sessions or non-
|
|
370
|
+
// runtime spawns) so the hook still functions standalone.
|
|
371
|
+
const runLivenessCheck = async (): Promise<void> => {
|
|
372
|
+
if (atomicPid === null) return;
|
|
373
|
+
while (!ac.signal.aborted) {
|
|
374
|
+
await abortableSleep(livenessIntervalMs, ac.signal);
|
|
375
|
+
if (ac.signal.aborted) return;
|
|
376
|
+
if (!isProcessAlive(atomicPid)) {
|
|
377
|
+
// hit stays null → the hook exits 0 without emitting a block decision.
|
|
378
|
+
ac.abort();
|
|
379
|
+
return;
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
};
|
|
383
|
+
|
|
384
|
+
try {
|
|
385
|
+
await Promise.all([
|
|
386
|
+
runWatcher(dirs.queue),
|
|
387
|
+
runWatcher(dirs.release),
|
|
388
|
+
runPollFallback(),
|
|
389
|
+
runLivenessCheck(),
|
|
390
|
+
]);
|
|
391
|
+
} finally {
|
|
392
|
+
clearTimeout(overallTimer);
|
|
393
|
+
ac.abort();
|
|
193
394
|
}
|
|
194
395
|
|
|
396
|
+
if (hit) return emit(hit);
|
|
397
|
+
|
|
195
398
|
// No hit — the wait timed out or the liveness check found the atomic process gone. Let Claude stop normally.
|
|
196
399
|
return 0;
|
|
197
400
|
}
|
|
@@ -71,6 +71,12 @@ export async function clearClaudeSession(paneId: string): Promise<void> {
|
|
|
71
71
|
// Best-effort — if release fails the hook will still exit on its
|
|
72
72
|
// own safety timeout.
|
|
73
73
|
}
|
|
74
|
+
try {
|
|
75
|
+
await unlinkAtomicPidFile(state.claudeSessionId);
|
|
76
|
+
} catch {
|
|
77
|
+
// Best-effort — stale pid file is inert; the next session writes a
|
|
78
|
+
// fresh one under its own UUID.
|
|
79
|
+
}
|
|
74
80
|
}
|
|
75
81
|
initializedPanes.delete(paneId);
|
|
76
82
|
}
|
|
@@ -258,6 +264,12 @@ export async function createClaudeSession(options: ClaudeSessionOptions): Promis
|
|
|
258
264
|
chatFlags,
|
|
259
265
|
readyTimeoutMs,
|
|
260
266
|
});
|
|
267
|
+
|
|
268
|
+
// Write our PID so the Stop hook can detect an orphaned session if we
|
|
269
|
+
// crash/get SIGKILL'd without running teardown. Best-effort; failures just
|
|
270
|
+
// mean the hook falls back to waiting out Claude's own hook timeout.
|
|
271
|
+
await writeAtomicPidFile(claudeSessionId);
|
|
272
|
+
|
|
261
273
|
return claudeSessionId;
|
|
262
274
|
}
|
|
263
275
|
|
|
@@ -609,6 +621,38 @@ export async function releaseClaudeSession(claudeSessionId: string): Promise<voi
|
|
|
609
621
|
await writeFile(releasePath(claudeSessionId), "");
|
|
610
622
|
}
|
|
611
623
|
|
|
624
|
+
/** @internal Directory that holds the per-session pid files. */
function pidDir(): string {
  const { pid } = claudeHookDirs();
  return pid;
}
|
|
628
|
+
|
|
629
|
+
/** @internal Path of the pid file for `claudeSessionId` inside the pid directory. */
function pidFilePath(claudeSessionId: string): string {
  const dir = pidDir();
  return join(dir, claudeSessionId);
}
|
|
633
|
+
|
|
634
|
+
/**
 * Record this process's PID in the session's pid file
 * (`~/.atomic/claude-pid/<session_id>`), creating the pid directory first if
 * it does not exist yet.
 *
 * The Stop hook reads this value and probes it with `process.kill(pid, 0)`;
 * if atomic was SIGKILL'd before it could run `clearClaudeSession`, the dead
 * PID tells the hook to exit instead of holding Claude for the full timeout.
 *
 * @param claudeSessionId - Claude session UUID, used as the pid file's name
 */
async function writeAtomicPidFile(claudeSessionId: string): Promise<void> {
  const dir = pidDir();
  await mkdir(dir, { recursive: true });

  const target = pidFilePath(claudeSessionId);
  const contents = String(process.pid);
  await writeFile(target, contents, "utf-8");
}
|
|
644
|
+
|
|
645
|
+
/**
 * Delete the pid file belonging to `claudeSessionId`.
 *
 * A missing file (ENOENT) is treated as success, so the call may be repeated
 * safely; every other failure is rethrown to the caller.
 */
async function unlinkAtomicPidFile(claudeSessionId: string): Promise<void> {
  try {
    const target = pidFilePath(claudeSessionId);
    await unlink(target);
  } catch (e: unknown) {
    const alreadyGone =
      e instanceof Error &&
      "code" in e &&
      (e as NodeJS.ErrnoException).code === "ENOENT";
    if (alreadyGone) return;
    throw e;
  }
}
|
|
655
|
+
|
|
612
656
|
// ---------------------------------------------------------------------------
|
|
613
657
|
// Idle detection via marker file watch
|
|
614
658
|
// ---------------------------------------------------------------------------
|
|
@@ -641,11 +685,6 @@ export async function releaseClaudeSession(claudeSessionId: string): Promise<voi
|
|
|
641
685
|
* @param claudeSessionId - Claude's session UUID (used to identify marker file)
|
|
642
686
|
* @param transcriptBeforeCount - number of messages in transcript before this turn
|
|
643
687
|
*/
|
|
644
|
-
/** Safety timeout so the workflow's next stage still fires if the Stop hook
|
|
645
|
-
* never runs (misconfigured settings, killed Claude process, etc.). 15 min
|
|
646
|
-
* covers any reasonable single-turn run without hanging forever. */
|
|
647
|
-
const IDLE_TIMEOUT_MS = 15 * 60 * 1000;
|
|
648
|
-
|
|
649
688
|
/**
|
|
650
689
|
* @internal Exported for unit tests.
|
|
651
690
|
*/
|
|
@@ -658,7 +697,6 @@ export async function waitForIdle(
|
|
|
658
697
|
const sessionId = claudeSessionId;
|
|
659
698
|
const target = markerPath(sessionId);
|
|
660
699
|
const ac = new AbortController();
|
|
661
|
-
const timeout = setTimeout(() => ac.abort(), IDLE_TIMEOUT_MS);
|
|
662
700
|
|
|
663
701
|
// Process a marker that has appeared on disk. Returns a tuple:
|
|
664
702
|
// [resolved, result] — when resolved=true, waitForIdle should return.
|
|
@@ -743,13 +781,10 @@ export async function waitForIdle(
|
|
|
743
781
|
}
|
|
744
782
|
}
|
|
745
783
|
} catch (e: unknown) {
|
|
746
|
-
// AbortError is expected when we call ac.abort() to stop watching
|
|
747
|
-
// when the safety timeout fires.
|
|
784
|
+
// AbortError is expected when we call ac.abort() to stop watching.
|
|
748
785
|
if (!(e instanceof Error && e.name === "AbortError")) {
|
|
749
786
|
throw e;
|
|
750
787
|
}
|
|
751
|
-
} finally {
|
|
752
|
-
clearTimeout(timeout);
|
|
753
788
|
}
|
|
754
789
|
|
|
755
790
|
return [];
|