@hayasaka7/haya-pet 0.3.8 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -2
- package/apps/cli/src/haya-pet.js +29 -7
- package/docs/known-issues.md +88 -47
- package/package.json +1 -1
- package/packages/cli-core/src/background-tasks.js +63 -0
- package/packages/cli-core/src/codex-guardian-watcher.js +35 -2
- package/packages/cli-core/src/codex-transcript-watcher.js +25 -1
- package/packages/cli-core/src/run-state.js +83 -17
- package/packages/cli-core/test/background-tasks.test.mjs +115 -0
- package/packages/cli-core/test/codex-guardian-watcher.test.mjs +48 -0
- package/packages/cli-core/test/codex-transcript-watcher.test.mjs +50 -0
- package/packages/cli-core/test/run-state.test.mjs +68 -1
package/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,41 @@ All notable changes to HAYA Pet are documented here. This project adheres to
|
|
|
7
7
|
> 0.2.0 npm publish; they are listed under 0.2.1, which is the first version that
|
|
8
8
|
> ships them.
|
|
9
9
|
|
|
10
|
+
## [0.3.10]
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- **A running Claude Code subagent no longer drives the main session's status.**
|
|
14
|
+
With hooks enabled and a multi-agent run, once the main agent had stopped but a
|
|
15
|
+
subagent was still working, two things went wrong: the pet dropped to *idle*
|
|
16
|
+
(even though work was ongoing), and while the subagent ran, its tool calls flipped
|
|
17
|
+
the pet between *running tools* / *editing files* / *thinking*. Fix, checked
|
|
18
|
+
**only at the main agent's `Stop`** (no timers, no persisted state): (1) Claude's
|
|
19
|
+
`Stop` payload carries a live `background_tasks` snapshot — when it still lists a
|
|
20
|
+
running **subagent**, the pet shows *running tools* with the message **"Subagent
|
|
21
|
+
running"**, and the follow-up `Stop` (empty `background_tasks`) clears it back to
|
|
22
|
+
*idle*; (2) every subagent-originated hook event carries an `agent_id`, so the
|
|
23
|
+
reporter now **drops any event with an `agent_id`**, and a subagent's activity can
|
|
24
|
+
no longer overwrite the main session's status. Background **shells** are
|
|
25
|
+
deliberately not surfaced (their completion isn't reliably observable). See
|
|
26
|
+
`docs/known-issues.md`.
|
|
27
|
+
|
|
28
|
+
## [0.3.9]
|
|
29
|
+
|
|
30
|
+
### Fixed
|
|
31
|
+
- **The cross-session contamination fix now covers Codex too.** Codex had the same
|
|
32
|
+
flaw fixed for Claude in 0.3.8: its transcript watcher chose the rollout by
|
|
33
|
+
newest mtime + cwd, and the guardian-review watcher derived the main thread id
|
|
34
|
+
from the newest main rollout — so two Codex sessions in the same folder could
|
|
35
|
+
cross-report each other's `turn_aborted` (interrupt) or tool activity, with the
|
|
36
|
+
idle session showing the busy one's state. Codex's command-hook payload also
|
|
37
|
+
carries `transcript_path`, so the `haya-pet state` reporter's per-session
|
|
38
|
+
`session → transcript` link (already written for every client) now pins the Codex
|
|
39
|
+
transcript watcher to its own rollout, and the guardian watcher binds the main
|
|
40
|
+
thread id from the linked rollout's `payload.id` (and only follows a trunk whose
|
|
41
|
+
`parent_thread_id` matches it). Both fall back to the previous mtime+cwd heuristic
|
|
42
|
+
when no link is available (e.g. `transcript_path` null early), so there is no
|
|
43
|
+
regression. No timer involved.
|
|
44
|
+
|
|
10
45
|
## [0.3.8]
|
|
11
46
|
|
|
12
47
|
### Fixed
|
|
@@ -19,8 +54,8 @@ All notable changes to HAYA Pet are documented here. This project adheres to
|
|
|
19
54
|
session's watcher now pins to its own transcript via the `transcript_path` Claude
|
|
20
55
|
includes in every hook payload (recorded as a per-session link by the `haya-pet
|
|
21
56
|
state` reporter) instead of guessing; until that link exists it idles rather than
|
|
22
|
-
locking onto another session's file. Codex
|
|
23
|
-
|
|
57
|
+
locking onto another session's file. (Codex had the same discovery shape — fixed
|
|
58
|
+
in 0.3.9.)
|
|
24
59
|
- **The pet no longer disappears when the display layout changes.** The overlay
|
|
25
60
|
window's bounds were set once at creation to span one display's work area and
|
|
26
61
|
never re-homed, so unplugging a monitor, changing resolution/DPI, docking or
|
package/apps/cli/src/haya-pet.js
CHANGED
|
@@ -5,7 +5,7 @@ import { randomUUID } from "node:crypto";
|
|
|
5
5
|
import { dirname, join } from "node:path";
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
7
|
import { runGenericCommand as defaultRunGenericCommand } from "../../../packages/cli-core/src/run-command.js";
|
|
8
|
-
import { parseStateArgs, runStateCommand,
|
|
8
|
+
import { parseStateArgs, runStateCommand, readHookPayloadFromStdin } from "../../../packages/cli-core/src/run-state.js";
|
|
9
9
|
import { removeSessionTranscriptLink } from "../../../packages/cli-core/src/session-transcript-link.js";
|
|
10
10
|
import { injectClaudeHooks as defaultInjectClaudeHooks } from "../../../packages/cli-core/src/claude-hook-injection.js";
|
|
11
11
|
import { injectCodexHooks as defaultInjectCodexHooks } from "../../../packages/cli-core/src/codex-hook-injection.js";
|
|
@@ -434,12 +434,20 @@ async function runRunCommand(parsed, dependencies) {
|
|
|
434
434
|
};
|
|
435
435
|
cleanup = injected.cleanup;
|
|
436
436
|
|
|
437
|
+
// Pin both Codex watchers to THIS session's rollout via the
|
|
438
|
+
// session->transcript link the `haya-pet state` reporter records from the
|
|
439
|
+
// hook payload's transcript_path, instead of guessing newest-by-mtime (which
|
|
440
|
+
// leaks a concurrent same-cwd session's activity/interrupts).
|
|
441
|
+
const sessionDir = resolveSessionDir(dependencies, env);
|
|
442
|
+
|
|
437
443
|
const activeToolCalls = new Set();
|
|
438
444
|
const watcher = watchCodexTranscript({
|
|
439
445
|
homeDir: dependencies.homeDir,
|
|
440
446
|
sessionsRoot: dependencies.codexSessionsRoot,
|
|
441
447
|
cwd,
|
|
442
448
|
startedAt: now(),
|
|
449
|
+
sessionId,
|
|
450
|
+
sessionDir,
|
|
443
451
|
onToolEvent: (event) => {
|
|
444
452
|
hookDebugLog(env, now, {
|
|
445
453
|
source: "codex_transcript",
|
|
@@ -522,6 +530,8 @@ async function runRunCommand(parsed, dependencies) {
|
|
|
522
530
|
sessionsRoot: dependencies.codexSessionsRoot,
|
|
523
531
|
cwd,
|
|
524
532
|
startedAt: now(),
|
|
533
|
+
sessionId,
|
|
534
|
+
sessionDir,
|
|
525
535
|
onReviewEvent: (event) => {
|
|
526
536
|
hookDebugLog(env, now, {
|
|
527
537
|
source: "codex_guardian",
|
|
@@ -550,6 +560,7 @@ async function runRunCommand(parsed, dependencies) {
|
|
|
550
560
|
stopWatcher = () => {
|
|
551
561
|
guardianWatcher.stop();
|
|
552
562
|
stopWithoutGuardian();
|
|
563
|
+
removeSessionTranscriptLink({ sessionDir, sessionId });
|
|
553
564
|
};
|
|
554
565
|
}
|
|
555
566
|
}
|
|
@@ -958,20 +969,31 @@ if (isDirectRun(import.meta.url, process.argv[1])) {
|
|
|
958
969
|
}
|
|
959
970
|
|
|
960
971
|
// Real-process entry. For a `haya-pet state` invocation — which is ALWAYS a client
|
|
961
|
-
// hook child — read the hook payload from stdin once to learn this session's real
|
|
962
|
-
// transcript path
|
|
963
|
-
//
|
|
964
|
-
//
|
|
965
|
-
//
|
|
972
|
+
// hook child — read the hook payload from stdin once to learn: this session's real
|
|
973
|
+
// transcript path (for the session->transcript link); the live background_tasks
|
|
974
|
+
// snapshot (so a Stop with a still-running subagent keeps a working cue instead of
|
|
975
|
+
// idle); and the agent_id (present only for subagent-originated events, which the
|
|
976
|
+
// reporter drops so a subagent's activity never overwrites the main status). All
|
|
977
|
+
// are handed to the reporter via dependencies. Done here (not inside
|
|
978
|
+
// main/runStateCommand) so unit tests, and every other command that needs stdin
|
|
979
|
+
// passed through to its child (e.g. `run`), never touch stdin.
|
|
966
980
|
async function bootstrap() {
|
|
967
981
|
const argv = process.argv.slice(2);
|
|
968
982
|
const dependencies = {};
|
|
969
983
|
if (argv[0] === "state") {
|
|
970
984
|
try {
|
|
971
|
-
const transcriptPath = await
|
|
985
|
+
const { transcriptPath, backgroundTasks, agentId } = await readHookPayloadFromStdin();
|
|
972
986
|
if (transcriptPath) {
|
|
973
987
|
dependencies.transcriptPath = transcriptPath;
|
|
974
988
|
}
|
|
989
|
+
if (Array.isArray(backgroundTasks) && backgroundTasks.length > 0) {
|
|
990
|
+
dependencies.backgroundTasks = backgroundTasks;
|
|
991
|
+
}
|
|
992
|
+
// Present only for subagent-originated events — the reporter drops those so a
|
|
993
|
+
// subagent's tool use never overwrites the main session's status.
|
|
994
|
+
if (agentId) {
|
|
995
|
+
dependencies.agentId = agentId;
|
|
996
|
+
}
|
|
975
997
|
} catch {
|
|
976
998
|
// a missing/garbled payload just means no binding this time — never fatal
|
|
977
999
|
}
|
package/docs/known-issues.md
CHANGED
|
@@ -2,38 +2,34 @@
|
|
|
2
2
|
|
|
3
3
|
Issues found in live use, with their current status.
|
|
4
4
|
|
|
5
|
-
##
|
|
6
|
-
|
|
7
|
-
- **Symptom (same class as the
|
|
8
|
-
|
|
9
|
-
*interrupted* (and more generally mirror another session's tool/working
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
- **Root cause:** `discoverCodexTranscript` (`codex-transcript-watcher.js`)
|
|
13
|
-
the rollout by **newest `.jsonl` by mtime**, filtered only by
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
the thread id
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
available. No timer, consistent with the rest of the status model.
|
|
34
|
-
- **Status:** unfixed. The binding fix shipped this session is **Claude-only** and
|
|
35
|
-
does not touch the Codex watcher or the guardian-review watcher (which shares the
|
|
36
|
-
same discovery shape and should be checked alongside it).
|
|
5
|
+
## ✅ Resolved: cross-session status contamination on Codex
|
|
6
|
+
|
|
7
|
+
- **Symptom (same class as the Claude entry below):** interrupting one Codex
|
|
8
|
+
session could flip a **different, concurrent** Codex session's pet to
|
|
9
|
+
*interrupted* (and more generally mirror another session's tool/working states).
|
|
10
|
+
Most likely when two Codex sessions ran in the **same folder** and one was busy
|
|
11
|
+
while the other was idle.
|
|
12
|
+
- **Root cause:** `discoverCodexTranscript` (`codex-transcript-watcher.js`) picked
|
|
13
|
+
the rollout by **newest `.jsonl` by mtime**, filtered only by `session_meta.cwd`
|
|
14
|
+
/ freshness — it did **not** bind to a specific session, so an idle session's
|
|
15
|
+
watcher could lock onto a busy session's rollout and read that session's
|
|
16
|
+
`turn_aborted` (Codex's interrupt signal) as its own. The `isFreshSession` branch
|
|
17
|
+
even admitted recently-started rollouts from **other cwds**, so the exposure was
|
|
18
|
+
slightly *wider* than Claude's (scoped to one project dir). The guardian-review
|
|
19
|
+
watcher had the same flaw: it derived the main thread id from the newest main
|
|
20
|
+
rollout by mtime, so a concurrent session's review status could be misattributed.
|
|
21
|
+
- **Fix:** the same per-session binding used for Claude. Verified against the
|
|
22
|
+
OpenAI Codex docs that the command-hook stdin payload carries **`transcript_path`**
|
|
23
|
+
(and `session_id`, the conversation/rollout id — which, as also confirmed on disk,
|
|
24
|
+
equals `session_meta.payload.id` and the rollout filename uuid). The
|
|
25
|
+
`haya-pet state` reporter already records a per-session `session→transcript` link
|
|
26
|
+
from that `transcript_path` (the capture is client-agnostic), so the Codex
|
|
27
|
+
transcript watcher now pins to its own rollout via the link
|
|
28
|
+
(`session-transcript-link.js`) instead of guessing newest-by-mtime, and the
|
|
29
|
+
guardian watcher derives the main thread id from the **linked** rollout's
|
|
30
|
+
`payload.id` (and only considers a trunk whose `parent_thread_id` matches it).
|
|
31
|
+
Both fall back to the old heuristic when no link is available (e.g. `transcript_path`
|
|
32
|
+
null early), so there is no regression. No timer involved.
|
|
37
33
|
|
|
38
34
|
## ✅ Resolved: Claude interrupt/denial leaked into a concurrent idle session
|
|
39
35
|
|
|
@@ -154,19 +150,60 @@ Issues found in live use, with their current status.
|
|
|
154
150
|
to *thinking* (the agent continues the turn) and the next real event refines it.
|
|
155
151
|
Verified live on the manual path; auto uses the identical matcher mechanism.
|
|
156
152
|
|
|
157
|
-
## ✅ Resolved:
|
|
158
|
-
|
|
159
|
-
- **Symptom:**
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
153
|
+
## ✅ Resolved: subagent activity drove the main session status (Claude Code)
|
|
154
|
+
|
|
155
|
+
- **Symptom:** With Claude Code hooks enabled and a multi-agent run, two things
|
|
156
|
+
went wrong once the **main agent had stopped but a subagent was still working**:
|
|
157
|
+
(1) the pet dropped to *idle* even though real work was ongoing in the
|
|
158
|
+
background; and (2) while the subagent ran, its own tool calls flipped the pet
|
|
159
|
+
between *running tools* / *editing files* / *thinking* — the subagent's activity
|
|
160
|
+
was driving the main session's status.
|
|
161
|
+
- **Root cause:** Two gaps. (a) The hook table mapped `Stop` → *idle*
|
|
162
|
+
unconditionally, with no awareness that a backgrounded subagent can outlive the
|
|
163
|
+
main turn. (b) A backgrounded subagent's tool calls fire the **parent session's**
|
|
164
|
+
`PreToolUse` / `PostToolUse` hooks, which ran `haya-pet state running_tool`
|
|
165
|
+
(etc.) under the main session id and overwrote its status. (An earlier fix only
|
|
166
|
+
stopped `SubagentStop` from reporting *idle*; it addressed neither of these.)
|
|
167
|
+
- **Fix — only ever decided at the main agent's `Stop`; no timers, no persisted
|
|
168
|
+
state:**
|
|
169
|
+
- **The "Subagent running" cue.** Claude's `Stop` payload carries an
|
|
170
|
+
(undocumented) **`background_tasks`** array: a live snapshot of work still
|
|
171
|
+
running at that instant. When `Stop` would report *idle* but `background_tasks`
|
|
172
|
+
still lists a running **subagent**, the reporter instead reports *running
|
|
173
|
+
tools* with the summary **"Subagent running"**
|
|
174
|
+
(`packages/cli-core/src/background-tasks.js`). When that subagent finishes,
|
|
175
|
+
Claude fires `Stop` **again** with an empty `background_tasks`, which clears the
|
|
176
|
+
cue back to *idle* — self-retracting, no timer. (Verified against live hook
|
|
177
|
+
traces: a backgrounded subagent appears in `Stop`'s `background_tasks` as
|
|
178
|
+
`type:"subagent", status:"running"`, and a second `Stop` arrives with `[]` once
|
|
179
|
+
it completes.)
|
|
180
|
+
- **Subagent events are dropped.** Every hook payload from a subagent context
|
|
181
|
+
carries an **`agent_id`** — the documented field that distinguishes subagent
|
|
182
|
+
hook calls from main-thread calls. The reporter now drops any event with an
|
|
183
|
+
`agent_id` (`extractAgentId` in `run-state.js`), so a subagent's tool use can
|
|
184
|
+
never overwrite the main session's status. Main-agent events have no `agent_id`
|
|
185
|
+
and report as before; the main `Stop` (also no `agent_id`) still carries the
|
|
186
|
+
`background_tasks` snapshot used for the cue above. `SubagentStop` is likewise
|
|
187
|
+
not wired.
|
|
188
|
+
- **Known limitations (accepted):**
|
|
189
|
+
- **Only subagents, never background shells.** A `background_tasks` entry can
|
|
190
|
+
also be `type:"shell"` (e.g. a `sleep 120` the agent backgrounded). These are
|
|
191
|
+
deliberately **not** surfaced: their completion isn't reliably observable here,
|
|
192
|
+
and a "working" cue we can't retract is worse than showing *idle*. So a
|
|
193
|
+
backgrounded shell still running after the main agent stops shows *idle*.
|
|
194
|
+
- **The `agent_id` discriminator is documented but not yet captured live on
|
|
195
|
+
`PreToolUse` / `PostToolUse`.** The Claude hooks reference lists `agent_id` /
|
|
196
|
+
`agent_type` as optional fields delivered to all hooks to distinguish subagent
|
|
197
|
+
calls, and the observed flicker confirms subagent tool calls reach the parent
|
|
198
|
+
hooks — but a subagent `PreToolUse` payload hasn't been captured on disk to
|
|
199
|
+
100% confirm `agent_id` is present there. If a future Claude build omits it the
|
|
200
|
+
flicker could recur; the marker would then be widened to also match
|
|
201
|
+
`agent_type` / `agent_transcript_path`.
|
|
202
|
+
- **How to diagnose if it recurs:** with `HAYA_PET_HOOK_DEBUG=<path>` set, the
|
|
203
|
+
reporter logs an `agentId` field on subagent-sourced events (which it then
|
|
204
|
+
drops). A subagent event logged with **no** `agentId` is the signal to widen the
|
|
205
|
+
marker. Codex keeps its separate behavior: it uses `Stop` as the only idle signal
|
|
206
|
+
and treats `SubagentStop` as mid-turn.
|
|
170
207
|
|
|
171
208
|
## ✅ Resolved: false "waiting for approval" while Codex auto-reviews an approval (Approve for me)
|
|
172
209
|
|
|
@@ -430,8 +467,12 @@ observation (`--observe`) or L1 lifecycle as the fallback. Current state:
|
|
|
430
467
|
`Notification`/`PreCompact`/`PostCompact`/`Stop` events to `haya-pet state <state>`,
|
|
431
468
|
reported to the daemon over the IPC pipe. `PostCompact` is split by its
|
|
432
469
|
`manual`/`auto` trigger matcher (manual `/compact` → *idle*, auto compaction →
|
|
433
|
-
*thinking*) so the pet never sticks on *compacting*.
|
|
434
|
-
|
|
470
|
+
*thinking*) so the pet never sticks on *compacting*. Subagent-originated events
|
|
471
|
+
are **dropped** by the reporter (they carry an `agent_id`), so a subagent's tool
|
|
472
|
+
use never drives the main status, and `SubagentStop` is not wired; when `Stop`
|
|
473
|
+
fires while a subagent is still running, its `background_tasks` snapshot surfaces
|
|
474
|
+
as a *running tools* / "Subagent running" cue that the next (empty) `Stop` clears
|
|
475
|
+
— see the resolved subagent entry above. `PreToolUse` distinguishes
|
|
435
476
|
file-editing tools (`Edit`/`Write`/`MultiEdit`/`NotebookEdit` → *editing files*)
|
|
436
477
|
from other tools (→ *running tools*) via the hook `matcher`. **Why opt-in:**
|
|
437
478
|
injecting hooks makes Claude show a one-time *review hooks* trust prompt; the
|
package/package.json
CHANGED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
// Pure helpers for the Claude "subagent still running at Stop" cue.
|
|
2
|
+
//
|
|
3
|
+
// When the main Claude agent ends its turn it fires `Stop`, whose payload carries
|
|
4
|
+
// a `background_tasks` array — a live snapshot of work still running at that
|
|
5
|
+
// instant (the hooks docs omit this field; it was verified against live traces).
|
|
6
|
+
// If a backgrounded *subagent* is still running, the main agent is paused but real
|
|
7
|
+
// work continues, so the pet keeps a "working" cue with a message rather than
|
|
8
|
+
// dropping to idle. When that subagent finishes, Claude fires `Stop` AGAIN with an
|
|
9
|
+
// empty `background_tasks`, which naturally clears the cue — so this needs no
|
|
10
|
+
// timers, no persisted state, and no subagent-event wiring. The only check is at
|
|
11
|
+
// the main agent's Stop, exactly as scoped.
|
|
12
|
+
//
|
|
13
|
+
// Scoped to `type: "subagent"` ONLY. Background *shells* are intentionally
|
|
14
|
+
// excluded: their completion isn't reliably observable here, and a status we
|
|
15
|
+
// cannot retract is worse than none.
|
|
16
|
+
|
|
17
|
+
// A single, fixed cue message. The user only wants to know that a subagent is
|
|
18
|
+
// still working after the main agent paused — NOT which one or how many.
|
|
19
|
+
const SUBAGENT_RUNNING_SUMMARY = "Subagent running";
|
|
20
|
+
|
|
21
|
+
// Parse a Claude hook payload (JSON on stdin) and return its background_tasks
|
|
22
|
+
// array. Defensive: any non-JSON / missing / wrong-typed input yields [].
|
|
23
|
+
export function extractBackgroundTasks(raw) {
|
|
24
|
+
if (typeof raw !== "string" || raw.trim() === "") {
|
|
25
|
+
return [];
|
|
26
|
+
}
|
|
27
|
+
try {
|
|
28
|
+
const parsed = JSON.parse(raw);
|
|
29
|
+
return Array.isArray(parsed?.background_tasks) ? parsed.background_tasks : [];
|
|
30
|
+
} catch {
|
|
31
|
+
return [];
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// The still-running *subagent* tasks. Shells and finished tasks are dropped.
|
|
36
|
+
export function runningSubagentTasks(tasks) {
|
|
37
|
+
if (!Array.isArray(tasks)) {
|
|
38
|
+
return [];
|
|
39
|
+
}
|
|
40
|
+
return tasks.filter((task) => task && task.type === "subagent" && task.status === "running");
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// A fixed cue message when any subagent is still running, or undefined when none
|
|
44
|
+
// are (the caller then leaves the reported state untouched). Intentionally not
|
|
45
|
+
// detailed — just "a subagent is still working".
|
|
46
|
+
export function summarizeSubagentTasks(tasks) {
|
|
47
|
+
return runningSubagentTasks(tasks).length > 0 ? SUBAGENT_RUNNING_SUMMARY : undefined;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// Decide the effective state/summary for a reported state given the Stop payload's
|
|
51
|
+
// background_tasks. Only an `idle` report with a running subagent is upgraded to a
|
|
52
|
+
// working cue; everything else passes through unchanged — including the follow-up
|
|
53
|
+
// Stop whose background_tasks is empty, which is what retracts the cue.
|
|
54
|
+
export function applySubagentBackgroundTasks({ state, summary, backgroundTasks }) {
|
|
55
|
+
if (state !== "idle") {
|
|
56
|
+
return { state, summary };
|
|
57
|
+
}
|
|
58
|
+
const message = summarizeSubagentTasks(backgroundTasks);
|
|
59
|
+
if (!message) {
|
|
60
|
+
return { state, summary };
|
|
61
|
+
}
|
|
62
|
+
return { state: "running_tool", summary: message };
|
|
63
|
+
}
|
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
parseGuardianTranscriptLines
|
|
15
15
|
} from "../../adapters/src/codex-guardian.js";
|
|
16
16
|
import { listJsonlFiles, readFirstLine, readRange, safeMtime, safeSize } from "./codex-rollout-fs.js";
|
|
17
|
+
import { readSessionTranscriptLink } from "./session-transcript-link.js";
|
|
17
18
|
|
|
18
19
|
const DEFAULT_POLL_MS = 700;
|
|
19
20
|
const MTIME_SKEW_MS = 2000;
|
|
@@ -26,6 +27,8 @@ export function watchCodexGuardianReviews(options = {}) {
|
|
|
26
27
|
onReviewEvent = () => {},
|
|
27
28
|
pollIntervalMs = DEFAULT_POLL_MS,
|
|
28
29
|
sessionsRoot,
|
|
30
|
+
sessionId,
|
|
31
|
+
sessionDir,
|
|
29
32
|
setInterval: setIntervalFn = setInterval,
|
|
30
33
|
clearInterval: clearIntervalFn = clearInterval
|
|
31
34
|
} = options;
|
|
@@ -66,7 +69,33 @@ export function watchCodexGuardianReviews(options = {}) {
|
|
|
66
69
|
return meta;
|
|
67
70
|
};
|
|
68
71
|
|
|
72
|
+
// Authoritative main thread id from the session->transcript link (the linked
|
|
73
|
+
// rollout's session_meta.payload.id). Lets the guardian bind to OUR main thread
|
|
74
|
+
// even when another session's main rollout has a newer mtime.
|
|
75
|
+
const resolveLinkedMainThreadId = () => {
|
|
76
|
+
if (!sessionId || !sessionDir) {
|
|
77
|
+
return undefined;
|
|
78
|
+
}
|
|
79
|
+
const linked = readSessionTranscriptLink({ sessionDir, sessionId });
|
|
80
|
+
if (!linked) {
|
|
81
|
+
return undefined;
|
|
82
|
+
}
|
|
83
|
+
const firstLine = readFirstLine(linked);
|
|
84
|
+
if (firstLine === undefined) {
|
|
85
|
+
return undefined;
|
|
86
|
+
}
|
|
87
|
+
const meta = classifyCodexSessionMeta(firstLine);
|
|
88
|
+
return meta?.kind === "main" ? meta.threadId : undefined;
|
|
89
|
+
};
|
|
90
|
+
|
|
69
91
|
const discoverTrunk = () => {
|
|
92
|
+
// Prefer the linked main thread id; fall back to the newest main rollout by
|
|
93
|
+
// mtime only when no link is available (older behavior, unsafe across
|
|
94
|
+
// concurrent same-cwd sessions).
|
|
95
|
+
if (!mainThreadId) {
|
|
96
|
+
mainThreadId = resolveLinkedMainThreadId();
|
|
97
|
+
}
|
|
98
|
+
|
|
70
99
|
let newestMain;
|
|
71
100
|
let newestTrunk;
|
|
72
101
|
|
|
@@ -83,8 +112,12 @@ export function watchCodexGuardianReviews(options = {}) {
|
|
|
83
112
|
if (meta.kind === "main" && meta.threadId && (!newestMain || mtime > newestMain.mtime)) {
|
|
84
113
|
newestMain = { threadId: meta.threadId, mtime };
|
|
85
114
|
}
|
|
86
|
-
if (meta.kind === "guardian" && (!newestTrunk || mtime > newestTrunk.mtime)) {
|
|
87
|
-
|
|
115
|
+
if (meta.kind === "guardian" && meta.parentThreadId && (!newestTrunk || mtime > newestTrunk.mtime)) {
|
|
116
|
+
// Once our main thread id is known, only consider OUR trunk so a
|
|
117
|
+
// concurrent session's (or collab subagent's) newer trunk cannot win.
|
|
118
|
+
if (!mainThreadId || meta.parentThreadId === mainThreadId) {
|
|
119
|
+
newestTrunk = { file, parentThreadId: meta.parentThreadId, mtime };
|
|
120
|
+
}
|
|
88
121
|
}
|
|
89
122
|
}
|
|
90
123
|
|
|
@@ -5,6 +5,7 @@ import { existsSync } from "node:fs";
|
|
|
5
5
|
import { join } from "node:path";
|
|
6
6
|
import { parseCodexTranscriptLines } from "../../adapters/src/codex-transcript.js";
|
|
7
7
|
import { listJsonlFiles, readFirstLine, readRange, safeMtime, safeSize } from "./codex-rollout-fs.js";
|
|
8
|
+
import { readSessionTranscriptLink } from "./session-transcript-link.js";
|
|
8
9
|
|
|
9
10
|
const DEFAULT_POLL_MS = 700;
|
|
10
11
|
const MTIME_SKEW_MS = 2000;
|
|
@@ -17,6 +18,8 @@ export function watchCodexTranscript(options = {}) {
|
|
|
17
18
|
onToolEvent = () => {},
|
|
18
19
|
pollIntervalMs = DEFAULT_POLL_MS,
|
|
19
20
|
sessionsRoot,
|
|
21
|
+
sessionId,
|
|
22
|
+
sessionDir,
|
|
20
23
|
transcriptPath: fixedPath,
|
|
21
24
|
setInterval: setIntervalFn = setInterval,
|
|
22
25
|
clearInterval: clearIntervalFn = clearInterval
|
|
@@ -25,6 +28,13 @@ export function watchCodexTranscript(options = {}) {
|
|
|
25
28
|
const root = sessionsRoot ?? (homeDir ? join(homeDir, ".codex", "sessions") : undefined);
|
|
26
29
|
const minMtime = startedAt > 0 ? startedAt - MTIME_SKEW_MS : 0;
|
|
27
30
|
|
|
31
|
+
// Preferred resolution: pin to the exact rollout this session's hook reported
|
|
32
|
+
// (Codex puts `transcript_path` in every hook payload; the `haya-pet state`
|
|
33
|
+
// reporter records it as a session->transcript link). Until that link exists the
|
|
34
|
+
// watcher idles; only a caller that passes no sessionId/sessionDir falls back to
|
|
35
|
+
// the newest-by-mtime heuristic — unsafe with concurrent same-cwd sessions.
|
|
36
|
+
const useLink = Boolean(sessionId && sessionDir);
|
|
37
|
+
|
|
28
38
|
let transcriptPath = fixedPath;
|
|
29
39
|
let offset = 0;
|
|
30
40
|
let carry = "";
|
|
@@ -32,7 +42,9 @@ export function watchCodexTranscript(options = {}) {
|
|
|
32
42
|
const tick = () => {
|
|
33
43
|
try {
|
|
34
44
|
if (!transcriptPath) {
|
|
35
|
-
transcriptPath =
|
|
45
|
+
transcriptPath = useLink
|
|
46
|
+
? resolveLinkedRollout(sessionDir, sessionId)
|
|
47
|
+
: discoverCodexTranscript(root, minMtime, { cwd });
|
|
36
48
|
if (!transcriptPath) {
|
|
37
49
|
return;
|
|
38
50
|
}
|
|
@@ -79,6 +91,18 @@ export function watchCodexTranscript(options = {}) {
|
|
|
79
91
|
};
|
|
80
92
|
}
|
|
81
93
|
|
|
94
|
+
// Resolve the rollout this session's hook bound itself to (via the
|
|
95
|
+
// session->transcript link). Returns undefined until the link exists and points
|
|
96
|
+
// at a real file, so before the first hook the watcher idles rather than guessing
|
|
97
|
+
// a rollout that may belong to another concurrent session.
|
|
98
|
+
function resolveLinkedRollout(sessionDir, sessionId) {
|
|
99
|
+
const linked = readSessionTranscriptLink({ sessionDir, sessionId });
|
|
100
|
+
if (!linked || !existsSync(linked)) {
|
|
101
|
+
return undefined;
|
|
102
|
+
}
|
|
103
|
+
return linked;
|
|
104
|
+
}
|
|
105
|
+
|
|
82
106
|
export function discoverCodexTranscript(root, minMtime = 0, options = {}) {
|
|
83
107
|
if (!root || !existsSync(root)) {
|
|
84
108
|
return undefined;
|
|
@@ -7,6 +7,7 @@ import { getDefaultPaths } from "../../platform-core/src/paths.js";
|
|
|
7
7
|
import { isAiClientState } from "../../protocol/src/messages.js";
|
|
8
8
|
import { DEADLINE, raceDeadline } from "./deadline.js";
|
|
9
9
|
import { writeSessionTranscriptLink } from "./session-transcript-link.js";
|
|
10
|
+
import { applySubagentBackgroundTasks, extractBackgroundTasks } from "./background-tasks.js";
|
|
10
11
|
|
|
11
12
|
// Hard ceiling on the whole connect→send→close interaction. The reporter is a
|
|
12
13
|
// child process of the wrapped AI client, and the client may wait for its hook
|
|
@@ -60,8 +61,28 @@ export async function runStateCommand(parsed, dependencies = {}) {
|
|
|
60
61
|
const env = dependencies.env ?? process.env;
|
|
61
62
|
const now = dependencies.now ?? Date.now;
|
|
62
63
|
const sessionId = parsed.session ?? env.HAYA_PET_SESSION_ID;
|
|
64
|
+
const agentId = dependencies.agentId;
|
|
63
65
|
|
|
64
|
-
debugLog(
|
|
66
|
+
debugLog(
|
|
67
|
+
env,
|
|
68
|
+
now,
|
|
69
|
+
agentId
|
|
70
|
+
? { state: parsed.state, sessionId, summary: parsed.summary, agentId }
|
|
71
|
+
: { state: parsed.state, sessionId, summary: parsed.summary }
|
|
72
|
+
);
|
|
73
|
+
|
|
74
|
+
// A subagent's own activity must NEVER drive the main session's status. A
|
|
75
|
+
// backgrounded subagent's tool calls fire the PARENT session's PreToolUse/
|
|
76
|
+
// PostToolUse hooks, which would otherwise overwrite the main agent's status
|
|
77
|
+
// (and the "Subagent running" cue) with running_tool/thinking/editing_files as
|
|
78
|
+
// the subagent works. The payload's `agent_id` is the documented field that
|
|
79
|
+
// distinguishes subagent hook calls from main-thread calls — when it's present
|
|
80
|
+
// the event came from a subagent, so we drop it entirely. The ONE place a
|
|
81
|
+
// subagent surfaces is the main agent's own Stop (no agent_id), via
|
|
82
|
+
// background_tasks; see applySubagentBackgroundTasks below.
|
|
83
|
+
if (agentId) {
|
|
84
|
+
return { command: "state", ok: false, reason: "subagent-event" };
|
|
85
|
+
}
|
|
65
86
|
|
|
66
87
|
if (!sessionId) {
|
|
67
88
|
return { command: "state", ok: false, reason: "no-session" };
|
|
@@ -77,6 +98,17 @@ export async function runStateCommand(parsed, dependencies = {}) {
|
|
|
77
98
|
// session's interrupt/denial. Best-effort and synchronous; never blocks the hook.
|
|
78
99
|
recordTranscriptLink(sessionId, env, dependencies);
|
|
79
100
|
|
|
101
|
+
// When the main agent's Stop reports idle but its background_tasks (from the hook
|
|
102
|
+
// payload, passed in via dependencies) still lists a running subagent, keep a
|
|
103
|
+
// working cue with a message instead — the main agent is paused but real work
|
|
104
|
+
// continues. The follow-up Stop carries an empty list and clears it. Scoped to
|
|
105
|
+
// subagents only; see background-tasks.js.
|
|
106
|
+
const effective = applySubagentBackgroundTasks({
|
|
107
|
+
state: parsed.state,
|
|
108
|
+
summary: parsed.summary,
|
|
109
|
+
backgroundTasks: dependencies.backgroundTasks
|
|
110
|
+
});
|
|
111
|
+
|
|
80
112
|
const createIpcClient = dependencies.createIpcClient ?? defaultCreateIpcClient;
|
|
81
113
|
const deadlineMs = dependencies.reportDeadlineMs ?? REPORT_DEADLINE_MS;
|
|
82
114
|
|
|
@@ -93,8 +125,8 @@ export async function runStateCommand(parsed, dependencies = {}) {
|
|
|
93
125
|
await client.send({
|
|
94
126
|
type: "state",
|
|
95
127
|
sessionId,
|
|
96
|
-
state:
|
|
97
|
-
summary:
|
|
128
|
+
state: effective.state,
|
|
129
|
+
summary: effective.summary,
|
|
98
130
|
confidence: 0.9,
|
|
99
131
|
source: "official_plugin",
|
|
100
132
|
updatedAt: now()
|
|
@@ -152,19 +184,50 @@ export function extractTranscriptPath(raw) {
|
|
|
152
184
|
}
|
|
153
185
|
}
|
|
154
186
|
|
|
155
|
-
//
|
|
156
|
-
//
|
|
157
|
-
//
|
|
158
|
-
//
|
|
159
|
-
|
|
160
|
-
|
|
187
|
+
// Extract the `agent_id` field from a raw Claude hook payload (the JSON text
// delivered on stdin). Per the hook contract this module relies on, the field is
// present only on subagent-originated events and absent on main-agent events.
//
// Returns the id exactly as it appears in the payload (not trimmed) when it is a
// non-blank string. Every other case — non-string or blank input, unparseable
// JSON, a missing field, or a non-string/blank value — yields undefined. Never
// throws.
export function extractAgentId(raw) {
  const isNonBlankString = (candidate) => typeof candidate === "string" && candidate.trim() !== "";

  if (!isNonBlankString(raw)) {
    return undefined;
  }

  let payload;
  try {
    payload = JSON.parse(raw);
  } catch {
    // Not JSON at all: treat it the same as "no agent_id".
    return undefined;
  }

  // `?.` covers a payload that parsed to null (e.g. the literal text "null").
  const agentId = payload?.agent_id;
  return isNonBlankString(agentId) ? agentId : undefined;
}
|
|
203
|
+
|
|
204
|
+
// Consume the Claude hook payload from stdin exactly once (a stream cannot be
// re-read) and derive every field the reporter needs from that single string:
//   - transcriptPath:  the session's transcript_path, used for the watcher binding
//   - backgroundTasks: the live background_tasks snapshot, used for the
//                      subagent-at-Stop cue
//   - agentId:         the agent_id, set only for subagent-originated events
//
// Intended for the real `haya-pet state` process (a Claude hook child), NOT for
// internal callers, so tests and other commands never touch stdin. The read is
// delegated to readHookPayloadRaw, which is bounded and best-effort: a TTY
// (manual invocation) or a slow/absent payload comes back as an empty string, so
// each extractor falls back to its own empty result instead of hanging the host
// client's hook.
export async function readHookPayloadFromStdin(options = {}) {
  const payloadText = await readHookPayloadRaw(options);

  const transcriptPath = extractTranscriptPath(payloadText);
  const backgroundTasks = extractBackgroundTasks(payloadText);
  const agentId = extractAgentId(payloadText);

  return { transcriptPath, backgroundTasks, agentId };
}
|
|
220
|
+
|
|
221
|
+
// Accumulate the raw payload string from stdin under a hard deadline and byte cap.
|
|
222
|
+
// Always resolves (never rejects); an error or no payload yields "".
|
|
223
|
+
function readHookPayloadRaw(options = {}) {
|
|
161
224
|
const stdin = options.stdin ?? process.stdin;
|
|
162
225
|
const timeoutMs = options.timeoutMs ?? 400;
|
|
163
226
|
const maxBytes = options.maxBytes ?? 1_000_000;
|
|
164
227
|
|
|
165
228
|
return new Promise((resolve) => {
|
|
166
229
|
if (!stdin || stdin.isTTY) {
|
|
167
|
-
resolve(
|
|
230
|
+
resolve("");
|
|
168
231
|
return;
|
|
169
232
|
}
|
|
170
233
|
|
|
@@ -172,7 +235,7 @@ export function readHookTranscriptPathFromStdin(options = {}) {
|
|
|
172
235
|
let settled = false;
|
|
173
236
|
let timer;
|
|
174
237
|
|
|
175
|
-
const finish = (
|
|
238
|
+
const finish = () => {
|
|
176
239
|
if (settled) {
|
|
177
240
|
return;
|
|
178
241
|
}
|
|
@@ -188,17 +251,20 @@ export function readHookTranscriptPathFromStdin(options = {}) {
|
|
|
188
251
|
} catch {
|
|
189
252
|
// detaching is best-effort
|
|
190
253
|
}
|
|
191
|
-
resolve(
|
|
254
|
+
resolve(data);
|
|
192
255
|
};
|
|
193
256
|
|
|
194
257
|
const onData = (chunk) => {
|
|
195
258
|
data += chunk;
|
|
196
259
|
if (data.length > maxBytes) {
|
|
197
|
-
finish(
|
|
260
|
+
finish();
|
|
198
261
|
}
|
|
199
262
|
};
|
|
200
|
-
const onEnd = () => finish(
|
|
201
|
-
const onError = () =>
|
|
263
|
+
const onEnd = () => finish();
|
|
264
|
+
const onError = () => {
|
|
265
|
+
data = "";
|
|
266
|
+
finish();
|
|
267
|
+
};
|
|
202
268
|
|
|
203
269
|
try {
|
|
204
270
|
stdin.setEncoding("utf8");
|
|
@@ -206,12 +272,12 @@ export function readHookTranscriptPathFromStdin(options = {}) {
|
|
|
206
272
|
stdin.on("end", onEnd);
|
|
207
273
|
stdin.on("error", onError);
|
|
208
274
|
stdin.resume();
|
|
209
|
-
timer = setTimeout(
|
|
275
|
+
timer = setTimeout(finish, timeoutMs);
|
|
210
276
|
if (timer && typeof timer.unref === "function") {
|
|
211
277
|
timer.unref();
|
|
212
278
|
}
|
|
213
279
|
} catch {
|
|
214
|
-
finish(
|
|
280
|
+
finish();
|
|
215
281
|
}
|
|
216
282
|
});
|
|
217
283
|
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { test } from "../../../test/harness.mjs";
|
|
3
|
+
import {
|
|
4
|
+
applySubagentBackgroundTasks,
|
|
5
|
+
extractBackgroundTasks,
|
|
6
|
+
runningSubagentTasks,
|
|
7
|
+
summarizeSubagentTasks
|
|
8
|
+
} from "../src/background-tasks.js";
|
|
9
|
+
|
|
10
|
+
test("extractBackgroundTasks pulls the array out of a Claude Stop payload", () => {
  // One running subagent entry, embedded in a Stop payload and expected back verbatim.
  const runningSubagent = { id: "x", type: "subagent", status: "running", description: "Survey repo structure" };
  const stopPayload = JSON.stringify({
    hook_event_name: "Stop",
    background_tasks: [runningSubagent]
  });

  assert.deepEqual(extractBackgroundTasks(stopPayload), [runningSubagent]);
});
|
|
21
|
+
|
|
22
|
+
test("extractBackgroundTasks is defensive about junk, missing field, and wrong types", () => {
  // Every unusable input must collapse to an empty list rather than throw.
  const unusableInputs = [
    "{not json",
    JSON.stringify({ hook_event_name: "Stop" }),
    JSON.stringify({ background_tasks: "nope" }),
    "",
    undefined
  ];

  for (const input of unusableInputs) {
    assert.deepEqual(extractBackgroundTasks(input), []);
  }
});
|
|
29
|
+
|
|
30
|
+
test("runningSubagentTasks keeps only running subagents (drops shells and finished)", () => {
  // A mix of running/finished subagents, a background shell, and a null hole.
  const mixedTasks = [
    { id: "a", type: "subagent", status: "running", description: "A" },
    { id: "b", type: "subagent", status: "completed", description: "B" },
    { id: "c", type: "shell", status: "running", command: "sleep 120" },
    null,
    { id: "d", type: "subagent", status: "running", description: "D" }
  ];

  const keptIds = runningSubagentTasks(mixedTasks).map(({ id }) => id);
  assert.deepEqual(keptIds, ["a", "d"]);

  // Anything that is not a list yields an empty result.
  for (const notAList of [undefined, "nope"]) {
    assert.deepEqual(runningSubagentTasks(notAList), []);
  }
});
|
|
42
|
+
|
|
43
|
+
test("summarizeSubagentTasks returns a fixed message when any subagent runs (no detail)", () => {
  // Same generic message regardless of description, agent_type, or count.
  const runningSubagent = (extra) => ({ type: "subagent", status: "running", ...extra });
  const scenarios = [
    [runningSubagent({ description: "Survey repo structure" })],
    [runningSubagent({ agent_type: "explore" })],
    [runningSubagent({ description: "A" }), runningSubagent({ description: "B" })]
  ];

  for (const tasks of scenarios) {
    assert.equal(summarizeSubagentTasks(tasks), "Subagent running");
  }
});
|
|
63
|
+
|
|
64
|
+
test("summarizeSubagentTasks returns undefined when nothing is running", () => {
  // Empty list, a running shell (not a subagent), and a finished subagent.
  const quietTaskLists = [
    [],
    [{ type: "shell", status: "running" }],
    [{ type: "subagent", status: "completed" }]
  ];

  for (const tasks of quietTaskLists) {
    assert.equal(summarizeSubagentTasks(tasks), undefined);
  }
});
|
|
69
|
+
|
|
70
|
+
test("applySubagentBackgroundTasks upgrades idle to a working cue when a subagent runs", () => {
  const runningSubagent = { type: "subagent", status: "running", description: "Survey repo structure" };

  const effective = applySubagentBackgroundTasks({
    state: "idle",
    summary: undefined,
    backgroundTasks: [runningSubagent]
  });

  assert.deepEqual(effective, { state: "running_tool", summary: "Subagent running" });
});
|
|
82
|
+
|
|
83
|
+
test("applySubagentBackgroundTasks leaves a non-idle state untouched", () => {
  // A running subagent must not override a state that is already non-idle.
  const effective = applySubagentBackgroundTasks({
    state: "thinking",
    summary: "hi",
    backgroundTasks: [{ type: "subagent", status: "running", description: "A" }]
  });

  assert.deepEqual(effective, { state: "thinking", summary: "hi" });
});
|
|
93
|
+
|
|
94
|
+
test("applySubagentBackgroundTasks passes idle through when nothing runs (the retraction case)", () => {
  // The follow-up Stop carries an empty background_tasks — this is what clears the cue.
  const withEmptyList = applySubagentBackgroundTasks({ state: "idle", summary: undefined, backgroundTasks: [] });
  assert.deepEqual(withEmptyList, { state: "idle", summary: undefined });

  // A missing snapshot behaves the same way and keeps the caller's summary.
  const withMissingList = applySubagentBackgroundTasks({
    state: "idle",
    summary: "idle",
    backgroundTasks: undefined
  });
  assert.deepEqual(withMissingList, { state: "idle", summary: "idle" });
});
|
|
105
|
+
|
|
106
|
+
test("applySubagentBackgroundTasks ignores background shells (deliberately unsupported)", () => {
  const backgroundShell = { type: "shell", status: "running", command: "sleep 120" };

  const effective = applySubagentBackgroundTasks({
    state: "idle",
    summary: undefined,
    backgroundTasks: [backgroundShell]
  });

  // A running shell alone is not a working cue: idle stays idle.
  assert.deepEqual(effective, { state: "idle", summary: undefined });
});
|
|
@@ -4,6 +4,7 @@ import { tmpdir } from "node:os";
|
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import { test } from "../../../test/harness.mjs";
|
|
6
6
|
import { watchCodexGuardianReviews } from "../src/codex-guardian-watcher.js";
|
|
7
|
+
import { writeSessionTranscriptLink } from "../src/session-transcript-link.js";
|
|
7
8
|
|
|
8
9
|
const noopTimers = { setInterval: () => ({}), clearInterval: () => {} };
|
|
9
10
|
|
|
@@ -262,6 +263,53 @@ test("watchCodexGuardianReviews ignores guardian trunks of other parents", () =>
|
|
|
262
263
|
watcher.stop();
|
|
263
264
|
});
|
|
264
265
|
|
|
266
|
+
test("watchCodexGuardianReviews binds to the LINKED main thread, not the newest main", () => {
  const { root, dir } = makeSessionsRoot();
  const sessionDir = mkdtempSync(join(tmpdir(), "sess-"));

  const rolloutPath = (name) => join(dir, `rollout-${name}.jsonl`);
  const mainMeta = (id) => metaLine({ id, parent_thread_id: null, source: "cli", thread_source: "user" });
  const guardianMeta = (id, parentId) =>
    metaLine({ id, parent_thread_id: parentId, source: { subagent: { other: "guardian" } } });

  // Write order matters: our main rollout first, then a DIFFERENT concurrent
  // session's main rollout afterwards, so the other one carries the newer mtime
  // (what the old heuristic would pick).
  const ourMain = rolloutPath("main-ours");
  writeFileSync(ourMain, mainMeta("main-ours"));
  writeFileSync(rolloutPath("main-other"), mainMeta("main-other"));

  // A guardian trunk for OUR main, plus a decoy trunk for the OTHER main that is
  // newer and already has a review turn.
  const ourTrunk = rolloutPath("guardian-ours");
  writeFileSync(ourTrunk, guardianMeta("g-ours", "main-ours"));
  writeFileSync(rolloutPath("guardian-other"), guardianMeta("g-other", "main-other") + reviewStarted("decoy"));

  writeSessionTranscriptLink({ sessionDir, sessionId: "sess_a", transcriptPath: ourMain });

  const events = [];
  const watcher = watchCodexGuardianReviews({
    sessionsRoot: root,
    sessionId: "sess_a",
    sessionDir,
    onReviewEvent: (event) => events.push(event),
    ...noopTimers
  });

  // First poll: only the decoy trunk has a review turn, and it must not surface.
  watcher._tick();
  assert.deepEqual(events, [], "the newer decoy trunk (another session) is ignored");

  // A review starting on our own trunk is the one that gets reported.
  appendFileSync(ourTrunk, reviewStarted());
  watcher._tick();
  assert.deepEqual(events, [{ type: "review_started" }], "our trunk is the one tailed");

  watcher.stop();
});
|
|
312
|
+
|
|
265
313
|
test("watchCodexGuardianReviews picks up a trunk created after watching began", () => {
|
|
266
314
|
const { root, dir } = makeSessionsRoot();
|
|
267
315
|
writeFileSync(
|
|
@@ -4,6 +4,7 @@ import { tmpdir } from "node:os";
|
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import { test } from "../../../test/harness.mjs";
|
|
6
6
|
import { discoverCodexTranscript, watchCodexTranscript } from "../src/codex-transcript-watcher.js";
|
|
7
|
+
import { writeSessionTranscriptLink } from "../src/session-transcript-link.js";
|
|
7
8
|
|
|
8
9
|
const noopTimers = { setInterval: () => ({}), clearInterval: () => {} };
|
|
9
10
|
|
|
@@ -60,6 +61,55 @@ test("discoverCodexTranscript skips files older than session start", () => {
|
|
|
60
61
|
assert.equal(discoverCodexTranscript(root, Date.now() - 1000), undefined);
|
|
61
62
|
});
|
|
62
63
|
|
|
64
|
+
test("watchCodexTranscript pins to the session's linked rollout, not newest-by-mtime", () => {
  // Two concurrent Codex sessions, each with its own rollout and its own link.
  const root = mkdtempSync(join(tmpdir(), "codex-sessions-"));
  const dir = join(root, "2026", "06", "12");
  mkdirSync(dir, { recursive: true });
  const sessionDir = mkdtempSync(join(tmpdir(), "sess-"));

  const fileA = join(dir, "rollout-a.jsonl");
  const fileB = join(dir, "rollout-b.jsonl");
  writeFileSync(fileA, sessionMeta("2026-06-12T01:00:00.000Z", "thread-a"));
  writeFileSync(fileB, sessionMeta("2026-06-12T01:00:00.000Z", "thread-b"));
  writeSessionTranscriptLink({ sessionDir, sessionId: "sess_a", transcriptPath: fileA });
  writeSessionTranscriptLink({ sessionDir, sessionId: "sess_b", transcriptPath: fileB });

  // Each watcher collects its tool events into its own sink.
  const startWatcher = (sessionId, sink) =>
    watchCodexTranscript({
      sessionId,
      sessionDir,
      onToolEvent: (event) => sink.push(event),
      ...noopTimers
    });

  const eventsA = [];
  const eventsB = [];
  const watcherA = startWatcher("sess_a", eventsA);
  const watcherB = startWatcher("sess_b", eventsB);

  // Both pin to their own rollout and consume the session_meta line.
  watcherA._tick();
  watcherB._tick();

  // Session A is interrupted.
  appendFileSync(fileA, turnAborted());
  watcherA._tick();
  watcherB._tick();

  assert.deepEqual(
    eventsA,
    [{ type: "turn_aborted", reason: "interrupted" }],
    "session A sees its own interrupt"
  );
  assert.deepEqual(eventsB, [], "session B is NOT contaminated by session A's interrupt");

  watcherA.stop();
  watcherB.stop();
});
|
|
112
|
+
|
|
63
113
|
test("watchCodexTranscript reports appended tool events", () => {
|
|
64
114
|
const dir = mkdtempSync(join(tmpdir(), "codex-transcript-"));
|
|
65
115
|
const path = join(dir, "session.jsonl");
|
|
@@ -3,7 +3,7 @@ import { mkdtempSync, readFileSync } from "node:fs";
|
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import { test } from "../../../test/harness.mjs";
|
|
6
|
-
import { extractTranscriptPath, parseStateArgs, runStateCommand } from "../src/run-state.js";
|
|
6
|
+
import { extractAgentId, extractTranscriptPath, parseStateArgs, runStateCommand } from "../src/run-state.js";
|
|
7
7
|
import { readSessionTranscriptLink } from "../src/session-transcript-link.js";
|
|
8
8
|
|
|
9
9
|
test("runStateCommand appends a debug line when HAYA_PET_HOOK_DEBUG is set", async () => {
|
|
@@ -35,6 +35,40 @@ test("extractTranscriptPath pulls transcript_path out of a Claude hook payload",
|
|
|
35
35
|
assert.equal(extractTranscriptPath(undefined), undefined);
|
|
36
36
|
});
|
|
37
37
|
|
|
38
|
+
test("extractAgentId pulls agent_id out of a subagent hook payload", () => {
  const subagentPayload = JSON.stringify({
    hook_event_name: "PreToolUse",
    agent_id: "a9a8317d",
    agent_type: "Explore"
  });
  assert.equal(extractAgentId(subagentPayload), "a9a8317d");

  // Main-agent events carry no agent_id; junk and wrong types yield undefined.
  const inputsWithoutAgentId = [
    JSON.stringify({ hook_event_name: "Stop" }),
    JSON.stringify({ agent_id: 42 }),
    JSON.stringify({ agent_id: "" }),
    "{not json",
    undefined
  ];
  for (const raw of inputsWithoutAgentId) {
    assert.equal(extractAgentId(raw), undefined);
  }
});
|
|
50
|
+
|
|
51
|
+
test("runStateCommand ignores subagent-originated events so they never drive main status", async () => {
  // Track both whether an IPC connection was attempted and what was sent over it.
  let connected = false;
  const sent = [];
  const createIpcClient = async () => {
    connected = true;
    return { send: async (message) => sent.push(message), close: async () => {} };
  };

  const parsedArgs = { command: "state", state: "running_tool", summary: undefined, session: "sess_main" };
  const result = await runStateCommand(parsedArgs, {
    now: () => 9,
    agentId: "a9a8317d3457d6364",
    createIpcClient
  });

  // The event is rejected before any client is created or message sent.
  assert.equal(result.ok, false);
  assert.equal(result.reason, "subagent-event");
  assert.equal(connected, false);
  assert.equal(sent.length, 0);
});
|
|
71
|
+
|
|
38
72
|
test("runStateCommand records the session->transcript link when given a transcript path", async () => {
|
|
39
73
|
const sessionDir = mkdtempSync(join(tmpdir(), "sess-"));
|
|
40
74
|
await runStateCommand(
|
|
@@ -112,6 +146,39 @@ test("runStateCommand sends one official_plugin state message", async () => {
|
|
|
112
146
|
});
|
|
113
147
|
});
|
|
114
148
|
|
|
149
|
+
test("runStateCommand upgrades a Stop idle to a working cue when a subagent still runs", async () => {
  const sent = [];
  const fakeClient = { send: async (message) => sent.push(message), close: async () => {} };
  const parsedArgs = { command: "state", state: "idle", summary: undefined, session: "sess_bg" };

  const result = await runStateCommand(parsedArgs, {
    now: () => 42,
    backgroundTasks: [{ id: "x", type: "subagent", status: "running", description: "Survey repo structure" }],
    createIpcClient: async () => fakeClient
  });

  assert.equal(result.ok, true);
  // The reported state is the upgraded cue, not the idle that was passed in.
  const [firstMessage] = sent;
  assert.equal(firstMessage.state, "running_tool");
  assert.equal(firstMessage.summary, "Subagent running");
});
|
|
166
|
+
|
|
167
|
+
test("runStateCommand reports plain idle when the follow-up Stop has no running subagents", async () => {
  const sent = [];
  const fakeClient = { send: async (message) => sent.push(message), close: async () => {} };
  const parsedArgs = { command: "state", state: "idle", summary: undefined, session: "sess_done" };

  await runStateCommand(parsedArgs, {
    now: () => 43,
    backgroundTasks: [],
    createIpcClient: async () => fakeClient
  });

  // An empty snapshot leaves the idle report untouched, with no summary attached.
  const [firstMessage] = sent;
  assert.equal(firstMessage.state, "idle");
  assert.equal(firstMessage.summary, undefined);
});
|
|
181
|
+
|
|
115
182
|
test("runStateCommand falls back to HAYA_PET_SESSION_ID", async () => {
|
|
116
183
|
const sent = [];
|
|
117
184
|
const result = await runStateCommand(
|