@hayasaka7/haya-pet 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/README.md +8 -4
- package/apps/cli/src/haya-pet.js +64 -0
- package/apps/cli/test/haya-pet.test.mjs +41 -0
- package/docs/architecture.md +8 -2
- package/docs/known-issues.md +57 -5
- package/docs/troubleshooting.md +2 -1
- package/package.json +23 -1
- package/packages/adapters/src/codex-guardian.js +131 -0
- package/packages/adapters/src/codex-hooks.js +11 -2
- package/packages/adapters/test/codex-guardian.test.mjs +174 -0
- package/packages/cli-core/src/codex-guardian-watcher.js +136 -0
- package/packages/cli-core/src/codex-rollout-fs.js +88 -0
- package/packages/cli-core/src/codex-transcript-watcher.js +2 -65
- package/packages/cli-core/test/codex-guardian-watcher.test.mjs +217 -0
package/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,23 @@ All notable changes to HAYA Pet are documented here. This project adheres to
|
|
|
7
7
|
> 0.2.0 npm publish; they are listed under 0.2.1, which is the first version that
|
|
8
8
|
> ships them.
|
|
9
9
|
|
|
10
|
+
## [0.2.6]
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- **Codex "Approve for me" no longer shows a false *waiting for approval*.**
|
|
14
|
+
With `approvals_reviewer = auto_review` (the TUI's "Approve for me"; legacy
|
|
15
|
+
config alias `guardian_subagent`), Codex routes approval requests to a
|
|
16
|
+
guardian subagent and never prompts the user — but its `PermissionRequest`
|
|
17
|
+
hook still fires when the request is created, so the pet sat on *waiting for
|
|
18
|
+
approval* for the entire auto-review (and the approved command's run). A new
|
|
19
|
+
guardian-review watcher tails the guardian's own session rollout (the only
|
|
20
|
+
persisted trace of the review) and reports event-backed states instead:
|
|
21
|
+
*reviewing* while the guardian assesses, *running tools* on an `allow`
|
|
22
|
+
verdict, *thinking* on a `deny` (the rejection goes back to the model — no
|
|
23
|
+
human decision is pending). When the reviewer is the user (`approvals_reviewer
|
|
24
|
+
= "user"`, "Ask for approval"), nothing changes: *waiting for approval* still
|
|
25
|
+
shows until the user decides.
|
|
26
|
+
|
|
10
27
|
## [0.2.4]
|
|
11
28
|
|
|
12
29
|
### Fixed
|
package/README.md
CHANGED
|
@@ -187,10 +187,14 @@ Why opt in? Both clients show a one-time trust prompt when hooks are added. HAYA
|
|
|
187
187
|
Pet lets you decide when to approve that instead of surprising you in the middle
|
|
188
188
|
of work.
|
|
189
189
|
|
|
190
|
-
Codex
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
190
|
+
Codex live status combines three sources: hooks report `thinking`/`idle` and
|
|
191
|
+
approval requests, a transcript watcher reports tool/file activity, and a
|
|
192
|
+
guardian-review watcher tracks Codex's **"Approve for me"** auto-reviewer — the
|
|
193
|
+
pet shows *reviewing* while the guardian assesses a request and only shows
|
|
194
|
+
*waiting for approval* when Codex actually asks you ("Ask for approval" mode).
|
|
195
|
+
Per-tool `PreToolUse` hooks still depend on an upstream Codex gap
|
|
196
|
+
([openai/codex#16732](https://github.com/openai/codex/issues/16732)); the
|
|
197
|
+
transcript watcher covers that in the meantime.
|
|
194
198
|
|
|
195
199
|
For any client, you can ask HAYA Pet to infer rough activity from terminal
|
|
196
200
|
output:
|
package/apps/cli/src/haya-pet.js
CHANGED
|
@@ -9,6 +9,7 @@ import { injectClaudeHooks as defaultInjectClaudeHooks } from "../../../packages
|
|
|
9
9
|
import { injectCodexHooks as defaultInjectCodexHooks } from "../../../packages/cli-core/src/codex-hook-injection.js";
|
|
10
10
|
import { watchClaudeTranscript as defaultWatchClaudeTranscript } from "../../../packages/cli-core/src/claude-transcript-watcher.js";
|
|
11
11
|
import { watchCodexTranscript as defaultWatchCodexTranscript } from "../../../packages/cli-core/src/codex-transcript-watcher.js";
|
|
12
|
+
import { watchCodexGuardianReviews as defaultWatchCodexGuardianReviews } from "../../../packages/cli-core/src/codex-guardian-watcher.js";
|
|
12
13
|
import { ensureCompanionConnection } from "../../../packages/cli-core/src/companion-launcher.js";
|
|
13
14
|
import { createIpcClient as defaultCreateIpcClient } from "../../../packages/daemon-core/src/ipc-server.js";
|
|
14
15
|
import { getDefaultPaths } from "../../../packages/platform-core/src/paths.js";
|
|
@@ -128,6 +129,8 @@ async function runRunCommand(parsed, dependencies) {
|
|
|
128
129
|
const injectCodexHooks = dependencies.injectCodexHooks ?? defaultInjectCodexHooks;
|
|
129
130
|
const watchClaudeTranscript = dependencies.watchClaudeTranscript ?? defaultWatchClaudeTranscript;
|
|
130
131
|
const watchCodexTranscript = dependencies.watchCodexTranscript ?? defaultWatchCodexTranscript;
|
|
132
|
+
const watchCodexGuardianReviews =
|
|
133
|
+
dependencies.watchCodexGuardianReviews ?? defaultWatchCodexGuardianReviews;
|
|
131
134
|
const print = dependencies.print ?? defaultPrint;
|
|
132
135
|
const env = dependencies.env ?? process.env;
|
|
133
136
|
const now = dependencies.now ?? Date.now;
|
|
@@ -251,6 +254,50 @@ async function runRunCommand(parsed, dependencies) {
|
|
|
251
254
|
watcher.stop();
|
|
252
255
|
previousStopWatcher();
|
|
253
256
|
};
|
|
257
|
+
|
|
258
|
+
// With "Approve for me" (approvals_reviewer=auto_review, legacy alias
|
|
259
|
+
// guardian_subagent), Codex routes approval requests to a guardian
|
|
260
|
+
// subagent and never shows the human approval UI — yet the
|
|
261
|
+
// PermissionRequest hook still fires at request creation, which used to
|
|
262
|
+
// pin the pet on a false "waiting for approval" for the whole review.
|
|
263
|
+
// The guardian's own rollout is the only observable record of the
|
|
264
|
+
// review, so tail it: a review turn starting proves the agent is
|
|
265
|
+
// reviewing; an "allow" verdict proves the action proceeds; a "deny"
|
|
266
|
+
// verdict goes back to the model, which keeps working. An unreadable
|
|
267
|
+
// verdict reports nothing — a pending cue is never cleared on a guess.
|
|
268
|
+
const guardianWatcher = watchCodexGuardianReviews({
|
|
269
|
+
homeDir: dependencies.homeDir,
|
|
270
|
+
sessionsRoot: dependencies.codexSessionsRoot,
|
|
271
|
+
startedAt: now(),
|
|
272
|
+
onReviewEvent: (event) => {
|
|
273
|
+
hookDebugLog(env, now, {
|
|
274
|
+
source: "codex_guardian",
|
|
275
|
+
event: event.type,
|
|
276
|
+
outcome: event.outcome
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
const report = resolveGuardianStateReport(event);
|
|
280
|
+
if (!report) {
|
|
281
|
+
return;
|
|
282
|
+
}
|
|
283
|
+
messageSender
|
|
284
|
+
.send({
|
|
285
|
+
type: "state",
|
|
286
|
+
sessionId,
|
|
287
|
+
state: report.state,
|
|
288
|
+
summary: report.summary,
|
|
289
|
+
confidence: 0.85,
|
|
290
|
+
source: "client_log",
|
|
291
|
+
updatedAt: now()
|
|
292
|
+
})
|
|
293
|
+
.catch(() => {});
|
|
294
|
+
}
|
|
295
|
+
});
|
|
296
|
+
const stopWithoutGuardian = stopWatcher;
|
|
297
|
+
stopWatcher = () => {
|
|
298
|
+
guardianWatcher.stop();
|
|
299
|
+
stopWithoutGuardian();
|
|
300
|
+
};
|
|
254
301
|
}
|
|
255
302
|
}
|
|
256
303
|
|
|
@@ -276,6 +323,23 @@ async function runRunCommand(parsed, dependencies) {
|
|
|
276
323
|
}
|
|
277
324
|
}
|
|
278
325
|
|
|
326
|
+
// Map a guardian review event to the pet state it proves. waiting_approval is
|
|
327
|
+
// deliberately NOT among these: with the guardian reviewing, the user is not
|
|
328
|
+
// being asked anything, and after a deny the request is resolved (the model
|
|
329
|
+
// receives the rejection and continues) — there is no pending human decision.
|
|
330
|
+
function resolveGuardianStateReport(event) {
|
|
331
|
+
if (event.type === "review_started") {
|
|
332
|
+
return { state: "reviewing", summary: "agent reviewing approval" };
|
|
333
|
+
}
|
|
334
|
+
if (event.type === "review_finished" && event.outcome === "allow") {
|
|
335
|
+
return { state: "running_tool", summary: "reviewer approved" };
|
|
336
|
+
}
|
|
337
|
+
if (event.type === "review_finished" && event.outcome === "deny") {
|
|
338
|
+
return { state: "thinking", summary: "reviewer denied" };
|
|
339
|
+
}
|
|
340
|
+
return undefined;
|
|
341
|
+
}
|
|
342
|
+
|
|
279
343
|
// Resolve whether live-status hooks should be injected for this run (any
|
|
280
344
|
// hook-capable client). Precedence: HAYA_PET_NO_HOOKS forces off, HAYA_PET_HOOKS
|
|
281
345
|
// forces on (per-run overrides), otherwise the persisted `haya-pet hooks on/off`
|
|
package/apps/cli/test/haya-pet.test.mjs
CHANGED
|
@@ -483,6 +483,47 @@ test("codex hooks also start a transcript watcher for tool activity", async () =
|
|
|
483
483
|
assert.ok(sent.every((message) => message.updatedAt === undefined || message.updatedAt === 42));
|
|
484
484
|
});
|
|
485
485
|
|
|
486
|
+
test("codex hooks also start a guardian-review watcher that reports review states", async () => {
|
|
487
|
+
const sent = [];
|
|
488
|
+
let fireReviewEvent;
|
|
489
|
+
let stopped = false;
|
|
490
|
+
|
|
491
|
+
await runAiPet(["run", "--client", "codex", "--", "codex"], {
|
|
492
|
+
cwd: process.cwd(),
|
|
493
|
+
env: { USERPROFILE: "C:\\Users\\A" },
|
|
494
|
+
now: () => 42,
|
|
495
|
+
heartbeatIntervalMs: 10,
|
|
496
|
+
send: async (message) => sent.push(message),
|
|
497
|
+
createStateFile: hooksStateFile(true),
|
|
498
|
+
injectCodexHooks: () => ({ profileName: "haya-pet", cleanup: () => {} }),
|
|
499
|
+
watchCodexTranscript: () => ({ stop: () => {} }),
|
|
500
|
+
watchCodexGuardianReviews: ({ onReviewEvent }) => {
|
|
501
|
+
fireReviewEvent = onReviewEvent;
|
|
502
|
+
return { stop: () => { stopped = true; } };
|
|
503
|
+
},
|
|
504
|
+
runGenericCommand: async (options) => {
|
|
505
|
+
fireReviewEvent({ type: "review_started" });
|
|
506
|
+
fireReviewEvent({ type: "review_finished", outcome: "allow" });
|
|
507
|
+
fireReviewEvent({ type: "review_started" });
|
|
508
|
+
fireReviewEvent({ type: "review_finished", outcome: "deny" });
|
|
509
|
+
// An unreadable verdict must not change the state (leave the cue as-is).
|
|
510
|
+
fireReviewEvent({ type: "review_finished", outcome: undefined });
|
|
511
|
+
return { sessionId: options.sessionId, pid: 1, exitCode: 0 };
|
|
512
|
+
}
|
|
513
|
+
});
|
|
514
|
+
|
|
515
|
+
assert.ok(stopped, "guardian watcher is stopped after the wrapped command exits");
|
|
516
|
+
const reviewStates = sent
|
|
517
|
+
.filter((message) => message.type === "state" && message.source === "client_log")
|
|
518
|
+
.map((message) => [message.state, message.summary]);
|
|
519
|
+
assert.deepEqual(reviewStates, [
|
|
520
|
+
["reviewing", "agent reviewing approval"],
|
|
521
|
+
["running_tool", "reviewer approved"],
|
|
522
|
+
["reviewing", "agent reviewing approval"],
|
|
523
|
+
["thinking", "reviewer denied"]
|
|
524
|
+
]);
|
|
525
|
+
});
|
|
526
|
+
|
|
486
527
|
test("codex hooks are skipped (with a notice) when the user passes their own -p", async () => {
|
|
487
528
|
const calls = [];
|
|
488
529
|
let injected = 0;
|
package/docs/architecture.md
CHANGED
|
@@ -70,8 +70,14 @@ profile, so if the user already passes `-p/--profile`, injection is skipped with
|
|
|
70
70
|
notice. Codex's hook command must be unquoted at the program position (it runs via
|
|
71
71
|
`cmd /c`, which strips a leading quote) and its matchers can't use look-around
|
|
72
72
|
(Rust regex) — see [known-issues.md](known-issues.md). Codex's L4 is **partial**:
|
|
73
|
-
`PreToolUse
|
|
74
|
-
|
|
73
|
+
`PreToolUse` doesn't fire upstream yet, so tool activity comes from an L3
|
|
74
|
+
transcript watcher tailing the session rollout. `PermissionRequest` fires, but
|
|
75
|
+
once at approval-request creation — before Codex routes the request to either
|
|
76
|
+
the user or its guardian auto-reviewer ("Approve for me"), which never prompts
|
|
77
|
+
the user at all. An L3 **guardian-trunk watcher** tails the guardian's own
|
|
78
|
+
rollout (`source: {subagent:{other:"guardian"}}`, parented to the main thread)
|
|
79
|
+
and refines the state: review running → `reviewing`, verdict allow →
|
|
80
|
+
`running_tool`, verdict deny → `thinking`.
|
|
75
81
|
|
|
76
82
|
Hooks alone can't see one moment: clients emit **no event when the user accepts a
|
|
77
83
|
permission prompt** (denial and completion are observable; the accept click is
|
package/docs/known-issues.md
CHANGED
|
@@ -2,6 +2,52 @@
|
|
|
2
2
|
|
|
3
3
|
Issues found in live use, with their current status.
|
|
4
4
|
|
|
5
|
+
## ✅ Resolved: false "waiting for approval" while Codex auto-reviews an approval (Approve for me)
|
|
6
|
+
|
|
7
|
+
- **Symptom:** Running Codex under the pet with the **"Approve for me"** preset
|
|
8
|
+
(`approvals_reviewer = auto_review`; the user's config had the legacy alias
|
|
9
|
+
`guardian_subagent`), the pet showed *waiting for approval* whenever an action
|
|
10
|
+
needed approval — even though Codex's guardian was reviewing it automatically
|
|
11
|
+
and the user was never asked anything. The false state lasted the whole review
|
|
12
|
+
(~8–30 s per request, up to Codex's 90 s review timeout) plus the approved
|
|
13
|
+
command's runtime.
|
|
14
|
+
- **Root cause (verified against codex-rs 0.139.0 source + a live trunk
|
|
15
|
+
rollout):** Codex fires the `PermissionRequest` hook once, at approval-request
|
|
16
|
+
creation, **before** routing — and for guardian-routed requests the human
|
|
17
|
+
approval UI is *never* shown: a guardian `allow` lets the action proceed; a
|
|
18
|
+
guardian `deny` returns the rationale to the **model** as a rejected tool call
|
|
19
|
+
("This action was rejected due to unacceptable risk. …"), so no human decision
|
|
20
|
+
is ever pending. Our Codex hook table mapped `PermissionRequest` →
|
|
21
|
+
`waiting_approval` unconditionally. No better hook exists: nothing fires on
|
|
22
|
+
guardian start/finish (the guardian session is `SubAgentSource::Other`, which
|
|
23
|
+
is excluded from Subagent hooks), and `GuardianAssessment` events are
|
|
24
|
+
explicitly not persisted to the main rollout (`rollout/src/policy.rs`).
|
|
25
|
+
- **Fix:** an **L3 guardian-trunk watcher** (`codex-guardian-watcher.js` +
|
|
26
|
+
`adapters/codex-guardian.js`). The guardian runs as its own Codex session that
|
|
27
|
+
writes its own rollout under `~/.codex/sessions` — session_meta has
|
|
28
|
+
`source: {subagent: {other: "guardian"}}` and `parent_thread_id` = the main
|
|
29
|
+
thread; each review is one turn (`task_started` → `task_complete` with the
|
|
30
|
+
verdict JSON in `last_agent_message`, e.g. `{"outcome":"allow"}`). The watcher
|
|
31
|
+
binds the trunk to the wrapped session's main thread id and maps real events:
|
|
32
|
+
review turn starts → **reviewing**; verdict `allow` → **running_tool**
|
|
33
|
+
("reviewer approved" — the action verifiably proceeds); verdict `deny` →
|
|
34
|
+
**thinking** ("reviewer denied" — the model received the rejection and keeps
|
|
35
|
+
working). An unreadable verdict reports nothing, so a pending cue is never
|
|
36
|
+
cleared on a guess. With `approvals_reviewer = "user"` ("Ask for approval")
|
|
37
|
+
there is no trunk and behavior is unchanged: `PermissionRequest` →
|
|
38
|
+
*waiting for approval* until the user decides (process-tree/denial detection
|
|
39
|
+
resolves it, as before).
|
|
40
|
+
- **Known limitations (accepted):** (1) A ≤ ~1 s *waiting for approval* flicker
|
|
41
|
+
can precede *reviewing* (the hook fires immediately; the trunk poll is 700 ms).
|
|
42
|
+
(2) Reviews of a **collab subagent's** actions (multi-agent runs) have their
|
|
43
|
+
own trunks keyed to the subagent's thread and are not watched; a subagent's
|
|
44
|
+
`PermissionRequest` can still briefly show *waiting for approval* until the
|
|
45
|
+
next main-session event. (3) After a guardian deny the pet shows *thinking*,
|
|
46
|
+
not *waiting for approval* — by design: Codex resolves the request itself and
|
|
47
|
+
the model decides what to do next (it may ask the user in chat, which then
|
|
48
|
+
surfaces as turn-end *idle*). The TUI's passive `/approve` denial-override
|
|
49
|
+
picker is not a blocking prompt.
|
|
50
|
+
|
|
5
51
|
## ✅ Resolved: pet stuck on "waiting for approval" after a manual denial
|
|
6
52
|
|
|
7
53
|
- **Symptom:** With Claude Code hooks enabled, denying a permission prompt left the
|
|
@@ -176,10 +222,16 @@ observation (`--observe`) or L1 lifecycle as the fallback. Current state:
|
|
|
176
222
|
activity is covered by an L3 Codex transcript watcher that tails
|
|
177
223
|
`~/.codex/sessions` JSONL: normal tools report `running_tool`, `apply_patch`
|
|
178
224
|
reports `editing_files`, and HAYA Pet returns to `thinking` after active tool
|
|
179
|
-
calls drain.
|
|
180
|
-
|
|
181
|
-
approval-
|
|
182
|
-
|
|
225
|
+
calls drain.
|
|
226
|
+
- **`PermissionRequest` fires** (confirmed live on 0.139.0), but **once, at
|
|
227
|
+
approval-request creation, before routing** — under "Approve for me"
|
|
228
|
+
(`approvals_reviewer = auto_review` / legacy `guardian_subagent`) the user is
|
|
229
|
+
never actually prompted, so the hook alone over-reports *waiting for
|
|
230
|
+
approval*. An L3 **guardian-trunk watcher** tails the guardian reviewer's own
|
|
231
|
+
rollout (`source: {subagent:{other:"guardian"}}`, `parent_thread_id` = main
|
|
232
|
+
thread) and refines the state: review running → *reviewing*, verdict `allow`
|
|
233
|
+
→ *running_tool*, verdict `deny` → *thinking*. See the resolved
|
|
234
|
+
false-waiting-for-approval entry above.
|
|
183
235
|
- **Antigravity (`agy`)** — **not yet implemented** (no hook injection). Uses
|
|
184
236
|
`--observe` or L1 lifecycle. A Gemini-schema hook adapter is a planned follow-up.
|
|
185
237
|
- **Generic / unknown** — no hooks; PTY observation (`--observe`) or L1 lifecycle.
|
|
@@ -194,7 +246,7 @@ remains a possible follow-up.
|
|
|
194
246
|
|---|---|---|
|
|
195
247
|
| L1 | process wrapper | default; session lifecycle + exit code |
|
|
196
248
|
| L4 | client hooks | opt-in via `haya-pet hooks on` (Claude Code full, Codex partial); reports through `haya-pet state …` |
|
|
197
|
-
| L3 | client logs | Codex session JSONL watcher for tool activity; Claude denial recovery; future clients can add similar transcript adapters |
|
|
249
|
+
| L3 | client logs | Codex session JSONL watcher for tool activity; Codex guardian-trunk watcher for auto-review status; Claude denial recovery; future clients can add similar transcript adapters |
|
|
198
250
|
| L3 | process tree | approval-accept detection: a `waiting_approval` session flips to `running_tool` when the approved command verifiably starts under the client's pid |
|
|
199
251
|
| L2 | PTY output scraping | opt-in via `--observe` (terminal-fidelity tradeoff) |
|
|
200
252
|
|
package/docs/troubleshooting.md
CHANGED
|
@@ -16,7 +16,8 @@ deferred problems with known root causes.
|
|
|
16
16
|
| Terminal scroll / Shift+Tab / backspace odd while a CLI runs under `haya-pet run` | Fixed — `haya-pet run` now uses native passthrough by default (full fidelity). If you opted into `--observe`, drop it. See [known-issues.md](known-issues.md). |
|
|
17
17
|
| Pet shows only **idle/lifecycle** while **Claude Code** works | Live in-session status is opt-in: run `haya-pet hooks on` once (persisted). The first `haya-pet run` afterward shows a one-time Claude *review hooks* prompt — approve it. Also make sure the companion is running (`haya-pet start`). Check the toggle with `haya-pet hooks status`. |
|
|
18
18
|
| Typing doesn't work / **Claude Code** TUI frozen under `haya-pet run` | You have hooks enabled and Claude is showing its *review hooks* trust prompt (approve it once), or your Claude is too old for `--settings`. Run `haya-pet hooks off` (or set `HAYA_PET_NO_HOOKS=1`) for native passthrough with lifecycle-only status — typing and Shift+Tab work normally. |
|
|
19
|
-
| Pet shows only **idle/lifecycle** while **Codex** works | Live status is opt-in: run `haya-pet hooks on` once (persisted, global), then `haya-pet run --client codex -- codex`; approve Codex's one-time *review hooks* prompt. `thinking`/`idle` come from hooks
|
|
19
|
+
| Pet shows only **idle/lifecycle** while **Codex** works | Live status is opt-in: run `haya-pet hooks on` once (persisted, global), then `haya-pet run --client codex -- codex`; approve Codex's one-time *review hooks* prompt. `thinking`/`idle` come from hooks, `running_tool`/`editing_files` from a transcript watcher, and approval states from the `PermissionRequest` hook plus a guardian-review watcher. |
|
|
20
|
+
| Pet showed **waiting for approval** while **Codex** auto-reviewed the request ("Approve for me") | Fixed — with `approvals_reviewer = auto_review` (legacy `guardian_subagent`) Codex's guardian decides without asking you; the pet now shows **reviewing** during the assessment, then **running tools** on an allow verdict or **thinking** on a deny. *Waiting for approval* still shows when Codex actually asks you (`approvals_reviewer = "user"`). |
|
|
20
21
|
| **Codex** live status didn't turn on / you pass your own `-p`/`--profile` | Codex allows only one profile, so haya-pet skips hook injection when you supply your own and prints a notice. Drop your `-p` for that run to get live status, or accept lifecycle-only. |
|
|
21
22
|
| Pet shows only **idle/lifecycle** while **Antigravity** (`agy`) works | Antigravity has no hook adapter yet. Add `--observe` for coarse PTY activity, or accept lifecycle-only status. |
|
|
22
23
|
| Claude hooks fail with **"hook exited with code 1"** | The hook command must not bake an **fnm**/node-manager *per-shell* node path (`…\fnm_multishells\<pid>_…\node.exe`) that dies when the shell exits. haya-pet bakes the stable `realpath`-resolved node path into the temp settings instead. Update to the latest version. |
|
package/package.json
CHANGED
|
@@ -1,8 +1,30 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hayasaka7/haya-pet",
|
|
3
|
-
"version": "0.2.5",
|
|
3
|
+
"version": "0.2.6",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Generic AI CLI pet runtime foundation.",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"haya-pet",
|
|
8
|
+
"desktop-pet",
|
|
9
|
+
"virtual-pet",
|
|
10
|
+
"desktop-companion",
|
|
11
|
+
"ai",
|
|
12
|
+
"ai-agents",
|
|
13
|
+
"coding-agents",
|
|
14
|
+
"ai-cli",
|
|
15
|
+
"codex",
|
|
16
|
+
"codex-cli",
|
|
17
|
+
"claude-code",
|
|
18
|
+
"gemini-cli",
|
|
19
|
+
"aider",
|
|
20
|
+
"antigravity",
|
|
21
|
+
"electron",
|
|
22
|
+
"cli",
|
|
23
|
+
"terminal",
|
|
24
|
+
"developer-tools",
|
|
25
|
+
"local-first",
|
|
26
|
+
"productivity"
|
|
27
|
+
],
|
|
6
28
|
"license": "MIT",
|
|
7
29
|
"author": "Ai Hayasaka",
|
|
8
30
|
"repository": {
|
|
package/packages/adapters/src/codex-guardian.js
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
// Pure parser for Codex guardian-review rollouts. When `approvals_reviewer` is
|
|
2
|
+
// `auto_review` (legacy alias `guardian_subagent`, the TUI's "Approve for me"),
|
|
3
|
+
// Codex routes approval requests to a guardian subagent instead of prompting the
|
|
4
|
+
// user — the PermissionRequest hook still fires at request creation, but the
|
|
5
|
+
// human approval UI never appears. The only persisted trace of the review is a
|
|
6
|
+
// separate "guardian trunk" rollout under ~/.codex/sessions whose session_meta
|
|
7
|
+
// carries source.subagent.other == "guardian" and parent_thread_id == the main
|
|
8
|
+
// thread; each review is one turn there (task_started → task_complete with the
|
|
9
|
+
// verdict JSON in last_agent_message). Verified against codex-cli 0.139.0
|
|
10
|
+
// (codex-rs guardian/review_session.rs; rollout/src/policy.rs excludes the
|
|
11
|
+
// GuardianAssessment events themselves from persistence).
|
|
12
|
+
|
|
13
|
+
// Classify a rollout's first JSONL line (the session_meta record) so watchers
|
|
14
|
+
// can tell main sessions, guardian review sessions, and other subagents apart.
|
|
15
|
+
export function classifyCodexSessionMeta(line) {
|
|
16
|
+
let entry;
|
|
17
|
+
try {
|
|
18
|
+
entry = JSON.parse(line);
|
|
19
|
+
} catch {
|
|
20
|
+
return undefined;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
if (entry?.type !== "session_meta") {
|
|
24
|
+
return undefined;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
const payload = entry.payload;
|
|
28
|
+
if (!payload || typeof payload !== "object") {
|
|
29
|
+
return undefined;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const threadId = typeof payload.id === "string" ? payload.id : undefined;
|
|
33
|
+
const parentThreadId =
|
|
34
|
+
typeof payload.parent_thread_id === "string" ? payload.parent_thread_id : undefined;
|
|
35
|
+
|
|
36
|
+
return { kind: resolveSessionKind(payload), threadId, parentThreadId };
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
function resolveSessionKind(payload) {
|
|
40
|
+
const subagentSource =
|
|
41
|
+
typeof payload.source === "object" && payload.source !== null
|
|
42
|
+
? payload.source.subagent
|
|
43
|
+
: undefined;
|
|
44
|
+
|
|
45
|
+
if (typeof subagentSource === "object" && subagentSource !== null) {
|
|
46
|
+
if (subagentSource.other === "guardian") {
|
|
47
|
+
return "guardian";
|
|
48
|
+
}
|
|
49
|
+
return "subagent";
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
if (payload.thread_source === "subagent") {
|
|
53
|
+
return "subagent";
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
return "main";
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// Parse one guardian-trunk JSONL line into a review lifecycle event. Each review
|
|
60
|
+
// turn yields task_started (the guardian began assessing an approval request)
|
|
61
|
+
// and task_complete (verdict in last_agent_message: `{"outcome":"allow"|"deny"}`,
|
|
62
|
+
// optionally with risk_level/rationale). An unreadable verdict maps to
|
|
63
|
+
// outcome: undefined so callers can leave the pet state untouched (safe: the
|
|
64
|
+
// existing waiting cue stays up rather than being cleared on a guess).
|
|
65
|
+
export function parseGuardianTranscriptLine(line, options = {}) {
|
|
66
|
+
let entry;
|
|
67
|
+
try {
|
|
68
|
+
entry = JSON.parse(line);
|
|
69
|
+
} catch {
|
|
70
|
+
return undefined;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
if (entry?.type !== "event_msg") {
|
|
74
|
+
return undefined;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// Same replay guard as the main transcript parser: skip records from before
|
|
78
|
+
// the current session, keep records without a parseable timestamp.
|
|
79
|
+
const minTimestampMs = options.minTimestampMs ?? 0;
|
|
80
|
+
if (minTimestampMs > 0 && typeof entry.timestamp === "string") {
|
|
81
|
+
const timestampMs = Date.parse(entry.timestamp);
|
|
82
|
+
if (Number.isFinite(timestampMs) && timestampMs < minTimestampMs) {
|
|
83
|
+
return undefined;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
const payload = entry.payload;
|
|
88
|
+
if (!payload || typeof payload !== "object") {
|
|
89
|
+
return undefined;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
if (payload.type === "task_started") {
|
|
93
|
+
return { type: "review_started" };
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
if (payload.type === "task_complete") {
|
|
97
|
+
return { type: "review_finished", outcome: parseVerdictOutcome(payload.last_agent_message) };
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
return undefined;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
function parseVerdictOutcome(lastAgentMessage) {
|
|
104
|
+
if (typeof lastAgentMessage !== "string") {
|
|
105
|
+
return undefined;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
let verdict;
|
|
109
|
+
try {
|
|
110
|
+
verdict = JSON.parse(lastAgentMessage);
|
|
111
|
+
} catch {
|
|
112
|
+
return undefined;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
const outcome = verdict?.outcome;
|
|
116
|
+
return outcome === "allow" || outcome === "deny" ? outcome : undefined;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
export function parseGuardianTranscriptLines(lines, options = {}) {
|
|
120
|
+
const events = [];
|
|
121
|
+
for (const line of lines) {
|
|
122
|
+
if (typeof line !== "string" || line.trim() === "") {
|
|
123
|
+
continue;
|
|
124
|
+
}
|
|
125
|
+
const event = parseGuardianTranscriptLine(line, options);
|
|
126
|
+
if (event) {
|
|
127
|
+
events.push(event);
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
return events;
|
|
131
|
+
}
|
|
package/packages/adapters/src/codex-hooks.js
CHANGED
|
@@ -33,8 +33,17 @@
|
|
|
33
33
|
// - NOT FIRING (0.137): PreToolUse — so `running_tool` / `editing_files` never
|
|
34
34
|
// arrive in practice yet (upstream coverage gap, openai/codex#16732). The
|
|
35
35
|
// entries are kept (harmless no-ops) so they light up once Codex fixes it.
|
|
36
|
-
// -
|
|
37
|
-
//
|
|
36
|
+
// - PermissionRequest (verified live on 0.139.0, semantics from codex-rs
|
|
37
|
+
// source): fires ONCE at approval-request creation, BEFORE the request is
|
|
38
|
+
// routed to the guardian auto-reviewer or the user. Under "Approve for me"
|
|
39
|
+
// (approvals_reviewer=auto_review, legacy alias guardian_subagent) the user
|
|
40
|
+
// is never prompted at all, so waiting_approval from this hook over-reports;
|
|
41
|
+
// the wrapper's codex-guardian-watcher refines it to reviewing /
|
|
42
|
+
// running_tool / thinking from the guardian's own rollout. The guardian
|
|
43
|
+
// fires NO hooks itself (SubAgentSource::Other is excluded from Subagent
|
|
44
|
+
// hooks), so these entries can't see it.
|
|
45
|
+
// - UNTESTED: PreCompact / SubagentStart|Stop (no compaction / subagent
|
|
46
|
+
// occurred in the probe).
|
|
38
47
|
//
|
|
39
48
|
// OPEN QUESTION (injection): unlike `claude --settings <file>`, Codex has no
|
|
40
49
|
// per-invocation settings-file flag. Candidate non-mutating paths, best first:
|
|
package/packages/adapters/test/codex-guardian.test.mjs
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { test } from "../../../test/harness.mjs";
|
|
3
|
+
import {
|
|
4
|
+
classifyCodexSessionMeta,
|
|
5
|
+
parseGuardianTranscriptLine,
|
|
6
|
+
parseGuardianTranscriptLines
|
|
7
|
+
} from "../src/codex-guardian.js";
|
|
8
|
+
|
|
9
|
+
function metaLine(payload) {
|
|
10
|
+
return JSON.stringify({ timestamp: "2026-06-12T01:36:41.556Z", type: "session_meta", payload });
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
test("classifyCodexSessionMeta identifies a main session", () => {
|
|
14
|
+
const line = metaLine({
|
|
15
|
+
id: "main-1",
|
|
16
|
+
parent_thread_id: null,
|
|
17
|
+
originator: "codex-tui",
|
|
18
|
+
source: "cli",
|
|
19
|
+
thread_source: "user"
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
assert.deepEqual(classifyCodexSessionMeta(line), {
|
|
23
|
+
kind: "main",
|
|
24
|
+
threadId: "main-1",
|
|
25
|
+
parentThreadId: undefined
|
|
26
|
+
});
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
test("classifyCodexSessionMeta identifies a guardian review session", () => {
|
|
30
|
+
const line = metaLine({
|
|
31
|
+
id: "guardian-1",
|
|
32
|
+
parent_thread_id: "main-1",
|
|
33
|
+
source: { subagent: { other: "guardian" } },
|
|
34
|
+
thread_source: "subagent"
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
assert.deepEqual(classifyCodexSessionMeta(line), {
|
|
38
|
+
kind: "guardian",
|
|
39
|
+
threadId: "guardian-1",
|
|
40
|
+
parentThreadId: "main-1"
|
|
41
|
+
});
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
test("classifyCodexSessionMeta identifies non-guardian subagent sessions", () => {
|
|
45
|
+
const bySource = metaLine({
|
|
46
|
+
id: "agent-1",
|
|
47
|
+
parent_thread_id: "main-1",
|
|
48
|
+
source: { subagent: { other: "collab" } }
|
|
49
|
+
});
|
|
50
|
+
const byThreadSource = metaLine({
|
|
51
|
+
id: "agent-2",
|
|
52
|
+
parent_thread_id: "main-1",
|
|
53
|
+
source: "cli",
|
|
54
|
+
thread_source: "subagent"
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
assert.equal(classifyCodexSessionMeta(bySource).kind, "subagent");
|
|
58
|
+
assert.equal(classifyCodexSessionMeta(byThreadSource).kind, "subagent");
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
test("classifyCodexSessionMeta rejects non-meta and malformed lines", () => {
|
|
62
|
+
assert.equal(classifyCodexSessionMeta("not json"), undefined);
|
|
63
|
+
assert.equal(classifyCodexSessionMeta("{}"), undefined);
|
|
64
|
+
assert.equal(
|
|
65
|
+
classifyCodexSessionMeta(JSON.stringify({ type: "response_item", payload: { type: "message" } })),
|
|
66
|
+
undefined
|
|
67
|
+
);
|
|
68
|
+
assert.equal(classifyCodexSessionMeta(metaLine(null)), undefined);
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
test("parseGuardianTranscriptLine maps task_started to review_started", () => {
  // A task_started event_msg marks the beginning of one guardian review.
  const record = {
    timestamp: "2026-06-12T01:36:41.557Z",
    type: "event_msg",
    payload: { type: "task_started", turn_id: "turn-1" }
  };

  const parsed = parseGuardianTranscriptLine(JSON.stringify(record));

  assert.deepEqual(parsed, { type: "review_started" });
});
|
|
80
|
+
|
|
81
|
+
test("parseGuardianTranscriptLine extracts the verdict from task_complete", () => {
  // Serializes a task_complete event_msg around the given payload fields.
  const taskComplete = (fields) =>
    JSON.stringify({
      type: "event_msg",
      payload: { type: "task_complete", ...fields }
    });

  const allowLine = taskComplete({
    turn_id: "turn-1",
    last_agent_message: '{"outcome":"allow"}'
  });
  // The verdict must be found even when other keys surround "outcome".
  const denyLine = taskComplete({
    turn_id: "turn-2",
    last_agent_message:
      '{"risk_level":"high","user_authorization":"low","outcome":"deny","rationale":"too risky"}'
  });

  assert.deepEqual(parseGuardianTranscriptLine(allowLine), { type: "review_finished", outcome: "allow" });
  assert.deepEqual(parseGuardianTranscriptLine(denyLine), { type: "review_finished", outcome: "deny" });
});
|
|
99
|
+
|
|
100
|
+
test("parseGuardianTranscriptLine reports an unknown outcome when the verdict is unreadable", () => {
  // Serializes a task_complete event_msg around the given payload fields.
  const taskComplete = (fields) =>
    JSON.stringify({
      type: "event_msg",
      payload: { type: "task_complete", ...fields }
    });

  const unreadableVerdicts = [
    // free text instead of JSON
    taskComplete({ turn_id: "turn-1", last_agent_message: "I think it is fine" }),
    // no last_agent_message at all
    taskComplete({ turn_id: "turn-1" }),
    // JSON, but with an outcome value the test expects to be rejected
    taskComplete({ turn_id: "turn-1", last_agent_message: '{"outcome":"maybe"}' })
  ];

  // The review still counts as finished; only the outcome is unknown.
  for (const line of unreadableVerdicts) {
    assert.deepEqual(parseGuardianTranscriptLine(line), { type: "review_finished", outcome: undefined });
  }
});
|
|
118
|
+
|
|
119
|
+
test("parseGuardianTranscriptLine ignores unrelated and malformed records", () => {
  // Lines that are not review lifecycle events must produce no event.
  const ignoredLines = [
    "not json",
    JSON.stringify({ type: "event_msg", payload: { type: "token_count" } }),
    JSON.stringify({ type: "response_item", payload: { type: "message", role: "assistant" } })
  ];

  for (const line of ignoredLines) {
    assert.equal(parseGuardianTranscriptLine(line), undefined);
  }
});
|
|
134
|
+
|
|
135
|
+
test("parseGuardianTranscriptLine skips records from before the session start", () => {
  const cutoff = { minTimestampMs: Date.parse("2026-06-12T01:30:00.000Z") };

  // Stamped half an hour before the cutoff.
  const staleLine = JSON.stringify({
    timestamp: "2026-06-12T01:00:00.000Z",
    type: "event_msg",
    payload: { type: "task_started", turn_id: "turn-0" }
  });
  // Stamped half an hour after the cutoff.
  const freshLine = JSON.stringify({
    timestamp: "2026-06-12T02:00:00.000Z",
    type: "event_msg",
    payload: { type: "task_started", turn_id: "turn-1" }
  });
  // No timestamp at all: the test expects such records to pass the filter.
  const unstampedLine = JSON.stringify({
    type: "event_msg",
    payload: { type: "task_started", turn_id: "turn-2" }
  });

  assert.equal(parseGuardianTranscriptLine(staleLine, cutoff), undefined);
  assert.deepEqual(parseGuardianTranscriptLine(freshLine, cutoff), { type: "review_started" });
  assert.deepEqual(parseGuardianTranscriptLine(unstampedLine, cutoff), {
    type: "review_started"
  });
});
|
|
158
|
+
|
|
159
|
+
test("parseGuardianTranscriptLines collects events and skips blank lines", () => {
  const startedLine = JSON.stringify({
    type: "event_msg",
    payload: { type: "task_started", turn_id: "t1" }
  });
  const completedLine = JSON.stringify({
    type: "event_msg",
    payload: { type: "task_complete", turn_id: "t1", last_agent_message: '{"outcome":"allow"}' }
  });
  // Empty and whitespace-only entries are interleaved on purpose.
  const batch = ["", startedLine, " ", completedLine];

  const collected = parseGuardianTranscriptLines(batch);

  assert.deepEqual(collected, [
    { type: "review_started" },
    { type: "review_finished", outcome: "allow" }
  ]);
});
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
// Tails the Codex guardian-review trunk rollout and reports review lifecycle
|
|
2
|
+
// events. With `approvals_reviewer = auto_review` ("Approve for me"), Codex
|
|
3
|
+
// never shows the human approval UI for guardian-routed requests — the
|
|
4
|
+
// PermissionRequest hook fires at request creation, then a guardian subagent
|
|
5
|
+
// decides. No hook fires when the review starts or finishes and the
|
|
6
|
+
// GuardianAssessment events are not persisted to the main rollout, so the
|
|
7
|
+
// guardian's own rollout (one trunk per parent thread, one turn per review) is
|
|
8
|
+
// the only observable, event-backed signal. This watcher exists so the pet can
|
|
9
|
+
// show "reviewing" during the auto-review instead of a false "waiting for
|
|
10
|
+
// approval", without ever clearing a real pending approval on a guess.
|
|
11
|
+
import { join } from "node:path";
|
|
12
|
+
import {
|
|
13
|
+
classifyCodexSessionMeta,
|
|
14
|
+
parseGuardianTranscriptLines
|
|
15
|
+
} from "../../adapters/src/codex-guardian.js";
|
|
16
|
+
import { listJsonlFiles, readFirstLine, readRange, safeMtime, safeSize } from "./codex-rollout-fs.js";
|
|
17
|
+
|
|
18
|
+
// Polling period in milliseconds, used when options.pollIntervalMs is not given.
const DEFAULT_POLL_MS = 700;

// Slack in milliseconds subtracted from startedAt when filtering rollout files
// by mtime, so files touched slightly before the recorded start still qualify.
const MTIME_SKEW_MS = 2000;
|
|
20
|
+
|
|
21
|
+
/**
 * Polls the Codex sessions directory for the guardian-review rollout ("trunk")
 * of the current main session and reports every review lifecycle event that
 * appears in it.
 *
 * @param {object} [options]
 * @param {string} [options.homeDir] Home directory; defaults to USERPROFILE or
 *   HOME. Only used to derive the sessions root when `sessionsRoot` is absent.
 * @param {number} [options.startedAt=0] Epoch milliseconds of the wrapper
 *   start. Rollout files whose mtime is older than this (minus MTIME_SKEW_MS)
 *   are skipped, and the value is passed to the parser as `minTimestampMs`.
 * @param {(event: object) => void} [options.onReviewEvent] Called once for
 *   each event returned by `parseGuardianTranscriptLines`.
 * @param {number} [options.pollIntervalMs] Poll period in milliseconds.
 * @param {string} [options.sessionsRoot] Directory to scan; defaults to
 *   `<homeDir>/.codex/sessions`.
 * @param {Function} [options.setInterval] Injectable timer (for tests).
 * @param {Function} [options.clearInterval] Injectable timer (for tests).
 * @returns {{ stop(): void, _tick(): void }} `stop` cancels polling; `_tick`
 *   runs a single poll synchronously.
 */
export function watchCodexGuardianReviews(options = {}) {
  const {
    homeDir = process.env.USERPROFILE || process.env.HOME,
    startedAt = 0,
    onReviewEvent = () => {},
    pollIntervalMs = DEFAULT_POLL_MS,
    sessionsRoot,
    setInterval: setIntervalFn = setInterval,
    clearInterval: clearIntervalFn = clearInterval
  } = options;

  const root = sessionsRoot ?? (homeDir ? join(homeDir, ".codex", "sessions") : undefined);
  const minMtime = startedAt > 0 ? startedAt - MTIME_SKEW_MS : 0;

  // session_meta classifications are immutable once written, so cache them by
  // path. A file with no complete first line yet is NOT cached — it is retried
  // on the next poll (the rollout may still be flushing).
  const metaByPath = new Map();
  let mainThreadId;
  let trunkPath;
  let offset = 0;
  let carry = "";

  // Returns the cached or freshly parsed classification of a rollout file:
  // an object, null when the first line is not a usable session_meta, or
  // undefined when the first line is not complete yet.
  const classify = (file) => {
    if (metaByPath.has(file)) {
      return metaByPath.get(file);
    }
    const firstLine = readFirstLine(file);
    if (firstLine === undefined) {
      return undefined;
    }
    const meta = classifyCodexSessionMeta(firstLine) ?? null;
    metaByPath.set(file, meta);
    return meta;
  };

  // Scans the sessions tree once, binds the main thread if it is not bound
  // yet, and selects the guardian trunk that belongs to it.
  const discoverTrunk = () => {
    let newestMain;
    const guardianCandidates = [];

    for (const file of listJsonlFiles(root)) {
      const mtime = safeMtime(file);
      if (mtime < minMtime) {
        continue;
      }
      const meta = classify(file);
      if (!meta) {
        continue;
      }

      if (meta.kind === "main" && meta.threadId && (!newestMain || mtime > newestMain.mtime)) {
        newestMain = { threadId: meta.threadId, mtime };
      }
      if (meta.kind === "guardian") {
        guardianCandidates.push({ file, parentThreadId: meta.parentThreadId, mtime });
      }
    }

    // The guardian trunk only appears at the first review, usually long after
    // the main rollout — bind the main thread first, then match the trunk to
    // it so another session's (or a collab subagent's) reviews are ignored.
    mainThreadId = mainThreadId ?? newestMain?.threadId;
    if (!mainThreadId) {
      return;
    }

    // Pick the newest guardian rollout among those whose parent is OUR main
    // thread. Filtering before comparing mtimes matters: a more recently
    // written guardian trunk of a different session must not hide ours.
    let newestTrunk;
    for (const candidate of guardianCandidates) {
      if (candidate.parentThreadId !== mainThreadId) {
        continue;
      }
      if (!newestTrunk || candidate.mtime > newestTrunk.mtime) {
        newestTrunk = candidate;
      }
    }

    if (newestTrunk) {
      // Replay the trunk from the start: the first review is usually still in
      // progress when we find the file, and the per-record timestamp filter
      // keeps an earlier session's reviews from replaying as live events.
      trunkPath = newestTrunk.file;
      offset = 0;
      carry = "";
    }
  };

  // One poll: find the trunk if needed, then read and parse whatever was
  // appended since the previous poll.
  const tick = () => {
    try {
      if (!trunkPath) {
        discoverTrunk();
        if (!trunkPath) {
          return;
        }
      }

      const size = safeSize(trunkPath);
      if (size <= offset) {
        if (size < offset) {
          // The file shrank (truncated or replaced): resume from its new end
          // and drop the partial line that belonged to the old content.
          offset = size;
          carry = "";
        }
        return;
      }

      const chunk = readRange(trunkPath, offset, size);
      offset = size;

      // The last element is the (possibly empty) unterminated tail; keep it
      // for the next poll so that only complete lines are parsed.
      const lines = (carry + chunk).split("\n");
      carry = lines.pop() ?? "";

      for (const event of parseGuardianTranscriptLines(lines, { minTimestampMs: startedAt })) {
        onReviewEvent(event);
      }
    } catch {
      // best-effort: rollout surprises must never crash the wrapper
    }
  };

  const timer = setIntervalFn(tick, pollIntervalMs);
  // Do not let the polling timer keep the process alive on its own.
  if (timer && typeof timer.unref === "function") {
    timer.unref();
  }

  return {
    stop() {
      clearIntervalFn(timer);
    },
    _tick: tick
  };
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
// Shared best-effort filesystem helpers for tailing Codex rollout JSONL files
|
|
2
|
+
// (~/.codex/sessions/<y>/<m>/<d>/rollout-*.jsonl). Used by the main transcript
|
|
3
|
+
// watcher and the guardian-review watcher. Helpers swallow fs errors (except
|
|
4
|
+
// readRange, which throws so callers can retry) — rollout surprises must never crash the wrapper.
|
|
5
|
+
import { closeSync, existsSync, openSync, readdirSync, readSync, statSync } from "node:fs";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
|
|
8
|
+
// A session_meta first line is normally a few KB, but guardian trunks embed the
|
|
9
|
+
// reviewer's full base instructions (~10 KB observed); leave generous headroom.
|
|
10
|
+
const FIRST_LINE_MAX_BYTES = 262_144;
|
|
11
|
+
|
|
12
|
+
/**
 * Collects the paths of all `.jsonl` files found under `root`, descending
 * into every subdirectory.
 *
 * @param {string | undefined} root Directory to scan.
 * @returns {string[]} Full paths of the matching files; an empty array when
 *   `root` is falsy or does not exist. Directories that cannot be read are
 *   skipped silently.
 */
export function listJsonlFiles(root) {
  if (!root || !existsSync(root)) {
    return [];
  }

  const found = [];
  const pending = [root];

  // Iterative depth-first walk with an explicit stack.
  while (pending.length > 0) {
    const current = pending.pop();

    let children;
    try {
      children = readdirSync(current, { withFileTypes: true });
    } catch {
      // Unreadable directory: ignore it and keep walking.
      continue;
    }

    for (const child of children) {
      const childPath = join(current, child.name);
      if (child.isDirectory()) {
        pending.push(childPath);
        continue;
      }
      if (child.isFile() && child.name.endsWith(".jsonl")) {
        found.push(childPath);
      }
    }
  }

  return found;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Size of the file at `path` in bytes.
 *
 * @param {string} path File to stat.
 * @returns {number} The size reported by `statSync`, or 0 when the stat fails.
 */
export function safeSize(path) {
  let stats;
  try {
    stats = statSync(path);
  } catch {
    return 0;
  }
  return stats.size;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Last-modified time of the file at `path`.
 *
 * @param {string} path File to stat.
 * @returns {number} `mtimeMs` as reported by `statSync`, or 0 when the stat
 *   fails.
 */
export function safeMtime(path) {
  let stats;
  try {
    stats = statSync(path);
  } catch {
    return 0;
  }
  return stats.mtimeMs;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Reads the bytes in `[start, end)` of a file and decodes them as UTF-8.
 *
 * Unlike the other helpers in this module, open and read errors are NOT
 * swallowed here: they propagate so that a caller can leave its own read
 * offset untouched and retry later.
 *
 * @param {string} path File to read.
 * @param {number} start Byte offset of the first byte to read.
 * @param {number} end Byte offset one past the last byte to read.
 * @returns {string} The decoded text; "" when the range is empty or inverted.
 *   Shorter than requested only when the file ends before `end`.
 * @throws {Error} Whatever `openSync` / `readSync` throw.
 */
export function readRange(path, start, end) {
  const length = end - start;
  if (length <= 0) {
    return "";
  }
  const fd = openSync(path, "r");
  try {
    const buffer = Buffer.alloc(length);
    let filled = 0;
    // A single readSync may return fewer bytes than requested. Callers advance
    // their offset by the requested length, so keep reading until the range is
    // complete or the file ends (readSync returns 0 at EOF).
    while (filled < length) {
      const bytesRead = readSync(fd, buffer, filled, length - filled, start + filled);
      if (bytesRead === 0) {
        break;
      }
      filled += bytesRead;
    }
    return buffer.toString("utf8", 0, filled);
  } finally {
    closeSync(fd);
  }
}
|
|
71
|
+
|
|
72
|
+
/**
 * Returns the first newline-terminated line of a file, without the newline.
 *
 * Only the first `maxBytes` bytes are inspected. The result is undefined when
 * that prefix holds no newline (for example a rollout that was just created
 * and not flushed yet) or when the file cannot be read. Callers must treat
 * undefined as "retry later", never as a final classification.
 *
 * @param {string} path File to read.
 * @param {number} [maxBytes] Upper bound on the number of bytes examined.
 * @returns {string | undefined} The first line, or undefined as described.
 */
export function readFirstLine(path, maxBytes = FIRST_LINE_MAX_BYTES) {
  let head;
  try {
    const limit = Math.min(safeSize(path), maxBytes);
    head = readRange(path, 0, limit);
  } catch {
    return undefined;
  }

  const lineEnd = head.indexOf("\n");
  return lineEnd === -1 ? undefined : head.slice(0, lineEnd);
}
|
|
@@ -1,16 +1,10 @@
|
|
|
1
1
|
// Tails Codex session JSONL and reports tool start/finish activity. Codex hooks
|
|
2
2
|
// cover turn lifecycle, but the transcript is the reliable source for tool use
|
|
3
3
|
// when PreToolUse is unavailable.
|
|
4
|
-
import {
|
|
5
|
-
closeSync,
|
|
6
|
-
existsSync,
|
|
7
|
-
openSync,
|
|
8
|
-
readdirSync,
|
|
9
|
-
readSync,
|
|
10
|
-
statSync
|
|
11
|
-
} from "node:fs";
|
|
4
|
+
import { existsSync } from "node:fs";
|
|
12
5
|
import { join } from "node:path";
|
|
13
6
|
import { parseCodexTranscriptLines } from "../../adapters/src/codex-transcript.js";
|
|
7
|
+
import { listJsonlFiles, readRange, safeMtime, safeSize } from "./codex-rollout-fs.js";
|
|
14
8
|
|
|
15
9
|
const DEFAULT_POLL_MS = 700;
|
|
16
10
|
const MTIME_SKEW_MS = 2000;
|
|
@@ -101,60 +95,3 @@ export function discoverCodexTranscript(root, minMtime = 0) {
|
|
|
101
95
|
}
|
|
102
96
|
return newest?.file;
|
|
103
97
|
}
|
|
104
|
-
|
|
105
|
-
function listJsonlFiles(root) {
|
|
106
|
-
const files = [];
|
|
107
|
-
const stack = [root];
|
|
108
|
-
|
|
109
|
-
while (stack.length > 0) {
|
|
110
|
-
const dir = stack.pop();
|
|
111
|
-
let entries;
|
|
112
|
-
try {
|
|
113
|
-
entries = readdirSync(dir, { withFileTypes: true });
|
|
114
|
-
} catch {
|
|
115
|
-
continue;
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
for (const entry of entries) {
|
|
119
|
-
const full = join(dir, entry.name);
|
|
120
|
-
if (entry.isDirectory()) {
|
|
121
|
-
stack.push(full);
|
|
122
|
-
} else if (entry.isFile() && entry.name.endsWith(".jsonl")) {
|
|
123
|
-
files.push(full);
|
|
124
|
-
}
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
return files;
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
function safeSize(path) {
|
|
132
|
-
try {
|
|
133
|
-
return statSync(path).size;
|
|
134
|
-
} catch {
|
|
135
|
-
return 0;
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
function safeMtime(path) {
|
|
140
|
-
try {
|
|
141
|
-
return statSync(path).mtimeMs;
|
|
142
|
-
} catch {
|
|
143
|
-
return 0;
|
|
144
|
-
}
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
function readRange(path, start, end) {
|
|
148
|
-
const length = end - start;
|
|
149
|
-
if (length <= 0) {
|
|
150
|
-
return "";
|
|
151
|
-
}
|
|
152
|
-
const fd = openSync(path, "r");
|
|
153
|
-
try {
|
|
154
|
-
const buffer = Buffer.alloc(length);
|
|
155
|
-
const bytesRead = readSync(fd, buffer, 0, length, start);
|
|
156
|
-
return buffer.toString("utf8", 0, bytesRead);
|
|
157
|
-
} finally {
|
|
158
|
-
closeSync(fd);
|
|
159
|
-
}
|
|
160
|
-
}
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { appendFileSync, mkdirSync, mkdtempSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { test } from "../../../test/harness.mjs";
|
|
6
|
+
import { watchCodexGuardianReviews } from "../src/codex-guardian-watcher.js";
|
|
7
|
+
|
|
8
|
+
// Inert timer stubs spread into the watcher options: no real interval is
// scheduled, and the tests drive polling by calling watcher._tick() directly.
const noopTimers = { setInterval: () => ({}), clearInterval: () => {} };
|
|
9
|
+
|
|
10
|
+
// Builds one newline-terminated JSONL record of type "session_meta" that wraps
// the given payload.
function metaLine(payload) {
  const record = { type: "session_meta", payload };
  return JSON.stringify(record) + "\n";
}
|
|
13
|
+
|
|
14
|
+
// Builds one newline-terminated "task_started" event_msg record. The
// timestamp key is emitted (first) only when a truthy timestamp is given.
function reviewStarted(turnId = "turn-1", timestamp) {
  const record = {};
  if (timestamp) {
    record.timestamp = timestamp;
  }
  record.type = "event_msg";
  record.payload = { type: "task_started", turn_id: turnId };
  return JSON.stringify(record) + "\n";
}
|
|
21
|
+
|
|
22
|
+
// Builds one newline-terminated "task_complete" event_msg record whose
// last_agent_message is the JSON text of `{ outcome }`.
function reviewFinished(outcome, turnId = "turn-1") {
  const verdict = JSON.stringify({ outcome });
  const payload = {
    type: "task_complete",
    turn_id: turnId,
    last_agent_message: verdict
  };
  return JSON.stringify({ type: "event_msg", payload }) + "\n";
}
|
|
32
|
+
|
|
33
|
+
// Creates a fresh temporary sessions root containing a 2026/06/12 day
// directory, and returns both paths.
function makeSessionsRoot() {
  const prefix = join(tmpdir(), "codex-guardian-");
  const sessionsRoot = mkdtempSync(prefix);
  const dayDir = join(sessionsRoot, "2026", "06", "12");
  mkdirSync(dayDir, { recursive: true });
  return { root: sessionsRoot, dir: dayDir };
}
|
|
39
|
+
|
|
40
|
+
test("watchCodexGuardianReviews tails the guardian trunk of the main session", () => {
  const { root, dir } = makeSessionsRoot();
  const mainPath = join(dir, "rollout-main.jsonl");
  const collabPath = join(dir, "rollout-collab.jsonl");
  const trunkPath = join(dir, "rollout-guardian.jsonl");

  writeFileSync(
    mainPath,
    metaLine({ id: "main-1", parent_thread_id: null, source: "cli", thread_source: "user" })
  );
  // A non-guardian subagent of the same parent must not be tailed.
  writeFileSync(
    collabPath,
    metaLine({ id: "agent-1", parent_thread_id: "main-1", source: { subagent: { other: "collab" } } }) +
      reviewStarted("decoy")
  );
  // The trunk starts out with its session_meta line only.
  writeFileSync(
    trunkPath,
    metaLine({ id: "guardian-1", parent_thread_id: "main-1", source: { subagent: { other: "guardian" } } })
  );

  const seen = [];
  const watcher = watchCodexGuardianReviews({
    sessionsRoot: root,
    onReviewEvent: (event) => {
      seen.push(event);
    },
    ...noopTimers
  });

  watcher._tick();
  assert.deepEqual(seen, [], "no review turns yet");

  // Each append is picked up by the following poll.
  appendFileSync(trunkPath, reviewStarted());
  watcher._tick();
  appendFileSync(trunkPath, reviewFinished("allow"));
  watcher._tick();

  assert.deepEqual(seen, [
    { type: "review_started" },
    { type: "review_finished", outcome: "allow" }
  ]);

  watcher.stop();
});
|
|
80
|
+
|
|
81
|
+
test("watchCodexGuardianReviews replays a trunk discovered after the review began", () => {
  const { root, dir } = makeSessionsRoot();

  writeFileSync(
    join(dir, "rollout-main.jsonl"),
    metaLine({ id: "main-1", parent_thread_id: null, source: "cli", thread_source: "user" })
  );
  // The trunk already holds a complete review before the watcher first polls.
  const trunkContent = [
    metaLine({ id: "guardian-1", parent_thread_id: "main-1", source: { subagent: { other: "guardian" } } }),
    reviewStarted("turn-1"),
    reviewFinished("deny", "turn-1")
  ].join("");
  writeFileSync(join(dir, "rollout-guardian.jsonl"), trunkContent);

  const seen = [];
  const watcher = watchCodexGuardianReviews({
    sessionsRoot: root,
    onReviewEvent: (event) => {
      seen.push(event);
    },
    ...noopTimers
  });

  watcher._tick();

  assert.deepEqual(seen, [
    { type: "review_started" },
    { type: "review_finished", outcome: "deny" }
  ]);

  watcher.stop();
});
|
|
110
|
+
|
|
111
|
+
test("watchCodexGuardianReviews skips review records from before the session start", () => {
  const { root, dir } = makeSessionsRoot();
  const sessionStart = Date.parse("2026-06-12T01:00:00.000Z");

  writeFileSync(
    join(dir, "rollout-main.jsonl"),
    metaLine({ id: "main-1", parent_thread_id: null, source: "cli", thread_source: "user" })
  );
  // One review stamped an hour before the session start and one an hour after.
  const trunkContent = [
    metaLine({ id: "guardian-1", parent_thread_id: "main-1", source: { subagent: { other: "guardian" } } }),
    reviewStarted("turn-old", "2026-06-12T00:00:00.000Z"),
    reviewStarted("turn-new", "2026-06-12T02:00:00.000Z")
  ].join("");
  writeFileSync(join(dir, "rollout-guardian.jsonl"), trunkContent);

  const seen = [];
  const watcher = watchCodexGuardianReviews({
    sessionsRoot: root,
    startedAt: sessionStart,
    onReviewEvent: (event) => {
      seen.push(event);
    },
    ...noopTimers
  });

  watcher._tick();

  // Only the later record is reported.
  assert.deepEqual(seen, [{ type: "review_started" }]);

  watcher.stop();
});
|
|
138
|
+
|
|
139
|
+
test("watchCodexGuardianReviews emits nothing without a classifiable main session", () => {
  const { root, dir } = makeSessionsRoot();

  // Guardian trunk exists but there is no main rollout to bind its parent to.
  const orphanTrunk =
    metaLine({ id: "guardian-1", parent_thread_id: "main-1", source: { subagent: { other: "guardian" } } }) +
    reviewStarted();
  writeFileSync(join(dir, "rollout-guardian.jsonl"), orphanTrunk);

  const seen = [];
  const watcher = watchCodexGuardianReviews({
    sessionsRoot: root,
    onReviewEvent: (event) => {
      seen.push(event);
    },
    ...noopTimers
  });

  // Two polls: the trunk must stay unbound on the retry as well.
  watcher._tick();
  watcher._tick();

  assert.deepEqual(seen, []);

  watcher.stop();
});
|
|
162
|
+
|
|
163
|
+
test("watchCodexGuardianReviews ignores guardian trunks of other parents", () => {
  const { root, dir } = makeSessionsRoot();

  writeFileSync(
    join(dir, "rollout-main.jsonl"),
    metaLine({ id: "main-1", parent_thread_id: null, source: "cli", thread_source: "user" })
  );
  // This guardian trunk names a parent thread other than "main-1".
  const foreignTrunk =
    metaLine({ id: "guardian-9", parent_thread_id: "someone-else", source: { subagent: { other: "guardian" } } }) +
    reviewStarted();
  writeFileSync(join(dir, "rollout-guardian-other.jsonl"), foreignTrunk);

  const seen = [];
  const watcher = watchCodexGuardianReviews({
    sessionsRoot: root,
    onReviewEvent: (event) => {
      seen.push(event);
    },
    ...noopTimers
  });

  watcher._tick();

  assert.deepEqual(seen, []);

  watcher.stop();
});
|
|
188
|
+
|
|
189
|
+
test("watchCodexGuardianReviews picks up a trunk created after watching began", () => {
  const { root, dir } = makeSessionsRoot();

  writeFileSync(
    join(dir, "rollout-main.jsonl"),
    metaLine({ id: "main-1", parent_thread_id: null, source: "cli", thread_source: "user" })
  );

  const seen = [];
  const watcher = watchCodexGuardianReviews({
    sessionsRoot: root,
    onReviewEvent: (event) => {
      seen.push(event);
    },
    ...noopTimers
  });

  // First poll sees the main rollout only.
  watcher._tick();
  assert.deepEqual(seen, [], "no trunk yet");

  // The guardian trunk appears afterwards, with a review already started.
  const trunkPath = join(dir, "rollout-guardian.jsonl");
  const trunkContent =
    metaLine({ id: "guardian-1", parent_thread_id: "main-1", source: { subagent: { other: "guardian" } } }) +
    reviewStarted();
  writeFileSync(trunkPath, trunkContent);
  watcher._tick();

  assert.deepEqual(seen, [{ type: "review_started" }]);

  watcher.stop();
});
|