@hayasaka7/haya-pet 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/CHANGELOG.md +51 -0
  2. package/README.md +27 -5
  3. package/apps/cli/src/haya-pet.js +136 -4
  4. package/apps/cli/test/haya-pet.test.mjs +109 -0
  5. package/apps/companion/src/main/bubble-list-viewport.js +26 -0
  6. package/apps/companion/src/main/index.js +52 -2
  7. package/apps/companion/src/main/tray-menu.js +5 -0
  8. package/apps/companion/src/renderer/pet-window.js +5 -2
  9. package/apps/companion/src/renderer/session-bubbles.js +5 -1
  10. package/apps/companion/src/renderer/styles.css +19 -0
  11. package/apps/companion/test/bubble-list-viewport.test.mjs +50 -0
  12. package/apps/companion/test/tray-menu.test.mjs +10 -0
  13. package/docs/architecture.md +8 -2
  14. package/docs/known-issues.md +90 -5
  15. package/docs/troubleshooting.md +4 -1
  16. package/package.json +23 -1
  17. package/packages/adapters/src/codex-guardian.js +131 -0
  18. package/packages/adapters/src/codex-hooks.js +11 -2
  19. package/packages/adapters/test/codex-guardian.test.mjs +174 -0
  20. package/packages/app-state/src/state.js +4 -1
  21. package/packages/app-state/src/update-check.js +173 -0
  22. package/packages/app-state/test/update-check.test.mjs +227 -0
  23. package/packages/cli-core/src/codex-guardian-watcher.js +136 -0
  24. package/packages/cli-core/src/codex-rollout-fs.js +88 -0
  25. package/packages/cli-core/src/codex-transcript-watcher.js +2 -65
  26. package/packages/cli-core/src/deadline.js +23 -0
  27. package/packages/cli-core/src/run-state.js +31 -11
  28. package/packages/cli-core/test/codex-guardian-watcher.test.mjs +217 -0
  29. package/packages/cli-core/test/deadline.test.mjs +29 -0
  30. package/packages/cli-core/test/run-state.test.mjs +41 -0
@@ -0,0 +1,50 @@
1
+ import assert from "node:assert/strict";
2
+ import { test } from "../../../test/harness.mjs";
3
+ import { resolveBubbleListMaxHeight } from "../src/main/bubble-list-viewport.js";
4
+
5
+ // Layout bottoms for bubbles ~48px tall with a 6px gap.
6
+ const FOUR_BUBBLES = [48, 102, 156, 210];
7
+
8
+ test("three or fewer bubbles use the height budget alone", () => {
9
+ assert.equal(resolveBubbleListMaxHeight({ room: 400, bubbleBottoms: [48, 102, 156] }), 400);
10
+ assert.equal(resolveBubbleListMaxHeight({ room: 400, bubbleBottoms: [48] }), 400);
11
+ assert.equal(resolveBubbleListMaxHeight({ room: 400, bubbleBottoms: [] }), 400);
12
+ });
13
+
14
+ test("more than three bubbles cap the viewport at the third bubble's bottom", () => {
15
+ assert.equal(resolveBubbleListMaxHeight({ room: 400, bubbleBottoms: FOUR_BUBBLES }), 156);
16
+ });
17
+
18
+ test("the height budget still wins when it is tighter than the count cap", () => {
19
+ assert.equal(resolveBubbleListMaxHeight({ room: 120, bubbleBottoms: FOUR_BUBBLES }), 120);
20
+ });
21
+
22
+ test("the minimum height floor applies to both budgets", () => {
23
+ assert.equal(resolveBubbleListMaxHeight({ room: 40, bubbleBottoms: FOUR_BUBBLES }), 96);
24
+ assert.equal(resolveBubbleListMaxHeight({ room: 40, bubbleBottoms: [48] }), 96);
25
+ });
26
+
27
+ test("the room is rounded to whole pixels", () => {
28
+ assert.equal(resolveBubbleListMaxHeight({ room: 150.6, bubbleBottoms: [48] }), 151);
29
+ });
30
+
31
+ test("maxVisible and minHeight are configurable", () => {
32
+ assert.equal(
33
+ resolveBubbleListMaxHeight({ room: 400, bubbleBottoms: FOUR_BUBBLES, maxVisible: 2 }),
34
+ 102
35
+ );
36
+ assert.equal(
37
+ resolveBubbleListMaxHeight({ room: 40, bubbleBottoms: [48], minHeight: 32 }),
38
+ 40
39
+ );
40
+ });
41
+
42
+ test("garbage measurements fall back safely", () => {
43
+ // Unusable room → the floor keeps the list usable.
44
+ assert.equal(resolveBubbleListMaxHeight({ room: Number.NaN, bubbleBottoms: [48] }), 96);
45
+ // Unusable bottom at the cap index → no count cap, height budget alone.
46
+ assert.equal(
47
+ resolveBubbleListMaxHeight({ room: 400, bubbleBottoms: [48, 102, Number.NaN, 210] }),
48
+ 400
49
+ );
50
+ });
@@ -47,6 +47,16 @@ test("reflects the attach-bubbles checkbox state", () => {
47
47
  assert.equal(buildTrayMenu({ ...baseState, attachBubblesToTerminals: false }).find((i) => i.id === "attach_bubbles").checked, false);
48
48
  });
49
49
 
50
+ test("shows the update item only when a newer version is known", () => {
51
+ const withoutUpdate = buildTrayMenu(baseState);
52
+ assert.ok(!withoutUpdate.some((i) => i.id === "update"), "no update item by default");
53
+
54
+ const withUpdate = buildTrayMenu({ ...baseState, updateAvailable: { latestVersion: "9.9.9" } });
55
+ const item = withUpdate.find((i) => i.id === "update");
56
+ assert.ok(item, "update item appears when an update is known");
57
+ assert.ok(item.label.includes("9.9.9"), "label names the new version");
58
+ });
59
+
50
60
  test("uses the HAYA Pet brand in the tray hover text", () => {
51
61
  assert.equal(buildTrayTooltip(), "HAYA Pet");
52
62
  });
@@ -70,8 +70,14 @@ profile, so if the user already passes `-p/--profile`, injection is skipped with
70
70
  notice. Codex's hook command must be unquoted at the program position (it runs via
71
71
  `cmd /c`, which strips a leading quote) and its matchers can't use look-around
72
72
  (Rust regex) — see [known-issues.md](known-issues.md). Codex's L4 is **partial**:
73
- `PreToolUse`/`PermissionRequest` don't fire upstream yet, so only `thinking`/`idle`
74
- arrive today.
73
+ `PreToolUse` doesn't fire upstream yet, so tool activity comes from an L3
74
+ transcript watcher tailing the session rollout. `PermissionRequest` fires, but
75
+ only once, at approval-request creation — before Codex routes the request to either
76
+ the user or its guardian auto-reviewer ("Approve for me"), which never prompts
77
+ the user at all. An L3 **guardian-trunk watcher** tails the guardian's own
78
+ rollout (`source: {subagent:{other:"guardian"}}`, parented to the main thread)
79
+ and refines the state: review running → `reviewing`, verdict allow →
80
+ `running_tool`, verdict deny → `thinking`.
75
81
 
76
82
  Hooks alone can't see one moment: clients emit **no event when the user accepts a
77
83
  permission prompt** (denial and completion are observable; the accept click is
@@ -2,6 +2,85 @@
2
2
 
3
3
  Issues found in live use, with their current status.
4
4
 
5
+ ## ✅ Resolved: false "waiting for approval" while Codex auto-reviews an approval (Approve for me)
6
+
7
+ - **Symptom:** Running Codex under the pet with the **"Approve for me"** preset
8
+ (`approvals_reviewer = auto_review`; the user's config had the legacy alias
9
+ `guardian_subagent`), the pet showed *waiting for approval* whenever an action
10
+ needed approval — even though Codex's guardian was reviewing it automatically
11
+ and the user was never asked anything. The false state lasted the whole review
12
+ (~8–30 s per request, up to Codex's 90 s review timeout) plus the approved
13
+ command's runtime.
14
+ - **Root cause (verified against codex-rs 0.139.0 source + a live trunk
15
+ rollout):** Codex fires the `PermissionRequest` hook once, at approval-request
16
+ creation, **before** routing — and for guardian-routed requests the human
17
+ approval UI is *never* shown: a guardian `allow` lets the action proceed; a
18
+ guardian `deny` returns the rationale to the **model** as a rejected tool call
19
+ ("This action was rejected due to unacceptable risk. …"), so no human decision
20
+ is ever pending. Our Codex hook table mapped `PermissionRequest` →
21
+ `waiting_approval` unconditionally. No better hook exists: nothing fires on
22
+ guardian start/finish (the guardian session is `SubAgentSource::Other`, which
23
+ is excluded from Subagent hooks), and `GuardianAssessment` events are
24
+ explicitly not persisted to the main rollout (`rollout/src/policy.rs`).
25
+ - **Fix:** an **L3 guardian-trunk watcher** (`codex-guardian-watcher.js` +
26
+ `adapters/codex-guardian.js`). The guardian runs as its own Codex session that
27
+ writes its own rollout under `~/.codex/sessions` — session_meta has
28
+ `source: {subagent: {other: "guardian"}}` and `parent_thread_id` = the main
29
+ thread; each review is one turn (`task_started` → `task_complete` with the
30
+ verdict JSON in `last_agent_message`, e.g. `{"outcome":"allow"}`). The watcher
31
+ binds the trunk to the wrapped session's main thread id and maps real events:
32
+ review turn starts → **reviewing**; verdict `allow` → **running_tool**
33
+ ("reviewer approved" — the action verifiably proceeds); verdict `deny` →
34
+ **thinking** ("reviewer denied" — the model received the rejection and keeps
35
+ working). An unreadable verdict reports nothing, so a pending cue is never
36
+ cleared on a guess. With `approvals_reviewer = "user"` ("Ask for approval")
37
+ there is no trunk and behavior is unchanged: `PermissionRequest` →
38
+ *waiting for approval* until the user decides (process-tree/denial detection
39
+ resolve it, as before).
40
+ - **Known limitations (accepted):** (1) A ≤ ~1 s *waiting for approval* flicker
41
+ can precede *reviewing* (the hook fires immediately; the trunk poll is 700 ms).
42
+ (2) Reviews of a **collab subagent's** actions (multi-agent runs) have their
43
+ own trunks keyed to the subagent's thread and are not watched; a subagent's
44
+ `PermissionRequest` can still briefly show *waiting for approval* until the
45
+ next main-session event. (3) After a guardian deny the pet shows *thinking*,
46
+ not *waiting for approval* — by design: Codex resolves the request itself and
47
+ the model decides what to do next (it may ask the user in chat, which then
48
+ surfaces as turn-end *idle*). The TUI's passive `/approve` denial-override
49
+ picker is not a blocking prompt.
50
+
51
+ ## ✅ Resolved: Codex `/quit` hung on its goodbye (and the pet kept showing "working")
52
+
53
+ - **Symptom:** Exiting Codex with `/quit` printed the token-usage goodbye and the
54
+ `codex resume` hint, but the terminal never returned to a prompt and the pet
55
+ kept showing the session as ongoing. Ctrl+C exited fine. Only happened under
56
+ `haya-pet run`.
57
+ - **Root cause (verified against codex-rs 0.139.0 source + a live orphaned
58
+ process):** the `haya-pet state` hook reporter had three **unbounded awaits**
59
+ in its IPC path — pipe connect, write drain, and `socket.end()` → `close` —
60
+ and the CLI entry's `process.exit()` only runs after the command resolves, so
61
+ one never-settling await made a reporter hang forever. Codex awaits every
62
+ hook child with a **default 600 s timeout**
63
+ (`hooks/engine/discovery.rs` `timeout_sec.unwrap_or(600)`;
64
+ `command_runner.rs` `timeout(…, child.wait_with_output())`), and `Stop` hooks
65
+ are awaited in turn completion (`core/hook_runtime.rs run_turn_stop_hooks`)
66
+ with the TUI exiting only after `ShutdownComplete`. So one hung turn-end
67
+ `state idle` reporter produced BOTH symptoms: the idle report never arrived
68
+ (pet stuck on "working"), and `/quit` waited up to 10 minutes on the hook
69
+ child after printing its goodbye. Ctrl+C kills Codex without that wait and
70
+ orphans the reporter — exactly what live process-tree monitoring showed (a
71
+ parentless reporter under the hook node version).
72
+ - **Fix:** every IPC await now has a hard deadline (`cli-core/deadline.js`).
73
+ The reporter races its whole connect→send→close against **2 s** and exits
74
+ with `{ ok:false, reason:"timeout" }` on the deadline (one best-effort status
75
+ update lost; `HAYA_PET_HOOK_DEBUG` logs a `timeout: true` line for evidence).
76
+ The wrapper's companion connection gets the same guard (**5 s** per
77
+ send/close) so a wedged companion can never keep the wrapper — and the user's
78
+ terminal — alive after the wrapped CLI exits. Dead sessions still resolve via
79
+ the registry's stale/drop sweep, so a lost message self-heals.
80
+ - **Note:** why a pipe await occasionally never settles (companion busy/wedged
81
+ at that moment) is not yet pinned down; the deadline makes it harmless and
82
+ the debug log will show `timeout: true` entries if it recurs.
83
+
5
84
  ## ✅ Resolved: pet stuck on "waiting for approval" after a manual denial
6
85
 
7
86
  - **Symptom:** With Claude Code hooks enabled, denying a permission prompt left the
@@ -176,10 +255,16 @@ observation (`--observe`) or L1 lifecycle as the fallback. Current state:
176
255
  activity is covered by an L3 Codex transcript watcher that tails
177
256
  `~/.codex/sessions` JSONL: normal tools report `running_tool`, `apply_patch`
178
257
  reports `editing_files`, and HAYA Pet returns to `thinking` after active tool
179
- calls drain. `PermissionRequest` (the *waiting for approval* cue — the
180
- highest-value state) is **unconfirmed**; it likely depends on an
181
- approval-required flow and needs a dedicated test before the feature is worth
182
- wiring in.
258
+ calls drain.
259
+ - **`PermissionRequest` fires** (confirmed live on 0.139.0), but **once, at
260
+ approval-request creation, before routing**; under "Approve for me"
261
+ (`approvals_reviewer = auto_review` / legacy `guardian_subagent`) the user is
262
+ never actually prompted, so the hook alone over-reports *waiting for
263
+ approval*. An L3 **guardian-trunk watcher** tails the guardian reviewer's own
264
+ rollout (`source: {subagent:{other:"guardian"}}`, `parent_thread_id` = main
265
+ thread) and refines the state: review running → *reviewing*, verdict `allow`
266
+ → *running_tool*, verdict `deny` → *thinking*. See the resolved
267
+ false-waiting-for-approval entry above.
183
268
  - **Antigravity (`agy`)** — **not yet implemented** (no hook injection). Uses
184
269
  `--observe` or L1 lifecycle. A Gemini-schema hook adapter is a planned follow-up.
185
270
  - **Generic / unknown** — no hooks; PTY observation (`--observe`) or L1 lifecycle.
@@ -194,7 +279,7 @@ remains a possible follow-up.
194
279
  |---|---|---|
195
280
  | L1 | process wrapper | default; session lifecycle + exit code |
196
281
  | L4 | client hooks | opt-in via `haya-pet hooks on` (Claude Code full, Codex partial); reports through `haya-pet state …` |
197
- | L3 | client logs | Codex session JSONL watcher for tool activity; Claude denial recovery; future clients can add similar transcript adapters |
282
+ | L3 | client logs | Codex session JSONL watcher for tool activity; Codex guardian-trunk watcher for auto-review status; Claude denial recovery; future clients can add similar transcript adapters |
198
283
  | L3 | process tree | approval-accept detection: a `waiting_approval` session flips to `running_tool` when the approved command verifiably starts under the client's pid |
199
284
  | L2 | PTY output scraping | opt-in via `--observe` (terminal-fidelity tradeoff) |
200
285
 
@@ -16,7 +16,8 @@ deferred problems with known root causes.
16
16
  | Terminal scroll / Shift+Tab / backspace odd while a CLI runs under `haya-pet run` | Fixed — `haya-pet run` now uses native passthrough by default (full fidelity). If you opted into `--observe`, drop it. See [known-issues.md](known-issues.md). |
17
17
  | Pet shows only **idle/lifecycle** while **Claude Code** works | Live in-session status is opt-in: run `haya-pet hooks on` once (persisted). The first `haya-pet run` afterward shows a one-time Claude *review hooks* prompt — approve it. Also make sure the companion is running (`haya-pet start`). Check the toggle with `haya-pet hooks status`. |
18
18
  | Typing doesn't work / **Claude Code** TUI frozen under `haya-pet run` | You have hooks enabled and Claude is showing its *review hooks* trust prompt (approve it once), or your Claude is too old for `--settings`. Run `haya-pet hooks off` (or set `HAYA_PET_NO_HOOKS=1`) for native passthrough with lifecycle-only status — typing and Shift+Tab work normally. |
19
- | Pet shows only **idle/lifecycle** while **Codex** works | Live status is opt-in: run `haya-pet hooks on` once (persisted, global), then `haya-pet run --client codex -- codex`; approve Codex's one-time *review hooks* prompt. `thinking`/`idle` come from hooks and `running_tool`/`editing_files` from a transcript watcher; *waiting for approval* doesn't fire yet (upstream [openai/codex#16732](https://github.com/openai/codex/issues/16732)). |
19
+ | Pet shows only **idle/lifecycle** while **Codex** works | Live status is opt-in: run `haya-pet hooks on` once (persisted, global), then `haya-pet run --client codex -- codex`; approve Codex's one-time *review hooks* prompt. `thinking`/`idle` come from hooks, `running_tool`/`editing_files` from a transcript watcher, and approval states from the `PermissionRequest` hook plus a guardian-review watcher. |
20
+ | Pet showed **waiting for approval** while **Codex** auto-reviewed the request ("Approve for me") | Fixed — with `approvals_reviewer = auto_review` (legacy `guardian_subagent`) Codex's guardian decides without asking you; the pet now shows **reviewing** during the assessment, then **working** on an allow verdict or **thinking** on a deny. *Waiting for approval* still shows when Codex actually asks you (`approvals_reviewer = "user"`). |
20
21
  | **Codex** live status didn't turn on / you pass your own `-p`/`--profile` | Codex allows only one profile, so haya-pet skips hook injection when you supply your own and prints a notice. Drop your `-p` for that run to get live status, or accept lifecycle-only. |
21
22
  | Pet shows only **idle/lifecycle** while **Antigravity** (`agy`) works | Antigravity has no hook adapter yet. Add `--observe` for coarse PTY activity, or accept lifecycle-only status. |
22
23
  | Claude hooks fail with **"hook exited with code 1"** | The hook command must not bake an **fnm**/node-manager *per-shell* node path (`…\fnm_multishells\<pid>_…\node.exe`) that dies when the shell exits. haya-pet bakes the stable `realpath`-resolved node path into the temp settings instead. Update to the latest version. |
@@ -24,7 +25,9 @@ deferred problems with known root causes.
24
25
  | Pet stayed on **waiting for approval** after I denied a tool | Fixed — Claude fires no hook on a manual denial, so the wrapper tails the session transcript and clears to **idle** when the denial is recorded. A genuinely-pending approval (you haven't decided yet) correctly keeps alerting — there's no timer. |
25
26
  | Pet stayed on **waiting for approval** after I *approved* a command | Fixed — Claude also fires no hook at the accept moment, so the companion watches the client's process tree while a session waits: when the approved command verifiably starts (a new persistent process under the client), the pet flips to **working**. Expect a ~2–3s lag after your click. File-edit approvals (no process) resolve at completion, which is near-instant. |
26
27
  | Want to see which status events fire | Set `HAYA_PET_HOOK_DEBUG=<file.jsonl>` before `haya-pet run`; each hook- and transcript-sourced status appends one JSON line (timestamp, state, and source/event). |
28
+ | Don't want the update check / notice | Set `HAYA_PET_NO_UPDATE_CHECK=1`. The check is a daily, cached HTTPS request to the npm registry (no session data); it is already skipped automatically when output is piped. |
27
29
  | Pet stays **idle** after force-quitting a CLI | The wrapper marks the session stale ~15s after the heartbeat stops, then drops it. Exiting normally (incl. Ctrl+C) reports **exited** immediately. |
30
+ | **Codex `/quit`** printed its goodbye but the terminal hung (pet stuck on "working") | Fixed — a hook reporter could hang on a pipe await and Codex waits up to 600s for hook children at shutdown. Reporters now hard-deadline at 2s. Update to the latest version. |
28
31
  | Ctrl+C doesn't exit the CLI cleanly under `haya-pet run` | Fixed — the wrapper no longer dies on Ctrl+C; the signal reaches the CLI, which exits, and the pet shows the result. |
29
32
  | `ENOENT … electron\path.txt` | Electron's install extraction was interrupted — see below. |
30
33
 
package/package.json CHANGED
@@ -1,8 +1,30 @@
1
1
  {
2
2
  "name": "@hayasaka7/haya-pet",
3
- "version": "0.2.5",
3
+ "version": "0.2.7",
4
4
  "type": "module",
5
5
  "description": "Generic AI CLI pet runtime foundation.",
6
+ "keywords": [
7
+ "haya-pet",
8
+ "desktop-pet",
9
+ "virtual-pet",
10
+ "desktop-companion",
11
+ "ai",
12
+ "ai-agents",
13
+ "coding-agents",
14
+ "ai-cli",
15
+ "codex",
16
+ "codex-cli",
17
+ "claude-code",
18
+ "gemini-cli",
19
+ "aider",
20
+ "antigravity",
21
+ "electron",
22
+ "cli",
23
+ "terminal",
24
+ "developer-tools",
25
+ "local-first",
26
+ "productivity"
27
+ ],
6
28
  "license": "MIT",
7
29
  "author": "Ai Hayasaka",
8
30
  "repository": {
@@ -0,0 +1,131 @@
1
+ // Pure parser for Codex guardian-review rollouts. When `approvals_reviewer` is
2
+ // `auto_review` (legacy alias `guardian_subagent`, the TUI's "Approve for me"),
3
+ // Codex routes approval requests to a guardian subagent instead of prompting the
4
+ // user — the PermissionRequest hook still fires at request creation, but the
5
+ // human approval UI never appears. The only persisted trace of the review is a
6
+ // separate "guardian trunk" rollout under ~/.codex/sessions whose session_meta
7
+ // carries source.subagent.other == "guardian" and parent_thread_id == the main
8
+ // thread; each review is one turn there (task_started → task_complete with the
9
+ // verdict JSON in last_agent_message). Verified against codex-cli 0.139.0
10
+ // (codex-rs guardian/review_session.rs; rollout/src/policy.rs excludes the
11
+ // GuardianAssessment events themselves from persistence).
12
+
13
+ // Classify a rollout's first JSONL line (the session_meta record) so watchers
14
+ // can tell main sessions, guardian review sessions, and other subagents apart.
15
+ export function classifyCodexSessionMeta(line) {
16
+ let entry;
17
+ try {
18
+ entry = JSON.parse(line);
19
+ } catch {
20
+ return undefined;
21
+ }
22
+
23
+ if (entry?.type !== "session_meta") {
24
+ return undefined;
25
+ }
26
+
27
+ const payload = entry.payload;
28
+ if (!payload || typeof payload !== "object") {
29
+ return undefined;
30
+ }
31
+
32
+ const threadId = typeof payload.id === "string" ? payload.id : undefined;
33
+ const parentThreadId =
34
+ typeof payload.parent_thread_id === "string" ? payload.parent_thread_id : undefined;
35
+
36
+ return { kind: resolveSessionKind(payload), threadId, parentThreadId };
37
+ }
38
+
39
+ function resolveSessionKind(payload) {
40
+ const subagentSource =
41
+ typeof payload.source === "object" && payload.source !== null
42
+ ? payload.source.subagent
43
+ : undefined;
44
+
45
+ if (typeof subagentSource === "object" && subagentSource !== null) {
46
+ if (subagentSource.other === "guardian") {
47
+ return "guardian";
48
+ }
49
+ return "subagent";
50
+ }
51
+
52
+ if (payload.thread_source === "subagent") {
53
+ return "subagent";
54
+ }
55
+
56
+ return "main";
57
+ }
58
+
59
+ // Parse one guardian-trunk JSONL line into a review lifecycle event. Each review
60
+ // turn yields task_started (the guardian began assessing an approval request)
61
+ // and task_complete (verdict in last_agent_message: `{"outcome":"allow"|"deny"}`,
62
+ // optionally with risk_level/rationale). An unreadable verdict maps to
63
+ // outcome: undefined so callers can leave the pet state untouched (safe: the
64
+ // existing waiting cue stays up rather than being cleared on a guess).
65
+ export function parseGuardianTranscriptLine(line, options = {}) {
66
+ let entry;
67
+ try {
68
+ entry = JSON.parse(line);
69
+ } catch {
70
+ return undefined;
71
+ }
72
+
73
+ if (entry?.type !== "event_msg") {
74
+ return undefined;
75
+ }
76
+
77
+ // Same replay guard as the main transcript parser: skip records from before
78
+ // the current session, keep records without a parseable timestamp.
79
+ const minTimestampMs = options.minTimestampMs ?? 0;
80
+ if (minTimestampMs > 0 && typeof entry.timestamp === "string") {
81
+ const timestampMs = Date.parse(entry.timestamp);
82
+ if (Number.isFinite(timestampMs) && timestampMs < minTimestampMs) {
83
+ return undefined;
84
+ }
85
+ }
86
+
87
+ const payload = entry.payload;
88
+ if (!payload || typeof payload !== "object") {
89
+ return undefined;
90
+ }
91
+
92
+ if (payload.type === "task_started") {
93
+ return { type: "review_started" };
94
+ }
95
+
96
+ if (payload.type === "task_complete") {
97
+ return { type: "review_finished", outcome: parseVerdictOutcome(payload.last_agent_message) };
98
+ }
99
+
100
+ return undefined;
101
+ }
102
+
103
+ function parseVerdictOutcome(lastAgentMessage) {
104
+ if (typeof lastAgentMessage !== "string") {
105
+ return undefined;
106
+ }
107
+
108
+ let verdict;
109
+ try {
110
+ verdict = JSON.parse(lastAgentMessage);
111
+ } catch {
112
+ return undefined;
113
+ }
114
+
115
+ const outcome = verdict?.outcome;
116
+ return outcome === "allow" || outcome === "deny" ? outcome : undefined;
117
+ }
118
+
119
+ export function parseGuardianTranscriptLines(lines, options = {}) {
120
+ const events = [];
121
+ for (const line of lines) {
122
+ if (typeof line !== "string" || line.trim() === "") {
123
+ continue;
124
+ }
125
+ const event = parseGuardianTranscriptLine(line, options);
126
+ if (event) {
127
+ events.push(event);
128
+ }
129
+ }
130
+ return events;
131
+ }
@@ -33,8 +33,17 @@
33
33
  // - NOT FIRING (0.137): PreToolUse — so `running_tool` / `editing_files` never
34
34
  // arrive in practice yet (upstream coverage gap, openai/codex#16732). The
35
35
  // entries are kept (harmless no-ops) so they light up once Codex fixes it.
36
- // - UNTESTED: PermissionRequest / PreCompact / SubagentStart|Stop (no approval /
37
- // compaction / subagent occurred in the probe).
36
+ // - PermissionRequest (verified live on 0.139.0, semantics from codex-rs
37
+ // source): fires ONCE at approval-request creation, BEFORE the request is
38
+ // routed to the guardian auto-reviewer or the user. Under "Approve for me"
39
+ // (approvals_reviewer=auto_review, legacy alias guardian_subagent) the user
40
+ // is never prompted at all, so waiting_approval from this hook over-reports;
41
+ // the wrapper's codex-guardian-watcher refines it to reviewing /
42
+ // running_tool / thinking from the guardian's own rollout. The guardian
43
+ // fires NO hooks itself (SubAgentSource::Other is excluded from Subagent
44
+ // hooks), so these entries can't see it.
45
+ // - UNTESTED: PreCompact / SubagentStart|Stop (no compaction / subagent
46
+ // occurred in the probe).
38
47
  //
39
48
  // OPEN QUESTION (injection): unlike `claude --settings <file>`, Codex has no
40
49
  // per-invocation settings-file flag. Candidate non-mutating paths, best first:
@@ -0,0 +1,174 @@
1
+ import assert from "node:assert/strict";
2
+ import { test } from "../../../test/harness.mjs";
3
+ import {
4
+ classifyCodexSessionMeta,
5
+ parseGuardianTranscriptLine,
6
+ parseGuardianTranscriptLines
7
+ } from "../src/codex-guardian.js";
8
+
9
+ function metaLine(payload) {
10
+ return JSON.stringify({ timestamp: "2026-06-12T01:36:41.556Z", type: "session_meta", payload });
11
+ }
12
+
13
+ test("classifyCodexSessionMeta identifies a main session", () => {
14
+ const line = metaLine({
15
+ id: "main-1",
16
+ parent_thread_id: null,
17
+ originator: "codex-tui",
18
+ source: "cli",
19
+ thread_source: "user"
20
+ });
21
+
22
+ assert.deepEqual(classifyCodexSessionMeta(line), {
23
+ kind: "main",
24
+ threadId: "main-1",
25
+ parentThreadId: undefined
26
+ });
27
+ });
28
+
29
+ test("classifyCodexSessionMeta identifies a guardian review session", () => {
30
+ const line = metaLine({
31
+ id: "guardian-1",
32
+ parent_thread_id: "main-1",
33
+ source: { subagent: { other: "guardian" } },
34
+ thread_source: "subagent"
35
+ });
36
+
37
+ assert.deepEqual(classifyCodexSessionMeta(line), {
38
+ kind: "guardian",
39
+ threadId: "guardian-1",
40
+ parentThreadId: "main-1"
41
+ });
42
+ });
43
+
44
+ test("classifyCodexSessionMeta identifies non-guardian subagent sessions", () => {
45
+ const bySource = metaLine({
46
+ id: "agent-1",
47
+ parent_thread_id: "main-1",
48
+ source: { subagent: { other: "collab" } }
49
+ });
50
+ const byThreadSource = metaLine({
51
+ id: "agent-2",
52
+ parent_thread_id: "main-1",
53
+ source: "cli",
54
+ thread_source: "subagent"
55
+ });
56
+
57
+ assert.equal(classifyCodexSessionMeta(bySource).kind, "subagent");
58
+ assert.equal(classifyCodexSessionMeta(byThreadSource).kind, "subagent");
59
+ });
60
+
61
+ test("classifyCodexSessionMeta rejects non-meta and malformed lines", () => {
62
+ assert.equal(classifyCodexSessionMeta("not json"), undefined);
63
+ assert.equal(classifyCodexSessionMeta("{}"), undefined);
64
+ assert.equal(
65
+ classifyCodexSessionMeta(JSON.stringify({ type: "response_item", payload: { type: "message" } })),
66
+ undefined
67
+ );
68
+ assert.equal(classifyCodexSessionMeta(metaLine(null)), undefined);
69
+ });
70
+
71
+ test("parseGuardianTranscriptLine maps task_started to review_started", () => {
72
+ const line = JSON.stringify({
73
+ timestamp: "2026-06-12T01:36:41.557Z",
74
+ type: "event_msg",
75
+ payload: { type: "task_started", turn_id: "turn-1" }
76
+ });
77
+
78
+ assert.deepEqual(parseGuardianTranscriptLine(line), { type: "review_started" });
79
+ });
80
+
81
+ test("parseGuardianTranscriptLine extracts the verdict from task_complete", () => {
82
+ const allow = JSON.stringify({
83
+ type: "event_msg",
84
+ payload: { type: "task_complete", turn_id: "turn-1", last_agent_message: '{"outcome":"allow"}' }
85
+ });
86
+ const deny = JSON.stringify({
87
+ type: "event_msg",
88
+ payload: {
89
+ type: "task_complete",
90
+ turn_id: "turn-2",
91
+ last_agent_message:
92
+ '{"risk_level":"high","user_authorization":"low","outcome":"deny","rationale":"too risky"}'
93
+ }
94
+ });
95
+
96
+ assert.deepEqual(parseGuardianTranscriptLine(allow), { type: "review_finished", outcome: "allow" });
97
+ assert.deepEqual(parseGuardianTranscriptLine(deny), { type: "review_finished", outcome: "deny" });
98
+ });
99
+
100
+ test("parseGuardianTranscriptLine reports an unknown outcome when the verdict is unreadable", () => {
101
+ const garbled = JSON.stringify({
102
+ type: "event_msg",
103
+ payload: { type: "task_complete", turn_id: "turn-1", last_agent_message: "I think it is fine" }
104
+ });
105
+ const missing = JSON.stringify({
106
+ type: "event_msg",
107
+ payload: { type: "task_complete", turn_id: "turn-1" }
108
+ });
109
+ const unexpected = JSON.stringify({
110
+ type: "event_msg",
111
+ payload: { type: "task_complete", turn_id: "turn-1", last_agent_message: '{"outcome":"maybe"}' }
112
+ });
113
+
114
+ assert.deepEqual(parseGuardianTranscriptLine(garbled), { type: "review_finished", outcome: undefined });
115
+ assert.deepEqual(parseGuardianTranscriptLine(missing), { type: "review_finished", outcome: undefined });
116
+ assert.deepEqual(parseGuardianTranscriptLine(unexpected), { type: "review_finished", outcome: undefined });
117
+ });
118
+
119
+ test("parseGuardianTranscriptLine ignores unrelated and malformed records", () => {
120
+ assert.equal(parseGuardianTranscriptLine("not json"), undefined);
121
+ assert.equal(
122
+ parseGuardianTranscriptLine(
123
+ JSON.stringify({ type: "event_msg", payload: { type: "token_count" } })
124
+ ),
125
+ undefined
126
+ );
127
+ assert.equal(
128
+ parseGuardianTranscriptLine(
129
+ JSON.stringify({ type: "response_item", payload: { type: "message", role: "assistant" } })
130
+ ),
131
+ undefined
132
+ );
133
+ });
134
+
135
+ test("parseGuardianTranscriptLine skips records from before the session start", () => {
136
+ const old = JSON.stringify({
137
+ timestamp: "2026-06-12T01:00:00.000Z",
138
+ type: "event_msg",
139
+ payload: { type: "task_started", turn_id: "turn-0" }
140
+ });
141
+ const fresh = JSON.stringify({
142
+ timestamp: "2026-06-12T02:00:00.000Z",
143
+ type: "event_msg",
144
+ payload: { type: "task_started", turn_id: "turn-1" }
145
+ });
146
+ const untimestamped = JSON.stringify({
147
+ type: "event_msg",
148
+ payload: { type: "task_started", turn_id: "turn-2" }
149
+ });
150
+ const minTimestampMs = Date.parse("2026-06-12T01:30:00.000Z");
151
+
152
+ assert.equal(parseGuardianTranscriptLine(old, { minTimestampMs }), undefined);
153
+ assert.deepEqual(parseGuardianTranscriptLine(fresh, { minTimestampMs }), { type: "review_started" });
154
+ assert.deepEqual(parseGuardianTranscriptLine(untimestamped, { minTimestampMs }), {
155
+ type: "review_started"
156
+ });
157
+ });
158
+
159
+ test("parseGuardianTranscriptLines collects events and skips blank lines", () => {
160
+ const lines = [
161
+ "",
162
+ JSON.stringify({ type: "event_msg", payload: { type: "task_started", turn_id: "t1" } }),
163
+ " ",
164
+ JSON.stringify({
165
+ type: "event_msg",
166
+ payload: { type: "task_complete", turn_id: "t1", last_agent_message: '{"outcome":"allow"}' }
167
+ })
168
+ ];
169
+
170
+ assert.deepEqual(parseGuardianTranscriptLines(lines), [
171
+ { type: "review_started" },
172
+ { type: "review_finished", outcome: "allow" }
173
+ ]);
174
+ });
@@ -105,7 +105,10 @@ export function parsePositionState(text) {
105
105
  settings: {
106
106
  ...defaults.settings,
107
107
  ...(isPlainObject(parsed.settings) ? parsed.settings : {})
108
- }
108
+ },
109
+ // Cached npm update-check result (see update-check.js) — must survive a
110
+ // load/save round-trip or every run would re-fetch from the registry.
111
+ ...(isPlainObject(parsed.updateCheck) ? { updateCheck: parsed.updateCheck } : {})
109
112
  };
110
113
  } catch {
111
114
  return defaults;