typeclaw 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/README.md +6 -6
  2. package/package.json +5 -3
  3. package/scripts/require-parallel.ts +41 -0
  4. package/src/agent/index.ts +55 -6
  5. package/src/agent/live-sessions.ts +34 -0
  6. package/src/agent/plugin-tools.ts +2 -0
  7. package/src/agent/session-meta.ts +21 -2
  8. package/src/agent/subagent-completion-reminder.ts +89 -0
  9. package/src/agent/subagents.ts +3 -2
  10. package/src/agent/system-prompt.ts +10 -8
  11. package/src/bundled-plugins/explorer/explorer.ts +2 -2
  12. package/src/bundled-plugins/guard/index.ts +14 -1
  13. package/src/bundled-plugins/guard/policies/managed-config.ts +43 -13
  14. package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +37 -0
  15. package/src/bundled-plugins/guard/policies/memory-topics-delete.ts +67 -0
  16. package/src/bundled-plugins/guard/policies/memory-topics-write.ts +33 -0
  17. package/src/bundled-plugins/guard/policies/non-workspace-write.ts +8 -2
  18. package/src/bundled-plugins/guard/policy.ts +7 -0
  19. package/src/bundled-plugins/memory/README.md +76 -62
  20. package/src/bundled-plugins/memory/append-tool.ts +3 -2
  21. package/src/bundled-plugins/memory/citation-superset.ts +49 -11
  22. package/src/bundled-plugins/memory/citations.ts +19 -8
  23. package/src/bundled-plugins/memory/delete-tool.ts +57 -0
  24. package/src/bundled-plugins/memory/dreaming-state.ts +1 -1
  25. package/src/bundled-plugins/memory/dreaming.ts +364 -146
  26. package/src/bundled-plugins/memory/frontmatter.ts +165 -0
  27. package/src/bundled-plugins/memory/index.ts +236 -16
  28. package/src/bundled-plugins/memory/injection-plan.ts +15 -0
  29. package/src/bundled-plugins/memory/load-memory.ts +102 -103
  30. package/src/bundled-plugins/memory/load-shards.ts +156 -0
  31. package/src/bundled-plugins/memory/memory-logger.ts +16 -15
  32. package/src/bundled-plugins/memory/memory-retrieval.ts +105 -0
  33. package/src/bundled-plugins/memory/migration.ts +282 -1
  34. package/src/bundled-plugins/memory/paths.ts +42 -0
  35. package/src/bundled-plugins/memory/search-tool.ts +232 -0
  36. package/src/bundled-plugins/memory/secret-detector.ts +2 -2
  37. package/src/bundled-plugins/memory/shard-snapshot.ts +51 -0
  38. package/src/bundled-plugins/memory/slug.ts +59 -0
  39. package/src/bundled-plugins/memory/stream-io.ts +110 -1
  40. package/src/bundled-plugins/memory/strength.ts +3 -3
  41. package/src/bundled-plugins/memory/topics.ts +70 -16
  42. package/src/bundled-plugins/security/index.ts +24 -0
  43. package/src/bundled-plugins/security/permissions.ts +4 -0
  44. package/src/bundled-plugins/security/policies/cron-promotion.ts +349 -0
  45. package/src/bundled-plugins/security/policies/git-exfil.ts +2 -0
  46. package/src/bundled-plugins/security/policies/prompt-injection.ts +3 -0
  47. package/src/bundled-plugins/security/policies/role-promotion.ts +419 -0
  48. package/src/bundled-plugins/security/policies/system-prompt-leak.ts +1 -0
  49. package/src/channels/adapters/kakaotalk-attachment.ts +7 -17
  50. package/src/channels/adapters/kakaotalk.ts +64 -37
  51. package/src/channels/adapters/slack-bot-classify.ts +2 -27
  52. package/src/channels/index.ts +5 -0
  53. package/src/channels/router.ts +201 -17
  54. package/src/channels/subagent-completion-bridge.ts +84 -0
  55. package/src/cli/builtins.ts +1 -0
  56. package/src/cli/index.ts +1 -0
  57. package/src/cli/init.ts +122 -14
  58. package/src/cli/inspect.ts +151 -0
  59. package/src/cron/consumer.ts +1 -1
  60. package/src/init/dockerfile.ts +268 -4
  61. package/src/init/hatching.ts +5 -6
  62. package/src/init/kakaotalk-auth.ts +6 -47
  63. package/src/init/validate-api-key.ts +121 -0
  64. package/src/inspect/index.ts +213 -0
  65. package/src/inspect/label.ts +50 -0
  66. package/src/inspect/live.ts +221 -0
  67. package/src/inspect/render.ts +163 -0
  68. package/src/inspect/replay.ts +265 -0
  69. package/src/inspect/session-list.ts +160 -0
  70. package/src/inspect/types.ts +110 -0
  71. package/src/plugin/hooks.ts +23 -1
  72. package/src/plugin/index.ts +2 -0
  73. package/src/plugin/manager.ts +1 -1
  74. package/src/plugin/registry.ts +1 -1
  75. package/src/plugin/types.ts +10 -0
  76. package/src/run/channel-session-factory.ts +7 -1
  77. package/src/run/index.ts +87 -21
  78. package/src/secrets/kakao-renewal.ts +3 -47
  79. package/src/server/index.ts +241 -60
  80. package/src/shared/index.ts +3 -0
  81. package/src/shared/protocol.ts +49 -0
  82. package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +9 -9
  83. package/src/skills/typeclaw-claude-code/SKILL.md +57 -39
  84. package/src/skills/typeclaw-claude-code/references/stop-hook.md +2 -0
  85. package/src/skills/typeclaw-claude-code/references/tmux-driving.md +102 -16
  86. package/src/skills/typeclaw-config/SKILL.md +1 -1
  87. package/src/skills/typeclaw-cron/SKILL.md +1 -1
  88. package/src/skills/typeclaw-memory/SKILL.md +16 -163
  89. package/src/skills/typeclaw-permissions/SKILL.md +2 -2
  90. package/src/skills/typeclaw-plugins/SKILL.md +25 -14
  91. package/src/test-helpers/wait-for.ts +7 -1
  92. package/typeclaw.schema.json +7 -0
@@ -107,7 +107,6 @@ Pick a task id (short hex string or `verb-noun` like `refactor-auth`) and create
107
107
  ```sh
108
108
  git -C /agent worktree add -b cc-<task-id> /tmp/cc-<task-id> HEAD
109
109
  cd /tmp/cc-<task-id>
110
- mkdir -p .claude
111
110
  ```
112
111
 
113
112
  This creates:
@@ -118,16 +117,14 @@ This creates:
118
117
 
119
118
  The worktree shares the agent folder's `.git` directory but has its own `HEAD`, index, and working tree. Branch state lives in `/agent/.git/refs/heads/cc-<task-id>` regardless of where the worktree itself lives on disk.
120
119
 
121
- Inside `/tmp/cc-<task-id>/`, write the per-task hook config (see "The Stop hook" below):
120
+ No per-task hook config is needed — the Stop and SessionStart hooks are wired globally at Dockerfile-build time (see "The Stop hook" below). Your worktree just becomes the cwd when you spawn `claude`; the global hooks write per-session files into `$PWD` (which `tmux new-session -c /tmp/cc-<id>` sets to the worktree).
122
121
 
123
122
  ```
124
123
  /tmp/cc-<task-id>/
125
- ├── .claude/
126
- │ └── settings.json # registers the Stop hook
127
- ├── hook-on-stop.sh # the hook script, chmod +x
128
- ├── sentinel.json # written by the hook (does not exist yet)
129
- └── .done # flag file (does not exist yet)
130
- └── ... # plus every file from the agent folder's HEAD
124
+ ├── .session-id # written by SessionStart hook (fast path; may not appear before trust is accepted)
125
+ ├── sentinel-<uuid>.json # written by Stop hook per turn
126
+ ├── .done-<uuid> # flag file written by Stop hook per turn
127
+ └── ... # plus every file from the agent folder's HEAD
131
128
  ```
132
129
 
133
130
  ### Why `/tmp/`, not `workspace/`?
@@ -138,41 +135,62 @@ Inside `/tmp/cc-<task-id>/`, write the per-task hook config (see "The Stop hook"
138
135
 
139
136
  Claude Code fires a `Stop` hook every time it finishes responding — turn-end, not session-end. The hook runs an arbitrary shell command with the lifecycle event payload (JSON) on stdin. We use this as the done-signal: the hook writes the payload to `sentinel.json` and `touch`es `.done`, and your polling loop watches for `.done`.
140
137
 
141
- Minimum `/tmp/cc-<id>/.claude/settings.json`:
142
-
143
- ```json
144
- {
145
- "hooks": {
146
- "Stop": [
147
- {
148
- "matcher": "*",
149
- "hooks": [{ "type": "command", "command": "./hook-on-stop.sh" }]
150
- }
151
- ]
152
- }
153
- }
154
- ```
138
+ **The hook is pre-baked into the container image.** When `docker.file.claudeCode: true`, the Dockerfile install layer writes TWO hook scripts and a settings file:
139
+
140
+ - `/usr/local/bin/typeclaw-cc-session-start-hook` — fires once at session start. Reads the SessionStart event JSON from stdin, extracts `session_id`, validates it as a UUID, and writes `$PWD/.session-id` (atomically, temp-then-rename) containing that UUID. This is how the operator learns the session UUID — the only reliable way, because `claude --session-id <uuid>` does NOT propagate to hook payloads in interactive mode (anthropics/claude-code#44607).
141
+ - `/usr/local/bin/typeclaw-cc-stop-hook` — fires every turn. Reads the Stop event JSON from stdin, extracts the same `session_id`, and writes per-session files: `$PWD/sentinel-<session_id>.json` atomically and `$PWD/.done-<session_id>`. The script uses `$PWD` (the literal cwd Claude Code was invoked with — set by the operator's `tmux new-session -c /tmp/cc-<id>`) rather than Claude Code's `$CLAUDE_PROJECT_DIR`, which resolves to the _git root of cwd_ and inside a worktree returns the main repo's path, not the worktree path. See the `TYPECLAW_CC_STOP_HOOK_PATH` comment block in `src/init/dockerfile.ts` for the upstream-bug citations (anthropics/claude-code#27343, #44450) that drove that choice.
142
+ - `~/.claude/settings.json` — user-level (global) Claude Code settings that register both hooks for every `claude` invocation in the container. Built at build time via `JSON.stringify` so the shape never drifts. Both hooks use exec form (`args: []` present) so Claude Code invokes them via `execvp` directly (kernel-handled shebang, no shell tokenization).
143
+
144
+ You do **not** write any of these files. The previous version of this skill had you `mkdir -p .claude && cat > .claude/settings.json …` per worktree; that step is removed. The shape of the JSON used to be the single most failure-prone part of a delegation (Claude Code silently ignores unknown keys, so wrong-shape configs like `{"hooks": {"onStop": "./script.sh"}}` would let the polling loop run to its wall-clock budget without ever firing the hook), and the only reliable fix is to keep the JSON out of LLM hands entirely.
145
+
146
+ ### Per-session filenames race safety
147
+
148
+ The sentinel and `.done` filenames carry the session UUID — `sentinel-<uuid>.json` and `.done-<uuid>` — so two `claude` sessions sharing a cwd cannot collide on a fixed `sentinel.json`. You learn the UUID one of two ways:
149
+
150
+ 1. **Fast path: read `.session-id` after spawning claude.** The SessionStart hook writes it on session start. Works for sessions that don't hit the workspace-trust dialog (re-attached worktrees, etc.).
151
+ 2. **Discovery path: read it from the first Stop sentinel.** After sending the first prompt, glob `.done-*` for new files. The first one's UUID becomes `cc_session_id`. This path is required for fresh worktrees because per anthropics/claude-code#11519, **SessionStart is skipped entirely while workspace trust is pending** — and EVERY fresh worktree starts with the trust dialog pending. The fast path never wins on a first delegation.
152
+
153
+ In both cases, **`cc_session_id` can ROTATE mid-delegation**. Per anthropics/claude-code#29094, `SessionStart` with `source: "compact"` is a NEW session linked via `parent_session_id`. So a long claude session that auto-compacts will start emitting Stop events with a DIFFERENT session_id. Your polling loop must handle this: if you see a new `.done-<different-uuid>` appear, update `cc_session_id` to the new value.
155
154
 
156
- Minimum `/tmp/cc-<id>/hook-on-stop.sh` (chmod +x):
155
+ **Do NOT use `claude --session-id <uuid>`.** Per anthropics/claude-code#44607, the flag works only in `-p` (print) mode; in interactive mode it sets a telemetry ID while the CLI generates its own UUID for the transcript and for hook payloads. The pre-generated UUID and the hook's UUID don't match, the polling loop watches a file that never appears, and the loop times out. If you find yourself reaching for `--session-id`, stop let claude pick its own UUID and learn it via discovery.
156
+
157
+ If you see `$PWD/.session-id` containing the literal string `malformed`, or `$PWD/sentinel-malformed.json` appearing instead of your expected file, a hook fired but couldn't extract a UUID-shape `session_id` from the event payload (malformed JSON, missing field, or a future upstream schema change). Read the file to diagnose; surface to the user.
158
+
159
+ ### Verifying the global hooks
160
+
161
+ Verify both hooks are wired correctly in the container before the first delegation of a session:
157
162
 
158
163
  ```sh
159
- #!/bin/sh
160
- # stdin carries the Stop event JSON; transcript_path points at the JSONL.
161
- cat > sentinel.json.tmp
162
- mv sentinel.json.tmp sentinel.json
163
- touch .done
164
+ test -x /usr/local/bin/typeclaw-cc-stop-hook && \
165
+ test -x /usr/local/bin/typeclaw-cc-session-start-hook && \
166
+ jq -e '
167
+ .hooks.Stop[0].hooks[0].command == "/usr/local/bin/typeclaw-cc-stop-hook"
168
+ and .hooks.Stop[0].hooks[0].args == []
169
+ and .hooks.SessionStart[0].hooks[0].command == "/usr/local/bin/typeclaw-cc-session-start-hook"
170
+ and .hooks.SessionStart[0].hooks[0].args == []
171
+ ' "$HOME/.claude/settings.json"
164
172
  ```
165
173
 
166
- The temp-file-then-rename keeps the read side from ever seeing a partial sentinel. The full schema of the Stop event (every field Claude Code populates, including `last_assistant_message` and `transcript_path`) is in `references/stop-hook.md`.
174
+ Three distinct failure modes if it fails:
175
+
176
+ | Symptom | Cause | Remediation |
177
+ | ---------------------------------------------------- | ------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
178
+ | `test -x …` fails | Hook script missing | `docker.file.claudeCode` is off, or image built before this layer landed → `typeclaw start --build` |
179
+ | Scripts present, `jq` fails | `$HOME/.claude/settings.json` was overwritten or bind-mounted | Check `cat ~/.claude/settings.json` for user-mounted config; if so, the operator's hooks won't fire and the delegation cannot proceed |
180
+ | Scripts + settings correct, no sentinel ever appears | Hooks failing at runtime (trust skip, schema mismatch, permissions) | Inspect `ls -la /tmp/cc-<id>/.cc-*-in.*` to see if hooks fired at all, and read any `sentinel-malformed.json` for diagnostic |
181
+
182
+ Don't try to write the hook config yourself — the operator subagent doesn't have the right tools to do it reliably, which is exactly the failure mode this layout was built to eliminate.
183
+
184
+ The full schema of the Stop event (every field Claude Code populates, including `last_assistant_message` and `transcript_path`) is in `references/stop-hook.md`.
167
185
 
168
186
  ## Driving the session
169
187
 
170
188
  The minimum protocol — translate to your actual tool calls:
171
189
 
172
- 1. Create the worktree, write the hook config (above).
173
- 2. `tmux new-session -d -s cc-<id> -c /tmp/cc-<id> claude`.
190
+ 1. Create the worktree.
191
+ 2. `tmux new-session -d -s cc-<id> -c /tmp/cc-<id> claude`. Do NOT pass `--session-id` — it doesn't propagate to hook payloads in interactive mode (see "Per-session filenames" above).
174
192
  3. Wait ~3 seconds for the TUI to initialize.
175
- 4. **Clear startup dialogs (BEFORE sending the task prompt).** Even with `~/.claude.json` pre-seeded, claude can land on one or both pre-prompt modals. Run this as a **loop**, not a one-shot: clearing one dialog can immediately reveal the next, and you must keep polling until claude's actual input prompt is visible (it renders a bottom-of-pane input box with a `╭` / `╰` border).
193
+ 4. **Clear startup dialogs (BEFORE sending the task prompt).** Even with `~/.claude.json` pre-seeded, claude can land on one or both pre-prompt modals. Run this as a **loop**, not a one-shot: clearing one dialog can immediately reveal the next, and you must keep polling until claude's actual input prompt is visible (it renders a bottom-of-pane input box with a `╭` / `╰` border). **Do NOT poll `.session-id` before this step** — per anthropics/claude-code#11519, SessionStart is suppressed while workspace trust is pending, so `.session-id` will not appear until you've accepted trust here.
176
194
 
177
195
  The two known modals, with the exact keystrokes for each (Claude Code's select widget does NOT wrap — pressing `Up` from the first option is a no-op, so the direction must match the dialog's option order):
178
196
  - **Custom API key confirmation** — "Detected a custom API key from environment. Do you want to use this API key?" Fires when `ANTHROPIC_API_KEY` is set (exactly typeclaw's auth path). Options are `[No (recommended), Yes]` with focus initialized on **No**. Resolution: `tmux send-keys -t cc-<id> Down Enter` to advance to **Yes** and submit. Sending `Up Enter` would submit the **No** answer, which can persist as a rejection in `customApiKeyResponses.rejected` and break subsequent launches — never do that here.
@@ -191,9 +209,9 @@ The minimum protocol — translate to your actual tool calls:
191
209
  **Safety note**: accepting workspace trust on a fresh `/tmp/cc-<id>/` worktree is the right call **only when its `HEAD` is the intended clean state** — typically the agent folder's last good commit on a branch the user controls. If the user just merged a third-party PR, pulled a remote branch, or checked out an untrusted ref, the worktree carries that content too and "trusting" it gives claude tool access on potentially hostile code. Before auto-accepting trust, sanity-check: if the user hasn't said something equivalent to "delegate this to Claude Code", or if you're not confident the current `HEAD` is one the user authored or reviewed, surface the trust dialog to them instead. Do NOT extend even a legitimate trust acceptance to in-session permission prompts (Bash, Edit, etc.) — those still need per-turn judgment per the multi-turn decision loop below.
192
210
 
193
211
  5. `tmux send-keys -t cc-<id> "<your prompt>" Enter`.
194
- 6. **Poll** for `/tmp/cc-<id>/.done` in a 500ms-cadence loop with a wall-clock budget (default 10 minutes). On every iteration, also check `tmux has-session -t cc-<id>` — if the session died, claude crashed or auth failed.
195
- 7. When `.done` exists: `rm .done`, read `sentinel.json`, examine `last_assistant_message`.
196
- 8. Decide using the multi-turn loop below.
212
+ 6. **Discover the session UUID from the newest unprocessed Stop sentinel.** Poll `/tmp/cc-<id>/.done-*` in a loop: each iteration, enumerate the files sorted by mtime (`ls -t`), filter out any UUIDs you've already processed (initially empty), and pick the first one whose UUID is a real hex UUID (not `malformed`). That UUID becomes `cc_session_id`. On every poll, also check `tmux has-session -t cc-<id>` — if the session died, claude crashed or auth failed. (Fast-path optimization: if `/tmp/cc-<id>/.session-id` happened to appear before the first prompt, you can use it instead and skip the glob — see `references/tmux-driving.md` for the fast-path snippet.) If the only marker that appears is `.done-malformed`, the Stop hook fired but couldn't extract a UUID-shape `session_id` from the payload — bail and surface to the user.
213
+ 7. Read `/tmp/cc-<id>/sentinel-${cc_session_id}.json`, examine `last_assistant_message`, then `rm /tmp/cc-<id>/.done-${cc_session_id}` (the SPECIFIC file you just processed, NOT a glob — globbing wipes any in-flight new sentinel from a concurrent compact rotation).
214
+ 8. Decide using the multi-turn loop below. **Track which UUIDs you've already processed.** On the next poll, again pick the newest unprocessed `.done-<uuid>`. If the UUID differs from the previous `cc_session_id`, claude has compacted (anthropics/claude-code#29094) — update `cc_session_id` to the new value and continue. Polling is edge-triggered: don't wait on `.done-${cc_session_id}` specifically, because if compact rotated the UUID, that file will never appear.
197
215
  9. When done: `tmux send-keys -t cc-<id> "/exit" Enter && sleep 1 && tmux kill-session -t cc-<id>`.
198
216
 
199
217
  The full polling implementation, the ANSI-handling rules for `capture-pane` fallbacks, and the "tmux session died unexpectedly" recovery path are in `references/tmux-driving.md`.
@@ -202,10 +220,10 @@ The full polling implementation, the ANSI-handling rules for `capture-pane` fall
202
220
 
203
221
  `Stop` fires every turn — including turns where claude paused to ask you a question, not just turns where claude finished the task. After every Stop sentinel, read `last_assistant_message` and decide:
204
222
 
205
- - **Ends with a question mark, or contains "Do you want me to", "Should I", "Could you clarify"** → claude is asking a clarifying question. Compose an answer from the original task brief and `send-keys` it back. Reset the loop: `rm .done`, poll again.
223
+ - **Ends with a question mark, or contains "Do you want me to", "Should I", "Could you clarify"** → claude is asking a clarifying question. Compose an answer from the original task brief and `send-keys` it back. Reset the loop: `rm /tmp/cc-<id>/.done-${cc_session_id}` (the SPECIFIC file you just processed), add that UUID to your processed set, then poll for the next newest unprocessed `.done-<uuid>`.
206
224
  - **Mentions a permission-style ask** ("May I run `<command>`?", "Allow me to edit `<file>`?") → answer per the task's safety constraints. If the constraint is unclear, abort with `/exit` and surface to the user — never invent a yes/no on the user's behalf for an unbounded operation.
207
225
  - **Looks like a final result** (code block + summary, or "Done.", "Here's the result.", "I've finished") → capture and `/exit`.
208
- - **Looks like a status update mid-tool-use** ("Let me check…", "Reading the file now…") → this is a spurious Stop (a Claude turn-boundary that isn't real task progress). Just `rm .done` and keep polling.
226
+ - **Looks like a status update mid-tool-use** ("Let me check…", "Reading the file now…") → this is a spurious Stop (a Claude turn-boundary that isn't real task progress). `rm /tmp/cc-<id>/.done-${cc_session_id}`, add the UUID to your processed set, and keep polling.
209
227
 
210
228
  **Hard turn cap: 8 turns per delegation.** Beyond that, either the task is too complex to delegate cleanly or claude is stuck in a loop. Abort with `/exit`, capture what you have, surface to the user with: "Claude took 8 turns without finishing — here's what it produced, what do you want to do?"
211
229
 
@@ -217,7 +235,7 @@ Four sources, in order of preference:
217
235
 
218
236
  1. **`git diff /agent main..cc-<id>`** (run from `/agent`, or use the explicit worktree path). This is the killer feature of the worktree model — the exact set of changes claude made, branch-vs-branch. Use this for code-change tasks.
219
237
  2. **`git log cc-<id> --oneline main..cc-<id>`** for how claude got there (the sequence of commits). Useful when claude broke a refactor into steps you want to attribute or cherry-pick.
220
- 3. **`sentinel.json` from the final turn** (`last_assistant_message`). The narrative summary claude gave you. Use this for analysis tasks where the answer is prose, not code.
238
+ 3. **`sentinel-<cc_session_id>.json` from the final turn** (`last_assistant_message`). The narrative summary claude gave you. Use this for analysis tasks where the answer is prose, not code.
221
239
  4. **The JSONL transcript** at `transcript_path` in the sentinel. The complete conversation including intermediate tool calls. Use when the diff/log aren't enough and you need to see how claude reasoned. Schema in `references/stop-hook.md`.
222
240
 
223
241
  For code-change tasks, the canonical pattern is:
@@ -272,7 +290,7 @@ A re-statement, because this is where the skill is most often misused:
272
290
  - **Do not use `claude -p` for delegation work.** The headless print mode strips plan mode, sub-agents, and the agent loop. The whole reason to delegate up is the loop. If you find yourself reaching for `-p`, the right answer is probably "do it yourself".
273
291
  - **Do not run `claude` directly inside `/agent`.** Always inside `/tmp/cc-<id>/`. Running claude in the agent folder lets it mutate the live working tree and break the user's session in flight.
274
292
  - **Do not skip the worktree.** Even for short delegations, the worktree is what gives you the `git diff` introspection and the rollback safety. Skipping it because "this one's small" is the path to claude accidentally committing on the wrong branch.
275
- - **Do not share a tmux session across two delegated tasks.** Each task needs its own worktree, its own session, and its own `.claude/settings.json`. Sharing corrupts the sentinel state and crosses transcripts.
293
+ - **Do not share a tmux session across two delegated tasks.** Each task needs its own worktree and its own tmux session. The hook config is global (`~/.claude/settings.json`), so sharing a worktree means two sessions race on the same `$PWD/.session-id` file. Per-session filenames (`sentinel-<uuid>.json`, `.done-<uuid>`) make per-turn artifacts safe across sessions but `.session-id` is fixed-name; the operator's discovery flow handles this by globbing `.done-*` anyway.
276
294
  - **Do not leave a tmux session, worktree, or branch alive after capturing the result.** All three need explicit teardown. Reusing them defeats the per-task isolation that makes the Stop hook reliable.
277
295
  - **Do not push claude's branch to a remote.** `cc-<id>` is throwaway. If something useful happened, cherry-pick onto a real branch first; don't push the experimental branch directly.
278
296
  - **Do not merge claude's branch into main without reviewing the diff.** The `git diff main..cc-<id>` is your review surface. Skipping the diff and merging blindly means you don't actually know what shipped.
@@ -92,6 +92,8 @@ If you need to detect permission prompts (to auto-answer them), `capture-pane` i
92
92
 
93
93
  ## Things you must not do with the Stop hook
94
94
 
95
+ - **Do not write a per-worktree `.claude/settings.json` from operator code.** The hook is pre-baked into the image at build time (see `src/init/dockerfile.ts`, constants `TYPECLAW_CC_STOP_HOOK_PATH` and `TYPECLAW_CC_GLOBAL_SETTINGS`) precisely so the operator subagent never has to construct the JSON itself. Past delegations failed by inventing wrong shapes like `{"hooks": {"onStop": "./script.sh"}}` (wrong key — Claude Code's event name is literal `Stop`, no `on` prefix), `{"hooks": {"Stop": "./script.sh"}}` (right key, wrong value type — must be an array of matcher objects, not a string), and `{"hooks": {"Stop": [{"command": "./script.sh"}]}}` (missing the `matcher` and the inner `hooks` array — the schema is two levels of nesting, not one). All three slips silently fail: Claude Code ignores unknown keys, so the hook is never registered, `.done` is never created, and the polling loop times out at its wall-clock budget. If you ever find yourself wanting to write a per-worktree settings file, **stop** — either the global hook isn't installed (verify with the `jq` check in `SKILL.md`'s "The Stop hook" section) or you're trying to customize behavior the skill's flow doesn't anticipate. In the former case, bail to the user; in the latter, the right answer is a code change to `src/init/dockerfile.ts`, not a runtime JSON write.
96
+ - **Do not edit the in-Dockerfile hook config in a way that bypasses `JSON.stringify` + the regression test.** `TYPECLAW_CC_GLOBAL_SETTINGS` in `src/init/dockerfile.ts` is constructed via `JSON.stringify` so any structural drift fails `dockerfile.test.ts`'s `JSON.parse` regression test, not the docker build or (worse) the first failed delegation. Hand-writing the JSON as a string literal would let a typo land in production. The accepted shape is exactly `{"hooks": {"Stop": [{"matcher": "*", "hooks": [{"type": "command", "command": "..."}]}]}}`.
95
97
  - **Do not set `matcher` to anything other than `"*"`.** The matcher filters by hook tool name; for `Stop`, there's no tool — `"*"` is the canonical "fire on every Stop". Other values may silently never match.
96
98
  - **Do not put long-running commands in the hook.** The hook runs synchronously on the Claude Code main loop; a slow hook blocks the user's next prompt. Write the payload + touch a flag + exit. Anything heavier belongs in your polling loop, not the hook.
97
99
  - **Do not skip the temp-file rename pattern.** Writing `sentinel.json` directly with `>` lets readers see partial JSON if they poll mid-write. Always `cat > sentinel.json.tmp && mv sentinel.json.tmp sentinel.json`.
@@ -13,7 +13,7 @@ The agent process has no TTY. `claude` (interactive) is a TUI that uses raw term
13
13
  The cwd of the spawned `claude` process must be the git worktree at `/tmp/cc-<id>/`, not the agent folder's `workspace/`. Three reasons:
14
14
 
15
15
  1. **Worktree-vs-scratch:** the `cc-<id>` directory is a real git checkout managed by `git worktree`, with refs in `/agent/.git/worktrees/cc-<id>/`. Putting it under `workspace/` would mean the agent folder contains a worktree of itself, which works mechanically but is recursive and confusing.
16
- 2. **Claude's `.claude/settings.json` is read relative to cwd.** It must live at `/tmp/cc-<id>/.claude/settings.json` so claude picks up the per-task `Stop` hook.
16
+ 2. **The global SessionStart and Stop hooks write their per-session files into cwd.** Both hook scripts read `$PWD` (the literal cwd Claude Code was invoked with) and write into it: SessionStart writes `.session-id` containing the UUID; Stop writes `sentinel-<uuid>.json` and `.done-<uuid>`. `$PWD` resolves to the worktree because `tmux new-session -c /tmp/cc-<id>` sets claude's cwd there. If cwd is the wrong place, the files land somewhere the polling loop isn't watching and the loop times out at its budget. (The hook config itself is global at `~/.claude/settings.json`, not per-worktree — see `references/stop-hook.md` for the architectural context. The hooks deliberately do NOT use Claude Code's `$CLAUDE_PROJECT_DIR` env var, which resolves to the git root of cwd — inside a worktree that's the main repo, not the worktree path; the dockerfile constant block in `src/init/dockerfile.ts` carries the rationale.)
17
17
  3. **The worktree IS the codebase.** Claude can read every file at `HEAD` directly — it doesn't need a separate scratch area.
18
18
 
19
19
  Auth has no in-container scratch directory at all — the OAuth `setup-token` flow runs on the user's machine, not in tmux here. See `references/auth-flow.md`.
@@ -30,21 +30,18 @@ Flags worth knowing:
30
30
 
31
31
  - `-d` — detached. The session runs in the background; your shell doesn't attach.
32
32
  - `-s cc-<task-id>` — explicit session name. Required. Without `-s`, tmux picks `0`, `1`, … and a sibling delegation will clobber yours.
33
- - `-c /tmp/cc-<task-id>` — start directory. Must be the worktree path. Claude Code reads `.claude/settings.json` relative to its cwd; if cwd is wrong, the Stop hook will not be registered.
33
+ - `-c /tmp/cc-<task-id>` — start directory. Must be the worktree path. The global Stop hook at `~/.claude/settings.json` always fires regardless of cwd, but the hook script writes its sentinel to `$PWD`; if cwd is wrong, the sentinel lands somewhere your polling loop isn't watching.
34
34
  - `claude` — the command. Just `claude`, not `claude -p`. The interactive TUI is the whole point.
35
35
 
36
- Common mistake: forgetting `-c` and getting cwd `/agent` by default. The Stop hook won't fire because `/agent/.claude/settings.json` doesn't exist (or it does, and you've accidentally polluted someone else's hook config). Worse: claude in `/agent` operates on the live working tree instead of the worktree.
36
+ Common mistake: forgetting `-c` and getting cwd `/agent` by default. The Stop hook still fires (it's global), but `sentinel.json` + `.done` end up under `/agent/`, your polling loop watches `/tmp/cc-<id>/`, and the loop times out at its wall-clock budget. Worse: claude in `/agent` operates on the live working tree instead of the worktree.
37
37
 
38
38
  ## The init wait
39
39
 
40
- `claude` prints a banner, performs auth verification, and renders its input box. This takes ~2–3 seconds on a warm cache, up to ~8s on cold start. You must wait for the input box to render before sending the first prompt, otherwise `send-keys` writes to a pane that isn't accepting input yet and your keystrokes are lost.
40
+ `claude` prints a banner, performs auth verification, and renders its input box. This takes ~2–3 seconds on a warm cache, up to ~8s on cold start. You must wait for the input box to render (and clear any startup dialogs) before sending the first prompt, otherwise `send-keys` writes to a pane that isn't accepting input yet and your keystrokes are lost.
41
41
 
42
- Two strategies:
42
+ The skill body's flow uses dialog-polling rather than a fixed sleep: every 500ms, `tmux capture-pane -t cc-<id> -p -S -15` and check for the input box (Unicode box-drawing `╭` / `╰` at column 0 of bottom rows) OR a known dialog (API key confirmation / workspace trust). Clear dialogs as they appear, exit the loop when the input box is visible. Give up after ~10s and surface to the user.
43
43
 
44
- 1. **Fixed sleep (simple, mostly works):** `sleep 3` after spawn, then `send-keys`. Robust against typical init times; occasionally lossy on cold start.
45
- 2. **Poll for ready signal (robust):** every 500ms, `tmux capture-pane -t cc-<id> -p | tail -5` and look for an input-prompt marker. The exact marker varies by Claude Code version, but a unicode box-drawing character (`│`, `╭`, `╰`) at column 0 of the bottom rows is a reliable heuristic. Give up after 15 seconds and proceed anyway — late init is rare enough that the fixed-sleep fallback is fine.
46
-
47
- The skill body uses the fixed sleep for simplicity. Upgrade to polling if you observe lost first prompts in practice.
44
+ `.session-id` cannot be used as a readiness signal: per anthropics/claude-code#11519, SessionStart is SKIPPED while workspace trust is pending. The dialog-polling loop is the only reliable signal here.
48
45
 
49
46
  ## Sending input
50
47
 
@@ -67,16 +64,57 @@ Notes:
67
64
 
68
65
  - **Multi-line prompts**: send the body, then `Enter`. Claude Code's input box treats `Enter` as submit, so newlines in your text become submitted lines (not multi-line input). If you need a genuinely multi-line prompt, use the paste-buffer flow above with embedded newlines.
69
66
 
70
- ## Polling for `.done`
67
+ ## Discovering `cc_session_id` and polling for `.done-<session_id>`
68
+
69
+ The skill workflow does NOT pre-fetch the session UUID from `.session-id` — per anthropics/claude-code#11519, SessionStart is suppressed while workspace trust is pending, so `.session-id` may never appear before the first prompt. Instead, the operator discovers the UUID from the **newest unprocessed** Stop sentinel.
70
+
71
+ ### Polling is edge-triggered, not level-triggered
72
+
73
+ A subtle correctness rule: never poll on "the current `.done-<sid>` appears." That's level-triggered and breaks when (a) an old `.done` still exists from the previous turn and (b) a new one with a different UUID appears (compaction). Instead:
74
+
75
+ 1. **Track which sentinels you've already processed** (by UUID).
76
+ 2. **On every poll, enumerate ALL `.done-*` files**, ignore the ones you've already processed, and pick the **newest by mtime** of what remains.
77
+ 3. **Prefer real UUIDs over `malformed`**: if the newest unprocessed file is a real UUID, take it; if only `.done-malformed` remains, bail with a diagnostic.
78
+ 4. **After processing**, remove the specific `.done-<uuid>` you read — don't `rm .done-*`, that wipes an in-flight new sentinel.
79
+
80
+ This shape handles three cases uniformly:
71
81
 
72
- The skill workflow polls the sentinel flag file, not the pane. This is the reliable path:
82
+ - **First turn**: no processed set; the only file is the new sentinel.
83
+ - **Normal turn N→N+1**: the new sentinel arrives, the old one was removed last iteration; pick the new one.
84
+ - **Compact mid-delegation**: a `.done-<newuuid>` appears while `.done-<oldsid>` may or may not have been cleaned; pick newest, update `cc_session_id`.
85
+
86
+ ### Phase 1 — first-turn discovery (after the first prompt is sent)
73
87
 
74
88
  ```sh
75
- budget=600 # 10 minutes in seconds
89
+ budget=600 # 10 minutes in seconds for the first turn
76
90
  elapsed=0
77
- while [ ! -f /tmp/cc-<id>/.done ]; do
91
+ processed="" # space-separated list of UUIDs we've already consumed
92
+ cc_session_id=""
93
+ while [ -z "$cc_session_id" ]; do
94
+ # Newest unprocessed real .done-<uuid> wins.
95
+ # ls -t sorts by mtime (newest first). Restrict to .done-<uuid> shape via case below.
96
+ newest=""
97
+ for f in $(ls -t /tmp/cc-<id>/.done-* 2>/dev/null); do
98
+ [ -f "$f" ] || continue
99
+ uuid="${f##*/.done-}"
100
+ case " $processed " in *" $uuid "*) continue ;; esac
101
+ if [ "$uuid" = "malformed" ]; then
102
+ # Only honor malformed if there's nothing real to pick. Keep scanning;
103
+ # if we don't see a real UUID first, fall through to the malformed-only case.
104
+ malformed_fallback="$f"
105
+ continue
106
+ fi
107
+ newest="$f"
108
+ cc_session_id="$uuid"
109
+ break
110
+ done
111
+ if [ -n "$cc_session_id" ]; then break; fi
112
+ if [ -n "${malformed_fallback:-}" ]; then
113
+ echo "Stop hook fired but couldn't extract a UUID-shape session_id"
114
+ exit 1
115
+ fi
78
116
  if [ "$elapsed" -ge "$budget" ]; then
79
- echo "Timeout reached"
117
+ echo "Timeout reached — first Stop never fired"
80
118
  break
81
119
  fi
82
120
  if ! tmux has-session -t cc-<id> 2>/dev/null; then
@@ -88,7 +126,55 @@ while [ ! -f /tmp/cc-<id>/.done ]; do
88
126
  done
89
127
  ```
90
128
 
91
- In your actual loop, translate to your tool calls: a check on `/tmp/cc-<id>/.done` existence, a check on `tmux has-session -t cc-<id>`, sleep, repeat. The shell snippet is illustrative.
129
+ (Fast-path optimization: if `/tmp/cc-<id>/.session-id` already exists when phase 1 starts — meaning trust was somehow already accepted before this session started read it and skip the glob. The skill body explains why this rarely wins on first delegations.)
130
+
131
+ ### Phase 2 — per-turn polling (turns 2 onward, after sending another prompt)
132
+
133
+ ```sh
134
+ budget=600
135
+ elapsed=0
136
+ # `processed` carries over from phase 1 — add the just-consumed UUID before entering phase 2.
137
+ processed="$processed $cc_session_id"
138
+ new_sid=""
139
+ while [ -z "$new_sid" ]; do
140
+ for f in $(ls -t /tmp/cc-<id>/.done-* 2>/dev/null); do
141
+ [ -f "$f" ] || continue
142
+ uuid="${f##*/.done-}"
143
+ case " $processed " in *" $uuid "*) continue ;; esac
144
+ if [ "$uuid" = "malformed" ]; then
145
+ echo "Stop hook fired but couldn't extract a UUID-shape session_id"
146
+ exit 1
147
+ fi
148
+ new_sid="$uuid"
149
+ break
150
+ done
151
+ if [ -n "$new_sid" ]; then break; fi
152
+ if [ "$elapsed" -ge "$budget" ]; then echo "Timeout reached"; break; fi
153
+ if ! tmux has-session -t cc-<id> 2>/dev/null; then echo "tmux session died unexpectedly"; break; fi
154
+ sleep 0.5
155
+ elapsed=$((elapsed + 1))
156
+ done
157
+
158
+ if [ "$new_sid" != "$cc_session_id" ]; then
159
+ echo "Detected session_id rotation (compact #29094): ${cc_session_id} → ${new_sid}"
160
+ cc_session_id="$new_sid"
161
+ fi
162
+ # Read sentinel for this turn:
163
+ cat "/tmp/cc-<id>/sentinel-${cc_session_id}.json"
164
+ # After deciding what to do next, remove ONLY this turn's marker (not a glob):
165
+ rm -f "/tmp/cc-<id>/.done-${cc_session_id}"
166
+ ```
167
+
168
+ In your actual loop, translate to your tool calls. The shell snippets are illustrative.
169
+
170
+ ### Why edge-triggered, not level-triggered
171
+
172
+ The previous version of this snippet was level-triggered (`while [ ! -f .done-${cc_session_id} ]`), which has two failure modes:
173
+
174
+ - **Compact rotation while the old marker still exists**: if the operator hasn't yet `rm`'d `.done-<old>` when claude compacts and fires Stop with `.done-<new>`, the level-triggered predicate is FALSE (old still exists), the inner rotation-check loop never runs, and the operator never notices the new UUID. The new sentinel sits unread; the operator's next `rm .done-<old>` removes the stale marker; the polling loop now blocks forever waiting for `.done-<old>` to reappear.
175
+ - **First-turn discovery picking the wrong file**: `for f in .done-*` iterates in shell glob order (lexicographic). If two `.done-*` exist (operator crashed mid-cleanup of a prior turn, or compact fired immediately), the lower-UUID-prefix file wins. Newest-by-mtime is causally correct; shell-glob order is not.
176
+
177
+ If `/tmp/cc-<id>/sentinel-malformed.json` or `/tmp/cc-<id>/.done-malformed` appears AND no real UUID file appears, a hook fired but session_id extraction failed (malformed JSON, missing field, or a future upstream schema change). Read `sentinel-malformed.json` to diagnose and surface to the user — this is not a recoverable state from the operator's side.
92
178
 
93
179
  ### Why 500ms cadence
94
180
 
@@ -148,7 +234,7 @@ git -C /agent branch -D cc-<id>
148
234
  ## Things you must not do in tmux driving
149
235
 
150
236
  - **Do not omit `-s <name>` on `new-session`.** Anonymous sessions race across delegations.
151
- - **Do not omit `-c /tmp/cc-<id>` on `new-session`.** Wrong cwd means wrong `.claude/settings.json`, and worse, claude operating on the live `/agent` working tree.
237
+ - **Do not omit `-c /tmp/cc-<id>` on `new-session`.** The global Stop hook writes its sentinel into `$PWD`; wrong cwd means the sentinel lands somewhere your polling loop isn't watching. Worse: claude in `/agent` operates on the live working tree instead of the worktree.
152
238
  - **Do not skip the init wait.** Sending input before the TUI is ready loses the input silently.
153
239
  - **Do not use `send-keys` with raw user-supplied strings without escaping.** Tmux's send-keys is mildly shell-like; embedded special chars get interpreted. Use `load-buffer + paste-buffer` for anything untrusted or complex.
154
240
  - **Do not poll `capture-pane` as your primary done-signal.** Use the sentinel. `capture-pane` is for content retrieval, not lifecycle.
@@ -191,7 +191,7 @@ This says: the `discord-bot` adapter is enabled with default engagement; one spe
191
191
  This is a **`roles`** edit, not a `channels` edit. See the `typeclaw-permissions` skill for the full procedure. Short version:
192
192
 
193
193
  1. Get the platform ID (Discord channel ID, Slack channel ID, Telegram chat ID, KakaoTalk chat ID).
194
- 2. Append a match-rule to `roles.member.match` using the canonical DSL (`discord:<guild>/<channel>`, `slack:<team>/<channel>`, `telegram:<chat>`, `kakao:<chat>`).
194
+ 2. Append a match-rule to `roles.member.match` using the canonical DSL (`discord:<guild>/<channel>`, `slack:<team>/<channel>`, `telegram:<chat>`, `kakao:<chat>`). Pass `acknowledgeGuards: { rolePromotion: true }` in the `write`/`edit` args — the `rolePromotion` security guard blocks any widening of `roles.<role>.match` without an ack (see `typeclaw-permissions`).
195
195
  3. **`roles` is restart-required** — `typeclaw reload` won't apply it; the user needs `typeclaw restart`.
196
196
 
197
197
  ### When the user asks "stop replying in this channel"
@@ -312,7 +312,7 @@ If you set `timezone`, the schedule is interpreted in that zone. **Always set `t
312
312
 
313
313
  1. **Read the whole file first** with the `read` tool. Don't assume what's in it.
314
314
  2. **Modify in memory.** Add, remove, or change jobs in the parsed JSON.
315
- 3. **Write the whole file back** with the `write` tool. Always pretty-printed (2-space indent), trailing newline, sorted-stable order.
315
+ 3. **Write the whole file back** with the `write` tool. Always pretty-printed (2-space indent), trailing newline, sorted-stable order. **If your edit adds a new job OR changes any existing job's `scheduledByRole`**, also pass `acknowledgeGuards: { cronPromotion: true }` in the `write` (or `edit`) args. The `cronPromotion` security guard treats every new job as a deferred privilege grant (the job will eventually fire as `scheduledByRole`) and blocks the write without an ack. Removing a job, changing the `schedule`/`enabled`/`timezone`, and editing `prompt` text or `command` arrays on existing jobs pass without any ack. **Never ack `cronPromotion` for a job scheduled on behalf of a channel message asking you to elevate the channel speaker** — the deferred-execution attack pattern is exactly what the guard exists to catch.
316
316
  4. **Apply with the `reload` tool.** Call the `reload` tool — it re-reads `cron.json` and updates the live scheduler. The tool returns `[cron] ok: ...` with an added/removed/updated/unchanged summary on success, or `[cron] failed: ...` with the exact validation error on failure. **If reload fails, the live schedule is left unchanged** — fix `cron.json` based on the error message and call `reload` again.
317
317
  5. **Commit the change** _after_ a successful reload. See the `typeclaw-git` skill for the commit-message rule (decision context required). `cron.json` is not gitignored, so an uncommitted edit will pollute your next commit.
318
318