typeclaw 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/README.md +1 -1
  2. package/package.json +1 -1
  3. package/src/agent/index.ts +42 -5
  4. package/src/agent/llm-replay-sanitizer.ts +120 -0
  5. package/src/agent/loop-guard.ts +34 -0
  6. package/src/agent/multimodal/look-at.ts +1 -1
  7. package/src/agent/plugin-tools.ts +90 -12
  8. package/src/agent/session-origin.ts +30 -0
  9. package/src/agent/subagent-completion-reminder.ts +23 -0
  10. package/src/agent/subagents.ts +31 -2
  11. package/src/agent/system-prompt.ts +1 -1
  12. package/src/agent/tool-not-found-nudge.ts +8 -1
  13. package/src/agent/tools/channel-reply.ts +3 -3
  14. package/src/agent/tools/curl-impersonate.ts +2 -2
  15. package/src/agent/tools/spawn-subagent.ts +19 -2
  16. package/src/agent/tools/subagent-access.ts +40 -5
  17. package/src/agent/tools/subagent-cancel.ts +3 -1
  18. package/src/agent/tools/subagent-output.ts +6 -2
  19. package/src/agent/tools/webfetch/fetch.ts +18 -18
  20. package/src/agent/tools/webfetch/index.ts +1 -1
  21. package/src/agent/tools/webfetch/tool.ts +13 -13
  22. package/src/agent/tools/webfetch/types.ts +1 -1
  23. package/src/agent/tools/websearch.ts +6 -6
  24. package/src/bundled-plugins/backup/index.ts +40 -37
  25. package/src/bundled-plugins/backup/runner.ts +22 -1
  26. package/src/bundled-plugins/github-cli-auth/gh-command.ts +15 -7
  27. package/src/bundled-plugins/guard/policies/non-workspace-write.ts +38 -1
  28. package/src/bundled-plugins/memory/README.md +11 -11
  29. package/src/bundled-plugins/memory/dreaming.ts +5 -0
  30. package/src/bundled-plugins/memory/search-tool.ts +98 -1
  31. package/src/bundled-plugins/operator/operator.ts +5 -1
  32. package/src/bundled-plugins/reviewer/reviewer.ts +18 -9
  33. package/src/bundled-plugins/reviewer/skills/code-review.ts +1 -1
  34. package/src/bundled-plugins/reviewer/skills/general.ts +1 -1
  35. package/src/bundled-plugins/scout/scout.ts +7 -7
  36. package/src/bundled-plugins/security/policies/private-surface-read.ts +2 -2
  37. package/src/bundled-plugins/security/policies/ssrf.ts +3 -3
  38. package/src/bundled-plugins/tool-result-cap/README.md +1 -1
  39. package/src/channels/adapters/github/inbound.ts +11 -0
  40. package/src/channels/adapters/github/webhook-register.ts +32 -27
  41. package/src/channels/router.ts +61 -23
  42. package/src/channels/schema.ts +2 -1
  43. package/src/channels/subagent-completion-bridge.ts +18 -18
  44. package/src/channels/types.ts +1 -1
  45. package/src/cli/inspect-controller.ts +130 -38
  46. package/src/container/start.ts +7 -1
  47. package/src/git/mutex.ts +22 -0
  48. package/src/git/reconcile-ignored.ts +214 -0
  49. package/src/hostd/daemon.ts +26 -1
  50. package/src/hostd/portbroker-manager.ts +7 -0
  51. package/src/init/dockerfile.ts +1 -1
  52. package/src/init/gitignore.ts +25 -16
  53. package/src/inspect/index.ts +31 -4
  54. package/src/inspect/loop.ts +16 -12
  55. package/src/plugin/define.ts +2 -2
  56. package/src/plugin/index.ts +2 -2
  57. package/src/portbroker/hostd-client.ts +36 -13
  58. package/src/run/index.ts +14 -0
  59. package/src/sandbox/build.ts +10 -0
  60. package/src/sandbox/index.ts +9 -1
  61. package/src/sandbox/policy.ts +12 -0
  62. package/src/sandbox/session-tmp.ts +43 -0
  63. package/src/sandbox/writable-zones.ts +103 -3
  64. package/src/server/command-runner.ts +1 -1
  65. package/src/server/index.ts +8 -0
  66. package/src/skills/typeclaw-channel-github/SKILL.md +37 -10
  67. package/src/skills/typeclaw-memory/SKILL.md +3 -1
  68. package/src/tui/format.ts +11 -11
@@ -0,0 +1,43 @@
1
+ import { mkdir } from 'node:fs/promises'
2
+ import { isAbsolute, join, relative, resolve } from 'node:path'
3
+
4
+ // Per-session scratch lives on the REAL container /tmp, namespaced by session id.
5
+ // It sits OUTSIDE the agent folder on purpose: the agent folder's `sessions/` is
6
+ // force-committed by typeclaw, and scratch must never be committed. The real
7
+ // /tmp is ephemeral (dies with the container) and already the natural home for
8
+ // throwaway files, so a per-session subdir of it gives `/tmp` semantics without
9
+ // either sharing the whole container /tmp into a sandboxed role or persisting
10
+ // anything into the project surface.
11
+ export const SESSION_TMP_ROOT = '/tmp/typeclaw-session'
12
+
13
+ export function sessionTmpDir(sessionId: string): string {
14
+ return join(SESSION_TMP_ROOT, sessionId)
15
+ }
16
+
17
+ export async function ensureSessionTmpDir(sessionId: string): Promise<string> {
18
+ const dir = sessionTmpDir(sessionId)
19
+ await mkdir(dir, { recursive: true, mode: 0o700 })
20
+ return dir
21
+ }
22
+
23
+ export function isUnderTmp(agentDir: string, rawPath: string): boolean {
24
+ const resolved = resolve(agentDir, rawPath)
25
+ return resolved === '/tmp' || isInside('/tmp', resolved)
26
+ }
27
+
28
+ // Maps a model-facing /tmp path to its per-session backing path. Returns
29
+ // undefined when the path is not under /tmp (caller leaves it untouched). The
30
+ // model keeps writing/reading `/tmp/foo`; only the on-disk target moves to
31
+ // `<SESSION_TMP_ROOT>/<sid>/foo`, which is the same dir bwrap binds over `/tmp`
32
+ // for the sandboxed bash that reads it back.
33
+ export function mapVirtualTmpPath(agentDir: string, sessionId: string, rawPath: string): string | undefined {
34
+ const resolved = resolve(agentDir, rawPath)
35
+ if (resolved !== '/tmp' && !isInside('/tmp', resolved)) return undefined
36
+ const rel = relative('/tmp', resolved)
37
+ return rel === '' ? sessionTmpDir(sessionId) : join(sessionTmpDir(sessionId), rel)
38
+ }
39
+
40
+ function isInside(parent: string, child: string): boolean {
41
+ const rel = relative(parent, child)
42
+ return rel !== '' && !rel.startsWith('..') && !isAbsolute(rel)
43
+ }
@@ -1,11 +1,16 @@
1
- import { lstat } from 'node:fs/promises'
2
- import path, { join } from 'node:path'
1
+ import { lstat, mkdir, readFile, writeFile } from 'node:fs/promises'
2
+ import path, { isAbsolute, join, resolve } from 'node:path'
3
3
 
4
4
  export type WritableZones = {
5
5
  dirs: string[]
6
6
  files: string[]
7
7
  }
8
8
 
9
+ export type ProtectedZones = {
10
+ dirs: string[]
11
+ files: string[]
12
+ }
13
+
9
14
  // SECURITY: a blanket RW bind is coarser than the write/edit guards, so this set
10
15
  // is deliberately NARROWER than the write/edit allowlist — only genuinely
11
16
  // free-write scratch zones. `.agents/skills` and `packages` are excluded: the
@@ -13,7 +18,25 @@ export type WritableZones = {
13
18
  // guard and the latter holds executable plugin code; bash must not get blanket
14
19
  // RW to either. Skill authoring and package writes go through the guarded
15
20
  // write/edit tool only.
16
- const WRITABLE_DIRS = ['workspace', 'public', 'mounts'] as const
21
+ // `.git` is writable so a member can `git add`/`git commit` their own edits.
22
+ // This is the AGENT'S OWN repo, not a shared/upstream one, so writing history
23
+ // is not a privilege boundary: a low-trust role staging a tracked path it
24
+ // cannot edit in the worktree (e.g. via `git update-index --cacheinfo` plumbing)
25
+ // only writes the agent's own history — content the backup runner already
26
+ // force-commits on idle regardless. So we deliberately do NOT try to confine
27
+ // commit *content* to the worktree write-allowlist; that boundary governs the
28
+ // working tree, not the object database.
29
+ //
30
+ // The one thing writable `.git` must NOT grant is code execution in the
31
+ // UNSANDBOXED runtime (backup/dreaming commit the same .git out of band): a
32
+ // planted `.git/hooks/*` or a `core.hooksPath` in `.git/config` would fire there
33
+ // as a higher-privilege process. resolveProtectedZones re-binds `.git/hooks` and
34
+ // `.git/config` read-only (after the writable .git bind, last-op-wins) to close
35
+ // exactly that escalation.
36
+ const WRITABLE_DIRS = ['workspace', 'public', 'mounts', '.git'] as const
37
+
38
+ const PROTECTED_GIT_DIRS = ['.git/hooks'] as const
39
+ const PROTECTED_GIT_FILES = ['.git/config'] as const
17
40
 
18
41
  // Bash may EDIT these when present; creating a MISSING root file goes through
19
42
  // write/edit (bwrap cannot RW-bind a non-existent source without pre-creating it).
@@ -43,6 +66,83 @@ export async function resolveWritableZones(agentDir: string): Promise<WritableZo
43
66
  return { dirs, files }
44
67
  }
45
68
 
69
+ // Read-only re-protections rendered on top of the writable .git bind. Unlike
70
+ // the writable resolvers, this MUST NOT drop absent entries: .git is writable,
71
+ // so a path absent at jail-build time would otherwise be CREATED by sandboxed
72
+ // bash (e.g. a planted .git/hooks/pre-commit) and then executed by the
73
+ // unsandboxed runtime git ops. So we ensure each protected path exists first,
74
+ // then always RO-bind it — a read-only bind of a real dir blocks creating
75
+ // children inside it (EROFS), and a read-only bind of config keeps its real
76
+ // content readable (commits need user.name/email) while blocking mutation.
77
+ //
78
+ // We also resolve the effective core.hooksPath from the real (about-to-be-RO)
79
+ // config: if it already points at a writable location (e.g. workspace/hooks),
80
+ // the .git/hooks RO-bind alone would not cover it, so that dir is protected too.
81
+ export async function resolveProtectedZones(agentDir: string): Promise<ProtectedZones> {
82
+ const dirs: string[] = []
83
+ for (const rel of PROTECTED_GIT_DIRS) {
84
+ dirs.push(await ensureProtectedDir(join(agentDir, rel)))
85
+ }
86
+ const files: string[] = []
87
+ for (const rel of PROTECTED_GIT_FILES) {
88
+ files.push(await ensureProtectedFile(join(agentDir, rel)))
89
+ }
90
+
91
+ const hooksPathDir = await resolveEffectiveHooksPath(agentDir)
92
+ if (hooksPathDir !== undefined && !dirs.includes(hooksPathDir)) {
93
+ dirs.push(await ensureProtectedDir(hooksPathDir))
94
+ }
95
+
96
+ return { dirs, files }
97
+ }
98
+
99
+ // Fail closed: a symlink at a protected path would make the RO bind follow it
100
+ // elsewhere, so reject it rather than silently protect the wrong target.
101
+ async function ensureProtectedDir(target: string): Promise<string> {
102
+ await mkdir(target, { recursive: true })
103
+ await assertNotSymlink(target)
104
+ return target
105
+ }
106
+
107
+ async function ensureProtectedFile(target: string): Promise<string> {
108
+ if (!(await isRealEntry(target, 'file'))) {
109
+ try {
110
+ await writeFile(target, '', { flag: 'wx' })
111
+ } catch {
112
+ // Lost a race (or it appeared); the symlink check below still guards it.
113
+ }
114
+ }
115
+ await assertNotSymlink(target)
116
+ return target
117
+ }
118
+
119
+ async function assertNotSymlink(target: string): Promise<void> {
120
+ const stats = await lstat(target)
121
+ if (stats.isSymbolicLink()) {
122
+ throw new Error(`sandbox: refusing to protect symlinked path ${target}`)
123
+ }
124
+ }
125
+
126
+ // Reads core.hooksPath straight from .git/config text (the file is about to be
127
+ // RO-bound, so its content is the trusted baseline). Returns the resolved
128
+ // absolute dir only when it lands inside agentDir — an outside path is not
129
+ // writable by the jail and a relative path resolves against the repo root, per
130
+ // gitconfig semantics.
131
+ async function resolveEffectiveHooksPath(agentDir: string): Promise<string | undefined> {
132
+ let text: string
133
+ try {
134
+ text = await readFile(join(agentDir, '.git', 'config'), 'utf8')
135
+ } catch {
136
+ return undefined
137
+ }
138
+ const match = text.match(/^\s*hooksPath\s*=\s*(.+?)\s*$/m)
139
+ if (match === null) return undefined
140
+ const raw = match[1]?.trim()
141
+ if (raw === undefined || raw.length === 0) return undefined
142
+ const resolved = isAbsolute(raw) ? resolve(raw) : resolve(agentDir, raw)
143
+ return isInside(agentDir, resolved) ? resolved : undefined
144
+ }
145
+
46
146
  // SECURITY: a writable RW bind renders AFTER the masks and last-op-wins, so an
47
147
  // RW bind on a masked path would re-expose the real (hidden) directory. Drop any
48
148
  // writable zone that is, or is nested under, a masked path so the confidentiality
@@ -389,7 +389,7 @@ export async function runPromptForCommand(args: {
389
389
  // Mirrors src/agent/multimodal/look-at.ts: spawn a session, prompt, capture
390
390
  // the final assistant text, dispose. Unlike look-at we want the FULL agent
391
391
  // toolset (no `tools: []` / `customTools: []` overrides) so the model can
392
- // call channel_send, websearch, etc. The system prompt is composed from
392
+ // call channel_send, web_search, etc. The system prompt is composed from
393
393
  // the agent folder's IDENTITY/SOUL/MEMORY files via the default resource
394
394
  // loader (no `systemPromptOverride`).
395
395
  const snapshot = args.runtime.get()
@@ -11,6 +11,7 @@ import {
11
11
  import { runPluginDoctorChecks, runPluginDoctorFix } from '@/agent/doctor'
12
12
  import type { LiveSessionRegistry } from '@/agent/live-sessions'
13
13
  import type { LiveSubagentRegistry } from '@/agent/live-subagents'
14
+ import { forgetSharedLoopGuardTool } from '@/agent/plugin-tools'
14
15
  import { detectProviderError } from '@/agent/provider-error'
15
16
  import { requestContainerRestart } from '@/agent/restart'
16
17
  import { consumeRestartHandoff, type RestartHandoff } from '@/agent/restart-handoff'
@@ -25,6 +26,7 @@ import {
25
26
  recordTurnStart,
26
27
  runIdleContinuation,
27
28
  } from '@/agent/todo/continuation-wiring'
29
+ import { SUBAGENT_OUTPUT_TOOL_NAME } from '@/agent/tools/subagent-output'
28
30
  import type { ChannelRouter } from '@/channels/router'
29
31
  import { aggregateCronList, type CronListEntry, loadCron } from '@/cron'
30
32
  import type { McpManager } from '@/mcp'
@@ -931,6 +933,12 @@ function routeSubagentCompletionReminder(state: SessionState, msg: StreamMessage
931
933
  if (parsed === null) return
932
934
  if (parsed.parentSessionId !== state.sessionFileId) return
933
935
 
936
+ // The reminder asks the agent to fetch this result now; clear the
937
+ // subagent_output window first so an earlier premature-polling streak can't
938
+ // hard-block that fetch. Reset before publish so the wakeup can't race stale
939
+ // guard state.
940
+ forgetSharedLoopGuardTool(state.sessionFileId, SUBAGENT_OUTPUT_TOOL_NAME)
941
+
934
942
  const idle = state.drainQueue.length === 0 && !state.draining
935
943
  const delivery = idle ? 'interrupt' : 'queue'
936
944
  const text = renderSubagentCompletionReminder(parsed)
@@ -14,9 +14,27 @@ GitHub renders normal Markdown in issues, PRs, discussions, and review comments.
14
14
 
15
15
  A successful `channel_reply` ends your turn by default — the runtime stops the model right after the reply lands. That is correct for a final answer, but it will **silently truncate** a turn that still has work to do. If you post a status line like "Reviewing now, I'll be back with findings" and then expect to keep working (fetch the diff, spawn the reviewer, post the review) in the **same** turn, you must call `channel_reply({ text: "…", continue: true })`. Without `continue: true`, the turn ends at that status reply and the review never runs. Reserve `continue: true` for genuine multi-step turns; the final reply that wraps up the turn omits it.
16
16
 
17
+ ## Inbound triage — do this first, every time
18
+
19
+ Before you pick an action, classify the inbound. Skipping this step is how a PR ends up with a "looks good" comment but no approval: the model pattern-matches on the prose ("they fixed it → resolve the thread") and never asks whether it owes the PR a formal review. Answer these in order; the **first** that matches decides your path. Do not skip ahead.
20
+
21
+ 1. **Is this a PR, and do I have an unresolved blocking obligation on it?** On any `pr:N` inbound, before anything else, check whether you owe this PR a verdict you have not yet landed. Check **both** signals below — checking only formal review state misses the very failure this gate exists to catch, because a prior block may never have become formal state:
22
+ - **Formal review state.** Run the step-1 re-review query in the PR review flow (`gh api --paginate --slurp /repos/owner/repo/pulls/<N>/reviews --jq '…'` filtered to `{CHANGES_REQUESTED, APPROVED}`). If your latest **blocking decision** is `CHANGES_REQUESTED`, you have a live sticky block.
23
+ - **Flat-comment blockers you authored.** A prior "request changes" may have been posted as a plain PR/issue comment instead of a formal review — in which case **no `CHANGES_REQUESTED` row exists** and the query above returns empty even though you blocked the PR in prose. So also scan your own recent comments (`gh api --paginate /repos/owner/repo/issues/<N>/comments --jq '[.[] | select(.user.login == "<your-login>")]'` — paginate here too, since comments also come 30 per page) for one that requested changes / raised blockers and has not since been superseded by a formal review or a clear retraction. For routing, a blocking comment you wrote is as binding as a formal `CHANGES_REQUESTED`.
24
+
25
+ If **either** signal shows an unresolved blocker you raised, this inbound is a **re-review** — go to the **PR review flow** regardless of how it is phrased. An author commenting "fixed both issues" / "addressed your feedback" / "pushed a fix" is a re-review trigger, **not** a thread-resolve trigger. A re-review is closed by re-deciding the verdict and landing a **formal** review via `POST /pulls/<N>/reviews`: `APPROVE` clears a sticky `CHANGES_REQUESTED`; a comment or a flat reply clears neither a formal block nor a flat-comment blocker — it just strands the verdict again, which is the original bug.
26
+
27
+ 2. **Am I being asked to review (first-time)?** Explicit `review_requested` inbound, or a human asking in plain language ("review this", "take a look at #N"). → **PR review flow** (see "When you are being asked to review").
28
+
29
+ 3. **Is this a reply inside an inline review thread I authored** (`pr:N` with `thread` set, on a thread whose root comment is mine)? → verify the fix at head SHA and **resolve the thread** (see "Resolving review threads you authored"). `resolve_review_thread` only works when `thread` is set on the origin; if there is **no** `thread`, this branch does not apply — do not attempt it, fall through to the table below.
30
+
31
+ 4. **None of the above** → use the routing table below.
32
+
33
+ > The decisive question is **#1**. A blocking verdict you owe a PR is never discharged by a `channel_reply` or an `issue_comment` — neither carries review state, and neither clears a sticky `CHANGES_REQUESTED`. This applies to an **unresolved blocking obligation** (a live `CHANGES_REQUESTED`, or an unretracted blocker you raised in a flat comment), not to a stale `APPROVED` or a past non-blocking comment — those impose no closeout duty. When you do owe a block, the close-out is always a formal review via `POST /pulls/<N>/reviews`.
34
+
17
35
  ## What to do, by inbound type
18
36
 
19
- Every GitHub inbound lands on a `chat` keyed by its subject: `issue:N`, `pr:N`, or `discussion:N`. Pick your action from the kind of thing that arrived. The default action for anything addressed to you is a normal `channel_reply` in that thread; the **PR review flow** below is the one exception that requires delegation.
37
+ Every GitHub inbound lands on a `chat` keyed by its subject: `issue:N`, `pr:N`, or `discussion:N`. **Run the triage above first.** Only if no triage branch matched do you pick an action from this table. The default action for anything addressed to you is a normal `channel_reply` in that thread; the **PR review flow** is the exception that requires delegation.
20
38
 
21
39
  | Inbound | Looks like | What to do |
22
40
  | -------------------------------------------------------- | ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------- |
@@ -53,21 +71,24 @@ The `reviewer` subagent is the analyst; you are the integration layer between it
53
71
  gh pr view <N> --repo owner/repo --json title,body,baseRefName,headRefOid,files
54
72
  ```
55
73
 
56
- Then check for a **prior review by you** — this is what makes the current request a _re-review_ (the author pushed fixes and re-requested you after you previously blocked the PR):
74
+ Then check for an **unresolved blocking obligation of yours** — this is what makes the current request a _re-review_ (the author pushed fixes after you previously blocked the PR). As in triage #1, a block can live in **two** places, and you must check both:
57
75
 
58
76
  ```sh
77
+ # (a) formal review state
59
78
  gh api --paginate --slurp /repos/owner/repo/pulls/<N>/reviews --jq 'add | [.[] | select(.user.login == "<your-login>" and (.state == "CHANGES_REQUESTED" or .state == "APPROVED"))] | last | .state'
79
+ # (b) flat-comment blocker you authored (when (a) is empty)
80
+ gh api --paginate /repos/owner/repo/issues/<N>/comments --jq '[.[] | select(.user.login == "<your-login>")]'
60
81
  ```
61
82
 
62
- If that prints `CHANGES_REQUESTED`, treat the current request as a **re-review** and carry that fact into the spawn in step 2; any other output (including empty) means no live block, so handle the request normally. (`<your-login>` is your GitHub App login, typically `name[bot]`.)
83
+ If (a) prints `CHANGES_REQUESTED`, **or** (a) is empty but (b) surfaces a comment of yours that requested changes / raised blockers and has not since been superseded by a formal review or a clear retraction, treat the current request as a **re-review** and carry that fact — including which form the prior block took — into the spawn in step 2. Only when **neither** signal shows an unresolved block do you handle the request normally. (`<your-login>` is your GitHub App login, typically `name[bot]`.)
63
84
 
64
- Two things make this query load-bearing — both are bugs if you simplify it:
85
+ Two things make the formal-review query load-bearing — both are bugs if you simplify it:
65
86
  - **Filter to _decision_ states, not the latest review row.** GitHub's sticky block is cleared only by a later `APPROVED` (or a dismissal) from the same reviewer — a later `COMMENTED` review does **not** clear it. So a history of `CHANGES_REQUESTED` → `COMMENTED` is _still blocked_, even though the latest row is `COMMENTED`. Selecting `last` over the raw review list would misread that as "not a re-review". Filtering to `{CHANGES_REQUESTED, APPROVED}` first, then taking `last`, asks the right question: "what is my latest _blocking decision_, ignoring non-deciding comments?" (Dismissed reviews surface as `state: "DISMISSED"`, so they're correctly excluded from the decision set too.)
66
87
  - **`--paginate --slurp` is mandatory.** GitHub returns reviews 30 per page; a bot on a long-lived PR can have its blocking `CHANGES_REQUESTED` past the first page. Without paginating, that review is invisible and a genuine re-review silently falls back to the plain-comment path. `--slurp` collects every page into one array of arrays; the `add` concatenates them before filtering.
67
88
 
68
89
  2. **Spawn the `reviewer` subagent with the PR target.** Use `run_in_background: true` so you stay responsive while the deep model works. Pass the PR URL (or `owner/repo#N`) plus any context the requester gave you (focus areas, specific files, etc.). The reviewer fetches the diff itself (`gh pr diff`, `gh api /repos/.../pulls/<n>`), loads the `code-review` skill, and returns a `<review>` block whose code findings carry `location="path:line"`.
69
90
 
70
- **If step 1 found a prior `CHANGES_REQUESTED` review, say so in the spawn payload** — e.g. _"This is a re-review: you previously requested changes on this PR (the prior blockers were ). Verify they are resolved and return `approve` or `request-changes` — a re-review must re-decide the blocking state, not return `comment`."_ The reviewer's `code-review` skill enforces the same rule, but telling it the prior verdict is what lets it apply that rule; a fresh reviewer session has no memory of your earlier review.
91
+ **If step 1 found an unresolved blocking obligation — a formal `CHANGES_REQUESTED` _or_ an unretracted flat-comment blocker — say so in the spawn payload** — e.g. _"This is a re-review: you previously blocked this PR (the prior blockers were …; the block was a formal `CHANGES_REQUESTED` / a flat PR comment). Verify they are resolved and return `approve` or `request-changes` — a re-review must re-decide the blocking state, not return `comment`."_ The reviewer's `code-review` skill enforces the same rule, but telling it the prior blockers (and which form they took) is what lets it apply that rule; a fresh reviewer session has no memory of your earlier block. The flat-comment case especially must be passed through — the reviewer cannot recover it from review state, so omitting it would silently drop the re-review context the moment the flow starts.
71
92
 
72
93
  Do **not** post an "on it" acknowledgement comment before spawning the reviewer — the runtime already adds an :eyes: reaction to the PR the moment it engages, so a "looking into this" comment is redundant noise. Just spawn the reviewer with `run_in_background: true`; the formal review is your reply. If you want to acknowledge explicitly, use `channel_react({ emoji: "eyes" })`, which reacts without posting a comment.
73
94
 
@@ -107,10 +128,14 @@ The `reviewer` subagent is the analyst; you are the integration layer between it
107
128
 
108
129
  **Operator approval policy.** If the inbound carries a note that PR approval is disabled (`channels.github.review.approve: false` — the adapter appends "Operator policy: PR approval is disabled for this agent" to the message), you must **not** submit an `APPROVE`. Map an `approve` verdict to `COMMENT` instead: post the same `<summary>` and all inline `comments[]` as a `COMMENT` review, just without the formal approval. `request-changes` and `comment` verdicts are unaffected (they never approve). Absent that note, approval is enabled and the table above applies unchanged.
109
130
 
110
- **Re-review.** If step 1 established this is a re-review (your latest blocking decision was `CHANGES_REQUESTED`), the result MUST clear or re-assert that block — never a top-level PR comment. On GitHub, `CHANGES_REQUESTED` is sticky: **only** a fresh `APPROVE` from you, or a dismissal of your prior review, clears it. A plain issue comment does **not** clear it, and — critically — **neither does a `COMMENT` review.** So even if the reviewer returns zero actionable findings, do **not** take the `comment` → top-level-comment branch below for a re-review. The reviewer's skill is instructed not to return `comment` on a re-review; if it does anyway despite a reachable diff, prefer `approve` when the prior blockers are visibly resolved in the diff, otherwise `request-changes` — and say which in your reasoning. Resolve the re-review by verdict:
131
+ **Re-review.** If step 1 established this is a re-review (an unresolved blocking obligation of yours — a formal `CHANGES_REQUESTED` **or** an unretracted flat-comment blocker), the result MUST clear or re-assert that block — never a top-level PR comment. The clearing mechanics depend on which form the prior block took:
132
+ - **Prior block was a formal `CHANGES_REQUESTED`.** It is sticky: **only** a fresh `APPROVE` from you, or a dismissal of your prior review, clears it. A plain issue comment does **not** clear it, and — critically — **neither does a `COMMENT` review.**
133
+ - **Prior block was a flat comment** (no formal `CHANGES_REQUESTED` exists). There is no sticky GitHub state to clear, but the obligation is still yours to discharge as a **formal** review so the verdict finally lands as review state: submit `APPROVE` (resolved, approval enabled) or `REQUEST_CHANGES` (not resolved). Do not discharge a flat-comment block with another flat comment — that re-strands the verdict, the original bug.
134
+
135
+ So even if the reviewer returns zero actionable findings, do **not** take the `comment` → top-level-comment branch below for a re-review. The reviewer's skill is instructed not to return `comment` on a re-review; if it does anyway despite a reachable diff, prefer `approve` when the prior blockers are visibly resolved in the diff, otherwise `request-changes` — and say which in your reasoning. Resolve the re-review by verdict:
111
136
  - **`request-changes`** — submit a fresh `REQUEST_CHANGES` review (re-asserts the block with the new findings). Straightforward.
112
137
  - **`approve`, approval enabled** — submit `APPROVE`. This clears the block.
113
- - **`approve`, approval disabled (`channels.github.review.approve: false`)** — you cannot `APPROVE`, and a `COMMENT` review will **not** clear the sticky block, so the PR would stay blocked by your stale review. Clear it explicitly by **dismissing your own prior `CHANGES_REQUESTED` review**. Grab that review's `id` by re-running the step-1 query with the trailing filter changed from `| .state` to `| {state, id}` (same `select`), take the entry whose `state` is `CHANGES_REQUESTED`, then:
138
+ - **`approve`, approval disabled (`channels.github.review.approve: false`)** — you cannot `APPROVE`. How you close out depends on the prior block's form. **If the prior block was a flat comment** (no formal `CHANGES_REQUESTED`), there is no sticky state to clear: submit a `COMMENT` review carrying the `<summary>` so the verdict lands as review state, and you are done — nothing to dismiss. **If the prior block was a formal `CHANGES_REQUESTED`**, a `COMMENT` review will **not** clear the sticky block, so the PR would stay blocked by your stale review; clear it explicitly by **dismissing your own prior `CHANGES_REQUESTED` review**. Grab that review's `id` by re-running the step-1 formal-review query with the trailing filter changed from `| .state` to `| {state, id}` (same `select`), take the entry whose `state` is `CHANGES_REQUESTED`, then:
114
139
 
115
140
  ```sh
116
141
  gh api -X PUT /repos/owner/repo/pulls/<N>/reviews/<review_id>/dismissals -f message="Blockers resolved; dismissing my prior changes request per operator approval-disabled policy." -f event=DISMISS
@@ -120,7 +145,7 @@ The `reviewer` subagent is the analyst; you are the integration layer between it
120
145
 
121
146
  Then submit the review. **Write the JSON payload to a file with the `write` tool, then run a single bare `gh api --input <file>`** — two steps:
122
147
 
123
- First write `/tmp/review.json` (via the `write` tool, not bash):
148
+ First write `/tmp/review-<N>.json` (via the `write` tool, not bash) — `/tmp` is per-session scratch, and the `<N>` keeps concurrent reviews in one session from colliding:
124
149
 
125
150
  ```json
126
151
  {
@@ -141,7 +166,7 @@ The `reviewer` subagent is the analyst; you are the integration layer between it
141
166
  Then post it:
142
167
 
143
168
  ```sh
144
- gh api -X POST /repos/owner/repo/pulls/<N>/reviews --input /tmp/review.json
169
+ gh api -X POST /repos/owner/repo/pulls/<N>/reviews --input /tmp/review-<N>.json
145
170
  ```
146
171
 
147
172
  **A repo-targeting `gh` command must be a single bare `gh` invocation — no pipes, `;`, `&&`, heredocs, or command substitution.** The `github-cli-auth` plugin injects the GitHub App token into the command's environment, so any sibling/upstream stage in a pipeline would inherit a live token; the runtime blocks those shapes. That is why the old `cat <<'JSON' | gh api --input -` heredoc-pipe no longer works: write the JSON to a file and feed it with `--input <file>` instead. Do **not** use `-f body=...` or `-F 'comments[][body]=...'`: those go through shell argument parsing, so backticks trigger command substitution. The file passes the JSON through untouched — backticks, newlines, and `${...}` all survive verbatim. The same file-then-`--input` pattern applies to any `gh api` POST whose body contains backticks, embedded newlines, or shell metacharacters.
@@ -165,7 +190,7 @@ The `reviewer` subagent is the analyst; you are the integration layer between it
165
190
  A finding is "actionable" if its severity is `blocker`, `concern`, or `nit`. The inline-review post in step 4 applies whenever the actionable count is **at least one**. When the reviewer returns **exactly zero** actionable findings (only `praise`, or none), there is nothing to anchor inline — handle by verdict:
166
191
 
167
192
  - `approve` → post a plain `APPROVE` with the `<summary>` as the review body (no `comments[]` array). **If the operator approval policy above disabled approval, submit a `COMMENT` review instead — same `<summary>` as the review body, `event: "COMMENT"`, no `comments[]` array. Keep it a formal review, not a top-level issue comment, so the review metadata and flow are preserved.** (Re-review caveat: a `COMMENT` review does **not** clear a sticky `CHANGES_REQUESTED` block. If this is a re-review under approval-disabled policy, follow the step-4 re-review branch — dismiss your prior review — instead of relying on this `COMMENT`.)
168
- - `comment` → post the summary as a top-level PR comment via `gh api -X POST /repos/.../issues/<N>/comments` instead of submitting an empty review. **Exception — re-reviews:** if this is a re-review (your latest blocking decision was `CHANGES_REQUESTED`), a top-level comment does not clear the sticky block. Do not use this branch; resolve it via the step-4 re-review branch (`APPROVE` if resolved and approval is enabled, the dismissal endpoint if resolved but approval is disabled, `REQUEST_CHANGES` if not resolved).
193
+ - `comment` → post the summary as a top-level PR comment via `gh api -X POST /repos/.../issues/<N>/comments` instead of submitting an empty review. **Exception — re-reviews:** if this is a re-review (you have an unresolved blocking obligation — a formal `CHANGES_REQUESTED` **or** an unretracted flat-comment blocker), a top-level comment discharges neither. Do not use this branch; resolve it via the step-4 re-review branch (`APPROVE` if resolved and approval is enabled, the dismissal endpoint if a formal block is resolved but approval is disabled, `REQUEST_CHANGES` if not resolved).
169
194
  - `request-changes` → submit `REQUEST_CHANGES` with the `<summary>` as the review body and no `comments[]` array. This combination is rare (the reviewer's contract says `request-changes` requires at least one blocker or load-bearing concern); if it happens, faithfully encode the verdict and trust the reviewer's reasoning is in the summary.
170
195
 
171
196
  The bundled `agent-browser` is **not** for PR reviews — `gh api` is faster and more reliable. Only use the browser when the API genuinely can't reach what you need.
@@ -186,6 +211,8 @@ Do not resolve on a bare "done" claim. A reply that says "fixed" is a prompt to
186
211
 
187
212
  If the author merely **replied** without pushing (e.g. "this is intentional because …") and their reasoning settles it, that is also "addressed". If their reasoning does **not** settle it, keep the thread open and answer instead.
188
213
 
214
+ > **The verify and the resolve are one action, not two.** Once you've verified the fix, your acknowledgement reply **is** the close-out — carry `resolve_review_thread: true` on it. The common failure is posting a bare "Verified at \<sha\> — thanks, that addresses it" with the flag omitted: that reads as closed but leaves the thread **open**, because a successful reply ends your turn and the resolve can't happen in a later one. The flag is technically optional (nothing rejects a reply without it), but on an acknowledgement it is the only thing that actually closes the thread — so treat it as part of the acknowledgement, not an afterthought.
215
+
189
216
  ### How to resolve — `channel_reply({ resolve_review_thread: true })`
190
217
 
191
218
  Once you have verified the fix, **acknowledge and resolve in one call**: pass `resolve_review_thread: true` to your `channel_reply`. The runtime resolves the thread you're replying in **before** it posts your acknowledgement, then posts the reply:
@@ -25,12 +25,14 @@ Citations in shard bodies use the canonical form `streams/yyyy-MM-dd#<fragment-i
25
25
 
26
26
  When index-mode injection hides bodies, or when you need recent fragments the dreaming subagent hasn't consolidated yet, use `memory_search({query, asRegex?, full?, maxResults?})`. It searches BOTH topic shards under `memory/topics/` and undreamed stream events under `memory/streams/`. Substring (case-insensitive) by default; `asRegex: true` for regex.
27
27
 
28
+ Plain queries are **phrase-first with a word fallback**: the whole query is tried as one substring, and only if that finds nothing is the query split on whitespace and the distinct words OR-matched (ranked by how many words each hit contains). So a descriptive multi-word query like `quarterly regional revenue summary` still returns results even when no entry contains that exact phrase. You don't need to pre-split queries into single keywords — but a focused phrase still wins when an entry contains it verbatim. Regex queries never fall back (whitespace stays part of the pattern).
29
+
28
30
  Results are discriminated by `source`:
29
31
 
30
32
  - `source: "topic"` — fields `shardPath`, `slug`, `heading`, `excerpt`, `fullBody?`
31
33
  - `source: "stream"` — fields `streamPath`, `date`, `eventId?` (citation-format `streams/yyyy-MM-dd#<id>` for fragments; absent for legacy prose), `topic`, `excerpt`, `fullBody?`
32
34
 
33
- Topic matches come first (alphabetical by slug); then stream matches (newest day first). `full: true` returns the entire shard or fragment body. `maxResults` truncates streams before topics when exhausted.
35
+ Ordering depends on mode. Exact-phrase (and regex) results list all topic matches first (alphabetical by slug), then stream matches (newest day first), and `maxResults` truncates streams before topics. Word-fallback results are instead ranked by matched-word count — that same topic-first/stream-newest order is only the tiebreak within a score band, so a higher-scoring stream can precede a lower-scoring topic, and `maxResults` drops the lowest-scored tail regardless of source. `full: true` returns the entire shard or fragment body.
34
36
 
35
37
  ## Per-shard truncation
36
38
 
package/src/tui/format.ts CHANGED
@@ -63,10 +63,10 @@ function humanizeArgs(name: string, args: unknown): string | null {
63
63
  return humanizeFindArgs(args)
64
64
  case 'ls':
65
65
  return humanizeLsArgs(args)
66
- case 'websearch':
67
- return humanizeWebsearchArgs(args)
68
- case 'webfetch':
69
- return humanizeWebfetchArgs(args)
66
+ case 'web_search':
67
+ return humanizeWebSearchArgs(args)
68
+ case 'web_fetch':
69
+ return humanizeWebFetchArgs(args)
70
70
  default:
71
71
  return null
72
72
  }
@@ -123,14 +123,14 @@ function humanizeLsArgs(args: ArgRecord): string | null {
123
123
  return asString(args.path) ?? '.'
124
124
  }
125
125
 
126
- function humanizeWebsearchArgs(args: ArgRecord): string | null {
126
+ function humanizeWebSearchArgs(args: ArgRecord): string | null {
127
127
  const query = asString(args.query)
128
128
  if (query === null) return null
129
129
  const source = asString(args.source)
130
130
  return source && source !== 'web' ? `"${query}" (${source})` : `"${query}"`
131
131
  }
132
132
 
133
- function humanizeWebfetchArgs(args: ArgRecord): string | null {
133
+ function humanizeWebFetchArgs(args: ArgRecord): string | null {
134
134
  return asString(args.url)
135
135
  }
136
136
 
@@ -153,8 +153,8 @@ function enrichResult(name: string, result: ArgRecord): string | null {
153
153
  return enrichBashResult(result)
154
154
  case 'read':
155
155
  return enrichReadResult(result)
156
- case 'websearch':
157
- return enrichWebsearchResult(result)
156
+ case 'web_search':
157
+ return enrichWebSearchResult(result)
158
158
  default:
159
159
  return null
160
160
  }
@@ -187,7 +187,7 @@ function enrichReadResult(result: ArgRecord): string | null {
187
187
  return mime ? `[image: ${mime}]` : '[image]'
188
188
  }
189
189
 
190
- function enrichWebsearchResult(result: ArgRecord): string | null {
190
+ function enrichWebSearchResult(result: ArgRecord): string | null {
191
191
  const details = isObject(result.details) ? result.details : null
192
192
  if (details === null) return null
193
193
  const results = Array.isArray(details.results) ? details.results : null
@@ -198,13 +198,13 @@ function enrichWebsearchResult(result: ArgRecord): string | null {
198
198
  const source = asString(details.source) ?? ''
199
199
  const header = query ? `${results.length} result${results.length === 1 ? '' : 's'} for "${query}" (${source})` : null
200
200
  const lines = results
201
- .map((entry, i) => formatWebsearchEntry(entry, i + 1))
201
+ .map((entry, i) => formatWebSearchEntry(entry, i + 1))
202
202
  .filter((line): line is string => line !== null)
203
203
  if (lines.length === 0) return extractContentText(result)
204
204
  return header === null ? lines.join('\n') : `${header}\n${lines.join('\n')}`
205
205
  }
206
206
 
207
- function formatWebsearchEntry(entry: unknown, index: number): string | null {
207
+ function formatWebSearchEntry(entry: unknown, index: number): string | null {
208
208
  if (!isObject(entry)) return null
209
209
  const title = asString(entry.title)
210
210
  const url = asString(entry.url)