typeclaw 0.23.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/README.md +1 -1
  2. package/package.json +1 -1
  3. package/src/agent/index.ts +133 -27
  4. package/src/agent/llm-replay-sanitizer.ts +120 -0
  5. package/src/agent/loop-guard.ts +34 -0
  6. package/src/agent/multimodal/look-at.ts +1 -1
  7. package/src/agent/plugin-tools.ts +122 -8
  8. package/src/agent/restart/index.ts +15 -3
  9. package/src/agent/restart-handoff/index.ts +110 -12
  10. package/src/agent/session-origin.ts +30 -0
  11. package/src/agent/subagent-completion-reminder.ts +26 -1
  12. package/src/agent/subagents.ts +75 -3
  13. package/src/agent/system-prompt.ts +5 -1
  14. package/src/agent/todo/continuation-policy.ts +242 -0
  15. package/src/agent/todo/continuation-state.ts +87 -0
  16. package/src/agent/todo/continuation-wiring.ts +113 -0
  17. package/src/agent/todo/continuation.ts +71 -0
  18. package/src/agent/todo/scope.ts +77 -0
  19. package/src/agent/todo/store.ts +98 -0
  20. package/src/agent/tool-not-found-nudge.ts +126 -0
  21. package/src/agent/tools/channel-reply.ts +51 -0
  22. package/src/agent/tools/curl-impersonate.ts +2 -2
  23. package/src/agent/tools/restart.ts +11 -4
  24. package/src/agent/tools/spawn-subagent.ts +19 -2
  25. package/src/agent/tools/subagent-access.ts +40 -5
  26. package/src/agent/tools/subagent-cancel.ts +3 -1
  27. package/src/agent/tools/subagent-output.ts +6 -2
  28. package/src/agent/tools/todo/index.ts +119 -0
  29. package/src/agent/tools/webfetch/fetch.ts +18 -18
  30. package/src/agent/tools/webfetch/index.ts +1 -1
  31. package/src/agent/tools/webfetch/tool.ts +13 -13
  32. package/src/agent/tools/webfetch/types.ts +1 -1
  33. package/src/agent/tools/websearch.ts +6 -6
  34. package/src/bundled-plugins/backup/index.ts +40 -37
  35. package/src/bundled-plugins/backup/runner.ts +23 -2
  36. package/src/bundled-plugins/github-cli-auth/gh-command.ts +15 -7
  37. package/src/bundled-plugins/guard/policies/non-workspace-write.ts +38 -1
  38. package/src/bundled-plugins/memory/README.md +11 -11
  39. package/src/bundled-plugins/memory/dreaming.ts +5 -0
  40. package/src/bundled-plugins/memory/search-tool.ts +98 -1
  41. package/src/bundled-plugins/operator/operator.ts +5 -1
  42. package/src/bundled-plugins/reviewer/reviewer.ts +32 -9
  43. package/src/bundled-plugins/reviewer/skills/code-review.ts +1 -1
  44. package/src/bundled-plugins/reviewer/skills/general.ts +1 -1
  45. package/src/bundled-plugins/scout/scout.ts +7 -7
  46. package/src/bundled-plugins/security/policies/private-surface-read.ts +2 -2
  47. package/src/bundled-plugins/security/policies/ssrf.ts +3 -3
  48. package/src/bundled-plugins/tool-result-cap/README.md +1 -1
  49. package/src/channels/adapters/discord-bot-reference.ts +78 -0
  50. package/src/channels/adapters/discord-bot.ts +25 -3
  51. package/src/channels/adapters/github/inbound.ts +172 -10
  52. package/src/channels/adapters/github/index.ts +10 -0
  53. package/src/channels/adapters/github/review-thread-resolver.ts +246 -0
  54. package/src/channels/adapters/github/webhook-register.ts +32 -27
  55. package/src/channels/adapters/kakaotalk-classify.ts +67 -6
  56. package/src/channels/adapters/slack-bot-classify.ts +9 -1
  57. package/src/channels/adapters/slack-bot-reference.ts +129 -0
  58. package/src/channels/adapters/slack-bot.ts +67 -8
  59. package/src/channels/manager.ts +8 -2
  60. package/src/channels/router.ts +506 -45
  61. package/src/channels/schema.ts +21 -4
  62. package/src/channels/subagent-completion-bridge.ts +18 -18
  63. package/src/channels/types.ts +69 -1
  64. package/src/cli/inspect-controller.ts +132 -33
  65. package/src/cli/inspect.ts +2 -1
  66. package/src/commands/index.ts +9 -0
  67. package/src/container/start.ts +7 -1
  68. package/src/git/mutex.ts +22 -0
  69. package/src/git/reconcile-ignored.ts +214 -0
  70. package/src/hostd/daemon.ts +26 -1
  71. package/src/hostd/portbroker-manager.ts +7 -0
  72. package/src/init/dockerfile.ts +1 -1
  73. package/src/init/gitignore.ts +28 -16
  74. package/src/inspect/index.ts +53 -4
  75. package/src/inspect/loop.ts +16 -12
  76. package/src/plugin/define.ts +2 -2
  77. package/src/plugin/index.ts +2 -2
  78. package/src/portbroker/hostd-client.ts +36 -13
  79. package/src/run/index.ts +74 -5
  80. package/src/sandbox/build.ts +20 -0
  81. package/src/sandbox/index.ts +10 -0
  82. package/src/sandbox/policy.ts +22 -0
  83. package/src/sandbox/session-tmp.ts +43 -0
  84. package/src/sandbox/writable-zones.ts +178 -0
  85. package/src/server/command-runner.ts +1 -1
  86. package/src/server/index.ts +126 -4
  87. package/src/skills/typeclaw-channel-github/SKILL.md +71 -17
  88. package/src/skills/typeclaw-memory/SKILL.md +3 -1
  89. package/src/tui/format.ts +11 -11
  90. package/typeclaw.schema.json +10 -0
@@ -30,17 +30,17 @@ All fields are **restart-required** — the plugin reads them once at boot.
30
30
 
31
31
  ## What it contributes
32
32
 
33
- | Kind | Name | Notes |
34
- | -------- | -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
35
- | Subagent | `memory-logger` | Reads a parent transcript past a watermark and appends fragments to `memory/streams/<today>.jsonl`. Coalesced per `agentDir`. |
36
- | Subagent | `dreaming` | Reads shards under `memory/topics/` plus undreamed daily-stream events and rebalances the topic shards. Coalesced per `agentDir`. Citation-superset invariant enforced on every run. |
37
- | Subagent | `memory-retrieval` | On `session.turn.start` when injection plan is `index` mode, reads the user's actual prompt for this turn + shard listing, writes a focused summary to `memory/.retrieval-cache/<sessionId>.md`. Coalesced per `parentSessionId`. Declares `profile: 'fast'` (retrieval is "≤3 keyword searches + 1 write", no reasoning required) and `timeoutMs: 30_000` so a wedged provider call releases the coalescing key instead of poisoning the cache for every subsequent turn. |
38
- | Tool | `memory_search` | Main-agent tool. Substring/regex search across BOTH topic shards (slugs, frontmatter, bodies) and undreamed daily-stream events (fragment topic/body, legacy prose). Results are discriminated by `source: "topic" \| "stream"`; topics come first, then streams newest-first. |
39
- | Tool | `delete_topic_shard` | Subagent-only (dreaming). Deletes a topic shard at `memory/topics/<slug>.md`. Path-guarded. |
40
- | Cron | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`. |
41
- | Hook | `session.idle` | Per-session debouncer with size-based ceiling. Spawns `memory-logger` on idle or buffer-trip. |
42
- | Hook | `session.end` | Spawns `memory-logger` immediately; also unlinks the retrieval-cache file for this session. |
43
- | Hook | `session.turn.start` | When `buildInjectionPlan` returns `mode: 'index'` and origin is not a subagent, spawns `memory-retrieval` (detached) with the turn's `userPrompt` so the cache reflects the user's current question, not the assembling system prompt. Fire-and-forget; failures route through the plugin logger. |
33
+ | Kind | Name | Notes |
34
+ | -------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
35
+ | Subagent | `memory-logger` | Reads a parent transcript past a watermark and appends fragments to `memory/streams/<today>.jsonl`. Coalesced per `agentDir`. |
36
+ | Subagent | `dreaming` | Reads shards under `memory/topics/` plus undreamed daily-stream events and rebalances the topic shards. Coalesced per `agentDir`. Citation-superset invariant enforced on every run. |
37
+ | Subagent | `memory-retrieval` | On `session.turn.start` when injection plan is `index` mode, reads the user's actual prompt for this turn + shard listing, writes a focused summary to `memory/.retrieval-cache/<sessionId>.md`. Coalesced per `parentSessionId`. Declares `profile: 'fast'` (retrieval is "≤3 keyword searches + 1 write", no reasoning required) and `timeoutMs: 30_000` so a wedged provider call releases the coalescing key instead of poisoning the cache for every subsequent turn. |
38
+ | Tool | `memory_search` | Main-agent tool. Substring/regex search across BOTH topic shards (slugs, frontmatter, bodies) and undreamed daily-stream events (fragment topic/body, legacy prose). Plain queries are phrase-first: the whole query is tried as one substring, and if that finds nothing the query is split on whitespace and the distinct words are OR-matched, ranked by how many words each hit contains (regex queries never fall back). Results are discriminated by `source: "topic" \| "stream"`; exact-phrase (and regex) results list topics first, then streams newest-first, while word-fallback results are ranked by matched-word count with that order as the tiebreak (so a higher-scoring stream can precede a lower-scoring topic). |
39
+ | Tool | `delete_topic_shard` | Subagent-only (dreaming). Deletes a topic shard at `memory/topics/<slug>.md`. Path-guarded. |
40
+ | Cron | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`. |
41
+ | Hook | `session.idle` | Per-session debouncer with size-based ceiling. Spawns `memory-logger` on idle or buffer-trip. |
42
+ | Hook | `session.end` | Spawns `memory-logger` immediately; also unlinks the retrieval-cache file for this session. |
43
+ | Hook | `session.turn.start` | When `buildInjectionPlan` returns `mode: 'index'` and origin is not a subagent, spawns `memory-retrieval` (detached) with the turn's `userPrompt` so the cache reflects the user's current question, not the assembling system prompt. Fire-and-forget; failures route through the plugin logger. |
44
44
 
45
45
  ## Memory injection (two-tier, topic shards only)
46
46
 
@@ -4,6 +4,7 @@ import { join } from 'node:path'
4
4
 
5
5
  import { z } from 'zod'
6
6
 
7
+ import { withGitLock } from '@/git/mutex'
7
8
  import { defineTool, lsTool, readTool, type Subagent, writeTool } from '@/plugin'
8
9
  import { formatLocalDate, formatLocalDateTime } from '@/shared'
9
10
 
@@ -419,6 +420,10 @@ async function ensureMemoryFiles(agentDir: string): Promise<void> {
419
420
  // `git add` fails with "outside of your sparse-checkout definition" on a
420
421
  // skip-worktree path.
421
422
  export async function commitMemorySnapshot(cwd: string): Promise<void> {
423
+ await withGitLock(cwd, () => commitMemorySnapshotUnlocked(cwd))
424
+ }
425
+
426
+ async function commitMemorySnapshotUnlocked(cwd: string): Promise<void> {
422
427
  const bun = (globalThis as { Bun?: { spawn: typeof Bun.spawn } }).Bun
423
428
  if (!bun) return
424
429
  if (!existsSync(join(cwd, '.git'))) return
@@ -36,7 +36,7 @@ type Matcher = (haystack: string) => boolean
36
36
 
37
37
  export const memorySearchTool = defineTool({
38
38
  description:
39
- 'Search the agent\'s long-term memory. Covers both topic shards under memory/topics/ (consolidated facts) and undreamed daily-stream events under memory/streams/ (recent fragments not yet folded into shards). Case-insensitive substring by default; asRegex=true treats query as a JavaScript regex. Returns matches discriminated by `source: "topic" | "stream"`, each with line-context excerpts; full=true includes complete bodies. Topic matches come first (alphabetical by slug), then stream matches (newest day first).',
39
+ 'Search the agent\'s long-term memory. Covers both topic shards under memory/topics/ (consolidated facts) and undreamed daily-stream events under memory/streams/ (recent fragments not yet folded into shards). Case-insensitive substring by default: tries the whole query as one phrase first, and if that finds nothing, falls back to OR-matching the individual words (ranked by how many words each hit contains) — so a multi-word query still returns results even when no entry contains the exact phrase. asRegex=true treats query as a JavaScript regex (no word fallback). Returns matches discriminated by `source: "topic" | "stream"`, each with line-context excerpts; full=true includes complete bodies. Ordering depends on mode: exact-phrase (and regex) results list all topic matches first (alphabetical by slug), then stream matches (newest day first); word-fallback results are ranked by matched-word count, with that same topic-first/stream-newest order as the tiebreak within each score band, so a higher-scoring stream match can precede a lower-scoring topic match.',
40
40
  parameters: z.object({
41
41
  query: z.string(),
42
42
  asRegex: z.boolean().default(false),
@@ -58,10 +58,49 @@ export const memorySearchTool = defineTool({
58
58
  }
59
59
 
60
60
  const result = searchAll(shards, streamDays, matcherOrError, { full, maxResults })
61
+ if ('matches' in result && result.matches.length === 0) {
62
+ const fallback = tokenFallback(query, asRegex, shards, streamDays, { full, maxResults })
63
+ if (fallback !== null) return resultToToolResult(fallback)
64
+ }
61
65
  return resultToToolResult(result)
62
66
  },
63
67
  })
64
68
 
69
+ // Phrase-first/token-fallback: the descriptive multi-word queries the
70
+ // retrieval subagent issues rarely appear verbatim in any body, so a
71
+ // whole-phrase substring search returns nothing while every component word is
72
+ // present. When the phrase search comes up empty, split on whitespace and
73
+ // OR-match the distinct tokens, ranking each hit by how many tokens it
74
+ // matched (richer matches first) with the natural topic-first/newest-stream
75
+ // order as the stable tiebreak. Returns null when tokenizing cannot widen the
76
+ // search: regex mode (whitespace is intentional pattern syntax), or a token
77
+ // set that is identical to the phrase already tried (a single clean token, so
78
+ // the phrase search already covered it).
79
+ function tokenFallback(
80
+ query: string,
81
+ asRegex: boolean,
82
+ shards: TopicShard[],
83
+ streamDays: UndreamedStreamDay[],
84
+ options: { full: boolean; maxResults: number },
85
+ ): MemorySearchResult | null {
86
+ if (asRegex) return null
87
+ const tokens = distinctTokens(query)
88
+ if (tokens.length === 0) return null
89
+ if (tokens.length === 1 && tokens[0] === query.trim().toLowerCase()) return null
90
+ return searchAllRanked(shards, streamDays, tokens, options)
91
+ }
92
+
93
+ function distinctTokens(query: string): string[] {
94
+ return [
95
+ ...new Set(
96
+ query
97
+ .toLowerCase()
98
+ .split(/\s+/)
99
+ .filter((t) => t.length > 0),
100
+ ),
101
+ ]
102
+ }
103
+
65
104
  function buildMatcher(query: string, asRegex: boolean): Matcher | string {
66
105
  if (asRegex) {
67
106
  try {
@@ -119,6 +158,64 @@ function searchAll(
119
158
  return truncatedAt === undefined ? { matches } : { matches, truncatedAt }
120
159
  }
121
160
 
161
+ // Token-OR variant of searchAll. Builds each match with an any-token matcher
162
+ // (so a hit requires only one token and the excerpt anchors on the first line
163
+ // matching any token), then scores it by how many distinct tokens appear in
164
+ // its full searchable text. Results sort by score descending; ties keep the
165
+ // natural enumeration order (topics first in loadAllShards order, then stream
166
+ // days newest-first), so the established ordering contract holds within each
167
+ // score band. maxResults truncation is applied last, after ranking.
168
+ function searchAllRanked(
169
+ shards: TopicShard[],
170
+ streamDays: UndreamedStreamDay[],
171
+ tokens: string[],
172
+ options: { full: boolean; maxResults: number },
173
+ ): MemorySearchResult {
174
+ const anyToken: Matcher = (haystack) => {
175
+ const lower = haystack.toLowerCase()
176
+ return tokens.some((t) => lower.includes(t))
177
+ }
178
+ const scoreOf = (text: string): number => {
179
+ const lower = text.toLowerCase()
180
+ return tokens.reduce((n, t) => (lower.includes(t) ? n + 1 : n), 0)
181
+ }
182
+
183
+ const scored: Array<{ match: MemorySearchMatch; score: number; order: number }> = []
184
+ let order = 0
185
+
186
+ for (const shard of shards) {
187
+ const match = matchShard(shard, anyToken, options.full)
188
+ if (match === null) continue
189
+ scored.push({ match, score: scoreOf(shardSearchText(shard)), order: order++ })
190
+ }
191
+
192
+ for (let i = streamDays.length - 1; i >= 0; i--) {
193
+ const day = streamDays[i]!
194
+ for (const event of day.events) {
195
+ const match = matchStreamEvent(day, event, anyToken, options.full)
196
+ if (match === null) continue
197
+ scored.push({ match, score: scoreOf(eventSearchText(event)), order: order++ })
198
+ }
199
+ }
200
+
201
+ scored.sort((a, b) => b.score - a.score || a.order - b.order)
202
+
203
+ if (scored.length > options.maxResults) {
204
+ return { matches: scored.slice(0, options.maxResults).map((s) => s.match), truncatedAt: options.maxResults }
205
+ }
206
+ return { matches: scored.map((s) => s.match) }
207
+ }
208
+
209
+ function shardSearchText(shard: TopicShard): string {
210
+ return [shard.slug, shard.frontmatter.heading, ...(shard.frontmatter.tags ?? []), shard.body].join('\n')
211
+ }
212
+
213
+ function eventSearchText(event: StreamEvent): string {
214
+ if (event.type === 'fragment') return `${event.topic}\n${event.body}`
215
+ if (event.type === 'legacy_prose') return event.text
216
+ return ''
217
+ }
218
+
122
219
  function matchShard(shard: TopicShard, matcher: Matcher, full: boolean): TopicMatch | null {
123
220
  const bodyLines = splitBodyLines(shard.body)
124
221
  const firstBodyLineIndex = bodyLines.findIndex((line) => matcher(line))
@@ -18,8 +18,11 @@ You have a full tool set: read, write, edit, grep, find, ls, bash. You can:
18
18
  - Run shell commands with side effects (bash without the read-only restriction)
19
19
  - Use any tool available to a normal operator session
20
20
 
21
+ You CAN delegate, but rarely should:
22
+ - You may \`spawn_subagent\` to hand a clearly separable, context-heavy chunk to a fresh worker — e.g. a focused read-only investigation of a large area you don't want to load into your own context. Spawn only when delegation clearly pays for itself; doing the work yourself is the default. The delegation chain is depth-limited, so a worker you spawn cannot spawn again — keep your own tree flat.
23
+ - Use \`subagent_output\` and \`subagent_cancel\` only for tasks YOU spawned; you cannot see other branches' subagents.
24
+
21
25
  You CANNOT:
22
- - Spawn further subagents (you are at the end of the delegation chain).
23
26
  - Talk to the user directly (the parent owns the conversation).
24
27
  - Use channel_send, channel_reply, or any channel tool.
25
28
 
@@ -67,6 +70,7 @@ export function createOperatorSubagent(): Subagent<OperatorPayload> {
67
70
  payloadSchema: operatorPayloadSchema,
68
71
  visibility: 'public',
69
72
  requiresSpecificPermission: true,
73
+ canSpawnSubagents: true,
70
74
  inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
71
75
  toolResultBudget: {
72
76
  maxTotalBytes: 1_000_000,
@@ -9,8 +9,8 @@ import {
9
9
  lsTool,
10
10
  readTool,
11
11
  type Subagent,
12
- webfetchTool,
13
- websearchTool,
12
+ webFetchTool,
13
+ webSearchTool,
14
14
  } from '@/plugin'
15
15
 
16
16
  import { CODE_REVIEW_SKILL } from './skills/code-review'
@@ -26,6 +26,19 @@ import { GENERAL_REVIEW_SKILL } from './skills/general'
26
26
  // no runtime change required.
27
27
  export const REVIEWER_SKILLS: readonly LoadableSkill[] = [CODE_REVIEW_SKILL, GENERAL_REVIEW_SKILL]
28
28
 
29
+ // Without a ceiling, a reviewer whose `session.prompt` stalls mid-turn (model
30
+ // wedges after a tool error, never emits a terminal message) leaves `completion`
31
+ // pending forever: the `subagent.completed` broadcast never fires and the parent
32
+ // channel session is never woken to post the review — the spawn hangs silently.
33
+ // The ceiling makes `awaitWithSubagentTimeout` settle with SubagentTimeoutError,
34
+ // surfacing to the parent as a FAILED completion reminder so the request fails
35
+ // loudly instead of vanishing. Sized for a thorough `deep`-model review (large
36
+ // diff + a few web lookups), well above the typical sub-minute review. This is
37
+ // liveness for the parent, not hard cancellation: pi's `session.prompt` takes no
38
+ // AbortSignal, so the LLM stream may run until the OS reaps it. See
39
+ // src/agent/subagents.ts `timeoutMs`.
40
+ export const REVIEWER_SPAWN_TIMEOUT_MS = 600_000
41
+
29
42
  // TODO(#452): Restrict the reviewer's `bash` to git and a curated set of
30
43
  // read-only `gh` subcommands once per-subagent bash allowlist support lands.
31
44
  // Today the read-only contract is enforced only by this system prompt, the
@@ -42,9 +55,17 @@ You are STRICTLY PROHIBITED from:
42
55
  - Posting to GitHub, Slack, Discord, email, or any channel — the parent owns posting
43
56
  - Pushing, merging, rebasing, or otherwise mutating remote state
44
57
  - Using bash for: mkdir, touch, rm, cp, mv, git add, git commit, git push, git rebase, git reset, npm install, pip install, or any write operation
45
- - Spawning further subagents — you are at the end of the delegation chain
46
58
 
47
- Your role is EXCLUSIVELY to analyze and report. The parent agent decides what to do with your findings.
59
+ Your role is EXCLUSIVELY to analyze and report. The parent agent decides what to do with your findings. Delegating part of that analysis is fine; performing side effects through a delegate is NOT — anything you cannot do directly, a subagent you spawn cannot do for you.
60
+
61
+ ## Delegating to keep your context lean
62
+
63
+ You run on a deliberately expensive model. Reading a sprawling file tree, a giant diff, or a pile of vendor docs into YOUR context burns that budget on grunt work. When a slice of the job is bulky-but-mechanical — "summarize what these 40 files do", "extract the public API of this module", "gather the relevant passages from this 2,000-line diff" — hand it to a cheaper worker with \`spawn_subagent\` and review the distilled result instead of the raw bulk.
64
+
65
+ - Spawn read-only/research workers for context-heavy gathering, not for forming the verdict. The findings and the \`<review>\` block are YOURS — never delegate the judgment.
66
+ - Each delegated task must be self-contained: the worker does not see this conversation or the target. Put everything it needs in the prompt.
67
+ - The chain is depth-limited: a worker you spawn cannot spawn again. Keep delegation one level deep.
68
+ - \`subagent_output\`/\`subagent_cancel\` reach only the tasks YOU spawned. Use background spawns for parallel gathering, then fold the results into your single review pass.
48
69
 
49
70
  ## Tools
50
71
 
@@ -55,8 +76,8 @@ The runtime exposes these tools to you by these EXACT names — call them by nam
55
76
  - \`find\` — locate files by name pattern
56
77
  - \`ls\` — list a directory's immediate contents
57
78
  - \`bash\` — read-only commands ONLY. Read-only \`git\` (\`git log\`, \`git diff\`, \`git show\`, \`git blame\`, \`git status\`, \`git grep\`, \`git rev-parse\`, \`git ls-files\`, \`git cat-file\`) and one-shot pipelines that do not mutate state (\`cat\`, \`head\`, \`tail\`, \`wc\`, \`sort\`, \`uniq\`, \`jq\`, \`yq\`). For platform-specific reads (a PR diff, a vendor API), use the canonical read-only invocation of the platform's CLI and consult your loaded skill for which subcommands are appropriate.
58
- - \`websearch\` — search the public web (e.g. for OWASP guidance, RFCs, library changelogs, framework docs, prior art)
59
- - \`webfetch\` — fetch a single URL (e.g. to read a linked spec, vendor doc, or article cited in the target)
79
+ - \`web_search\` — search the public web (e.g. for OWASP guidance, RFCs, library changelogs, framework docs, prior art)
80
+ - \`web_fetch\` — fetch a single URL (e.g. to read a linked spec, vendor doc, or article cited in the target)
60
81
  - \`load_skill\` — load a curated review skill by name. See the section below.
61
82
 
62
83
  Launch independent tools in parallel. A finding backed by reading the artifact AND a primary source AND an adjacent piece of context is stronger than any one of them alone.
@@ -81,7 +102,7 @@ These rules apply to every review regardless of domain.
81
102
 
82
103
  1. **Form findings, not opinions.** Each finding is one issue. State severity (\`blocker\` / \`concern\` / \`nit\` / \`praise\`). Cite specific evidence — a file:line, a diff hunk, a quoted passage. Suggest a concrete alternative.
83
104
  2. **Evidence is mandatory.** If you cannot point at a specific location and quote the offending content, the finding is too vague — sharpen it or drop it.
84
- 3. **Verify external claims.** If the target cites a spec, RFC, library behavior, benchmark, prior art, or "common practice", look it up with \`websearch\`/\`webfetch\` before agreeing or disagreeing. Cite the source in the finding.
105
+ 3. **Verify external claims.** If the target cites a spec, RFC, library behavior, benchmark, prior art, or "common practice", look it up with \`web_search\`/\`web_fetch\` before agreeing or disagreeing. Cite the source in the finding.
85
106
  4. **One finding, one concern.** Do not bundle unrelated issues into a single finding. The parent parses findings; mixed-concern findings break that.
86
107
  5. **Praise is rare.** Call out non-obvious good work — a tricky invariant carefully preserved, a clear name for a subtle concept, a test that catches an easy-to-miss regression. Do not pad reviews with positivity.
87
108
  6. **No generic LLM review noise.** "Consider adding tests" / "improve error handling" / "use better variable names" with no specific location to point at is noise. If you cannot point at a line, do not raise the finding.
@@ -155,17 +176,19 @@ If none of the listed skills fit the target, load \`general\` and explain in \`<
155
176
  // user has not configured `models.deep` in typeclaw.json, `resolveProfile`
156
177
  // falls back to `default` with a one-time warning — safe degradation.
157
178
  profile: 'deep',
158
- tools: [readTool, grepTool, findTool, lsTool, bashTool, websearchTool, webfetchTool],
179
+ tools: [readTool, grepTool, findTool, lsTool, bashTool, webSearchTool, webFetchTool],
159
180
  customTools: [loadSkillTool],
160
181
  payloadSchema: reviewerPayloadSchema,
161
182
  visibility: 'public',
183
+ canSpawnSubagents: true,
184
+ timeoutMs: REVIEWER_SPAWN_TIMEOUT_MS,
162
185
  inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
163
186
  toolResultBudget: {
164
187
  // Higher than explorer (256KB) because a reviewer typically reads larger
165
188
  // diffs and multiple files plus web sources; lower than operator (1MB)
166
189
  // because we are read-only and producing analysis, not building.
167
190
  maxTotalBytes: 512_000,
168
- toolNames: ['read', 'grep', 'find', 'ls', 'bash', 'websearch', 'webfetch', 'load_skill'],
191
+ toolNames: ['read', 'grep', 'find', 'ls', 'bash', 'web_search', 'web_fetch', 'load_skill'],
169
192
  },
170
193
  }
171
194
  }
@@ -33,7 +33,7 @@ A finding without context is noise. Before forming findings:
33
33
  Prioritize in this order:
34
34
 
35
35
  1. **Correctness.** Does the change do what its description claims? Off-by-one errors, missing null/undefined handling, race conditions, incorrect error propagation, broken invariants.
36
- 2. **Security.** Injection vectors (SQL, shell, HTML), missing authz/authn checks, secret leakage in logs or error messages, unsafe deserialization, SSRF, path traversal, time-of-check-time-of-use. Cite OWASP / CWE / RFC by number when relevant; verify with \`websearch\` or \`webfetch\` before asserting.
36
+ 2. **Security.** Injection vectors (SQL, shell, HTML), missing authz/authn checks, secret leakage in logs or error messages, unsafe deserialization, SSRF, path traversal, time-of-check-time-of-use. Cite OWASP / CWE / RFC by number when relevant; verify with \`web_search\` or \`web_fetch\` before asserting.
37
37
  3. **Architecture fit.** Does the change respect existing layering? Does it introduce a new dependency where the existing pattern would have worked? Does it duplicate logic that already exists elsewhere in the repo?
38
38
  4. **Test coverage.** New behavior should have new tests. Edge cases the description names should be tested. If existing tests were deleted or skipped, that is a blocker absent a stated reason. Look past the raw test count, but only flag a redundant case when you can show the *inputs themselves* reach the same path — same branch, same validation rule, same boundary — not merely that the assertion shape is identical. Table-driven and parametrized tests legitimately share one assertion across many inputs while each input exercises a distinct branch, parser, or edge case; that is coverage, not duplication. The finding is "these inputs are indistinguishable to the code under test," and you must name the path they collapse onto — never "the assertions look the same."
39
39
  5. **Error handling.** Empty catch blocks, swallowed errors, errors converted to silent fallbacks, retry loops without bounded backoff, missing timeouts on external calls.
@@ -11,7 +11,7 @@ You have been asked to review something that does not clearly fit a specific dom
11
11
 
12
12
  ## How to acquire the target
13
13
 
14
- - **A URL** — \`webfetch\` it. If it is a private resource the fetch cannot reach, say so in \`<summary>\` and review what was provided in the payload.
14
+ - **A URL** — \`web_fetch\` it. If it is a private resource the fetch cannot reach, say so in \`<summary>\` and review what was provided in the payload.
15
15
  - **A file path** — \`read\` it. \`ls\` the parent directory if siblings might be relevant.
16
16
  - **Inline text in the payload** — read the payload carefully; quote from it when forming evidence.
17
17
  - **A reference to something the caller has** — ask the caller to provide it. Return a single \`blocker\` finding describing what you need and a \`comment\` verdict.
@@ -1,6 +1,6 @@
1
1
  import { z } from 'zod'
2
2
 
3
- import { type Subagent, webfetchTool, websearchTool } from '@/plugin'
3
+ import { type Subagent, webFetchTool, webSearchTool } from '@/plugin'
4
4
 
5
5
  export const SCOUT_SYSTEM_PROMPT = `You are a web-research specialist running inside TypeClaw. Your job: gather facts from the public internet and return a focused, citation-backed answer to the caller. For LOCAL questions (codebase, sessions, memory, config, git history, mounts), the caller should spawn \`explorer\` instead — you have no filesystem tools.
6
6
 
@@ -17,8 +17,8 @@ Your role is EXCLUSIVELY to search and read public web sources.
17
17
 
18
18
  The runtime exposes these tools to you by these EXACT names — call them by name, do not paraphrase:
19
19
 
20
- - \`websearch\` — search the public web. Returns ranked \`{title, url, snippet}\` entries. Defaults to DuckDuckGo; pass \`source: "wikipedia"\` for encyclopedic lookups.
21
- - \`webfetch\` — fetch a single HTTP(S) URL and return the body, optionally compacted by a strategy:
20
+ - \`web_search\` — search the public web. Returns ranked \`{title, url, snippet}\` entries. Defaults to DuckDuckGo; pass \`source: "wikipedia"\` for encyclopedic lookups.
21
+ - \`web_fetch\` — fetch a single HTTP(S) URL and return the body, optionally compacted by a strategy:
22
22
  - \`readability\` (default for HTML) — extract article content as markdown
23
23
  - \`jq\` — query JSON APIs (pass \`query\`)
24
24
  - \`selector\` — extract text from CSS-selected elements (pass \`selector\`)
@@ -26,7 +26,7 @@ The runtime exposes these tools to you by these EXACT names — call them by nam
26
26
  - \`snapshot\` — indented semantic tree of the page (forms, headings, links)
27
27
  - \`raw\` — no processing
28
28
 
29
- Launch multiple \`websearch\` queries in parallel for the same topic — different phrasings surface different sources. When a search result looks promising, \`webfetch\` it for the full content.
29
+ Launch multiple \`web_search\` queries in parallel for the same topic — different phrasings surface different sources. When a search result looks promising, \`web_fetch\` it for the full content.
30
30
 
31
31
  ## Process
32
32
 
@@ -60,7 +60,7 @@ End every response with this exact structure:
60
60
 
61
61
  ## Rules
62
62
 
63
- - Cite every claim with a URL from your <sources> list. **Never invent a URL.** If you didn't \`webfetch\` it, don't cite it.
63
+ - Cite every claim with a URL from your <sources> list. **Never invent a URL.** If you didn't \`web_fetch\` it, don't cite it.
64
64
  - If a fact appears only in your training data and you couldn't find a web source for it, say so explicitly rather than answering from memory.
65
65
  - Prefer primary sources (official docs, vendor changelogs, GitHub releases, paper PDFs) over aggregator blogs.
66
66
  - When dates matter (versions, deprecations, vulnerability disclosures), surface the date of the source.
@@ -82,13 +82,13 @@ export function createScoutSubagent(): Subagent<ScoutPayload> {
82
82
  return {
83
83
  systemPrompt: SCOUT_SYSTEM_PROMPT,
84
84
  profile: 'fast',
85
- tools: [websearchTool, webfetchTool],
85
+ tools: [webSearchTool, webFetchTool],
86
86
  payloadSchema: scoutPayloadSchema,
87
87
  visibility: 'public',
88
88
  inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
89
89
  toolResultBudget: {
90
90
  maxTotalBytes: 512_000,
91
- toolNames: ['websearch', 'webfetch'],
91
+ toolNames: ['web_search', 'web_fetch'],
92
92
  },
93
93
  }
94
94
  }
@@ -10,7 +10,7 @@ export const GUARD_PRIVATE_SURFACE_READ = 'privateSurfaceRead'
10
10
  // bash is excluded: its access to hidden paths is contained by the bwrap
11
11
  // sandbox (applyBashSandbox), not by blocking the call. Every OTHER tool is
12
12
  // scanned, so a new file-reading tool — bundled or third-party — is covered
13
- // the day it ships without a whitelist edit. websearch/webfetch take URLs, not
13
+ // the day it ships without a whitelist edit. web_search/web_fetch take URLs, not
14
14
  // local paths, and the path-plausibility filter keeps their args from matching.
15
15
  const UNSCANNED_TOOLS = new Set(['bash'])
16
16
 
@@ -65,7 +65,7 @@ export function checkPrivateSurfaceReadGuard(options: {
65
65
 
66
66
  // Field names whose values are ALWAYS free text (prose/queries/ids), NEVER a
67
67
  // filesystem path, for EVERY tool. Scanning them caused false positives: a
68
- // guest's `channel_reply({ text: "the memory leak" })` or `websearch({ query:
68
+ // guest's `channel_reply({ text: "the memory leak" })` or `web_search({ query:
69
69
  // "workspace setup" })` resolve to a bare hidden-dir name and were wrongly
70
70
  // blocked. This is a DENYLIST OF KEY NAMES, not a tool whitelist: an unknown
71
71
  // field on an unknown tool is still scanned (fail-closed for new path-bearing
@@ -100,7 +100,7 @@ export function classifyUrl(rawUrl: string): SsrfClassification {
100
100
 
101
101
  export function checkSsrfGuard(options: { tool: string; args: Record<string, unknown> }): SecurityBlock | undefined {
102
102
  const { tool, args } = options
103
- if (tool !== 'webfetch') return undefined
103
+ if (tool !== 'web_fetch') return undefined
104
104
  const url = args.url
105
105
  if (typeof url !== 'string') return undefined
106
106
  if (isGuardAcknowledged(args, GUARD_SSRF)) return undefined
@@ -111,9 +111,9 @@ export function checkSsrfGuard(options: { tool: string; args: Record<string, unk
111
111
  return {
112
112
  block: true,
113
113
  reason: [
114
- `Guard \`${GUARD_SSRF}\` blocked webfetch to a non-public destination (${result.category ?? 'unknown'}): ${result.reason ?? 'classified as internal'}.`,
114
+ `Guard \`${GUARD_SSRF}\` blocked web_fetch to a non-public destination (${result.category ?? 'unknown'}): ${result.reason ?? 'classified as internal'}.`,
115
115
  'This protects against SSRF, cloud metadata exfiltration, and accidental fetches against internal services.',
116
- `If this is genuinely intentional and you trust the URL, retry with \`${ACKNOWLEDGE_GUARDS}.${GUARD_SSRF}: true\` in the webfetch arguments.`,
116
+ `If this is genuinely intentional and you trust the URL, retry with \`${ACKNOWLEDGE_GUARDS}.${GUARD_SSRF}: true\` in the web_fetch arguments.`,
117
117
  ].join(' '),
118
118
  }
119
119
  }
@@ -9,7 +9,7 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
9
9
  `pi-coding-agent`'s built-in tools occasionally return very large payloads that the model only needed once. Two empirically observed cases:
10
10
 
11
11
  1. **`read` on an image file** returns the base64-encoded image inline (e.g. `{type:"image", data:"<3.2MB of base64>"}`). The model uses it on the turn it was asked for, then sees the same 3.2MB of base64 as conversation context on every subsequent prompt — until compaction fires (which is token-driven, not byte-driven, so a single fat blob may sit in context for many turns before compaction is triggered).
12
- 2. **`webfetch` on a binary URL** (PNG, ZIP, etc.) receives the raw response body, treats it as text, and stores raw binary as a JSON-encoded string. Same effect: 100KB+ of mojibake sits in the transcript permanently.
12
+ 2. **`web_fetch` on a binary URL** (PNG, ZIP, etc.) receives the raw response body, treats it as text, and stores raw binary as a JSON-encoded string. Same effect: 100KB+ of mojibake sits in the transcript permanently.
13
13
 
14
14
  The result is a session JSONL file that's tens of megabytes on disk but mostly one or two giant tool results, plus 3-minute first-prompt latencies after container restart because the full transcript gets re-shipped to the LLM as context.
15
15
 
@@ -0,0 +1,78 @@
1
+ import type { InboundReferenceContext, QuoteAnchorSource } from '@/channels/types'
2
+
3
/** A Discord message that was fetched successfully and can be quoted as reference context. */
export type DiscordResolvedReference = {
  /** ID of the user who wrote the referenced message. */
  authorId: string
  /** Name to show for that user. */
  authorName: string
  /** Text content of the referenced message. */
  text: string
}

/**
 * Callback that loads a single message given its channel ID and message ID.
 *
 * `enrichDiscordMessageReferences` treats a `null` result as "nothing to quote"
 * and, through `fetchSafely`, treats a thrown error or rejection the same way.
 */
export type DiscordReferenceFetch = (channelId: string, messageId: string) => Promise<DiscordResolvedReference | null>

/** Address of one Discord message: the channel it lives in plus its own ID. */
export type DiscordMessagePointer = {
  channelId: string
  messageId: string
}
15
+
16
/**
 * Resolves the messages an inbound Discord message refers to — the message it
 * replies to (if any) and Discord message links found in its text — and
 * packages them as reference context.
 *
 * Lookups are best-effort: a message that cannot be fetched (the callback
 * returns `null`, throws, or rejects) is simply left out.
 *
 * @param args.text Inbound message text; returned unchanged.
 * @param args.reply Pointer to the replied-to message, when the inbound message is a reply.
 * @param args.fetchMessage Callback used to load each referenced message.
 * @param args.linkLimit Maximum number of message links to resolve (default 3).
 *   Negative values are treated as 0.
 * @returns The original text, plus `referenceContext` when at least one
 *   referenced message was resolved. `kind` is `'reply'` when the replied-to
 *   message was resolved and `'link'` otherwise; the reply source, if present,
 *   comes first, followed by linked messages in order of appearance.
 */
export async function enrichDiscordMessageReferences(args: {
  text: string
  reply?: DiscordMessagePointer
  fetchMessage: DiscordReferenceFetch
  linkLimit?: number
}): Promise<{ text: string; referenceContext?: InboundReferenceContext }> {
  const { text, reply, fetchMessage } = args
  // Clamp at 0: a negative limit would make `slice(0, limit)` drop links from
  // the END of the list instead of capping how many are resolved.
  const linkLimit = Math.max(0, args.linkLimit ?? 3)

  const parent = reply === undefined ? null : await fetchSafely(fetchMessage, reply)

  // When the replied-to message was resolved, a link in the text that points
  // at that same message would only fetch and quote it a second time.
  const alreadyQuoted = (link: DiscordMessagePointer): boolean =>
    parent !== null &&
    reply !== undefined &&
    link.channelId === reply.channelId &&
    link.messageId === reply.messageId

  const links = extractDiscordMessageLinks(text)
    .filter((link) => !alreadyQuoted(link))
    .slice(0, linkLimit)

  // The link lookups are independent of each other, so run them concurrently.
  // `fetchSafely` never rejects, and `Promise.all` keeps the results in link order.
  const linked = await Promise.all(links.map((link) => fetchSafely(fetchMessage, link)))

  const sources: QuoteAnchorSource[] = []
  if (parent !== null) sources.push(toSource(parent))
  for (const message of linked) {
    if (message !== null) sources.push(toSource(message))
  }

  if (sources.length === 0) return { text }
  return { text, referenceContext: { kind: parent !== null ? 'reply' : 'link', sources } }
}
42
+
43
// Message permalink on discord.com / discordapp.com (optionally on the
// `canary.` or `ptb.` host): /channels/<numeric id or @me>/<channel id>/<message id>.
// Capture groups: 1 = first path segment, 2 = channel id, 3 = message id.
const DISCORD_MESSAGE_LINK = /https?:\/\/(?:canary\.|ptb\.)?discord(?:app)?\.com\/channels\/(\d+|@me)\/(\d+)\/(\d+)/g

/**
 * Finds every Discord message link in `text` and returns one pointer per
 * distinct channel/message pair, in order of first appearance.
 */
function extractDiscordMessageLinks(text: string): DiscordMessagePointer[] {
  // A Map keeps insertion order, so it doubles as the "seen" set and the result list.
  const pointersByKey = new Map<string, DiscordMessagePointer>()
  for (const [, , channelId, messageId] of text.matchAll(DISCORD_MESSAGE_LINK)) {
    if (channelId === undefined || messageId === undefined) continue
    const key = `${channelId}:${messageId}`
    if (!pointersByKey.has(key)) pointersByKey.set(key, { channelId, messageId })
  }
  return [...pointersByKey.values()]
}
59
+
60
/**
 * Best-effort message lookup: calls `fetchMessage` for `pointer` and resolves
 * to its result, or to `null` if the callback throws or its promise rejects.
 * This function itself never rejects.
 */
async function fetchSafely(
  fetchMessage: DiscordReferenceFetch,
  pointer: DiscordMessagePointer,
): Promise<DiscordResolvedReference | null> {
  let resolved: DiscordResolvedReference | null = null
  try {
    resolved = await fetchMessage(pointer.channelId, pointer.messageId)
  } catch {
    // Deliberately swallowed: a reference that cannot be loaded is treated as absent.
  }
  return resolved
}
70
+
71
/**
 * Converts a fetched Discord message into a quote-anchor source tagged with
 * the `discord-bot` adapter. Only the author ID, author name and text are copied.
 */
function toSource(message: DiscordResolvedReference): QuoteAnchorSource {
  const { authorId, authorName, text } = message
  return { adapter: 'discord-bot', authorId, authorName, text }
}
@@ -39,6 +39,7 @@ import {
39
39
  type InboundDropReason,
40
40
  renderPlaceholder,
41
41
  } from './discord-bot-classify'
42
+ import { enrichDiscordMessageReferences } from './discord-bot-reference'
42
43
  import {
43
44
  ackInteraction,
44
45
  parseInteractionAsCommand,
@@ -902,11 +903,32 @@ export function createDiscordBotAdapter(options: DiscordBotAdapterOptions): Disc
902
903
  return
903
904
  }
904
905
 
905
- const routedTag = await formatChannelTag(verdict.payload.workspace, verdict.payload.chat)
906
+ const replyMessageId = event.message_reference?.message_id
907
+ const referenceResult = await enrichDiscordMessageReferences({
908
+ text: verdict.payload.text,
909
+ ...(replyMessageId !== undefined
910
+ ? { reply: { channelId: event.message_reference?.channel_id ?? event.channel_id, messageId: replyMessageId } }
911
+ : {}),
912
+ fetchMessage: async (channelId, messageId) => {
913
+ const message: { author: { id: string; username: string; global_name?: string | null }; content: string } =
914
+ await client.getMessage(channelId, messageId)
915
+ return {
916
+ authorId: message.author.id,
917
+ authorName: message.author.global_name ?? message.author.username,
918
+ text: message.content,
919
+ }
920
+ },
921
+ })
922
+ const payload =
923
+ referenceResult.referenceContext === undefined
924
+ ? verdict.payload
925
+ : { ...verdict.payload, referenceContext: referenceResult.referenceContext }
926
+
927
+ const routedTag = await formatChannelTag(payload.workspace, payload.chat)
906
928
  logger.info(
907
- `[discord-bot] routed id=${event.id} ${routedTag} mention=${verdict.payload.isBotMention} reply=${verdict.payload.replyToBotMessageId !== null}`,
929
+ `[discord-bot] routed id=${event.id} ${routedTag} mention=${payload.isBotMention} reply=${payload.replyToBotMessageId !== null}`,
908
930
  )
909
- await options.router.route(verdict.payload)
931
+ await options.router.route(payload)
910
932
  } catch (err) {
911
933
  logger.error(`[discord-bot] handleInbound failed: ${describe(err)}`)
912
934
  } finally {