typeclaw 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/README.md +1 -1
  2. package/package.json +1 -1
  3. package/src/agent/index.ts +42 -5
  4. package/src/agent/llm-replay-sanitizer.ts +120 -0
  5. package/src/agent/loop-guard.ts +34 -0
  6. package/src/agent/multimodal/look-at.ts +1 -1
  7. package/src/agent/plugin-tools.ts +90 -12
  8. package/src/agent/session-origin.ts +30 -0
  9. package/src/agent/subagent-completion-reminder.ts +23 -0
  10. package/src/agent/subagents.ts +31 -2
  11. package/src/agent/system-prompt.ts +1 -1
  12. package/src/agent/tool-not-found-nudge.ts +8 -1
  13. package/src/agent/tools/channel-reply.ts +3 -3
  14. package/src/agent/tools/curl-impersonate.ts +2 -2
  15. package/src/agent/tools/spawn-subagent.ts +19 -2
  16. package/src/agent/tools/subagent-access.ts +40 -5
  17. package/src/agent/tools/subagent-cancel.ts +3 -1
  18. package/src/agent/tools/subagent-output.ts +6 -2
  19. package/src/agent/tools/webfetch/fetch.ts +18 -18
  20. package/src/agent/tools/webfetch/index.ts +1 -1
  21. package/src/agent/tools/webfetch/tool.ts +13 -13
  22. package/src/agent/tools/webfetch/types.ts +1 -1
  23. package/src/agent/tools/websearch.ts +6 -6
  24. package/src/bundled-plugins/backup/index.ts +40 -37
  25. package/src/bundled-plugins/backup/runner.ts +22 -1
  26. package/src/bundled-plugins/github-cli-auth/gh-command.ts +15 -7
  27. package/src/bundled-plugins/guard/policies/non-workspace-write.ts +38 -1
  28. package/src/bundled-plugins/memory/README.md +11 -11
  29. package/src/bundled-plugins/memory/dreaming.ts +5 -0
  30. package/src/bundled-plugins/memory/search-tool.ts +98 -1
  31. package/src/bundled-plugins/operator/operator.ts +5 -1
  32. package/src/bundled-plugins/reviewer/reviewer.ts +18 -9
  33. package/src/bundled-plugins/reviewer/skills/code-review.ts +1 -1
  34. package/src/bundled-plugins/reviewer/skills/general.ts +1 -1
  35. package/src/bundled-plugins/scout/scout.ts +7 -7
  36. package/src/bundled-plugins/security/policies/private-surface-read.ts +2 -2
  37. package/src/bundled-plugins/security/policies/ssrf.ts +3 -3
  38. package/src/bundled-plugins/tool-result-cap/README.md +1 -1
  39. package/src/channels/adapters/github/inbound.ts +11 -0
  40. package/src/channels/adapters/github/webhook-register.ts +32 -27
  41. package/src/channels/router.ts +61 -23
  42. package/src/channels/schema.ts +2 -1
  43. package/src/channels/subagent-completion-bridge.ts +18 -18
  44. package/src/channels/types.ts +1 -1
  45. package/src/cli/inspect-controller.ts +130 -38
  46. package/src/container/start.ts +7 -1
  47. package/src/git/mutex.ts +22 -0
  48. package/src/git/reconcile-ignored.ts +214 -0
  49. package/src/hostd/daemon.ts +26 -1
  50. package/src/hostd/portbroker-manager.ts +7 -0
  51. package/src/init/dockerfile.ts +1 -1
  52. package/src/init/gitignore.ts +25 -16
  53. package/src/inspect/index.ts +31 -4
  54. package/src/inspect/loop.ts +16 -12
  55. package/src/plugin/define.ts +2 -2
  56. package/src/plugin/index.ts +2 -2
  57. package/src/portbroker/hostd-client.ts +36 -13
  58. package/src/run/index.ts +14 -0
  59. package/src/sandbox/build.ts +10 -0
  60. package/src/sandbox/index.ts +9 -1
  61. package/src/sandbox/policy.ts +12 -0
  62. package/src/sandbox/session-tmp.ts +43 -0
  63. package/src/sandbox/writable-zones.ts +103 -3
  64. package/src/server/command-runner.ts +1 -1
  65. package/src/server/index.ts +8 -0
  66. package/src/skills/typeclaw-channel-github/SKILL.md +37 -10
  67. package/src/skills/typeclaw-memory/SKILL.md +3 -1
  68. package/src/tui/format.ts +11 -11
@@ -281,7 +281,7 @@ function isCommandBoundaryBefore(tokens: readonly string[], index: number): bool
281
281
  while (cursor >= 0) {
282
282
  const prev = tokens[cursor]
283
283
  if (prev === undefined) return false
284
- if (prev === '&&' || prev === '||' || prev === '|' || prev === ';') return true
284
+ if (prev === '&&' || prev === '||' || prev === '|' || prev === ';' || prev === '\n') return true
285
285
  if (/^[A-Za-z_][A-Za-z0-9_]*=/.test(prev)) {
286
286
  cursor -= 1
287
287
  continue
@@ -409,11 +409,14 @@ function isPlaceholderSegment(segment: string): boolean {
409
409
  return segment.includes('{') || segment.includes('}')
410
410
  }
411
411
 
412
- // Splits on whitespace AND shell control operators (; | & && ||) so a boundary
413
- // like `true; gh ...` (no surrounding spaces) yields a standalone operator
414
- // token. Quote-aware: operators inside quotes are literal. This is a
415
- // command-position detector, not a full shell parser — it does not interpret
416
- // redirections, subshells, or backgrounding semantics beyond boundary marking.
412
+ // Splits on whitespace AND shell control operators (newline ; | & && ||) so a
413
+ // boundary like `true; gh ...` (no surrounding spaces) or a `gh` on its own line
414
+ // yields a standalone separator token. A newline ends a simple command in bash,
415
+ // so it must be a boundary too — otherwise a `gh` on a later line (e.g. after a
416
+ // heredoc) is not seen at command position and escapes classification. Quote-
417
+ // aware: operators inside quotes are literal. This is a command-position
418
+ // detector, not a full shell parser — it does not interpret redirections,
419
+ // subshells, heredoc bodies, or backgrounding semantics beyond boundary marking.
417
420
  function tokenize(command: string): string[] {
418
421
  const tokens: string[] = []
419
422
  let current = ''
@@ -441,10 +444,15 @@ function tokenize(command: string): string[] {
441
444
  hasContent = true
442
445
  continue
443
446
  }
444
- if (ch === ' ' || ch === '\t' || ch === '\n') {
447
+ if (ch === ' ' || ch === '\t') {
445
448
  flush()
446
449
  continue
447
450
  }
451
+ if (ch === '\n') {
452
+ flush()
453
+ tokens.push('\n')
454
+ continue
455
+ }
448
456
  if (ch === ';' || ch === '|' || ch === '&') {
449
457
  flush()
450
458
  const next = command[i + 1]
@@ -43,15 +43,27 @@ export async function checkNonWorkspaceWriteGuard(options: {
43
43
 
44
44
  const targetPath = path.resolve(agentDir, rawPath)
45
45
  const workspacePath = path.resolve(agentDir, 'workspace')
46
- const [realTargetPath, realWorkspacePath] = await Promise.all([
46
+ const [realTargetPath, realWorkspacePath, realAgentDir, realTmpRoot] = await Promise.all([
47
47
  resolveRealIntendedPath(targetPath),
48
48
  resolveRealIntendedPath(workspacePath),
49
+ resolveRealIntendedPath(path.resolve(agentDir)),
50
+ resolveRealIntendedPath('/tmp'),
49
51
  ])
50
52
  if (await isSkillAuthoringAllowed({ tool, args, agentDir })) return undefined
51
53
  if (await isMemoryRetrievalCacheWriteAllowed({ tool, args, agentDir, origin })) return undefined
52
54
  if (await isMemoryTopicsWriteAllowed({ tool, args, agentDir, origin })) return undefined
53
55
  if (await isAllowedAgentRootWrite(agentDir, targetPath, realTargetPath)) return undefined
54
56
  if (isInside(realWorkspacePath, realTargetPath)) return undefined
57
+ // /tmp is virtual per-session scratch (see src/sandbox/session-tmp.ts), not a
58
+ // project or secret surface — throwaway, never committed, so an unacknowledged
59
+ // write is expected. Allowed only on LEXICAL intent: the model's raw path must
60
+ // itself be an absolute /tmp/... path. A relative path that merely realpaths
61
+ // into /tmp (e.g. `workspace/link` where `link -> /tmp/x`) is a workspace
62
+ // escape, not scratch, and must stay blocked by the rules above. The physical
63
+ // target must also still resolve under real /tmp (blocks `/tmp/../agent/.env`
64
+ // and a `/tmp/link -> /agent/.env`) and must not land inside the agent dir
65
+ // (a container/test agent dir can itself sit under /tmp).
66
+ if (isTmpScratchWrite(rawPath, realTargetPath, realAgentDir, realTmpRoot)) return undefined
55
67
  if (isGuardAcknowledged(args, GUARD_NON_WORKSPACE_WRITE)) return undefined
56
68
 
57
69
  return {
@@ -77,6 +89,31 @@ async function isAllowedAgentRootWrite(agentDir: string, targetPath: string, rea
77
89
  return false
78
90
  }
79
91
 
92
+ // `rawPath`: the model's RAW path normalized; only an absolute /tmp/... path
93
+ // counts as scratch intent (a relative workspace path that escapes into /tmp is
94
+ // handled by the escape rules above, never here). `realTargetPath`: the
95
+ // realpath-resolved physical target — must still land under /tmp (not /agent via
96
+ // `..` or a planted symlink) and must not land inside the agent dir.
97
+ function isTmpScratchWrite(
98
+ rawPath: string,
99
+ realTargetPath: string,
100
+ realAgentDir: string,
101
+ realTmpRoot: string,
102
+ ): boolean {
103
+ const normalizedRaw = path.normalize(rawPath)
104
+ const rawIsAbsoluteTmp = normalizedRaw === '/tmp' || isInside('/tmp', normalizedRaw)
105
+ if (!rawIsAbsoluteTmp) return false
106
+
107
+ // Compare against the REALPATH of /tmp, not the literal: on macOS /tmp is a
108
+ // symlink to /private/tmp, so realTargetPath resolves there and a literal-/tmp
109
+ // containment check would never match.
110
+ const physicallyUnderTmp = realTargetPath === realTmpRoot || isInside(realTmpRoot, realTargetPath)
111
+ if (!physicallyUnderTmp) return false
112
+
113
+ const insideAgent = realTargetPath === realAgentDir || isInside(realAgentDir, realTargetPath)
114
+ return !insideAgent
115
+ }
116
+
80
117
  function isInside(parent: string, child: string): boolean {
81
118
  const relative = path.relative(parent, child)
82
119
  return relative === '' || (!relative.startsWith('..') && !path.isAbsolute(relative))
@@ -30,17 +30,17 @@ All fields are **restart-required** — the plugin reads them once at boot.
30
30
 
31
31
  ## What it contributes
32
32
 
33
- | Kind | Name | Notes |
34
- | -------- | -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
35
- | Subagent | `memory-logger` | Reads a parent transcript past a watermark and appends fragments to `memory/streams/<today>.jsonl`. Coalesced per `agentDir`. |
36
- | Subagent | `dreaming` | Reads shards under `memory/topics/` plus undreamed daily-stream events and rebalances the topic shards. Coalesced per `agentDir`. Citation-superset invariant enforced on every run. |
37
- | Subagent | `memory-retrieval` | On `session.turn.start` when injection plan is `index` mode, reads the user's actual prompt for this turn + shard listing, writes a focused summary to `memory/.retrieval-cache/<sessionId>.md`. Coalesced per `parentSessionId`. Declares `profile: 'fast'` (retrieval is "≤3 keyword searches + 1 write", no reasoning required) and `timeoutMs: 30_000` so a wedged provider call releases the coalescing key instead of poisoning the cache for every subsequent turn. |
38
- | Tool | `memory_search` | Main-agent tool. Substring/regex search across BOTH topic shards (slugs, frontmatter, bodies) and undreamed daily-stream events (fragment topic/body, legacy prose). Results are discriminated by `source: "topic" \| "stream"`; topics come first, then streams newest-first. |
39
- | Tool | `delete_topic_shard` | Subagent-only (dreaming). Deletes a topic shard at `memory/topics/<slug>.md`. Path-guarded. |
40
- | Cron | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`. |
41
- | Hook | `session.idle` | Per-session debouncer with size-based ceiling. Spawns `memory-logger` on idle or buffer-trip. |
42
- | Hook | `session.end` | Spawns `memory-logger` immediately; also unlinks the retrieval-cache file for this session. |
43
- | Hook | `session.turn.start` | When `buildInjectionPlan` returns `mode: 'index'` and origin is not a subagent, spawns `memory-retrieval` (detached) with the turn's `userPrompt` so the cache reflects the user's current question, not the assembling system prompt. Fire-and-forget; failures route through the plugin logger. |
33
+ | Kind | Name | Notes |
34
+ | -------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
35
+ | Subagent | `memory-logger` | Reads a parent transcript past a watermark and appends fragments to `memory/streams/<today>.jsonl`. Coalesced per `agentDir`. |
36
+ | Subagent | `dreaming` | Reads shards under `memory/topics/` plus undreamed daily-stream events and rebalances the topic shards. Coalesced per `agentDir`. Citation-superset invariant enforced on every run. |
37
+ | Subagent | `memory-retrieval` | On `session.turn.start` when injection plan is `index` mode, reads the user's actual prompt for this turn + shard listing, writes a focused summary to `memory/.retrieval-cache/<sessionId>.md`. Coalesced per `parentSessionId`. Declares `profile: 'fast'` (retrieval is "≤3 keyword searches + 1 write", no reasoning required) and `timeoutMs: 30_000` so a wedged provider call releases the coalescing key instead of poisoning the cache for every subsequent turn. |
38
+ | Tool | `memory_search` | Main-agent tool. Substring/regex search across BOTH topic shards (slugs, frontmatter, bodies) and undreamed daily-stream events (fragment topic/body, legacy prose). Plain queries are phrase-first: the whole query is tried as one substring, and if that finds nothing the query is split on whitespace and the distinct words are OR-matched, ranked by how many words each hit contains (regex queries never fall back). Results are discriminated by `source: "topic" \| "stream"`; exact-phrase (and regex) results list topics first, then streams newest-first, while word-fallback results are ranked by matched-word count with that order as the tiebreak (so a higher-scoring stream can precede a lower-scoring topic). |
39
+ | Tool | `delete_topic_shard` | Subagent-only (dreaming). Deletes a topic shard at `memory/topics/<slug>.md`. Path-guarded. |
40
+ | Cron | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`. |
41
+ | Hook | `session.idle` | Per-session debouncer with size-based ceiling. Spawns `memory-logger` on idle or buffer-trip. |
42
+ | Hook | `session.end` | Spawns `memory-logger` immediately; also unlinks the retrieval-cache file for this session. |
43
+ | Hook | `session.turn.start` | When `buildInjectionPlan` returns `mode: 'index'` and origin is not a subagent, spawns `memory-retrieval` (detached) with the turn's `userPrompt` so the cache reflects the user's current question, not the assembling system prompt. Fire-and-forget; failures route through the plugin logger. |
44
44
 
45
45
  ## Memory injection (two-tier, topic shards only)
46
46
 
@@ -4,6 +4,7 @@ import { join } from 'node:path'
4
4
 
5
5
  import { z } from 'zod'
6
6
 
7
+ import { withGitLock } from '@/git/mutex'
7
8
  import { defineTool, lsTool, readTool, type Subagent, writeTool } from '@/plugin'
8
9
  import { formatLocalDate, formatLocalDateTime } from '@/shared'
9
10
 
@@ -419,6 +420,10 @@ async function ensureMemoryFiles(agentDir: string): Promise<void> {
419
420
  // `git add` fails with "outside of your sparse-checkout definition" on a
420
421
  // skip-worktree path.
421
422
  export async function commitMemorySnapshot(cwd: string): Promise<void> {
423
+ await withGitLock(cwd, () => commitMemorySnapshotUnlocked(cwd))
424
+ }
425
+
426
+ async function commitMemorySnapshotUnlocked(cwd: string): Promise<void> {
422
427
  const bun = (globalThis as { Bun?: { spawn: typeof Bun.spawn } }).Bun
423
428
  if (!bun) return
424
429
  if (!existsSync(join(cwd, '.git'))) return
@@ -36,7 +36,7 @@ type Matcher = (haystack: string) => boolean
36
36
 
37
37
  export const memorySearchTool = defineTool({
38
38
  description:
39
- 'Search the agent\'s long-term memory. Covers both topic shards under memory/topics/ (consolidated facts) and undreamed daily-stream events under memory/streams/ (recent fragments not yet folded into shards). Case-insensitive substring by default; asRegex=true treats query as a JavaScript regex. Returns matches discriminated by `source: "topic" | "stream"`, each with line-context excerpts; full=true includes complete bodies. Topic matches come first (alphabetical by slug), then stream matches (newest day first).',
39
+ 'Search the agent\'s long-term memory. Covers both topic shards under memory/topics/ (consolidated facts) and undreamed daily-stream events under memory/streams/ (recent fragments not yet folded into shards). Case-insensitive substring by default: tries the whole query as one phrase first, and if that finds nothing, falls back to OR-matching the individual words (ranked by how many words each hit contains) — so a multi-word query still returns results even when no entry contains the exact phrase. asRegex=true treats query as a JavaScript regex (no word fallback). Returns matches discriminated by `source: "topic" | "stream"`, each with line-context excerpts; full=true includes complete bodies. Ordering depends on mode: exact-phrase (and regex) results list all topic matches first (alphabetical by slug), then stream matches (newest day first); word-fallback results are ranked by matched-word count, with that same topic-first/stream-newest order as the tiebreak within each score band, so a higher-scoring stream match can precede a lower-scoring topic match.',
40
40
  parameters: z.object({
41
41
  query: z.string(),
42
42
  asRegex: z.boolean().default(false),
@@ -58,10 +58,49 @@ export const memorySearchTool = defineTool({
58
58
  }
59
59
 
60
60
  const result = searchAll(shards, streamDays, matcherOrError, { full, maxResults })
61
+ if ('matches' in result && result.matches.length === 0) {
62
+ const fallback = tokenFallback(query, asRegex, shards, streamDays, { full, maxResults })
63
+ if (fallback !== null) return resultToToolResult(fallback)
64
+ }
61
65
  return resultToToolResult(result)
62
66
  },
63
67
  })
64
68
 
69
+ // Phrase-first/token-fallback: the descriptive multi-word queries the
70
+ // retrieval subagent issues rarely appear verbatim in any body, so a
71
+ // whole-phrase substring search returns nothing while every component word is
72
+ // present. When the phrase search comes up empty, split on whitespace and
73
+ // OR-match the distinct tokens, ranking each hit by how many tokens it
74
+ // matched (richer matches first) with the natural topic-first/newest-stream
75
+ // order as the stable tiebreak. Returns null when tokenizing cannot widen the
76
+ // search: regex mode (whitespace is intentional pattern syntax), or a token
77
+ // set that is identical to the phrase already tried (a single clean token, so
78
+ // the phrase search already covered it).
79
+ function tokenFallback(
80
+ query: string,
81
+ asRegex: boolean,
82
+ shards: TopicShard[],
83
+ streamDays: UndreamedStreamDay[],
84
+ options: { full: boolean; maxResults: number },
85
+ ): MemorySearchResult | null {
86
+ if (asRegex) return null
87
+ const tokens = distinctTokens(query)
88
+ if (tokens.length === 0) return null
89
+ if (tokens.length === 1 && tokens[0] === query.trim().toLowerCase()) return null
90
+ return searchAllRanked(shards, streamDays, tokens, options)
91
+ }
92
+
93
+ function distinctTokens(query: string): string[] {
94
+ return [
95
+ ...new Set(
96
+ query
97
+ .toLowerCase()
98
+ .split(/\s+/)
99
+ .filter((t) => t.length > 0),
100
+ ),
101
+ ]
102
+ }
103
+
65
104
  function buildMatcher(query: string, asRegex: boolean): Matcher | string {
66
105
  if (asRegex) {
67
106
  try {
@@ -119,6 +158,64 @@ function searchAll(
119
158
  return truncatedAt === undefined ? { matches } : { matches, truncatedAt }
120
159
  }
121
160
 
161
+ // Token-OR variant of searchAll. Builds each match with an any-token matcher
162
+ // (so a hit requires only one token and the excerpt anchors on the first line
163
+ // matching any token), then scores it by how many distinct tokens appear in
164
+ // its full searchable text. Results sort by score descending; ties keep the
165
+ // natural enumeration order (topics first in loadAllShards order, then stream
166
+ // days newest-first), so the established ordering contract holds within each
167
+ // score band. maxResults truncation is applied last, after ranking.
168
+ function searchAllRanked(
169
+ shards: TopicShard[],
170
+ streamDays: UndreamedStreamDay[],
171
+ tokens: string[],
172
+ options: { full: boolean; maxResults: number },
173
+ ): MemorySearchResult {
174
+ const anyToken: Matcher = (haystack) => {
175
+ const lower = haystack.toLowerCase()
176
+ return tokens.some((t) => lower.includes(t))
177
+ }
178
+ const scoreOf = (text: string): number => {
179
+ const lower = text.toLowerCase()
180
+ return tokens.reduce((n, t) => (lower.includes(t) ? n + 1 : n), 0)
181
+ }
182
+
183
+ const scored: Array<{ match: MemorySearchMatch; score: number; order: number }> = []
184
+ let order = 0
185
+
186
+ for (const shard of shards) {
187
+ const match = matchShard(shard, anyToken, options.full)
188
+ if (match === null) continue
189
+ scored.push({ match, score: scoreOf(shardSearchText(shard)), order: order++ })
190
+ }
191
+
192
+ for (let i = streamDays.length - 1; i >= 0; i--) {
193
+ const day = streamDays[i]!
194
+ for (const event of day.events) {
195
+ const match = matchStreamEvent(day, event, anyToken, options.full)
196
+ if (match === null) continue
197
+ scored.push({ match, score: scoreOf(eventSearchText(event)), order: order++ })
198
+ }
199
+ }
200
+
201
+ scored.sort((a, b) => b.score - a.score || a.order - b.order)
202
+
203
+ if (scored.length > options.maxResults) {
204
+ return { matches: scored.slice(0, options.maxResults).map((s) => s.match), truncatedAt: options.maxResults }
205
+ }
206
+ return { matches: scored.map((s) => s.match) }
207
+ }
208
+
209
+ function shardSearchText(shard: TopicShard): string {
210
+ return [shard.slug, shard.frontmatter.heading, ...(shard.frontmatter.tags ?? []), shard.body].join('\n')
211
+ }
212
+
213
+ function eventSearchText(event: StreamEvent): string {
214
+ if (event.type === 'fragment') return `${event.topic}\n${event.body}`
215
+ if (event.type === 'legacy_prose') return event.text
216
+ return ''
217
+ }
218
+
122
219
  function matchShard(shard: TopicShard, matcher: Matcher, full: boolean): TopicMatch | null {
123
220
  const bodyLines = splitBodyLines(shard.body)
124
221
  const firstBodyLineIndex = bodyLines.findIndex((line) => matcher(line))
@@ -18,8 +18,11 @@ You have a full tool set: read, write, edit, grep, find, ls, bash. You can:
18
18
  - Run shell commands with side effects (bash without the read-only restriction)
19
19
  - Use any tool available to a normal operator session
20
20
 
21
+ You CAN delegate, but rarely should:
22
+ - You may \`spawn_subagent\` to hand a clearly separable, context-heavy chunk to a fresh worker — e.g. a focused read-only investigation of a large area you don't want to load into your own context. Spawn only when delegation clearly pays for itself; doing the work yourself is the default. The delegation chain is depth-limited, so a worker you spawn cannot spawn again — keep your own tree flat.
23
+ - Use \`subagent_output\` and \`subagent_cancel\` only for tasks YOU spawned; you cannot see other branches' subagents.
24
+
21
25
  You CANNOT:
22
- - Spawn further subagents (you are at the end of the delegation chain).
23
26
  - Talk to the user directly (the parent owns the conversation).
24
27
  - Use channel_send, channel_reply, or any channel tool.
25
28
 
@@ -67,6 +70,7 @@ export function createOperatorSubagent(): Subagent<OperatorPayload> {
67
70
  payloadSchema: operatorPayloadSchema,
68
71
  visibility: 'public',
69
72
  requiresSpecificPermission: true,
73
+ canSpawnSubagents: true,
70
74
  inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
71
75
  toolResultBudget: {
72
76
  maxTotalBytes: 1_000_000,
@@ -9,8 +9,8 @@ import {
9
9
  lsTool,
10
10
  readTool,
11
11
  type Subagent,
12
- webfetchTool,
13
- websearchTool,
12
+ webFetchTool,
13
+ webSearchTool,
14
14
  } from '@/plugin'
15
15
 
16
16
  import { CODE_REVIEW_SKILL } from './skills/code-review'
@@ -55,9 +55,17 @@ You are STRICTLY PROHIBITED from:
55
55
  - Posting to GitHub, Slack, Discord, email, or any channel — the parent owns posting
56
56
  - Pushing, merging, rebasing, or otherwise mutating remote state
57
57
  - Using bash for: mkdir, touch, rm, cp, mv, git add, git commit, git push, git rebase, git reset, npm install, pip install, or any write operation
58
- - Spawning further subagents — you are at the end of the delegation chain
59
58
 
60
- Your role is EXCLUSIVELY to analyze and report. The parent agent decides what to do with your findings.
59
+ Your role is EXCLUSIVELY to analyze and report. The parent agent decides what to do with your findings. Delegating part of that analysis is fine; performing side effects through a delegate is NOT — anything you cannot do directly, a subagent you spawn cannot do for you.
60
+
61
+ ## Delegating to keep your context lean
62
+
63
+ You run on a deliberately expensive model. Reading a sprawling file tree, a giant diff, or a pile of vendor docs into YOUR context burns that budget on grunt work. When a slice of the job is bulky-but-mechanical — "summarize what these 40 files do", "extract the public API of this module", "gather the relevant passages from this 2,000-line diff" — hand it to a cheaper worker with \`spawn_subagent\` and review the distilled result instead of the raw bulk.
64
+
65
+ - Spawn read-only/research workers for context-heavy gathering, not for forming the verdict. The findings and the \`<review>\` block are YOURS — never delegate the judgment.
66
+ - Each delegated task must be self-contained: the worker does not see this conversation or the target. Put everything it needs in the prompt.
67
+ - The chain is depth-limited: a worker you spawn cannot spawn again. Keep delegation one level deep.
68
+ - \`subagent_output\`/\`subagent_cancel\` reach only the tasks YOU spawned. Use background spawns for parallel gathering, then fold the results into your single review pass.
61
69
 
62
70
  ## Tools
63
71
 
@@ -68,8 +76,8 @@ The runtime exposes these tools to you by these EXACT names — call them by nam
68
76
  - \`find\` — locate files by name pattern
69
77
  - \`ls\` — list a directory's immediate contents
70
78
  - \`bash\` — read-only commands ONLY. Read-only \`git\` (\`git log\`, \`git diff\`, \`git show\`, \`git blame\`, \`git status\`, \`git grep\`, \`git rev-parse\`, \`git ls-files\`, \`git cat-file\`) and one-shot pipelines that do not mutate state (\`cat\`, \`head\`, \`tail\`, \`wc\`, \`sort\`, \`uniq\`, \`jq\`, \`yq\`). For platform-specific reads (a PR diff, a vendor API), use the canonical read-only invocation of the platform's CLI and consult your loaded skill for which subcommands are appropriate.
71
- - \`websearch\` — search the public web (e.g. for OWASP guidance, RFCs, library changelogs, framework docs, prior art)
72
- - \`webfetch\` — fetch a single URL (e.g. to read a linked spec, vendor doc, or article cited in the target)
79
+ - \`web_search\` — search the public web (e.g. for OWASP guidance, RFCs, library changelogs, framework docs, prior art)
80
+ - \`web_fetch\` — fetch a single URL (e.g. to read a linked spec, vendor doc, or article cited in the target)
73
81
  - \`load_skill\` — load a curated review skill by name. See the section below.
74
82
 
75
83
  Launch independent tools in parallel. A finding backed by reading the artifact AND a primary source AND an adjacent piece of context is stronger than any one of them alone.
@@ -94,7 +102,7 @@ These rules apply to every review regardless of domain.
94
102
 
95
103
  1. **Form findings, not opinions.** Each finding is one issue. State severity (\`blocker\` / \`concern\` / \`nit\` / \`praise\`). Cite specific evidence — a file:line, a diff hunk, a quoted passage. Suggest a concrete alternative.
96
104
  2. **Evidence is mandatory.** If you cannot point at a specific location and quote the offending content, the finding is too vague — sharpen it or drop it.
97
- 3. **Verify external claims.** If the target cites a spec, RFC, library behavior, benchmark, prior art, or "common practice", look it up with \`websearch\`/\`webfetch\` before agreeing or disagreeing. Cite the source in the finding.
105
+ 3. **Verify external claims.** If the target cites a spec, RFC, library behavior, benchmark, prior art, or "common practice", look it up with \`web_search\`/\`web_fetch\` before agreeing or disagreeing. Cite the source in the finding.
98
106
  4. **One finding, one concern.** Do not bundle unrelated issues into a single finding. The parent parses findings; mixed-concern findings break that.
99
107
  5. **Praise is rare.** Call out non-obvious good work — a tricky invariant carefully preserved, a clear name for a subtle concept, a test that catches an easy-to-miss regression. Do not pad reviews with positivity.
100
108
  6. **No generic LLM review noise.** "Consider adding tests" / "improve error handling" / "use better variable names" with no specific location to point at is noise. If you cannot point at a line, do not raise the finding.
@@ -168,10 +176,11 @@ If none of the listed skills fit the target, load \`general\` and explain in \`<
168
176
  // user has not configured `models.deep` in typeclaw.json, `resolveProfile`
169
177
  // falls back to `default` with a one-time warning — safe degradation.
170
178
  profile: 'deep',
171
- tools: [readTool, grepTool, findTool, lsTool, bashTool, websearchTool, webfetchTool],
179
+ tools: [readTool, grepTool, findTool, lsTool, bashTool, webSearchTool, webFetchTool],
172
180
  customTools: [loadSkillTool],
173
181
  payloadSchema: reviewerPayloadSchema,
174
182
  visibility: 'public',
183
+ canSpawnSubagents: true,
175
184
  timeoutMs: REVIEWER_SPAWN_TIMEOUT_MS,
176
185
  inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
177
186
  toolResultBudget: {
@@ -179,7 +188,7 @@ If none of the listed skills fit the target, load \`general\` and explain in \`<
179
188
  // diffs and multiple files plus web sources; lower than operator (1MB)
180
189
  // because we are read-only and producing analysis, not building.
181
190
  maxTotalBytes: 512_000,
182
- toolNames: ['read', 'grep', 'find', 'ls', 'bash', 'websearch', 'webfetch', 'load_skill'],
191
+ toolNames: ['read', 'grep', 'find', 'ls', 'bash', 'web_search', 'web_fetch', 'load_skill'],
183
192
  },
184
193
  }
185
194
  }
@@ -33,7 +33,7 @@ A finding without context is noise. Before forming findings:
33
33
  Prioritize in this order:
34
34
 
35
35
  1. **Correctness.** Does the change do what its description claims? Off-by-one errors, missing null/undefined handling, race conditions, incorrect error propagation, broken invariants.
36
- 2. **Security.** Injection vectors (SQL, shell, HTML), missing authz/authn checks, secret leakage in logs or error messages, unsafe deserialization, SSRF, path traversal, time-of-check-time-of-use. Cite OWASP / CWE / RFC by number when relevant; verify with \`websearch\` or \`webfetch\` before asserting.
36
+ 2. **Security.** Injection vectors (SQL, shell, HTML), missing authz/authn checks, secret leakage in logs or error messages, unsafe deserialization, SSRF, path traversal, time-of-check-time-of-use. Cite OWASP / CWE / RFC by number when relevant; verify with \`web_search\` or \`web_fetch\` before asserting.
37
37
  3. **Architecture fit.** Does the change respect existing layering? Does it introduce a new dependency where the existing pattern would have worked? Does it duplicate logic that already exists elsewhere in the repo?
38
38
  4. **Test coverage.** New behavior should have new tests. Edge cases the description names should be tested. If existing tests were deleted or skipped, that is a blocker absent a stated reason. Look past the raw test count, but only flag a redundant case when you can show the *inputs themselves* reach the same path — same branch, same validation rule, same boundary — not merely that the assertion shape is identical. Table-driven and parametrized tests legitimately share one assertion across many inputs while each input exercises a distinct branch, parser, or edge case; that is coverage, not duplication. The finding is "these inputs are indistinguishable to the code under test," and you must name the path they collapse onto — never "the assertions look the same."
39
39
  5. **Error handling.** Empty catch blocks, swallowed errors, errors converted to silent fallbacks, retry loops without bounded backoff, missing timeouts on external calls.
@@ -11,7 +11,7 @@ You have been asked to review something that does not clearly fit a specific dom
11
11
 
12
12
  ## How to acquire the target
13
13
 
14
- - **A URL** — \`webfetch\` it. If it is a private resource the fetch cannot reach, say so in \`<summary>\` and review what was provided in the payload.
14
+ - **A URL** — \`web_fetch\` it. If it is a private resource the fetch cannot reach, say so in \`<summary>\` and review what was provided in the payload.
15
15
  - **A file path** — \`read\` it. \`ls\` the parent directory if siblings might be relevant.
16
16
  - **Inline text in the payload** — read the payload carefully; quote from it when forming evidence.
17
17
  - **A reference to something the caller has** — ask the caller to provide it. Return a single \`blocker\` finding describing what you need and a \`comment\` verdict.
@@ -1,6 +1,6 @@
1
1
  import { z } from 'zod'
2
2
 
3
- import { type Subagent, webfetchTool, websearchTool } from '@/plugin'
3
+ import { type Subagent, webFetchTool, webSearchTool } from '@/plugin'
4
4
 
5
5
  export const SCOUT_SYSTEM_PROMPT = `You are a web-research specialist running inside TypeClaw. Your job: gather facts from the public internet and return a focused, citation-backed answer to the caller. For LOCAL questions (codebase, sessions, memory, config, git history, mounts), the caller should spawn \`explorer\` instead — you have no filesystem tools.
6
6
 
@@ -17,8 +17,8 @@ Your role is EXCLUSIVELY to search and read public web sources.
17
17
 
18
18
  The runtime exposes these tools to you by these EXACT names — call them by name, do not paraphrase:
19
19
 
20
- - \`websearch\` — search the public web. Returns ranked \`{title, url, snippet}\` entries. Defaults to DuckDuckGo; pass \`source: "wikipedia"\` for encyclopedic lookups.
21
- - \`webfetch\` — fetch a single HTTP(S) URL and return the body, optionally compacted by a strategy:
20
+ - \`web_search\` — search the public web. Returns ranked \`{title, url, snippet}\` entries. Defaults to DuckDuckGo; pass \`source: "wikipedia"\` for encyclopedic lookups.
21
+ - \`web_fetch\` — fetch a single HTTP(S) URL and return the body, optionally compacted by a strategy:
22
22
  - \`readability\` (default for HTML) — extract article content as markdown
23
23
  - \`jq\` — query JSON APIs (pass \`query\`)
24
24
  - \`selector\` — extract text from CSS-selected elements (pass \`selector\`)
@@ -26,7 +26,7 @@ The runtime exposes these tools to you by these EXACT names — call them by nam
26
26
  - \`snapshot\` — indented semantic tree of the page (forms, headings, links)
27
27
  - \`raw\` — no processing
28
28
 
29
- Launch multiple \`websearch\` queries in parallel for the same topic — different phrasings surface different sources. When a search result looks promising, \`webfetch\` it for the full content.
29
+ Launch multiple \`web_search\` queries in parallel for the same topic — different phrasings surface different sources. When a search result looks promising, \`web_fetch\` it for the full content.
30
30
 
31
31
  ## Process
32
32
 
@@ -60,7 +60,7 @@ End every response with this exact structure:
60
60
 
61
61
  ## Rules
62
62
 
63
- - Cite every claim with a URL from your <sources> list. **Never invent a URL.** If you didn't \`webfetch\` it, don't cite it.
63
+ - Cite every claim with a URL from your <sources> list. **Never invent a URL.** If you didn't \`web_fetch\` it, don't cite it.
64
64
  - If a fact appears only in your training data and you couldn't find a web source for it, say so explicitly rather than answering from memory.
65
65
  - Prefer primary sources (official docs, vendor changelogs, GitHub releases, paper PDFs) over aggregator blogs.
66
66
  - When dates matter (versions, deprecations, vulnerability disclosures), surface the date of the source.
@@ -82,13 +82,13 @@ export function createScoutSubagent(): Subagent<ScoutPayload> {
82
82
  return {
83
83
  systemPrompt: SCOUT_SYSTEM_PROMPT,
84
84
  profile: 'fast',
85
- tools: [websearchTool, webfetchTool],
85
+ tools: [webSearchTool, webFetchTool],
86
86
  payloadSchema: scoutPayloadSchema,
87
87
  visibility: 'public',
88
88
  inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
89
89
  toolResultBudget: {
90
90
  maxTotalBytes: 512_000,
91
- toolNames: ['websearch', 'webfetch'],
91
+ toolNames: ['web_search', 'web_fetch'],
92
92
  },
93
93
  }
94
94
  }
@@ -10,7 +10,7 @@ export const GUARD_PRIVATE_SURFACE_READ = 'privateSurfaceRead'
10
10
  // bash is excluded: its access to hidden paths is contained by the bwrap
11
11
  // sandbox (applyBashSandbox), not by blocking the call. Every OTHER tool is
12
12
  // scanned, so a new file-reading tool — bundled or third-party — is covered
13
- // the day it ships without a whitelist edit. websearch/webfetch take URLs, not
13
+ // the day it ships without a whitelist edit. web_search/web_fetch take URLs, not
14
14
  // local paths, and the path-plausibility filter keeps their args from matching.
15
15
  const UNSCANNED_TOOLS = new Set(['bash'])
16
16
 
@@ -65,7 +65,7 @@ export function checkPrivateSurfaceReadGuard(options: {
65
65
 
66
66
  // Field names whose values are ALWAYS free text (prose/queries/ids), NEVER a
67
67
  // filesystem path, for EVERY tool. Scanning them caused false positives: a
68
- // guest's `channel_reply({ text: "the memory leak" })` or `websearch({ query:
68
+ // guest's `channel_reply({ text: "the memory leak" })` or `web_search({ query:
69
69
  // "workspace setup" })` resolve to a bare hidden-dir name and were wrongly
70
70
  // blocked. This is a DENYLIST OF KEY NAMES, not a tool whitelist: an unknown
71
71
  // field on an unknown tool is still scanned (fail-closed for new path-bearing
@@ -100,7 +100,7 @@ export function classifyUrl(rawUrl: string): SsrfClassification {
100
100
 
101
101
  export function checkSsrfGuard(options: { tool: string; args: Record<string, unknown> }): SecurityBlock | undefined {
102
102
  const { tool, args } = options
103
- if (tool !== 'webfetch') return undefined
103
+ if (tool !== 'web_fetch') return undefined
104
104
  const url = args.url
105
105
  if (typeof url !== 'string') return undefined
106
106
  if (isGuardAcknowledged(args, GUARD_SSRF)) return undefined
@@ -111,9 +111,9 @@ export function checkSsrfGuard(options: { tool: string; args: Record<string, unk
111
111
  return {
112
112
  block: true,
113
113
  reason: [
114
- `Guard \`${GUARD_SSRF}\` blocked webfetch to a non-public destination (${result.category ?? 'unknown'}): ${result.reason ?? 'classified as internal'}.`,
114
+ `Guard \`${GUARD_SSRF}\` blocked web_fetch to a non-public destination (${result.category ?? 'unknown'}): ${result.reason ?? 'classified as internal'}.`,
115
115
  'This protects against SSRF, cloud metadata exfiltration, and accidental fetches against internal services.',
116
- `If this is genuinely intentional and you trust the URL, retry with \`${ACKNOWLEDGE_GUARDS}.${GUARD_SSRF}: true\` in the webfetch arguments.`,
116
+ `If this is genuinely intentional and you trust the URL, retry with \`${ACKNOWLEDGE_GUARDS}.${GUARD_SSRF}: true\` in the web_fetch arguments.`,
117
117
  ].join(' '),
118
118
  }
119
119
  }
@@ -9,7 +9,7 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
9
9
  `pi-coding-agent`'s built-in tools occasionally return very large payloads that the model only needed once. Two empirically observed cases:
10
10
 
11
11
  1. **`read` on an image file** returns the base64-encoded image inline (e.g. `{type:"image", data:"<3.2MB of base64>"}`). The model uses it on the turn it was asked for, then sees the same 3.2MB of base64 as conversation context on every subsequent prompt — until compaction fires (which is token-driven, not byte-driven, so a single fat blob may sit in context for many turns before compaction is triggered).
12
- 2. **`webfetch` on a binary URL** (PNG, ZIP, etc.) receives the raw response body, treats it as text, and stores raw binary as a JSON-encoded string. Same effect: 100KB+ of mojibake sits in the transcript permanently.
12
+ 2. **`web_fetch` on a binary URL** (PNG, ZIP, etc.) receives the raw response body, treats it as text, and stores raw binary as a JSON-encoded string. Same effect: 100KB+ of mojibake sits in the transcript permanently.
13
13
 
14
14
  The result is a session JSONL file that's tens of megabytes on disk but mostly one or two giant tool results, plus 3-minute first-prompt latencies after container restart because the full transcript gets re-shipped to the LLM as context.
15
15
 
@@ -494,6 +494,12 @@ function classifyOpenedReviewTrigger(input: OpenedReviewTriggerInput): InboundMe
494
494
  const decoyLogin = resolveDecoyReviewerLogin(selfLogin, authType)
495
495
  if (sender.login === selfLogin || (decoyLogin !== null && sender.login === decoyLogin)) return null
496
496
 
497
+ // A draft PR is work-in-progress, so the automatic `opened` path skips it: null
498
+ // here drops to awareness-only context (like a non-`opened` reviewOn) instead of
499
+ // waking a review. An explicit `review_requested` still triggers on a draft via
500
+ // classifyReviewRequest, preserving "skip until explicitly requested".
501
+ if (readBoolean(pr, 'draft') === true) return null
502
+
497
503
  const title = readString(pr, 'title') ?? `#${number}`
498
504
  const head = readString(readRecord(pr.head), 'ref')
499
505
  const baseRef = readString(readRecord(pr.base), 'ref')
@@ -738,6 +744,11 @@ function readNumber(obj: Record<string, unknown> | null, key: string): number |
738
744
  return typeof value === 'number' && Number.isFinite(value) ? value : null
739
745
  }
740
746
 
747
+ function readBoolean(obj: Record<string, unknown> | null, key: string): boolean | null {
748
+ const value = obj?.[key]
749
+ return typeof value === 'boolean' ? value : null
750
+ }
751
+
741
752
  function ok(): Response {
742
753
  return new Response('ok', { status: 200 })
743
754
  }