npm - typeclaw - Versions diffs - 0.24.0 → 0.25.0 - Mend

typeclaw 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68)

package/README.md +1 -1
package/package.json +1 -1
package/src/agent/index.ts +42 -5
package/src/agent/llm-replay-sanitizer.ts +120 -0
package/src/agent/loop-guard.ts +34 -0
package/src/agent/multimodal/look-at.ts +1 -1
package/src/agent/plugin-tools.ts +90 -12
package/src/agent/session-origin.ts +30 -0
package/src/agent/subagent-completion-reminder.ts +23 -0
package/src/agent/subagents.ts +31 -2
package/src/agent/system-prompt.ts +1 -1
package/src/agent/tool-not-found-nudge.ts +8 -1
package/src/agent/tools/channel-reply.ts +3 -3
package/src/agent/tools/curl-impersonate.ts +2 -2
package/src/agent/tools/spawn-subagent.ts +19 -2
package/src/agent/tools/subagent-access.ts +40 -5
package/src/agent/tools/subagent-cancel.ts +3 -1
package/src/agent/tools/subagent-output.ts +6 -2
package/src/agent/tools/webfetch/fetch.ts +18 -18
package/src/agent/tools/webfetch/index.ts +1 -1
package/src/agent/tools/webfetch/tool.ts +13 -13
package/src/agent/tools/webfetch/types.ts +1 -1
package/src/agent/tools/websearch.ts +6 -6
package/src/bundled-plugins/backup/index.ts +40 -37
package/src/bundled-plugins/backup/runner.ts +22 -1
package/src/bundled-plugins/github-cli-auth/gh-command.ts +15 -7
package/src/bundled-plugins/guard/policies/non-workspace-write.ts +38 -1
package/src/bundled-plugins/memory/README.md +11 -11
package/src/bundled-plugins/memory/dreaming.ts +5 -0
package/src/bundled-plugins/memory/search-tool.ts +98 -1
package/src/bundled-plugins/operator/operator.ts +5 -1
package/src/bundled-plugins/reviewer/reviewer.ts +18 -9
package/src/bundled-plugins/reviewer/skills/code-review.ts +1 -1
package/src/bundled-plugins/reviewer/skills/general.ts +1 -1
package/src/bundled-plugins/scout/scout.ts +7 -7
package/src/bundled-plugins/security/policies/private-surface-read.ts +2 -2
package/src/bundled-plugins/security/policies/ssrf.ts +3 -3
package/src/bundled-plugins/tool-result-cap/README.md +1 -1
package/src/channels/adapters/github/inbound.ts +11 -0
package/src/channels/adapters/github/webhook-register.ts +32 -27
package/src/channels/router.ts +61 -23
package/src/channels/schema.ts +2 -1
package/src/channels/subagent-completion-bridge.ts +18 -18
package/src/channels/types.ts +1 -1
package/src/cli/inspect-controller.ts +130 -38
package/src/container/start.ts +7 -1
package/src/git/mutex.ts +22 -0
package/src/git/reconcile-ignored.ts +214 -0
package/src/hostd/daemon.ts +26 -1
package/src/hostd/portbroker-manager.ts +7 -0
package/src/init/dockerfile.ts +1 -1
package/src/init/gitignore.ts +25 -16
package/src/inspect/index.ts +31 -4
package/src/inspect/loop.ts +16 -12
package/src/plugin/define.ts +2 -2
package/src/plugin/index.ts +2 -2
package/src/portbroker/hostd-client.ts +36 -13
package/src/run/index.ts +14 -0
package/src/sandbox/build.ts +10 -0
package/src/sandbox/index.ts +9 -1
package/src/sandbox/policy.ts +12 -0
package/src/sandbox/session-tmp.ts +43 -0
package/src/sandbox/writable-zones.ts +103 -3
package/src/server/command-runner.ts +1 -1
package/src/server/index.ts +8 -0
package/src/skills/typeclaw-channel-github/SKILL.md +37 -10
package/src/skills/typeclaw-memory/SKILL.md +3 -1
package/src/tui/format.ts +11 -11

package/src/bundled-plugins/github-cli-auth/gh-command.ts CHANGED Viewed

@@ -281,7 +281,7 @@ function isCommandBoundaryBefore(tokens: readonly string[], index: number): bool
   while (cursor >= 0) {
     const prev = tokens[cursor]
     if (prev === undefined) return false
-    if (prev === '&&' || prev === '||' || prev === '|' || prev === ';') return true
+    if (prev === '&&' || prev === '||' || prev === '|' || prev === ';' || prev === '\n') return true
     if (/^[A-Za-z_][A-Za-z0-9_]*=/.test(prev)) {
       cursor -= 1
       continue
@@ -409,11 +409,14 @@ function isPlaceholderSegment(segment: string): boolean {
   return segment.includes('{') || segment.includes('}')
 }
-// Splits on whitespace AND shell control operators (; | & && ||) so a boundary
-// like `true; gh ...` (no surrounding spaces) yields a standalone operator
-// token. Quote-aware: operators inside quotes are literal. This is a
-// command-position detector, not a full shell parser — it does not interpret
-// redirections, subshells, or backgrounding semantics beyond boundary marking.
+// Splits on whitespace AND shell control operators (newline ; | & && ||) so a
+// boundary like `true; gh ...` (no surrounding spaces) or a `gh` on its own line
+// yields a standalone separator token. A newline ends a simple command in bash,
+// so it must be a boundary too — otherwise a `gh` on a later line (e.g. after a
+// heredoc) is not seen at command position and escapes classification. Quote-
+// aware: operators inside quotes are literal. This is a command-position
+// detector, not a full shell parser — it does not interpret redirections,
+// subshells, heredoc bodies, or backgrounding semantics beyond boundary marking.
 function tokenize(command: string): string[] {
   const tokens: string[] = []
   let current = ''
@@ -441,10 +444,15 @@ function tokenize(command: string): string[] {
       hasContent = true
       continue
     }
-    if (ch === ' ' || ch === '\t' || ch === '\n') {
+    if (ch === ' ' || ch === '\t') {
       flush()
       continue
     }
+    if (ch === '\n') {
+      flush()
+      tokens.push('\n')
+      continue
+    }
     if (ch === ';' || ch === '|' || ch === '&') {
       flush()
       const next = command[i + 1]

package/src/bundled-plugins/guard/policies/non-workspace-write.ts CHANGED Viewed

@@ -43,15 +43,27 @@ export async function checkNonWorkspaceWriteGuard(options: {
   const targetPath = path.resolve(agentDir, rawPath)
   const workspacePath = path.resolve(agentDir, 'workspace')
-  const [realTargetPath, realWorkspacePath] = await Promise.all([
+  const [realTargetPath, realWorkspacePath, realAgentDir, realTmpRoot] = await Promise.all([
     resolveRealIntendedPath(targetPath),
     resolveRealIntendedPath(workspacePath),
+    resolveRealIntendedPath(path.resolve(agentDir)),
+    resolveRealIntendedPath('/tmp'),
   ])
   if (await isSkillAuthoringAllowed({ tool, args, agentDir })) return undefined
   if (await isMemoryRetrievalCacheWriteAllowed({ tool, args, agentDir, origin })) return undefined
   if (await isMemoryTopicsWriteAllowed({ tool, args, agentDir, origin })) return undefined
   if (await isAllowedAgentRootWrite(agentDir, targetPath, realTargetPath)) return undefined
   if (isInside(realWorkspacePath, realTargetPath)) return undefined
+  // /tmp is virtual per-session scratch (see src/sandbox/session-tmp.ts), not a
+  // project or secret surface — throwaway, never committed, so an unacknowledged
+  // write is expected. Allowed only on LEXICAL intent: the model's raw path must
+  // itself be an absolute /tmp/... path. A relative path that merely realpaths
+  // into /tmp (e.g. `workspace/link` where `link -> /tmp/x`) is a workspace
+  // escape, not scratch, and must stay blocked by the rules above. The physical
+  // target must also still resolve under real /tmp (blocks `/tmp/../agent/.env`
+  // and a `/tmp/link -> /agent/.env`) and must not land inside the agent dir
+  // (a container/test agent dir can itself sit under /tmp).
+  if (isTmpScratchWrite(rawPath, realTargetPath, realAgentDir, realTmpRoot)) return undefined
   if (isGuardAcknowledged(args, GUARD_NON_WORKSPACE_WRITE)) return undefined
   return {
@@ -77,6 +89,31 @@ async function isAllowedAgentRootWrite(agentDir: string, targetPath: string, rea
   return false
 }
+// `rawPath`: the model's RAW path normalized; only an absolute /tmp/... path
+// counts as scratch intent (a relative workspace path that escapes into /tmp is
+// handled by the escape rules above, never here). `realTargetPath`: the
+// realpath-resolved physical target — must still land under /tmp (not /agent via
+// `..` or a planted symlink) and must not land inside the agent dir.
+function isTmpScratchWrite(
+  rawPath: string,
+  realTargetPath: string,
+  realAgentDir: string,
+  realTmpRoot: string,
+): boolean {
+  const normalizedRaw = path.normalize(rawPath)
+  const rawIsAbsoluteTmp = normalizedRaw === '/tmp' || isInside('/tmp', normalizedRaw)
+  if (!rawIsAbsoluteTmp) return false
+  // Compare against the REALPATH of /tmp, not the literal: on macOS /tmp is a
+  // symlink to /private/tmp, so realTargetPath resolves there and a literal-/tmp
+  // containment check would never match.
+  const physicallyUnderTmp = realTargetPath === realTmpRoot || isInside(realTmpRoot, realTargetPath)
+  if (!physicallyUnderTmp) return false
+  const insideAgent = realTargetPath === realAgentDir || isInside(realAgentDir, realTargetPath)
+  return !insideAgent
+}
 function isInside(parent: string, child: string): boolean {
   const relative = path.relative(parent, child)
   return relative === '' || (!relative.startsWith('..') && !path.isAbsolute(relative))

package/src/bundled-plugins/memory/README.md CHANGED Viewed

@@ -30,17 +30,17 @@ All fields are **restart-required** — the plugin reads them once at boot.
 ## What it contributes
-| Kind     | Name                       | Notes                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
-| -------- | -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| Subagent | `memory-logger`            | Reads a parent transcript past a watermark and appends fragments to `memory/streams/<today>.jsonl`. Coalesced per `agentDir`.                                                                                                                                                                                                                                                                                                                                              |
-| Subagent | `dreaming`                 | Reads shards under `memory/topics/` plus undreamed daily-stream events and rebalances the topic shards. Coalesced per `agentDir`. Citation-superset invariant enforced on every run.                                                                                                                                                                                                                                                                                       |
-| Subagent | `memory-retrieval`         | On `session.turn.start` when injection plan is `index` mode, reads the user's actual prompt for this turn + shard listing, writes a focused summary to `memory/.retrieval-cache/<sessionId>.md`. Coalesced per `parentSessionId`. Declares `profile: 'fast'` (retrieval is "≤3 keyword searches + 1 write", no reasoning required) and `timeoutMs: 30_000` so a wedged provider call releases the coalescing key instead of poisoning the cache for every subsequent turn. |
-| Tool     | `memory_search`            | Main-agent tool. Substring/regex search across BOTH topic shards (slugs, frontmatter, bodies) and undreamed daily-stream events (fragment topic/body, legacy prose). Results are discriminated by `source: "topic" \| "stream"`; topics come first, then streams newest-first.                                                                                                                                                                                             |
-| Tool     | `delete_topic_shard`       | Subagent-only (dreaming). Deletes a topic shard at `memory/topics/<slug>.md`. Path-guarded.                                                                                                                                                                                                                                                                                                                                                                                |
-| Cron     | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`.                                                                                                                                                                                                                                                                                                                                                                                        |
-| Hook     | `session.idle`             | Per-session debouncer with size-based ceiling. Spawns `memory-logger` on idle or buffer-trip.                                                                                                                                                                                                                                                                                                                                                                              |
-| Hook     | `session.end`              | Spawns `memory-logger` immediately; also unlinks the retrieval-cache file for this session.                                                                                                                                                                                                                                                                                                                                                                                |
-| Hook     | `session.turn.start`       | When `buildInjectionPlan` returns `mode: 'index'` and origin is not a subagent, spawns `memory-retrieval` (detached) with the turn's `userPrompt` so the cache reflects the user's current question, not the assembling system prompt. Fire-and-forget; failures route through the plugin logger.                                                                                                                                                                          |
+| Kind     | Name                       | Notes                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
+| -------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Subagent | `memory-logger`            | Reads a parent transcript past a watermark and appends fragments to `memory/streams/<today>.jsonl`. Coalesced per `agentDir`.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
+| Subagent | `dreaming`                 | Reads shards under `memory/topics/` plus undreamed daily-stream events and rebalances the topic shards. Coalesced per `agentDir`. Citation-superset invariant enforced on every run.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
+| Subagent | `memory-retrieval`         | On `session.turn.start` when injection plan is `index` mode, reads the user's actual prompt for this turn + shard listing, writes a focused summary to `memory/.retrieval-cache/<sessionId>.md`. Coalesced per `parentSessionId`. Declares `profile: 'fast'` (retrieval is "≤3 keyword searches + 1 write", no reasoning required) and `timeoutMs: 30_000` so a wedged provider call releases the coalescing key instead of poisoning the cache for every subsequent turn.                                                                                                                                                                                                                                                            |
+| Tool     | `memory_search`            | Main-agent tool. Substring/regex search across BOTH topic shards (slugs, frontmatter, bodies) and undreamed daily-stream events (fragment topic/body, legacy prose). Plain queries are phrase-first: the whole query is tried as one substring, and if that finds nothing the query is split on whitespace and the distinct words are OR-matched, ranked by how many words each hit contains (regex queries never fall back). Results are discriminated by `source: "topic" \| "stream"`; exact-phrase (and regex) results list topics first, then streams newest-first, while word-fallback results are ranked by matched-word count with that order as the tiebreak (so a higher-scoring stream can precede a lower-scoring topic). |
+| Tool     | `delete_topic_shard`       | Subagent-only (dreaming). Deletes a topic shard at `memory/topics/<slug>.md`. Path-guarded.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
+| Cron     | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
+| Hook     | `session.idle`             | Per-session debouncer with size-based ceiling. Spawns `memory-logger` on idle or buffer-trip.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
+| Hook     | `session.end`              | Spawns `memory-logger` immediately; also unlinks the retrieval-cache file for this session.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
+| Hook     | `session.turn.start`       | When `buildInjectionPlan` returns `mode: 'index'` and origin is not a subagent, spawns `memory-retrieval` (detached) with the turn's `userPrompt` so the cache reflects the user's current question, not the assembling system prompt. Fire-and-forget; failures route through the plugin logger.                                                                                                                                                                                                                                                                                                                                                                                                                                     |
 ## Memory injection (two-tier, topic shards only)

package/src/bundled-plugins/memory/dreaming.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import { join } from 'node:path'
 import { z } from 'zod'
+import { withGitLock } from '@/git/mutex'
 import { defineTool, lsTool, readTool, type Subagent, writeTool } from '@/plugin'
 import { formatLocalDate, formatLocalDateTime } from '@/shared'
@@ -419,6 +420,10 @@ async function ensureMemoryFiles(agentDir: string): Promise<void> {
 // `git add` fails with "outside of your sparse-checkout definition" on a
 // skip-worktree path.
 export async function commitMemorySnapshot(cwd: string): Promise<void> {
+  await withGitLock(cwd, () => commitMemorySnapshotUnlocked(cwd))
+}
+async function commitMemorySnapshotUnlocked(cwd: string): Promise<void> {
   const bun = (globalThis as { Bun?: { spawn: typeof Bun.spawn } }).Bun
   if (!bun) return
   if (!existsSync(join(cwd, '.git'))) return

package/src/bundled-plugins/memory/search-tool.ts CHANGED Viewed

@@ -36,7 +36,7 @@ type Matcher = (haystack: string) => boolean
 export const memorySearchTool = defineTool({
   description:
-    'Search the agent\'s long-term memory. Covers both topic shards under memory/topics/ (consolidated facts) and undreamed daily-stream events under memory/streams/ (recent fragments not yet folded into shards). Case-insensitive substring by default; asRegex=true treats query as a JavaScript regex. Returns matches discriminated by `source: "topic" | "stream"`, each with line-context excerpts; full=true includes complete bodies. Topic matches come first (alphabetical by slug), then stream matches (newest day first).',
+    'Search the agent\'s long-term memory. Covers both topic shards under memory/topics/ (consolidated facts) and undreamed daily-stream events under memory/streams/ (recent fragments not yet folded into shards). Case-insensitive substring by default: tries the whole query as one phrase first, and if that finds nothing, falls back to OR-matching the individual words (ranked by how many words each hit contains) — so a multi-word query still returns results even when no entry contains the exact phrase. asRegex=true treats query as a JavaScript regex (no word fallback). Returns matches discriminated by `source: "topic" | "stream"`, each with line-context excerpts; full=true includes complete bodies. Ordering depends on mode: exact-phrase (and regex) results list all topic matches first (alphabetical by slug), then stream matches (newest day first); word-fallback results are ranked by matched-word count, with that same topic-first/stream-newest order as the tiebreak within each score band, so a higher-scoring stream match can precede a lower-scoring topic match.',
   parameters: z.object({
     query: z.string(),
     asRegex: z.boolean().default(false),
@@ -58,10 +58,49 @@ export const memorySearchTool = defineTool({
     }
     const result = searchAll(shards, streamDays, matcherOrError, { full, maxResults })
+    if ('matches' in result && result.matches.length === 0) {
+      const fallback = tokenFallback(query, asRegex, shards, streamDays, { full, maxResults })
+      if (fallback !== null) return resultToToolResult(fallback)
+    }
     return resultToToolResult(result)
   },
 })
+// Phrase-first/token-fallback: the descriptive multi-word queries the
+// retrieval subagent issues rarely appear verbatim in any body, so a
+// whole-phrase substring search returns nothing while every component word is
+// present. When the phrase search comes up empty, split on whitespace and
+// OR-match the distinct tokens, ranking each hit by how many tokens it
+// matched (richer matches first) with the natural topic-first/newest-stream
+// order as the stable tiebreak. Returns null when tokenizing cannot widen the
+// search: regex mode (whitespace is intentional pattern syntax), or a token
+// set that is identical to the phrase already tried (a single clean token, so
+// the phrase search already covered it).
+function tokenFallback(
+  query: string,
+  asRegex: boolean,
+  shards: TopicShard[],
+  streamDays: UndreamedStreamDay[],
+  options: { full: boolean; maxResults: number },
+): MemorySearchResult | null {
+  if (asRegex) return null
+  const tokens = distinctTokens(query)
+  if (tokens.length === 0) return null
+  if (tokens.length === 1 && tokens[0] === query.trim().toLowerCase()) return null
+  return searchAllRanked(shards, streamDays, tokens, options)
+}
+function distinctTokens(query: string): string[] {
+  return [
+    ...new Set(
+      query
+        .toLowerCase()
+        .split(/\s+/)
+        .filter((t) => t.length > 0),
+    ),
+  ]
+}
 function buildMatcher(query: string, asRegex: boolean): Matcher | string {
   if (asRegex) {
     try {
@@ -119,6 +158,64 @@ function searchAll(
   return truncatedAt === undefined ? { matches } : { matches, truncatedAt }
 }
+// Token-OR variant of searchAll. Builds each match with an any-token matcher
+// (so a hit requires only one token and the excerpt anchors on the first line
+// matching any token), then scores it by how many distinct tokens appear in
+// its full searchable text. Results sort by score descending; ties keep the
+// natural enumeration order (topics first in loadAllShards order, then stream
+// days newest-first), so the established ordering contract holds within each
+// score band. maxResults truncation is applied last, after ranking.
+function searchAllRanked(
+  shards: TopicShard[],
+  streamDays: UndreamedStreamDay[],
+  tokens: string[],
+  options: { full: boolean; maxResults: number },
+): MemorySearchResult {
+  const anyToken: Matcher = (haystack) => {
+    const lower = haystack.toLowerCase()
+    return tokens.some((t) => lower.includes(t))
+  }
+  const scoreOf = (text: string): number => {
+    const lower = text.toLowerCase()
+    return tokens.reduce((n, t) => (lower.includes(t) ? n + 1 : n), 0)
+  }
+  const scored: Array<{ match: MemorySearchMatch; score: number; order: number }> = []
+  let order = 0
+  for (const shard of shards) {
+    const match = matchShard(shard, anyToken, options.full)
+    if (match === null) continue
+    scored.push({ match, score: scoreOf(shardSearchText(shard)), order: order++ })
+  }
+  for (let i = streamDays.length - 1; i >= 0; i--) {
+    const day = streamDays[i]!
+    for (const event of day.events) {
+      const match = matchStreamEvent(day, event, anyToken, options.full)
+      if (match === null) continue
+      scored.push({ match, score: scoreOf(eventSearchText(event)), order: order++ })
+    }
+  }
+  scored.sort((a, b) => b.score - a.score || a.order - b.order)
+  if (scored.length > options.maxResults) {
+    return { matches: scored.slice(0, options.maxResults).map((s) => s.match), truncatedAt: options.maxResults }
+  }
+  return { matches: scored.map((s) => s.match) }
+}
+function shardSearchText(shard: TopicShard): string {
+  return [shard.slug, shard.frontmatter.heading, ...(shard.frontmatter.tags ?? []), shard.body].join('\n')
+}
+function eventSearchText(event: StreamEvent): string {
+  if (event.type === 'fragment') return `${event.topic}\n${event.body}`
+  if (event.type === 'legacy_prose') return event.text
+  return ''
+}
 function matchShard(shard: TopicShard, matcher: Matcher, full: boolean): TopicMatch | null {
   const bodyLines = splitBodyLines(shard.body)
   const firstBodyLineIndex = bodyLines.findIndex((line) => matcher(line))

package/src/bundled-plugins/operator/operator.ts CHANGED Viewed

@@ -18,8 +18,11 @@ You have a full tool set: read, write, edit, grep, find, ls, bash. You can:
 - Run shell commands with side effects (bash without the read-only restriction)
 - Use any tool available to a normal operator session
+You CAN delegate, but rarely should:
+- You may \`spawn_subagent\` to hand a clearly separable, context-heavy chunk to a fresh worker — e.g. a focused read-only investigation of a large area you don't want to load into your own context. Spawn only when delegation clearly pays for itself; doing the work yourself is the default. The delegation chain is depth-limited, so a worker you spawn cannot spawn again — keep your own tree flat.
+- Use \`subagent_output\` and \`subagent_cancel\` only for tasks YOU spawned; you cannot see other branches' subagents.
 You CANNOT:
-- Spawn further subagents (you are at the end of the delegation chain).
 - Talk to the user directly (the parent owns the conversation).
 - Use channel_send, channel_reply, or any channel tool.
@@ -67,6 +70,7 @@ export function createOperatorSubagent(): Subagent<OperatorPayload> {
     payloadSchema: operatorPayloadSchema,
     visibility: 'public',
     requiresSpecificPermission: true,
+    canSpawnSubagents: true,
     inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
     toolResultBudget: {
       maxTotalBytes: 1_000_000,

package/src/bundled-plugins/reviewer/reviewer.ts CHANGED Viewed

@@ -9,8 +9,8 @@ import {
   lsTool,
   readTool,
   type Subagent,
-  webfetchTool,
-  websearchTool,
+  webFetchTool,
+  webSearchTool,
 } from '@/plugin'
 import { CODE_REVIEW_SKILL } from './skills/code-review'
@@ -55,9 +55,17 @@ You are STRICTLY PROHIBITED from:
 - Posting to GitHub, Slack, Discord, email, or any channel — the parent owns posting
 - Pushing, merging, rebasing, or otherwise mutating remote state
 - Using bash for: mkdir, touch, rm, cp, mv, git add, git commit, git push, git rebase, git reset, npm install, pip install, or any write operation
-- Spawning further subagents — you are at the end of the delegation chain
-Your role is EXCLUSIVELY to analyze and report. The parent agent decides what to do with your findings.
+Your role is EXCLUSIVELY to analyze and report. The parent agent decides what to do with your findings. Delegating part of that analysis is fine; performing side effects through a delegate is NOT — anything you cannot do directly, a subagent you spawn cannot do for you.
+## Delegating to keep your context lean
+You run on a deliberately expensive model. Reading a sprawling file tree, a giant diff, or a pile of vendor docs into YOUR context burns that budget on grunt work. When a slice of the job is bulky-but-mechanical — "summarize what these 40 files do", "extract the public API of this module", "gather the relevant passages from this 2,000-line diff" — hand it to a cheaper worker with \`spawn_subagent\` and review the distilled result instead of the raw bulk.
+- Spawn read-only/research workers for context-heavy gathering, not for forming the verdict. The findings and the \`<review>\` block are YOURS — never delegate the judgment.
+- Each delegated task must be self-contained: the worker does not see this conversation or the target. Put everything it needs in the prompt.
+- The chain is depth-limited: a worker you spawn cannot spawn again. Keep delegation one level deep.
+- \`subagent_output\`/\`subagent_cancel\` reach only the tasks YOU spawned. Use background spawns for parallel gathering, then fold the results into your single review pass.
 ## Tools
@@ -68,8 +76,8 @@ The runtime exposes these tools to you by these EXACT names — call them by nam
 - \`find\` — locate files by name pattern
 - \`ls\` — list a directory's immediate contents
 - \`bash\` — read-only commands ONLY. Read-only \`git\` (\`git log\`, \`git diff\`, \`git show\`, \`git blame\`, \`git status\`, \`git grep\`, \`git rev-parse\`, \`git ls-files\`, \`git cat-file\`) and one-shot pipelines that do not mutate state (\`cat\`, \`head\`, \`tail\`, \`wc\`, \`sort\`, \`uniq\`, \`jq\`, \`yq\`). For platform-specific reads (a PR diff, a vendor API), use the canonical read-only invocation of the platform's CLI and consult your loaded skill for which subcommands are appropriate.
-- \`websearch\` — search the public web (e.g. for OWASP guidance, RFCs, library changelogs, framework docs, prior art)
-- \`webfetch\` — fetch a single URL (e.g. to read a linked spec, vendor doc, or article cited in the target)
+- \`web_search\` — search the public web (e.g. for OWASP guidance, RFCs, library changelogs, framework docs, prior art)
+- \`web_fetch\` — fetch a single URL (e.g. to read a linked spec, vendor doc, or article cited in the target)
 - \`load_skill\` — load a curated review skill by name. See the section below.
 Launch independent tools in parallel. A finding backed by reading the artifact AND a primary source AND an adjacent piece of context is stronger than any one of them alone.
@@ -94,7 +102,7 @@ These rules apply to every review regardless of domain.
 1. **Form findings, not opinions.** Each finding is one issue. State severity (\`blocker\` / \`concern\` / \`nit\` / \`praise\`). Cite specific evidence — a file:line, a diff hunk, a quoted passage. Suggest a concrete alternative.
 2. **Evidence is mandatory.** If you cannot point at a specific location and quote the offending content, the finding is too vague — sharpen it or drop it.
-3. **Verify external claims.** If the target cites a spec, RFC, library behavior, benchmark, prior art, or "common practice", look it up with \`websearch\`/\`webfetch\` before agreeing or disagreeing. Cite the source in the finding.
+3. **Verify external claims.** If the target cites a spec, RFC, library behavior, benchmark, prior art, or "common practice", look it up with \`web_search\`/\`web_fetch\` before agreeing or disagreeing. Cite the source in the finding.
 4. **One finding, one concern.** Do not bundle unrelated issues into a single finding. The parent parses findings; mixed-concern findings break that.
 5. **Praise is rare.** Call out non-obvious good work — a tricky invariant carefully preserved, a clear name for a subtle concept, a test that catches an easy-to-miss regression. Do not pad reviews with positivity.
 6. **No generic LLM review noise.** "Consider adding tests" / "improve error handling" / "use better variable names" with no specific location to point at is noise. If you cannot point at a line, do not raise the finding.
@@ -168,10 +176,11 @@ If none of the listed skills fit the target, load \`general\` and explain in \`<
     // user has not configured `models.deep` in typeclaw.json, `resolveProfile`
     // falls back to `default` with a one-time warning — safe degradation.
     profile: 'deep',
-    tools: [readTool, grepTool, findTool, lsTool, bashTool, websearchTool, webfetchTool],
+    tools: [readTool, grepTool, findTool, lsTool, bashTool, webSearchTool, webFetchTool],
     customTools: [loadSkillTool],
     payloadSchema: reviewerPayloadSchema,
     visibility: 'public',
+    canSpawnSubagents: true,
     timeoutMs: REVIEWER_SPAWN_TIMEOUT_MS,
     inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
     toolResultBudget: {
@@ -179,7 +188,7 @@ If none of the listed skills fit the target, load \`general\` and explain in \`<
       // diffs and multiple files plus web sources; lower than operator (1MB)
       // because we are read-only and producing analysis, not building.
       maxTotalBytes: 512_000,
-      toolNames: ['read', 'grep', 'find', 'ls', 'bash', 'websearch', 'webfetch', 'load_skill'],
+      toolNames: ['read', 'grep', 'find', 'ls', 'bash', 'web_search', 'web_fetch', 'load_skill'],
     },
   }
 }

package/src/bundled-plugins/reviewer/skills/code-review.ts CHANGED Viewed

@@ -33,7 +33,7 @@ A finding without context is noise. Before forming findings:
 Prioritize in this order:
 1. **Correctness.** Does the change do what its description claims? Off-by-one errors, missing null/undefined handling, race conditions, incorrect error propagation, broken invariants.
-2. **Security.** Injection vectors (SQL, shell, HTML), missing authz/authn checks, secret leakage in logs or error messages, unsafe deserialization, SSRF, path traversal, time-of-check-time-of-use. Cite OWASP / CWE / RFC by number when relevant; verify with \`websearch\` or \`webfetch\` before asserting.
+2. **Security.** Injection vectors (SQL, shell, HTML), missing authz/authn checks, secret leakage in logs or error messages, unsafe deserialization, SSRF, path traversal, time-of-check-time-of-use. Cite OWASP / CWE / RFC by number when relevant; verify with \`web_search\` or \`web_fetch\` before asserting.
 3. **Architecture fit.** Does the change respect existing layering? Does it introduce a new dependency where the existing pattern would have worked? Does it duplicate logic that already exists elsewhere in the repo?
 4. **Test coverage.** New behavior should have new tests. Edge cases the description names should be tested. If existing tests were deleted or skipped, that is a blocker absent a stated reason. Look past the raw test count, but only flag a redundant case when you can show the *inputs themselves* reach the same path — same branch, same validation rule, same boundary — not merely that the assertion shape is identical. Table-driven and parametrized tests legitimately share one assertion across many inputs while each input exercises a distinct branch, parser, or edge case; that is coverage, not duplication. The finding is "these inputs are indistinguishable to the code under test," and you must name the path they collapse onto — never "the assertions look the same."
 5. **Error handling.** Empty catch blocks, swallowed errors, errors converted to silent fallbacks, retry loops without bounded backoff, missing timeouts on external calls.

package/src/bundled-plugins/reviewer/skills/general.ts CHANGED Viewed

@@ -11,7 +11,7 @@ You have been asked to review something that does not clearly fit a specific dom
 ## How to acquire the target
-- **A URL** — \`webfetch\` it. If it is a private resource the fetch cannot reach, say so in \`<summary>\` and review what was provided in the payload.
+- **A URL** — \`web_fetch\` it. If it is a private resource the fetch cannot reach, say so in \`<summary>\` and review what was provided in the payload.
 - **A file path** — \`read\` it. \`ls\` the parent directory if siblings might be relevant.
 - **Inline text in the payload** — read the payload carefully; quote from it when forming evidence.
 - **A reference to something the caller has** — ask the caller to provide it. Return a single \`blocker\` finding describing what you need and a \`comment\` verdict.

package/src/bundled-plugins/scout/scout.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { z } from 'zod'
-import { type Subagent, webfetchTool, websearchTool } from '@/plugin'
+import { type Subagent, webFetchTool, webSearchTool } from '@/plugin'
 export const SCOUT_SYSTEM_PROMPT = `You are a web-research specialist running inside TypeClaw. Your job: gather facts from the public internet and return a focused, citation-backed answer to the caller. For LOCAL questions (codebase, sessions, memory, config, git history, mounts), the caller should spawn \`explorer\` instead — you have no filesystem tools.
@@ -17,8 +17,8 @@ Your role is EXCLUSIVELY to search and read public web sources.
 The runtime exposes these tools to you by these EXACT names — call them by name, do not paraphrase:
-- \`websearch\` — search the public web. Returns ranked \`{title, url, snippet}\` entries. Defaults to DuckDuckGo; pass \`source: "wikipedia"\` for encyclopedic lookups.
-- \`webfetch\` — fetch a single HTTP(S) URL and return the body, optionally compacted by a strategy:
+- \`web_search\` — search the public web. Returns ranked \`{title, url, snippet}\` entries. Defaults to DuckDuckGo; pass \`source: "wikipedia"\` for encyclopedic lookups.
+- \`web_fetch\` — fetch a single HTTP(S) URL and return the body, optionally compacted by a strategy:
   - \`readability\` (default for HTML) — extract article content as markdown
   - \`jq\` — query JSON APIs (pass \`query\`)
   - \`selector\` — extract text from CSS-selected elements (pass \`selector\`)
@@ -26,7 +26,7 @@ The runtime exposes these tools to you by these EXACT names — call them by nam
   - \`snapshot\` — indented semantic tree of the page (forms, headings, links)
   - \`raw\` — no processing
-Launch multiple \`websearch\` queries in parallel for the same topic — different phrasings surface different sources. When a search result looks promising, \`webfetch\` it for the full content.
+Launch multiple \`web_search\` queries in parallel for the same topic — different phrasings surface different sources. When a search result looks promising, \`web_fetch\` it for the full content.
 ## Process
@@ -60,7 +60,7 @@ End every response with this exact structure:
 ## Rules
-- Cite every claim with a URL from your <sources> list. **Never invent a URL.** If you didn't \`webfetch\` it, don't cite it.
+- Cite every claim with a URL from your <sources> list. **Never invent a URL.** If you didn't \`web_fetch\` it, don't cite it.
 - If a fact appears only in your training data and you couldn't find a web source for it, say so explicitly rather than answering from memory.
 - Prefer primary sources (official docs, vendor changelogs, GitHub releases, paper PDFs) over aggregator blogs.
 - When dates matter (versions, deprecations, vulnerability disclosures), surface the date of the source.
@@ -82,13 +82,13 @@ export function createScoutSubagent(): Subagent<ScoutPayload> {
   return {
     systemPrompt: SCOUT_SYSTEM_PROMPT,
     profile: 'fast',
-    tools: [websearchTool, webfetchTool],
+    tools: [webSearchTool, webFetchTool],
     payloadSchema: scoutPayloadSchema,
     visibility: 'public',
     inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
     toolResultBudget: {
       maxTotalBytes: 512_000,
-      toolNames: ['websearch', 'webfetch'],
+      toolNames: ['web_search', 'web_fetch'],
     },
   }
 }

package/src/bundled-plugins/security/policies/private-surface-read.ts CHANGED Viewed

@@ -10,7 +10,7 @@ export const GUARD_PRIVATE_SURFACE_READ = 'privateSurfaceRead'
 // bash is excluded: its access to hidden paths is contained by the bwrap
 // sandbox (applyBashSandbox), not by blocking the call. Every OTHER tool is
 // scanned, so a new file-reading tool — bundled or third-party — is covered
-// the day it ships without a whitelist edit. websearch/webfetch take URLs, not
+// the day it ships without a whitelist edit. web_search/web_fetch take URLs, not
 // local paths, and the path-plausibility filter keeps their args from matching.
 const UNSCANNED_TOOLS = new Set(['bash'])
@@ -65,7 +65,7 @@ export function checkPrivateSurfaceReadGuard(options: {
 // Field names whose values are ALWAYS free text (prose/queries/ids), NEVER a
 // filesystem path, for EVERY tool. Scanning them caused false positives: a
-// guest's `channel_reply({ text: "the memory leak" })` or `websearch({ query:
+// guest's `channel_reply({ text: "the memory leak" })` or `web_search({ query:
 // "workspace setup" })` resolve to a bare hidden-dir name and were wrongly
 // blocked. This is a DENYLIST OF KEY NAMES, not a tool whitelist: an unknown
 // field on an unknown tool is still scanned (fail-closed for new path-bearing

package/src/bundled-plugins/security/policies/ssrf.ts CHANGED Viewed

@@ -100,7 +100,7 @@ export function classifyUrl(rawUrl: string): SsrfClassification {
 export function checkSsrfGuard(options: { tool: string; args: Record<string, unknown> }): SecurityBlock | undefined {
   const { tool, args } = options
-  if (tool !== 'webfetch') return undefined
+  if (tool !== 'web_fetch') return undefined
   const url = args.url
   if (typeof url !== 'string') return undefined
   if (isGuardAcknowledged(args, GUARD_SSRF)) return undefined
@@ -111,9 +111,9 @@ export function checkSsrfGuard(options: { tool: string; args: Record<string, unk
   return {
     block: true,
     reason: [
-      `Guard \`${GUARD_SSRF}\` blocked webfetch to a non-public destination (${result.category ?? 'unknown'}): ${result.reason ?? 'classified as internal'}.`,
+      `Guard \`${GUARD_SSRF}\` blocked web_fetch to a non-public destination (${result.category ?? 'unknown'}): ${result.reason ?? 'classified as internal'}.`,
       'This protects against SSRF, cloud metadata exfiltration, and accidental fetches against internal services.',
-      `If this is genuinely intentional and you trust the URL, retry with \`${ACKNOWLEDGE_GUARDS}.${GUARD_SSRF}: true\` in the webfetch arguments.`,
+      `If this is genuinely intentional and you trust the URL, retry with \`${ACKNOWLEDGE_GUARDS}.${GUARD_SSRF}: true\` in the web_fetch arguments.`,
     ].join(' '),
   }
 }

package/src/bundled-plugins/tool-result-cap/README.md CHANGED Viewed

@@ -9,7 +9,7 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
 `pi-coding-agent`'s built-in tools occasionally return very large payloads that the model only needed once. Two empirically observed cases:
 1. **`read` on an image file** returns the base64-encoded image inline (e.g. `{type:"image", data:"<3.2MB of base64>"}`). The model uses it on the turn it was asked for, then sees the same 3.2MB of base64 as conversation context on every subsequent prompt — until compaction fires (which is token-driven, not byte-driven, so a single fat blob may sit in context for many turns before compaction is triggered).
-2. **`webfetch` on a binary URL** (PNG, ZIP, etc.) receives the raw response body, treats it as text, and stores raw binary as a JSON-encoded string. Same effect: 100KB+ of mojibake sits in the transcript permanently.
+2. **`web_fetch` on a binary URL** (PNG, ZIP, etc.) receives the raw response body, treats it as text, and stores raw binary as a JSON-encoded string. Same effect: 100KB+ of mojibake sits in the transcript permanently.
 The result is a session JSONL file that's tens of megabytes on disk but mostly one or two giant tool results, plus 3-minute first-prompt latencies after container restart because the full transcript gets re-shipped to the LLM as context.

package/src/channels/adapters/github/inbound.ts CHANGED Viewed

@@ -494,6 +494,12 @@ function classifyOpenedReviewTrigger(input: OpenedReviewTriggerInput): InboundMe
   const decoyLogin = resolveDecoyReviewerLogin(selfLogin, authType)
   if (sender.login === selfLogin || (decoyLogin !== null && sender.login === decoyLogin)) return null
+  // A draft PR is work-in-progress, so the automatic `opened` path skips it: null
+  // here drops to awareness-only context (like a non-`opened` reviewOn) instead of
+  // waking a review. An explicit `review_requested` still triggers on a draft via
+  // classifyReviewRequest, preserving "skip until explicitly requested".
+  if (readBoolean(pr, 'draft') === true) return null
   const title = readString(pr, 'title') ?? `#${number}`
   const head = readString(readRecord(pr.head), 'ref')
   const baseRef = readString(readRecord(pr.base), 'ref')
@@ -738,6 +744,11 @@ function readNumber(obj: Record<string, unknown> | null, key: string): number |
   return typeof value === 'number' && Number.isFinite(value) ? value : null
 }
+function readBoolean(obj: Record<string, unknown> | null, key: string): boolean | null {
+  const value = obj?.[key]
+  return typeof value === 'boolean' ? value : null
+}
 function ok(): Response {
   return new Response('ok', { status: 200 })
 }