npm - typeclaw - Versions diffs - 0.28.2 → 0.30.0 - Mend

typeclaw 0.28.2 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82)

package/package.json +1 -1
package/src/agent/index.ts +43 -5
package/src/agent/live-subagents.ts +5 -0
package/src/agent/loop-guard.ts +112 -26
package/src/agent/plugin-tools.ts +167 -50
package/src/agent/session-origin.ts +3 -3
package/src/agent/subagent-drain.ts +150 -0
package/src/agent/subagents.ts +41 -3
package/src/agent/system-prompt.ts +29 -4
package/src/agent/tools/channel-send.ts +1 -1
package/src/agent/tools/spawn-subagent.ts +34 -1
package/src/agent/tools/subagent-output.ts +7 -3
package/src/agent/tools/wikipedia.ts +1 -1
package/src/bundled-plugins/bun-hygiene/README.md +12 -11
package/src/bundled-plugins/bun-hygiene/policy.ts +8 -3
package/src/bundled-plugins/explorer/explorer.ts +2 -0
package/src/bundled-plugins/github-cli-auth/approve-idempotency.ts +94 -0
package/src/bundled-plugins/github-cli-auth/effective-approval.ts +98 -0
package/src/bundled-plugins/github-cli-auth/gh-review-inline-detect.ts +130 -0
package/src/bundled-plugins/github-cli-auth/index.ts +27 -2
package/src/bundled-plugins/github-cli-auth/review-recorder.ts +12 -4
package/src/bundled-plugins/memory/memory-logger.ts +3 -3
package/src/bundled-plugins/operator/operator.ts +2 -0
package/src/bundled-plugins/planner/index.ts +11 -0
package/src/bundled-plugins/planner/planner.ts +283 -0
package/src/bundled-plugins/planner/skills/general.ts +65 -0
package/src/bundled-plugins/planner/skills/project.ts +69 -0
package/src/bundled-plugins/researcher/index.ts +11 -0
package/src/bundled-plugins/researcher/researcher.ts +233 -0
package/src/bundled-plugins/researcher/skills/general.ts +105 -0
package/src/bundled-plugins/researcher/write-report.ts +107 -0
package/src/bundled-plugins/reviewer/reviewer.ts +28 -9
package/src/bundled-plugins/reviewer/skills/data-review.ts +77 -0
package/src/bundled-plugins/reviewer/skills/doc-review.ts +79 -0
package/src/bundled-plugins/reviewer/skills/plan-review.ts +64 -0
package/src/bundled-plugins/reviewer/skills/security-audit.ts +70 -0
package/src/bundled-plugins/reviewer/skills/writing-review.ts +63 -0
package/src/bundled-plugins/scout/scout.ts +2 -0
package/src/bundled-plugins/security/policies/prompt-injection.ts +8 -4
package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +3 -2
package/src/channels/adapters/discord-bot.ts +38 -11
package/src/channels/adapters/github/inbound.ts +68 -4
package/src/channels/adapters/kakaotalk-classify.ts +2 -2
package/src/channels/adapters/kakaotalk.ts +2 -2
package/src/channels/adapters/slack-bot-classify.ts +1 -1
package/src/channels/adapters/slack-bot.ts +3 -0
package/src/channels/adapters/telegram-bot.ts +3 -0
package/src/channels/engagement.ts +12 -7
package/src/channels/github-review-claim.ts +15 -3
package/src/channels/router.ts +85 -9
package/src/channels/schema.ts +1 -1
package/src/channels/types.ts +6 -0
package/src/cli/init.ts +13 -2
package/src/cli/ui.ts +64 -0
package/src/config/config.ts +21 -15
package/src/container/start.ts +5 -1
package/src/init/dockerfile.ts +19 -56
package/src/init/hatching.ts +1 -1
package/src/init/index.ts +5 -1
package/src/migrations/index.ts +35 -0
package/src/migrations/secrets-v1-to-v2.ts +344 -0
package/src/run/bundled-plugins.ts +4 -0
package/src/run/index.ts +13 -0
package/src/sandbox/availability.ts +12 -0
package/src/sandbox/build.ts +12 -0
package/src/sandbox/index.ts +1 -1
package/src/sandbox/policy.ts +8 -0
package/src/server/index.ts +24 -5
package/src/shared/host-locale.ts +27 -0
package/src/shared/protocol.ts +1 -1
package/src/shared/wordmark.ts +19 -0
package/src/skills/typeclaw-config/SKILL.md +32 -32
package/src/skills/typeclaw-kaomoji/SKILL.md +3 -3
package/src/skills/typeclaw-tunnels/SKILL.md +3 -1
package/src/tui/banner.ts +19 -0
package/src/tui/format.ts +34 -0
package/src/tui/index.ts +121 -22
package/src/tui/theme.ts +26 -1
package/src/tunnels/providers/cloudflare-named.ts +15 -4
package/src/tunnels/providers/cloudflare-quick.ts +15 -4
package/src/tunnels/providers/cloudflared-binary.ts +11 -0
package/typeclaw.schema.json +15 -7

package/src/agent/system-prompt.ts CHANGED Viewed

@@ -1,6 +1,15 @@
 import { formatLocalDateTime, formatLocalWeekday, resolveLocalTimezoneName } from '@/shared'
-export const DEFAULT_SYSTEM_PROMPT = `You are a general-purpose AI agent running inside TypeClaw.
+// The orchestration roster (the `Briefly: ...` enumeration of public subagents)
+// is GENERATED from the registry by `renderPublicSubagentRoster` and threaded in
+// here, so a newly-registered public subagent can never be silently missing from
+// the prompt — the drift that once left `researcher` and `planner` unlisted. The
+// rest of the prompt is static. `DEFAULT_SUBAGENT_ROSTER` is the placeholder used
+// by the no-registry path (back-compat callers, the debug dumper); production
+// full-mode sessions pass the real registry-rendered roster via
+// `composeSystemPrompt`'s `subagentRoster` field.
+export function buildDefaultSystemPrompt(subagentRoster: string): string {
+  return `You are a general-purpose AI agent running inside TypeClaw.
 TypeClaw is domain-agnostic — your purpose is defined by \`IDENTITY.md\`, your character by \`SOUL.md\`, and your operating manual by \`AGENTS.md\`. This system prompt only describes the runtime around you.
@@ -80,13 +89,13 @@ Your agent folder is a git repository.
 ## Subagent orchestration
-Delegate focused work to subagents via \`spawn_subagent\`, \`subagent_output\`, \`subagent_cancel\`. Each runs in its own context window with its own tool set. The available subagents and their purpose are listed in the \`spawn_subagent\` tool description — re-read it before delegating. Briefly: \`explorer\` (read-only local recon — code, sessions, memory, git, config; fire liberally), \`scout\` (web research in a fresh context), \`reviewer\` (deep read-only code/PR/plan review, returns a structured verdict; it does NOT post), \`operator\` (write-capable: bash-with-side-effects, write, edit — for browser sessions, refactors, deploys, batch ops, and Claude Code / Codex CLI driving; gated by \`subagent.spawn.operator\`, owner/trusted only — on denial, do the work yourself).
+Delegate focused work to subagents via \`spawn_subagent\`, \`subagent_output\`, \`subagent_cancel\`. Each runs in its own context window with its own tool set. The available subagents and their purpose are listed in the \`spawn_subagent\` tool description — re-read it before delegating. Briefly: ${subagentRoster}.
 There are three delegation modes. Pick deliberately.
-**Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer.
+**Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer. Match the worker to the depth: a fast or narrow web lookup goes to \`scout\`; a fuzzy question that needs decomposition, many sources, cross-validation, and a synthesized verdict goes to \`researcher\` (don't do that grind inline with \`web_search\` yourself).
-**Mode B — Delegate-and-converse.** Asked to DO something long-running (>~30s: installs, builds, \`docker\`, scrapes, long test suites, multi-host loops, any noisy "fetch N and synthesize" chain)? Don't run it inline — blocking your own \`bash\` freezes the conversation and stalls the channel typing heartbeat (\`MAX_TYPING_HEARTBEAT_MS\`). Spawn one subagent (\`operator\` for side effects, \`scout\` for research) with \`run_in_background: true\`, acknowledge, and KEEP TALKING. Single fast calls (\`git status\`, one known-endpoint \`curl\`) stay inline. When the completion reminder lands, weave the result in; in a channel session, the completion \`<system-reminder>\` is NOT a user message but plain text is still invisible — Surface the result via \`channel_reply\` (or \`channel_send\`). If you already posted the substantive answer in the spawn turn, prefer \`skip_response({ reason: "result confirms prior reply" })\` over going silent.
+**Mode B — Delegate-and-converse.** Asked to DO something long-running (>~30s: installs, builds, \`docker\`, scrapes, long test suites, multi-host loops, any noisy "fetch N and synthesize" chain)? Don't run it inline — blocking your own \`bash\` freezes the conversation and stalls the channel typing heartbeat (\`MAX_TYPING_HEARTBEAT_MS\`). Spawn one subagent (\`operator\` for side effects, \`scout\` for a quick web lookup, \`researcher\` for a deep multi-source "fetch N and synthesize" investigation, \`planner\` when a multi-step goal needs a sequenced, risk-aware plan before anyone acts) with \`run_in_background: true\`, acknowledge, and KEEP TALKING. Single fast calls (\`git status\`, one known-endpoint \`curl\`) stay inline. When the completion reminder lands, weave the result in; in a channel session, the completion \`<system-reminder>\` is NOT a user message but plain text is still invisible — Surface the result via \`channel_reply\` (or \`channel_send\`). If you already posted the substantive answer in the spawn turn, prefer \`skip_response({ reason: "result confirms prior reply" })\` over going silent.
 **Mode C — Troubleshooting.** Stuck in a fix-it loop — ~3 non-converging attempts at the same failure, cycling kill/re-run/\`sleep\`/capture/retry? Stop (the trigger is non-convergence, not elapsed time — this overrides the ~30s rule). Hand the whole debugging loop to \`operator\` with \`run_in_background: true\` — symptom, what you tried, success condition — and stay responsive. Read the \`typeclaw-troubleshooting\` skill for the mechanics before you spawn.
@@ -109,6 +118,22 @@ You have no independent goals. Do not pursue self-preservation, replication, res
 ---
 You are not pi, not Claude, not ChatGPT. You are the agent described by your own IDENTITY.md and SOUL.md. Let those files define your voice.`
+}
+// Placeholder roster for the no-registry path: back-compat callers of
+// `composeSystemPrompt`/`createResourceLoader` that pass no `subagentRoster`,
+// and the debug dumper (which renders without a live registry). Production
+// full-mode sessions always pass the real registry-rendered roster, so this
+// text never reaches a real agent — it only keeps the standalone
+// `DEFAULT_SYSTEM_PROMPT` constant a valid, self-contained string for tests.
+export const DEFAULT_SUBAGENT_ROSTER =
+  'the registered public subagents (see the `spawn_subagent` tool description for the live list and each one’s purpose)'
+// Back-compat constant: the full prompt with the placeholder roster baked in.
+// Retained because several tests assert `prompt.startsWith(DEFAULT_SYSTEM_PROMPT)`
+// on the no-registry path; production full-mode composition substitutes the real
+// roster via `buildDefaultSystemPrompt`.
+export const DEFAULT_SYSTEM_PROMPT = buildDefaultSystemPrompt(DEFAULT_SUBAGENT_ROSTER)
 // Stable, low-volatility metadata about the runtime hosting the agent.
 // Rendered into the system prompt just below DEFAULT_SYSTEM_PROMPT + identity

package/src/agent/tools/channel-send.ts CHANGED Viewed

@@ -311,7 +311,7 @@ function recordResolvedThreadFromSend(sessionId: string, workspace: string, chat
 // as the session's origin (same adapter+workspace+chat) but DROPPED the
 // thread. This catches the "model forgot to copy thread verbatim" failure
 // mode without blocking legitimate intent — if leaving the thread was on
-// purpose ("새 스레드에서 시작하자"), the model can ignore this hint; if it
+// purpose (e.g. "let's start in a new thread"), the model can ignore this hint; if it
 // wasn't, the next channel_send (or channel_reply) can correct course.
 //
 // Only fires when the origin had a thread to begin with — channel-root

package/src/agent/tools/spawn-subagent.ts CHANGED Viewed

@@ -42,6 +42,7 @@ export type CreateSpawnSubagentToolOptions = {
   stream?: Stream
   generateTaskId?: () => string
   now?: () => number
+  allowBackgroundFromSubagent?: boolean
 }
 export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions) {
@@ -56,6 +57,7 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
     stream,
     generateTaskId = () => `${SPAWN_TASK_ID_PREFIX}${randomUUID().replace(/-/g, '').slice(0, 12)}`,
     now = () => Date.now(),
+    allowBackgroundFromSubagent,
   } = options
   return defineTool({
@@ -81,7 +83,9 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
           description:
             'When true, the spawn returns immediately with a task_id; the subagent runs in the background and a system-reminder is delivered when it completes. ' +
             'When false (default), the spawn blocks until the subagent finishes and returns its final message synchronously. ' +
-            'Use background mode for long-running tasks where you want to keep the conversation moving (Mode B) or for parallel fan-out (Mode A).',
+            'For PARALLEL fan-out, do NOT use background mode: emit several spawn_subagent calls (sync, the default) in a SINGLE turn — they execute concurrently and all their results return together before your next turn. ' +
+            'Reserve background mode for a long-running task you want to keep the conversation moving alongside (Mode B). ' +
+            'NOTE: background mode from subagents is only available when that subagent is explicitly enabled to drain child results; otherwise use sync spawns batched in one turn instead.',
         }),
       ),
     }),
@@ -105,6 +109,13 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
           `subagent.spawn denied: maximum delegation depth (${MAX_SUBAGENT_DEPTH}) reached; a subagent at this depth cannot spawn further subagents`,
         )
       }
+      if (origin?.kind === 'subagent' && params.run_in_background === true && allowBackgroundFromSubagent !== true) {
+        return errorResult(
+          'subagent.spawn denied: background spawning is not available from a subagent session because the result cannot be delivered after this turn ends. ' +
+            'Retry with run_in_background=false (or omit it) — the synchronous spawn blocks until the child finishes and returns its result into your context, ' +
+            'which is what you need to fold the result into your output.',
+        )
+      }
       const taskId = generateTaskId()
       const subagentName = params.subagent_type
@@ -140,6 +151,7 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
         subagentName,
         parentSessionId,
         ...(spawnedByRole !== undefined ? { spawnedByRole } : {}),
+        background,
         startedAt,
         status: 'running' as const,
         abort: resolvedHandle.abort,
@@ -246,6 +258,27 @@ function publicSubagentNames(registry: SubagentRegistry): string[] {
     .sort()
 }
+// Render the "## Subagent orchestration" roster from the registry so it can
+// never drift from the actually-registered public subagents (the bug that left
+// `researcher`/`planner` unlisted). Same filter+sort as `publicSubagentNames`,
+// so this roster and the `spawn_subagent` tool description agree by
+// construction. Throws if a public subagent lacks `rosterDescription` — a
+// fail-loud contract that turns "silently missing from the prompt" into a build
+// error caught by the drift-guard test.
+export function renderPublicSubagentRoster(registry: SubagentRegistry): string {
+  return publicSubagentNames(registry)
+    .map((name) => {
+      const description = registry[name]?.rosterDescription?.trim()
+      if (description === undefined || description === '') {
+        throw new Error(
+          `public subagent "${name}" is missing rosterDescription (required for the orchestration roster)`,
+        )
+      }
+      return `\`${name}\` (${description})`
+    })
+    .join(', ')
+}
 function isPublicSubagent(sub: Subagent<unknown>): boolean {
   return sub.visibility === 'public'
 }

package/src/agent/tools/subagent-output.ts CHANGED Viewed

@@ -58,9 +58,13 @@ export function createSubagentOutputTool(options: CreateSubagentOutputToolOption
       'Fetch the current state of a subagent you previously spawned. Returns one of three statuses: ' +
       "'running' (with a human-readable status_summary and a tail of recent progress events), " +
       "'completed' (with the final message), or 'failed' (with the error). " +
-      'Returns immediately with a snapshot — never blocks. ' +
-      'For backgrounded spawns, end your turn after spawning and wait for the completion <system-reminder>; ' +
-      'then call this once to fetch the result. Use it for ad-hoc status checks too — never in a polling loop.',
+      'Returns immediately with a snapshot — never blocks, so calling it again right away just returns the same ' +
+      "'running' snapshot and wastes a turn. " +
+      'For backgrounded spawns, END YOUR TURN after spawning and wait for the completion <system-reminder>; ' +
+      'it arrives on its own when the subagent finishes — you do NOT need to poll for it. ' +
+      'Then call this once to fetch the result. ' +
+      'Do NOT poll in a loop, and do NOT round-robin across several task_ids while they run — ' +
+      'that is treated as a loop and will be blocked. Use it only for a single ad-hoc status check.',
     parameters: Type.Object({
       task_id: Type.String({
         description: 'The task_id returned by a previous spawn_subagent call.',

package/src/agent/tools/wikipedia.ts CHANGED Viewed

@@ -20,7 +20,7 @@ export async function wikipediaSearch(query: string, limit: number, signal?: Abo
   })
   const response = await fetch(`${OPENSEARCH_URL}?${params.toString()}`, {
     headers: {
-      'User-Agent': 'TypeClaw/0.1 (https://github.com/devxoul/typeclaw)',
+      'User-Agent': 'TypeClaw/0.1 (https://github.com/typeclaw/typeclaw)',
       Accept: 'application/json',
     },
     signal,

package/src/bundled-plugins/bun-hygiene/README.md CHANGED Viewed

@@ -3,7 +3,7 @@
 The bundled bun-hygiene plugin. Registers a `tool.before` hook that blocks two classes of `bash` command:
 1. **Global package installs** — `npm install -g`, `pnpm add -g`, `yarn global add`, `bun add -g`, and their `--global` / bundled-flag variants.
-2. **Non-bun package managers** — any `npm`, `npx`, `pnpm`, `pnpx`, or `yarn` invocation.
+2. **Non-bun install managers** — any `npm`, `pnpm`, or `yarn` invocation. The ephemeral runners `npx` and `pnpx` are **allowed** (alongside `bunx`): they execute a tool once without touching the dependency tree or writing a competing lockfile, so they don't undermine the bun-standardization this guard protects.
 This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]` entry to add. Both guards carry an `acknowledgeGuards` escape hatch (below) for the cases where the agent genuinely needs the blocked command.
@@ -11,16 +11,16 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
 **Global installs don't persist.** The agent folder is bind-mounted at `/agent`; everything else in the container — including `~/.bun`, `~/.npm`, and the global `node_modules` a global install writes to — is ephemeral and wiped on every `typeclaw restart`. An agent that runs `npm install -g some-cli` gets a tool that works for the rest of the session and silently vanishes on the next boot, leading to confusing "command not found" failures that look like regressions. The fix is to either add the dependency to `package.json` (`bun add <pkg>`, which lives in the bind-mounted folder and survives) or run it once without installing (`bunx <pkg>`).
-**The container standardizes on bun.** TypeClaw is Bun-native end to end (see the root README). Mixing in `npm`/`pnpm`/`yarn` produces competing lockfiles and install trees, and `npx` pulls a second package-execution path when `bunx` already covers it. Steering every package-manager call to bun keeps the dependency state coherent.
+**The container standardizes on bun for dependency management.** TypeClaw is Bun-native end to end (see the root README). Mixing in `npm`/`pnpm`/`yarn` installs produces competing lockfiles and install trees, so those are steered to bun. Ephemeral runners (`npx`/`pnpx`/`bunx`) are not install managers — they run a tool once and leave no lockfile or `node_modules` behind — so they're allowed for one-off execution.
 Both guards **block with guidance** rather than silently rewriting the command — the agent sees exactly why the command was rejected and what to run instead, the same UX as the bundled `security` and `guard` policies.
 ## Guards
-| Guard                  | Triggers on                                                                                       | Guidance in the block reason                                               |
-| ---------------------- | ------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- |
-| `globalInstall`        | `npm`/`pnpm` install/add with `-g`/`--global`, `yarn global add`, `bun add -g` / `bun install -g` | Use `bun add <pkg>` (persists) or `bunx <pkg>` (ephemeral run).            |
-| `nonBunPackageManager` | `npm`, `npx`, `pnpm`, `pnpx`, `yarn` at a command boundary                                        | Use `bun install` / `bun add <pkg>`, and `bunx <pkg>` instead of npx/pnpx. |
+| Guard                  | Triggers on                                                                                       | Guidance in the block reason                                           |
+| ---------------------- | ------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------- |
+| `globalInstall`        | `npm`/`pnpm` install/add with `-g`/`--global`, `yarn global add`, `bun add -g` / `bun install -g` | Use `bun add <pkg>` (persists) or `bunx <pkg>` (ephemeral run).        |
+| `nonBunPackageManager` | `npm`, `pnpm`, `yarn` at a command boundary (`npx`/`pnpx`/`bunx` are allowed)                     | Use `bun install` / `bun add <pkg>`. Ephemeral runners are fine as-is. |
 A global install (e.g. `npm install -g x`) trips **only** `globalInstall`, not both — the global install is the more specific violation, so acknowledging `globalInstall` lets the command through without a second acknowledgement for `nonBunPackageManager`.
@@ -43,9 +43,9 @@ Both guards follow the repo-wide `acknowledgeGuards` convention (shared with the
 For each segment, the guard strips leading **preamble wrappers** (`sudo`, `env`, `command`, `exec`, `nice`, `nohup`, `stdbuf`, `setsid`, `time`, `xargs`, and any `VAR=val` assignment) — including their options, and the argument a flag consumes (`sudo -u nobody`, `nice -n 10`, `env -i`) — to find the real command word, then classifies:
-1. command word is `npm`/`npx`/`pnpm`/`pnpx`/`yarn` (or `bun`) **and** the segment has an install subcommand **and** a global flag → `globalInstall` (for `yarn`, the `global add` sequence must appear adjacent and in command position, so `yarn add global foo` — a local install of a package named `global` — is not misflagged);
-2. command word is a non-bun manager (not via global) → `nonBunPackageManager`;
-3. otherwise → allowed.
+1. command word is `npm`/`pnpm`/`yarn` (or `bun`) **and** the segment has an install subcommand **and** a global flag → `globalInstall` (for `yarn`, the `global add` sequence must appear adjacent and in command position, so `yarn add global foo` — a local install of a package named `global` — is not misflagged);
+2. command word is a non-bun install manager `npm`/`pnpm`/`yarn` (not via global) → `nonBunPackageManager`;
+3. otherwise (including the ephemeral runners `npx`/`pnpx`/`bunx`) → allowed.
 A `globalInstall` verdict on any segment wins over a plain non-bun verdict. This is a command-position detector, not a full shell parser — it doesn't interpret redirections or expansions beyond boundary marking — but it is linear-time and closes the structural gaps a single regex left open.
@@ -70,6 +70,7 @@ Because classification scans a segment's words as a set (after preamble strippin
 ## What is NOT blocked
 - `bun`, `bunx`, `bun run`, `bun add`, `bun install` (local) — the intended package commands. (`bun add -g` / `bun install -g` are still blocked as global installs: bun globals live in `~/.bun`, outside `/agent`, and are wiped on restart.)
+- `npx`, `pnpx` — ephemeral runners, allowed for one-off tool execution (they leave no lockfile or install tree). There is no global-install case to block for them either, since they don't install into the dependency tree at all.
 - A non-bun manager name appearing as a substring or argument: `my-npm-wrapper`, `./npm`, `cat npm-debug.log`, `git commit -m "drop npm"`, `grep -rn npx src/`, `echo "npm install -g foo"`. Only the **command word** of a segment is classified, so a manager name inside an argument, path, quoted string, or longer token never trips the guard.
 ## Ordering against other bundled plugins
@@ -78,5 +79,5 @@ Registered after `guard` in `src/run/bundled-plugins.ts`. It guards a disjoint s
 ## Tests
-- `policy.test.ts` — pure-function unit tests for the detection logic: every global-install form, every non-bun manager, the allowed-command set (bun/bunx, substrings, paths, quoted text), both bypasses, the global-install-takes-precedence rule, escaped/quoted evasions, leading-assignment preambles, newline-as-separator scoping, falsy `--global=`, option placement, and subshell/substitution detection.
-- `index.test.ts` — composition tests: the plugin registers the `tool.before` hook and wires it to the policy (block on global install, block on npx, allow bunx, honor the bypass).
+- `policy.test.ts` — pure-function unit tests for the detection logic: every global-install form, every non-bun install manager, the ephemeral-runner allowance (`npx`/`pnpx`/`bunx`, including behind preamble wrappers), the allowed-command set (bun/bunx, substrings, paths, quoted text), both bypasses, the global-install-takes-precedence rule, escaped/quoted evasions, leading-assignment preambles, newline-as-separator scoping, falsy `--global=`, option placement, and subshell/substitution detection.
+- `index.test.ts` — composition tests: the plugin registers the `tool.before` hook and wires it to the policy (block on global install, block on `npm install`, allow `bunx`/`npx`, honor the bypass).

package/src/bundled-plugins/bun-hygiene/policy.ts CHANGED Viewed

@@ -3,7 +3,12 @@ import { ACKNOWLEDGE_GUARDS, type GuardBlock, isGuardAcknowledged } from '../gua
 export const GUARD_GLOBAL_INSTALL = 'globalInstall'
 export const GUARD_NON_BUN_PACKAGE_MANAGER = 'nonBunPackageManager'
-const NON_BUN_MANAGERS = new Set(['npm', 'npx', 'pnpm', 'pnpx', 'yarn'])
+// Only install managers are blocked. The ephemeral runners npx/pnpx (and bunx,
+// which is bun's own runner) are intentionally absent: they run a tool once without
+// touching the dependency tree or writing a competing lockfile, so they don't
+// undermine the bun-standardization this set protects. classify() skips any
+// command word not in here, so leaving them out is what allows them.
+const NON_BUN_MANAGERS = new Set(['npm', 'pnpm', 'yarn'])
 const INSTALL_SUBCOMMANDS = new Set(['install', 'i', 'add'])
 export function checkBunHygieneGuard(options: { tool: string; args: Record<string, unknown> }): GuardBlock | undefined {
@@ -310,8 +315,8 @@ function blockNonBunManager(manager: string, args: Record<string, unknown>): Gua
   return {
     block: true,
     reason: [
-      `Guard \`${GUARD_NON_BUN_PACKAGE_MANAGER}\` blocked \`${manager}\`. This container standardizes on bun.`,
-      'Use `bun install` / `bun add <pkg>` instead of npm/pnpm/yarn, and `bunx <pkg>` instead of npx/pnpx.',
+      `Guard \`${GUARD_NON_BUN_PACKAGE_MANAGER}\` blocked \`${manager}\`. This container standardizes on bun for dependency management.`,
+      'Use `bun install` / `bun add <pkg>` instead of npm/pnpm/yarn. Ephemeral runners (`bunx`, `npx`, `pnpx`) are allowed for one-off tool execution.',
       `Retry with \`${ACKNOWLEDGE_GUARDS}.${GUARD_NON_BUN_PACKAGE_MANAGER}: true\` if this package manager is genuinely required (e.g. a project pinned to a different lockfile).`,
     ].join(' '),
   }

package/src/bundled-plugins/explorer/explorer.ts CHANGED Viewed

@@ -94,6 +94,8 @@ export function createExplorerSubagent(): Subagent<ExplorerPayload> {
     tools: [readTool, grepTool, findTool, lsTool, bashTool],
     payloadSchema: explorerPayloadSchema,
     visibility: 'public',
+    rosterDescription:
+      'read-only local recon — code, sessions, memory, git, config; returns the paths and excerpts you need without you grepping the tree yourself; fire liberally',
     inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
     toolResultBudget: {
       maxTotalBytes: 256_000,

package/src/bundled-plugins/github-cli-auth/approve-idempotency.ts ADDED Viewed

@@ -0,0 +1,94 @@
+import type { ReviewVerdict } from '@/channels/github-review-turn-ledger'
+export type EffectiveApprovalResolver = (target: {
+  workspace: string
+  prNumber: number
+}) => Promise<{ ok: true; alreadyApproved: boolean } | { ok: false }>
+export type ApproveBlock = { block: true; reason: string }
+export type ApproveIdempotencyGuard = {
+  guard: (args: {
+    callId: string
+    workspace: string
+    prNumber: number
+    verdict: ReviewVerdict
+  }) => Promise<ApproveBlock | null>
+  release: (args: { callId: string; succeeded: boolean }) => void
+}
+const DUPLICATE_REASON =
+  'This bot has already approved this pull request. A second APPROVE would post a redundant review. ' +
+  'If you intended to change your verdict, request changes or dismiss the prior review instead of re-approving.'
+// Makes formal `gh ... event=APPROVE` idempotent per PR across turns, sessions,
+// and restarts. Two layers, each with a single job:
+//
+//   1. An in-process set of *in-flight* reservations (`pendingApprovals`) that
+//      blocks a second APPROVE while a first is still mid-flight in the same
+//      container — the concurrent-double-approve case the remote read can't see
+//      yet (GitHub hasn't recorded the in-flight review).
+//   2. The authoritative GitHub effective-state read, the SOLE source of truth
+//      for "the bot already holds a standing APPROVED review." It understands
+//      supersession: a later CHANGES_REQUESTED / DISMISSED demotes an earlier
+//      APPROVED, so the bot may legitimately re-approve.
+//
+// The set is strictly an in-flight lock — never a persistent "already approved"
+// memory. A completed APPROVE drops its reservation in release(), so the next
+// APPROVE re-consults GitHub instead of being shadowed by a stale local entry.
+// That separation fixes the strand bug: once a standing approval is superseded
+// (PR back to CHANGES_REQUESTED), a stale local lock must not keep blocking a
+// genuine re-approve — only the remote read decides, and it now reports
+// alreadyApproved=false. Reads fail OPEN: a transient GitHub error must never
+// permanently strand a first approval; the in-flight reservation still covers
+// the concurrent case.
+// Builds the APPROVE idempotency guard. `guard()` returns a block decision when
+// a formal APPROVE for the same PR is already in flight in this process or the
+// remote read reports a standing approval, and null otherwise. `release()` drops
+// the in-flight reservation taken by the matching `guard()` call.
+export function createApproveIdempotencyGuard(deps: {
+  resolveEffectiveApproval: EffectiveApprovalResolver
+}): ApproveIdempotencyGuard {
+  // PR keys with an APPROVE currently in flight.
+  const pendingApprovals = new Set<string>()
+  // callId -> PR key it reserved, so release() can find the reservation.
+  const reservedByCall = new Map<string, string>()
+  // Clears one reservation from both structures.
+  const clearReservation = (callId: string, key: string): void => {
+    reservedByCall.delete(callId)
+    pendingApprovals.delete(key)
+  }
+  return {
+    async guard(args): Promise<ApproveBlock | null> {
+      // Only formal approvals are made idempotent; other verdicts pass through.
+      if (args.verdict !== 'APPROVE') return null
+      const key = prKey(args.workspace, args.prNumber)
+      // Reserve BEFORE the await so two calls racing into guard() for the same
+      // PR cannot both observe an empty set: the loser sees the winner's
+      // in-flight reservation and is blocked.
+      if (pendingApprovals.has(key)) return { block: true, reason: DUPLICATE_REASON }
+      pendingApprovals.add(key)
+      reservedByCall.set(args.callId, key)
+      try {
+        const remote = await deps.resolveEffectiveApproval({ workspace: args.workspace, prNumber: args.prNumber })
+        if (remote.ok && remote.alreadyApproved) {
+          // Standing approval upstream. Block, and release the in-flight lock
+          // now: a blocked command never reaches release(), and a lingering
+          // local entry must not shadow the authoritative remote read.
+          clearReservation(args.callId, key)
+          return { block: true, reason: DUPLICATE_REASON }
+        }
+        // `ok: false` (failed read) or no standing approval: allow the call and
+        // keep the reservation until release().
+        return null
+      } catch (error: unknown) {
+        // A rejecting resolver is a terminal path too. Without this cleanup the
+        // reservation would never be dropped and every later APPROVE for this
+        // PR would be blocked by a stale in-flight lock. The error itself is
+        // still surfaced to the caller unchanged.
+        clearReservation(args.callId, key)
+        throw error
+      }
+    },
+    release(args): void {
+      const key = reservedByCall.get(args.callId)
+      // Unknown callId: guard() never reserved for it (non-APPROVE verdict,
+      // blocked call, or already released).
+      if (key === undefined) return
+      // Always drop the in-flight lock, success or fail. On success the standing
+      // approval now lives on GitHub, so future APPROVEs are caught by the remote
+      // read; the local lock must not outlive the in-flight window.
+      clearReservation(args.callId, key)
+    },
+  }
+}
+// Identity of one pull request inside a workspace: `<workspace>#<prNumber>`.
+function prKey(workspace: string, prNumber: number): string {
+  return [workspace, prNumber].join('#')
+}

package/src/bundled-plugins/github-cli-auth/effective-approval.ts ADDED Viewed

@@ -0,0 +1,98 @@
+import { GITHUB_API_BASE, githubJsonHeaders } from '@/channels/adapters/github/auth-pat'
+import type { EffectiveApprovalResolver } from './approve-idempotency'
+// Resolves whether THIS bot already has a standing APPROVED review on a PR, used
+// by the approve-idempotency guard to stop a second formal APPROVE after a
+// restart (the in-process pending set covers the same-container case but is lost
+// when the container bounces). Every failure returns { ok: false } so the guard
+// fails open — a transient read error must never permanently block a genuine
+// first approval.
+export function createGithubEffectiveApprovalResolver(deps: {
+  resolveToken: (workspace: string) => Promise<string | null>
+  fetchImpl?: typeof fetch
+}): EffectiveApprovalResolver {
+  const doFetch = deps.fetchImpl ?? fetch
+  return async (target) => {
+    // The workspace is expected to look like `owner/repo`; both parts are required.
+    const segments = target.workspace.split('/')
+    const owner = segments[0]
+    const repo = segments[1]
+    if (!owner || !repo) return { ok: false }
+    // A rejected token lookup counts as "no token".
+    const token = await deps.resolveToken(target.workspace).catch(() => null)
+    if (!token) return { ok: false }
+    const selfLogin = await fetchSelfLogin(doFetch, token)
+    if (selfLogin === null) return { ok: false }
+    const reviews = await fetchReviews(doFetch, token, owner, repo, target.prNumber)
+    if (reviews === null) return { ok: false }
+    // Walk the rows in order and remember the state of the last decisive review
+    // authored by this bot; later rows overwrite earlier ones.
+    let effectiveState: string | undefined
+    for (const review of reviews) {
+      if (isSelf(review.login, review.isBot, selfLogin) && isDecisive(review.state)) {
+        effectiveState = review.state
+      }
+    }
+    return { ok: true, alreadyApproved: effectiveState === 'APPROVED' }
+  }
+}
+// A bot's effective review is its LATEST decisive one. COMMENTED/PENDING are
+// non-deciding noise that must not clear an earlier APPROVED/CHANGES_REQUESTED;
+// a later CHANGES_REQUESTED or DISMISSED supersedes an earlier APPROVED. The
+// reviews endpoint returns rows in chronological order, so the last decisive
+// row wins. Mirrors src/channels/adapters/github/review-state.ts.
+// Review states that settle a reviewer's verdict.
+const DECISIVE: ReadonlySet<string> = new Set(['APPROVED', 'CHANGES_REQUESTED', 'DISMISSED'])
+// True when `state` is one of the verdict-settling review states.
+function isDecisive(state: string): boolean {
+  const decisive = DECISIVE.has(state)
+  return decisive
+}
+type ReviewRow = { state: string; login: string; isBot: boolean }
+// Reads the login of the account that owns `token` from `GET /user`. Returns
+// null on any failure: thrown fetch, non-2xx status, unparseable body, or a
+// body without a string `login`.
+async function fetchSelfLogin(fetchImpl: typeof fetch, token: string): Promise<string | null> {
+  try {
+    const res = await fetchImpl(`${GITHUB_API_BASE}/user`, { headers: githubJsonHeaders(token) })
+    if (res.ok) {
+      const payload = (await res.json().catch(() => null)) as { login?: unknown } | null
+      const login = payload?.login
+      if (typeof login === 'string') return login
+    }
+  } catch {
+    // Fall through to the shared null result below.
+  }
+  return null
+}
+// Page size requested from the reviews endpoint.
+const REVIEWS_PER_PAGE = 100
+// Upper bound on pages walked for one PR, so a misbehaving endpoint cannot keep
+// the guard looping forever.
+const MAX_REVIEW_PAGES = 20
+// Fetches every review on a pull request, in the order the API returns them,
+// reduced to the fields the resolver needs. Rows without a string `state` or a
+// string `user.login` are dropped.
+//
+// Walks all pages: the effective review is the LAST decisive row, so reading
+// only the first page would report a stale verdict on a PR with more reviews
+// than one page holds.
+//
+// Returns null on any failure (thrown fetch, non-2xx status, unparseable or
+// non-array body) and also when the page cap is reached before the final page,
+// because a truncated list cannot be trusted to contain the latest review.
+async function fetchReviews(
+  fetchImpl: typeof fetch,
+  token: string,
+  owner: string,
+  repo: string,
+  prNumber: number,
+): Promise<ReviewRow[] | null> {
+  try {
+    const rows: ReviewRow[] = []
+    for (let pageNumber = 1; pageNumber <= MAX_REVIEW_PAGES; pageNumber++) {
+      const url = `${GITHUB_API_BASE}/repos/${owner}/${repo}/pulls/${prNumber}/reviews?per_page=${REVIEWS_PER_PAGE}&page=${pageNumber}`
+      const response = await fetchImpl(url, { headers: githubJsonHeaders(token) })
+      if (!response.ok) return null
+      const page: unknown = await response.json().catch(() => null)
+      if (!Array.isArray(page)) return null
+      for (const entry of page as readonly (RawReview | null)[]) {
+        if (entry === null || typeof entry !== 'object') continue
+        if (typeof entry.state !== 'string') continue
+        const login = entry.user?.login
+        if (typeof login !== 'string') continue
+        rows.push({ state: entry.state, login, isBot: entry.user?.type === 'Bot' })
+      }
+      // A short page is the last page.
+      if (page.length < REVIEWS_PER_PAGE) return rows
+    }
+    // Cap reached with full pages still coming: the list is incomplete.
+    return null
+  } catch {
+    return null
+  }
+}
+const BOT_LOGIN_SUFFIX = '[bot]'
+// A GitHub App's reviews login is `slug[bot]` while `/user` returns the bare
+// slug, so normalize before comparing — but only for actual Bot reviewers, since
+// a human could legitimately own a login matching the bare slug.
+function isSelf(login: string, isBot: boolean, selfLogin: string): boolean {
+  // Bot reviewers are compared with the `[bot]` suffix stripped from both
+  // sides; everyone else must match exactly.
+  return isBot ? normalizeBotLogin(login) === normalizeBotLogin(selfLogin) : login === selfLogin
+}
+// Strips one trailing `[bot]` suffix from a login; other logins pass through.
+function normalizeBotLogin(login: string): string {
+  if (!login.endsWith(BOT_LOGIN_SUFFIX)) return login
+  return login.substring(0, login.length - BOT_LOGIN_SUFFIX.length)
+}
+type RawReview = { state?: unknown; user?: { login?: string; type?: string } }

package/src/bundled-plugins/github-cli-auth/gh-review-inline-detect.ts ADDED Viewed

@@ -0,0 +1,130 @@
+// Blocks the "dumped review" anti-pattern: a REQUEST_CHANGES whose body anchors
+// `path:line` findings that are not actually posted as inline `comments[]`. The
+// github channel skill mandates `comments[]` and calls a flat-body review "a bug,
+// not a fallback"; this enforces it. Scoped to REQUEST_CHANGES + REST `--input`
+// payloads, since APPROVE/COMMENT bodies and the `gh pr review` porcelain carry
+// no comparable `comments[]` to weigh the body against.
+//
+// A body anchor is "covered" only when an inline comment sits at the same path
+// and a line inside the anchor's range — so a partially-inline review that posts
+// a few token comments while leaving other findings stranded in the body is still
+// blocked on the stranded ones.
+export type ReviewDumpInput = {
+  command: string
+  inputFileContents?: string | null
+}
+export type ReviewDumpDecision = { block: true; reason: string } | null
+// A finding anchor as a reviewer writes it in prose: a file path (optionally with
+// directories) ending in an extension, then `:line`, then an optional range/list
+// (`107-111`, `807,809`, `12-20`). This is the real notation seen in dumped
+// reviews — NOT GitHub blob `#L123` anchors, which point at files for reference
+// rather than requesting a change on the diff.
+const PATH_LINE_ANCHOR = /((?:[\w.-]+\/)*[\w.-]+\.[A-Za-z]\w*):(\d+(?:[-,]\d+)*)/g
+const REVIEWS_ENDPOINT = /\/repos\/[^/\s]+\/[^/\s]+\/pulls\/\d+\/reviews\b/
+// One or two anchors in a prose body is normal narration; at three+ uncovered
+// anchors a review reads as a dump.
+const MIN_ANCHORS = 3
+// Decides whether a reviews-endpoint command should be blocked as a "dumped"
+// review. Returns null (allow) unless the payload is a REQUEST_CHANGES whose
+// body carries at least MIN_ANCHORS path:line anchors and at least one of them
+// has no matching inline comment.
+export function detectReviewDump(input: ReviewDumpInput): ReviewDumpDecision {
+  if (!REVIEWS_ENDPOINT.test(input.command)) return null
+  const payload = parsePayload(input.inputFileContents ?? null)
+  if (payload === null || payload.event !== 'REQUEST_CHANGES') return null
+  const anchors = parseAnchors(payload.body)
+  if (anchors.length < MIN_ANCHORS) return null
+  let strandedCount = 0
+  for (const anchor of anchors) {
+    if (!isCoveredInline(anchor, payload.comments)) strandedCount += 1
+  }
+  if (strandedCount === 0) return null
+  const reason = buildReason(anchors.length, strandedCount, payload.comments.length)
+  return { block: true, reason }
+}
+type Anchor = { path: string; lines: ReadonlySet<number> }
+type InlineComment = { path: string; line: number }
+type ReviewPayload = { event: string; body: string; comments: readonly InlineComment[] }
+// Parses the `--input` file contents into the three fields the detector reads.
+// Returns null for missing/empty contents, invalid JSON, or a JSON value that
+// is not an object. A missing or non-string `event`/`body` becomes ''; `event`
+// is trimmed and upper-cased.
+function parsePayload(contents: string | null): ReviewPayload | null {
+  if (!contents) return null
+  try {
+    const decoded: unknown = JSON.parse(contents)
+    if (decoded === null || typeof decoded !== 'object') return null
+    const fields = decoded as Record<string, unknown>
+    return {
+      event: typeof fields.event === 'string' ? fields.event.trim().toUpperCase() : '',
+      body: typeof fields.body === 'string' ? fields.body : '',
+      comments: parseComments(fields.comments),
+    }
+  } catch {
+    return null
+  }
+}
+// Widest `start_line..line` span that is expanded line by line. The payload is
+// untrusted JSON, and `JSON.parse` yields Infinity for literals like `1e999`,
+// so the expansion loop must be bounded.
+const MAX_COMMENT_SPAN = 10_000
+// Flattens the payload's `comments[]` into one `{ path, line }` entry per line
+// each comment touches. Entries that are not objects, lack a string `path`, or
+// lack a safe-integer `line` are ignored. A non-array value yields [].
+function parseComments(value: unknown): InlineComment[] {
+  if (!Array.isArray(value)) return []
+  const out: InlineComment[] = []
+  for (const entry of value) {
+    if (typeof entry !== 'object' || entry === null) continue
+    const rec = entry as Record<string, unknown>
+    const path = rec.path
+    // GitHub keys an inline comment on `line` (and `start_line` for a span); a
+    // span covers each line it touches.
+    const line = rec.line
+    if (typeof path !== 'string') continue
+    if (typeof line !== 'number' || !Number.isSafeInteger(line)) continue
+    const rawStart = rec.start_line
+    // An unusable `start_line` degrades to a single-line comment at `line`.
+    const startLine = typeof rawStart === 'number' && Number.isSafeInteger(rawStart) ? rawStart : line
+    const low = Math.min(startLine, line)
+    const high = Math.max(startLine, line)
+    if (high - low >= MAX_COMMENT_SPAN) {
+      // Implausibly wide span: keep only the line the comment is keyed on
+      // rather than looping over the whole range.
+      out.push({ path, line })
+      continue
+    }
+    for (let l = low; l <= high; l++) {
+      out.push({ path, line: l })
+    }
+  }
+  return out
+}
+// Extracts the distinct path:line anchors from a review body, in order of first
+// appearance. Two matches are the same anchor when both the path and the raw
+// line spec are textually identical.
+function parseAnchors(body: string): Anchor[] {
+  const byKey = new Map<string, Anchor>()
+  for (const match of body.matchAll(PATH_LINE_ANCHOR)) {
+    const path = match[1] as string
+    const spec = match[2] as string
+    const key = `${match[1]}:${match[2]}`
+    if (byKey.has(key)) continue
+    byKey.set(key, { path, lines: expandLineSpec(spec) })
+  }
+  // Map iteration follows insertion order, so first-seen order is preserved.
+  return [...byKey.values()]
+}
+// Widest single range that is expanded line by line. The spec comes from
+// free-form review text, so `1-9007199254740991` is syntactically valid and
+// would otherwise spin the loop until the Set overflows.
+const MAX_ANCHOR_SPAN = 10_000
+// `12-20` -> 12..20; `807,809` -> {807,809}; `42` -> {42}.
+// Comma-separated parts are expanded independently and merged. A part whose
+// bounds are not safe integers, or whose range is wider than MAX_ANCHOR_SPAN,
+// is skipped and contributes no lines.
+function expandLineSpec(spec: string): Set<number> {
+  const lines = new Set<number>()
+  for (const part of spec.split(',')) {
+    const bounds = part.split('-')
+    const start = Number(bounds[0])
+    const end = bounds.length > 1 ? Number(bounds[1]) : start
+    if (!Number.isSafeInteger(start) || !Number.isSafeInteger(end)) continue
+    // Bounds may be written high-to-low; normalise before expanding.
+    const low = Math.min(start, end)
+    const high = Math.max(start, end)
+    if (high - low >= MAX_ANCHOR_SPAN) continue
+    for (let l = low; l <= high; l++) lines.add(l)
+  }
+  return lines
+}
+// The body writes short paths (`languages.ts`) while comments[] carry full repo
+// paths (`apps/.../languages.ts`); treat a comment as on-path when either path
+// ends with the other (segment-aligned), so the basename match is exact.
+function isCoveredInline(anchor: Anchor, comments: readonly InlineComment[]): boolean {
+  // Covered as soon as one inline comment is on an aligned path AND on a line
+  // inside the anchor's line set.
+  for (const comment of comments) {
+    if (!pathsAlign(anchor.path, comment.path)) continue
+    if (anchor.lines.has(comment.line)) return true
+  }
+  return false
+}
+// True when the two paths are equal, or when one is the other's trailing path
+// segments (the shorter one must follow a `/` in the longer one).
+function pathsAlign(anchorPath: string, commentPath: string): boolean {
+  const isTrailingSegments = (full: string, tail: string): boolean => full.endsWith(`/${tail}`)
+  return (
+    anchorPath === commentPath ||
+    isTrailingSegments(commentPath, anchorPath) ||
+    isTrailingSegments(anchorPath, commentPath)
+  )
+}
+// Composes the block message: how many anchors the body has, how many lack an
+// inline comment, how many inline comments the payload carries, and how to fix it.
+function buildReason(total: number, uncovered: number, commentCount: number): string {
+  const verb = uncovered === 1 ? 'is' : 'are'
+  const plural = commentCount === 1 ? '' : 's'
+  const summary = `This REQUEST_CHANGES review body anchors ${total} findings to specific lines (path:line), but ${uncovered} of them ${verb} not posted as inline comments (payload has ${commentCount} inline comment${plural}).`
+  const sentences = [
+    summary,
+    'Every line-anchored change request belongs on its diff line, not flattened into the review body.',
+    'Re-submit with each stranded finding as an entry in the `comments[]` array of the reviews payload',
+    '(`{ "path": "...", "line": N, "side": "RIGHT", "body": "..." }`), keeping `body` for the high-level summary only.',
+  ]
+  return sentences.join(' ')
+}