typeclaw 0.28.2 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/package.json +1 -1
  2. package/src/agent/index.ts +43 -5
  3. package/src/agent/live-subagents.ts +5 -0
  4. package/src/agent/loop-guard.ts +112 -26
  5. package/src/agent/plugin-tools.ts +167 -50
  6. package/src/agent/session-origin.ts +3 -3
  7. package/src/agent/subagent-drain.ts +150 -0
  8. package/src/agent/subagents.ts +41 -3
  9. package/src/agent/system-prompt.ts +29 -4
  10. package/src/agent/tools/channel-send.ts +1 -1
  11. package/src/agent/tools/spawn-subagent.ts +34 -1
  12. package/src/agent/tools/subagent-output.ts +7 -3
  13. package/src/agent/tools/wikipedia.ts +1 -1
  14. package/src/bundled-plugins/bun-hygiene/README.md +12 -11
  15. package/src/bundled-plugins/bun-hygiene/policy.ts +8 -3
  16. package/src/bundled-plugins/explorer/explorer.ts +2 -0
  17. package/src/bundled-plugins/github-cli-auth/approve-idempotency.ts +94 -0
  18. package/src/bundled-plugins/github-cli-auth/effective-approval.ts +98 -0
  19. package/src/bundled-plugins/github-cli-auth/gh-review-inline-detect.ts +130 -0
  20. package/src/bundled-plugins/github-cli-auth/index.ts +27 -2
  21. package/src/bundled-plugins/github-cli-auth/review-recorder.ts +12 -4
  22. package/src/bundled-plugins/memory/memory-logger.ts +3 -3
  23. package/src/bundled-plugins/operator/operator.ts +2 -0
  24. package/src/bundled-plugins/planner/index.ts +11 -0
  25. package/src/bundled-plugins/planner/planner.ts +283 -0
  26. package/src/bundled-plugins/planner/skills/general.ts +65 -0
  27. package/src/bundled-plugins/planner/skills/project.ts +69 -0
  28. package/src/bundled-plugins/researcher/index.ts +11 -0
  29. package/src/bundled-plugins/researcher/researcher.ts +233 -0
  30. package/src/bundled-plugins/researcher/skills/general.ts +105 -0
  31. package/src/bundled-plugins/researcher/write-report.ts +107 -0
  32. package/src/bundled-plugins/reviewer/reviewer.ts +28 -9
  33. package/src/bundled-plugins/reviewer/skills/data-review.ts +77 -0
  34. package/src/bundled-plugins/reviewer/skills/doc-review.ts +79 -0
  35. package/src/bundled-plugins/reviewer/skills/plan-review.ts +64 -0
  36. package/src/bundled-plugins/reviewer/skills/security-audit.ts +70 -0
  37. package/src/bundled-plugins/reviewer/skills/writing-review.ts +63 -0
  38. package/src/bundled-plugins/scout/scout.ts +2 -0
  39. package/src/bundled-plugins/security/policies/prompt-injection.ts +8 -4
  40. package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +3 -2
  41. package/src/channels/adapters/discord-bot.ts +38 -11
  42. package/src/channels/adapters/github/inbound.ts +68 -4
  43. package/src/channels/adapters/kakaotalk-classify.ts +2 -2
  44. package/src/channels/adapters/kakaotalk.ts +2 -2
  45. package/src/channels/adapters/slack-bot-classify.ts +1 -1
  46. package/src/channels/adapters/slack-bot.ts +3 -0
  47. package/src/channels/adapters/telegram-bot.ts +3 -0
  48. package/src/channels/engagement.ts +12 -7
  49. package/src/channels/github-review-claim.ts +15 -3
  50. package/src/channels/router.ts +85 -9
  51. package/src/channels/schema.ts +1 -1
  52. package/src/channels/types.ts +6 -0
  53. package/src/cli/init.ts +13 -2
  54. package/src/cli/ui.ts +64 -0
  55. package/src/config/config.ts +21 -15
  56. package/src/container/start.ts +5 -1
  57. package/src/init/dockerfile.ts +19 -56
  58. package/src/init/hatching.ts +1 -1
  59. package/src/init/index.ts +5 -1
  60. package/src/migrations/index.ts +35 -0
  61. package/src/migrations/secrets-v1-to-v2.ts +344 -0
  62. package/src/run/bundled-plugins.ts +4 -0
  63. package/src/run/index.ts +13 -0
  64. package/src/sandbox/availability.ts +12 -0
  65. package/src/sandbox/build.ts +12 -0
  66. package/src/sandbox/index.ts +1 -1
  67. package/src/sandbox/policy.ts +8 -0
  68. package/src/server/index.ts +24 -5
  69. package/src/shared/host-locale.ts +27 -0
  70. package/src/shared/protocol.ts +1 -1
  71. package/src/shared/wordmark.ts +19 -0
  72. package/src/skills/typeclaw-config/SKILL.md +32 -32
  73. package/src/skills/typeclaw-kaomoji/SKILL.md +3 -3
  74. package/src/skills/typeclaw-tunnels/SKILL.md +3 -1
  75. package/src/tui/banner.ts +19 -0
  76. package/src/tui/format.ts +34 -0
  77. package/src/tui/index.ts +121 -22
  78. package/src/tui/theme.ts +26 -1
  79. package/src/tunnels/providers/cloudflare-named.ts +15 -4
  80. package/src/tunnels/providers/cloudflare-quick.ts +15 -4
  81. package/src/tunnels/providers/cloudflared-binary.ts +11 -0
  82. package/typeclaw.schema.json +15 -7
@@ -1,6 +1,15 @@
1
1
  import { formatLocalDateTime, formatLocalWeekday, resolveLocalTimezoneName } from '@/shared'
2
2
 
3
- export const DEFAULT_SYSTEM_PROMPT = `You are a general-purpose AI agent running inside TypeClaw.
3
+ // The orchestration roster (the `Briefly: ...` enumeration of public subagents)
4
+ // is GENERATED from the registry by `renderPublicSubagentRoster` and threaded in
5
+ // here, so a newly-registered public subagent can never be silently missing from
6
+ // the prompt — the drift that once left `researcher` and `planner` unlisted. The
7
+ // rest of the prompt is static. `DEFAULT_SUBAGENT_ROSTER` is the placeholder used
8
+ // by the no-registry path (back-compat callers, the debug dumper); production
9
+ // full-mode sessions pass the real registry-rendered roster via
10
+ // `composeSystemPrompt`'s `subagentRoster` field.
11
+ export function buildDefaultSystemPrompt(subagentRoster: string): string {
12
+ return `You are a general-purpose AI agent running inside TypeClaw.
4
13
 
5
14
  TypeClaw is domain-agnostic — your purpose is defined by \`IDENTITY.md\`, your character by \`SOUL.md\`, and your operating manual by \`AGENTS.md\`. This system prompt only describes the runtime around you.
6
15
 
@@ -80,13 +89,13 @@ Your agent folder is a git repository.
80
89
 
81
90
  ## Subagent orchestration
82
91
 
83
- Delegate focused work to subagents via \`spawn_subagent\`, \`subagent_output\`, \`subagent_cancel\`. Each runs in its own context window with its own tool set. The available subagents and their purpose are listed in the \`spawn_subagent\` tool description — re-read it before delegating. Briefly: \`explorer\` (read-only local recon — code, sessions, memory, git, config; fire liberally), \`scout\` (web research in a fresh context), \`reviewer\` (deep read-only code/PR/plan review, returns a structured verdict; it does NOT post), \`operator\` (write-capable: bash-with-side-effects, write, edit — for browser sessions, refactors, deploys, batch ops, and Claude Code / Codex CLI driving; gated by \`subagent.spawn.operator\`, owner/trusted only — on denial, do the work yourself).
92
+ Delegate focused work to subagents via \`spawn_subagent\`, \`subagent_output\`, \`subagent_cancel\`. Each runs in its own context window with its own tool set. The available subagents and their purpose are listed in the \`spawn_subagent\` tool description — re-read it before delegating. Briefly: ${subagentRoster}.
84
93
 
85
94
  There are three delegation modes. Pick deliberately.
86
95
 
87
- **Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer.
96
+ **Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer. Match the worker to the depth: a fast or narrow web lookup goes to \`scout\`; a fuzzy question that needs decomposition, many sources, cross-validation, and a synthesized verdict goes to \`researcher\` (don't do that grind inline with \`web_search\` yourself).
88
97
 
89
- **Mode B — Delegate-and-converse.** Asked to DO something long-running (>~30s: installs, builds, \`docker\`, scrapes, long test suites, multi-host loops, any noisy "fetch N and synthesize" chain)? Don't run it inline — blocking your own \`bash\` freezes the conversation and stalls the channel typing heartbeat (\`MAX_TYPING_HEARTBEAT_MS\`). Spawn one subagent (\`operator\` for side effects, \`scout\` for research) with \`run_in_background: true\`, acknowledge, and KEEP TALKING. Single fast calls (\`git status\`, one known-endpoint \`curl\`) stay inline. When the completion reminder lands, weave the result in; in a channel session, the completion \`<system-reminder>\` is NOT a user message but plain text is still invisible — Surface the result via \`channel_reply\` (or \`channel_send\`). If you already posted the substantive answer in the spawn turn, prefer \`skip_response({ reason: "result confirms prior reply" })\` over going silent.
98
+ **Mode B — Delegate-and-converse.** Asked to DO something long-running (>~30s: installs, builds, \`docker\`, scrapes, long test suites, multi-host loops, any noisy "fetch N and synthesize" chain)? Don't run it inline — blocking your own \`bash\` freezes the conversation and stalls the channel typing heartbeat (\`MAX_TYPING_HEARTBEAT_MS\`). Spawn one subagent (\`operator\` for side effects, \`scout\` for a quick web lookup, \`researcher\` for a deep multi-source "fetch N and synthesize" investigation, \`planner\` when a multi-step goal needs a sequenced, risk-aware plan before anyone acts) with \`run_in_background: true\`, acknowledge, and KEEP TALKING. Single fast calls (\`git status\`, one known-endpoint \`curl\`) stay inline. When the completion reminder lands, weave the result in; in a channel session, the completion \`<system-reminder>\` is NOT a user message but plain text is still invisible — Surface the result via \`channel_reply\` (or \`channel_send\`). If you already posted the substantive answer in the spawn turn, prefer \`skip_response({ reason: "result confirms prior reply" })\` over going silent.
90
99
 
91
100
  **Mode C — Troubleshooting.** Stuck in a fix-it loop — ~3 non-converging attempts at the same failure, cycling kill/re-run/\`sleep\`/capture/retry? Stop (the trigger is non-convergence, not elapsed time — this overrides the ~30s rule). Hand the whole debugging loop to \`operator\` with \`run_in_background: true\` — symptom, what you tried, success condition — and stay responsive. Read the \`typeclaw-troubleshooting\` skill for the mechanics before you spawn.
92
101
 
@@ -109,6 +118,22 @@ You have no independent goals. Do not pursue self-preservation, replication, res
109
118
  ---
110
119
 
111
120
  You are not pi, not Claude, not ChatGPT. You are the agent described by your own IDENTITY.md and SOUL.md. Let those files define your voice.`
121
+ }
122
+
123
+ // Placeholder roster for the no-registry path: back-compat callers of
124
+ // `composeSystemPrompt`/`createResourceLoader` that pass no `subagentRoster`,
125
+ // and the debug dumper (which renders without a live registry). Production
126
+ // full-mode sessions always pass the real registry-rendered roster, so this
127
+ // text never reaches a real agent — it only keeps the standalone
128
+ // `DEFAULT_SYSTEM_PROMPT` constant a valid, self-contained string for tests.
129
+ export const DEFAULT_SUBAGENT_ROSTER =
130
+ 'the registered public subagents (see the `spawn_subagent` tool description for the live list and each one’s purpose)'
131
+
132
+ // Back-compat constant: the full prompt with the placeholder roster baked in.
133
+ // Retained because several tests assert `prompt.startsWith(DEFAULT_SYSTEM_PROMPT)`
134
+ // on the no-registry path; production full-mode composition substitutes the real
135
+ // roster via `buildDefaultSystemPrompt`.
136
+ export const DEFAULT_SYSTEM_PROMPT = buildDefaultSystemPrompt(DEFAULT_SUBAGENT_ROSTER)
112
137
 
113
138
  // Stable, low-volatility metadata about the runtime hosting the agent.
114
139
  // Rendered into the system prompt just below DEFAULT_SYSTEM_PROMPT + identity
@@ -311,7 +311,7 @@ function recordResolvedThreadFromSend(sessionId: string, workspace: string, chat
311
311
  // as the session's origin (same adapter+workspace+chat) but DROPPED the
312
312
  // thread. This catches the "model forgot to copy thread verbatim" failure
313
313
  // mode without blocking legitimate intent — if leaving the thread was on
314
- // purpose ("새 스레드에서 시작하자"), the model can ignore this hint; if it
314
+ // purpose (e.g. "let's start in a new thread"), the model can ignore this hint; if it
315
315
  // wasn't, the next channel_send (or channel_reply) can correct course.
316
316
  //
317
317
  // Only fires when the origin had a thread to begin with — channel-root
@@ -42,6 +42,7 @@ export type CreateSpawnSubagentToolOptions = {
42
42
  stream?: Stream
43
43
  generateTaskId?: () => string
44
44
  now?: () => number
45
+ allowBackgroundFromSubagent?: boolean
45
46
  }
46
47
 
47
48
  export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions) {
@@ -56,6 +57,7 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
56
57
  stream,
57
58
  generateTaskId = () => `${SPAWN_TASK_ID_PREFIX}${randomUUID().replace(/-/g, '').slice(0, 12)}`,
58
59
  now = () => Date.now(),
60
+ allowBackgroundFromSubagent,
59
61
  } = options
60
62
 
61
63
  return defineTool({
@@ -81,7 +83,9 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
81
83
  description:
82
84
  'When true, the spawn returns immediately with a task_id; the subagent runs in the background and a system-reminder is delivered when it completes. ' +
83
85
  'When false (default), the spawn blocks until the subagent finishes and returns its final message synchronously. ' +
84
- 'Use background mode for long-running tasks where you want to keep the conversation moving (Mode B) or for parallel fan-out (Mode A).',
86
+ 'For PARALLEL fan-out, do NOT use background mode: emit several spawn_subagent calls (sync, the default) in a SINGLE turn — they execute concurrently and all their results return together before your next turn. ' +
87
+ 'Reserve background mode for a long-running task you want to keep the conversation moving alongside (Mode B). ' +
88
+ 'NOTE: background mode from subagents is only available when that subagent is explicitly enabled to drain child results; otherwise use sync spawns batched in one turn instead.',
85
89
  }),
86
90
  ),
87
91
  }),
@@ -105,6 +109,13 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
105
109
  `subagent.spawn denied: maximum delegation depth (${MAX_SUBAGENT_DEPTH}) reached; a subagent at this depth cannot spawn further subagents`,
106
110
  )
107
111
  }
112
+ if (origin?.kind === 'subagent' && params.run_in_background === true && allowBackgroundFromSubagent !== true) {
113
+ return errorResult(
114
+ 'subagent.spawn denied: background spawning is not available from a subagent session because the result cannot be delivered after this turn ends. ' +
115
+ 'Retry with run_in_background=false (or omit it) — the synchronous spawn blocks until the child finishes and returns its result into your context, ' +
116
+ 'which is what you need to fold the result into your output.',
117
+ )
118
+ }
108
119
 
109
120
  const taskId = generateTaskId()
110
121
  const subagentName = params.subagent_type
@@ -140,6 +151,7 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
140
151
  subagentName,
141
152
  parentSessionId,
142
153
  ...(spawnedByRole !== undefined ? { spawnedByRole } : {}),
154
+ background,
143
155
  startedAt,
144
156
  status: 'running' as const,
145
157
  abort: resolvedHandle.abort,
@@ -246,6 +258,27 @@ function publicSubagentNames(registry: SubagentRegistry): string[] {
246
258
  .sort()
247
259
  }
248
260
 
261
+ // Render the "## Subagent orchestration" roster from the registry so it can
262
+ // never drift from the actually-registered public subagents (the bug that left
263
+ // `researcher`/`planner` unlisted). Same filter+sort as `publicSubagentNames`,
264
+ // so this roster and the `spawn_subagent` tool description agree by
265
+ // construction. Throws if a public subagent lacks `rosterDescription` — a
266
+ // fail-loud contract that turns "silently missing from the prompt" into a build
267
+ // error caught by the drift-guard test.
268
+ export function renderPublicSubagentRoster(registry: SubagentRegistry): string {
269
+ return publicSubagentNames(registry)
270
+ .map((name) => {
271
+ const description = registry[name]?.rosterDescription?.trim()
272
+ if (description === undefined || description === '') {
273
+ throw new Error(
274
+ `public subagent "${name}" is missing rosterDescription (required for the orchestration roster)`,
275
+ )
276
+ }
277
+ return `\`${name}\` (${description})`
278
+ })
279
+ .join(', ')
280
+ }
281
+
249
282
  function isPublicSubagent(sub: Subagent<unknown>): boolean {
250
283
  return sub.visibility === 'public'
251
284
  }
@@ -58,9 +58,13 @@ export function createSubagentOutputTool(options: CreateSubagentOutputToolOption
58
58
  'Fetch the current state of a subagent you previously spawned. Returns one of three statuses: ' +
59
59
  "'running' (with a human-readable status_summary and a tail of recent progress events), " +
60
60
  "'completed' (with the final message), or 'failed' (with the error). " +
61
- 'Returns immediately with a snapshot — never blocks. ' +
62
- 'For backgrounded spawns, end your turn after spawning and wait for the completion <system-reminder>; ' +
63
- 'then call this once to fetch the result. Use it for ad-hoc status checks too — never in a polling loop.',
61
+ 'Returns immediately with a snapshot — never blocks, so calling it again right away just returns the same ' +
62
+ "'running' snapshot and wastes a turn. " +
63
+ 'For backgrounded spawns, END YOUR TURN after spawning and wait for the completion <system-reminder>; ' +
64
+ 'it arrives on its own when the subagent finishes — you do NOT need to poll for it. ' +
65
+ 'Then call this once to fetch the result. ' +
66
+ 'Do NOT poll in a loop, and do NOT round-robin across several task_ids while they run — ' +
67
+ 'that is treated as a loop and will be blocked. Use it only for a single ad-hoc status check.',
64
68
  parameters: Type.Object({
65
69
  task_id: Type.String({
66
70
  description: 'The task_id returned by a previous spawn_subagent call.',
@@ -20,7 +20,7 @@ export async function wikipediaSearch(query: string, limit: number, signal?: Abo
20
20
  })
21
21
  const response = await fetch(`${OPENSEARCH_URL}?${params.toString()}`, {
22
22
  headers: {
23
- 'User-Agent': 'TypeClaw/0.1 (https://github.com/devxoul/typeclaw)',
23
+ 'User-Agent': 'TypeClaw/0.1 (https://github.com/typeclaw/typeclaw)',
24
24
  Accept: 'application/json',
25
25
  },
26
26
  signal,
@@ -3,7 +3,7 @@
3
3
  The bundled bun-hygiene plugin. Registers a `tool.before` hook that blocks two classes of `bash` command:
4
4
 
5
5
  1. **Global package installs** — `npm install -g`, `pnpm add -g`, `yarn global add`, `bun add -g`, and their `--global` / bundled-flag variants.
6
- 2. **Non-bun package managers** — any `npm`, `npx`, `pnpm`, `pnpx`, or `yarn` invocation.
6
+ 2. **Non-bun install managers** — any `npm`, `pnpm`, or `yarn` invocation. The ephemeral runners `npx` and `pnpx` are **allowed** (alongside `bunx`): they execute a tool once without touching the dependency tree or writing a competing lockfile, so they don't undermine the bun-standardization this guard protects.
7
7
 
8
8
  This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]` entry to add. Both guards carry an `acknowledgeGuards` escape hatch (below) for the cases where the agent genuinely needs the blocked command.
9
9
 
@@ -11,16 +11,16 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
11
11
 
12
12
  **Global installs don't persist.** The agent folder is bind-mounted at `/agent`; everything else in the container — including `~/.bun`, `~/.npm`, and the global `node_modules` a global install writes to — is ephemeral and wiped on every `typeclaw restart`. An agent that runs `npm install -g some-cli` gets a tool that works for the rest of the session and silently vanishes on the next boot, leading to confusing "command not found" failures that look like regressions. The fix is to either add the dependency to `package.json` (`bun add <pkg>`, which lives in the bind-mounted folder and survives) or run it once without installing (`bunx <pkg>`).
13
13
 
14
- **The container standardizes on bun.** TypeClaw is Bun-native end to end (see the root README). Mixing in `npm`/`pnpm`/`yarn` produces competing lockfiles and install trees, and `npx` pulls a second package-execution path when `bunx` already covers it. Steering every package-manager call to bun keeps the dependency state coherent.
14
+ **The container standardizes on bun for dependency management.** TypeClaw is Bun-native end to end (see the root README). Mixing in `npm`/`pnpm`/`yarn` installs produces competing lockfiles and install trees, so those are steered to bun. Ephemeral runners (`npx`/`pnpx`/`bunx`) are not install managers — they run a tool once and leave no lockfile or `node_modules` behind — so they're allowed for one-off execution.
15
15
 
16
16
  Both guards **block with guidance** rather than silently rewriting the command — the agent sees exactly why the command was rejected and what to run instead, the same UX as the bundled `security` and `guard` policies.
17
17
 
18
18
  ## Guards
19
19
 
20
- | Guard | Triggers on | Guidance in the block reason |
21
- | ---------------------- | ------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- |
22
- | `globalInstall` | `npm`/`pnpm` install/add with `-g`/`--global`, `yarn global add`, `bun add -g` / `bun install -g` | Use `bun add <pkg>` (persists) or `bunx <pkg>` (ephemeral run). |
23
- | `nonBunPackageManager` | `npm`, `npx`, `pnpm`, `pnpx`, `yarn` at a command boundary | Use `bun install` / `bun add <pkg>`, and `bunx <pkg>` instead of npx/pnpx. |
20
+ | Guard | Triggers on | Guidance in the block reason |
21
+ | ---------------------- | ------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------- |
22
+ | `globalInstall` | `npm`/`pnpm` install/add with `-g`/`--global`, `yarn global add`, `bun add -g` / `bun install -g` | Use `bun add <pkg>` (persists) or `bunx <pkg>` (ephemeral run). |
23
+ | `nonBunPackageManager` | `npm`, `pnpm`, `yarn` at a command boundary (`npx`/`pnpx`/`bunx` are allowed) | Use `bun install` / `bun add <pkg>`. Ephemeral runners are fine as-is. |
24
24
 
25
25
  A global install (e.g. `npm install -g x`) trips **only** `globalInstall`, not both — the global install is the more specific violation, so acknowledging `globalInstall` lets the command through without a second acknowledgement for `nonBunPackageManager`.
26
26
 
@@ -43,9 +43,9 @@ Both guards follow the repo-wide `acknowledgeGuards` convention (shared with the
43
43
 
44
44
  For each segment, the guard strips leading **preamble wrappers** (`sudo`, `env`, `command`, `exec`, `nice`, `nohup`, `stdbuf`, `setsid`, `time`, `xargs`, and any `VAR=val` assignment) — including their options, and the argument a flag consumes (`sudo -u nobody`, `nice -n 10`, `env -i`) — to find the real command word, then classifies:
45
45
 
46
- 1. command word is `npm`/`npx`/`pnpm`/`pnpx`/`yarn` (or `bun`) **and** the segment has an install subcommand **and** a global flag → `globalInstall` (for `yarn`, the `global add` sequence must appear adjacent and in command position, so `yarn add global foo` — a local install of a package named `global` — is not misflagged);
47
- 2. command word is a non-bun manager (not via global) → `nonBunPackageManager`;
48
- 3. otherwise → allowed.
46
+ 1. command word is `npm`/`pnpm`/`yarn` (or `bun`) **and** the segment has an install subcommand **and** a global flag → `globalInstall` (for `yarn`, the `global add` sequence must appear adjacent and in command position, so `yarn add global foo` — a local install of a package named `global` — is not misflagged);
47
+ 2. command word is a non-bun install manager `npm`/`pnpm`/`yarn` (not via global) → `nonBunPackageManager`;
48
+ 3. otherwise (including the ephemeral runners `npx`/`pnpx`/`bunx`) → allowed.
49
49
 
50
50
  A `globalInstall` verdict on any segment wins over a plain non-bun verdict. This is a command-position detector, not a full shell parser — it doesn't interpret redirections or expansions beyond boundary marking — but it is linear-time and closes the structural gaps a single regex left open.
51
51
 
@@ -70,6 +70,7 @@ Because classification scans a segment's words as a set (after preamble strippin
70
70
  ## What is NOT blocked
71
71
 
72
72
  - `bun`, `bunx`, `bun run`, `bun add`, `bun install` (local) — the intended package commands. (`bun add -g` / `bun install -g` are still blocked as global installs: bun globals live in `~/.bun`, outside `/agent`, and are wiped on restart.)
73
+ - `npx`, `pnpx` — ephemeral runners, allowed for one-off tool execution (they leave no lockfile or install tree). A global install through them is still nothing to block since they don't install into the dependency tree at all.
73
74
  - A non-bun manager name appearing as a substring or argument: `my-npm-wrapper`, `./npm`, `cat npm-debug.log`, `git commit -m "drop npm"`, `grep -rn npx src/`, `echo "npm install -g foo"`. Only the **command word** of a segment is classified, so a manager name inside an argument, path, quoted string, or longer token never trips the guard.
74
75
 
75
76
  ## Ordering against other bundled plugins
@@ -78,5 +79,5 @@ Registered after `guard` in `src/run/bundled-plugins.ts`. It guards a disjoint s
78
79
 
79
80
  ## Tests
80
81
 
81
- - `policy.test.ts` — pure-function unit tests for the detection logic: every global-install form, every non-bun manager, the allowed-command set (bun/bunx, substrings, paths, quoted text), both bypasses, the global-install-takes-precedence rule, escaped/quoted evasions, leading-assignment preambles, newline-as-separator scoping, falsy `--global=`, option placement, and subshell/substitution detection.
82
- - `index.test.ts` — composition tests: the plugin registers the `tool.before` hook and wires it to the policy (block on global install, block on npx, allow bunx, honor the bypass).
82
+ - `policy.test.ts` — pure-function unit tests for the detection logic: every global-install form, every non-bun install manager, the ephemeral-runner allowance (`npx`/`pnpx`/`bunx`, including behind preamble wrappers), the allowed-command set (bun/bunx, substrings, paths, quoted text), both bypasses, the global-install-takes-precedence rule, escaped/quoted evasions, leading-assignment preambles, newline-as-separator scoping, falsy `--global=`, option placement, and subshell/substitution detection.
83
+ - `index.test.ts` — composition tests: the plugin registers the `tool.before` hook and wires it to the policy (block on global install, block on `npm install`, allow `bunx`/`npx`, honor the bypass).
@@ -3,7 +3,12 @@ import { ACKNOWLEDGE_GUARDS, type GuardBlock, isGuardAcknowledged } from '../gua
3
3
  export const GUARD_GLOBAL_INSTALL = 'globalInstall'
4
4
  export const GUARD_NON_BUN_PACKAGE_MANAGER = 'nonBunPackageManager'
5
5
 
6
- const NON_BUN_MANAGERS = new Set(['npm', 'npx', 'pnpm', 'pnpx', 'yarn'])
6
+ // Only install managers are blocked. The ephemeral runners npx/pnpx (and bunx,
7
+ // which is `bun`) are intentionally absent: they run a tool once without
8
+ // touching the dependency tree or writing a competing lockfile, so they don't
9
+ // undermine the bun-standardization this set protects. classify() skips any
10
+ // command word not in here, so leaving them out is what allows them.
11
+ const NON_BUN_MANAGERS = new Set(['npm', 'pnpm', 'yarn'])
7
12
  const INSTALL_SUBCOMMANDS = new Set(['install', 'i', 'add'])
8
13
 
9
14
  export function checkBunHygieneGuard(options: { tool: string; args: Record<string, unknown> }): GuardBlock | undefined {
@@ -310,8 +315,8 @@ function blockNonBunManager(manager: string, args: Record<string, unknown>): Gua
310
315
  return {
311
316
  block: true,
312
317
  reason: [
313
- `Guard \`${GUARD_NON_BUN_PACKAGE_MANAGER}\` blocked \`${manager}\`. This container standardizes on bun.`,
314
- 'Use `bun install` / `bun add <pkg>` instead of npm/pnpm/yarn, and `bunx <pkg>` instead of npx/pnpx.',
318
+ `Guard \`${GUARD_NON_BUN_PACKAGE_MANAGER}\` blocked \`${manager}\`. This container standardizes on bun for dependency management.`,
319
+ 'Use `bun install` / `bun add <pkg>` instead of npm/pnpm/yarn. Ephemeral runners (`bunx`, `npx`, `pnpx`) are allowed for one-off tool execution.',
315
320
  `Retry with \`${ACKNOWLEDGE_GUARDS}.${GUARD_NON_BUN_PACKAGE_MANAGER}: true\` if this package manager is genuinely required (e.g. a project pinned to a different lockfile).`,
316
321
  ].join(' '),
317
322
  }
@@ -94,6 +94,8 @@ export function createExplorerSubagent(): Subagent<ExplorerPayload> {
94
94
  tools: [readTool, grepTool, findTool, lsTool, bashTool],
95
95
  payloadSchema: explorerPayloadSchema,
96
96
  visibility: 'public',
97
+ rosterDescription:
98
+ 'read-only local recon — code, sessions, memory, git, config; returns the paths and excerpts you need without you grepping the tree yourself; fire liberally',
97
99
  inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
98
100
  toolResultBudget: {
99
101
  maxTotalBytes: 256_000,
@@ -0,0 +1,94 @@
1
+ import type { ReviewVerdict } from '@/channels/github-review-turn-ledger'
2
+
3
+ export type EffectiveApprovalResolver = (target: {
4
+ workspace: string
5
+ prNumber: number
6
+ }) => Promise<{ ok: true; alreadyApproved: boolean } | { ok: false }>
7
+
8
+ export type ApproveBlock = { block: true; reason: string }
9
+
10
+ export type ApproveIdempotencyGuard = {
11
+ guard: (args: {
12
+ callId: string
13
+ workspace: string
14
+ prNumber: number
15
+ verdict: ReviewVerdict
16
+ }) => Promise<ApproveBlock | null>
17
+ release: (args: { callId: string; succeeded: boolean }) => void
18
+ }
19
+
20
+ const DUPLICATE_REASON =
21
+ 'This bot has already approved this pull request. A second APPROVE would post a redundant review. ' +
22
+ 'If you intended to change your verdict, request changes or dismiss the prior review instead of re-approving.'
23
+
24
+ // Makes formal `gh ... event=APPROVE` idempotent per PR across turns, sessions,
25
+ // and restarts. Two layers, each with a single job:
26
+ //
27
+ // 1. An in-process set of *in-flight* reservations (`pendingApprovals`) that
28
+ // blocks a second APPROVE while a first is still mid-flight in the same
29
+ // container — the concurrent-double-approve case the remote read can't see
30
+ // yet (GitHub hasn't recorded the in-flight review).
31
+ // 2. The authoritative GitHub effective-state read, the SOLE source of truth
32
+ // for "the bot already holds a standing APPROVED review." It understands
33
+ // supersession: a later CHANGES_REQUESTED / DISMISSED demotes an earlier
34
+ // APPROVED, so the bot may legitimately re-approve.
35
+ //
36
+ // The set is strictly an in-flight lock — never a persistent "already approved"
37
+ // memory. A completed APPROVE drops its reservation in release(), so the next
38
+ // APPROVE re-consults GitHub instead of being shadowed by a stale local entry.
39
+ // That separation fixes the strand bug: once a standing approval is superseded
40
+ // (PR back to CHANGES_REQUESTED), a stale local lock must not keep blocking a
41
+ // genuine re-approve — only the remote read decides, and it now reports
42
+ // alreadyApproved=false. Reads fail OPEN: a transient GitHub error must never
43
+ // permanently strand a first approval; the in-flight reservation still covers
44
+ // the concurrent case.
45
// Builds the guard/release pair that keeps a formal APPROVE idempotent per PR.
//
// `guard` returns a block decision when an APPROVE for the same PR is already in
// flight in this process, or when the remote resolver reports a standing
// approval; otherwise it returns null and leaves an in-flight reservation that
// `release` later drops. Non-APPROVE verdicts pass through untouched.
//
// deps.resolveEffectiveApproval: remote read of the bot's standing review state.
// A `{ ok: false }` result AND a rejected promise are both treated as "unknown",
// so the guard fails open in either case.
export function createApproveIdempotencyGuard(deps: {
  resolveEffectiveApproval: EffectiveApprovalResolver
}): ApproveIdempotencyGuard {
  // PR keys with an APPROVE currently in flight.
  const pendingApprovals = new Set<string>()
  // callId -> PR key, so release() can find the reservation made by guard().
  const reservedByCall = new Map<string, string>()

  return {
    async guard(args): Promise<ApproveBlock | null> {
      if (args.verdict !== 'APPROVE') return null
      const key = prKey(args.workspace, args.prNumber)

      // Reserve BEFORE the await so two calls racing into guard() for the same
      // PR cannot both observe an empty set: the loser sees the winner's
      // in-flight reservation and is blocked.
      if (pendingApprovals.has(key)) return { block: true, reason: DUPLICATE_REASON }
      pendingApprovals.add(key)
      reservedByCall.set(args.callId, key)

      let alreadyApproved = false
      try {
        const remote = await deps.resolveEffectiveApproval({ workspace: args.workspace, prNumber: args.prNumber })
        alreadyApproved = remote.ok && remote.alreadyApproved
      } catch {
        // A resolver that rejects is handled exactly like `{ ok: false }`: fail
        // open. Letting the rejection escape here would abandon the reservation
        // made above and leave this PR locked for the life of the process.
        alreadyApproved = false
      }

      if (alreadyApproved) {
        // Standing approval upstream. Block, and release the in-flight lock now:
        // a blocked command never reaches tool.after, so release() won't run for
        // this callId and a lingering key would shadow later remote reads.
        reservedByCall.delete(args.callId)
        pendingApprovals.delete(key)
        return { block: true, reason: DUPLICATE_REASON }
      }

      return null
    },

    release(args): void {
      const key = reservedByCall.get(args.callId)
      if (key === undefined) return
      reservedByCall.delete(args.callId)
      // Always drop the in-flight lock; `args.succeeded` is not consulted. After
      // a successful APPROVE the standing approval lives on GitHub, so future
      // calls are decided by the remote read rather than a stale local entry.
      pendingApprovals.delete(key)
    },
  }
}
91
+
92
// Lock key for one pull request: the workspace and the PR number joined by `#`.
function prKey(workspace: string, prNumber: number): string {
  const separator = '#'
  return workspace + separator + prNumber
}
@@ -0,0 +1,98 @@
1
+ import { GITHUB_API_BASE, githubJsonHeaders } from '@/channels/adapters/github/auth-pat'
2
+
3
+ import type { EffectiveApprovalResolver } from './approve-idempotency'
4
+
5
+ // Resolves whether THIS bot already has a standing APPROVED review on a PR, used
6
+ // by the approve-idempotency guard to stop a second formal APPROVE after a
7
+ // restart (the in-process pending set covers the same-container case but is lost
8
+ // when the container bounces). Every failure returns { ok: false } so the guard
9
+ // fails open — a transient read error must never permanently block a genuine
10
+ // first approval.
11
// Returns a resolver that answers "does the token's own account currently hold
// an APPROVED review on this PR?". The answer is taken from the account's most
// recent decisive review row. Any step that cannot be completed (malformed
// workspace, no token, failed lookup) yields `{ ok: false }`.
//
// deps.resolveToken: supplies the API token for a workspace; a rejection or an
//   empty/null token counts as a failure.
// deps.fetchImpl: optional fetch replacement; the global `fetch` is the default.
export function createGithubEffectiveApprovalResolver(deps: {
  resolveToken: (workspace: string) => Promise<string | null>
  fetchImpl?: typeof fetch
}): EffectiveApprovalResolver {
  const doFetch = deps.fetchImpl ?? fetch

  return async (target) => {
    // Only the first two `/`-separated segments are used as owner and repo.
    const segments = target.workspace.split('/')
    const owner = segments[0]
    const repo = segments[1]
    if (!owner || !repo) return { ok: false }

    const token = await deps.resolveToken(target.workspace).catch(() => null)
    if (!token) return { ok: false }

    const selfLogin = await fetchSelfLogin(doFetch, token)
    if (selfLogin === null) return { ok: false }

    const reviews = await fetchReviews(doFetch, token, owner, repo, target.prNumber)
    if (reviews === null) return { ok: false }

    // Walk backwards to the newest row that is both ours and decisive.
    let standingState: string | undefined
    for (let i = reviews.length - 1; i >= 0; i--) {
      const review = reviews[i]
      if (review === undefined) continue
      if (isSelf(review.login, review.isBot, selfLogin) && isDecisive(review.state)) {
        standingState = review.state
        break
      }
    }
    return { ok: true, alreadyApproved: standingState === 'APPROVED' }
  }
}
33
+
34
+ // A bot's effective review is its LATEST decisive one. COMMENTED/PENDING are
35
+ // non-deciding noise that must not clear an earlier APPROVED/CHANGES_REQUESTED;
36
+ // a later CHANGES_REQUESTED or DISMISSED supersedes an earlier APPROVED. The
37
+ // reviews endpoint returns rows in chronological order, so the last decisive
38
+ // row wins. Mirrors src/channels/adapters/github/review-state.ts.
39
+ const DECISIVE = new Set(['APPROVED', 'CHANGES_REQUESTED', 'DISMISSED'])
40
+
41
// True when `state` is a member of the DECISIVE set.
function isDecisive(state: string): boolean {
  const decisive = DECISIVE.has(state)
  return decisive
}
44
+
45
+ type ReviewRow = { state: string; login: string; isBot: boolean }
46
+
47
// Looks up the login of the account behind `token` via `GET {base}/user`.
// Returns the login string, or null when the request throws, the response is
// not ok, the body is not JSON, or `login` is missing / not a string.
async function fetchSelfLogin(fetchImpl: typeof fetch, token: string): Promise<string | null> {
  let login: unknown
  try {
    const res = await fetchImpl(`${GITHUB_API_BASE}/user`, { headers: githubJsonHeaders(token) })
    if (!res.ok) return null
    // An unparseable body collapses to null rather than throwing.
    const payload = (await res.json().catch(() => null)) as { login?: unknown } | null
    login = payload?.login
  } catch {
    return null
  }
  if (typeof login !== 'string') return null
  return login
}
57
+
58
// Fetches ALL review rows for a pull request, in the order the API returns them,
// following pagination until a short page is seen.
//
// The caller picks the LAST decisive row, so stopping at the first page would
// hide the newest reviews on a PR with more than one page of them. Pages are
// requested with `per_page=100&page=N`.
//
// Rows without a string `state` or a string `user.login` are skipped.
// Returns null (so the caller can fail open) when a request throws, a response
// is not ok, a body is not a JSON array, or the PR has more pages than the
// safety bound below allows.
async function fetchReviews(
  fetchImpl: typeof fetch,
  token: string,
  owner: string,
  repo: string,
  prNumber: number,
): Promise<ReviewRow[] | null> {
  const perPage = 100
  // Upper bound on requests per lookup; beyond it the result is "unknown".
  const maxPages = 30
  const rows: ReviewRow[] = []
  try {
    for (let pageNumber = 1; pageNumber <= maxPages; pageNumber++) {
      const url = `${GITHUB_API_BASE}/repos/${owner}/${repo}/pulls/${prNumber}/reviews?per_page=${perPage}&page=${pageNumber}`
      const response = await fetchImpl(url, { headers: githubJsonHeaders(token) })
      if (!response.ok) return null
      const page: unknown = await response.json().catch(() => null)
      if (!Array.isArray(page)) return null
      for (const row of page as RawReview[]) {
        if (typeof row?.state !== 'string') continue
        const login = row.user?.login
        if (typeof login !== 'string') continue
        rows.push({ state: row.state, login, isBot: row.user?.type === 'Bot' })
      }
      // A page shorter than the page size is the last one.
      if (page.length < perPage) return rows
    }
    // Still more pages after the bound: the newest rows were not read, so the
    // effective state cannot be determined from what we have.
    return null
  } catch {
    return null
  }
}
83
+
84
+ const BOT_LOGIN_SUFFIX = '[bot]'
85
+
86
// Decides whether a review row was authored by the account `selfLogin`.
// For rows flagged as Bot, both logins are compared after normalizeBotLogin()
// so `slug[bot]` and the bare `slug` match. Non-bot rows need an exact login
// match, since a human could own a login equal to the bare slug.
function isSelf(login: string, isBot: boolean, selfLogin: string): boolean {
  if (!isBot) return login === selfLogin
  const reviewer = normalizeBotLogin(login)
  const self = normalizeBotLogin(selfLogin)
  return reviewer === self
}
93
+
94
// Strips one trailing BOT_LOGIN_SUFFIX from `login`; logins without the suffix
// are returned unchanged.
function normalizeBotLogin(login: string): string {
  if (!login.endsWith(BOT_LOGIN_SUFFIX)) return login
  const bareLength = login.length - BOT_LOGIN_SUFFIX.length
  return login.slice(0, bareLength)
}
97
+
98
+ type RawReview = { state?: unknown; user?: { login?: string; type?: string } }
@@ -0,0 +1,130 @@
1
+ // Blocks the "dumped review" anti-pattern: a REQUEST_CHANGES whose body anchors
2
+ // `path:line` findings that are not actually posted as inline `comments[]`. The
3
+ // github channel skill mandates `comments[]` and calls a flat-body review "a bug,
4
+ // not a fallback"; this enforces it. Scoped to REQUEST_CHANGES + REST `--input`
5
+ // payloads, since APPROVE/COMMENT bodies and the `gh pr review` porcelain carry
6
+ // no comparable `comments[]` to weigh the body against.
7
+ //
8
+ // A body anchor is "covered" only when an inline comment sits at the same path
9
+ // and a line inside the anchor's range — so a partially-inline review that posts
10
+ // a few token comments while leaving other findings stranded in the body is still
11
+ // blocked on the stranded ones.
12
+
13
+ export type ReviewDumpInput = {
14
+ command: string
15
+ inputFileContents?: string | null
16
+ }
17
+
18
+ export type ReviewDumpDecision = { block: true; reason: string } | null
19
+
20
+ // A finding anchor as a reviewer writes it in prose: a file path (optionally with
21
+ // directories) ending in an extension, then `:line`, then an optional range/list
22
+ // (`107-111`, `807,809`, `12-20`). This is the real notation seen in dumped
23
+ // reviews — NOT GitHub blob `#L123` anchors, which point at files for reference
24
+ // rather than requesting a change on the diff.
25
+ const PATH_LINE_ANCHOR = /((?:[\w.-]+\/)*[\w.-]+\.[A-Za-z]\w*):(\d+(?:[-,]\d+)*)/g
26
+
27
+ const REVIEWS_ENDPOINT = /\/repos\/[^/\s]+\/[^/\s]+\/pulls\/\d+\/reviews\b/
28
+
29
+ // One or two anchors in a prose body is normal narration; at three+ uncovered
30
+ // anchors a review reads as a dump.
31
+ const MIN_ANCHORS = 3
32
+
33
// Decides whether a review submission should be blocked as a "dumped review".
//
// Returns null (allow) unless ALL of the following hold:
//   - the command targets a pull-request reviews endpoint,
//   - the input payload parses and its event is REQUEST_CHANGES,
//   - the body contains at least MIN_ANCHORS distinct path:line anchors,
//   - at least one of those anchors has no matching inline comment.
// Otherwise returns a block decision whose reason reports the counts.
export function detectReviewDump(input: ReviewDumpInput): ReviewDumpDecision {
  const targetsReviewsEndpoint = REVIEWS_ENDPOINT.test(input.command)
  if (!targetsReviewsEndpoint) return null

  const review = parsePayload(input.inputFileContents ?? null)
  if (review === null) return null
  if (review.event !== 'REQUEST_CHANGES') return null

  const bodyAnchors = parseAnchors(review.body)
  if (bodyAnchors.length < MIN_ANCHORS) return null

  // Count anchors left stranded in the body with no inline comment on them.
  let stranded = 0
  for (const anchor of bodyAnchors) {
    if (!isCoveredInline(anchor, review.comments)) stranded += 1
  }
  if (stranded === 0) return null

  const reason = buildReason(bodyAnchors.length, stranded, review.comments.length)
  return { block: true, reason }
}
47
+
48
+ type Anchor = { path: string; lines: ReadonlySet<number> }
49
+ type InlineComment = { path: string; line: number }
50
+ type ReviewPayload = { event: string; body: string; comments: readonly InlineComment[] }
51
+
52
// Parses the JSON text of a review payload into its event, body and inline
// comments. Returns null for null/empty input, invalid JSON, or a JSON value
// that is not an object. `event` is trimmed and upper-cased; a non-string
// `event` or `body` becomes ''.
function parsePayload(contents: string | null): ReviewPayload | null {
  if (!contents) return null
  try {
    const decoded: unknown = JSON.parse(contents)
    if (decoded === null || typeof decoded !== 'object') return null
    const fields = decoded as Record<string, unknown>

    const rawEvent = fields.event
    const rawBody = fields.body
    const event = typeof rawEvent === 'string' ? rawEvent.trim().toUpperCase() : ''
    const body = typeof rawBody === 'string' ? rawBody : ''
    return { event, body, comments: parseComments(fields.comments) }
  } catch {
    return null
  }
}
66
+
67
// Flattens the payload's `comments[]` into one { path, line } entry per covered
// line. An entry needs a string `path` and a `line`; an optional `start_line`
// turns it into a span that contributes every line between the two.
//
// Line numbers must be safe integers. JSON.parse yields Infinity for literals
// such as `1e999`, and an infinite or non-integer bound would make the span
// expansion below loop forever or emit lines that can never match. An entry
// with an invalid `line` is skipped; an invalid `start_line` is ignored.
// A span is expanded for at most `maxSpanLines` lines, counted back from its
// highest line, so a bogus huge span cannot exhaust memory.
//
// Returns [] when `value` is not an array.
function parseComments(value: unknown): InlineComment[] {
  const maxSpanLines = 10_000
  if (!Array.isArray(value)) return []
  const out: InlineComment[] = []
  for (const entry of value) {
    if (typeof entry !== 'object' || entry === null) continue
    const rec = entry as Record<string, unknown>
    const path = typeof rec.path === 'string' ? rec.path : null
    // GitHub keys an inline comment on `line` (and `start_line` for a span); a
    // span covers each line it touches.
    const line = typeof rec.line === 'number' && Number.isSafeInteger(rec.line) ? rec.line : null
    if (path === null || line === null) continue
    const startLine =
      typeof rec.start_line === 'number' && Number.isSafeInteger(rec.start_line) ? rec.start_line : line
    const last = Math.max(startLine, line)
    const first = Math.max(Math.min(startLine, line), last - maxSpanLines + 1)
    for (let l = first; l <= last; l++) {
      out.push({ path, line: l })
    }
  }
  return out
}
85
+
86
// Collects the distinct path:line anchors found in a review body, in order of
// first appearance. Two matches are the same anchor when both the path text and
// the line-spec text are identical; repeats are dropped.
function parseAnchors(body: string): Anchor[] {
  const byKey = new Map<string, Anchor>()
  for (const match of body.matchAll(PATH_LINE_ANCHOR)) {
    const path = match[1] as string
    const spec = match[2] as string
    const key = `${path}:${spec}`
    if (byKey.has(key)) continue
    byKey.set(key, { path, lines: expandLineSpec(spec) })
  }
  // Map iteration follows insertion order, i.e. first-seen order.
  return Array.from(byKey.values())
}
97
+
98
// Expands a line spec into the set of line numbers it names:
// `12-20` -> 12..20; `807,809` -> {807,809}; `42` -> {42}.
//
// Parts are comma-separated; each part is a single number or `start-end` (in
// either order; anything after a second `-` is ignored). A part is skipped when
// a bound is not a run of digits or is not a safe integer, so an empty part
// never turns into line 0. Each range is expanded for at most `maxRangeLines`
// lines, counted up from its lowest line, so a spec such as `1-999999999999`
// in a review body cannot stall the process or exhaust memory.
function expandLineSpec(spec: string): Set<number> {
  const maxRangeLines = 10_000
  const digitsOnly = /^\d+$/
  const lines = new Set<number>()
  for (const part of spec.split(',')) {
    const [rawStart, rawEnd] = part.split('-')
    if (rawStart === undefined || !digitsOnly.test(rawStart)) continue
    if (rawEnd !== undefined && !digitsOnly.test(rawEnd)) continue
    const start = Number(rawStart)
    const end = rawEnd === undefined ? start : Number(rawEnd)
    if (!Number.isSafeInteger(start) || !Number.isSafeInteger(end)) continue
    const first = Math.min(start, end)
    const last = Math.min(Math.max(start, end), first + maxRangeLines - 1)
    for (let l = first; l <= last; l++) lines.add(l)
  }
  return lines
}
110
+
111
// An anchor counts as covered when some inline comment sits on one of the
// anchor's lines AND on an aligned path. Path alignment is delegated to
// pathsAlign(), which lets a short body path (`languages.ts`) match the full
// repo path carried by comments[] (`apps/.../languages.ts`).
function isCoveredInline(anchor: Anchor, comments: readonly InlineComment[]): boolean {
  for (const comment of comments) {
    if (!anchor.lines.has(comment.line)) continue
    if (pathsAlign(anchor.path, comment.path)) return true
  }
  return false
}
117
+
118
// Two paths align when they are equal, or when the longer one ends with
// `/` + the shorter one (so the match always starts on a path-segment boundary).
function pathsAlign(anchorPath: string, commentPath: string): boolean {
  if (anchorPath === commentPath) return true
  // Only the strictly longer path can end with "/" + the other one.
  const anchorIsLonger = anchorPath.length > commentPath.length
  const longer = anchorIsLonger ? anchorPath : commentPath
  const shorter = anchorIsLonger ? commentPath : anchorPath
  return longer.endsWith(`/${shorter}`)
}
122
+
123
// Builds the single-line explanation shown when a review is blocked: how many
// anchors the body has, how many lack an inline comment, how many inline
// comment entries the payload carried, and how to re-submit.
function buildReason(total: number, uncovered: number, commentCount: number): string {
  const verb = uncovered === 1 ? 'is' : 'are'
  const plural = commentCount === 1 ? '' : 's'

  const summary = `This REQUEST_CHANGES review body anchors ${total} findings to specific lines (path:line), but ${uncovered} of them ${verb} not posted as inline comments (payload has ${commentCount} inline comment${plural}).`
  const rule = 'Every line-anchored change request belongs on its diff line, not flattened into the review body.'
  const remedyLead = 'Re-submit with each stranded finding as an entry in the `comments[]` array of the reviews payload'
  const remedyShape =
    '(`{ "path": "...", "line": N, "side": "RIGHT", "body": "..." }`), keeping `body` for the high-level summary only.'

  return `${summary} ${rule} ${remedyLead} ${remedyShape}`
}