typeclaw 0.29.0 → 0.30.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/package.json +1 -1
  2. package/scripts/verify-realproc-sandbox.sh +58 -0
  3. package/src/agent/index.ts +6 -0
  4. package/src/agent/live-subagents.ts +5 -0
  5. package/src/agent/plugin-tools.ts +79 -10
  6. package/src/agent/subagent-drain.ts +150 -0
  7. package/src/agent/subagents.ts +34 -3
  8. package/src/agent/system-prompt.ts +1 -1
  9. package/src/agent/tools/spawn-subagent.ts +13 -1
  10. package/src/bundled-plugins/bun-hygiene/README.md +12 -11
  11. package/src/bundled-plugins/bun-hygiene/policy.ts +8 -3
  12. package/src/bundled-plugins/github-cli-auth/approve-idempotency.ts +116 -35
  13. package/src/bundled-plugins/github-cli-auth/effective-approval.ts +14 -9
  14. package/src/bundled-plugins/github-cli-auth/index.ts +3 -3
  15. package/src/bundled-plugins/planner/planner.ts +2 -1
  16. package/src/bundled-plugins/researcher/researcher.ts +9 -2
  17. package/src/bundled-plugins/reviewer/reviewer.ts +2 -1
  18. package/src/channels/adapters/discord-bot-format.ts +191 -0
  19. package/src/channels/adapters/discord-bot.ts +2 -1
  20. package/src/channels/adapters/github/inbound.ts +88 -30
  21. package/src/channels/adapters/github/review-state.ts +27 -0
  22. package/src/channels/github-review-claim.ts +15 -3
  23. package/src/channels/outbound-flood-filter.ts +70 -3
  24. package/src/channels/router.ts +53 -0
  25. package/src/compose/discover.ts +5 -1
  26. package/src/config/config.ts +38 -0
  27. package/src/container/start.ts +14 -0
  28. package/src/migrations/index.ts +35 -0
  29. package/src/migrations/secrets-v1-to-v2.ts +344 -0
  30. package/src/run/index.ts +13 -0
  31. package/src/sandbox/availability.ts +12 -0
  32. package/src/sandbox/build.ts +53 -9
  33. package/src/sandbox/index.ts +1 -1
  34. package/src/sandbox/policy.ts +17 -1
  35. package/typeclaw.schema.json +24 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "typeclaw",
3
- "version": "0.29.0",
3
+ "version": "0.30.1",
4
4
  "homepage": "https://github.com/typeclaw/typeclaw#readme",
5
5
  "bugs": {
6
6
  "url": "https://github.com/typeclaw/typeclaw/issues"
package/scripts/verify-realproc-sandbox.sh ADDED
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env bash
2
+ # Manual acceptance check for the sandbox.realProc strategy (src/sandbox/build.ts).
3
+ # Not a unit test: it needs a Linux container with CAP_SYS_ADMIN, which the macOS
4
+ # dev host and standard CI runners cannot provide, so it lives here as an
5
+ # operator-runnable script instead of a skipIf-everywhere test.
6
+ #
7
+ # Proves two properties of the two-phase `unshare --mount-proc -- bwrap` sandbox:
8
+ # 1. An external package runner (bunx) runs to completion (no Bun "NotDir").
9
+ # 2. A secret in a sibling process's environment NEVER appears in any
10
+ # /proc/*/environ the sandbox can read (PID-namespace scoping holds).
11
+ #
12
+ # Usage: scripts/verify-realproc-sandbox.sh [image]
13
+ # image defaults to ghcr.io/typeclaw/typeclaw-base:<version-from-package.json>
14
+ set -euo pipefail
15
+
16
+ IMAGE="${1:-}"
17
+ if [ -z "$IMAGE" ]; then
18
+ version="$(node -p "require('./package.json').version" 2>/dev/null || echo latest)"
19
+ IMAGE="ghcr.io/typeclaw/typeclaw-base:${version}"
20
+ fi
21
+
22
+ secret="TYPECLAW_REALPROC_LEAK_CANARY_$$"
23
+
24
+ inner='
25
+ echo "=== bunx via real-proc sandbox ==="
26
+ bunx cowsay "real-proc ok" 2>&1 | tail -6
27
+ echo "bunx exit=$?"
28
+ echo "=== visible pids (sandbox should NOT see the canary holder) ==="
29
+ ls /proc | grep -E "^[0-9]+$" | tr "\n" " "; echo
30
+ echo "=== leak scan ==="
31
+ found=0
32
+ for f in /proc/[0-9]*/environ; do
33
+ if tr "\0" "\n" < "$f" 2>/dev/null | grep -q "CANARY_TOKEN"; then
34
+ echo "LEAK:$f"; found=1
35
+ fi
36
+ done
37
+ if [ $found -eq 0 ]; then echo "NO_LEAK_CONFIRMED"; else echo "LEAK_DETECTED"; exit 1; fi
38
+ '
39
+ inner="${inner//CANARY_TOKEN/$secret}"
40
+
41
+ # The real-proc argv shape mirrors buildArgv() in src/sandbox/build.ts. Keep in
42
+ # sync if that helper changes.
43
+ runner="
44
+ ${secret}_holder() { :; }
45
+ env CANARY=${secret} sleep 120 &
46
+ unshare --pid --fork --mount --mount-proc -- \
47
+ bwrap --unshare-user --unshare-ipc --unshare-uts --unshare-cgroup \
48
+ --new-session --die-with-parent --clearenv \
49
+ --setenv PATH /usr/local/bin:/usr/bin:/bin --setenv HOME /tmp --setenv LANG C.UTF-8 \
50
+ --ro-bind /usr /usr --ro-bind /etc /etc --dev /dev --tmpfs /tmp \
51
+ --ro-bind-try /bin /bin --ro-bind-try /sbin /sbin --ro-bind-try /lib /lib --ro-bind-try /lib64 /lib64 \
52
+ --ro-bind /proc /proc \
53
+ bash -c '$inner'
54
+ "
55
+
56
+ echo "Image: $IMAGE"
57
+ docker run --rm --security-opt seccomp=unconfined --cap-add SYS_ADMIN \
58
+ -e "CANARY=${secret}" "$IMAGE" bash -c "$runner"
package/src/agent/index.ts CHANGED
@@ -213,6 +213,7 @@ export type CreateSessionOptions = {
213
213
  liveSubagentRegistry?: LiveSubagentRegistry
214
214
  subagentRegistry?: SubagentRegistry
215
215
  createSessionForSubagent?: CreateSessionForSubagent
216
+ allowBackgroundFromSubagent?: boolean
216
217
  }
217
218
 
218
219
  export type CreateSessionResult = {
@@ -357,6 +358,7 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
357
358
  getOrigin,
358
359
  permissions: options.permissions,
359
360
  stream: options.stream,
361
+ allowBackgroundFromSubagent: options.allowBackgroundFromSubagent,
360
362
  }),
361
363
  ]
362
364
  : [
@@ -726,6 +728,7 @@ export function buildSubagentOrchestrationTools(opts: {
726
728
  getOrigin: () => SessionOrigin | undefined
727
729
  permissions: PermissionService | undefined
728
730
  stream: Stream | undefined
731
+ allowBackgroundFromSubagent?: boolean
729
732
  }): ToolDefinition[] {
730
733
  if (
731
734
  opts.liveRegistry === undefined ||
@@ -745,6 +748,9 @@ export function buildSubagentOrchestrationTools(opts: {
745
748
  getOrigin: opts.getOrigin,
746
749
  ...(opts.permissions ? { permissions: opts.permissions } : {}),
747
750
  ...(opts.stream ? { stream: opts.stream } : {}),
751
+ ...(opts.allowBackgroundFromSubagent !== undefined
752
+ ? { allowBackgroundFromSubagent: opts.allowBackgroundFromSubagent }
753
+ : {}),
748
754
  }),
749
755
  createSubagentOutputTool({
750
756
  liveRegistry: opts.liveRegistry,
package/src/agent/live-subagents.ts CHANGED
@@ -23,6 +23,11 @@ export type LiveSubagent = {
23
23
  // subagent_output/subagent_cancel. Absent when no permission service was
24
24
  // active at spawn, in which case the cap fails closed.
25
25
  spawnedByRole?: string
26
+ // True when spawned with run_in_background. Only background spawns deliver
27
+ // their result out-of-band (via the subagent.completed broadcast and the
28
+ // parent's drain); synchronous spawns return their result inline as the tool
29
+ // result, so the drain MUST NOT re-prompt for them. See runSubagentDrain.
30
+ background?: boolean
26
31
  startedAt: number
27
32
  status: SubagentStatus
28
33
  completion?: SubagentCompletion
package/src/agent/plugin-tools.ts CHANGED
@@ -23,6 +23,7 @@ import {
23
23
  checkNonWorkspaceWriteGuard,
24
24
  checkSkillAuthoringGuard,
25
25
  } from '@/bundled-plugins/guard/policy'
26
+ import { config } from '@/config/config'
26
27
  import type { PermissionService } from '@/permissions/permissions'
27
28
  import type {
28
29
  BuiltinToolRef,
@@ -40,6 +41,7 @@ import {
40
41
  ensureSessionTmpDir,
41
42
  mapVirtualTmpPath,
42
43
  resolveHiddenPaths,
44
+ resolveProcSelfExe,
43
45
  resolveProtectedZones,
44
46
  resolveWritableZones,
45
47
  subtractMasked,
@@ -463,13 +465,24 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
463
465
  await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId, bashEnvOverlay)
464
466
  }
465
467
 
466
- if (TMP_REDIRECT_TOOLS.has(tool.name) && opts.permissions !== undefined) {
467
- await applyTmpPathRedirect(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId)
468
- }
468
+ const tmpRedirect =
469
+ TMP_REDIRECT_TOOLS.has(tool.name) && opts.permissions !== undefined
470
+ ? await applyTmpPathRedirect(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId)
471
+ : undefined
469
472
 
470
- const result = await bashEnvStore.run(bashEnvOverlay, () =>
471
- tool.execute(toolCallId, mutableArgs as Static<TParams>, signal, onUpdate),
472
- )
473
+ let rawResult: ToolResult
474
+ try {
475
+ rawResult = await bashEnvStore.run(bashEnvOverlay, () =>
476
+ tool.execute(toolCallId, mutableArgs as Static<TParams>, signal, onUpdate),
477
+ )
478
+ } catch (error) {
479
+ // A throwing tool (pi's bash rejects on non-zero exit) must still run
480
+ // tool.after so cleanup hooks fire — e.g. the github approve guard's
481
+ // release, whose absence stranded a PR as "already approved" (PR #672).
482
+ await runToolAfterSafely(opts, tool.name, toolCallId, toErrorResult(error))
483
+ throw error
484
+ }
485
+ const result = tmpRedirect !== undefined ? restoreTmpPathInResult(rawResult, tmpRedirect) : rawResult
473
486
  const resolved = loopGate.resolve({ content: result.content as ContentPart[], details: result.details })
474
487
  if ('deferredBlock' in resolved) {
475
488
  fireLoopAbort(opts.getAbort)
@@ -490,6 +503,26 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
490
503
  })
491
504
  }
492
505
 
506
+ function toErrorResult(error: unknown): ToolResult {
507
+ const message = error instanceof Error ? error.message : String(error)
508
+ return { content: [{ type: 'text', text: message }], details: { error: message } }
509
+ }
510
+
511
+ // The original tool error must always propagate, so a failure inside the
512
+ // after-hook itself is swallowed rather than masking the real cause.
513
+ async function runToolAfterSafely(
514
+ opts: WrapSystemToolOptions,
515
+ tool: string,
516
+ callId: string,
517
+ result: ToolResult,
518
+ ): Promise<void> {
519
+ try {
520
+ await opts.hooks.runToolAfter({ tool, sessionId: opts.sessionId, callId, result })
521
+ } catch {
522
+ // intentionally ignored: never mask the originating tool error
523
+ }
524
+ }
525
+
493
526
  export function defaultBuiltinPiAgentTools(): AgentTool<any, any>[] {
494
527
  return [piReadTool, piBashTool, piEditTool, piWriteTool, piGrepTool, piFindTool, piLsTool]
495
528
  }
@@ -550,6 +583,17 @@ async function applyBashSandbox(
550
583
  // bwrap does --clearenv, so the overlay must be re-introduced via env.set or
551
584
  // it would never reach the sandboxed process (the non-sandboxed spawnHook
552
585
  // path does not run when the command is rewritten to a bwrap invocation).
586
+ // 'real-proc' gives a sandboxed JS package runner a working /proc/self/{fd,
587
+ // maps} so `bunx`/`bun add`/`bun run <pkg>` stop aborting with Bun's NotDir.
588
+ // Opt-in (default 'tmpfs') because it makes start.ts grant the container
589
+ // CAP_SYS_ADMIN at boot. Read from the boot-time `config` snapshot, NOT live
590
+ // getConfig(): sandbox.realProc is restart-required, and the strategy MUST
591
+ // track the boot-time capability. A `typeclaw reload` that flips realProc to
592
+ // true would otherwise make this emit `unshare --mount-proc` in a container
593
+ // booted WITHOUT CAP_SYS_ADMIN, so the mount fails instead of the old tmpfs
594
+ // strategy holding until restart. `config` never changes on reload.
595
+ // procSelfExe is only consumed by the 'tmpfs' branch.
596
+ const realProc = config.sandbox.realProc
553
597
  const { commandString } = buildSandboxedCommand(command, {
554
598
  mounts: [
555
599
  { type: 'ro-bind', source: agentDir, dest: agentDir },
@@ -560,6 +604,8 @@ async function applyBashSandbox(
560
604
  protected: protectedZones,
561
605
  network: 'inherit',
562
606
  cwd: agentDir,
607
+ proc: realProc ? 'real-proc' : 'tmpfs',
608
+ procSelfExe: resolveProcSelfExe(),
563
609
  ...(envOverlay !== undefined ? { env: { set: envOverlay } } : {}),
564
610
  })
565
611
  mutableArgs.command = commandString
@@ -579,24 +625,47 @@ const TMP_REDIRECT_TOOLS = new Set(['read', 'write', 'edit', 'grep', 'find', 'ls
579
625
  // different files. Rewriting the file tool's on-disk path to the same session
580
626
  // backing dir makes every layer resolve /tmp/foo to one file. Unsandboxed roles
581
627
  // (empty masks) are left untouched: their bash already shares the real /tmp.
628
+ type TmpRedirect = { original: string; backing: string }
629
+
582
630
  async function applyTmpPathRedirect(
583
631
  mutableArgs: Record<string, unknown>,
584
632
  permissions: PermissionService,
585
633
  origin: SessionOrigin | undefined,
586
634
  agentDir: string,
587
635
  sessionId: string,
588
- ): Promise<void> {
636
+ ): Promise<TmpRedirect | undefined> {
589
637
  const rawPath = mutableArgs.path
590
- if (typeof rawPath !== 'string') return
638
+ if (typeof rawPath !== 'string') return undefined
591
639
 
592
640
  const { dirs, files } = resolveHiddenPaths(permissions, origin, agentDir)
593
- if (dirs.length === 0 && files.length === 0) return
641
+ if (dirs.length === 0 && files.length === 0) return undefined
594
642
 
595
643
  const backing = mapVirtualTmpPath(agentDir, sessionId, rawPath)
596
- if (backing === undefined) return
644
+ if (backing === undefined || backing === rawPath) return undefined
597
645
 
598
646
  await ensureSessionTmpDir(sessionId)
599
647
  mutableArgs.path = backing
648
+ return { original: rawPath, backing }
649
+ }
650
+
651
+ // The redirect swaps the model-facing /tmp path for its session backing dir
652
+ // before execution; the file tool then echoes that backing path in its receipt
653
+ // text and details. Reverse it on the way out so the model only ever sees the
654
+ // path it asked for — a leaked backing path is unreachable inside the bwrap
655
+ // bash sandbox, so reusing it in `gh api --input` fails (the PR #672 strand).
656
+ function restoreTmpPathInResult(result: ToolResult, redirect: TmpRedirect): ToolResult {
657
+ const content = (result.content as ContentPart[]).map((part) =>
658
+ part.type === 'text' ? { ...part, text: part.text.split(redirect.backing).join(redirect.original) } : part,
659
+ )
660
+ const details =
661
+ isRecord(result.details) && result.details.path === redirect.backing
662
+ ? { ...result.details, path: redirect.original }
663
+ : result.details
664
+ return { content, details }
665
+ }
666
+
667
+ function isRecord(value: unknown): value is Record<string, unknown> {
668
+ return typeof value === 'object' && value !== null
600
669
  }
601
670
 
602
671
  function appendLoopWarning(result: ToolResult, message: string): ToolResult {
@@ -0,0 +1,150 @@
1
+ import type { Stream, Unsubscribe } from '@/stream'
2
+
3
+ import type { LiveSubagentRegistry } from './live-subagents'
4
+ import { parseSubagentCompletedPayload, renderSubagentCompletionReminder } from './subagent-completion-reminder'
5
+
6
+ // Presence of this capability is the single signal that background spawning is
7
+ // permitted from a subagent (see the spawn_subagent guard); absence keeps the
8
+ // subagent a one-shot leaf. It carries everything the drain needs: the shared
9
+ // Stream to listen on, the subagent's own sessionId to filter completions by,
10
+ // and the registry that is the source of truth for child state.
11
+ export type SubagentBackgroundDrain = {
12
+ stream: Stream
13
+ sessionId: string
14
+ liveRegistry: LiveSubagentRegistry
15
+ }
16
+
17
+ export type DrainPrompt = (text: string) => Promise<void>
18
+
19
+ export type RunSubagentDrainOptions = {
20
+ drain: SubagentBackgroundDrain
21
+ prompt: DrainPrompt
22
+ // Cooperative cancellation: when this returns true the loop stops re-prompting
23
+ // and returns, letting the caller's timeout/abort path dispose the session.
24
+ cancelled?: () => boolean
25
+ }
26
+
27
+ // Re-prompts a subagent with its children's completion reminders until a fixed
28
+ // point, called after the subagent's initial prompt resolves. The registry is
29
+ // the source of truth; stream broadcasts are only wakeups, so a duplicated or
30
+ // missed broadcast cannot corrupt termination (every iteration re-derives state
31
+ // from the registry). Each child's reminder is delivered at most once (tracked
32
+ // by taskId). Terminates only when no children are running AND none are
33
+ // completed-but-undelivered; a child spawned during a reminder turn reappears as
34
+ // `running` in the next snapshot and keeps the loop alive, so no separate
35
+ // "spawned nothing" flag is needed. The watch MUST have been started before the
36
+ // initial prompt (see `beginSubagentDrainWatch`) to close the lost-wakeup race.
37
+ export async function runSubagentDrain(watch: SubagentDrainWatch, options: RunSubagentDrainOptions): Promise<void> {
38
+ const { drain, prompt, cancelled } = options
39
+ const delivered = new Set<string>()
40
+ try {
41
+ while (cancelled === undefined || !cancelled()) {
42
+ const pending = collectPendingReminders(drain, delivered)
43
+ if (pending.length === 0) {
44
+ if (!hasRunningChildren(drain)) return
45
+ // Children still running but none newly completed: wait for the next
46
+ // wakeup, then re-derive from the registry.
47
+ const woke = await watch.waitForWakeup()
48
+ if (!woke) return
49
+ continue
50
+ }
51
+ for (const reminder of pending) {
52
+ if (cancelled !== undefined && cancelled()) return
53
+ delivered.add(reminder.taskId)
54
+ await prompt(reminder.text)
55
+ }
56
+ }
57
+ } finally {
58
+ watch.stop()
59
+ }
60
+ }
61
+
62
+ type PendingReminder = { taskId: string; text: string }
63
+
64
+ function collectPendingReminders(drain: SubagentBackgroundDrain, delivered: Set<string>): PendingReminder[] {
65
+ const children = drain.liveRegistry.list({ parentSessionId: drain.sessionId })
66
+ const pending: PendingReminder[] = []
67
+ for (const child of children) {
68
+ // Synchronous spawns return their result inline via the tool call; only
69
+ // background spawns deliver out-of-band and need a drain reminder.
70
+ if (child.background !== true) continue
71
+ if (child.status === 'running') continue
72
+ if (delivered.has(child.taskId)) continue
73
+ const completion = child.completion
74
+ const text = renderSubagentCompletionReminder({
75
+ subagent: child.subagentName,
76
+ taskId: child.taskId,
77
+ ok: child.status === 'completed',
78
+ durationMs: completion?.durationMs ?? 0,
79
+ ...(completion?.error !== undefined ? { error: completion.error } : {}),
80
+ })
81
+ pending.push({ taskId: child.taskId, text })
82
+ }
83
+ return pending
84
+ }
85
+
86
+ function hasRunningChildren(drain: SubagentBackgroundDrain): boolean {
87
+ // Only background children gate termination. A sync child still marked running
88
+ // in the registry settles via its inline tool call, never via a broadcast
89
+ // wakeup, so waiting on it would hang the drain forever.
90
+ return drain.liveRegistry
91
+ .list({ parentSessionId: drain.sessionId })
92
+ .some((c) => c.background === true && c.status === 'running')
93
+ }
94
+
95
+ export type SubagentDrainWatch = {
96
+ // Resolves true on a child-completion wakeup, false once stopped. A wakeup
97
+ // that arrives before anyone waits is latched (pendingWake), so a completion
98
+ // during the subagent's prompt is not lost.
99
+ waitForWakeup: () => Promise<boolean>
100
+ stop: () => void
101
+ }
102
+
103
+ export function beginSubagentDrainWatch(drain: SubagentBackgroundDrain): SubagentDrainWatch {
104
+ let stopped = false
105
+ let pendingWake = false
106
+ let resolveWaiter: ((woke: boolean) => void) | null = null
107
+
108
+ const wake = (): void => {
109
+ if (resolveWaiter !== null) {
110
+ const r = resolveWaiter
111
+ resolveWaiter = null
112
+ r(true)
113
+ return
114
+ }
115
+ pendingWake = true
116
+ }
117
+
118
+ const unsubscribe: Unsubscribe = drain.stream.subscribe({ target: { kind: 'broadcast' } }, (msg) => {
119
+ const parsed = parseSubagentCompletedPayload(msg.payload)
120
+ if (parsed === null) return
121
+ if (parsed.parentSessionId !== drain.sessionId) return
122
+ wake()
123
+ })
124
+
125
+ return {
126
+ waitForWakeup: () =>
127
+ new Promise<boolean>((resolve) => {
128
+ if (stopped) {
129
+ resolve(false)
130
+ return
131
+ }
132
+ if (pendingWake) {
133
+ pendingWake = false
134
+ resolve(true)
135
+ return
136
+ }
137
+ resolveWaiter = resolve
138
+ }),
139
+ stop: () => {
140
+ if (stopped) return
141
+ stopped = true
142
+ unsubscribe()
143
+ if (resolveWaiter !== null) {
144
+ const r = resolveWaiter
145
+ resolveWaiter = null
146
+ r(false)
147
+ }
148
+ },
149
+ }
150
+ }
package/src/agent/subagents.ts CHANGED
@@ -7,6 +7,12 @@ import type { Stream, Unsubscribe } from '@/stream'
7
7
  import { type AgentSession, createSession } from './index'
8
8
  import { subscribeProviderErrors } from './provider-error'
9
9
  import type { SessionOrigin } from './session-origin'
10
+ import {
11
+ beginSubagentDrainWatch,
12
+ runSubagentDrain,
13
+ type SubagentBackgroundDrain,
14
+ type SubagentDrainWatch,
15
+ } from './subagent-drain'
10
16
  import { renderTurnTimeAnchor } from './system-prompt'
11
17
  import type { ToolResultBudget } from './tool-result-budget'
12
18
 
@@ -62,6 +68,12 @@ export type SubagentShared<P = unknown> = {
62
68
  // registry scoping. Default (unset/false) keeps the subagent a leaf — the
63
69
  // historical contract for explorer/scout/memory-logger/etc.
64
70
  canSpawnSubagents?: boolean
71
+ // Opt-in: allow this subagent to spawn background children AND drain their
72
+ // completions back into its own session (requires canSpawnSubagents). Default
73
+ // (unset/false) keeps background spawns denied from this subagent — it must
74
+ // use synchronous spawns. Only meaningful when the runtime wires the drain
75
+ // capability (createSessionForSubagent provides stream+sessionId+liveRegistry).
76
+ canBackgroundSpawnSubagents?: boolean
65
77
  // Wall-clock ceiling on a single spawn, enforced at the orchestration
66
78
  // layer (both `dispatchSpawnSubagent` and the stream-driven
67
79
  // `SubagentConsumer`). When exceeded, the orchestrator's `await` settles
@@ -116,6 +128,7 @@ export type CreateSessionForSubagentResult = {
116
128
  agentDir?: string
117
129
  origin?: SessionOrigin
118
130
  getTranscriptPath?: () => string | undefined
131
+ backgroundDrain?: SubagentBackgroundDrain
119
132
  }
120
133
  export type CreateSessionForSubagentOptions = {
121
134
  name?: string
@@ -152,6 +165,7 @@ type NormalizedSubagentSession = {
152
165
  agentDir: string | undefined
153
166
  origin: SessionOrigin | undefined
154
167
  getTranscriptPath: (() => string | undefined) | undefined
168
+ backgroundDrain: SubagentBackgroundDrain | undefined
155
169
  }
156
170
 
157
171
  function normalizeSubagentSession(result: AgentSession | CreateSessionForSubagentResult): NormalizedSubagentSession {
@@ -164,6 +178,7 @@ function normalizeSubagentSession(result: AgentSession | CreateSessionForSubagen
164
178
  agentDir: result.agentDir,
165
179
  origin: result.origin,
166
180
  getTranscriptPath: result.getTranscriptPath,
181
+ backgroundDrain: result.backgroundDrain,
167
182
  }
168
183
  }
169
184
  return {
@@ -174,6 +189,7 @@ function normalizeSubagentSession(result: AgentSession | CreateSessionForSubagen
174
189
  agentDir: undefined,
175
190
  origin: undefined,
176
191
  getTranscriptPath: undefined,
192
+ backgroundDrain: undefined,
177
193
  }
178
194
  }
179
195
 
@@ -214,14 +230,16 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
214
230
  }
215
231
 
216
232
  const runSession: RunSession = async (override) => {
217
- const { session, dispose, hooks, sessionId, agentDir, origin, getTranscriptPath } = normalizeSubagentSession(
218
- await createSessionForSubagent(subagent, sessionOptions),
219
- )
233
+ const { session, dispose, hooks, sessionId, agentDir, origin, getTranscriptPath, backgroundDrain } =
234
+ normalizeSubagentSession(await createSessionForSubagent(subagent, sessionOptions))
235
+ let aborted = false
236
+ let drainWatch: SubagentDrainWatch | undefined
220
237
  if (options.onSessionCreated !== undefined) {
221
238
  options.onSessionCreated({
222
239
  session,
223
240
  sessionId,
224
241
  abort: async () => {
242
+ aborted = true
225
243
  await session.abort()
226
244
  },
227
245
  })
@@ -239,6 +257,9 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
239
257
  if (hooks && turnEvent !== undefined) {
240
258
  await hooks.runSessionTurnStart({ ...turnEvent, userPrompt: userPromptForTurn })
241
259
  }
260
+ if (backgroundDrain !== undefined) {
261
+ drainWatch = beginSubagentDrainWatch(backgroundDrain)
262
+ }
242
263
  try {
243
264
  await session.prompt(`${renderTurnTimeAnchor()}\n\n${userPromptForTurn}`)
244
265
  } finally {
@@ -246,6 +267,15 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
246
267
  await hooks.runSessionTurnEnd(turnEvent)
247
268
  }
248
269
  }
270
+ if (drainWatch !== undefined && backgroundDrain !== undefined) {
271
+ await runSubagentDrain(drainWatch, {
272
+ drain: backgroundDrain,
273
+ prompt: async (text) => {
274
+ await session.prompt(`${renderTurnTimeAnchor()}\n\n${text}`)
275
+ },
276
+ cancelled: () => aborted,
277
+ })
278
+ }
249
279
  if (hooks && sessionId !== undefined) {
250
280
  await hooks.runSessionIdle({
251
281
  sessionId,
@@ -259,6 +289,7 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
259
289
  if (hooks && sessionId !== undefined) {
260
290
  await hooks.runSessionEnd({ sessionId, ...(origin !== undefined ? { origin } : {}) })
261
291
  }
292
+ drainWatch?.stop()
262
293
  session.dispose()
263
294
  await dispose()
264
295
  }
package/src/agent/system-prompt.ts CHANGED
@@ -93,7 +93,7 @@ Delegate focused work to subagents via \`spawn_subagent\`, \`subagent_output\`,
93
93
 
94
94
  There are three delegation modes. Pick deliberately.
95
95
 
96
- **Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer. Match the worker to the depth: a fast or narrow web lookup goes to \`scout\`; a fuzzy question that needs decomposition, many sources, cross-validation, and a synthesized verdict goes to \`researcher\` (don't do that grind inline with \`web_search\` yourself).
96
+ **Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer. Match the worker to the depth: a fast or narrow web lookup goes to \`scout\`; a fuzzy question that needs decomposition, many sources, cross-validation, and a synthesized verdict goes to \`researcher\` (don't do that grind inline with \`web_search\` yourself). When the user *explicitly* says "research"/"investigate" (or equivalent), you MUST spawn \`researcher\` — answering from training memory or a single inline \`web_search\` does not satisfy the request, even if you think you know the answer. (Fanning out \`scout\`/\`explorer\` underneath is fine, but it does not replace \`researcher\`.)
97
97
 
98
98
  **Mode B — Delegate-and-converse.** Asked to DO something long-running (>~30s: installs, builds, \`docker\`, scrapes, long test suites, multi-host loops, any noisy "fetch N and synthesize" chain)? Don't run it inline — blocking your own \`bash\` freezes the conversation and stalls the channel typing heartbeat (\`MAX_TYPING_HEARTBEAT_MS\`). Spawn one subagent (\`operator\` for side effects, \`scout\` for a quick web lookup, \`researcher\` for a deep multi-source "fetch N and synthesize" investigation, \`planner\` when a multi-step goal needs a sequenced, risk-aware plan before anyone acts) with \`run_in_background: true\`, acknowledge, and KEEP TALKING. Single fast calls (\`git status\`, one known-endpoint \`curl\`) stay inline. When the completion reminder lands, weave the result in; in a channel session, the completion \`<system-reminder>\` is NOT a user message but plain text is still invisible — Surface the result via \`channel_reply\` (or \`channel_send\`). If you already posted the substantive answer in the spawn turn, prefer \`skip_response({ reason: "result confirms prior reply" })\` over going silent.
99
99
 
package/src/agent/tools/spawn-subagent.ts CHANGED
@@ -42,6 +42,7 @@ export type CreateSpawnSubagentToolOptions = {
42
42
  stream?: Stream
43
43
  generateTaskId?: () => string
44
44
  now?: () => number
45
+ allowBackgroundFromSubagent?: boolean
45
46
  }
46
47
 
47
48
  export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions) {
@@ -56,6 +57,7 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
56
57
  stream,
57
58
  generateTaskId = () => `${SPAWN_TASK_ID_PREFIX}${randomUUID().replace(/-/g, '').slice(0, 12)}`,
58
59
  now = () => Date.now(),
60
+ allowBackgroundFromSubagent,
59
61
  } = options
60
62
 
61
63
  return defineTool({
@@ -81,7 +83,9 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
81
83
  description:
82
84
  'When true, the spawn returns immediately with a task_id; the subagent runs in the background and a system-reminder is delivered when it completes. ' +
83
85
  'When false (default), the spawn blocks until the subagent finishes and returns its final message synchronously. ' +
84
- 'Use background mode for long-running tasks where you want to keep the conversation moving (Mode B) or for parallel fan-out (Mode A).',
86
+ 'For PARALLEL fan-out, do NOT use background mode: emit several spawn_subagent calls (sync, the default) in a SINGLE turn they execute concurrently and all their results return together before your next turn. ' +
87
+ 'Reserve background mode for a long-running task you want to keep the conversation moving alongside (Mode B). ' +
88
+ 'NOTE: background mode from subagents is only available when that subagent is explicitly enabled to drain child results; otherwise use sync spawns batched in one turn instead.',
85
89
  }),
86
90
  ),
87
91
  }),
@@ -105,6 +109,13 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
105
109
  `subagent.spawn denied: maximum delegation depth (${MAX_SUBAGENT_DEPTH}) reached; a subagent at this depth cannot spawn further subagents`,
106
110
  )
107
111
  }
112
+ if (origin?.kind === 'subagent' && params.run_in_background === true && allowBackgroundFromSubagent !== true) {
113
+ return errorResult(
114
+ 'subagent.spawn denied: background spawning is not available from a subagent session because the result cannot be delivered after this turn ends. ' +
115
+ 'Retry with run_in_background=false (or omit it) — the synchronous spawn blocks until the child finishes and returns its result into your context, ' +
116
+ 'which is what you need to fold the result into your output.',
117
+ )
118
+ }
108
119
 
109
120
  const taskId = generateTaskId()
110
121
  const subagentName = params.subagent_type
@@ -140,6 +151,7 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
140
151
  subagentName,
141
152
  parentSessionId,
142
153
  ...(spawnedByRole !== undefined ? { spawnedByRole } : {}),
154
+ background,
143
155
  startedAt,
144
156
  status: 'running' as const,
145
157
  abort: resolvedHandle.abort,
@@ -3,7 +3,7 @@
3
3
  The bundled bun-hygiene plugin. Registers a `tool.before` hook that blocks two classes of `bash` command:
4
4
 
5
5
  1. **Global package installs** — `npm install -g`, `pnpm add -g`, `yarn global add`, `bun add -g`, and their `--global` / bundled-flag variants.
6
- 2. **Non-bun package managers** — any `npm`, `npx`, `pnpm`, `pnpx`, or `yarn` invocation.
6
+ 2. **Non-bun install managers** — any `npm`, `pnpm`, or `yarn` invocation. The ephemeral runners `npx` and `pnpx` are **allowed** (alongside `bunx`): they execute a tool once without touching the dependency tree or writing a competing lockfile, so they don't undermine the bun-standardization this guard protects.
7
7
 
8
8
  This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]` entry to add. Both guards carry an `acknowledgeGuards` escape hatch (below) for the cases where the agent genuinely needs the blocked command.
9
9
 
@@ -11,16 +11,16 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
11
11
 
12
12
  **Global installs don't persist.** The agent folder is bind-mounted at `/agent`; everything else in the container — including `~/.bun`, `~/.npm`, and the global `node_modules` a global install writes to — is ephemeral and wiped on every `typeclaw restart`. An agent that runs `npm install -g some-cli` gets a tool that works for the rest of the session and silently vanishes on the next boot, leading to confusing "command not found" failures that look like regressions. The fix is to either add the dependency to `package.json` (`bun add <pkg>`, which lives in the bind-mounted folder and survives) or run it once without installing (`bunx <pkg>`).
13
13
 
14
- **The container standardizes on bun.** TypeClaw is Bun-native end to end (see the root README). Mixing in `npm`/`pnpm`/`yarn` produces competing lockfiles and install trees, and `npx` pulls a second package-execution path when `bunx` already covers it. Steering every package-manager call to bun keeps the dependency state coherent.
14
+ **The container standardizes on bun for dependency management.** TypeClaw is Bun-native end to end (see the root README). Mixing in `npm`/`pnpm`/`yarn` installs produces competing lockfiles and install trees, so those are steered to bun. Ephemeral runners (`npx`/`pnpx`/`bunx`) are not install managers — they run a tool once and leave no lockfile or `node_modules` behind — so they're allowed for one-off execution.
15
15
 
16
16
  Both guards **block with guidance** rather than silently rewriting the command — the agent sees exactly why the command was rejected and what to run instead, the same UX as the bundled `security` and `guard` policies.
17
17
 
18
18
  ## Guards
19
19
 
20
- | Guard | Triggers on | Guidance in the block reason |
21
- | ---------------------- | ------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- |
22
- | `globalInstall` | `npm`/`pnpm` install/add with `-g`/`--global`, `yarn global add`, `bun add -g` / `bun install -g` | Use `bun add <pkg>` (persists) or `bunx <pkg>` (ephemeral run). |
23
- | `nonBunPackageManager` | `npm`, `npx`, `pnpm`, `pnpx`, `yarn` at a command boundary | Use `bun install` / `bun add <pkg>`, and `bunx <pkg>` instead of npx/pnpx. |
20
+ | Guard | Triggers on | Guidance in the block reason |
21
+ | ---------------------- | ------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------- |
22
+ | `globalInstall` | `npm`/`pnpm` install/add with `-g`/`--global`, `yarn global add`, `bun add -g` / `bun install -g` | Use `bun add <pkg>` (persists) or `bunx <pkg>` (ephemeral run). |
23
+ | `nonBunPackageManager` | `npm`, `pnpm`, `yarn` at a command boundary (`npx`/`pnpx`/`bunx` are allowed) | Use `bun install` / `bun add <pkg>`. Ephemeral runners are fine as-is. |
24
24
 
25
25
  A global install (e.g. `npm install -g x`) trips **only** `globalInstall`, not both — the global install is the more specific violation, so acknowledging `globalInstall` lets the command through without a second acknowledgement for `nonBunPackageManager`.
26
26
 
@@ -43,9 +43,9 @@ Both guards follow the repo-wide `acknowledgeGuards` convention (shared with the
43
43
 
44
44
  For each segment, the guard strips leading **preamble wrappers** (`sudo`, `env`, `command`, `exec`, `nice`, `nohup`, `stdbuf`, `setsid`, `time`, `xargs`, and any `VAR=val` assignment) — including their options, and the argument a flag consumes (`sudo -u nobody`, `nice -n 10`, `env -i`) — to find the real command word, then classifies:
45
45
 
46
- 1. command word is `npm`/`npx`/`pnpm`/`pnpx`/`yarn` (or `bun`) **and** the segment has an install subcommand **and** a global flag → `globalInstall` (for `yarn`, the `global add` sequence must appear adjacent and in command position, so `yarn add global foo` — a local install of a package named `global` — is not misflagged);
47
- 2. command word is a non-bun manager (not via global) → `nonBunPackageManager`;
48
- 3. otherwise → allowed.
46
+ 1. command word is `npm`/`pnpm`/`yarn` (or `bun`) **and** the segment has an install subcommand **and** a global flag → `globalInstall` (for `yarn`, the `global add` sequence must appear adjacent and in command position, so `yarn add global foo` — a local install of a package named `global` — is not misflagged);
47
+ 2. command word is a non-bun install manager `npm`/`pnpm`/`yarn` (not via global) → `nonBunPackageManager`;
48
+ 3. otherwise (including the ephemeral runners `npx`/`pnpx`/`bunx`) → allowed.
49
49
 
50
50
  A `globalInstall` verdict on any segment wins over a plain non-bun verdict. This is a command-position detector, not a full shell parser — it doesn't interpret redirections or expansions beyond boundary marking — but it is linear-time and closes the structural gaps a single regex left open.
51
51
 
@@ -70,6 +70,7 @@ Because classification scans a segment's words as a set (after preamble strippin
70
70
  ## What is NOT blocked
71
71
 
72
72
  - `bun`, `bunx`, `bun run`, `bun add`, `bun install` (local) — the intended package commands. (`bun add -g` / `bun install -g` are still blocked as global installs: bun globals live in `~/.bun`, outside `/agent`, and are wiped on restart.)
73
+ - `npx`, `pnpx` — ephemeral runners, allowed for one-off tool execution (they leave no lockfile or install tree). Even a global install through them leaves nothing to block, since they don't install into the dependency tree at all.
73
74
  - A non-bun manager name appearing as a substring or argument: `my-npm-wrapper`, `./npm`, `cat npm-debug.log`, `git commit -m "drop npm"`, `grep -rn npx src/`, `echo "npm install -g foo"`. Only the **command word** of a segment is classified, so a manager name inside an argument, path, quoted string, or longer token never trips the guard.
74
75
 
75
76
  ## Ordering against other bundled plugins
@@ -78,5 +79,5 @@ Registered after `guard` in `src/run/bundled-plugins.ts`. It guards a disjoint s
78
79
 
79
80
  ## Tests
80
81
 
81
- - `policy.test.ts` — pure-function unit tests for the detection logic: every global-install form, every non-bun manager, the allowed-command set (bun/bunx, substrings, paths, quoted text), both bypasses, the global-install-takes-precedence rule, escaped/quoted evasions, leading-assignment preambles, newline-as-separator scoping, falsy `--global=`, option placement, and subshell/substitution detection.
82
- - `index.test.ts` — composition tests: the plugin registers the `tool.before` hook and wires it to the policy (block on global install, block on npx, allow bunx, honor the bypass).
82
+ - `policy.test.ts` — pure-function unit tests for the detection logic: every global-install form, every non-bun install manager, the ephemeral-runner allowance (`npx`/`pnpx`/`bunx`, including behind preamble wrappers), the allowed-command set (bun/bunx, substrings, paths, quoted text), both bypasses, the global-install-takes-precedence rule, escaped/quoted evasions, leading-assignment preambles, newline-as-separator scoping, falsy `--global=`, option placement, and subshell/substitution detection.
83
+ - `index.test.ts` — composition tests: the plugin registers the `tool.before` hook and wires it to the policy (block on global install, block on `npm install`, allow `bunx`/`npx`, honor the bypass).