typeclaw 0.29.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "typeclaw",
3
- "version": "0.29.0",
3
+ "version": "0.30.0",
4
4
  "homepage": "https://github.com/typeclaw/typeclaw#readme",
5
5
  "bugs": {
6
6
  "url": "https://github.com/typeclaw/typeclaw/issues"
@@ -213,6 +213,7 @@ export type CreateSessionOptions = {
213
213
  liveSubagentRegistry?: LiveSubagentRegistry
214
214
  subagentRegistry?: SubagentRegistry
215
215
  createSessionForSubagent?: CreateSessionForSubagent
216
+ allowBackgroundFromSubagent?: boolean
216
217
  }
217
218
 
218
219
  export type CreateSessionResult = {
@@ -357,6 +358,7 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
357
358
  getOrigin,
358
359
  permissions: options.permissions,
359
360
  stream: options.stream,
361
+ allowBackgroundFromSubagent: options.allowBackgroundFromSubagent,
360
362
  }),
361
363
  ]
362
364
  : [
@@ -726,6 +728,7 @@ export function buildSubagentOrchestrationTools(opts: {
726
728
  getOrigin: () => SessionOrigin | undefined
727
729
  permissions: PermissionService | undefined
728
730
  stream: Stream | undefined
731
+ allowBackgroundFromSubagent?: boolean
729
732
  }): ToolDefinition[] {
730
733
  if (
731
734
  opts.liveRegistry === undefined ||
@@ -745,6 +748,9 @@ export function buildSubagentOrchestrationTools(opts: {
745
748
  getOrigin: opts.getOrigin,
746
749
  ...(opts.permissions ? { permissions: opts.permissions } : {}),
747
750
  ...(opts.stream ? { stream: opts.stream } : {}),
751
+ ...(opts.allowBackgroundFromSubagent !== undefined
752
+ ? { allowBackgroundFromSubagent: opts.allowBackgroundFromSubagent }
753
+ : {}),
748
754
  }),
749
755
  createSubagentOutputTool({
750
756
  liveRegistry: opts.liveRegistry,
@@ -23,6 +23,11 @@ export type LiveSubagent = {
23
23
  // subagent_output/subagent_cancel. Absent when no permission service was
24
24
  // active at spawn, in which case the cap fails closed.
25
25
  spawnedByRole?: string
26
+ // True when spawned with run_in_background. Only background spawns deliver
27
+ // their result out-of-band (via the subagent.completed broadcast and the
28
+ // parent's drain); synchronous spawns return their result inline as the tool
29
+ // result, so the drain MUST NOT re-prompt for them. See runSubagentDrain.
30
+ background?: boolean
26
31
  startedAt: number
27
32
  status: SubagentStatus
28
33
  completion?: SubagentCompletion
@@ -40,6 +40,7 @@ import {
40
40
  ensureSessionTmpDir,
41
41
  mapVirtualTmpPath,
42
42
  resolveHiddenPaths,
43
+ resolveProcSelfExe,
43
44
  resolveProtectedZones,
44
45
  resolveWritableZones,
45
46
  subtractMasked,
@@ -463,13 +464,24 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
463
464
  await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId, bashEnvOverlay)
464
465
  }
465
466
 
466
- if (TMP_REDIRECT_TOOLS.has(tool.name) && opts.permissions !== undefined) {
467
- await applyTmpPathRedirect(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId)
468
- }
467
+ const tmpRedirect =
468
+ TMP_REDIRECT_TOOLS.has(tool.name) && opts.permissions !== undefined
469
+ ? await applyTmpPathRedirect(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId)
470
+ : undefined
469
471
 
470
- const result = await bashEnvStore.run(bashEnvOverlay, () =>
471
- tool.execute(toolCallId, mutableArgs as Static<TParams>, signal, onUpdate),
472
- )
472
+ let rawResult: ToolResult
473
+ try {
474
+ rawResult = await bashEnvStore.run(bashEnvOverlay, () =>
475
+ tool.execute(toolCallId, mutableArgs as Static<TParams>, signal, onUpdate),
476
+ )
477
+ } catch (error) {
478
+ // A throwing tool (pi's bash rejects on non-zero exit) must still run
479
+ // tool.after so cleanup hooks fire — e.g. the github approve guard's
480
+ // release, whose absence stranded a PR as "already approved" (PR #672).
481
+ await runToolAfterSafely(opts, tool.name, toolCallId, toErrorResult(error))
482
+ throw error
483
+ }
484
+ const result = tmpRedirect !== undefined ? restoreTmpPathInResult(rawResult, tmpRedirect) : rawResult
473
485
  const resolved = loopGate.resolve({ content: result.content as ContentPart[], details: result.details })
474
486
  if ('deferredBlock' in resolved) {
475
487
  fireLoopAbort(opts.getAbort)
@@ -490,6 +502,26 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
490
502
  })
491
503
  }
492
504
 
505
+ function toErrorResult(error: unknown): ToolResult {
506
+ const message = error instanceof Error ? error.message : String(error)
507
+ return { content: [{ type: 'text', text: message }], details: { error: message } }
508
+ }
509
+
510
+ // The original tool error must always propagate, so a failure inside the
511
+ // after-hook itself is swallowed rather than masking the real cause.
512
+ async function runToolAfterSafely(
513
+ opts: WrapSystemToolOptions,
514
+ tool: string,
515
+ callId: string,
516
+ result: ToolResult,
517
+ ): Promise<void> {
518
+ try {
519
+ await opts.hooks.runToolAfter({ tool, sessionId: opts.sessionId, callId, result })
520
+ } catch {
521
+ // intentionally ignored: never mask the originating tool error
522
+ }
523
+ }
524
+
493
525
  export function defaultBuiltinPiAgentTools(): AgentTool<any, any>[] {
494
526
  return [piReadTool, piBashTool, piEditTool, piWriteTool, piGrepTool, piFindTool, piLsTool]
495
527
  }
@@ -560,6 +592,7 @@ async function applyBashSandbox(
560
592
  protected: protectedZones,
561
593
  network: 'inherit',
562
594
  cwd: agentDir,
595
+ procSelfExe: resolveProcSelfExe(),
563
596
  ...(envOverlay !== undefined ? { env: { set: envOverlay } } : {}),
564
597
  })
565
598
  mutableArgs.command = commandString
@@ -579,24 +612,47 @@ const TMP_REDIRECT_TOOLS = new Set(['read', 'write', 'edit', 'grep', 'find', 'ls
579
612
  // different files. Rewriting the file tool's on-disk path to the same session
580
613
  // backing dir makes every layer resolve /tmp/foo to one file. Unsandboxed roles
581
614
  // (empty masks) are left untouched: their bash already shares the real /tmp.
615
+ type TmpRedirect = { original: string; backing: string }
616
+
582
617
  async function applyTmpPathRedirect(
583
618
  mutableArgs: Record<string, unknown>,
584
619
  permissions: PermissionService,
585
620
  origin: SessionOrigin | undefined,
586
621
  agentDir: string,
587
622
  sessionId: string,
588
- ): Promise<void> {
623
+ ): Promise<TmpRedirect | undefined> {
589
624
  const rawPath = mutableArgs.path
590
- if (typeof rawPath !== 'string') return
625
+ if (typeof rawPath !== 'string') return undefined
591
626
 
592
627
  const { dirs, files } = resolveHiddenPaths(permissions, origin, agentDir)
593
- if (dirs.length === 0 && files.length === 0) return
628
+ if (dirs.length === 0 && files.length === 0) return undefined
594
629
 
595
630
  const backing = mapVirtualTmpPath(agentDir, sessionId, rawPath)
596
- if (backing === undefined) return
631
+ if (backing === undefined || backing === rawPath) return undefined
597
632
 
598
633
  await ensureSessionTmpDir(sessionId)
599
634
  mutableArgs.path = backing
635
+ return { original: rawPath, backing }
636
+ }
637
+
638
+ // The redirect swaps the model-facing /tmp path for its session backing dir
639
+ // before execution; the file tool then echoes that backing path in its receipt
640
+ // text and details. Reverse it on the way out so the model only ever sees the
641
+ // path it asked for — a leaked backing path is unreachable inside the bwrap
642
+ // bash sandbox, so reusing it in `gh api --input` fails (the PR #672 strand).
643
+ function restoreTmpPathInResult(result: ToolResult, redirect: TmpRedirect): ToolResult {
644
+ const content = (result.content as ContentPart[]).map((part) =>
645
+ part.type === 'text' ? { ...part, text: part.text.split(redirect.backing).join(redirect.original) } : part,
646
+ )
647
+ const details =
648
+ isRecord(result.details) && result.details.path === redirect.backing
649
+ ? { ...result.details, path: redirect.original }
650
+ : result.details
651
+ return { content, details }
652
+ }
653
+
654
+ function isRecord(value: unknown): value is Record<string, unknown> {
655
+ return typeof value === 'object' && value !== null
600
656
  }
601
657
 
602
658
  function appendLoopWarning(result: ToolResult, message: string): ToolResult {
@@ -0,0 +1,150 @@
1
+ import type { Stream, Unsubscribe } from '@/stream'
2
+
3
+ import type { LiveSubagentRegistry } from './live-subagents'
4
+ import { parseSubagentCompletedPayload, renderSubagentCompletionReminder } from './subagent-completion-reminder'
5
+
6
+ // Presence of this capability is the single signal that background spawning is
7
+ // permitted from a subagent (see the spawn_subagent guard); absence keeps the
8
+ // subagent a one-shot leaf. It carries everything the drain needs: the shared
9
+ // Stream to listen on, the subagent's own sessionId to filter completions by,
10
+ // and the registry that is the source of truth for child state.
11
+ export type SubagentBackgroundDrain = {
12
+ stream: Stream
13
+ sessionId: string
14
+ liveRegistry: LiveSubagentRegistry
15
+ }
16
+
17
+ export type DrainPrompt = (text: string) => Promise<void>
18
+
19
+ export type RunSubagentDrainOptions = {
20
+ drain: SubagentBackgroundDrain
21
+ prompt: DrainPrompt
22
+ // Cooperative cancellation: when this returns true the loop stops re-prompting
23
+ // and returns, letting the caller's timeout/abort path dispose the session.
24
+ cancelled?: () => boolean
25
+ }
26
+
27
+ // Re-prompts a subagent with its children's completion reminders until a fixed
28
+ // point, called after the subagent's initial prompt resolves. The registry is
29
+ // the source of truth; stream broadcasts are only wakeups, so a duplicated or
30
+ // missed broadcast cannot corrupt termination (every iteration re-derives state
31
+ // from the registry). Each child's reminder is delivered at most once (tracked
32
+ // by taskId). Terminates only when no children are running AND none are
33
+ // completed-but-undelivered; a child spawned during a reminder turn reappears as
34
+ // `running` in the next snapshot and keeps the loop alive, so no separate
35
+ // "spawned nothing" flag is needed. The watch MUST have been started before the
36
+ // initial prompt (see `beginSubagentDrainWatch`) to close the lost-wakeup race.
37
+ export async function runSubagentDrain(watch: SubagentDrainWatch, options: RunSubagentDrainOptions): Promise<void> {
38
+ const { drain, prompt, cancelled } = options
39
+ const delivered = new Set<string>()
40
+ try {
41
+ while (cancelled === undefined || !cancelled()) {
42
+ const pending = collectPendingReminders(drain, delivered)
43
+ if (pending.length === 0) {
44
+ if (!hasRunningChildren(drain)) return
45
+ // Children still running but none newly completed: wait for the next
46
+ // wakeup, then re-derive from the registry.
47
+ const woke = await watch.waitForWakeup()
48
+ if (!woke) return
49
+ continue
50
+ }
51
+ for (const reminder of pending) {
52
+ if (cancelled !== undefined && cancelled()) return
53
+ delivered.add(reminder.taskId)
54
+ await prompt(reminder.text)
55
+ }
56
+ }
57
+ } finally {
58
+ watch.stop()
59
+ }
60
+ }
61
+
62
+ type PendingReminder = { taskId: string; text: string }
63
+
64
+ function collectPendingReminders(drain: SubagentBackgroundDrain, delivered: Set<string>): PendingReminder[] {
65
+ const children = drain.liveRegistry.list({ parentSessionId: drain.sessionId })
66
+ const pending: PendingReminder[] = []
67
+ for (const child of children) {
68
+ // Synchronous spawns return their result inline via the tool call; only
69
+ // background spawns deliver out-of-band and need a drain reminder.
70
+ if (child.background !== true) continue
71
+ if (child.status === 'running') continue
72
+ if (delivered.has(child.taskId)) continue
73
+ const completion = child.completion
74
+ const text = renderSubagentCompletionReminder({
75
+ subagent: child.subagentName,
76
+ taskId: child.taskId,
77
+ ok: child.status === 'completed',
78
+ durationMs: completion?.durationMs ?? 0,
79
+ ...(completion?.error !== undefined ? { error: completion.error } : {}),
80
+ })
81
+ pending.push({ taskId: child.taskId, text })
82
+ }
83
+ return pending
84
+ }
85
+
86
+ function hasRunningChildren(drain: SubagentBackgroundDrain): boolean {
87
+ // Only background children gate termination. A sync child still marked running
88
+ // in the registry settles via its inline tool call, never via a broadcast
89
+ // wakeup, so waiting on it would hang the drain forever.
90
+ return drain.liveRegistry
91
+ .list({ parentSessionId: drain.sessionId })
92
+ .some((c) => c.background === true && c.status === 'running')
93
+ }
94
+
95
+ export type SubagentDrainWatch = {
96
+ // Resolves true on a child-completion wakeup, false once stopped. A wakeup
97
+ // that arrives before anyone waits is latched (pendingWake), so a completion
98
+ // during the subagent's prompt is not lost.
99
+ waitForWakeup: () => Promise<boolean>
100
+ stop: () => void
101
+ }
102
+
103
+ export function beginSubagentDrainWatch(drain: SubagentBackgroundDrain): SubagentDrainWatch {
104
+ let stopped = false
105
+ let pendingWake = false
106
+ let resolveWaiter: ((woke: boolean) => void) | null = null
107
+
108
+ const wake = (): void => {
109
+ if (resolveWaiter !== null) {
110
+ const r = resolveWaiter
111
+ resolveWaiter = null
112
+ r(true)
113
+ return
114
+ }
115
+ pendingWake = true
116
+ }
117
+
118
+ const unsubscribe: Unsubscribe = drain.stream.subscribe({ target: { kind: 'broadcast' } }, (msg) => {
119
+ const parsed = parseSubagentCompletedPayload(msg.payload)
120
+ if (parsed === null) return
121
+ if (parsed.parentSessionId !== drain.sessionId) return
122
+ wake()
123
+ })
124
+
125
+ return {
126
+ waitForWakeup: () =>
127
+ new Promise<boolean>((resolve) => {
128
+ if (stopped) {
129
+ resolve(false)
130
+ return
131
+ }
132
+ if (pendingWake) {
133
+ pendingWake = false
134
+ resolve(true)
135
+ return
136
+ }
137
+ resolveWaiter = resolve
138
+ }),
139
+ stop: () => {
140
+ if (stopped) return
141
+ stopped = true
142
+ unsubscribe()
143
+ if (resolveWaiter !== null) {
144
+ const r = resolveWaiter
145
+ resolveWaiter = null
146
+ r(false)
147
+ }
148
+ },
149
+ }
150
+ }
@@ -7,6 +7,12 @@ import type { Stream, Unsubscribe } from '@/stream'
7
7
  import { type AgentSession, createSession } from './index'
8
8
  import { subscribeProviderErrors } from './provider-error'
9
9
  import type { SessionOrigin } from './session-origin'
10
+ import {
11
+ beginSubagentDrainWatch,
12
+ runSubagentDrain,
13
+ type SubagentBackgroundDrain,
14
+ type SubagentDrainWatch,
15
+ } from './subagent-drain'
10
16
  import { renderTurnTimeAnchor } from './system-prompt'
11
17
  import type { ToolResultBudget } from './tool-result-budget'
12
18
 
@@ -62,6 +68,12 @@ export type SubagentShared<P = unknown> = {
62
68
  // registry scoping. Default (unset/false) keeps the subagent a leaf — the
63
69
  // historical contract for explorer/scout/memory-logger/etc.
64
70
  canSpawnSubagents?: boolean
71
+ // Opt-in: allow this subagent to spawn background children AND drain their
72
+ // completions back into its own session (requires canSpawnSubagents). Default
73
+ // (unset/false) keeps background spawns denied from this subagent — it must
74
+ // use synchronous spawns. Only meaningful when the runtime wires the drain
75
+ // capability (createSessionForSubagent provides stream+sessionId+liveRegistry).
76
+ canBackgroundSpawnSubagents?: boolean
65
77
  // Wall-clock ceiling on a single spawn, enforced at the orchestration
66
78
  // layer (both `dispatchSpawnSubagent` and the stream-driven
67
79
  // `SubagentConsumer`). When exceeded, the orchestrator's `await` settles
@@ -116,6 +128,7 @@ export type CreateSessionForSubagentResult = {
116
128
  agentDir?: string
117
129
  origin?: SessionOrigin
118
130
  getTranscriptPath?: () => string | undefined
131
+ backgroundDrain?: SubagentBackgroundDrain
119
132
  }
120
133
  export type CreateSessionForSubagentOptions = {
121
134
  name?: string
@@ -152,6 +165,7 @@ type NormalizedSubagentSession = {
152
165
  agentDir: string | undefined
153
166
  origin: SessionOrigin | undefined
154
167
  getTranscriptPath: (() => string | undefined) | undefined
168
+ backgroundDrain: SubagentBackgroundDrain | undefined
155
169
  }
156
170
 
157
171
  function normalizeSubagentSession(result: AgentSession | CreateSessionForSubagentResult): NormalizedSubagentSession {
@@ -164,6 +178,7 @@ function normalizeSubagentSession(result: AgentSession | CreateSessionForSubagen
164
178
  agentDir: result.agentDir,
165
179
  origin: result.origin,
166
180
  getTranscriptPath: result.getTranscriptPath,
181
+ backgroundDrain: result.backgroundDrain,
167
182
  }
168
183
  }
169
184
  return {
@@ -174,6 +189,7 @@ function normalizeSubagentSession(result: AgentSession | CreateSessionForSubagen
174
189
  agentDir: undefined,
175
190
  origin: undefined,
176
191
  getTranscriptPath: undefined,
192
+ backgroundDrain: undefined,
177
193
  }
178
194
  }
179
195
 
@@ -214,14 +230,16 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
214
230
  }
215
231
 
216
232
  const runSession: RunSession = async (override) => {
217
- const { session, dispose, hooks, sessionId, agentDir, origin, getTranscriptPath } = normalizeSubagentSession(
218
- await createSessionForSubagent(subagent, sessionOptions),
219
- )
233
+ const { session, dispose, hooks, sessionId, agentDir, origin, getTranscriptPath, backgroundDrain } =
234
+ normalizeSubagentSession(await createSessionForSubagent(subagent, sessionOptions))
235
+ let aborted = false
236
+ let drainWatch: SubagentDrainWatch | undefined
220
237
  if (options.onSessionCreated !== undefined) {
221
238
  options.onSessionCreated({
222
239
  session,
223
240
  sessionId,
224
241
  abort: async () => {
242
+ aborted = true
225
243
  await session.abort()
226
244
  },
227
245
  })
@@ -239,6 +257,9 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
239
257
  if (hooks && turnEvent !== undefined) {
240
258
  await hooks.runSessionTurnStart({ ...turnEvent, userPrompt: userPromptForTurn })
241
259
  }
260
+ if (backgroundDrain !== undefined) {
261
+ drainWatch = beginSubagentDrainWatch(backgroundDrain)
262
+ }
242
263
  try {
243
264
  await session.prompt(`${renderTurnTimeAnchor()}\n\n${userPromptForTurn}`)
244
265
  } finally {
@@ -246,6 +267,15 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
246
267
  await hooks.runSessionTurnEnd(turnEvent)
247
268
  }
248
269
  }
270
+ if (drainWatch !== undefined && backgroundDrain !== undefined) {
271
+ await runSubagentDrain(drainWatch, {
272
+ drain: backgroundDrain,
273
+ prompt: async (text) => {
274
+ await session.prompt(`${renderTurnTimeAnchor()}\n\n${text}`)
275
+ },
276
+ cancelled: () => aborted,
277
+ })
278
+ }
249
279
  if (hooks && sessionId !== undefined) {
250
280
  await hooks.runSessionIdle({
251
281
  sessionId,
@@ -259,6 +289,7 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
259
289
  if (hooks && sessionId !== undefined) {
260
290
  await hooks.runSessionEnd({ sessionId, ...(origin !== undefined ? { origin } : {}) })
261
291
  }
292
+ drainWatch?.stop()
262
293
  session.dispose()
263
294
  await dispose()
264
295
  }
@@ -42,6 +42,7 @@ export type CreateSpawnSubagentToolOptions = {
42
42
  stream?: Stream
43
43
  generateTaskId?: () => string
44
44
  now?: () => number
45
+ allowBackgroundFromSubagent?: boolean
45
46
  }
46
47
 
47
48
  export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions) {
@@ -56,6 +57,7 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
56
57
  stream,
57
58
  generateTaskId = () => `${SPAWN_TASK_ID_PREFIX}${randomUUID().replace(/-/g, '').slice(0, 12)}`,
58
59
  now = () => Date.now(),
60
+ allowBackgroundFromSubagent,
59
61
  } = options
60
62
 
61
63
  return defineTool({
@@ -81,7 +83,9 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
81
83
  description:
82
84
  'When true, the spawn returns immediately with a task_id; the subagent runs in the background and a system-reminder is delivered when it completes. ' +
83
85
  'When false (default), the spawn blocks until the subagent finishes and returns its final message synchronously. ' +
84
- 'Use background mode for long-running tasks where you want to keep the conversation moving (Mode B) or for parallel fan-out (Mode A).',
86
+ 'For PARALLEL fan-out, do NOT use background mode: emit several spawn_subagent calls (sync, the default) in a SINGLE turn they execute concurrently and all their results return together before your next turn. ' +
87
+ 'Reserve background mode for a long-running task you want to keep the conversation moving alongside (Mode B). ' +
88
+ 'NOTE: background mode from subagents is only available when that subagent is explicitly enabled to drain child results; otherwise use sync spawns batched in one turn instead.',
85
89
  }),
86
90
  ),
87
91
  }),
@@ -105,6 +109,13 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
105
109
  `subagent.spawn denied: maximum delegation depth (${MAX_SUBAGENT_DEPTH}) reached; a subagent at this depth cannot spawn further subagents`,
106
110
  )
107
111
  }
112
+ if (origin?.kind === 'subagent' && params.run_in_background === true && allowBackgroundFromSubagent !== true) {
113
+ return errorResult(
114
+ 'subagent.spawn denied: background spawning is not available from a subagent session because the result cannot be delivered after this turn ends. ' +
115
+ 'Retry with run_in_background=false (or omit it) — the synchronous spawn blocks until the child finishes and returns its result into your context, ' +
116
+ 'which is what you need to fold the result into your output.',
117
+ )
118
+ }
108
119
 
109
120
  const taskId = generateTaskId()
110
121
  const subagentName = params.subagent_type
@@ -140,6 +151,7 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
140
151
  subagentName,
141
152
  parentSessionId,
142
153
  ...(spawnedByRole !== undefined ? { spawnedByRole } : {}),
154
+ background,
143
155
  startedAt,
144
156
  status: 'running' as const,
145
157
  abort: resolvedHandle.abort,
@@ -3,7 +3,7 @@
3
3
  The bundled bun-hygiene plugin. Registers a `tool.before` hook that blocks two classes of `bash` command:
4
4
 
5
5
  1. **Global package installs** — `npm install -g`, `pnpm add -g`, `yarn global add`, `bun add -g`, and their `--global` / bundled-flag variants.
6
- 2. **Non-bun package managers** — any `npm`, `npx`, `pnpm`, `pnpx`, or `yarn` invocation.
6
+ 2. **Non-bun install managers** — any `npm`, `pnpm`, or `yarn` invocation. The ephemeral runners `npx` and `pnpx` are **allowed** (alongside `bunx`): they execute a tool once without touching the dependency tree or writing a competing lockfile, so they don't undermine the bun-standardization this guard protects.
7
7
 
8
8
  This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]` entry to add. Both guards carry an `acknowledgeGuards` escape hatch (below) for the cases where the agent genuinely needs the blocked command.
9
9
 
@@ -11,16 +11,16 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
11
11
 
12
12
  **Global installs don't persist.** The agent folder is bind-mounted at `/agent`; everything else in the container — including `~/.bun`, `~/.npm`, and the global `node_modules` a global install writes to — is ephemeral and wiped on every `typeclaw restart`. An agent that runs `npm install -g some-cli` gets a tool that works for the rest of the session and silently vanishes on the next boot, leading to confusing "command not found" failures that look like regressions. The fix is to either add the dependency to `package.json` (`bun add <pkg>`, which lives in the bind-mounted folder and survives) or run it once without installing (`bunx <pkg>`).
13
13
 
14
- **The container standardizes on bun.** TypeClaw is Bun-native end to end (see the root README). Mixing in `npm`/`pnpm`/`yarn` produces competing lockfiles and install trees, and `npx` pulls a second package-execution path when `bunx` already covers it. Steering every package-manager call to bun keeps the dependency state coherent.
14
+ **The container standardizes on bun for dependency management.** TypeClaw is Bun-native end to end (see the root README). Mixing in `npm`/`pnpm`/`yarn` installs produces competing lockfiles and install trees, so those are steered to bun. Ephemeral runners (`npx`/`pnpx`/`bunx`) are not install managers — they run a tool once and leave no lockfile or `node_modules` behind — so they're allowed for one-off execution.
15
15
 
16
16
  Both guards **block with guidance** rather than silently rewriting the command — the agent sees exactly why the command was rejected and what to run instead, the same UX as the bundled `security` and `guard` policies.
17
17
 
18
18
  ## Guards
19
19
 
20
- | Guard | Triggers on | Guidance in the block reason |
21
- | ---------------------- | ------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- |
22
- | `globalInstall` | `npm`/`pnpm` install/add with `-g`/`--global`, `yarn global add`, `bun add -g` / `bun install -g` | Use `bun add <pkg>` (persists) or `bunx <pkg>` (ephemeral run). |
23
- | `nonBunPackageManager` | `npm`, `npx`, `pnpm`, `pnpx`, `yarn` at a command boundary | Use `bun install` / `bun add <pkg>`, and `bunx <pkg>` instead of npx/pnpx. |
20
+ | Guard | Triggers on | Guidance in the block reason |
21
+ | ---------------------- | ------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------- |
22
+ | `globalInstall` | `npm`/`pnpm` install/add with `-g`/`--global`, `yarn global add`, `bun add -g` / `bun install -g` | Use `bun add <pkg>` (persists) or `bunx <pkg>` (ephemeral run). |
23
+ | `nonBunPackageManager` | `npm`, `pnpm`, `yarn` at a command boundary (`npx`/`pnpx`/`bunx` are allowed) | Use `bun install` / `bun add <pkg>`. Ephemeral runners are fine as-is. |
24
24
 
25
25
  A global install (e.g. `npm install -g x`) trips **only** `globalInstall`, not both — the global install is the more specific violation, so acknowledging `globalInstall` lets the command through without a second acknowledgement for `nonBunPackageManager`.
26
26
 
@@ -43,9 +43,9 @@ Both guards follow the repo-wide `acknowledgeGuards` convention (shared with the
43
43
 
44
44
  For each segment, the guard strips leading **preamble wrappers** (`sudo`, `env`, `command`, `exec`, `nice`, `nohup`, `stdbuf`, `setsid`, `time`, `xargs`, and any `VAR=val` assignment) — including their options, and the argument a flag consumes (`sudo -u nobody`, `nice -n 10`, `env -i`) — to find the real command word, then classifies:
45
45
 
46
- 1. command word is `npm`/`npx`/`pnpm`/`pnpx`/`yarn` (or `bun`) **and** the segment has an install subcommand **and** a global flag → `globalInstall` (for `yarn`, the `global add` sequence must appear adjacent and in command position, so `yarn add global foo` — a local install of a package named `global` — is not misflagged);
47
- 2. command word is a non-bun manager (not via global) → `nonBunPackageManager`;
48
- 3. otherwise → allowed.
46
+ 1. command word is `npm`/`pnpm`/`yarn` (or `bun`) **and** the segment has an install subcommand **and** a global flag → `globalInstall` (for `yarn`, the `global add` sequence must appear adjacent and in command position, so `yarn add global foo` — a local install of a package named `global` — is not misflagged);
47
+ 2. command word is a non-bun install manager `npm`/`pnpm`/`yarn` (not via global) → `nonBunPackageManager`;
48
+ 3. otherwise (including the ephemeral runners `npx`/`pnpx`/`bunx`) → allowed.
49
49
 
50
50
  A `globalInstall` verdict on any segment wins over a plain non-bun verdict. This is a command-position detector, not a full shell parser — it doesn't interpret redirections or expansions beyond boundary marking — but it is linear-time and closes the structural gaps a single regex left open.
51
51
 
@@ -70,6 +70,7 @@ Because classification scans a segment's words as a set (after preamble strippin
70
70
  ## What is NOT blocked
71
71
 
72
72
  - `bun`, `bunx`, `bun run`, `bun add`, `bun install` (local) — the intended package commands. (`bun add -g` / `bun install -g` are still blocked as global installs: bun globals live in `~/.bun`, outside `/agent`, and are wiped on restart.)
73
+ - `npx`, `pnpx` — ephemeral runners, allowed for one-off tool execution (they leave no lockfile or install tree). There is no global-install case to block for them either, since they never install into the dependency tree at all.
73
74
  - A non-bun manager name appearing as a substring or argument: `my-npm-wrapper`, `./npm`, `cat npm-debug.log`, `git commit -m "drop npm"`, `grep -rn npx src/`, `echo "npm install -g foo"`. Only the **command word** of a segment is classified, so a manager name inside an argument, path, quoted string, or longer token never trips the guard.
74
75
 
75
76
  ## Ordering against other bundled plugins
@@ -78,5 +79,5 @@ Registered after `guard` in `src/run/bundled-plugins.ts`. It guards a disjoint s
78
79
 
79
80
  ## Tests
80
81
 
81
- - `policy.test.ts` — pure-function unit tests for the detection logic: every global-install form, every non-bun manager, the allowed-command set (bun/bunx, substrings, paths, quoted text), both bypasses, the global-install-takes-precedence rule, escaped/quoted evasions, leading-assignment preambles, newline-as-separator scoping, falsy `--global=`, option placement, and subshell/substitution detection.
82
- - `index.test.ts` — composition tests: the plugin registers the `tool.before` hook and wires it to the policy (block on global install, block on npx, allow bunx, honor the bypass).
82
+ - `policy.test.ts` — pure-function unit tests for the detection logic: every global-install form, every non-bun install manager, the ephemeral-runner allowance (`npx`/`pnpx`/`bunx`, including behind preamble wrappers), the allowed-command set (bun/bunx, substrings, paths, quoted text), both bypasses, the global-install-takes-precedence rule, escaped/quoted evasions, leading-assignment preambles, newline-as-separator scoping, falsy `--global=`, option placement, and subshell/substitution detection.
83
+ - `index.test.ts` — composition tests: the plugin registers the `tool.before` hook and wires it to the policy (block on global install, block on `npm install`, allow `bunx`/`npx`, honor the bypass).
@@ -3,7 +3,12 @@ import { ACKNOWLEDGE_GUARDS, type GuardBlock, isGuardAcknowledged } from '../gua
3
3
  export const GUARD_GLOBAL_INSTALL = 'globalInstall'
4
4
  export const GUARD_NON_BUN_PACKAGE_MANAGER = 'nonBunPackageManager'
5
5
 
6
- const NON_BUN_MANAGERS = new Set(['npm', 'npx', 'pnpm', 'pnpx', 'yarn'])
6
+ // Only install managers are blocked. The ephemeral runners npx/pnpx (and bunx,
7
+ // which is `bun`) are intentionally absent: they run a tool once without
8
+ // touching the dependency tree or writing a competing lockfile, so they don't
9
+ // undermine the bun-standardization this set protects. classify() skips any
10
+ // command word not in here, so leaving them out is what allows them.
11
+ const NON_BUN_MANAGERS = new Set(['npm', 'pnpm', 'yarn'])
7
12
  const INSTALL_SUBCOMMANDS = new Set(['install', 'i', 'add'])
8
13
 
9
14
  export function checkBunHygieneGuard(options: { tool: string; args: Record<string, unknown> }): GuardBlock | undefined {
@@ -310,8 +315,8 @@ function blockNonBunManager(manager: string, args: Record<string, unknown>): Gua
310
315
  return {
311
316
  block: true,
312
317
  reason: [
313
- `Guard \`${GUARD_NON_BUN_PACKAGE_MANAGER}\` blocked \`${manager}\`. This container standardizes on bun.`,
314
- 'Use `bun install` / `bun add <pkg>` instead of npm/pnpm/yarn, and `bunx <pkg>` instead of npx/pnpx.',
318
+ `Guard \`${GUARD_NON_BUN_PACKAGE_MANAGER}\` blocked \`${manager}\`. This container standardizes on bun for dependency management.`,
319
+ 'Use `bun install` / `bun add <pkg>` instead of npm/pnpm/yarn. Ephemeral runners (`bunx`, `npx`, `pnpx`) are allowed for one-off tool execution.',
315
320
  `Retry with \`${ACKNOWLEDGE_GUARDS}.${GUARD_NON_BUN_PACKAGE_MANAGER}: true\` if this package manager is genuinely required (e.g. a project pinned to a different lockfile).`,
316
321
  ].join(' '),
317
322
  }