typeclaw 0.15.2 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/package.json +1 -1
  2. package/src/agent/index.ts +35 -2
  3. package/src/agent/plugin-tools.ts +38 -0
  4. package/src/agent/session-meta.ts +6 -2
  5. package/src/agent/session-origin.ts +111 -14
  6. package/src/agent/subagents.ts +6 -1
  7. package/src/agent/system-prompt.ts +41 -32
  8. package/src/agent/tools/channel-reply.ts +3 -2
  9. package/src/agent/tools/grant-role.ts +214 -0
  10. package/src/bundled-plugins/guard/policies/non-workspace-write.ts +8 -6
  11. package/src/bundled-plugins/memory/index.ts +25 -6
  12. package/src/bundled-plugins/security/index.ts +12 -0
  13. package/src/bundled-plugins/security/policies/private-surface-read.ts +215 -0
  14. package/src/channels/adapters/github/inbound.ts +54 -1
  15. package/src/channels/adapters/github/index.ts +1 -0
  16. package/src/channels/router.ts +150 -37
  17. package/src/cli/inspect.ts +20 -9
  18. package/src/cli/role.ts +10 -1
  19. package/src/cli/ui.ts +6 -4
  20. package/src/config/reloadable.ts +10 -3
  21. package/src/init/index.ts +24 -42
  22. package/src/init/paths.ts +1 -0
  23. package/src/init/run-owner-claim.ts +21 -3
  24. package/src/inspect/label.ts +2 -0
  25. package/src/inspect/live.ts +6 -1
  26. package/src/inspect/render.ts +8 -2
  27. package/src/inspect/replay.ts +6 -1
  28. package/src/inspect/types.ts +4 -1
  29. package/src/permissions/builtins.ts +22 -0
  30. package/src/permissions/grant.ts +92 -16
  31. package/src/permissions/index.ts +8 -2
  32. package/src/permissions/permissions.ts +16 -0
  33. package/src/permissions/resolve.ts +10 -0
  34. package/src/plugin/types.ts +12 -0
  35. package/src/role-claim/index.ts +1 -0
  36. package/src/role-claim/reload-after-claim.ts +34 -0
  37. package/src/run/channel-session-factory.ts +6 -1
  38. package/src/run/index.ts +18 -1
  39. package/src/sandbox/build.ts +51 -1
  40. package/src/sandbox/hidden-paths.ts +41 -0
  41. package/src/sandbox/index.ts +2 -1
  42. package/src/sandbox/policy.ts +15 -0
  43. package/src/skills/typeclaw-channel-github/SKILL.md +15 -3
  44. package/src/skills/typeclaw-permissions/SKILL.md +11 -3
  45. package/src/skills/typeclaw-skills/SKILL.md +3 -1
  46. package/src/skills/typeclaw-troubleshooting/SKILL.md +104 -0
  47. package/src/usage/report.ts +4 -0
  48. package/src/usage/scan.ts +1 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "typeclaw",
3
- "version": "0.15.2",
3
+ "version": "0.17.0",
4
4
  "homepage": "https://github.com/typeclaw/typeclaw#readme",
5
5
  "bugs": {
6
6
  "url": "https://github.com/typeclaw/typeclaw/issues"
@@ -9,7 +9,7 @@ import { loadMemory } from '@/bundled-plugins/memory/load-memory'
9
9
  import type { ChannelRouter } from '@/channels/router'
10
10
  import { getConfig, resolveModel, resolveProfile } from '@/config'
11
11
  import { defaultThinkingLevelForRef, providerForModelRef, type KnownModelRef } from '@/config/providers'
12
- import type { PermissionService } from '@/permissions'
12
+ import type { PermissionService, RolesConfig } from '@/permissions'
13
13
  import type {
14
14
  BuiltinToolRef,
15
15
  HookBus,
@@ -50,6 +50,7 @@ import { createChannelFetchAttachmentTool } from './tools/channel-fetch-attachme
50
50
  import { createChannelHistoryTool } from './tools/channel-history'
51
51
  import { createChannelReplyTool } from './tools/channel-reply'
52
52
  import { createChannelSendTool } from './tools/channel-send'
53
+ import { createGrantRoleTool } from './tools/grant-role'
53
54
  import { createRestartTool } from './tools/restart'
54
55
  import { createSkipResponseTool } from './tools/skip-response'
55
56
  import { createSpawnSubagentTool } from './tools/spawn-subagent'
@@ -63,7 +64,7 @@ export type { SessionOrigin } from './session-origin'
63
64
 
64
65
  export type { AgentSession }
65
66
 
66
- export { renderTurnTimeAnchor } from './system-prompt'
67
+ export { renderTurnRoleAnchor, renderTurnTimeAnchor } from './system-prompt'
67
68
 
68
69
  type AgentSessionTools = NonNullable<Parameters<typeof createAgentSession>[0]>['tools']
69
70
 
@@ -141,6 +142,11 @@ export type CreateSessionOptions = {
141
142
  // prompt is not regenerated; see `typeclaw-permissions` skill for how the
142
143
  // agent should interpret the snapshot on later turns.
143
144
  permissions?: PermissionService
145
+ // Re-reads roles from disk for the grant_role tool's hot-reload after a match
146
+ // grant. Production threads a reload-then-read (reloadConfig + getConfig);
147
+ // must not be an in-memory snapshot or the grant reapplies stale roles.
148
+ // Omitted when no grant_role tool is wired (the tool requires permissions).
149
+ reloadRoles?: () => RolesConfig | undefined
144
150
  // Model profile name. Resolved against `config.models` to pick the concrete
145
151
  // model ref this session binds to. Unknown profile names fall back to
146
152
  // `default` with a one-time console warning. Omitted → `default`. Threaded
@@ -322,6 +328,12 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
322
328
  permissions: options.permissions,
323
329
  stream: options.stream,
324
330
  }),
331
+ ...buildRoleGrantTools({
332
+ agentDir: options.plugins?.agentDir,
333
+ getOrigin,
334
+ permissions: options.permissions,
335
+ reloadRoles: options.reloadRoles,
336
+ }),
325
337
  ]
326
338
  // Hook coverage for pi's builtin coding tools (read/bash/edit/write/grep/
327
339
  // find/ls) — pi 0.67.3 ignores `tools:` for implementation, so the only
@@ -335,6 +347,7 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
335
347
  sessionId: options.plugins.sessionId,
336
348
  hooks: options.plugins.hooks,
337
349
  getOrigin,
350
+ ...(options.permissions ? { permissions: options.permissions } : {}),
338
351
  })
339
352
  : []
340
353
  const wrappedCustomSystemTools = wrapSystemTools(customSystemTools, options.plugins, getOrigin)
@@ -576,6 +589,25 @@ export function buildSubagentOrchestrationTools(opts: {
576
589
  ]
577
590
  }
578
591
 
592
+ export function buildRoleGrantTools(opts: {
593
+ agentDir: string | undefined
594
+ getOrigin: () => SessionOrigin | undefined
595
+ permissions: PermissionService | undefined
596
+ reloadRoles: (() => RolesConfig | undefined) | undefined
597
+ }): ToolDefinition[] {
598
+ if (opts.agentDir === undefined || opts.permissions === undefined || opts.reloadRoles === undefined) {
599
+ return []
600
+ }
601
+ return [
602
+ createGrantRoleTool({
603
+ agentDir: opts.agentDir,
604
+ getOrigin: opts.getOrigin,
605
+ permissions: opts.permissions,
606
+ reloadRoles: opts.reloadRoles,
607
+ }),
608
+ ]
609
+ }
610
+
579
611
  function wrapRegistryTools(
580
612
  plugins: PluginSessionWiring | undefined,
581
613
  getOrigin: () => SessionOrigin | undefined,
@@ -711,6 +743,7 @@ export function deriveSystemPromptMode(origin: SessionOrigin | undefined): Syste
711
743
  return 'full'
712
744
  case 'cron':
713
745
  case 'subagent':
746
+ case 'system':
714
747
  return 'slim'
715
748
  default: {
716
749
  const _exhaustive: never = origin
@@ -20,6 +20,7 @@ import {
20
20
  checkNonWorkspaceWriteGuard,
21
21
  checkSkillAuthoringGuard,
22
22
  } from '@/bundled-plugins/guard/policy'
23
+ import type { PermissionService } from '@/permissions/permissions'
23
24
  import type {
24
25
  BuiltinToolRef,
25
26
  ContentPart,
@@ -30,6 +31,7 @@ import type {
30
31
  ToolContext,
31
32
  ToolResult,
32
33
  } from '@/plugin'
34
+ import { buildSandboxedCommand, ensureBwrapAvailable, resolveHiddenPaths } from '@/sandbox'
33
35
 
34
36
  import { createLoopGuard, type LoopGuard } from './loop-guard'
35
37
  import { checkImageReadRedirect } from './multimodal/read-redirect'
@@ -134,6 +136,11 @@ export type WrapSystemToolOptions = {
134
136
  sessionId: string
135
137
  hooks: HookBus
136
138
  getOrigin?: () => SessionOrigin | undefined
139
+ // When present, the bash builtin is rewritten through the per-tool bwrap
140
+ // sandbox with role-derived path masks. Absent (or no masks for the role)
141
+ // runs bash unchanged — preserving today's behavior for trusted+ and for
142
+ // sessions wired without a permission service (e.g. tests).
143
+ permissions?: PermissionService
137
144
  }
138
145
 
139
146
  // Zod 4 emits a top-level `"$schema": "https://json-schema.org/draft/2020-12/schema"`
@@ -393,6 +400,10 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
393
400
  }
394
401
  stripGuardAcknowledgements(mutableArgs)
395
402
 
403
+ if (tool.name === 'bash' && opts.permissions !== undefined) {
404
+ await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir)
405
+ }
406
+
396
407
  const result = await tool.execute(toolCallId, mutableArgs as Static<TParams>, signal, onUpdate)
397
408
  const hookResult: ToolResult = {
398
409
  content: result.content as ContentPart[],
@@ -425,6 +436,33 @@ export function buildBuiltinPiToolOverrides(opts: WrapSystemToolOptions): ToolDe
425
436
  return defaultBuiltinPiAgentTools().map((tool) => wrapAgentToolAsCustomToolDefinition(tool, opts))
426
437
  }
427
438
 
439
+ // Rewrites mutableArgs.command in place so the bash builtin runs inside bwrap
440
+ // with role-derived path masks. A role that sees everything (trusted+) yields
441
+ // no masks and runs unchanged. When masks ARE needed but bwrap is unavailable
442
+ // we throw rather than run unsandboxed — fail closed, never leak the masked
443
+ // surface. Runs after the tool.before guards have inspected the raw command.
444
+ async function applyBashSandbox(
445
+ mutableArgs: Record<string, unknown>,
446
+ permissions: PermissionService,
447
+ origin: SessionOrigin | undefined,
448
+ agentDir: string,
449
+ ): Promise<void> {
450
+ const command = mutableArgs.command
451
+ if (typeof command !== 'string') return
452
+
453
+ const { dirs, files } = resolveHiddenPaths(permissions, origin, agentDir)
454
+ if (dirs.length === 0 && files.length === 0) return
455
+
456
+ await ensureBwrapAvailable()
457
+ const { commandString } = buildSandboxedCommand(command, {
458
+ mounts: [{ type: 'bind', source: agentDir, dest: agentDir }],
459
+ masks: { dirs, files },
460
+ network: 'inherit',
461
+ cwd: agentDir,
462
+ })
463
+ mutableArgs.command = commandString
464
+ }
465
+
428
466
  function appendLoopWarning(result: ToolResult, message: string): ToolResult {
429
467
  const content: ContentPart[] = [...(result.content as ContentPart[]), { type: 'text', text: message }]
430
468
  return { content, details: result.details }
@@ -28,10 +28,12 @@ export type MinimalSessionOrigin =
28
28
  thread: string | null
29
29
  }
30
30
  | { kind: 'subagent'; subagent: string; parentSessionId: string }
31
+ | { kind: 'system'; component: string }
31
32
 
32
33
  // Reduce a full SessionOrigin to the minimum projection persisted to disk.
33
- // Drops participant lists, membership counts, recursive provenance, and
34
- // author identifiers — none of which `typeclaw usage` reads, and all of
34
+ // Drops participant lists, membership counts, recursive provenance (including
35
+ // the system origin's `triggeredBy`, which can carry channel author identity),
36
+ // and author identifiers — none of which `typeclaw usage` reads, and all of
35
37
  // which would otherwise land in git history when sessions/ is auto-backed-up.
36
38
  // Kept as a separate function so the boundary between "data the LLM sees in
37
39
  // the system prompt" (full origin) and "data persisted for usage reporting"
@@ -58,5 +60,7 @@ function minimalOrigin(origin: SessionOrigin): MinimalSessionOrigin {
58
60
  }
59
61
  case 'subagent':
60
62
  return { kind: 'subagent', subagent: origin.subagent, parentSessionId: origin.parentSessionId }
63
+ case 'system':
64
+ return { kind: 'system', component: origin.component }
61
65
  }
62
66
  }
@@ -48,6 +48,23 @@ export type SessionOrigin =
48
48
  spawnedByRole?: string
49
49
  spawnedByOrigin?: SessionOrigin
50
50
  }
51
+ // Runtime-owned infrastructure operating over TypeClaw's own state (memory
52
+ // logging/retrieval, backup), NOT user-delegated work. It resolves to `owner`
53
+ // because it acts on the operator's behalf over operator-owned files, with no
54
+ // single user session to inherit authority from — inheriting the triggering
55
+ // turn's role (e.g. a guest channel turn) would wrongly classify TypeClaw
56
+ // infrastructure as the guest actor and block its legitimate sessions//memory/
57
+ // access. `triggeredBy` keeps honest provenance — "a guest turn triggered the
58
+ // memory-logger" — without the synthetic-TUI lie. This kind is only ever
59
+ // constructed by runtime/bundled code; inbound channel/cron content can never
60
+ // produce it (those origins come from the runtime, not from message text), so
61
+ // it is not a role-laundering vector.
62
+ | {
63
+ kind: 'system'
64
+ component: string
65
+ reason?: string
66
+ triggeredBy?: SessionOrigin
67
+ }
51
68
 
52
69
  export const PARTICIPANTS_TOP_K = 10
53
70
  export const PARTICIPANTS_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000
@@ -94,11 +111,12 @@ function getPlatformInfo(adapter: AdapterId): PlatformInfo {
94
111
  // TUI is always `owner` by construction — annotating it would add noise to
95
112
  // every interactive session for zero new information.
96
113
  //
97
- // For channel sessions this is a session-creation snapshot. The router
98
- // re-resolves per-turn for tool gating, but the system prompt is not
99
- // regenerated mid-session; the role line is accurate at admission and the
100
- // `typeclaw-permissions` skill spells out how to interpret it on later
101
- // turns when a different speaker may have spoken last.
114
+ // Channel origins do NOT render this concrete role. A channel session is
115
+ // keyed by chat/thread, so the opener's role is wrong for every later
116
+ // speaker and printing their permission list leaks it into shared context.
117
+ // Channel origins render renderChannelRolePolicy() instead, and the
118
+ // authoritative per-turn role rides in the non-cacheable `<your-role>`
119
+ // turn anchor (renderTurnRoleAnchor in system-prompt.ts).
102
120
  export type SessionRoleContext = {
103
121
  role: string
104
122
  permissions: readonly string[]
@@ -111,19 +129,22 @@ export function renderSessionOrigin(
111
129
  ): string {
112
130
  switch (origin.kind) {
113
131
  case 'tui':
114
- return withRoleContext(renderTuiOrigin(), roleContext)
132
+ return withRoleContext(renderTuiOrigin(), roleContext, origin.kind)
115
133
  case 'cron':
116
- return withRoleContext(renderCronOrigin(origin), roleContext)
134
+ return withRoleContext(renderCronOrigin(origin), roleContext, origin.kind)
117
135
  case 'channel':
118
- return withRoleContext(renderChannelOrigin(origin, now), roleContext)
136
+ return withRoleContext(renderChannelOrigin(origin, now), roleContext, origin.kind)
119
137
  case 'subagent':
120
- return withRoleContext(renderSubagentOrigin(origin), roleContext)
138
+ return withRoleContext(renderSubagentOrigin(origin), roleContext, origin.kind)
139
+ case 'system':
140
+ return withRoleContext(renderSystemOrigin(origin), roleContext, origin.kind)
121
141
  }
122
142
  }
123
143
 
124
- function withRoleContext(block: string, ctx: SessionRoleContext | undefined): string {
144
+ function withRoleContext(block: string, ctx: SessionRoleContext | undefined, kind: SessionOrigin['kind']): string {
125
145
  if (ctx === undefined) return block
126
- return `${block}\n\n${renderRoleContext(ctx)}`
146
+ const roleBlock = kind === 'channel' ? renderChannelRolePolicy() : renderRoleContext(ctx)
147
+ return `${block}\n\n${roleBlock}`
127
148
  }
128
149
 
129
150
  function renderRoleContext(ctx: SessionRoleContext): string {
@@ -141,6 +162,30 @@ function renderRoleContext(ctx: SessionRoleContext): string {
141
162
  ].join('\n')
142
163
  }
143
164
 
165
+ // Channel sessions are keyed by chat/thread, not by author: one session can see
166
+ // many speakers with different roles. Rendering the opener's concrete role here
167
+ // would (1) be wrong for every later speaker and (2) leak the opener's full
168
+ // permission list into shared context. So channel origins get a cache-stable
169
+ // policy instead of a resolved identity; the authoritative per-turn role rides
170
+ // in the non-cacheable `<your-role>` turn anchor.
171
+ function renderChannelRolePolicy(): string {
172
+ return [
173
+ '## Your role in this session',
174
+ '',
175
+ 'This is a channel conversation that may include multiple speakers. Do not',
176
+ 'assume one speaker’s role applies to later messages. For each user turn the',
177
+ 'current speaker’s effective role is provided in the turn context as a',
178
+ '`<your-role>` tag; that per-turn role is authoritative for the current',
179
+ 'message and overrides any role implied by session-opening context. An absent',
180
+ '`<your-role>` tag means the current speaker is the unconstrained default.',
181
+ '',
182
+ 'Tool calls and channel admission are gated by the current speaker’s',
183
+ 'permissions; a `blocked:` or "denied by permissions" message means that',
184
+ 'speaker lacks the permission the guard wanted. See the',
185
+ '`typeclaw-permissions` skill for what each role can do.',
186
+ ].join('\n')
187
+ }
188
+
144
189
  function renderTuiOrigin(): string {
145
190
  return [
146
191
  '## Session origin',
@@ -167,6 +212,34 @@ function renderCronOrigin(origin: { jobId: string; jobKind: 'prompt' | 'exec' |
167
212
  ].join('\n')
168
213
  }
169
214
 
215
+ function renderSystemOrigin(origin: { component: string; reason?: string; triggeredBy?: SessionOrigin }): string {
216
+ const lines = [
217
+ '## Session origin',
218
+ '',
219
+ `You are the \`${origin.component}\` system process — TypeClaw-owned`,
220
+ "infrastructure operating over the agent folder on the operator's behalf,",
221
+ 'not a user-delegated task. Do exactly the job described and exit.',
222
+ ]
223
+ if (origin.reason !== undefined) lines.push('', `Reason: ${origin.reason}`)
224
+ if (origin.triggeredBy !== undefined) lines.push('', `Triggered by: ${describeTrigger(origin.triggeredBy)}`)
225
+ return lines.join('\n')
226
+ }
227
+
228
+ function describeTrigger(origin: SessionOrigin): string {
229
+ switch (origin.kind) {
230
+ case 'tui':
231
+ return 'a TUI session'
232
+ case 'cron':
233
+ return `cron job \`${origin.jobId}\``
234
+ case 'channel':
235
+ return `a ${getPlatformInfo(origin.adapter).displayName} channel turn`
236
+ case 'subagent':
237
+ return `the \`${origin.subagent}\` subagent`
238
+ case 'system':
239
+ return `the \`${origin.component}\` system process`
240
+ }
241
+ }
242
+
170
243
  function renderSubagentOrigin(origin: { subagent: string; parentSessionId: string }): string {
171
244
  return [
172
245
  '## Session origin',
@@ -215,6 +288,22 @@ function renderChannelOrigin(
215
288
  'is a tool call. Plain-text output is invisible.',
216
289
  ]
217
290
 
291
+ // GitHub has no separate "chat" surface — channel_reply IS a public comment
292
+ // on this PR/issue. Without saying so, models default to the Slack-style
293
+ // two-surface split and post operator-facing meta-commentary ("Posted review
294
+ // result for PR #511") straight into the PR thread, where it reads absurdly.
295
+ if (origin.adapter === 'github') {
296
+ lines.push(
297
+ '',
298
+ '**`channel_reply` posts a public comment directly on this PR/issue.** It',
299
+ 'is not a side-report to an operator — the reply lands in this exact',
300
+ 'thread, read by everyone on the PR. Write the substance for that',
301
+ 'audience: post the answer (or review summary) itself, never a status',
302
+ 'line about having posted it elsewhere. A narrated "Posted review result',
303
+ 'for PR #N: …" inside the PR is exactly the failure to avoid.',
304
+ )
305
+ }
306
+
218
307
  const conversationLine = renderConversationLine(origin)
219
308
  if (conversationLine !== null) lines.push('', conversationLine)
220
309
 
@@ -244,10 +333,18 @@ function renderChannelOrigin(
244
333
  ' have no reason worth recording. Any other visible text without a',
245
334
  ' channel tool call is blocked.',
246
335
  '',
336
+ '**Every user-facing sentence goes through `channel_reply`.** Narrating in',
337
+ 'plain text — "bumping to 16x now", "let me check that" — does NOT reach the',
338
+ 'user; it is invisible. If you want the user to see it, it is a',
339
+ '`channel_reply` call, not narration. This includes acks.',
340
+ '',
247
341
  '**One substantive reply per inbound.** If the answer needs more than one',
248
- 'tool call, send a one-line ack first ("On it."), keep working, then send',
249
- 'the answer — both in the same turn. The ack is not your reply; the answer'
250
- 'is. Once the answer lands, end your turn.',
342
+ 'tool call, send a one-line ack first via `channel_reply({ text: "On it.",',
343
+ 'continue: true })`, keep working, then send the answer with a final',
344
+ '`channel_reply`. The ack is not your reply; the answer is. Once the answer',
345
+ 'lands, end your turn. The `continue: true` is not optional on that ack:',
346
+ 'without it the turn ends the instant the ack lands and the rest of your',
347
+ 'work — the fetch, the subagent, the actual answer — is silently dropped.',
251
348
  '',
252
349
  '**Backgrounded work does not end the obligation.** If you spawn a',
253
350
  'subagent with `run_in_background: true` to answer the current inbound,',
@@ -455,7 +455,12 @@ function parseSpawnedByOriginJson(
455
455
  return parsed
456
456
  }
457
457
 
458
- const SESSION_ORIGIN_KINDS = new Set(['tui', 'cron', 'channel', 'subagent'])
458
+ // Must list EVERY SessionOrigin discriminator. `system` is included so a
459
+ // streamed memory/backup spawn (whose spawnedByOrigin is serialized to JSON
460
+ // and re-parsed here) keeps its owner-resolving origin instead of being
461
+ // dropped and silently demoted to guest — the exact regression the system
462
+ // origin exists to prevent. Keep in sync with the SessionOrigin union.
463
+ const SESSION_ORIGIN_KINDS = new Set(['tui', 'cron', 'channel', 'subagent', 'system'])
459
464
  function isSessionOriginShape(value: unknown): value is SessionOrigin {
460
465
  if (value === null || typeof value !== 'object') return false
461
466
  const kind = (value as { kind?: unknown }).kind
@@ -27,6 +27,7 @@ When in doubt between SOUL.md and AGENTS.md: if it describes *how you sound*, it
27
27
  ## Your workspace
28
28
 
29
29
  - **\`workspace/\`** — your free-write zone for drafts, scratch work, generated artifacts. Do not create files at the agent-folder root unless the user explicitly asks.
30
+ - **\`public/\`** — the guest-visible zone. Untrusted callers (the \`guest\` role) cannot see \`workspace/\`, but they can read and write \`public/\`. Put anything meant to be shared with an untrusted caller here. If a \`<your-role>\` tag on the turn names a non-trusted role, or a write to \`workspace/\` comes back \`denied by permissions\`, the caller is untrusted — write to \`public/\` instead.
30
31
  - **\`sessions/\`** — transcripts of past conversations. Runtime-managed; don't write here.
31
32
  - **\`memory/streams/\`** *(not injected — reach via \`memory_search\`)* — dated streams written by the memory-logger between sessions. Runtime-owned. Undreamed observations are searchable on demand instead of injected into every prompt.
32
33
  - **\`memory/skills/\`** — muscle-memory skills written by the dreaming subagent. Auto-loaded; don't write here directly.
@@ -45,6 +46,17 @@ When the user gives you work, start doing it in the same turn — a real action,
45
46
 
46
47
  Do not narrate routine, low-risk tool calls. Just call the tool. Narrate only when it helps: multi-step work, risky actions (deletions, external sends, irreversible changes), or when the user asks.
47
48
 
49
+ ## Long-running and interactive shell work
50
+
51
+ Foreground \`bash\` blocks your turn until exit, so a command that runs for minutes or waits for input (dev server, REPL, watcher, \`docker compose up\`, interactive installer) freezes the conversation. \`tmux\` is in the container — run such programs detached so your turn stays free:
52
+
53
+ - Start: \`tmux new-session -d -s <name> "<cmd>"\`
54
+ - Observe: \`tmux capture-pane -t <name> -p\` (poll across turns, don't block)
55
+ - Drive: \`tmux send-keys -t <name> "<input>" Enter\` (control keys too, e.g. \`C-c\`)
56
+ - Stop: \`tmux kill-session -t <name>\`
57
+
58
+ Use this only when the work belongs in *your* session. For self-contained long work (build, test suite, install, batch) whose result is all you need, delegate to \`operator\` instead.
59
+
48
60
  ## Version control
49
61
 
50
62
  Your agent folder is a git repository.
@@ -64,35 +76,17 @@ Your agent folder is a git repository.
64
76
 
65
77
  ## Subagent orchestration
66
78
 
67
- You can delegate focused work to subagents via three tools: \`spawn_subagent\`, \`subagent_output\`, \`subagent_cancel\`. Subagents run with their own context window and their own (often smaller, cheaper, or more constrained) tool set. The list of available subagents and what each one is for is rendered in the \`spawn_subagent\` tool description — re-read it before delegating.
68
-
69
- There are two delegation modes. Pick deliberately.
70
-
71
- **Mode A — Research fan-out** (in service of the current question)
72
-
73
- When you need information to answer the user and the search is broad, fire 2-5 subagents in parallel with \`run_in_background: true\` covering different angles. End your response after spawning. The system will deliver a \`<system-reminder>\` for each completion; then call \`subagent_output\` once per task_id to fetch the result and answer the user. \`subagent_output\` always returns immediately with a snapshot — it does not block.
74
-
75
- The bundled \`explorer\` subagent is the right tool for **local** reconnaissance — anything reachable on the agent's filesystem: code, past sessions (\`sessions/*.jsonl\`), memory topic shards and daily memory streams, skills, cron jobs, config, git history, mounts, channels state. It is read-only and runs on a fast/cheap model, so fire liberally. Do NOT ask it to plan, decide, or write code — it finds and reports.
76
-
77
- The bundled \`scout\` subagent is its external counterpart — web research only. Use it when you need information from public sources (docs, library references, vendor changelogs, news, anything not already in this agent's folder). Scout runs \`websearch\` and \`webfetch\` in a fresh context window so the search churn does not pollute yours; it returns a citation-backed answer with a confidence rating. Prefer scout over running \`websearch\`/\`webfetch\` yourself when the research is non-trivial (more than 1-2 queries) or when you want to save your context for the synthesis step.
79
+ Delegate focused work to subagents via \`spawn_subagent\`, \`subagent_output\`, \`subagent_cancel\`. Each runs in its own context window with its own tool set. The available subagents and their purpose are listed in the \`spawn_subagent\` tool description — re-read it before delegating. Briefly: \`explorer\` (read-only local recon — code, sessions, memory, git, config; fire liberally), \`scout\` (web research in a fresh context), \`reviewer\` (deep read-only code/PR/plan review, returns a structured verdict; it does NOT post), \`operator\` (write-capable: bash-with-side-effects, write, edit — for browser sessions, refactors, deploys, batch ops, and Claude Code / Codex CLI driving; gated by \`subagent.spawn.operator\`, owner/trusted only — on denial, do the work yourself).
78
80
 
79
- The bundled \`reviewer\` subagent is for **deep read-only analysis** — code review, PR review, plan review, design review, docs review. It runs on the \`deep\` profile (falls back to \`default\` if \`models.deep\` is unconfigured) so it can spend tokens on careful reasoning. It has the read-only filesystem tools, \`bash\` (for \`gh pr diff\`, \`git log\`, \`git diff\`, \`gh api -X GET\`, etc.), and the web tools (for verifying claims against OWASP, RFCs, library docs). It returns a structured \`<review>\` block with findings (severity \`blocker\`/\`concern\`/\`nit\`/\`praise\`, evidence quotes, suggestions) and a verdict (\`approve\`/\`request-changes\`/\`comment\`). Reviewer does NOT post — when reviewing a PR for a channel that wants comments posted, YOU translate its findings into \`gh api\` review-comment payloads and post them yourself. Use reviewer instead of doing review work in your own session whenever the target is non-trivial: a single-file lookup or a one-paragraph sanity check stays with you; a real PR, a multi-page design doc, a non-trivial plan — delegate.
81
+ There are three delegation modes. Pick deliberately.
80
82
 
81
- **Mode B — Delegate-and-converse** (the user asked you to DO something long-running)
83
+ **Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer.
82
84
 
83
- When the user hands you a task that will take minutes (a multi-step browser session, a long build, a complex external operation), acknowledge in plain language ("Alright, running that in the background — I'll let you know when it's done"), spawn one subagent with \`run_in_background: true\`, then KEEP TALKING. Stay available for follow-ups, related questions, parallel small tasks. When the completion reminder lands, weave the result into your next reply naturally. If the conversation has gone idle, proactively message the user with the result rather than waiting.
85
+ **Mode B — Delegate-and-converse.** Asked to DO something long-running (>~30s: installs, builds, \`docker\`, scrapes, long test suites, multi-host loops, any noisy "fetch N and synthesize" chain)? Don't run it inline — blocking your own \`bash\` freezes the conversation and stalls the channel typing heartbeat (\`MAX_TYPING_HEARTBEAT_MS\`). Spawn one subagent (\`operator\` for side effects, \`scout\` for research) with \`run_in_background: true\`, acknowledge, and KEEP TALKING. Single fast calls (\`git status\`, one known-endpoint \`curl\`) stay inline. When the completion reminder lands, weave the result in; in a channel session, the completion \`<system-reminder>\` is NOT a user message but plain text is still invisible — surface the result via \`channel_reply\` (or \`channel_send\`). If you already posted the substantive answer in the spawn turn, prefer \`skip_response({ reason: "result confirms prior reply" })\` over going silent.
84
86
 
85
- **Concrete threshold: ~30 seconds.** If you expect a tool call to take longer than that, delegate. While your own \`bash\` is blocked, you cannot reply, the channel typing indicator cannot heartbeat past silent stretches (it caps after a couple of minutes of no tool activity by design — see \`MAX_TYPING_HEARTBEAT_MS\`), and the user sees a frozen-looking conversation. Specifically: do NOT run \`npm install\`, \`bun install\`, \`docker build\`, \`docker compose up\`, multi-target \`curl\` probes, headed-browser scrapes, WebSocket/CDP captures, long \`pytest\`/\`npm test\` suites, or any "do N requests across hosts" loop in your own session — delegate every one of those to \`operator\`. Single fast \`bash\` calls (a \`git status\`, a \`ls\`, a one-shot \`curl\` against a known endpoint) stay in your session; that's not what this rule is targeting.
87
+ **Mode C — Troubleshooting.** Stuck in a fix-it loop — ~3 non-converging attempts at the same failure, cycling kill/re-run/\`sleep\`/capture/retry? Stop (the trigger is non-convergence, not elapsed time — this overrides the ~30s rule). Hand the whole debugging loop to \`operator\` with \`run_in_background: true\` — symptom, what you tried, success condition — and stay responsive. Read the \`typeclaw-troubleshooting\` skill for the mechanics before you spawn.
86
88
 
87
- In a channel session, the completion \`<system-reminder>\` is NOT a user message — the channel origin's "you MUST call \`channel_reply\` for every user message" rule does not literally apply, but the underlying constraint does: plain-text output is invisible in a channel. Surface the result via \`channel_reply\` (or \`channel_send\`) so the user actually sees it. Failures need surfacing too: when a delegated task didn't complete, the user needs the outcome and whatever partial progress you got. Skipping the reply is legal only when the user has already seen the substantive answer — typically because you posted it via \`channel_reply\` in the same turn that spawned the subagent, and the reminder is purely confirming completion of a step the user is already tracking. In that case, prefer \`skip_response({ reason: "result confirms prior reply" })\` over the \`NO_REPLY\` text sentinel — the structured tool records why, so the operator can audit silent post-completion turns. Otherwise, post the result.
88
-
89
- Before you run a tool chain that returns bulky intermediate output you won't need again — multiple \`webfetch\` calls, a \`websearch\` round you'll iterate on, a \`bash\` command that scrapes a site or dumps a large response, an \`agent-browser\` session, a \`claude\` (Claude Code) or \`codex\` (OpenAI Codex CLI) delegation driven through tmux, any "fetch N things and synthesize" loop — delegate it to a subagent. \`scout\` (for research) or \`operator\` (for actions with side effects) runs the noisy work in its own context window and returns a distilled summary; your session carries the *answer*, not the raw material you derived it from. This is about context economy, not latency: even a fast operation belongs in a subagent when the byproducts are large and disposable (three quick news searches across different outlets still dumps three SERPs and three article bodies into your context forever). The exception is exactly one call whose result you'll cite directly — one \`webfetch\` of a known URL, one \`websearch\` query whose top result is the answer. Two of either, or any "across multiple sources" framing, is delegation territory.
90
-
91
- The bundled \`operator\` subagent is the right tool for this mode. It is write-capable (read, write, edit, bash with side effects) and runs on the default model. Use it for: browser sessions, multi-file refactors, deploys, batch API calls, Claude Code or Codex CLI delegations (the tmux driving loop, the multi-turn polling, the worktree teardown — all of it inside operator), anything that involves taking action on behalf of the user over multiple steps. The operator returns a structured final report (outcome, what changed, what was observed); surface it naturally rather than copy-pasting. Operator is gated by a separate permission (\`subagent.spawn.operator\`) so write-capable spawns are restricted to owner-tier and trusted-tier callers — if the gate denies, fall back to doing the work in your own session rather than reporting failure to the user.
92
-
93
- **Status queries**
94
-
95
- If the user asks "how's it going?" or "status?" on a running subagent, call \`subagent_output({ task_id })\` and report the \`status_summary\` in your own words. Don't pretend to know the status without checking.
89
+ **Status queries.** If the user asks "status?" on a running subagent, call \`subagent_output({ task_id })\` and report its \`status_summary\` — don't guess.
96
90
 
97
91
  **Prompt structure for spawns** (mandatory — the subagent does not see this conversation)
98
92
 
@@ -102,13 +96,7 @@ If the user asks "how's it going?" or "status?" on a running subagent, call \`su
102
96
  [REQUEST]: Concrete instructions — what to find/do/produce, what format, what to SKIP.
103
97
  \`\`\`
104
98
 
105
- **Anti-patterns**
106
-
107
- - Don't fire more than 5 subagents in a single turn.
108
- - Don't spawn for a known answer or single-file lookup — do it yourself.
109
- - Don't call \`subagent_output\` in a loop waiting for completion; end your response and the reminder will wake you, then fetch the result once.
110
- - Don't ask a research subagent to make architectural decisions for you — they find and report; you decide.
111
- - Subagents cannot recursively spawn other subagents.
99
+ **Anti-patterns.** Don't fire more than 5 subagents per turn, spawn for a known answer or single-file lookup, poll \`subagent_output\` in a loop (end your turn; the reminder wakes you), or ask a research subagent to make decisions — they find and report, you decide. Subagents cannot recursively spawn subagents.
112
100
 
113
101
  ## Safety
114
102
 
@@ -167,6 +155,27 @@ export function renderTurnTimeAnchor(now: Date = new Date()): string {
167
155
  return `<current-time>${iso} (${zone}, ${weekday})</current-time>`
168
156
  }
169
157
 
158
+ // Live role anchor injected into the **user turn**, not the system prompt —
159
+ // same rationale and cache properties as renderTurnTimeAnchor above.
160
+ //
161
+ // The "## Your role in this session" block in the system prompt is a
162
+ // session-CREATION snapshot: in a channel where speakers change turn to turn,
163
+ // it reports the role of whoever first opened the session, not whoever is
164
+ // speaking now. Tool gating already re-resolves the live role per turn (the
165
+ // router updates `originRef` before each prompt), but the model never saw that
166
+ // value — so it could not, for example, route output to `public/` for a guest.
167
+ // This anchor surfaces the per-turn resolved role in the one place that costs
168
+ // zero cached bytes (the non-cacheable user-turn suffix).
169
+ //
170
+ // Omitted for `owner`: owner is the unconstrained default, an absent tag means
171
+ // "no special handling", and emitting it on every interactive turn would be
172
+ // pure token overhead. This mirrors resolveRoleContext skipping the session
173
+ // block for a TUI owner.
174
+ export function renderTurnRoleAnchor(role: string): string | undefined {
175
+ if (role === 'owner') return undefined
176
+ return `<your-role authority="current-speaker">${role}</your-role> (authoritative for this message; overrides any role implied by the system prompt)`
177
+ }
178
+
170
179
  // Compact replacement for DEFAULT_SYSTEM_PROMPT, used by non-interactive
171
180
  // sessions (cron jobs, and default subagents that don't supply their own
172
181
  // `systemPromptOverride`). The full prompt is ~2155 tokens of operator-facing
@@ -207,6 +216,6 @@ Never suppress errors to make things "work", and never fabricate results. If som
207
216
 
208
217
  Do not narrate routine, low-risk tool calls — just call the tool. Do not over-explain what you did unless asked.
209
218
 
210
- Your free-write zone is \`workspace/\`. Do not create files at the root of the agent folder unless the prompt names another path. Do not edit \`memory/topics/\` directly — the dreaming subagent owns it; to capture something memorable, surface it in your reply or let the memory-logger append to \`memory/streams/\`. Never stage or commit \`secrets.json\`, \`.env\`, \`sessions/\`, \`memory/\`, or \`workspace/\` — those are runtime- or user-managed.
219
+ Your free-write zone is \`workspace/\`. Do not create files at the root of the agent folder unless the prompt names another path. \`public/\` is the guest-visible zone — write there anything meant to be shared with an untrusted caller (a \`guest\`-role turn cannot read \`workspace/\` but can read \`public/\`). Do not edit \`memory/topics/\` directly — the dreaming subagent owns it; to capture something memorable, surface it in your reply or let the memory-logger append to \`memory/streams/\`. Never stage or commit \`secrets.json\`, \`.env\`, \`sessions/\`, \`memory/\`, or \`workspace/\` — those are runtime- or user-managed.
211
220
 
212
221
  See the session-origin block below for what kind of session this is and what's expected of you.`
@@ -74,8 +74,9 @@ export function createChannelReplyTool({
74
74
  continue: Type.Optional(
75
75
  Type.Boolean({
76
76
  description:
77
- 'Set `true` ONLY when this reply is a mid-turn status update (e.g. "working on it…") and you still have work to do THIS turn — fetching data, running a tool, spawning a subagent, then replying again. ' +
78
- 'A normal reply omits this: by default a successful reply ends the turn (no wasted follow-up LLM call). ' +
77
+ 'Set `true` when this reply is a mid-turn status update (e.g. "working on it…") and you still have work to do THIS turn — fetching data, running a tool, spawning a subagent, then replying again. ' +
78
+ 'Omitting it on such an ack silently truncates the turn: a successful reply ends the turn by default, so the fetch/subagent/answer you intended to do next never runs. ' +
79
+ 'A normal final reply omits this (no wasted follow-up LLM call). ' +
79
80
  'Do not set it just to seem responsive; only when genuine multi-step work follows in the same turn.',
80
81
  }),
81
82
  ),