typeclaw 0.15.2 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/index.ts +35 -2
- package/src/agent/plugin-tools.ts +38 -0
- package/src/agent/session-meta.ts +6 -2
- package/src/agent/session-origin.ts +111 -14
- package/src/agent/subagents.ts +6 -1
- package/src/agent/system-prompt.ts +41 -32
- package/src/agent/tools/channel-reply.ts +3 -2
- package/src/agent/tools/grant-role.ts +214 -0
- package/src/bundled-plugins/guard/policies/non-workspace-write.ts +8 -6
- package/src/bundled-plugins/memory/index.ts +25 -6
- package/src/bundled-plugins/security/index.ts +12 -0
- package/src/bundled-plugins/security/policies/private-surface-read.ts +215 -0
- package/src/channels/adapters/github/inbound.ts +54 -1
- package/src/channels/adapters/github/index.ts +1 -0
- package/src/channels/router.ts +150 -37
- package/src/cli/inspect.ts +20 -9
- package/src/cli/role.ts +10 -1
- package/src/cli/ui.ts +6 -4
- package/src/config/reloadable.ts +10 -3
- package/src/init/index.ts +24 -42
- package/src/init/paths.ts +1 -0
- package/src/init/run-owner-claim.ts +21 -3
- package/src/inspect/label.ts +2 -0
- package/src/inspect/live.ts +6 -1
- package/src/inspect/render.ts +8 -2
- package/src/inspect/replay.ts +6 -1
- package/src/inspect/types.ts +4 -1
- package/src/permissions/builtins.ts +22 -0
- package/src/permissions/grant.ts +92 -16
- package/src/permissions/index.ts +8 -2
- package/src/permissions/permissions.ts +16 -0
- package/src/permissions/resolve.ts +10 -0
- package/src/plugin/types.ts +12 -0
- package/src/role-claim/index.ts +1 -0
- package/src/role-claim/reload-after-claim.ts +34 -0
- package/src/run/channel-session-factory.ts +6 -1
- package/src/run/index.ts +18 -1
- package/src/sandbox/build.ts +51 -1
- package/src/sandbox/hidden-paths.ts +41 -0
- package/src/sandbox/index.ts +2 -1
- package/src/sandbox/policy.ts +15 -0
- package/src/skills/typeclaw-channel-github/SKILL.md +15 -3
- package/src/skills/typeclaw-permissions/SKILL.md +11 -3
- package/src/skills/typeclaw-skills/SKILL.md +3 -1
- package/src/skills/typeclaw-troubleshooting/SKILL.md +104 -0
- package/src/usage/report.ts +4 -0
- package/src/usage/scan.ts +1 -1
package/package.json
CHANGED
package/src/agent/index.ts
CHANGED
|
@@ -9,7 +9,7 @@ import { loadMemory } from '@/bundled-plugins/memory/load-memory'
|
|
|
9
9
|
import type { ChannelRouter } from '@/channels/router'
|
|
10
10
|
import { getConfig, resolveModel, resolveProfile } from '@/config'
|
|
11
11
|
import { defaultThinkingLevelForRef, providerForModelRef, type KnownModelRef } from '@/config/providers'
|
|
12
|
-
import type { PermissionService } from '@/permissions'
|
|
12
|
+
import type { PermissionService, RolesConfig } from '@/permissions'
|
|
13
13
|
import type {
|
|
14
14
|
BuiltinToolRef,
|
|
15
15
|
HookBus,
|
|
@@ -50,6 +50,7 @@ import { createChannelFetchAttachmentTool } from './tools/channel-fetch-attachme
|
|
|
50
50
|
import { createChannelHistoryTool } from './tools/channel-history'
|
|
51
51
|
import { createChannelReplyTool } from './tools/channel-reply'
|
|
52
52
|
import { createChannelSendTool } from './tools/channel-send'
|
|
53
|
+
import { createGrantRoleTool } from './tools/grant-role'
|
|
53
54
|
import { createRestartTool } from './tools/restart'
|
|
54
55
|
import { createSkipResponseTool } from './tools/skip-response'
|
|
55
56
|
import { createSpawnSubagentTool } from './tools/spawn-subagent'
|
|
@@ -63,7 +64,7 @@ export type { SessionOrigin } from './session-origin'
|
|
|
63
64
|
|
|
64
65
|
export type { AgentSession }
|
|
65
66
|
|
|
66
|
-
export { renderTurnTimeAnchor } from './system-prompt'
|
|
67
|
+
export { renderTurnRoleAnchor, renderTurnTimeAnchor } from './system-prompt'
|
|
67
68
|
|
|
68
69
|
type AgentSessionTools = NonNullable<Parameters<typeof createAgentSession>[0]>['tools']
|
|
69
70
|
|
|
@@ -141,6 +142,11 @@ export type CreateSessionOptions = {
|
|
|
141
142
|
// prompt is not regenerated; see `typeclaw-permissions` skill for how the
|
|
142
143
|
// agent should interpret the snapshot on later turns.
|
|
143
144
|
permissions?: PermissionService
|
|
145
|
+
// Re-reads roles from disk for the grant_role tool's hot-reload after a match
|
|
146
|
+
// grant. Production threads a reload-then-read (reloadConfig + getConfig);
|
|
147
|
+
// must not be an in-memory snapshot or the grant reapplies stale roles.
|
|
148
|
+
// Omitted when no grant_role tool is wired (the tool requires permissions).
|
|
149
|
+
reloadRoles?: () => RolesConfig | undefined
|
|
144
150
|
// Model profile name. Resolved against `config.models` to pick the concrete
|
|
145
151
|
// model ref this session binds to. Unknown profile names fall back to
|
|
146
152
|
// `default` with a one-time console warning. Omitted → `default`. Threaded
|
|
@@ -322,6 +328,12 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
|
|
|
322
328
|
permissions: options.permissions,
|
|
323
329
|
stream: options.stream,
|
|
324
330
|
}),
|
|
331
|
+
...buildRoleGrantTools({
|
|
332
|
+
agentDir: options.plugins?.agentDir,
|
|
333
|
+
getOrigin,
|
|
334
|
+
permissions: options.permissions,
|
|
335
|
+
reloadRoles: options.reloadRoles,
|
|
336
|
+
}),
|
|
325
337
|
]
|
|
326
338
|
// Hook coverage for pi's builtin coding tools (read/bash/edit/write/grep/
|
|
327
339
|
// find/ls) — pi 0.67.3 ignores `tools:` for implementation, so the only
|
|
@@ -335,6 +347,7 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
|
|
|
335
347
|
sessionId: options.plugins.sessionId,
|
|
336
348
|
hooks: options.plugins.hooks,
|
|
337
349
|
getOrigin,
|
|
350
|
+
...(options.permissions ? { permissions: options.permissions } : {}),
|
|
338
351
|
})
|
|
339
352
|
: []
|
|
340
353
|
const wrappedCustomSystemTools = wrapSystemTools(customSystemTools, options.plugins, getOrigin)
|
|
@@ -576,6 +589,25 @@ export function buildSubagentOrchestrationTools(opts: {
|
|
|
576
589
|
]
|
|
577
590
|
}
|
|
578
591
|
|
|
592
|
+
/**
 * Assembles the role-granting tools for a session.
 *
 * The `grant_role` tool is only produced when the agent folder, the permission
 * service and the roles re-reader are all supplied. If any one of them is
 * `undefined`, the returned list is empty.
 *
 * @param opts.agentDir - Agent folder handed to the tool factory.
 * @param opts.getOrigin - Accessor for the session origin, forwarded as-is.
 * @param opts.permissions - Permission service handed to the tool factory.
 * @param opts.reloadRoles - Callback that yields the current roles config.
 * @returns A one-element list holding the grant-role tool, or an empty list.
 */
export function buildRoleGrantTools(opts: {
  agentDir: string | undefined
  getOrigin: () => SessionOrigin | undefined
  permissions: PermissionService | undefined
  reloadRoles: (() => RolesConfig | undefined) | undefined
}): ToolDefinition[] {
  const { agentDir, getOrigin, permissions, reloadRoles } = opts
  // The condition is written inline so the compiler narrows all three values
  // to their defined types inside the "wired" branch.
  return agentDir !== undefined && permissions !== undefined && reloadRoles !== undefined
    ? [createGrantRoleTool({ agentDir, getOrigin, permissions, reloadRoles })]
    : []
}
|
|
610
|
+
|
|
579
611
|
function wrapRegistryTools(
|
|
580
612
|
plugins: PluginSessionWiring | undefined,
|
|
581
613
|
getOrigin: () => SessionOrigin | undefined,
|
|
@@ -711,6 +743,7 @@ export function deriveSystemPromptMode(origin: SessionOrigin | undefined): Syste
|
|
|
711
743
|
return 'full'
|
|
712
744
|
case 'cron':
|
|
713
745
|
case 'subagent':
|
|
746
|
+
case 'system':
|
|
714
747
|
return 'slim'
|
|
715
748
|
default: {
|
|
716
749
|
const _exhaustive: never = origin
|
|
@@ -20,6 +20,7 @@ import {
|
|
|
20
20
|
checkNonWorkspaceWriteGuard,
|
|
21
21
|
checkSkillAuthoringGuard,
|
|
22
22
|
} from '@/bundled-plugins/guard/policy'
|
|
23
|
+
import type { PermissionService } from '@/permissions/permissions'
|
|
23
24
|
import type {
|
|
24
25
|
BuiltinToolRef,
|
|
25
26
|
ContentPart,
|
|
@@ -30,6 +31,7 @@ import type {
|
|
|
30
31
|
ToolContext,
|
|
31
32
|
ToolResult,
|
|
32
33
|
} from '@/plugin'
|
|
34
|
+
import { buildSandboxedCommand, ensureBwrapAvailable, resolveHiddenPaths } from '@/sandbox'
|
|
33
35
|
|
|
34
36
|
import { createLoopGuard, type LoopGuard } from './loop-guard'
|
|
35
37
|
import { checkImageReadRedirect } from './multimodal/read-redirect'
|
|
@@ -134,6 +136,11 @@ export type WrapSystemToolOptions = {
|
|
|
134
136
|
sessionId: string
|
|
135
137
|
hooks: HookBus
|
|
136
138
|
getOrigin?: () => SessionOrigin | undefined
|
|
139
|
+
// When present, the bash builtin is rewritten through the per-tool bwrap
|
|
140
|
+
// sandbox with role-derived path masks. Absent (or no masks for the role)
|
|
141
|
+
// runs bash unchanged — preserving today's behavior for trusted+ and for
|
|
142
|
+
// sessions wired without a permission service (e.g. tests).
|
|
143
|
+
permissions?: PermissionService
|
|
137
144
|
}
|
|
138
145
|
|
|
139
146
|
// Zod 4 emits a top-level `"$schema": "https://json-schema.org/draft/2020-12/schema"`
|
|
@@ -393,6 +400,10 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
|
|
|
393
400
|
}
|
|
394
401
|
stripGuardAcknowledgements(mutableArgs)
|
|
395
402
|
|
|
403
|
+
if (tool.name === 'bash' && opts.permissions !== undefined) {
|
|
404
|
+
await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir)
|
|
405
|
+
}
|
|
406
|
+
|
|
396
407
|
const result = await tool.execute(toolCallId, mutableArgs as Static<TParams>, signal, onUpdate)
|
|
397
408
|
const hookResult: ToolResult = {
|
|
398
409
|
content: result.content as ContentPart[],
|
|
@@ -425,6 +436,33 @@ export function buildBuiltinPiToolOverrides(opts: WrapSystemToolOptions): ToolDe
|
|
|
425
436
|
return defaultBuiltinPiAgentTools().map((tool) => wrapAgentToolAsCustomToolDefinition(tool, opts))
|
|
426
437
|
}
|
|
427
438
|
|
|
439
|
+
// Replaces `mutableArgs.command` with a bwrap-wrapped equivalent so the bash
// builtin runs with the path masks derived from the caller's role. When the
// role yields no masks (trusted+ sees everything) the command is left as-is.
// When masks are required, bwrap availability is checked first: the intent is
// to fail closed (throw) rather than run unsandboxed and leak the masked
// surface. This runs after the tool.before guards have seen the raw command.
async function applyBashSandbox(
  mutableArgs: Record<string, unknown>,
  permissions: PermissionService,
  origin: SessionOrigin | undefined,
  agentDir: string,
): Promise<void> {
  const rawCommand = mutableArgs.command
  // Nothing to rewrite unless the tool call actually carries a string command.
  if (typeof rawCommand !== 'string') return

  const hidden = resolveHiddenPaths(permissions, origin, agentDir)
  const nothingToMask = hidden.dirs.length === 0 && hidden.files.length === 0
  if (nothingToMask) return

  await ensureBwrapAvailable()

  // The agent folder is bound onto itself and used as the working directory;
  // networking is inherited from the surrounding process.
  const sandboxed = buildSandboxedCommand(rawCommand, {
    mounts: [{ type: 'bind', source: agentDir, dest: agentDir }],
    masks: { dirs: hidden.dirs, files: hidden.files },
    network: 'inherit',
    cwd: agentDir,
  })
  mutableArgs.command = sandboxed.commandString
}
|
|
465
|
+
|
|
428
466
|
function appendLoopWarning(result: ToolResult, message: string): ToolResult {
|
|
429
467
|
const content: ContentPart[] = [...(result.content as ContentPart[]), { type: 'text', text: message }]
|
|
430
468
|
return { content, details: result.details }
|
|
@@ -28,10 +28,12 @@ export type MinimalSessionOrigin =
|
|
|
28
28
|
thread: string | null
|
|
29
29
|
}
|
|
30
30
|
| { kind: 'subagent'; subagent: string; parentSessionId: string }
|
|
31
|
+
| { kind: 'system'; component: string }
|
|
31
32
|
|
|
32
33
|
// Reduce a full SessionOrigin to the minimum projection persisted to disk.
|
|
33
|
-
// Drops participant lists, membership counts, recursive provenance
|
|
34
|
-
//
|
|
34
|
+
// Drops participant lists, membership counts, recursive provenance (including
|
|
35
|
+
// the system origin's `triggeredBy`, which can carry channel author identity),
|
|
36
|
+
// and author identifiers — none of which `typeclaw usage` reads, and all of
|
|
35
37
|
// which would otherwise land in git history when sessions/ is auto-backed-up.
|
|
36
38
|
// Kept as a separate function so the boundary between "data the LLM sees in
|
|
37
39
|
// the system prompt" (full origin) and "data persisted for usage reporting"
|
|
@@ -58,5 +60,7 @@ function minimalOrigin(origin: SessionOrigin): MinimalSessionOrigin {
|
|
|
58
60
|
}
|
|
59
61
|
case 'subagent':
|
|
60
62
|
return { kind: 'subagent', subagent: origin.subagent, parentSessionId: origin.parentSessionId }
|
|
63
|
+
case 'system':
|
|
64
|
+
return { kind: 'system', component: origin.component }
|
|
61
65
|
}
|
|
62
66
|
}
|
|
@@ -48,6 +48,23 @@ export type SessionOrigin =
|
|
|
48
48
|
spawnedByRole?: string
|
|
49
49
|
spawnedByOrigin?: SessionOrigin
|
|
50
50
|
}
|
|
51
|
+
// Runtime-owned infrastructure operating over TypeClaw's own state (memory
|
|
52
|
+
// logging/retrieval, backup), NOT user-delegated work. It resolves to `owner`
|
|
53
|
+
// because it acts on the operator's behalf over operator-owned files, with no
|
|
54
|
+
// single user session to inherit authority from — inheriting the triggering
|
|
55
|
+
// turn's role (e.g. a guest channel turn) would wrongly classify TypeClaw
|
|
56
|
+
// infrastructure as the guest actor and block its legitimate sessions/ and memory/
|
|
57
|
+
// access. `triggeredBy` keeps honest provenance — "a guest turn triggered the
|
|
58
|
+
// memory-logger" — without the synthetic-TUI lie. This kind is only ever
|
|
59
|
+
// constructed by runtime/bundled code; inbound channel/cron content can never
|
|
60
|
+
// produce it (those origins come from the runtime, not from message text), so
|
|
61
|
+
// it is not a role-laundering vector.
|
|
62
|
+
| {
|
|
63
|
+
kind: 'system'
|
|
64
|
+
component: string
|
|
65
|
+
reason?: string
|
|
66
|
+
triggeredBy?: SessionOrigin
|
|
67
|
+
}
|
|
51
68
|
|
|
52
69
|
export const PARTICIPANTS_TOP_K = 10
|
|
53
70
|
export const PARTICIPANTS_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000
|
|
@@ -94,11 +111,12 @@ function getPlatformInfo(adapter: AdapterId): PlatformInfo {
|
|
|
94
111
|
// TUI is always `owner` by construction — annotating it would add noise to
|
|
95
112
|
// every interactive session for zero new information.
|
|
96
113
|
//
|
|
97
|
-
//
|
|
98
|
-
//
|
|
99
|
-
//
|
|
100
|
-
//
|
|
101
|
-
//
|
|
114
|
+
// Channel origins do NOT render this concrete role. A channel session is
|
|
115
|
+
// keyed by chat/thread, so the opener's role is wrong for every later
|
|
116
|
+
// speaker and printing their permission list leaks it into shared context.
|
|
117
|
+
// Channel origins render renderChannelRolePolicy() instead, and the
|
|
118
|
+
// authoritative per-turn role rides in the non-cacheable `<your-role>`
|
|
119
|
+
// turn anchor (renderTurnRoleAnchor in system-prompt.ts).
|
|
102
120
|
export type SessionRoleContext = {
|
|
103
121
|
role: string
|
|
104
122
|
permissions: readonly string[]
|
|
@@ -111,19 +129,22 @@ export function renderSessionOrigin(
|
|
|
111
129
|
): string {
|
|
112
130
|
switch (origin.kind) {
|
|
113
131
|
case 'tui':
|
|
114
|
-
return withRoleContext(renderTuiOrigin(), roleContext)
|
|
132
|
+
return withRoleContext(renderTuiOrigin(), roleContext, origin.kind)
|
|
115
133
|
case 'cron':
|
|
116
|
-
return withRoleContext(renderCronOrigin(origin), roleContext)
|
|
134
|
+
return withRoleContext(renderCronOrigin(origin), roleContext, origin.kind)
|
|
117
135
|
case 'channel':
|
|
118
|
-
return withRoleContext(renderChannelOrigin(origin, now), roleContext)
|
|
136
|
+
return withRoleContext(renderChannelOrigin(origin, now), roleContext, origin.kind)
|
|
119
137
|
case 'subagent':
|
|
120
|
-
return withRoleContext(renderSubagentOrigin(origin), roleContext)
|
|
138
|
+
return withRoleContext(renderSubagentOrigin(origin), roleContext, origin.kind)
|
|
139
|
+
case 'system':
|
|
140
|
+
return withRoleContext(renderSystemOrigin(origin), roleContext, origin.kind)
|
|
121
141
|
}
|
|
122
142
|
}
|
|
123
143
|
|
|
124
|
-
function withRoleContext(block: string, ctx: SessionRoleContext | undefined): string {
|
|
144
|
+
function withRoleContext(block: string, ctx: SessionRoleContext | undefined, kind: SessionOrigin['kind']): string {
|
|
125
145
|
if (ctx === undefined) return block
|
|
126
|
-
|
|
146
|
+
const roleBlock = kind === 'channel' ? renderChannelRolePolicy() : renderRoleContext(ctx)
|
|
147
|
+
return `${block}\n\n${roleBlock}`
|
|
127
148
|
}
|
|
128
149
|
|
|
129
150
|
function renderRoleContext(ctx: SessionRoleContext): string {
|
|
@@ -141,6 +162,30 @@ function renderRoleContext(ctx: SessionRoleContext): string {
|
|
|
141
162
|
].join('\n')
|
|
142
163
|
}
|
|
143
164
|
|
|
165
|
+
// A channel session is keyed by chat/thread rather than by author, so a single
// session may carry many speakers holding different roles. Printing the
// opener's concrete role would be wrong for every later speaker and would leak
// the opener's full permission list into shared context. Channel origins
// therefore get this cache-stable policy text instead of a resolved identity;
// the authoritative per-turn role travels in the non-cacheable `<your-role>`
// turn anchor.
function renderChannelRolePolicy(): string {
  const heading = '## Your role in this session'

  // Paragraph 1: how to read the per-turn role.
  const perTurnRole = [
    'This is a channel conversation that may include multiple speakers. Do not',
    'assume one speaker’s role applies to later messages. For each user turn the',
    'current speaker’s effective role is provided in the turn context as a',
    '`<your-role>` tag; that per-turn role is authoritative for the current',
    'message and overrides any role implied by session-opening context. An absent',
    '`<your-role>` tag means the current speaker is the unconstrained default.',
  ].join('\n')

  // Paragraph 2: how permission denials surface.
  const gating = [
    'Tool calls and channel admission are gated by the current speaker’s',
    'permissions; a `blocked:` or "denied by permissions" message means that',
    'speaker lacks the permission the guard wanted. See the',
    '`typeclaw-permissions` skill for what each role can do.',
  ].join('\n')

  // Sections are separated by exactly one blank line.
  return [heading, perTurnRole, gating].join('\n\n')
}
|
|
188
|
+
|
|
144
189
|
function renderTuiOrigin(): string {
|
|
145
190
|
return [
|
|
146
191
|
'## Session origin',
|
|
@@ -167,6 +212,34 @@ function renderCronOrigin(origin: { jobId: string; jobKind: 'prompt' | 'exec' |
|
|
|
167
212
|
].join('\n')
|
|
168
213
|
}
|
|
169
214
|
|
|
215
|
+
/**
 * Renders the "Session origin" block for a runtime-owned system process.
 *
 * The block always names the component. A `Reason:` line and a
 * `Triggered by:` line follow, each preceded by a blank line, only when the
 * corresponding field is present.
 *
 * @param origin - Component name plus optional reason and triggering origin.
 * @returns The newline-joined block text.
 */
function renderSystemOrigin(origin: { component: string; reason?: string; triggeredBy?: SessionOrigin }): string {
  const { component, reason, triggeredBy } = origin

  const intro = [
    '## Session origin',
    '',
    `You are the \`${component}\` system process — TypeClaw-owned`,
    "infrastructure operating over the agent folder on the operator's behalf,",
    'not a user-delegated task. Do exactly the job described and exit.',
  ]
  // Optional segments collapse to nothing when their field is absent.
  const reasonLines = reason === undefined ? [] : ['', `Reason: ${reason}`]
  const triggerLines = triggeredBy === undefined ? [] : ['', `Triggered by: ${describeTrigger(triggeredBy)}`]

  return [...intro, ...reasonLines, ...triggerLines].join('\n')
}
|
|
227
|
+
|
|
228
|
+
/**
 * Produces a short human-readable phrase naming the origin that triggered a
 * system process, one wording per `SessionOrigin` kind.
 *
 * @param origin - The triggering session origin.
 * @returns A noun phrase suitable for a "Triggered by:" line.
 */
function describeTrigger(origin: SessionOrigin): string {
  if (origin.kind === 'tui') return 'a TUI session'
  if (origin.kind === 'cron') return `cron job \`${origin.jobId}\``
  if (origin.kind === 'channel') return `a ${getPlatformInfo(origin.adapter).displayName} channel turn`
  if (origin.kind === 'subagent') return `the \`${origin.subagent}\` subagent`
  // Only the `system` member of the union remains at this point.
  return `the \`${origin.component}\` system process`
}
|
|
242
|
+
|
|
170
243
|
function renderSubagentOrigin(origin: { subagent: string; parentSessionId: string }): string {
|
|
171
244
|
return [
|
|
172
245
|
'## Session origin',
|
|
@@ -215,6 +288,22 @@ function renderChannelOrigin(
|
|
|
215
288
|
'is a tool call. Plain-text output is invisible.',
|
|
216
289
|
]
|
|
217
290
|
|
|
291
|
+
// GitHub has no separate "chat" surface — channel_reply IS a public comment
|
|
292
|
+
// on this PR/issue. Without saying so, models default to the Slack-style
|
|
293
|
+
// two-surface split and post operator-facing meta-commentary ("Posted review
|
|
294
|
+
// result for PR #511") straight into the PR thread, where it reads absurdly.
|
|
295
|
+
if (origin.adapter === 'github') {
|
|
296
|
+
lines.push(
|
|
297
|
+
'',
|
|
298
|
+
'**`channel_reply` posts a public comment directly on this PR/issue.** It',
|
|
299
|
+
'is not a side-report to an operator — the reply lands in this exact',
|
|
300
|
+
'thread, read by everyone on the PR. Write the substance for that',
|
|
301
|
+
'audience: post the answer (or review summary) itself, never a status',
|
|
302
|
+
'line about having posted it elsewhere. A narrated "Posted review result',
|
|
303
|
+
'for PR #N: …" inside the PR is exactly the failure to avoid.',
|
|
304
|
+
)
|
|
305
|
+
}
|
|
306
|
+
|
|
218
307
|
const conversationLine = renderConversationLine(origin)
|
|
219
308
|
if (conversationLine !== null) lines.push('', conversationLine)
|
|
220
309
|
|
|
@@ -244,10 +333,18 @@ function renderChannelOrigin(
|
|
|
244
333
|
' have no reason worth recording. Any other visible text without a',
|
|
245
334
|
' channel tool call is blocked.',
|
|
246
335
|
'',
|
|
336
|
+
'**Every user-facing sentence goes through `channel_reply`.** Narrating in',
|
|
337
|
+
'plain text — "bumping to 16x now", "let me check that" — does NOT reach the',
|
|
338
|
+
'user; it is invisible. If you want the user to see it, it is a',
|
|
339
|
+
'`channel_reply` call, not narration. This includes acks.',
|
|
340
|
+
'',
|
|
247
341
|
'**One substantive reply per inbound.** If the answer needs more than one',
|
|
248
|
-
'tool call, send a one-line ack first ("On it."
|
|
249
|
-
'
|
|
250
|
-
'is
|
|
342
|
+
'tool call, send a one-line ack first via `channel_reply({ text: "On it.",',
|
|
343
|
+
'continue: true })`, keep working, then send the answer with a final',
|
|
344
|
+
'`channel_reply`. The ack is not your reply; the answer is. Once the answer',
|
|
345
|
+
'lands, end your turn. The `continue: true` is not optional on that ack:',
|
|
346
|
+
'without it the turn ends the instant the ack lands and the rest of your',
|
|
347
|
+
'work — the fetch, the subagent, the actual answer — is silently dropped.',
|
|
251
348
|
'',
|
|
252
349
|
'**Backgrounded work does not end the obligation.** If you spawn a',
|
|
253
350
|
'subagent with `run_in_background: true` to answer the current inbound,',
|
package/src/agent/subagents.ts
CHANGED
|
@@ -455,7 +455,12 @@ function parseSpawnedByOriginJson(
|
|
|
455
455
|
return parsed
|
|
456
456
|
}
|
|
457
457
|
|
|
458
|
-
|
|
458
|
+
// Must list EVERY SessionOrigin discriminator. `system` is included so a
|
|
459
|
+
// streamed memory/backup spawn (whose spawnedByOrigin is serialized to JSON
|
|
460
|
+
// and re-parsed here) keeps its owner-resolving origin instead of being
|
|
461
|
+
// dropped and silently demoted to guest — the exact regression the system
|
|
462
|
+
// origin exists to prevent. Keep in sync with the SessionOrigin union.
|
|
463
|
+
const SESSION_ORIGIN_KINDS = new Set(['tui', 'cron', 'channel', 'subagent', 'system'])
|
|
459
464
|
function isSessionOriginShape(value: unknown): value is SessionOrigin {
|
|
460
465
|
if (value === null || typeof value !== 'object') return false
|
|
461
466
|
const kind = (value as { kind?: unknown }).kind
|
|
@@ -27,6 +27,7 @@ When in doubt between SOUL.md and AGENTS.md: if it describes *how you sound*, it
|
|
|
27
27
|
## Your workspace
|
|
28
28
|
|
|
29
29
|
- **\`workspace/\`** — your free-write zone for drafts, scratch work, generated artifacts. Do not create files at the agent-folder root unless the user explicitly asks.
|
|
30
|
+
- **\`public/\`** — the guest-visible zone. Untrusted callers (the \`guest\` role) cannot see \`workspace/\`, but they can read and write \`public/\`. Put anything meant to be shared with an untrusted caller here. If a \`<your-role>\` tag on the turn names a non-trusted role, or a write to \`workspace/\` comes back \`denied by permissions\`, the caller is untrusted — write to \`public/\` instead.
|
|
30
31
|
- **\`sessions/\`** — transcripts of past conversations. Runtime-managed; don't write here.
|
|
31
32
|
- **\`memory/streams/\`** *(not injected — reach via \`memory_search\`)* — dated streams written by the memory-logger between sessions. Runtime-owned. Undreamed observations are searchable on demand instead of injected into every prompt.
|
|
32
33
|
- **\`memory/skills/\`** — muscle-memory skills written by the dreaming subagent. Auto-loaded; don't write here directly.
|
|
@@ -45,6 +46,17 @@ When the user gives you work, start doing it in the same turn — a real action,
|
|
|
45
46
|
|
|
46
47
|
Do not narrate routine, low-risk tool calls. Just call the tool. Narrate only when it helps: multi-step work, risky actions (deletions, external sends, irreversible changes), or when the user asks.
|
|
47
48
|
|
|
49
|
+
## Long-running and interactive shell work
|
|
50
|
+
|
|
51
|
+
Foreground \`bash\` blocks your turn until exit, so a command that runs for minutes or waits for input (dev server, REPL, watcher, \`docker compose up\`, interactive installer) freezes the conversation. \`tmux\` is in the container — run such programs detached so your turn stays free:
|
|
52
|
+
|
|
53
|
+
- Start: \`tmux new-session -d -s <name> "<cmd>"\`
|
|
54
|
+
- Observe: \`tmux capture-pane -t <name> -p\` (poll across turns, don't block)
|
|
55
|
+
- Drive: \`tmux send-keys -t <name> "<input>" Enter\` (control keys too, e.g. \`C-c\`)
|
|
56
|
+
- Stop: \`tmux kill-session -t <name>\`
|
|
57
|
+
|
|
58
|
+
Use this only when the work belongs in *your* session. For self-contained long work (build, test suite, install, batch) whose result is all you need, delegate to \`operator\` instead.
|
|
59
|
+
|
|
48
60
|
## Version control
|
|
49
61
|
|
|
50
62
|
Your agent folder is a git repository.
|
|
@@ -64,35 +76,17 @@ Your agent folder is a git repository.
|
|
|
64
76
|
|
|
65
77
|
## Subagent orchestration
|
|
66
78
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
There are two delegation modes. Pick deliberately.
|
|
70
|
-
|
|
71
|
-
**Mode A — Research fan-out** (in service of the current question)
|
|
72
|
-
|
|
73
|
-
When you need information to answer the user and the search is broad, fire 2-5 subagents in parallel with \`run_in_background: true\` covering different angles. End your response after spawning. The system will deliver a \`<system-reminder>\` for each completion; then call \`subagent_output\` once per task_id to fetch the result and answer the user. \`subagent_output\` always returns immediately with a snapshot — it does not block.
|
|
74
|
-
|
|
75
|
-
The bundled \`explorer\` subagent is the right tool for **local** reconnaissance — anything reachable on the agent's filesystem: code, past sessions (\`sessions/*.jsonl\`), memory topic shards and daily memory streams, skills, cron jobs, config, git history, mounts, channels state. It is read-only and runs on a fast/cheap model, so fire liberally. Do NOT ask it to plan, decide, or write code — it finds and reports.
|
|
76
|
-
|
|
77
|
-
The bundled \`scout\` subagent is its external counterpart — web research only. Use it when you need information from public sources (docs, library references, vendor changelogs, news, anything not already in this agent's folder). Scout runs \`websearch\` and \`webfetch\` in a fresh context window so the search churn does not pollute yours; it returns a citation-backed answer with a confidence rating. Prefer scout over running \`websearch\`/\`webfetch\` yourself when the research is non-trivial (more than 1-2 queries) or when you want to save your context for the synthesis step.
|
|
79
|
+
Delegate focused work to subagents via \`spawn_subagent\`, \`subagent_output\`, \`subagent_cancel\`. Each runs in its own context window with its own tool set. The available subagents and their purpose are listed in the \`spawn_subagent\` tool description — re-read it before delegating. Briefly: \`explorer\` (read-only local recon — code, sessions, memory, git, config; fire liberally), \`scout\` (web research in a fresh context), \`reviewer\` (deep read-only code/PR/plan review, returns a structured verdict; it does NOT post), \`operator\` (write-capable: bash-with-side-effects, write, edit — for browser sessions, refactors, deploys, batch ops, and Claude Code / Codex CLI driving; gated by \`subagent.spawn.operator\`, owner/trusted only — on denial, do the work yourself).
|
|
78
80
|
|
|
79
|
-
|
|
81
|
+
There are three delegation modes. Pick deliberately.
|
|
80
82
|
|
|
81
|
-
**Mode
|
|
83
|
+
**Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer.
|
|
82
84
|
|
|
83
|
-
|
|
85
|
+
**Mode B — Delegate-and-converse.** Asked to DO something long-running (>~30s: installs, builds, \`docker\`, scrapes, long test suites, multi-host loops, any noisy "fetch N and synthesize" chain)? Don't run it inline — blocking your own \`bash\` freezes the conversation and stalls the channel typing heartbeat (\`MAX_TYPING_HEARTBEAT_MS\`). Spawn one subagent (\`operator\` for side effects, \`scout\` for research) with \`run_in_background: true\`, acknowledge, and KEEP TALKING. Single fast calls (\`git status\`, one known-endpoint \`curl\`) stay inline. When the completion reminder lands, weave the result in; in a channel session, the completion \`<system-reminder>\` is NOT a user message but plain text is still invisible — Surface the result via \`channel_reply\` (or \`channel_send\`). If you already posted the substantive answer in the spawn turn, prefer \`skip_response({ reason: "result confirms prior reply" })\` over going silent.
|
|
84
86
|
|
|
85
|
-
**
|
|
87
|
+
**Mode C — Troubleshooting.** Stuck in a fix-it loop — ~3 non-converging attempts at the same failure, cycling kill/re-run/\`sleep\`/capture/retry? Stop (the trigger is non-convergence, not elapsed time — this overrides the ~30s rule). Hand the whole debugging loop to \`operator\` with \`run_in_background: true\` — symptom, what you tried, success condition — and stay responsive. Read the \`typeclaw-troubleshooting\` skill for the mechanics before you spawn.
|
|
86
88
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
Before you run a tool chain that returns bulky intermediate output you won't need again — multiple \`webfetch\` calls, a \`websearch\` round you'll iterate on, a \`bash\` command that scrapes a site or dumps a large response, an \`agent-browser\` session, a \`claude\` (Claude Code) or \`codex\` (OpenAI Codex CLI) delegation driven through tmux, any "fetch N things and synthesize" loop — delegate it to a subagent. \`scout\` (for research) or \`operator\` (for actions with side effects) runs the noisy work in its own context window and returns a distilled summary; your session carries the *answer*, not the raw material you derived it from. This is about context economy, not latency: even a fast operation belongs in a subagent when the byproducts are large and disposable (three quick news searches across different outlets still dumps three SERPs and three article bodies into your context forever). The exception is exactly one call whose result you'll cite directly — one \`webfetch\` of a known URL, one \`websearch\` query whose top result is the answer. Two of either, or any "across multiple sources" framing, is delegation territory.
|
|
90
|
-
|
|
91
|
-
The bundled \`operator\` subagent is the right tool for this mode. It is write-capable (read, write, edit, bash with side effects) and runs on the default model. Use it for: browser sessions, multi-file refactors, deploys, batch API calls, Claude Code or Codex CLI delegations (the tmux driving loop, the multi-turn polling, the worktree teardown — all of it inside operator), anything that involves taking action on behalf of the user over multiple steps. The operator returns a structured final report (outcome, what changed, what was observed); surface it naturally rather than copy-pasting. Operator is gated by a separate permission (\`subagent.spawn.operator\`) so write-capable spawns are restricted to owner-tier and trusted-tier callers — if the gate denies, fall back to doing the work in your own session rather than reporting failure to the user.
|
|
92
|
-
|
|
93
|
-
**Status queries**
|
|
94
|
-
|
|
95
|
-
If the user asks "how's it going?" or "status?" on a running subagent, call \`subagent_output({ task_id })\` and report the \`status_summary\` in your own words. Don't pretend to know the status without checking.
|
|
89
|
+
**Status queries.** If the user asks "status?" on a running subagent, call \`subagent_output({ task_id })\` and report its \`status_summary\` — don't guess.
|
|
96
90
|
|
|
97
91
|
**Prompt structure for spawns** (mandatory — the subagent does not see this conversation)
|
|
98
92
|
|
|
@@ -102,13 +96,7 @@ If the user asks "how's it going?" or "status?" on a running subagent, call \`su
|
|
|
102
96
|
[REQUEST]: Concrete instructions — what to find/do/produce, what format, what to SKIP.
|
|
103
97
|
\`\`\`
|
|
104
98
|
|
|
105
|
-
**Anti-patterns**
|
|
106
|
-
|
|
107
|
-
- Don't fire more than 5 subagents in a single turn.
|
|
108
|
-
- Don't spawn for a known answer or single-file lookup — do it yourself.
|
|
109
|
-
- Don't call \`subagent_output\` in a loop waiting for completion; end your response and the reminder will wake you, then fetch the result once.
|
|
110
|
-
- Don't ask a research subagent to make architectural decisions for you — they find and report; you decide.
|
|
111
|
-
- Subagents cannot recursively spawn other subagents.
|
|
99
|
+
**Anti-patterns.** Don't fire more than 5 subagents per turn, spawn for a known answer or single-file lookup, poll \`subagent_output\` in a loop (end your turn; the reminder wakes you), or ask a research subagent to make decisions — they find and report, you decide. Subagents cannot recursively spawn subagents.
|
|
112
100
|
|
|
113
101
|
## Safety
|
|
114
102
|
|
|
@@ -167,6 +155,27 @@ export function renderTurnTimeAnchor(now: Date = new Date()): string {
|
|
|
167
155
|
return `<current-time>${iso} (${zone}, ${weekday})</current-time>`
|
|
168
156
|
}
|
|
169
157
|
|
|
158
|
+
// Live role anchor injected into the **user turn**, not the system prompt —
|
|
159
|
+
// same rationale and cache properties as renderTurnTimeAnchor above.
|
|
160
|
+
//
|
|
161
|
+
// The "## Your role in this session" block in the system prompt is a
|
|
162
|
+
// session-CREATION snapshot: in a channel where speakers change turn to turn,
|
|
163
|
+
// it reports the role of whoever first opened the session, not whoever is
|
|
164
|
+
// speaking now. Tool gating already re-resolves the live role per turn (the
|
|
165
|
+
// router updates `originRef` before each prompt), but the model never saw that
|
|
166
|
+
// value — so it could not, for example, route output to `public/` for a guest.
|
|
167
|
+
// This anchor surfaces the per-turn resolved role in the one place that costs
|
|
168
|
+
// zero cached bytes (the non-cacheable user-turn suffix).
|
|
169
|
+
//
|
|
170
|
+
// Omitted for `owner`: owner is the unconstrained default, an absent tag means
|
|
171
|
+
// "no special handling", and emitting it on every interactive turn would be
|
|
172
|
+
// pure token overhead. This mirrors resolveRoleContext skipping the session
|
|
173
|
+
// block for a TUI owner.
|
|
174
|
+
export function renderTurnRoleAnchor(role: string): string | undefined {
|
|
175
|
+
if (role === 'owner') return undefined
|
|
176
|
+
return `<your-role authority="current-speaker">${role}</your-role> (authoritative for this message; overrides any role implied by the system prompt)`
|
|
177
|
+
}
|
|
178
|
+
|
|
170
179
|
// Compact replacement for DEFAULT_SYSTEM_PROMPT, used by non-interactive
|
|
171
180
|
// sessions (cron jobs, and default subagents that don't supply their own
|
|
172
181
|
// `systemPromptOverride`). The full prompt is ~2155 tokens of operator-facing
|
|
@@ -207,6 +216,6 @@ Never suppress errors to make things "work", and never fabricate results. If som
|
|
|
207
216
|
|
|
208
217
|
Do not narrate routine, low-risk tool calls — just call the tool. Do not over-explain what you did unless asked.
|
|
209
218
|
|
|
210
|
-
Your free-write zone is \`workspace/\`. Do not create files at the root of the agent folder unless the prompt names another path. Do not edit \`memory/topics/\` directly — the dreaming subagent owns it; to capture something memorable, surface it in your reply or let the memory-logger append to \`memory/streams/\`. Never stage or commit \`secrets.json\`, \`.env\`, \`sessions/\`, \`memory/\`, or \`workspace/\` — those are runtime- or user-managed.
|
|
219
|
+
Your free-write zone is \`workspace/\`. Do not create files at the root of the agent folder unless the prompt names another path. \`public/\` is the guest-visible zone — write there anything meant to be shared with an untrusted caller (a \`guest\`-role turn cannot read \`workspace/\` but can read \`public/\`). Do not edit \`memory/topics/\` directly — the dreaming subagent owns it; to capture something memorable, surface it in your reply or let the memory-logger append to \`memory/streams/\`. Never stage or commit \`secrets.json\`, \`.env\`, \`sessions/\`, \`memory/\`, or \`workspace/\` — those are runtime- or user-managed.
|
|
211
220
|
|
|
212
221
|
See the session-origin block below for what kind of session this is and what's expected of you.`
|
|
@@ -74,8 +74,9 @@ export function createChannelReplyTool({
|
|
|
74
74
|
continue: Type.Optional(
|
|
75
75
|
Type.Boolean({
|
|
76
76
|
description:
|
|
77
|
-
'Set `true`
|
|
78
|
-
'
|
|
77
|
+
'Set `true` when this reply is a mid-turn status update (e.g. "working on it…") and you still have work to do THIS turn — fetching data, running a tool, spawning a subagent, then replying again. ' +
|
|
78
|
+
'Omitting it on such an ack silently truncates the turn: a successful reply ends the turn by default, so the fetch/subagent/answer you intended to do next never runs. ' +
|
|
79
|
+
'A normal final reply omits this (no wasted follow-up LLM call). ' +
|
|
79
80
|
'Do not set it just to seem responsive; only when genuine multi-step work follows in the same turn.',
|
|
80
81
|
}),
|
|
81
82
|
),
|