typeclaw 0.15.1 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/index.ts +3 -1
- package/src/agent/plugin-tools.ts +38 -0
- package/src/agent/session-meta.ts +6 -2
- package/src/agent/session-origin.ts +58 -3
- package/src/agent/subagents.ts +6 -1
- package/src/agent/system-prompt.ts +41 -32
- package/src/agent/tools/channel-reply.ts +18 -1
- package/src/bundled-plugins/guard/policies/non-workspace-write.ts +8 -6
- package/src/bundled-plugins/memory/index.ts +25 -6
- package/src/bundled-plugins/security/index.ts +12 -0
- package/src/bundled-plugins/security/policies/private-surface-read.ts +215 -0
- package/src/channels/adapters/github/inbound.ts +54 -1
- package/src/channels/adapters/github/index.ts +1 -0
- package/src/channels/router.ts +74 -25
- package/src/cli/inspect.ts +20 -9
- package/src/init/index.ts +19 -9
- package/src/init/paths.ts +1 -0
- package/src/inspect/label.ts +2 -0
- package/src/inspect/live.ts +6 -1
- package/src/inspect/render.ts +8 -2
- package/src/inspect/replay.ts +6 -1
- package/src/inspect/types.ts +4 -1
- package/src/permissions/builtins.ts +12 -0
- package/src/permissions/permissions.ts +7 -0
- package/src/plugin/types.ts +12 -0
- package/src/sandbox/build.ts +19 -1
- package/src/sandbox/hidden-paths.ts +41 -0
- package/src/sandbox/index.ts +2 -1
- package/src/sandbox/policy.ts +15 -0
- package/src/skills/typeclaw-channel-github/SKILL.md +6 -0
- package/src/skills/typeclaw-troubleshooting/SKILL.md +104 -0
- package/src/usage/report.ts +4 -0
- package/src/usage/scan.ts +1 -1
package/package.json
CHANGED
package/src/agent/index.ts
CHANGED
|
@@ -63,7 +63,7 @@ export type { SessionOrigin } from './session-origin'
|
|
|
63
63
|
|
|
64
64
|
export type { AgentSession }
|
|
65
65
|
|
|
66
|
-
export { renderTurnTimeAnchor } from './system-prompt'
|
|
66
|
+
export { renderTurnRoleAnchor, renderTurnTimeAnchor } from './system-prompt'
|
|
67
67
|
|
|
68
68
|
type AgentSessionTools = NonNullable<Parameters<typeof createAgentSession>[0]>['tools']
|
|
69
69
|
|
|
@@ -335,6 +335,7 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
|
|
|
335
335
|
sessionId: options.plugins.sessionId,
|
|
336
336
|
hooks: options.plugins.hooks,
|
|
337
337
|
getOrigin,
|
|
338
|
+
...(options.permissions ? { permissions: options.permissions } : {}),
|
|
338
339
|
})
|
|
339
340
|
: []
|
|
340
341
|
const wrappedCustomSystemTools = wrapSystemTools(customSystemTools, options.plugins, getOrigin)
|
|
@@ -711,6 +712,7 @@ export function deriveSystemPromptMode(origin: SessionOrigin | undefined): Syste
|
|
|
711
712
|
return 'full'
|
|
712
713
|
case 'cron':
|
|
713
714
|
case 'subagent':
|
|
715
|
+
case 'system':
|
|
714
716
|
return 'slim'
|
|
715
717
|
default: {
|
|
716
718
|
const _exhaustive: never = origin
|
|
@@ -20,6 +20,7 @@ import {
|
|
|
20
20
|
checkNonWorkspaceWriteGuard,
|
|
21
21
|
checkSkillAuthoringGuard,
|
|
22
22
|
} from '@/bundled-plugins/guard/policy'
|
|
23
|
+
import type { PermissionService } from '@/permissions/permissions'
|
|
23
24
|
import type {
|
|
24
25
|
BuiltinToolRef,
|
|
25
26
|
ContentPart,
|
|
@@ -30,6 +31,7 @@ import type {
|
|
|
30
31
|
ToolContext,
|
|
31
32
|
ToolResult,
|
|
32
33
|
} from '@/plugin'
|
|
34
|
+
import { buildSandboxedCommand, ensureBwrapAvailable, resolveHiddenPaths } from '@/sandbox'
|
|
33
35
|
|
|
34
36
|
import { createLoopGuard, type LoopGuard } from './loop-guard'
|
|
35
37
|
import { checkImageReadRedirect } from './multimodal/read-redirect'
|
|
@@ -134,6 +136,11 @@ export type WrapSystemToolOptions = {
|
|
|
134
136
|
sessionId: string
|
|
135
137
|
hooks: HookBus
|
|
136
138
|
getOrigin?: () => SessionOrigin | undefined
|
|
139
|
+
// When present, the bash builtin is rewritten through the per-tool bwrap
|
|
140
|
+
// sandbox with role-derived path masks. Absent (or no masks for the role)
|
|
141
|
+
// runs bash unchanged — preserving today's behavior for trusted+ and for
|
|
142
|
+
// sessions wired without a permission service (e.g. tests).
|
|
143
|
+
permissions?: PermissionService
|
|
137
144
|
}
|
|
138
145
|
|
|
139
146
|
// Zod 4 emits a top-level `"$schema": "https://json-schema.org/draft/2020-12/schema"`
|
|
@@ -393,6 +400,10 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
|
|
|
393
400
|
}
|
|
394
401
|
stripGuardAcknowledgements(mutableArgs)
|
|
395
402
|
|
|
403
|
+
if (tool.name === 'bash' && opts.permissions !== undefined) {
|
|
404
|
+
await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir)
|
|
405
|
+
}
|
|
406
|
+
|
|
396
407
|
const result = await tool.execute(toolCallId, mutableArgs as Static<TParams>, signal, onUpdate)
|
|
397
408
|
const hookResult: ToolResult = {
|
|
398
409
|
content: result.content as ContentPart[],
|
|
@@ -425,6 +436,33 @@ export function buildBuiltinPiToolOverrides(opts: WrapSystemToolOptions): ToolDe
|
|
|
425
436
|
return defaultBuiltinPiAgentTools().map((tool) => wrapAgentToolAsCustomToolDefinition(tool, opts))
|
|
426
437
|
}
|
|
427
438
|
|
|
439
|
+
/**
 * Wraps the bash builtin's command in a bwrap invocation carrying role-derived
 * path masks, mutating `mutableArgs.command` in place.
 *
 * - A non-string `command` is left untouched.
 * - When the role has nothing hidden (no masked dirs and no masked files, e.g.
 *   trusted+), the command runs unchanged.
 * - When masks are required, bwrap availability is checked first; the intent is
 *   to fail closed (throw) rather than run unsandboxed and leak the masked
 *   surface.
 *
 * Runs after the tool.before guards have inspected the raw command.
 *
 * @param mutableArgs - Tool arguments; `command` is overwritten when sandboxing applies.
 * @param permissions - Permission service used to derive the hidden paths.
 * @param origin - Live session origin for the current turn, if any.
 * @param agentDir - Agent folder; bind-mounted at the same path and used as cwd.
 */
async function applyBashSandbox(
  mutableArgs: Record<string, unknown>,
  permissions: PermissionService,
  origin: SessionOrigin | undefined,
  agentDir: string,
): Promise<void> {
  const rawCommand = mutableArgs.command
  if (typeof rawCommand !== 'string') return

  const hidden = resolveHiddenPaths(permissions, origin, agentDir)
  const nothingToMask = hidden.dirs.length === 0 && hidden.files.length === 0
  if (nothingToMask) return

  await ensureBwrapAvailable()

  const sandboxed = buildSandboxedCommand(rawCommand, {
    mounts: [{ type: 'bind', source: agentDir, dest: agentDir }],
    masks: { dirs: hidden.dirs, files: hidden.files },
    network: 'inherit',
    cwd: agentDir,
  })
  mutableArgs.command = sandboxed.commandString
}
|
|
465
|
+
|
|
428
466
|
function appendLoopWarning(result: ToolResult, message: string): ToolResult {
|
|
429
467
|
const content: ContentPart[] = [...(result.content as ContentPart[]), { type: 'text', text: message }]
|
|
430
468
|
return { content, details: result.details }
|
|
@@ -28,10 +28,12 @@ export type MinimalSessionOrigin =
|
|
|
28
28
|
thread: string | null
|
|
29
29
|
}
|
|
30
30
|
| { kind: 'subagent'; subagent: string; parentSessionId: string }
|
|
31
|
+
| { kind: 'system'; component: string }
|
|
31
32
|
|
|
32
33
|
// Reduce a full SessionOrigin to the minimum projection persisted to disk.
|
|
33
|
-
// Drops participant lists, membership counts, recursive provenance
|
|
34
|
-
//
|
|
34
|
+
// Drops participant lists, membership counts, recursive provenance (including
|
|
35
|
+
// the system origin's `triggeredBy`, which can carry channel author identity),
|
|
36
|
+
// and author identifiers — none of which `typeclaw usage` reads, and all of
|
|
35
37
|
// which would otherwise land in git history when sessions/ is auto-backed-up.
|
|
36
38
|
// Kept as a separate function so the boundary between "data the LLM sees in
|
|
37
39
|
// the system prompt" (full origin) and "data persisted for usage reporting"
|
|
@@ -58,5 +60,7 @@ function minimalOrigin(origin: SessionOrigin): MinimalSessionOrigin {
|
|
|
58
60
|
}
|
|
59
61
|
case 'subagent':
|
|
60
62
|
return { kind: 'subagent', subagent: origin.subagent, parentSessionId: origin.parentSessionId }
|
|
63
|
+
case 'system':
|
|
64
|
+
return { kind: 'system', component: origin.component }
|
|
61
65
|
}
|
|
62
66
|
}
|
|
@@ -48,6 +48,23 @@ export type SessionOrigin =
|
|
|
48
48
|
spawnedByRole?: string
|
|
49
49
|
spawnedByOrigin?: SessionOrigin
|
|
50
50
|
}
|
|
51
|
+
// Runtime-owned infrastructure operating over TypeClaw's own state (memory
|
|
52
|
+
// logging/retrieval, backup), NOT user-delegated work. It resolves to `owner`
|
|
53
|
+
// because it acts on the operator's behalf over operator-owned files, with no
|
|
54
|
+
// single user session to inherit authority from — inheriting the triggering
|
|
55
|
+
// turn's role (e.g. a guest channel turn) would wrongly classify TypeClaw
|
|
56
|
+
// infrastructure as the guest actor and block its legitimate `sessions/` and `memory/`
|
|
57
|
+
// access. `triggeredBy` keeps honest provenance — "a guest turn triggered the
|
|
58
|
+
// memory-logger" — without the synthetic-TUI lie. This kind is only ever
|
|
59
|
+
// constructed by runtime/bundled code; inbound channel/cron content can never
|
|
60
|
+
// produce it (those origins come from the runtime, not from message text), so
|
|
61
|
+
// it is not a role-laundering vector.
|
|
62
|
+
| {
|
|
63
|
+
kind: 'system'
|
|
64
|
+
component: string
|
|
65
|
+
reason?: string
|
|
66
|
+
triggeredBy?: SessionOrigin
|
|
67
|
+
}
|
|
51
68
|
|
|
52
69
|
export const PARTICIPANTS_TOP_K = 10
|
|
53
70
|
export const PARTICIPANTS_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000
|
|
@@ -118,6 +135,8 @@ export function renderSessionOrigin(
|
|
|
118
135
|
return withRoleContext(renderChannelOrigin(origin, now), roleContext)
|
|
119
136
|
case 'subagent':
|
|
120
137
|
return withRoleContext(renderSubagentOrigin(origin), roleContext)
|
|
138
|
+
case 'system':
|
|
139
|
+
return withRoleContext(renderSystemOrigin(origin), roleContext)
|
|
121
140
|
}
|
|
122
141
|
}
|
|
123
142
|
|
|
@@ -167,6 +186,34 @@ function renderCronOrigin(origin: { jobId: string; jobKind: 'prompt' | 'exec' |
|
|
|
167
186
|
].join('\n')
|
|
168
187
|
}
|
|
169
188
|
|
|
189
|
+
/**
 * Renders the "Session origin" prompt block for a `system` origin.
 *
 * Always emits the heading and the three-line description naming the
 * component. Appends a blank line plus `Reason: …` when `reason` is set, and a
 * blank line plus `Triggered by: …` when `triggeredBy` is set (in that order).
 *
 * @param origin - The system origin fields to render.
 * @returns The block as a single newline-joined string.
 */
function renderSystemOrigin(origin: { component: string; reason?: string; triggeredBy?: SessionOrigin }): string {
  const { component, reason, triggeredBy } = origin

  const preamble = [
    '## Session origin',
    '',
    `You are the \`${component}\` system process — TypeClaw-owned`,
    "infrastructure operating over the agent folder on the operator's behalf,",
    'not a user-delegated task. Do exactly the job described and exit.',
  ]
  const reasonSection = reason !== undefined ? ['', `Reason: ${reason}`] : []
  const triggerSection = triggeredBy !== undefined ? ['', `Triggered by: ${describeTrigger(triggeredBy)}`] : []

  return [...preamble, ...reasonSection, ...triggerSection].join('\n')
}
|
|
201
|
+
|
|
202
|
+
/**
 * Returns a short human-readable phrase naming the origin that triggered a
 * system process; used for the `Triggered by:` line of the system origin block.
 *
 * The `default` branch is a compile-time exhaustiveness guard: adding a new
 * `SessionOrigin` kind without handling it here becomes a type error. At
 * runtime it also guarantees a string, so an unrecognised kind can never
 * render as "Triggered by: undefined".
 *
 * @param origin - The triggering session origin.
 * @returns A phrase such as "a TUI session" or "cron job `<id>`".
 */
function describeTrigger(origin: SessionOrigin): string {
  switch (origin.kind) {
    case 'tui':
      return 'a TUI session'
    case 'cron':
      return `cron job \`${origin.jobId}\``
    case 'channel':
      return `a ${getPlatformInfo(origin.adapter).displayName} channel turn`
    case 'subagent':
      return `the \`${origin.subagent}\` subagent`
    case 'system':
      return `the \`${origin.component}\` system process`
    default: {
      // Unreachable while the switch covers every SessionOrigin kind.
      const _exhaustive: never = origin
      return 'an unknown origin'
    }
  }
}
|
|
216
|
+
|
|
170
217
|
function renderSubagentOrigin(origin: { subagent: string; parentSessionId: string }): string {
|
|
171
218
|
return [
|
|
172
219
|
'## Session origin',
|
|
@@ -244,10 +291,18 @@ function renderChannelOrigin(
|
|
|
244
291
|
' have no reason worth recording. Any other visible text without a',
|
|
245
292
|
' channel tool call is blocked.',
|
|
246
293
|
'',
|
|
294
|
+
'**Every user-facing sentence goes through `channel_reply`.** Narrating in',
|
|
295
|
+
'plain text — "bumping to 16x now", "let me check that" — does NOT reach the',
|
|
296
|
+
'user; it is invisible. If you want the user to see it, it is a',
|
|
297
|
+
'`channel_reply` call, not narration. This includes acks.',
|
|
298
|
+
'',
|
|
247
299
|
'**One substantive reply per inbound.** If the answer needs more than one',
|
|
248
|
-
'tool call, send a one-line ack first ("On it."
|
|
249
|
-
'
|
|
250
|
-
'is
|
|
300
|
+
'tool call, send a one-line ack first via `channel_reply({ text: "On it.",',
|
|
301
|
+
'continue: true })`, keep working, then send the answer with a final',
|
|
302
|
+
'`channel_reply`. The ack is not your reply; the answer is. Once the answer',
|
|
303
|
+
'lands, end your turn. The `continue: true` is not optional on that ack:',
|
|
304
|
+
'without it the turn ends the instant the ack lands and the rest of your',
|
|
305
|
+
'work — the fetch, the subagent, the actual answer — is silently dropped.',
|
|
251
306
|
'',
|
|
252
307
|
'**Backgrounded work does not end the obligation.** If you spawn a',
|
|
253
308
|
'subagent with `run_in_background: true` to answer the current inbound,',
|
package/src/agent/subagents.ts
CHANGED
|
@@ -455,7 +455,12 @@ function parseSpawnedByOriginJson(
|
|
|
455
455
|
return parsed
|
|
456
456
|
}
|
|
457
457
|
|
|
458
|
-
|
|
458
|
+
// Must list EVERY SessionOrigin discriminator. `system` is included so a
|
|
459
|
+
// streamed memory/backup spawn (whose spawnedByOrigin is serialized to JSON
|
|
460
|
+
// and re-parsed here) keeps its owner-resolving origin instead of being
|
|
461
|
+
// dropped and silently demoted to guest — the exact regression the system
|
|
462
|
+
// origin exists to prevent. Keep in sync with the SessionOrigin union.
|
|
463
|
+
const SESSION_ORIGIN_KINDS = new Set(['tui', 'cron', 'channel', 'subagent', 'system'])
|
|
459
464
|
function isSessionOriginShape(value: unknown): value is SessionOrigin {
|
|
460
465
|
if (value === null || typeof value !== 'object') return false
|
|
461
466
|
const kind = (value as { kind?: unknown }).kind
|
|
@@ -27,6 +27,7 @@ When in doubt between SOUL.md and AGENTS.md: if it describes *how you sound*, it
|
|
|
27
27
|
## Your workspace
|
|
28
28
|
|
|
29
29
|
- **\`workspace/\`** — your free-write zone for drafts, scratch work, generated artifacts. Do not create files at the agent-folder root unless the user explicitly asks.
|
|
30
|
+
- **\`public/\`** — the guest-visible zone. Untrusted callers (the \`guest\` role) cannot see \`workspace/\`, but they can read and write \`public/\`. Put anything meant to be shared with an untrusted caller here. If a \`<your-role>\` tag on the turn names a non-trusted role, or a write to \`workspace/\` comes back \`denied by permissions\`, the caller is untrusted — write to \`public/\` instead.
|
|
30
31
|
- **\`sessions/\`** — transcripts of past conversations. Runtime-managed; don't write here.
|
|
31
32
|
- **\`memory/streams/\`** *(not injected — reach via \`memory_search\`)* — dated streams written by the memory-logger between sessions. Runtime-owned. Undreamed observations are searchable on demand instead of injected into every prompt.
|
|
32
33
|
- **\`memory/skills/\`** — muscle-memory skills written by the dreaming subagent. Auto-loaded; don't write here directly.
|
|
@@ -45,6 +46,17 @@ When the user gives you work, start doing it in the same turn — a real action,
|
|
|
45
46
|
|
|
46
47
|
Do not narrate routine, low-risk tool calls. Just call the tool. Narrate only when it helps: multi-step work, risky actions (deletions, external sends, irreversible changes), or when the user asks.
|
|
47
48
|
|
|
49
|
+
## Long-running and interactive shell work
|
|
50
|
+
|
|
51
|
+
Foreground \`bash\` blocks your turn until exit, so a command that runs for minutes or waits for input (dev server, REPL, watcher, \`docker compose up\`, interactive installer) freezes the conversation. \`tmux\` is in the container — run such programs detached so your turn stays free:
|
|
52
|
+
|
|
53
|
+
- Start: \`tmux new-session -d -s <name> "<cmd>"\`
|
|
54
|
+
- Observe: \`tmux capture-pane -t <name> -p\` (poll across turns, don't block)
|
|
55
|
+
- Drive: \`tmux send-keys -t <name> "<input>" Enter\` (control keys too, e.g. \`C-c\`)
|
|
56
|
+
- Stop: \`tmux kill-session -t <name>\`
|
|
57
|
+
|
|
58
|
+
Use this only when the work belongs in *your* session. For self-contained long work (build, test suite, install, batch) whose result is all you need, delegate to \`operator\` instead.
|
|
59
|
+
|
|
48
60
|
## Version control
|
|
49
61
|
|
|
50
62
|
Your agent folder is a git repository.
|
|
@@ -64,35 +76,17 @@ Your agent folder is a git repository.
|
|
|
64
76
|
|
|
65
77
|
## Subagent orchestration
|
|
66
78
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
There are two delegation modes. Pick deliberately.
|
|
70
|
-
|
|
71
|
-
**Mode A — Research fan-out** (in service of the current question)
|
|
72
|
-
|
|
73
|
-
When you need information to answer the user and the search is broad, fire 2-5 subagents in parallel with \`run_in_background: true\` covering different angles. End your response after spawning. The system will deliver a \`<system-reminder>\` for each completion; then call \`subagent_output\` once per task_id to fetch the result and answer the user. \`subagent_output\` always returns immediately with a snapshot — it does not block.
|
|
74
|
-
|
|
75
|
-
The bundled \`explorer\` subagent is the right tool for **local** reconnaissance — anything reachable on the agent's filesystem: code, past sessions (\`sessions/*.jsonl\`), memory topic shards and daily memory streams, skills, cron jobs, config, git history, mounts, channels state. It is read-only and runs on a fast/cheap model, so fire liberally. Do NOT ask it to plan, decide, or write code — it finds and reports.
|
|
76
|
-
|
|
77
|
-
The bundled \`scout\` subagent is its external counterpart — web research only. Use it when you need information from public sources (docs, library references, vendor changelogs, news, anything not already in this agent's folder). Scout runs \`websearch\` and \`webfetch\` in a fresh context window so the search churn does not pollute yours; it returns a citation-backed answer with a confidence rating. Prefer scout over running \`websearch\`/\`webfetch\` yourself when the research is non-trivial (more than 1-2 queries) or when you want to save your context for the synthesis step.
|
|
79
|
+
Delegate focused work to subagents via \`spawn_subagent\`, \`subagent_output\`, \`subagent_cancel\`. Each runs in its own context window with its own tool set. The available subagents and their purpose are listed in the \`spawn_subagent\` tool description — re-read it before delegating. Briefly: \`explorer\` (read-only local recon — code, sessions, memory, git, config; fire liberally), \`scout\` (web research in a fresh context), \`reviewer\` (deep read-only code/PR/plan review, returns a structured verdict; it does NOT post), \`operator\` (write-capable: bash-with-side-effects, write, edit — for browser sessions, refactors, deploys, batch ops, and Claude Code / Codex CLI driving; gated by \`subagent.spawn.operator\`, owner/trusted only — on denial, do the work yourself).
|
|
78
80
|
|
|
79
|
-
|
|
81
|
+
There are three delegation modes. Pick deliberately.
|
|
80
82
|
|
|
81
|
-
**Mode
|
|
83
|
+
**Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer.
|
|
82
84
|
|
|
83
|
-
|
|
85
|
+
**Mode B — Delegate-and-converse.** Asked to DO something long-running (>~30s: installs, builds, \`docker\`, scrapes, long test suites, multi-host loops, any noisy "fetch N and synthesize" chain)? Don't run it inline — blocking your own \`bash\` freezes the conversation and stalls the channel typing heartbeat (\`MAX_TYPING_HEARTBEAT_MS\`). Spawn one subagent (\`operator\` for side effects, \`scout\` for research) with \`run_in_background: true\`, acknowledge, and KEEP TALKING. Single fast calls (\`git status\`, one known-endpoint \`curl\`) stay inline. When the completion reminder lands, weave the result in; in a channel session, the completion \`<system-reminder>\` is NOT a user message but plain text is still invisible — Surface the result via \`channel_reply\` (or \`channel_send\`). If you already posted the substantive answer in the spawn turn, prefer \`skip_response({ reason: "result confirms prior reply" })\` over going silent.
|
|
84
86
|
|
|
85
|
-
**
|
|
87
|
+
**Mode C — Troubleshooting.** Stuck in a fix-it loop — ~3 non-converging attempts at the same failure, cycling kill/re-run/\`sleep\`/capture/retry? Stop (the trigger is non-convergence, not elapsed time — this overrides the ~30s rule). Hand the whole debugging loop to \`operator\` with \`run_in_background: true\` — symptom, what you tried, success condition — and stay responsive. Read the \`typeclaw-troubleshooting\` skill for the mechanics before you spawn.
|
|
86
88
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
Before you run a tool chain that returns bulky intermediate output you won't need again — multiple \`webfetch\` calls, a \`websearch\` round you'll iterate on, a \`bash\` command that scrapes a site or dumps a large response, an \`agent-browser\` session, a \`claude\` (Claude Code) or \`codex\` (OpenAI Codex CLI) delegation driven through tmux, any "fetch N things and synthesize" loop — delegate it to a subagent. \`scout\` (for research) or \`operator\` (for actions with side effects) runs the noisy work in its own context window and returns a distilled summary; your session carries the *answer*, not the raw material you derived it from. This is about context economy, not latency: even a fast operation belongs in a subagent when the byproducts are large and disposable (three quick news searches across different outlets still dumps three SERPs and three article bodies into your context forever). The exception is exactly one call whose result you'll cite directly — one \`webfetch\` of a known URL, one \`websearch\` query whose top result is the answer. Two of either, or any "across multiple sources" framing, is delegation territory.
|
|
90
|
-
|
|
91
|
-
The bundled \`operator\` subagent is the right tool for this mode. It is write-capable (read, write, edit, bash with side effects) and runs on the default model. Use it for: browser sessions, multi-file refactors, deploys, batch API calls, Claude Code or Codex CLI delegations (the tmux driving loop, the multi-turn polling, the worktree teardown — all of it inside operator), anything that involves taking action on behalf of the user over multiple steps. The operator returns a structured final report (outcome, what changed, what was observed); surface it naturally rather than copy-pasting. Operator is gated by a separate permission (\`subagent.spawn.operator\`) so write-capable spawns are restricted to owner-tier and trusted-tier callers — if the gate denies, fall back to doing the work in your own session rather than reporting failure to the user.
|
|
92
|
-
|
|
93
|
-
**Status queries**
|
|
94
|
-
|
|
95
|
-
If the user asks "how's it going?" or "status?" on a running subagent, call \`subagent_output({ task_id })\` and report the \`status_summary\` in your own words. Don't pretend to know the status without checking.
|
|
89
|
+
**Status queries.** If the user asks "status?" on a running subagent, call \`subagent_output({ task_id })\` and report its \`status_summary\` — don't guess.
|
|
96
90
|
|
|
97
91
|
**Prompt structure for spawns** (mandatory — the subagent does not see this conversation)
|
|
98
92
|
|
|
@@ -102,13 +96,7 @@ If the user asks "how's it going?" or "status?" on a running subagent, call \`su
|
|
|
102
96
|
[REQUEST]: Concrete instructions — what to find/do/produce, what format, what to SKIP.
|
|
103
97
|
\`\`\`
|
|
104
98
|
|
|
105
|
-
**Anti-patterns
|
|
106
|
-
|
|
107
|
-
- Don't fire more than 5 subagents in a single turn.
|
|
108
|
-
- Don't spawn for a known answer or single-file lookup — do it yourself.
|
|
109
|
-
- Don't call \`subagent_output\` in a loop waiting for completion; end your response and the reminder will wake you, then fetch the result once.
|
|
110
|
-
- Don't ask a research subagent to make architectural decisions for you — they find and report; you decide.
|
|
111
|
-
- Subagents cannot recursively spawn other subagents.
|
|
99
|
+
**Anti-patterns.** Don't fire more than 5 subagents per turn, spawn for a known answer or single-file lookup, poll \`subagent_output\` in a loop (end your turn; the reminder wakes you), or ask a research subagent to make decisions — they find and report, you decide. Subagents cannot recursively spawn subagents.
|
|
112
100
|
|
|
113
101
|
## Safety
|
|
114
102
|
|
|
@@ -167,6 +155,27 @@ export function renderTurnTimeAnchor(now: Date = new Date()): string {
|
|
|
167
155
|
return `<current-time>${iso} (${zone}, ${weekday})</current-time>`
|
|
168
156
|
}
|
|
169
157
|
|
|
158
|
+
// Live role anchor injected into the **user turn**, not the system prompt —
// same rationale and cache properties as renderTurnTimeAnchor above.
//
// The "## Your role in this session" block in the system prompt is a snapshot
// taken at session creation. In a channel where the speaker changes from turn
// to turn, it reports the role of whoever first opened the session rather than
// whoever is speaking now. Tool gating already re-resolves the live role per
// turn (the router updates `originRef` before each prompt), but the model never
// saw that value — so it could not, for example, route output to `public/` for
// a guest. This anchor surfaces the per-turn resolved role in the one place
// that costs zero cached bytes (the non-cacheable user-turn suffix).
//
// Omitted for `owner`: owner is the unconstrained default, an absent tag means
// "no special handling", and emitting it on every interactive turn would be
// pure token overhead. This mirrors resolveRoleContext skipping the session
// block for a TUI owner.
/**
 * Builds the per-turn `<your-role>` tag.
 *
 * @param role - The role resolved for the current turn.
 * @returns `undefined` when `role` is exactly `'owner'`; otherwise the role
 *   wrapped verbatim in a `<your-role>` element.
 */
export function renderTurnRoleAnchor(role: string): string | undefined {
  return role === 'owner' ? undefined : `<your-role>${role}</your-role>`
}
|
|
178
|
+
|
|
170
179
|
// Compact replacement for DEFAULT_SYSTEM_PROMPT, used by non-interactive
|
|
171
180
|
// sessions (cron jobs, and default subagents that don't supply their own
|
|
172
181
|
// `systemPromptOverride`). The full prompt is ~2155 tokens of operator-facing
|
|
@@ -207,6 +216,6 @@ Never suppress errors to make things "work", and never fabricate results. If som
|
|
|
207
216
|
|
|
208
217
|
Do not narrate routine, low-risk tool calls — just call the tool. Do not over-explain what you did unless asked.
|
|
209
218
|
|
|
210
|
-
Your free-write zone is \`workspace/\`. Do not create files at the root of the agent folder unless the prompt names another path. Do not edit \`memory/topics/\` directly — the dreaming subagent owns it; to capture something memorable, surface it in your reply or let the memory-logger append to \`memory/streams/\`. Never stage or commit \`secrets.json\`, \`.env\`, \`sessions/\`, \`memory/\`, or \`workspace/\` — those are runtime- or user-managed.
|
|
219
|
+
Your free-write zone is \`workspace/\`. Do not create files at the root of the agent folder unless the prompt names another path. \`public/\` is the guest-visible zone — write there anything meant to be shared with an untrusted caller (a \`guest\`-role turn cannot read \`workspace/\` but can read \`public/\`). Do not edit \`memory/topics/\` directly — the dreaming subagent owns it; to capture something memorable, surface it in your reply or let the memory-logger append to \`memory/streams/\`. Never stage or commit \`secrets.json\`, \`.env\`, \`sessions/\`, \`memory/\`, or \`workspace/\` — those are runtime- or user-managed.
|
|
211
220
|
|
|
212
221
|
See the session-origin block below for what kind of session this is and what's expected of you.`
|
|
@@ -71,11 +71,21 @@ export function createChannelReplyTool({
|
|
|
71
71
|
},
|
|
72
72
|
),
|
|
73
73
|
),
|
|
74
|
+
continue: Type.Optional(
|
|
75
|
+
Type.Boolean({
|
|
76
|
+
description:
|
|
77
|
+
'Set `true` when this reply is a mid-turn status update (e.g. "working on it…") and you still have work to do THIS turn — fetching data, running a tool, spawning a subagent, then replying again. ' +
|
|
78
|
+
'Omitting it on such an ack silently truncates the turn: a successful reply ends the turn by default, so the fetch/subagent/answer you intended to do next never runs. ' +
|
|
79
|
+
'A normal final reply omits this (no wasted follow-up LLM call). ' +
|
|
80
|
+
'Do not set it just to seem responsive; only when genuine multi-step work follows in the same turn.',
|
|
81
|
+
}),
|
|
82
|
+
),
|
|
74
83
|
}),
|
|
75
84
|
|
|
76
85
|
async execute(_toolCallId, params) {
|
|
77
86
|
const text = params.text
|
|
78
87
|
const attachments = params.attachments
|
|
88
|
+
const keepTurnAlive = params.continue === true
|
|
79
89
|
if ((text === undefined || text === '') && (attachments === undefined || attachments.length === 0)) {
|
|
80
90
|
logger.warn(formatChannelToolFailure('channel_reply', 'missing text and attachments'))
|
|
81
91
|
return {
|
|
@@ -130,7 +140,14 @@ export function createChannelReplyTool({
|
|
|
130
140
|
),
|
|
131
141
|
)
|
|
132
142
|
}
|
|
133
|
-
|
|
143
|
+
// `continue` is read by the router's terminal hook (installChannelReplyTerminalHook),
|
|
144
|
+
// not by this tool — it suppresses the post-reply abort so a multi-step turn
|
|
145
|
+
// keeps going. Success-only: a denied reply never ran, so there is no turn to keep.
|
|
146
|
+
const details: { ok: boolean; error?: string; continue?: boolean } = result.ok
|
|
147
|
+
? keepTurnAlive
|
|
148
|
+
? { ok: true, continue: true }
|
|
149
|
+
: { ok: true }
|
|
150
|
+
: { ok: false, error: result.error }
|
|
134
151
|
// Echo the delivered text back to the model. The adapter classifier
|
|
135
152
|
// drops self-authored messages on the inbound path (`self_author`),
|
|
136
153
|
// so the bot otherwise has ZERO visibility into what it just said —
|
|
@@ -20,12 +20,14 @@ const AGENT_ROOT_WRITE_ALLOWLIST = new Set([
|
|
|
20
20
|
'typeclaw.json',
|
|
21
21
|
])
|
|
22
22
|
|
|
23
|
-
//
|
|
24
|
-
// src/init/index.ts#DIRECTORIES)
|
|
25
|
-
//
|
|
26
|
-
//
|
|
27
|
-
//
|
|
28
|
-
|
|
23
|
+
// All scaffolded write zones outside `workspace/` (see
|
|
24
|
+
// src/init/index.ts#DIRECTORIES) that the agent may write into without
|
|
25
|
+
// acknowledging the guard. `packages/` holds reusable systems and custom
|
|
26
|
+
// typeclaw plugins as standalone packages; `public/` is the guest-visible
|
|
27
|
+
// zone for anything intended to be shared out. Both are deliberate write
|
|
28
|
+
// targets, same as `workspace/`, so an unacknowledged write is expected, not
|
|
29
|
+
// suspicious.
|
|
30
|
+
const AGENT_ROOT_DIRECTORY_ALLOWLIST = new Set(['mounts', 'packages', 'public'])
|
|
29
31
|
|
|
30
32
|
export async function checkNonWorkspaceWriteGuard(options: {
|
|
31
33
|
tool: string
|
|
@@ -6,7 +6,7 @@ import { CronExpressionParser } from 'cron-parser'
|
|
|
6
6
|
import { z } from 'zod'
|
|
7
7
|
|
|
8
8
|
import type { SessionOrigin } from '@/agent/session-origin'
|
|
9
|
-
import { definePlugin } from '@/plugin'
|
|
9
|
+
import { definePlugin, type SpawnSubagentOptions } from '@/plugin'
|
|
10
10
|
import { formatLocalDate } from '@/shared'
|
|
11
11
|
|
|
12
12
|
import { createDreamingSubagent, type DreamingPayload } from './dreaming'
|
|
@@ -205,9 +205,20 @@ export default definePlugin({
|
|
|
205
205
|
...(last.origin !== undefined ? { origin: last.origin } : {}),
|
|
206
206
|
...(streamLineCursor !== undefined ? { streamLineCursor } : {}),
|
|
207
207
|
}
|
|
208
|
-
|
|
208
|
+
// Execution authority is `system` (resolves to owner), NOT the
|
|
209
|
+
// triggering turn's role: memory-logging is TypeClaw infrastructure over
|
|
210
|
+
// operator-owned `sessions/` and `memory/`, so a guest channel turn that triggers
|
|
211
|
+
// it must not demote the logger to guest and get its transcript read
|
|
212
|
+
// blocked by privateSurfaceRead. The triggering origin is preserved two
|
|
213
|
+
// ways: `triggeredBy` for audit provenance, and `payload.origin` for
|
|
214
|
+
// content provenance (memory extraction/retrieval channel-safety).
|
|
215
|
+
const spawnOptions: SpawnSubagentOptions = {
|
|
209
216
|
parentSessionId: sessionId,
|
|
210
|
-
|
|
217
|
+
spawnedByOrigin: {
|
|
218
|
+
kind: 'system',
|
|
219
|
+
component: 'memory-logger',
|
|
220
|
+
...(last.origin !== undefined ? { triggeredBy: last.origin } : {}),
|
|
221
|
+
},
|
|
211
222
|
}
|
|
212
223
|
const next = spawnChain
|
|
213
224
|
.catch(() => undefined)
|
|
@@ -280,10 +291,18 @@ export default definePlugin({
|
|
|
280
291
|
cacheFilePath,
|
|
281
292
|
...(event.origin !== undefined ? { origin: event.origin } : {}),
|
|
282
293
|
}
|
|
283
|
-
|
|
294
|
+
// System authority, not the triggering turn's role — see the
|
|
295
|
+
// memory-logger spawn above. memory-retrieval writes
|
|
296
|
+
// memory/.retrieval-cache/, which a guest-demoted role cannot.
|
|
297
|
+
const retrievalSpawnOptions: SpawnSubagentOptions = {
|
|
284
298
|
parentSessionId: event.sessionId,
|
|
285
|
-
|
|
286
|
-
|
|
299
|
+
spawnedByOrigin: {
|
|
300
|
+
kind: 'system',
|
|
301
|
+
component: 'memory-retrieval',
|
|
302
|
+
...(event.origin !== undefined ? { triggeredBy: event.origin } : {}),
|
|
303
|
+
},
|
|
304
|
+
}
|
|
305
|
+
await ctx.spawnSubagent('memory-retrieval', payload, retrievalSpawnOptions)
|
|
287
306
|
}
|
|
288
307
|
|
|
289
308
|
// Subagents are constructed at boot here (rather than imported as constants)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { definePlugin } from '@/plugin'
|
|
2
|
+
import { resolveHiddenPaths } from '@/sandbox'
|
|
2
3
|
|
|
3
4
|
import { HIGH_TIER_PER_GUARD_PERMISSIONS, SECURITY_PERMISSIONS, SEVERITY_PERMISSION } from './permissions'
|
|
4
5
|
import type { SecurityPermission, SecuritySeverity } from './permissions'
|
|
@@ -11,6 +12,7 @@ import {
|
|
|
11
12
|
recordGitRemoteTaintIfAny,
|
|
12
13
|
} from './policies/git-exfil'
|
|
13
14
|
import { GUARD_OUTBOUND_SECRET_SEVERITY, checkOutboundSecretGuard } from './policies/outbound-secret-scan'
|
|
15
|
+
import { checkPrivateSurfaceReadGuard } from './policies/private-surface-read'
|
|
14
16
|
import { applyPromptInjectionDefense } from './policies/prompt-injection'
|
|
15
17
|
import { clearSessionTaints } from './policies/remote-taint-state'
|
|
16
18
|
import { GUARD_ROLE_PROMOTION_SEVERITY, checkRolePromotionGuard } from './policies/role-promotion'
|
|
@@ -161,6 +163,16 @@ export default definePlugin({
|
|
|
161
163
|
SECURITY_PERMISSIONS.bypassSecretExfilRead,
|
|
162
164
|
GUARD_SECRET_EXFIL_READ_SEVERITY,
|
|
163
165
|
),
|
|
166
|
+
// Role-derived, not severity-bypassed: resolveHiddenPaths already
|
|
167
|
+
// returns an empty deny-list for roles that may see the surface, so
|
|
168
|
+
// there is no canBypass wrapper. Mirrors the bash sandbox masks onto
|
|
169
|
+
// the non-bash read/grep/find/ls/edit/write builtins.
|
|
170
|
+
checkPrivateSurfaceReadGuard({
|
|
171
|
+
tool: event.tool,
|
|
172
|
+
args: event.args,
|
|
173
|
+
agentDir: ctx.agentDir,
|
|
174
|
+
hidden: resolveHiddenPaths(ctx.permissions, event.origin, ctx.agentDir),
|
|
175
|
+
}),
|
|
164
176
|
canBypass(GUARD_SSRF_SEVERITY, SECURITY_PERMISSIONS.bypassSsrf)
|
|
165
177
|
? undefined
|
|
166
178
|
: withPermissionHint(
|