typeclaw 0.15.2 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -137,7 +137,12 @@ function* assistantEvents(
137
137
  }
138
138
  }
139
139
  if (typeof message.errorMessage === 'string' && message.errorMessage !== '') {
140
- yield { cat: 'error', ts, message: message.errorMessage }
140
+ yield {
141
+ cat: 'error',
142
+ ts,
143
+ message: message.errorMessage,
144
+ ...(typeof message.stopReason === 'string' ? { stopReason: message.stopReason } : {}),
145
+ }
141
146
  }
142
147
  const usage = readUsage(message.usage)
143
148
  if (usage !== null && (usage.totalTokens > 0 || typeof message.stopReason === 'string')) {
@@ -37,7 +37,10 @@ export type InspectEvent =
37
37
  isError?: boolean
38
38
  durationMs?: number
39
39
  }
40
- | { cat: 'error'; ts: number; message: string }
40
+ // `stopReason` is the upstream-reported reason for the failed/aborted turn
41
+ // (e.g. 'error', 'aborted'). An 'aborted' stopReason is a user cancel, not a
42
+ // provider failure, and is rendered distinctly (see render.ts).
43
+ | { cat: 'error'; ts: number; message: string; stopReason?: string }
41
44
  | {
42
45
  cat: 'done'
43
46
  ts: number
@@ -16,6 +16,13 @@ export const CORE_PERMISSIONS = {
16
16
  subagentCancel: 'subagent.cancel',
17
17
  subagentOutput: 'subagent.output',
18
18
  subagentSpawnOperator: 'subagent.spawn.operator',
19
+ // Phrased as capabilities to SEE, not to hide, so the role tower stays
20
+ // monotonic (a higher tier sees at least what a lower tier sees) and the
21
+ // empty-permission guest is the fail-safe floor. resolveHiddenPaths masks
22
+ // whatever the resolved role lacks: fsSeePrivate gates workspace/, memory/
23
+ // and sessions/; fsSeeSecrets gates .env and secrets.json.
24
+ fsSeePrivate: 'fs.see.private',
25
+ fsSeeSecrets: 'fs.see.secrets',
19
26
  } as const
20
27
 
21
28
  // Sentinel that `expandOwnerWildcard` swaps for the concrete union of
@@ -61,6 +68,8 @@ export const BUILTIN_ROLES: Readonly<Record<BuiltinRoleName, BuiltinRoleSpec>> =
61
68
  CORE_PERMISSIONS.subagentCancel,
62
69
  CORE_PERMISSIONS.subagentOutput,
63
70
  CORE_PERMISSIONS.subagentSpawnOperator,
71
+ CORE_PERMISSIONS.fsSeePrivate,
72
+ CORE_PERMISSIONS.fsSeeSecrets,
64
73
  'security.bypass.low',
65
74
  'security.bypass.medium',
66
75
  'security.bypass.high',
@@ -76,6 +85,8 @@ export const BUILTIN_ROLES: Readonly<Record<BuiltinRoleName, BuiltinRoleSpec>> =
76
85
  CORE_PERMISSIONS.subagentCancel,
77
86
  CORE_PERMISSIONS.subagentOutput,
78
87
  CORE_PERMISSIONS.subagentSpawnOperator,
88
+ CORE_PERMISSIONS.fsSeePrivate,
89
+ CORE_PERMISSIONS.fsSeeSecrets,
79
90
  'security.bypass.low',
80
91
  'security.bypass.medium',
81
92
  ],
@@ -87,6 +98,7 @@ export const BUILTIN_ROLES: Readonly<Record<BuiltinRoleName, BuiltinRoleSpec>> =
87
98
  CORE_PERMISSIONS.subagentSpawn,
88
99
  CORE_PERMISSIONS.subagentCancel,
89
100
  CORE_PERMISSIONS.subagentOutput,
101
+ CORE_PERMISSIONS.fsSeePrivate,
90
102
  'security.bypass.low',
91
103
  ],
92
104
  },
@@ -110,6 +110,13 @@ export function createPermissionService(opts: CreatePermissionServiceOptions = {
110
110
  function resolveRole(origin: SessionOrigin | undefined): string {
111
111
  if (origin === undefined) return 'guest'
112
112
 
113
+ // Runtime-owned infrastructure (memory, backup) acts on the operator's
114
+ // behalf over operator-owned state. It is constructed only by runtime/
115
+ // bundled code — inbound channel/cron content cannot produce this kind —
116
+ // so resolving it to owner is not a laundering vector. See the `system`
117
+ // origin doc in session-origin.ts.
118
+ if (origin.kind === 'system') return 'owner'
119
+
113
120
  if (origin.kind === 'cron') {
114
121
  const role = origin.scheduledByRole
115
122
  if (role !== undefined && byName.has(role)) return role
@@ -250,6 +250,18 @@ export type SpawnSubagentOptions = {
250
250
  // `spawnedByOrigin: event.origin`. The runtime resolves `spawnedByRole`
251
251
  // from the origin via the PermissionService, so the spawning session's
252
252
  // role is inherited rather than forged from outside.
253
+ //
254
+ // TRUST MODEL: `spawnedByOrigin` accepts any SessionOrigin, including
255
+ // `{ kind: 'system' }`, which resolves to owner. That is intentional and
256
+ // not an escalation path: a plugin is a full-trust, in-process module with
257
+ // no sandbox (see AGENTS.md / docs/internals/skills) — it can already do
258
+ // anything the runtime can, so minting a system origin grants it nothing it
259
+ // lacks. The anti-forgery guarantee this API preserves is narrower and
260
+ // unaffected: inbound channel/cron CONTENT can never reach owner, because
261
+ // those origins are constructed by the runtime from the transport, never
262
+ // from message text, and a content-driven turn cannot produce a `system`
263
+ // origin. Bundled infra (memory, backup) uses `system` to act on the
264
+ // operator's own state; third-party plugins should pass `event.origin`.
253
265
  parentSessionId?: string
254
266
  spawnedByOrigin?: SessionOrigin
255
267
  }
@@ -13,6 +13,11 @@ export type SandboxedCommand = {
13
13
  commandString: string
14
14
  }
15
15
 
16
+ // Fixed fd the rendered commandString opens to /dev/null for --ro-bind-data
17
+ // file masks. 3 is the first fd above stdio; the bash tool's spawn does not
18
+ // inherit it, so the redirect is part of the command string itself.
19
+ const MASK_DATA_FD = 3
20
+
16
21
  // Pure: no I/O, no bwrap availability probe (that is `ensureBwrapAvailable`'s
17
22
  // job). Given a bash command and a policy, returns the bwrap-wrapped argv plus
18
23
  // a shell-quoted rendering of it. Knows nothing about subagents, origins, or
@@ -24,7 +29,9 @@ export function buildSandboxedCommand(command: string, policy: SandboxPolicy = {
24
29
  applyCommandFilter(command, policy.commandFilter)
25
30
  }
26
31
  const argv = buildArgv(command, policy)
27
- return { argv, commandString: formatCommand(argv) }
32
+ const needsMaskFd = (policy.masks?.files?.length ?? 0) > 0
33
+ const commandString = needsMaskFd ? `${formatCommand(argv)} ${MASK_DATA_FD}</dev/null` : formatCommand(argv)
34
+ return { argv, commandString }
28
35
  }
29
36
 
30
37
  function buildArgv(command: string, policy: SandboxPolicy): string[] {
@@ -70,6 +77,8 @@ function buildArgv(command: string, policy: SandboxPolicy): string[] {
70
77
  appendMount(argv, mount)
71
78
  }
72
79
 
80
+ appendMasks(argv, policy)
81
+
73
82
  if (policy.cwd !== undefined) {
74
83
  argv.push('--chdir', policy.cwd)
75
84
  }
@@ -78,6 +87,15 @@ function buildArgv(command: string, policy: SandboxPolicy): string[] {
78
87
  return argv
79
88
  }
80
89
 
90
+ function appendMasks(argv: string[], policy: SandboxPolicy): void {
91
+ for (const dir of policy.masks?.dirs ?? []) {
92
+ argv.push('--tmpfs', dir)
93
+ }
94
+ for (const file of policy.masks?.files ?? []) {
95
+ argv.push('--ro-bind-data', String(MASK_DATA_FD), file)
96
+ }
97
+ }
98
+
81
99
  function appendMount(argv: string[], mount: SandboxMount): void {
82
100
  switch (mount.type) {
83
101
  case 'ro-bind':
@@ -0,0 +1,41 @@
1
+ import { join } from 'node:path'
2
+
3
+ import type { SessionOrigin } from '@/agent/session-origin'
4
+ import { CORE_PERMISSIONS } from '@/permissions/builtins'
5
+ import type { PermissionService } from '@/permissions/permissions'
6
+
7
+ export type HiddenPaths = {
8
+ dirs: string[]
9
+ files: string[]
10
+ }
11
+
12
+ const PRIVATE_DIRS = ['workspace', 'memory', 'sessions'] as const
13
+ const SECRET_FILES = ['.env', 'secrets.json'] as const
14
+
15
+ // The agent's private working surface and credential files are masked from
16
+ // sandboxed bash unless the resolved role carries the matching fs.see.* grant.
17
+ // `permissions.has` resolves the role from the live origin and fails safe to
18
+ // guest (empty permissions) for an unclear/undefined origin, so a missing
19
+ // grant — whether from a low tier or an unresolvable author — hides the path.
20
+ //
21
+ // The security.bypass.* fallback keeps custom roles (which may never name the
22
+ // fs.see.* strings) working by capability: bypass.medium is treated as
23
+ // trusted for full filesystem visibility (private dirs and secret files), and
24
+ // bypass.low maps to the private-surface tier only. The checks are OR-ed: an
25
+ // fs.see.* grant suffices alone; the fallback only matters when it is absent.
26
+ export function resolveHiddenPaths(
27
+ permissions: PermissionService,
28
+ origin: SessionOrigin | undefined,
29
+ agentDir: string,
30
+ ): HiddenPaths {
31
+ const seesPrivate =
32
+ permissions.has(origin, CORE_PERMISSIONS.fsSeePrivate) ||
33
+ permissions.has(origin, 'security.bypass.low') ||
34
+ permissions.has(origin, 'security.bypass.medium')
35
+ const seesSecrets =
36
+ permissions.has(origin, CORE_PERMISSIONS.fsSeeSecrets) || permissions.has(origin, 'security.bypass.medium')
37
+
38
+ const dirs = seesPrivate ? [] : PRIVATE_DIRS.map((d) => join(agentDir, d))
39
+ const files = seesSecrets ? [] : SECRET_FILES.map((f) => join(agentDir, f))
40
+ return { dirs, files }
41
+ }
@@ -1,5 +1,6 @@
1
1
  export { buildSandboxedCommand, type SandboxedCommand } from './build'
2
- export { ensureBwrapAvailable } from './availability'
2
+ export { ensureBwrapAvailable, _resetBwrapAvailabilityCacheForTests } from './availability'
3
+ export { resolveHiddenPaths, type HiddenPaths } from './hidden-paths'
3
4
  export { formatCommand, shellQuote } from './quote'
4
5
  export { SandboxPolicyError, SandboxUnavailableError } from './errors'
5
6
  export {
@@ -23,10 +23,25 @@ export type SandboxProcessPolicy = {
23
23
  dieWithParent?: boolean
24
24
  }
25
25
 
26
+ // Role-derived deny-list overlaid on top of an already-visible tree. dirs are
27
+ // hidden with an empty tmpfs; files are hidden with --ro-bind-data, the only
28
+ // bwrap primitive that masks a single FILE (--tmpfs is dir-only). --ro-bind-data
29
+ // reads its empty content from a file descriptor, and the bash tool spawns with
30
+ // stdio ["ignore","pipe","pipe"] — no inherited extra fds — so the rendered
31
+ // commandString self-opens fd MASK_DATA_FD via a `<fd>< /dev/null` redirection
32
+ // appended after `bash -c <command>`. Masks MUST render after the broad parent
33
+ // mounts: bwrap applies mount ops in command-line order and the last op on a
34
+ // path wins, so a mask emitted before its parent bind would be re-exposed.
35
+ export type SandboxMaskPolicy = {
36
+ dirs?: string[]
37
+ files?: string[]
38
+ }
39
+
26
40
  export type SandboxPolicy = {
27
41
  bwrapPath?: string
28
42
  cwd?: string
29
43
  mounts?: SandboxMount[]
44
+ masks?: SandboxMaskPolicy
30
45
  network?: SandboxNetwork
31
46
  env?: SandboxEnvPolicy
32
47
  commandFilter?: SandboxCommandFilter
@@ -0,0 +1,104 @@
1
+ ---
2
+ name: typeclaw-troubleshooting
3
+ description: Use this skill when you are stuck in a fix-it loop — you've made roughly three attempts at the same failure and you're still cycling shell commands (kill the process, re-run, `sleep`, `capture-pane`, inspect, retry) without converging. Triggers include a hung or runaway process that won't die, a `C-c` that didn't stop the program, `<defunct>`/zombie processes piling up in `ps`, an interactive program that blocks `bash` waiting for input, a script that "ran" but produced no output and no file, repeated "not found"/timeout/same-error-again loops, and any moment you catch yourself thinking "let me wait a bit more and check again" for the third time. Read it before you spawn `operator` to take over the debugging — it covers the operator hand-off prompt, the tmux session pattern, killing stuck/zombie processes properly, and the edge-triggered capture-pane polling loop that the inline retry-and-sleep approach gets wrong.
4
+ ---
5
+
6
+ # typeclaw-troubleshooting
7
+
8
+ When a problem fights back, the failure mode is not "I can't fix it" — it's "I'm burning my own context and freezing the conversation while I fix it." A debugging loop is inherently noisy: every retry dumps stale shell output, zombie-process listings, and pane captures into your context, and each blocking `bash` call (especially `sleep N` followed by a capture) leaves the user staring at a frozen-looking conversation. The fix is to move the loop out of your session and into `operator`, which has bash-with-side-effects and runs in its own context window.
9
+
10
+ This skill is the runbook for that hand-off. Read it once you've hit the trigger (~3 attempts on the same failure without convergence), **before** you spawn `operator`.
11
+
12
+ ## The trigger, concretely
13
+
14
+ You are in a troubleshooting loop when all of these are true:
15
+
16
+ - You've attempted the **same underlying fix** ~3 times and it still fails.
17
+ - Your recent turns are dominated by `bash` calls whose only purpose is to probe/retry: `kill`, `sleep`, `tmux capture-pane`, `ps aux | grep`, re-running the same script, "let me wait and check again".
18
+ - Each attempt produces more disposable output than signal.
19
+
20
+ If you're still making real progress (each attempt narrows the problem), keep going — this is for the _non-converging_ case. One or two quick probes stay inline; a third lap means delegate.
21
+
22
+ ## Why inline retry-and-sleep is the wrong tool
23
+
24
+ Two failure patterns show up over and over when an agent debugs inline, and both are why this belongs in operator:
25
+
26
+ 1. **`sleep N; capture-pane` blocks you for N seconds at a time.** You can't reply, the typing indicator can't heartbeat, and you still don't know if the work finished — you just guessed at a duration. Operator absorbs all of that latency in its own session.
27
+ 2. **A `C-c` sent to a tmux pane does not always kill the program.** If the foreground process is in a tight loop (e.g. a `while True:` with `pyboy.tick()`), the interrupt may be queued behind work and never processed, so the _next_ command you type lands in the shell while the old process is still running — and you end up reading output from the wrong process. The reliable kill is by PID, not by keystroke (see below).
28
+
29
+ ## The hand-off: spawn operator in background
30
+
31
+ Spawn `operator` with `run_in_background: true` so your session stays free, then keep talking to the user. Give operator everything it needs — it does **not** see this conversation, and it does **not** see this skill. Operator runs on a fixed tool set (`read`, `grep`, `find`, `ls`, `bash`, `write`, `edit`) with no skill loading, so any mechanic below that you want it to follow has to be spelled out in the `[REQUEST]` block — don't assume it knows the tmux/PID/polling patterns:
32
+
33
+ ```
34
+ [CONTEXT]: <what you were doing, the file/process/command involved, the environment>
35
+ [SYMPTOM]: <the exact failure — error text, "process won't die", "script ran but wrote no file", paste the relevant output>
36
+ [ALREADY TRIED]: <each attempt and what happened, so operator doesn't repeat your dead ends>
37
+ [SUCCESS CONDITION]: <something operator can verify with bash alone — "screenshot_now.png exists and is larger than 1KB", "the dev server answers 200 on :3000", "pgrep -f repro.py returns nothing">
38
+ [CONSTRAINTS]: <don't touch X, the relevant tmux session is named Y, the workdir is Z>
39
+ [REQUEST]: Drive the diagnose → fix → verify loop. Use a tmux session for any hung or interactive process so it can't block you (start detached, kill stuck processes by PID not C-c, poll on the success condition not a fixed sleep). If about 5 diagnose-fix-verify rounds have not met the success condition, stop and report what you found instead of retrying. Return root cause, what you changed, and whether the success condition is met.
40
+ ```
41
+
42
+ State the success condition as something operator can check with `bash` — file exists and is non-trivially sized, a port answers, a process is gone. Operator has **no vision tools** (`look_at` is yours, not its), so "the screenshot looks right" is **not** a condition operator can verify. If the fix ultimately needs a visual check, have operator confirm the file is written and reasonably sized, then **you** call `look_at` on it after operator reports back — that final eyeball stays in your session.
43
+
44
+ Then stay responsive. When the completion `<system-reminder>` lands, weave operator's report into your next reply (in a channel session, surface it via `channel_reply`/`channel_send` — plain text is invisible there).
45
+
46
+ If the `subagent.spawn.operator` gate denies (you're not owner/trusted tier), you can't delegate — fall back to doing the loop yourself, but apply the mechanics below to do it cleanly.
47
+
48
+ ## Mechanics operator should use (and you, if you can't delegate)
49
+
50
+ ### Run hung/interactive processes in a dedicated tmux session
51
+
52
+ ```sh
53
+ tmux new-session -d -s fix-<id> -c /agent/workspace
54
+ tmux send-keys -t fix-<id> "python3 repro.py" Enter
55
+ ```
56
+
57
+ A detached session means a process that hangs or waits for input never blocks the driver. Name it for the task (`fix-<id>`) so it's easy to find and tear down.
58
+
59
+ ### Observe without blocking — edge-triggered, not `sleep`-and-guess
60
+
61
+ Don't `sleep N; capture-pane` and hope. Capture the pane, react to what's actually there, and key the next step off a real signal (a file appearing, a process exiting, a prompt string showing up):
62
+
63
+ ```sh
64
+ tmux capture-pane -t fix-<id> -p -S - # -p print, -S - full scrollback
65
+ ls -la /agent/workspace/expected-output.png # the real done-signal
66
+ pgrep -af repro.py # is it still running?
67
+ ```
68
+
69
+ Loop on the **success condition** (output file exists, port answers, process gone), not on a fixed sleep. If you must wait, poll in short intervals and re-check the signal each time rather than sleeping for one long guess.
70
+
71
+ ### Kill stuck processes by PID, not by keystroke
72
+
73
+ `tmux send-keys ... C-c` is unreliable against a tight loop. Find the real PID and signal it:
74
+
75
+ ```sh
76
+ pgrep -af repro.py # find the actual PID(s)
77
+ kill <pid> # SIGTERM first
78
+ sleep 1; pgrep -af repro.py # still there?
79
+ kill -9 <pid> # SIGKILL if it ignored SIGTERM
80
+ ```
81
+
82
+ ### Reap zombies and confirm the field is clear
83
+
84
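+ `<defunct>` entries in `ps` are zombies — already dead, waiting for their parent to reap them. You cannot kill a zombie (signals, including `kill -9`, have no effect on it); it disappears once its parent reaps it or the parent itself exits. They are not your hung process; chasing them wastes turns. Filter them out and confirm the _live_ process is gone before re-running: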
85
+
86
+ ```sh
87
+ ps aux | grep -i repro | grep -v grep | grep -v defunct
88
+ ```
89
+
90
+ If the only matches are `<defunct>`, the process is already dead — re-running is safe. If a live PID remains, the previous `C-c` didn't work; kill it by PID (above) before the next attempt.
91
+
92
+ ### Tear down when done
93
+
94
+ ```sh
95
+ tmux kill-session -t fix-<id> 2>/dev/null || true
96
+ ```
97
+
98
+ Don't leave orphaned sessions running between attempts — a stale session is how you end up sending input to the wrong process.
99
+
100
+ ## What operator returns, and what you do with it
101
+
102
+ Operator's final report should give you: **root cause**, **what it changed**, and **whether the success condition is met**. Surface that to the user in your own words — don't paste the raw debugging transcript; the whole point was to keep that noise out of the conversation. If operator couldn't resolve it, relay the outcome plus the partial progress (what's now known, what's still failing) so the user can decide the next move.
103
+
104
+ Bound the loop so it can't spin as badly as the inline version would have. Tell operator in the `[REQUEST]` that if a handful of diagnose-fix-verify rounds (≈5) haven't met the success condition, it should stop and report what it found rather than keep retrying — a non-converging operator loop wastes the same tokens you delegated to avoid, it just wastes them out of sight. When that bounded-failure report comes back, bring the user in: relay the partial progress and ask how to proceed instead of immediately re-spawning operator on the same dead end.
@@ -140,6 +140,8 @@ function renderOriginLabel(kind: OriginKind, ctx: RenderCtx): string {
140
140
  return `${color('green', '#', ctx)} ${'channel'}`
141
141
  case 'subagent':
142
142
  return `${color('yellow', '↳', ctx)} ${'subagent'}`
143
+ case 'system':
144
+ return `${color('blue', '⚙', ctx)} ${'system'}`
143
145
  case 'unknown':
144
146
  return `${dim('?', ctx)} ${dim('unknown', ctx)}`
145
147
  }
@@ -211,6 +213,8 @@ function originGlyphOnly(kind: OriginKind, ctx: RenderCtx): string {
211
213
  return color('green', '#', ctx)
212
214
  case 'subagent':
213
215
  return color('yellow', '↳', ctx)
216
+ case 'system':
217
+ return color('blue', '⚙', ctx)
214
218
  case 'unknown':
215
219
  return dim('?', ctx)
216
220
  }
package/src/usage/scan.ts CHANGED
@@ -6,7 +6,7 @@ import { join } from 'node:path'
6
6
  // before origin stamping landed AND sessions whose session-meta line is
7
7
  // malformed or missing — surfacing them under one explicit label is more
8
8
  // honest than silently dropping them.
9
- export const ORIGIN_KINDS = ['tui', 'cron', 'channel', 'subagent', 'unknown'] as const
9
+ export const ORIGIN_KINDS = ['tui', 'cron', 'channel', 'subagent', 'system', 'unknown'] as const
10
10
  export type OriginKind = (typeof ORIGIN_KINDS)[number]
11
11
 
12
12
  // Narrow projection: session files can grow into tens of MB on long-lived