npm - typeclaw - Versions diffs - 0.15.2 → 0.16.0 - Mend

typeclaw 0.15.2 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/package.json +1 -1
package/src/agent/index.ts +3 -1
package/src/agent/plugin-tools.ts +38 -0
package/src/agent/session-meta.ts +6 -2
package/src/agent/session-origin.ts +58 -3
package/src/agent/subagents.ts +6 -1
package/src/agent/system-prompt.ts +41 -32
package/src/agent/tools/channel-reply.ts +3 -2
package/src/bundled-plugins/guard/policies/non-workspace-write.ts +8 -6
package/src/bundled-plugins/memory/index.ts +25 -6
package/src/bundled-plugins/security/index.ts +12 -0
package/src/bundled-plugins/security/policies/private-surface-read.ts +215 -0
package/src/channels/adapters/github/inbound.ts +54 -1
package/src/channels/adapters/github/index.ts +1 -0
package/src/channels/router.ts +63 -20
package/src/cli/inspect.ts +20 -9
package/src/init/index.ts +19 -9
package/src/init/paths.ts +1 -0
package/src/inspect/label.ts +2 -0
package/src/inspect/live.ts +6 -1
package/src/inspect/render.ts +8 -2
package/src/inspect/replay.ts +6 -1
package/src/inspect/types.ts +4 -1
package/src/permissions/builtins.ts +12 -0
package/src/permissions/permissions.ts +7 -0
package/src/plugin/types.ts +12 -0
package/src/sandbox/build.ts +19 -1
package/src/sandbox/hidden-paths.ts +41 -0
package/src/sandbox/index.ts +2 -1
package/src/sandbox/policy.ts +15 -0
package/src/skills/typeclaw-troubleshooting/SKILL.md +104 -0
package/src/usage/report.ts +4 -0
package/src/usage/scan.ts +1 -1

package/src/inspect/replay.ts CHANGED Viewed

@@ -137,7 +137,12 @@ function* assistantEvents(
     }
   }
   if (typeof message.errorMessage === 'string' && message.errorMessage !== '') {
-    yield { cat: 'error', ts, message: message.errorMessage }
+    yield {
+      cat: 'error',
+      ts,
+      message: message.errorMessage,
+      ...(typeof message.stopReason === 'string' ? { stopReason: message.stopReason } : {}),
+    }
   }
   const usage = readUsage(message.usage)
   if (usage !== null && (usage.totalTokens > 0 || typeof message.stopReason === 'string')) {

package/src/inspect/types.ts CHANGED Viewed

@@ -37,7 +37,10 @@ export type InspectEvent =
       isError?: boolean
       durationMs?: number
     }
-  | { cat: 'error'; ts: number; message: string }
+  // `stopReason` is the upstream-reported reason for the failed/aborted turn
+  // (e.g. 'error', 'aborted'). An 'aborted' stopReason is a user cancel, not a
+  // provider failure, and is rendered distinctly (see render.ts).
+  | { cat: 'error'; ts: number; message: string; stopReason?: string }
   | {
       cat: 'done'
       ts: number

package/src/permissions/builtins.ts CHANGED Viewed

@@ -16,6 +16,13 @@ export const CORE_PERMISSIONS = {
   subagentCancel: 'subagent.cancel',
   subagentOutput: 'subagent.output',
   subagentSpawnOperator: 'subagent.spawn.operator',
+  // Phrased as capabilities to SEE, not to hide, so the role tower stays
+  // monotonic (a higher tier sees a strict superset of a lower tier) and the
+  // empty-permission guest is the fail-safe floor. resolveHiddenPaths masks
+  // whatever the resolved role lacks: fsSeePrivate gates workspace/+memory/+
+  // sessions/, fsSeeSecrets gates .env+secrets.json.
+  fsSeePrivate: 'fs.see.private',
+  fsSeeSecrets: 'fs.see.secrets',
 } as const
 // Sentinel that `expandOwnerWildcard` swaps for the concrete union of
@@ -61,6 +68,8 @@ export const BUILTIN_ROLES: Readonly<Record<BuiltinRoleName, BuiltinRoleSpec>> =
       CORE_PERMISSIONS.subagentCancel,
       CORE_PERMISSIONS.subagentOutput,
       CORE_PERMISSIONS.subagentSpawnOperator,
+      CORE_PERMISSIONS.fsSeePrivate,
+      CORE_PERMISSIONS.fsSeeSecrets,
       'security.bypass.low',
       'security.bypass.medium',
       'security.bypass.high',
@@ -76,6 +85,8 @@ export const BUILTIN_ROLES: Readonly<Record<BuiltinRoleName, BuiltinRoleSpec>> =
       CORE_PERMISSIONS.subagentCancel,
       CORE_PERMISSIONS.subagentOutput,
       CORE_PERMISSIONS.subagentSpawnOperator,
+      CORE_PERMISSIONS.fsSeePrivate,
+      CORE_PERMISSIONS.fsSeeSecrets,
       'security.bypass.low',
       'security.bypass.medium',
     ],
@@ -87,6 +98,7 @@ export const BUILTIN_ROLES: Readonly<Record<BuiltinRoleName, BuiltinRoleSpec>> =
       CORE_PERMISSIONS.subagentSpawn,
       CORE_PERMISSIONS.subagentCancel,
       CORE_PERMISSIONS.subagentOutput,
+      CORE_PERMISSIONS.fsSeePrivate,
       'security.bypass.low',
     ],
   },

package/src/permissions/permissions.ts CHANGED Viewed

@@ -110,6 +110,13 @@ export function createPermissionService(opts: CreatePermissionServiceOptions = {
   function resolveRole(origin: SessionOrigin | undefined): string {
     if (origin === undefined) return 'guest'
+    // Runtime-owned infrastructure (memory, backup) acts on the operator's
+    // behalf over operator-owned state. It is constructed only by runtime/
+    // bundled code — inbound channel/cron content cannot produce this kind —
+    // so resolving it to owner is not a laundering vector. See the `system`
+    // origin doc in session-origin.ts.
+    if (origin.kind === 'system') return 'owner'
     if (origin.kind === 'cron') {
       const role = origin.scheduledByRole
       if (role !== undefined && byName.has(role)) return role

package/src/plugin/types.ts CHANGED Viewed

@@ -250,6 +250,18 @@ export type SpawnSubagentOptions = {
   // `spawnedByOrigin: event.origin`. The runtime resolves `spawnedByRole`
   // from the origin via the PermissionService, so the spawning session's
   // role is inherited rather than forged from outside.
+  //
+  // TRUST MODEL: `spawnedByOrigin` accepts any SessionOrigin, including
+  // `{ kind: 'system' }`, which resolves to owner. That is intentional and
+  // not an escalation path: a plugin is a full-trust, in-process module with
+  // no sandbox (see AGENTS.md / docs/internals/skills) — it can already do
+  // anything the runtime can, so minting a system origin grants it nothing it
+  // lacks. The anti-forgery guarantee this API preserves is narrower and
+  // unaffected: inbound channel/cron CONTENT can never reach owner, because
+  // those origins are constructed by the runtime from the transport, never
+  // from message text, and a content-driven turn cannot produce a `system`
+  // origin. Bundled infra (memory, backup) uses `system` to act on the
+  // operator's own state; third-party plugins should pass `event.origin`.
   parentSessionId?: string
   spawnedByOrigin?: SessionOrigin
 }

package/src/sandbox/build.ts CHANGED Viewed

@@ -13,6 +13,11 @@ export type SandboxedCommand = {
   commandString: string
 }
+// Fixed fd the rendered commandString opens to /dev/null for --ro-bind-data
+// file masks. 3 is the first fd above stdio; the bash tool's spawn does not
+// inherit it, so the redirect is part of the command string itself.
+const MASK_DATA_FD = 3
 // Pure: no I/O, no bwrap availability probe (that is `ensureBwrapAvailable`'s
 // job). Given a bash command and a policy, returns the bwrap-wrapped argv plus
 // a shell-quoted rendering of it. Knows nothing about subagents, origins, or
@@ -24,7 +29,9 @@ export function buildSandboxedCommand(command: string, policy: SandboxPolicy = {
     applyCommandFilter(command, policy.commandFilter)
   }
   const argv = buildArgv(command, policy)
-  return { argv, commandString: formatCommand(argv) }
+  const needsMaskFd = (policy.masks?.files?.length ?? 0) > 0
+  const commandString = needsMaskFd ? `${formatCommand(argv)} ${MASK_DATA_FD}</dev/null` : formatCommand(argv)
+  return { argv, commandString }
 }
 function buildArgv(command: string, policy: SandboxPolicy): string[] {
@@ -70,6 +77,8 @@ function buildArgv(command: string, policy: SandboxPolicy): string[] {
     appendMount(argv, mount)
   }
+  appendMasks(argv, policy)
   if (policy.cwd !== undefined) {
     argv.push('--chdir', policy.cwd)
   }
@@ -78,6 +87,15 @@ function buildArgv(command: string, policy: SandboxPolicy): string[] {
   return argv
 }
+function appendMasks(argv: string[], policy: SandboxPolicy): void {
+  for (const dir of policy.masks?.dirs ?? []) {
+    argv.push('--tmpfs', dir)
+  }
+  for (const file of policy.masks?.files ?? []) {
+    argv.push('--ro-bind-data', String(MASK_DATA_FD), file)
+  }
+}
 function appendMount(argv: string[], mount: SandboxMount): void {
   switch (mount.type) {
     case 'ro-bind':

package/src/sandbox/hidden-paths.ts ADDED Viewed

@@ -0,0 +1,41 @@
+import { join } from 'node:path'
+import type { SessionOrigin } from '@/agent/session-origin'
+import { CORE_PERMISSIONS } from '@/permissions/builtins'
+import type { PermissionService } from '@/permissions/permissions'
+export type HiddenPaths = {
+  dirs: string[]
+  files: string[]
+}
+const PRIVATE_DIRS = ['workspace', 'memory', 'sessions'] as const
+const SECRET_FILES = ['.env', 'secrets.json'] as const
+// The agent's private working surface and credential files are masked from
+// sandboxed bash unless the resolved role carries the matching fs.see.* grant.
+// `permissions.has` resolves the role from the live origin and fails safe to
+// guest (empty permissions) for an unclear/undefined origin, so a missing
+// grant — whether from a low tier or an unresolvable author — hides the path.
+//
+// The security.bypass.* fallback keeps custom roles (which may never name the
+// fs.see.* strings) working by capability: a role trusted enough to bypass
+// medium-severity guards is treated as trusted for filesystem visibility, and
+// bypass.low maps to the private-surface tier. fs.see.* always wins when
+// present; the fallback only fires when it is absent.
+export function resolveHiddenPaths(
+  permissions: PermissionService,
+  origin: SessionOrigin | undefined,
+  agentDir: string,
+): HiddenPaths {
+  const seesPrivate =
+    permissions.has(origin, CORE_PERMISSIONS.fsSeePrivate) ||
+    permissions.has(origin, 'security.bypass.low') ||
+    permissions.has(origin, 'security.bypass.medium')
+  const seesSecrets =
+    permissions.has(origin, CORE_PERMISSIONS.fsSeeSecrets) || permissions.has(origin, 'security.bypass.medium')
+  const dirs = seesPrivate ? [] : PRIVATE_DIRS.map((d) => join(agentDir, d))
+  const files = seesSecrets ? [] : SECRET_FILES.map((f) => join(agentDir, f))
+  return { dirs, files }
+}

package/src/sandbox/index.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 export { buildSandboxedCommand, type SandboxedCommand } from './build'
-export { ensureBwrapAvailable } from './availability'
+export { ensureBwrapAvailable, _resetBwrapAvailabilityCacheForTests } from './availability'
+export { resolveHiddenPaths, type HiddenPaths } from './hidden-paths'
 export { formatCommand, shellQuote } from './quote'
 export { SandboxPolicyError, SandboxUnavailableError } from './errors'
 export {

package/src/sandbox/policy.ts CHANGED Viewed

@@ -23,10 +23,25 @@ export type SandboxProcessPolicy = {
   dieWithParent?: boolean
 }
+// Role-derived deny-list overlaid on top of an already-visible tree. dirs are
+// hidden with an empty tmpfs; files are hidden with --ro-bind-data, the only
+// bwrap primitive that masks a single FILE (--tmpfs is dir-only). --ro-bind-data
+// reads its empty content from a file descriptor, and the bash tool spawns with
+// stdio ["ignore","pipe","pipe"] — no inherited extra fds — so the rendered
+// commandString self-opens fd MASK_DATA_FD via a `<fd>< /dev/null` redirection
+// appended after `bash -c <command>`. Masks MUST render after the broad parent
+// mounts: bwrap applies mount ops in command-line order and the last op on a
+// path wins, so a mask emitted before its parent bind would be re-exposed.
+export type SandboxMaskPolicy = {
+  dirs?: string[]
+  files?: string[]
+}
 export type SandboxPolicy = {
   bwrapPath?: string
   cwd?: string
   mounts?: SandboxMount[]
+  masks?: SandboxMaskPolicy
   network?: SandboxNetwork
   env?: SandboxEnvPolicy
   commandFilter?: SandboxCommandFilter

package/src/skills/typeclaw-troubleshooting/SKILL.md ADDED Viewed

@@ -0,0 +1,104 @@
+---
+name: typeclaw-troubleshooting
+description: Use this skill when you are stuck in a fix-it loop — you've made roughly three attempts at the same failure and you're still cycling shell commands (kill the process, re-run, `sleep`, `capture-pane`, inspect, retry) without converging. Triggers include a hung or runaway process that won't die, a `C-c` that didn't stop the program, `<defunct>`/zombie processes piling up in `ps`, an interactive program that blocks `bash` waiting for input, a script that "ran" but produced no output and no file, repeated "not found"/timeout/same-error-again loops, and any moment you catch yourself thinking "let me wait a bit more and check again" for the third time. Read it before you spawn `operator` to take over the debugging — it covers the operator hand-off prompt, the tmux session pattern, killing stuck/zombie processes properly, and the edge-triggered capture-pane polling loop that the inline retry-and-sleep approach gets wrong.
+---
+# typeclaw-troubleshooting
+When a problem fights back, the failure mode is not "I can't fix it" — it's "I'm burning my own context and freezing the conversation while I fix it." A debugging loop is inherently noisy: every retry dumps stale shell output, zombie-process listings, and pane captures into your context, and each blocking `bash` call (especially `sleep N` followed by a capture) leaves the user staring at a frozen-looking conversation. The fix is to move the loop out of your session and into `operator`, which has bash-with-side-effects and runs in its own context window.
+This skill is the runbook for that hand-off. Read it once you've hit the trigger (~3 attempts on the same failure without convergence), **before** you spawn `operator`.
+## The trigger, concretely
+You are in a troubleshooting loop when all of these are true:
+- You've attempted the **same underlying fix** ~3 times and it still fails.
+- Your recent turns are dominated by `bash` calls whose only purpose is to probe/retry: `kill`, `sleep`, `tmux capture-pane`, `ps aux | grep`, re-running the same script, "let me wait and check again".
+- Each attempt produces more disposable output than signal.
+If you're still making real progress (each attempt narrows the problem), keep going — this is for the _non-converging_ case. One or two quick probes stay inline; a third lap means delegate.
+## Why inline retry-and-sleep is the wrong tool
+Two failure patterns show up over and over when an agent debugs inline, and both are why this belongs in operator:
+1. **`sleep N; capture-pane` blocks you for N seconds at a time.** You can't reply, the typing indicator can't heartbeat, and you still don't know if the work finished — you just guessed at a duration. Operator absorbs all of that latency in its own session.
+2. **A `C-c` sent to a tmux pane does not always kill the program.** If the foreground process is in a tight loop (e.g. a `while True:` with `pyboy.tick()`), the interrupt may be queued behind work and never processed, so the _next_ command you type lands in the shell while the old process is still running — and you end up reading output from the wrong process. The reliable kill is by PID, not by keystroke (see below).
+## The hand-off: spawn operator in background
+Spawn `operator` with `run_in_background: true` so your session stays free, then keep talking to the user. Give operator everything it needs — it does **not** see this conversation, and it does **not** see this skill. Operator runs on a fixed tool set (`read`, `grep`, `find`, `ls`, `bash`, `write`, `edit`) with no skill loading, so any mechanic below that you want it to follow has to be spelled out in the `[REQUEST]` block — don't assume it knows the tmux/PID/polling patterns:
+```
+[CONTEXT]: <what you were doing, the file/process/command involved, the environment>
+[SYMPTOM]: <the exact failure — error text, "process won't die", "script ran but wrote no file", paste the relevant output>
+[ALREADY TRIED]: <each attempt and what happened, so operator doesn't repeat your dead ends>
+[SUCCESS CONDITION]: <something operator can verify with bash alone — "screenshot_now.png exists and is larger than 1KB", "the dev server answers 200 on :3000", "pgrep -f repro.py returns nothing">
+[CONSTRAINTS]: <don't touch X, the relevant tmux session is named Y, the workdir is Z>
+[REQUEST]: Drive the diagnose → fix → verify loop. Use a tmux session for any hung or interactive process so it can't block you (start detached, kill stuck processes by PID not C-c, poll on the success condition not a fixed sleep). Return root cause, what you changed, and whether the success condition is met.
+```
+State the success condition as something operator can check with `bash` — file exists and is non-trivially sized, a port answers, a process is gone. Operator has **no vision tools** (`look_at` is yours, not its), so "the screenshot looks right" is **not** a condition operator can verify. If the fix ultimately needs a visual check, have operator confirm the file is written and reasonably sized, then **you** call `look_at` on it after operator reports back — that final eyeball stays in your session.
+Then stay responsive. When the completion `<system-reminder>` lands, weave operator's report into your next reply (in a channel session, surface it via `channel_reply`/`channel_send` — plain text is invisible there).
+If the `subagent.spawn.operator` gate denies (you're not owner/trusted tier), you can't delegate — fall back to doing the loop yourself, but apply the mechanics below to do it cleanly.
+## Mechanics operator should use (and you, if you can't delegate)
+### Run hung/interactive processes in a dedicated tmux session
+```sh
+tmux new-session -d -s fix-<short-id> -c /agent/workspace
+tmux send-keys -t fix-<short-id> "python3 repro.py" Enter
+```
+A detached session means a process that hangs or waits for input never blocks the driver. Name it for the task (`fix-<id>`) so it's easy to find and tear down.
+### Observe without blocking — edge-triggered, not `sleep`-and-guess
+Don't `sleep N; capture-pane` and hope. Capture the pane, react to what's actually there, and key the next step off a real signal (a file appearing, a process exiting, a prompt string showing up):
+```sh
+tmux capture-pane -t fix-<id> -p -S -          # -p print, -S - full scrollback
+ls -la /agent/workspace/expected-output.png    # the real done-signal
+pgrep -af repro.py                             # is it still running?
+```
+Loop on the **success condition** (output file exists, port answers, process gone), not on a fixed sleep. If you must wait, poll in short intervals and re-check the signal each time rather than sleeping for one long guess.
+### Kill stuck processes by PID, not by keystroke
+`tmux send-keys ... C-c` is unreliable against a tight loop. Find the real PID and signal it:
+```sh
+pgrep -af repro.py                 # find the actual PID(s)
+kill <pid>                         # SIGTERM first
+sleep 1; pgrep -af repro.py        # still there?
+kill -9 <pid>                      # SIGKILL if it ignored SIGTERM
+```
+### Reap zombies and confirm the field is clear
+`<defunct>` entries in `ps` are zombies — already dead, waiting for their parent to reap them. They are not your hung process; chasing them wastes turns. Filter them out and confirm the _live_ process is gone before re-running:
+```sh
+ps aux | grep -i repro | grep -v grep | grep -v defunct
+```
+If the only matches are `<defunct>`, the process is already dead — re-running is safe. If a live PID remains, the previous `C-c` didn't work; kill it by PID (above) before the next attempt.
+### Tear down when done
+```sh
+tmux kill-session -t fix-<id> 2>/dev/null || true
+```
+Don't leave orphaned sessions running between attempts — a stale session is how you end up sending input to the wrong process.
+## What operator returns, and what you do with it
+Operator's final report should give you: **root cause**, **what it changed**, and **whether the success condition is met**. Surface that to the user in your own words — don't paste the raw debugging transcript; the whole point was to keep that noise out of the conversation. If operator couldn't resolve it, relay the outcome plus the partial progress (what's now known, what's still failing) so the user can decide the next move.
+Bound the loop so it can't spin as badly as the inline version would have. Tell operator in the `[REQUEST]` that if a handful of diagnose-fix-verify rounds (≈5) haven't met the success condition, it should stop and report what it found rather than keep retrying — a non-converging operator loop wastes the same tokens you delegated to avoid, it just wastes them out of sight. When that bounded-failure report comes back, bring the user in: relay the partial progress and ask how to proceed instead of immediately re-spawning operator on the same dead end.

package/src/usage/report.ts CHANGED Viewed

@@ -140,6 +140,8 @@ function renderOriginLabel(kind: OriginKind, ctx: RenderCtx): string {
       return `${color('green', '#', ctx)} ${'channel'}`
     case 'subagent':
       return `${color('yellow', '↳', ctx)} ${'subagent'}`
+    case 'system':
+      return `${color('blue', '⚙', ctx)} ${'system'}`
     case 'unknown':
       return `${dim('?', ctx)} ${dim('unknown', ctx)}`
   }
@@ -211,6 +213,8 @@ function originGlyphOnly(kind: OriginKind, ctx: RenderCtx): string {
       return color('green', '#', ctx)
     case 'subagent':
       return color('yellow', '↳', ctx)
+    case 'system':
+      return color('blue', '⚙', ctx)
     case 'unknown':
       return dim('?', ctx)
   }

package/src/usage/scan.ts CHANGED Viewed

@@ -6,7 +6,7 @@ import { join } from 'node:path'
 // before origin stamping landed AND sessions whose session-meta line is
 // malformed or missing — surfacing them under one explicit label is more
 // honest than silently dropping them.
-export const ORIGIN_KINDS = ['tui', 'cron', 'channel', 'subagent', 'unknown'] as const
+export const ORIGIN_KINDS = ['tui', 'cron', 'channel', 'subagent', 'system', 'unknown'] as const
 export type OriginKind = (typeof ORIGIN_KINDS)[number]
 // Narrow projection: session files can grow into tens of MB on long-lived