npm - typeclaw - Versions diffs - 0.5.1 → 0.6.0 - Mend

typeclaw 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

package/README.md +4 -0
package/package.json +1 -1
package/src/agent/index.ts +80 -8
package/src/agent/live-subagents.ts +215 -0
package/src/agent/plugin-tools.ts +60 -20
package/src/agent/session-origin.ts +15 -0
package/src/agent/subagents.ts +140 -3
package/src/agent/system-prompt.ts +40 -0
package/src/agent/tools/channel-reply.ts +24 -1
package/src/agent/tools/channel-send.ts +26 -1
package/src/agent/tools/spawn-subagent.ts +283 -0
package/src/agent/tools/subagent-cancel.ts +96 -0
package/src/agent/tools/subagent-output.ts +192 -0
package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +26 -0
package/src/bundled-plugins/explorer/explorer.ts +103 -0
package/src/bundled-plugins/explorer/index.ts +11 -0
package/src/bundled-plugins/guard/index.ts +12 -1
package/src/bundled-plugins/guard/policies/managed-config.ts +139 -0
package/src/bundled-plugins/guard/policy.ts +1 -0
package/src/bundled-plugins/operator/index.ts +11 -0
package/src/bundled-plugins/operator/operator.ts +76 -0
package/src/bundled-plugins/scout/index.ts +11 -0
package/src/bundled-plugins/scout/scout.ts +94 -0
package/src/channels/router.ts +32 -0
package/src/config/config.ts +45 -12
package/src/config/index.ts +3 -0
package/src/cron/index.ts +3 -0
package/src/cron/schema.ts +20 -0
package/src/init/dockerfile.ts +44 -5
package/src/permissions/builtins.ts +23 -2
package/src/plugin/define.ts +2 -0
package/src/plugin/index.ts +2 -0
package/src/plugin/types.ts +15 -22
package/src/run/bundled-plugins.ts +6 -0
package/src/run/channel-session-factory.ts +19 -0
package/src/run/index.ts +56 -6
package/src/server/index.ts +103 -0
package/src/skills/typeclaw-claude-code/SKILL.md +273 -0
package/src/skills/typeclaw-claude-code/references/auth-flow.md +135 -0
package/src/skills/typeclaw-claude-code/references/stop-hook.md +99 -0
package/src/skills/typeclaw-claude-code/references/tmux-driving.md +157 -0
package/src/skills/typeclaw-config/SKILL.md +29 -26
package/typeclaw.schema.json +6 -0

package/src/run/index.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { SessionManager } from '@mariozechner/pi-coding-agent'
 import { createSession, createSessionWithDispose } from '@/agent'
+import { LiveSubagentRegistry } from '@/agent/live-subagents'
 import type { SessionOrigin } from '@/agent/session-origin'
 import {
   createSubagentConsumer,
@@ -9,6 +10,7 @@ import {
   type Subagent as InternalSubagent,
   type SubagentConsumer,
   type SubagentRegistry,
+  type SubagentShared,
 } from '@/agent/subagents'
 import { resolveCapOptionsFromConfig } from '@/bundled-plugins/tool-result-cap'
 import { createChannelManager, createChannelsReloadable, type ChannelManager } from '@/channels'
@@ -176,6 +178,8 @@ export async function startAgent({
     },
   })
+  const liveSubagentRegistry = new LiveSubagentRegistry()
   const channelManager = createChannelManagerFor({
     agentDir: cwd,
     channelsConfigRef: () => getConfig().channels,
@@ -191,6 +195,9 @@ export async function startAgent({
       getChannelRouter: () => channelManager.router,
       rehydrateCapOptions: resolveCapOptionsFromConfig(pluginConfigsByName['tool-result-cap']),
       permissions: pluginsLoaded.permissions,
+      liveSubagentRegistry,
+      subagentRegistry: pluginRuntime.get().subagents,
+      getCreateSessionForSubagent: () => createSessionForSubagent,
       ...containerNameOpt,
       ...runtimeVersionOpt,
     }),
@@ -347,6 +354,9 @@ export async function startAgent({
               },
             }
           : {}),
+        liveSubagentRegistry,
+        subagentRegistry: pluginRuntime.get().subagents,
+        createSessionForSubagent,
         ...containerNameOpt,
         ...runtimeVersionOpt,
       })
@@ -465,6 +475,8 @@ export async function startAgent({
     claimController,
     commandRunnerFactory,
     tunnelManager,
+    liveSubagentRegistry,
+    createSessionForSubagent,
     ...containerNameOpt,
     ...runtimeVersionOpt,
     ...tuiTokenOpt,
@@ -593,7 +605,15 @@ function makeDefaultSchedulerFactory(internalJobs: () => CronJob[]): SchedulerFa
   return ({ file, onFire }) => createScheduler({ jobs: [...file.jobs, ...internalJobs()], onFire })
 }
-function mergeSubagents(pluginRegistry: PluginRegistry): {
+// Exported for the regression test in `merge-subagents.test.ts`. The shim
+// layer between the plugin-author-facing `Subagent` (`@/plugin/types`) and
+// the runtime-internal `Subagent` (`@/agent/subagents`) is the load-bearing
+// translation point for visibility, payload-schema, and permission gating —
+// fields that flow through the `SubagentRegistry` without going through the
+// `pluginSubagentByShim` recovery path. Previous regressions silently
+// dropped fields here, hiding every public bundled subagent (scout,
+// explorer, operator) from the `spawn_subagent` tool surface.
+export function mergeSubagents(pluginRegistry: PluginRegistry): {
   registry: SubagentRegistry
   pluginSubagentByShim: WeakMap<InternalSubagent<any>, PluginSubagentEntry>
   pluginSubagentByName: Map<string, PluginSubagentEntry>
@@ -620,10 +640,40 @@ function mergeSubagents(pluginRegistry: PluginRegistry): {
   return { registry: merged, pluginSubagentByShim, pluginSubagentByName }
 }
+// Compile-time proof that every plugin-only key on `@/plugin`'s `Subagent`
+// (i.e. every key NOT inherited from `SubagentShared`) has been classified
+// for the shim. When a future maintainer introduces a new field on plugin-side
+// `Subagent` that isn't on `SubagentShared`, the `satisfies` clause on
+// `PLUGIN_ONLY_KEYS_DROPPED_BY_SHIM` below fails at compile time until the
+// new key is listed there — and the destructuring in `pluginSubagentShim`
+// is updated to discard it. Without this guard, the shim's rest-spread
+// would silently leak future plugin-only fields into the internal registry —
+// the opposite-direction drift from the bug this PR fixes for shared fields.
+type PluginOnlySubagentKeys = Exclude<keyof import('@/plugin').Subagent<any>, keyof SubagentShared<any>>
+const PLUGIN_ONLY_KEYS_DROPPED_BY_SHIM = {
+  tools: true,
+  customTools: true,
+  inFlightKey: true,
+} satisfies Record<PluginOnlySubagentKeys, true>
+// Reference the table so it's not dead code. The value is a runtime no-op;
+// the load-bearing work is the `satisfies` clause above which forces
+// exhaustive classification of plugin-only keys at compile time.
+void PLUGIN_ONLY_KEYS_DROPPED_BY_SHIM
 function pluginSubagentShim(subagent: import('@/plugin').Subagent<any>): InternalSubagent<any> {
-  return {
-    systemPrompt: subagent.systemPrompt,
-    ...(subagent.payloadSchema ? { payloadSchema: subagent.payloadSchema } : {}),
-    ...(subagent.handler ? { handler: subagent.handler as InternalSubagent<any>['handler'] } : {}),
-  }
+  // The two diverging fields (`tools` is `BuiltinToolRef[]` plugin-side vs
+  // `AgentSessionTools` internal-side; `customTools` similarly differs) are
+  // resolved later in `createSessionForSubagent` via the
+  // `pluginSubagentByShim` lookup, which recovers the original plugin
+  // reference. `inFlightKey` is consumed only by the SubagentConsumer via
+  // `pluginSubagentByName`, not through this shim's registry path. Every
+  // other plugin-side field lives on `SubagentShared` and is structurally
+  // assignable to the internal `Subagent`, so a rest-spread carries them
+  // verbatim — including `visibility` and `requiresSpecificPermission`,
+  // whose silent drop in the previous shim made every plugin-contributed
+  // public subagent (scout, explorer, operator) invisible to the
+  // `spawn_subagent` tool. The list of keys removed here is enforced
+  // exhaustive at compile time by `PLUGIN_ONLY_KEYS_DROPPED_BY_SHIM` above.
+  const { tools: _tools, customTools: _customTools, inFlightKey: _inFlightKey, ...shared } = subagent
+  return shared
 }

package/src/server/index.ts CHANGED Viewed

@@ -7,8 +7,10 @@ import {
   type CreateSessionResult,
 } from '@/agent'
 import { runPluginDoctorChecks, runPluginDoctorFix } from '@/agent/doctor'
+import type { LiveSubagentRegistry } from '@/agent/live-subagents'
 import { detectProviderError } from '@/agent/provider-error'
 import type { SessionOrigin } from '@/agent/session-origin'
+import type { CreateSessionForSubagent } from '@/agent/subagents'
 import type { ChannelRouter } from '@/channels/router'
 import { aggregateCronList, type CronListEntry, loadCron } from '@/cron'
 import type { HookBus } from '@/plugin'
@@ -79,6 +81,27 @@ export type ServerOptions = {
   // without it the four `exec_command`-family messages are answered with
   // `command_error` so the host CLI sees a clean failure.
   commandRunnerFactory?: (outbound: CommandOutbound) => CommandRunner
+  // Subagent orchestration plumbing for TUI sessions. Both fields must be
+  // present together for the spawn_subagent / subagent_output / subagent_cancel
+  // tools to surface; `createSession` gates registration on all three of
+  // (liveSubagentRegistry, subagentRegistry, createSessionForSubagent), and
+  // we derive subagentRegistry per WS connection from the same `pluginRuntime`
+  // snapshot that already feeds `plugins.registry` — so a reload landing
+  // mid-connection keeps using the snapshot the session opened with, matching
+  // the existing per-session lifecycle invariant.
+  //
+  // `createSessionForSubagent` is passed eagerly (not late-bound) because the
+  // TUI server is constructed AFTER the channel manager in `startAgent`,
+  // breaking the construction cycle that forces the channel session factory's
+  // `getCreateSessionForSubagent` late-binding.
+  //
+  // Channel and cron sessions get the same plumbing through
+  // `buildChannelSessionFactory` / `createSessionForCron` (see src/run/). The
+  // three top-level callers must stay aligned; otherwise the agent's tool
+  // surface diverges across origin kinds — exactly the gap PR #281 flagged
+  // as out-of-scope follow-up.
+  liveSubagentRegistry?: LiveSubagentRegistry
+  createSessionForSubagent?: CreateSessionForSubagent
 }
 const consoleLogger: ServerLogger = {
@@ -176,6 +199,8 @@ export function createServer({
   logger = consoleLogger,
   claimController,
   commandRunnerFactory,
+  liveSubagentRegistry,
+  createSessionForSubagent,
 }: ServerOptions) {
   const sessionStates = new WeakMap<Ws, SessionState>()
   const callIdToWs = new Map<string, AnyOwnerWs>()
@@ -351,6 +376,15 @@ export function createServer({
                   }
                 : undefined
             const origin: SessionOrigin = { kind: 'tui', sessionId: sessionFileId }
+            // Derive subagentRegistry from the same runtimeSnapshot that
+            // populates `plugins.registry`. createSession gates the orchestration
+            // tools on (liveRegistry, subagentRegistry, createSessionForSubagent,
+            // agentDir) being all-present; threading the registry alongside the
+            // two server-owned fields gives the gate a complete tuple for TUI
+            // sessions whenever the host plumbed in plugin runtime + subagent
+            // wiring (production), while keeping every existing test that omits
+            // either side at exactly its current tool surface.
+            const subagentRegistry = runtimeSnapshot?.subagents
             const result = await createSession({
               reloadRegistry,
               sessionManager,
@@ -360,6 +394,9 @@ export function createServer({
               ...(pluginsWiring ? { plugins: pluginsWiring } : {}),
               ...(containerName !== undefined ? { containerName } : {}),
               ...(runtimeVersion !== undefined ? { runtimeVersion } : {}),
+              ...(liveSubagentRegistry !== undefined ? { liveSubagentRegistry } : {}),
+              ...(subagentRegistry !== undefined ? { subagentRegistry } : {}),
+              ...(createSessionForSubagent !== undefined ? { createSessionForSubagent } : {}),
             })
             const session = 'session' in result ? result.session : result
             const dispose = 'session' in result && result.dispose ? result.dispose : async () => {}
@@ -392,6 +429,7 @@ export function createServer({
               )
               state.unsubBroadcast = stream.subscribe({ target: { kind: 'broadcast' } }, (msg) => {
+                routeSubagentCompletionReminder(state, msg, stream)
                 const payload: ServerMessage = {
                   type: 'notification',
                   payload: msg.payload,
@@ -712,6 +750,71 @@ function forwardAssistantError(ws: Ws, message: unknown, logger: ServerLogger, s
   send(ws, { type: 'error', message: detected.message })
 }
// Turns a broadcast `subagent.completed` notification into a prompt aimed at
// the session that spawned the subagent.
//
// - `state`: the per-connection session state; its `sessionFileId` is compared
//   against the notification's `parentSessionId`, and its drain queue decides
//   the delivery mode.
// - `msg`: the broadcast message; only its `payload` is inspected.
// - `stream`: where the resulting prompt is published.
//
// Anything that is not an object payload with `kind === 'subagent.completed'`
// and a matching string `parentSessionId` is ignored. Missing or mistyped
// descriptive fields fall back to placeholders instead of aborting.
function routeSubagentCompletionReminder(state: SessionState, msg: StreamMessage, stream: Stream): void {
  const raw = msg.payload
  if (raw === null || typeof raw !== 'object') return

  // The payload is untyped at this boundary, so every field is narrowed by
  // hand before use.
  const fields = raw as {
    kind?: unknown
    taskId?: unknown
    subagent?: unknown
    parentSessionId?: unknown
    ok?: unknown
    durationMs?: unknown
    error?: unknown
  }
  if (fields.kind !== 'subagent.completed') return

  // Only the session that owns the subagent reacts to its completion.
  const parentSessionId = fields.parentSessionId
  if (typeof parentSessionId !== 'string') return
  if (parentSessionId !== state.sessionFileId) return

  const text = renderCompletionReminder({
    subagent: typeof fields.subagent === 'string' ? fields.subagent : 'subagent',
    taskId: typeof fields.taskId === 'string' ? fields.taskId : '<unknown>',
    ok: fields.ok === true,
    durationMs: typeof fields.durationMs === 'number' ? fields.durationMs : 0,
    error: typeof fields.error === 'string' ? fields.error : undefined,
  })

  // Queue behind pending work when the session has any; otherwise interrupt.
  const busy = state.drainQueue.length !== 0 || state.draining
  const delivery = busy ? 'queue' : 'interrupt'

  stream.publish({
    target: { kind: 'session', sessionId: state.sessionFileId },
    payload: { kind: 'prompt', text, delivery },
    meta: { source: 'subagent-completion' },
  })
}
// Builds the `<system-reminder>` text announcing that a subagent task ended.
//
// Success and failure share the same envelope and the same
// "Subagent `name` (taskId)" lead-in; they differ in the verb, in whether an
// error is appended (defaulting to "unknown error"), and in the closing hint
// about `subagent_output`.
function renderCompletionReminder(args: {
  subagent: string
  taskId: string
  ok: boolean
  durationMs: number
  error?: string
}): string {
  const { subagent, taskId, ok, durationMs, error } = args
  const elapsed = formatReminderDuration(durationMs)
  const subject = `Subagent \`${subagent}\` (${taskId})`

  const sentence = ok
    ? `${subject} completed in ${elapsed}. ` + `Use subagent_output to fetch the result.\n`
    : `${subject} FAILED after ${elapsed}: ${error ?? 'unknown error'}. ` + `Use subagent_output to inspect.\n`

  return `<system-reminder>\n` + sentence + `</system-reminder>`
}
// Formats an elapsed time in milliseconds for the subagent completion
// reminder: `Nms` below one second, `Ns` below one minute, `NmSs` beyond.
//
// The value arrives from an untyped payload that is only checked with
// `typeof === 'number'`, so NaN, ±Infinity, negative and fractional numbers
// can all reach this function. They are normalised to a non-negative whole
// number of milliseconds first; otherwise the output would contain strings
// such as `NaNmNaNs`, `-5ms` or `12.345ms`. Non-finite input is treated as 0,
// matching the caller's fallback for a missing duration. Fractions are
// truncated, which agrees with the truncation already applied to seconds.
function formatReminderDuration(ms: number): string {
  const wholeMs = Number.isFinite(ms) ? Math.max(0, Math.floor(ms)) : 0
  if (wholeMs < 1000) return `${wholeMs}ms`
  const totalSec = Math.floor(wholeMs / 1000)
  if (totalSec < 60) return `${totalSec}s`
  const min = Math.floor(totalSec / 60)
  const sec = totalSec % 60
  return `${min}m${sec}s`
}
 function enqueuePrompt(
   ws: Ws,
   state: SessionState,

package/src/skills/typeclaw-claude-code/SKILL.md ADDED Viewed

@@ -0,0 +1,273 @@
+---
+name: typeclaw-claude-code
+description: Use this skill whenever you decide to delegate substantial coding or code-analysis work to Claude Code (Anthropic's official coding-agent CLI). Triggers include "use Claude Code", "ask Claude Code", "delegate to claude", "claude cli", "have claude do it", any task where you want a more capable agent than yourself, and any time you're about to run `claude` from a shell. Read it before you spawn the CLI — Claude Code is a TTY-only TUI in interactive mode (you must drive it through tmux, not pipes), it operates inside a dedicated `git worktree` checkout under `/tmp/` so its commits never pollute the agent folder, and you detect "turn done" through a `Stop` hook that writes a sentinel file. Skipping this skill means you'll either fall back to `claude -p` (which strips plan mode and sub-agents), let claude mutate the live agent checkout (which loses you the rollback safety), or try to parse the TUI buffer with capture-pane heuristics (fragile, version-locked).
+---
+# typeclaw-claude-code
+You can delegate work to Claude Code, Anthropic's official coding agent. The agent runs as an interactive TUI: it plans, uses sub-agents, edits files, runs tools — the full loop. You drive it through tmux because your own process has no TTY, you isolate it in a dedicated `git worktree` so its experiments never touch the live agent checkout, and you detect "turn done" through a `Stop` hook that writes a sentinel file (not by parsing the TUI buffer).
+This skill is for the case where Claude Code is the right tool: hard architecture work, multi-file refactors, deep code analysis, a second-opinion read on something you wrote. It is **not** for trivial edits — the round-trip cost (worktree setup + process spawn + auth check + TUI init + at least one full Claude turn) is 15–45 seconds and several thousand tokens of someone else's context window. Do trivial edits yourself.
+## When to delegate to Claude Code
+Use Claude Code for:
+- **Multi-file refactors** that need a holistic plan before any edit lands.
+- **Code analysis** the user wants done thoroughly — "review this module", "find the bug in this 800-line file", "explain why X is slow".
+- **Implementations you're unsure about** where a more capable model would catch issues you'd miss.
+- **A second pair of eyes** on a design you've already drafted, especially when the user asks for one.
+Do **not** use Claude Code for:
+- One-line edits, typo fixes, single-function tweaks.
+- Anything where the user is watching your tool calls and wants to see each step — Claude's intermediate output is captured but not streamed back to the user.
+- Tasks that depend on context you haven't extracted yet. Claude won't have repo-wide context either; you have to brief it explicitly.
+## First-time auth (interactive)
+If `claude` is installed but no credential is set up, you have to broker the auth flow yourself. The user is talking to you through the TUI (or a channel); you walk them through one of two paths.
+**Decision rule, top to bottom:**
+1. **Already authenticated?** Run `env | grep -E '^(ANTHROPIC_API_KEY|CLAUDE_CODE_OAUTH_TOKEN)='` — if either is present, skip auth entirely.
+2. **User has an Anthropic Console workspace** (API billing, no subscription) → API key path.
+3. **User has a Claude Pro/Max/Team/Enterprise subscription** → OAuth token path.
+4. **User is unsure** → ask which kind of Claude account they have. Both paths are now equally low-friction (one user action each — paste an API key, or run one command on their machine and paste the result), so the old "prefer API key when unsure" bias is gone. Pick by account shape, not by flow complexity.
+Both paths converge on the same final steps: read `.env`, merge one new `KEY=value` line, write back with the `nonWorkspaceWrite` guard ack, verify, and prompt the user to restart the container. Only the credential differs.
+### API key path
+1. Ask the user: "Paste your Anthropic API key (starts with `sk-ant-`) — or say 'cancel' to use OAuth instead."
+2. **Validate** the pasted value before writing: `/^sk-ant-[A-Za-z0-9_-]{20,}$/`. If it doesn't match, refuse and ask again — neither the guard nor the restart tool catches a malformed token.
+3. **Read** the existing `.env` first (if any). Parse it into a key→value map so you don't clobber unrelated entries.
+4. **Reconstruct** the full `.env` content with `ANTHROPIC_API_KEY=<value>` added or replaced.
+5. **Write** with `acknowledgeGuards: { nonWorkspaceWrite: true }`. `.env` is in the `nonWorkspaceWrite` guard's deny set; the call fails without the ack flag.
+6. **Verify** by re-reading the file.
+7. **Ask the user**: "Auth is on disk. The container needs to restart to load it (TUI will briefly disconnect). May I restart now, or do you have other changes to make first?"
+8. On yes → call the `restart` tool. On no → tell them to run `typeclaw restart` themselves when ready.
+### OAuth path
+The OAuth flow runs **on the user's own machine**, not inside the container. The user generates a long-lived `CLAUDE_CODE_OAUTH_TOKEN` with `claude setup-token` on whatever local machine they're already authenticated on, copies the printed token, and pastes it back to you. You write it to `.env` exactly like the API key path.
+Why this works: `claude setup-token` is Anthropic's documented path for "CI pipelines, scripts, or other environments where interactive browser login isn't available" ([code.claude.com/docs/en/authentication](https://code.claude.com/docs/en/authentication)). A typeclaw container is exactly that environment. The token is one-year-lived, authenticates against the user's Claude subscription, and is scoped to inference only — it can't establish Remote Control sessions or otherwise act outside of `claude` CLI calls.
+Do **not** run `claude setup-token` inside the container. The container has no browser, no display, and (for remote-host typeclaw deployments) is on a different machine from the user's browser anyway. The user's local machine — where a subscriber normally already has `claude` installed and signed in (step 1 below confirms this) — is the right place to run the one-off `setup-token` command.
+1. Confirm with the user: "Do you have the `claude` CLI installed on your local machine and are you signed in to it with your Claude Pro/Max/Team/Enterprise account? If not, install it from claude.com/code and `claude login` first."
+2. Once they confirm, instruct them: "Run `claude setup-token` on your machine. It opens a browser, you authorize, and the terminal prints a long token (looks like `sk-ant-oat01-...` or similar). Copy that token and paste it back to me. The token is long-lived (one year) and authenticates against your Claude subscription — keep it private."
+3. When they paste, **validate** before writing: `/^[A-Za-z0-9_-]{30,}$/`. Strip surrounding whitespace first. If it doesn't match (too short, contains slashes, looks like a URL or a sentence), refuse and ask again — the user may have pasted a partial copy or the wrong line.
+4. **Read** the existing `.env` first. Parse it into a key→value map.
+5. **Reconstruct** the full `.env` content with `CLAUDE_CODE_OAUTH_TOKEN=<value>` added or replaced.
+6. **Write** with `acknowledgeGuards: { nonWorkspaceWrite: true }`.
+7. **Verify** by re-reading the file.
+8. **Ask before restart** (same prompt as the API key path).
+9. On yes → call the `restart` tool. On no → `typeclaw restart` themselves when ready.
+The full validation rules, the failure modes on the user's side (their `claude` CLI is signed out, their `setup-token` command 401s, their subscription is expired), and the rationale for not doing the OAuth dance in-container are in `references/auth-flow.md`.
+### Cost-cap warning
+Interactive-mode Claude Code has **no built-in spend cap** — `--max-budget-usd` only works in `-p` mode, which is not what we use here. If the user is on the API-key path, recommend setting a workspace spend limit in the Anthropic Console; that's the only safety net. If they're on OAuth (subscription), usage is bounded by the subscription's monthly Agent SDK credit pool. Tell them once before the first delegation so it's not a surprise.
+## Prerequisites
+Before you spawn `claude` for any real work:
+- **`docker.file.claudeCode: true`** in `typeclaw.json`. Verify with `which claude`; if missing, the toggle isn't on. Tell the user to enable it and `typeclaw start --build`.
+- **`docker.file.tmux: true`** (default `true`, but check). Verify with `which tmux`.
+- **Auth set up** — see above. Verify with `env | grep -E '^(ANTHROPIC_API_KEY|CLAUDE_CODE_OAUTH_TOKEN)='`.
+- **Agent folder is a git repo.** Verify with `git -C /agent rev-parse --is-inside-work-tree`. The worktree model below requires it. If the user's agent folder somehow isn't a repo (rare — `typeclaw init` scaffolds one), tell them to `git init && git add -A && git commit -m "initial"` first.
+- **No uncommitted changes that you care about.** `git -C /agent status --porcelain` should be clean, or you should be willing to set the working tree aside before delegating. The worktree is a separate checkout, so claude can't see your uncommitted changes — meaning claude operates on the last committed state. If the user wants claude to work with in-progress edits, commit them first (even on a WIP branch).
+If any prerequisite is missing, stop and surface the gap to the user. Do not try to install `claude` yourself in the running container — the install belongs in the Dockerfile layer, not at runtime.
+## Create the worktree
+Each delegation runs inside a dedicated `git worktree` checkout under `/tmp/`. This is the load-bearing isolation that makes the rest of the skill safe:
+- **Claude can edit, commit, reset, run tests** — none of it touches the agent folder's live working tree or its main branch pointer.
+- **You get perfect introspection.** `git diff` between claude's branch and your main checkout shows exactly what claude changed; `git log` shows how it got there.
+- **Cleanup is bounded.** When you're done, you remove the worktree and its branch; nothing persists on disk except deliberately cherry-picked commits.
+- **The agent folder's `git status` stays clean during delegation** — the user can keep working on their own checkout while claude operates in parallel.
+### Setup
+Pick a task id (short hex string or `verb-noun` like `refactor-auth`) and create the worktree:
+```sh
+git -C /agent worktree add -b cc-<task-id> /tmp/cc-<task-id> HEAD
+cd /tmp/cc-<task-id>
+mkdir -p .claude
+```
+This creates:
+- A new branch `cc-<task-id>` rooted at the agent folder's current `HEAD`.
+- A new working tree at `/tmp/cc-<task-id>/` containing every file from that commit.
+- An entry in `/agent/.git/worktrees/cc-<task-id>/` that ties the two together.
+The worktree shares the agent folder's `.git` directory but has its own `HEAD`, index, and working tree. Branch state lives in `/agent/.git/refs/heads/cc-<task-id>` regardless of where the worktree itself lives on disk.
+Inside `/tmp/cc-<task-id>/`, write the per-task hook config (see "The Stop hook" below):
+```
+/tmp/cc-<task-id>/
+├── .claude/
+│   └── settings.json        # registers the Stop hook
+├── hook-on-stop.sh          # the hook script, chmod +x
+├── sentinel.json            # written by the hook (does not exist yet)
+├── .done                    # flag file (does not exist yet)
+└── ...                      # plus every file from the agent folder's HEAD
+```
+### Why `/tmp/`, not `workspace/`?
+`workspace/` is the agent folder's gitignored scratch zone — fine for one-off scripts. But a `git worktree` is a _checkout_, not scratch: it carries an index, refs in `/agent/.git/worktrees/`, and (briefly) shares working-tree state with the main checkout. Putting it under `workspace/` would mean the agent folder contains a worktree of itself, which works mechanically but is recursive and confusing (nested worktrees? infinite recursion if claude does `git status`?). `/tmp/cc-<id>/` keeps the worktree clearly outside the agent folder. It's also genuinely ephemeral — `/tmp/` is tmpfs-ish, survives container life but never enters git history or backups.
+## The Stop hook
+Claude Code fires a `Stop` hook every time it finishes responding — turn-end, not session-end. The hook runs an arbitrary shell command with the lifecycle event payload (JSON) on stdin. We use this as the done-signal: the hook writes the payload to `sentinel.json` and `touch`es `.done`, and your polling loop watches for `.done`.
+Minimum `/tmp/cc-<id>/.claude/settings.json`:
+```json
+{
+  "hooks": {
+    "Stop": [
+      {
+        "matcher": "*",
+        "hooks": [{ "type": "command", "command": "./hook-on-stop.sh" }]
+      }
+    ]
+  }
+}
+```
+Minimum `/tmp/cc-<id>/hook-on-stop.sh` (chmod +x):
+```sh
+#!/bin/sh
+# stdin carries the Stop event JSON; transcript_path points at the JSONL.
+cat > sentinel.json.tmp
+mv sentinel.json.tmp sentinel.json
+touch .done
+```
+The temp-file-then-rename keeps the read side from ever seeing a partial sentinel. The full schema of the Stop event (every field Claude Code populates, including `last_assistant_message` and `transcript_path`) is in `references/stop-hook.md`.
+## Driving the session
+The minimum protocol — translate to your actual tool calls:
+1. Create the worktree, write the hook config (above).
+2. `tmux new-session -d -s cc-<id> -c /tmp/cc-<id> claude`.
+3. Wait ~3 seconds for the TUI to initialize.
+4. `tmux send-keys -t cc-<id> "<your prompt>" Enter`.
+5. **Poll** for `/tmp/cc-<id>/.done` in a 500ms-cadence loop with a wall-clock budget (default 10 minutes). On every iteration, also check `tmux has-session -t cc-<id>` — if the session died, claude crashed or auth failed.
+6. When `.done` exists: `rm .done`, read `sentinel.json`, examine `last_assistant_message`.
+7. Decide using the multi-turn loop below.
+8. When done: `tmux send-keys -t cc-<id> "/exit" Enter && sleep 1 && tmux kill-session -t cc-<id>`.
+The full polling implementation, the ANSI-handling rules for `capture-pane` fallbacks, and the "tmux session died unexpectedly" recovery path are in `references/tmux-driving.md`.
+## The multi-turn decision loop
+`Stop` fires every turn — including turns where claude paused to ask you a question, not just turns where claude finished the task. After every Stop sentinel, read `last_assistant_message` and decide:
+- **Ends with a question mark, or contains "Do you want me to", "Should I", "Could you clarify"** → claude is asking a clarifying question. Compose an answer from the original task brief and `send-keys` it back. Reset the loop: `rm .done`, poll again.
+- **Mentions a permission-style ask** ("May I run `<command>`?", "Allow me to edit `<file>`?") → answer per the task's safety constraints. If the constraint is unclear, abort with `/exit` and surface to the user — never invent a yes/no on the user's behalf for an unbounded operation.
+- **Looks like a final result** (code block + summary, or "Done.", "Here's the result.", "I've finished") → capture and `/exit`.
+- **Looks like a status update mid-tool-use** ("Let me check…", "Reading the file now…") → this is a spurious Stop (a Claude turn-boundary that isn't real task progress). Just `rm .done` and keep polling.
+**Hard turn cap: 8 turns per delegation.** Beyond that, either the task is too complex to delegate cleanly or claude is stuck in a loop. Abort with `/exit`, capture what you have, surface to the user with: "Claude took 8 turns without finishing — here's what it produced, what do you want to do?"
+This loop is the most failure-prone part of the skill. If you find yourself uncertain whether a message is a question or a result, **default to surfacing to the user**, not to guessing. Wrong answers compound across turns.
+## Capturing the output
+Four sources, in order of preference:
+1. **`git -C /agent diff main..cc-<id>`** (or plain `git diff main..cc-<id>` when you are already inside `/agent` or the worktree; substitute the agent folder's actual branch name if it isn't `main`). This is the killer feature of the worktree model — the exact set of changes claude made, branch-vs-branch. Use this for code-change tasks.
+2. **`git -C /agent log --oneline main..cc-<id>`** for how claude got there (the sequence of commits). Useful when claude broke a refactor into steps you want to attribute or cherry-pick.
+3. **`sentinel.json` from the final turn** (`last_assistant_message`). The narrative summary claude gave you. Use this for analysis tasks where the answer is prose, not code.
+4. **The JSONL transcript** at `transcript_path` in the sentinel. The complete conversation including intermediate tool calls. Use when the diff/log aren't enough and you need to see how claude reasoned. Schema in `references/stop-hook.md`.
+For code-change tasks, the canonical pattern is:
+1. Read `last_assistant_message` for the summary.
+2. Run `git diff main..cc-<id> -- <files>` to see the actual changes.
+3. Decide: are these changes good? If yes, either `git cherry-pick <commits>` onto the agent folder's branch OR copy the changes manually into the main checkout and commit there with proper attribution (per `typeclaw-git`).
+4. Throw away the `cc-<id>` branch.
+Never paste Claude's output verbatim into your reply or a commit message. Summarize, attribute ("Claude Code's analysis: ..."), and stay accountable for the work. You delegated up; you didn't outsource ownership.
+## Cleanup discipline
+Cleanup is git-aware: a worktree isn't just a directory. Three steps, in order:
+```sh
+tmux kill-session -t cc-<id> 2>/dev/null || true
+git -C /agent worktree remove --force /tmp/cc-<id>
+git -C /agent branch -D cc-<id>
+```
+- **`tmux kill-session`** first because claude might still be holding files open. `|| true` because a clean `/exit` already killed the session.
+- **`git worktree remove --force`** because the working tree may have dirty files (the sentinel, the hook script, claude's in-progress edits). `--force` skips the "uncommitted changes" check; this is correct here because we're explicitly discarding the worktree.
+- **`git branch -D cc-<id>`** to delete the branch ref. Without this, `cc-<id>` lingers in `git branch -a` indefinitely. `-D` (capital) because `cc-<id>` is unmerged into anything you care about.
+Always do all three, including on failure paths. Orphan worktrees:
+- Show up in `git worktree list` forever.
+- Keep `cc-<id>` checked out in the stale worktree at `/tmp/cc-<id>`, so git refuses to delete that branch or check it out from the agent folder until the worktree is removed or pruned.
+- Make the next delegation with the same task-id fail with "branch already exists".
+Before starting a new delegation, check for orphans:
+```sh
+git -C /agent worktree list | grep cc-
+tmux ls 2>/dev/null | grep '^cc-'
+```
+Kill anything you find first.
+## When not to delegate
+A re-statement, because this is where the skill is most often misused:
+- **Trivial edits**: the round-trip cost dominates. Do it yourself.
+- **Tasks needing live user visibility**: claude's tool calls don't stream back through TypeClaw. The user sees a long pause, not progress. Use your own tools.
+- **Tasks where you don't have the context to brief claude**: spend tokens narrowing the problem first. A vague delegation produces a vague result.
+- **Tasks needing any secret beyond `ANTHROPIC_API_KEY`**: claude only sees the prompt you send it and the files in its worktree (which is everything at `HEAD`). Don't try to pass secrets through the prompt — they'll land in claude's transcript and in your sentinel.
+## Things you must not do
+- **Do not use `claude -p` for delegation work.** The headless print mode strips plan mode, sub-agents, and the agent loop. The whole reason to delegate up is the loop. If you find yourself reaching for `-p`, the right answer is probably "do it yourself".
+- **Do not run `claude` directly inside `/agent`.** Always inside `/tmp/cc-<id>/`. Running claude in the agent folder lets it mutate the live working tree and break the user's session in flight.
+- **Do not skip the worktree.** Even for short delegations, the worktree is what gives you the `git diff` introspection and the rollback safety. Skipping it because "this one's small" is the path to claude accidentally committing on the wrong branch.
+- **Do not share a tmux session across two delegated tasks.** Each task needs its own worktree, its own session, and its own `.claude/settings.json`. Sharing corrupts the sentinel state and crosses transcripts.
+- **Do not leave a tmux session, worktree, or branch alive after capturing the result.** All three need explicit teardown. Reusing them defeats the per-task isolation that makes the Stop hook reliable.
+- **Do not push claude's branch to a remote.** `cc-<id>` is throwaway. If something useful happened, cherry-pick onto a real branch first; don't push the experimental branch directly.
+- **Do not merge claude's branch into main without reviewing the diff.** The `git diff main..cc-<id>` is your review surface. Skipping the diff and merging blindly means you don't actually know what shipped.
+- **Do not commit `/tmp/cc-<id>/` artifacts back to the agent folder.** The sentinel, the hook script, the captured pane content are scratch — they live in `/tmp/`, they die with `worktree remove`.
+- **Do not paste Claude's output verbatim into a commit message or a user reply.** Summarize and attribute. You're accountable for the work you ship.
+- **Do not put `ANTHROPIC_API_KEY` or `CLAUDE_CODE_OAUTH_TOKEN` in `typeclaw.json`, in a prompt, or in any committed file.** They live in `.env`, which is gitignored. Period.
+- **Do not poll the JSONL transcript directly as the done-signal.** The JSONL has documented race conditions (the file can be stale when `Stop` fires, or occasionally missing entirely). The sentinel is the reliable signal; the JSONL is for content, not lifecycle.
+- **Do not write to `.env` without `acknowledgeGuards: { nonWorkspaceWrite: true }`.** The guard will refuse, the agent loop will retry the same broken write, and you'll waste tokens fighting the guard. The ack is required on every write, not just the first one.
+- **Do not edit `.env` with the `edit` tool's patch semantics.** Use read-modify-write: read the whole file, reconstruct the new content, write the whole file. `.env` is a flat KV store; a fragile `oldText` match could corrupt unrelated lines.
+- **Do not run `claude setup-token` inside the container.** It's a TUI OAuth flow that wants a browser. The container has no display, no browser, and is often on a different machine from the user anyway. Always have the user run `setup-token` on their own machine and paste the resulting token back; never spawn it in tmux on this side.
+- **Do not echo, log, or transcribe the pasted `CLAUDE_CODE_OAUTH_TOKEN` value back to the user, into a sentinel, into a commit message, or into any message you send.** It's a one-year credential. Confirm receipt with "got it, validating" — never with the token itself.
+- **Do not invent answers to Claude's clarifying questions.** If you can't derive the answer from the original task brief, surface the question to the user. Wrong answers compound across multi-turn delegations.
+- **Do not exceed 8 turns per delegation.** Abort, capture what you have, surface. Long delegations almost always mean the task wasn't shaped right.
+- **Do not assume `claude` exists.** If `which claude` returns empty, the `docker.file.claudeCode` toggle isn't on. Tell the user, don't try to install it yourself.
+## Cross-references
+- **`references/auth-flow.md`** — both auth paths in detail: the API-key recap, the OAuth user-machine flow (what to tell the user, what their `claude setup-token` output looks like, validation rules), and the failure-mode catalogue (expired subscription, wrong account, malformed paste).
+- **`references/tmux-driving.md`** — full polling implementation, ANSI handling, session-died recovery, the `capture-pane` fallback details, the worktree-is-not-scratch distinction.
+- **`references/stop-hook.md`** — complete `Stop` event JSON schema, `SubagentStop` differences, transcript JSONL schema (unofficial, reverse-engineered), documented race conditions to handle.
+- **`typeclaw-config`** — the `docker.file.claudeCode` toggle that gates the install.
+- **`typeclaw-git`** — commit discipline for any cherry-picks or hand-copies from claude's worktree back into the agent folder.
+- **`typeclaw-monorepo`** — the `workspace/` vs `packages/` distinction (this skill uses `/tmp/`, not `workspace/`, for reasons explained above).