npm - typeclaw - Versions diffs - 0.14.0 → 0.15.1 - Mend

typeclaw 0.14.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/package.json +2 -2
package/src/agent/system-prompt.ts +10 -9
package/src/agent/tools/channel-reply.ts +37 -27
package/src/agent/tools/channel-send.ts +13 -8
package/src/agent/tools/runtime-notice.ts +28 -0
package/src/agent/tools/webfetch/tool.ts +1 -0
package/src/agent/tools/websearch.ts +2 -1
package/src/channels/adapters/discord-bot.ts +8 -1
package/src/channels/adapters/kakaotalk-format.ts +239 -0
package/src/channels/adapters/kakaotalk.ts +54 -5
package/src/channels/adapters/telegram-bot.ts +11 -1
package/src/channels/router.ts +204 -21
package/src/channels/types.ts +22 -0
package/src/cli/inspect.ts +29 -25
package/src/config/providers.ts +17 -4
package/src/container/start.ts +17 -0
package/src/init/dockerfile.ts +21 -1
package/src/inspect/live.ts +13 -3
package/src/sandbox/availability.ts +35 -0
package/src/sandbox/build.ts +128 -0
package/src/sandbox/errors.ts +20 -0
package/src/sandbox/index.ts +14 -0
package/src/sandbox/policy.ts +47 -0
package/src/sandbox/quote.ts +18 -0
package/src/server/index.ts +16 -2
package/src/shared/index.ts +1 -7
package/src/shared/local-time.ts +14 -22
package/src/shared/protocol.ts +4 -0
package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +11 -9
package/typeclaw.schema.json +2 -0

package/src/channels/router.ts CHANGED Viewed

@@ -45,7 +45,9 @@ import type {
   InboundMessage,
   OutboundCallback,
   OutboundMessage,
+  QuoteAnchorSource,
   ResolvedChannelNames,
+  SendErrorCode,
   SendResult,
   TypingCallback,
 } from './types'
@@ -98,6 +100,35 @@ export const SESSION_GC_INTERVAL_MS = 60 * 1000
 // Enforced inside router.send for `source: 'tool'` callers; system
 // recovery paths (`source: 'system'`) bypass.
 export const MAX_CHANNEL_SENDS_PER_TURN = 10
+// Ceiling on tool-source channel sends that a same-turn router policy DENIED
+// without delivering — `skip-locked`, `turn-cap`, or `duplicate`. Such denials
+// return a soft error and do NOT increment `consecutiveSends`, so a model that
+// ignores the denial and retries never trips `MAX_CHANNEL_SENDS_PER_TURN`.
+// Both production livelocks had this shape: the model alternated a no-op
+// `skip_response` with a denied `channel_reply` (~200-400x in one
+// `session.prompt()`) — the interleaving defeated the byte-identical
+// loop-guard's 5-in-a-row streak, and the denials bypassed the send cap. One
+// turn was all `skip-locked`, the other all `duplicate` (byte-identical text).
+// On reaching this ceiling we ABORT the run's AbortSignal (`agent.abort()`), which
+// ends the turn on the next assistant stream. We can't just throw: the pi tool
+// executor catches a tool's throw into an error result and the turn continues.
+// Counted per send-target and only when NO concurrent reservation for that
+// target is in flight, so a legitimate parallel send-burst (one winner + many
+// same-tick duplicate/cap denials) is never mistaken for a loop. Reset at turn
+// start alongside `turnSeq`.
+export const MAX_POLICY_DENIED_CHANNEL_SENDS_PER_TURN = 3
+// Per-request output-token cap for channel sessions, threaded into the agent's
+// stream options to override pi-ai's silent `Math.min(model.maxTokens, 32000)`
+// default (`buildBaseOptions` in @mariozechner/pi-ai). Without it, Fireworks'
+// kimi-k2p6-turbo — which degenerates into single-token repetition on the
+// post-tool follow-up turn — runs the full 32000 tokens (~116s of garbage that
+// never produces a reply) before `stopReason: 'length'`. The terminal-reply
+// hook below removes the turn that triggers this; the cap bounds any other path
+// that still reaches a channel LLM call. 4096 fits a thinking block plus a
+// nontrivial reply (healthy channel turns observed at ~317 output tokens
+// including reasoning). Deliberately NOT lowered in `providers.ts`, where
+// `maxTokens` is the model's true capability that compaction math reads.
+export const CHANNEL_MAX_OUTPUT_TOKENS = 4096
 // Rolling window for outbound send-rate telemetry. 5s matches Discord's
 // rate-limit shape (5 msg / 5 s / channel) and comfortably covers Slack's
 // 1 msg/s sustained. The window is observational; exceeding the burst
@@ -347,6 +378,19 @@ type LiveSession = {
   // regardless of which order the model tried them in. Updated only at
   // turn start; reads against the live counter elsewhere are intentional.
   successfulSendsAtTurnStart: number
+  // Per-send-target count of tool-source sends with a reservation currently
+  // in flight (slot reserved, outbound callback not yet settled). Lets the
+  // policy-denial guard tell a legitimate parallel send-burst (denials that
+  // race a still-in-flight winner) from a sequential retry loop (denials with
+  // nothing in flight). Incremented at reservation, decremented in the
+  // callback-loop `finally` so an adapter throw can't strand a target.
+  inFlightToolSends: Map<string, number>
+  // Per-send-target count of policy-denied tool sends this turn that did NOT
+  // race an in-flight reservation. Drives the `agent.abort()` at
+  // `MAX_POLICY_DENIED_CHANNEL_SENDS_PER_TURN` that breaks the alternating-tool
+  // livelock the byte-identical loop-guard misses. Reset at turn start and
+  // cleared per-target on a successful delivery to that target.
+  policyDeniedToolSendsThisTurn: Map<string, number>
   // Stamped by `markTurnSkipped` (called from the `skip_response` tool)
   // with the current `turnSeq`. Read at the top of `validateChannelTurn`:
   // if it matches the just-completed turn, recovery is skipped entirely
@@ -1011,6 +1055,8 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
         successfulChannelSends: 0,
         turnSeq: 0,
         successfulSendsAtTurnStart: 0,
+        inFlightToolSends: new Map(),
+        policyDeniedToolSendsThisTurn: new Map(),
         skippedTurn: null,
         pendingQuoteCandidate: null,
         recentEngagedPeerBotTurns: [],
@@ -1025,6 +1071,8 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
         logger.error(`[channels] ${live.keyId}: LLM call failed: ${err.message}`)
       })
       live.unsubTypingActivity = subscribeTypingActivity(created.session, live)
+      installChannelReplyTerminalHook(live)
+      installChannelOutputCap(live)
       liveSessions.set(keyId, live)
       if (isColdStart) {
@@ -1182,6 +1230,54 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     })
   }
+  // After a successful `channel_reply`, the model has delivered its user-facing
+  // response and the turn is semantically done. pi-agent-core's loop, however,
+  // unconditionally makes one more LLM call after any tool result (the
+  // "post-tool follow-up") to let multi-step tool chains continue. On a turn
+  // that ended with `channel_reply` there is nothing left to say, and Fireworks'
+  // kimi-k2p6-turbo degenerates that empty follow-up into a 32000-token
+  // repetition loop (see CHANNEL_MAX_OUTPUT_TOKENS). Aborting the run's signal
+  // from `afterToolCall` — which runs during tool execution, before the loop
+  // re-enters the LLM stream — makes the follow-up stream observe an already-
+  // aborted signal and return `stopReason: 'aborted'` without generating. This
+  // is the same `agent.abort()` lever the policy-denied-send cap uses; the
+  // tool's own result is already persisted, so the reply still lands.
+  //
+  // Scope is deliberately narrow: only `channel_reply` (the current-chat user-
+  // facing response), only on success, and only for channel sessions. Read-only
+  // tools and `channel_send` must keep the follow-up so genuine multi-step turns
+  // continue. A prior non-typeclaw `afterToolCall` (none today) would be
+  // composed, not clobbered.
+  const installChannelReplyTerminalHook = (live: LiveSession): void => {
+    const { agent } = live.session
+    const prior = agent.afterToolCall
+    agent.afterToolCall = async (context, signal) => {
+      const result = prior ? await prior(context, signal) : undefined
+      const succeeded =
+        context.toolCall.name === 'channel_reply' &&
+        !context.isError &&
+        (context.result.details as { ok?: unknown } | undefined)?.ok === true
+      if (succeeded && agent.signal?.aborted !== true) {
+        logger.info(`[channels] ${live.keyId} terminal_after_channel_reply`)
+        agent.abort()
+      }
+      return result
+    }
+  }
+  // Override pi-ai's hidden `Math.min(model.maxTokens, 32000)` output cap for
+  // channel sessions by threading an explicit `maxTokens` into every stream
+  // call. See CHANNEL_MAX_OUTPUT_TOKENS for why. Composes the existing streamFn
+  // (pi's default `streamSimple` unless a proxy was installed) and only fills
+  // `maxTokens` when the caller left it unset, so an explicit per-call value
+  // still wins.
+  const installChannelOutputCap = (live: LiveSession): void => {
+    const { agent } = live.session
+    const inner = agent.streamFn
+    agent.streamFn = (model, context, options) =>
+      inner(model, context, { ...options, maxTokens: options?.maxTokens ?? CHANNEL_MAX_OUTPUT_TOKENS })
+  }
   const startTypingHeartbeat = (live: LiveSession): void => {
     if (live.typingTimedOut || live.typingStopPromise) return
     if (live.destroyed) return
@@ -1370,6 +1466,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
         const successfulSendsBeforePrompt = live.successfulChannelSends
         live.turnSeq++
         live.successfulSendsAtTurnStart = successfulSendsBeforePrompt
+        live.policyDeniedToolSendsThisTurn.clear()
         await fireSessionTurnStart(live, text)
         try {
           await live.session.prompt(text)
@@ -1426,13 +1523,21 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     }, wait)
   }
-  const publishInbound = (event: InboundMessage, decision: 'engage' | 'observe' | 'denied' | 'claim'): void => {
+  const publishInbound = (
+    event: InboundMessage,
+    decision: 'engage' | 'observe' | 'denied' | 'claim',
+    // Undefined before a session exists (denied/claim intercepts). Carried so a
+    // session-scoped `typeclaw inspect` only sees its own session's inbounds —
+    // the broadcast otherwise fans out to every inspect client.
+    sessionId?: string,
+  ): void => {
     if (stream === undefined) return
     try {
       stream.publish({
         target: { kind: 'broadcast' },
         payload: {
           kind: 'channel-inbound',
+          ...(sessionId !== undefined ? { sessionId } : {}),
           adapter: event.adapter,
           workspace: event.workspace,
           chat: event.chat,
@@ -1569,7 +1674,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     })
     if (decision === 'observe') {
-      publishInbound(event, 'observe')
+      publishInbound(event, 'observe', live.sessionId)
       // Log every observe so an unanswered mention is diagnosable from logs
       // alone instead of "routed but no prompting" silence. The bracketed
       // shape mirrors `prompting batch=` so log scraping can pair them.
@@ -1578,7 +1683,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
       return
     }
-    publishInbound(event, 'engage')
+    publishInbound(event, 'engage', live.sessionId)
     updateLoopGuard(live, event)
@@ -1875,7 +1980,12 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     if (live && source === 'tool' && live.pendingQuoteCandidate !== null) {
       const quoteCandidate = refreshQuoteCandidate(live.pendingQuoteCandidate, live.contextBuffer)
       const anchor = decideQuoteAnchor(quoteCandidate, now(), options.configForAdapter(msg.adapter))
-      if (anchor !== null) msg = { ...msg, text: prependQuoteAnchor(msg.text ?? '', anchor) }
+      if (anchor !== null) {
+        msg =
+          resolveReplyRenderMode(msg) === 'native'
+            ? { ...msg, replyTo: { externalMessageId: anchor.externalMessageId, source: anchor.source } }
+            : { ...msg, text: prependQuoteAnchor(msg.text ?? '', anchor.source) }
+      }
       live.pendingQuoteCandidate = null
     }
     const text = normalizeSendText(msg.text)
@@ -1892,19 +2002,52 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     let priorLastSentText: string | undefined
     let reserved = false
     if (live && source === 'tool') {
+      // Every same-turn policy denial (skip-locked / turn-cap / duplicate)
+      // returns a soft error and does NOT increment `consecutiveSends`, so a
+      // model that ignores the denial and retries never trips the send cap. To
+      // bound that loop we route all three through one tally that ABORTS the run
+      // once the ceiling is reached. The discriminator that keeps legitimate parallel
+      // send-bursts soft: a denial only counts when NO reservation for the same
+      // target is in flight. In a `Promise.all` burst the synchronous denials
+      // all race the one in-flight winner, so they don't count; a sequential
+      // retry loop has nothing in flight, so it does. See
+      // `MAX_POLICY_DENIED_CHANNEL_SENDS_PER_TURN`.
+      //
+      // Why abort, not throw: pi-agent-core's tool executor catches a throw
+      // from a tool's execute() and converts it into an `isError` tool result —
+      // the turn would continue and the model could retry. The only thing that
+      // actually ends an in-flight turn is aborting the run's AbortSignal:
+      // `agent.abort()` flips it synchronously, then the NEXT assistant stream
+      // (after this tool returns) sees the aborted signal and ends the turn with
+      // stopReason 'aborted'. We must NOT call `session.abort()` here — it
+      // `await`s `waitForIdle()`, which would deadlock waiting for the very run
+      // this tool call belongs to. `agent.abort()` is the signal-only,
+      // non-blocking variant. We still return the soft denial for this call.
+      const denyPolicyToolSend = (error: string, code: SendErrorCode): SendResult => {
+        if ((live.inFlightToolSends.get(sendKey) ?? 0) > 0) {
+          return { ok: false, error, code }
+        }
+        const count = (live.policyDeniedToolSendsThisTurn.get(sendKey) ?? 0) + 1
+        live.policyDeniedToolSendsThisTurn.set(sendKey, count)
+        if (count >= MAX_POLICY_DENIED_CHANNEL_SENDS_PER_TURN) {
+          logger.warn(`[channels] ${live.keyId}: aborting turn — ${count} policy-denied channel sends (last: ${code})`)
+          if (live.session.agent.signal?.aborted !== true) live.session.agent.abort()
+        }
+        return { ok: false, error, code }
+      }
       // Tool-source send after `skip_response` for the same turn is a contract
       // violation: the model already committed to silence. Reject before any
       // state mutation so the model gets a clear error and the channel stays
       // silent. System-source sends (recovery, role-claim) are not affected.
       if (live.skippedTurn !== null && live.skippedTurn.turnSeq === live.turnSeq) {
-        return { ok: false, error: SKIP_RESPONSE_LOCK_ERROR, code: 'skip-locked' }
+        return denyPolicyToolSend(SKIP_RESPONSE_LOCK_ERROR, 'skip-locked')
       }
       const currentCount = live.consecutiveSends.get(sendKey) ?? 0
       if (currentCount >= MAX_CHANNEL_SENDS_PER_TURN) {
-        return { ok: false, error: TURN_CAP_ERROR, code: 'turn-cap' }
+        return denyPolicyToolSend(TURN_CAP_ERROR, 'turn-cap')
       }
       if (text !== undefined && live.lastSentText.get(sendKey) === text) {
-        return { ok: false, error: DUPLICATE_SEND_ERROR, code: 'duplicate' }
+        return denyPolicyToolSend(DUPLICATE_SEND_ERROR, 'duplicate')
       }
       // Reserve the slot before awaiting. If the callback rejects we roll
       // back below; if it succeeds we keep the increment. The slot reserve
@@ -1915,6 +2058,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
       priorLastSentText = live.lastSentText.get(sendKey)
       live.consecutiveSends.set(sendKey, currentCount + 1)
       if (text !== undefined) live.lastSentText.set(sendKey, text)
+      live.inFlightToolSends.set(sendKey, (live.inFlightToolSends.get(sendKey) ?? 0) + 1)
       reserved = true
     }
@@ -1924,13 +2068,24 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     const snapshot = Array.from(callbacks)
     let lastError: string | undefined
     let delivered = false
-    for (const cb of snapshot) {
-      const result = await cb(msg)
-      if (result.ok) {
-        delivered = true
-        break
+    try {
+      for (const cb of snapshot) {
+        const result = await cb(msg)
+        if (result.ok) {
+          delivered = true
+          break
+        }
+        lastError = result.error
+      }
+    } finally {
+      // Clear the in-flight reservation even if a callback threw, so a flaky
+      // adapter can never strand a target as permanently "in flight" and
+      // disable the policy-denial guard for it.
+      if (live && reserved) {
+        const inFlight = (live.inFlightToolSends.get(sendKey) ?? 1) - 1
+        if (inFlight <= 0) live.inFlightToolSends.delete(sendKey)
+        else live.inFlightToolSends.set(sendKey, inFlight)
       }
-      lastError = result.error
     }
     if (!delivered) {
@@ -1950,6 +2105,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
     if (live) {
       live.successfulChannelSends++
+      live.policyDeniedToolSendsThisTurn.delete(sendKey)
       // Don't stop the heartbeat here: the agent may still be mid-turn and
       // about to send another reply. drain()'s finally block owns turn-end
       // stop. But Slack's adapter outbound callback explicitly clears
@@ -2480,12 +2636,7 @@ function formatAuthorLine(
   return `${stamp}${formatAuthorReference(adapter, authorId, authorName)} (${authorName})${tag}: ${text}`
 }
-export type QuoteAnchorSource = {
-  adapter: AdapterId
-  authorId: string
-  authorName: string
-  text: string
-}
+export type { QuoteAnchorSource } from './types'
 // Picks the right author syntax for the platform so prompts and rendered
 // quote anchors use the same form the user would type in that channel.
@@ -2557,6 +2708,7 @@ type QuoteAnchorBatchEntry = {
   authorName: string
   authorIsBot: boolean
   receivedAt: number
+  externalMessageId: string
 }
 type QuoteAnchorObservedEntry = {
@@ -2566,10 +2718,18 @@ type QuoteAnchorObservedEntry = {
 export type QuoteAnchorCandidate = {
   source: QuoteAnchorSource
+  // Native id of the primary inbound, so a native-reply adapter can point at
+  // the exact message; the blockquote fallback ignores it.
+  externalMessageId: string
   primaryReceivedAt: number
   hadInterveningObserved: boolean
 }
+export type QuoteAnchorTarget = {
+  source: QuoteAnchorSource
+  externalMessageId: string
+}
 // Strips both current `[<Adapter> attachment #N: ...]` and legacy
 // `[<Adapter> message with ...]` placeholders that adapter
 // classifiers synthesize for non-text inbounds (KakaoTalk stickers,
@@ -2620,6 +2780,7 @@ export function captureQuoteCandidate(
   if (cleaned === '') return null
   return {
     source: { adapter, authorId: primary.authorId, authorName: primary.authorName, text: cleaned },
+    externalMessageId: primary.externalMessageId,
     primaryReceivedAt: primary.receivedAt,
     hadInterveningObserved: hasInterveningObserved(primary.receivedAt, observed),
   }
@@ -2647,12 +2808,34 @@ export function decideQuoteAnchor(
   candidate: QuoteAnchorCandidate | null,
   _nowMs: number,
   adapterConfig: ChannelAdapterConfig | undefined,
-): QuoteAnchorSource | null {
+): QuoteAnchorTarget | null {
   if (candidate === null) return null
   const config = adapterConfig?.quotedReply
   if (config !== undefined && config.enabled === false) return null
   if (!candidate.hadInterveningObserved) return null
-  return candidate.source
+  return { source: candidate.source, externalMessageId: candidate.externalMessageId }
+}
+export type ReplyRenderMode = 'native' | 'quote'
+// Per-adapter, per-shape decision: can this exact outbound carry a native
+// platform reply, or must it degrade to the blockquote fallback? Conditional
+// because native support is not uniform within an adapter — Telegram's
+// `sendMessage` accepts `reply_to_message_id` but `sendDocument` does not, so
+// an attachment-only Telegram reply must quote; the same text-only restriction
+// holds for Discord (`message_reference` rides on the text send, file uploads
+// land bare) and KakaoTalk. Slack's primitive is `thread`, not a per-message
+// reply, so it stays quote; GitHub's PR-review reply already rides on `thread`.
+//
+// KakaoTalk is `native` here even though its reply payload can fail to resolve
+// at send time — the adapter degrades to the blockquote fallback itself using
+// `replyTo.source`, so the router still routes it down the native branch.
+const NATIVE_REPLY_TEXT_ADAPTERS = new Set<AdapterId>(['telegram-bot', 'discord-bot', 'kakaotalk'])
+export function resolveReplyRenderMode(msg: OutboundMessage): ReplyRenderMode {
+  const hasText = normalizeSendText(msg.text) !== undefined
+  if (hasText && NATIVE_REPLY_TEXT_ADAPTERS.has(msg.adapter)) return 'native'
+  return 'quote'
 }
 type Sliced = { kind: 'message'; message: ChannelHistoryMessage } | { kind: 'elision'; elidedCount: number }

package/src/channels/types.ts CHANGED Viewed

@@ -126,6 +126,28 @@ export type OutboundMessage = {
   // `uploadFile` does not accept a content body or a thread id, see the
   // adapter for the workaround details.
   attachments?: OutboundAttachment[]
+  // Set by the router (native render mode + anchor fired) so an adapter can
+  // reply to the inbound it answers. Telegram/Discord consume `externalMessageId`;
+  // `quote`-mode adapters never see this (the router prepends the blockquote into
+  // `text` instead). `source` lets an adapter whose native primitive can fail at
+  // send time (KakaoTalk: payload built from a source message that may have
+  // scrolled out of history) degrade to the same blockquote fallback.
+  replyTo?: OutboundReplyTo
+}
+export type OutboundReplyTo = {
+  externalMessageId: string
+  source?: QuoteAnchorSource
+}
+// `adapter` selects the per-platform author-mention syntax in the blockquote
+// fallback. Lives here (not router.ts) so adapters can reconstruct a native
+// reply payload from the same shape the router renders quotes from.
+export type QuoteAnchorSource = {
+  adapter: AdapterId
+  authorId: string
+  authorName: string
+  text: string
 }
 export type SendErrorCode =

package/src/cli/inspect.ts CHANGED Viewed

@@ -49,31 +49,35 @@ export const inspectCommand = defineCommand({
     const escListener = isJson ? null : createEscListener()
     const liveHint = escListener === null ? undefined : escHintLine(color)
-    const result = await runInspectLoop({
-      agentDir: cwd,
-      ...(sessionArg !== undefined ? { sessionIdOrPrefix: sessionArg } : {}),
-      ...(filterArg !== undefined ? { filter: filterArg } : {}),
-      ...(sinceArg !== undefined ? { since: sinceArg } : {}),
-      json: isJson,
-      color,
-      selectSession: (sessions, selectOpts) => {
-        escListener?.pause()
-        return clackSelect(sessions, selectOpts?.initialSessionId).finally(() => {
-          escListener?.resume()
-        })
-      },
-      ...(liveSource !== undefined ? { liveSource } : {}),
-      signal,
-      newEscSignal: () => {
-        if (escListener === null) return new AbortController().signal
-        return escListener.armForStream()
-      },
-      ...(liveHint !== undefined ? { liveHint } : {}),
-      stdout: (line) => process.stdout.write(`${line}\n`),
-      stderr: (line) => process.stderr.write(`${line}\n`),
-    })
-    escListener?.stop()
+    // try/finally so an exception thrown by the loop never leaves the terminal stuck in raw mode.
+    let result: Awaited<ReturnType<typeof runInspectLoop>>
+    try {
+      result = await runInspectLoop({
+        agentDir: cwd,
+        ...(sessionArg !== undefined ? { sessionIdOrPrefix: sessionArg } : {}),
+        ...(filterArg !== undefined ? { filter: filterArg } : {}),
+        ...(sinceArg !== undefined ? { since: sinceArg } : {}),
+        json: isJson,
+        color,
+        selectSession: (sessions, selectOpts) => {
+          escListener?.pause()
+          return clackSelect(sessions, selectOpts?.initialSessionId).finally(() => {
+            escListener?.resume()
+          })
+        },
+        ...(liveSource !== undefined ? { liveSource } : {}),
+        signal,
+        newEscSignal: () => {
+          if (escListener === null) return new AbortController().signal
+          return escListener.armForStream()
+        },
+        ...(liveHint !== undefined ? { liveHint } : {}),
+        stdout: (line) => process.stdout.write(`${line}\n`),
+        stderr: (line) => process.stderr.write(`${line}\n`),
+      })
+    } finally {
+      escListener?.stop()
+    }
     if (!result.ok) {
       process.stderr.write(`${errorLine(result.reason)}\n`)

package/src/config/providers.ts CHANGED Viewed

@@ -197,10 +197,11 @@ export const KNOWN_PROVIDERS = {
   // anthropic`) before relying on the env-var path. Same rule applies to any
   // future dual-auth provider — keep the surprise in mind when expanding.
   //
-  // Model lineup is the current GA tier as of 2026-04-16: Opus 4.7 (top,
-  // released Apr 16 2026), Sonnet 4.6 (mid, Feb 5 2026), Haiku 4.5 (fast,
-  // Oct 1 2025). Anthropic's own model overview lists these three as the
-  // current recommended set and flags earlier Opus/Sonnet variants with
+  // Model lineup is the current GA tier as of 2026-05-29: Opus 4.8 (top,
+  // released May 2026), Opus 4.7 (prior top, Apr 16 2026), Sonnet 4.6 (mid,
+  // Feb 5 2026), Haiku 4.5 (fast, Oct 1 2025). Anthropic's own model overview
+  // lists the latest Opus/Sonnet/Haiku as the current recommended set and
+  // flags earlier Opus/Sonnet variants with
   // "Consider migrating to current models." Opus 4 / Sonnet 4 are deprecated
   // (retirement: Jun 15 2026); the 4.5/4.6 alternates remain Active but are
   // not the recommended path.
@@ -276,6 +277,18 @@ export const KNOWN_PROVIDERS = {
         contextWindow: 1000000,
         maxTokens: 128000,
       },
+      'claude-opus-4-8': {
+        id: 'claude-opus-4-8',
+        name: 'Claude Opus 4.8',
+        api: 'anthropic-messages',
+        provider: 'anthropic',
+        baseUrl: 'https://api.anthropic.com',
+        reasoning: true,
+        input: ['text', 'image'],
+        cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
+        contextWindow: 1000000,
+        maxTokens: 128000,
+      },
     },
   },
   fireworks: {

package/src/container/start.ts CHANGED Viewed

@@ -464,12 +464,29 @@ export async function planStart({
   // misattribute to bot detection. 2g matches the Playwright/Puppeteer
   // canonical recommendation and is a memory cap, not an allocation (only
   // used pages count against the host).
+  // `seccomp=unconfined` lets `bwrap(1)` (installed in baseline; see
+  // BASELINE_APT_PACKAGES in src/init/dockerfile.ts) create user/pid/mount
+  // namespaces from inside the container. Docker's default seccomp profile
+  // rejects `unshare(CLONE_NEWUSER)` and `clone(CLONE_NEWUSER)` for
+  // non-privileged containers, which is the right default for multi-tenant
+  // hosts (Kubernetes nodes, CI runners) but wrong for typeclaw: the outer
+  // container is a single-tenant trust boundary — the user trusts everything
+  // inside it equally, the .env and agent folder are already mounted in —
+  // so the multi-tenant protections seccomp adds are not load-bearing for
+  // typeclaw's threat model. The per-tool sandbox bwrap builds for subagents
+  // IS the real boundary against prompt-injected commands; that boundary is
+  // what `--security-opt seccomp=unconfined` exists to enable. See
+  // `docs/internals/sandbox.mdx` for the full rationale including why
+  // `--cap-add=SYS_ADMIN` was rejected as an alternative (narrower in
+  // syscalls but strictly worse in capability semantics).
   const runArgs = [
     'run',
     '-d',
     '--name',
     containerName,
     '--shm-size=2g',
+    '--security-opt',
+    'seccomp=unconfined',
     '-p',
     `${publishHost}:${hostPort}:${CONTAINER_PORT}`,
   ]

package/src/init/dockerfile.ts CHANGED Viewed

@@ -38,7 +38,27 @@ export type BuildDockerfileOptions = {
 // self-heals: it spawns Xvfb (and exports DISPLAY) if the binary is on
 // PATH, and execs the agent directly otherwise. See APT_FEATURES.xvfb
 // below and `buildEntrypointShim`.
-const BASELINE_APT_PACKAGES = ['git', 'ca-certificates', 'curl', 'gnupg', 'iptables', 'util-linux'] as const
+// `bubblewrap` ships the `bwrap(1)` setuid-less namespace sandboxer. It is
+// included in baseline (not behind a toggle) because per-tool sandboxing of
+// agent bash calls is a runtime concern resolved by the agent, not by the
+// agent author. See `src/sandbox/` for the bwrap command builder, and
+// `docs/internals/sandbox.mdx` for why bwrap is the right
+// shape for per-call isolation inside an already-containerized agent. The
+// outer container's `--security-opt seccomp=unconfined` (added in the same
+// commit as this line; see `src/container/start.ts:planStart`) is what lets
+// bwrap create user/pid/mount namespaces from inside Docker. Without that
+// flag the seccomp default profile blocks `unshare(CLONE_NEWUSER)` and bwrap
+// fails at startup. The two changes are load-bearing together — do not drop
+// one without the other.
+const BASELINE_APT_PACKAGES = [
+  'git',
+  'ca-certificates',
+  'curl',
+  'gnupg',
+  'iptables',
+  'util-linux',
+  'bubblewrap',
+] as const
 // curl-impersonate is the only currently-working way to query DuckDuckGo from
 // a non-browser client on residential IPs in 2026. DDG fingerprints incoming

package/src/inspect/live.ts CHANGED Viewed

@@ -63,9 +63,17 @@ export async function* streamLive(opts: StreamLiveOptions): AsyncGenerator<Inspe
     }
   })
-  const onOpen = new Promise<void>((resolve, reject) => {
-    ws.addEventListener('open', () => resolve(), { once: true })
+  // Settle on open OR on any terminal condition: `error` rejects, while
+  // close/abort resolve false. Resolving false here is what unblocks the
+  // connect gate when esc aborts mid-connect — otherwise `await onOpen` would
+  // hang forever and freeze the inspect CLI.
+  const onOpen = new Promise<boolean>((resolve, reject) => {
+    ws.addEventListener('open', () => resolve(true), { once: true })
     ws.addEventListener('error', () => reject(new Error('websocket connection failed')), { once: true })
+    ws.addEventListener('close', () => resolve(false), { once: true })
+    if (opts.signal !== undefined) {
+      if (opts.signal.aborted) resolve(false)
+      else opts.signal.addEventListener('abort', () => resolve(false), { once: true })
+    }
   })
   ws.addEventListener('close', () => {
     closed = true
@@ -96,12 +104,14 @@ export async function* streamLive(opts: StreamLiveOptions): AsyncGenerator<Inspe
     }
   }
+  let opened: boolean
   try {
-    await onOpen
+    opened = await onOpen
   } catch (err) {
     closed = true
     throw err
   }
+  if (!opened || closed || opts.signal?.aborted === true) return
   const subscribe: InspectClientMessage = {
     type: 'subscribe',

package/src/sandbox/availability.ts ADDED Viewed

@@ -0,0 +1,35 @@
+import { SandboxUnavailableError } from './errors'
+// Cached because the binary cannot appear or disappear during a single
+// process lifetime, and a probe per bash call is wasted work. Keyed by the
+// resolved bwrap path so a test (or a consumer pinning a non-default path)
+// re-probes instead of reading another path's cached result.
+const availabilityCache = new Map<string, boolean>()
+export async function ensureBwrapAvailable(options?: { bwrapPath?: string }): Promise<void> {
+  const bwrap = options?.bwrapPath ?? 'bwrap'
+  const cached = availabilityCache.get(bwrap)
+  if (cached === true) return
+  if (cached === false) throw new SandboxUnavailableError()
+  const available = await probe(bwrap)
+  availabilityCache.set(bwrap, available)
+  if (!available) throw new SandboxUnavailableError()
+}
+async function probe(bwrap: string): Promise<boolean> {
+  // Bun.spawn throws synchronously with ENOENT when the binary is not on
+  // PATH, rather than resolving with a non-zero exit code — so the
+  // "not installed" case lands in the catch, not in proc.exitCode.
+  try {
+    const proc = Bun.spawn([bwrap, '--version'], { stdout: 'ignore', stderr: 'ignore' })
+    await proc.exited
+    return proc.exitCode === 0
+  } catch {
+    return false
+  }
+}
+export function _resetBwrapAvailabilityCacheForTests(): void {
+  availabilityCache.clear()
+}