typeclaw 0.14.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,7 +45,9 @@ import type {
45
45
  InboundMessage,
46
46
  OutboundCallback,
47
47
  OutboundMessage,
48
+ QuoteAnchorSource,
48
49
  ResolvedChannelNames,
50
+ SendErrorCode,
49
51
  SendResult,
50
52
  TypingCallback,
51
53
  } from './types'
@@ -98,6 +100,35 @@ export const SESSION_GC_INTERVAL_MS = 60 * 1000
98
100
  // Enforced inside router.send for `source: 'tool'` callers; system
99
101
  // recovery paths (`source: 'system'`) bypass.
100
102
  export const MAX_CHANNEL_SENDS_PER_TURN = 10
103
+ // Ceiling on tool-source channel sends that a same-turn router policy DENIED
104
+ // without delivering — `skip-locked`, `turn-cap`, or `duplicate`. Such denials
105
+ // return a soft error and do NOT increment `consecutiveSends`, so a model that
106
+ // ignores the denial and retries never trips `MAX_CHANNEL_SENDS_PER_TURN`.
107
+ // Both production livelocks had this shape: the model alternated a no-op
108
+ // `skip_response` with a denied `channel_reply` (~200-400x in one
109
+ // `session.prompt()`) — the interleaving defeated the byte-identical
110
+ // loop-guard's 5-in-a-row streak, and the denials bypassed the send cap. One
111
+ // turn was all `skip-locked`, the other all `duplicate` (byte-identical text).
112
+ // On reaching this ceiling we ABORT the run via its AbortSignal (`agent.abort()`), which
113
+ // ends the turn on the next assistant stream. We can't just throw: the pi tool
114
+ // executor catches a tool's throw into an error result and the turn continues.
115
+ // Counted per send-target and only when NO concurrent reservation for that
116
+ // target is in flight, so a legitimate parallel send-burst (one winner + many
117
+ // same-tick duplicate/cap denials) is never mistaken for a loop. Reset at turn
118
+ // start alongside `turnSeq`.
119
+ export const MAX_POLICY_DENIED_CHANNEL_SENDS_PER_TURN = 3
120
+ // Per-request output-token cap for channel sessions, threaded into the agent's
121
+ // stream options to override pi-ai's silent `Math.min(model.maxTokens, 32000)`
122
+ // default (`buildBaseOptions` in @mariozechner/pi-ai). Without it, Fireworks'
123
+ // kimi-k2p6-turbo — which degenerates into single-token repetition on the
124
+ // post-tool follow-up turn — runs the full 32000 tokens (~116s of garbage that
125
+ // never produces a reply) before `stopReason: 'length'`. The terminal-reply
126
+ // hook below removes the turn that triggers this; the cap bounds any other path
127
+ // that still reaches a channel LLM call. 4096 fits a thinking block plus a
128
+ // nontrivial reply (healthy channel turns observed at ~317 output tokens
129
+ // including reasoning). Deliberately NOT lowered in `providers.ts`, where
130
+ // `maxTokens` is the model's true capability that compaction math reads.
131
+ export const CHANNEL_MAX_OUTPUT_TOKENS = 4096
101
132
  // Rolling window for outbound send-rate telemetry. 5s matches Discord's
102
133
  // rate-limit shape (5 msg / 5 s / channel) and comfortably covers Slack's
103
134
  // 1 msg/s sustained. The window is observational; exceeding the burst
@@ -347,6 +378,19 @@ type LiveSession = {
347
378
  // regardless of which order the model tried them in. Updated only at
348
379
  // turn start; reads against the live counter elsewhere are intentional.
349
380
  successfulSendsAtTurnStart: number
381
+ // Per-send-target count of tool-source sends with a reservation currently
382
+ // in flight (slot reserved, outbound callback not yet settled). Lets the
383
+ // policy-denial guard tell a legitimate parallel send-burst (denials that
384
+ // race a still-in-flight winner) from a sequential retry loop (denials with
385
+ // nothing in flight). Incremented at reservation, decremented in the
386
+ // callback-loop `finally` so an adapter throw can't strand a target.
387
+ inFlightToolSends: Map<string, number>
388
+ // Per-send-target count of policy-denied tool sends this turn that did NOT
389
+ // race an in-flight reservation. Drives the abort at
390
+ // `MAX_POLICY_DENIED_CHANNEL_SENDS_PER_TURN` that breaks the alternating-tool
391
+ // livelock the byte-identical loop-guard misses. Reset at turn start and
392
+ // cleared per-target on a successful delivery to that target.
393
+ policyDeniedToolSendsThisTurn: Map<string, number>
350
394
  // Stamped by `markTurnSkipped` (called from the `skip_response` tool)
351
395
  // with the current `turnSeq`. Read at the top of `validateChannelTurn`:
352
396
  // if it matches the just-completed turn, recovery is skipped entirely
@@ -1011,6 +1055,8 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1011
1055
  successfulChannelSends: 0,
1012
1056
  turnSeq: 0,
1013
1057
  successfulSendsAtTurnStart: 0,
1058
+ inFlightToolSends: new Map(),
1059
+ policyDeniedToolSendsThisTurn: new Map(),
1014
1060
  skippedTurn: null,
1015
1061
  pendingQuoteCandidate: null,
1016
1062
  recentEngagedPeerBotTurns: [],
@@ -1025,6 +1071,8 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1025
1071
  logger.error(`[channels] ${live.keyId}: LLM call failed: ${err.message}`)
1026
1072
  })
1027
1073
  live.unsubTypingActivity = subscribeTypingActivity(created.session, live)
1074
+ installChannelReplyTerminalHook(live)
1075
+ installChannelOutputCap(live)
1028
1076
  liveSessions.set(keyId, live)
1029
1077
 
1030
1078
  if (isColdStart) {
@@ -1182,6 +1230,54 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1182
1230
  })
1183
1231
  }
1184
1232
 
1233
+ // After a successful `channel_reply`, the model has delivered its user-facing
1234
+ // response and the turn is semantically done. pi-agent-core's loop, however,
1235
+ // unconditionally makes one more LLM call after any tool result (the
1236
+ // "post-tool follow-up") to let multi-step tool chains continue. On a turn
1237
+ // that ended with `channel_reply` there is nothing left to say, and Fireworks'
1238
+ // kimi-k2p6-turbo degenerates that empty follow-up into a 32000-token
1239
+ // repetition loop (see CHANNEL_MAX_OUTPUT_TOKENS). Aborting the run's signal
1240
+ // from `afterToolCall` — which runs during tool execution, before the loop
1241
+ // re-enters the LLM stream — makes the follow-up stream observe an already-
1242
+ // aborted signal and return `stopReason: 'aborted'` without generating. This
1243
+ // is the same `agent.abort()` lever the policy-denied-send cap uses; the
1244
+ // tool's own result is already persisted, so the reply still lands.
1245
+ //
1246
+ // Scope is deliberately narrow: only `channel_reply` (the current-chat user-
1247
+ // facing response), only on success, and only for channel sessions. Read-only
1248
+ // tools and `channel_send` must keep the follow-up so genuine multi-step turns
1249
+ // continue. A prior non-typeclaw `afterToolCall` (none today) would be
1250
+ // composed, not clobbered.
1251
+ const installChannelReplyTerminalHook = (live: LiveSession): void => {
1252
+ const { agent } = live.session
1253
+ const prior = agent.afterToolCall
1254
+ agent.afterToolCall = async (context, signal) => {
1255
+ const result = prior ? await prior(context, signal) : undefined
1256
+ const succeeded =
1257
+ context.toolCall.name === 'channel_reply' &&
1258
+ !context.isError &&
1259
+ (context.result.details as { ok?: unknown } | undefined)?.ok === true
1260
+ if (succeeded && agent.signal?.aborted !== true) {
1261
+ logger.info(`[channels] ${live.keyId} terminal_after_channel_reply`)
1262
+ agent.abort()
1263
+ }
1264
+ return result
1265
+ }
1266
+ }
1267
+
1268
+ // Override pi-ai's hidden `Math.min(model.maxTokens, 32000)` output cap for
1269
+ // channel sessions by threading an explicit `maxTokens` into every stream
1270
+ // call. See CHANNEL_MAX_OUTPUT_TOKENS for why. Composes the existing streamFn
1271
+ // (pi's default `streamSimple` unless a proxy was installed) and only fills
1272
+ // `maxTokens` when the caller left it unset, so an explicit per-call value
1273
+ // still wins.
1274
+ const installChannelOutputCap = (live: LiveSession): void => {
1275
+ const { agent } = live.session
1276
+ const inner = agent.streamFn
1277
+ agent.streamFn = (model, context, options) =>
1278
+ inner(model, context, { ...options, maxTokens: options?.maxTokens ?? CHANNEL_MAX_OUTPUT_TOKENS })
1279
+ }
1280
+
1185
1281
  const startTypingHeartbeat = (live: LiveSession): void => {
1186
1282
  if (live.typingTimedOut || live.typingStopPromise) return
1187
1283
  if (live.destroyed) return
@@ -1370,6 +1466,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1370
1466
  const successfulSendsBeforePrompt = live.successfulChannelSends
1371
1467
  live.turnSeq++
1372
1468
  live.successfulSendsAtTurnStart = successfulSendsBeforePrompt
1469
+ live.policyDeniedToolSendsThisTurn.clear()
1373
1470
  await fireSessionTurnStart(live, text)
1374
1471
  try {
1375
1472
  await live.session.prompt(text)
@@ -1426,13 +1523,21 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1426
1523
  }, wait)
1427
1524
  }
1428
1525
 
1429
- const publishInbound = (event: InboundMessage, decision: 'engage' | 'observe' | 'denied' | 'claim'): void => {
1526
+ const publishInbound = (
1527
+ event: InboundMessage,
1528
+ decision: 'engage' | 'observe' | 'denied' | 'claim',
1529
+ // Undefined before a session exists (denied/claim intercepts). Carried so a
1530
+ // session-scoped `typeclaw inspect` only sees its own session's inbounds —
1531
+ // the broadcast otherwise fans out to every inspect client.
1532
+ sessionId?: string,
1533
+ ): void => {
1430
1534
  if (stream === undefined) return
1431
1535
  try {
1432
1536
  stream.publish({
1433
1537
  target: { kind: 'broadcast' },
1434
1538
  payload: {
1435
1539
  kind: 'channel-inbound',
1540
+ ...(sessionId !== undefined ? { sessionId } : {}),
1436
1541
  adapter: event.adapter,
1437
1542
  workspace: event.workspace,
1438
1543
  chat: event.chat,
@@ -1569,7 +1674,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1569
1674
  })
1570
1675
 
1571
1676
  if (decision === 'observe') {
1572
- publishInbound(event, 'observe')
1677
+ publishInbound(event, 'observe', live.sessionId)
1573
1678
  // Log every observe so an unanswered mention is diagnosable from logs
1574
1679
  // alone instead of "routed but no prompting" silence. The bracketed
1575
1680
  // shape mirrors `prompting batch=` so log scraping can pair them.
@@ -1578,7 +1683,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1578
1683
  return
1579
1684
  }
1580
1685
 
1581
- publishInbound(event, 'engage')
1686
+ publishInbound(event, 'engage', live.sessionId)
1582
1687
 
1583
1688
  updateLoopGuard(live, event)
1584
1689
 
@@ -1875,7 +1980,12 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1875
1980
  if (live && source === 'tool' && live.pendingQuoteCandidate !== null) {
1876
1981
  const quoteCandidate = refreshQuoteCandidate(live.pendingQuoteCandidate, live.contextBuffer)
1877
1982
  const anchor = decideQuoteAnchor(quoteCandidate, now(), options.configForAdapter(msg.adapter))
1878
- if (anchor !== null) msg = { ...msg, text: prependQuoteAnchor(msg.text ?? '', anchor) }
1983
+ if (anchor !== null) {
1984
+ msg =
1985
+ resolveReplyRenderMode(msg) === 'native'
1986
+ ? { ...msg, replyTo: { externalMessageId: anchor.externalMessageId, source: anchor.source } }
1987
+ : { ...msg, text: prependQuoteAnchor(msg.text ?? '', anchor.source) }
1988
+ }
1879
1989
  live.pendingQuoteCandidate = null
1880
1990
  }
1881
1991
  const text = normalizeSendText(msg.text)
@@ -1892,19 +2002,52 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1892
2002
  let priorLastSentText: string | undefined
1893
2003
  let reserved = false
1894
2004
  if (live && source === 'tool') {
2005
+ // Every same-turn policy denial (skip-locked / turn-cap / duplicate)
2006
+ // returns a soft error and does NOT increment `consecutiveSends`, so a
2007
+ // model that ignores the denial and retries never trips the send cap. To
2008
+ // bound that loop we route all three through one tally that ABORTS the run
2009
+ // once the ceiling is reached. The discriminator that keeps legitimate parallel
2010
+ // send-bursts soft: a denial only counts when NO reservation for the same
2011
+ // target is in flight. In a `Promise.all` burst the synchronous denials
2012
+ // all race the one in-flight winner, so they don't count; a sequential
2013
+ // retry loop has nothing in flight, so it does. See
2014
+ // `MAX_POLICY_DENIED_CHANNEL_SENDS_PER_TURN`.
2015
+ //
2016
+ // Why abort, not throw: pi-agent-core's tool executor catches a throw
2017
+ // from a tool's execute() and converts it into an `isError` tool result —
2018
+ // the turn would continue and the model could retry. The only thing that
2019
+ // actually ends an in-flight turn is aborting the run's AbortSignal:
2020
+ // `agent.abort()` flips it synchronously, then the NEXT assistant stream
2021
+ // (after this tool returns) sees the aborted signal and ends the turn with
2022
+ // stopReason 'aborted'. We must NOT call `session.abort()` here — it
2023
+ // `await`s `waitForIdle()`, which would deadlock waiting for the very run
2024
+ // this tool call belongs to. `agent.abort()` is the signal-only,
2025
+ // non-blocking variant. We still return the soft denial for this call.
2026
+ const denyPolicyToolSend = (error: string, code: SendErrorCode): SendResult => {
2027
+ if ((live.inFlightToolSends.get(sendKey) ?? 0) > 0) {
2028
+ return { ok: false, error, code }
2029
+ }
2030
+ const count = (live.policyDeniedToolSendsThisTurn.get(sendKey) ?? 0) + 1
2031
+ live.policyDeniedToolSendsThisTurn.set(sendKey, count)
2032
+ if (count >= MAX_POLICY_DENIED_CHANNEL_SENDS_PER_TURN) {
2033
+ logger.warn(`[channels] ${live.keyId}: aborting turn — ${count} policy-denied channel sends (last: ${code})`)
2034
+ if (live.session.agent.signal?.aborted !== true) live.session.agent.abort()
2035
+ }
2036
+ return { ok: false, error, code }
2037
+ }
1895
2038
  // Tool-source send after `skip_response` for the same turn is a contract
1896
2039
  // violation: the model already committed to silence. Reject before any
1897
2040
  // state mutation so the model gets a clear error and the channel stays
1898
2041
  // silent. System-source sends (recovery, role-claim) are not affected.
1899
2042
  if (live.skippedTurn !== null && live.skippedTurn.turnSeq === live.turnSeq) {
1900
- return { ok: false, error: SKIP_RESPONSE_LOCK_ERROR, code: 'skip-locked' }
2043
+ return denyPolicyToolSend(SKIP_RESPONSE_LOCK_ERROR, 'skip-locked')
1901
2044
  }
1902
2045
  const currentCount = live.consecutiveSends.get(sendKey) ?? 0
1903
2046
  if (currentCount >= MAX_CHANNEL_SENDS_PER_TURN) {
1904
- return { ok: false, error: TURN_CAP_ERROR, code: 'turn-cap' }
2047
+ return denyPolicyToolSend(TURN_CAP_ERROR, 'turn-cap')
1905
2048
  }
1906
2049
  if (text !== undefined && live.lastSentText.get(sendKey) === text) {
1907
- return { ok: false, error: DUPLICATE_SEND_ERROR, code: 'duplicate' }
2050
+ return denyPolicyToolSend(DUPLICATE_SEND_ERROR, 'duplicate')
1908
2051
  }
1909
2052
  // Reserve the slot before awaiting. If the callback rejects we roll
1910
2053
  // back below; if it succeeds we keep the increment. The slot reserve
@@ -1915,6 +2058,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1915
2058
  priorLastSentText = live.lastSentText.get(sendKey)
1916
2059
  live.consecutiveSends.set(sendKey, currentCount + 1)
1917
2060
  if (text !== undefined) live.lastSentText.set(sendKey, text)
2061
+ live.inFlightToolSends.set(sendKey, (live.inFlightToolSends.get(sendKey) ?? 0) + 1)
1918
2062
  reserved = true
1919
2063
  }
1920
2064
 
@@ -1924,13 +2068,24 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1924
2068
  const snapshot = Array.from(callbacks)
1925
2069
  let lastError: string | undefined
1926
2070
  let delivered = false
1927
- for (const cb of snapshot) {
1928
- const result = await cb(msg)
1929
- if (result.ok) {
1930
- delivered = true
1931
- break
2071
+ try {
2072
+ for (const cb of snapshot) {
2073
+ const result = await cb(msg)
2074
+ if (result.ok) {
2075
+ delivered = true
2076
+ break
2077
+ }
2078
+ lastError = result.error
2079
+ }
2080
+ } finally {
2081
+ // Clear the in-flight reservation even if a callback threw, so a flaky
2082
+ // adapter can never strand a target as permanently "in flight" and
2083
+ // disable the policy-denial guard for it.
2084
+ if (live && reserved) {
2085
+ const inFlight = (live.inFlightToolSends.get(sendKey) ?? 1) - 1
2086
+ if (inFlight <= 0) live.inFlightToolSends.delete(sendKey)
2087
+ else live.inFlightToolSends.set(sendKey, inFlight)
1932
2088
  }
1933
- lastError = result.error
1934
2089
  }
1935
2090
 
1936
2091
  if (!delivered) {
@@ -1950,6 +2105,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1950
2105
 
1951
2106
  if (live) {
1952
2107
  live.successfulChannelSends++
2108
+ live.policyDeniedToolSendsThisTurn.delete(sendKey)
1953
2109
  // Don't stop the heartbeat here: the agent may still be mid-turn and
1954
2110
  // about to send another reply. drain()'s finally block owns turn-end
1955
2111
  // stop. But Slack's adapter outbound callback explicitly clears
@@ -2480,12 +2636,7 @@ function formatAuthorLine(
2480
2636
  return `${stamp}${formatAuthorReference(adapter, authorId, authorName)} (${authorName})${tag}: ${text}`
2481
2637
  }
2482
2638
 
2483
- export type QuoteAnchorSource = {
2484
- adapter: AdapterId
2485
- authorId: string
2486
- authorName: string
2487
- text: string
2488
- }
2639
+ export type { QuoteAnchorSource } from './types'
2489
2640
 
2490
2641
  // Picks the right author syntax for the platform so prompts and rendered
2491
2642
  // quote anchors use the same form the user would type in that channel.
@@ -2557,6 +2708,7 @@ type QuoteAnchorBatchEntry = {
2557
2708
  authorName: string
2558
2709
  authorIsBot: boolean
2559
2710
  receivedAt: number
2711
+ externalMessageId: string
2560
2712
  }
2561
2713
 
2562
2714
  type QuoteAnchorObservedEntry = {
@@ -2566,10 +2718,18 @@ type QuoteAnchorObservedEntry = {
2566
2718
 
2567
2719
  export type QuoteAnchorCandidate = {
2568
2720
  source: QuoteAnchorSource
2721
+ // Native id of the primary inbound, so a native-reply adapter can point at
2722
+ // the exact message; the blockquote fallback ignores it.
2723
+ externalMessageId: string
2569
2724
  primaryReceivedAt: number
2570
2725
  hadInterveningObserved: boolean
2571
2726
  }
2572
2727
 
2728
+ export type QuoteAnchorTarget = {
2729
+ source: QuoteAnchorSource
2730
+ externalMessageId: string
2731
+ }
2732
+
2573
2733
  // Strips both current `[<Adapter> attachment #N: ...]` and legacy
2574
2734
  // `[<Adapter> message with ...]` placeholders that adapter
2575
2735
  // classifiers synthesize for non-text inbounds (KakaoTalk stickers,
@@ -2620,6 +2780,7 @@ export function captureQuoteCandidate(
2620
2780
  if (cleaned === '') return null
2621
2781
  return {
2622
2782
  source: { adapter, authorId: primary.authorId, authorName: primary.authorName, text: cleaned },
2783
+ externalMessageId: primary.externalMessageId,
2623
2784
  primaryReceivedAt: primary.receivedAt,
2624
2785
  hadInterveningObserved: hasInterveningObserved(primary.receivedAt, observed),
2625
2786
  }
@@ -2647,12 +2808,34 @@ export function decideQuoteAnchor(
2647
2808
  candidate: QuoteAnchorCandidate | null,
2648
2809
  _nowMs: number,
2649
2810
  adapterConfig: ChannelAdapterConfig | undefined,
2650
- ): QuoteAnchorSource | null {
2811
+ ): QuoteAnchorTarget | null {
2651
2812
  if (candidate === null) return null
2652
2813
  const config = adapterConfig?.quotedReply
2653
2814
  if (config !== undefined && config.enabled === false) return null
2654
2815
  if (!candidate.hadInterveningObserved) return null
2655
- return candidate.source
2816
+ return { source: candidate.source, externalMessageId: candidate.externalMessageId }
2817
+ }
2818
+
2819
+ export type ReplyRenderMode = 'native' | 'quote'
2820
+
2821
+ // Per-adapter, per-shape decision: can this exact outbound carry a native
2822
+ // platform reply, or must it degrade to the blockquote fallback? Conditional
2823
+ // because native support is not uniform within an adapter — Telegram's
2824
+ // `sendMessage` accepts `reply_to_message_id` but `sendDocument` does not, so
2825
+ // an attachment-only Telegram reply must quote; the same text-only restriction
2826
+ // holds for Discord (`message_reference` rides on the text send, file uploads
2827
+ // land bare) and KakaoTalk. Slack's primitive is `thread`, not a per-message
2828
+ // reply, so it stays quote; GitHub's PR-review reply already rides on `thread`.
2829
+ //
2830
+ // KakaoTalk is `native` here even though its reply payload can fail to resolve
2831
+ // at send time — the adapter degrades to the blockquote fallback itself using
2832
+ // `replyTo.source`, so the router still routes it down the native branch.
2833
+ const NATIVE_REPLY_TEXT_ADAPTERS = new Set<AdapterId>(['telegram-bot', 'discord-bot', 'kakaotalk'])
2834
+
2835
+ export function resolveReplyRenderMode(msg: OutboundMessage): ReplyRenderMode {
2836
+ const hasText = normalizeSendText(msg.text) !== undefined
2837
+ if (hasText && NATIVE_REPLY_TEXT_ADAPTERS.has(msg.adapter)) return 'native'
2838
+ return 'quote'
2656
2839
  }
2657
2840
 
2658
2841
  type Sliced = { kind: 'message'; message: ChannelHistoryMessage } | { kind: 'elision'; elidedCount: number }
@@ -126,6 +126,28 @@ export type OutboundMessage = {
126
126
  // `uploadFile` does not accept a content body or a thread id, see the
127
127
  // adapter for the workaround details.
128
128
  attachments?: OutboundAttachment[]
129
+ // Set by the router (native render mode + anchor fired) so an adapter can
130
+ // reply to the inbound it answers. Telegram/Discord consume `externalMessageId`;
131
+ // `quote`-mode adapters never see this (the router prepends the blockquote into
132
+ // `text` instead). `source` lets an adapter whose native primitive can fail at
133
+ // send time (KakaoTalk: payload built from a source message that may have
134
+ // scrolled out of history) degrade to the same blockquote fallback.
135
+ replyTo?: OutboundReplyTo
136
+ }
137
+
138
+ export type OutboundReplyTo = {
139
+ externalMessageId: string
140
+ source?: QuoteAnchorSource
141
+ }
142
+
143
+ // `adapter` selects the per-platform author-mention syntax in the blockquote
144
+ // fallback. Lives here (not router.ts) so adapters can reconstruct a native
145
+ // reply payload from the same shape the router renders quotes from.
146
+ export type QuoteAnchorSource = {
147
+ adapter: AdapterId
148
+ authorId: string
149
+ authorName: string
150
+ text: string
129
151
  }
130
152
 
131
153
  export type SendErrorCode =
@@ -49,31 +49,35 @@ export const inspectCommand = defineCommand({
49
49
  const escListener = isJson ? null : createEscListener()
50
50
  const liveHint = escListener === null ? undefined : escHintLine(color)
51
51
 
52
- const result = await runInspectLoop({
53
- agentDir: cwd,
54
- ...(sessionArg !== undefined ? { sessionIdOrPrefix: sessionArg } : {}),
55
- ...(filterArg !== undefined ? { filter: filterArg } : {}),
56
- ...(sinceArg !== undefined ? { since: sinceArg } : {}),
57
- json: isJson,
58
- color,
59
- selectSession: (sessions, selectOpts) => {
60
- escListener?.pause()
61
- return clackSelect(sessions, selectOpts?.initialSessionId).finally(() => {
62
- escListener?.resume()
63
- })
64
- },
65
- ...(liveSource !== undefined ? { liveSource } : {}),
66
- signal,
67
- newEscSignal: () => {
68
- if (escListener === null) return new AbortController().signal
69
- return escListener.armForStream()
70
- },
71
- ...(liveHint !== undefined ? { liveHint } : {}),
72
- stdout: (line) => process.stdout.write(`${line}\n`),
73
- stderr: (line) => process.stderr.write(`${line}\n`),
74
- })
75
-
76
- escListener?.stop()
52
+ // try/finally so a thrown loop never leaves the terminal stuck in raw mode.
53
+ let result: Awaited<ReturnType<typeof runInspectLoop>>
54
+ try {
55
+ result = await runInspectLoop({
56
+ agentDir: cwd,
57
+ ...(sessionArg !== undefined ? { sessionIdOrPrefix: sessionArg } : {}),
58
+ ...(filterArg !== undefined ? { filter: filterArg } : {}),
59
+ ...(sinceArg !== undefined ? { since: sinceArg } : {}),
60
+ json: isJson,
61
+ color,
62
+ selectSession: (sessions, selectOpts) => {
63
+ escListener?.pause()
64
+ return clackSelect(sessions, selectOpts?.initialSessionId).finally(() => {
65
+ escListener?.resume()
66
+ })
67
+ },
68
+ ...(liveSource !== undefined ? { liveSource } : {}),
69
+ signal,
70
+ newEscSignal: () => {
71
+ if (escListener === null) return new AbortController().signal
72
+ return escListener.armForStream()
73
+ },
74
+ ...(liveHint !== undefined ? { liveHint } : {}),
75
+ stdout: (line) => process.stdout.write(`${line}\n`),
76
+ stderr: (line) => process.stderr.write(`${line}\n`),
77
+ })
78
+ } finally {
79
+ escListener?.stop()
80
+ }
77
81
 
78
82
  if (!result.ok) {
79
83
  process.stderr.write(`${errorLine(result.reason)}\n`)
@@ -197,10 +197,11 @@ export const KNOWN_PROVIDERS = {
197
197
  // anthropic`) before relying on the env-var path. Same rule applies to any
198
198
  // future dual-auth provider — keep the surprise in mind when expanding.
199
199
  //
200
- // Model lineup is the current GA tier as of 2026-04-16: Opus 4.7 (top,
201
- // released Apr 16 2026), Sonnet 4.6 (mid, Feb 5 2026), Haiku 4.5 (fast,
202
- // Oct 1 2025). Anthropic's own model overview lists these three as the
203
- // current recommended set and flags earlier Opus/Sonnet variants with
200
+ // Model lineup is the current GA tier as of 2026-05-29: Opus 4.8 (top,
201
+ // released May 2026), Opus 4.7 (prior top, Apr 16 2026), Sonnet 4.6 (mid,
202
+ // Feb 5 2026), Haiku 4.5 (fast, Oct 1 2025). Anthropic's own model overview
203
+ // lists the latest Opus/Sonnet/Haiku as the current recommended set and
204
+ // flags earlier Opus/Sonnet variants with
204
205
  // "Consider migrating to current models." Opus 4 / Sonnet 4 are deprecated
205
206
  // (retirement: Jun 15 2026); the 4.5/4.6 alternates remain Active but are
206
207
  // not the recommended path.
@@ -276,6 +277,18 @@ export const KNOWN_PROVIDERS = {
276
277
  contextWindow: 1000000,
277
278
  maxTokens: 128000,
278
279
  },
280
+ 'claude-opus-4-8': {
281
+ id: 'claude-opus-4-8',
282
+ name: 'Claude Opus 4.8',
283
+ api: 'anthropic-messages',
284
+ provider: 'anthropic',
285
+ baseUrl: 'https://api.anthropic.com',
286
+ reasoning: true,
287
+ input: ['text', 'image'],
288
+ cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
289
+ contextWindow: 1000000,
290
+ maxTokens: 128000,
291
+ },
279
292
  },
280
293
  },
281
294
  fireworks: {
@@ -464,12 +464,29 @@ export async function planStart({
464
464
  // misattribute to bot detection. 2g matches the Playwright/Puppeteer
465
465
  // canonical recommendation and is a memory cap, not an allocation (only
466
466
  // used pages count against the host).
467
+ // `seccomp=unconfined` lets `bwrap(1)` (installed in baseline; see
468
+ // BASELINE_APT_PACKAGES in src/init/dockerfile.ts) create user/pid/mount
469
+ // namespaces from inside the container. Docker's default seccomp profile
470
+ // rejects `unshare(CLONE_NEWUSER)` and `clone(CLONE_NEWUSER)` for
471
+ // non-privileged containers, which is the right default for multi-tenant
472
+ // hosts (Kubernetes nodes, CI runners) but wrong for typeclaw: the outer
473
+ // container is a single-tenant trust boundary — the user trusts everything
474
+ // inside it equally, the .env and agent folder are already mounted in —
475
+ // so the multi-tenant protections seccomp adds are not load-bearing for
476
+ // typeclaw's threat model. The per-tool sandbox bwrap builds for subagents
477
+ // IS the real boundary against prompt-injected commands; that boundary is
478
+ // what `--security-opt seccomp=unconfined` exists to enable. See
479
+ // `docs/internals/sandbox.mdx` for the full rationale including why
480
+ // `--cap-add=SYS_ADMIN` was rejected as an alternative (narrower in
481
+ // syscalls but strictly worse in capability semantics).
467
482
  const runArgs = [
468
483
  'run',
469
484
  '-d',
470
485
  '--name',
471
486
  containerName,
472
487
  '--shm-size=2g',
488
+ '--security-opt',
489
+ 'seccomp=unconfined',
473
490
  '-p',
474
491
  `${publishHost}:${hostPort}:${CONTAINER_PORT}`,
475
492
  ]
@@ -38,7 +38,27 @@ export type BuildDockerfileOptions = {
38
38
  // self-heals: it spawns Xvfb (and exports DISPLAY) if the binary is on
39
39
  // PATH, and execs the agent directly otherwise. See APT_FEATURES.xvfb
40
40
  // below and `buildEntrypointShim`.
41
- const BASELINE_APT_PACKAGES = ['git', 'ca-certificates', 'curl', 'gnupg', 'iptables', 'util-linux'] as const
41
+ // `bubblewrap` ships the `bwrap(1)` setuid-less namespace sandboxer. It is
42
+ // included in baseline (not behind a toggle) because per-tool sandboxing of
43
+ // agent bash calls is a runtime concern resolved by the agent, not by the
44
+ // agent author. See `src/sandbox/` for the bwrap command builder, and
45
+ // `docs/internals/sandbox.mdx` for why bwrap is the right
46
+ // shape for per-call isolation inside an already-containerized agent. The
47
+ // outer container's `--security-opt seccomp=unconfined` (added in the same
48
+ // commit as this line; see `src/container/start.ts:planStart`) is what lets
49
+ // bwrap create user/pid/mount namespaces from inside Docker. Without that
50
+ // flag the seccomp default profile blocks `unshare(CLONE_NEWUSER)` and bwrap
51
+ // fails at startup. The two changes are load-bearing together — do not drop
52
+ // one without the other.
53
+ const BASELINE_APT_PACKAGES = [
54
+ 'git',
55
+ 'ca-certificates',
56
+ 'curl',
57
+ 'gnupg',
58
+ 'iptables',
59
+ 'util-linux',
60
+ 'bubblewrap',
61
+ ] as const
42
62
 
43
63
  // curl-impersonate is the only currently-working way to query DuckDuckGo from
44
64
  // a non-browser client on residential IPs in 2026. DDG fingerprints incoming
@@ -63,9 +63,17 @@ export async function* streamLive(opts: StreamLiveOptions): AsyncGenerator<Inspe
63
63
  }
64
64
  })
65
65
 
66
- const onOpen = new Promise<void>((resolve, reject) => {
67
- ws.addEventListener('open', () => resolve(), { once: true })
66
+ // Settle on open OR on any terminal condition (error/close/abort). Resolving
67
+ // false here is what unblocks the connect gate when esc aborts mid-connect —
68
+ // otherwise `await onOpen` would hang forever and freeze the inspect CLI.
69
+ const onOpen = new Promise<boolean>((resolve, reject) => {
70
+ ws.addEventListener('open', () => resolve(true), { once: true })
68
71
  ws.addEventListener('error', () => reject(new Error('websocket connection failed')), { once: true })
72
+ ws.addEventListener('close', () => resolve(false), { once: true })
73
+ if (opts.signal !== undefined) {
74
+ if (opts.signal.aborted) resolve(false)
75
+ else opts.signal.addEventListener('abort', () => resolve(false), { once: true })
76
+ }
69
77
  })
70
78
  ws.addEventListener('close', () => {
71
79
  closed = true
@@ -96,12 +104,14 @@ export async function* streamLive(opts: StreamLiveOptions): AsyncGenerator<Inspe
96
104
  }
97
105
  }
98
106
 
107
+ let opened: boolean
99
108
  try {
100
- await onOpen
109
+ opened = await onOpen
101
110
  } catch (err) {
102
111
  closed = true
103
112
  throw err
104
113
  }
114
+ if (!opened || closed || opts.signal?.aborted === true) return
105
115
 
106
116
  const subscribe: InspectClientMessage = {
107
117
  type: 'subscribe',
@@ -0,0 +1,35 @@
1
+ import { SandboxUnavailableError } from './errors'
2
+
3
+ // Cached because the binary cannot appear or disappear during a single
4
+ // process lifetime, and a probe per bash call is wasted work. Keyed by the
5
+ // resolved bwrap path so a test (or a consumer pinning a non-default path)
6
+ // re-probes instead of reading another path's cached result.
7
+ const availabilityCache = new Map<string, boolean>()
8
+
9
+ export async function ensureBwrapAvailable(options?: { bwrapPath?: string }): Promise<void> {
10
+ const bwrap = options?.bwrapPath ?? 'bwrap'
11
+ const cached = availabilityCache.get(bwrap)
12
+ if (cached === true) return
13
+ if (cached === false) throw new SandboxUnavailableError()
14
+
15
+ const available = await probe(bwrap)
16
+ availabilityCache.set(bwrap, available)
17
+ if (!available) throw new SandboxUnavailableError()
18
+ }
19
+
20
+ async function probe(bwrap: string): Promise<boolean> {
21
+ // Bun.spawn throws synchronously with ENOENT when the binary is not on
22
+ // PATH, rather than resolving with a non-zero exit code — so the
23
+ // "not installed" case lands in the catch, not in proc.exitCode.
24
+ try {
25
+ const proc = Bun.spawn([bwrap, '--version'], { stdout: 'ignore', stderr: 'ignore' })
26
+ await proc.exited
27
+ return proc.exitCode === 0
28
+ } catch {
29
+ return false
30
+ }
31
+ }
32
+
33
+ export function _resetBwrapAvailabilityCacheForTests(): void {
34
+ availabilityCache.clear()
35
+ }