switchroom 0.14.18 → 0.14.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24260,7 +24260,7 @@ var init_bridge = __esm(async () => {
24260
24260
  instructions: [
24261
24261
  "The sender reads Telegram, not this session. Anything you want them to see must go through the reply tool \u2014 your transcript output never reaches their chat.",
24262
24262
  "",
24263
- 'Messages from Telegram arrive as <channel source="telegram" chat_id="..." message_id="..." user="..." ts="...">. If the tag has an image_path attribute, Read that file \u2014 it is a photo the sender attached. If the tag has attachment_file_id, call download_attachment with that file_id to fetch the file, then Read the returned path. Reply with the reply tool \u2014 pass chat_id back. The reply and stream_reply tools quote-reply to the latest inbound user message by default, so you do NOT need to pass reply_to for normal responses. Pass reply_to (a message_id) only when quoting a specific earlier message, or pass quote:false to send a bare (non-quoted) message.',
24263
+ 'Messages from Telegram arrive as <channel source="telegram" chat_id="..." message_id="..." user="..." ts="...">. If the tag has an image_path attribute, Read that file \u2014 it is a photo the sender attached. If the tag has attachment_file_id, call download_attachment with that file_id to fetch the file, then Read the returned path. A single message may carry SEVERAL attachments (a forwarded album or a text+multi-image burst): when attachment_count is set (>1), also handle the numbered siblings \u2014 image_path_2, image_path_3, \u2026 (Read each) and attachment_file_id_2, attachment_file_id_3, \u2026 (download_attachment each). Process every one, not just the first. Reply with the reply tool \u2014 pass chat_id back. The reply and stream_reply tools quote-reply to the latest inbound user message by default, so you do NOT need to pass reply_to for normal responses. Pass reply_to (a message_id) only when quoting a specific earlier message, or pass quote:false to send a bare (non-quoted) message.',
24264
24264
  "",
24265
24265
  `reply accepts file paths (files: ["/abs/path.png"]) for attachments. Use react to add emoji reactions, edit_message for interim progress updates, and delete_message when you need to truly remove a message (prefer edit_message if you just want to change text \u2014 delete is for retraction). Edits don't trigger push notifications \u2014 when a long task completes, send a new reply so the user's device pings. Use send_typing to show a typing indicator during long operations. Use pin_message to pin important outputs. Use forward_message to quote/resurface earlier messages.`,
24266
24266
  "",
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Pure helpers for A2 multi-attachment coalescing — kept out of `gateway.ts`
3
+ * so the cap/ordering and numbered-meta logic can be unit-tested without the
4
+ * gateway's `loadAccess()` / IPC machinery.
5
+ *
6
+ * Inbound model: each Telegram message carries at most one attachment, so the
7
+ * coalescer accumulates one attachment per buffered entry. On flush the
8
+ * gateway folds up to `coalesce.max_attachments` of them into a single turn —
9
+ * the first is the primary (unsuffixed `image_path` / `attachment_*` meta),
10
+ * the rest are numbered siblings (`image_path_2`, `attachment_file_id_2`, …).
11
+ */
12
+
13
/**
 * Metadata for an attachment that is handed to the agent by reference rather
 * than pre-downloaded. `buildExtraAttachmentMeta` copies each field into a
 * numbered `attachment_*_<n>` meta key.
 */
export interface CoalesceAttachmentMeta {
  /** Attachment kind label; emitted verbatim as `attachment_kind_<n>`. */
  kind: string
  /** File identifier; emitted verbatim as `attachment_file_id_<n>`. */
  file_id: string
  /**
   * Optional size; emitted (stringified) as `attachment_size_<n>` whenever it
   * is not null/undefined, so 0 is kept.
   * NOTE(review): unit is presumably bytes — confirm against the producer.
   */
  size?: number
  /** Optional MIME type; emitted as `attachment_mime_<n>` when non-empty. */
  mime?: string
  /** Optional file name; emitted as `attachment_name_<n>` when non-empty. */
  name?: string
}
20
+
21
/**
 * A resolved extra attachment: photos are pre-downloaded to `imagePath`;
 * documents/voice carry only `attachment` metadata (agent fetches the file
 * via `download_attachment`).
 *
 * Both fields are optional, so an entry may carry neither; such an entry
 * still occupies its positional number in `buildExtraAttachmentMeta` but
 * contributes no keys.
 */
export interface ResolvedExtraAttachment {
  /** Path of the already-downloaded image; emitted as `image_path_<n>` when non-empty. */
  imagePath?: string
  /** By-reference attachment metadata; emitted as the `attachment_*_<n>` keys. */
  attachment?: CoalesceAttachmentMeta
}
28
+
29
/**
 * Split the attachment-bearing entries of a coalesce window into the primary
 * entry plus the capped list of extras.
 *
 * Arrival order is preserved, so a `[photo][text][photo]` burst keeps both
 * photos in the order sent. Entries past the cap are dropped here (the
 * gateway is expected to bypass them to their own turn upstream, so nothing
 * is actually lost).
 *
 * The effective cap is never below 1: a cap of 0, a negative cap, or `NaN`
 * would otherwise strip the primary and silently drop the only attachment.
 * Fractional caps are rounded down.
 *
 * @param entries - Buffered entries of one coalesce window, in arrival order.
 * @param hasAttachment - Predicate selecting the attachment-bearing entries.
 * @param maxAttachments - Maximum number of attachments kept, primary included.
 * @returns The first attachment-bearing entry as `primary` (`undefined` when
 *   there is none) and the following ones, up to the cap, as `extras`.
 */
export function splitCoalescedAttachments<T>(
  entries: T[],
  hasAttachment: (e: T) => boolean,
  maxAttachments: number,
): { primary: T | undefined; extras: T[] } {
  // `Math.max(1, NaN)` is NaN and `slice(0, NaN)` is empty, so NaN has to be
  // handled explicitly or it would discard every attachment.
  const cap = Number.isNaN(maxAttachments) ? 1 : Math.max(1, Math.floor(maxAttachments))
  const [primary, ...extras] = entries.filter(hasAttachment).slice(0, cap)
  return { primary, extras }
}
49
+
50
/**
 * Flatten the resolved extra attachments into numbered string meta fields.
 *
 * Numbering is positional: index 0 becomes `_2`, index 1 becomes `_3`, and so
 * on, because the unsuffixed keys belong to the primary attachment. An entry
 * contributes `image_path_<n>` when it has a non-empty `imagePath`, and
 * `attachment_kind_<n>` / `attachment_file_id_<n>` (plus the optional
 * `attachment_size_<n>`, `attachment_mime_<n>`, `attachment_name_<n>`) when it
 * has `attachment` metadata.
 *
 * @param resolved - Extra attachments, in the order they should be numbered.
 * @returns A flat record of the numbered meta fields; empty when there are no
 *   extras.
 */
export function buildExtraAttachmentMeta(
  resolved: ResolvedExtraAttachment[],
): Record<string, string> {
  return resolved.reduce<Record<string, string>>((meta, extra, index) => {
    const suffix = index + 2
    const { imagePath, attachment } = extra
    if (imagePath) {
      meta[`image_path_${suffix}`] = imagePath
    }
    if (attachment) {
      const { kind, file_id: fileId, size, mime, name } = attachment
      meta[`attachment_kind_${suffix}`] = kind
      meta[`attachment_file_id_${suffix}`] = fileId
      // `!= null` keeps a size of 0, unlike the truthiness checks on mime/name.
      if (size != null) {
        meta[`attachment_size_${suffix}`] = String(size)
      }
      if (mime) {
        meta[`attachment_mime_${suffix}`] = mime
      }
      if (name) {
        meta[`attachment_name_${suffix}`] = name
      }
    }
    return meta
  }, {})
}
@@ -35,6 +35,11 @@ import {
35
35
  type AskUserOutcome,
36
36
  } from '../ask-user.js'
37
37
  import { parseInterruptMarker } from '../interrupt-marker.js'
38
+ import {
39
+ ToolFlightTracker,
40
+ decideInterruptTiming,
41
+ resolveInterruptMaxWaitMs,
42
+ } from './interrupt-defer.js'
38
43
  import {
39
44
  resolveStickerSendArgs,
40
45
  resolveGifSendArgs,
@@ -51,6 +56,7 @@ import {
51
56
  } from '../telegraph.js'
52
57
  import { OutboundDedupCache } from '../recent-outbound-dedup.js'
53
58
  import { createInboundCoalescer, inboundCoalesceKey } from './inbound-coalesce.js'
59
+ import { splitCoalescedAttachments, buildExtraAttachmentMeta } from './coalesce-attachments.js'
54
60
  import { StatusReactionController } from '../status-reactions.js'
55
61
  import { DeferredDoneReactions } from '../reaction-defer.js'
56
62
  import { createWorkerActivityFeed } from '../worker-activity-feed.js'
@@ -770,6 +776,19 @@ type Access = {
770
776
  parseMode?: 'html' | 'markdownv2' | 'text'
771
777
  disableLinkPreview?: boolean
772
778
  coalescingGapMs?: number
779
+ /** A2: max media attachments folded into one coalesced turn. Default 1
780
+ * (single-attachment behaviour). Projected from
781
+ * channels.telegram.coalesce.max_attachments by scaffold. */
782
+ coalesceMaxAttachments?: number
783
+ /** Problem B: when true, a `!` interrupt that lands mid-tool-call is
784
+ * deferred until the in-flight tool finishes (bounded by
785
+ * interruptMaxWaitMs) before SIGINT + resume. Default false (fire
786
+ * synchronously). Projected from channels.telegram.interrupt.safe_boundary. */
787
+ interruptSafeBoundary?: boolean
788
+ /** Upper bound (ms) to wait for a safe boundary before firing a deferred
789
+ * interrupt anyway. Default 8000. Projected from
790
+ * channels.telegram.interrupt.max_wait_ms. */
791
+ interruptMaxWaitMs?: number
773
792
  statusReactions?: boolean
774
793
  historyEnabled?: boolean
775
794
  historyRetentionDays?: number
@@ -868,6 +887,9 @@ function readAccessFile(): Access {
868
887
  parseMode: parsed.parseMode,
869
888
  disableLinkPreview: parsed.disableLinkPreview,
870
889
  coalescingGapMs: parsed.coalescingGapMs,
890
+ coalesceMaxAttachments: parsed.coalesceMaxAttachments,
891
+ interruptSafeBoundary: parsed.interruptSafeBoundary,
892
+ interruptMaxWaitMs: parsed.interruptMaxWaitMs,
871
893
  statusReactions: parsed.statusReactions,
872
894
  historyEnabled: parsed.historyEnabled,
873
895
  historyRetentionDays: parsed.historyRetentionDays,
@@ -1380,6 +1402,78 @@ type CurrentTurn = {
1380
1402
 
1381
1403
  let currentTurn: CurrentTurn | null = null
1382
1404
 
1405
+ // Problem B — deferred safe-boundary interrupt.
1406
+ //
1407
+ // `toolFlightTracker` mirrors the session-event stream to know whether a
1408
+ // top-level tool call is open right now (an unsafe point to SIGINT). When the
1409
+ // `interrupt.safe_boundary` flag is on and a `!` lands mid-tool-call, we don't
1410
+ // fire the SIGINT — we stash the fully-built replacement inbound here and fire
1411
+ // it (SIGINT + deliver) at the next clean boundary (tool_result drains the
1412
+ // last open tool, or turn_end), or when the max-wait timer expires. Rapid
1413
+ // repeated `!` while one is pending coalesce: the latest body replaces the
1414
+ // stashed inbound, the original deadline is preserved (bounded wait).
1415
+ const toolFlightTracker = new ToolFlightTracker()
1416
+
1417
/**
 * A `!` interrupt that has been parked until a safe boundary. At most one is
 * held at a time, in `pendingDeferredInterrupt`.
 */
interface PendingDeferredInterrupt {
  /** Agent to interrupt and deliver to; taken from SWITCHROOM_AGENT_NAME, or '' when unset. */
  agentName: string
  /** Fully-built replacement inbound; overwritten when another `!` arrives while one is pending. */
  inboundMsg: InboundMessage
  /** Chat the interrupt came from; used in log lines. */
  chatId: string
  /** Message id of the latest `!` message, or null when there is none. */
  msgId: number | null
  /** Thread id of the originating message, if any. */
  threadId: number | undefined
  /** `Date.now()` at park time; used to report how long the interrupt waited. */
  registeredAt: number
  /** Max-wait timer that fires the interrupt with reason 'timeout'; cleared when the interrupt fires. */
  deadlineTimer: ReturnType<typeof setTimeout>
}
1426
+ let pendingDeferredInterrupt: PendingDeferredInterrupt | null = null
1427
+
1428
/**
 * Fire the stashed deferred interrupt, if there is one: SIGINT the turn via
 * tmux, then deliver the replacement body as a fresh inbound — the same two
 * primitives the synchronous `!` path uses, just gated on a clean boundary.
 *
 * Idempotent: the slot is nulled and the deadline timer cleared before the
 * first `await`, so a boundary event and the timeout cannot both fire the
 * same pending interrupt. A call with nothing pending is a no-op.
 *
 * A failed or throwing SIGINT is logged and does not prevent delivery of the
 * replacement body.
 *
 * @param reason - What triggered the firing; only used in the log line.
 */
async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<void> {
  const pending = pendingDeferredInterrupt
  if (pending == null) return
  // Claim the slot synchronously, before any await, so a concurrent trigger
  // sees nothing pending.
  pendingDeferredInterrupt = null
  clearTimeout(pending.deadlineTimer)

  const waitedMs = Date.now() - pending.registeredAt
  process.stderr.write(
    `telegram gateway: deferred-interrupt firing reason=${reason} agent=${pending.agentName} ` +
      `chat=${pending.chatId} waited_ms=${waitedMs} in_flight=${toolFlightTracker.inFlightCount()}\n`,
  )

  try {
    const { sendAgentInterrupt } = await import('../../src/agents/tmux.js')
    const r = sendAgentInterrupt({ agentName: pending.agentName })
    if ('ok' in r) {
      process.stderr.write(
        `telegram gateway: deferred-interrupt SIGINT delivered via tmux send-keys agent=${pending.agentName}\n`,
      )
    } else {
      process.stderr.write(
        `telegram gateway: deferred-interrupt SIGINT via tmux failed agent=${pending.agentName}: ${r.error}\n`,
      )
    }
  } catch (err) {
    // A caught value is not guaranteed to be an Error; narrow it so a thrown
    // string or object is logged as itself rather than as "undefined".
    const detail = err instanceof Error ? err.message : String(err)
    process.stderr.write(`telegram gateway: deferred-interrupt SIGINT failed: ${detail}\n`)
  }

  // Deliver the replacement body as a fresh turn — same sendToAgent +
  // buffer-on-miss primitive the synchronous interrupt carve-out uses at the
  // handleInbound delivery site.
  const delivered = ipcServer.sendToAgent(pending.agentName, pending.inboundMsg)
  if (delivered) {
    markClaudeBusyForInbound(pending.inboundMsg)
  } else {
    // No bridge accepted the message: queue it in the pending-inbound buffer.
    pendingInboundBuffer.push(pending.agentName, pending.inboundMsg)
    process.stderr.write(
      `telegram gateway: deferred-interrupt body buffered (bridge miss) agent=${pending.agentName} chat=${pending.chatId}\n`,
    )
  }
}
1476
+
1383
1477
  // #549 fix — preamble suppression for the answer-stream path.
1384
1478
  //
1385
1479
  // Background: assistant text emitted before a tool_use is "preamble"
@@ -1954,6 +2048,24 @@ function paintStatusReactionError(chatId: string, threadId: number | undefined):
1954
2048
  ctrl.setError()
1955
2049
  }
1956
2050
 
2051
/**
 * Move the current turn's status reaction from 🙏 (awaiting-approval) back to
 * the "thinking" state after a permission verdict has been dispatched.
 *
 * The turn was suspended inside the bridge's permission call, so
 * `currentTurn` still refers to it and the verdict resumes that same turn.
 * `setThinking()` re-arms the stall watchdog that `setAwaiting()` suspended,
 * so a genuine post-approval hang can still escalate; the glyph is replaced
 * by the real tool glyph once the resumed turn fires its next PreToolUse.
 * Non-terminal — the done reaction still waits for `turn_end`.
 *
 * Does nothing when there is no current turn or no status-reaction
 * controller registered for it.
 */
function resumeReactionAfterVerdict(): void {
  const active = currentTurn
  if (active == null) {
    return
  }
  const reactionKey = statusKey(active.sessionChatId, active.sessionThreadId)
  const controller = activeStatusReactions.get(reactionKey)
  controller?.setThinking()
}
2068
+
1957
2069
  function resolveThreadId(chat_id: string, explicit?: string | number | null): number | undefined {
1958
2070
  if (explicit != null) return Number(explicit)
1959
2071
  return chatThreadMap.get(chat_id)
@@ -2876,6 +2988,9 @@ const pendingStateReaper = setInterval(() => {
2876
2988
  // dispatchPermissionVerdict so it's buffered+redelivered too if
2877
2989
  // the bridge is also offline at sweep time.
2878
2990
  dispatchPermissionVerdict({ type: 'permission', requestId: k, behavior: 'deny' })
2991
+ // The auto-deny un-parks the suspended turn — flip 🙏 → working so
2992
+ // it doesn't sit on the awaiting glyph (or stall) after the timeout.
2993
+ resumeReactionAfterVerdict()
2879
2994
  process.stderr.write(
2880
2995
  `telegram gateway: permission TTL expired — auto-deny request=${k} ` +
2881
2996
  `tool=${v.tool_name} (no operator response in ` +
@@ -2993,28 +3108,43 @@ type AttachmentMeta = {
2993
3108
  name?: string
2994
3109
  }
2995
3110
 
3111
// One attachment slot carried by a coalesced message — primary or extra.
type CoalesceAttachment = {
  // Lazy downloader for a pre-fetched image; handleInbound awaits it and
  // uses the resolved value (possibly undefined) as the image path.
  downloadImage?: () => Promise<string | undefined>
  // By-reference attachment metadata, passed through without downloading.
  attachment?: AttachmentMeta
}
3116
+
2996
3117
  // CoalescePayload is what the InboundCoalescer carries per buffered message.
2997
3118
  // `ctx` must be the *latest* message's context (latest message_id, etc.) so
2998
3119
  // the merge function picks the last entry's ctx.
2999
3120
  //
3000
- // A single attachment-bearing message may ride along in a coalesce window
3001
- // (so a [text][photo] forward becomes one turn). The handleInboundCoalesced
3002
- // guards ensure AT MOST ONE attachment per window — albums (media_group_id)
3003
- // and a second attachment both bypass to their own turn — so the single
3004
- // `downloadImage`/`attachment` slot is never silently overwritten. Folding a
3005
- // whole album into one multi-attachment turn is the A2 follow-on.
3121
+ // Each inbound Telegram message carries at most one attachment, so an enqueued
3122
+ // payload sets at most `downloadImage`/`attachment`. The merge collects every
3123
+ // attachment-bearing entry in the window (up to coalesce.max_attachments): the
3124
+ // first becomes the primary `downloadImage`/`attachment`, the rest ride along
3125
+ // in `extraAttachments` (A2). When the cap is 1 (default), the
3126
+ // handleInboundCoalesced guards still bypass a second attachment / album part
3127
+ // to its own turn, so the single-attachment behaviour is byte-for-byte
3128
+ // preserved.
3006
3129
type CoalescePayload = {
  // Message text; the merge joins the non-empty texts of a window with '\n'.
  text: string
  // Context of the message; the merge keeps the LAST entry's ctx.
  ctx: Context
  // Primary attachment's lazy image downloader, if the message carried one.
  downloadImage?: () => Promise<string | undefined>
  // Primary attachment's by-reference metadata, if the message carried one.
  attachment?: AttachmentMeta
  // Set only by `merge`: the 2nd..Nth attachments folded into this turn.
  extraAttachments?: CoalesceAttachment[]
}
3012
3137
 
3013
- // Coalesce keys whose open window already holds an attachment-bearing entry.
3014
- // A second attachment for the same key bypasses coalescing (see
3015
- // handleInboundCoalesced) so the single-attachment merge can't drop a photo.
3016
- // Cleared on flush (below) and on the synchronous bypass path.
3017
- const bufferedAttachmentKeys = new Set<string>()
3138
+ // Count of attachment-bearing entries currently buffered per coalesce key.
3139
+ // A new attachment for a key whose count has reached the per-agent cap
3140
+ // (coalesce.max_attachments, default 1) bypasses coalescing (see
3141
+ // handleInboundCoalesced) so no media is dropped past the cap. Cleared on
3142
+ // flush (below) and on the synchronous bypass path.
3143
+ const bufferedAttachmentKeys = new Map<string, number>()
3144
+
3145
/**
 * Resolve the cap on attachments folded into one coalesced turn.
 *
 * Reads `coalesceMaxAttachments` from the access file on every call and
 * normalises it to a whole number of at least 1 (default 1 when unset).
 *
 * The normalisation matters because the same value feeds both the overflow
 * guard (`count >= cap`) and the merge's `slice(0, cap)`. With a fractional
 * cap such as 1.5 the guard would admit a second attachment that the slice
 * then truncates away, and with NaN the guard never trips while the slice
 * keeps nothing — either way media would be silently dropped.
 *
 * @returns The effective cap, always >= 1 and never fractional or NaN.
 */
function coalesceMaxAttachments(): number {
  // Number() mirrors the coercion Math.max would apply to the raw value.
  const configured = Number(loadAccess().coalesceMaxAttachments ?? 1)
  if (Number.isNaN(configured)) return 1
  return Math.max(1, Math.floor(configured))
}
3018
3148
 
3019
3149
  const inboundCoalescer = createInboundCoalescer<CoalescePayload>({
3020
3150
  // Read per-call from the access file so an operator-tuned
@@ -3026,21 +3156,36 @@ const inboundCoalescer = createInboundCoalescer<CoalescePayload>({
3026
3156
  gapMs: () => loadAccess().coalescingGapMs ?? 500,
3027
3157
  merge: (entries) => {
3028
3158
  const last = entries[entries.length - 1]
3029
- // At most one entry carries an attachment (guarded upstream), so pick
3030
- // whichever entry has it rather than blindly taking `last` — a
3031
- // [photo][text] burst keeps its image even though the last entry is
3032
- // text-only.
3033
- const withAttachment = entries.find((e) => e.downloadImage != null || e.attachment != null)
3159
+ // Collect every attachment-bearing entry in arrival order. The first is
3160
+ // the primary (unsuffixed image_path/attachment_* meta); the remainder,
3161
+ // capped at max_attachments, become numbered extras. A [photo][text]
3162
+ // burst keeps its image even though the last entry is text-only.
3163
+ const { primary, extras } = splitCoalescedAttachments(
3164
+ entries,
3165
+ (e) => e.downloadImage != null || e.attachment != null,
3166
+ coalesceMaxAttachments(),
3167
+ )
3034
3168
  return {
3035
- text: entries.map((e) => e.text).join('\n'),
3169
+ // Drop empty texts (e.g. caption-less album parts) so the join doesn't
3170
+ // emit blank lines between attachments.
3171
+ text: entries.map((e) => e.text).filter((t) => t.length > 0).join('\n'),
3036
3172
  ctx: last.ctx,
3037
- downloadImage: withAttachment?.downloadImage,
3038
- attachment: withAttachment?.attachment,
3173
+ downloadImage: primary?.downloadImage,
3174
+ attachment: primary?.attachment,
3175
+ extraAttachments: extras.length > 0
3176
+ ? extras.map((e) => ({ downloadImage: e.downloadImage, attachment: e.attachment }))
3177
+ : undefined,
3039
3178
  }
3040
3179
  },
3041
3180
  onFlush: (key, merged) => {
3042
3181
  bufferedAttachmentKeys.delete(key)
3043
- void handleInbound(merged.ctx, merged.text, merged.downloadImage, merged.attachment)
3182
+ void handleInbound(
3183
+ merged.ctx,
3184
+ merged.text,
3185
+ merged.downloadImage,
3186
+ merged.attachment,
3187
+ merged.extraAttachments,
3188
+ )
3044
3189
  },
3045
3190
  })
3046
3191
 
@@ -4107,6 +4252,14 @@ const ipcServer: IpcServer = createIpcServer({
4107
4252
  const threadHint = msg.threadId != null ? String(msg.threadId) : undefined
4108
4253
  progressDriver?.ingest(ev, chatHint, threadHint)
4109
4254
  handleSessionEvent(ev)
4255
+ // Problem B: keep the deferred-interrupt boundary tracker in lockstep with
4256
+ // the session stream (tool_use opens, tool_result/turn_end close). If a `!`
4257
+ // interrupt is parked waiting for a clean boundary and this event drains
4258
+ // the last in-flight tool, fire it now rather than waiting out the timer.
4259
+ toolFlightTracker.onEvent(ev)
4260
+ if (pendingDeferredInterrupt != null && !toolFlightTracker.isMidToolCall()) {
4261
+ void fireDeferredInterrupt('boundary')
4262
+ }
4110
4263
  // #1122 silence-poke: surface activity signals from the session
4111
4264
  // stream so the 300s framework-fallback message wording is honest
4112
4265
  // (thinking vs working, plus the longest-running in-flight tool).
@@ -4227,6 +4380,16 @@ const ipcServer: IpcServer = createIpcServer({
4227
4380
  process.stderr.write(`telegram gateway: permission_request send to ${chat_id} failed: ${e}\n`)
4228
4381
  })
4229
4382
  }
4383
+ // Park the turn's status reaction on 🙏 (awaiting your tap) and
4384
+ // suspend the stall watchdog — a turn blocked on the operator is not
4385
+ // stalled, so it must not degrade to 🥱/😨 while the card sits
4386
+ // unanswered. The verdict path (`resumeReactionAfterVerdict`) flips it
4387
+ // back to a working state the instant you tap.
4388
+ if (activeTurn != null) {
4389
+ activeStatusReactions
4390
+ .get(statusKey(activeTurn.sessionChatId, activeTurn.sessionThreadId))
4391
+ ?.setAwaiting()
4392
+ }
4230
4393
  },
4231
4394
 
4232
4395
  onHeartbeat(_client: IpcClient, _msg: HeartbeatMessage) {
@@ -8561,29 +8724,31 @@ async function handleInboundCoalesced(
8561
8724
  }
8562
8725
 
8563
8726
  const hasAttachment = downloadImage != null || attachment != null
8564
-
8565
- // Albums (media_group_id) are NOT coalesced in A1 — each part keeps its
8566
- // own turn exactly as before. The single-attachment merge can carry only
8567
- // one image, so folding a 3-photo album into one turn requires the
8568
- // multi-attachment inbound payload (the A2 follow-on). Bypass to preserve
8569
- // current per-part behavior and avoid dropping sibling photos.
8570
- if (hasAttachment && ctx.message?.media_group_id != null) {
8727
+ const maxAttachments = coalesceMaxAttachments()
8728
+
8729
+ // Albums (media_group_id): coalesce only when the cap allows >1 attachment
8730
+ // (A2). At the default cap of 1 each album part keeps its own turn exactly
8731
+ // as before — the single-attachment merge can't carry sibling photos, so
8732
+ // bypassing avoids dropping them. With a raised cap the parts share the
8733
+ // coalesce key and fold into one multi-attachment turn (the cap-overflow
8734
+ // bypass below catches parts past the cap).
8735
+ if (hasAttachment && ctx.message?.media_group_id != null && maxAttachments <= 1) {
8571
8736
  return handleInbound(ctx, text, downloadImage, attachment)
8572
8737
  }
8573
8738
 
8574
8739
  const from = ctx.from
8575
8740
  if (!from) return
8576
8741
 
8577
- // A second attachment landing in an already-open window would clobber the
8578
- // first under the single-attachment merge. Bypass it to its own turn so no
8579
- // media is silently dropped; A2's multi-attachment payload lifts this.
8742
+ // An attachment past the per-agent cap would be dropped by the capped merge.
8743
+ // Bypass it to its own turn so no media is silently lost. At the default
8744
+ // cap of 1 this fires on the SECOND attachment, preserving A1 behaviour.
8580
8745
  if (hasAttachment) {
8581
8746
  const probeKey = inboundCoalesceKey(
8582
8747
  String(ctx.chat!.id),
8583
8748
  ctx.message?.message_thread_id,
8584
8749
  String(from.id),
8585
8750
  )
8586
- if (bufferedAttachmentKeys.has(probeKey)) {
8751
+ if ((bufferedAttachmentKeys.get(probeKey) ?? 0) >= maxAttachments) {
8587
8752
  return handleInbound(ctx, text, downloadImage, attachment)
8588
8753
  }
8589
8754
  }
@@ -8620,9 +8785,10 @@ async function handleInboundCoalesced(
8620
8785
  // Coalescing disabled (window <= 0): flush immediately, preserving any
8621
8786
  // media this message carried.
8622
8787
  if (result.bypass) return handleInbound(ctx, text, downloadImage, attachment)
8623
- // Mark the open window as holding an attachment so a second attachment for
8624
- // this key bypasses rather than clobbers (cleared in onFlush).
8625
- if (hasAttachment) bufferedAttachmentKeys.add(key)
8788
+ // Count the open window's attachments so a third+ (or second, at the
8789
+ // default cap) bypasses rather than overflows the capped merge (cleared
8790
+ // in onFlush).
8791
+ if (hasAttachment) bufferedAttachmentKeys.set(key, (bufferedAttachmentKeys.get(key) ?? 0) + 1)
8626
8792
  }
8627
8793
 
8628
8794
  /**
@@ -8659,6 +8825,10 @@ async function handleInbound(
8659
8825
  text: string,
8660
8826
  downloadImage: (() => Promise<string | undefined>) | undefined,
8661
8827
  attachment?: AttachmentMeta,
8828
+ // A2: 2nd..Nth attachments folded into this coalesced turn. Each is
8829
+ // resolved (photos downloaded) and surfaced as numbered meta fields
8830
+ // (image_path_2, attachment_file_id_2, …) alongside the primary.
8831
+ extraAttachments?: CoalesceAttachment[],
8662
8832
  ): Promise<void> {
8663
8833
  const isTopicMessage = ctx.message?.is_topic_message ?? false
8664
8834
  const messageThreadId = ctx.message?.message_thread_id
@@ -8816,18 +8986,32 @@ async function handleInbound(
8816
8986
  // unauthorized senders never reach this code (gate() above).
8817
8987
  // Interrupt requires the same trust as sending a normal message.
8818
8988
  const interrupt = parseInterruptMarker(text)
8989
+ // Problem B: defer this `!`'s SIGINT to a safe boundary instead of firing it
8990
+ // synchronously below. Set only when the `interrupt.safe_boundary` flag is on
8991
+ // AND a top-level tool call is in flight AND the body is non-empty (an empty
8992
+ // `!` is an explicit halt-now and stays immediate). When set, we skip the
8993
+ // synchronous SIGINT here and stash the built inbound at the delivery site.
8994
+ let deferInterrupt = false
8819
8995
  if (interrupt.isInterrupt) {
8820
8996
  const agentName = process.env.SWITCHROOM_AGENT_NAME
8997
+ const access = loadAccess()
8998
+ deferInterrupt =
8999
+ !interrupt.emptyBody &&
9000
+ decideInterruptTiming({
9001
+ safeBoundaryEnabled: access.interruptSafeBoundary === true,
9002
+ midToolCall: toolFlightTracker.isMidToolCall(),
9003
+ }) === 'defer'
8821
9004
  process.stderr.write(
8822
9005
  `telegram gateway: interrupt-marker received chat_id=${chat_id} agent=${agentName ?? '-'} ` +
8823
- `body_len=${interrupt.body.length} empty=${interrupt.emptyBody}\n`,
9006
+ `body_len=${interrupt.body.length} empty=${interrupt.emptyBody} defer=${deferInterrupt} ` +
9007
+ `in_flight=${toolFlightTracker.inFlightCount()}\n`,
8824
9008
  )
8825
9009
  if (msgId != null) {
8826
9010
  void bot.api.setMessageReaction(chat_id, msgId, [
8827
9011
  { type: 'emoji', emoji: '⚡' as ReactionTypeEmoji['emoji'] },
8828
9012
  ]).catch(() => {})
8829
9013
  }
8830
- if (agentName) {
9014
+ if (agentName && !deferInterrupt) {
8831
9015
  try {
8832
9016
  // The gateway runs INSIDE the agent container in docker mode,
8833
9017
  // so calling `interruptAgent` (which probes `docker inspect`
@@ -8924,6 +9108,7 @@ async function handleInbound(
8924
9108
  requestId: request_id,
8925
9109
  behavior,
8926
9110
  })
9111
+ resumeReactionAfterVerdict()
8927
9112
  if (msgId != null) {
8928
9113
  const emoji = behavior === 'allow' ? '✅' : '❌'
8929
9114
  void bot.api.setMessageReaction(chat_id, msgId, [
@@ -9573,6 +9758,25 @@ async function handleInbound(
9573
9758
 
9574
9759
  const imagePath = downloadImage ? await downloadImage() : undefined
9575
9760
 
9761
+ // A2: resolve the extra attachments (2nd..Nth in a coalesced multi-media
9762
+ // burst). Photos are downloaded the same way as the primary; documents/
9763
+ // voice carry only attachment metadata (the agent fetches them via
9764
+ // download_attachment). Numbered meta fields below let the agent see each.
9765
+ const extraResolved: Array<{ imagePath?: string; attachment?: AttachmentMeta }> = []
9766
+ if (extraAttachments && extraAttachments.length > 0) {
9767
+ for (const ex of extraAttachments) {
9768
+ const exImagePath = ex.downloadImage ? await ex.downloadImage() : undefined
9769
+ extraResolved.push({ imagePath: exImagePath, attachment: ex.attachment })
9770
+ }
9771
+ }
9772
+ // Flatten the numbered meta fields once so the InboundMessage literal can
9773
+ // spread them. Primary is "1" (unsuffixed); extras start at "_2".
9774
+ const extraMeta = buildExtraAttachmentMeta(extraResolved)
9775
+ // Total attachment count (primary + extras) so the agent knows how many to
9776
+ // expect without probing for numbered fields. Only emitted when >1.
9777
+ const primaryHasAttachment = imagePath != null || attachment != null
9778
+ const attachmentCount = (primaryHasAttachment ? 1 : 0) + extraResolved.length
9779
+
9576
9780
  // Telegram-native reply context (issue #119). Same pattern as server.ts:
9577
9781
  // `replyToText` is raw (for SQLite); `replyToTextEscaped` is XML-escaped
9578
9782
  // (for channel meta).
@@ -9682,6 +9886,10 @@ async function handleInbound(
9682
9886
  ...(attachment.mime ? { attachment_mime: attachment.mime } : {}),
9683
9887
  ...(attachment.name ? { attachment_name: attachment.name } : {}),
9684
9888
  } : {}),
9889
+ // A2: numbered fields for the 2nd..Nth attachment + a total count so
9890
+ // the agent reads every item in a coalesced multi-media burst.
9891
+ ...(attachmentCount > 1 ? { attachment_count: String(attachmentCount) } : {}),
9892
+ ...extraMeta,
9685
9893
  },
9686
9894
  }
9687
9895
 
@@ -9713,6 +9921,40 @@ async function handleInbound(
9713
9921
  // line ~7357 already populated the Map for THIS inbound's turn;
9714
9922
  // reading the live size here would self-block (see the comment on
9715
9923
  // turnInFlightAtReceipt for the wedge symptom this fixes).
9924
+ // Problem B: a deferred `!` interrupt. The synchronous SIGINT was skipped
9925
+ // above (a tool was in flight) — claude is still working. Don't deliver the
9926
+ // replacement body now (it would race the live tool); stash the fully-built
9927
+ // inbound and let `fireDeferredInterrupt` SIGINT + deliver at the next clean
9928
+ // boundary, or when the max-wait timer expires. Rapid repeated `!` coalesce:
9929
+ // the latest body replaces the stashed inbound, the original deadline holds
9930
+ // so the wait stays bounded.
9931
+ if (deferInterrupt) {
9932
+ const selfAgentDefer = process.env.SWITCHROOM_AGENT_NAME ?? ''
9933
+ if (pendingDeferredInterrupt != null) {
9934
+ pendingDeferredInterrupt.inboundMsg = inboundMsg
9935
+ pendingDeferredInterrupt.msgId = msgId ?? null
9936
+ process.stderr.write(
9937
+ `telegram gateway: deferred-interrupt coalesced (replacing pending body) agent=${selfAgentDefer} chat=${chat_id} msg=${msgId ?? '-'}\n`,
9938
+ )
9939
+ } else {
9940
+ const maxWaitMs = resolveInterruptMaxWaitMs(loadAccess().interruptMaxWaitMs)
9941
+ pendingDeferredInterrupt = {
9942
+ agentName: selfAgentDefer,
9943
+ inboundMsg,
9944
+ chatId: chat_id,
9945
+ msgId: msgId ?? null,
9946
+ threadId: messageThreadId ?? undefined,
9947
+ registeredAt: Date.now(),
9948
+ deadlineTimer: setTimeout(() => { void fireDeferredInterrupt('timeout') }, maxWaitMs),
9949
+ }
9950
+ process.stderr.write(
9951
+ `telegram gateway: deferred-interrupt parked agent=${selfAgentDefer} chat=${chat_id} ` +
9952
+ `msg=${msgId ?? '-'} max_wait_ms=${maxWaitMs} in_flight=${toolFlightTracker.inFlightCount()}\n`,
9953
+ )
9954
+ }
9955
+ return
9956
+ }
9957
+
9716
9958
  if (
9717
9959
  decideInboundDelivery({
9718
9960
  turnInFlight: turnInFlightAtReceipt,
@@ -11759,6 +12001,7 @@ async function handlePermissionSlash(ctx: Context, behavior: 'allow' | 'deny'):
11759
12001
  }
11760
12002
  // Forward to connected bridges — same IPC the button handler uses.
11761
12003
  dispatchPermissionVerdict({ type: 'permission', requestId: request_id, behavior })
12004
+ resumeReactionAfterVerdict()
11762
12005
  pendingPermissions.delete(request_id)
11763
12006
  process.stderr.write(
11764
12007
  `[telegram gateway] slash-${behavior} request_id=${request_id} tool=${details.tool_name} by=${senderId}\n`,
@@ -15409,6 +15652,10 @@ bot.on('callback_query:data', async ctx => {
15409
15652
  behavior: 'allow',
15410
15653
  rule: chosen.rule,
15411
15654
  })
15655
+ // The turn resumes now (independent of the host persistence round-trip
15656
+ // below). Un-park 🙏 → working immediately so the operator sees the
15657
+ // agent continue while hostd writes the durable rule.
15658
+ resumeReactionAfterVerdict()
15412
15659
 
15413
15660
  // (3) Decide the persistence path. tryHostdDispatch returns
15414
15661
  // "not-configured" when host_control is disabled or the per-agent
@@ -15562,7 +15809,16 @@ bot.on('callback_query:data', async ctx => {
15562
15809
 
15563
15810
  // Forward permission decision to connected bridges
15564
15811
  pendingPermissions.delete(request_id)
15565
- const label = behavior === 'allow' ? '✅ Allowed' : '❌ Denied'
15812
+ // Deterministic "▶️ resuming…" beat (framework-posted, not model text):
15813
+ // the verdict un-parks the suspended turn, so confirm to the operator
15814
+ // that the agent received it and is continuing — closing the "is it
15815
+ // working or did my tap do nothing?" gap. Allow and deny both resume the
15816
+ // turn (deny just hands claude a refusal it then handles).
15817
+ const resumeAgent = process.env.SWITCHROOM_AGENT_NAME
15818
+ const resumeBeat = resumeAgent
15819
+ ? `▶️ ${escapeHtmlForTg(resumeAgent)} resuming…`
15820
+ : '▶️ resuming…'
15821
+ const label = `${behavior === 'allow' ? '✅ Allowed' : '❌ Denied'} · ${resumeBeat}`
15566
15822
  // HTML-escape the source text — same hazard as the scope-commit and
15567
15823
  // recent-denial paths above. The permission card body
15568
15824
  // (formatPermissionCardBody) appends claude-supplied `description`
@@ -15590,6 +15846,9 @@ bot.on('callback_query:data', async ctx => {
15590
15846
  requestId: request_id,
15591
15847
  behavior: behavior as 'allow' | 'deny',
15592
15848
  })
15849
+ // Un-park the status reaction: 🙏 → working, re-arming the stall
15850
+ // watchdog that setAwaiting() suspended.
15851
+ resumeReactionAfterVerdict()
15593
15852
  },
15594
15853
  })
15595
15854
  })