switchroom 0.14.17 → 0.14.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1954,6 +1954,24 @@ function paintStatusReactionError(chatId: string, threadId: number | undefined):
1954
1954
  ctrl.setError()
1955
1955
  }
1956
1956
 
1957
+ /**
1958
+ * Flip the current turn's status reaction off 🙏 (awaiting-approval) back
1959
+ * to a working glyph once a permission verdict has been dispatched. The
1960
+ * turn was suspended *inside* the bridge's permission call, so `currentTurn`
1961
+ * still points at it; the verdict un-parks claude and it resumes the SAME
1962
+ * turn. `setThinking()` re-arms the stall watchdog that `setAwaiting()`
1963
+ * suspended, so a genuine post-approval hang still promotes to 🥱/😨, and
1964
+ * it is replaced by the real tool glyph (✍/⚡) as soon as the resumed turn
1965
+ * fires its next PreToolUse. Non-terminal — 👍 still waits for `turn_end`.
1966
+ */
1967
+ function resumeReactionAfterVerdict(): void {
1968
+ const turn = currentTurn
1969
+ if (turn == null) return
1970
+ activeStatusReactions
1971
+ .get(statusKey(turn.sessionChatId, turn.sessionThreadId))
1972
+ ?.setThinking()
1973
+ }
1974
+
1957
1975
  function resolveThreadId(chat_id: string, explicit?: string | number | null): number | undefined {
1958
1976
  if (explicit != null) return Number(explicit)
1959
1977
  return chatThreadMap.get(chat_id)
@@ -2876,6 +2894,9 @@ const pendingStateReaper = setInterval(() => {
2876
2894
  // dispatchPermissionVerdict so it's buffered+redelivered too if
2877
2895
  // the bridge is also offline at sweep time.
2878
2896
  dispatchPermissionVerdict({ type: 'permission', requestId: k, behavior: 'deny' })
2897
+ // The auto-deny un-parks the suspended turn — flip 🙏 → working so
2898
+ // it doesn't sit on the awaiting glyph (or stall) after the timeout.
2899
+ resumeReactionAfterVerdict()
2879
2900
  process.stderr.write(
2880
2901
  `telegram gateway: permission TTL expired — auto-deny request=${k} ` +
2881
2902
  `tool=${v.tool_name} (no operator response in ` +
@@ -2997,10 +3018,12 @@ type AttachmentMeta = {
2997
3018
  // `ctx` must be the *latest* message's context (latest message_id, etc.) so
2998
3019
  // the merge function picks the last entry's ctx.
2999
3020
  //
3000
- // Image/attachment-bearing messages bypass the coalescer entirely (see
3001
- // handleInboundCoalesced), so those fields stay optional and unused on the
3002
- // coalesce path; preserved for future use if we ever want to coalesce
3003
- // image+text bursts.
3021
+ // A single attachment-bearing message may ride along in a coalesce window
3022
+ // (so a [text][photo] forward becomes one turn). The handleInboundCoalesced
3023
+ // guards ensure AT MOST ONE attachment per window — albums (media_group_id)
3024
+ // and a second attachment both bypass to their own turn — so the single
3025
+ // `downloadImage`/`attachment` slot is never silently overwritten. Folding a
3026
+ // whole album into one multi-attachment turn is the A2 follow-on.
3004
3027
  type CoalescePayload = {
3005
3028
  text: string
3006
3029
  ctx: Context
@@ -3008,24 +3031,36 @@ type CoalescePayload = {
3008
3031
  attachment?: AttachmentMeta
3009
3032
  }
3010
3033
 
3034
+ // Coalesce keys whose open window already holds an attachment-bearing entry.
3035
+ // A second attachment for the same key bypasses coalescing (see
3036
+ // handleInboundCoalesced) so the single-attachment merge can't drop a photo.
3037
+ // Cleared on flush (below); every bypass path returns before adding a key.
3038
+ const bufferedAttachmentKeys = new Set<string>()
3039
+
3011
3040
  const inboundCoalescer = createInboundCoalescer<CoalescePayload>({
3012
- // Read per-call from the access file so `/access set-coalesce N` takes
3013
- // effect on the next message without restarting the gateway.
3041
+ // Read per-call from the access file so an operator-tuned
3042
+ // channels.telegram.coalesce.window_ms (projected to coalescingGapMs by
3043
+ // scaffold) takes effect on the next message after apply+restart.
3014
3044
  //
3015
3045
  // Default lowered 1500 → 500 in #553 PR 3 to shrink the gateway-side
3016
- // contribution to first-real-text latency. Operators can still tune
3017
- // higher via `/access set-coalesce N` or the access file.
3046
+ // contribution to first-real-text latency.
3018
3047
  gapMs: () => loadAccess().coalescingGapMs ?? 500,
3019
3048
  merge: (entries) => {
3020
3049
  const last = entries[entries.length - 1]
3050
+ // At most one entry carries an attachment (guarded upstream), so pick
3051
+ // whichever entry has it rather than blindly taking `last` — a
3052
+ // [photo][text] burst keeps its image even though the last entry is
3053
+ // text-only.
3054
+ const withAttachment = entries.find((e) => e.downloadImage != null || e.attachment != null)
3021
3055
  return {
3022
3056
  text: entries.map((e) => e.text).join('\n'),
3023
3057
  ctx: last.ctx,
3024
- downloadImage: last.downloadImage,
3025
- attachment: last.attachment,
3058
+ downloadImage: withAttachment?.downloadImage,
3059
+ attachment: withAttachment?.attachment,
3026
3060
  }
3027
3061
  },
3028
- onFlush: (_key, merged) => {
3062
+ onFlush: (key, merged) => {
3063
+ bufferedAttachmentKeys.delete(key)
3029
3064
  void handleInbound(merged.ctx, merged.text, merged.downloadImage, merged.attachment)
3030
3065
  },
3031
3066
  })
@@ -4213,6 +4248,16 @@ const ipcServer: IpcServer = createIpcServer({
4213
4248
  process.stderr.write(`telegram gateway: permission_request send to ${chat_id} failed: ${e}\n`)
4214
4249
  })
4215
4250
  }
4251
+ // Park the turn's status reaction on 🙏 (awaiting your tap) and
4252
+ // suspend the stall watchdog — a turn blocked on the operator is not
4253
+ // stalled, so it must not degrade to 🥱/😨 while the card sits
4254
+ // unanswered. The verdict path (`resumeReactionAfterVerdict`) flips it
4255
+ // back to a working state the instant you tap.
4256
+ if (activeTurn != null) {
4257
+ activeStatusReactions
4258
+ .get(statusKey(activeTurn.sessionChatId, activeTurn.sessionThreadId))
4259
+ ?.setAwaiting()
4260
+ }
4216
4261
  },
4217
4262
 
4218
4263
  onHeartbeat(_client: IpcClient, _msg: HeartbeatMessage) {
@@ -8534,24 +8579,46 @@ async function handleInboundCoalesced(
8534
8579
  downloadImage: (() => Promise<string | undefined>) | undefined,
8535
8580
  attachment?: AttachmentMeta,
8536
8581
  ): Promise<void> {
8537
- // Image/attachment-bearing messages bypass coalescing — preserves the
8538
- // legacy invariant that media never gets merged with sibling text.
8539
- if (downloadImage || attachment) return handleInbound(ctx, text, downloadImage, attachment)
8540
-
8541
- // `!`-prefix interrupt (#575) ALSO bypasses coalescing. If we let an
8582
+ // `!`-prefix interrupt (#575) bypasses coalescing. If we let an
8542
8583
  // interrupt sit in the coalesce window, an earlier non-`!` message
8543
8584
  // arriving in the same window would prepend itself and the marker
8544
8585
  // would no longer be at position 0 — handleInbound's parser would
8545
8586
  // miss it and the user's interrupt would silently get merged into a
8546
8587
  // normal turn. Bypass to handleInbound directly so the marker
8547
- // stays at the start of the text.
8588
+ // stays at the start of the text. Checked first so a `!`-prefixed
8589
+ // media caption still interrupts.
8548
8590
  if (parseInterruptMarker(text).isInterrupt) {
8549
- return handleInbound(ctx, text, undefined, undefined)
8591
+ return handleInbound(ctx, text, downloadImage, attachment)
8592
+ }
8593
+
8594
+ const hasAttachment = downloadImage != null || attachment != null
8595
+
8596
+ // Albums (media_group_id) are NOT coalesced in A1 — each part keeps its
8597
+ // own turn exactly as before. The single-attachment merge can carry only
8598
+ // one image, so folding a 3-photo album into one turn requires the
8599
+ // multi-attachment inbound payload (the A2 follow-on). Bypass to preserve
8600
+ // current per-part behavior and avoid dropping sibling photos.
8601
+ if (hasAttachment && ctx.message?.media_group_id != null) {
8602
+ return handleInbound(ctx, text, downloadImage, attachment)
8550
8603
  }
8551
8604
 
8552
8605
  const from = ctx.from
8553
8606
  if (!from) return
8554
8607
 
8608
+ // A second attachment landing in an already-open window would clobber the
8609
+ // first under the single-attachment merge. Bypass it to its own turn so no
8610
+ // media is silently dropped; A2's multi-attachment payload lifts this.
8611
+ if (hasAttachment) {
8612
+ const probeKey = inboundCoalesceKey(
8613
+ String(ctx.chat!.id),
8614
+ ctx.message?.message_thread_id,
8615
+ String(from.id),
8616
+ )
8617
+ if (bufferedAttachmentKeys.has(probeKey)) {
8618
+ return handleInbound(ctx, text, downloadImage, attachment)
8619
+ }
8620
+ }
8621
+
8555
8622
  // F2 fix (#553): fire 👀 reaction on RAW arrival, before the coalesce
8556
8623
  // wait blocks first paint. Pre-fix, the controller's setQueued() inside
8557
8624
  // handleInbound only ran AFTER the coalesce flush (default gapMs=1500),
@@ -8581,7 +8648,12 @@ async function handleInboundCoalesced(
8581
8648
  String(from.id),
8582
8649
  )
8583
8650
  const result = inboundCoalescer.enqueue(key, { text, ctx, downloadImage, attachment })
8584
- if (result.bypass) return handleInbound(ctx, text, undefined, undefined)
8651
+ // Coalescing disabled (window <= 0): flush immediately, preserving any
8652
+ // media this message carried.
8653
+ if (result.bypass) return handleInbound(ctx, text, downloadImage, attachment)
8654
+ // Mark the open window as holding an attachment so a second attachment for
8655
+ // this key bypasses rather than clobbers (cleared in onFlush).
8656
+ if (hasAttachment) bufferedAttachmentKeys.add(key)
8585
8657
  }
8586
8658
 
8587
8659
  /**
@@ -8883,6 +8955,7 @@ async function handleInbound(
8883
8955
  requestId: request_id,
8884
8956
  behavior,
8885
8957
  })
8958
+ resumeReactionAfterVerdict()
8886
8959
  if (msgId != null) {
8887
8960
  const emoji = behavior === 'allow' ? '✅' : '❌'
8888
8961
  void bot.api.setMessageReaction(chat_id, msgId, [
@@ -11718,6 +11791,7 @@ async function handlePermissionSlash(ctx: Context, behavior: 'allow' | 'deny'):
11718
11791
  }
11719
11792
  // Forward to connected bridges — same IPC the button handler uses.
11720
11793
  dispatchPermissionVerdict({ type: 'permission', requestId: request_id, behavior })
11794
+ resumeReactionAfterVerdict()
11721
11795
  pendingPermissions.delete(request_id)
11722
11796
  process.stderr.write(
11723
11797
  `[telegram gateway] slash-${behavior} request_id=${request_id} tool=${details.tool_name} by=${senderId}\n`,
@@ -15368,6 +15442,10 @@ bot.on('callback_query:data', async ctx => {
15368
15442
  behavior: 'allow',
15369
15443
  rule: chosen.rule,
15370
15444
  })
15445
+ // The turn resumes now (independent of the host persistence round-trip
15446
+ // below). Un-park 🙏 → working immediately so the operator sees the
15447
+ // agent continue while hostd writes the durable rule.
15448
+ resumeReactionAfterVerdict()
15371
15449
 
15372
15450
  // (3) Decide the persistence path. tryHostdDispatch returns
15373
15451
  // "not-configured" when host_control is disabled or the per-agent
@@ -15521,7 +15599,16 @@ bot.on('callback_query:data', async ctx => {
15521
15599
 
15522
15600
  // Forward permission decision to connected bridges
15523
15601
  pendingPermissions.delete(request_id)
15524
- const label = behavior === 'allow' ? '✅ Allowed' : '❌ Denied'
15602
+ // Deterministic "▶️ resuming…" beat (framework-posted, not model text):
15603
+ // the verdict un-parks the suspended turn, so confirm to the operator
15604
+ // that the agent received it and is continuing — closing the "is it
15605
+ // working or did my tap do nothing?" gap. Allow and deny both resume the
15606
+ // turn (deny just hands claude a refusal it then handles).
15607
+ const resumeAgent = process.env.SWITCHROOM_AGENT_NAME
15608
+ const resumeBeat = resumeAgent
15609
+ ? `▶️ ${escapeHtmlForTg(resumeAgent)} resuming…`
15610
+ : '▶️ resuming…'
15611
+ const label = `${behavior === 'allow' ? '✅ Allowed' : '❌ Denied'} · ${resumeBeat}`
15525
15612
  // HTML-escape the source text — same hazard as the scope-commit and
15526
15613
  // recent-denial paths above. The permission card body
15527
15614
  // (formatPermissionCardBody) appends claude-supplied `description`
@@ -15549,6 +15636,9 @@ bot.on('callback_query:data', async ctx => {
15549
15636
  requestId: request_id,
15550
15637
  behavior: behavior as 'allow' | 'deny',
15551
15638
  })
15639
+ // Un-park the status reaction: 🙏 → working, re-arming the stall
15640
+ // watchdog that setAwaiting() suspended.
15641
+ resumeReactionAfterVerdict()
15552
15642
  },
15553
15643
  })
15554
15644
  })
@@ -15560,7 +15650,7 @@ bot.on('message:text', async ctx => {
15560
15650
 
15561
15651
  bot.on('message:photo', async ctx => {
15562
15652
  const caption = ctx.message.caption ?? '(photo)'
15563
- await handleInbound(ctx, caption, async () => {
15653
+ await handleInboundCoalesced(ctx, caption, async () => {
15564
15654
  const photos = ctx.message.photo
15565
15655
  const best = photos[photos.length - 1]
15566
15656
  try {
@@ -15603,7 +15693,7 @@ bot.on('message:photo', async ctx => {
15603
15693
  bot.on('message:document', async ctx => {
15604
15694
  const doc = ctx.message.document
15605
15695
  const name = safeName(doc.file_name)
15606
- await handleInbound(ctx, ctx.message.caption ?? `(document: ${name ?? 'file'})`, undefined, { kind: 'document', file_id: doc.file_id, size: doc.file_size, mime: doc.mime_type, name })
15696
+ await handleInboundCoalesced(ctx, ctx.message.caption ?? `(document: ${name ?? 'file'})`, undefined, { kind: 'document', file_id: doc.file_id, size: doc.file_size, mime: doc.mime_type, name })
15607
15697
  })
15608
15698
 
15609
15699
  bot.on('message:voice', async ctx => {
@@ -15626,7 +15716,7 @@ bot.on('message:voice', async ctx => {
15626
15716
  const text = ctx.message.caption
15627
15717
  ? `${ctx.message.caption}\n\n[voice transcript] ${transcript}`
15628
15718
  : `[voice transcript] ${transcript}`
15629
- await handleInbound(ctx, text, undefined, {
15719
+ await handleInboundCoalesced(ctx, text, undefined, {
15630
15720
  kind: 'voice',
15631
15721
  file_id: voice.file_id,
15632
15722
  size: voice.file_size,
@@ -15636,7 +15726,7 @@ bot.on('message:voice', async ctx => {
15636
15726
  }
15637
15727
  // Fall through to the legacy path on transcription failure.
15638
15728
  }
15639
- await handleInbound(ctx, ctx.message.caption ?? '(voice message)', undefined, { kind: 'voice', file_id: voice.file_id, size: voice.file_size, mime: voice.mime_type })
15729
+ await handleInboundCoalesced(ctx, ctx.message.caption ?? '(voice message)', undefined, { kind: 'voice', file_id: voice.file_id, size: voice.file_size, mime: voice.mime_type })
15640
15730
  })
15641
15731
 
15642
15732
  /**
@@ -15728,17 +15818,17 @@ async function maybeTranscribeVoice(
15728
15818
  bot.on('message:audio', async ctx => {
15729
15819
  const audio = ctx.message.audio
15730
15820
  const name = safeName(audio.file_name)
15731
- await handleInbound(ctx, ctx.message.caption ?? `(audio: ${safeName(audio.title) ?? name ?? 'audio'})`, undefined, { kind: 'audio', file_id: audio.file_id, size: audio.file_size, mime: audio.mime_type, name })
15821
+ await handleInboundCoalesced(ctx, ctx.message.caption ?? `(audio: ${safeName(audio.title) ?? name ?? 'audio'})`, undefined, { kind: 'audio', file_id: audio.file_id, size: audio.file_size, mime: audio.mime_type, name })
15732
15822
  })
15733
15823
 
15734
15824
  bot.on('message:video', async ctx => {
15735
15825
  const video = ctx.message.video
15736
- await handleInbound(ctx, ctx.message.caption ?? '(video)', undefined, { kind: 'video', file_id: video.file_id, size: video.file_size, mime: video.mime_type, name: safeName(video.file_name) })
15826
+ await handleInboundCoalesced(ctx, ctx.message.caption ?? '(video)', undefined, { kind: 'video', file_id: video.file_id, size: video.file_size, mime: video.mime_type, name: safeName(video.file_name) })
15737
15827
  })
15738
15828
 
15739
15829
  bot.on('message:video_note', async ctx => {
15740
15830
  const vn = ctx.message.video_note
15741
- await handleInbound(ctx, '(video note)', undefined, { kind: 'video_note', file_id: vn.file_id, size: vn.file_size })
15831
+ await handleInboundCoalesced(ctx, '(video note)', undefined, { kind: 'video_note', file_id: vn.file_id, size: vn.file_size })
15742
15832
  })
15743
15833
 
15744
15834
  bot.on('message:sticker', async ctx => {
@@ -15753,7 +15843,7 @@ bot.on('message:sticker', async ctx => {
15753
15843
  if (sticker.emoji) parts.push(sticker.emoji)
15754
15844
  if (sticker.set_name) parts.push(`from "${sticker.set_name}"`)
15755
15845
  const text = parts.length > 0 ? `(sticker — ${parts.join(' ')})` : '(sticker)'
15756
- await handleInbound(ctx, text, undefined, { kind: 'sticker', file_id: sticker.file_id, size: sticker.file_size })
15846
+ await handleInboundCoalesced(ctx, text, undefined, { kind: 'sticker', file_id: sticker.file_id, size: sticker.file_size })
15757
15847
  })
15758
15848
 
15759
15849
  bot.on('message:animation', async ctx => {
@@ -15766,7 +15856,7 @@ bot.on('message:animation', async ctx => {
15766
15856
  const animation = ctx.message.animation
15767
15857
  const caption = ctx.message.caption
15768
15858
  const text = caption ? `(gif) ${caption}` : '(gif)'
15769
- await handleInbound(ctx, text, undefined, {
15859
+ await handleInboundCoalesced(ctx, text, undefined, {
15770
15860
  kind: 'animation',
15771
15861
  file_id: animation.file_id,
15772
15862
  size: animation.file_size,
@@ -34,10 +34,11 @@ export interface InboundCoalescerOptions<T> {
34
34
  * `{ bypass: true }` and the caller should flush immediately).
35
35
  *
36
36
  * Pass a function (`() => number`) instead of a number when the
37
- * window is config-driven and the operator can change it at runtime —
38
- * gateway.ts reads it per-call from the access file so a
39
- * `/access set-coalesce 500` takes effect on the next message
40
- * without restarting the gateway.
37
+ * window is config-driven: gateway.ts reads it per-call from the
38
+ * access file (projected there from
39
+ * `channels.telegram.coalesce.window_ms` by the scaffold) so an
40
+ * operator-tuned window takes effect on the next message after
41
+ * apply + restart.
41
42
  */
42
43
  gapMs: number | (() => number)
43
44
  /**
@@ -146,9 +147,9 @@ export function createInboundCoalescer<T>(opts: InboundCoalescerOptions<T>): Inb
146
147
  * CPO decision #9 ratified 2026-05-27)
147
148
  *
148
149
  * `threadId` collapses `null`/`undefined`/`0` to `_` via the same
149
- * convention as `chatKey()`. The 1.5s coalesce window is per-topic
150
- * intent ("user sends 3 sentences as one thought") — applying it
151
- * cross-topic merges genuinely separate conversations.
150
+ * convention as `chatKey()`. The coalesce window (default 500ms) is
151
+ * per-topic intent ("user sends 3 sentences as one thought") — applying
152
+ * it cross-topic merges genuinely separate conversations.
152
153
  */
153
154
  export function inboundCoalesceKey(
154
155
  chatId: string,
@@ -91,28 +91,119 @@ export function redeliverBufferedInbound(
91
91
  const pending = buffer.drain(agent)
92
92
  let redelivered = 0
93
93
  let rebuffered = 0
94
- for (const msg of pending) {
94
+ // Collapse consecutive same-sender Telegram user messages into one turn
95
+ // (see planBufferedRedelivery) so a forwarded burst that spanned a turn
96
+ // boundary doesn't fan out into N sequential replies. System inbounds
97
+ // (vault grants, approvals, cron, handbacks — anything with meta.source)
98
+ // are never merged and are delivered individually exactly as before.
99
+ for (const { merged, originals } of planBufferedRedelivery(pending)) {
95
100
  let delivered = false
96
101
  try {
97
- delivered = send(msg)
102
+ delivered = send(merged)
98
103
  } catch {
99
104
  delivered = false
100
105
  }
101
106
  if (delivered) {
102
- redelivered++
103
107
  // Confirmed delivery to a live registered bridge → the durable
104
- // promise is kept; tombstone the spool entry so it is NOT
105
- // boot-replayed again. A miss leaves it spooled (re-pushed below
106
- // AND still live in the spool) for the next drain / escalation.
107
- spool?.ack(msg)
108
+ // promise is kept; tombstone EVERY original's spool entry so none is
109
+ // boot-replayed again. The merged message isn't itself spooled — the
110
+ // originals are, so we ack by original identity.
111
+ for (const o of originals) spool?.ack(o)
112
+ redelivered += originals.length
108
113
  } else {
109
- buffer.push(agent, msg)
110
- rebuffered++
114
+ // Re-buffer the originals (not the merged synthetic) so the spool
115
+ // identity is preserved and the next drain re-merges them losslessly.
116
+ for (const o of originals) buffer.push(agent, o)
117
+ rebuffered += originals.length
111
118
  }
112
119
  }
113
120
  return { drained: pending.length, redelivered, rebuffered }
114
121
  }
115
122
 
123
+ /** True when `msg` is an ordinary Telegram user message eligible to be
124
+ * merged with adjacent siblings. System inbounds (cron, vault grants,
125
+ * approvals, subagent handbacks, warmup, reaction triggers) all tag a
126
+ * `meta.source`; the user-message inbound built in gateway.ts sets none.
127
+ * Restricting to source-less inbounds keeps merge-on-drain away from the
128
+ * #1150 wake-up class entirely. */
129
+ function isMergeableUserInbound(msg: InboundMessage): boolean {
130
+ return msg.type === 'inbound' && (msg.meta == null || msg.meta.source == null)
131
+ }
132
+
133
+ function inboundHasMedia(msg: InboundMessage): boolean {
134
+ return msg.imagePath != null || msg.attachment != null
135
+ }
136
+
137
+ /**
138
+ * Plan how a drained buffer is re-delivered. Walks `pending` in arrival
139
+ * order and groups runs of consecutive messages that:
140
+ * - are both ordinary Telegram user messages (no meta.source), AND
141
+ * - share the same (chatId, threadId, userId), AND
142
+ * - would not put two attachments in one turn (A1 carries a single
143
+ * attachment; a second media starts a new run so nothing is dropped).
144
+ *
145
+ * Each run collapses to one merged InboundMessage (texts joined by '\n',
146
+ * the run's single attachment carried, the LAST message's identity/meta
147
+ * kept as the turn anchor). A run of one passes through unchanged. The
148
+ * returned `originals` preserve spool identity for ack / re-buffer.
149
+ *
150
+ * Pure + deterministic so it can be exhaustively fuzzed.
151
+ */
152
+ export function planBufferedRedelivery(
153
+ pending: InboundMessage[],
154
+ ): { merged: InboundMessage; originals: InboundMessage[] }[] {
155
+ const out: { merged: InboundMessage; originals: InboundMessage[] }[] = []
156
+ let run: InboundMessage[] = []
157
+ let runHasMedia = false
158
+
159
+ const sameTarget = (a: InboundMessage, b: InboundMessage): boolean =>
160
+ a.chatId === b.chatId &&
161
+ (a.threadId ?? null) === (b.threadId ?? null) &&
162
+ a.userId === b.userId
163
+
164
+ const flush = (): void => {
165
+ if (run.length === 0) return
166
+ out.push({ merged: run.length === 1 ? run[0]! : mergeRun(run), originals: run })
167
+ run = []
168
+ runHasMedia = false
169
+ }
170
+
171
+ for (const msg of pending) {
172
+ const msgHasMedia = inboundHasMedia(msg)
173
+ const canJoin =
174
+ run.length > 0 &&
175
+ isMergeableUserInbound(msg) &&
176
+ isMergeableUserInbound(run[run.length - 1]!) &&
177
+ sameTarget(run[run.length - 1]!, msg) &&
178
+ !(runHasMedia && msgHasMedia)
179
+ if (!canJoin) flush()
180
+ run.push(msg)
181
+ runHasMedia = runHasMedia || msgHasMedia
182
+ }
183
+ flush()
184
+ return out
185
+ }
186
+
187
+ /** Collapse a >1 run into a single turn. The newest message anchors the
188
+ * turn (its messageId/ts/user/meta); texts join in arrival order; the
189
+ * single attachment (if any) rides along from whichever message carried
190
+ * it. Caller guarantees the run is mergeable + has at most one media. */
191
+ function mergeRun(run: InboundMessage[]): InboundMessage {
192
+ const last = run[run.length - 1]!
193
+ const mediaEntry = run.find(inboundHasMedia)
194
+ const merged: InboundMessage = {
195
+ ...last,
196
+ text: run.map((m) => m.text).join('\n'),
197
+ }
198
+ // Re-seat the single attachment/imagePath from the entry that owns it
199
+ // (which may not be `last`), or strip them if the run is text-only.
200
+ delete merged.imagePath
201
+ delete merged.attachment
202
+ if (mediaEntry?.imagePath != null) merged.imagePath = mediaEntry.imagePath
203
+ if (mediaEntry?.attachment != null) merged.attachment = mediaEntry.attachment
204
+ return merged
205
+ }
206
+
116
207
  /**
117
208
  * One opportunistic idle-drain tick. The third drain trigger, beside
118
209
  * `onClientRegistered` (bridge re-register) and the silence-poke
@@ -53,6 +53,7 @@ export type ReactionState =
53
53
  | 'web'
54
54
  | 'tool'
55
55
  | 'compacting'
56
+ | 'awaiting'
56
57
  | 'done'
57
58
  | 'error'
58
59
  | 'stallSoft'
@@ -78,6 +79,7 @@ export const REACTION_VARIANTS: Record<ReactionState, string[]> = {
78
79
  coding: ['👨‍💻', '✍', '⚡'], // WORKING: writing / running code
79
80
  web: ['⚡', '🤔', '👌'], // WORKING: lookup in motion
80
81
  compacting:['✍', '🤔', '👀'],
82
+ awaiting: ['🙏', '🤔', '👀'], // BLOCKED ON HUMAN: parked on a permission card
81
83
  done: ['👍', '💯', '🎉'], // FINISHED: turn_end fired
82
84
  error: ['😱', '😨', '🤯'], // NON-TERMINAL — recovery allowed
83
85
  stallSoft: ['🥱', '😴', '🤔'],
@@ -180,6 +182,22 @@ export class StatusReactionController {
180
182
  this.scheduleState('compacting')
181
183
  }
182
184
 
185
+ /**
186
+ * 🙏 — the turn is parked on a human decision (a permission card is
187
+ * waiting for the operator to tap Allow/Deny). Immediate, non-terminal,
188
+ * and crucially SUSPENDS the stall watchdog: a turn blocked on the
189
+ * operator is not stalled, so it must NOT promote to 🥱/😨 while the
190
+ * card sits unanswered. The next working transition (setTool /
191
+ * setThinking, fired when the verdict resumes the turn) re-arms the
192
+ * watchdog normally. Bypasses debounce so 🙏 lands as soon as the card
193
+ * is posted.
194
+ */
195
+ setAwaiting(): void {
196
+ if (this.finished) return
197
+ this.scheduleState('awaiting', { immediate: true, skipStallReset: true })
198
+ this.clearStallTimers()
199
+ }
200
+
183
201
  /**
184
202
  * 😱 — non-terminal error indicator. Paints the error emoji but does
185
203
  * NOT end the controller — recovery to a working state is permitted
@@ -140,4 +140,25 @@ describe('createInboundCoalescer', () => {
140
140
  expect(flushed).toEqual([])
141
141
  expect(c.size()).toBe(0)
142
142
  })
143
+
144
+ it('hands merge ALL entries in arrival order so the attachment can ride from a non-last entry', () => {
145
+ // The gateway merge picks the single attachment via entries.find(...),
146
+ // NOT entries[last]. Pin that the coalescer preserves arrival order and
147
+ // passes every buffered entry, so a [photo][text] burst keeps the photo.
148
+ interface MediaPayload { text: string; attachment?: string }
149
+ const mediaMerge = (entries: MediaPayload[]): MediaPayload => ({
150
+ text: entries.map((e) => e.text).join('\n'),
151
+ attachment: entries.find((e) => e.attachment != null)?.attachment,
152
+ })
153
+ const flushed: MediaPayload[] = []
154
+ const c = createInboundCoalescer<MediaPayload>({
155
+ gapMs: 1500,
156
+ merge: mediaMerge,
157
+ onFlush: (_key, merged) => flushed.push(merged),
158
+ })
159
+ c.enqueue('c1:u1', { text: 'look', attachment: 'photo-1' }) // media FIRST
160
+ c.enqueue('c1:u1', { text: 'at this' }) // text second
161
+ vi.advanceTimersByTime(1500)
162
+ expect(flushed).toEqual([{ text: 'look\nat this', attachment: 'photo-1' }])
163
+ })
143
164
  })