switchroom 0.14.39 → 0.14.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,160 @@
1
+ /**
2
+ * Reliable inbound delivery: deliver-until-acked (the marko drop-wedge).
3
+ *
4
+ * Delivering an inbound to claude is fire-and-forget: the gateway calls
5
+ * `sendToAgent`, the bridge turns it into an MCP channel notification, and
6
+ * the unmodified CLI appends the text into its composer and auto-submits
7
+ * ONLY when the composer is empty + idle. If the message lands while claude
8
+ * is still finalizing the prior turn, the auto-submit races turn-completion
9
+ * and the text strands unsubmitted — claude never starts the turn, so the
10
+ * gateway eventually drops the message at the 300s silence-poke. Observed
11
+ * recurring on `marko` (supergroup topic + DMs alike).
12
+ *
13
+ * This is the queue that makes delivery reliable. The contract is the whole
14
+ * idea, and it is deliberately small:
15
+ *
16
+ * 1. A delivered inbound is ACKED only when claude actually starts the
17
+ * turn — the `enqueue` session-event (the one signal that claude truly
18
+ * picked the message up). NOT when `sendToAgent` returns true.
19
+ * 2. Until acked, the message stays tracked. If it hasn't been acked
20
+ * within `timeoutMs`, it stranded: re-deliver it (the gateway re-clears
21
+ * the composer and re-sends).
22
+ * 3. Re-deliver as many times as it takes. We never drop the message and
23
+ * never give up — a reliable queue keeps the message until it lands.
24
+ *
25
+ * Keyed per `chatKey(chatId, threadId)`, so DMs and supergroup forum topics
26
+ * are handled identically (the key is opaque here). The #1556 gate
27
+ * serialises delivery per key, so at most one delivery per key is in flight.
28
+ *
29
+ * Pure bookkeeping only — the gateway does the actual composer-clear and
30
+ * re-send for whatever `sweep` returns. Unit-tested in isolation.
31
+ */
32
+
33
/**
 * One inbound that has been handed to claude and is still waiting for the
 * `enqueue` ack that proves the turn actually started.
 */
export interface PendingDelivery<M> {
  /** The `chatKey(chatId, threadId)` this entry is filed under; treated as an opaque string here. */
  readonly key: string
  /** The inbound itself, retained verbatim so the same message can be re-sent until it is acked. */
  readonly inbound: M
  /**
   * Stringified Telegram `message_id` of the tracked inbound, or null when it
   * is unknown. `ackDelivery` compares the `enqueue` event against this id, so
   * a synthetic-source turn (cron / resume / vault / reaction) on the same
   * chatKey cannot false-ack — and thereby silently drop — a real user message
   * that has not landed yet.
   */
  readonly messageId: string | null
  /** Unix-ms timestamp of the most recent delivery attempt; `sweep` rewrites it on every re-delivery. */
  lastAttemptAt: number
}
49
+
50
/**
 * The whole queue state: every delivery still awaiting its ack, indexed by
 * chatKey (one entry per key at most).
 */
export interface DeliveryQueue<M> {
  /** chatKey → the delivery currently awaiting an ack under that key. */
  readonly pending: Map<string, PendingDelivery<M>>
}
53
+
54
/** Build an empty queue: nothing tracked yet, nothing awaiting an ack. */
export function createDeliveryQueue<M>(): DeliveryQueue<M> {
  const pending: DeliveryQueue<M>['pending'] = new Map()
  return { pending }
}
57
+
58
/**
 * Start tracking an inbound that was just delivered and now awaits claude's
 * `enqueue` ack.
 *
 * Any entry already pending under `key` is replaced: the #1556 gate serialises
 * delivery per key, so a newer inbound supersedes an older un-acked one.
 *
 * @param q - Queue to record the delivery in.
 * @param key - chatKey the delivery belongs to.
 * @param inbound - The message to keep for re-delivery.
 * @param now - Time of this delivery attempt (unix-ms); becomes `lastAttemptAt`.
 * @param messageId - Stringified Telegram message_id, so that only the
 *   `enqueue` belonging to this message can ack it; null (the default) when
 *   the id is unknown.
 */
export function trackDelivery<M>(
  q: DeliveryQueue<M>,
  key: string,
  inbound: M,
  now: number,
  messageId: string | null = null,
): void {
  const record = { key, inbound, messageId, lastAttemptAt: now }
  q.pending.set(key, record)
}
74
+
75
/**
 * Acknowledge a delivery. Call this from the `enqueue` session-event, i.e.
 * when claude has started a turn.
 *
 * `enqueue` fires for EVERY turn start whatever its source (user inbound,
 * cron, subagent-handback, vault-resume, restart-marker). Acking on chatKey
 * alone would therefore let a synthetic-source turn clear — and silently
 * drop — a real user message still waiting under the same key.
 *
 * Matching rule:
 *  - the pending entry recorded a messageId → `enqueueMessageId` must equal it;
 *  - the pending entry has no messageId (legacy / defensive null) → the key
 *    alone is enough.
 *
 * @param q - Queue holding the pending deliveries.
 * @param key - chatKey the `enqueue` event belongs to.
 * @param enqueueMessageId - Source message id carried by the `enqueue` event,
 *   or null (the default) when it carries none.
 * @returns true if an entry was cleared, false otherwise.
 */
export function ackDelivery<M>(
  q: DeliveryQueue<M>,
  key: string,
  enqueueMessageId: string | null = null,
): boolean {
  const tracked = q.pending.get(key)
  if (tracked) {
    const expectedId = tracked.messageId
    // No recorded id means key-only ack; otherwise the ids must agree. On a
    // mismatch some other message started this turn, so ours stays tracked
    // and the sweep re-delivers it if it really stranded.
    const idAccepted = expectedId == null || expectedId === enqueueMessageId
    if (idAccepted) {
      q.pending.delete(key)
      return true
    }
  }
  return false
}
101
+
102
/**
 * Collect the deliveries that stranded — no ack within `timeoutMs` of their
 * latest attempt — so the gateway can re-send them now.
 *
 * Each returned entry has its `lastAttemptAt` moved to `now`, so the next
 * sweep waits a further full `timeoutMs` before returning it again. Entries
 * still inside the window are not touched (claude may yet pick them up).
 * Nothing is removed from the queue here; only an ack or a forget does that.
 *
 * @param q - Queue to inspect.
 * @param now - Current time (unix-ms).
 * @param timeoutMs - How long to wait for an ack before re-delivering.
 * @returns The stranded entries — the same objects the queue holds — in the
 *   queue's iteration order.
 */
export function sweep<M>(
  q: DeliveryQueue<M>,
  now: number,
  timeoutMs: number,
): PendingDelivery<M>[] {
  const stranded: PendingDelivery<M>[] = []
  q.pending.forEach((pendingEntry: PendingDelivery<M>) => {
    const stillWithinWindow = now - pendingEntry.lastAttemptAt < timeoutMs
    if (stillWithinWindow) return
    // Restart the clock before handing the entry back for re-delivery.
    pendingEntry.lastAttemptAt = now
    stranded.push(pendingEntry)
  })
  return stranded
}
121
+
122
/**
 * Stop tracking `key` without treating it as acked — for example when the
 * bridge went offline and the message was handed back to the offline buffer,
 * which owns it from then on. A key that is not tracked is simply ignored.
 */
export function forgetDelivery<M>(q: DeliveryQueue<M>, key: string): void {
  const { pending } = q
  pending.delete(key)
}
127
+
128
/**
 * Decide whether a delivered inbound belongs in the ack / re-delivery queue.
 *
 * Only a fresh user turn — one that will produce exactly one `enqueue` to ack
 * against — may be tracked. Anything that does NOT enqueue has to be kept out,
 * otherwise the sweep re-delivers it forever (re-clearing the composer on
 * every cycle):
 *
 *  - `isSteering` / `isInterrupt`: the #1556 gate's carve-outs. They are
 *    delivered mid-turn to AMEND the running turn, so they never start a fresh
 *    turn and never emit `enqueue`.
 *  - `hasSource`: synthetic inbounds (cron / vault-resume / subagent-handback
 *    / reaction) carry a `meta.source`; they enqueue under their own semantics
 *    and must never be tracked as though they were a queued user turn.
 *  - blank `effectiveText`: an empty body (e.g. `/queue` with no text) is
 *    silently dropped by claude's auto-submit and never enqueues, so tracking
 *    it would be a pure re-delivery loop.
 *
 * The checks mirror the gate's carve-outs so that the tracked set is exactly
 * the set of messages that produce an `enqueue`.
 *
 * @returns true when the inbound should be tracked, false otherwise.
 */
export function shouldTrackDelivery(input: {
  isSteering: boolean
  isInterrupt: boolean
  hasSource?: boolean
  effectiveText?: string
}): boolean {
  const amendsRunningTurn = input.isSteering || input.isInterrupt
  if (amendsRunningTurn) return false

  // The blank-body check applies only when the caller supplied the text at
  // all; `undefined` means "not supplied" and is never treated as blank, so
  // callers that don't pass it keep their existing behaviour.
  const bodyIsBlank = (text: string | undefined): boolean =>
    text !== undefined && text.trim().length === 0

  return !(input.hasSource || bodyIsBlank(input.effectiveText))
}
@@ -0,0 +1,180 @@
1
+ import { describe, expect, it } from 'vitest'
2
+
3
+ import {
4
+ ackDelivery,
5
+ createDeliveryQueue,
6
+ forgetDelivery,
7
+ shouldTrackDelivery,
8
+ sweep,
9
+ trackDelivery,
10
+ type DeliveryQueue,
11
+ } from '../gateway/inbound-delivery-confirm.js'
12
+
13
+ /**
14
+ * Regression coverage for the marko drop-wedge.
15
+ *
16
+ * An inbound delivered to claude's TUI composer strands unsubmitted when the
17
+ * auto-submit races turn-completion. claude never emits `enqueue`, so the
18
+ * gateway used to sit "typing…" for 300s then DROP the message.
19
+ *
20
+ * The queue's contract: a delivered inbound is acked ONLY by `enqueue`; until
21
+ * then it is re-delivered every `timeoutMs`, forever, never dropped — and an
22
+ * acked delivery never re-fires (no duplicate turns).
23
+ */
24
/** Payload shape used by every case in this file. */
type Msg = { text: string }

/** Ack window handed to `sweep` throughout these tests, in milliseconds. */
const TIMEOUT = 15_000

/** Build a new, empty queue so that no state leaks from one case to the next. */
const fresh = (): DeliveryQueue<Msg> => createDeliveryQueue<Msg>()
30
+
31
describe('inbound-delivery-confirm (reliable deliver-until-acked queue)', () => {
  // The one chat key used by every case that needs only a single conversation.
  const KEY = 'chat:_'

  it('an acked delivery is never re-delivered (happy path — no duplicate turns)', () => {
    const queue = fresh()
    const deliveredAt = 1_000
    trackDelivery(queue, KEY, { text: 'hi' }, deliveredAt)

    // `enqueue` arrived, so the entry is cleared ...
    const acked = ackDelivery(queue, KEY)
    expect(acked).toBe(true)

    // ... and even a sweep far in the future has nothing left to re-send.
    expect(sweep(queue, deliveredAt + 999_999, TIMEOUT)).toHaveLength(0)
    expect(queue.pending.size).toBe(0)
  })

  it('within the timeout, an un-acked delivery is left alone (claude may still be picking it up)', () => {
    const queue = fresh()
    const deliveredAt = 1_000
    trackDelivery(queue, KEY, { text: 'hi' }, deliveredAt)

    // One millisecond short of the ack window.
    const stranded = sweep(queue, deliveredAt + 14_999, TIMEOUT)
    expect(stranded).toHaveLength(0)
    expect(queue.pending.size).toBe(1)
  })

  it('a strand (no ack) is re-delivered after the timeout, and the clock resets', () => {
    const queue = fresh()
    const deliveredAt = 1_000
    const firstSweepAt = deliveredAt + 15_000
    trackDelivery(queue, KEY, { text: 'draft nurture email' }, deliveredAt)

    const stranded = sweep(queue, firstSweepAt, TIMEOUT)
    expect(stranded).toHaveLength(1)
    expect(stranded[0]!.inbound.text).toBe('draft nurture email')
    // The returned entry's clock was restarted at the sweep time ...
    expect(stranded[0]!.lastAttemptAt).toBe(firstSweepAt)
    // ... so it is not picked up again until another full timeout has passed.
    expect(sweep(queue, firstSweepAt + 14_999, TIMEOUT)).toHaveLength(0)
  })

  it('keeps re-delivering forever until acked — never drops (the reliability invariant)', () => {
    const queue = fresh()
    trackDelivery(queue, KEY, { text: 'x' }, 0)

    // Fifty consecutive strands, one full timeout apart; each must re-deliver.
    const sweepTimes = Array.from({ length: 50 }, (_, strand) => (strand + 1) * 15_000)
    for (const at of sweepTimes) {
      expect(sweep(queue, at, TIMEOUT)).toHaveLength(1)
    }
    expect(queue.pending.size).toBe(1) // still tracked after all of them

    // claude finally picks the message up: the ack ends the re-delivery.
    const lastSweepAt = 50 * 15_000
    expect(ackDelivery(queue, KEY)).toBe(true)
    expect(sweep(queue, lastSweepAt + 999_999, TIMEOUT)).toHaveLength(0)
  })

  it('an ack that lands right after a re-delivery stops further re-delivery (no duplicate turns)', () => {
    const queue = fresh()
    trackDelivery(queue, KEY, { text: 'x' }, 0)

    // The message strands once and is handed back for re-delivery.
    sweep(queue, 15_000, TIMEOUT)

    // The re-delivered copy lands, so the ack clears the entry for good.
    expect(ackDelivery(queue, KEY)).toBe(true)
    expect(sweep(queue, 999_999, TIMEOUT)).toHaveLength(0)
    expect(queue.pending.size).toBe(0)
  })

  it('keys are independent — a strand on one topic does not affect another (DM + supergroup topics)', () => {
    const queue = fresh()
    const topicKey = '-100:4' // supergroup CRM topic
    const dmKey = '555:_' // a DM
    trackDelivery(queue, topicKey, { text: 'crm topic msg' }, 0)
    trackDelivery(queue, dmKey, { text: 'dm msg' }, 0)

    // The DM submits fine; the topic message never gets its ack.
    ackDelivery(queue, dmKey)

    const stranded = sweep(queue, 15_000, TIMEOUT)
    expect(stranded).toHaveLength(1)
    expect(stranded[0]!.key).toBe(topicKey)
  })

  it('tracking the same key twice keeps only the latest inbound (gate serialises per key)', () => {
    const queue = fresh()
    trackDelivery(queue, KEY, { text: 'first' }, 0)
    trackDelivery(queue, KEY, { text: 'second' }, 100)

    expect(queue.pending.size).toBe(1)
    const stranded = sweep(queue, 100 + 15_000, TIMEOUT)
    expect(stranded[0]!.inbound.text).toBe('second')
  })

  it('ack on an unknown key is a harmless no-op', () => {
    const emptyQueue = fresh()
    expect(ackDelivery(emptyQueue, 'never-tracked')).toBe(false)
  })

  it('forgetDelivery clears without acking or re-delivering (bridge went offline)', () => {
    const queue = fresh()
    trackDelivery(queue, KEY, { text: 'x' }, 0)

    forgetDelivery(queue, KEY)

    expect(queue.pending.size).toBe(0)
    expect(sweep(queue, 999_999, TIMEOUT)).toHaveLength(0)
  })
})
111
+
112
// Regression for the cross-source ACK collision (a silent drop). `enqueue`
// fires for EVERY turn start, whatever its source. With a key-only ack, a
// synthetic-source turn (cron / resume / vault / reaction) sharing the chatKey
// of a real user message that is still waiting to land would clear that
// message — dropping it silently. The ack therefore has to match on the
// tracked message id.
describe('ackDelivery — message-id-matched (cross-source false-ack guard)', () => {
  const KEY = 'chat:_'
  const USER_MESSAGE_ID = '5001'

  it('acks when the enqueue message id matches the tracked one', () => {
    const queue = fresh()
    trackDelivery(queue, KEY, { text: 'real user msg' }, 0, USER_MESSAGE_ID)

    expect(ackDelivery(queue, KEY, USER_MESSAGE_ID)).toBe(true)
    expect(queue.pending.size).toBe(0)
  })

  it('does NOT ack when a different (synthetic-source) turn enqueues under the same key', () => {
    const queue = fresh()
    trackDelivery(queue, KEY, { text: 'real user msg' }, 0, USER_MESSAGE_ID)

    // A cron / resume turn for the same chat enqueues first, under its own id.
    const syntheticId = '1716123456789'
    expect(ackDelivery(queue, KEY, syntheticId)).toBe(false)

    // The user message stays tracked: it strands and is re-delivered, not dropped.
    expect(queue.pending.size).toBe(1)
    expect(sweep(queue, 15_000, TIMEOUT)).toHaveLength(1)

    // Its own enqueue, carrying the matching id, later acks it cleanly.
    expect(ackDelivery(queue, KEY, USER_MESSAGE_ID)).toBe(true)
    expect(queue.pending.size).toBe(0)
  })

  it('does NOT ack when the enqueue carries no message id but the tracked one has one', () => {
    const queue = fresh()
    trackDelivery(queue, KEY, { text: 'real user msg' }, 0, USER_MESSAGE_ID)

    expect(ackDelivery(queue, KEY, null)).toBe(false)
    expect(queue.pending.size).toBe(1)
  })

  it('falls back to key-only ack when no message id was recorded (legacy/defensive)', () => {
    const queue = fresh()
    // Tracked without a messageId, so any enqueue on the key acks it.
    trackDelivery(queue, KEY, { text: 'x' }, 0)

    expect(ackDelivery(queue, KEY, USER_MESSAGE_ID)).toBe(true)
    expect(queue.pending.size).toBe(0)
  })
})
152
+
153
// Regression for the steer/interrupt re-delivery loop. Steering and `!`
// interrupt inbounds amend the turn that is already running and never emit
// `enqueue`, so tracking them would make the sweep re-deliver them forever.
// Only fresh-turn messages — the ones that DO enqueue — are tracked.
describe('shouldTrackDelivery — only fresh-turn messages are tracked', () => {
  // Baseline flags of an ordinary inbound; each case overrides what it needs.
  const plain = { isSteering: false, isInterrupt: false }

  it('tracks a normal (non-steering, non-interrupt) message', () => {
    const verdict = shouldTrackDelivery({ ...plain })
    expect(verdict).toBe(true)
  })

  it('does NOT track a /steer message (amends the turn — never acks)', () => {
    const verdict = shouldTrackDelivery({ ...plain, isSteering: true })
    expect(verdict).toBe(false)
  })

  it('does NOT track a ! interrupt message (amends the turn — never acks)', () => {
    const verdict = shouldTrackDelivery({ ...plain, isInterrupt: true })
    expect(verdict).toBe(false)
  })

  it('does NOT track when both flags set (defensive)', () => {
    const verdict = shouldTrackDelivery({ isSteering: true, isInterrupt: true })
    expect(verdict).toBe(false)
  })

  it('does NOT track a synthetic (meta.source) inbound — cron/resume/vault/reaction enqueue under their own semantics', () => {
    const verdict = shouldTrackDelivery({ ...plain, hasSource: true })
    expect(verdict).toBe(false)
  })

  it('does NOT track an empty-body message (e.g. `/queue` with no text) — never enqueues, would re-deliver forever', () => {
    // Both a truly empty body and a whitespace-only body count as empty.
    for (const effectiveText of ['', ' ']) {
      expect(shouldTrackDelivery({ ...plain, effectiveText })).toBe(false)
    }
  })

  it('tracks a normal message when effectiveText is provided and non-empty', () => {
    const verdict = shouldTrackDelivery({ ...plain, effectiveText: 'draft the email' })
    expect(verdict).toBe(true)
  })
})
@@ -0,0 +1,64 @@
1
+ /**
2
+ * E2E regression — inbound is NEVER dropped under rapid fire (the drop-wedge).
3
+ *
4
+ * The bug: a Telegram inbound reaches claude as an MCP channel notification
5
+ * the unmodified CLI appends to its composer and auto-submits only when the
6
+ * composer is empty + idle. A message arriving the instant the prior turn
7
+ * completes races that auto-submit and strands unsubmitted — claude never
8
+ * starts the turn, the gateway sits "typing…", and the 300s silence-poke
9
+ * DROPS the message. Observed recurring on `marko` (supergroup topics + DMs).
10
+ *
11
+ * This scenario drives the exact failure timing: it fires each message the
12
+ * instant the prior reply lands (i.e. right at turn-completion, the strand
13
+ * window) and asserts EVERY message gets its own unique token back. With the
14
+ * deliver-until-acked queue (inbound-delivery-confirm.ts) a strand self-heals
15
+ * via re-delivery; without it, a stranded message yields NO reply within the
16
+ * timeout and this test fails on exactly the message that was swallowed.
17
+ *
18
+ * Each message carries a random token and the assertion matches THAT token,
19
+ * so a reply to message N-1 can never be mistaken for message N's reply — the
20
+ * test proves every distinct message was actually processed, not merely that
21
+ * "some replies came back".
22
+ */
23
+
24
+ import { describe, it, expect } from "vitest";
25
+ import { spinUp } from "../harness.js";
26
+
27
describe("uat: rapid-fire inbound — no message is ever dropped (drop-wedge)", () => {
  // How many back-to-back messages the scenario fires.
  const N = 8;
  // Per-message wait for the bot's reply carrying that message's token.
  const REPLY_TIMEOUT_MS = 75_000;
  // Whole-test budget: N messages × (turn + up to one ~15-20s strand
  // recovery) — generous.
  const TEST_TIMEOUT_MS = 900_000;

  it(
    "every back-to-back message gets its own reply (delivery never strands)",
    async () => {
      const sc = await spinUp({ agent: "test-harness" });

      // Resolves true when a bot reply containing `token` shows up in time,
      // false when the wait (or the follow-up check) fails — i.e. the message
      // stranded and no reply carrying its token arrived.
      const replyArrived = async (token: string): Promise<boolean> => {
        try {
          const reply = await sc.expectMessage((m) => m.text.includes(token), {
            from: "bot",
            timeout: REPLY_TIMEOUT_MS,
          });
          expect(reply.text).toContain(token);
          return true;
        } catch {
          return false;
        }
      };

      try {
        const dropped: number[] = [];
        for (let seq = 1; seq <= N; seq++) {
          // A random per-message token, so a reply to an earlier message can
          // never be mistaken for this one's reply.
          const suffix = Math.random().toString(36).slice(2, 8);
          const token = `acktok-${seq}-${suffix}`;
          await sc.sendDM(
            `Reply with exactly this token and nothing else: ${token}`,
          );
          if (!(await replyArrived(token))) {
            dropped.push(seq);
          }
          // Deliberately NO delay here: the next message is fired the instant
          // this reply lands, so it arrives in the turn-completion strand
          // window.
        }
        expect(
          dropped,
          `messages dropped (no reply within timeout): ${dropped.join(", ")} of ${N}`,
        ).toEqual([]);
      } finally {
        await sc.tearDown();
      }
    },
    TEST_TIMEOUT_MS,
  );
});
@@ -55,8 +55,15 @@ describe("uat: rapid follow-ups — steering vs queued classification", () => {
55
55
  (m) => {
56
56
  const txt = m.text;
57
57
  const mentionsMd5 = /\bmd5\b/i.test(txt);
58
+ // Steer narration: the agent acknowledges amending the in-flight
59
+ // task. Accept the phrasings the model actually uses — including
60
+ // "Switched to MD5 per your update/follow-up" (the 2026-06-02
61
+ // canary reply that the old regex wrongly rejected). Anchored on
62
+ // "per your <qualifier>" / continuation language so it stays
63
+ // distinct from the QUEUED path (a fresh answer with no such
64
+ // course-correction narration).
58
65
  const narratesSteer =
59
- /↪️|\bsteer(ing)?\b|continuing the (prior|original|in-flight) task|amendment|course[- ]correct/i.test(
66
+ /↪️|\bsteer(ing)?\b|switch(?:ed|ing)? to \w+ per your (?:update|follow-?up|guidance|request|steer)|continuing the (prior|original|in-flight) task|amendment|course[- ]correct/i.test(
60
67
  txt,
61
68
  );
62
69
  return mentionsMd5 && narratesSteer;