switchroom 0.14.39 → 0.14.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth-broker/index.js +294 -46
- package/dist/cli/drive-write-pretool.mjs +25 -1
- package/dist/cli/switchroom.js +63 -6
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +142 -12
- package/telegram-plugin/gateway/gateway.ts +142 -4
- package/telegram-plugin/gateway/inbound-delivery-confirm.ts +160 -0
- package/telegram-plugin/tests/inbound-delivery-confirm.test.ts +180 -0
- package/telegram-plugin/uat/scenarios/inbound-no-drop-rapid-fire-dm.test.ts +64 -0
- package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +8 -1
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reliable inbound delivery: deliver-until-acked (the marko drop-wedge).
|
|
3
|
+
*
|
|
4
|
+
* Delivering an inbound to claude is fire-and-forget: the gateway calls
|
|
5
|
+
* `sendToAgent`, the bridge turns it into an MCP channel notification, and
|
|
6
|
+
* the unmodified CLI appends the text into its composer and auto-submits
|
|
7
|
+
* ONLY when the composer is empty + idle. If the message lands while claude
|
|
8
|
+
* is still finalizing the prior turn, the auto-submit races turn-completion
|
|
9
|
+
* and the text strands unsubmitted — claude never starts the turn, so the
|
|
10
|
+
* gateway eventually drops the message at the 300s silence-poke. Observed
|
|
11
|
+
* recurring on `marko` (supergroup topic + DMs alike).
|
|
12
|
+
*
|
|
13
|
+
* This is the queue that makes delivery reliable. The contract is the whole
|
|
14
|
+
* idea, and it is deliberately small:
|
|
15
|
+
*
|
|
16
|
+
* 1. A delivered inbound is ACKED only when claude actually starts the
|
|
17
|
+
* turn — the `enqueue` session-event (the one signal that claude truly
|
|
18
|
+
* picked the message up). NOT when `sendToAgent` returns true.
|
|
19
|
+
* 2. Until acked, the message stays tracked. If it hasn't been acked
|
|
20
|
+
* within `timeoutMs`, it stranded: re-deliver it (the gateway re-clears
|
|
21
|
+
* the composer and re-sends).
|
|
22
|
+
* 3. Re-deliver as many times as it takes. We never drop the message and
|
|
23
|
+
* never give up — a reliable queue keeps the message until it lands.
|
|
24
|
+
*
|
|
25
|
+
* Keyed per `chatKey(chatId, threadId)`, so DMs and supergroup forum topics
|
|
26
|
+
* are handled identically (the key is opaque here). The #1556 gate
|
|
27
|
+
* serialises delivery per key, so at most one delivery per key is in flight.
|
|
28
|
+
*
|
|
29
|
+
* Pure bookkeeping only — the gateway does the actual composer-clear and
|
|
30
|
+
* re-send for whatever `sweep` returns. Unit-tested in isolation.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
/**
 * One inbound that has been handed to claude and is still awaiting its
 * `enqueue` ack. `M` is the caller's inbound payload type; this module stores
 * it and hands it back, but never inspects it.
 */
export interface PendingDelivery<M> {
  /** chatKey(chatId, threadId) — opaque to this module. */
  readonly key: string
  /** The exact inbound to re-send until claude acks it. */
  readonly inbound: M
  /**
   * Source message id of the tracked inbound (stringified Telegram
   * `message_id`), or null if unknown. The `enqueue` ack matches on THIS so a
   * synthetic-source turn (cron / resume / vault / reaction) that shares the
   * chatKey can't false-ack — and silently drop — a real user message still
   * waiting to land. See ackDelivery.
   */
  readonly messageId: string | null
  /**
   * When the latest delivery attempt was made (unix-ms). The only mutable
   * field: `sweep` rewrites it each time the entry is handed out for
   * re-delivery.
   */
  lastAttemptAt: number
}
|
|
49
|
+
|
|
50
|
+
/**
 * The whole queue state: at most one pending delivery per chatKey, stored
 * under that key.
 */
export interface DeliveryQueue<M> {
  readonly pending: Map<string, PendingDelivery<M>>
}
|
|
53
|
+
|
|
54
|
+
/**
 * Create an empty delivery queue.
 *
 * @returns A queue whose `pending` map is new and empty; each call returns an
 *   independent queue.
 */
export function createDeliveryQueue<M>(): DeliveryQueue<M> {
  return { pending: new Map() }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Track a freshly-delivered inbound, awaiting claude's `enqueue` ack.
 * Overwrites any prior pending for the key — the #1556 gate serialises per
 * key, so a later inbound supersedes an earlier un-acked one for that key
 * (the earlier entry is discarded, not merged).
 * `messageId` (stringified Telegram message_id) lets the ack match only the
 * enqueue that belongs to THIS message; pass null when unknown.
 *
 * @param q - Queue to record the delivery in.
 * @param key - chatKey(chatId, threadId) the inbound was delivered under.
 * @param inbound - The payload to hand back from `sweep` for re-delivery.
 * @param now - Time of this delivery attempt (unix-ms).
 * @param messageId - Source message id to match the ack against, or null.
 */
export function trackDelivery<M>(
  q: DeliveryQueue<M>,
  key: string,
  inbound: M,
  now: number,
  messageId: string | null = null,
): void {
  q.pending.set(key, { key, inbound, messageId, lastAttemptAt: now })
}
|
|
74
|
+
|
|
75
|
+
/**
 * Ack a delivery — call from the `enqueue` session-event (claude started a
 * turn). `enqueue` fires for EVERY turn start regardless of source (user
 * inbound, cron, subagent-handback, vault-resume, restart-marker), so acking
 * purely by chatKey would let a synthetic-source turn clear — and thus
 * silently drop — a real user message still waiting under the same key. So
 * ack ONLY when the enqueue's source message id matches the tracked one.
 *
 * Matching rule: if we recorded a messageId for the pending entry, require the
 * enqueue's `enqueueMessageId` to equal it. If we never recorded one (legacy /
 * defensive null), fall back to key-only ack.
 *
 * @param q - Queue holding the pending deliveries.
 * @param key - chatKey of the turn that just started.
 * @param enqueueMessageId - Source message id carried by the enqueue, or null
 *   when the enqueue carries none.
 * @returns true if an entry was cleared; false if nothing was pending for the
 *   key or the ids did not match.
 */
export function ackDelivery<M>(
  q: DeliveryQueue<M>,
  key: string,
  enqueueMessageId: string | null = null,
): boolean {
  const entry = q.pending.get(key)
  if (!entry) return false
  // A different message started this turn — don't ack ours (it may still be
  // waiting to land; the sweep will re-deliver it if it stranded).
  if (entry.messageId != null && entry.messageId !== enqueueMessageId) return false
  q.pending.delete(key)
  return true
}
|
|
101
|
+
|
|
102
|
+
/**
 * Return the inbounds that stranded (no ack within `timeoutMs`) and should be
 * re-delivered now. Resets each returned entry's clock so the next sweep
 * waits another full `timeoutMs` — the gateway re-sends them. Entries still
 * within the window are left untouched (claude may yet be picking them up).
 *
 * Nothing is removed from the queue here: a stranded entry stays pending
 * until it is acked or forgotten. The returned objects are the live queue
 * entries, not copies.
 *
 * @param q - Queue to scan.
 * @param now - Current time (unix-ms).
 * @param timeoutMs - How long an entry may go un-acked before it counts as
 *   stranded; an entry exactly `timeoutMs` old is already stranded.
 * @returns The entries to re-deliver, in the queue's iteration order.
 */
export function sweep<M>(
  q: DeliveryQueue<M>,
  now: number,
  timeoutMs: number,
): PendingDelivery<M>[] {
  const redeliver: PendingDelivery<M>[] = []
  for (const entry of q.pending.values()) {
    if (now - entry.lastAttemptAt < timeoutMs) continue
    // Restart the clock before handing the entry out, so the caller's re-send
    // gets a full window of its own.
    entry.lastAttemptAt = now
    redeliver.push(entry)
  }
  return redeliver
}
|
|
121
|
+
|
|
122
|
+
/**
 * Forget a key without acking (e.g. the bridge went offline and the message
 * was handed back to the offline buffer, which owns it now). A no-op when
 * nothing is pending for the key.
 *
 * @param q - Queue to remove the entry from.
 * @param key - chatKey whose pending delivery should be dropped from tracking.
 */
export function forgetDelivery<M>(q: DeliveryQueue<M>, key: string): void {
  q.pending.delete(key)
}
|
|
127
|
+
|
|
128
|
+
/**
 * Should this delivered inbound be tracked for ack/re-delivery?
 *
 * Track a delivery iff it is a fresh user turn that will produce exactly one
 * `enqueue` to ack against. Everything that does NOT enqueue must be excluded,
 * or the sweep re-delivers it forever (re-clearing the composer every cycle):
 *
 *  - `isSteering` / `isInterrupt` — the #1556 gate's carve-outs: delivered
 *    mid-turn to AMEND the running turn, so they never start a fresh turn and
 *    never emit `enqueue`.
 *  - `hasSource` — synthetic inbounds (cron / vault-resume / subagent-handback
 *    / reaction) carry a `meta.source`; they enqueue under their own semantics
 *    and must never be tracked as if they were a queued user turn.
 *  - empty `effectiveText` — an empty body (e.g. `/queue` with no text) is
 *    silently dropped by claude's auto-submit and never enqueues, so tracking
 *    it is a pure re-delivery loop (a self-inflicted DoS on the queue).
 *
 * Mirror the gate's carve-outs here so tracking is exactly the set of messages
 * that produce an `enqueue`.
 *
 * @param input - Classification of the delivered inbound. `hasSource` and
 *   `effectiveText` are optional; omitting them skips the corresponding check.
 * @returns true if the delivery should be tracked, false otherwise.
 */
export function shouldTrackDelivery(input: {
  isSteering: boolean
  isInterrupt: boolean
  hasSource?: boolean
  effectiveText?: string
}): boolean {
  if (input.isSteering || input.isInterrupt) return false
  if (input.hasSource) return false
  // Apply the empty-text exclusion only when the caller actually supplied the
  // text. `undefined` means "not supplied": the check is skipped, so callers
  // that never pass `effectiveText` keep their existing behaviour.
  if (input.effectiveText !== undefined && input.effectiveText.trim().length === 0) return false
  return true
}
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest'
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
ackDelivery,
|
|
5
|
+
createDeliveryQueue,
|
|
6
|
+
forgetDelivery,
|
|
7
|
+
shouldTrackDelivery,
|
|
8
|
+
sweep,
|
|
9
|
+
trackDelivery,
|
|
10
|
+
type DeliveryQueue,
|
|
11
|
+
} from '../gateway/inbound-delivery-confirm.js'
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Regression coverage for the marko drop-wedge.
|
|
15
|
+
*
|
|
16
|
+
* An inbound delivered to claude's TUI composer strands unsubmitted when the
|
|
17
|
+
* auto-submit races turn-completion. claude never emits `enqueue`, so the
|
|
18
|
+
* gateway used to sit "typing…" for 300s then DROP the message.
|
|
19
|
+
*
|
|
20
|
+
* The queue's contract: a delivered inbound is acked ONLY by `enqueue`; until
|
|
21
|
+
* then it is re-delivered every `timeoutMs`, forever, never dropped — and an
|
|
22
|
+
* acked delivery never re-fires (no duplicate turns).
|
|
23
|
+
*/
|
|
24
|
+
/** Minimal inbound payload for these tests; the queue never looks inside it. */
type Msg = { text: string }

/** Ack timeout (ms) passed to every `sweep` call in this file. */
const TIMEOUT = 15_000

/** Build a new, empty queue so each test starts from a clean state. */
function fresh(): DeliveryQueue<Msg> {
  return createDeliveryQueue<Msg>()
}
|
|
30
|
+
|
|
31
|
+
// Core queue contract: ack stops re-delivery, a strand re-delivers every
// TIMEOUT until acked, keys are independent, and forget clears silently.
describe('inbound-delivery-confirm (reliable deliver-until-acked queue)', () => {
  it('an acked delivery is never re-delivered (happy path — no duplicate turns)', () => {
    const q = fresh()
    trackDelivery(q, 'chat:_', { text: 'hi' }, 1_000)
    expect(ackDelivery(q, 'chat:_')).toBe(true) // enqueue arrived
    expect(sweep(q, 1_000 + 999_999, TIMEOUT)).toHaveLength(0)
    expect(q.pending.size).toBe(0)
  })

  it('within the timeout, an un-acked delivery is left alone (claude may still be picking it up)', () => {
    const q = fresh()
    trackDelivery(q, 'chat:_', { text: 'hi' }, 1_000)
    expect(sweep(q, 1_000 + 14_999, TIMEOUT)).toHaveLength(0)
    expect(q.pending.size).toBe(1)
  })

  it('a strand (no ack) is re-delivered after the timeout, and the clock resets', () => {
    const q = fresh()
    trackDelivery(q, 'chat:_', { text: 'draft nurture email' }, 1_000)
    const r = sweep(q, 1_000 + 15_000, TIMEOUT)
    expect(r).toHaveLength(1)
    expect(r[0]!.inbound.text).toBe('draft nurture email')
    expect(r[0]!.lastAttemptAt).toBe(1_000 + 15_000) // clock reset
    // not re-swept until another full timeout elapses
    expect(sweep(q, 1_000 + 15_000 + 14_999, TIMEOUT)).toHaveLength(0)
  })

  it('keeps re-delivering forever until acked — never drops (the reliability invariant)', () => {
    const q = fresh()
    let t = 0
    trackDelivery(q, 'chat:_', { text: 'x' }, t)
    for (let i = 0; i < 50; i++) {
      t += 15_000
      expect(sweep(q, t, TIMEOUT)).toHaveLength(1) // still trying after 50 strands
    }
    expect(q.pending.size).toBe(1) // never dropped
    // claude finally picks it up → acked → stops.
    expect(ackDelivery(q, 'chat:_')).toBe(true)
    expect(sweep(q, t + 999_999, TIMEOUT)).toHaveLength(0)
  })

  it('an ack that lands right after a re-delivery stops further re-delivery (no duplicate turns)', () => {
    const q = fresh()
    trackDelivery(q, 'chat:_', { text: 'x' }, 0)
    sweep(q, 15_000, TIMEOUT) // strand → re-delivered
    expect(ackDelivery(q, 'chat:_')).toBe(true) // the re-delivered copy landed
    expect(sweep(q, 999_999, TIMEOUT)).toHaveLength(0)
    expect(q.pending.size).toBe(0)
  })

  it('keys are independent — a strand on one topic does not affect another (DM + supergroup topics)', () => {
    const q = fresh()
    trackDelivery(q, '-100:4', { text: 'crm topic msg' }, 0) // supergroup CRM topic
    trackDelivery(q, '555:_', { text: 'dm msg' }, 0) // a DM
    ackDelivery(q, '555:_') // the DM submits fine
    const r = sweep(q, 15_000, TIMEOUT)
    expect(r).toHaveLength(1)
    expect(r[0]!.key).toBe('-100:4') // only the stranded topic re-delivers
  })

  it('tracking the same key twice keeps only the latest inbound (gate serialises per key)', () => {
    const q = fresh()
    trackDelivery(q, 'chat:_', { text: 'first' }, 0)
    trackDelivery(q, 'chat:_', { text: 'second' }, 100)
    expect(q.pending.size).toBe(1)
    expect(sweep(q, 100 + 15_000, TIMEOUT)[0]!.inbound.text).toBe('second')
  })

  it('ack on an unknown key is a harmless no-op', () => {
    expect(ackDelivery(fresh(), 'never-tracked')).toBe(false)
  })

  it('forgetDelivery clears without acking or re-delivering (bridge went offline)', () => {
    const q = fresh()
    trackDelivery(q, 'chat:_', { text: 'x' }, 0)
    forgetDelivery(q, 'chat:_')
    expect(q.pending.size).toBe(0)
    expect(sweep(q, 999_999, TIMEOUT)).toHaveLength(0)
  })
})
|
|
111
|
+
|
|
112
|
+
// Regression for the cross-source ACK collision (silent drop): `enqueue` fires
// for EVERY turn start regardless of source. A synthetic-source turn (cron /
// resume / vault / reaction) that shares the chatKey of a real user message
// still waiting to land would, with a key-only ack, clear — and silently drop
// — that user message. The ack must match on the tracked message id.
describe('ackDelivery — message-id-matched (cross-source false-ack guard)', () => {
  it('acks when the enqueue message id matches the tracked one', () => {
    const q = fresh()
    trackDelivery(q, 'chat:_', { text: 'real user msg' }, 0, '5001')
    expect(ackDelivery(q, 'chat:_', '5001')).toBe(true)
    expect(q.pending.size).toBe(0)
  })

  it('does NOT ack when a different (synthetic-source) turn enqueues under the same key', () => {
    const q = fresh()
    trackDelivery(q, 'chat:_', { text: 'real user msg' }, 0, '5001')
    // a cron / resume turn for the same chat enqueues first, with its own id
    expect(ackDelivery(q, 'chat:_', '1716123456789')).toBe(false)
    // the user message is still tracked — it strands → gets re-delivered, not dropped
    expect(q.pending.size).toBe(1)
    expect(sweep(q, 15_000, TIMEOUT)).toHaveLength(1)
    // and its own enqueue (matching id) later acks it cleanly
    expect(ackDelivery(q, 'chat:_', '5001')).toBe(true)
    expect(q.pending.size).toBe(0)
  })

  it('does NOT ack when the enqueue carries no message id but the tracked one has one', () => {
    const q = fresh()
    trackDelivery(q, 'chat:_', { text: 'real user msg' }, 0, '5001')
    expect(ackDelivery(q, 'chat:_', null)).toBe(false)
    expect(q.pending.size).toBe(1)
  })

  it('falls back to key-only ack when no message id was recorded (legacy/defensive)', () => {
    const q = fresh()
    trackDelivery(q, 'chat:_', { text: 'x' }, 0) // no messageId
    expect(ackDelivery(q, 'chat:_', '5001')).toBe(true)
    expect(q.pending.size).toBe(0)
  })
})
|
|
152
|
+
|
|
153
|
+
// Regression for the steer/interrupt re-delivery loop: steering and `!`
// interrupt inbounds amend the running turn and never emit `enqueue`, so they
// must NOT be tracked (else the sweep re-delivers them forever). Only
// fresh-turn messages — which DO enqueue — are tracked.
describe('shouldTrackDelivery — only fresh-turn messages are tracked', () => {
  it('tracks a normal (non-steering, non-interrupt) message', () => {
    expect(shouldTrackDelivery({ isSteering: false, isInterrupt: false })).toBe(true)
  })
  it('does NOT track a /steer message (amends the turn — never acks)', () => {
    expect(shouldTrackDelivery({ isSteering: true, isInterrupt: false })).toBe(false)
  })
  it('does NOT track a ! interrupt message (amends the turn — never acks)', () => {
    expect(shouldTrackDelivery({ isSteering: false, isInterrupt: true })).toBe(false)
  })
  it('does NOT track when both flags set (defensive)', () => {
    expect(shouldTrackDelivery({ isSteering: true, isInterrupt: true })).toBe(false)
  })
  it('does NOT track a synthetic (meta.source) inbound — cron/resume/vault/reaction enqueue under their own semantics', () => {
    expect(shouldTrackDelivery({ isSteering: false, isInterrupt: false, hasSource: true })).toBe(false)
  })
  it('does NOT track an empty-body message (e.g. `/queue` with no text) — never enqueues, would re-deliver forever', () => {
    expect(shouldTrackDelivery({ isSteering: false, isInterrupt: false, effectiveText: '' })).toBe(false)
    // whitespace-only counts as empty too
    expect(shouldTrackDelivery({ isSteering: false, isInterrupt: false, effectiveText: ' ' })).toBe(false)
  })
  it('tracks a normal message when effectiveText is provided and non-empty', () => {
    expect(shouldTrackDelivery({ isSteering: false, isInterrupt: false, effectiveText: 'draft the email' })).toBe(true)
  })
})
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* E2E regression — inbound is NEVER dropped under rapid fire (the drop-wedge).
|
|
3
|
+
*
|
|
4
|
+
* The bug: a Telegram inbound reaches claude as an MCP channel notification
|
|
5
|
+
* that the unmodified CLI appends to its composer and auto-submits only when the
|
|
6
|
+
* composer is empty + idle. A message arriving the instant the prior turn
|
|
7
|
+
* completes races that auto-submit and strands unsubmitted — claude never
|
|
8
|
+
* starts the turn, the gateway sits "typing…", and the 300s silence-poke
|
|
9
|
+
* DROPS the message. Observed recurring on `marko` (supergroup topics + DMs).
|
|
10
|
+
*
|
|
11
|
+
* This scenario drives the exact failure timing: it fires each message the
|
|
12
|
+
* instant the prior reply lands (i.e. right at turn-completion, the strand
|
|
13
|
+
* window) and asserts EVERY message gets its own unique token back. With the
|
|
14
|
+
* deliver-until-acked queue (inbound-delivery-confirm.ts) a strand self-heals
|
|
15
|
+
* via re-delivery; without it, a stranded message yields NO reply within the
|
|
16
|
+
* timeout and this test fails on exactly the message that was swallowed.
|
|
17
|
+
*
|
|
18
|
+
* Each message carries a random token and the assertion matches THAT token,
|
|
19
|
+
* so a reply to message N-1 can never be mistaken for message N's reply — the
|
|
20
|
+
* test proves every distinct message was actually processed, not merely that
|
|
21
|
+
* "some replies came back".
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import { describe, it, expect } from "vitest";
|
|
25
|
+
import { spinUp } from "../harness.js";
|
|
26
|
+
|
|
27
|
+
describe("uat: rapid-fire inbound — no message is ever dropped (drop-wedge)", () => {
  it(
    "every back-to-back message gets its own reply (delivery never strands)",
    async () => {
      const sc = await spinUp({ agent: "test-harness" });
      try {
        const N = 8;
        // 1-based indices of the messages whose token never came back.
        const dropped: number[] = [];
        for (let i = 1; i <= N; i++) {
          // Unique per message, so a late reply to an earlier message can
          // never satisfy this iteration's wait.
          const token = `acktok-${i}-${Math.random().toString(36).slice(2, 8)}`;
          await sc.sendDM(
            `Reply with exactly this token and nothing else: ${token}`,
          );
          try {
            const reply = await sc.expectMessage((m) => m.text.includes(token), {
              from: "bot",
              timeout: 75_000,
            });
            expect(reply.text).toContain(token);
          } catch {
            // The message stranded — no reply carrying its token arrived.
            // Record it and keep going so the final assertion lists every
            // dropped message, not just the first.
            dropped.push(i);
          }
          // Deliberately NO delay: fire the next message the instant this
          // reply lands, so it arrives in the turn-completion strand window.
        }
        expect(
          dropped,
          `messages dropped (no reply within timeout): ${dropped.join(", ")} of ${N}`,
        ).toEqual([]);
      } finally {
        await sc.tearDown();
      }
    },
    // N messages × (turn + up to one ~15-20s strand recovery) — generous.
    900_000,
  );
});
|
|
@@ -55,8 +55,15 @@ describe("uat: rapid follow-ups — steering vs queued classification", () => {
|
|
|
55
55
|
(m) => {
|
|
56
56
|
const txt = m.text;
|
|
57
57
|
const mentionsMd5 = /\bmd5\b/i.test(txt);
|
|
58
|
+
// Steer narration: the agent acknowledges amending the in-flight
|
|
59
|
+
// task. Accept the phrasings the model actually uses — including
|
|
60
|
+
// "Switched to MD5 per your update/follow-up" (the 2026-06-02
|
|
61
|
+
// canary reply that the old regex wrongly rejected). Anchored on
|
|
62
|
+
// "per your <qualifier>" / continuation language so it stays
|
|
63
|
+
// distinct from the QUEUED path (a fresh answer with no such
|
|
64
|
+
// course-correction narration).
|
|
58
65
|
const narratesSteer =
|
|
59
|
-
/↪️|\bsteer(ing)?\b|continuing the (prior|original|in-flight) task|amendment|course[- ]correct/i.test(
|
|
66
|
+
/↪️|\bsteer(ing)?\b|switch(?:ed|ing)? to \w+ per your (?:update|follow-?up|guidance|request|steer)|continuing the (prior|original|in-flight) task|amendment|course[- ]correct/i.test(
|
|
60
67
|
txt,
|
|
61
68
|
);
|
|
62
69
|
return mentionsMd5 && narratesSteer;
|