switchroom 0.13.20 → 0.13.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/profiles/_shared/telegram-style.md.hbs +3 -3
- package/telegram-plugin/dist/gateway/gateway.js +87 -25
- package/telegram-plugin/gateway/disconnect-flush.ts +37 -0
- package/telegram-plugin/gateway/gateway.ts +100 -7
- package/telegram-plugin/gateway/inbound-delivery-gate.ts +37 -4
- package/telegram-plugin/handoff-continuity.ts +8 -2
- package/telegram-plugin/recent-outbound-dedup.ts +51 -5
- package/telegram-plugin/runtime-metrics.ts +5 -1
- package/telegram-plugin/subagent-watcher.ts +25 -3
- package/telegram-plugin/tests/gateway-disconnect-flush.test.ts +114 -0
- package/telegram-plugin/tests/handoff-continuity.test.ts +15 -2
- package/telegram-plugin/tests/inbound-delivery-gate.test.ts +77 -4
- package/telegram-plugin/tests/recent-outbound-dedup.test.ts +72 -0
- package/telegram-plugin/tests/subagent-watcher-enoent-deregister.test.ts +152 -0
- package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +72 -45
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* (written by the summarizer Stop hook). On the FIRST assistant reply
|
|
6
6
|
* of the new session the plugin prepends a subtle one-liner:
|
|
7
7
|
*
|
|
8
|
-
* ↩️ Picked up where we left off — <topic>
|
|
8
|
+
* ↩️ Picked up where we left off, <topic>
|
|
9
9
|
*
|
|
10
10
|
* The sidecar is consumed (read + deleted) so the line only fires once.
|
|
11
11
|
* All helpers here are filesystem-only or env-only — no Telegram side
|
|
@@ -175,7 +175,13 @@ export function formatHandoffLine(
|
|
|
175
175
|
topic: string,
|
|
176
176
|
format: HandoffFormat,
|
|
177
177
|
): string {
|
|
178
|
-
|
|
178
|
+
// Comma instead of em-dash: the framework-emitted prefix is
|
|
179
|
+
// concatenated AFTER scrubVoice runs on the model body (gateway.ts
|
|
180
|
+
// executeReply), so any em-dash here bypasses the v0.13.20 voice
|
|
181
|
+
// scrub. Replacing at the template source is one mechanical change
|
|
182
|
+
// that closes the dominant residual em-dash leak (16 of 17 dashed
|
|
183
|
+
// messages on test-harness were this template per 2026-05-24 audit).
|
|
184
|
+
const prefix = "↩️ Picked up where we left off, ";
|
|
179
185
|
if (format === "html") {
|
|
180
186
|
return `<i>${prefix}${escapeHtml(topic)}</i>\n\n`;
|
|
181
187
|
}
|
|
@@ -57,6 +57,16 @@ interface DedupEntry {
|
|
|
57
57
|
/** First 80 chars of the original (un-normalized) text — for
|
|
58
58
|
* operator-facing log lines that show what got deduped. */
|
|
59
59
|
preview: string
|
|
60
|
+
/** The `currentTurn.registryKey` at record time, or `null` if the
|
|
61
|
+
* recording site had no turn context. Threaded through so check()
|
|
62
|
+
* can distinguish within-turn retries (#546 bug class — keep
|
|
63
|
+
* protecting) from cross-turn coincidences (2026-05-23 audit found
|
|
64
|
+
* identical mid-turn + final replies across two turns ~30s apart
|
|
65
|
+
* silently swallowing the second turn's answer; the user gets
|
|
66
|
+
* no response to their second question). Null on either side
|
|
67
|
+
* matches as before, preserving the boot-time / edge-case behaviour
|
|
68
|
+
* the original tests pin. */
|
|
69
|
+
turnKey: string | null
|
|
60
70
|
}
|
|
61
71
|
|
|
62
72
|
/**
|
|
@@ -75,8 +85,21 @@ export class OutboundDedupCache {
|
|
|
75
85
|
/** Record an outbound message. Caller should invoke this after a
|
|
76
86
|
* successful send, regardless of which path sent it (turn-flush,
|
|
77
87
|
* executeReply, executeStreamReply, etc.). Short content is not
|
|
78
|
-
* recorded — see DEDUP_MIN_CONTENT_LEN. */
|
|
79
|
-
|
|
88
|
+
* recorded — see DEDUP_MIN_CONTENT_LEN.
|
|
89
|
+
*
|
|
90
|
+
* `turnKey` lets check() tell within-turn retries (the #546 race
|
|
91
|
+
* this module exists to catch) apart from cross-turn coincidences
|
|
92
|
+
* (a user asking similar questions in different turns). Pass
|
|
93
|
+
* `null` if the recording site has no turn context — that matches
|
|
94
|
+
* legacy behaviour and is what the early-boot / fallback callers
|
|
95
|
+
* pass. */
|
|
96
|
+
record(
|
|
97
|
+
chatId: string,
|
|
98
|
+
threadId: number | undefined,
|
|
99
|
+
text: string,
|
|
100
|
+
now: number,
|
|
101
|
+
turnKey: string | null = null,
|
|
102
|
+
): void {
|
|
80
103
|
if (text.length < DEDUP_MIN_CONTENT_LEN) return
|
|
81
104
|
const key = makeKey(chatId, threadId)
|
|
82
105
|
const list = this.entries.get(key) ?? []
|
|
@@ -85,6 +108,7 @@ export class OutboundDedupCache {
|
|
|
85
108
|
hash: normalizeForDedup(text),
|
|
86
109
|
ts: now,
|
|
87
110
|
preview: text.slice(0, 80),
|
|
111
|
+
turnKey,
|
|
88
112
|
})
|
|
89
113
|
this.entries.set(key, list)
|
|
90
114
|
}
|
|
@@ -92,12 +116,24 @@ export class OutboundDedupCache {
|
|
|
92
116
|
/** Check whether the given text was already sent recently to the
|
|
93
117
|
* same chat. Returns the matched entry's preview + age on hit, or
|
|
94
118
|
* null on miss. Caller decides what to do with the answer
|
|
95
|
-
* (skip-send, log, etc.). */
|
|
119
|
+
* (skip-send, log, etc.).
|
|
120
|
+
*
|
|
121
|
+
* Cross-turn carve-out (2026-05-23 fix): when both sides of a hash
|
|
122
|
+
* match carry non-null `turnKey` AND those keys differ, treat as
|
|
123
|
+
* miss. The duplicate-reply race this module was built for (#546)
|
|
124
|
+
* is strictly within-turn (the same turn's buffered text replays
|
|
125
|
+
* via a stream_reply retry), so within-turn retries continue to
|
|
126
|
+
* hit. A user typing two similar prompts back-to-back used to lose
|
|
127
|
+
* the second turn's reply because the hashes collided across
|
|
128
|
+
* turns; that no longer happens. Null on EITHER side (legacy /
|
|
129
|
+
* no-turn-context callers) still matches — preserves backward
|
|
130
|
+
* compatibility with the original test suite + early-boot paths. */
|
|
96
131
|
check(
|
|
97
132
|
chatId: string,
|
|
98
133
|
threadId: number | undefined,
|
|
99
134
|
text: string,
|
|
100
135
|
now: number,
|
|
136
|
+
turnKey: string | null = null,
|
|
101
137
|
): { matched: true; preview: string; ageMs: number } | null {
|
|
102
138
|
if (text.length < DEDUP_MIN_CONTENT_LEN) return null
|
|
103
139
|
const key = makeKey(chatId, threadId)
|
|
@@ -106,9 +142,19 @@ export class OutboundDedupCache {
|
|
|
106
142
|
this.evict(list, now)
|
|
107
143
|
const candidateHash = normalizeForDedup(text)
|
|
108
144
|
for (const entry of list) {
|
|
109
|
-
if (entry.hash === candidateHash) {
|
|
110
|
-
|
|
145
|
+
if (entry.hash !== candidateHash) continue
|
|
146
|
+
// Cross-turn carve-out: distinct, non-null turnKeys on both
|
|
147
|
+
// sides ⇒ different turns ⇒ not a #546 retry. Skip past this
|
|
148
|
+
// entry and keep scanning (a same-turn match later in the list
|
|
149
|
+
// should still hit).
|
|
150
|
+
if (
|
|
151
|
+
turnKey != null
|
|
152
|
+
&& entry.turnKey != null
|
|
153
|
+
&& entry.turnKey !== turnKey
|
|
154
|
+
) {
|
|
155
|
+
continue
|
|
111
156
|
}
|
|
157
|
+
return { matched: true, preview: entry.preview, ageMs: now - entry.ts }
|
|
112
158
|
}
|
|
113
159
|
return null
|
|
114
160
|
}
|
|
@@ -158,7 +158,11 @@ export type RuntimeMetricEvent =
|
|
|
158
158
|
kind: 'voice_scrub_applied'
|
|
159
159
|
chatKey: string
|
|
160
160
|
replaced: number
|
|
161
|
-
|
|
161
|
+
// `stream_reply` and `turn_flush` added in v0.13.21 — modern
|
|
162
|
+
// Claude routes most multi-paragraph replies through the
|
|
163
|
+
// answer-stream / draft-stream path, bypassing the v0.13.20
|
|
164
|
+
// executeReply scrub site. The two new sites close that gap.
|
|
165
|
+
site: 'reply' | 'edit_message' | 'progress_update' | 'answer_stream' | 'stream_reply' | 'turn_flush'
|
|
162
166
|
}
|
|
163
167
|
|
|
164
168
|
/**
|
|
@@ -459,7 +459,10 @@ function backfillJsonlAgentId(
|
|
|
459
459
|
log?.(`subagent-watcher: backfill linked ${agentId} → ${candidate.id}`)
|
|
460
460
|
}
|
|
461
461
|
|
|
462
|
-
|
|
462
|
+
// Exported for unit-testing the ENOENT/EACCES deregister path
|
|
463
|
+
// (telegram-plugin/tests/subagent-watcher-enoent-deregister.test.ts).
|
|
464
|
+
// Not intended for consumption by other modules.
|
|
465
|
+
export function readSubTail(
|
|
463
466
|
entry: WorkerEntry,
|
|
464
467
|
tail: SubTail,
|
|
465
468
|
now: number,
|
|
@@ -472,6 +475,14 @@ function readSubTail(
|
|
|
472
475
|
* previously-stalled entry. Closes the resume edge the schema doc
|
|
473
476
|
* has always promised. */
|
|
474
477
|
onUnstall?: (agentId: string, description: string) => void,
|
|
478
|
+
/** Fires when the JSONL file is no longer accessible (ENOENT — file
|
|
479
|
+
* reaped by Claude Code when the parent session ends; EACCES —
|
|
480
|
+
* permission change mid-poll). The caller deregisters the entry so
|
|
481
|
+
* the 1s poll loop stops re-statting a dead path. Without this
|
|
482
|
+
* callback, every poll re-emits the error log line — on 2026-05-23
|
|
483
|
+
* the clerk agent logged 540k ENOENT lines in 3 days (30/sec
|
|
484
|
+
* sustained) AND leaked one fs.watch FD per stranded entry. */
|
|
485
|
+
onFileVanished?: (agentId: string, code: 'ENOENT' | 'EACCES') => void,
|
|
475
486
|
): void {
|
|
476
487
|
try {
|
|
477
488
|
const stat = fs.statSync(entry.filePath)
|
|
@@ -639,6 +650,17 @@ function readSubTail(
|
|
|
639
650
|
}
|
|
640
651
|
tail.hasEmittedStart = startState.hasEmittedStart
|
|
641
652
|
} catch (err) {
|
|
653
|
+
const code = (err as NodeJS.ErrnoException).code
|
|
654
|
+
if (code === 'ENOENT' || code === 'EACCES') {
|
|
655
|
+
// JSONL is gone (Claude Code reaped the parent session's
|
|
656
|
+
// subagents/ dir) or permission flipped under us. Deregister the
|
|
657
|
+
// entry so the periodic poll stops re-emitting this same line
|
|
658
|
+
// forever. Logged ONCE per agent — operators can still audit
|
|
659
|
+
// which entries got reaped without 30 lines/sec of noise.
|
|
660
|
+
log?.(`subagent-watcher: JSONL vanished for ${entry.agentId} (${code}) — deregistering`)
|
|
661
|
+
onFileVanished?.(entry.agentId, code)
|
|
662
|
+
return
|
|
663
|
+
}
|
|
642
664
|
log?.(`subagent-watcher: read error ${entry.agentId}: ${(err as Error).message}`)
|
|
643
665
|
}
|
|
644
666
|
}
|
|
@@ -841,7 +863,7 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
841
863
|
if (!entry || !t) return
|
|
842
864
|
readSubTail(entry, t, nowFn(), (desc) => {
|
|
843
865
|
log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`)
|
|
844
|
-
}, fs, log, db, parentStateDir, config.onUnstall)
|
|
866
|
+
}, fs, log, db, parentStateDir, config.onUnstall, cleanupTerminalAgent)
|
|
845
867
|
maybySendStateTransition(agentId)
|
|
846
868
|
})
|
|
847
869
|
} catch (err) {
|
|
@@ -1179,7 +1201,7 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
1179
1201
|
if (!tail) continue
|
|
1180
1202
|
readSubTail(entry, tail, n, (desc) => {
|
|
1181
1203
|
log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`)
|
|
1182
|
-
}, fs, log, db, parentStateDir, config.onUnstall)
|
|
1204
|
+
}, fs, log, db, parentStateDir, config.onUnstall, cleanupTerminalAgent)
|
|
1183
1205
|
maybySendStateTransition(agentId)
|
|
1184
1206
|
}
|
|
1185
1207
|
|
|
@@ -142,3 +142,117 @@ describe('flushOnAgentDisconnect — registered agent disconnects (existing beha
|
|
|
142
142
|
expect(deps.activeDraftParseModes.size).toBe(0)
|
|
143
143
|
})
|
|
144
144
|
})
|
|
145
|
+
|
|
146
|
+
describe('flushOnAgentDisconnect — dangling-turn sweep (2026-05-23 wedge fix)', () => {
|
|
147
|
+
// The race that motivates this: the canonical reply path fires
|
|
148
|
+
// `setDone()` on the StatusReactionController BEFORE purgeReactionTracking
|
|
149
|
+
// runs `activeTurnStartedAt.delete(key)`. If the bridge crashes between
|
|
150
|
+
// those two steps, the controller loop sees an EMPTY activeStatusReactions
|
|
151
|
+
// (already cleared by setDone) but activeTurnStartedAt still has the key.
|
|
152
|
+
// Without the sweep, that key orphans and the next inbound is "held mid-
|
|
153
|
+
// turn" against a ghost.
|
|
154
|
+
|
|
155
|
+
it('sweeps activeTurnStartedAt keys the controller loop missed', () => {
|
|
156
|
+
// Construct the exact race: activeStatusReactions is EMPTY (setDone
|
|
157
|
+
// already cleared it on the reply path) but activeTurnStartedAt still
|
|
158
|
+
// has an entry.
|
|
159
|
+
const onDanglingTurnsSwept = vi.fn()
|
|
160
|
+
const clearActiveReactions = vi.fn()
|
|
161
|
+
const disposeProgressDriver = vi.fn()
|
|
162
|
+
const log = vi.fn()
|
|
163
|
+
const deps = {
|
|
164
|
+
agentName: 'clerk',
|
|
165
|
+
activeStatusReactions: new Map<string, FakeCtrl>(),
|
|
166
|
+
activeReactionMsgIds: new Map<string, { chatId: string; messageId: number }>([
|
|
167
|
+
['ghost:thr:msg', { chatId: 'ghost', messageId: 42 }],
|
|
168
|
+
]),
|
|
169
|
+
activeTurnStartedAt: new Map<string, number>([['ghost:thr:msg', 100]]),
|
|
170
|
+
activeDraftStreams: new Map<string, FakeStream>(),
|
|
171
|
+
activeDraftParseModes: new Map<string, 'HTML' | 'MarkdownV2' | undefined>(),
|
|
172
|
+
clearActiveReactions,
|
|
173
|
+
disposeProgressDriver,
|
|
174
|
+
onDanglingTurnsSwept,
|
|
175
|
+
log,
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
flushOnAgentDisconnect(deps)
|
|
179
|
+
|
|
180
|
+
// The sweep fired and cleared the dangling entry.
|
|
181
|
+
expect(deps.activeTurnStartedAt.size).toBe(0)
|
|
182
|
+
expect(deps.activeReactionMsgIds.size).toBe(0)
|
|
183
|
+
expect(onDanglingTurnsSwept).toHaveBeenCalledTimes(1)
|
|
184
|
+
expect(onDanglingTurnsSwept.mock.calls[0][0]).toEqual(['ghost:thr:msg'])
|
|
185
|
+
// The log line names what happened so the operator can audit.
|
|
186
|
+
expect(
|
|
187
|
+
log.mock.calls.some((c: unknown[]) =>
|
|
188
|
+
typeof c[0] === 'string' && /swept .* dangling turn/.test(c[0]),
|
|
189
|
+
),
|
|
190
|
+
).toBe(true)
|
|
191
|
+
})
|
|
192
|
+
|
|
193
|
+
it('does not fire the sweep when the controller loop already cleaned up everything', () => {
|
|
194
|
+
// Normal-path disconnect: activeStatusReactions had entries, the
|
|
195
|
+
// controller loop ran setDone + delete on each, activeTurnStartedAt
|
|
196
|
+
// is already empty by the end of the loop. No dangling to sweep.
|
|
197
|
+
const { spies, deps } = makeDeps('clerk')
|
|
198
|
+
const onDanglingTurnsSwept = vi.fn()
|
|
199
|
+
const depsWithCallback = { ...deps, onDanglingTurnsSwept }
|
|
200
|
+
|
|
201
|
+
flushOnAgentDisconnect(depsWithCallback)
|
|
202
|
+
|
|
203
|
+
// Controller loop already cleaned both entries.
|
|
204
|
+
expect(deps.activeTurnStartedAt.size).toBe(0)
|
|
205
|
+
// Callback NOT fired — nothing left to sweep after the loop.
|
|
206
|
+
expect(onDanglingTurnsSwept).not.toHaveBeenCalled()
|
|
207
|
+
// Regression: the existing setDone path still works.
|
|
208
|
+
expect(spies.setDoneA).toHaveBeenCalledTimes(1)
|
|
209
|
+
expect(spies.setDoneB).toHaveBeenCalledTimes(1)
|
|
210
|
+
})
|
|
211
|
+
|
|
212
|
+
it('does NOT sweep for anonymous disconnects (no agent registered)', () => {
|
|
213
|
+
// Critical regression guard: the sweep MUST be gated by the
|
|
214
|
+
// agentName-null early-return. Anonymous one-shot IPC clients
|
|
215
|
+
// (recall.py, etc.) disconnect constantly and must never touch
|
|
216
|
+
// turn state.
|
|
217
|
+
const onDanglingTurnsSwept = vi.fn()
|
|
218
|
+
const deps = {
|
|
219
|
+
agentName: null,
|
|
220
|
+
activeStatusReactions: new Map<string, FakeCtrl>(),
|
|
221
|
+
activeReactionMsgIds: new Map<string, { chatId: string; messageId: number }>(),
|
|
222
|
+
activeTurnStartedAt: new Map<string, number>([['real-turn:thr:msg', 100]]),
|
|
223
|
+
activeDraftStreams: new Map<string, FakeStream>(),
|
|
224
|
+
activeDraftParseModes: new Map<string, 'HTML' | 'MarkdownV2' | undefined>(),
|
|
225
|
+
clearActiveReactions: vi.fn(),
|
|
226
|
+
disposeProgressDriver: vi.fn(),
|
|
227
|
+
onDanglingTurnsSwept,
|
|
228
|
+
log: vi.fn(),
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
flushOnAgentDisconnect(deps)
|
|
232
|
+
|
|
233
|
+
// Anonymous disconnect: turn state preserved, sweep callback not fired.
|
|
234
|
+
expect(deps.activeTurnStartedAt.size).toBe(1)
|
|
235
|
+
expect(onDanglingTurnsSwept).not.toHaveBeenCalled()
|
|
236
|
+
})
|
|
237
|
+
|
|
238
|
+
it('omitting onDanglingTurnsSwept is safe (optional callback)', () => {
|
|
239
|
+
// Backward-compat guard — existing callers that don't pass the new
|
|
240
|
+
// callback still work without runtime error.
|
|
241
|
+
const deps = {
|
|
242
|
+
agentName: 'clerk',
|
|
243
|
+
activeStatusReactions: new Map<string, FakeCtrl>(),
|
|
244
|
+
activeReactionMsgIds: new Map<string, { chatId: string; messageId: number }>(),
|
|
245
|
+
activeTurnStartedAt: new Map<string, number>([['ghost:thr:msg', 100]]),
|
|
246
|
+
activeDraftStreams: new Map<string, FakeStream>(),
|
|
247
|
+
activeDraftParseModes: new Map<string, 'HTML' | 'MarkdownV2' | undefined>(),
|
|
248
|
+
clearActiveReactions: vi.fn(),
|
|
249
|
+
disposeProgressDriver: vi.fn(),
|
|
250
|
+
// onDanglingTurnsSwept intentionally omitted.
|
|
251
|
+
log: vi.fn(),
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
expect(() => flushOnAgentDisconnect(deps)).not.toThrow()
|
|
255
|
+
// The sweep still happens, just without the callback observation.
|
|
256
|
+
expect(deps.activeTurnStartedAt.size).toBe(0)
|
|
257
|
+
})
|
|
258
|
+
})
|
|
@@ -219,7 +219,7 @@ describe("shouldShowHandoffLine", () => {
|
|
|
219
219
|
describe("formatHandoffLine", () => {
|
|
220
220
|
it("wraps the topic in italic HTML with the return emoji", () => {
|
|
221
221
|
const line = formatHandoffLine("fixing the bug", "html");
|
|
222
|
-
expect(line).toBe("<i>↩️ Picked up where we left off — fixing the bug</i>\n\n");
|
|
222
|
+
expect(line).toBe("<i>↩️ Picked up where we left off, fixing the bug</i>\n\n");
|
|
223
223
|
});
|
|
224
224
|
|
|
225
225
|
it("escapes HTML-unsafe chars in the topic", () => {
|
|
@@ -238,7 +238,7 @@ describe("formatHandoffLine", () => {
|
|
|
238
238
|
|
|
239
239
|
it("produces plain text for 'text' format", () => {
|
|
240
240
|
const line = formatHandoffLine("simple", "text");
|
|
241
|
-
expect(line).toBe("↩️ Picked up where we left off — simple\n\n");
|
|
241
|
+
expect(line).toBe("↩️ Picked up where we left off, simple\n\n");
|
|
242
242
|
});
|
|
243
243
|
|
|
244
244
|
it("always ends with a blank-line separator", () => {
|
|
@@ -246,4 +246,17 @@ describe("formatHandoffLine", () => {
|
|
|
246
246
|
expect(formatHandoffLine("t", fmt).endsWith("\n\n")).toBe(true);
|
|
247
247
|
}
|
|
248
248
|
});
|
|
249
|
+
|
|
250
|
+
// Regression guard: the handoff prefix was an em-dash bypass for the
|
|
251
|
+
// v0.13.20 voice scrubber (the framework prefix is concatenated AFTER
|
|
252
|
+
// scrubVoice runs in executeReply). Replacing the em-dash with a
|
|
253
|
+
// comma at the template source closes that leak. Pin it so a future
|
|
254
|
+
// operator who "fixes typography" doesn't re-introduce the dash.
|
|
255
|
+
it("does NOT contain an em-dash or en-dash in any format (voice-scrub guard)", () => {
|
|
256
|
+
for (const fmt of ["html", "markdownv2", "text"] as const) {
|
|
257
|
+
const line = formatHandoffLine("anything goes here", fmt);
|
|
258
|
+
expect(line).not.toContain("—");
|
|
259
|
+
expect(line).not.toContain("–");
|
|
260
|
+
}
|
|
261
|
+
});
|
|
249
262
|
});
|
|
@@ -41,13 +41,86 @@ describe('decideInboundDelivery', () => {
|
|
|
41
41
|
).toBe('deliver')
|
|
42
42
|
})
|
|
43
43
|
|
|
44
|
-
it('is total: the ONLY deferral path is mid-turn AND not steering', () => {
|
|
44
|
+
it('is total: the ONLY deferral path is mid-turn AND not steering AND not interrupt', () => {
|
|
45
45
|
for (const turnInFlight of [true, false]) {
|
|
46
46
|
for (const isSteering of [true, false]) {
|
|
47
|
-
const
|
|
48
|
-
|
|
49
|
-
|
|
47
|
+
for (const isInterrupt of [true, false]) {
|
|
48
|
+
const decision = decideInboundDelivery({ turnInFlight, isSteering, isInterrupt })
|
|
49
|
+
const expectBuffer = turnInFlight && !isSteering && !isInterrupt
|
|
50
|
+
expect(decision).toBe(expectBuffer ? 'buffer-until-idle' : 'deliver')
|
|
51
|
+
}
|
|
50
52
|
}
|
|
51
53
|
}
|
|
52
54
|
})
|
|
55
|
+
|
|
56
|
+
// ─── Interrupt-marker carve-out (2026-05-24 fix for the stranded-body bug) ──
|
|
57
|
+
// Live UAT trace: user fires `! actually do X` mid-turn. SIGINT delivered
|
|
58
|
+
// to claude via tmux send-keys. The killed turn does NOT emit
|
|
59
|
+
// turn_complete in many cases (mid-tool-call kill, in-flight subagent),
|
|
60
|
+
// so the post-`!` body sits in pendingInboundBuffer forever — the
|
|
61
|
+
// turn-complete drain trigger never fires. The user never gets a reply
|
|
62
|
+
// to their replacement instruction.
|
|
63
|
+
//
|
|
64
|
+
// The carve-out is a peer of isSteering: an interrupt body is by
|
|
65
|
+
// definition an intentional mid-turn delivery — the user explicitly
|
|
66
|
+
// asked for "stop and do this instead".
|
|
67
|
+
describe('interrupt-marker carve-out', () => {
|
|
68
|
+
it('delivers a `!`-interrupt body mid-turn (does NOT buffer)', () => {
|
|
69
|
+
// The headline regression fix. Without the carve-out the killed turn
|
|
70
|
+
// strands the body indefinitely.
|
|
71
|
+
expect(
|
|
72
|
+
decideInboundDelivery({
|
|
73
|
+
turnInFlight: true,
|
|
74
|
+
isSteering: false,
|
|
75
|
+
isInterrupt: true,
|
|
76
|
+
}),
|
|
77
|
+
).toBe('deliver')
|
|
78
|
+
})
|
|
79
|
+
|
|
80
|
+
it('delivers a `!`-interrupt body even when claude is idle (no harm)', () => {
|
|
81
|
+
expect(
|
|
82
|
+
decideInboundDelivery({
|
|
83
|
+
turnInFlight: false,
|
|
84
|
+
isSteering: false,
|
|
85
|
+
isInterrupt: true,
|
|
86
|
+
}),
|
|
87
|
+
).toBe('deliver')
|
|
88
|
+
})
|
|
89
|
+
|
|
90
|
+
it('isInterrupt is optional — omitting it preserves legacy behavior', () => {
|
|
91
|
+
// Backward-compat for callers that haven't been updated yet. Mirrors
|
|
92
|
+
// the optional-default pattern used in other gateway predicates this
|
|
93
|
+
// session (silent-reply-anchor wasOverPingSuppressed, recent-outbound-
|
|
94
|
+
// dedup turnKey).
|
|
95
|
+
expect(
|
|
96
|
+
decideInboundDelivery({ turnInFlight: true, isSteering: false }),
|
|
97
|
+
).toBe('buffer-until-idle')
|
|
98
|
+
expect(
|
|
99
|
+
decideInboundDelivery({ turnInFlight: false, isSteering: false }),
|
|
100
|
+
).toBe('deliver')
|
|
101
|
+
})
|
|
102
|
+
|
|
103
|
+
it('explicit isInterrupt:false is identical to omitting it', () => {
|
|
104
|
+
expect(
|
|
105
|
+
decideInboundDelivery({
|
|
106
|
+
turnInFlight: true,
|
|
107
|
+
isSteering: false,
|
|
108
|
+
isInterrupt: false,
|
|
109
|
+
}),
|
|
110
|
+
).toBe('buffer-until-idle')
|
|
111
|
+
})
|
|
112
|
+
|
|
113
|
+
it('interrupt + steering combination delivers (both are exempt paths)', () => {
|
|
114
|
+
// Pathological prompt: `! /steer change tactics`. parseInterruptMarker
|
|
115
|
+
// strips the `!`, then steering parse sees `/steer`. Either flag
|
|
116
|
+
// alone delivers; both together still deliver. No regression.
|
|
117
|
+
expect(
|
|
118
|
+
decideInboundDelivery({
|
|
119
|
+
turnInFlight: true,
|
|
120
|
+
isSteering: true,
|
|
121
|
+
isInterrupt: true,
|
|
122
|
+
}),
|
|
123
|
+
).toBe('deliver')
|
|
124
|
+
})
|
|
125
|
+
})
|
|
53
126
|
})
|
|
@@ -190,3 +190,75 @@ describe('OutboundDedupCache — multiple entries per chat', () => {
|
|
|
190
190
|
expect(cache.check('chat', undefined, LONG_HTML, 6000)).not.toBeNull()
|
|
191
191
|
})
|
|
192
192
|
})
|
|
193
|
+
|
|
194
|
+
// ─── turnKey carve-out (2026-05-23 cross-turn-swallow fix) ───────────────────
|
|
195
|
+
// Without turnKey awareness, the 60s TTL eats the SECOND turn's reply when a
|
|
196
|
+
// user asks similar questions back-to-back (forensic audit on midturn-silent
|
|
197
|
+
// UAT). The carve-out: both sides non-null + distinct ⇒ treat as miss.
|
|
198
|
+
// Within-turn (#546 retry race) protection unchanged: same turnKey on both
|
|
199
|
+
// sides ⇒ legacy hit. Null on either side ⇒ legacy hit.
|
|
200
|
+
|
|
201
|
+
const LONG_TEXT = 'long enough text to count as content for the dedup floor'
|
|
202
|
+
|
|
203
|
+
describe('OutboundDedupCache — turnKey carve-out', () => {
|
|
204
|
+
it('cross-turn identical content with distinct non-null turnKeys MISSES', () => {
|
|
205
|
+
// The headline bug: dedup was eating user replies across turns.
|
|
206
|
+
const cache = new OutboundDedupCache()
|
|
207
|
+
cache.record('chat', undefined, LONG_TEXT, 1000, 'turn-A')
|
|
208
|
+
expect(
|
|
209
|
+
cache.check('chat', undefined, LONG_TEXT, 5000, 'turn-B'),
|
|
210
|
+
).toBeNull()
|
|
211
|
+
})
|
|
212
|
+
|
|
213
|
+
it('within-turn duplicates (same turnKey) STILL HIT — preserves #546 protection', () => {
|
|
214
|
+
// Same-turn retry race the module was built for.
|
|
215
|
+
const cache = new OutboundDedupCache()
|
|
216
|
+
cache.record('chat', undefined, LONG_TEXT, 1000, 'turn-A')
|
|
217
|
+
expect(
|
|
218
|
+
cache.check('chat', undefined, LONG_TEXT, 10_000, 'turn-A'),
|
|
219
|
+
).not.toBeNull()
|
|
220
|
+
})
|
|
221
|
+
|
|
222
|
+
it('record-null + check-non-null → legacy hit', () => {
|
|
223
|
+
// Boot-time / silent-marker callers pass null on record; later
|
|
224
|
+
// executeReply checks with a turnKey. Legacy match must persist
|
|
225
|
+
// for the #546 protection to cover these cross-context cases.
|
|
226
|
+
const cache = new OutboundDedupCache()
|
|
227
|
+
cache.record('chat', undefined, LONG_TEXT, 1000, null)
|
|
228
|
+
expect(
|
|
229
|
+
cache.check('chat', undefined, LONG_TEXT, 5000, 'turn-A'),
|
|
230
|
+
).not.toBeNull()
|
|
231
|
+
})
|
|
232
|
+
|
|
233
|
+
it('record-non-null + check-null → legacy hit', () => {
|
|
234
|
+
// Symmetric direction: turn-flush records with turnKey, a later
|
|
235
|
+
// null-context probe (rare but possible) still matches.
|
|
236
|
+
const cache = new OutboundDedupCache()
|
|
237
|
+
cache.record('chat', undefined, LONG_TEXT, 1000, 'turn-A')
|
|
238
|
+
expect(
|
|
239
|
+
cache.check('chat', undefined, LONG_TEXT, 5000, null),
|
|
240
|
+
).not.toBeNull()
|
|
241
|
+
})
|
|
242
|
+
|
|
243
|
+
it('cross-turn entry does NOT shadow a same-turn match later in the list', () => {
|
|
244
|
+
// Edge case the predicate must handle: when the scan hits a stale
|
|
245
|
+
// cross-turn entry whose hash matches, it must keep scanning past
|
|
246
|
+
// it to find a real same-turn match. (The carve-out is implemented
|
|
247
|
+
// as `continue`, not `return null`.)
|
|
248
|
+
const cache = new OutboundDedupCache()
|
|
249
|
+
cache.record('chat', undefined, LONG_TEXT, 1000, 'turn-A') // older, cross-turn
|
|
250
|
+
cache.record('chat', undefined, LONG_TEXT, 3000, 'turn-B') // newer, same turn as query
|
|
251
|
+
expect(
|
|
252
|
+
cache.check('chat', undefined, LONG_TEXT, 5000, 'turn-B'),
|
|
253
|
+
).not.toBeNull()
|
|
254
|
+
})
|
|
255
|
+
|
|
256
|
+
it('legacy 4-arg API still compiles + matches (default turnKey=null)', () => {
|
|
257
|
+
// Backward-compat smoke test — older callers that haven't been
|
|
258
|
+
// updated to pass turnKey continue to behave as the original test
|
|
259
|
+
// suite pins.
|
|
260
|
+
const cache = new OutboundDedupCache()
|
|
261
|
+
cache.record('chat', undefined, LONG_TEXT, 1000)
|
|
262
|
+
expect(cache.check('chat', undefined, LONG_TEXT, 5000)).not.toBeNull()
|
|
263
|
+
})
|
|
264
|
+
})
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for the readSubTail ENOENT/EACCES deregister path.
|
|
3
|
+
*
|
|
4
|
+
* Production symptom: clerk agent's gateway-supervisor.log was growing
|
|
5
|
+
* at ~30 ENOENT lines/sec sustained (540k+ in 3 days) because the
|
|
6
|
+
* watcher's poll loop kept statx-ing JSONL files Claude Code had
|
|
7
|
+
* already reaped along with the parent session's `subagents/` dir.
|
|
8
|
+
* Same shape on klanker with EACCES (635 events) — likely a perm
|
|
9
|
+
* flip during cleanup.
|
|
10
|
+
*
|
|
11
|
+
* Fix shape: when readSubTail's statSync throws ENOENT or EACCES,
|
|
12
|
+
* log ONE line + invoke the onFileVanished callback so the watcher
|
|
13
|
+
* factory can call cleanupTerminalAgent and stop polling. Other
|
|
14
|
+
* errors (parse, malformed JSONL) keep the legacy per-poll log line.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { describe, it, expect, vi } from 'vitest'
|
|
18
|
+
import { readSubTail } from '../subagent-watcher.js'
|
|
19
|
+
import type { WorkerEntry } from '../subagent-watcher.js'
|
|
20
|
+
|
|
21
|
+
function fakeFsThrowingFromStat(code: 'ENOENT' | 'EACCES' | 'EOTHER') {
|
|
22
|
+
const err = new Error(`fake ${code}`) as NodeJS.ErrnoException
|
|
23
|
+
err.code = code
|
|
24
|
+
return {
|
|
25
|
+
existsSync: () => true,
|
|
26
|
+
readdirSync: () => [],
|
|
27
|
+
statSync: () => { throw err },
|
|
28
|
+
openSync: () => -1,
|
|
29
|
+
closeSync: () => {},
|
|
30
|
+
readSync: () => 0,
|
|
31
|
+
watch: () => ({ close: () => {} } as ReturnType<typeof require>),
|
|
32
|
+
} as unknown as Parameters<typeof readSubTail>[4]
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
function makeEntry(): WorkerEntry {
|
|
36
|
+
return {
|
|
37
|
+
agentId: 'a1234567890abcdef',
|
|
38
|
+
filePath: '/tmp/fake/agent-a1234567890abcdef.jsonl',
|
|
39
|
+
dispatchedAt: 0,
|
|
40
|
+
lastActivityAt: 0,
|
|
41
|
+
toolCount: 0,
|
|
42
|
+
state: 'running',
|
|
43
|
+
completionNotified: false,
|
|
44
|
+
stallNotified: false,
|
|
45
|
+
historical: false,
|
|
46
|
+
description: '',
|
|
47
|
+
lastSummaryLine: '',
|
|
48
|
+
lastResultText: '',
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
function makeTail() {
|
|
53
|
+
return {
|
|
54
|
+
cursor: 0,
|
|
55
|
+
pendingPartial: '',
|
|
56
|
+
hasEmittedStart: false,
|
|
57
|
+
watcher: null,
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
describe('readSubTail — ENOENT/EACCES deregister', () => {
|
|
62
|
+
it('fires onFileVanished and logs ONCE on ENOENT', () => {
|
|
63
|
+
const onFileVanished = vi.fn()
|
|
64
|
+
const log = vi.fn()
|
|
65
|
+
const entry = makeEntry()
|
|
66
|
+
|
|
67
|
+
readSubTail(
|
|
68
|
+
entry,
|
|
69
|
+
makeTail(),
|
|
70
|
+
0,
|
|
71
|
+
vi.fn(),
|
|
72
|
+
fakeFsThrowingFromStat('ENOENT'),
|
|
73
|
+
log,
|
|
74
|
+
null,
|
|
75
|
+
null,
|
|
76
|
+
undefined,
|
|
77
|
+
onFileVanished,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
expect(onFileVanished).toHaveBeenCalledTimes(1)
|
|
81
|
+
expect(onFileVanished).toHaveBeenCalledWith('a1234567890abcdef', 'ENOENT')
|
|
82
|
+
expect(log).toHaveBeenCalledTimes(1)
|
|
83
|
+
expect(log.mock.calls[0][0]).toMatch(/JSONL vanished for a1234567890abcdef \(ENOENT\) — deregistering/)
|
|
84
|
+
expect(log.mock.calls[0][0]).not.toMatch(/read error/)
|
|
85
|
+
})
|
|
86
|
+
|
|
87
|
+
it('fires onFileVanished and logs ONCE on EACCES (klanker pattern)', () => {
|
|
88
|
+
const onFileVanished = vi.fn()
|
|
89
|
+
const log = vi.fn()
|
|
90
|
+
|
|
91
|
+
readSubTail(
|
|
92
|
+
makeEntry(),
|
|
93
|
+
makeTail(),
|
|
94
|
+
0,
|
|
95
|
+
vi.fn(),
|
|
96
|
+
fakeFsThrowingFromStat('EACCES'),
|
|
97
|
+
log,
|
|
98
|
+
null,
|
|
99
|
+
null,
|
|
100
|
+
undefined,
|
|
101
|
+
onFileVanished,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
expect(onFileVanished).toHaveBeenCalledTimes(1)
|
|
105
|
+
expect(onFileVanished).toHaveBeenCalledWith('a1234567890abcdef', 'EACCES')
|
|
106
|
+
expect(log).toHaveBeenCalledTimes(1)
|
|
107
|
+
expect(log.mock.calls[0][0]).toMatch(/EACCES/)
|
|
108
|
+
})
|
|
109
|
+
|
|
110
|
+
it('still logs the legacy "read error" for unexpected error codes', () => {
|
|
111
|
+
// Regression guard: parse errors, EIO, EBUSY, etc. must still
|
|
112
|
+
// surface their detail. Only file-vanished codes are deregistered.
|
|
113
|
+
const onFileVanished = vi.fn()
|
|
114
|
+
const log = vi.fn()
|
|
115
|
+
|
|
116
|
+
readSubTail(
|
|
117
|
+
makeEntry(),
|
|
118
|
+
makeTail(),
|
|
119
|
+
0,
|
|
120
|
+
vi.fn(),
|
|
121
|
+
fakeFsThrowingFromStat('EOTHER'),
|
|
122
|
+
log,
|
|
123
|
+
null,
|
|
124
|
+
null,
|
|
125
|
+
undefined,
|
|
126
|
+
onFileVanished,
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
expect(onFileVanished).not.toHaveBeenCalled()
|
|
130
|
+
expect(log).toHaveBeenCalledTimes(1)
|
|
131
|
+
expect(log.mock.calls[0][0]).toMatch(/read error a1234567890abcdef/)
|
|
132
|
+
})
|
|
133
|
+
|
|
134
|
+
it('omitting onFileVanished is safe (optional callback)', () => {
|
|
135
|
+
const log = vi.fn()
|
|
136
|
+
|
|
137
|
+
expect(() =>
|
|
138
|
+
readSubTail(
|
|
139
|
+
makeEntry(),
|
|
140
|
+
makeTail(),
|
|
141
|
+
0,
|
|
142
|
+
vi.fn(),
|
|
143
|
+
fakeFsThrowingFromStat('ENOENT'),
|
|
144
|
+
log,
|
|
145
|
+
null,
|
|
146
|
+
null,
|
|
147
|
+
undefined,
|
|
148
|
+
),
|
|
149
|
+
).not.toThrow()
|
|
150
|
+
expect(log).toHaveBeenCalledTimes(1)
|
|
151
|
+
})
|
|
152
|
+
})
|