switchroom 0.14.38 → 0.14.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -191,6 +191,24 @@ function writeRow(dbPath, { id, parentSessionId, parentTurnKey, agentType, descr
191
191
  db.exec('ALTER TABLE subagents ADD COLUMN jsonl_agent_id TEXT')
192
192
  db.exec('CREATE INDEX IF NOT EXISTS subagents_jsonl_id ON subagents(jsonl_agent_id)')
193
193
  }
194
+ // Verify the marker-derived parent_turn_key (snapParams[2]) actually has
195
+ // a row in the turns table before trusting it. The gateway writes the
196
+ // turn-active marker even when recordTurnStart's INSERT failed (the two
197
+ // writes have independent failure surfaces), so a marker can name a
198
+ // turn_key with no turns row. Stamping that phantom key would route the
199
+ // worker card to the operator DM AND block the watcher's NULL-guarded
200
+ // window backfill from recovering it. Downgrade to NULL so the backfill
201
+ // stays eligible — this also defends against a stale/corrupted marker.
202
+ if (snapParams[2] != null) {
203
+ let turnRow = null
204
+ try {
205
+ turnRow = db.prepare('SELECT 1 FROM turns WHERE turn_key = ? LIMIT 1').get(snapParams[2])
206
+ } catch {
207
+ // turns table may not exist yet on a brand-new agent — treat as no row.
208
+ turnRow = null
209
+ }
210
+ if (turnRow == null) snapParams[2] = null
211
+ }
194
212
  db.prepare(snapInsertSql).run(...snapParams)
195
213
  db.close()
196
214
  done(null)
@@ -202,12 +220,65 @@ function writeRow(dbPath, { id, parentSessionId, parentTurnKey, agentType, descr
202
220
  }
203
221
 
204
222
  // sqlite3 CLI fallback — two non-blocking spawns sequenced via callbacks.
223
+ // This legacy path (neither node:sqlite nor bun:sqlite available) can't
224
+ // cheaply verify the marker's turn_key against the turns table, so drop
225
+ // parent_turn_key and let the gateway's window backfill attribute it.
226
+ // Production agents use node:sqlite; bun test uses bun:sqlite — both take
227
+ // the verified path above.
228
+ params[2] = null
205
229
  spawnSql(dbPath, SCHEMA_SQL.replace(/\n\s+/g, ' '), (err) => {
206
230
  if (err) { done(err); return }
207
231
  spawnSql(dbPath, fillPlaceholders(INSERT_SQL.trim(), params), done)
208
232
  })
209
233
  }
210
234
 
235
+ // ---------------------------------------------------------------------------
236
+ // Active-turn resolution (the parent_turn_key the row belongs to)
237
+ // ---------------------------------------------------------------------------
238
+
239
+ /**
240
+ * Read the gateway's turn-active marker to learn the turn_key of the turn that
241
+ * is active *right now* — the turn whose tool call is dispatching this
242
+ * sub-agent. The gateway writes `<TELEGRAM_STATE_DIR>/turn-active.json`
243
+ * synchronously at turn-start (gateway/turn-active-marker.ts), keyed
244
+ * `{turnKey, chatId, threadId, startedAt}`, and removes it at turn-complete.
245
+ * `telegramDir` here resolves to that same `TELEGRAM_STATE_DIR` in production
246
+ * (verified: identical inode to the registry.db dir), so the marker is a
247
+ * sibling of registry.db.
248
+ *
249
+ * Stamping parent_turn_key from this marker at INSERT time — instead of
250
+ * leaving it NULL for the gateway to reconstruct from a started_at time-window
251
+ * at jsonl-link time — fixes two bugs:
252
+ * - #2081: the time-window backfill mis-attributes when turn windows overlap
253
+ * (supergroup forum topics multiplex many concurrent turns under one
254
+ * chat_id; `ended_at` is unreliable/batch-swept). The live marker is the
255
+ * ground truth for "which turn dispatched this", so there is nothing to
256
+ * reconstruct and no overlap to disambiguate.
257
+ * - #2083: the backfill only runs when a sub-agent's JSONL links; ~8% never
258
+ * link and were never attributed. Stamping at INSERT is independent of
259
+ * linking.
260
+ *
261
+ * `turnKey` equals `turns.turn_key` (both minted by chatKeyWithSuffix at
262
+ * turn-start), so resolveSubagentOriginChat()'s getTurnByKey() finds the exact
263
+ * (chat_id, thread_id) and routes the worker card to the originating topic.
264
+ *
265
+ * Best-effort: if no turn is active (no marker — e.g. a sub-agent dispatched
266
+ * outside a turn) or the marker is unreadable/malformed, return null and let
267
+ * the gateway's started_at backfill remain the fallback (today's behaviour).
268
+ * Never throws; never blocks the tool call.
269
+ */
270
+ function readActiveTurnKey(telegramDir) {
271
+ try {
272
+ // Mirrors TURN_ACTIVE_MARKER_FILE in gateway/turn-active-marker.ts.
273
+ const raw = readFileSync(join(telegramDir, 'turn-active.json'), 'utf8')
274
+ const marker = JSON.parse(raw)
275
+ const turnKey = marker?.turnKey
276
+ return typeof turnKey === 'string' && turnKey.length > 0 ? turnKey : null
277
+ } catch {
278
+ return null
279
+ }
280
+ }
281
+
211
282
  // ---------------------------------------------------------------------------
212
283
  // main
213
284
  // ---------------------------------------------------------------------------
@@ -257,22 +328,22 @@ function main() {
257
328
  }
258
329
 
259
330
  const input = event.tool_input ?? {}
331
+ // Resolve parent_turn_key from the live turn-active marker (the turn whose
332
+ // tool call is dispatching this sub-agent). Claude Code's PreToolUse payload
333
+ // carries only its own session id, never the gateway-minted Telegram turn_key
334
+ // — but the gateway writes that turn_key to <telegramDir>/turn-active.json
335
+ // for the duration of the turn, so we read it directly here. Stamping it at
336
+ // INSERT (vs leaving NULL for the gateway's started_at time-window backfill)
337
+ // fixes overlapping-window mis-attribution (#2081) and attributes sub-agents
338
+ // whose JSONL never links (#2083). NULL when no turn is active → the gateway
339
+ // backfill remains the fallback. See readActiveTurnKey().
340
+ const parentTurnKey = readActiveTurnKey(telegramDir)
260
341
  writeRow(
261
342
  dbPath,
262
343
  {
263
344
  id: event.tool_use_id ?? null,
264
345
  parentSessionId: event.session_id ?? null,
265
- // parent_turn_key is intentionally NULL here. Claude Code's PreToolUse
266
- // payload carries its own session id, not the gateway-minted Telegram
267
- // turn_key (a chat+topic+turn key) the `turns` table is keyed on —
268
- // `event.turn_id` is always undefined, and even if a future CLI
269
- // populated it, it would not match a `turns.turn_key`. The gateway
270
- // resolves parent_turn_key from the
271
- // sub-agent's started_at at jsonl-link time (subagent-watcher.ts
272
- // backfillJsonlAgentId), which works even after the parent turn ends.
273
- // Writing a bogus value here would defeat that backfill's
274
- // `parent_turn_key IS NULL` guard.
275
- parentTurnKey: null,
346
+ parentTurnKey,
276
347
  agentType: input.subagent_type ?? null,
277
348
  description: input.description ?? null,
278
349
  background: input.run_in_background === true ? 1 : 0,
@@ -12,7 +12,7 @@
12
12
  */
13
13
 
14
14
  import { describe, it, expect, beforeEach, afterEach } from 'bun:test'
15
- import { mkdtempSync, mkdirSync, rmSync } from 'fs'
15
+ import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'fs'
16
16
  import { tmpdir } from 'os'
17
17
  import { join } from 'path'
18
18
  import { spawnSync } from 'child_process'
@@ -385,26 +385,62 @@ describe('Bug 4 — result_summary always NULL (hook integration)', () => {
385
385
  })
386
386
  })
387
387
 
388
- // ─── Bug 5 — parent_turn_key always NULL ─────────────────────────────────────
389
-
390
- describe('Bug 5 — parent_turn_key backfilled by gateway, not the hook', () => {
391
- it('pretool writes parent_turn_key=NULL even when event.turn_id is present', () => {
392
- // Claude Code's PreToolUse payload carries its own session id, never the
393
- // gateway-minted Telegram turn_key (a chat+topic+turn key) the `turns`
394
- // table is keyed on. `event.turn_id` — even if a future CLI populated it —
395
- // would not match a `turns.turn_key`, so the hook intentionally writes
396
- // NULL and lets the gateway backfill parent_turn_key from the sub-agent's
397
- // started_at at jsonl-link time (subagent-watcher.ts backfillJsonlAgentId).
398
- // Writing a bogus value here would defeat that backfill's
399
- // `parent_turn_key IS NULL` guard.
388
+ // ─── Bug 5 — parent_turn_key stamped from the live turn-active marker ─────────
389
+ // (#2081 / #2083) The PreToolUse hook reads <telegramDir>/turn-active.json —
390
+ // the gateway-written marker for the turn whose tool call is dispatching this
391
+ // sub-agent — and stamps parent_turn_key = marker.turnKey at INSERT. This
392
+ // captures the EXACT active turn (no started_at time-window reconstruction at
393
+ // jsonl-link time), so it can't mis-attribute under overlapping turn windows
394
+ // (#2081) and works even for sub-agents whose JSONL never links (#2083).
395
+
396
+ /** Write the gateway's turn-active marker into the agent's telegram dir. */
397
+ function writeTurnActiveMarker(turnKey: string, chatId = '12345', threadId: string | null = null) {
398
+ writeFileSync(
399
+ join(agentDir, 'telegram', 'turn-active.json'),
400
+ JSON.stringify({ turnKey, chatId, threadId, startedAt: Date.now() }, null, 2) + '\n',
401
+ )
402
+ }
403
+
404
+ /**
405
+ * Seed a turns row so the hook's phantom-turn_key guard (it only stamps a
406
+ * marker turn_key that actually has a turns row) is satisfied. In production
407
+ * the gateway writes this row via recordTurnStart at turn-start.
408
+ */
409
+ function seedTurn(turnKey: string, chatId = '12345', threadId: string | null = null) {
410
+ const { Database } = require('bun:sqlite') as {
411
+ Database: new (path: string) => {
412
+ prepare(sql: string): { run(...p: unknown[]): unknown }
413
+ exec(sql: string): void
414
+ close(): void
415
+ }
416
+ }
417
+ const db = new Database(dbPath)
418
+ db.exec(
419
+ `CREATE TABLE IF NOT EXISTS turns (
420
+ turn_key TEXT PRIMARY KEY, chat_id TEXT, thread_id TEXT,
421
+ started_at INTEGER, ended_at INTEGER, created_at INTEGER, updated_at INTEGER
422
+ )`,
423
+ )
424
+ const now = Date.now()
425
+ db.prepare(
426
+ 'INSERT OR IGNORE INTO turns (turn_key, chat_id, thread_id, started_at, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?)',
427
+ ).run(turnKey, chatId, threadId, now, now, now)
428
+ db.close()
429
+ }
430
+
431
+ describe('Bug 5 — parent_turn_key stamped from the turn-active marker', () => {
432
+ it('stamps parent_turn_key = marker.turnKey when a turn is active', () => {
433
+ // Supergroup forum-topic turn_key (chat:thread:startedAt).
434
+ const turnKey = '-1003831053471:4:1780370238492'
435
+ seedTurn(turnKey, '-1003831053471', '4')
436
+ writeTurnActiveMarker(turnKey, '-1003831053471', '4')
437
+
400
438
  const event = {
401
439
  session_id: 'sess-turnkey',
402
- turn_id: 'turn-abc-001',
403
440
  tool_name: 'Agent',
404
441
  tool_use_id: 'toolu_turnkey001',
405
442
  tool_input: { description: 'Task with turn context', run_in_background: false },
406
443
  }
407
-
408
444
  const result = runHook(PRETOOL_SCRIPT, event)
409
445
  expect(result.status).toBe(0)
410
446
 
@@ -414,18 +450,43 @@ describe('Bug 5 — parent_turn_key backfilled by gateway, not the hook', () =>
414
450
  | undefined
415
451
 
416
452
  expect(row).toBeDefined()
417
- // The hook never trusts event.turn_id — gateway backfill owns this column.
453
+ expect(row!.parent_turn_key).toBe(turnKey)
454
+ })
455
+
456
+ it('downgrades to NULL when the marker names a turn_key with no turns row (phantom-marker guard)', () => {
457
+ // The gateway writes the marker even if recordTurnStart's INSERT failed, so
458
+ // a marker can point at a turn_key with no row. Stamping it would mis-route
459
+ // the worker card AND block the watcher backfill (NULL guard). The hook must
460
+ // verify the row exists and fall back to NULL.
461
+ seedTurn('12345:_:1780000000000') // a DIFFERENT, real turn exists…
462
+ writeTurnActiveMarker('12345:_:9999999999999') // …but the marker names a phantom.
463
+
464
+ const event = {
465
+ session_id: 'sess-phantom',
466
+ tool_name: 'Agent',
467
+ tool_use_id: 'toolu_phantom001',
468
+ tool_input: { description: 'Task', run_in_background: false },
469
+ }
470
+ const result = runHook(PRETOOL_SCRIPT, event)
471
+ expect(result.status).toBe(0)
472
+
473
+ const db = openDb()
474
+ const row = db.prepare('SELECT parent_turn_key FROM subagents WHERE id = ?').get('toolu_phantom001') as
475
+ | { parent_turn_key: string | null }
476
+ | undefined
477
+ expect(row).toBeDefined()
418
478
  expect(row!.parent_turn_key).toBeNull()
419
479
  })
420
480
 
421
- it('pretool stores parent_turn_key as NULL when turn_id absent (no regression)', () => {
481
+ it('writes parent_turn_key=NULL when no turn is active (gateway backfill fallback)', () => {
482
+ // No marker written → no active turn → hook leaves NULL and the gateway's
483
+ // started_at backfill remains the fallback (today's behaviour).
422
484
  const event = {
423
485
  session_id: 'sess-noturnkey',
424
486
  tool_name: 'Agent',
425
487
  tool_use_id: 'toolu_noturn001',
426
488
  tool_input: { description: 'Task without turn context', run_in_background: false },
427
489
  }
428
-
429
490
  runHook(PRETOOL_SCRIPT, event)
430
491
 
431
492
  const db = openDb()
@@ -434,7 +495,47 @@ describe('Bug 5 — parent_turn_key backfilled by gateway, not the hook', () =>
434
495
  | undefined
435
496
 
436
497
  expect(row).toBeDefined()
437
- // When no turn_id in event, parent_turn_key should be NULL — no crash
498
+ expect(row!.parent_turn_key).toBeNull()
499
+ })
500
+
501
+ it('ignores event.turn_id — only the marker is authoritative', () => {
502
+ // A future CLI populating event.turn_id must NOT be trusted: it is Claude
503
+ // Code's session turn, never a gateway turns.turn_key. With no marker the
504
+ // result is NULL regardless of turn_id.
505
+ const event = {
506
+ session_id: 'sess-turnid-only',
507
+ turn_id: 'turn-abc-001',
508
+ tool_name: 'Agent',
509
+ tool_use_id: 'toolu_turnid001',
510
+ tool_input: { description: 'Task', run_in_background: false },
511
+ }
512
+ runHook(PRETOOL_SCRIPT, event)
513
+
514
+ const db = openDb()
515
+ const row = db.prepare('SELECT parent_turn_key FROM subagents WHERE id = ?').get('toolu_turnid001') as
516
+ | { parent_turn_key: string | null }
517
+ | undefined
518
+
519
+ expect(row).toBeDefined()
520
+ expect(row!.parent_turn_key).toBeNull()
521
+ })
522
+
523
+ it('a malformed marker degrades to NULL (never crashes the dispatch)', () => {
524
+ writeFileSync(join(agentDir, 'telegram', 'turn-active.json'), '{ not valid json')
525
+ const event = {
526
+ session_id: 'sess-badmarker',
527
+ tool_name: 'Agent',
528
+ tool_use_id: 'toolu_badmarker001',
529
+ tool_input: { description: 'Task', run_in_background: false },
530
+ }
531
+ const result = runHook(PRETOOL_SCRIPT, event)
532
+ expect(result.status).toBe(0)
533
+
534
+ const db = openDb()
535
+ const row = db.prepare('SELECT parent_turn_key FROM subagents WHERE id = ?').get('toolu_badmarker001') as
536
+ | { parent_turn_key: string | null }
537
+ | undefined
538
+ expect(row).toBeDefined()
438
539
  expect(row!.parent_turn_key).toBeNull()
439
540
  })
440
541
 
@@ -0,0 +1,109 @@
1
+ import { describe, expect, it } from 'vitest'
2
+
3
+ import {
4
+ ackDelivery,
5
+ createDeliveryQueue,
6
+ forgetDelivery,
7
+ sweep,
8
+ trackDelivery,
9
+ type DeliveryQueue,
10
+ } from '../gateway/inbound-delivery-confirm.js'
11
+
12
+ /**
13
+ * Regression coverage for the marko drop-wedge.
14
+ *
15
+ * An inbound delivered to claude's TUI composer strands unsubmitted when the
16
+ * auto-submit races turn-completion. claude never emits `enqueue`, so the
17
+ * gateway used to sit "typing…" for 300s then DROP the message.
18
+ *
19
+ * The queue's contract: a delivered inbound is acked ONLY by `enqueue`; until
20
+ * then it is re-delivered every `timeoutMs`, forever, never dropped — and an
21
+ * acked delivery never re-fires (no duplicate turns).
22
+ */
23
+ type Msg = { text: string }
24
+ const TIMEOUT = 15_000
25
+
26
+ function fresh(): DeliveryQueue<Msg> {
27
+ return createDeliveryQueue<Msg>()
28
+ }
29
+
30
+ describe('inbound-delivery-confirm (reliable deliver-until-acked queue)', () => {
31
+ it('an acked delivery is never re-delivered (happy path — no duplicate turns)', () => {
32
+ const q = fresh()
33
+ trackDelivery(q, 'chat:_', { text: 'hi' }, 1_000)
34
+ expect(ackDelivery(q, 'chat:_')).toBe(true) // enqueue arrived
35
+ expect(sweep(q, 1_000 + 999_999, TIMEOUT)).toHaveLength(0)
36
+ expect(q.pending.size).toBe(0)
37
+ })
38
+
39
+ it('within the timeout, an un-acked delivery is left alone (claude may still be picking it up)', () => {
40
+ const q = fresh()
41
+ trackDelivery(q, 'chat:_', { text: 'hi' }, 1_000)
42
+ expect(sweep(q, 1_000 + 14_999, TIMEOUT)).toHaveLength(0)
43
+ expect(q.pending.size).toBe(1)
44
+ })
45
+
46
+ it('a strand (no ack) is re-delivered after the timeout, and the clock resets', () => {
47
+ const q = fresh()
48
+ trackDelivery(q, 'chat:_', { text: 'draft nurture email' }, 1_000)
49
+ const r = sweep(q, 1_000 + 15_000, TIMEOUT)
50
+ expect(r).toHaveLength(1)
51
+ expect(r[0]!.inbound.text).toBe('draft nurture email')
52
+ expect(r[0]!.lastAttemptAt).toBe(1_000 + 15_000) // clock reset
53
+ // not re-swept until another full timeout elapses
54
+ expect(sweep(q, 1_000 + 15_000 + 14_999, TIMEOUT)).toHaveLength(0)
55
+ })
56
+
57
+ it('keeps re-delivering forever until acked — never drops (the reliability invariant)', () => {
58
+ const q = fresh()
59
+ let t = 0
60
+ trackDelivery(q, 'chat:_', { text: 'x' }, t)
61
+ for (let i = 0; i < 50; i++) {
62
+ t += 15_000
63
+ expect(sweep(q, t, TIMEOUT)).toHaveLength(1) // still trying after 50 strands
64
+ }
65
+ expect(q.pending.size).toBe(1) // never dropped
66
+ // claude finally picks it up → acked → stops.
67
+ expect(ackDelivery(q, 'chat:_')).toBe(true)
68
+ expect(sweep(q, t + 999_999, TIMEOUT)).toHaveLength(0)
69
+ })
70
+
71
+ it('an ack that lands right after a re-delivery stops further re-delivery (no duplicate turns)', () => {
72
+ const q = fresh()
73
+ trackDelivery(q, 'chat:_', { text: 'x' }, 0)
74
+ sweep(q, 15_000, TIMEOUT) // strand → re-delivered
75
+ expect(ackDelivery(q, 'chat:_')).toBe(true) // the re-delivered copy landed
76
+ expect(sweep(q, 999_999, TIMEOUT)).toHaveLength(0)
77
+ expect(q.pending.size).toBe(0)
78
+ })
79
+
80
+ it('keys are independent — a strand on one topic does not affect another (DM + supergroup topics)', () => {
81
+ const q = fresh()
82
+ trackDelivery(q, '-100:4', { text: 'crm topic msg' }, 0) // supergroup CRM topic
83
+ trackDelivery(q, '555:_', { text: 'dm msg' }, 0) // a DM
84
+ ackDelivery(q, '555:_') // the DM submits fine
85
+ const r = sweep(q, 15_000, TIMEOUT)
86
+ expect(r).toHaveLength(1)
87
+ expect(r[0]!.key).toBe('-100:4') // only the stranded topic re-delivers
88
+ })
89
+
90
+ it('tracking the same key twice keeps only the latest inbound (gate serialises per key)', () => {
91
+ const q = fresh()
92
+ trackDelivery(q, 'chat:_', { text: 'first' }, 0)
93
+ trackDelivery(q, 'chat:_', { text: 'second' }, 100)
94
+ expect(q.pending.size).toBe(1)
95
+ expect(sweep(q, 100 + 15_000, TIMEOUT)[0]!.inbound.text).toBe('second')
96
+ })
97
+
98
+ it('ack on an unknown key is a harmless no-op', () => {
99
+ expect(ackDelivery(fresh(), 'never-tracked')).toBe(false)
100
+ })
101
+
102
+ it('forgetDelivery clears without acking or re-delivering (bridge went offline)', () => {
103
+ const q = fresh()
104
+ trackDelivery(q, 'chat:_', { text: 'x' }, 0)
105
+ forgetDelivery(q, 'chat:_')
106
+ expect(q.pending.size).toBe(0)
107
+ expect(sweep(q, 999_999, TIMEOUT)).toHaveLength(0)
108
+ })
109
+ })
@@ -344,6 +344,117 @@ describe('subagent-tracker-posttool', () => {
344
344
  expect(row?.background).toBe(0)
345
345
  expect(row?.status).toBe('completed')
346
346
  })
347
+
348
+ // ─── async-launch ACK contract — drift tolerance (#2084) ────────────────────
349
+ // Tier 3 of isAsyncLaunchAck keys on the functional `agentId: <stem>` token
350
+ // (the most wording-stable part of the ACK) so promotion survives a
351
+ // claude-code bump that rewords BOTH the launch verb AND the "working in the
352
+ // background" phrase. The context-word requirement keeps it from tripping on
353
+ // a foreground report that merely mentions an agentId.
354
+ it('promotes on reworded ACK prose when the agentId token + a context word survive', () => {
355
+ runHook(PRETOOL_SCRIPT, {
356
+ session_id: 's-drift',
357
+ tool_name: 'Agent',
358
+ tool_use_id: 'toolu_drift1',
359
+ tool_input: { subagent_type: 'worker', description: 'Drifted ACK' },
360
+ })
361
+ // Neither "async agent launched" nor "working in the background" — a
362
+ // hypothetical reworded ACK — but the agentId token + "background" remain.
363
+ const postResult = runHook(POSTTOOL_SCRIPT, {
364
+ tool_name: 'Agent',
365
+ tool_use_id: 'toolu_drift1',
366
+ tool_response: {
367
+ content: [{ type: 'text', text: 'Background worker started.\nagentId: drift-7f3a91\nYou will be notified when it finishes.' }],
368
+ },
369
+ })
370
+ expect(postResult.status).toBe(0)
371
+ expect(postResult.stdout).not.toContain('additionalContext')
372
+
373
+ const db = openDb()
374
+ const row = db.prepare('SELECT background, status, ended_at FROM subagents WHERE id = ?').get('toolu_drift1') as
375
+ | { background: number; status: string; ended_at: number | null }
376
+ | undefined
377
+ expect(row?.background).toBe(1)
378
+ expect(row?.status).toBe('running')
379
+ expect(row?.ended_at == null).toBe(true)
380
+ })
381
+
382
+ it('does NOT promote when an agentId token appears without any launch/background context word', () => {
383
+ // A genuine foreground report mentioning an agentId in passing — no
384
+ // launch/background/async/dispatch word — must terminalize, not promote.
385
+ runHook(PRETOOL_SCRIPT, {
386
+ session_id: 's-falsepos',
387
+ tool_name: 'Agent',
388
+ tool_use_id: 'toolu_falsepos1',
389
+ tool_input: { subagent_type: 'worker', description: 'Foreground lookup', run_in_background: false },
390
+ })
391
+ const postResult = runHook(POSTTOOL_SCRIPT, {
392
+ tool_name: 'Agent',
393
+ tool_use_id: 'toolu_falsepos1',
394
+ tool_response: { result: 'Done. Verified the record; agentId: svc-42 is valid and active.', is_error: false },
395
+ })
396
+ expect(postResult.status).toBe(0)
397
+ expect(postResult.stdout).toContain('additionalContext')
398
+
399
+ const db = openDb()
400
+ const row = db.prepare('SELECT background, status FROM subagents WHERE id = ?').get('toolu_falsepos1') as
401
+ | { background: number; status: string }
402
+ | undefined
403
+ expect(row?.background).toBe(0)
404
+ expect(row?.status).toBe('completed')
405
+ })
406
+
407
+ it('does NOT promote a foreground orchestrator narrative that embeds an agentId mid-sentence', () => {
408
+ // The #2085-review false-positive: a coordinator agent reporting on other
409
+ // agents. The agentId is mid-sentence (not on its own line), so tier 3's
410
+ // own-line anchor rejects it even though "background"/"dispatching" appear.
411
+ runHook(PRETOOL_SCRIPT, {
412
+ session_id: 's-orch',
413
+ tool_name: 'Agent',
414
+ tool_use_id: 'toolu_orch1',
415
+ tool_input: { subagent_type: 'worker', description: 'Coordinator', run_in_background: false },
416
+ })
417
+ const postResult = runHook(POSTTOOL_SCRIPT, {
418
+ tool_name: 'Agent',
419
+ tool_use_id: 'toolu_orch1',
420
+ tool_response: { result: 'agentId: coord-x is managing background work and dispatching checks.', is_error: false },
421
+ })
422
+ expect(postResult.status).toBe(0)
423
+ expect(postResult.stdout).toContain('additionalContext')
424
+
425
+ const db = openDb()
426
+ const row = db.prepare('SELECT background, status FROM subagents WHERE id = ?').get('toolu_orch1') as
427
+ | { background: number; status: string }
428
+ | undefined
429
+ expect(row?.background).toBe(0)
430
+ expect(row?.status).toBe('completed')
431
+ })
432
+
433
+ it('does NOT promote an own-line agentId with no launch/background context word (boundary)', () => {
434
+ // Documents the accepted tier-3 boundary: an own-line bare agentId alone
435
+ // (no background/launch/dispatch/async/notify word) is not enough to
436
+ // promote — guards a foreground report that prints an id on its own line.
437
+ runHook(PRETOOL_SCRIPT, {
438
+ session_id: 's-bound',
439
+ tool_name: 'Agent',
440
+ tool_use_id: 'toolu_bound1',
441
+ tool_input: { subagent_type: 'worker', description: 'Infra', run_in_background: false },
442
+ })
443
+ const postResult = runHook(POSTTOOL_SCRIPT, {
444
+ tool_name: 'Agent',
445
+ tool_use_id: 'toolu_bound1',
446
+ tool_response: { content: [{ type: 'text', text: 'Created the worker.\nagentId: svc-99\nIt is ready.' }] },
447
+ })
448
+ expect(postResult.status).toBe(0)
449
+ expect(postResult.stdout).toContain('additionalContext')
450
+
451
+ const db = openDb()
452
+ const row = db.prepare('SELECT background, status FROM subagents WHERE id = ?').get('toolu_bound1') as
453
+ | { background: number; status: string }
454
+ | undefined
455
+ expect(row?.background).toBe(0)
456
+ expect(row?.status).toBe('completed')
457
+ })
347
458
  })
348
459
 
349
460
  describe('agent-dir resolution (RFC §Bug 2)', () => {
@@ -153,3 +153,53 @@ describe('backfillJsonlAgentId — parent_turn_key resolution', () => {
153
153
  expect(row?.parent_turn_key == null).toBe(true)
154
154
  })
155
155
  })
156
+
157
+ // ─── #2081: overlapping windows + hook-stamped value precedence ───────────────
158
+ // The backfill is now only a FALLBACK — the PreToolUse hook stamps
159
+ // parent_turn_key from the live turn-active marker at dispatch
160
+ // (subagent-tracker-pretool.mjs readActiveTurnKey). These tests pin the two
161
+ // guarantees that make the hook fix correct end-to-end.
162
+ describe('backfillJsonlAgentId — overlapping windows / hook precedence (#2081)', () => {
163
+ it('does NOT overwrite a hook-stamped parent_turn_key, even when overlapping windows would resolve differently', () => {
164
+ // Supergroup: two forum topics under one chat with OVERLAPPING windows.
165
+ // Topic A (thread 4) started first and is still open; topic B (thread 7)
166
+ // started later. A sub-agent dispatched at 1500 falls inside BOTH windows.
167
+ insertTurn({ turnKey: '-100:4:1000', chatId: '-100', threadId: '4', startedAt: 1000, endedAt: null })
168
+ insertTurn({ turnKey: '-100:7:1400', chatId: '-100', threadId: '7', startedAt: 1400, endedAt: null })
169
+
170
+ // The hook already stamped the CORRECT parent (topic A) from the marker.
171
+ insertSub({
172
+ id: 'toolu_overlap',
173
+ agentType: 'worker',
174
+ description: 'Topic-A worker',
175
+ startedAt: 1500,
176
+ parentTurnKey: '-100:4:1000',
177
+ })
178
+
179
+ const jsonlPath = writeMeta('worker', 'Topic-A worker')
180
+ backfillJsonlAgentId(db, jsonlPath, 'agentstem_overlap')
181
+
182
+ const row = readSub('toolu_overlap')
183
+ expect(row?.jsonl_agent_id).toBe('agentstem_overlap')
184
+ // The IS NULL guard means the hook's correct value survives — NOT the
185
+ // window query's ORDER BY started_at DESC pick (which would be topic B).
186
+ expect(row?.parent_turn_key).toBe('-100:4:1000')
187
+ })
188
+
189
+ it('fallback window-match (NULL parent) picks the latest-started overlapping turn — the documented fallback limitation', () => {
190
+ // When the hook left parent_turn_key NULL (no active marker at dispatch),
191
+ // the backfill falls back to the started_at window match. With overlapping
192
+ // windows it resolves to the latest-started containing turn. This is a
193
+ // best-effort fallback for the no-marker case — the hook path above is the
194
+ // correct primary. Pinned here so the fallback behaviour is explicit.
195
+ insertTurn({ turnKey: '-100:4:1000', chatId: '-100', threadId: '4', startedAt: 1000, endedAt: null })
196
+ insertTurn({ turnKey: '-100:7:1400', chatId: '-100', threadId: '7', startedAt: 1400, endedAt: null })
197
+ insertSub({ id: 'toolu_fallback', agentType: 'worker', description: 'No-marker worker', startedAt: 1500 })
198
+
199
+ const jsonlPath = writeMeta('worker', 'No-marker worker')
200
+ backfillJsonlAgentId(db, jsonlPath, 'agentstem_fallback')
201
+
202
+ const row = readSub('toolu_fallback')
203
+ expect(row?.parent_turn_key).toBe('-100:7:1400')
204
+ })
205
+ })
@@ -0,0 +1,64 @@
1
+ /**
2
+ * E2E regression — inbound is NEVER dropped under rapid fire (the drop-wedge).
3
+ *
4
+ * The bug: a Telegram inbound reaches claude as an MCP channel notification
5
+ * the unmodified CLI appends to its composer and auto-submits only when the
6
+ * composer is empty + idle. A message arriving the instant the prior turn
7
+ * completes races that auto-submit and strands unsubmitted — claude never
8
+ * starts the turn, the gateway sits "typing…", and the 300s silence-poke
9
+ * DROPS the message. Observed recurring on `marko` (supergroup topics + DMs).
10
+ *
11
+ * This scenario drives the exact failure timing: it fires each message the
12
+ * instant the prior reply lands (i.e. right at turn-completion, the strand
13
+ * window) and asserts EVERY message gets its own unique token back. With the
14
+ * deliver-until-acked queue (inbound-delivery-confirm.ts) a strand self-heals
15
+ * via re-delivery; without it, a stranded message yields NO reply within the
16
+ * timeout and this test fails on exactly the message that was swallowed.
17
+ *
18
+ * Each message carries a random token and the assertion matches THAT token,
19
+ * so a reply to message N-1 can never be mistaken for message N's reply — the
20
+ * test proves every distinct message was actually processed, not merely that
21
+ * "some replies came back".
22
+ */
23
+
24
+ import { describe, it, expect } from "vitest";
25
+ import { spinUp } from "../harness.js";
26
+
27
+ describe("uat: rapid-fire inbound — no message is ever dropped (drop-wedge)", () => {
28
+ it(
29
+ "every back-to-back message gets its own reply (delivery never strands)",
30
+ async () => {
31
+ const sc = await spinUp({ agent: "test-harness" });
32
+ try {
33
+ const N = 8;
34
+ const dropped: number[] = [];
35
+ for (let i = 1; i <= N; i++) {
36
+ const token = `acktok-${i}-${Math.random().toString(36).slice(2, 8)}`;
37
+ await sc.sendDM(
38
+ `Reply with exactly this token and nothing else: ${token}`,
39
+ );
40
+ try {
41
+ const reply = await sc.expectMessage((m) => m.text.includes(token), {
42
+ from: "bot",
43
+ timeout: 75_000,
44
+ });
45
+ expect(reply.text).toContain(token);
46
+ } catch {
47
+ // The message stranded — no reply carrying its token arrived.
48
+ dropped.push(i);
49
+ }
50
+ // Deliberately NO delay: fire the next message the instant this
51
+ // reply lands, so it arrives in the turn-completion strand window.
52
+ }
53
+ expect(
54
+ dropped,
55
+ `messages dropped (no reply within timeout): ${dropped.join(", ")} of ${N}`,
56
+ ).toEqual([]);
57
+ } finally {
58
+ await sc.tearDown();
59
+ }
60
+ },
61
+ // N messages × (turn + up to one ~15-20s strand recovery) — generous.
62
+ 900_000,
63
+ );
64
+ });