switchroom 0.14.38 → 0.14.40
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- package/dist/auth-broker/index.js +294 -46
- package/dist/cli/drive-write-pretool.mjs +25 -1
- package/dist/cli/switchroom.js +63 -6
- package/package.json +3 -3
- package/telegram-plugin/dist/gateway/gateway.js +103 -11
- package/telegram-plugin/gateway/gateway.ts +81 -3
- package/telegram-plugin/gateway/inbound-delivery-confirm.ts +96 -0
- package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +41 -6
- package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +82 -11
- package/telegram-plugin/registry/subagents-bugs.test.ts +120 -19
- package/telegram-plugin/tests/inbound-delivery-confirm.test.ts +109 -0
- package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +111 -0
- package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +50 -0
- package/telegram-plugin/uat/scenarios/inbound-no-drop-rapid-fire-dm.test.ts +64 -0
|
@@ -191,6 +191,24 @@ function writeRow(dbPath, { id, parentSessionId, parentTurnKey, agentType, descr
|
|
|
191
191
|
db.exec('ALTER TABLE subagents ADD COLUMN jsonl_agent_id TEXT')
|
|
192
192
|
db.exec('CREATE INDEX IF NOT EXISTS subagents_jsonl_id ON subagents(jsonl_agent_id)')
|
|
193
193
|
}
|
|
194
|
+
// Verify the marker-derived parent_turn_key (snapParams[2]) actually has
|
|
195
|
+
// a row in the turns table before trusting it. The gateway writes the
|
|
196
|
+
// turn-active marker even when recordTurnStart's INSERT failed (the two
|
|
197
|
+
// writes have independent failure surfaces), so a marker can name a
|
|
198
|
+
// turn_key with no turns row. Stamping that phantom key would route the
|
|
199
|
+
// worker card to the operator DM AND block the watcher's NULL-guarded
|
|
200
|
+
// window backfill from recovering it. Downgrade to NULL so the backfill
|
|
201
|
+
// stays eligible — this also defends against a stale/corrupted marker.
|
|
202
|
+
if (snapParams[2] != null) {
|
|
203
|
+
let turnRow = null
|
|
204
|
+
try {
|
|
205
|
+
turnRow = db.prepare('SELECT 1 FROM turns WHERE turn_key = ? LIMIT 1').get(snapParams[2])
|
|
206
|
+
} catch {
|
|
207
|
+
// turns table may not exist yet on a brand-new agent — treat as no row.
|
|
208
|
+
turnRow = null
|
|
209
|
+
}
|
|
210
|
+
if (turnRow == null) snapParams[2] = null
|
|
211
|
+
}
|
|
194
212
|
db.prepare(snapInsertSql).run(...snapParams)
|
|
195
213
|
db.close()
|
|
196
214
|
done(null)
|
|
@@ -202,12 +220,65 @@ function writeRow(dbPath, { id, parentSessionId, parentTurnKey, agentType, descr
|
|
|
202
220
|
}
|
|
203
221
|
|
|
204
222
|
// sqlite3 CLI fallback — two non-blocking spawns sequenced via callbacks.
|
|
223
|
+
// This legacy path (neither node:sqlite nor bun:sqlite available) can't
|
|
224
|
+
// cheaply verify the marker's turn_key against the turns table, so drop
|
|
225
|
+
// parent_turn_key and let the gateway's window backfill attribute it.
|
|
226
|
+
// Production agents use node:sqlite; bun test uses bun:sqlite — both take
|
|
227
|
+
// the verified path above.
|
|
228
|
+
params[2] = null
|
|
205
229
|
spawnSql(dbPath, SCHEMA_SQL.replace(/\n\s+/g, ' '), (err) => {
|
|
206
230
|
if (err) { done(err); return }
|
|
207
231
|
spawnSql(dbPath, fillPlaceholders(INSERT_SQL.trim(), params), done)
|
|
208
232
|
})
|
|
209
233
|
}
|
|
210
234
|
|
|
235
|
+
// ---------------------------------------------------------------------------
|
|
236
|
+
// Active-turn resolution (the parent_turn_key the row belongs to)
|
|
237
|
+
// ---------------------------------------------------------------------------
|
|
238
|
+
|
|
239
|
+
/**
|
|
240
|
+
* Read the gateway's turn-active marker to learn the turn_key of the turn that
|
|
241
|
+
* is active *right now* — the turn whose tool call is dispatching this
|
|
242
|
+
* sub-agent. The gateway writes `<TELEGRAM_STATE_DIR>/turn-active.json`
|
|
243
|
+
* synchronously at turn-start (gateway/turn-active-marker.ts), keyed
|
|
244
|
+
* `{turnKey, chatId, threadId, startedAt}`, and removes it at turn-complete.
|
|
245
|
+
* `telegramDir` here resolves to that same `TELEGRAM_STATE_DIR` in production
|
|
246
|
+
* (verified: identical inode to the registry.db dir), so the marker is a
|
|
247
|
+
* sibling of registry.db.
|
|
248
|
+
*
|
|
249
|
+
* Stamping parent_turn_key from this marker at INSERT time — instead of
|
|
250
|
+
* leaving it NULL for the gateway to reconstruct from a started_at time-window
|
|
251
|
+
* at jsonl-link time — fixes two bugs:
|
|
252
|
+
* - #2081: the time-window backfill mis-attributes when turn windows overlap
|
|
253
|
+
* (supergroup forum topics multiplex many concurrent turns under one
|
|
254
|
+
* chat_id; `ended_at` is unreliable/batch-swept). The live marker is the
|
|
255
|
+
* ground truth for "which turn dispatched this", so there is nothing to
|
|
256
|
+
* reconstruct and no overlap to disambiguate.
|
|
257
|
+
* - #2083: the backfill only runs when a sub-agent's JSONL links; ~8% never
|
|
258
|
+
* link and were never attributed. Stamping at INSERT is independent of
|
|
259
|
+
* linking.
|
|
260
|
+
*
|
|
261
|
+
* `turnKey` equals `turns.turn_key` (both minted by chatKeyWithSuffix at
|
|
262
|
+
* turn-start), so resolveSubagentOriginChat()'s getTurnByKey() finds the exact
|
|
263
|
+
* (chat_id, thread_id) and routes the worker card to the originating topic.
|
|
264
|
+
*
|
|
265
|
+
* Best-effort: if no turn is active (no marker — e.g. a sub-agent dispatched
|
|
266
|
+
* outside a turn) or the marker is unreadable/malformed, return null and let
|
|
267
|
+
* the gateway's started_at backfill remain the fallback (today's behaviour).
|
|
268
|
+
* Never throws; never blocks the tool call.
|
|
269
|
+
*/
|
|
270
|
+
function readActiveTurnKey(telegramDir) {
|
|
271
|
+
try {
|
|
272
|
+
// Mirrors TURN_ACTIVE_MARKER_FILE in gateway/turn-active-marker.ts.
|
|
273
|
+
const raw = readFileSync(join(telegramDir, 'turn-active.json'), 'utf8')
|
|
274
|
+
const marker = JSON.parse(raw)
|
|
275
|
+
const turnKey = marker?.turnKey
|
|
276
|
+
return typeof turnKey === 'string' && turnKey.length > 0 ? turnKey : null
|
|
277
|
+
} catch {
|
|
278
|
+
return null
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
|
|
211
282
|
// ---------------------------------------------------------------------------
|
|
212
283
|
// main
|
|
213
284
|
// ---------------------------------------------------------------------------
|
|
@@ -257,22 +328,22 @@ function main() {
|
|
|
257
328
|
}
|
|
258
329
|
|
|
259
330
|
const input = event.tool_input ?? {}
|
|
331
|
+
// Resolve parent_turn_key from the live turn-active marker (the turn whose
|
|
332
|
+
// tool call is dispatching this sub-agent). Claude Code's PreToolUse payload
|
|
333
|
+
// carries only its own session id, never the gateway-minted Telegram turn_key
|
|
334
|
+
// — but the gateway writes that turn_key to <telegramDir>/turn-active.json
|
|
335
|
+
// for the duration of the turn, so we read it directly here. Stamping it at
|
|
336
|
+
// INSERT (vs leaving NULL for the gateway's started_at time-window backfill)
|
|
337
|
+
// fixes overlapping-window mis-attribution (#2081) and attributes sub-agents
|
|
338
|
+
// whose JSONL never links (#2083). NULL when no turn is active → the gateway
|
|
339
|
+
// backfill remains the fallback. See readActiveTurnKey().
|
|
340
|
+
const parentTurnKey = readActiveTurnKey(telegramDir)
|
|
260
341
|
writeRow(
|
|
261
342
|
dbPath,
|
|
262
343
|
{
|
|
263
344
|
id: event.tool_use_id ?? null,
|
|
264
345
|
parentSessionId: event.session_id ?? null,
|
|
265
|
-
|
|
266
|
-
// payload carries its own session id, not the gateway-minted Telegram
|
|
267
|
-
// turn_key (a chat+topic+turn key) the `turns` table is keyed on —
|
|
268
|
-
// `event.turn_id` is always undefined, and even if a future CLI
|
|
269
|
-
// populated it, it would not match a `turns.turn_key`. The gateway
|
|
270
|
-
// resolves parent_turn_key from the
|
|
271
|
-
// sub-agent's started_at at jsonl-link time (subagent-watcher.ts
|
|
272
|
-
// backfillJsonlAgentId), which works even after the parent turn ends.
|
|
273
|
-
// Writing a bogus value here would defeat that backfill's
|
|
274
|
-
// `parent_turn_key IS NULL` guard.
|
|
275
|
-
parentTurnKey: null,
|
|
346
|
+
parentTurnKey,
|
|
276
347
|
agentType: input.subagent_type ?? null,
|
|
277
348
|
description: input.description ?? null,
|
|
278
349
|
background: input.run_in_background === true ? 1 : 0,
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
14
|
import { describe, it, expect, beforeEach, afterEach } from 'bun:test'
|
|
15
|
-
import { mkdtempSync, mkdirSync, rmSync } from 'fs'
|
|
15
|
+
import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'fs'
|
|
16
16
|
import { tmpdir } from 'os'
|
|
17
17
|
import { join } from 'path'
|
|
18
18
|
import { spawnSync } from 'child_process'
|
|
@@ -385,26 +385,62 @@ describe('Bug 4 — result_summary always NULL (hook integration)', () => {
|
|
|
385
385
|
})
|
|
386
386
|
})
|
|
387
387
|
|
|
388
|
-
// ─── Bug 5 — parent_turn_key
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
388
|
+
// ─── Bug 5 — parent_turn_key stamped from the live turn-active marker ─────────
|
|
389
|
+
// (#2081 / #2083) The PreToolUse hook reads <telegramDir>/turn-active.json —
|
|
390
|
+
// the gateway-written marker for the turn whose tool call is dispatching this
|
|
391
|
+
// sub-agent — and stamps parent_turn_key = marker.turnKey at INSERT. This
|
|
392
|
+
// captures the EXACT active turn (no started_at time-window reconstruction at
|
|
393
|
+
// jsonl-link time), so it can't mis-attribute under overlapping turn windows
|
|
394
|
+
// (#2081) and works even for sub-agents whose JSONL never links (#2083).
|
|
395
|
+
|
|
396
|
+
/** Write the gateway's turn-active marker into the agent's telegram dir. */
|
|
397
|
+
function writeTurnActiveMarker(turnKey: string, chatId = '12345', threadId: string | null = null) {
|
|
398
|
+
writeFileSync(
|
|
399
|
+
join(agentDir, 'telegram', 'turn-active.json'),
|
|
400
|
+
JSON.stringify({ turnKey, chatId, threadId, startedAt: Date.now() }, null, 2) + '\n',
|
|
401
|
+
)
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
/**
|
|
405
|
+
* Seed a turns row so the hook's phantom-turn_key guard (it only stamps a
|
|
406
|
+
* marker turn_key that actually has a turns row) is satisfied. In production
|
|
407
|
+
* the gateway writes this row via recordTurnStart at turn-start.
|
|
408
|
+
*/
|
|
409
|
+
function seedTurn(turnKey: string, chatId = '12345', threadId: string | null = null) {
|
|
410
|
+
const { Database } = require('bun:sqlite') as {
|
|
411
|
+
Database: new (path: string) => {
|
|
412
|
+
prepare(sql: string): { run(...p: unknown[]): unknown }
|
|
413
|
+
exec(sql: string): void
|
|
414
|
+
close(): void
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
const db = new Database(dbPath)
|
|
418
|
+
db.exec(
|
|
419
|
+
`CREATE TABLE IF NOT EXISTS turns (
|
|
420
|
+
turn_key TEXT PRIMARY KEY, chat_id TEXT, thread_id TEXT,
|
|
421
|
+
started_at INTEGER, ended_at INTEGER, created_at INTEGER, updated_at INTEGER
|
|
422
|
+
)`,
|
|
423
|
+
)
|
|
424
|
+
const now = Date.now()
|
|
425
|
+
db.prepare(
|
|
426
|
+
'INSERT OR IGNORE INTO turns (turn_key, chat_id, thread_id, started_at, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?)',
|
|
427
|
+
).run(turnKey, chatId, threadId, now, now, now)
|
|
428
|
+
db.close()
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
describe('Bug 5 — parent_turn_key stamped from the turn-active marker', () => {
|
|
432
|
+
it('stamps parent_turn_key = marker.turnKey when a turn is active', () => {
|
|
433
|
+
// Supergroup forum-topic turn_key (chat:thread:startedAt).
|
|
434
|
+
const turnKey = '-1003831053471:4:1780370238492'
|
|
435
|
+
seedTurn(turnKey, '-1003831053471', '4')
|
|
436
|
+
writeTurnActiveMarker(turnKey, '-1003831053471', '4')
|
|
437
|
+
|
|
400
438
|
const event = {
|
|
401
439
|
session_id: 'sess-turnkey',
|
|
402
|
-
turn_id: 'turn-abc-001',
|
|
403
440
|
tool_name: 'Agent',
|
|
404
441
|
tool_use_id: 'toolu_turnkey001',
|
|
405
442
|
tool_input: { description: 'Task with turn context', run_in_background: false },
|
|
406
443
|
}
|
|
407
|
-
|
|
408
444
|
const result = runHook(PRETOOL_SCRIPT, event)
|
|
409
445
|
expect(result.status).toBe(0)
|
|
410
446
|
|
|
@@ -414,18 +450,43 @@ describe('Bug 5 — parent_turn_key backfilled by gateway, not the hook', () =>
|
|
|
414
450
|
| undefined
|
|
415
451
|
|
|
416
452
|
expect(row).toBeDefined()
|
|
417
|
-
|
|
453
|
+
expect(row!.parent_turn_key).toBe(turnKey)
|
|
454
|
+
})
|
|
455
|
+
|
|
456
|
+
it('downgrades to NULL when the marker names a turn_key with no turns row (phantom-marker guard)', () => {
|
|
457
|
+
// The gateway writes the marker even if recordTurnStart's INSERT failed, so
|
|
458
|
+
// a marker can point at a turn_key with no row. Stamping it would mis-route
|
|
459
|
+
// the worker card AND block the watcher backfill (NULL guard). The hook must
|
|
460
|
+
// verify the row exists and fall back to NULL.
|
|
461
|
+
seedTurn('12345:_:1780000000000') // a DIFFERENT, real turn exists…
|
|
462
|
+
writeTurnActiveMarker('12345:_:9999999999999') // …but the marker names a phantom.
|
|
463
|
+
|
|
464
|
+
const event = {
|
|
465
|
+
session_id: 'sess-phantom',
|
|
466
|
+
tool_name: 'Agent',
|
|
467
|
+
tool_use_id: 'toolu_phantom001',
|
|
468
|
+
tool_input: { description: 'Task', run_in_background: false },
|
|
469
|
+
}
|
|
470
|
+
const result = runHook(PRETOOL_SCRIPT, event)
|
|
471
|
+
expect(result.status).toBe(0)
|
|
472
|
+
|
|
473
|
+
const db = openDb()
|
|
474
|
+
const row = db.prepare('SELECT parent_turn_key FROM subagents WHERE id = ?').get('toolu_phantom001') as
|
|
475
|
+
| { parent_turn_key: string | null }
|
|
476
|
+
| undefined
|
|
477
|
+
expect(row).toBeDefined()
|
|
418
478
|
expect(row!.parent_turn_key).toBeNull()
|
|
419
479
|
})
|
|
420
480
|
|
|
421
|
-
it('
|
|
481
|
+
it('writes parent_turn_key=NULL when no turn is active (gateway backfill fallback)', () => {
|
|
482
|
+
// No marker written → no active turn → hook leaves NULL and the gateway's
|
|
483
|
+
// started_at backfill remains the fallback (today's behaviour).
|
|
422
484
|
const event = {
|
|
423
485
|
session_id: 'sess-noturnkey',
|
|
424
486
|
tool_name: 'Agent',
|
|
425
487
|
tool_use_id: 'toolu_noturn001',
|
|
426
488
|
tool_input: { description: 'Task without turn context', run_in_background: false },
|
|
427
489
|
}
|
|
428
|
-
|
|
429
490
|
runHook(PRETOOL_SCRIPT, event)
|
|
430
491
|
|
|
431
492
|
const db = openDb()
|
|
@@ -434,7 +495,47 @@ describe('Bug 5 — parent_turn_key backfilled by gateway, not the hook', () =>
|
|
|
434
495
|
| undefined
|
|
435
496
|
|
|
436
497
|
expect(row).toBeDefined()
|
|
437
|
-
|
|
498
|
+
expect(row!.parent_turn_key).toBeNull()
|
|
499
|
+
})
|
|
500
|
+
|
|
501
|
+
it('ignores event.turn_id — only the marker is authoritative', () => {
|
|
502
|
+
// A future CLI populating event.turn_id must NOT be trusted: it is Claude
|
|
503
|
+
// Code's session turn, never a gateway turns.turn_key. With no marker the
|
|
504
|
+
// result is NULL regardless of turn_id.
|
|
505
|
+
const event = {
|
|
506
|
+
session_id: 'sess-turnid-only',
|
|
507
|
+
turn_id: 'turn-abc-001',
|
|
508
|
+
tool_name: 'Agent',
|
|
509
|
+
tool_use_id: 'toolu_turnid001',
|
|
510
|
+
tool_input: { description: 'Task', run_in_background: false },
|
|
511
|
+
}
|
|
512
|
+
runHook(PRETOOL_SCRIPT, event)
|
|
513
|
+
|
|
514
|
+
const db = openDb()
|
|
515
|
+
const row = db.prepare('SELECT parent_turn_key FROM subagents WHERE id = ?').get('toolu_turnid001') as
|
|
516
|
+
| { parent_turn_key: string | null }
|
|
517
|
+
| undefined
|
|
518
|
+
|
|
519
|
+
expect(row).toBeDefined()
|
|
520
|
+
expect(row!.parent_turn_key).toBeNull()
|
|
521
|
+
})
|
|
522
|
+
|
|
523
|
+
it('a malformed marker degrades to NULL (never crashes the dispatch)', () => {
|
|
524
|
+
writeFileSync(join(agentDir, 'telegram', 'turn-active.json'), '{ not valid json')
|
|
525
|
+
const event = {
|
|
526
|
+
session_id: 'sess-badmarker',
|
|
527
|
+
tool_name: 'Agent',
|
|
528
|
+
tool_use_id: 'toolu_badmarker001',
|
|
529
|
+
tool_input: { description: 'Task', run_in_background: false },
|
|
530
|
+
}
|
|
531
|
+
const result = runHook(PRETOOL_SCRIPT, event)
|
|
532
|
+
expect(result.status).toBe(0)
|
|
533
|
+
|
|
534
|
+
const db = openDb()
|
|
535
|
+
const row = db.prepare('SELECT parent_turn_key FROM subagents WHERE id = ?').get('toolu_badmarker001') as
|
|
536
|
+
| { parent_turn_key: string | null }
|
|
537
|
+
| undefined
|
|
538
|
+
expect(row).toBeDefined()
|
|
438
539
|
expect(row!.parent_turn_key).toBeNull()
|
|
439
540
|
})
|
|
440
541
|
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest'
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
ackDelivery,
|
|
5
|
+
createDeliveryQueue,
|
|
6
|
+
forgetDelivery,
|
|
7
|
+
sweep,
|
|
8
|
+
trackDelivery,
|
|
9
|
+
type DeliveryQueue,
|
|
10
|
+
} from '../gateway/inbound-delivery-confirm.js'
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Regression coverage for the marko drop-wedge.
|
|
14
|
+
*
|
|
15
|
+
* An inbound delivered to claude's TUI composer strands unsubmitted when the
|
|
16
|
+
* auto-submit races turn-completion. claude never emits `enqueue`, so the
|
|
17
|
+
* gateway used to sit "typing…" for 300s then DROP the message.
|
|
18
|
+
*
|
|
19
|
+
* The queue's contract: a delivered inbound is acked ONLY by `enqueue`; until
|
|
20
|
+
* then it is re-delivered every `timeoutMs`, forever, never dropped — and an
|
|
21
|
+
* acked delivery never re-fires (no duplicate turns).
|
|
22
|
+
*/
|
|
23
|
+
type Msg = { text: string }
|
|
24
|
+
const TIMEOUT = 15_000
|
|
25
|
+
|
|
26
|
+
function fresh(): DeliveryQueue<Msg> {
|
|
27
|
+
return createDeliveryQueue<Msg>()
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
describe('inbound-delivery-confirm (reliable deliver-until-acked queue)', () => {
|
|
31
|
+
it('an acked delivery is never re-delivered (happy path — no duplicate turns)', () => {
|
|
32
|
+
const q = fresh()
|
|
33
|
+
trackDelivery(q, 'chat:_', { text: 'hi' }, 1_000)
|
|
34
|
+
expect(ackDelivery(q, 'chat:_')).toBe(true) // enqueue arrived
|
|
35
|
+
expect(sweep(q, 1_000 + 999_999, TIMEOUT)).toHaveLength(0)
|
|
36
|
+
expect(q.pending.size).toBe(0)
|
|
37
|
+
})
|
|
38
|
+
|
|
39
|
+
it('within the timeout, an un-acked delivery is left alone (claude may still be picking it up)', () => {
|
|
40
|
+
const q = fresh()
|
|
41
|
+
trackDelivery(q, 'chat:_', { text: 'hi' }, 1_000)
|
|
42
|
+
expect(sweep(q, 1_000 + 14_999, TIMEOUT)).toHaveLength(0)
|
|
43
|
+
expect(q.pending.size).toBe(1)
|
|
44
|
+
})
|
|
45
|
+
|
|
46
|
+
it('a strand (no ack) is re-delivered after the timeout, and the clock resets', () => {
|
|
47
|
+
const q = fresh()
|
|
48
|
+
trackDelivery(q, 'chat:_', { text: 'draft nurture email' }, 1_000)
|
|
49
|
+
const r = sweep(q, 1_000 + 15_000, TIMEOUT)
|
|
50
|
+
expect(r).toHaveLength(1)
|
|
51
|
+
expect(r[0]!.inbound.text).toBe('draft nurture email')
|
|
52
|
+
expect(r[0]!.lastAttemptAt).toBe(1_000 + 15_000) // clock reset
|
|
53
|
+
// not re-swept until another full timeout elapses
|
|
54
|
+
expect(sweep(q, 1_000 + 15_000 + 14_999, TIMEOUT)).toHaveLength(0)
|
|
55
|
+
})
|
|
56
|
+
|
|
57
|
+
it('keeps re-delivering forever until acked — never drops (the reliability invariant)', () => {
|
|
58
|
+
const q = fresh()
|
|
59
|
+
let t = 0
|
|
60
|
+
trackDelivery(q, 'chat:_', { text: 'x' }, t)
|
|
61
|
+
for (let i = 0; i < 50; i++) {
|
|
62
|
+
t += 15_000
|
|
63
|
+
expect(sweep(q, t, TIMEOUT)).toHaveLength(1) // still trying after 50 strands
|
|
64
|
+
}
|
|
65
|
+
expect(q.pending.size).toBe(1) // never dropped
|
|
66
|
+
// claude finally picks it up → acked → stops.
|
|
67
|
+
expect(ackDelivery(q, 'chat:_')).toBe(true)
|
|
68
|
+
expect(sweep(q, t + 999_999, TIMEOUT)).toHaveLength(0)
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
it('an ack that lands right after a re-delivery stops further re-delivery (no duplicate turns)', () => {
|
|
72
|
+
const q = fresh()
|
|
73
|
+
trackDelivery(q, 'chat:_', { text: 'x' }, 0)
|
|
74
|
+
sweep(q, 15_000, TIMEOUT) // strand → re-delivered
|
|
75
|
+
expect(ackDelivery(q, 'chat:_')).toBe(true) // the re-delivered copy landed
|
|
76
|
+
expect(sweep(q, 999_999, TIMEOUT)).toHaveLength(0)
|
|
77
|
+
expect(q.pending.size).toBe(0)
|
|
78
|
+
})
|
|
79
|
+
|
|
80
|
+
it('keys are independent — a strand on one topic does not affect another (DM + supergroup topics)', () => {
|
|
81
|
+
const q = fresh()
|
|
82
|
+
trackDelivery(q, '-100:4', { text: 'crm topic msg' }, 0) // supergroup CRM topic
|
|
83
|
+
trackDelivery(q, '555:_', { text: 'dm msg' }, 0) // a DM
|
|
84
|
+
ackDelivery(q, '555:_') // the DM submits fine
|
|
85
|
+
const r = sweep(q, 15_000, TIMEOUT)
|
|
86
|
+
expect(r).toHaveLength(1)
|
|
87
|
+
expect(r[0]!.key).toBe('-100:4') // only the stranded topic re-delivers
|
|
88
|
+
})
|
|
89
|
+
|
|
90
|
+
it('tracking the same key twice keeps only the latest inbound (gate serialises per key)', () => {
|
|
91
|
+
const q = fresh()
|
|
92
|
+
trackDelivery(q, 'chat:_', { text: 'first' }, 0)
|
|
93
|
+
trackDelivery(q, 'chat:_', { text: 'second' }, 100)
|
|
94
|
+
expect(q.pending.size).toBe(1)
|
|
95
|
+
expect(sweep(q, 100 + 15_000, TIMEOUT)[0]!.inbound.text).toBe('second')
|
|
96
|
+
})
|
|
97
|
+
|
|
98
|
+
it('ack on an unknown key is a harmless no-op', () => {
|
|
99
|
+
expect(ackDelivery(fresh(), 'never-tracked')).toBe(false)
|
|
100
|
+
})
|
|
101
|
+
|
|
102
|
+
it('forgetDelivery clears without acking or re-delivering (bridge went offline)', () => {
|
|
103
|
+
const q = fresh()
|
|
104
|
+
trackDelivery(q, 'chat:_', { text: 'x' }, 0)
|
|
105
|
+
forgetDelivery(q, 'chat:_')
|
|
106
|
+
expect(q.pending.size).toBe(0)
|
|
107
|
+
expect(sweep(q, 999_999, TIMEOUT)).toHaveLength(0)
|
|
108
|
+
})
|
|
109
|
+
})
|
|
@@ -344,6 +344,117 @@ describe('subagent-tracker-posttool', () => {
|
|
|
344
344
|
expect(row?.background).toBe(0)
|
|
345
345
|
expect(row?.status).toBe('completed')
|
|
346
346
|
})
|
|
347
|
+
|
|
348
|
+
// ─── async-launch ACK contract — drift tolerance (#2084) ────────────────────
|
|
349
|
+
// Tier 3 of isAsyncLaunchAck keys on the functional `agentId: <stem>` token
|
|
350
|
+
// (the most wording-stable part of the ACK) so promotion survives a
|
|
351
|
+
// claude-code bump that rewords BOTH the launch verb AND the "working in the
|
|
352
|
+
// background" phrase. The context-word requirement keeps it from tripping on
|
|
353
|
+
// a foreground report that merely mentions an agentId.
|
|
354
|
+
it('promotes on reworded ACK prose when the agentId token + a context word survive', () => {
|
|
355
|
+
runHook(PRETOOL_SCRIPT, {
|
|
356
|
+
session_id: 's-drift',
|
|
357
|
+
tool_name: 'Agent',
|
|
358
|
+
tool_use_id: 'toolu_drift1',
|
|
359
|
+
tool_input: { subagent_type: 'worker', description: 'Drifted ACK' },
|
|
360
|
+
})
|
|
361
|
+
// Neither "async agent launched" nor "working in the background" — a
|
|
362
|
+
// hypothetical reworded ACK — but the agentId token + "background" remain.
|
|
363
|
+
const postResult = runHook(POSTTOOL_SCRIPT, {
|
|
364
|
+
tool_name: 'Agent',
|
|
365
|
+
tool_use_id: 'toolu_drift1',
|
|
366
|
+
tool_response: {
|
|
367
|
+
content: [{ type: 'text', text: 'Background worker started.\nagentId: drift-7f3a91\nYou will be notified when it finishes.' }],
|
|
368
|
+
},
|
|
369
|
+
})
|
|
370
|
+
expect(postResult.status).toBe(0)
|
|
371
|
+
expect(postResult.stdout).not.toContain('additionalContext')
|
|
372
|
+
|
|
373
|
+
const db = openDb()
|
|
374
|
+
const row = db.prepare('SELECT background, status, ended_at FROM subagents WHERE id = ?').get('toolu_drift1') as
|
|
375
|
+
| { background: number; status: string; ended_at: number | null }
|
|
376
|
+
| undefined
|
|
377
|
+
expect(row?.background).toBe(1)
|
|
378
|
+
expect(row?.status).toBe('running')
|
|
379
|
+
expect(row?.ended_at == null).toBe(true)
|
|
380
|
+
})
|
|
381
|
+
|
|
382
|
+
it('does NOT promote when an agentId token appears without any launch/background context word', () => {
|
|
383
|
+
// A genuine foreground report mentioning an agentId in passing — no
|
|
384
|
+
// launch/background/async/dispatch word — must terminalize, not promote.
|
|
385
|
+
runHook(PRETOOL_SCRIPT, {
|
|
386
|
+
session_id: 's-falsepos',
|
|
387
|
+
tool_name: 'Agent',
|
|
388
|
+
tool_use_id: 'toolu_falsepos1',
|
|
389
|
+
tool_input: { subagent_type: 'worker', description: 'Foreground lookup', run_in_background: false },
|
|
390
|
+
})
|
|
391
|
+
const postResult = runHook(POSTTOOL_SCRIPT, {
|
|
392
|
+
tool_name: 'Agent',
|
|
393
|
+
tool_use_id: 'toolu_falsepos1',
|
|
394
|
+
tool_response: { result: 'Done. Verified the record; agentId: svc-42 is valid and active.', is_error: false },
|
|
395
|
+
})
|
|
396
|
+
expect(postResult.status).toBe(0)
|
|
397
|
+
expect(postResult.stdout).toContain('additionalContext')
|
|
398
|
+
|
|
399
|
+
const db = openDb()
|
|
400
|
+
const row = db.prepare('SELECT background, status FROM subagents WHERE id = ?').get('toolu_falsepos1') as
|
|
401
|
+
| { background: number; status: string }
|
|
402
|
+
| undefined
|
|
403
|
+
expect(row?.background).toBe(0)
|
|
404
|
+
expect(row?.status).toBe('completed')
|
|
405
|
+
})
|
|
406
|
+
|
|
407
|
+
it('does NOT promote a foreground orchestrator narrative that embeds an agentId mid-sentence', () => {
|
|
408
|
+
// The #2085-review false-positive: a coordinator agent reporting on other
|
|
409
|
+
// agents. The agentId is mid-sentence (not on its own line), so tier 3's
|
|
410
|
+
// own-line anchor rejects it even though "background"/"dispatching" appear.
|
|
411
|
+
runHook(PRETOOL_SCRIPT, {
|
|
412
|
+
session_id: 's-orch',
|
|
413
|
+
tool_name: 'Agent',
|
|
414
|
+
tool_use_id: 'toolu_orch1',
|
|
415
|
+
tool_input: { subagent_type: 'worker', description: 'Coordinator', run_in_background: false },
|
|
416
|
+
})
|
|
417
|
+
const postResult = runHook(POSTTOOL_SCRIPT, {
|
|
418
|
+
tool_name: 'Agent',
|
|
419
|
+
tool_use_id: 'toolu_orch1',
|
|
420
|
+
tool_response: { result: 'agentId: coord-x is managing background work and dispatching checks.', is_error: false },
|
|
421
|
+
})
|
|
422
|
+
expect(postResult.status).toBe(0)
|
|
423
|
+
expect(postResult.stdout).toContain('additionalContext')
|
|
424
|
+
|
|
425
|
+
const db = openDb()
|
|
426
|
+
const row = db.prepare('SELECT background, status FROM subagents WHERE id = ?').get('toolu_orch1') as
|
|
427
|
+
| { background: number; status: string }
|
|
428
|
+
| undefined
|
|
429
|
+
expect(row?.background).toBe(0)
|
|
430
|
+
expect(row?.status).toBe('completed')
|
|
431
|
+
})
|
|
432
|
+
|
|
433
|
+
it('does NOT promote an own-line agentId with no launch/background context word (boundary)', () => {
|
|
434
|
+
// Documents the accepted tier-3 boundary: an own-line bare agentId alone
|
|
435
|
+
// (no background/launch/dispatch/async/notify word) is not enough to
|
|
436
|
+
// promote — guards a foreground report that prints an id on its own line.
|
|
437
|
+
runHook(PRETOOL_SCRIPT, {
|
|
438
|
+
session_id: 's-bound',
|
|
439
|
+
tool_name: 'Agent',
|
|
440
|
+
tool_use_id: 'toolu_bound1',
|
|
441
|
+
tool_input: { subagent_type: 'worker', description: 'Infra', run_in_background: false },
|
|
442
|
+
})
|
|
443
|
+
const postResult = runHook(POSTTOOL_SCRIPT, {
|
|
444
|
+
tool_name: 'Agent',
|
|
445
|
+
tool_use_id: 'toolu_bound1',
|
|
446
|
+
tool_response: { content: [{ type: 'text', text: 'Created the worker.\nagentId: svc-99\nIt is ready.' }] },
|
|
447
|
+
})
|
|
448
|
+
expect(postResult.status).toBe(0)
|
|
449
|
+
expect(postResult.stdout).toContain('additionalContext')
|
|
450
|
+
|
|
451
|
+
const db = openDb()
|
|
452
|
+
const row = db.prepare('SELECT background, status FROM subagents WHERE id = ?').get('toolu_bound1') as
|
|
453
|
+
| { background: number; status: string }
|
|
454
|
+
| undefined
|
|
455
|
+
expect(row?.background).toBe(0)
|
|
456
|
+
expect(row?.status).toBe('completed')
|
|
457
|
+
})
|
|
347
458
|
})
|
|
348
459
|
|
|
349
460
|
describe('agent-dir resolution (RFC §Bug 2)', () => {
|
|
@@ -153,3 +153,53 @@ describe('backfillJsonlAgentId — parent_turn_key resolution', () => {
|
|
|
153
153
|
expect(row?.parent_turn_key == null).toBe(true)
|
|
154
154
|
})
|
|
155
155
|
})
|
|
156
|
+
|
|
157
|
+
// ─── #2081: overlapping windows + hook-stamped value precedence ───────────────
|
|
158
|
+
// The backfill is now only a FALLBACK — the PreToolUse hook stamps
|
|
159
|
+
// parent_turn_key from the live turn-active marker at dispatch
|
|
160
|
+
// (subagent-tracker-pretool.mjs readActiveTurnKey). These tests pin the two
|
|
161
|
+
// guarantees that make the hook fix correct end-to-end.
|
|
162
|
+
describe('backfillJsonlAgentId — overlapping windows / hook precedence (#2081)', () => {
|
|
163
|
+
it('does NOT overwrite a hook-stamped parent_turn_key, even when overlapping windows would resolve differently', () => {
|
|
164
|
+
// Supergroup: two forum topics under one chat with OVERLAPPING windows.
|
|
165
|
+
// Topic A (thread 4) started first and is still open; topic B (thread 7)
|
|
166
|
+
// started later. A sub-agent dispatched at 1500 falls inside BOTH windows.
|
|
167
|
+
insertTurn({ turnKey: '-100:4:1000', chatId: '-100', threadId: '4', startedAt: 1000, endedAt: null })
|
|
168
|
+
insertTurn({ turnKey: '-100:7:1400', chatId: '-100', threadId: '7', startedAt: 1400, endedAt: null })
|
|
169
|
+
|
|
170
|
+
// The hook already stamped the CORRECT parent (topic A) from the marker.
|
|
171
|
+
insertSub({
|
|
172
|
+
id: 'toolu_overlap',
|
|
173
|
+
agentType: 'worker',
|
|
174
|
+
description: 'Topic-A worker',
|
|
175
|
+
startedAt: 1500,
|
|
176
|
+
parentTurnKey: '-100:4:1000',
|
|
177
|
+
})
|
|
178
|
+
|
|
179
|
+
const jsonlPath = writeMeta('worker', 'Topic-A worker')
|
|
180
|
+
backfillJsonlAgentId(db, jsonlPath, 'agentstem_overlap')
|
|
181
|
+
|
|
182
|
+
const row = readSub('toolu_overlap')
|
|
183
|
+
expect(row?.jsonl_agent_id).toBe('agentstem_overlap')
|
|
184
|
+
// The IS NULL guard means the hook's correct value survives — NOT the
|
|
185
|
+
// window query's ORDER BY started_at DESC pick (which would be topic B).
|
|
186
|
+
expect(row?.parent_turn_key).toBe('-100:4:1000')
|
|
187
|
+
})
|
|
188
|
+
|
|
189
|
+
it('fallback window-match (NULL parent) picks the latest-started overlapping turn — the documented fallback limitation', () => {
|
|
190
|
+
// When the hook left parent_turn_key NULL (no active marker at dispatch),
|
|
191
|
+
// the backfill falls back to the started_at window match. With overlapping
|
|
192
|
+
// windows it resolves to the latest-started containing turn. This is a
|
|
193
|
+
// best-effort fallback for the no-marker case — the hook path above is the
|
|
194
|
+
// correct primary. Pinned here so the fallback behaviour is explicit.
|
|
195
|
+
insertTurn({ turnKey: '-100:4:1000', chatId: '-100', threadId: '4', startedAt: 1000, endedAt: null })
|
|
196
|
+
insertTurn({ turnKey: '-100:7:1400', chatId: '-100', threadId: '7', startedAt: 1400, endedAt: null })
|
|
197
|
+
insertSub({ id: 'toolu_fallback', agentType: 'worker', description: 'No-marker worker', startedAt: 1500 })
|
|
198
|
+
|
|
199
|
+
const jsonlPath = writeMeta('worker', 'No-marker worker')
|
|
200
|
+
backfillJsonlAgentId(db, jsonlPath, 'agentstem_fallback')
|
|
201
|
+
|
|
202
|
+
const row = readSub('toolu_fallback')
|
|
203
|
+
expect(row?.parent_turn_key).toBe('-100:7:1400')
|
|
204
|
+
})
|
|
205
|
+
})
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* E2E regression — inbound is NEVER dropped under rapid fire (the drop-wedge).
|
|
3
|
+
*
|
|
4
|
+
* The bug: a Telegram inbound reaches claude as an MCP channel notification that
|
|
5
|
+
* the unmodified CLI appends to its composer and auto-submits only when the
|
|
6
|
+
* composer is empty + idle. A message arriving the instant the prior turn
|
|
7
|
+
* completes races that auto-submit and strands unsubmitted — claude never
|
|
8
|
+
* starts the turn, the gateway sits "typing…", and the 300s silence-poke
|
|
9
|
+
* DROPS the message. Observed recurring on `marko` (supergroup topics + DMs).
|
|
10
|
+
*
|
|
11
|
+
* This scenario drives the exact failure timing: it fires each message the
|
|
12
|
+
* instant the prior reply lands (i.e. right at turn-completion, the strand
|
|
13
|
+
* window) and asserts EVERY message gets its own unique token back. With the
|
|
14
|
+
* deliver-until-acked queue (inbound-delivery-confirm.ts) a strand self-heals
|
|
15
|
+
* via re-delivery; without it, a stranded message yields NO reply within the
|
|
16
|
+
* timeout and this test fails on exactly the message that was swallowed.
|
|
17
|
+
*
|
|
18
|
+
* Each message carries a random token and the assertion matches THAT token,
|
|
19
|
+
* so a reply to message N-1 can never be mistaken for message N's reply — the
|
|
20
|
+
* test proves every distinct message was actually processed, not merely that
|
|
21
|
+
* "some replies came back".
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import { describe, it, expect } from "vitest";
|
|
25
|
+
import { spinUp } from "../harness.js";
|
|
26
|
+
|
|
27
|
+
describe("uat: rapid-fire inbound — no message is ever dropped (drop-wedge)", () => {
|
|
28
|
+
it(
|
|
29
|
+
"every back-to-back message gets its own reply (delivery never strands)",
|
|
30
|
+
async () => {
|
|
31
|
+
const sc = await spinUp({ agent: "test-harness" });
|
|
32
|
+
try {
|
|
33
|
+
const N = 8;
|
|
34
|
+
const dropped: number[] = [];
|
|
35
|
+
for (let i = 1; i <= N; i++) {
|
|
36
|
+
const token = `acktok-${i}-${Math.random().toString(36).slice(2, 8)}`;
|
|
37
|
+
await sc.sendDM(
|
|
38
|
+
`Reply with exactly this token and nothing else: ${token}`,
|
|
39
|
+
);
|
|
40
|
+
try {
|
|
41
|
+
const reply = await sc.expectMessage((m) => m.text.includes(token), {
|
|
42
|
+
from: "bot",
|
|
43
|
+
timeout: 75_000,
|
|
44
|
+
});
|
|
45
|
+
expect(reply.text).toContain(token);
|
|
46
|
+
} catch {
|
|
47
|
+
// The message stranded — no reply carrying its token arrived.
|
|
48
|
+
dropped.push(i);
|
|
49
|
+
}
|
|
50
|
+
// Deliberately NO delay: fire the next message the instant this
|
|
51
|
+
// reply lands, so it arrives in the turn-completion strand window.
|
|
52
|
+
}
|
|
53
|
+
expect(
|
|
54
|
+
dropped,
|
|
55
|
+
`messages dropped (no reply within timeout): ${dropped.join(", ")} of ${N}`,
|
|
56
|
+
).toEqual([]);
|
|
57
|
+
} finally {
|
|
58
|
+
await sc.tearDown();
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
// N messages × (turn + up to one ~15-20s strand recovery) — generous.
|
|
62
|
+
900_000,
|
|
63
|
+
);
|
|
64
|
+
});
|