switchroom 0.15.45 → 0.16.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/dist/agent-scheduler/index.js +56 -15
  2. package/dist/auth-broker/index.js +383 -97
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +7 -4
  5. package/dist/cli/notion-write-pretool.mjs +35 -4
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/switchroom.js +2894 -841
  9. package/dist/host-control/main.js +2685 -207
  10. package/dist/vault/approvals/kernel-server.js +7453 -7413
  11. package/dist/vault/broker/server.js +11428 -11388
  12. package/examples/minimal.yaml +1 -0
  13. package/examples/switchroom.yaml +1 -0
  14. package/package.json +3 -3
  15. package/profiles/_base/start.sh.hbs +97 -1
  16. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  17. package/profiles/default/CLAUDE.md.hbs +0 -19
  18. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  19. package/telegram-plugin/answer-stream-flag.ts +12 -49
  20. package/telegram-plugin/answer-stream.ts +5 -150
  21. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  22. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  23. package/telegram-plugin/context-exhaustion.ts +12 -0
  24. package/telegram-plugin/demo-mask.ts +154 -0
  25. package/telegram-plugin/dist/bridge/bridge.js +55 -12
  26. package/telegram-plugin/dist/gateway/gateway.js +2938 -977
  27. package/telegram-plugin/dist/server.js +55 -12
  28. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  29. package/telegram-plugin/draft-stream.ts +47 -410
  30. package/telegram-plugin/final-answer-detect.ts +17 -12
  31. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  32. package/telegram-plugin/format.ts +56 -19
  33. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  34. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  35. package/telegram-plugin/gateway/auth-command.ts +70 -14
  36. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  37. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  38. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  39. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  40. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  41. package/telegram-plugin/gateway/effort-command.ts +8 -3
  42. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  43. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  44. package/telegram-plugin/gateway/gateway.ts +1857 -292
  45. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  46. package/telegram-plugin/gateway/model-command.ts +115 -4
  47. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  48. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  49. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  50. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  51. package/telegram-plugin/history.ts +33 -11
  52. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  53. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  54. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  55. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  56. package/telegram-plugin/issues-card.ts +4 -0
  57. package/telegram-plugin/model-unavailable.ts +124 -0
  58. package/telegram-plugin/narrative-dedup.ts +69 -0
  59. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  60. package/telegram-plugin/package.json +3 -3
  61. package/telegram-plugin/pending-work-progress.ts +12 -0
  62. package/telegram-plugin/permission-rule.ts +32 -5
  63. package/telegram-plugin/permission-title.ts +152 -9
  64. package/telegram-plugin/quota-check.ts +13 -0
  65. package/telegram-plugin/quota-watch.ts +135 -7
  66. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  67. package/telegram-plugin/registry/turns-schema.ts +9 -0
  68. package/telegram-plugin/runtime-metrics.ts +13 -0
  69. package/telegram-plugin/session-tail.ts +96 -11
  70. package/telegram-plugin/silence-poke.ts +170 -24
  71. package/telegram-plugin/slot-banner-driver.ts +3 -0
  72. package/telegram-plugin/status-no-truncate.ts +44 -0
  73. package/telegram-plugin/status-reactions.ts +20 -3
  74. package/telegram-plugin/stream-controller.ts +4 -23
  75. package/telegram-plugin/stream-reply-handler.ts +6 -24
  76. package/telegram-plugin/streaming-metrics.ts +91 -0
  77. package/telegram-plugin/subagent-watcher.ts +212 -66
  78. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  79. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  80. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  81. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  82. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  83. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  84. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  85. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  86. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  87. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  88. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  89. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  90. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  91. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  92. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  93. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  94. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  95. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  96. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  97. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  98. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  99. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  100. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  101. package/telegram-plugin/tests/history.test.ts +60 -0
  102. package/telegram-plugin/tests/model-command.test.ts +134 -0
  103. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  104. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  105. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  106. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  107. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  108. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  109. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  110. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  111. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  112. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  113. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  114. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  115. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  116. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  117. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  118. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  119. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  120. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  121. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  122. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  123. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  124. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  125. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  126. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  127. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  128. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  129. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  130. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  131. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  132. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  133. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  134. package/telegram-plugin/tool-activity-summary.ts +375 -58
  135. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  136. package/telegram-plugin/uat/assertions.ts +115 -0
  137. package/telegram-plugin/uat/driver.ts +68 -0
  138. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  139. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  145. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  146. package/telegram-plugin/welcome-text.ts +13 -1
  147. package/telegram-plugin/worker-activity-feed.ts +157 -82
  148. package/telegram-plugin/draft-transport.ts +0 -122
  149. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  150. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -30,6 +30,13 @@
30
30
  * IPC call (cheap). `probeQuota` is only called on state-change (when
31
31
  * we're going to send a message anyway) to get fresh numbers for the
32
32
  * notification body. On no-change polls, only `listState` is called.
33
+ *
34
+ * #2495 Change 3 — the transition-to-alarm probe is `forceLive` (bypasses
35
+ * the broker's probe-on-open TTL), so the DECISION to alarm is corroborated
36
+ * by a TRUE live probe of the affected account, not a possibly-stale cache
37
+ * read. The re-evaluation with fresh numbers can suppress an alarm whose
38
+ * stale-snapshot transition no longer holds. Steady state stays cheap: a
39
+ * no-change poll never probes. Cost is one live probe per transition edge.
33
40
  */
34
41
 
35
42
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
@@ -175,6 +182,51 @@ export type QuotaWatchDecision =
175
182
  }
176
183
  | { kind: "skip"; accountLabel: string; reason: string };
177
184
 
/**
 * #2495 BLOCKER fix — the corroboration probe result, as the gateway's
 * runQuotaWatch sees it from `brokerClient.probeQuota(..., forceLive=true)`.
 * Structurally a subset of `ProbeQuotaEntry` (src/auth/broker/client.ts): a
 * `result` discriminated on `ok`, plus a `served` tag the broker stamps to
 * say HOW the result was sourced.
 *
 * The trap this guards: under `forceLive`, when the upstream live probe FAILS
 * and the broker holds a prior snapshot, it returns `cachedSnapshotToResult`
 * — `result.ok === true` but `served === "cache"` (server.ts opProbeQuota).
 * A naive `result.ok` check then treats that stale cache read as a live
 * corroboration, fires the alarm, and stamps the false "Live-probe
 * corroborated (#2495)" footnote. The acceptance criterion is the opposite:
 * an alarm must be backed by a LIVE probe, not a stale cache read.
 */
export type CorroborationProbe = {
  result: { ok: true } | { ok: false };
  /**
   * How the result was sourced. `"live"` = fresh upstream probe (genuine
   * corroboration). `"cache"` = served from the durable cache (TTL-hit or
   * probe-failure fallback) — NOT corroboration. Absent on legacy responses,
   * which we treat as NOT corroborated (fail-closed: never claim a live
   * corroboration we can't prove).
   */
  served?: "live" | "cache";
};

/**
 * #2495 BLOCKER fix — decide whether a forceLive corroboration probe counts
 * as a genuine LIVE corroboration of the alarm.
 *
 * Genuine corroboration requires BOTH `result.ok` AND `served === "live"`.
 * A result that is `ok:true` but `served:"cache"` (the failed-probe
 * cache-fallback) is treated EXACTLY like a probe failure: it is NOT
 * corroboration, so the caller must DEFER — leave watch state untouched and
 * re-evaluate next tick when a true live probe can be obtained. A missing
 * entry (`undefined`) is likewise not corroboration.
 *
 * Pure + total so it can be unit-tested at the seam without standing up the
 * broker or the gateway loop. "Total" includes malformed runtime input: an
 * entry that is `null` or lacks `result` yields `false` rather than throwing,
 * which keeps the fail-closed guarantee even when the value does not match
 * its static type.
 *
 * @param entry The probe entry for the affected account, or `undefined` when
 *   no entry was returned for it.
 * @returns `true` only when the entry is present, `result.ok` is `true`, and
 *   `served` is `"live"`; `false` in every other case.
 */
export function isLiveCorroboration(entry: CorroborationProbe | undefined): boolean {
  // `== null` also rejects a runtime `null`, which the static type does not
  // admit but an untyped response could still carry.
  if (entry == null) return false;
  // `result` is required by the type; the optional chain keeps this function
  // non-throwing if a response omits it anyway.
  return entry.result?.ok === true && entry.served === "live";
}
229
+
178
230
  /**
179
231
  * Evaluate one account's quota state against its last-notified health.
180
232
  *
@@ -224,7 +276,11 @@ export function evaluateQuotaWatchAccount(args: {
224
276
  return { kind: "skip", accountLabel: label, reason: "stale-snapshot" };
225
277
  }
226
278
 
227
- const currentHealth = classifyHealth(snap);
279
+ // #2494 Bug A — classify against THIS tick's clock so the refill
280
+ // normalization uses the same `now` the rest of the decision does (the
281
+ // default `new Date()` would diverge from a frozen test clock / a replayed
282
+ // tick and mis-zero a still-future reset window).
283
+ const currentHealth = classifyHealth(snap, new Date(now));
228
284
 
229
285
  // Unknown (probe failed) or blocked — skip entirely.
230
286
  if (currentHealth === "unknown" || currentHealth === "blocked") {
@@ -324,22 +380,58 @@ export type FleetAllExhaustedDecision =
324
380
  * cases the trigger-based interactive all-blocked card misses: a quiet period
325
381
  * (no agent happens to 429 into the wall) and the consumer/cron paths.
326
382
  *
327
- * Authoritative source: the broker's per-account `exhausted` flag (set by
328
- * mark-exhausted via failover + the consumer sensor), NOT probe-derived health
329
- * — so there is no probe-failure false-alarm. Requires at least one account;
330
- * an empty fleet never alerts.
383
+ * Source: the broker's per-account `exhausted` flag (set by mark-exhausted via
384
+ * failover + the consumer sensor). That flag is NOT purely live — `isAccountBlocked`
385
+ * (src/auth/broker/account-eligibility.ts) falls back to the persisted
386
+ * `exhausted_until` mark whenever there is no fresh live snapshot. During a
387
+ * broker-unreachable / probe-timeout blackout, short-lived auto-fallback marks
388
+ * can make `every(a.exhausted)` momentarily true with ZERO live corroboration
389
+ * (#2478, klanker 2026-06-20). So the `entered` alert requires POSITIVE LIVE
390
+ * CORROBORATION: an account counts toward "all exhausted" only when its
391
+ * `exhausted` flag is backed by a FRESH live snapshot (last_quota.capturedAt
392
+ * within `maxStaleMs`). If ANY account's exhaustion rests solely on a
393
+ * stale/absent-probe mark we are
394
+ * probe-blind and return `skip: "probe-blind"` — no false fleet alert. The
395
+ * guarantee is "no false alarm off stale marks during a probe blackout", NOT
396
+ * blanket probe-failure immunity. The `recovered` transition is unguarded so a
397
+ * legitimately-fired alert is never stranded. Requires at least one account; an
398
+ * empty fleet never alerts.
331
399
  */
332
400
  export function evaluateFleetAllExhausted(args: {
333
- accounts: Array<{ label: string; exhausted: boolean; exhausted_until?: number }>;
401
+ accounts: Array<{
402
+ label: string;
403
+ exhausted: boolean;
404
+ exhausted_until?: number;
405
+ /** Most-recent live probe snapshot, used to corroborate `exhausted`. */
406
+ last_quota?: {
407
+ capturedAt: number;
408
+ overageDisabledReason?: string | null;
409
+ } | null;
410
+ }>;
334
411
  prev: QuotaWatchAccountState;
335
412
  now: number;
413
+ /** Staleness ceiling for "fresh probe"; 0 disables the gate (legacy callers/tests). */
414
+ tuning?: Pick<QuotaWatchTuning, "maxStaleMs">;
336
415
  }): FleetAllExhaustedDecision {
337
416
  const { accounts, prev, now } = args;
417
+ const maxStaleMs = args.tuning?.maxStaleMs ?? 0;
338
418
  const allExhausted = accounts.length > 0 && accounts.every((a) => a.exhausted);
339
419
  // "throttling" doubles as the "currently alerting all-exhausted" marker.
340
420
  const wasAlerting = prev.lastNotifiedHealth === "throttling";
341
421
 
342
422
  if (allExhausted && !wasAlerting) {
423
+ // Probe-blind guard (#2478): only fire `entered` if EVERY account's
424
+ // exhaustion is backed by live evidence — a fresh snapshot. An account
425
+ // exhausted solely on a stale/absent mark means we have no live
426
+ // corroboration → skip rather than false-alarm.
427
+ if (maxStaleMs > 0) {
428
+ const allLiveCorroborated = accounts.every((a) =>
429
+ exhaustionLiveCorroborated(a, now, maxStaleMs),
430
+ );
431
+ if (!allLiveCorroborated) {
432
+ return { kind: "skip", reason: "probe-blind" };
433
+ }
434
+ }
343
435
  return {
344
436
  kind: "notify",
345
437
  message: buildAllExhaustedMessage(accounts, now),
@@ -358,6 +450,42 @@ export function evaluateFleetAllExhausted(args: {
358
450
  return { kind: "skip", reason: allExhausted ? "still-all-exhausted" : "not-all-exhausted" };
359
451
  }
360
452
 
/**
 * Is an account's `exhausted` flag backed by live evidence (#2478)?
 *
 * True when the most-recent live probe is FRESH (`capturedAt` within
 * `maxStaleMs`) — that fresh probe is what set/upholds the broker's blocked
 * verdict. False when there is no `last_quota` at all, or the snapshot is
 * stale: the `exhausted` flag then rests solely on a persisted mark with no
 * live backing, which is exactly the probe-blind condition that false-fires
 * the fleet alert.
 *
 * NOTE: `out_of_credits` is NOT treated as corroboration here. Per
 * fix/out-of-credits-serve-block, out_of_credits is INFORMATIONAL — it is
 * not exhaustion in its own right at any util. Corroboration requires a
 * genuinely fresh quota snapshot (real 429 / util-wall path).
 * `overageDisabledReason` is accepted in the parameter shape but is never
 * read by this function.
 *
 * Mirrors `snapshotFresh` in src/auth/broker/account-eligibility.ts (the
 * serving-side authority); kept as a local check so the decision layer
 * carries no broker dependency.
 *
 * @param account Account carrying the optional `last_quota` snapshot.
 * @param now Current time, epoch milliseconds.
 * @param maxStaleMs Maximum age of `capturedAt` that still counts as fresh.
 * @param futureSkewToleranceMs How far ahead of `now` a `capturedAt` may be
 *   dated before the snapshot is rejected as clock-skewed. Defaults to
 *   60_000 ms, matching the inline literal in `snapshotFresh`.
 * @returns `true` when a snapshot exists, is no older than `maxStaleMs`, and
 *   is not dated beyond the skew tolerance; otherwise `false`.
 */
function exhaustionLiveCorroborated(
  account: {
    last_quota?: { capturedAt: number; overageDisabledReason?: string | null } | null;
  },
  now: number,
  maxStaleMs: number,
  futureSkewToleranceMs: number = 60_000,
): boolean {
  const snapshot = account.last_quota;
  if (!snapshot) return false;

  const { capturedAt } = snapshot;
  const notStale = now - capturedAt <= maxStaleMs;
  // A future-dated `capturedAt` makes `now - capturedAt` negative and would
  // slip past the staleness gate, so a skewed snapshot would read as fresh.
  // Bound how far ahead of `now` it may be. A NaN `capturedAt` fails both
  // comparisons and is therefore rejected as well.
  const notFromTheFuture = capturedAt <= now + futureSkewToleranceMs;
  return notStale && notFromTheFuture;
}
488
+
361
489
  function buildAllExhaustedMessage(
362
490
  accounts: Array<{ label: string; exhausted_until?: number }>,
363
491
  now: number,
@@ -420,7 +548,7 @@ function buildThrottlingMessage(agentName: string, snap: AccountSnapshot): strin
420
548
  `Binding window: ${winLabel}${resetStr}`,
421
549
  `${activeNote}${altNote}`,
422
550
  ``,
423
- `<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window. Source: broker quota cache.</i>`,
551
+ `<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window. Live-probe corroborated (#2495).</i>`,
424
552
  `<i>Run /auth for full fleet status or /usage for the active account.</i>`,
425
553
  ]
426
554
  .join("\n")
@@ -23,6 +23,30 @@ import {
23
23
  getTurnByKey,
24
24
  } from './turns-schema.js'
25
25
 
// ---------------------------------------------------------------------------
// Concurrency PRAGMAs — applySchema must arm busy_timeout so concurrent
// writers (the subagent-tracker hooks + the gateway watcher) wait-and-retry
// instead of failing with SQLITE_BUSY ("database is locked").
// ---------------------------------------------------------------------------

describe('registry concurrency PRAGMAs', () => {
  it('arms busy_timeout (5000ms) on every opened connection', () => {
    const db = openTurnsDbInMemory()
    // try/finally: close the handle even when the assertion throws, so a
    // failing test does not leak an open connection into later tests.
    try {
      const row = db.prepare('PRAGMA busy_timeout').get() as { timeout: number }
      expect(row.timeout).toBe(5000)
    } finally {
      db.close()
    }
  })

  it('uses WAL journal mode for concurrent readers', () => {
    const db = openTurnsDbInMemory()
    try {
      const row = db.prepare('PRAGMA journal_mode').get() as { journal_mode: string }
      // `:memory:` reports 'memory'; a file DB reports 'wal'. Either way the
      // exec ran without error — the file-path open (openTurnsDb) yields 'wal'.
      expect(['wal', 'memory']).toContain(String(row.journal_mode).toLowerCase())
    } finally {
      db.close()
    }
  })
})
49
+
26
50
  // ---------------------------------------------------------------------------
27
51
  // Test 1 — empty DB
28
52
  // ---------------------------------------------------------------------------
@@ -172,6 +172,15 @@ const PHASE2_MIGRATIONS = [
172
172
  function applySchema(db: SqliteDatabase): void {
173
173
  db.exec('PRAGMA journal_mode = WAL')
174
174
  db.exec('PRAGMA synchronous = NORMAL')
175
+ // Concurrency: multiple writers contend on this registry (the PreToolUse
176
+ // subagent-tracker hook, the gateway's subagent-watcher backfill, the turns
177
+ // writer) — especially when several sub-agents dispatch at once. Without a
178
+ // busy_timeout, bun:sqlite/better-sqlite3 default to 0ms and the second
179
+ // contending write fails IMMEDIATELY with SQLITE_BUSY ("database is locked"),
180
+ // which the watcher swallows → jsonl_agent_id / parent_turn_key left NULL →
181
+ // worker card mis-routes to the operator DM + false silent-stall synthesis.
182
+ // 5s of wait-and-retry serializes the contenders instead of dropping writes.
183
+ db.exec('PRAGMA busy_timeout = 5000')
175
184
  db.exec(SCHEMA_SQL)
176
185
  // Run migrations. SQLite doesn't support "ADD COLUMN IF NOT EXISTS", so
177
186
  // we swallow the "duplicate column" error to stay idempotent on
@@ -77,6 +77,19 @@ export type RuntimeMetricEvent =
77
77
  fallback_kind: 'working' | 'thinking'
78
78
  silence_ms: number
79
79
  }
80
+ /**
81
+ * #2527 — mid-turn liveness floor decision. `decision: 'fire'` when the
82
+ * quiet "still on it" beat was sent; otherwise the machine-readable skip
83
+ * reason for a declined forced ("Status?") poke. `forced` distinguishes
84
+ * the timer beat from a user-asked one.
85
+ */
86
+ | {
87
+ kind: 'mid_turn_floor'
88
+ key: string
89
+ silence_ms: number
90
+ forced: boolean
91
+ decision: string
92
+ }
80
93
  /**
81
94
  * #1445 cross-turn pending-async ambient lifecycle. `started` fires
82
95
  * when a turn ends with a captured anchor AND a pending Agent/Task/
@@ -98,7 +98,17 @@ export type SessionEvent =
98
98
  // the lazily-flushed transcript. The draft-mirror drives off THIS, not
99
99
  // the flush-gated `tool_use`, so activity streams deterministically.
100
100
  | { kind: 'tool_label'; toolUseId: string; label: string; toolName: string }
101
- | { kind: 'text'; text: string }
101
+ // `blockIndex` = index of this text block in the assistant message's
102
+ // content[] — load-bearing: it keys the returned Map so callers emit
103
+ // events in source order. `lastInMessage` = true iff no tool_use block
104
+ // follows it in the SAME message. NOTE: `lastInMessage` is a PROJECTION
105
+ // ARTIFACT only — the current reducer-side narrative-dedup gate
106
+ // (narrative-dedup.ts) decides draft-then-send vs working-narration by
107
+ // LOOKAHEAD (the next tool_use / turn_end), NOT by reading this flag. It
108
+ // is retained as a stable projection output (pinned by the kernel test)
109
+ // and reserved for a future staging-skip optimization; do not assume the
110
+ // gate keys on it.
111
+ | { kind: 'text'; text: string; blockIndex: number; lastInMessage: boolean }
102
112
  | { kind: 'tool_result'; toolUseId: string; toolName: string | null; isError?: boolean; errorText?: string }
103
113
  | { kind: 'turn_end'; durationMs: number }
104
114
  // Multi-agent: sub-agent-scoped events. agentId is the sub-agent JSONL
@@ -106,8 +116,12 @@ export type SessionEvent =
106
116
  // as parent events; the reducer fans them out to per-sub-agent state.
107
117
  | { kind: 'sub_agent_started'; agentId: string; firstPromptText: string; subagentType?: string }
108
118
  | { kind: 'sub_agent_tool_use'; agentId: string; toolUseId: string | null; toolName: string; input?: Record<string, unknown>; precomputedLabel?: string }
109
- | { kind: 'sub_agent_text'; agentId: string; text: string }
110
- | { kind: 'sub_agent_narrative'; agentId: string; text: string }
119
+ // Same shared contract as the main-agent `text` kind — see its doc above
120
+ // (including the `lastInMessage` projection-artifact note). The wire-kind
121
+ // stays distinct (the gateway/watcher split is load-bearing) but the
122
+ // payload + `lastInMessage` derivation are identical so ONE shared dedup
123
+ // gate handles both tiers.
124
+ | { kind: 'sub_agent_text'; agentId: string; text: string; blockIndex: number; lastInMessage: boolean }
111
125
  | { kind: 'sub_agent_tool_result'; agentId: string; toolUseId: string; isError?: boolean; errorText?: string }
112
126
  | { kind: 'sub_agent_turn_end'; agentId: string }
113
127
  | { kind: 'sub_agent_nested_spawn'; agentId: string }
@@ -182,6 +196,49 @@ function extractToolResultErrorText(content: unknown): string {
182
196
  return ''
183
197
  }
184
198
 
/**
 * THE single text→narrative projection primitive. Both projectTranscriptLine
 * and projectSubagentLine derive their text events through this helper so
 * main-agent, sub-agent, worker, and every other execution shape inherit
 * identical text-block semantics from ONE place: empty/whitespace blocks are
 * dropped, and each surviving block carries its `blockIndex` plus the
 * `lastInMessage` signal (no tool_use follows it in this message). NOTE:
 * `lastInMessage` is a projection artifact — the reducer-side dedup gate
 * decides SHOW/SUPPRESS by lookahead, not by reading this flag; it is
 * reserved for a future staging-skip optimization.
 *
 * `make` adapts the shared payload into the tier-specific wire kind
 * (`text` vs `sub_agent_text`); the contract — what counts as a text block,
 * how `lastInMessage` is computed — lives here, not in the callers.
 *
 * Returns a `Map<blockIndex, SessionEvent>` keyed by the text block's source
 * index, NOT a flat list. This is the load-bearing design choice: the callers
 * must emit thinking / tool_use / text events in SOURCE ORDER (the reducer
 * pairs a preamble to the immediately-next tool_use), so they iterate
 * `content` once and, at each text position, emit the precomputed event from
 * this map. The kernel owns the contract; the caller owns only the ordering.
 *
 * Malformed input never throws: a `null`/non-object entry is ignored, and a
 * text block whose `text` is not a string is treated as empty and dropped.
 *
 * @param content The assistant message's `content[]` blocks (parsed JSON).
 * @param make Builds the tier-specific event from
 *   `(text, blockIndex, lastInMessage)`.
 * @returns Map from source block index to the event for each non-empty text
 *   block, in ascending index order.
 */
export function projectAssistantTextBlocks(
  content: Array<Record<string, unknown>>,
  make: (text: string, blockIndex: number, lastInMessage: boolean) => SessionEvent,
): Map<number, SessionEvent> {
  const out = new Map<number, SessionEvent>()
  // Locate the LAST tool_use so each text block knows whether any tool_use
  // follows it in THIS message (that is all `lastInMessage` encodes). Scan
  // from the end and stop at the first hit. `?.` tolerates null entries.
  let lastToolUseIdx = -1
  for (let i = content.length - 1; i >= 0; i--) {
    if (content[i]?.type === 'tool_use') {
      lastToolUseIdx = i
      break
    }
  }
  content.forEach((c, i) => {
    if (c?.type !== 'text') return
    // `content` is parsed JSON: `text` may be absent or a non-string. Treat
    // anything that is not a string as empty instead of calling `.trim()` on
    // it, which would throw.
    const text = typeof c.text === 'string' ? c.text : ''
    if (text.trim().length === 0) return // drop empty/whitespace-only blocks
    out.set(i, make(text, i, i > lastToolUseIdx))
  })
  return out
}
241
+
185
242
  /**
186
243
  * Project a single transcript line into a SessionEvent (or null if it's
187
244
  * uninteresting noise). Caller is responsible for the JSON parse — if a
@@ -218,7 +275,16 @@ export function projectTranscriptLine(line: string): SessionEvent[] {
218
275
  const content = message?.content as Array<Record<string, unknown>> | undefined
219
276
  if (!Array.isArray(content)) return []
220
277
  const events: SessionEvent[] = []
221
- for (const c of content) {
278
+ // Text→narrative projection comes from the ONE shared kernel
279
+ // (projectAssistantTextBlocks): it owns the empty-drop + blockIndex +
280
+ // lastInMessage contract. We emit its events at their source positions
281
+ // so thinking / tool_use / text stay in source order (the reducer pairs
282
+ // a preamble to the immediately-next tool_use).
283
+ const textEvents = projectAssistantTextBlocks(
284
+ content,
285
+ (text, blockIndex, lastInMessage): SessionEvent => ({ kind: 'text', text, blockIndex, lastInMessage }),
286
+ )
287
+ content.forEach((c, i) => {
222
288
  const ct = c.type as string | undefined
223
289
  if (ct === 'thinking') {
224
290
  events.push({ kind: 'thinking' })
@@ -237,10 +303,10 @@ export function projectTranscriptLine(line: string): SessionEvent[] {
237
303
  input: input && typeof input === 'object' ? input : undefined,
238
304
  })
239
305
  } else if (ct === 'text') {
240
- const text = (c.text as string | undefined) ?? ''
241
- events.push({ kind: 'text', text })
306
+ const ev = textEvents.get(i)
307
+ if (ev != null) events.push(ev)
242
308
  }
243
- }
309
+ })
244
310
  return events
245
311
  }
246
312
 
@@ -357,7 +423,25 @@ export function projectSubagentLine(
357
423
  const content = message?.content as Array<Record<string, unknown>> | undefined
358
424
  if (!Array.isArray(content)) return []
359
425
  const events: SessionEvent[] = []
360
- for (const c of content) {
426
+ // Text→narrative projection comes from the SAME shared kernel as the
427
+ // main agent (projectAssistantTextBlocks): one source for the empty-drop
428
+ // + blockIndex + lastInMessage contract. The `make` adapter only changes
429
+ // the wire kind to `sub_agent_text`. A nested Agent/Task tool_use still
430
+ // counts as a tool_use that follows a preceding text block — handled by
431
+ // the kernel — so a sub-agent preamble before a nested spawn is correctly
432
+ // NOT `lastInMessage`. We emit at source positions so text + tool_use
433
+ // stay in source order (the reducer pairs preamble → next tool_use).
434
+ const textEvents = projectAssistantTextBlocks(
435
+ content,
436
+ (text, blockIndex, lastInMessage): SessionEvent => ({
437
+ kind: 'sub_agent_text',
438
+ agentId,
439
+ text,
440
+ blockIndex,
441
+ lastInMessage,
442
+ }),
443
+ )
444
+ content.forEach((c, i) => {
361
445
  const ct = c.type as string | undefined
362
446
  if (ct === 'tool_use') {
363
447
  const name = (c.name as string | undefined) ?? ''
@@ -386,10 +470,11 @@ export function projectSubagentLine(
386
470
  // in the SAME assistant message must be emitted in source order
387
471
  // so the reducer consumes the preamble on the immediately-next
388
472
  // tool_use and sibling tool_uses fall back to filename/pattern.
389
- const text = (c.text as string | undefined) ?? ''
390
- events.push({ kind: 'sub_agent_text', agentId, text })
473
+ // The event itself comes from the shared kernel (textEvents above).
474
+ const ev = textEvents.get(i)
475
+ if (ev != null) events.push(ev)
391
476
  }
392
- }
477
+ })
393
478
  // Authoritative early terminal: a background `Agent` worker's JSONL on
394
479
  // claude ≥2.1.156 never writes the `system/turn_duration` line below, so
395
480
  // the watcher used to only learn the worker finished via the ~5-min