switchroom 0.14.2 → 0.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -59,6 +59,7 @@ import {
59
59
  registerAndRender,
60
60
  describeToolUse,
61
61
  appendActivityLine,
62
+ appendActivityLabel,
62
63
  type ActivityState,
63
64
  } from '../tool-activity-summary.js'
64
65
  import { toolLabel } from '../tool-labels.js'
@@ -286,7 +287,7 @@ import { chatKey, chatKeyWithSuffix, chatIdOfChatKey } from './chat-key.js'
286
287
  // should do. Behavior unchanged in this PR — the imperative code below
287
288
  // still runs everything. PR 3 will cut over to executing the machine's
288
289
  // effects.
289
- import { shadowEmit } from './inbound-delivery-machine-shadow.js'
290
+ import { shadowEmit, isMachineInTurn, isDeliveryCutoverEnabled } from './inbound-delivery-machine-shadow.js'
290
291
  import type { ChatKey as _ChatKey } from './inbound-delivery-machine.js'
291
292
  import { dispatchEffects, isDispatchEnabled } from './inbound-delivery-machine-dispatch.js'
292
293
  import { maybeFireWarmup } from './prefix-warmup.js'
@@ -367,7 +368,7 @@ import { createIssuesCardHandle, type IssuesCardHandle } from '../issues-card.js
367
368
  import { startIssuesWatcher, type IssuesWatcherHandle } from '../issues-watcher.js'
368
369
  import { list as listIssues, resolve as resolveIssue } from '../../src/issues/index.js'
369
370
  import { summarizeToolForTitle, formatPermissionCardBody } from '../permission-title.js'
370
- import { resolveAlwaysAllowRule } from '../permission-rule.js'
371
+ import { resolveAlwaysAllowRule, isRulePersisted } from '../permission-rule.js'
371
372
  import {
372
373
  readClaudeJsonOverage,
373
374
  evaluateCreditState,
@@ -1161,6 +1162,24 @@ function markClaudeBusyForInbound(m: {
1161
1162
  }
1162
1163
  claudeBusyKeys.add(chatKey(m.chatId, tid))
1163
1164
  }
1165
+
1166
+ /**
1167
+ * Authoritative "is a turn in flight?" for every gate that previously
1168
+ * read `claudeBusyKeys.size`. PR 3b cutover (extends PR 3a's bridgeUp
1169
+ * dispatch): when the delivery state machine is authoritative
1170
+ * (`SWITCHROOM_DELIVERY_MACHINE_CUTOVER` on + shadow on) the answer is
1171
+ * its single-`activeTurn` global state, which — unlike the
1172
+ * per-delivery `claudeBusyKeys` set — cannot accumulate orphan keys and
1173
+ * wedge the gate "in-flight forever" (the gymbro/clerk 5-min dangle,
1174
+ * 2026-05-28). Kill-switch off → exact legacy claudeBusyKeys behaviour.
1175
+ *
1176
+ * NOT for the inbound-receipt gate (`turnInFlightAtReceipt` in handleInbound): that must snapshot the
1177
+ * machine state BEFORE the inbound event advances it, or a fresh-turn
1178
+ * message self-blocks. See the snapshot at the inbound handler.
1179
+ */
1180
+ function turnInFlightForGate(): boolean {
1181
+ return isDeliveryCutoverEnabled() ? isMachineInTurn() : claudeBusyKeys.size > 0
1182
+ }
1164
1183
  const pendingRestarts = new Map<string, number>() // agentName -> timestamp when restart was requested
1165
1184
 
1166
1185
  // ─── Proactive context compaction (session.max_context_tokens) ──────────
@@ -1490,7 +1509,11 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1490
1509
  // activeTurnStartedAt entry in the fresh-turn branch) doesn't pin this
1491
1510
  // gate forever while claude is genuinely idle. See the claudeBusyKeys
1492
1511
  // declaration for the supergroup deadlock this fixes.
1493
- if (claudeBusyKeys.size === 0) {
1512
+ // PR3b-cutover: `turnInFlightForGate()` reads the delivery machine
1513
+ // when the cutover kill-switch is on; the turnEnd event was emitted
1514
+ // just above (purgeReactionTracking head), so the machine is already
1515
+ // idle here.
1516
+ if (!turnInFlightForGate()) {
1494
1517
  // #1556: the deterministic delivery point. claude has just gone
1495
1518
  // idle — flush any inbound held mid-turn so the channel
1496
1519
  // notification lands at the idle prompt and submits as a fresh
@@ -1590,7 +1613,9 @@ function releaseTurnBufferGate(key: string): void {
1590
1613
  // test-harness's 13:02 UAT now opens after the reply.
1591
1614
  //
1592
1615
  // PR3b: gated on claudeBusyKeys (see purgeReactionTracking comment).
1593
- if (claudeBusyKeys.size === 0) {
1616
+ // PR3b-cutover: turnEnd was emitted just above (releaseTurnBufferGate
1617
+ // head), so the machine is already idle when the cutover gate reads.
1618
+ if (!turnInFlightForGate()) {
1594
1619
  const selfAgentForFlush = process.env.SWITCHROOM_AGENT_NAME ?? ''
1595
1620
  if (pendingInboundBuffer.depth(selfAgentForFlush) > 0) {
1596
1621
  const fr = redeliverBufferedInbound(
@@ -3656,6 +3681,23 @@ silencePoke.startTimer({
3656
3681
  },
3657
3682
  })
3658
3683
 
3684
+ // PR3b-cutover: drive the delivery machine's TTL `tick`. The machine
3685
+ // expires any turn whose `turnStartedAt` is older than TURN_TTL_MS
3686
+ // (5 min) and drops global state back to idle — its structural
3687
+ // equivalent of the imperative silence-poke framework-fallback. This
3688
+ // is the load-bearing safety net for the cutover gate: even if a
3689
+ // `turnEnd` event is somehow missed (the dangle class), the machine
3690
+ // self-heals at TTL instead of pinning the gate "in-flight forever".
3691
+ // shadowEmit only advances state + logs the predicted effects; we
3692
+ // deliberately do NOT execute the machine's firePoke here (the
3693
+ // imperative silence-poke still owns the user-facing ping), so there
3694
+ // is no double-poke. unref so the interval never holds the process.
3695
+ const DELIVERY_MACHINE_TICK_MS = 30_000
3696
+ const _deliveryMachineTick = setInterval(() => {
3697
+ shadowEmit({ kind: 'tick', now: Date.now() })
3698
+ }, DELIVERY_MACHINE_TICK_MS)
3699
+ _deliveryMachineTick.unref?.()
3700
+
3659
3701
  // #1445 cross-turn pending-async ambient. When a turn ends after the
3660
3702
  // model dispatched background async work (Agent / Task / Bash run-in-
3661
3703
  // background) and the model has stopped speaking, keep editing the
@@ -4195,7 +4237,8 @@ const ipcServer: IpcServer = createIpcServer({
4195
4237
  // PR3b: gated on claudeBusyKeys (actually-handed-to-claude turns)
4196
4238
  // not activeTurnStartedAt (receipt-eager), so a buffered topic-B
4197
4239
  // inbound doesn't pin this as turnInFlight=true forever.
4198
- const turnInFlight = claudeBusyKeys.size > 0;
4240
+ // PR3b-cutover: reads the delivery machine when the kill-switch is on.
4241
+ const turnInFlight = turnInFlightForGate();
4199
4242
 
4200
4243
  if (!turnInFlight) {
4201
4244
  // No active turn, restart immediately. Cycle both the agent and
@@ -4615,7 +4658,8 @@ if (!STATIC) {
4615
4658
  // #1556: never drain mid-turn — that re-creates the composer
4616
4659
  // wedge this buffer exists to prevent.
4617
4660
  // PR3b: gated on claudeBusyKeys (see purgeReactionTracking).
4618
- if (claudeBusyKeys.size > 0) return false
4661
+ // PR3b-cutover: reads the delivery machine when the kill-switch is on.
4662
+ if (turnInFlightForGate()) return false
4619
4663
  const c = ipcServer.getClient(selfAgent)
4620
4664
  return c != null && c.isAlive()
4621
4665
  },
@@ -5020,6 +5064,11 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
5020
5064
  // silence-poke clock so the next poke is measured from this send.
5021
5065
  signalTracker.noteOutbound(statusKey(chat_id, threadId), Date.now())
5022
5066
  silencePoke.noteOutbound(statusKey(chat_id, threadId), Date.now())
5067
+ // PR3b-cutover: feed lastOutboundAt to the delivery machine so its
5068
+ // TTL `tick` suppresses the fallback for a long-but-active turn
5069
+ // (model streaming past 5 min) — parity with silencePoke's own
5070
+ // suppression, so the cutover gate doesn't clear a live turn.
5071
+ shadowEmit({ kind: 'modelOutbound', key: statusKey(chat_id, threadId) as _ChatKey, at: Date.now() })
5023
5072
  // #1741 — only clear silent-end state on a plausibly-final reply.
5024
5073
  // An interim ack (disable_notification:true, short text, no done)
5025
5074
  // must NOT clear the state file; otherwise a turn that ends with
@@ -5615,6 +5664,9 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
5615
5664
  const sKey = statusKey(streamChatId, streamThreadId)
5616
5665
  signalTracker.noteOutbound(sKey, Date.now())
5617
5666
  silencePoke.noteOutbound(sKey, Date.now())
5667
+ // PR3b-cutover: feed lastOutboundAt to the delivery machine (see
5668
+ // executeReply) so its TTL tick suppresses an active-turn fallback.
5669
+ shadowEmit({ kind: 'modelOutbound', key: sKey as _ChatKey, at: Date.now() })
5618
5670
  // #1741 — see executeReply for the rationale: only a plausibly-
5619
5671
  // final stream_reply clears the silent-end state. An interim
5620
5672
  // ack via stream_reply must NOT clear; the Stop hook needs
@@ -7012,6 +7064,20 @@ function handleSessionEvent(ev: SessionEvent): void {
7012
7064
  isDm: isDmChatId(ev.chatId),
7013
7065
  }
7014
7066
  currentTurn = next
7067
+ // PR3b-cutover: feed the authoritative turn-start to the delivery
7068
+ // machine. `enqueue` fires for EVERY turn atom regardless of
7069
+ // source — inbound, cron, subagent-handback, vault-resume,
7070
+ // restart-marker — so it is the single chokepoint that captures
7071
+ // the non-inbound turns the machine's own `inbound` event never
7072
+ // sees (those bypass handleInbound). Without it the machine reads
7073
+ // idle during a cron/handback turn and the gate would mis-deliver
7074
+ // a concurrent inbound mid-turn (the #1556 composer wedge).
7075
+ // Idempotent when already in_turn (turnStart only sets perKey).
7076
+ shadowEmit({
7077
+ kind: 'turnStart',
7078
+ key: statusKey(ev.chatId, ev.threadId != null ? Number(ev.threadId) : undefined) as _ChatKey,
7079
+ at: startedAt,
7080
+ })
7015
7081
  // #549 fix — fresh turn, reset preamble-suppression state.
7016
7082
  preambleSuppressor.reset()
7017
7083
  // Reset the silent-end retry budget for this chat. The stored
@@ -7130,7 +7196,12 @@ function handleSessionEvent(ev: SessionEvent): void {
7130
7196
  // empty draft to wipe the compose-area preview; for persisted
7131
7197
  // messages, delete. The user sees the real reply land in the
7132
7198
  // same beat the summary disappears.
7133
- if (wasFirstReply) {
7199
+ // Legacy (flag-off): the activity summary clears on the first
7200
+ // reply — it was a one-shot "what I did" line. DRAFT_MIRROR keeps
7201
+ // the live feed running through mid-turn replies and clears it at
7202
+ // turn_end instead, so an early reply doesn't wipe the stream
7203
+ // (the fast-turn determinism fix).
7204
+ if (wasFirstReply && !DRAFT_MIRROR_ENABLED) {
7134
7205
  clearActivitySummary(turn)
7135
7206
  }
7136
7207
  }
@@ -7153,22 +7224,19 @@ function handleSessionEvent(ev: SessionEvent): void {
7153
7224
  // exactly once at a time and re-running until pending matches
7154
7225
  // the last-sent. Captures `turn` so a late drain after turn-swap
7155
7226
  // can't corrupt the next turn's atom.
7156
- // DRAFT_MIRROR (RFC draft-mirror-preview): accumulate each tool_use
7157
- // into a human-friendly running feed in the live preview, using the
7158
- // model-authored descriptive field (Bash.description, Read/Edit file
7159
- // basename, hindsight→"Searching memory", etc. — see describeToolUse
7160
- // / appendActivityLine). The draft shows the turn's actions as a
7161
- // capped chronological list (Claude Code-style), clears on reply.
7162
- // Never surfaces raw shell/query syntax — option A, uniform across
7163
- // code + non-code agents.
7164
- //
7165
7227
  // Flag OFF (default): the legacy generic verb-count summary
7166
7228
  // ("Ran 5 commands") via registerAndRender — byte-identical to
7167
- // pre-draft-mirror behavior.
7168
- if (!turn.replyCalled && !isTelegramSurfaceTool(name)) {
7169
- const rendered = DRAFT_MIRROR_ENABLED
7170
- ? appendActivityLine(turn.mirrorLines, name, ev.input)
7171
- : registerAndRender(turn.toolActivity, name)
7229
+ // pre-draft-mirror behavior, cleared on first reply.
7230
+ //
7231
+ // DRAFT_MIRROR: the draft is NOT driven from this (flush-gated)
7232
+ // tool_use event — it's driven by the real-time `tool_label` event
7233
+ // (PreToolUse sidecar, fires at tool-call time regardless of when
7234
+ // claude flushes the transcript). See `case 'tool_label'`. That's
7235
+ // the determinism fix: on a fast/clustered-tool turn the JSONL
7236
+ // tool_use rows aren't on disk until ~turn-end, so sourcing the
7237
+ // draft here lost the feed; the sidecar is flush-independent.
7238
+ if (!DRAFT_MIRROR_ENABLED && !turn.replyCalled && !isTelegramSurfaceTool(name)) {
7239
+ const rendered = registerAndRender(turn.toolActivity, name)
7172
7240
  if (rendered != null) {
7173
7241
  turn.activityPendingRender = rendered
7174
7242
  if (turn.activityInFlight == null) {
@@ -7184,6 +7252,31 @@ function handleSessionEvent(ev: SessionEvent): void {
7184
7252
  }
7185
7253
  return
7186
7254
  }
7255
+ case 'tool_label': {
7256
+ // DRAFT_MIRROR real-time driver. The PreToolUse hook wrote this
7257
+ // label synchronously at tool-call time; the sidecar surfaced it
7258
+ // here (~250ms) independent of the transcript flush. Accumulate it
7259
+ // into the live feed and update the ephemeral draft — this is what
7260
+ // makes the draft deterministic on fast/clustered-tool turns where
7261
+ // the JSONL tool_use rows arrive too late.
7262
+ if (!DRAFT_MIRROR_ENABLED) return
7263
+ const turn = currentTurn
7264
+ if (turn == null) return
7265
+ // Surface tools (reply/stream_reply/react) are the conversation, not
7266
+ // activity — the hook labels them ("Replying"), so filter by name.
7267
+ if (isTelegramSurfaceTool(ev.toolName)) return
7268
+ // Unlike the legacy tool_use path, do NOT gate on replyCalled — the
7269
+ // whole point is to show activity even when a reply raced ahead of
7270
+ // the (lagged) transcript. The feed clears at turn_end.
7271
+ const rendered = appendActivityLabel(turn.mirrorLines, ev.label)
7272
+ if (rendered != null) {
7273
+ turn.activityPendingRender = rendered
7274
+ if (turn.activityInFlight == null) {
7275
+ turn.activityInFlight = drainActivitySummary(turn)
7276
+ }
7277
+ }
7278
+ return
7279
+ }
7187
7280
  case 'text': {
7188
7281
  // #1067: snapshot at entry. The answer-stream creation closures
7189
7282
  // below also read `turn` instead of currentTurn so they pin to
@@ -7454,6 +7547,14 @@ function handleSessionEvent(ev: SessionEvent): void {
7454
7547
  clearTimeout(turn.orphanedReplyTimeoutId)
7455
7548
  turn.orphanedReplyTimeoutId = null
7456
7549
  }
7550
+ // DRAFT_MIRROR: the live activity feed runs through the whole turn
7551
+ // (it is NOT cleared on the first reply, unlike the legacy summary)
7552
+ // so an early/mid-turn reply can't wipe it. Clear it here, at the
7553
+ // real end of the turn — the ephemeral compose-area draft goes away
7554
+ // once the work is actually done.
7555
+ if (DRAFT_MIRROR_ENABLED && turn != null) {
7556
+ clearActivitySummary(turn)
7557
+ }
7457
7558
  // #549 fix — flush any pending preamble BEFORE the answer stream is
7458
7559
  // nulled below. Text emitted immediately before turn_end (no tool
7459
7560
  // followed) is the answer; the suppressor's emitAnswer callback
@@ -8505,6 +8606,14 @@ async function handleInbound(
8505
8606
  // vs mid-turn — its decision will be visible in the gw-trace shadow
8506
8607
  // line emitted to stderr.
8507
8608
  const _shadowKey = statusKey(ctx.chat?.id != null ? String(ctx.chat.id) : '0', ctx.message?.message_thread_id) as _ChatKey
8609
+ // PR3b-cutover: snapshot the machine's in-turn state BEFORE the
8610
+ // inbound event advances it. A fresh-turn inbound transitions the
8611
+ // machine idle→in_turn; reading after the emit would see THIS
8612
+ // message's own just-started turn and self-block it (the same
8613
+ // self-block hazard the claudeBusyKeys snapshot below guards). When
8614
+ // the kill-switch is off this is null and the gate uses the legacy
8615
+ // claudeBusyKeys read.
8616
+ const machineInTurnAtReceipt = isDeliveryCutoverEnabled() ? isMachineInTurn() : null
8508
8617
  shadowEmit({
8509
8618
  kind: 'inbound',
8510
8619
  key: _shadowKey,
@@ -8556,7 +8665,12 @@ async function handleInbound(
8556
8665
  // no turn_end ever fires). With claudeBusyKeys, B sees true (A is
8557
8666
  // busy) → B is buffered correctly, AND the gate cleanly reopens
8558
8667
  // when A's turn_end deletes keyA → flush triggers → B delivered.
8559
- const turnInFlightAtReceipt = claudeBusyKeys.size > 0
8668
+ // PR3b-cutover: prefer the machine snapshot taken before the inbound
8669
+ // event advanced it (machineInTurnAtReceipt); null when the
8670
+ // kill-switch is off, in which case the legacy claudeBusyKeys read
8671
+ // stands. Both are "was a turn in flight at receipt", not a live
8672
+ // post-this-inbound read — see machineInTurnAtReceipt's comment.
8673
+ const turnInFlightAtReceipt = machineInTurnAtReceipt ?? (claudeBusyKeys.size > 0)
8560
8674
 
8561
8675
  const access = result.access
8562
8676
  const from = ctx.from!
@@ -15172,25 +15286,56 @@ bot.on('callback_query:data', async ctx => {
15172
15286
  return
15173
15287
  }
15174
15288
  let grantOk = false
15289
+ let grantFailReason = ''
15175
15290
  try {
15176
15291
  // --no-restart: settings.json gets the new entry on the next
15177
15292
  // reconcile but we don't bounce the agent mid-turn. Operator
15178
15293
  // can restart manually if they want this rule live in this
15179
15294
  // session; otherwise it kicks in next session.
15180
15295
  switchroomExec(['agent', 'grant', agentName, rule.rule, '--no-restart'])
15181
- grantOk = true
15182
- process.stderr.write(
15183
- `telegram gateway: always-allow added rule="${rule.rule}" agent=${agentName} (request_id=${request_id})\n`,
15184
- )
15296
+ // Verify the rule actually landed in the resolved config — guards
15297
+ // against config-location-drift (gateway edited a yaml that isn't
15298
+ // the durable source-of-truth, or the grant was a no-op). One
15299
+ // fresh config read; cheap since this is a rare operator tap.
15300
+ try {
15301
+ const cfg = loadSwitchroomConfig()
15302
+ const rawAgent = cfg.agents?.[agentName]
15303
+ if (rawAgent) {
15304
+ const resolved = resolveAgentConfig(cfg.defaults, cfg.profiles, rawAgent)
15305
+ const allowList: string[] = (resolved as { tools?: { allow?: string[] } }).tools?.allow ?? []
15306
+ if (isRulePersisted(allowList, rule.rule)) {
15307
+ grantOk = true
15308
+ process.stderr.write(
15309
+ `telegram gateway: always-allow added rule="${rule.rule}" agent=${agentName} (request_id=${request_id})\n`,
15310
+ )
15311
+ } else {
15312
+ grantFailReason = `rule "${rule.rule}" not found in resolved tools.allow after write — config location may have drifted`
15313
+ process.stderr.write(
15314
+ `telegram gateway: always-allow VERIFY FAILED: ${grantFailReason} (request_id=${request_id})\n`,
15315
+ )
15316
+ }
15317
+ } else {
15318
+ grantFailReason = `agent "${agentName}" not found in config after write`
15319
+ process.stderr.write(
15320
+ `telegram gateway: always-allow VERIFY FAILED: ${grantFailReason} (request_id=${request_id})\n`,
15321
+ )
15322
+ }
15323
+ } catch (verifyErr) {
15324
+ grantFailReason = `config re-read failed: ${(verifyErr as Error).message}`
15325
+ process.stderr.write(
15326
+ `telegram gateway: always-allow VERIFY FAILED: ${grantFailReason} (request_id=${request_id})\n`,
15327
+ )
15328
+ }
15185
15329
  } catch (err) {
15186
- process.stderr.write(`telegram gateway: always-allow grant failed: ${(err as Error).message}\n`)
15330
+ grantFailReason = (err as Error).message
15331
+ process.stderr.write(`telegram gateway: always-allow grant failed: ${grantFailReason}\n`)
15187
15332
  }
15188
15333
 
15189
15334
  pendingPermissions.delete(request_id)
15190
15335
 
15191
15336
  const ackText = grantOk
15192
15337
  ? `🔁 Always allow ${rule.label} for ${agentName}`
15193
- : `✅ Allowed (always-allow yaml edit failed; check gateway log)`
15338
+ : `⚠️ Allowed for now, but "always" did NOT save — it will ask again after restart. Check gateway log.`
15194
15339
  // HTML-escape baseText — `ctx.callbackQuery.message.text` returns
15195
15340
  // entities-stripped plain UTF-8, so raw `<`/`>`/`&` in the
15196
15341
  // expanded permission card's `description` or `input_preview`
@@ -15203,7 +15348,7 @@ bot.on('callback_query:data', async ctx => {
15203
15348
  : ''
15204
15349
  const editLabel = grantOk
15205
15350
  ? `🔁 <b>Always allow ${escapeHtmlForTg(rule.label)}</b> for ${escapeHtmlForTg(agentName)} — restart agent for full effect`
15206
- : `✅ <b>Allowed</b> (always-allow rule edit failed; see logs)`
15351
+ : `⚠️ <b>Allowed for now — "always" did NOT save.</b> It will ask again after restart. Check gateway log.`
15207
15352
  // #1150 audit: route through finalizeCallback so the keyboard
15208
15353
  // strips alongside the status-line edit. Pre-fix this called
15209
15354
  // editMessageText without `reply_markup` so the Allow/Deny/Always
@@ -43,6 +43,39 @@ import {
43
43
  let state: State = initialState()
44
44
  const enabled = process.env.SWITCHROOM_DELIVERY_MACHINE_SHADOW !== '0'
45
45
 
46
+ // Phase 2b PR 3 — STAGED CUTOVER. When enabled, the gateway's
47
+ // "is a turn in flight?" gate reads this machine's global state
48
+ // instead of the PR3b `claudeBusyKeys` set. The machine tracks ONE
49
+ // `activeTurn` (single bridge) plus TTL `tick` expiry, so — unlike a
50
+ // per-delivery key set — it cannot accumulate orphan keys and wedge
51
+ // the gate "in-flight forever" (the gymbro/clerk 5-min dangle of
52
+ // 2026-05-28). Scope is the turn-in-flight GATE only; the poke ladder
53
+ // and perm-verdict effects stay imperative for a follow-up PR.
54
+ //
55
+ // Kill switch: `SWITCHROOM_DELIVERY_MACHINE_CUTOVER=0` reverts every
56
+ // gate to the legacy claudeBusyKeys read (zero behaviour change).
57
+ // Requires shadow mode ON — with shadow off the machine state is
58
+ // frozen and must NOT be read as authoritative.
59
+ const cutoverEnabled = enabled && process.env.SWITCHROOM_DELIVERY_MACHINE_CUTOVER !== '0'
60
+
61
+ /**
62
+ * True when the kill-switch leaves the delivery machine authoritative
63
+ * for the turn-in-flight gate. Gateway gate sites branch on this.
64
+ */
65
+ export function isDeliveryCutoverEnabled(): boolean {
66
+ return cutoverEnabled
67
+ }
68
+
69
+ /**
70
+ * Authoritative "is a turn currently in flight?" read for the gate.
71
+ * Maps the machine's global state to the boolean the legacy
72
+ * `claudeBusyKeys.size > 0` gate produced. `bridge_dead` and
73
+ * `bridge_alive_idle` are both "not in flight".
74
+ */
75
+ export function isMachineInTurn(): boolean {
76
+ return state.global.kind === 'bridge_alive_in_turn'
77
+ }
78
+
46
79
  /**
47
80
  * Run an event through the state machine in shadow mode. The machine
48
81
  * state advances, the predicted effects are LOGGED, but no I/O fires.
@@ -74,15 +74,24 @@ function urlHostPath(u) {
74
74
  export function computeLabel(toolName, input) {
75
75
  const i = input ?? {}
76
76
 
77
- // Tools whose labels are already handled elsewhere emit nothing so
78
- // the existing description / TodoWrite / sub-agent paths win.
77
+ // Bash / Task / Agent / TodoWrite / ToolSearch: previously emitted nothing
78
+ // (deferred to the session-JSONL description path). The draft-mirror
79
+ // now drives off THIS sidecar in real time (flush-independent), so we
80
+ // must label them here too — otherwise the most common tool (Bash)
81
+ // never reaches the live draft. Uses the model-authored `description`
82
+ // for Bash/Task/Agent, matching the gateway's describeToolUse rendering.
79
83
  switch (toolName) {
80
84
  case 'Bash':
85
+ return clip(String(i.description ?? ''), 70).trim() || 'Running a command'
81
86
  case 'Task':
82
- case 'Agent':
87
+ case 'Agent': {
88
+ const d = clip(String(i.description ?? ''), 60).trim()
89
+ return d ? `Delegating: ${d}` : 'Delegating to a sub-agent'
90
+ }
83
91
  case 'TodoWrite':
92
+ return 'Updating the plan'
84
93
  case 'ToolSearch':
85
- return null
94
+ return 'Finding the right tool'
86
95
  }
87
96
 
88
97
  // Built-in rule table.
@@ -132,6 +132,28 @@ function skillBasenameFromPath(input: Record<string, unknown>): string | null {
132
132
  return basename(trimmed) || null;
133
133
  }
134
134
 
135
+ /**
136
+ * Verify that a grant actually landed in the resolved `tools.allow` list.
137
+ *
138
+ * Called by the `perm:always:*` handler after `switchroom agent grant`
139
+ * returns to guard against silently-failed or misdirected yaml writes.
140
+ * Extracted as a pure helper so it can be unit-tested without a full
141
+ * Grammy + switchroomExec harness.
142
+ *
143
+ * @param resolvedAllow The `tools.allow` array from `resolveAgentConfig`
144
+ * for the target agent (pass `[]` when absent/undefined).
145
+ * @param ruleRule The rule string produced by `resolveAlwaysAllowRule`
146
+ * (e.g. `"Skill(garmin)"`, `"Bash"`, `"mcp__x__y"`).
147
+ * @returns `true` when the rule is present (grant confirmed), `false` when
148
+ * absent (grant failed / config location drifted).
149
+ */
150
+ export function isRulePersisted(
151
+ resolvedAllow: readonly string[],
152
+ ruleRule: string,
153
+ ): boolean {
154
+ return resolvedAllow.includes(ruleRule);
155
+ }
156
+
135
157
  /**
136
158
  * Inverse of `resolveAlwaysAllowRule` — does a stored allow-rule cover a
137
159
  * fresh `permission_request`? Used by the bridge's session-scoped
@@ -93,6 +93,11 @@ export type SessionEvent =
93
93
  | { kind: 'dequeue' }
94
94
  | { kind: 'thinking' }
95
95
  | { kind: 'tool_use'; toolName: string; toolUseId?: string | null; input?: Record<string, unknown>; precomputedLabel?: string }
96
+ // Real-time tool label from the PreToolUse-hook sidecar — fires when the
97
+ // hook writes the label (synchronous at tool-call time), independent of
98
+ // the lazily-flushed transcript. The draft-mirror drives off THIS, not
99
+ // the flush-gated `tool_use`, so activity streams deterministically.
100
+ | { kind: 'tool_label'; toolUseId: string; label: string; toolName: string }
96
101
  | { kind: 'text'; text: string }
97
102
  | { kind: 'tool_result'; toolUseId: string; toolName: string | null; isError?: boolean; errorText?: string }
98
103
  | { kind: 'turn_end'; durationMs: number }
@@ -639,6 +644,13 @@ export function startSessionTail(config: SessionTailConfig): SessionTailHandle {
639
644
  try {
640
645
  const s = createToolLabelSidecar({ stateDir: stateDirForSidecar, sessionId })
641
646
  sidecars.set(sessionId, s)
647
+ // Real-time draft-mirror source: emit a `tool_label` event the moment
648
+ // the hook writes a label (flush-independent), so the gateway can
649
+ // stream the activity feed without waiting on the transcript flush.
650
+ // Subscribed once per sidecar (this is the only creation site).
651
+ s.onLabel((toolUseId, label, toolName) => {
652
+ rawOnEvent({ kind: 'tool_label', toolUseId, label, toolName })
653
+ })
642
654
  return s
643
655
  } catch (err) {
644
656
  log?.(`session-tail: sidecar create failed: ${(err as Error).message}`)
@@ -775,6 +787,12 @@ export function startSessionTail(config: SessionTailConfig): SessionTailHandle {
775
787
  }
776
788
  log?.(`session-tail: attached to ${file} (cursor=${cursor})`)
777
789
  }
790
+ // Eagerly create + subscribe the PreToolUse sidecar for this session
791
+ // NOW (on attach), not lazily on the first JSONL tool_use — otherwise
792
+ // the real-time `tool_label` source wouldn't exist until a flush-gated
793
+ // tool_use arrived, re-introducing the very lag the sidecar avoids.
794
+ const attachSid = sessionIdForFile(file)
795
+ if (attachSid) ensureSidecar(attachSid)
778
796
  try {
779
797
  watcher = watch(file, () => readNew())
780
798
  } catch (err) {
@@ -0,0 +1,147 @@
1
+ /**
2
+ * Structural contract tests for the "🔁 Always allow" handler in
3
+ * gateway.ts (the `behavior === 'always'` branch of the perm: callback
4
+ * dispatcher).
5
+ *
6
+ * Why structural: the handler lives inside a Grammy callback closure
7
+ * that's not exported. Full-function invocation would require a complete
8
+ * Grammy + switchroomExec harness. Instead, we pin the source-level
9
+ * invariants that were introduced to fix the silent-failure bug:
10
+ *
11
+ * 1. Loud failure text — the failure path must NOT read like success
12
+ * (`✅ Allowed …`). After the fix, both the toast (ackText) and the
13
+ * chat edit (editLabel) use the `⚠️` marker.
14
+ * 2. Post-write verification — after `switchroomExec` returns success
15
+ * the handler MUST re-read the config and check that the rule is
16
+ * actually present in `tools.allow`. If the check fails it sets
17
+ * grantOk=false and surfaces the loud message.
18
+ * 3. Success path unchanged — when `grantOk` is true the success
19
+ * strings (`🔁 Always allow …`, `restart agent for full effect`)
20
+ * are still present.
21
+ * 4. Error reason capture — `grantFailReason` is declared and
22
+ * populated from `(err as Error).message` so the root cause can
23
+ * appear in logs; it is NOT silently swallowed into `message`-less
24
+ * stderr output.
25
+ *
26
+ * Slicing strategy: we extract the `if (behavior === 'always') {` block
27
+ * from gateway.ts and run string assertions against that slice only —
28
+ * so additions elsewhere in the 17k-line file don't produce false
29
+ * positives or negatives.
30
+ */
31
+
32
+ import { describe, it, expect } from 'vitest'
33
+ import { readFileSync } from 'node:fs'
34
+ import { resolve } from 'node:path'
35
+
36
+ const gatewaySrc = readFileSync(
37
+ resolve(__dirname, '..', 'gateway', 'gateway.ts'),
38
+ 'utf-8',
39
+ )
40
+
41
+ /**
42
+ * Extract the `behavior === 'always'` block from the perm: callback
43
+ * dispatcher. The slice runs from the `if (behavior === 'always')` guard
44
+ * up to (but not including) the next top-level `// Forward permission`
45
+ * comment which opens the allow/deny branch.
46
+ */
47
+ function sliceAlwaysBlock(): string {
48
+ const start = gatewaySrc.indexOf("if (behavior === 'always')")
49
+ const end = gatewaySrc.indexOf('// Forward permission decision to connected bridges', start)
50
+ if (start === -1 || end === -1) return ''
51
+ return gatewaySrc.slice(start, end)
52
+ }
53
+
54
+ const alwaysBlock = sliceAlwaysBlock()
55
+
56
+ describe('always-allow handler — loud failure invariants', () => {
57
+ it('failure ackText uses the ⚠️ warning marker, not ✅', () => {
58
+ // The failure path must be unambiguous. Before the fix, the failure
59
+ // ackText started with "✅ Allowed …" which reads like success.
60
+ expect(alwaysBlock).toContain(
61
+ `⚠️ Allowed for now, but "always" did NOT save — it will ask again after restart. Check gateway log.`,
62
+ )
63
+ // Confirm the old misleading text is gone.
64
+ expect(alwaysBlock).not.toContain('✅ Allowed (always-allow yaml edit failed')
65
+ })
66
+
67
+ it('failure editLabel uses the ⚠️ warning marker, not ✅', () => {
68
+ // The inline-keyboard collapse edit also must NOT look like success.
69
+ expect(alwaysBlock).toContain(
70
+ `⚠️ <b>Allowed for now — "always" did NOT save.</b> It will ask again after restart. Check gateway log.`,
71
+ )
72
+ // Confirm the old misleading text is gone.
73
+ expect(alwaysBlock).not.toContain('✅ <b>Allowed</b> (always-allow rule edit failed')
74
+ })
75
+ })
76
+
77
+ describe('always-allow handler — success path unchanged', () => {
78
+ it('success ackText still uses 🔁 and names the rule', () => {
79
+ expect(alwaysBlock).toContain('`🔁 Always allow ${rule.label} for ${agentName}`')
80
+ })
81
+
82
+ it('success editLabel still uses 🔁 bold + restart hint', () => {
83
+ expect(alwaysBlock).toContain('restart agent for full effect')
84
+ expect(alwaysBlock).toContain('🔁 <b>Always allow')
85
+ })
86
+ })
87
+
88
+ describe('always-allow handler — post-write verification', () => {
89
+ it('reloads config after switchroomExec returns', () => {
90
+ // The verification block must call loadSwitchroomConfig() AFTER
91
+ // the switchroomExec call to confirm the rule landed in the
92
+ // resolved tools.allow.
93
+ const execIdx = alwaysBlock.indexOf("switchroomExec(['agent', 'grant'")
94
+ const loadIdx = alwaysBlock.indexOf('loadSwitchroomConfig()', execIdx)
95
+ expect(execIdx).toBeGreaterThan(-1)
96
+ expect(loadIdx).toBeGreaterThan(execIdx)
97
+ })
98
+
99
+ it('calls resolveAgentConfig to obtain the merged tools.allow list', () => {
100
+ const execIdx = alwaysBlock.indexOf("switchroomExec(['agent', 'grant'")
101
+ const resolveIdx = alwaysBlock.indexOf('resolveAgentConfig(', execIdx)
102
+ expect(resolveIdx).toBeGreaterThan(execIdx)
103
+ })
104
+
105
+ it('calls isRulePersisted(allowList, rule.rule) after the reload', () => {
106
+ // The handler delegates the membership check to the extracted pure
107
+ // helper so the behavioral test in always-allow-persist.test.ts can
108
+ // cover the same code path.
109
+ expect(alwaysBlock).toContain('isRulePersisted(allowList, rule.rule)')
110
+ })
111
+
112
+ it('sets grantOk=true only when isRulePersisted returns true', () => {
113
+ // grantOk=true must be inside the `if (isRulePersisted(...))` branch,
114
+ // not unconditionally after switchroomExec.
115
+ const persistIdx = alwaysBlock.indexOf('isRulePersisted(allowList, rule.rule)')
116
+ const grantOkIdx = alwaysBlock.indexOf('grantOk = true', persistIdx)
117
+ expect(persistIdx).toBeGreaterThan(-1)
118
+ expect(grantOkIdx).toBeGreaterThan(persistIdx)
119
+ // Confirm grantOk=true does NOT appear before the persistence check
120
+ // (i.e., not unconditionally on switchroomExec success as in the old code).
121
+ const grantOkFirst = alwaysBlock.indexOf('grantOk = true')
122
+ expect(grantOkFirst).toBeGreaterThanOrEqual(persistIdx)
123
+ })
124
+
125
+ it('logs a VERIFY FAILED message when the rule is absent after the write', () => {
126
+ expect(alwaysBlock).toContain('always-allow VERIFY FAILED')
127
+ })
128
+
129
+ it('surfaces config-location drift as a failure reason', () => {
130
+ expect(alwaysBlock).toContain('config location may have drifted')
131
+ })
132
+ })
133
+
134
+ describe('always-allow handler — error reason capture', () => {
135
+ it('declares grantFailReason to capture the root cause', () => {
136
+ expect(alwaysBlock).toContain('let grantFailReason')
137
+ })
138
+
139
+ it('populates grantFailReason from the thrown error on switchroomExec failure', () => {
140
+ // After the catch for switchroomExec, grantFailReason must be set
141
+ // from the error object so log messages can show the actual cause.
142
+ const catchIdx = alwaysBlock.lastIndexOf('} catch (err) {')
143
+ const reasonIdx = alwaysBlock.indexOf('grantFailReason = (err as Error).message', catchIdx)
144
+ expect(catchIdx).toBeGreaterThan(-1)
145
+ expect(reasonIdx).toBeGreaterThan(catchIdx)
146
+ })
147
+ })