switchroom 0.15.45 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. package/dist/agent-scheduler/index.js +122 -88
  2. package/dist/auth-broker/index.js +463 -177
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +17 -14
  5. package/dist/cli/notion-write-pretool.mjs +117 -86
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/skill-validate-pretool.mjs +72 -72
  9. package/dist/cli/switchroom.js +3158 -1178
  10. package/dist/host-control/main.js +2833 -355
  11. package/dist/vault/approvals/kernel-server.js +7479 -7439
  12. package/dist/vault/broker/server.js +11312 -11272
  13. package/examples/minimal.yaml +1 -0
  14. package/examples/switchroom.yaml +1 -0
  15. package/package.json +3 -3
  16. package/profiles/_base/start.sh.hbs +88 -1
  17. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  18. package/profiles/default/CLAUDE.md.hbs +0 -19
  19. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  20. package/telegram-plugin/answer-stream-flag.ts +12 -49
  21. package/telegram-plugin/answer-stream.ts +5 -150
  22. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  23. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  24. package/telegram-plugin/context-exhaustion.ts +12 -0
  25. package/telegram-plugin/demo-mask.ts +154 -0
  26. package/telegram-plugin/dist/bridge/bridge.js +167 -124
  27. package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
  28. package/telegram-plugin/dist/server.js +215 -172
  29. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  30. package/telegram-plugin/draft-stream.ts +47 -410
  31. package/telegram-plugin/final-answer-detect.ts +17 -12
  32. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  33. package/telegram-plugin/format.ts +56 -19
  34. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  35. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  36. package/telegram-plugin/gateway/auth-command.ts +70 -14
  37. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  38. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  39. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  40. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  41. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  42. package/telegram-plugin/gateway/effort-command.ts +8 -3
  43. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  44. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  45. package/telegram-plugin/gateway/gateway.ts +1837 -291
  46. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  47. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  48. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  49. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  50. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  51. package/telegram-plugin/history.ts +33 -11
  52. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  53. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  54. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  55. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  56. package/telegram-plugin/issues-card.ts +4 -0
  57. package/telegram-plugin/model-unavailable.ts +124 -0
  58. package/telegram-plugin/narrative-dedup.ts +69 -0
  59. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  60. package/telegram-plugin/package.json +3 -3
  61. package/telegram-plugin/pending-work-progress.ts +12 -0
  62. package/telegram-plugin/permission-rule.ts +32 -5
  63. package/telegram-plugin/permission-title.ts +152 -9
  64. package/telegram-plugin/quota-check.ts +13 -0
  65. package/telegram-plugin/quota-watch.ts +135 -7
  66. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  67. package/telegram-plugin/registry/turns-schema.ts +9 -0
  68. package/telegram-plugin/runtime-metrics.ts +13 -0
  69. package/telegram-plugin/session-tail.ts +96 -11
  70. package/telegram-plugin/silence-poke.ts +170 -24
  71. package/telegram-plugin/slot-banner-driver.ts +3 -0
  72. package/telegram-plugin/status-no-truncate.ts +44 -0
  73. package/telegram-plugin/status-reactions.ts +20 -3
  74. package/telegram-plugin/stream-controller.ts +4 -23
  75. package/telegram-plugin/stream-reply-handler.ts +6 -24
  76. package/telegram-plugin/streaming-metrics.ts +91 -0
  77. package/telegram-plugin/subagent-watcher.ts +212 -66
  78. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  79. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  80. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  81. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  82. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  83. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  84. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  85. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  86. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  87. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  88. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  89. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  90. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  91. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  92. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  93. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  94. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  95. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  96. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  97. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  98. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  99. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  100. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  101. package/telegram-plugin/tests/history.test.ts +60 -0
  102. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  103. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  104. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  105. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  106. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  107. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  108. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  109. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  110. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  111. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  112. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  113. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  114. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  115. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  116. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  117. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  118. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  119. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  120. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  121. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  122. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  123. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  124. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  125. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  126. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  127. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  128. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  129. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  130. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  131. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  132. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  133. package/telegram-plugin/tool-activity-summary.ts +375 -58
  134. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  135. package/telegram-plugin/uat/assertions.ts +115 -0
  136. package/telegram-plugin/uat/driver.ts +68 -0
  137. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  138. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  139. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  145. package/telegram-plugin/welcome-text.ts +13 -1
  146. package/telegram-plugin/worker-activity-feed.ts +157 -82
  147. package/telegram-plugin/draft-transport.ts +0 -122
  148. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  149. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -25,7 +25,29 @@
25
25
  * the decision says CLAIM the slot — caller sets `firstPingAt`.
26
26
  * - When the model requested silent, this module is a no-op.
27
27
  *
28
+ * Notification ownership (R8 / PR-2). The bare "first ping wins" rule
29
+ * above has a residual failure: an interim ACK that pings first claims
30
+ * the turn's single slot, and the later SUBSTANTIVE answer is then
31
+ * downgraded to silent — "the reply is last but the phone never buzzed
32
+ * for the answer." To fix that without re-introducing model double-pings,
33
+ * the decision is now aware of WHO holds the slot and WHO is asking:
34
+ *
35
+ * - A SUBSTANTIVE final asking to ping while the slot is held by a
36
+ * NON-substantive (ack) send ⇒ do NOT suppress; let the answer ping
37
+ * and UPGRADE the slot to substantive (the answer owns the ping even
38
+ * though the ack already buzzed once — a deliberate, bounded second
39
+ * ping so the user is notified of the actual answer).
40
+ * - An ACK asking to ping while the slot is held by a SUBSTANTIVE send
41
+ * ⇒ suppress (no spurious double-ping AFTER the real answer).
42
+ * - A SUBSTANTIVE asking while the slot is held by a SUBSTANTIVE ⇒
43
+ * suppress (preserves the #1674 model-double-ping guard: answer +
44
+ * wrap-up should be one beep, not two).
45
+ * - An ACK while the slot is held by an ACK ⇒ suppress (unchanged).
46
+ *
28
47
  * The slot is claimed BEFORE the actual send (caller responsibility).
48
+ * On a CLAIM or an UPGRADE the caller MUST set `firstPingAt` AND
49
+ * `firstPingWasSubstantive` ATOMICALLY (same synchronous block, no await
50
+ * between) so a racing second reply reads a consistent pair.
29
51
  * Trade-off documented inline in `gateway.ts:executeReply`.
30
52
  */
31
53
 
@@ -39,6 +61,18 @@ export interface OverPingDecisionInput {
39
61
  * has landed yet. Caller threads this through from
40
62
  * `CurrentTurn.firstPingAt`. */
41
63
  firstPingAt: number | null
64
+ /** True iff THIS reply is a substantive final answer (stream `done`,
65
+ * or text length ≥ FINAL_ANSWER_MIN_CHARS) — as opposed to a short
66
+ * interim ack. Caller computes via `isSubstantiveFinalReply`. Defaults
67
+ * to `false` (treat as a non-substantive ack) when omitted, which
68
+ * preserves the pre-PR-2 "first ping wins, the rest suppress" behaviour
69
+ * for callers that don't yet thread it. */
70
+ substantive?: boolean
71
+ /** True iff the send that CLAIMED the turn's ping slot was itself a
72
+ * substantive final answer. Caller threads this through from
73
+ * `CurrentTurn.firstPingWasSubstantive`. Meaningless (and ignored)
74
+ * when `firstPingAt == null`. Defaults to `false`. */
75
+ firstPingWasSubstantive?: boolean
42
76
  /** Deterministic clock for tests; defaults to Date.now() in callers. */
43
77
  nowMs: number
44
78
  }
@@ -49,8 +83,18 @@ export interface OverPingDecision {
49
83
  * violation by the model — caller should log + emit a metric. */
50
84
  suppress: boolean
51
85
  /** True iff the caller should claim the slot —
52
- * `turn.firstPingAt = nowMs`. Mutually exclusive with `suppress`. */
86
+ * `turn.firstPingAt = nowMs` AND
87
+ * `turn.firstPingWasSubstantive = substantive`. Mutually exclusive
88
+ * with `suppress`. Set both on a fresh claim (no prior ping) and on
89
+ * an UPGRADE (a substantive answer pinging over an ack's slot). */
53
90
  claimSlot: boolean
91
+ /** True iff this is an UPGRADE — a substantive final answer claiming
92
+ * the ping slot that was previously held by a NON-substantive ack.
93
+ * The answer pings even though the ack already buzzed once. Implied
94
+ * by `claimSlot && firstPingAt != null` but surfaced explicitly so
95
+ * the caller can log/meter the (intentional) second ping distinctly
96
+ * from a normal first claim. Always false on a suppress or a no-op. */
97
+ upgrade: boolean
54
98
  /** When `suppress` is true, how long the first ping has been
55
99
  * "active" (ms since `firstPingAt`). Caller surfaces this in the
56
100
  * log + metric for forensic analysis (e.g. tight rapid double-pings
@@ -63,18 +107,40 @@ export interface OverPingDecision {
63
107
  * No mutation, no IO, deterministic under a fixed `nowMs`.
64
108
  */
65
109
  export function decideOverPing(input: OverPingDecisionInput): OverPingDecision {
110
+ const substantive = input.substantive === true
111
+ const firstPingWasSubstantive = input.firstPingWasSubstantive === true
112
+
66
113
  if (!input.modelRequestedPing) {
67
114
  // Model already chose silent — nothing for the safety net to do.
68
- return { suppress: false, claimSlot: false, sinceFirstPingMs: null }
115
+ return { suppress: false, claimSlot: false, upgrade: false, sinceFirstPingMs: null }
69
116
  }
70
117
  if (input.firstPingAt != null) {
71
- // Slot already claimed by an earlier ping this turn suppress.
118
+ // The turn's ping slot is already held. WHO holds it and WHO is
119
+ // asking decides whether this is a notification-ownership UPGRADE or
120
+ // a double-ping to suppress (see the module doc-comment for the full
121
+ // matrix).
122
+ if (substantive && !firstPingWasSubstantive) {
123
+ // The substantive ANSWER is pinging over a slot held by an ack.
124
+ // Let it ping and upgrade the slot to substantive — the answer
125
+ // owns the turn's notification, not the earlier ack.
126
+ return {
127
+ suppress: false,
128
+ claimSlot: true,
129
+ upgrade: true,
130
+ sinceFirstPingMs: null,
131
+ }
132
+ }
133
+ // Every other slot-held case is a double-ping to suppress:
134
+ // - ack over substantive: a spurious wrap-up after the real answer
135
+ // - substantive over substantive: the #1674 answer+wrap-up guard
136
+ // - ack over ack: the original one-ping-per-turn behaviour
72
137
  return {
73
138
  suppress: true,
74
139
  claimSlot: false,
140
+ upgrade: false,
75
141
  sinceFirstPingMs: input.nowMs - input.firstPingAt,
76
142
  }
77
143
  }
78
144
  // First ping this turn — let it through and claim the slot.
79
- return { suppress: false, claimSlot: true, sinceFirstPingMs: null }
145
+ return { suppress: false, claimSlot: true, upgrade: false, sinceFirstPingMs: null }
80
146
  }
@@ -40,12 +40,12 @@
40
40
  },
41
41
  "repository": {
42
42
  "type": "git",
43
- "url": "https://github.com/mekenthompson/switchroom.git",
43
+ "url": "https://github.com/switchroom/switchroom.git",
44
44
  "directory": "telegram-plugin"
45
45
  },
46
- "homepage": "https://github.com/mekenthompson/switchroom/tree/main/telegram-plugin#readme",
46
+ "homepage": "https://github.com/switchroom/switchroom/tree/main/telegram-plugin#readme",
47
47
  "bugs": {
48
- "url": "https://github.com/mekenthompson/switchroom/issues"
48
+ "url": "https://github.com/switchroom/switchroom/issues"
49
49
  },
50
50
  "publishConfig": {
51
51
  "access": "public"
@@ -284,6 +284,18 @@ export function noteTurnEnd(key: string): void {
284
284
  }
285
285
  }
286
286
 
287
+ /**
288
+ * True when the current turn for `key` dispatched async background work
289
+ * (Agent / Task / Bash run_in_background:true) but the turn has not yet ended
290
+ * with a cleared pending flag. Used by the feed-survival predicate so the
291
+ * orphaned-reply backstop and silence-poke teardown are deferred while a
292
+ * detached background process is still running — even after inFlight empties
293
+ * when the near-instant tool_result (e.g. the Bash background handle) returns.
294
+ */
295
+ export function hasPendingAsyncDispatch(key: string): boolean {
296
+ return stateByKey.get(key)?.pending === true
297
+ }
298
+
287
299
  /**
288
300
  * Clear pending-progress for a chat — reasons:
289
301
  * 'inbound' — user sent a new message, they're re-engaged
@@ -91,7 +91,7 @@ export function resolveScopedAllowChoices(
91
91
 
92
92
  // ── File tools: this exact path vs any file.
93
93
  if (FILE_TOOLS.has(toolName)) {
94
- const path = filePathFrom(input);
94
+ const path = filePathFrom(input, inputPreview);
95
95
  const broad: ScopeOption = { rule: toolName, buttonLabel: "Any file", broad: true };
96
96
  if (path) {
97
97
  return {
@@ -163,9 +163,36 @@ function resolveSkillName(input: Record<string, unknown>): string | null {
163
163
  );
164
164
  }
165
165
 
166
- function filePathFrom(input: Record<string, unknown> | null): string | null {
167
- if (!input) return null;
168
- return readString(input, "file_path") ?? readString(input, "notebook_path");
166
+ function filePathFrom(
167
+ input: Record<string, unknown> | null,
168
+ rawPreview?: string,
169
+ ): string | null {
170
+ if (input) {
171
+ const p = readString(input, "file_path") ?? readString(input, "notebook_path");
172
+ if (p) return p;
173
+ }
174
+ // Claude Code truncates inputPreview to 200 chars, making the surrounding
175
+ // JSON invalid for Edit/Write (old_string/new_string push it past 200).
176
+ // "file_path" is the first key, so its value is intact in the truncated
177
+ // prefix — extract it with a lenient regex on the raw string.
178
+ if (rawPreview) return extractFilePathFromRaw(rawPreview);
179
+ return null;
180
+ }
181
+
182
+ /**
183
+ * Regex-based fallback to extract "file_path" or "notebook_path" from a raw
184
+ * (possibly truncated / invalid-JSON) inputPreview string. JSON-unescapes the
185
+ * captured value. Returns null when neither key is present or value is empty.
186
+ */
187
+ function extractFilePathFromRaw(raw: string): string | null {
188
+ const m = /"(?:file_path|notebook_path)"\s*:\s*"((?:[^"\\]|\\.)*)"/.exec(raw);
189
+ if (!m) return null;
190
+ try {
191
+ const value = JSON.parse(`"${m[1]}"`) as string;
192
+ return typeof value === "string" && value.length > 0 ? value : null;
193
+ } catch {
194
+ return null;
195
+ }
169
196
  }
170
197
 
171
198
  /**
@@ -274,7 +301,7 @@ export function matchesAllowRule(
274
301
  return bashFirstToken(cmd) === m[1];
275
302
  }
276
303
  if (FILE_TOOLS.has(ruleTool)) {
277
- return filePathFrom(input) === arg;
304
+ return filePathFrom(input, inputPreview) === arg;
278
305
  }
279
306
  return false;
280
307
  }
@@ -77,6 +77,21 @@ const INTERNAL_MCP_SERVERS = new Set([
77
77
  "switchroom-telegram",
78
78
  ]);
79
79
 
80
+ /**
81
+ * hostd fleet verbs that take a target agent `name` as a required arg. The
82
+ * approval card MUST name WHICH agent is targeted (#2469) — "restart an
83
+ * agent" with no name leaves the operator blind. We interpolate the target
84
+ * into the curated phrase: "restart an agent in the fleet" → "restart agent
85
+ * `carrie` in the fleet". Stays generic when `name` is absent (never crash).
86
+ */
87
+ const HOSTD_AGENT_TARGET_VERBS = new Set([
88
+ "mcp__hostd__agent_restart",
89
+ "mcp__hostd__agent_start",
90
+ "mcp__hostd__agent_stop",
91
+ "mcp__hostd__agent_logs",
92
+ "mcp__hostd__agent_exec",
93
+ ]);
94
+
80
95
  /**
81
96
  * Build the multi-line card body for an approval prompt.
82
97
  *
@@ -86,10 +101,23 @@ const INTERNAL_MCP_SERVERS = new Set([
86
101
  * Output is HTML-escaped for `parse_mode: 'HTML'`. The agent name is
87
102
  * capitalized for the sentence; dropped (with "wants to") when null —
88
103
  * the bridge client can be anonymous during early-boot edge cases.
104
+ *
105
+ * The `why:` line is the CALLER's stated rationale — the `reason`/`why`
106
+ * argument on the tool input, NOT the tool's static JSONSchema
107
+ * `description`. The schema description is documentation (it can contain
108
+ * literal tokens like `$SWITCHROOM_AGENT_NAME`), so surfacing it as the
109
+ * "why" reads like an un-interpolated variable and discards the agent's
110
+ * actual reason (#2469). We only fall back to "not provided" — never to
111
+ * the schema description.
89
112
  */
90
113
  export function formatPermissionCardBody(opts: {
91
114
  toolName: string;
92
115
  inputPreview: string | undefined;
116
+ /**
117
+ * The tool's static JSONSchema description. Retained for the signature
118
+ * (callers still pass it) but deliberately NOT used as the `why:` line —
119
+ * see #2469. The caller's rationale comes from the input args instead.
120
+ */
93
121
  description: string | undefined;
94
122
  agentName: string | null;
95
123
  }): string {
@@ -104,7 +132,10 @@ export function formatPermissionCardBody(opts: {
104
132
  lines.push(`🔐 ${escapeTgHtml(capFirst(action))}`);
105
133
  }
106
134
 
107
- const rawWhy = (opts.description ?? "").replace(/\s+/g, " ").trim();
135
+ // why: the caller-supplied rationale (`reason`/`why` arg), never the
136
+ // static schema description (#2469).
137
+ const callerReason = callerSuppliedReason(opts.inputPreview);
138
+ const rawWhy = (callerReason ?? "").replace(/\s+/g, " ").trim();
108
139
  const truncatedWhy =
109
140
  rawWhy.length > DESCRIPTION_LINE_MAX
110
141
  ? rawWhy.slice(0, DESCRIPTION_LINE_MAX - 1) + "…"
@@ -142,15 +173,15 @@ export function naturalAction(
142
173
  case "Edit":
143
174
  case "MultiEdit":
144
175
  case "NotebookEdit": {
145
- const f = fileBase(input);
176
+ const f = fileBase(input, inputPreview);
146
177
  return f ? `edit: ${f}` : "edit files";
147
178
  }
148
179
  case "Write": {
149
- const f = fileBase(input);
180
+ const f = fileBase(input, inputPreview);
150
181
  return f ? `write: ${f}` : "write files";
151
182
  }
152
183
  case "Read": {
153
- const f = fileBase(input);
184
+ const f = fileBase(input, inputPreview);
154
185
  return f ? `read: ${f}` : "read files";
155
186
  }
156
187
  case "Bash": {
@@ -194,7 +225,7 @@ function naturalMcpAction(
194
225
  const server = parts.length >= 2 ? parts[1]! : "";
195
226
  const curated = MCP_TOOL_DESCRIPTIONS[toolName];
196
227
  if (curated) {
197
- const phrase = lowerFirst(curated);
228
+ const phrase = hostdAgentPhrase(toolName, input) ?? lowerFirst(curated);
198
229
  return INTERNAL_MCP_SERVERS.has(server)
199
230
  ? phrase
200
231
  : `${phrase} (${prettyMcpServer(server)})`;
@@ -217,6 +248,37 @@ function naturalMcpAction(
217
248
  return `use ${toolName}`;
218
249
  }
219
250
 
251
+ /**
252
+ * For the hostd `agent_*` fleet verbs, build an action phrase that NAMES the
253
+ * target agent (#2469) — "restart agent `carrie` in the fleet". The verb is
254
+ * derived from the tool name (`agent_restart` → "restart"); `agent_logs` /
255
+ * `agent_exec` get bespoke phrasing. Returns null when the tool isn't a
256
+ * name-targeted hostd verb or no `name` arg is present, so the caller falls
257
+ * back to the generic curated phrase (never crashes on a missing name).
258
+ */
259
+ function hostdAgentPhrase(
260
+ toolName: string,
261
+ input: Record<string, unknown> | null,
262
+ ): string | null {
263
+ if (!HOSTD_AGENT_TARGET_VERBS.has(toolName)) return null;
264
+ const name = input ? readString(input, "name") : null;
265
+ if (!name) return null;
266
+ switch (toolName) {
267
+ case "mcp__hostd__agent_restart":
268
+ return `restart agent \`${name}\` in the fleet`;
269
+ case "mcp__hostd__agent_start":
270
+ return `start agent \`${name}\` in the fleet`;
271
+ case "mcp__hostd__agent_stop":
272
+ return `stop agent \`${name}\` in the fleet`;
273
+ case "mcp__hostd__agent_logs":
274
+ return `read agent \`${name}\`'s container logs`;
275
+ case "mcp__hostd__agent_exec":
276
+ return `run a read-only inspection inside agent \`${name}\``;
277
+ default:
278
+ return null;
279
+ }
280
+ }
281
+
220
282
  /**
221
283
  * For a REST-wrapper MCP call ({ path, body?, query? }), build the action
222
284
  * phrase "<VERB> <path> (<Server>)" — e.g. "POST /smtp/email (Brevo)". The
@@ -405,10 +467,43 @@ function resolveSkillName(input: Record<string, unknown>): string | null {
405
467
  );
406
468
  }
407
469
 
408
- function fileBase(input: Record<string, unknown> | null): string | null {
409
- if (!input) return null;
410
- const p = readString(input, "file_path") ?? readString(input, "notebook_path");
411
- return p ? basename(p) : null;
470
+ function fileBase(
471
+ input: Record<string, unknown> | null,
472
+ rawPreview?: string,
473
+ ): string | null {
474
+ if (input) {
475
+ const p = readString(input, "file_path") ?? readString(input, "notebook_path");
476
+ if (p) return basename(p);
477
+ }
478
+ // Claude Code truncates inputPreview to 200 chars, making the surrounding
479
+ // JSON invalid (Edit/Write always exceed 200 chars once old_string/new_string
480
+ // are included). "file_path" is the first key, so its value is intact in the
481
+ // truncated prefix — extract it with a lenient regex on the raw string.
482
+ if (rawPreview) {
483
+ const p = extractFilePathFromRaw(rawPreview);
484
+ if (p) return basename(p);
485
+ }
486
+ return null;
487
+ }
488
+
489
+ /**
490
+ * Regex-based fallback to extract "file_path" or "notebook_path" from a raw
491
+ * (possibly truncated / invalid-JSON) inputPreview string. JSON-unescapes the
492
+ * captured value so paths with backslashes or unicode escapes are returned
493
+ * correctly. Returns null when neither key is present or the captured value is
494
+ * empty.
495
+ */
496
+ function extractFilePathFromRaw(raw: string): string | null {
497
+ // Match the first occurrence of "file_path" or "notebook_path".
498
+ const m = /"(?:file_path|notebook_path)"\s*:\s*"((?:[^"\\]|\\.)*)"/.exec(raw);
499
+ if (!m) return null;
500
+ try {
501
+ // JSON.parse the quoted string literal so escape sequences are resolved.
502
+ const value = JSON.parse(`"${m[1]}"`) as string;
503
+ return typeof value === "string" && value.length > 0 ? value : null;
504
+ } catch {
505
+ return null;
506
+ }
412
507
  }
413
508
 
414
509
  function lowerFirst(text: string): string {
@@ -447,6 +542,54 @@ function readString(input: Record<string, unknown>, key: string): string | null
447
542
  return typeof value === "string" && value.length > 0 ? value : null;
448
543
  }
449
544
 
545
+ /**
546
+ * The caller's stated rationale for a tool call — the `reason` (or `why`)
547
+ * argument it passed. This is the agent's actual justification, which is
548
+ * what belongs on the `why:` line of the approval card. Returns null when
549
+ * no reason was supplied (caller renders "not provided") — we never fall
550
+ * back to the tool's static schema description (#2469).
551
+ */
552
+ function callerSuppliedReason(inputPreview: string | undefined): string | null {
553
+ const input = parseInput(inputPreview);
554
+ if (input) {
555
+ const fromJson = readString(input, "reason") ?? readString(input, "why");
556
+ if (fromJson) return fromJson;
557
+ }
558
+ // Truncation fallback (#2580 follow-up): upstream Claude Code truncates
559
+ // `inputPreview` to ~200 chars. For a tool whose first/largest key is a
560
+ // big blob (e.g. config_propose_edit's `unified_diff`), the truncated JSON
561
+ // is unparseable and the schema-required `reason` is lost — the card then
562
+ // renders "why: not provided" even though a reason WAS supplied. Mirror the
563
+ // `extractFilePathFromRaw` lenient-regex fallback so a `reason`/`why` value
564
+ // surviving in the truncated prefix is still recovered. (Reordering the
565
+ // schema so `reason` precedes the blob keeps it inside the 200-char prefix;
566
+ // this regex is what then reads it back out.)
567
+ if (inputPreview) {
568
+ const r = extractReasonFromRaw(inputPreview);
569
+ if (r) return r;
570
+ }
571
+ return null;
572
+ }
573
+
574
+ /**
575
+ * Regex-based fallback to extract a `reason` or `why` value from a raw
576
+ * (possibly truncated / invalid-JSON) inputPreview string. Mirrors
577
+ * `extractFilePathFromRaw`: JSON-unescapes the captured value so a reason
578
+ * with quotes/backslashes/unicode escapes is returned correctly. Returns
579
+ * null when neither key is present or the captured value is empty/whitespace.
580
+ */
581
+ export function extractReasonFromRaw(raw: string): string | null {
582
+ // Match the first occurrence of "reason" or "why".
583
+ const m = /"(?:reason|why)"\s*:\s*"((?:[^"\\]|\\.)*)"/.exec(raw);
584
+ if (!m) return null;
585
+ try {
586
+ const value = JSON.parse(`"${m[1]}"`) as string;
587
+ return typeof value === "string" && value.trim().length > 0 ? value : null;
588
+ } catch {
589
+ return null;
590
+ }
591
+ }
592
+
450
593
  function skillBasenameFromPath(input: Record<string, unknown>): string | null {
451
594
  const path = readString(input, "path") ?? readString(input, "skill_path");
452
595
  if (!path) return null;
@@ -54,6 +54,15 @@ export type QuotaUtilization = {
54
54
  representativeClaim: string | null;
55
55
  overageStatus: string | null;
56
56
  overageDisabledReason: string | null;
57
+ /**
58
+ * #2494 Bug C — header-presence markers. Mirror of the field in
59
+ * `src/auth/quota.ts` (kept in sync across the bundle boundary). The
60
+ * utilization fields are always numeric (a missing header coalesces to 0),
61
+ * so on their own they cannot tell a genuine 0% from a filled-0 thin probe.
62
+ * Optional → unset means "real probe" (legacy snapshots / fixtures).
63
+ */
64
+ fiveHourUtilPresent?: boolean;
65
+ sevenDayUtilPresent?: boolean;
57
66
  };
58
67
 
59
68
  export type QuotaResult =
@@ -120,8 +129,12 @@ export function parseQuotaHeaders(headers: Headers): QuotaResult {
120
129
  return {
121
130
  ok: true,
122
131
  data: {
132
+ // #2494 Bug C — coalesce missing window to 0 for back-compat but record
133
+ // which windows were actually present (both-absent returned ok:false).
123
134
  fiveHourUtilizationPct: (fiveHour ?? 0) * 100,
124
135
  sevenDayUtilizationPct: (sevenDay ?? 0) * 100,
136
+ fiveHourUtilPresent: fiveHour != null,
137
+ sevenDayUtilPresent: sevenDay != null,
125
138
  fiveHourResetAt: parseEpochHeader(headers, "anthropic-ratelimit-unified-5h-reset"),
126
139
  sevenDayResetAt: parseEpochHeader(headers, "anthropic-ratelimit-unified-7d-reset"),
127
140
  representativeClaim: headers.get("anthropic-ratelimit-unified-representative-claim"),
@@ -30,6 +30,13 @@
30
30
  * IPC call (cheap). `probeQuota` is only called on state-change (when
31
31
  * we're going to send a message anyway) to get fresh numbers for the
32
32
  * notification body. On no-change polls, only `listState` is called.
33
+ *
34
+ * #2495 Change 3 — the transition-to-alarm probe is `forceLive` (bypasses
35
+ * the broker's probe-on-open TTL), so the DECISION to alarm is corroborated
36
+ * by a TRUE live probe of the affected account, not a possibly-stale cache
37
+ * read. The re-evaluation with fresh numbers can suppress an alarm whose
38
+ * stale-snapshot transition no longer holds. Steady state stays cheap: a
39
+ * no-change poll never probes. Cost is one live probe per transition edge.
33
40
  */
34
41
 
35
42
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
@@ -175,6 +182,51 @@ export type QuotaWatchDecision =
175
182
  }
176
183
  | { kind: "skip"; accountLabel: string; reason: string };
177
184
 
185
+ /**
186
+ * #2495 BLOCKER fix — the corroboration probe result, as the gateway's
187
+ * runQuotaWatch sees it from `brokerClient.probeQuota(..., forceLive=true)`.
188
+ * Structurally a subset of `ProbeQuotaEntry` (src/auth/broker/client.ts): a
189
+ * `result` discriminated on `ok`, plus a `served` tag the broker stamps to
190
+ * say HOW the result was sourced.
191
+ *
192
+ * The trap this guards: under `forceLive`, when the upstream live probe FAILS
193
+ * and the broker holds a prior snapshot, it returns `cachedSnapshotToResult`
194
+ * — `result.ok === true` but `served === "cache"` (server.ts opProbeQuota).
195
+ * A naive `result.ok` check then treats that stale cache read as a live
196
+ * corroboration, fires the alarm, and stamps the false "Live-probe
197
+ * corroborated (#2495)" footnote. The acceptance criterion is the opposite:
198
+ * an alarm must be backed by a LIVE probe, not a stale cache read.
199
+ */
200
+ export type CorroborationProbe = {
201
+ result: { ok: true } | { ok: false }; // probe outcome; isLiveCorroboration additionally requires served === "live"
202
+ /**
203
+ * How the result was sourced. `"live"` = fresh upstream probe (genuine
204
+ * corroboration). `"cache"` = served from the durable cache (TTL-hit or
205
+ * probe-failure fallback) — NOT corroboration. Absent on legacy responses,
206
+ * which we treat as NOT corroborated (fail-closed: never claim a live
207
+ * corroboration we can't prove).
208
+ */
209
+ served?: "live" | "cache";
210
+ };
211
+
212
+ /**
213
+ * #2495 BLOCKER fix — decide whether a forceLive corroboration probe counts
214
+ * as a genuine LIVE corroboration of the alarm.
215
+ *
216
+ * Genuine corroboration requires BOTH `result.ok` AND `served === "live"`.
217
+ * A result that is `ok:true` but `served:"cache"` (the failed-probe
218
+ * cache-fallback) is treated EXACTLY like a probe failure: it is NOT
219
+ * corroboration, so the caller must DEFER — leave watch state untouched and
220
+ * re-evaluate next tick when a true live probe can be obtained. A missing
221
+ * entry (`undefined`) is likewise not corroboration.
222
+ *
223
+ * Pure + total so it can be unit-tested at the seam without standing up the
224
+ * broker or the gateway loop.
225
+ */
226
export function isLiveCorroboration(entry: CorroborationProbe | undefined): boolean {
  // Fail closed on anything that is not a well-formed entry. The value comes
  // from the broker, so a missing entry or one without a `result` object must
  // read as "not corroborated" rather than throw — the function is meant to
  // be total.
  if (entry == null || entry.result == null) return false;
  // Genuine corroboration needs BOTH a successful probe AND a live source.
  // `ok: true` with `served: "cache"` (or with `served` absent, as on legacy
  // responses) is deliberately rejected.
  return entry.result.ok === true && entry.served === "live";
}
229
+
178
230
  /**
179
231
  * Evaluate one account's quota state against its last-notified health.
180
232
  *
@@ -224,7 +276,11 @@ export function evaluateQuotaWatchAccount(args: {
224
276
  return { kind: "skip", accountLabel: label, reason: "stale-snapshot" };
225
277
  }
226
278
 
227
- const currentHealth = classifyHealth(snap);
279
+ // #2494 Bug A — classify against THIS tick's clock so the refill
280
+ // normalization uses the same `now` the rest of the decision does (the
281
+ // default `new Date()` would diverge from a frozen test clock / a replayed
282
+ // tick and mis-zero a still-future reset window).
283
+ const currentHealth = classifyHealth(snap, new Date(now));
228
284
 
229
285
  // Unknown (probe failed) or blocked — skip entirely.
230
286
  if (currentHealth === "unknown" || currentHealth === "blocked") {
@@ -324,22 +380,58 @@ export type FleetAllExhaustedDecision =
324
380
  * cases the trigger-based interactive all-blocked card misses: a quiet period
325
381
  * (no agent happens to 429 into the wall) and the consumer/cron paths.
326
382
  *
327
- * Authoritative source: the broker's per-account `exhausted` flag (set by
328
- * mark-exhausted via failover + the consumer sensor), NOT probe-derived health
329
- * — so there is no probe-failure false-alarm. Requires at least one account;
330
- * an empty fleet never alerts.
383
+ * Source: the broker's per-account `exhausted` flag (set by mark-exhausted via
384
+ * failover + the consumer sensor). That flag is NOT purely live — `isAccountBlocked`
385
+ * (src/auth/broker/account-eligibility.ts) falls back to the persisted
386
+ * `exhausted_until` mark whenever there is no fresh live snapshot. During a
387
+ * broker-unreachable / probe-timeout blackout, short-lived auto-fallback marks
388
+ * can make `every(a.exhausted)` momentarily true with ZERO live corroboration
389
+ * (#2478, klanker 2026-06-20). So the `entered` alert requires POSITIVE LIVE
390
+ * CORROBORATION: an account counts toward "all exhausted" only when its
391
+ * `exhausted` flag is backed by a FRESH live snapshot (last_quota.capturedAt
392
+ * within `maxStaleMs`). If ANY account's exhaustion rests solely on a
393
+ * stale/absent-probe mark we are
394
+ * probe-blind and return `skip: "probe-blind"` — no false fleet alert. The
395
+ * guarantee is "no false alarm off stale marks during a probe blackout", NOT
396
+ * blanket probe-failure immunity. The `recovered` transition is unguarded so a
397
+ * legitimately-fired alert is never stranded. Requires at least one account; an
398
+ * empty fleet never alerts.
331
399
  */
332
400
  export function evaluateFleetAllExhausted(args: {
333
- accounts: Array<{ label: string; exhausted: boolean; exhausted_until?: number }>;
401
+ accounts: Array<{
402
+ label: string;
403
+ exhausted: boolean;
404
+ exhausted_until?: number;
405
+ /** Most-recent live probe snapshot, used to corroborate `exhausted`. */
406
+ last_quota?: {
407
+ capturedAt: number;
408
+ overageDisabledReason?: string | null;
409
+ } | null;
410
+ }>;
334
411
  prev: QuotaWatchAccountState;
335
412
  now: number;
413
+ /** Staleness ceiling for "fresh probe"; 0 disables the gate (legacy callers/tests). */
414
+ tuning?: Pick<QuotaWatchTuning, "maxStaleMs">;
336
415
  }): FleetAllExhaustedDecision {
337
416
  const { accounts, prev, now } = args;
417
+ const maxStaleMs = args.tuning?.maxStaleMs ?? 0;
338
418
  const allExhausted = accounts.length > 0 && accounts.every((a) => a.exhausted);
339
419
  // "throttling" doubles as the "currently alerting all-exhausted" marker.
340
420
  const wasAlerting = prev.lastNotifiedHealth === "throttling";
341
421
 
342
422
  if (allExhausted && !wasAlerting) {
423
+ // Probe-blind guard (#2478): only fire `entered` if EVERY account's
424
+ // exhaustion is backed by live evidence — a fresh snapshot. An account
425
+ // exhausted solely on a stale/absent mark means we have no live
426
+ // corroboration → skip rather than false-alarm.
427
+ if (maxStaleMs > 0) {
428
+ const allLiveCorroborated = accounts.every((a) =>
429
+ exhaustionLiveCorroborated(a, now, maxStaleMs),
430
+ );
431
+ if (!allLiveCorroborated) {
432
+ return { kind: "skip", reason: "probe-blind" };
433
+ }
434
+ }
343
435
  return {
344
436
  kind: "notify",
345
437
  message: buildAllExhaustedMessage(accounts, now),
@@ -358,6 +450,42 @@ export function evaluateFleetAllExhausted(args: {
358
450
  return { kind: "skip", reason: allExhausted ? "still-all-exhausted" : "not-all-exhausted" };
359
451
  }
360
452
 
453
+ /**
454
+ * Is an account's `exhausted` flag backed by live evidence (#2478)?
455
+ *
456
+ * True when the most-recent live probe is FRESH (`capturedAt` within
457
+ * `maxStaleMs`) — that fresh probe is what set/upholds the broker's blocked
458
+ * verdict. False when there is no `last_quota` at all, or the snapshot is
459
+ * stale: the `exhausted` flag then rests solely on a persisted mark with no
460
+ * live backing, which is exactly the probe-blind condition that false-fires
461
+ * the fleet alert.
462
+ *
463
+ * NOTE: `out_of_credits` is NOT treated as corroboration here. Per
464
+ * fix/out-of-credits-serve-block, out_of_credits is INFORMATIONAL — it is
465
+ * not exhaustion in its own right at any util. Corroboration requires a
466
+ * genuinely fresh quota snapshot (real 429 / util-wall path).
467
+ *
468
+ * Mirrors `snapshotFresh` in src/auth/broker/account-eligibility.ts (the
469
+ * serving-side authority); kept as a local check so the decision layer
470
+ * carries no broker dependency.
471
+ */
472
function exhaustionLiveCorroborated(
  account: {
    last_quota?: { capturedAt: number; overageDisabledReason?: string | null } | null;
  },
  now: number,
  maxStaleMs: number,
  // How far ahead of `now` a snapshot may be dated before it is rejected as
  // clock-skewed. Defaults to the 60_000 ms this check previously hard-coded.
  futureSkewToleranceMs = 60_000,
): boolean {
  const capturedAt = account.last_quota?.capturedAt;
  // No snapshot, or a timestamp that is not a finite number, is not live
  // evidence: the `exhausted` flag then rests on a persisted mark alone.
  if (typeof capturedAt !== "number" || !Number.isFinite(capturedAt)) return false;
  // Staleness gate: the snapshot must be at most `maxStaleMs` old.
  const freshEnough = now - capturedAt <= maxStaleMs;
  // Clock-skew guard: a future-dated `capturedAt` makes the age negative and
  // would otherwise pass the staleness gate, so cap how far ahead it may be.
  const notFromFuture = capturedAt <= now + futureSkewToleranceMs;
  return freshEnough && notFromFuture;
}
488
+
361
489
  function buildAllExhaustedMessage(
362
490
  accounts: Array<{ label: string; exhausted_until?: number }>,
363
491
  now: number,
@@ -420,7 +548,7 @@ function buildThrottlingMessage(agentName: string, snap: AccountSnapshot): strin
420
548
  `Binding window: ${winLabel}${resetStr}`,
421
549
  `${activeNote}${altNote}`,
422
550
  ``,
423
- `<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window. Source: broker quota cache.</i>`,
551
+ `<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window. Live-probe corroborated (#2495).</i>`,
424
552
  `<i>Run /auth for full fleet status or /usage for the active account.</i>`,
425
553
  ]
426
554
  .join("\n")