npm - switchroom - Versions diffs - 0.15.44 → 0.16.4 - Mend

switchroom 0.15.44 → 0.16.4

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (150)

package/dist/agent-scheduler/index.js +122 -88
package/dist/auth-broker/index.js +463 -177
package/dist/cli/autoaccept-poll.js +4842 -35
package/dist/cli/drive-write-pretool.mjs +17 -14
package/dist/cli/notion-write-pretool.mjs +117 -86
package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
package/dist/cli/self-improve-stop.mjs +428 -0
package/dist/cli/skill-validate-pretool.mjs +72 -72
package/dist/cli/switchroom.js +3249 -1241
package/dist/cli/ui/index.html +1 -1
package/dist/host-control/main.js +2833 -355
package/dist/vault/approvals/kernel-server.js +7482 -7439
package/dist/vault/broker/server.js +11315 -11272
package/examples/minimal.yaml +1 -0
package/examples/switchroom.yaml +1 -0
package/package.json +3 -3
package/profiles/_base/start.sh.hbs +88 -1
package/profiles/_shared/execution-discipline.md.hbs +18 -0
package/profiles/default/CLAUDE.md.hbs +3 -22
package/telegram-plugin/.claude-plugin/plugin.json +2 -2
package/telegram-plugin/answer-stream-flag.ts +12 -49
package/telegram-plugin/answer-stream.ts +5 -150
package/telegram-plugin/auth-snapshot-format.ts +280 -48
package/telegram-plugin/auto-fallback-fleet.ts +44 -1
package/telegram-plugin/context-exhaustion.ts +12 -0
package/telegram-plugin/demo-mask.ts +154 -0
package/telegram-plugin/dist/bridge/bridge.js +167 -124
package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
package/telegram-plugin/dist/server.js +215 -172
package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
package/telegram-plugin/draft-stream.ts +47 -410
package/telegram-plugin/final-answer-detect.ts +17 -12
package/telegram-plugin/fleet-fallback-resume.ts +131 -0
package/telegram-plugin/format.ts +56 -19
package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
package/telegram-plugin/gateway/auth-command.ts +70 -14
package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
package/telegram-plugin/gateway/current-turn-map.ts +188 -0
package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
package/telegram-plugin/gateway/effort-command.ts +8 -3
package/telegram-plugin/gateway/emission-authority.ts +369 -0
package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
package/telegram-plugin/gateway/gateway.ts +1837 -291
package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
package/telegram-plugin/gateway/represent-guard.ts +72 -0
package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
package/telegram-plugin/gateway/status-surface-log.ts +14 -3
package/telegram-plugin/history.ts +33 -11
package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
package/telegram-plugin/issues-card.ts +4 -0
package/telegram-plugin/model-unavailable.ts +124 -0
package/telegram-plugin/narrative-dedup.ts +69 -0
package/telegram-plugin/over-ping-safety-net.ts +70 -4
package/telegram-plugin/package.json +3 -3
package/telegram-plugin/pending-work-progress.ts +12 -0
package/telegram-plugin/permission-rule.ts +32 -5
package/telegram-plugin/permission-title.ts +152 -9
package/telegram-plugin/quota-check.ts +13 -0
package/telegram-plugin/quota-watch.ts +135 -7
package/telegram-plugin/registry/turns-schema.test.ts +24 -0
package/telegram-plugin/registry/turns-schema.ts +9 -0
package/telegram-plugin/runtime-metrics.ts +13 -0
package/telegram-plugin/session-tail.ts +96 -11
package/telegram-plugin/silence-poke.ts +170 -24
package/telegram-plugin/slot-banner-driver.ts +3 -0
package/telegram-plugin/status-no-truncate.ts +44 -0
package/telegram-plugin/status-reactions.ts +20 -3
package/telegram-plugin/stream-controller.ts +4 -23
package/telegram-plugin/stream-reply-handler.ts +6 -24
package/telegram-plugin/streaming-metrics.ts +91 -0
package/telegram-plugin/subagent-watcher.ts +212 -66
package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
package/telegram-plugin/tests/answer-stream.test.ts +2 -411
package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
package/telegram-plugin/tests/demo-mask.test.ts +127 -0
package/telegram-plugin/tests/draft-stream.test.ts +0 -827
package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
package/telegram-plugin/tests/feed-survival.test.ts +526 -0
package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
package/telegram-plugin/tests/history.test.ts +60 -0
package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
package/telegram-plugin/tests/permission-rule.test.ts +17 -0
package/telegram-plugin/tests/permission-title.test.ts +206 -17
package/telegram-plugin/tests/quota-watch.test.ts +252 -9
package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
package/telegram-plugin/tests/represent-guard.test.ts +162 -0
package/telegram-plugin/tests/session-tail.test.ts +147 -3
package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
package/telegram-plugin/tests/telegram-format.test.ts +101 -6
package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
package/telegram-plugin/tests/tool-labels.test.ts +67 -0
package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
package/telegram-plugin/tests/welcome-text.test.ts +32 -3
package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
package/telegram-plugin/tool-activity-summary.ts +375 -58
package/telegram-plugin/turn-liveness-floor.ts +240 -0
package/telegram-plugin/uat/assertions.ts +115 -0
package/telegram-plugin/uat/driver.ts +68 -0
package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
package/telegram-plugin/welcome-text.ts +13 -1
package/telegram-plugin/worker-activity-feed.ts +157 -82
package/telegram-plugin/draft-transport.ts +0 -122
package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
package/telegram-plugin/tests/draft-transport.test.ts +0 -211

package/telegram-plugin/over-ping-safety-net.ts CHANGED Viewed

@@ -25,7 +25,29 @@
  *     the decision says CLAIM the slot — caller sets `firstPingAt`.
  *   - When the model requested silent, this module is a no-op.
  *
+ * Notification ownership (R8 / PR-2). The bare "first ping wins" rule
+ * above has a residual failure: an interim ACK that pings first claims
+ * the turn's single slot, and the later SUBSTANTIVE answer is then
+ * downgraded to silent — "the reply is last but the phone never buzzed
+ * for the answer." To fix that without re-introducing model double-pings,
+ * the decision is now aware of WHO holds the slot and WHO is asking:
+ *
+ *   - A SUBSTANTIVE final asking to ping while the slot is held by a
+ *     NON-substantive (ack) send ⇒ do NOT suppress; let the answer ping
+ *     and UPGRADE the slot to substantive (the answer owns the ping even
+ *     though the ack already buzzed once — a deliberate, bounded second
+ *     ping so the user is notified of the actual answer).
+ *   - An ACK asking to ping while the slot is held by a SUBSTANTIVE send
+ *     ⇒ suppress (no spurious double-ping AFTER the real answer).
+ *   - A SUBSTANTIVE asking while the slot is held by a SUBSTANTIVE ⇒
+ *     suppress (preserves the #1674 model-double-ping guard: answer +
+ *     wrap-up should be one beep, not two).
+ *   - An ACK while the slot is held by an ACK ⇒ suppress (unchanged).
+ *
  * The slot is claimed BEFORE the actual send (caller responsibility).
+ * On a CLAIM or an UPGRADE the caller MUST set `firstPingAt` AND
+ * `firstPingWasSubstantive` ATOMICALLY (same synchronous block, no await
+ * between) so a racing second reply reads a consistent pair.
  * Trade-off documented inline in `gateway.ts:executeReply`.
  */
@@ -39,6 +61,18 @@ export interface OverPingDecisionInput {
    *  has landed yet. Caller threads this through from
    *  `CurrentTurn.firstPingAt`. */
   firstPingAt: number | null
+  /** True iff THIS reply is a substantive final answer (stream `done`,
+   *  or text length ≥ FINAL_ANSWER_MIN_CHARS) — as opposed to a short
+   *  interim ack. Caller computes via `isSubstantiveFinalReply`. Defaults
+   *  to `false` (treat as a non-substantive ack) when omitted, which
+   *  preserves the pre-PR-2 "first ping wins, the rest suppress" behaviour
+   *  for callers that don't yet thread it. */
+  substantive?: boolean
+  /** True iff the send that CLAIMED the turn's ping slot was itself a
+   *  substantive final answer. Caller threads this through from
+   *  `CurrentTurn.firstPingWasSubstantive`. Meaningless (and ignored)
+   *  when `firstPingAt == null`. Defaults to `false`. */
+  firstPingWasSubstantive?: boolean
   /** Deterministic clock for tests; defaults to Date.now() in callers. */
   nowMs: number
 }
@@ -49,8 +83,18 @@ export interface OverPingDecision {
    *  violation by the model — caller should log + emit a metric. */
   suppress: boolean
   /** True iff the caller should claim the slot —
-   *  `turn.firstPingAt = nowMs`. Mutually exclusive with `suppress`. */
+   *  `turn.firstPingAt = nowMs` AND
+   *  `turn.firstPingWasSubstantive = substantive`. Mutually exclusive
+   *  with `suppress`. Set both on a fresh claim (no prior ping) and on
+   *  an UPGRADE (a substantive answer pinging over an ack's slot). */
   claimSlot: boolean
+  /** True iff this is an UPGRADE — a substantive final answer claiming
+   *  the ping slot that was previously held by a NON-substantive ack.
+   *  The answer pings even though the ack already buzzed once. Implied
+   *  by `claimSlot && firstPingAt != null` but surfaced explicitly so
+   *  the caller can log/meter the (intentional) second ping distinctly
+   *  from a normal first claim. Always false on a suppress or a no-op. */
+  upgrade: boolean
   /** When `suppress` is true, how long the first ping has been
    *  "active" (ms since `firstPingAt`). Caller surfaces this in the
    *  log + metric for forensic analysis (e.g. tight rapid double-pings
@@ -63,18 +107,40 @@ export interface OverPingDecision {
  * No mutation, no IO, deterministic under a fixed `nowMs`.
  */
 export function decideOverPing(input: OverPingDecisionInput): OverPingDecision {
+  const substantive = input.substantive === true
+  const firstPingWasSubstantive = input.firstPingWasSubstantive === true
   if (!input.modelRequestedPing) {
     // Model already chose silent — nothing for the safety net to do.
-    return { suppress: false, claimSlot: false, sinceFirstPingMs: null }
+    return { suppress: false, claimSlot: false, upgrade: false, sinceFirstPingMs: null }
   }
   if (input.firstPingAt != null) {
-    // Slot already claimed by an earlier ping this turn — suppress.
+    // The turn's ping slot is already held. WHO holds it and WHO is
+    // asking decides whether this is a notification-ownership UPGRADE or
+    // a double-ping to suppress (see the module doc-comment for the full
+    // matrix).
+    if (substantive && !firstPingWasSubstantive) {
+      // The substantive ANSWER is pinging over a slot held by an ack.
+      // Let it ping and upgrade the slot to substantive — the answer
+      // owns the turn's notification, not the earlier ack.
+      return {
+        suppress: false,
+        claimSlot: true,
+        upgrade: true,
+        sinceFirstPingMs: null,
+      }
+    }
+    // Every other slot-held case is a double-ping to suppress:
+    //   - ack over substantive: a spurious wrap-up after the real answer
+    //   - substantive over substantive: the #1674 answer+wrap-up guard
+    //   - ack over ack: the original one-ping-per-turn behaviour
     return {
       suppress: true,
       claimSlot: false,
+      upgrade: false,
       sinceFirstPingMs: input.nowMs - input.firstPingAt,
     }
   }
   // First ping this turn — let it through and claim the slot.
-  return { suppress: false, claimSlot: true, sinceFirstPingMs: null }
+  return { suppress: false, claimSlot: true, upgrade: false, sinceFirstPingMs: null }
 }

package/telegram-plugin/package.json CHANGED Viewed

@@ -40,12 +40,12 @@
   },
   "repository": {
     "type": "git",
-    "url": "https://github.com/mekenthompson/switchroom.git",
+    "url": "https://github.com/switchroom/switchroom.git",
     "directory": "telegram-plugin"
   },
-  "homepage": "https://github.com/mekenthompson/switchroom/tree/main/telegram-plugin#readme",
+  "homepage": "https://github.com/switchroom/switchroom/tree/main/telegram-plugin#readme",
   "bugs": {
-    "url": "https://github.com/mekenthompson/switchroom/issues"
+    "url": "https://github.com/switchroom/switchroom/issues"
   },
   "publishConfig": {
     "access": "public"

package/telegram-plugin/pending-work-progress.ts CHANGED Viewed

@@ -284,6 +284,18 @@ export function noteTurnEnd(key: string): void {
   }
 }
+/**
+ * True when the current turn for `key` dispatched async background work
+ * (Agent / Task / Bash run_in_background:true) but the turn has not yet ended
+ * with a cleared pending flag.  Used by the feed-survival predicate so the
+ * orphaned-reply backstop and silence-poke teardown are deferred while a
+ * detached background process is still running — even after inFlight empties
+ * when the near-instant tool_result (e.g. the Bash background handle) returns.
+ */
+export function hasPendingAsyncDispatch(key: string): boolean {
+  return stateByKey.get(key)?.pending === true
+}
 /**
  * Clear pending-progress for a chat — reasons:
  *   'inbound'   — user sent a new message, they're re-engaged

package/telegram-plugin/permission-rule.ts CHANGED Viewed

@@ -91,7 +91,7 @@ export function resolveScopedAllowChoices(
   // ── File tools: this exact path vs any file.
   if (FILE_TOOLS.has(toolName)) {
-    const path = filePathFrom(input);
+    const path = filePathFrom(input, inputPreview);
     const broad: ScopeOption = { rule: toolName, buttonLabel: "Any file", broad: true };
     if (path) {
       return {
@@ -163,9 +163,36 @@ function resolveSkillName(input: Record<string, unknown>): string | null {
   );
 }
-function filePathFrom(input: Record<string, unknown> | null): string | null {
-  if (!input) return null;
-  return readString(input, "file_path") ?? readString(input, "notebook_path");
+function filePathFrom(
+  input: Record<string, unknown> | null,
+  rawPreview?: string,
+): string | null {
+  if (input) {
+    const p = readString(input, "file_path") ?? readString(input, "notebook_path");
+    if (p) return p;
+  }
+  // Claude Code truncates inputPreview to 200 chars, making the surrounding
+  // JSON invalid for Edit/Write (old_string/new_string push it past 200).
+  // "file_path" is the first key, so its value is intact in the truncated
+  // prefix — extract it with a lenient regex on the raw string.
+  if (rawPreview) return extractFilePathFromRaw(rawPreview);
+  return null;
+}
+/**
+ * Regex-based fallback to extract "file_path" or "notebook_path" from a raw
+ * (possibly truncated / invalid-JSON) inputPreview string. JSON-unescapes the
+ * captured value. Returns null when neither key is present or value is empty.
+ */
+function extractFilePathFromRaw(raw: string): string | null {
+  const m = /"(?:file_path|notebook_path)"\s*:\s*"((?:[^"\\]|\\.)*)"/.exec(raw);
+  if (!m) return null;
+  try {
+    const value = JSON.parse(`"${m[1]}"`) as string;
+    return typeof value === "string" && value.length > 0 ? value : null;
+  } catch {
+    return null;
+  }
 }
 /**
@@ -274,7 +301,7 @@ export function matchesAllowRule(
       return bashFirstToken(cmd) === m[1];
     }
     if (FILE_TOOLS.has(ruleTool)) {
-      return filePathFrom(input) === arg;
+      return filePathFrom(input, inputPreview) === arg;
     }
     return false;
   }

package/telegram-plugin/permission-title.ts CHANGED Viewed

@@ -77,6 +77,21 @@ const INTERNAL_MCP_SERVERS = new Set([
   "switchroom-telegram",
 ]);
+/**
+ * hostd fleet verbs that take a target agent `name` as a required arg. The
+ * approval card MUST name WHICH agent is targeted (#2469) — "restart an
+ * agent" with no name leaves the operator blind. We interpolate the target
+ * into the curated phrase: "restart an agent in the fleet" → "restart agent
+ * `carrie` in the fleet". Stays generic when `name` is absent (never crash).
+ */
+const HOSTD_AGENT_TARGET_VERBS = new Set([
+  "mcp__hostd__agent_restart",
+  "mcp__hostd__agent_start",
+  "mcp__hostd__agent_stop",
+  "mcp__hostd__agent_logs",
+  "mcp__hostd__agent_exec",
+]);
 /**
  * Build the multi-line card body for an approval prompt.
  *
@@ -86,10 +101,23 @@ const INTERNAL_MCP_SERVERS = new Set([
  * Output is HTML-escaped for `parse_mode: 'HTML'`. The agent name is
  * capitalized for the sentence; dropped (with "wants to") when null —
  * the bridge client can be anonymous during early-boot edge cases.
+ *
+ * The `why:` line is the CALLER's stated rationale — the `reason`/`why`
+ * argument on the tool input, NOT the tool's static JSONSchema
+ * `description`. The schema description is documentation (it can contain
+ * literal tokens like `$SWITCHROOM_AGENT_NAME`), so surfacing it as the
+ * "why" reads like an un-interpolated variable and discards the agent's
+ * actual reason (#2469). We only fall back to "not provided" — never to
+ * the schema description.
  */
 export function formatPermissionCardBody(opts: {
   toolName: string;
   inputPreview: string | undefined;
+  /**
+   * The tool's static JSONSchema description. Retained for the signature
+   * (callers still pass it) but deliberately NOT used as the `why:` line —
+   * see #2469. The caller's rationale comes from the input args instead.
+   */
   description: string | undefined;
   agentName: string | null;
 }): string {
@@ -104,7 +132,10 @@ export function formatPermissionCardBody(opts: {
     lines.push(`🔐 ${escapeTgHtml(capFirst(action))}`);
   }
-  const rawWhy = (opts.description ?? "").replace(/\s+/g, " ").trim();
+  // why: the caller-supplied rationale (`reason`/`why` arg), never the
+  // static schema description (#2469).
+  const callerReason = callerSuppliedReason(opts.inputPreview);
+  const rawWhy = (callerReason ?? "").replace(/\s+/g, " ").trim();
   const truncatedWhy =
     rawWhy.length > DESCRIPTION_LINE_MAX
       ? rawWhy.slice(0, DESCRIPTION_LINE_MAX - 1) + "…"
@@ -142,15 +173,15 @@ export function naturalAction(
     case "Edit":
     case "MultiEdit":
     case "NotebookEdit": {
-      const f = fileBase(input);
+      const f = fileBase(input, inputPreview);
       return f ? `edit: ${f}` : "edit files";
     }
     case "Write": {
-      const f = fileBase(input);
+      const f = fileBase(input, inputPreview);
       return f ? `write: ${f}` : "write files";
     }
     case "Read": {
-      const f = fileBase(input);
+      const f = fileBase(input, inputPreview);
       return f ? `read: ${f}` : "read files";
     }
     case "Bash": {
@@ -194,7 +225,7 @@ function naturalMcpAction(
   const server = parts.length >= 2 ? parts[1]! : "";
   const curated = MCP_TOOL_DESCRIPTIONS[toolName];
   if (curated) {
-    const phrase = lowerFirst(curated);
+    const phrase = hostdAgentPhrase(toolName, input) ?? lowerFirst(curated);
     return INTERNAL_MCP_SERVERS.has(server)
       ? phrase
       : `${phrase} (${prettyMcpServer(server)})`;
@@ -217,6 +248,37 @@ function naturalMcpAction(
   return `use ${toolName}`;
 }
+/**
+ * For the hostd `agent_*` fleet verbs, build an action phrase that NAMES the
+ * target agent (#2469) — "restart agent `carrie` in the fleet". The verb is
+ * derived from the tool name (`agent_restart` → "restart"); `agent_logs` /
+ * `agent_exec` get bespoke phrasing. Returns null when the tool isn't a
+ * name-targeted hostd verb or no `name` arg is present, so the caller falls
+ * back to the generic curated phrase (never crashes on a missing name).
+ */
+function hostdAgentPhrase(
+  toolName: string,
+  input: Record<string, unknown> | null,
+): string | null {
+  if (!HOSTD_AGENT_TARGET_VERBS.has(toolName)) return null;
+  const name = input ? readString(input, "name") : null;
+  if (!name) return null;
+  switch (toolName) {
+    case "mcp__hostd__agent_restart":
+      return `restart agent \`${name}\` in the fleet`;
+    case "mcp__hostd__agent_start":
+      return `start agent \`${name}\` in the fleet`;
+    case "mcp__hostd__agent_stop":
+      return `stop agent \`${name}\` in the fleet`;
+    case "mcp__hostd__agent_logs":
+      return `read agent \`${name}\`'s container logs`;
+    case "mcp__hostd__agent_exec":
+      return `run a read-only inspection inside agent \`${name}\``;
+    default:
+      return null;
+  }
+}
 /**
  * For a REST-wrapper MCP call ({ path, body?, query? }), build the action
  * phrase "<VERB> <path> (<Server>)" — e.g. "POST /smtp/email (Brevo)". The
@@ -405,10 +467,43 @@ function resolveSkillName(input: Record<string, unknown>): string | null {
   );
 }
-function fileBase(input: Record<string, unknown> | null): string | null {
-  if (!input) return null;
-  const p = readString(input, "file_path") ?? readString(input, "notebook_path");
-  return p ? basename(p) : null;
+function fileBase(
+  input: Record<string, unknown> | null,
+  rawPreview?: string,
+): string | null {
+  if (input) {
+    const p = readString(input, "file_path") ?? readString(input, "notebook_path");
+    if (p) return basename(p);
+  }
+  // Claude Code truncates inputPreview to 200 chars, making the surrounding
+  // JSON invalid (Edit/Write always exceed 200 chars once old_string/new_string
+  // are included). "file_path" is the first key, so its value is intact in the
+  // truncated prefix — extract it with a lenient regex on the raw string.
+  if (rawPreview) {
+    const p = extractFilePathFromRaw(rawPreview);
+    if (p) return basename(p);
+  }
+  return null;
+}
+/**
+ * Regex-based fallback to extract "file_path" or "notebook_path" from a raw
+ * (possibly truncated / invalid-JSON) inputPreview string. JSON-unescapes the
+ * captured value so paths with backslashes or unicode escapes are returned
+ * correctly. Returns null when neither key is present or the captured value is
+ * empty.
+ */
+function extractFilePathFromRaw(raw: string): string | null {
+  // Match the first occurrence of "file_path" or "notebook_path".
+  const m = /"(?:file_path|notebook_path)"\s*:\s*"((?:[^"\\]|\\.)*)"/.exec(raw);
+  if (!m) return null;
+  try {
+    // JSON.parse the quoted string literal so escape sequences are resolved.
+    const value = JSON.parse(`"${m[1]}"`) as string;
+    return typeof value === "string" && value.length > 0 ? value : null;
+  } catch {
+    return null;
+  }
 }
 function lowerFirst(text: string): string {
@@ -447,6 +542,54 @@ function readString(input: Record<string, unknown>, key: string): string | null
   return typeof value === "string" && value.length > 0 ? value : null;
 }
+/**
+ * The caller's stated rationale for a tool call — the `reason` (or `why`)
+ * argument it passed. This is the agent's actual justification, which is
+ * what belongs on the `why:` line of the approval card. Returns null when
+ * no reason was supplied (caller renders "not provided") — we never fall
+ * back to the tool's static schema description (#2469).
+ */
+function callerSuppliedReason(inputPreview: string | undefined): string | null {
+  const input = parseInput(inputPreview);
+  if (input) {
+    const fromJson = readString(input, "reason") ?? readString(input, "why");
+    if (fromJson) return fromJson;
+  }
+  // Truncation fallback (#2580 follow-up): upstream Claude Code truncates
+  // `inputPreview` to ~200 chars. For a tool whose first/largest key is a
+  // big blob (e.g. config_propose_edit's `unified_diff`), the truncated JSON
+  // is unparseable and the schema-required `reason` is lost — the card then
+  // renders "why: not provided" even though a reason WAS supplied. Mirror the
+  // `extractFilePathFromRaw` lenient-regex fallback so a `reason`/`why` value
+  // surviving in the truncated prefix is still recovered. (Reordering the
+  // schema so `reason` precedes the blob keeps it inside the 200-char prefix;
+  // this regex is what then reads it back out.)
+  if (inputPreview) {
+    const r = extractReasonFromRaw(inputPreview);
+    if (r) return r;
+  }
+  return null;
+}
+/**
+ * Regex-based fallback to extract a `reason` or `why` value from a raw
+ * (possibly truncated / invalid-JSON) inputPreview string. Mirrors
+ * `extractFilePathFromRaw`: JSON-unescapes the captured value so a reason
+ * with quotes/backslashes/unicode escapes is returned correctly. Returns
+ * null when neither key is present or the captured value is empty/whitespace.
+ */
+export function extractReasonFromRaw(raw: string): string | null {
+  // Match the first occurrence of "reason" or "why".
+  const m = /"(?:reason|why)"\s*:\s*"((?:[^"\\]|\\.)*)"/.exec(raw);
+  if (!m) return null;
+  try {
+    const value = JSON.parse(`"${m[1]}"`) as string;
+    return typeof value === "string" && value.trim().length > 0 ? value : null;
+  } catch {
+    return null;
+  }
+}
 function skillBasenameFromPath(input: Record<string, unknown>): string | null {
   const path = readString(input, "path") ?? readString(input, "skill_path");
   if (!path) return null;

package/telegram-plugin/quota-check.ts CHANGED Viewed

@@ -54,6 +54,15 @@ export type QuotaUtilization = {
   representativeClaim: string | null;
   overageStatus: string | null;
   overageDisabledReason: string | null;
+  /**
+   * #2494 Bug C — header-presence markers. Mirror of the field in
+   * `src/auth/quota.ts` (kept in sync across the bundle boundary). The
+   * utilization fields are always numeric (a missing header coalesces to 0),
+   * so on their own they cannot tell a genuine 0% from a filled-0 thin probe.
+   * Optional → unset means "real probe" (legacy snapshots / fixtures).
+   */
+  fiveHourUtilPresent?: boolean;
+  sevenDayUtilPresent?: boolean;
 };
 export type QuotaResult =
@@ -120,8 +129,12 @@ export function parseQuotaHeaders(headers: Headers): QuotaResult {
   return {
     ok: true,
     data: {
+      // #2494 Bug C — coalesce missing window to 0 for back-compat but record
+      // which windows were actually present (both-absent returned ok:false).
       fiveHourUtilizationPct: (fiveHour ?? 0) * 100,
       sevenDayUtilizationPct: (sevenDay ?? 0) * 100,
+      fiveHourUtilPresent: fiveHour != null,
+      sevenDayUtilPresent: sevenDay != null,
       fiveHourResetAt: parseEpochHeader(headers, "anthropic-ratelimit-unified-5h-reset"),
       sevenDayResetAt: parseEpochHeader(headers, "anthropic-ratelimit-unified-7d-reset"),
       representativeClaim: headers.get("anthropic-ratelimit-unified-representative-claim"),

package/telegram-plugin/quota-watch.ts CHANGED Viewed

@@ -30,6 +30,13 @@
  * IPC call (cheap). `probeQuota` is only called on state-change (when
  * we're going to send a message anyway) to get fresh numbers for the
  * notification body. On no-change polls, only `listState` is called.
+ *
+ * #2495 Change 3 — the transition-to-alarm probe is `forceLive` (bypasses
+ * the broker's probe-on-open TTL), so the DECISION to alarm is corroborated
+ * by a TRUE live probe of the affected account, not a possibly-stale cache
+ * read. The re-evaluation with fresh numbers can suppress an alarm whose
+ * stale-snapshot transition no longer holds. Steady state stays cheap: a
+ * no-change poll never probes. Cost is one live probe per transition edge.
  */
 import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
@@ -175,6 +182,51 @@ export type QuotaWatchDecision =
     }
   | { kind: "skip"; accountLabel: string; reason: string };
+/**
+ * #2495 BLOCKER fix — the corroboration probe result, as the gateway's
+ * runQuotaWatch sees it from `brokerClient.probeQuota(..., forceLive=true)`.
+ * Structurally a subset of `ProbeQuotaEntry` (src/auth/broker/client.ts): a
+ * `result` discriminated on `ok`, plus a `served` tag the broker stamps to
+ * say HOW the result was sourced.
+ *
+ * Only the `ok` discriminant of `result` is declared on this type; no payload
+ * fields are modelled here.
+ *
+ * The trap this guards: under `forceLive`, when the upstream live probe FAILS
+ * and the broker holds a prior snapshot, it returns `cachedSnapshotToResult`
+ * — `result.ok === true` but `served === "cache"` (server.ts opProbeQuota).
+ * A naive `result.ok` check then treats that stale cache read as a live
+ * corroboration, fires the alarm, and stamps the false "Live-probe
+ * corroborated (#2495)" footnote. The acceptance criterion is the opposite:
+ * an alarm must be backed by a LIVE probe, not a stale cache read.
+ */
+export type CorroborationProbe = {
+  result: { ok: true } | { ok: false };
+  /**
+   * How the result was sourced. `"live"` = fresh upstream probe (genuine
+   * corroboration). `"cache"` = served from the durable cache (TTL-hit or
+   * probe-failure fallback) — NOT corroboration. Absent on legacy responses,
+   * which we treat as NOT corroborated (fail-closed: never claim a live
+   * corroboration we can't prove).
+   */
+  served?: "live" | "cache";
+};
+/**
+ * #2495 BLOCKER fix — decide whether a forceLive corroboration probe counts
+ * as a genuine LIVE corroboration of the alarm.
+ *
+ * Genuine corroboration requires BOTH `result.ok` AND `served === "live"`.
+ * A result that is `ok:true` but `served:"cache"` (the failed-probe
+ * cache-fallback) is treated EXACTLY like a probe failure: it is NOT
+ * corroboration, so the caller must DEFER — leave watch state untouched and
+ * re-evaluate next tick when a true live probe can be obtained. A missing
+ * entry (`undefined`) is likewise not corroboration.
+ *
+ * Pure + total so it can be unit-tested at the seam without standing up the
+ * broker or the gateway loop.
+ *
+ * @param entry - The probe entry for the affected account, or `undefined`
+ *   when no entry is available.
+ * @returns `true` only when `entry.result.ok === true` and
+ *   `entry.served === "live"`; `false` in every other case, including an
+ *   `undefined` entry or an absent `served` tag.
+ */
+export function isLiveCorroboration(entry: CorroborationProbe | undefined): boolean {
+  return entry?.result.ok === true && entry.served === "live";
+}
 /**
  * Evaluate one account's quota state against its last-notified health.
  *
@@ -224,7 +276,11 @@ export function evaluateQuotaWatchAccount(args: {
     return { kind: "skip", accountLabel: label, reason: "stale-snapshot" };
   }
-  const currentHealth = classifyHealth(snap);
+  // #2494 Bug A — classify against THIS tick's clock so the refill
+  // normalization uses the same `now` the rest of the decision does (the
+  // default `new Date()` would diverge from a frozen test clock / a replayed
+  // tick and mis-zero a still-future reset window).
+  const currentHealth = classifyHealth(snap, new Date(now));
   // Unknown (probe failed) or blocked — skip entirely.
   if (currentHealth === "unknown" || currentHealth === "blocked") {
@@ -324,22 +380,58 @@ export type FleetAllExhaustedDecision =
  * cases the trigger-based interactive all-blocked card misses: a quiet period
  * (no agent happens to 429 into the wall) and the consumer/cron paths.
  *
- * Authoritative source: the broker's per-account `exhausted` flag (set by
- * mark-exhausted via failover + the consumer sensor), NOT probe-derived health
- * — so there is no probe-failure false-alarm. Requires at least one account;
- * an empty fleet never alerts.
+ * Source: the broker's per-account `exhausted` flag (set by mark-exhausted via
+ * failover + the consumer sensor). That flag is NOT purely live — `isAccountBlocked`
+ * (src/auth/broker/account-eligibility.ts) falls back to the persisted
+ * `exhausted_until` mark whenever there is no fresh live snapshot. During a
+ * broker-unreachable / probe-timeout blackout, short-lived auto-fallback marks
+ * can make `every(a.exhausted)` momentarily true with ZERO live corroboration
+ * (#2478, klanker 2026-06-20). So the `entered` alert requires POSITIVE LIVE
+ * CORROBORATION: an account counts toward "all exhausted" only when its
+ * `exhausted` flag is backed by a FRESH live snapshot (last_quota.capturedAt
+ * within `maxStaleMs`). If ANY account's exhaustion rests solely on a
+ * stale/absent-probe mark, we are probe-blind and return
+ * `{ kind: "skip", reason: "probe-blind" }` — no false fleet alert. The
+ * guarantee is "no false alarm off stale marks during a probe blackout", NOT
+ * blanket probe-failure immunity. The `recovered` transition is unguarded so a
+ * legitimately-fired alert is never stranded. Requires at least one account; an
+ * empty fleet never alerts.
  */
 export function evaluateFleetAllExhausted(args: {
-  accounts: Array<{ label: string; exhausted: boolean; exhausted_until?: number }>;
+  accounts: Array<{
+    label: string;
+    exhausted: boolean;
+    exhausted_until?: number;
+    /** Most-recent live probe snapshot, used to corroborate `exhausted`. */
+    last_quota?: {
+      capturedAt: number;
+      overageDisabledReason?: string | null;
+    } | null;
+  }>;
   prev: QuotaWatchAccountState;
   now: number;
+  /** Staleness ceiling for "fresh probe"; 0 or omitted disables the gate (legacy callers/tests). */
+  tuning?: Pick<QuotaWatchTuning, "maxStaleMs">;
 }): FleetAllExhaustedDecision {
   const { accounts, prev, now } = args;
+  const maxStaleMs = args.tuning?.maxStaleMs ?? 0;
   const allExhausted = accounts.length > 0 && accounts.every((a) => a.exhausted);
   // "throttling" doubles as the "currently alerting all-exhausted" marker.
   const wasAlerting = prev.lastNotifiedHealth === "throttling";
   if (allExhausted && !wasAlerting) {
+    // Probe-blind guard (#2478): only fire `entered` if EVERY account's
+    // exhaustion is backed by live evidence — a fresh snapshot. An account
+    // exhausted solely on a stale/absent mark means we have no live
+    // corroboration → skip rather than false-alarm.
+    if (maxStaleMs > 0) {
+      const allLiveCorroborated = accounts.every((a) =>
+        exhaustionLiveCorroborated(a, now, maxStaleMs),
+      );
+      if (!allLiveCorroborated) {
+        return { kind: "skip", reason: "probe-blind" };
+      }
+    }
     return {
       kind: "notify",
       message: buildAllExhaustedMessage(accounts, now),
@@ -358,6 +450,42 @@ export function evaluateFleetAllExhausted(args: {
   return { kind: "skip", reason: allExhausted ? "still-all-exhausted" : "not-all-exhausted" };
 }
+/**
+ * Is an account's `exhausted` flag backed by live evidence (#2478)?
+ *
+ * True when the most-recent live probe is FRESH (`capturedAt` within
+ * `maxStaleMs`) — that fresh probe is what set/upholds the broker's blocked
+ * verdict. False when there is no `last_quota` at all, or the snapshot is
+ * stale: the `exhausted` flag then rests solely on a persisted mark with no
+ * live backing, which is exactly the probe-blind condition that false-fires
+ * the fleet alert. Also false when `capturedAt` is dated more than 60_000 ms
+ * ahead of `now` (clock-skew guard, see the inline comment in the body).
+ *
+ * NOTE: `out_of_credits` is NOT treated as corroboration here. Per
+ * fix/out-of-credits-serve-block, out_of_credits is INFORMATIONAL — it is
+ * not exhaustion in its own right at any util. Corroboration requires a
+ * genuinely fresh quota snapshot (real 429 / util-wall path).
+ * Accordingly `overageDisabledReason` is accepted in the parameter type but
+ * never read by this function.
+ *
+ * Mirrors `snapshotFresh` in src/auth/broker/account-eligibility.ts (the
+ * serving-side authority); kept as a local check so the decision layer
+ * carries no broker dependency.
+ *
+ * @param account - Object carrying the optional `last_quota` snapshot; a
+ *   `null` or missing snapshot yields `false`.
+ * @param now - Current time, compared directly against `capturedAt` (same
+ *   unit as `capturedAt`).
+ * @param maxStaleMs - Maximum allowed age (`now - capturedAt`) for the
+ *   snapshot to count as fresh.
+ * @returns `true` when a snapshot exists, its age is at most `maxStaleMs`,
+ *   and `capturedAt` is no later than `now + 60_000`; otherwise `false`.
+ */
+function exhaustionLiveCorroborated(
+  account: {
+    last_quota?: { capturedAt: number; overageDisabledReason?: string | null } | null;
+  },
+  now: number,
+  maxStaleMs: number,
+): boolean {
+  const lq = account.last_quota;
+  if (!lq) return false;
+  // Mirror `snapshotFresh`'s clock-skew guard: a future-dated `capturedAt`
+  // makes `now - capturedAt` negative and would slip past the staleness gate,
+  // so a skewed snapshot reads as fresh. Reject snapshots dated more than the
+  // broker's 60_000 ms tolerance ahead of `now` (matches the inline literal in
+  // `snapshotFresh`, src/auth/broker/account-eligibility.ts).
+  return now - lq.capturedAt <= maxStaleMs && lq.capturedAt <= now + 60_000;
+}
 function buildAllExhaustedMessage(
   accounts: Array<{ label: string; exhausted_until?: number }>,
   now: number,
@@ -420,7 +548,7 @@ function buildThrottlingMessage(agentName: string, snap: AccountSnapshot): strin
     `Binding window: ${winLabel}${resetStr}`,
     `${activeNote}${altNote}`,
     ``,
-    `<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window. Source: broker quota cache.</i>`,
+    `<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window. Live-probe corroborated (#2495).</i>`,
     `<i>Run /auth for full fleet status or /usage for the active account.</i>`,
   ]
     .join("\n")