switchroom 0.15.44 → 0.16.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +122 -88
- package/dist/auth-broker/index.js +463 -177
- package/dist/cli/autoaccept-poll.js +4842 -35
- package/dist/cli/drive-write-pretool.mjs +17 -14
- package/dist/cli/notion-write-pretool.mjs +117 -86
- package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
- package/dist/cli/self-improve-stop.mjs +428 -0
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +3249 -1241
- package/dist/cli/ui/index.html +1 -1
- package/dist/host-control/main.js +2833 -355
- package/dist/vault/approvals/kernel-server.js +7482 -7439
- package/dist/vault/broker/server.js +11315 -11272
- package/examples/minimal.yaml +1 -0
- package/examples/switchroom.yaml +1 -0
- package/package.json +3 -3
- package/profiles/_base/start.sh.hbs +88 -1
- package/profiles/_shared/execution-discipline.md.hbs +18 -0
- package/profiles/default/CLAUDE.md.hbs +3 -22
- package/telegram-plugin/.claude-plugin/plugin.json +2 -2
- package/telegram-plugin/answer-stream-flag.ts +12 -49
- package/telegram-plugin/answer-stream.ts +5 -150
- package/telegram-plugin/auth-snapshot-format.ts +280 -48
- package/telegram-plugin/auto-fallback-fleet.ts +44 -1
- package/telegram-plugin/context-exhaustion.ts +12 -0
- package/telegram-plugin/demo-mask.ts +154 -0
- package/telegram-plugin/dist/bridge/bridge.js +167 -124
- package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
- package/telegram-plugin/dist/server.js +215 -172
- package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
- package/telegram-plugin/draft-stream.ts +47 -410
- package/telegram-plugin/final-answer-detect.ts +17 -12
- package/telegram-plugin/fleet-fallback-resume.ts +131 -0
- package/telegram-plugin/format.ts +56 -19
- package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
- package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
- package/telegram-plugin/gateway/auth-command.ts +70 -14
- package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
- package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
- package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
- package/telegram-plugin/gateway/current-turn-map.ts +188 -0
- package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
- package/telegram-plugin/gateway/effort-command.ts +8 -3
- package/telegram-plugin/gateway/emission-authority.ts +369 -0
- package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
- package/telegram-plugin/gateway/gateway.ts +1837 -291
- package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
- package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
- package/telegram-plugin/gateway/represent-guard.ts +72 -0
- package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
- package/telegram-plugin/gateway/status-surface-log.ts +14 -3
- package/telegram-plugin/history.ts +33 -11
- package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
- package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
- package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
- package/telegram-plugin/issues-card.ts +4 -0
- package/telegram-plugin/model-unavailable.ts +124 -0
- package/telegram-plugin/narrative-dedup.ts +69 -0
- package/telegram-plugin/over-ping-safety-net.ts +70 -4
- package/telegram-plugin/package.json +3 -3
- package/telegram-plugin/pending-work-progress.ts +12 -0
- package/telegram-plugin/permission-rule.ts +32 -5
- package/telegram-plugin/permission-title.ts +152 -9
- package/telegram-plugin/quota-check.ts +13 -0
- package/telegram-plugin/quota-watch.ts +135 -7
- package/telegram-plugin/registry/turns-schema.test.ts +24 -0
- package/telegram-plugin/registry/turns-schema.ts +9 -0
- package/telegram-plugin/runtime-metrics.ts +13 -0
- package/telegram-plugin/session-tail.ts +96 -11
- package/telegram-plugin/silence-poke.ts +170 -24
- package/telegram-plugin/slot-banner-driver.ts +3 -0
- package/telegram-plugin/status-no-truncate.ts +44 -0
- package/telegram-plugin/status-reactions.ts +20 -3
- package/telegram-plugin/stream-controller.ts +4 -23
- package/telegram-plugin/stream-reply-handler.ts +6 -24
- package/telegram-plugin/streaming-metrics.ts +91 -0
- package/telegram-plugin/subagent-watcher.ts +212 -66
- package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
- package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
- package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
- package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
- package/telegram-plugin/tests/answer-stream.test.ts +2 -411
- package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
- package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
- package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
- package/telegram-plugin/tests/demo-mask.test.ts +127 -0
- package/telegram-plugin/tests/draft-stream.test.ts +0 -827
- package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
- package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
- package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
- package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
- package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
- package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
- package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
- package/telegram-plugin/tests/feed-survival.test.ts +526 -0
- package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
- package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
- package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
- package/telegram-plugin/tests/history.test.ts +60 -0
- package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
- package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
- package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
- package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
- package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
- package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
- package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
- package/telegram-plugin/tests/permission-rule.test.ts +17 -0
- package/telegram-plugin/tests/permission-title.test.ts +206 -17
- package/telegram-plugin/tests/quota-watch.test.ts +252 -9
- package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
- package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
- package/telegram-plugin/tests/represent-guard.test.ts +162 -0
- package/telegram-plugin/tests/session-tail.test.ts +147 -3
- package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
- package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
- package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
- package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
- package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
- package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
- package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
- package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
- package/telegram-plugin/tests/telegram-format.test.ts +101 -6
- package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
- package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
- package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
- package/telegram-plugin/tests/tool-labels.test.ts +67 -0
- package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
- package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
- package/telegram-plugin/tests/welcome-text.test.ts +32 -3
- package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
- package/telegram-plugin/tool-activity-summary.ts +375 -58
- package/telegram-plugin/turn-liveness-floor.ts +240 -0
- package/telegram-plugin/uat/assertions.ts +115 -0
- package/telegram-plugin/uat/driver.ts +68 -0
- package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
- package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
- package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
- package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
- package/telegram-plugin/welcome-text.ts +13 -1
- package/telegram-plugin/worker-activity-feed.ts +157 -82
- package/telegram-plugin/draft-transport.ts +0 -122
- package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
- package/telegram-plugin/tests/draft-transport.test.ts +0 -211
|
@@ -25,7 +25,29 @@
|
|
|
25
25
|
* the decision says CLAIM the slot — caller sets `firstPingAt`.
|
|
26
26
|
* - When the model requested silent, this module is a no-op.
|
|
27
27
|
*
|
|
28
|
+
* Notification ownership (R8 / PR-2). The bare "first ping wins" rule
|
|
29
|
+
* above has a residual failure: an interim ACK that pings first claims
|
|
30
|
+
* the turn's single slot, and the later SUBSTANTIVE answer is then
|
|
31
|
+
* downgraded to silent — "the reply is last but the phone never buzzed
|
|
32
|
+
* for the answer." To fix that without re-introducing model double-pings,
|
|
33
|
+
* the decision is now aware of WHO holds the slot and WHO is asking:
|
|
34
|
+
*
|
|
35
|
+
* - A SUBSTANTIVE final asking to ping while the slot is held by a
|
|
36
|
+
* NON-substantive (ack) send ⇒ do NOT suppress; let the answer ping
|
|
37
|
+
* and UPGRADE the slot to substantive (the answer owns the ping even
|
|
38
|
+
* though the ack already buzzed once — a deliberate, bounded second
|
|
39
|
+
* ping so the user is notified of the actual answer).
|
|
40
|
+
* - An ACK asking to ping while the slot is held by a SUBSTANTIVE send
|
|
41
|
+
* ⇒ suppress (no spurious double-ping AFTER the real answer).
|
|
42
|
+
* - A SUBSTANTIVE asking while the slot is held by a SUBSTANTIVE ⇒
|
|
43
|
+
* suppress (preserves the #1674 model-double-ping guard: answer +
|
|
44
|
+
* wrap-up should be one beep, not two).
|
|
45
|
+
* - An ACK while the slot is held by an ACK ⇒ suppress (unchanged).
|
|
46
|
+
*
|
|
28
47
|
* The slot is claimed BEFORE the actual send (caller responsibility).
|
|
48
|
+
* On a CLAIM or an UPGRADE the caller MUST set `firstPingAt` AND
|
|
49
|
+
* `firstPingWasSubstantive` ATOMICALLY (same synchronous block, no await
|
|
50
|
+
* between) so a racing second reply reads a consistent pair.
|
|
29
51
|
* Trade-off documented inline in `gateway.ts:executeReply`.
|
|
30
52
|
*/
|
|
31
53
|
|
|
@@ -39,6 +61,18 @@ export interface OverPingDecisionInput {
|
|
|
39
61
|
* has landed yet. Caller threads this through from
|
|
40
62
|
* `CurrentTurn.firstPingAt`. */
|
|
41
63
|
firstPingAt: number | null
|
|
64
|
+
/** True iff THIS reply is a substantive final answer (stream `done`,
|
|
65
|
+
* or text length ≥ FINAL_ANSWER_MIN_CHARS) — as opposed to a short
|
|
66
|
+
* interim ack. Caller computes via `isSubstantiveFinalReply`. Defaults
|
|
67
|
+
* to `false` (treat as a non-substantive ack) when omitted, which
|
|
68
|
+
* preserves the pre-PR-2 "first ping wins, the rest suppress" behaviour
|
|
69
|
+
* for callers that don't yet thread it. */
|
|
70
|
+
substantive?: boolean
|
|
71
|
+
/** True iff the send that CLAIMED the turn's ping slot was itself a
|
|
72
|
+
* substantive final answer. Caller threads this through from
|
|
73
|
+
* `CurrentTurn.firstPingWasSubstantive`. Meaningless (and ignored)
|
|
74
|
+
* when `firstPingAt == null`. Defaults to `false`. */
|
|
75
|
+
firstPingWasSubstantive?: boolean
|
|
42
76
|
/** Deterministic clock for tests; defaults to Date.now() in callers. */
|
|
43
77
|
nowMs: number
|
|
44
78
|
}
|
|
@@ -49,8 +83,18 @@ export interface OverPingDecision {
|
|
|
49
83
|
* violation by the model — caller should log + emit a metric. */
|
|
50
84
|
suppress: boolean
|
|
51
85
|
/** True iff the caller should claim the slot —
|
|
52
|
-
* `turn.firstPingAt = nowMs
|
|
86
|
+
* `turn.firstPingAt = nowMs` AND
|
|
87
|
+
* `turn.firstPingWasSubstantive = substantive`. Mutually exclusive
|
|
88
|
+
* with `suppress`. Set both on a fresh claim (no prior ping) and on
|
|
89
|
+
* an UPGRADE (a substantive answer pinging over an ack's slot). */
|
|
53
90
|
claimSlot: boolean
|
|
91
|
+
/** True iff this is an UPGRADE — a substantive final answer claiming
|
|
92
|
+
* the ping slot that was previously held by a NON-substantive ack.
|
|
93
|
+
* The answer pings even though the ack already buzzed once. Implied
|
|
94
|
+
* by `claimSlot && firstPingAt != null` but surfaced explicitly so
|
|
95
|
+
* the caller can log/meter the (intentional) second ping distinctly
|
|
96
|
+
* from a normal first claim. Always false on a suppress or a no-op. */
|
|
97
|
+
upgrade: boolean
|
|
54
98
|
/** When `suppress` is true, how long the first ping has been
|
|
55
99
|
* "active" (ms since `firstPingAt`). Caller surfaces this in the
|
|
56
100
|
* log + metric for forensic analysis (e.g. tight rapid double-pings
|
|
@@ -63,18 +107,40 @@ export interface OverPingDecision {
|
|
|
63
107
|
* No mutation, no IO, deterministic under a fixed `nowMs`.
|
|
64
108
|
*/
|
|
65
109
|
export function decideOverPing(input: OverPingDecisionInput): OverPingDecision {
|
|
110
|
+
const substantive = input.substantive === true
|
|
111
|
+
const firstPingWasSubstantive = input.firstPingWasSubstantive === true
|
|
112
|
+
|
|
66
113
|
if (!input.modelRequestedPing) {
|
|
67
114
|
// Model already chose silent — nothing for the safety net to do.
|
|
68
|
-
return { suppress: false, claimSlot: false, sinceFirstPingMs: null }
|
|
115
|
+
return { suppress: false, claimSlot: false, upgrade: false, sinceFirstPingMs: null }
|
|
69
116
|
}
|
|
70
117
|
if (input.firstPingAt != null) {
|
|
71
|
-
//
|
|
118
|
+
// The turn's ping slot is already held. WHO holds it and WHO is
|
|
119
|
+
// asking decides whether this is a notification-ownership UPGRADE or
|
|
120
|
+
// a double-ping to suppress (see the module doc-comment for the full
|
|
121
|
+
// matrix).
|
|
122
|
+
if (substantive && !firstPingWasSubstantive) {
|
|
123
|
+
// The substantive ANSWER is pinging over a slot held by an ack.
|
|
124
|
+
// Let it ping and upgrade the slot to substantive — the answer
|
|
125
|
+
// owns the turn's notification, not the earlier ack.
|
|
126
|
+
return {
|
|
127
|
+
suppress: false,
|
|
128
|
+
claimSlot: true,
|
|
129
|
+
upgrade: true,
|
|
130
|
+
sinceFirstPingMs: null,
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
// Every other slot-held case is a double-ping to suppress:
|
|
134
|
+
// - ack over substantive: a spurious wrap-up after the real answer
|
|
135
|
+
// - substantive over substantive: the #1674 answer+wrap-up guard
|
|
136
|
+
// - ack over ack: the original one-ping-per-turn behaviour
|
|
72
137
|
return {
|
|
73
138
|
suppress: true,
|
|
74
139
|
claimSlot: false,
|
|
140
|
+
upgrade: false,
|
|
75
141
|
sinceFirstPingMs: input.nowMs - input.firstPingAt,
|
|
76
142
|
}
|
|
77
143
|
}
|
|
78
144
|
// First ping this turn — let it through and claim the slot.
|
|
79
|
-
return { suppress: false, claimSlot: true, sinceFirstPingMs: null }
|
|
145
|
+
return { suppress: false, claimSlot: true, upgrade: false, sinceFirstPingMs: null }
|
|
80
146
|
}
|
|
@@ -40,12 +40,12 @@
|
|
|
40
40
|
},
|
|
41
41
|
"repository": {
|
|
42
42
|
"type": "git",
|
|
43
|
-
"url": "https://github.com/
|
|
43
|
+
"url": "https://github.com/switchroom/switchroom.git",
|
|
44
44
|
"directory": "telegram-plugin"
|
|
45
45
|
},
|
|
46
|
-
"homepage": "https://github.com/
|
|
46
|
+
"homepage": "https://github.com/switchroom/switchroom/tree/main/telegram-plugin#readme",
|
|
47
47
|
"bugs": {
|
|
48
|
-
"url": "https://github.com/
|
|
48
|
+
"url": "https://github.com/switchroom/switchroom/issues"
|
|
49
49
|
},
|
|
50
50
|
"publishConfig": {
|
|
51
51
|
"access": "public"
|
|
@@ -284,6 +284,18 @@ export function noteTurnEnd(key: string): void {
|
|
|
284
284
|
}
|
|
285
285
|
}
|
|
286
286
|
|
|
287
|
+
/**
|
|
288
|
+
* True when the current turn for `key` dispatched async background work
|
|
289
|
+
* (Agent / Task / Bash run_in_background:true) but the turn has not yet ended
|
|
290
|
+
* with a cleared pending flag. Used by the feed-survival predicate so the
|
|
291
|
+
* orphaned-reply backstop and silence-poke teardown are deferred while a
|
|
292
|
+
* detached background process is still running — even after inFlight empties
|
|
293
|
+
* when the near-instant tool_result (e.g. the Bash background handle) returns.
|
|
294
|
+
*/
|
|
295
|
+
export function hasPendingAsyncDispatch(key: string): boolean {
|
|
296
|
+
return stateByKey.get(key)?.pending === true
|
|
297
|
+
}
|
|
298
|
+
|
|
287
299
|
/**
|
|
288
300
|
* Clear pending-progress for a chat — reasons:
|
|
289
301
|
* 'inbound' — user sent a new message, they're re-engaged
|
|
@@ -91,7 +91,7 @@ export function resolveScopedAllowChoices(
|
|
|
91
91
|
|
|
92
92
|
// ── File tools: this exact path vs any file.
|
|
93
93
|
if (FILE_TOOLS.has(toolName)) {
|
|
94
|
-
const path = filePathFrom(input);
|
|
94
|
+
const path = filePathFrom(input, inputPreview);
|
|
95
95
|
const broad: ScopeOption = { rule: toolName, buttonLabel: "Any file", broad: true };
|
|
96
96
|
if (path) {
|
|
97
97
|
return {
|
|
@@ -163,9 +163,36 @@ function resolveSkillName(input: Record<string, unknown>): string | null {
|
|
|
163
163
|
);
|
|
164
164
|
}
|
|
165
165
|
|
|
166
|
-
function filePathFrom(
|
|
167
|
-
|
|
168
|
-
|
|
166
|
+
function filePathFrom(
|
|
167
|
+
input: Record<string, unknown> | null,
|
|
168
|
+
rawPreview?: string,
|
|
169
|
+
): string | null {
|
|
170
|
+
if (input) {
|
|
171
|
+
const p = readString(input, "file_path") ?? readString(input, "notebook_path");
|
|
172
|
+
if (p) return p;
|
|
173
|
+
}
|
|
174
|
+
// Claude Code truncates inputPreview to 200 chars, making the surrounding
|
|
175
|
+
// JSON invalid for Edit/Write (old_string/new_string push it past 200).
|
|
176
|
+
// "file_path" is the first key, so its value is intact in the truncated
|
|
177
|
+
// prefix — extract it with a lenient regex on the raw string.
|
|
178
|
+
if (rawPreview) return extractFilePathFromRaw(rawPreview);
|
|
179
|
+
return null;
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
/**
|
|
183
|
+
* Regex-based fallback to extract "file_path" or "notebook_path" from a raw
|
|
184
|
+
* (possibly truncated / invalid-JSON) inputPreview string. JSON-unescapes the
|
|
185
|
+
* captured value. Returns null when neither key is present or value is empty.
|
|
186
|
+
*/
|
|
187
|
+
function extractFilePathFromRaw(raw: string): string | null {
|
|
188
|
+
const m = /"(?:file_path|notebook_path)"\s*:\s*"((?:[^"\\]|\\.)*)"/.exec(raw);
|
|
189
|
+
if (!m) return null;
|
|
190
|
+
try {
|
|
191
|
+
const value = JSON.parse(`"${m[1]}"`) as string;
|
|
192
|
+
return typeof value === "string" && value.length > 0 ? value : null;
|
|
193
|
+
} catch {
|
|
194
|
+
return null;
|
|
195
|
+
}
|
|
169
196
|
}
|
|
170
197
|
|
|
171
198
|
/**
|
|
@@ -274,7 +301,7 @@ export function matchesAllowRule(
|
|
|
274
301
|
return bashFirstToken(cmd) === m[1];
|
|
275
302
|
}
|
|
276
303
|
if (FILE_TOOLS.has(ruleTool)) {
|
|
277
|
-
return filePathFrom(input) === arg;
|
|
304
|
+
return filePathFrom(input, inputPreview) === arg;
|
|
278
305
|
}
|
|
279
306
|
return false;
|
|
280
307
|
}
|
|
@@ -77,6 +77,21 @@ const INTERNAL_MCP_SERVERS = new Set([
|
|
|
77
77
|
"switchroom-telegram",
|
|
78
78
|
]);
|
|
79
79
|
|
|
80
|
+
/**
|
|
81
|
+
* hostd fleet verbs that take a target agent `name` as a required arg. The
|
|
82
|
+
* approval card MUST name WHICH agent is targeted (#2469) — "restart an
|
|
83
|
+
* agent" with no name leaves the operator blind. We interpolate the target
|
|
84
|
+
* into the curated phrase: "restart an agent in the fleet" → "restart agent
|
|
85
|
+
* `carrie` in the fleet". Stays generic when `name` is absent (never crash).
|
|
86
|
+
*/
|
|
87
|
+
const HOSTD_AGENT_TARGET_VERBS = new Set([
|
|
88
|
+
"mcp__hostd__agent_restart",
|
|
89
|
+
"mcp__hostd__agent_start",
|
|
90
|
+
"mcp__hostd__agent_stop",
|
|
91
|
+
"mcp__hostd__agent_logs",
|
|
92
|
+
"mcp__hostd__agent_exec",
|
|
93
|
+
]);
|
|
94
|
+
|
|
80
95
|
/**
|
|
81
96
|
* Build the multi-line card body for an approval prompt.
|
|
82
97
|
*
|
|
@@ -86,10 +101,23 @@ const INTERNAL_MCP_SERVERS = new Set([
|
|
|
86
101
|
* Output is HTML-escaped for `parse_mode: 'HTML'`. The agent name is
|
|
87
102
|
* capitalized for the sentence; dropped (with "wants to") when null —
|
|
88
103
|
* the bridge client can be anonymous during early-boot edge cases.
|
|
104
|
+
*
|
|
105
|
+
* The `why:` line is the CALLER's stated rationale — the `reason`/`why`
|
|
106
|
+
* argument on the tool input, NOT the tool's static JSONSchema
|
|
107
|
+
* `description`. The schema description is documentation (it can contain
|
|
108
|
+
* literal tokens like `$SWITCHROOM_AGENT_NAME`), so surfacing it as the
|
|
109
|
+
* "why" reads like an un-interpolated variable and discards the agent's
|
|
110
|
+
* actual reason (#2469). We only fall back to "not provided" — never to
|
|
111
|
+
* the schema description.
|
|
89
112
|
*/
|
|
90
113
|
export function formatPermissionCardBody(opts: {
|
|
91
114
|
toolName: string;
|
|
92
115
|
inputPreview: string | undefined;
|
|
116
|
+
/**
|
|
117
|
+
* The tool's static JSONSchema description. Retained for the signature
|
|
118
|
+
* (callers still pass it) but deliberately NOT used as the `why:` line —
|
|
119
|
+
* see #2469. The caller's rationale comes from the input args instead.
|
|
120
|
+
*/
|
|
93
121
|
description: string | undefined;
|
|
94
122
|
agentName: string | null;
|
|
95
123
|
}): string {
|
|
@@ -104,7 +132,10 @@ export function formatPermissionCardBody(opts: {
|
|
|
104
132
|
lines.push(`🔐 ${escapeTgHtml(capFirst(action))}`);
|
|
105
133
|
}
|
|
106
134
|
|
|
107
|
-
|
|
135
|
+
// why: the caller-supplied rationale (`reason`/`why` arg), never the
|
|
136
|
+
// static schema description (#2469).
|
|
137
|
+
const callerReason = callerSuppliedReason(opts.inputPreview);
|
|
138
|
+
const rawWhy = (callerReason ?? "").replace(/\s+/g, " ").trim();
|
|
108
139
|
const truncatedWhy =
|
|
109
140
|
rawWhy.length > DESCRIPTION_LINE_MAX
|
|
110
141
|
? rawWhy.slice(0, DESCRIPTION_LINE_MAX - 1) + "…"
|
|
@@ -142,15 +173,15 @@ export function naturalAction(
|
|
|
142
173
|
case "Edit":
|
|
143
174
|
case "MultiEdit":
|
|
144
175
|
case "NotebookEdit": {
|
|
145
|
-
const f = fileBase(input);
|
|
176
|
+
const f = fileBase(input, inputPreview);
|
|
146
177
|
return f ? `edit: ${f}` : "edit files";
|
|
147
178
|
}
|
|
148
179
|
case "Write": {
|
|
149
|
-
const f = fileBase(input);
|
|
180
|
+
const f = fileBase(input, inputPreview);
|
|
150
181
|
return f ? `write: ${f}` : "write files";
|
|
151
182
|
}
|
|
152
183
|
case "Read": {
|
|
153
|
-
const f = fileBase(input);
|
|
184
|
+
const f = fileBase(input, inputPreview);
|
|
154
185
|
return f ? `read: ${f}` : "read files";
|
|
155
186
|
}
|
|
156
187
|
case "Bash": {
|
|
@@ -194,7 +225,7 @@ function naturalMcpAction(
|
|
|
194
225
|
const server = parts.length >= 2 ? parts[1]! : "";
|
|
195
226
|
const curated = MCP_TOOL_DESCRIPTIONS[toolName];
|
|
196
227
|
if (curated) {
|
|
197
|
-
const phrase = lowerFirst(curated);
|
|
228
|
+
const phrase = hostdAgentPhrase(toolName, input) ?? lowerFirst(curated);
|
|
198
229
|
return INTERNAL_MCP_SERVERS.has(server)
|
|
199
230
|
? phrase
|
|
200
231
|
: `${phrase} (${prettyMcpServer(server)})`;
|
|
@@ -217,6 +248,37 @@ function naturalMcpAction(
|
|
|
217
248
|
return `use ${toolName}`;
|
|
218
249
|
}
|
|
219
250
|
|
|
251
|
+
/**
|
|
252
|
+
* For the hostd `agent_*` fleet verbs, build an action phrase that NAMES the
|
|
253
|
+
* target agent (#2469) — "restart agent `carrie` in the fleet". The verb is
|
|
254
|
+
* derived from the tool name (`agent_restart` → "restart"); `agent_logs` /
|
|
255
|
+
* `agent_exec` get bespoke phrasing. Returns null when the tool isn't a
|
|
256
|
+
* name-targeted hostd verb or no `name` arg is present, so the caller falls
|
|
257
|
+
* back to the generic curated phrase (never crashes on a missing name).
|
|
258
|
+
*/
|
|
259
|
+
function hostdAgentPhrase(
|
|
260
|
+
toolName: string,
|
|
261
|
+
input: Record<string, unknown> | null,
|
|
262
|
+
): string | null {
|
|
263
|
+
if (!HOSTD_AGENT_TARGET_VERBS.has(toolName)) return null;
|
|
264
|
+
const name = input ? readString(input, "name") : null;
|
|
265
|
+
if (!name) return null;
|
|
266
|
+
switch (toolName) {
|
|
267
|
+
case "mcp__hostd__agent_restart":
|
|
268
|
+
return `restart agent \`${name}\` in the fleet`;
|
|
269
|
+
case "mcp__hostd__agent_start":
|
|
270
|
+
return `start agent \`${name}\` in the fleet`;
|
|
271
|
+
case "mcp__hostd__agent_stop":
|
|
272
|
+
return `stop agent \`${name}\` in the fleet`;
|
|
273
|
+
case "mcp__hostd__agent_logs":
|
|
274
|
+
return `read agent \`${name}\`'s container logs`;
|
|
275
|
+
case "mcp__hostd__agent_exec":
|
|
276
|
+
return `run a read-only inspection inside agent \`${name}\``;
|
|
277
|
+
default:
|
|
278
|
+
return null;
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
|
|
220
282
|
/**
|
|
221
283
|
* For a REST-wrapper MCP call ({ path, body?, query? }), build the action
|
|
222
284
|
* phrase "<VERB> <path> (<Server>)" — e.g. "POST /smtp/email (Brevo)". The
|
|
@@ -405,10 +467,43 @@ function resolveSkillName(input: Record<string, unknown>): string | null {
|
|
|
405
467
|
);
|
|
406
468
|
}
|
|
407
469
|
|
|
408
|
-
function fileBase(
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
470
|
+
function fileBase(
|
|
471
|
+
input: Record<string, unknown> | null,
|
|
472
|
+
rawPreview?: string,
|
|
473
|
+
): string | null {
|
|
474
|
+
if (input) {
|
|
475
|
+
const p = readString(input, "file_path") ?? readString(input, "notebook_path");
|
|
476
|
+
if (p) return basename(p);
|
|
477
|
+
}
|
|
478
|
+
// Claude Code truncates inputPreview to 200 chars, making the surrounding
|
|
479
|
+
// JSON invalid (Edit/Write always exceed 200 chars once old_string/new_string
|
|
480
|
+
// are included). "file_path" is the first key, so its value is intact in the
|
|
481
|
+
// truncated prefix — extract it with a lenient regex on the raw string.
|
|
482
|
+
if (rawPreview) {
|
|
483
|
+
const p = extractFilePathFromRaw(rawPreview);
|
|
484
|
+
if (p) return basename(p);
|
|
485
|
+
}
|
|
486
|
+
return null;
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
/**
|
|
490
|
+
* Regex-based fallback to extract "file_path" or "notebook_path" from a raw
|
|
491
|
+
* (possibly truncated / invalid-JSON) inputPreview string. JSON-unescapes the
|
|
492
|
+
* captured value so paths with backslashes or unicode escapes are returned
|
|
493
|
+
* correctly. Returns null when neither key is present or the captured value is
|
|
494
|
+
* empty.
|
|
495
|
+
*/
|
|
496
|
+
function extractFilePathFromRaw(raw: string): string | null {
|
|
497
|
+
// Match the first occurrence of "file_path" or "notebook_path".
|
|
498
|
+
const m = /"(?:file_path|notebook_path)"\s*:\s*"((?:[^"\\]|\\.)*)"/.exec(raw);
|
|
499
|
+
if (!m) return null;
|
|
500
|
+
try {
|
|
501
|
+
// JSON.parse the quoted string literal so escape sequences are resolved.
|
|
502
|
+
const value = JSON.parse(`"${m[1]}"`) as string;
|
|
503
|
+
return typeof value === "string" && value.length > 0 ? value : null;
|
|
504
|
+
} catch {
|
|
505
|
+
return null;
|
|
506
|
+
}
|
|
412
507
|
}
|
|
413
508
|
|
|
414
509
|
function lowerFirst(text: string): string {
|
|
@@ -447,6 +542,54 @@ function readString(input: Record<string, unknown>, key: string): string | null
|
|
|
447
542
|
return typeof value === "string" && value.length > 0 ? value : null;
|
|
448
543
|
}
|
|
449
544
|
|
|
545
|
+
/**
|
|
546
|
+
* The caller's stated rationale for a tool call — the `reason` (or `why`)
|
|
547
|
+
* argument it passed. This is the agent's actual justification, which is
|
|
548
|
+
* what belongs on the `why:` line of the approval card. Returns null when
|
|
549
|
+
* no reason was supplied (caller renders "not provided") — we never fall
|
|
550
|
+
* back to the tool's static schema description (#2469).
|
|
551
|
+
*/
|
|
552
|
+
function callerSuppliedReason(inputPreview: string | undefined): string | null {
|
|
553
|
+
const input = parseInput(inputPreview);
|
|
554
|
+
if (input) {
|
|
555
|
+
const fromJson = readString(input, "reason") ?? readString(input, "why");
|
|
556
|
+
if (fromJson) return fromJson;
|
|
557
|
+
}
|
|
558
|
+
// Truncation fallback (#2580 follow-up): upstream Claude Code truncates
|
|
559
|
+
// `inputPreview` to ~200 chars. For a tool whose first/largest key is a
|
|
560
|
+
// big blob (e.g. config_propose_edit's `unified_diff`), the truncated JSON
|
|
561
|
+
// is unparseable and the schema-required `reason` is lost — the card then
|
|
562
|
+
// renders "why: not provided" even though a reason WAS supplied. Mirror the
|
|
563
|
+
// `extractFilePathFromRaw` lenient-regex fallback so a `reason`/`why` value
|
|
564
|
+
// surviving in the truncated prefix is still recovered. (Reordering the
|
|
565
|
+
// schema so `reason` precedes the blob keeps it inside the 200-char prefix;
|
|
566
|
+
// this regex is what then reads it back out.)
|
|
567
|
+
if (inputPreview) {
|
|
568
|
+
const r = extractReasonFromRaw(inputPreview);
|
|
569
|
+
if (r) return r;
|
|
570
|
+
}
|
|
571
|
+
return null;
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
/**
|
|
575
|
+
* Regex-based fallback to extract a `reason` or `why` value from a raw
|
|
576
|
+
* (possibly truncated / invalid-JSON) inputPreview string. Mirrors
|
|
577
|
+
* `extractFilePathFromRaw`: JSON-unescapes the captured value so a reason
|
|
578
|
+
* with quotes/backslashes/unicode escapes is returned correctly. Returns
|
|
579
|
+
* null when neither key is present or the captured value is empty/whitespace.
|
|
580
|
+
*/
|
|
581
|
+
export function extractReasonFromRaw(raw: string): string | null {
|
|
582
|
+
// Match the first occurrence of "reason" or "why".
|
|
583
|
+
const m = /"(?:reason|why)"\s*:\s*"((?:[^"\\]|\\.)*)"/.exec(raw);
|
|
584
|
+
if (!m) return null;
|
|
585
|
+
try {
|
|
586
|
+
const value = JSON.parse(`"${m[1]}"`) as string;
|
|
587
|
+
return typeof value === "string" && value.trim().length > 0 ? value : null;
|
|
588
|
+
} catch {
|
|
589
|
+
return null;
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
|
|
450
593
|
function skillBasenameFromPath(input: Record<string, unknown>): string | null {
|
|
451
594
|
const path = readString(input, "path") ?? readString(input, "skill_path");
|
|
452
595
|
if (!path) return null;
|
|
@@ -54,6 +54,15 @@ export type QuotaUtilization = {
|
|
|
54
54
|
representativeClaim: string | null;
|
|
55
55
|
overageStatus: string | null;
|
|
56
56
|
overageDisabledReason: string | null;
|
|
57
|
+
/**
|
|
58
|
+
* #2494 Bug C — header-presence markers. Mirror of the field in
|
|
59
|
+
* `src/auth/quota.ts` (kept in sync across the bundle boundary). The
|
|
60
|
+
* utilization fields are always numeric (a missing header coalesces to 0),
|
|
61
|
+
* so on their own they cannot tell a genuine 0% from a filled-0 thin probe.
|
|
62
|
+
* Optional → unset means "real probe" (legacy snapshots / fixtures).
|
|
63
|
+
*/
|
|
64
|
+
fiveHourUtilPresent?: boolean;
|
|
65
|
+
sevenDayUtilPresent?: boolean;
|
|
57
66
|
};
|
|
58
67
|
|
|
59
68
|
export type QuotaResult =
|
|
@@ -120,8 +129,12 @@ export function parseQuotaHeaders(headers: Headers): QuotaResult {
|
|
|
120
129
|
return {
|
|
121
130
|
ok: true,
|
|
122
131
|
data: {
|
|
132
|
+
// #2494 Bug C — coalesce missing window to 0 for back-compat but record
|
|
133
|
+
// which windows were actually present (both-absent returned ok:false).
|
|
123
134
|
fiveHourUtilizationPct: (fiveHour ?? 0) * 100,
|
|
124
135
|
sevenDayUtilizationPct: (sevenDay ?? 0) * 100,
|
|
136
|
+
fiveHourUtilPresent: fiveHour != null,
|
|
137
|
+
sevenDayUtilPresent: sevenDay != null,
|
|
125
138
|
fiveHourResetAt: parseEpochHeader(headers, "anthropic-ratelimit-unified-5h-reset"),
|
|
126
139
|
sevenDayResetAt: parseEpochHeader(headers, "anthropic-ratelimit-unified-7d-reset"),
|
|
127
140
|
representativeClaim: headers.get("anthropic-ratelimit-unified-representative-claim"),
|
|
@@ -30,6 +30,13 @@
|
|
|
30
30
|
* IPC call (cheap). `probeQuota` is only called on state-change (when
|
|
31
31
|
* we're going to send a message anyway) to get fresh numbers for the
|
|
32
32
|
* notification body. On no-change polls, only `listState` is called.
|
|
33
|
+
*
|
|
34
|
+
* #2495 Change 3 — the transition-to-alarm probe is `forceLive` (bypasses
|
|
35
|
+
* the broker's probe-on-open TTL), so the DECISION to alarm is corroborated
|
|
36
|
+
* by a TRUE live probe of the affected account, not a possibly-stale cache
|
|
37
|
+
* read. The re-evaluation with fresh numbers can suppress an alarm whose
|
|
38
|
+
* stale-snapshot transition no longer holds. Steady state stays cheap: a
|
|
39
|
+
* no-change poll never probes. Cost is one live probe per transition edge.
|
|
33
40
|
*/
|
|
34
41
|
|
|
35
42
|
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
|
|
@@ -175,6 +182,51 @@ export type QuotaWatchDecision =
|
|
|
175
182
|
}
|
|
176
183
|
| { kind: "skip"; accountLabel: string; reason: string };
|
|
177
184
|
|
|
185
|
+
/**
 * #2495 BLOCKER fix — the corroboration probe result, as the gateway's
 * runQuotaWatch sees it from `brokerClient.probeQuota(..., forceLive=true)`.
 * Structurally a subset of `ProbeQuotaEntry` (src/auth/broker/client.ts): a
 * `result` discriminated on `ok`, plus a `served` tag the broker stamps to
 * say HOW the result was sourced.
 *
 * The trap this guards: under `forceLive`, when the upstream live probe FAILS
 * and the broker holds a prior snapshot, it returns `cachedSnapshotToResult`
 * — `result.ok === true` but `served === "cache"` (server.ts opProbeQuota).
 * A naive `result.ok` check then treats that stale cache read as a live
 * corroboration, fires the alarm, and stamps the false "Live-probe
 * corroborated (#2495)" footnote. The acceptance criterion is the opposite:
 * an alarm must be backed by a LIVE probe, not a stale cache read.
 */
export type CorroborationProbe = {
  result: { ok: true } | { ok: false };
  /**
   * How the result was sourced. `"live"` = fresh upstream probe (genuine
   * corroboration). `"cache"` = served from the durable cache (TTL-hit or
   * probe-failure fallback) — NOT corroboration. Absent on legacy responses,
   * which we treat as NOT corroborated (fail-closed: never claim a live
   * corroboration we can't prove).
   */
  served?: "live" | "cache";
};

/**
 * #2495 BLOCKER fix — decide whether a forceLive corroboration probe counts
 * as a genuine LIVE corroboration of the alarm.
 *
 * Genuine corroboration requires BOTH `result.ok` AND `served === "live"`.
 * A result that is `ok:true` but `served:"cache"` (the failed-probe
 * cache-fallback) is treated EXACTLY like a probe failure: it is NOT
 * corroboration, so the caller must DEFER — leave watch state untouched and
 * re-evaluate next tick when a true live probe can be obtained. A missing
 * entry (`undefined`) is likewise not corroboration.
 *
 * Pure + total so it can be unit-tested at the seam without standing up the
 * broker or the gateway loop. "Total" includes malformed payloads: an entry
 * that arrives without a `result` object is answered with `false` (fail-closed)
 * instead of throwing, because the entry originates outside this module and
 * its static type is not enforced at runtime.
 *
 * @param entry - The probe entry for the affected account, or `undefined`
 *   when there is none.
 * @returns `true` only for an `ok` result that was served live.
 */
export function isLiveCorroboration(entry: CorroborationProbe | undefined): boolean {
  // `result?.` (not `result.`) keeps the function total when `result` is
  // missing at runtime; any non-`true` value falls through to `false`.
  return entry?.result?.ok === true && entry.served === "live";
}
|
|
229
|
+
|
|
178
230
|
/**
|
|
179
231
|
* Evaluate one account's quota state against its last-notified health.
|
|
180
232
|
*
|
|
@@ -224,7 +276,11 @@ export function evaluateQuotaWatchAccount(args: {
|
|
|
224
276
|
return { kind: "skip", accountLabel: label, reason: "stale-snapshot" };
|
|
225
277
|
}
|
|
226
278
|
|
|
227
|
-
|
|
279
|
+
// #2494 Bug A — classify against THIS tick's clock so the refill
|
|
280
|
+
// normalization uses the same `now` the rest of the decision does (the
|
|
281
|
+
// default `new Date()` would diverge from a frozen test clock / a replayed
|
|
282
|
+
// tick and mis-zero a still-future reset window).
|
|
283
|
+
const currentHealth = classifyHealth(snap, new Date(now));
|
|
228
284
|
|
|
229
285
|
// Unknown (probe failed) or blocked — skip entirely.
|
|
230
286
|
if (currentHealth === "unknown" || currentHealth === "blocked") {
|
|
@@ -324,22 +380,58 @@ export type FleetAllExhaustedDecision =
|
|
|
324
380
|
* cases the trigger-based interactive all-blocked card misses: a quiet period
|
|
325
381
|
* (no agent happens to 429 into the wall) and the consumer/cron paths.
|
|
326
382
|
*
|
|
327
|
-
*
|
|
328
|
-
*
|
|
329
|
-
*
|
|
330
|
-
*
|
|
383
|
+
* Source: the broker's per-account `exhausted` flag (set by mark-exhausted via
|
|
384
|
+
* failover + the consumer sensor). That flag is NOT purely live — `isAccountBlocked`
|
|
385
|
+
* (src/auth/broker/account-eligibility.ts) falls back to the persisted
|
|
386
|
+
* `exhausted_until` mark whenever there is no fresh live snapshot. During a
|
|
387
|
+
* broker-unreachable / probe-timeout blackout, short-lived auto-fallback marks
|
|
388
|
+
* can make `every(a.exhausted)` momentarily true with ZERO live corroboration
|
|
389
|
+
* (#2478, klanker 2026-06-20). So the `entered` alert requires POSITIVE LIVE
|
|
390
|
+
* CORROBORATION: an account counts toward "all exhausted" only when its
|
|
391
|
+
* `exhausted` flag is backed by a FRESH live snapshot (last_quota.capturedAt
|
|
392
|
+
* within `maxStaleMs`). If ANY account's exhaustion rests solely on a
|
|
393
|
+
* stale/absent-probe mark we are
|
|
394
|
+
* probe-blind and return `skip: "probe-blind"` — no false fleet alert. The
|
|
395
|
+
* guarantee is "no false alarm off stale marks during a probe blackout", NOT
|
|
396
|
+
* blanket probe-failure immunity. The `recovered` transition is unguarded so a
|
|
397
|
+
* legitimately-fired alert is never stranded. Requires at least one account; an
|
|
398
|
+
* empty fleet never alerts.
|
|
331
399
|
*/
|
|
332
400
|
export function evaluateFleetAllExhausted(args: {
|
|
333
|
-
accounts: Array<{
|
|
401
|
+
accounts: Array<{
|
|
402
|
+
label: string;
|
|
403
|
+
exhausted: boolean;
|
|
404
|
+
exhausted_until?: number;
|
|
405
|
+
/** Most-recent live probe snapshot, used to corroborate `exhausted`. */
|
|
406
|
+
last_quota?: {
|
|
407
|
+
capturedAt: number;
|
|
408
|
+
overageDisabledReason?: string | null;
|
|
409
|
+
} | null;
|
|
410
|
+
}>;
|
|
334
411
|
prev: QuotaWatchAccountState;
|
|
335
412
|
now: number;
|
|
413
|
+
/** Staleness ceiling for "fresh probe"; 0 disables the gate (legacy callers/tests). */
|
|
414
|
+
tuning?: Pick<QuotaWatchTuning, "maxStaleMs">;
|
|
336
415
|
}): FleetAllExhaustedDecision {
|
|
337
416
|
const { accounts, prev, now } = args;
|
|
417
|
+
const maxStaleMs = args.tuning?.maxStaleMs ?? 0;
|
|
338
418
|
const allExhausted = accounts.length > 0 && accounts.every((a) => a.exhausted);
|
|
339
419
|
// "throttling" doubles as the "currently alerting all-exhausted" marker.
|
|
340
420
|
const wasAlerting = prev.lastNotifiedHealth === "throttling";
|
|
341
421
|
|
|
342
422
|
if (allExhausted && !wasAlerting) {
|
|
423
|
+
// Probe-blind guard (#2478): only fire `entered` if EVERY account's
|
|
424
|
+
// exhaustion is backed by live evidence — a fresh snapshot. An account
|
|
425
|
+
// exhausted solely on a stale/absent mark means we have no live
|
|
426
|
+
// corroboration → skip rather than false-alarm.
|
|
427
|
+
if (maxStaleMs > 0) {
|
|
428
|
+
const allLiveCorroborated = accounts.every((a) =>
|
|
429
|
+
exhaustionLiveCorroborated(a, now, maxStaleMs),
|
|
430
|
+
);
|
|
431
|
+
if (!allLiveCorroborated) {
|
|
432
|
+
return { kind: "skip", reason: "probe-blind" };
|
|
433
|
+
}
|
|
434
|
+
}
|
|
343
435
|
return {
|
|
344
436
|
kind: "notify",
|
|
345
437
|
message: buildAllExhaustedMessage(accounts, now),
|
|
@@ -358,6 +450,42 @@ export function evaluateFleetAllExhausted(args: {
|
|
|
358
450
|
return { kind: "skip", reason: allExhausted ? "still-all-exhausted" : "not-all-exhausted" };
|
|
359
451
|
}
|
|
360
452
|
|
|
453
|
+
/**
 * Is an account's `exhausted` flag backed by live evidence (#2478)?
 *
 * True when the most-recent live probe is FRESH (`capturedAt` within
 * `maxStaleMs`) — that fresh probe is what set/upholds the broker's blocked
 * verdict. False when there is no `last_quota` at all, or the snapshot is
 * stale: the `exhausted` flag then rests solely on a persisted mark with no
 * live backing, which is exactly the probe-blind condition that false-fires
 * the fleet alert.
 *
 * NOTE: `out_of_credits` is NOT treated as corroboration here. Per
 * fix/out-of-credits-serve-block, out_of_credits is INFORMATIONAL — it is
 * not exhaustion in its own right at any util. Corroboration requires a
 * genuinely fresh quota snapshot (real 429 / util-wall path). Accordingly
 * `overageDisabledReason` is accepted in the parameter shape but never read.
 *
 * Mirrors `snapshotFresh` in src/auth/broker/account-eligibility.ts (the
 * serving-side authority); kept as a local check so the decision layer
 * carries no broker dependency.
 *
 * @param account - Object carrying the optional `last_quota` snapshot.
 * @param now - Current time, in the same epoch-ms units as `capturedAt`.
 * @param maxStaleMs - Maximum accepted snapshot age (`now - capturedAt`).
 * @param futureSkewToleranceMs - How far ahead of `now` a `capturedAt` may be
 *   and still be accepted. Defaults to 60_000 ms, the tolerance this check
 *   has always applied (matches the inline literal in `snapshotFresh`).
 * @returns `true` when a snapshot exists, is no older than `maxStaleMs`, and
 *   is not dated further into the future than the tolerance.
 */
function exhaustionLiveCorroborated(
  account: {
    last_quota?: { capturedAt: number; overageDisabledReason?: string | null } | null;
  },
  now: number,
  maxStaleMs: number,
  futureSkewToleranceMs: number = 60_000,
): boolean {
  const snapshot = account.last_quota;
  if (!snapshot) return false;

  // Age gate: the snapshot must have been captured within the staleness ceiling.
  const withinStaleness = now - snapshot.capturedAt <= maxStaleMs;

  // Clock-skew guard: a future-dated `capturedAt` makes `now - capturedAt`
  // negative and would slip past the staleness gate, so a skewed snapshot
  // reads as fresh. Reject snapshots dated beyond the tolerance ahead of `now`.
  const notTooFarAhead = snapshot.capturedAt <= now + futureSkewToleranceMs;

  return withinStaleness && notTooFarAhead;
}
|
|
488
|
+
|
|
361
489
|
function buildAllExhaustedMessage(
|
|
362
490
|
accounts: Array<{ label: string; exhausted_until?: number }>,
|
|
363
491
|
now: number,
|
|
@@ -420,7 +548,7 @@ function buildThrottlingMessage(agentName: string, snap: AccountSnapshot): strin
|
|
|
420
548
|
`Binding window: ${winLabel}${resetStr}`,
|
|
421
549
|
`${activeNote}${altNote}`,
|
|
422
550
|
``,
|
|
423
|
-
`<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window.
|
|
551
|
+
`<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window. Live-probe corroborated (#2495).</i>`,
|
|
424
552
|
`<i>Run /auth for full fleet status or /usage for the active account.</i>`,
|
|
425
553
|
]
|
|
426
554
|
.join("\n")
|