switchroom 0.15.0 → 0.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -73,6 +73,76 @@ export function emptyAccountState(): QuotaWatchAccountState {
73
73
  return { lastNotifiedHealth: null, lastNotifiedAt: 0 };
74
74
  }
75
75
 
76
+ // ─── Tuning (env knobs) ───────────────────────────────────────────────────────
77
+
78
+ /**
79
+ * Operational tuning for the watch loop, resolved once from env by the
80
+ * gateway. Each hardening behaviour listed below is individually
81
+ * kill-switchable (incident 2026-06-09: a fleet bounce released
82
+ * days-stale recovery latches on all 11 agents at once → 26 duplicate
83
+ * 🟢 messages in 16 minutes):
84
+ *
85
+ * SWITCHROOM_QUOTA_WATCH_MAX_STALE_MS 0 disables the staleness gate
86
+ * (default 60 min)
87
+ * SWITCHROOM_QUOTA_WATCH_LATE_RECOVERY_MS 0 disables silent late-recovery
88
+ * reconciliation (default 6 h)
89
+ * SWITCHROOM_QUOTA_WATCH_FLEET_DEDUP "0" disables the broker claim
90
+ * (every agent sends, pre-incident
91
+ * behaviour)
92
+ * SWITCHROOM_QUOTA_WATCH_SEND_ON_PROBE_FAIL "1" restores sending from
93
+ * cached data when the pre-send
94
+ * validation probe fails
95
+ */
96
export interface QuotaWatchTuning {
  /**
   * Shelf life of a cached snapshot, in milliseconds. A snapshot older
   * than this is treated as "unknown" (no opinion). `0` switches the
   * staleness gate off.
   */
  maxStaleMs: number;

  /**
   * Maximum age, in milliseconds, of the 🟡 warning that a recovery edge
   * answers. Beyond this age the recovery is reconciled silently. `0`
   * switches late-recovery reconciliation off.
   */
  lateRecoveryMs: number;

  /** When true, sends are routed through the broker's claim-notification dedup. */
  fleetDedup: boolean;

  /** Legacy behaviour: when true, send from cached data if the validation probe fails. */
  sendOnProbeFail: boolean;
}
106
+
107
+ export const DEFAULT_QUOTA_WATCH_MAX_STALE_MS = 60 * 60_000;
108
+ export const DEFAULT_QUOTA_WATCH_LATE_RECOVERY_MS = 6 * 60 * 60_000;
109
+
110
+ /** Broker claim window. Must exceed one full poll cycle (15 min) plus the
111
+ * boot-stagger spread so every agent's observation of the SAME edge lands
112
+ * inside one window; an account genuinely re-crossing the same edge later
113
+ * than this re-notifies. */
114
+ export const QUOTA_WATCH_CLAIM_WINDOW_MS = 30 * 60_000;
115
+
116
/**
 * Resolve the quota-watch tuning knobs from an environment map.
 *
 * Numeric knobs (`SWITCHROOM_QUOTA_WATCH_MAX_STALE_MS`,
 * `SWITCHROOM_QUOTA_WATCH_LATE_RECOVERY_MS`) fall back to their defaults
 * when the variable is unset, blank, non-numeric, non-finite or negative.
 * An explicit `0` is preserved — it is the documented way to disable the
 * corresponding behaviour.
 *
 * Boolean knobs: `SWITCHROOM_QUOTA_WATCH_FLEET_DEDUP` is on unless the
 * value is exactly "0"; `SWITCHROOM_QUOTA_WATCH_SEND_ON_PROBE_FAIL` is off
 * unless the value is exactly "1".
 *
 * Surrounding whitespace is stripped from every knob before it is
 * interpreted, so a padded value (e.g. from an env file) behaves like the
 * unpadded one and a whitespace-only value behaves like an unset one.
 *
 * @param env Environment-style map of variable name to raw string value.
 * @returns The fully resolved tuning object.
 */
export function resolveQuotaWatchTuning(
  env: Record<string, string | undefined>,
): QuotaWatchTuning {
  const read = (key: string): string | undefined => env[key]?.trim();

  const num = (key: string, fallback: number): number => {
    const raw = read(key);
    // `Number("")` (and `Number("   ")`) is 0, which would silently DISABLE
    // a safety gate — blank must mean "not configured", never "off".
    if (raw === undefined || raw === "") return fallback;
    const n = Number(raw);
    return Number.isFinite(n) && n >= 0 ? n : fallback;
  };

  return {
    maxStaleMs: num("SWITCHROOM_QUOTA_WATCH_MAX_STALE_MS", DEFAULT_QUOTA_WATCH_MAX_STALE_MS),
    lateRecoveryMs: num("SWITCHROOM_QUOTA_WATCH_LATE_RECOVERY_MS", DEFAULT_QUOTA_WATCH_LATE_RECOVERY_MS),
    // Default-on: only an explicit "0" opts out of the broker claim.
    fleetDedup: read("SWITCHROOM_QUOTA_WATCH_FLEET_DEDUP") !== "0",
    // Default-off: only an explicit "1" restores the legacy send path.
    sendOnProbeFail: read("SWITCHROOM_QUOTA_WATCH_SEND_ON_PROBE_FAIL") === "1",
  };
}
131
+
132
+ /**
133
+ * Broker dedup-claim key for one (account, transition, chat) cell.
134
+ * Per-CHAT keys keep the audience identical to pre-dedup behaviour:
135
+ * every chat that any agent would have notified still receives exactly
136
+ * one copy — from whichever agent claims it first.
137
+ */
138
export function buildQuotaClaimKey(
  accountLabel: string,
  transition: string,
  chatId: string | number,
): string {
  // Fixed namespace first, then one segment per dimension of the claim
  // cell; the chat id is stringified so numeric and string ids share a form.
  const segments: string[] = ["quota-watch", accountLabel, transition, String(chatId)];
  return segments.join(":");
}
145
+
76
146
  // ─── Decision logic ───────────────────────────────────────────────────────────
77
147
 
78
148
  export type QuotaWatchTransition =
@@ -87,30 +157,73 @@ export type QuotaWatchDecision =
87
157
  newAccountState: QuotaWatchAccountState;
88
158
  transition: QuotaWatchTransition;
89
159
  }
160
+ | {
161
+ /**
162
+ * A real transition was observed, but it is no longer NEWS — persist
163
+ * the new state so the edge-trigger latch clears, send nothing.
164
+ * Two producers: boot-tick recoveries (a just-booted gateway cannot
165
+ * distinguish "just recovered" from "recovered while we were down",
166
+ * and fleet bounces synchronize all agents' first ticks → flood) and
167
+ * late recoveries (the matching 🟡 is hours old; an "all clear" now
168
+ * is state reconciliation, not information).
169
+ */
170
+ kind: "reconcile";
171
+ accountLabel: string;
172
+ newAccountState: QuotaWatchAccountState;
173
+ transition: QuotaWatchTransition;
174
+ reason: "boot-tick-recovery" | "late-recovery";
175
+ }
90
176
  | { kind: "skip"; accountLabel: string; reason: string };
91
177
 
92
178
  /**
93
179
  * Evaluate one account's quota state against its last-notified health.
94
180
  *
95
- * Transition table:
181
+ * Transition table (after the staleness gate — a cached snapshot older
182
+ * than `maxStaleMs` is no opinion at all → skip "stale-snapshot"):
96
183
  * healthy → healthy skip (steady-state)
97
- * healthy → throttling notify (entered-throttling)
184
+ * healthy → throttling notify (entered-throttling) — warnings are
185
+ * level-state news, valid on any tick incl. boot
98
186
  * healthy → blocked skip (credits-watch covers this)
99
- * throttling → healthy notify (recovered-to-healthy)
187
+ * throttling → healthy notify (recovered-to-healthy), EXCEPT:
188
+ * boot tick → reconcile silently
189
+ * warning > lateRecoveryMs old → reconcile silently
100
190
  * throttling → throttling skip (already notified)
101
191
  * throttling → blocked skip (credits-watch covers blocked)
102
192
  * blocked → * skip (credits-watch domain)
103
193
  * unknown → * skip (no quota data — don't spam)
104
194
  * * → unknown skip (probe failed — transient, don't alarm)
195
+ *
196
+ * `bootTick` / `tuning` are optional: omitted (legacy callers/tests) the
197
+ * behaviour is exactly the pre-hardening table (no stale gate, no
198
+ * reconciliation).
105
199
  */
106
200
  export function evaluateQuotaWatchAccount(args: {
107
201
  agentName: string;
108
202
  snap: AccountSnapshot;
109
203
  prev: QuotaWatchAccountState;
110
204
  now: number;
205
+ /** True on the gateway's first watch tick after boot. */
206
+ bootTick?: boolean;
207
+ /** Staleness / late-recovery thresholds; 0 disables each. */
208
+ tuning?: Pick<QuotaWatchTuning, "maxStaleMs" | "lateRecoveryMs">;
111
209
  }): QuotaWatchDecision {
112
210
  const { agentName, snap, prev, now } = args;
211
+ const bootTick = args.bootTick ?? false;
212
+ const maxStaleMs = args.tuning?.maxStaleMs ?? 0;
213
+ const lateRecoveryMs = args.tuning?.lateRecoveryMs ?? 0;
113
214
  const label = snap.label;
215
+
216
+ // Staleness gate: a CACHED snapshot (capturedAtMs set) past its shelf
217
+ // life carries no opinion about the present — neither latch nor release.
218
+ // Live-probe snapshots (capturedAtMs undefined) are fresh by construction.
219
+ if (
220
+ maxStaleMs > 0 &&
221
+ snap.capturedAtMs !== undefined &&
222
+ now - snap.capturedAtMs > maxStaleMs
223
+ ) {
224
+ return { kind: "skip", accountLabel: label, reason: "stale-snapshot" };
225
+ }
226
+
114
227
  const currentHealth = classifyHealth(snap);
115
228
 
116
229
  // Unknown (probe failed) or blocked — skip entirely.
@@ -147,6 +260,31 @@ export function evaluateQuotaWatchAccount(args: {
147
260
  lastNotifiedHealth: "healthy",
148
261
  lastNotifiedAt: now,
149
262
  };
263
+ // A recovery observed on the first post-boot tick is not attributable
264
+ // to "just now" — the account may have recovered any time while this
265
+ // gateway was down, and a fleet bounce synchronizes every agent's
266
+ // first tick (the 2026-06-09 26-message flood). Reconcile silently.
267
+ if (bootTick) {
268
+ return {
269
+ kind: "reconcile",
270
+ accountLabel: label,
271
+ newAccountState: newState,
272
+ transition: "recovered-to-healthy",
273
+ reason: "boot-tick-recovery",
274
+ };
275
+ }
276
+ // Recovery whose matching 🟡 warning is hours old: the "all clear" is
277
+ // no longer actionable news (the user has long moved on; /auth shows
278
+ // live state on demand). Clear the latch without a message.
279
+ if (lateRecoveryMs > 0 && now - prev.lastNotifiedAt > lateRecoveryMs) {
280
+ return {
281
+ kind: "reconcile",
282
+ accountLabel: label,
283
+ newAccountState: newState,
284
+ transition: "recovered-to-healthy",
285
+ reason: "late-recovery",
286
+ };
287
+ }
150
288
  return {
151
289
  kind: "notify",
152
290
  accountLabel: label,
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Telegram legacy accounts-table twin of the CLI honesty fix — the
3
+ * legacy table renders exactly when the live probe FAILED, i.e. when
4
+ * cached-data disclosure matters most.
5
+ */
6
+ import { describe, it, expect } from "vitest";
7
+ import { formatQuotaUtilCell } from "../gateway/auth-command.js";
8
+
9
+ const NOW = 1_780_000_000_000;
10
+
11
+ describe("formatQuotaUtilCell (Telegram legacy table)", () => {
12
+ it("no cached snapshot → 'no data'", () => {
13
+ expect(formatQuotaUtilCell({ last_quota: null }, NOW)).toBe("no data");
14
+ });
15
+
16
+ it("renders both windows with the snapshot age", () => {
17
+ const cell = formatQuotaUtilCell(
18
+ { last_quota: { fiveHourUtilizationPct: 84.6, sevenDayUtilizationPct: 12.1, capturedAt: NOW - 90_000 } },
19
+ NOW,
20
+ );
21
+ expect(cell).toBe("85%·12% (1m 30s ago)");
22
+ });
23
+ });
@@ -173,3 +173,74 @@ describe('runFleetAutoFallback', () => {
173
173
  }
174
174
  });
175
175
  });
176
+
177
+ // ── failure notice (broken-promise fix, 2026-06-09 incident follow-up) ──────
178
+
179
+ import { renderFallbackFailureNotice } from "../auto-fallback-fleet.js";
180
+
181
+ describe("renderFallbackFailureNotice", () => {
182
+ it("names the trigger agent, the reason, and the manual recovery verbs", () => {
183
+ const html = renderFallbackFailureNotice("marko", "auth-broker unreachable (no client).");
184
+ expect(html).toContain("Auto-failover could not run");
185
+ expect(html).toContain("<b>marko</b>");
186
+ expect(html).toContain("auth-broker unreachable");
187
+ expect(html).toContain("/auth use");
188
+ expect(html).toContain("/auth</code>");
189
+ });
190
+
191
+ it("escapes HTML in the error reason (broker errors can contain angle brackets)", () => {
192
+ const html = renderFallbackFailureNotice("a<b", 'request <probe-quota> failed & "timed out"');
193
+ expect(html).toContain("a&lt;b");
194
+ expect(html).toContain("&lt;probe-quota&gt;");
195
+ expect(html).toContain("&amp;");
196
+ expect(html).not.toMatch(/<probe-quota>/);
197
+ });
198
+ });
199
+
200
+ // ── failure-notice cooldown (reviewer blocker: gate window never arms on
201
+ // failure; quota_wall_detected re-fires ~60s → unbounded notice spam) ─────
202
+
203
+ import {
204
+ evaluateFallbackFailureNotice,
205
+ FALLBACK_FAILURE_NOTICE_COOLDOWN_MS,
206
+ } from "../auto-fallback-fleet.js";
207
+
208
+ describe("evaluateFallbackFailureNotice", () => {
209
+ const T0 = 1_780_000_000_000;
210
+
211
+ it("first failure always sends and arms the cooldown", () => {
212
+ const r = evaluateFallbackFailureNotice({ lastSentAtMs: 0 }, T0);
213
+ expect(r.send).toBe(true);
214
+ expect(r.next.lastSentAtMs).toBe(T0);
215
+ });
216
+
217
+ it("a repeat failure inside the cooldown is suppressed and does NOT extend the window", () => {
218
+ const armed = { lastSentAtMs: T0 };
219
+ const r = evaluateFallbackFailureNotice(armed, T0 + 60_000);
220
+ expect(r.send).toBe(false);
221
+ expect(r.next).toBe(armed); // unchanged — window not extended by suppressed attempts
222
+ });
223
+
224
+ it("sends again once the cooldown elapses", () => {
225
+ const r = evaluateFallbackFailureNotice(
226
+ { lastSentAtMs: T0 },
227
+ T0 + FALLBACK_FAILURE_NOTICE_COOLDOWN_MS,
228
+ );
229
+ expect(r.send).toBe(true);
230
+ expect(r.next.lastSentAtMs).toBe(T0 + FALLBACK_FAILURE_NOTICE_COOLDOWN_MS);
231
+ });
232
+
233
+ it("bounds the 60s quota_wall_detected re-fire storm to ≤2 notices/hour", () => {
234
+ // Simulate a wedged agent re-signalling every 60s for one hour with a
235
+ // dead broker — the incident shape the reviewer flagged.
236
+ let state = { lastSentAtMs: 0 };
237
+ let sent = 0;
238
+ for (let t = T0; t < T0 + 3_600_000; t += 60_000) {
239
+ const r = evaluateFallbackFailureNotice(state, t);
240
+ if (r.send) sent++;
241
+ state = r.next;
242
+ }
243
+ expect(sent).toBeLessThanOrEqual(2);
244
+ expect(sent).toBeGreaterThanOrEqual(1);
245
+ });
246
+ });
@@ -0,0 +1,205 @@
1
+ /**
2
+ * `/model` Telegram command — parser + handler coverage.
3
+ *
4
+ * The headline guarantees:
5
+ *
6
+ * 1. The bare `/model` form NEVER reaches the inject primitive —
7
+ * with no argument claude renders an interactive picker modal
8
+ * that Telegram can't drive (no arrows, no Esc), so injecting it
9
+ * would wedge the pane (the /rate-limit-options class of wedge).
10
+ * 2. The argument is shape-gated before it's typed into the tmux
11
+ * pane: one token, no whitespace, no shell/control smuggling.
12
+ * 3. The set path injects exactly `/model <name>` (claude's own
13
+ * REPL verb — already on the inject allowlist) and relays the
14
+ * captured output, with the session-only persistence caveat.
15
+ */
16
+ import { describe, it, expect } from "vitest";
17
+ import {
18
+ parseModelCommand,
19
+ handleModelCommand,
20
+ isValidModelArg,
21
+ MODEL_ALIASES,
22
+ type ModelCommandDeps,
23
+ } from "../gateway/model-command.js";
24
+ import type { InjectResult } from "../../src/agents/inject.js";
25
+
26
/**
 * Test fixture: a successful inject result for the `/model` command whose
 * captured output is `output`.
 */
function okResult(output: string): InjectResult {
  const result: InjectResult = {
    outcome: "ok",
    output: output,
    truncated: false,
    command: "/model",
    meta: { description: "Open model picker", expectsOutput: true },
  };
  return result;
}
35
+
36
/**
 * Test fixture: builds a `ModelCommandDeps` whose `inject` records every
 * (agent, command) pair into `calls` and answers with a canned success.
 * Any field supplied in `overrides` replaces the corresponding default.
 *
 * @returns The assembled deps together with the live `calls` log.
 */
function makeDeps(overrides: Partial<ModelCommandDeps> = {}) {
  const calls: Array<{ agent: string; command: string }> = [];

  const deps: ModelCommandDeps = {
    // Recording stub — never touches a real tmux pane.
    async inject(agent, command) {
      calls.push({ agent, command });
      return okResult("⏺ Set model to sonnet");
    },
    getAgentName() {
      return "klanker";
    },
    getConfiguredModel() {
      return "claude-sonnet-4-6";
    },
    escapeHtml(s) {
      // Ampersand first so the entities produced below are not re-escaped.
      const ampersandsDone = s.replace(/&/g, "&amp;");
      const openersDone = ampersandsDone.replace(/</g, "&lt;");
      return openersDone.replace(/>/g, "&gt;");
    },
    preBlock(s) {
      return `<pre>${s}</pre>`;
    },
    // Spread last so caller-supplied overrides win over the defaults above.
    ...overrides,
  };

  return { deps, calls };
}
52
+
53
+ describe("parseModelCommand", () => {
54
+ it("returns null for non-/model text", () => {
55
+ expect(parseModelCommand("/auth list")).toBeNull();
56
+ expect(parseModelCommand("model sonnet")).toBeNull();
57
+ expect(parseModelCommand("/modelx sonnet")).toBeNull();
58
+ });
59
+
60
+ it("bare /model (and @botname form) parses as show", () => {
61
+ expect(parseModelCommand("/model")).toEqual({ kind: "show" });
62
+ expect(parseModelCommand("/model@klanker_bot")).toEqual({ kind: "show" });
63
+ expect(parseModelCommand("/model ")).toEqual({ kind: "show" });
64
+ });
65
+
66
+ it("single valid token parses as set", () => {
67
+ expect(parseModelCommand("/model sonnet")).toEqual({ kind: "set", model: "sonnet" });
68
+ expect(parseModelCommand("/model@bot claude-opus-4-8")).toEqual({
69
+ kind: "set",
70
+ model: "claude-opus-4-8",
71
+ });
72
+ // 1m-context variant ids carry brackets
73
+ expect(parseModelCommand("/model claude-sonnet-4-6[1m]")).toEqual({
74
+ kind: "set",
75
+ model: "claude-sonnet-4-6[1m]",
76
+ });
77
+ });
78
+
79
+ it("/model help parses as help", () => {
80
+ expect(parseModelCommand("/model help")).toEqual({ kind: "help" });
81
+ });
82
+
83
+ it("rejects multi-token args (no second token can ride into the pane)", () => {
84
+ const p = parseModelCommand("/model sonnet; rm -rf /");
85
+ expect(p?.kind).toBe("help");
86
+ });
87
+
88
+ it("rejects shell/control smuggling shapes", () => {
89
+ for (const bad of [
90
+ "/model $(reboot)",
91
+ "/model `id`",
92
+ "/model -opus", // leading dash — looks like a flag
93
+ "/model sonnet\nEnter",
94
+ "/model ../../etc/passwd",
95
+ "/model a|b",
96
+ ]) {
97
+ const p = parseModelCommand(bad);
98
+ expect(p?.kind, `should reject: ${bad}`).toBe("help");
99
+ }
100
+ });
101
+ });
102
+
103
+ describe("isValidModelArg", () => {
104
+ it("accepts aliases and full ids", () => {
105
+ for (const good of [...MODEL_ALIASES, "claude-opus-4-8", "claude-haiku-4-5-20251001", "claude-sonnet-4-6[1m]"]) {
106
+ expect(isValidModelArg(good), good).toBe(true);
107
+ }
108
+ });
109
+ it("rejects whitespace, metacharacters, and over-long strings", () => {
110
+ for (const bad of ["", " ", "a b", "a;b", "a/b", "-x", "a".repeat(120), "a\tb", "a\nb"]) {
111
+ expect(isValidModelArg(bad), JSON.stringify(bad)).toBe(false);
112
+ }
113
+ });
114
+ });
115
+
116
+ describe("handleModelCommand — show / help never inject (picker-wedge guard)", () => {
117
+ it("show renders configured model + switch options without injecting", async () => {
118
+ const { deps, calls } = makeDeps();
119
+ const reply = await handleModelCommand({ kind: "show" }, deps);
120
+ expect(calls.length).toBe(0);
121
+ expect(reply.text).toContain("claude-sonnet-4-6");
122
+ expect(reply.text).toContain("/model opus");
123
+ expect(reply.text).toContain("switchroom.yaml");
124
+ });
125
+
126
+ it("show falls back to 'default' when no model configured", async () => {
127
+ const { deps, calls } = makeDeps({ getConfiguredModel: () => null });
128
+ const reply = await handleModelCommand({ kind: "show" }, deps);
129
+ expect(calls.length).toBe(0);
130
+ expect(reply.text).toContain("<code>default</code>");
131
+ });
132
+
133
+ it("help never injects", async () => {
134
+ const { deps, calls } = makeDeps();
135
+ const reply = await handleModelCommand({ kind: "help", reason: "nope" }, deps);
136
+ expect(calls.length).toBe(0);
137
+ expect(reply.text).toContain("nope");
138
+ });
139
+ });
140
+
141
+ describe("handleModelCommand — set", () => {
142
+ it("injects exactly `/model <name>` once and relays output + persistence note", async () => {
143
+ const { deps, calls } = makeDeps();
144
+ const reply = await handleModelCommand({ kind: "set", model: "opus" }, deps);
145
+ expect(calls).toEqual([{ agent: "klanker", command: "/model opus" }]);
146
+ expect(reply.text).toContain("<pre>⏺ Set model to sonnet</pre>");
147
+ expect(reply.text).toContain("Session-only");
148
+ expect(reply.html).toBe(true);
149
+ });
150
+
151
+ it("re-gates the model arg at the seam (caller bypassing the parser)", async () => {
152
+ const { deps, calls } = makeDeps();
153
+ const reply = await handleModelCommand({ kind: "set", model: "a b; reboot" }, deps);
154
+ expect(calls.length).toBe(0);
155
+ expect(reply.text).toContain("not a valid model name");
156
+ });
157
+
158
+ it("ok_no_output explains the empty capture", async () => {
159
+ const { deps } = makeDeps({
160
+ inject: async () => ({
161
+ outcome: "ok_no_output",
162
+ output: "",
163
+ truncated: false,
164
+ command: "/model",
165
+ meta: { description: "Open model picker", expectsOutput: true },
166
+ }),
167
+ });
168
+ const reply = await handleModelCommand({ kind: "set", model: "sonnet" }, deps);
169
+ expect(reply.text).toContain("no response captured");
170
+ });
171
+
172
+ it("session_missing failure surfaces the tmux-supervisor hint", async () => {
173
+ const { deps } = makeDeps({
174
+ inject: async () => ({
175
+ outcome: "failed",
176
+ output: "",
177
+ truncated: false,
178
+ command: "/model",
179
+ meta: null,
180
+ errorCode: "session_missing",
181
+ errorMessage: "tmux session not found",
182
+ }),
183
+ });
184
+ const reply = await handleModelCommand({ kind: "set", model: "sonnet" }, deps);
185
+ expect(reply.text).toContain("tmux session not found");
186
+ expect(reply.text).toContain("tmux supervisor");
187
+ });
188
+
189
+ it("inject throwing is surfaced, not propagated", async () => {
190
+ const { deps } = makeDeps({
191
+ inject: async () => {
192
+ throw new Error("boom");
193
+ },
194
+ });
195
+ const reply = await handleModelCommand({ kind: "set", model: "sonnet" }, deps);
196
+ expect(reply.text).toContain("boom");
197
+ });
198
+ });
199
+
200
+ describe("inject allowlist contract", () => {
201
+ it("/model stays on the inject allowlist (the set path depends on it)", async () => {
202
+ const { INJECT_COMMANDS } = await import("../../src/agents/inject.js");
203
+ expect(INJECT_COMMANDS.has("/model")).toBe(true);
204
+ });
205
+ });