switchroom 0.14.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,366 @@
1
+ /**
2
+ * Unit tests for the proactive quota threshold-tier push helper (#E4).
3
+ * Mirrors the shape of credits-watch.test.ts. Covers:
4
+ * - Pure decision logic across all transition-table cases
5
+ * - State persistence round-trip
6
+ * - Message body content sanity
7
+ */
8
+
9
+ import { describe, it, expect, beforeEach, afterEach } from "vitest";
10
+ import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "fs";
11
+ import { tmpdir } from "os";
12
+ import { join } from "path";
13
+ import {
14
+ evaluateQuotaWatchAccount,
15
+ loadQuotaWatchState,
16
+ saveQuotaWatchState,
17
+ patchQuotaWatchState,
18
+ emptyQuotaWatchState,
19
+ emptyAccountState,
20
+ } from "../quota-watch.js";
21
+ import type { AccountSnapshot } from "../auth-snapshot-format.js";
22
+ import type { QuotaUtilization } from "../quota-check.js";
23
+
24
+ // ── test fixtures ────────────────────────────────────────────────────────────
25
+
26
+ const NOW = 1_780_000_000_000;
27
+
28
+ /** Build a minimal QuotaUtilization fixture from 5-hour and 7-day utilization percentages; omitted reset times are stored as null and the claim/overage fields are always null. */
29
+ function makeQuota(
30
+ fivePct: number, // becomes fiveHourUtilizationPct
31
+ sevenPct: number, // becomes sevenDayUtilizationPct
32
+ fiveHourResetAt?: Date,
33
+ sevenDayResetAt?: Date,
34
+ ): QuotaUtilization {
35
+ return {
36
+ fiveHourUtilizationPct: fivePct,
37
+ sevenDayUtilizationPct: sevenPct,
38
+ fiveHourResetAt: fiveHourResetAt ?? null, // omitted argument → null
39
+ sevenDayResetAt: sevenDayResetAt ?? null, // omitted argument → null
40
+ representativeClaim: null,
41
+ overageStatus: null,
42
+ overageDisabledReason: null,
43
+ };
44
+ }
45
+
46
+ /** Build an AccountSnapshot fixture with the given label and quota (quota may be null); isActive defaults to false. */
47
+ function makeSnap(
48
+ label: string,
49
+ quota: QuotaUtilization | null,
50
+ isActive = false,
51
+ ): AccountSnapshot {
52
+ return { label, isActive, quota };
53
+ }
54
+
55
+ const HEALTHY_SNAP = makeSnap("alice@example.com", makeQuota(30, 40)); // 5h 30% / 7d 40% — the transition tests treat this as the healthy case
56
+ const THROTTLING_5H = makeSnap("alice@example.com", makeQuota(85, 40)); // 5h 85% — tests expect an "entered-throttling" notify
57
+ const THROTTLING_7D = makeSnap("alice@example.com", makeQuota(40, 90)); // 7d 90% — tests expect an "entered-throttling" notify
58
+ const BLOCKED_SNAP = makeSnap("alice@example.com", makeQuota(99.9, 99.9)); // both windows 99.9% — tests expect skip reason "blocked-not-our-domain"
59
+ const UNKNOWN_SNAP = makeSnap("alice@example.com", null); // no quota data — tests expect skip reason "unknown-not-our-domain"
60
+
61
+ const PREV_NEVER_NOTIFIED = emptyAccountState(); // lastNotifiedHealth: null
62
+ const PREV_WAS_HEALTHY = { lastNotifiedHealth: "healthy" as const, lastNotifiedAt: NOW - 1000 }; // last notification was "healthy", one second before NOW
63
+ const PREV_WAS_THROTTLING = { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: NOW - 1000 }; // last notification was "throttling", one second before NOW
64
+
65
+ // ── transition decision tests ────────────────────────────────────────────────
66
+
67
+ describe("evaluateQuotaWatchAccount — transition table", () => { // each case feeds one (snap, prev) pair to the evaluator and checks the returned decision
68
+ it("healthy → healthy (never notified) skips", () => {
69
+ const d = evaluateQuotaWatchAccount({
70
+ agentName: "lawgpt",
71
+ snap: HEALTHY_SNAP,
72
+ prev: PREV_NEVER_NOTIFIED,
73
+ now: NOW,
74
+ });
75
+ expect(d.kind).toBe("skip");
76
+ if (d.kind !== "skip") return; // narrows the decision union for TypeScript; the expect above already fails the test on any other kind
77
+ expect(d.reason).toBe("steady-state");
78
+ });
79
+
80
+ it("healthy → healthy (was healthy) skips", () => {
81
+ const d = evaluateQuotaWatchAccount({
82
+ agentName: "lawgpt",
83
+ snap: HEALTHY_SNAP,
84
+ prev: PREV_WAS_HEALTHY,
85
+ now: NOW,
86
+ });
87
+ expect(d.kind).toBe("skip");
88
+ });
89
+
90
+ it("healthy → throttling (5h) fires entered-throttling notification", () => {
91
+ const d = evaluateQuotaWatchAccount({
92
+ agentName: "lawgpt",
93
+ snap: THROTTLING_5H,
94
+ prev: PREV_NEVER_NOTIFIED,
95
+ now: NOW,
96
+ });
97
+ expect(d.kind).toBe("notify");
98
+ if (d.kind !== "notify") return;
99
+ expect(d.transition).toBe("entered-throttling");
100
+ expect(d.newAccountState.lastNotifiedHealth).toBe("throttling");
101
+ expect(d.newAccountState.lastNotifiedAt).toBe(NOW); // the new state is stamped with the `now` passed in
102
+ });
103
+
104
+ it("healthy → throttling (7d) fires entered-throttling notification", () => {
105
+ const d = evaluateQuotaWatchAccount({
106
+ agentName: "lawgpt",
107
+ snap: THROTTLING_7D,
108
+ prev: PREV_NEVER_NOTIFIED,
109
+ now: NOW,
110
+ });
111
+ expect(d.kind).toBe("notify");
112
+ if (d.kind !== "notify") return;
113
+ expect(d.transition).toBe("entered-throttling");
114
+ });
115
+
116
+ it("throttling → throttling skips (already notified)", () => {
117
+ const d = evaluateQuotaWatchAccount({
118
+ agentName: "lawgpt",
119
+ snap: THROTTLING_5H,
120
+ prev: PREV_WAS_THROTTLING,
121
+ now: NOW,
122
+ });
123
+ expect(d.kind).toBe("skip");
124
+ if (d.kind !== "skip") return;
125
+ expect(d.reason).toBe("steady-state");
126
+ });
127
+
128
+ it("throttling → healthy fires recovered-to-healthy notification", () => {
129
+ const d = evaluateQuotaWatchAccount({
130
+ agentName: "lawgpt",
131
+ snap: HEALTHY_SNAP,
132
+ prev: PREV_WAS_THROTTLING,
133
+ now: NOW,
134
+ });
135
+ expect(d.kind).toBe("notify");
136
+ if (d.kind !== "notify") return;
137
+ expect(d.transition).toBe("recovered-to-healthy");
138
+ expect(d.newAccountState.lastNotifiedHealth).toBe("healthy");
139
+ expect(d.newAccountState.lastNotifiedAt).toBe(NOW);
140
+ });
141
+
142
+ it("* → blocked skips (credits-watch domain)", () => {
143
+ const dFromHealthy = evaluateQuotaWatchAccount({
144
+ agentName: "lawgpt",
145
+ snap: BLOCKED_SNAP,
146
+ prev: PREV_NEVER_NOTIFIED,
147
+ now: NOW,
148
+ });
149
+ expect(dFromHealthy.kind).toBe("skip");
150
+ if (dFromHealthy.kind !== "skip") return;
151
+ expect(dFromHealthy.reason).toBe("blocked-not-our-domain");
152
+ });
153
+
154
+ it("blocked → healthy skips (credits-watch domain)", () => {
155
+ // blocked → healthy: credits-watch handles the blocked recovery path.
156
+ // This watcher never records 'blocked', so the stored prev is still the never-notified state.
157
+ const d = evaluateQuotaWatchAccount({
158
+ agentName: "lawgpt",
159
+ snap: HEALTHY_SNAP,
160
+ prev: PREV_NEVER_NOTIFIED, // we were never tracking as throttling
161
+ now: NOW,
162
+ });
163
+ // Same inputs as the first case (healthy snap, never-notified prev): expect skip, not a recovery notify
164
+ expect(d.kind).toBe("skip");
165
+ });
166
+
167
+ it("unknown quota snap skips (probe failed)", () => {
168
+ const d = evaluateQuotaWatchAccount({
169
+ agentName: "lawgpt",
170
+ snap: UNKNOWN_SNAP,
171
+ prev: PREV_WAS_THROTTLING,
172
+ now: NOW,
173
+ });
174
+ expect(d.kind).toBe("skip");
175
+ if (d.kind !== "skip") return;
176
+ expect(d.reason).toBe("unknown-not-our-domain");
177
+ });
178
+
179
+ it("no duplicate: two consecutive polls in throttling state produce one notify then skip", () => {
180
+ // First poll: healthy → throttling → notify
181
+ const d1 = evaluateQuotaWatchAccount({
182
+ agentName: "lawgpt",
183
+ snap: THROTTLING_5H,
184
+ prev: PREV_NEVER_NOTIFIED,
185
+ now: NOW,
186
+ });
187
+ expect(d1.kind).toBe("notify");
188
+ if (d1.kind !== "notify") return;
189
+
190
+ // Second poll: throttling → throttling → skip
191
+ const d2 = evaluateQuotaWatchAccount({
192
+ agentName: "lawgpt",
193
+ snap: THROTTLING_5H,
194
+ prev: d1.newAccountState, // feed the first decision's state back in as the previous state
195
+ now: NOW + 15 * 60_000, // 15 minutes after the first poll
196
+ });
197
+ expect(d2.kind).toBe("skip");
198
+ });
199
+ });
200
+
201
+ // ── message content tests ────────────────────────────────────────────────────
202
+
203
+ describe("evaluateQuotaWatchAccount — message content", () => { // substring checks on d.message only; the full wording is not pinned
204
+ it("throttling message contains account label and percentages", () => {
205
+ const d = evaluateQuotaWatchAccount({
206
+ agentName: "lawgpt",
207
+ snap: THROTTLING_5H,
208
+ prev: PREV_NEVER_NOTIFIED,
209
+ now: NOW,
210
+ });
211
+ expect(d.kind).toBe("notify");
212
+ if (d.kind !== "notify") return; // type narrowing so d.message is accessible
213
+ expect(d.message).toContain("alice@example.com");
214
+ expect(d.message).toContain("85%"); // 5-hour utilization of the THROTTLING_5H fixture
215
+ expect(d.message).toContain("40%"); // 7-day utilization of the THROTTLING_5H fixture
216
+ expect(d.message).toContain("5-hour");
217
+ });
218
+
219
+ it("recovery message contains account label and percentages", () => {
220
+ const d = evaluateQuotaWatchAccount({
221
+ agentName: "lawgpt",
222
+ snap: HEALTHY_SNAP,
223
+ prev: PREV_WAS_THROTTLING,
224
+ now: NOW,
225
+ });
226
+ expect(d.kind).toBe("notify");
227
+ if (d.kind !== "notify") return;
228
+ expect(d.message).toContain("alice@example.com");
229
+ expect(d.message).toContain("Quota back in healthy range");
230
+ expect(d.message).toContain("30%"); // 5-hour utilization of the HEALTHY_SNAP fixture
231
+ });
232
+
233
+ it("throttling message HTML-escapes account label", () => {
234
+ const d = evaluateQuotaWatchAccount({
235
+ agentName: "lawgpt",
236
+ snap: makeSnap("<evil>@example.com", makeQuota(85, 40)),
237
+ prev: PREV_NEVER_NOTIFIED,
238
+ now: NOW,
239
+ });
240
+ expect(d.kind).toBe("notify");
241
+ if (d.kind !== "notify") return;
242
+ expect(d.message).toContain("&lt;evil&gt;");
243
+ expect(d.message).not.toContain("<evil>"); // the raw tag must not survive in the message
244
+ });
245
+
246
+ it("throttling message for active account mentions /auth use", () => {
247
+ const activeSnap = makeSnap("alice@example.com", makeQuota(85, 40), /* isActive */ true);
248
+ const d = evaluateQuotaWatchAccount({
249
+ agentName: "lawgpt",
250
+ snap: activeSnap,
251
+ prev: PREV_NEVER_NOTIFIED,
252
+ now: NOW,
253
+ });
254
+ expect(d.kind).toBe("notify");
255
+ if (d.kind !== "notify") return;
256
+ // Active account message should mention switching; only the "/auth" prefix is asserted
257
+ expect(d.message).toContain("/auth");
258
+ });
259
+
260
+ it("throttling message includes reset time when provided", () => {
261
+ const resetAt = new Date(NOW + 3 * 60 * 60_000); // 3 hours from now
262
+ const d = evaluateQuotaWatchAccount({
263
+ agentName: "lawgpt",
264
+ snap: makeSnap("alice@example.com", makeQuota(85, 40, resetAt)), // third makeQuota argument is fiveHourResetAt
265
+ prev: PREV_NEVER_NOTIFIED,
266
+ now: NOW,
267
+ });
268
+ expect(d.kind).toBe("notify");
269
+ if (d.kind !== "notify") return;
270
+ expect(d.message).toContain("refills in");
271
+ });
272
+ });
273
+
274
+ // ── state persistence tests ──────────────────────────────────────────────────
275
+
276
+ describe("loadQuotaWatchState / saveQuotaWatchState — round-trip", () => { // exercises the real filesystem inside a per-test temp directory
277
+ let tmp: string;
278
+
279
+ beforeEach(() => {
280
+ tmp = mkdtempSync(join(tmpdir(), "quota-watch-")); // fresh, uniquely named directory for every test
281
+ });
282
+
283
+ afterEach(() => {
284
+ rmSync(tmp, { recursive: true, force: true }); // force: no error if the directory is already gone
285
+ });
286
+
287
+ it("returns emptyQuotaWatchState when no file exists", () => {
288
+ expect(loadQuotaWatchState(tmp)).toEqual(emptyQuotaWatchState());
289
+ });
290
+
291
+ it("round-trips a saved state with multiple accounts", () => {
292
+ const state = {
293
+ "alice@example.com": { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1_780_000_000_000 },
294
+ "bob@example.com": { lastNotifiedHealth: null, lastNotifiedAt: 0 },
295
+ };
296
+ saveQuotaWatchState(tmp, state);
297
+ expect(loadQuotaWatchState(tmp)).toEqual(state);
298
+ });
299
+
300
+ it("falls back to empty on malformed JSON", () => {
301
+ mkdirSync(tmp, { recursive: true }); // tmp already exists from beforeEach; with recursive this is a no-op
302
+ writeFileSync(join(tmp, "quota-watch.json"), "{broken"); // not parseable as JSON
303
+ expect(loadQuotaWatchState(tmp)).toEqual(emptyQuotaWatchState());
304
+ });
305
+
306
+ it("falls back to empty on shape mismatch (not an object)", () => {
307
+ writeFileSync(join(tmp, "quota-watch.json"), JSON.stringify([1, 2, 3])); // valid JSON, but an array
308
+ expect(loadQuotaWatchState(tmp)).toEqual(emptyQuotaWatchState());
309
+ });
310
+
311
+ it("drops malformed entries but preserves valid ones", () => {
312
+ writeFileSync(
313
+ join(tmp, "quota-watch.json"),
314
+ JSON.stringify({
315
+ "good@example.com": { lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 },
316
+ "bad@example.com": { lastNotifiedHealth: "invalid", lastNotifiedAt: "not-a-number" }, // entry expected to be dropped on load
317
+ }),
318
+ );
319
+ const loaded = loadQuotaWatchState(tmp);
320
+ expect(loaded["good@example.com"]).toEqual({ lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 });
321
+ expect(loaded["bad@example.com"]).toBeUndefined();
322
+ });
323
+
324
+ it("creates the state dir on save (if it doesn't exist yet)", () => {
325
+ const fresh = join(tmp, "fresh-subdir"); // path only; nothing has created this directory yet
326
+ saveQuotaWatchState(fresh, {});
327
+ expect(loadQuotaWatchState(fresh)).toEqual({});
328
+ });
329
+ });
330
+
331
+ // ── patchQuotaWatchState tests ────────────────────────────────────────────────
332
+
333
+ describe("patchQuotaWatchState", () => { // checks add, overwrite, and non-mutation of the input state
334
+ it("adds a new account entry without clobbering others", () => {
335
+ const current: ReturnType<typeof emptyQuotaWatchState> = { // borrow the state type from the factory's return type
336
+ "alice@example.com": { lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 },
337
+ };
338
+ const updated = patchQuotaWatchState(
339
+ current,
340
+ "bob@example.com",
341
+ { lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 },
342
+ );
343
+ expect(updated["alice@example.com"]).toEqual({ lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 });
344
+ expect(updated["bob@example.com"]).toEqual({ lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 });
345
+ });
346
+
347
+ it("updates an existing account entry", () => {
348
+ const current = {
349
+ "alice@example.com": { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1000 },
350
+ };
351
+ const updated = patchQuotaWatchState(
352
+ current,
353
+ "alice@example.com",
354
+ { lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 },
355
+ );
356
+ expect(updated["alice@example.com"]).toEqual({ lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 });
357
+ });
358
+
359
+ it("does not mutate the original state object", () => {
360
+ const current = {
361
+ "alice@example.com": { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1000 },
362
+ };
363
+ patchQuotaWatchState(current, "bob@example.com", { lastNotifiedHealth: null, lastNotifiedAt: 0 }); // return value ignored on purpose; only the input is inspected
364
+ expect(current["bob@example.com"]).toBeUndefined();
365
+ });
366
+ });
@@ -17,9 +17,57 @@ import { describe, it, expect } from 'vitest'
17
17
  import {
18
18
  decideTurnFlush,
19
19
  isSilentFlushMarker,
20
+ isCompositeSilentNoise,
20
21
  isTurnFlushSafetyEnabled,
21
22
  } from '../turn-flush-safety.js'
22
23
 
24
+ describe('isCompositeSilentNoise — Stop-hook re-prompt leak backstop', () => {
25
+ it('suppresses the observed leak "Sent.\\nNO_REPLY\\nNO_REPLY"', () => {
26
+ expect(isCompositeSilentNoise('Sent.\nNO_REPLY\nNO_REPLY')).toBe(true)
27
+ })
28
+ it('suppresses repeated bare markers and marker+confirmation variants', () => {
29
+ expect(isCompositeSilentNoise('NO_REPLY\nNO_REPLY')).toBe(true)
30
+ expect(isCompositeSilentNoise('Done\nNO_REPLY')).toBe(true) // confirmation without trailing punctuation
31
+ expect(isCompositeSilentNoise('NO_REPLY\nHEARTBEAT_OK')).toBe(true) // two different markers
32
+ })
33
+ it('requires at least one real marker — standalone confirmations still flush', () => {
34
+ // Conservative: no NO_REPLY/HEARTBEAT_OK present → NOT suppressed here,
35
+ // so we never silently drop a turn that wasn't already signalling silence.
36
+ expect(isCompositeSilentNoise('Sent.')).toBe(false)
37
+ expect(isCompositeSilentNoise('Done.\nOK')).toBe(false)
38
+ })
39
+ it('does NOT suppress real content glued to a marker', () => {
40
+ expect(
41
+ isCompositeSilentNoise('Here is the summary of the page.\nNO_REPLY'),
42
+ ).toBe(false)
43
+ expect(isCompositeSilentNoise('NO_REPLY\nThe answer is 42.')).toBe(false)
44
+ })
45
+ it('handles non-strings / empty safely', () => {
46
+ expect(isCompositeSilentNoise(undefined)).toBe(false) // undefined is the only non-string the signature admits
47
+ expect(isCompositeSilentNoise('')).toBe(false)
48
+ expect(isCompositeSilentNoise(' \n ')).toBe(false) // whitespace-only lines are filtered out, leaving nothing to match
49
+ })
50
+ })
51
+
52
+ describe('decideTurnFlush — composite silent noise is skipped, not leaked', () => {
53
+ it('skips "Sent.\\nNO_REPLY\\nNO_REPLY" (the live clerk/test-harness leak)', () => {
54
+ const d = decideTurnFlush({
55
+ chatId: '12345',
56
+ replyCalled: false,
57
+ capturedText: ['Sent.', 'NO_REPLY', 'NO_REPLY'], // decideTurnFlush joins these with '\n' before checking
58
+ })
59
+ expect(d).toEqual({ kind: 'skip', reason: 'silent-marker' }) // composite noise reuses the 'silent-marker' reason
60
+ })
61
+ it('still flushes genuine trailing answer text', () => {
62
+ const d = decideTurnFlush({
63
+ chatId: '12345',
64
+ replyCalled: false,
65
+ capturedText: ['The page summarises three news stories.'],
66
+ })
67
+ expect(d.kind).toBe('flush')
68
+ })
69
+ })
70
+
23
71
  describe('decideTurnFlush', () => {
24
72
  it('(a) does NOT flush when the reply tool was called', () => {
25
73
  const decision = decideTurnFlush({
@@ -50,6 +50,48 @@ export function isSilentFlushMarker(text: string | undefined): boolean {
50
50
  return SILENT_MARKERS.has(trimmed.toUpperCase())
51
51
  }
52
52
 
53
+ // Trivial end-of-turn confirmations the model emits as terminal text after
54
+ // calling reply (e.g. "Sent." once the reply tool returns). On their own
55
+ // they're harmless; the danger is when they're glued to silent markers
56
+ // across Stop-hook re-prompt cycles into a composite blob like
57
+ // "Sent.\nNO_REPLY\nNO_REPLY" — which `isSilentFlushMarker` can't match
58
+ // (multi-line, over the length guard) so it leaks to chat. See
59
+ // `isCompositeSilentNoise`. Entries are stored upper-cased; lookups upper-case the candidate first.
60
+ const TRIVIAL_CONFIRMATIONS = new Set(['SENT', 'DONE', 'OK', 'OKAY', 'ACK'])
61
+
62
+ function isTrivialConfirmationLine(line: string): boolean { // true when `line` is a TRIVIAL_CONFIRMATIONS word, case-insensitively, with at most one trailing non-word character
63
+ let t = line.trim()
64
+ if (t.length === 0 || t.length > 8) return false // cheap reject: blank, or longer than 8 characters after trimming
65
+ if (/\W$/.test(t)) t = t.slice(0, -1) // strip exactly one trailing non-word character ("Sent." → "Sent"); "Sent!!" keeps one "!" and will not match
66
+ return TRIVIAL_CONFIRMATIONS.has(t.toUpperCase())
67
+ }
68
+
69
+ /**
70
+ * Recognise a multi-line composite that is *entirely* silent noise — every
71
+ * non-empty line is a silent marker (NO_REPLY / HEARTBEAT_OK) or a trivial
72
+ * confirmation ("Sent."), AND at least one line is a real silent marker.
73
+ *
74
+ * Backstop for the Stop-hook re-prompt leak: the model replies cleanly,
75
+ * emits a terminal "Sent.", gets re-prompted by the silent-end Stop hook,
76
+ * answers "NO_REPLY" one or more times, and the accumulated `capturedText`
77
+ * ("Sent.\nNO_REPLY\nNO_REPLY") flushes as a visible message because
78
+ * `isSilentFlushMarker` only matches a single sentinel. Requiring ≥1 hard
79
+ * marker keeps this conservative — a standalone "Sent." (no NO_REPLY) is NOT
80
+ * suppressed here, so we never silently drop a turn that wasn't already
81
+ * signalling "nothing to add". Returns false for non-string input and for text with no non-blank lines.
82
+ */
83
+ export function isCompositeSilentNoise(text: string | undefined): boolean {
84
+ if (typeof text !== 'string') return false // undefined (or any non-string at runtime) is never noise
85
+ const lines = text
86
+ .split('\n')
87
+ .map(l => l.trim()) // trimming also removes a trailing '\r' from CRLF input
88
+ .filter(l => l.length > 0) // blank and whitespace-only lines are ignored
89
+ if (lines.length === 0) return false // nothing left to classify
90
+ const hasMarker = lines.some(l => isSilentFlushMarker(l))
91
+ if (!hasMarker) return false // confirmations alone are not suppressed
92
+ return lines.every(l => isSilentFlushMarker(l) || isTrivialConfirmationLine(l)) // any other line means real content: do not suppress
93
+ }
94
+
53
95
  export type FlushDecision =
54
96
  | { kind: 'flush'; text: string }
55
97
  | { kind: 'skip'; reason: FlushSkipReason }
@@ -115,6 +157,11 @@ export function decideTurnFlush(input: FlushDecisionInput): FlushDecision {
115
157
  const joined = input.capturedText.join('\n').trim()
116
158
  if (joined.length === 0) return { kind: 'skip', reason: 'empty-text' }
117
159
  if (isSilentFlushMarker(joined)) return { kind: 'skip', reason: 'silent-marker' }
160
+ // Composite silent noise — e.g. "Sent.\nNO_REPLY\nNO_REPLY" accumulated
161
+ // across Stop-hook re-prompt cycles. The single-sentinel check above
162
+ // misses it (multi-line, over the length guard); without this the blob
163
+ // leaks to chat as a visible message.
164
+ if (isCompositeSilentNoise(joined)) return { kind: 'skip', reason: 'silent-marker' }
118
165
  return { kind: 'flush', text: joined }
119
166
  }
120
167
 
@@ -337,10 +337,10 @@ export async function waitForCardPhase(
337
337
  /**
338
338
  * Detect the progress card's phase from its rendered text.
339
339
  *
340
- * The actual card render (telegram-plugin/progress-card.ts) uses
341
- * emoji markers in the header: `✅` for done, `❌` for errors, `⚙️`
342
- * while working (foreground), `🌀` for Background (parent done but
343
- * fleet still running, see #862 / status-card-design.md §Header),
340
+ * The actual card render uses emoji markers in the header: `✅` for
341
+ * done, `❌` for errors, `⚙️` while working (foreground), `🌀` for
342
+ * Background (parent done but fleet still running, see #862 /
343
+ * reference/conversational-pacing.md),
344
344
  * and `⏳` during the boot-card window. These markers are stable
345
345
  * enough to key on for UAT — finer parsing (checklist items,
346
346
  * sub-agent row content) is out of scope.