switchroom 0.14.0 → 0.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth-broker/index.js +16 -1
- package/dist/cli/switchroom.js +1082 -873
- package/dist/host-control/main.js +1 -1
- package/package.json +1 -1
- package/profiles/_shared/telegram-style.md.hbs +1 -1
- package/telegram-plugin/auth-snapshot-format.ts +47 -1
- package/telegram-plugin/dist/gateway/gateway.js +983 -537
- package/telegram-plugin/gateway/boot-card.ts +100 -0
- package/telegram-plugin/gateway/config-snapshot.ts +274 -0
- package/telegram-plugin/gateway/gateway.ts +235 -20
- package/telegram-plugin/operator-events.ts +2 -10
- package/telegram-plugin/quota-watch.ts +276 -0
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +133 -1
- package/telegram-plugin/tests/boot-card-render.test.ts +93 -0
- package/telegram-plugin/tests/config-snapshot.test.ts +409 -0
- package/telegram-plugin/tests/operator-events.test.ts +12 -6
- package/telegram-plugin/tests/quota-watch.test.ts +366 -0
- package/telegram-plugin/tests/tool-activity-summary.test.ts +45 -0
- package/telegram-plugin/tests/turn-flush-safety.test.ts +48 -0
- package/telegram-plugin/tool-activity-summary.ts +47 -0
- package/telegram-plugin/turn-flush-safety.ts +47 -0
- package/telegram-plugin/uat/assertions.ts +4 -4
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the proactive quota threshold-tier push helper (#E4).
|
|
3
|
+
* Mirrors the shape of credits-watch.test.ts. Covers:
|
|
4
|
+
* - Pure decision logic across all transition-table cases
|
|
5
|
+
* - State persistence round-trip
|
|
6
|
+
* - Message body content sanity
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
10
|
+
import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "fs";
|
|
11
|
+
import { tmpdir } from "os";
|
|
12
|
+
import { join } from "path";
|
|
13
|
+
import {
|
|
14
|
+
evaluateQuotaWatchAccount,
|
|
15
|
+
loadQuotaWatchState,
|
|
16
|
+
saveQuotaWatchState,
|
|
17
|
+
patchQuotaWatchState,
|
|
18
|
+
emptyQuotaWatchState,
|
|
19
|
+
emptyAccountState,
|
|
20
|
+
} from "../quota-watch.js";
|
|
21
|
+
import type { AccountSnapshot } from "../auth-snapshot-format.js";
|
|
22
|
+
import type { QuotaUtilization } from "../quota-check.js";
|
|
23
|
+
|
|
24
|
+
// ── test fixtures ────────────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
const NOW = 1_780_000_000_000;
|
|
27
|
+
|
|
28
|
+
/**
 * Test fixture: a QuotaUtilization carrying the given 5-hour and 7-day
 * utilization percentages.
 *
 * The two reset timestamps are optional and are stored as `null` when they are
 * omitted; the claim and overage fields are always `null`.
 */
function makeQuota(
  fivePct: number,
  sevenPct: number,
  fiveHourResetAt?: Date,
  sevenDayResetAt?: Date,
): QuotaUtilization {
  // Missing (undefined) reset times are normalised to null.
  const fiveHourReset = fiveHourResetAt == null ? null : fiveHourResetAt;
  const sevenDayReset = sevenDayResetAt == null ? null : sevenDayResetAt;

  const quota: QuotaUtilization = {
    fiveHourUtilizationPct: fivePct,
    sevenDayUtilizationPct: sevenPct,
    fiveHourResetAt: fiveHourReset,
    sevenDayResetAt: sevenDayReset,
    representativeClaim: null,
    overageStatus: null,
    overageDisabledReason: null,
  };
  return quota;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Test fixture: an AccountSnapshot for `label` holding the supplied quota
 * (or `null` when there is no quota reading). `isActive` defaults to false.
 */
function makeSnap(
  label: string,
  quota: QuotaUtilization | null,
  isActive = false,
): AccountSnapshot {
  const snapshot: AccountSnapshot = {
    label: label,
    isActive: isActive,
    quota: quota,
  };
  return snapshot;
}
|
|
54
|
+
|
|
55
|
+
const HEALTHY_SNAP = makeSnap("alice@example.com", makeQuota(30, 40));
|
|
56
|
+
const THROTTLING_5H = makeSnap("alice@example.com", makeQuota(85, 40));
|
|
57
|
+
const THROTTLING_7D = makeSnap("alice@example.com", makeQuota(40, 90));
|
|
58
|
+
const BLOCKED_SNAP = makeSnap("alice@example.com", makeQuota(99.9, 99.9));
|
|
59
|
+
const UNKNOWN_SNAP = makeSnap("alice@example.com", null);
|
|
60
|
+
|
|
61
|
+
const PREV_NEVER_NOTIFIED = emptyAccountState(); // lastNotifiedHealth: null
|
|
62
|
+
const PREV_WAS_HEALTHY = { lastNotifiedHealth: "healthy" as const, lastNotifiedAt: NOW - 1000 };
|
|
63
|
+
const PREV_WAS_THROTTLING = { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: NOW - 1000 };
|
|
64
|
+
|
|
65
|
+
// ── transition decision tests ────────────────────────────────────────────────
|
|
66
|
+
|
|
67
|
+
describe("evaluateQuotaWatchAccount — transition table", () => {
|
|
68
|
+
it("healthy → healthy (never notified) skips", () => {
|
|
69
|
+
const d = evaluateQuotaWatchAccount({
|
|
70
|
+
agentName: "lawgpt",
|
|
71
|
+
snap: HEALTHY_SNAP,
|
|
72
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
73
|
+
now: NOW,
|
|
74
|
+
});
|
|
75
|
+
expect(d.kind).toBe("skip");
|
|
76
|
+
if (d.kind !== "skip") return;
|
|
77
|
+
expect(d.reason).toBe("steady-state");
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
it("healthy → healthy (was healthy) skips", () => {
|
|
81
|
+
const d = evaluateQuotaWatchAccount({
|
|
82
|
+
agentName: "lawgpt",
|
|
83
|
+
snap: HEALTHY_SNAP,
|
|
84
|
+
prev: PREV_WAS_HEALTHY,
|
|
85
|
+
now: NOW,
|
|
86
|
+
});
|
|
87
|
+
expect(d.kind).toBe("skip");
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
it("healthy → throttling (5h) fires entered-throttling notification", () => {
|
|
91
|
+
const d = evaluateQuotaWatchAccount({
|
|
92
|
+
agentName: "lawgpt",
|
|
93
|
+
snap: THROTTLING_5H,
|
|
94
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
95
|
+
now: NOW,
|
|
96
|
+
});
|
|
97
|
+
expect(d.kind).toBe("notify");
|
|
98
|
+
if (d.kind !== "notify") return;
|
|
99
|
+
expect(d.transition).toBe("entered-throttling");
|
|
100
|
+
expect(d.newAccountState.lastNotifiedHealth).toBe("throttling");
|
|
101
|
+
expect(d.newAccountState.lastNotifiedAt).toBe(NOW);
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
it("healthy → throttling (7d) fires entered-throttling notification", () => {
|
|
105
|
+
const d = evaluateQuotaWatchAccount({
|
|
106
|
+
agentName: "lawgpt",
|
|
107
|
+
snap: THROTTLING_7D,
|
|
108
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
109
|
+
now: NOW,
|
|
110
|
+
});
|
|
111
|
+
expect(d.kind).toBe("notify");
|
|
112
|
+
if (d.kind !== "notify") return;
|
|
113
|
+
expect(d.transition).toBe("entered-throttling");
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
it("throttling → throttling skips (already notified)", () => {
|
|
117
|
+
const d = evaluateQuotaWatchAccount({
|
|
118
|
+
agentName: "lawgpt",
|
|
119
|
+
snap: THROTTLING_5H,
|
|
120
|
+
prev: PREV_WAS_THROTTLING,
|
|
121
|
+
now: NOW,
|
|
122
|
+
});
|
|
123
|
+
expect(d.kind).toBe("skip");
|
|
124
|
+
if (d.kind !== "skip") return;
|
|
125
|
+
expect(d.reason).toBe("steady-state");
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
it("throttling → healthy fires recovered-to-healthy notification", () => {
|
|
129
|
+
const d = evaluateQuotaWatchAccount({
|
|
130
|
+
agentName: "lawgpt",
|
|
131
|
+
snap: HEALTHY_SNAP,
|
|
132
|
+
prev: PREV_WAS_THROTTLING,
|
|
133
|
+
now: NOW,
|
|
134
|
+
});
|
|
135
|
+
expect(d.kind).toBe("notify");
|
|
136
|
+
if (d.kind !== "notify") return;
|
|
137
|
+
expect(d.transition).toBe("recovered-to-healthy");
|
|
138
|
+
expect(d.newAccountState.lastNotifiedHealth).toBe("healthy");
|
|
139
|
+
expect(d.newAccountState.lastNotifiedAt).toBe(NOW);
|
|
140
|
+
});
|
|
141
|
+
|
|
142
|
+
it("* → blocked skips (credits-watch domain)", () => {
|
|
143
|
+
const dFromHealthy = evaluateQuotaWatchAccount({
|
|
144
|
+
agentName: "lawgpt",
|
|
145
|
+
snap: BLOCKED_SNAP,
|
|
146
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
147
|
+
now: NOW,
|
|
148
|
+
});
|
|
149
|
+
expect(dFromHealthy.kind).toBe("skip");
|
|
150
|
+
if (dFromHealthy.kind !== "skip") return;
|
|
151
|
+
expect(dFromHealthy.reason).toBe("blocked-not-our-domain");
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
it("blocked → healthy skips (credits-watch domain)", () => {
|
|
155
|
+
// blocked → healthy: credits-watch handles the blocked recovery path.
|
|
156
|
+
// Our watcher should not fire for it (we never tracked 'blocked').
|
|
157
|
+
const d = evaluateQuotaWatchAccount({
|
|
158
|
+
agentName: "lawgpt",
|
|
159
|
+
snap: HEALTHY_SNAP,
|
|
160
|
+
prev: PREV_NEVER_NOTIFIED, // we were never tracking as throttling
|
|
161
|
+
now: NOW,
|
|
162
|
+
});
|
|
163
|
+
// healthy → healthy from null-prev should skip, not fire
|
|
164
|
+
expect(d.kind).toBe("skip");
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
it("unknown quota snap skips (probe failed)", () => {
|
|
168
|
+
const d = evaluateQuotaWatchAccount({
|
|
169
|
+
agentName: "lawgpt",
|
|
170
|
+
snap: UNKNOWN_SNAP,
|
|
171
|
+
prev: PREV_WAS_THROTTLING,
|
|
172
|
+
now: NOW,
|
|
173
|
+
});
|
|
174
|
+
expect(d.kind).toBe("skip");
|
|
175
|
+
if (d.kind !== "skip") return;
|
|
176
|
+
expect(d.reason).toBe("unknown-not-our-domain");
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
it("no duplicate: two consecutive polls in throttling state produce one notify then skip", () => {
|
|
180
|
+
// First poll: healthy → throttling → notify
|
|
181
|
+
const d1 = evaluateQuotaWatchAccount({
|
|
182
|
+
agentName: "lawgpt",
|
|
183
|
+
snap: THROTTLING_5H,
|
|
184
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
185
|
+
now: NOW,
|
|
186
|
+
});
|
|
187
|
+
expect(d1.kind).toBe("notify");
|
|
188
|
+
if (d1.kind !== "notify") return;
|
|
189
|
+
|
|
190
|
+
// Second poll: throttling → throttling → skip
|
|
191
|
+
const d2 = evaluateQuotaWatchAccount({
|
|
192
|
+
agentName: "lawgpt",
|
|
193
|
+
snap: THROTTLING_5H,
|
|
194
|
+
prev: d1.newAccountState,
|
|
195
|
+
now: NOW + 15 * 60_000,
|
|
196
|
+
});
|
|
197
|
+
expect(d2.kind).toBe("skip");
|
|
198
|
+
});
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
// ── message content tests ────────────────────────────────────────────────────
|
|
202
|
+
|
|
203
|
+
describe("evaluateQuotaWatchAccount — message content", () => {
|
|
204
|
+
it("throttling message contains account label and percentages", () => {
|
|
205
|
+
const d = evaluateQuotaWatchAccount({
|
|
206
|
+
agentName: "lawgpt",
|
|
207
|
+
snap: THROTTLING_5H,
|
|
208
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
209
|
+
now: NOW,
|
|
210
|
+
});
|
|
211
|
+
expect(d.kind).toBe("notify");
|
|
212
|
+
if (d.kind !== "notify") return;
|
|
213
|
+
expect(d.message).toContain("alice@example.com");
|
|
214
|
+
expect(d.message).toContain("85%");
|
|
215
|
+
expect(d.message).toContain("40%");
|
|
216
|
+
expect(d.message).toContain("5-hour");
|
|
217
|
+
});
|
|
218
|
+
|
|
219
|
+
it("recovery message contains account label and percentages", () => {
|
|
220
|
+
const d = evaluateQuotaWatchAccount({
|
|
221
|
+
agentName: "lawgpt",
|
|
222
|
+
snap: HEALTHY_SNAP,
|
|
223
|
+
prev: PREV_WAS_THROTTLING,
|
|
224
|
+
now: NOW,
|
|
225
|
+
});
|
|
226
|
+
expect(d.kind).toBe("notify");
|
|
227
|
+
if (d.kind !== "notify") return;
|
|
228
|
+
expect(d.message).toContain("alice@example.com");
|
|
229
|
+
expect(d.message).toContain("Quota back in healthy range");
|
|
230
|
+
expect(d.message).toContain("30%");
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
it("throttling message HTML-escapes account label", () => {
|
|
234
|
+
const d = evaluateQuotaWatchAccount({
|
|
235
|
+
agentName: "lawgpt",
|
|
236
|
+
snap: makeSnap("<evil>@example.com", makeQuota(85, 40)),
|
|
237
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
238
|
+
now: NOW,
|
|
239
|
+
});
|
|
240
|
+
expect(d.kind).toBe("notify");
|
|
241
|
+
if (d.kind !== "notify") return;
|
|
242
|
+
expect(d.message).toContain("&lt;evil&gt;");
|
|
243
|
+
expect(d.message).not.toContain("<evil>");
|
|
244
|
+
});
|
|
245
|
+
|
|
246
|
+
it("throttling message for active account mentions /auth use", () => {
|
|
247
|
+
const activeSnap = makeSnap("alice@example.com", makeQuota(85, 40), /* isActive */ true);
|
|
248
|
+
const d = evaluateQuotaWatchAccount({
|
|
249
|
+
agentName: "lawgpt",
|
|
250
|
+
snap: activeSnap,
|
|
251
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
252
|
+
now: NOW,
|
|
253
|
+
});
|
|
254
|
+
expect(d.kind).toBe("notify");
|
|
255
|
+
if (d.kind !== "notify") return;
|
|
256
|
+
// Active account message should mention switching
|
|
257
|
+
expect(d.message).toContain("/auth");
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
it("throttling message includes reset time when provided", () => {
|
|
261
|
+
const resetAt = new Date(NOW + 3 * 60 * 60_000); // 3 hours from now
|
|
262
|
+
const d = evaluateQuotaWatchAccount({
|
|
263
|
+
agentName: "lawgpt",
|
|
264
|
+
snap: makeSnap("alice@example.com", makeQuota(85, 40, resetAt)),
|
|
265
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
266
|
+
now: NOW,
|
|
267
|
+
});
|
|
268
|
+
expect(d.kind).toBe("notify");
|
|
269
|
+
if (d.kind !== "notify") return;
|
|
270
|
+
expect(d.message).toContain("refills in");
|
|
271
|
+
});
|
|
272
|
+
});
|
|
273
|
+
|
|
274
|
+
// ── state persistence tests ──────────────────────────────────────────────────
|
|
275
|
+
|
|
276
|
+
describe("loadQuotaWatchState / saveQuotaWatchState — round-trip", () => {
|
|
277
|
+
let tmp: string;
|
|
278
|
+
|
|
279
|
+
beforeEach(() => {
|
|
280
|
+
tmp = mkdtempSync(join(tmpdir(), "quota-watch-"));
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
afterEach(() => {
|
|
284
|
+
rmSync(tmp, { recursive: true, force: true });
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
it("returns emptyQuotaWatchState when no file exists", () => {
|
|
288
|
+
expect(loadQuotaWatchState(tmp)).toEqual(emptyQuotaWatchState());
|
|
289
|
+
});
|
|
290
|
+
|
|
291
|
+
it("round-trips a saved state with multiple accounts", () => {
|
|
292
|
+
const state = {
|
|
293
|
+
"alice@example.com": { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1_780_000_000_000 },
|
|
294
|
+
"bob@example.com": { lastNotifiedHealth: null, lastNotifiedAt: 0 },
|
|
295
|
+
};
|
|
296
|
+
saveQuotaWatchState(tmp, state);
|
|
297
|
+
expect(loadQuotaWatchState(tmp)).toEqual(state);
|
|
298
|
+
});
|
|
299
|
+
|
|
300
|
+
it("falls back to empty on malformed JSON", () => {
|
|
301
|
+
mkdirSync(tmp, { recursive: true });
|
|
302
|
+
writeFileSync(join(tmp, "quota-watch.json"), "{broken");
|
|
303
|
+
expect(loadQuotaWatchState(tmp)).toEqual(emptyQuotaWatchState());
|
|
304
|
+
});
|
|
305
|
+
|
|
306
|
+
it("falls back to empty on shape mismatch (not an object)", () => {
|
|
307
|
+
writeFileSync(join(tmp, "quota-watch.json"), JSON.stringify([1, 2, 3]));
|
|
308
|
+
expect(loadQuotaWatchState(tmp)).toEqual(emptyQuotaWatchState());
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
it("drops malformed entries but preserves valid ones", () => {
|
|
312
|
+
writeFileSync(
|
|
313
|
+
join(tmp, "quota-watch.json"),
|
|
314
|
+
JSON.stringify({
|
|
315
|
+
"good@example.com": { lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 },
|
|
316
|
+
"bad@example.com": { lastNotifiedHealth: "invalid", lastNotifiedAt: "not-a-number" },
|
|
317
|
+
}),
|
|
318
|
+
);
|
|
319
|
+
const loaded = loadQuotaWatchState(tmp);
|
|
320
|
+
expect(loaded["good@example.com"]).toEqual({ lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 });
|
|
321
|
+
expect(loaded["bad@example.com"]).toBeUndefined();
|
|
322
|
+
});
|
|
323
|
+
|
|
324
|
+
it("creates the state dir on save (if it doesn't exist yet)", () => {
|
|
325
|
+
const fresh = join(tmp, "fresh-subdir");
|
|
326
|
+
saveQuotaWatchState(fresh, {});
|
|
327
|
+
expect(loadQuotaWatchState(fresh)).toEqual({});
|
|
328
|
+
});
|
|
329
|
+
});
|
|
330
|
+
|
|
331
|
+
// ── patchQuotaWatchState tests ────────────────────────────────────────────────
|
|
332
|
+
|
|
333
|
+
describe("patchQuotaWatchState", () => {
|
|
334
|
+
it("adds a new account entry without clobbering others", () => {
|
|
335
|
+
const current: ReturnType<typeof emptyQuotaWatchState> = {
|
|
336
|
+
"alice@example.com": { lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 },
|
|
337
|
+
};
|
|
338
|
+
const updated = patchQuotaWatchState(
|
|
339
|
+
current,
|
|
340
|
+
"bob@example.com",
|
|
341
|
+
{ lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 },
|
|
342
|
+
);
|
|
343
|
+
expect(updated["alice@example.com"]).toEqual({ lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 });
|
|
344
|
+
expect(updated["bob@example.com"]).toEqual({ lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 });
|
|
345
|
+
});
|
|
346
|
+
|
|
347
|
+
it("updates an existing account entry", () => {
|
|
348
|
+
const current = {
|
|
349
|
+
"alice@example.com": { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1000 },
|
|
350
|
+
};
|
|
351
|
+
const updated = patchQuotaWatchState(
|
|
352
|
+
current,
|
|
353
|
+
"alice@example.com",
|
|
354
|
+
{ lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 },
|
|
355
|
+
);
|
|
356
|
+
expect(updated["alice@example.com"]).toEqual({ lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 });
|
|
357
|
+
});
|
|
358
|
+
|
|
359
|
+
it("does not mutate the original state object", () => {
|
|
360
|
+
const current = {
|
|
361
|
+
"alice@example.com": { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1000 },
|
|
362
|
+
};
|
|
363
|
+
patchQuotaWatchState(current, "bob@example.com", { lastNotifiedHealth: null, lastNotifiedAt: 0 });
|
|
364
|
+
expect(current["bob@example.com"]).toBeUndefined();
|
|
365
|
+
});
|
|
366
|
+
});
|
|
@@ -6,6 +6,9 @@ import {
|
|
|
6
6
|
registerAndRender,
|
|
7
7
|
verbForTool,
|
|
8
8
|
describeToolUse,
|
|
9
|
+
appendActivityLine,
|
|
10
|
+
renderActivityFeed,
|
|
11
|
+
MIRROR_MAX_LINES,
|
|
9
12
|
} from "../tool-activity-summary.js";
|
|
10
13
|
|
|
11
14
|
describe("describeToolUse — friendly per-tool rendering (draft-mirror)", () => {
|
|
@@ -283,3 +286,45 @@ describe("registerAndRender — ergonomic full-pipeline call", () => {
|
|
|
283
286
|
expect(s.firstToolName).toBeNull();
|
|
284
287
|
});
|
|
285
288
|
});
|
|
289
|
+
|
|
290
|
+
describe("appendActivityLine + renderActivityFeed — accumulating draft feed", () => {
|
|
291
|
+
it("accumulates distinct actions chronologically (newest last)", () => {
|
|
292
|
+
const lines: string[] = [];
|
|
293
|
+
expect(appendActivityLine(lines, "Read", { file_path: "a/gateway.ts" })).toBe(
|
|
294
|
+
"· Reading gateway.ts",
|
|
295
|
+
);
|
|
296
|
+
expect(appendActivityLine(lines, "mcp__hindsight__reflect", { query: "x" })).toBe(
|
|
297
|
+
"· Reading gateway.ts\n· Searching memory",
|
|
298
|
+
);
|
|
299
|
+
expect(appendActivityLine(lines, "Bash", { command: "ls", description: "List workspace" })).toBe(
|
|
300
|
+
"· Reading gateway.ts\n· Searching memory\n· List workspace",
|
|
301
|
+
);
|
|
302
|
+
});
|
|
303
|
+
|
|
304
|
+
it("collapses consecutive exact-duplicate lines", () => {
|
|
305
|
+
const lines: string[] = [];
|
|
306
|
+
appendActivityLine(lines, "Read", { file_path: "a.ts" });
|
|
307
|
+
appendActivityLine(lines, "Read", { file_path: "a.ts" }); // dup → collapsed
|
|
308
|
+
expect(lines).toEqual(["Reading a.ts"]);
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
it("returns null (no feed update) for surface tools", () => {
|
|
312
|
+
const lines: string[] = [];
|
|
313
|
+
expect(appendActivityLine(lines, "mcp__switchroom-telegram__reply", { text: "hi" })).toBeNull();
|
|
314
|
+
expect(lines).toEqual([]);
|
|
315
|
+
});
|
|
316
|
+
|
|
317
|
+
it("caps to the last MIRROR_MAX_LINES with a '+N earlier' header", () => {
|
|
318
|
+
const lines = Array.from({ length: 9 }, (_, i) => `Action ${i + 1}`);
|
|
319
|
+
const out = renderActivityFeed(lines)!;
|
|
320
|
+
expect(out.startsWith("· +3 earlier…\n")).toBe(true);
|
|
321
|
+
// Only the last 6 actions are shown.
|
|
322
|
+
expect(out).toContain("· Action 4");
|
|
323
|
+
expect(out).toContain("· Action 9");
|
|
324
|
+
expect(out).not.toContain("· Action 3\n");
|
|
325
|
+
});
|
|
326
|
+
|
|
327
|
+
it("renderActivityFeed returns null on empty", () => {
|
|
328
|
+
expect(renderActivityFeed([])).toBeNull();
|
|
329
|
+
});
|
|
330
|
+
});
|
|
@@ -17,9 +17,57 @@ import { describe, it, expect } from 'vitest'
|
|
|
17
17
|
import {
|
|
18
18
|
decideTurnFlush,
|
|
19
19
|
isSilentFlushMarker,
|
|
20
|
+
isCompositeSilentNoise,
|
|
20
21
|
isTurnFlushSafetyEnabled,
|
|
21
22
|
} from '../turn-flush-safety.js'
|
|
22
23
|
|
|
24
|
+
describe('isCompositeSilentNoise — Stop-hook re-prompt leak backstop', () => {
|
|
25
|
+
it('suppresses the observed leak "Sent.\\nNO_REPLY\\nNO_REPLY"', () => {
|
|
26
|
+
expect(isCompositeSilentNoise('Sent.\nNO_REPLY\nNO_REPLY')).toBe(true)
|
|
27
|
+
})
|
|
28
|
+
it('suppresses repeated bare markers and marker+confirmation variants', () => {
|
|
29
|
+
expect(isCompositeSilentNoise('NO_REPLY\nNO_REPLY')).toBe(true)
|
|
30
|
+
expect(isCompositeSilentNoise('Done\nNO_REPLY')).toBe(true)
|
|
31
|
+
expect(isCompositeSilentNoise('NO_REPLY\nHEARTBEAT_OK')).toBe(true)
|
|
32
|
+
})
|
|
33
|
+
it('requires at least one real marker — standalone confirmations still flush', () => {
|
|
34
|
+
// Conservative: no NO_REPLY/HEARTBEAT_OK present → NOT suppressed here,
|
|
35
|
+
// so we never silently drop a turn that wasn't already signalling silence.
|
|
36
|
+
expect(isCompositeSilentNoise('Sent.')).toBe(false)
|
|
37
|
+
expect(isCompositeSilentNoise('Done.\nOK')).toBe(false)
|
|
38
|
+
})
|
|
39
|
+
it('does NOT suppress real content glued to a marker', () => {
|
|
40
|
+
expect(
|
|
41
|
+
isCompositeSilentNoise('Here is the summary of the page.\nNO_REPLY'),
|
|
42
|
+
).toBe(false)
|
|
43
|
+
expect(isCompositeSilentNoise('NO_REPLY\nThe answer is 42.')).toBe(false)
|
|
44
|
+
})
|
|
45
|
+
it('handles non-strings / empty safely', () => {
|
|
46
|
+
expect(isCompositeSilentNoise(undefined)).toBe(false)
|
|
47
|
+
expect(isCompositeSilentNoise('')).toBe(false)
|
|
48
|
+
expect(isCompositeSilentNoise(' \n ')).toBe(false)
|
|
49
|
+
})
|
|
50
|
+
})
|
|
51
|
+
|
|
52
|
+
describe('decideTurnFlush — composite silent noise is skipped, not leaked', () => {
|
|
53
|
+
it('skips "Sent.\\nNO_REPLY\\nNO_REPLY" (the live clerk/test-harness leak)', () => {
|
|
54
|
+
const d = decideTurnFlush({
|
|
55
|
+
chatId: '12345',
|
|
56
|
+
replyCalled: false,
|
|
57
|
+
capturedText: ['Sent.', 'NO_REPLY', 'NO_REPLY'],
|
|
58
|
+
})
|
|
59
|
+
expect(d).toEqual({ kind: 'skip', reason: 'silent-marker' })
|
|
60
|
+
})
|
|
61
|
+
it('still flushes genuine trailing answer text', () => {
|
|
62
|
+
const d = decideTurnFlush({
|
|
63
|
+
chatId: '12345',
|
|
64
|
+
replyCalled: false,
|
|
65
|
+
capturedText: ['The page summarises three news stories.'],
|
|
66
|
+
})
|
|
67
|
+
expect(d.kind).toBe('flush')
|
|
68
|
+
})
|
|
69
|
+
})
|
|
70
|
+
|
|
23
71
|
describe('decideTurnFlush', () => {
|
|
24
72
|
it('(a) does NOT flush when the reply tool was called', () => {
|
|
25
73
|
const decision = decideTurnFlush({
|
|
@@ -335,3 +335,50 @@ export function describeToolUse(
|
|
|
335
335
|
return "Working…";
|
|
336
336
|
}
|
|
337
337
|
}
|
|
338
|
+
|
|
339
|
+
// ─── Accumulating activity feed (draft-mirror Phase 2) ──────────────────────
//
// Phase 1 showed only the latest action; this accumulates the turn's actions
// into a running feed — like Claude Code's own UI — streamed into the
// ephemeral draft and cleared on reply. Chronological (oldest first, newest
// last), consecutive exact-duplicates collapsed, capped to the most recent
// MIRROR_MAX_LINES with a "+N earlier" header so a heavy turn stays readable
// inside Telegram's compose-area draft.

/** Maximum number of action lines rendered in the feed; older ones are summarised. */
export const MIRROR_MAX_LINES = 6;

/**
 * Append a tool_use's friendly line to the running feed and return the
 * re-rendered feed body.
 *
 * @param lines - Accumulated feed lines for the turn. Mutated in place: the
 *   new line is pushed unless it equals the current last entry.
 * @param toolName - Tool name, forwarded to `describeToolUse`.
 * @param input - Tool input, forwarded to `describeToolUse`.
 * @returns The rendered feed, or `null` when `describeToolUse` yields no line
 *   for this tool — `lines` is left untouched in that case, so the caller can
 *   skip the draft update.
 */
export function appendActivityLine(
  lines: string[],
  toolName: string,
  input: Record<string, unknown> | undefined,
): string | null {
  const line = describeToolUse(toolName, input);
  if (line == null) return null;
  // Only a *consecutive* repeat is collapsed; the same action recurring later
  // in the turn (after something else) is kept as a separate entry.
  if (lines.length === 0 || lines[lines.length - 1] !== line) {
    lines.push(line);
  }
  return renderActivityFeed(lines);
}

/**
 * Render the accumulated feed as a plain-text block, one "· "-prefixed action
 * per line, oldest first.
 *
 * Only the last MIRROR_MAX_LINES entries are shown; when earlier entries were
 * cut, a leading "· +N earlier…" line reports how many. No escaping is done
 * here — the output is raw text.
 *
 * @param lines - Feed lines in chronological order. Not mutated.
 * @returns The rendered block, or `null` when `lines` is empty.
 */
export function renderActivityFeed(lines: readonly string[]): string | null {
  if (lines.length === 0) return null;
  const shown = lines.slice(-MIRROR_MAX_LINES);
  const hidden = lines.length - shown.length;
  const body = shown.map((l) => `· ${l}`).join("\n");
  return hidden > 0 ? `· +${hidden} earlier…\n${body}` : body;
}
|
|
@@ -50,6 +50,48 @@ export function isSilentFlushMarker(text: string | undefined): boolean {
|
|
|
50
50
|
return SILENT_MARKERS.has(trimmed.toUpperCase())
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
+
// Trivial end-of-turn confirmations the model emits as terminal text after
// calling reply (e.g. "Sent." once the reply tool returns). On their own
// they're harmless; the danger is when they're glued to silent markers
// across Stop-hook re-prompt cycles into a composite blob like
// "Sent.\nNO_REPLY\nNO_REPLY" — which `isSilentFlushMarker` can't match
// (multi-line, over the length guard) so it leaks to chat. See
// `isCompositeSilentNoise`.
const TRIVIAL_CONFIRMATIONS = new Set(['SENT', 'DONE', 'OK', 'OKAY', 'ACK'])

/**
 * True when `line` is nothing but one of the TRIVIAL_CONFIRMATIONS words,
 * compared case-insensitively, optionally followed by a single trailing
 * punctuation/symbol character ("Sent.", "ok!").
 *
 * The line is trimmed first; empty lines and lines longer than 8 UTF-16 code
 * units after trimming are rejected outright. Only ONE trailing character is
 * stripped, so "Sent.." does not match.
 */
function isTrivialConfirmationLine(line: string): boolean {
  const trimmed = line.trim()
  if (trimmed.length === 0 || trimmed.length > 8) return false
  // Strip one trailing char that is not a letter, combining mark, digit or
  // underscore. Unicode-aware on purpose: the ASCII-only `\W` also matches
  // non-ASCII letters (turning "Oké" into "OK") and, combined with
  // slice(0, -1), cuts a trailing surrogate pair in half.
  const word = trimmed.replace(/[^\p{L}\p{M}\p{N}_]$/u, '')
  return TRIVIAL_CONFIRMATIONS.has(word.toUpperCase())
}
|
|
68
|
+
|
|
69
|
+
/**
 * Decide whether `text` is a multi-line blob made up *only* of silent noise.
 *
 * Each line is trimmed and blank lines are ignored. The result is true when
 * every remaining line is either a silent marker (per `isSilentFlushMarker`)
 * or a trivial confirmation (per `isTrivialConfirmationLine`), and at least
 * one of them is a silent marker.
 *
 * Requiring a real marker keeps the check conservative: confirmations alone
 * ("Sent.", "Done.\nOK") are not treated as noise, and neither is any text
 * with a line of real content next to a marker. Non-string input and text
 * with no non-blank lines return false.
 */
export function isCompositeSilentNoise(text: string | undefined): boolean {
  if (typeof text !== 'string') return false

  let sawMarker = false
  for (const rawLine of text.split('\n')) {
    const candidate = rawLine.trim()
    if (candidate.length === 0) continue
    if (isSilentFlushMarker(candidate)) {
      sawMarker = true
      continue
    }
    // Anything that is neither a marker nor a trivial confirmation is real
    // content — the blob must not be suppressed.
    if (!isTrivialConfirmationLine(candidate)) return false
  }
  return sawMarker
}
|
|
94
|
+
|
|
53
95
|
export type FlushDecision =
|
|
54
96
|
| { kind: 'flush'; text: string }
|
|
55
97
|
| { kind: 'skip'; reason: FlushSkipReason }
|
|
@@ -115,6 +157,11 @@ export function decideTurnFlush(input: FlushDecisionInput): FlushDecision {
|
|
|
115
157
|
const joined = input.capturedText.join('\n').trim()
|
|
116
158
|
if (joined.length === 0) return { kind: 'skip', reason: 'empty-text' }
|
|
117
159
|
if (isSilentFlushMarker(joined)) return { kind: 'skip', reason: 'silent-marker' }
|
|
160
|
+
// Composite silent noise — e.g. "Sent.\nNO_REPLY\nNO_REPLY" accumulated
|
|
161
|
+
// across Stop-hook re-prompt cycles. The single-sentinel check above
|
|
162
|
+
// misses it (multi-line, over the length guard); without this the blob
|
|
163
|
+
// leaks to chat as a visible message.
|
|
164
|
+
if (isCompositeSilentNoise(joined)) return { kind: 'skip', reason: 'silent-marker' }
|
|
118
165
|
return { kind: 'flush', text: joined }
|
|
119
166
|
}
|
|
120
167
|
|
|
@@ -337,10 +337,10 @@ export async function waitForCardPhase(
|
|
|
337
337
|
/**
|
|
338
338
|
* Detect the progress card's phase from its rendered text.
|
|
339
339
|
*
|
|
340
|
-
* The actual card render
|
|
341
|
-
*
|
|
342
|
-
*
|
|
343
|
-
*
|
|
340
|
+
* The actual card render uses emoji markers in the header: `✅` for
|
|
341
|
+
* done, `❌` for errors, `⚙️` while working (foreground), `🌀` for
|
|
342
|
+
* Background (parent done but fleet still running, see #862 /
|
|
343
|
+
* reference/conversational-pacing.md),
|
|
344
344
|
* and `⏳` during the boot-card window. These markers are stable
|
|
345
345
|
* enough to key on for UAT — finer parsing (checklist items,
|
|
346
346
|
* sub-agent row content) is out of scope.
|