switchroom 0.13.65 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +80 -80
- package/dist/auth-broker/index.js +96 -81
- package/dist/cli/drive-write-pretool.mjs +10 -10
- package/dist/cli/notion-write-pretool.mjs +82 -82
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +1883 -1479
- package/dist/host-control/main.js +149 -149
- package/dist/vault/approvals/kernel-server.js +82 -82
- package/dist/vault/broker/server.js +83 -83
- package/package.json +1 -1
- package/profiles/_shared/telegram-style.md.hbs +1 -1
- package/telegram-plugin/auth-snapshot-format.ts +47 -1
- package/telegram-plugin/dist/bridge/bridge.js +112 -112
- package/telegram-plugin/dist/gateway/gateway.js +1226 -696
- package/telegram-plugin/dist/server.js +160 -160
- package/telegram-plugin/gateway/boot-card.ts +100 -0
- package/telegram-plugin/gateway/config-snapshot.ts +274 -0
- package/telegram-plugin/gateway/gateway.ts +256 -36
- package/telegram-plugin/operator-events.ts +2 -10
- package/telegram-plugin/quota-watch.ts +276 -0
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +133 -1
- package/telegram-plugin/tests/boot-card-render.test.ts +93 -0
- package/telegram-plugin/tests/config-snapshot.test.ts +409 -0
- package/telegram-plugin/tests/operator-events.test.ts +12 -6
- package/telegram-plugin/tests/quota-watch.test.ts +366 -0
- package/telegram-plugin/tests/tool-activity-summary.test.ts +66 -0
- package/telegram-plugin/tests/turn-flush-safety.test.ts +48 -0
- package/telegram-plugin/tool-activity-summary.ts +137 -0
- package/telegram-plugin/turn-flush-safety.ts +47 -0
- package/telegram-plugin/uat/assertions.ts +4 -4
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the proactive quota threshold-tier push helper (#E4).
|
|
3
|
+
* Mirrors the shape of credits-watch.test.ts. Covers:
|
|
4
|
+
* - Pure decision logic across all transition-table cases
|
|
5
|
+
* - State persistence round-trip
|
|
6
|
+
* - Message body content sanity
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
10
|
+
import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "fs";
|
|
11
|
+
import { tmpdir } from "os";
|
|
12
|
+
import { join } from "path";
|
|
13
|
+
import {
|
|
14
|
+
evaluateQuotaWatchAccount,
|
|
15
|
+
loadQuotaWatchState,
|
|
16
|
+
saveQuotaWatchState,
|
|
17
|
+
patchQuotaWatchState,
|
|
18
|
+
emptyQuotaWatchState,
|
|
19
|
+
emptyAccountState,
|
|
20
|
+
} from "../quota-watch.js";
|
|
21
|
+
import type { AccountSnapshot } from "../auth-snapshot-format.js";
|
|
22
|
+
import type { QuotaUtilization } from "../quota-check.js";
|
|
23
|
+
|
|
24
|
+
// ── test fixtures ────────────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
const NOW = 1_780_000_000_000;
|
|
27
|
+
|
|
28
|
+
/**
 * Test fixture: assemble a QuotaUtilization with the given 5-hour and 7-day
 * utilization percentages.
 *
 * @param fivePct - value stored as `fiveHourUtilizationPct`.
 * @param sevenPct - value stored as `sevenDayUtilizationPct`.
 * @param fiveHourResetAt - optional 5-hour reset time; stored as `null` when omitted.
 * @param sevenDayResetAt - optional 7-day reset time; stored as `null` when omitted.
 * @returns a quota object whose claim/overage fields are all `null`.
 */
function makeQuota(
  fivePct: number,
  sevenPct: number,
  fiveHourResetAt?: Date,
  sevenDayResetAt?: Date,
): QuotaUtilization {
  // Omitted reset timestamps are normalised to null rather than undefined.
  const fiveHourReset = fiveHourResetAt === undefined ? null : fiveHourResetAt;
  const sevenDayReset = sevenDayResetAt === undefined ? null : sevenDayResetAt;
  const quota: QuotaUtilization = {
    fiveHourUtilizationPct: fivePct,
    sevenDayUtilizationPct: sevenPct,
    fiveHourResetAt: fiveHourReset,
    sevenDayResetAt: sevenDayReset,
    representativeClaim: null,
    overageStatus: null,
    overageDisabledReason: null,
  };
  return quota;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Test fixture: wrap a quota (or `null`) in an AccountSnapshot.
 *
 * @param label - account label stored on the snapshot.
 * @param quota - quota stored on the snapshot; the fixtures in this file pass
 *   `null` for the "unknown quota" case.
 * @param isActive - active flag stored on the snapshot; defaults to `false`.
 * @returns a snapshot containing exactly `label`, `isActive` and `quota`.
 */
function makeSnap(
  label: string,
  quota: QuotaUtilization | null,
  isActive = false,
): AccountSnapshot {
  const snapshot: AccountSnapshot = {
    label: label,
    isActive: isActive,
    quota: quota,
  };
  return snapshot;
}
|
|
54
|
+
|
|
55
|
+
const HEALTHY_SNAP = makeSnap("alice@example.com", makeQuota(30, 40));
|
|
56
|
+
const THROTTLING_5H = makeSnap("alice@example.com", makeQuota(85, 40));
|
|
57
|
+
const THROTTLING_7D = makeSnap("alice@example.com", makeQuota(40, 90));
|
|
58
|
+
const BLOCKED_SNAP = makeSnap("alice@example.com", makeQuota(99.9, 99.9));
|
|
59
|
+
const UNKNOWN_SNAP = makeSnap("alice@example.com", null);
|
|
60
|
+
|
|
61
|
+
const PREV_NEVER_NOTIFIED = emptyAccountState(); // lastNotifiedHealth: null
|
|
62
|
+
const PREV_WAS_HEALTHY = { lastNotifiedHealth: "healthy" as const, lastNotifiedAt: NOW - 1000 };
|
|
63
|
+
const PREV_WAS_THROTTLING = { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: NOW - 1000 };
|
|
64
|
+
|
|
65
|
+
// ── transition decision tests ────────────────────────────────────────────────
|
|
66
|
+
|
|
67
|
+
describe("evaluateQuotaWatchAccount — transition table", () => {
|
|
68
|
+
it("healthy → healthy (never notified) skips", () => {
|
|
69
|
+
const d = evaluateQuotaWatchAccount({
|
|
70
|
+
agentName: "lawgpt",
|
|
71
|
+
snap: HEALTHY_SNAP,
|
|
72
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
73
|
+
now: NOW,
|
|
74
|
+
});
|
|
75
|
+
expect(d.kind).toBe("skip");
|
|
76
|
+
if (d.kind !== "skip") return;
|
|
77
|
+
expect(d.reason).toBe("steady-state");
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
it("healthy → healthy (was healthy) skips", () => {
|
|
81
|
+
const d = evaluateQuotaWatchAccount({
|
|
82
|
+
agentName: "lawgpt",
|
|
83
|
+
snap: HEALTHY_SNAP,
|
|
84
|
+
prev: PREV_WAS_HEALTHY,
|
|
85
|
+
now: NOW,
|
|
86
|
+
});
|
|
87
|
+
expect(d.kind).toBe("skip");
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
it("healthy → throttling (5h) fires entered-throttling notification", () => {
|
|
91
|
+
const d = evaluateQuotaWatchAccount({
|
|
92
|
+
agentName: "lawgpt",
|
|
93
|
+
snap: THROTTLING_5H,
|
|
94
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
95
|
+
now: NOW,
|
|
96
|
+
});
|
|
97
|
+
expect(d.kind).toBe("notify");
|
|
98
|
+
if (d.kind !== "notify") return;
|
|
99
|
+
expect(d.transition).toBe("entered-throttling");
|
|
100
|
+
expect(d.newAccountState.lastNotifiedHealth).toBe("throttling");
|
|
101
|
+
expect(d.newAccountState.lastNotifiedAt).toBe(NOW);
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
it("healthy → throttling (7d) fires entered-throttling notification", () => {
|
|
105
|
+
const d = evaluateQuotaWatchAccount({
|
|
106
|
+
agentName: "lawgpt",
|
|
107
|
+
snap: THROTTLING_7D,
|
|
108
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
109
|
+
now: NOW,
|
|
110
|
+
});
|
|
111
|
+
expect(d.kind).toBe("notify");
|
|
112
|
+
if (d.kind !== "notify") return;
|
|
113
|
+
expect(d.transition).toBe("entered-throttling");
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
it("throttling → throttling skips (already notified)", () => {
|
|
117
|
+
const d = evaluateQuotaWatchAccount({
|
|
118
|
+
agentName: "lawgpt",
|
|
119
|
+
snap: THROTTLING_5H,
|
|
120
|
+
prev: PREV_WAS_THROTTLING,
|
|
121
|
+
now: NOW,
|
|
122
|
+
});
|
|
123
|
+
expect(d.kind).toBe("skip");
|
|
124
|
+
if (d.kind !== "skip") return;
|
|
125
|
+
expect(d.reason).toBe("steady-state");
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
it("throttling → healthy fires recovered-to-healthy notification", () => {
|
|
129
|
+
const d = evaluateQuotaWatchAccount({
|
|
130
|
+
agentName: "lawgpt",
|
|
131
|
+
snap: HEALTHY_SNAP,
|
|
132
|
+
prev: PREV_WAS_THROTTLING,
|
|
133
|
+
now: NOW,
|
|
134
|
+
});
|
|
135
|
+
expect(d.kind).toBe("notify");
|
|
136
|
+
if (d.kind !== "notify") return;
|
|
137
|
+
expect(d.transition).toBe("recovered-to-healthy");
|
|
138
|
+
expect(d.newAccountState.lastNotifiedHealth).toBe("healthy");
|
|
139
|
+
expect(d.newAccountState.lastNotifiedAt).toBe(NOW);
|
|
140
|
+
});
|
|
141
|
+
|
|
142
|
+
it("* → blocked skips (credits-watch domain)", () => {
|
|
143
|
+
const dFromHealthy = evaluateQuotaWatchAccount({
|
|
144
|
+
agentName: "lawgpt",
|
|
145
|
+
snap: BLOCKED_SNAP,
|
|
146
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
147
|
+
now: NOW,
|
|
148
|
+
});
|
|
149
|
+
expect(dFromHealthy.kind).toBe("skip");
|
|
150
|
+
if (dFromHealthy.kind !== "skip") return;
|
|
151
|
+
expect(dFromHealthy.reason).toBe("blocked-not-our-domain");
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
it("blocked → healthy skips (credits-watch domain)", () => {
|
|
155
|
+
// blocked → healthy: credits-watch handles the blocked recovery path.
|
|
156
|
+
// Our watcher should not fire for it (we never tracked 'blocked').
|
|
157
|
+
const d = evaluateQuotaWatchAccount({
|
|
158
|
+
agentName: "lawgpt",
|
|
159
|
+
snap: HEALTHY_SNAP,
|
|
160
|
+
prev: PREV_NEVER_NOTIFIED, // we were never tracking as throttling
|
|
161
|
+
now: NOW,
|
|
162
|
+
});
|
|
163
|
+
// healthy → healthy from null-prev should skip, not fire
|
|
164
|
+
expect(d.kind).toBe("skip");
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
it("unknown quota snap skips (probe failed)", () => {
|
|
168
|
+
const d = evaluateQuotaWatchAccount({
|
|
169
|
+
agentName: "lawgpt",
|
|
170
|
+
snap: UNKNOWN_SNAP,
|
|
171
|
+
prev: PREV_WAS_THROTTLING,
|
|
172
|
+
now: NOW,
|
|
173
|
+
});
|
|
174
|
+
expect(d.kind).toBe("skip");
|
|
175
|
+
if (d.kind !== "skip") return;
|
|
176
|
+
expect(d.reason).toBe("unknown-not-our-domain");
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
it("no duplicate: two consecutive polls in throttling state produce one notify then skip", () => {
|
|
180
|
+
// First poll: healthy → throttling → notify
|
|
181
|
+
const d1 = evaluateQuotaWatchAccount({
|
|
182
|
+
agentName: "lawgpt",
|
|
183
|
+
snap: THROTTLING_5H,
|
|
184
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
185
|
+
now: NOW,
|
|
186
|
+
});
|
|
187
|
+
expect(d1.kind).toBe("notify");
|
|
188
|
+
if (d1.kind !== "notify") return;
|
|
189
|
+
|
|
190
|
+
// Second poll: throttling → throttling → skip
|
|
191
|
+
const d2 = evaluateQuotaWatchAccount({
|
|
192
|
+
agentName: "lawgpt",
|
|
193
|
+
snap: THROTTLING_5H,
|
|
194
|
+
prev: d1.newAccountState,
|
|
195
|
+
now: NOW + 15 * 60_000,
|
|
196
|
+
});
|
|
197
|
+
expect(d2.kind).toBe("skip");
|
|
198
|
+
});
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
// ── message content tests ────────────────────────────────────────────────────
|
|
202
|
+
|
|
203
|
+
describe("evaluateQuotaWatchAccount — message content", () => {
|
|
204
|
+
it("throttling message contains account label and percentages", () => {
|
|
205
|
+
const d = evaluateQuotaWatchAccount({
|
|
206
|
+
agentName: "lawgpt",
|
|
207
|
+
snap: THROTTLING_5H,
|
|
208
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
209
|
+
now: NOW,
|
|
210
|
+
});
|
|
211
|
+
expect(d.kind).toBe("notify");
|
|
212
|
+
if (d.kind !== "notify") return;
|
|
213
|
+
expect(d.message).toContain("alice@example.com");
|
|
214
|
+
expect(d.message).toContain("85%");
|
|
215
|
+
expect(d.message).toContain("40%");
|
|
216
|
+
expect(d.message).toContain("5-hour");
|
|
217
|
+
});
|
|
218
|
+
|
|
219
|
+
it("recovery message contains account label and percentages", () => {
|
|
220
|
+
const d = evaluateQuotaWatchAccount({
|
|
221
|
+
agentName: "lawgpt",
|
|
222
|
+
snap: HEALTHY_SNAP,
|
|
223
|
+
prev: PREV_WAS_THROTTLING,
|
|
224
|
+
now: NOW,
|
|
225
|
+
});
|
|
226
|
+
expect(d.kind).toBe("notify");
|
|
227
|
+
if (d.kind !== "notify") return;
|
|
228
|
+
expect(d.message).toContain("alice@example.com");
|
|
229
|
+
expect(d.message).toContain("Quota back in healthy range");
|
|
230
|
+
expect(d.message).toContain("30%");
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
it("throttling message HTML-escapes account label", () => {
  // A label containing angle brackets must not reach the rendered message as
  // raw markup.
  const d = evaluateQuotaWatchAccount({
    agentName: "lawgpt",
    snap: makeSnap("<evil>@example.com", makeQuota(85, 40)),
    prev: PREV_NEVER_NOTIFIED,
    now: NOW,
  });
  expect(d.kind).toBe("notify");
  if (d.kind !== "notify") return;
  // Positive check: the entity-escaped form is present.
  // NOTE(review): assumes the escaper emits named entities (&lt; / &gt;) — confirm against quota-watch.ts.
  expect(d.message).toContain("&lt;evil&gt;");
  // Negative check: the raw tag never leaks through.
  expect(d.message).not.toContain("<evil>");
});
|
|
245
|
+
|
|
246
|
+
it("throttling message for active account mentions /auth use", () => {
|
|
247
|
+
const activeSnap = makeSnap("alice@example.com", makeQuota(85, 40), /* isActive */ true);
|
|
248
|
+
const d = evaluateQuotaWatchAccount({
|
|
249
|
+
agentName: "lawgpt",
|
|
250
|
+
snap: activeSnap,
|
|
251
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
252
|
+
now: NOW,
|
|
253
|
+
});
|
|
254
|
+
expect(d.kind).toBe("notify");
|
|
255
|
+
if (d.kind !== "notify") return;
|
|
256
|
+
// Active account message should mention switching
|
|
257
|
+
expect(d.message).toContain("/auth");
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
it("throttling message includes reset time when provided", () => {
|
|
261
|
+
const resetAt = new Date(NOW + 3 * 60 * 60_000); // 3 hours from now
|
|
262
|
+
const d = evaluateQuotaWatchAccount({
|
|
263
|
+
agentName: "lawgpt",
|
|
264
|
+
snap: makeSnap("alice@example.com", makeQuota(85, 40, resetAt)),
|
|
265
|
+
prev: PREV_NEVER_NOTIFIED,
|
|
266
|
+
now: NOW,
|
|
267
|
+
});
|
|
268
|
+
expect(d.kind).toBe("notify");
|
|
269
|
+
if (d.kind !== "notify") return;
|
|
270
|
+
expect(d.message).toContain("refills in");
|
|
271
|
+
});
|
|
272
|
+
});
|
|
273
|
+
|
|
274
|
+
// ── state persistence tests ──────────────────────────────────────────────────
|
|
275
|
+
|
|
276
|
+
describe("loadQuotaWatchState / saveQuotaWatchState — round-trip", () => {
|
|
277
|
+
let tmp: string;
|
|
278
|
+
|
|
279
|
+
beforeEach(() => {
|
|
280
|
+
tmp = mkdtempSync(join(tmpdir(), "quota-watch-"));
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
afterEach(() => {
|
|
284
|
+
rmSync(tmp, { recursive: true, force: true });
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
it("returns emptyQuotaWatchState when no file exists", () => {
|
|
288
|
+
expect(loadQuotaWatchState(tmp)).toEqual(emptyQuotaWatchState());
|
|
289
|
+
});
|
|
290
|
+
|
|
291
|
+
it("round-trips a saved state with multiple accounts", () => {
|
|
292
|
+
const state = {
|
|
293
|
+
"alice@example.com": { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1_780_000_000_000 },
|
|
294
|
+
"bob@example.com": { lastNotifiedHealth: null, lastNotifiedAt: 0 },
|
|
295
|
+
};
|
|
296
|
+
saveQuotaWatchState(tmp, state);
|
|
297
|
+
expect(loadQuotaWatchState(tmp)).toEqual(state);
|
|
298
|
+
});
|
|
299
|
+
|
|
300
|
+
it("falls back to empty on malformed JSON", () => {
|
|
301
|
+
mkdirSync(tmp, { recursive: true });
|
|
302
|
+
writeFileSync(join(tmp, "quota-watch.json"), "{broken");
|
|
303
|
+
expect(loadQuotaWatchState(tmp)).toEqual(emptyQuotaWatchState());
|
|
304
|
+
});
|
|
305
|
+
|
|
306
|
+
it("falls back to empty on shape mismatch (not an object)", () => {
|
|
307
|
+
writeFileSync(join(tmp, "quota-watch.json"), JSON.stringify([1, 2, 3]));
|
|
308
|
+
expect(loadQuotaWatchState(tmp)).toEqual(emptyQuotaWatchState());
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
it("drops malformed entries but preserves valid ones", () => {
|
|
312
|
+
writeFileSync(
|
|
313
|
+
join(tmp, "quota-watch.json"),
|
|
314
|
+
JSON.stringify({
|
|
315
|
+
"good@example.com": { lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 },
|
|
316
|
+
"bad@example.com": { lastNotifiedHealth: "invalid", lastNotifiedAt: "not-a-number" },
|
|
317
|
+
}),
|
|
318
|
+
);
|
|
319
|
+
const loaded = loadQuotaWatchState(tmp);
|
|
320
|
+
expect(loaded["good@example.com"]).toEqual({ lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 });
|
|
321
|
+
expect(loaded["bad@example.com"]).toBeUndefined();
|
|
322
|
+
});
|
|
323
|
+
|
|
324
|
+
it("creates the state dir on save (if it doesn't exist yet)", () => {
|
|
325
|
+
const fresh = join(tmp, "fresh-subdir");
|
|
326
|
+
saveQuotaWatchState(fresh, {});
|
|
327
|
+
expect(loadQuotaWatchState(fresh)).toEqual({});
|
|
328
|
+
});
|
|
329
|
+
});
|
|
330
|
+
|
|
331
|
+
// ── patchQuotaWatchState tests ────────────────────────────────────────────────
|
|
332
|
+
|
|
333
|
+
describe("patchQuotaWatchState", () => {
|
|
334
|
+
it("adds a new account entry without clobbering others", () => {
|
|
335
|
+
const current: ReturnType<typeof emptyQuotaWatchState> = {
|
|
336
|
+
"alice@example.com": { lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 },
|
|
337
|
+
};
|
|
338
|
+
const updated = patchQuotaWatchState(
|
|
339
|
+
current,
|
|
340
|
+
"bob@example.com",
|
|
341
|
+
{ lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 },
|
|
342
|
+
);
|
|
343
|
+
expect(updated["alice@example.com"]).toEqual({ lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 });
|
|
344
|
+
expect(updated["bob@example.com"]).toEqual({ lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 });
|
|
345
|
+
});
|
|
346
|
+
|
|
347
|
+
it("updates an existing account entry", () => {
|
|
348
|
+
const current = {
|
|
349
|
+
"alice@example.com": { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1000 },
|
|
350
|
+
};
|
|
351
|
+
const updated = patchQuotaWatchState(
|
|
352
|
+
current,
|
|
353
|
+
"alice@example.com",
|
|
354
|
+
{ lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 },
|
|
355
|
+
);
|
|
356
|
+
expect(updated["alice@example.com"]).toEqual({ lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 });
|
|
357
|
+
});
|
|
358
|
+
|
|
359
|
+
it("does not mutate the original state object", () => {
|
|
360
|
+
const current = {
|
|
361
|
+
"alice@example.com": { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1000 },
|
|
362
|
+
};
|
|
363
|
+
patchQuotaWatchState(current, "bob@example.com", { lastNotifiedHealth: null, lastNotifiedAt: 0 });
|
|
364
|
+
expect(current["bob@example.com"]).toBeUndefined();
|
|
365
|
+
});
|
|
366
|
+
});
|
|
@@ -5,8 +5,74 @@ import {
|
|
|
5
5
|
formatSummary,
|
|
6
6
|
registerAndRender,
|
|
7
7
|
verbForTool,
|
|
8
|
+
describeToolUse,
|
|
8
9
|
} from "../tool-activity-summary.js";
|
|
9
10
|
|
|
11
|
+
describe("describeToolUse — friendly per-tool rendering (draft-mirror)", () => {
|
|
12
|
+
it("Bash uses the model-authored description verbatim, never the command", () => {
|
|
13
|
+
expect(
|
|
14
|
+
describeToolUse("Bash", { command: "ls -la /tmp", description: "List workspace" }),
|
|
15
|
+
).toBe("List workspace");
|
|
16
|
+
// No description → safe generic, still never the raw command.
|
|
17
|
+
expect(describeToolUse("Bash", { command: "grep -r foo ." })).toBe("Running a command");
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
it("Read/Edit/Write render the file basename, not the full path", () => {
|
|
21
|
+
expect(describeToolUse("Read", { file_path: "/home/ken/code/switchroom/gateway.ts" })).toBe(
|
|
22
|
+
"Reading gateway.ts",
|
|
23
|
+
);
|
|
24
|
+
expect(describeToolUse("Edit", { file_path: "/a/b/CLAUDE.md" })).toBe("Editing CLAUDE.md");
|
|
25
|
+
expect(describeToolUse("Write", { file_path: "notes.txt" })).toBe("Writing notes.txt");
|
|
26
|
+
expect(describeToolUse("Read", {})).toBe("Reading a file");
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
it("Grep/Glob show the pattern; WebFetch shows the hostname", () => {
|
|
30
|
+
expect(describeToolUse("Grep", { pattern: "TODO" })).toBe("Searching for TODO");
|
|
31
|
+
expect(describeToolUse("WebFetch", { url: "https://www.example.com/path?q=1" })).toBe(
|
|
32
|
+
"Reading example.com",
|
|
33
|
+
);
|
|
34
|
+
expect(describeToolUse("WebSearch", { query: "best running shoes" })).toBe(
|
|
35
|
+
"Searching the web for best running shoes",
|
|
36
|
+
);
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
it("Task/Agent surface the sub-agent task description", () => {
|
|
40
|
+
expect(describeToolUse("Task", { description: "Review the migration" })).toBe(
|
|
41
|
+
"Delegating: Review the migration",
|
|
42
|
+
);
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
it("domain MCP tools render human-meaningful labels (no jargon)", () => {
|
|
46
|
+
expect(describeToolUse("mcp__hindsight__reflect", { query: "x" })).toBe("Searching memory");
|
|
47
|
+
expect(describeToolUse("mcp__hindsight__retain", {})).toBe("Saving to memory");
|
|
48
|
+
expect(describeToolUse("mcp__claude_ai_Google_Calendar__list_events", {})).toBe(
|
|
49
|
+
"Checking your calendar",
|
|
50
|
+
);
|
|
51
|
+
expect(describeToolUse("mcp__claude_ai_Gmail__search", {})).toBe("Checking your email");
|
|
52
|
+
expect(describeToolUse("mcp__claude_ai_Google_Drive__search_files", {})).toBe(
|
|
53
|
+
"Looking through your files",
|
|
54
|
+
);
|
|
55
|
+
expect(describeToolUse("mcp__claude_ai_Notion__notion-search", {})).toBe("Checking your notes");
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
it("surface tools (reply/stream_reply) return null — never mirrored", () => {
|
|
59
|
+
expect(describeToolUse("mcp__switchroom-telegram__reply", { text: "hi" })).toBeNull();
|
|
60
|
+
expect(describeToolUse("mcp__switchroom-telegram__stream_reply", {})).toBeNull();
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
it("unknown MCP tool prefers a model-authored field, else humanizes the name", () => {
|
|
64
|
+
expect(describeToolUse("mcp__acme__do_thing", { description: "Fetched the report" })).toBe(
|
|
65
|
+
"Fetched the report",
|
|
66
|
+
);
|
|
67
|
+
expect(describeToolUse("mcp__acme__do_thing", {})).toBe("Using do thing");
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it("unknown built-in falls back to a generic working line, never raw syntax", () => {
|
|
71
|
+
expect(describeToolUse("SomeFutureTool", {})).toBe("Working…");
|
|
72
|
+
expect(describeToolUse("", {})).toBeNull();
|
|
73
|
+
});
|
|
74
|
+
});
|
|
75
|
+
|
|
10
76
|
describe("verbForTool — tool name → past-tense verb", () => {
|
|
11
77
|
it("maps standard CLI tools to readable verbs", () => {
|
|
12
78
|
expect(verbForTool("Read")).toBe("read");
|
|
@@ -17,9 +17,57 @@ import { describe, it, expect } from 'vitest'
|
|
|
17
17
|
import {
|
|
18
18
|
decideTurnFlush,
|
|
19
19
|
isSilentFlushMarker,
|
|
20
|
+
isCompositeSilentNoise,
|
|
20
21
|
isTurnFlushSafetyEnabled,
|
|
21
22
|
} from '../turn-flush-safety.js'
|
|
22
23
|
|
|
24
|
+
describe('isCompositeSilentNoise — Stop-hook re-prompt leak backstop', () => {
|
|
25
|
+
it('suppresses the observed leak "Sent.\\nNO_REPLY\\nNO_REPLY"', () => {
|
|
26
|
+
expect(isCompositeSilentNoise('Sent.\nNO_REPLY\nNO_REPLY')).toBe(true)
|
|
27
|
+
})
|
|
28
|
+
it('suppresses repeated bare markers and marker+confirmation variants', () => {
|
|
29
|
+
expect(isCompositeSilentNoise('NO_REPLY\nNO_REPLY')).toBe(true)
|
|
30
|
+
expect(isCompositeSilentNoise('Done\nNO_REPLY')).toBe(true)
|
|
31
|
+
expect(isCompositeSilentNoise('NO_REPLY\nHEARTBEAT_OK')).toBe(true)
|
|
32
|
+
})
|
|
33
|
+
it('requires at least one real marker — standalone confirmations still flush', () => {
|
|
34
|
+
// Conservative: no NO_REPLY/HEARTBEAT_OK present → NOT suppressed here,
|
|
35
|
+
// so we never silently drop a turn that wasn't already signalling silence.
|
|
36
|
+
expect(isCompositeSilentNoise('Sent.')).toBe(false)
|
|
37
|
+
expect(isCompositeSilentNoise('Done.\nOK')).toBe(false)
|
|
38
|
+
})
|
|
39
|
+
it('does NOT suppress real content glued to a marker', () => {
|
|
40
|
+
expect(
|
|
41
|
+
isCompositeSilentNoise('Here is the summary of the page.\nNO_REPLY'),
|
|
42
|
+
).toBe(false)
|
|
43
|
+
expect(isCompositeSilentNoise('NO_REPLY\nThe answer is 42.')).toBe(false)
|
|
44
|
+
})
|
|
45
|
+
it('handles non-strings / empty safely', () => {
|
|
46
|
+
expect(isCompositeSilentNoise(undefined)).toBe(false)
|
|
47
|
+
expect(isCompositeSilentNoise('')).toBe(false)
|
|
48
|
+
expect(isCompositeSilentNoise(' \n ')).toBe(false)
|
|
49
|
+
})
|
|
50
|
+
})
|
|
51
|
+
|
|
52
|
+
describe('decideTurnFlush — composite silent noise is skipped, not leaked', () => {
|
|
53
|
+
it('skips "Sent.\\nNO_REPLY\\nNO_REPLY" (the live clerk/test-harness leak)', () => {
|
|
54
|
+
const d = decideTurnFlush({
|
|
55
|
+
chatId: '12345',
|
|
56
|
+
replyCalled: false,
|
|
57
|
+
capturedText: ['Sent.', 'NO_REPLY', 'NO_REPLY'],
|
|
58
|
+
})
|
|
59
|
+
expect(d).toEqual({ kind: 'skip', reason: 'silent-marker' })
|
|
60
|
+
})
|
|
61
|
+
it('still flushes genuine trailing answer text', () => {
|
|
62
|
+
const d = decideTurnFlush({
|
|
63
|
+
chatId: '12345',
|
|
64
|
+
replyCalled: false,
|
|
65
|
+
capturedText: ['The page summarises three news stories.'],
|
|
66
|
+
})
|
|
67
|
+
expect(d.kind).toBe('flush')
|
|
68
|
+
})
|
|
69
|
+
})
|
|
70
|
+
|
|
23
71
|
describe('decideTurnFlush', () => {
|
|
24
72
|
it('(a) does NOT flush when the reply tool was called', () => {
|
|
25
73
|
const decision = decideTurnFlush({
|
|
@@ -198,3 +198,140 @@ export function registerAndRender(
|
|
|
198
198
|
if (!changed) return null;
|
|
199
199
|
return formatSummary(state);
|
|
200
200
|
}
|
|
201
|
+
|
|
202
|
+
// ─── Friendly per-tool rendering (draft-mirror, RFC draft-mirror-preview) ───
|
|
203
|
+
//
|
|
204
|
+
// Claude Code's own UI reads human-friendly because the model AUTHORS the
|
|
205
|
+
// descriptive text inside each tool_use.input — verified against a real
|
|
206
|
+
// session JSONL (1360 Bash calls etc.):
|
|
207
|
+
// Bash → input.description ("Get CLAUDE.md size and recent history")
|
|
208
|
+
// Read → input.file_path (basename → "Reading CLAUDE.md")
|
|
209
|
+
// Edit/Write → input.file_path (basename)
|
|
210
|
+
// Grep/Glob → input.pattern
|
|
211
|
+
// Task/Agent → input.description (the sub-agent's task)
|
|
212
|
+
// WebFetch → input.url (hostname → "Reading example.com")
|
|
213
|
+
// hindsight → friendly label ("Searching memory")
|
|
214
|
+
// There is never a raw `grep`/`jq`/`ls` to surface — only the model's own
|
|
215
|
+
// plain-English description or a domain label. This is the signal the
|
|
216
|
+
// draft-mirror renders (option A: uniform across code + non-code agents).
|
|
217
|
+
|
|
218
|
+
/**
 * Reduce a "/"-separated path to its final non-empty segment for display.
 *
 * @param p - candidate path; anything that is not a non-empty string yields `null`.
 * @returns the last non-empty segment, or the input itself when it has no
 *   non-empty segment (for example "/").
 */
function baseName(p: unknown): string | null {
  if (typeof p !== "string") return null;
  if (p.length === 0) return null;
  // Empty segments come from leading, trailing or doubled slashes.
  const segments = p.split("/").filter((segment) => segment !== "");
  const last = segments.pop();
  return last === undefined ? p : last;
}
|
|
224
|
+
|
|
225
|
+
/**
 * Extract a bare hostname from a URL for display (no scheme, path, query or
 * fragment, and no leading "www.").
 *
 * @param u - candidate URL; anything that is not a non-empty string yields `null`.
 * @returns the display hostname, or `null` when none can be extracted. An
 *   empty hostname (e.g. from a `mailto:` URL) is reported as `null`, never
 *   as an empty string.
 */
function hostName(u: unknown): string | null {
  if (typeof u !== "string" || u.length === 0) return null;
  let host: string;
  try {
    host = new URL(u).hostname;
  } catch {
    // Not parseable as an absolute URL (e.g. "www.example.com/path"): drop any
    // http(s) scheme and keep everything before the first path, query or
    // fragment delimiter.
    host = u.replace(/^https?:\/\//i, "").split(/[/?#]/)[0] ?? "";
  }
  // Strip "www." on both paths so parsed and fallback inputs render alike.
  host = host.replace(/^www\./i, "");
  return host.length > 0 ? host : null;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Trim a value and shorten it to at most `n` UTF-16 code units for display,
 * marking truncation with a trailing "…".
 *
 * @param s - candidate text; non-strings and whitespace-only strings yield `null`.
 * @param n - maximum length of the result, ellipsis included.
 * @returns the trimmed text, a truncated text ending in "…", or `null`.
 */
function clip(s: unknown, n: number): string | null {
  if (typeof s !== "string") return null;
  const t = s.trim();
  if (t.length === 0) return null;
  if (!(t.length > n)) return t;
  let cut = n - 1;
  if (cut > 0) {
    // Never cut between the two halves of a surrogate pair: if the last kept
    // code unit is a high surrogate, drop it so the output stays well-formed.
    const last = t.charCodeAt(cut - 1);
    if (last >= 0xd800 && last <= 0xdbff) cut -= 1;
  }
  return t.slice(0, cut) + "…";
}
|
|
241
|
+
|
|
242
|
+
/**
 * Turn one tool_use into a short, present-tense activity line for the live
 * draft preview.
 *
 * Resolution order:
 *  1. Empty tool name → `null`.
 *  2. MCP tools (`mcp__<server>__<tool>`, compared case-insensitively):
 *     the switchroom-telegram server is never mirrored (`null`); known servers
 *     map to a fixed domain label; any other server uses the clipped
 *     `description`, `query` or `title` input, else "Using <tool name>" with
 *     dashes/underscores turned into spaces.
 *  3. Built-in tools: a label built from the model-authored input field
 *     (description, file basename, pattern, hostname, query), or a generic
 *     label when that field is missing.
 *  4. Anything else → "Working…".
 *
 * @param toolName - tool identifier as it appears on the tool_use.
 * @param input - tool_use input; `undefined` is treated as an empty object.
 * @returns the activity line, or `null` when the tool should not be surfaced.
 */
export function describeToolUse(
  toolName: string,
  input: Record<string, unknown> | undefined,
): string | null {
  if (!toolName) return null;
  const args = input ?? {};

  const parsed = /^mcp__(.+?)__(.+)$/.exec(toolName);
  if (parsed !== null) {
    const serverId = parsed[1].toLowerCase();
    const toolId = parsed[2].toLowerCase();

    switch (serverId) {
      case "switchroom-telegram":
        // Surface tools ARE the conversation — never mirror them.
        return null;
      case "hindsight": {
        if (toolId === "recall" || toolId === "reflect") {
          return "Searching memory";
        }
        const isSave =
          toolId === "retain" || toolId === "update_memory" || toolId === "sync_retain";
        return isSave ? "Saving to memory" : "Working with memory";
      }
      case "google-workspace":
      case "claude_ai_google_calendar":
        return "Checking your calendar";
      case "claude_ai_gmail":
        return "Checking your email";
      case "claude_ai_google_drive":
        return "Looking through your files";
      case "notion":
      case "claude_ai_notion":
        return "Checking your notes";
      default: {
        // Unknown MCP server: prefer a model-authored field, else a humanized name.
        const authored =
          clip(args.description, 60) ?? clip(args.query, 50) ?? clip(args.title, 50);
        if (authored) return authored;
        return "Using " + toolId.replace(/[-_]+/g, " ");
      }
    }
  }

  if (toolName === "Bash") {
    // Only the plain-English description is shown, never the command itself.
    return clip(args.description, 70) ?? "Running a command";
  }
  if (toolName === "BashOutput" || toolName === "KillShell") {
    return "Managing a background command";
  }
  if (toolName === "Read") {
    const file = baseName(args.file_path);
    return file ? `Reading ${file}` : "Reading a file";
  }
  if (toolName === "Edit" || toolName === "MultiEdit" || toolName === "NotebookEdit") {
    const file = baseName(args.file_path) ?? baseName(args.notebook_path);
    return file ? `Editing ${file}` : "Editing a file";
  }
  if (toolName === "Write") {
    const file = baseName(args.file_path);
    return file ? `Writing ${file}` : "Writing a file";
  }
  if (toolName === "Grep" || toolName === "Glob") {
    const pattern = clip(args.pattern, 40);
    return pattern ? `Searching for ${pattern}` : "Searching files";
  }
  if (toolName === "WebFetch") {
    const host = hostName(args.url);
    return host ? `Reading ${host}` : "Reading a web page";
  }
  if (toolName === "WebSearch") {
    const query = clip(args.query, 50);
    return query ? `Searching the web for ${query}` : "Searching the web";
  }
  if (toolName === "Task" || toolName === "Agent") {
    const task = clip(args.description, 60);
    return task ? `Delegating: ${task}` : "Delegating to a sub-agent";
  }
  if (
    toolName === "TodoWrite" ||
    toolName === "TaskCreate" ||
    toolName === "TaskUpdate" ||
    toolName === "TaskList"
  ) {
    return "Updating the plan";
  }
  if (toolName === "ToolSearch") {
    return "Finding the right tool";
  }
  // Unrecognised built-in: generic line, never the raw tool syntax.
  return "Working…";
}
|