switchroom 0.10.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +5 -4
  2. package/dist/agent-scheduler/index.js +2 -2
  3. package/dist/auth-broker/index.js +125 -3
  4. package/dist/cli/drive-write-pretool.mjs +5436 -0
  5. package/dist/cli/switchroom.js +231 -29
  6. package/dist/host-control/main.js +2 -2
  7. package/dist/vault/approvals/kernel-server.js +2 -2
  8. package/dist/vault/broker/server.js +2 -2
  9. package/package.json +1 -1
  10. package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
  11. package/telegram-plugin/admin-commands/index.ts +2 -0
  12. package/telegram-plugin/auth-snapshot-format.ts +612 -0
  13. package/telegram-plugin/auto-fallback-fleet.ts +215 -0
  14. package/telegram-plugin/auto-fallback.ts +28 -301
  15. package/telegram-plugin/dist/gateway/gateway.js +4314 -2143
  16. package/telegram-plugin/fleet-fallback-gate.ts +105 -0
  17. package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
  18. package/telegram-plugin/gateway/approval-callback.ts +31 -3
  19. package/telegram-plugin/gateway/auth-broker-client.ts +2 -0
  20. package/telegram-plugin/gateway/auth-command.ts +131 -10
  21. package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
  22. package/telegram-plugin/gateway/boot-card.ts +1 -1
  23. package/telegram-plugin/gateway/boot-probes.ts +6 -9
  24. package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
  25. package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
  26. package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
  27. package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
  28. package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
  29. package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
  30. package/telegram-plugin/gateway/gateway.ts +903 -173
  31. package/telegram-plugin/gateway/hostd-dispatch.ts +137 -2
  32. package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
  33. package/telegram-plugin/gateway/ipc-server.ts +69 -0
  34. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
  35. package/telegram-plugin/model-unavailable.ts +28 -12
  36. package/telegram-plugin/silence-poke.ts +153 -1
  37. package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
  38. package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
  39. package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
  40. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
  41. package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
  42. package/telegram-plugin/tests/boot-probes.test.ts +16 -18
  43. package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
  44. package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
  45. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
  46. package/telegram-plugin/tests/silence-poke.test.ts +237 -0
  47. package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
  48. package/telegram-plugin/turn-flush-safety.ts +55 -1
  49. package/telegram-plugin/uat/SETUP.md +16 -12
  50. package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
  51. package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
  52. package/telegram-plugin/tests/hostd-dispatch.test.ts +0 -129
@@ -45,6 +45,22 @@
45
45
 
46
46
  export type PokeLevel = 'soft' | 'firm'
47
47
 
48
/**
 * #1292: point-in-time description of one tool call that is still running.
 *
 * Carried in the 300s framework-fallback message so the user is told what
 * is observably happening (for example `running Grep "foo" for 4m`) rather
 * than the generic "still working… no update in 5 min" line, which is
 * misleading while the agent is busy working through tool calls.
 */
export interface ToolSnapshot {
  /** Tool name exactly as received on the wire, e.g. "Grep", "Read", "Bash". */
  name: string
  /**
   * Human-readable descriptor produced by `toolLabel()` when one exists
   * (the query for Grep, the basename for Read/Edit/Write, the hostname
   * for WebFetch); `null` when no useful label could be derived.
   */
  label: string | null
  /** Milliseconds elapsed since this tool call started. */
  durationMs: number
}
63
+
48
64
  export interface SilencePokeState {
49
65
  /** Wall-clock ms of turn start. Silence clock zero-point when no outbound yet. */
50
66
  turnStartedAt: number
@@ -62,6 +78,16 @@ export interface SilencePokeState {
62
78
  fallbackFired: boolean
63
79
  /** Wall-clock ms of last poke fire — used for poke-success latency. */
64
80
  lastPokeFiredAt: number | null
81
+ /** #1292: in-flight tool calls keyed by toolUseId. Populated by
82
+ * `noteToolStart` on every parent-agent `tool_use` event the gateway
83
+ * sees and drained by `noteToolEnd` on the matching `tool_result`.
84
+ * Read only inside `tick()` when the 300s fallback fires — at that
85
+ * point we snapshot the entries (sorted by startedAt ascending) and
86
+ * include the longest-running one in the fallback message body.
87
+ * NOTE: presence of in-flight tools does NOT reset the silence
88
+ * clock — the design choice in this module's header is preserved.
89
+ * We only enrich the fallback TEXT, not the timing. */
90
+ inFlightTools: Map<string, { name: string; startedAt: number; label: string | null }>
65
91
  }
66
92
 
67
93
  export interface ThresholdsMs {
@@ -89,9 +115,19 @@ export interface FrameworkFallbackContext {
89
115
  chatId: string
90
116
  threadId: number | null
91
117
  /** Picked from lastThinkingAt: 'thinking' if a thinking event landed in
92
- * the last 30s of silence, else 'working'. */
118
+ * the last 30s of silence, else 'working'. Note: 'working' is the
119
+ * default base; when `inFlightTools` is non-empty the fallback text
120
+ * uses the tool-aware wording instead of either 'working' / 'thinking'
121
+ * (see `formatFrameworkFallbackText`). */
93
122
  fallbackKind: 'working' | 'thinking'
94
123
  silenceMs: number
124
+ /** #1292: snapshot of in-flight tool calls at the moment the fallback
125
+ * fires, sorted by startedAt ascending. Empty when no tools were
126
+ * in flight (e.g. agent genuinely silent, or all tools completed
127
+ * faster than the 300s threshold). The format helper uses entry [0]
128
+ * (longest-running) for the message body and "+ N more" when
129
+ * length > 1. */
130
+ inFlightTools: ToolSnapshot[]
95
131
  }
96
132
 
97
133
  export type SilencePokeMetric =
@@ -141,6 +177,7 @@ export function startTurn(key: string, now: number): void {
141
177
  lastThinkingAt: null,
142
178
  fallbackFired: false,
143
179
  lastPokeFiredAt: null,
180
+ inFlightTools: new Map(),
144
181
  })
145
182
  }
146
183
 
@@ -205,6 +242,72 @@ export function noteThinking(key: string, now: number): void {
205
242
  s.lastThinkingAt = now
206
243
  }
207
244
 
245
/**
 * #1292: register a tool call as in flight for the turn identified by `key`.
 *
 * The entry is stored in that turn's `inFlightTools` map under `toolUseId`
 * so a later `noteToolEnd` can remove it. The map is intended to be read
 * when the 300s fallback fires, so the user-visible message can name the
 * running tool instead of the generic "still working…" text.
 *
 * Registering the same `toolUseId` again replaces the whole entry,
 * including `startedAt`. To refresh only the label without disturbing the
 * measured duration, call `noteToolLabel` instead.
 *
 * Does nothing when no state exists for `key` (per the original note, this
 * is the case when the kill switch is on and the state map stays empty).
 *
 * @param key       turn key used to look up the silence-poke state
 * @param toolUseId identifier of the tool call; used as the map key
 * @param name      bare tool name
 * @param label     descriptive label, or `null` when none is known yet
 * @param now       timestamp in ms recorded as the tool's start time
 */
export function noteToolStart(
  key: string,
  toolUseId: string,
  name: string,
  label: string | null,
  now: number,
): void {
  // Optional chaining makes the "unknown key" case a silent no-op.
  state.get(key)?.inFlightTools.set(toolUseId, { name, startedAt: now, label })
}
271
+
272
/**
 * #1292: mark a tool call as finished by removing it from the turn's
 * `inFlightTools` map.
 *
 * Safe to call repeatedly or with an id that was never registered:
 * deleting a missing key changes nothing. Likewise a no-op when no state
 * exists for `key`.
 *
 * @param key       turn key used to look up the silence-poke state
 * @param toolUseId identifier of the tool call to drop
 * @param _now      unused; kept so the signature mirrors `noteToolStart`
 */
export function noteToolEnd(
  key: string,
  toolUseId: string,
  _now: number,
): void {
  state.get(key)?.inFlightTools.delete(toolUseId)
}
288
+
289
/**
 * #1292: attach a late-arriving label to a tool call that is still in flight.
 *
 * A richer label may be published some time after the `tool_use` event was
 * seen (the original note attributes this to the tool-label sidecar). The
 * tracked entry is mutated in place, so `startedAt`, and therefore the
 * reported duration, is left untouched.
 *
 * Does nothing when there is no state for `key` or when `toolUseId` is not
 * currently tracked (for example because the tool already completed).
 *
 * @param key       turn key used to look up the silence-poke state
 * @param toolUseId identifier of the tool call to relabel
 * @param label     replacement label
 */
export function noteToolLabel(
  key: string,
  toolUseId: string,
  label: string,
): void {
  const tracked = state.get(key)?.inFlightTools.get(toolUseId)
  if (tracked != null) {
    tracked.label = label
  }
}
310
+
208
311
  /**
209
312
  * Drain any armed poke for ANY active turn and return the system-reminder
210
313
  * text to append. Returns null if nothing is armed.
@@ -273,14 +376,52 @@ export function formatPokeText(level: PokeLevel): string {
273
376
  export function formatFrameworkFallbackText(
274
377
  fallbackKind: 'working' | 'thinking',
275
378
  silenceMs: number,
379
+ inFlightTools: ToolSnapshot[] = [],
276
380
  ): string {
277
381
  const minutes = Math.max(1, Math.round(silenceMs / 60_000))
278
382
  const suffix = `(no update from agent in ${minutes} min)`
383
+ // #1292 case (a): tools in flight. Name the longest-running one
384
+ // (entry[0] — caller pre-sorts by startedAt ascending). Avoid the
385
+ // "still working" framing #1292 explicitly calls out as dishonest:
386
+ // the agent IS doing work, we can see the tool. Format:
387
+ // running Grep "foo" for 4m (no update from agent in 5 min)
388
+ // running Grep "foo" + 2 more (4m) (no update from agent in 5 min)
389
+ // running Grep (no label) for 4m (no update from agent in 5 min)
390
+ if (inFlightTools.length > 0) {
391
+ const longest = inFlightTools[0]!
392
+ const dur = formatDurationShort(longest.durationMs)
393
+ const labelTail = longest.label && longest.label.length > 0
394
+ ? ` ${truncateLabel(longest.label)}`
395
+ : ''
396
+ const more = inFlightTools.length > 1
397
+ ? ` + ${inFlightTools.length - 1} more`
398
+ : ''
399
+ return `running ${longest.name}${labelTail}${more} for ${dur} ${suffix}`
400
+ }
279
401
  return fallbackKind === 'thinking'
280
402
  ? `still thinking… ${suffix}`
281
403
  : `still working… ${suffix}`
282
404
  }
283
405
 
406
/**
 * Compact duration for the fallback message: whole seconds (`42s`) below
 * one minute, whole minutes (`4m`) from one minute upward.
 *
 * Minutes are rounded to the nearest whole minute, with an exact half
 * minute rounding DOWN so the message never overstates how long a tool has
 * been running: 4m 30s renders as "4m", 4m 45s as "5m".
 *
 * Negative and non-finite inputs render as "0s" instead of leaking
 * "NaNm" / "Infinitym" into a user-visible message.
 *
 * @param ms elapsed time in milliseconds
 * @returns the rendered duration, e.g. "59s" or "4m"
 */
function formatDurationShort(ms: number): string {
  if (!Number.isFinite(ms)) return '0s'
  const totalSec = Math.max(0, Math.round(ms / 1000))
  if (totalSec < 60) return `${totalSec}s`
  // Integer round-half-down: adding 29 (not 30) before flooring sends an
  // exact :30 down and everything from :31 up. Math.round would send the
  // exact half up, contradicting the documented "4m for 4m 30s".
  const minutes = Math.floor((totalSec + 29) / 60)
  return `${minutes}m`
}
415
+
416
/**
 * Clip a tool label so the fallback message stays readable on narrow
 * (mobile) Telegram layouts.
 *
 * Labels of up to 60 UTF-16 code units are returned unchanged. Longer ones
 * are cut to at most 59 code units and terminated with "…". The limit is
 * generous because, per the original note, labels are already
 * pre-truncated by `toolLabel()`.
 *
 * The cut never lands in the middle of a surrogate pair: if the last kept
 * code unit is a high surrogate, it is dropped too, so the result never
 * contains half of an astral character (e.g. an emoji) before the ellipsis.
 *
 * @param label raw label text
 * @returns the label, clipped to at most 60 code units
 */
function truncateLabel(label: string): string {
  const MAX = 60
  if (label.length <= MAX) return label
  let cut = MAX - 1
  const lastKept = label.charCodeAt(cut - 1)
  // 0xD800–0xDBFF is the high-surrogate range; keeping one without its
  // low-surrogate partner would emit a malformed character.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1
  return label.slice(0, cut) + '…'
}
424
+
284
425
  /**
285
426
  * Internal tick — iterates active states, arms pokes or fires fallback.
286
427
  * Exported as __tickForTests so suite can step the clock deterministically.
@@ -331,6 +472,16 @@ function tick(now: number): void {
331
472
  const recentThinking = s.lastThinkingAt != null
332
473
  && (now - s.lastThinkingAt) < 30_000
333
474
  const fallbackKind: 'working' | 'thinking' = recentThinking ? 'thinking' : 'working'
475
+ // #1292: snapshot in-flight tools at fire time, sorted by
476
+ // startedAt ascending so entry[0] is the longest-running.
477
+ // Pre-computed durations in ms; the formatter just renders.
478
+ const inFlightTools: ToolSnapshot[] = Array.from(s.inFlightTools.values())
479
+ .sort((a, b) => a.startedAt - b.startedAt)
480
+ .map(t => ({
481
+ name: t.name,
482
+ label: t.label,
483
+ durationMs: now - t.startedAt,
484
+ }))
334
485
  activeDeps.emitMetric({
335
486
  kind: 'silence_fallback_sent',
336
487
  key,
@@ -345,6 +496,7 @@ function tick(now: number): void {
345
496
  threadId,
346
497
  fallbackKind,
347
498
  silenceMs: silence,
499
+ inFlightTools,
348
500
  })
349
501
  if (r != null && typeof (r as Promise<void>).catch === 'function') {
350
502
  ;(r as Promise<void>).catch((err) => {
@@ -0,0 +1,156 @@
1
+ /**
2
+ * Integration test for the Format 2 wiring through `renderShowText` +
3
+ * `handleAuthCommand`. The pure formatter has dedicated tests in
4
+ * auth-snapshot-format.test.ts; here we cover the seam between the
5
+ * legacy ASCII-table path and the new health-grouped path.
6
+ *
7
+ * Headline guarantees:
8
+ *
9
+ * 1. With no liveQuotas, renderShowText produces the legacy ASCII
10
+ * table shape (back-compat preserved).
11
+ * 2. With liveQuotas matching state.accounts.length, renderShowText
12
+ * produces the Format 2 health-grouped shape (Recommendation
13
+ * footer present, ASCII column header absent).
14
+ * 3. handleAuthCommand attaches a keyboard ONLY when liveQuotas is
15
+ * supplied AND yields one quota per account (no half-rendered
16
+ * buttons under partial-failure).
17
+ * 4. The keyboard emitted by handleAuthCommand never references a
18
+ * blocked or unknown-health account in a switch button (smart-
19
+ * hide rule, integration variant of the unit test in
20
+ * auth-snapshot-format.test.ts).
21
+ */
22
+ import { describe, it, expect, vi } from 'vitest';
23
+ import { renderShowText, handleAuthCommand } from '../gateway/auth-command.js';
24
+ import type { AuthBrokerClient, AuthCommandContext } from '../gateway/auth-command.js';
25
+ import type { ListStateData } from '../../src/auth/broker/client.js';
26
+ import type { QuotaResult, QuotaUtilization } from '../quota-check.js';
27
+
28
/**
 * Build a complete `QuotaUtilization` for a test case: every field starts
 * at zero / null and any field present in `part` overrides its default.
 */
function quota(part: Partial<QuotaUtilization>): QuotaUtilization {
  const baseline: QuotaUtilization = {
    fiveHourUtilizationPct: 0,
    sevenDayUtilizationPct: 0,
    fiveHourResetAt: null,
    sevenDayResetAt: null,
    representativeClaim: null,
    overageStatus: null,
    overageDisabledReason: null,
  };
  return { ...baseline, ...part };
}
40
+
41
/** Wrap the utilization built by `quota(part)` in a successful `QuotaResult`. */
function qOk(part: Partial<QuotaUtilization>): QuotaResult {
  const data = quota(part);
  return { ok: true, data };
}
44
+
45
/** Fixed "now" (epoch ms) so rendered output does not depend on the wall clock. */
const NOW_MS = Date.parse('2026-05-15T00:53:00Z');

/**
 * Broker state shared by every test: three accounts, none flagged
 * exhausted, with `pixsoul@x` active and a single agent bound to it.
 */
const FIXTURE_STATE: ListStateData = {
  active: 'pixsoul@x',
  fallback_order: ['ken@x', 'me@x', 'pixsoul@x'],
  accounts: [
    { label: 'ken@x', exhausted: false },
    { label: 'me@x', exhausted: false },
    { label: 'pixsoul@x', exhausted: false },
  ],
  agents: [{ name: 'carrie', account: 'pixsoul@x', override: null }],
  consumers: [],
};

/**
 * One live-quota result per account, in the same order as
 * `FIXTURE_STATE.accounts`. The second entry (`me@x`) is at 100% of its
 * seven-day window and is the "blocked" account the tests rely on.
 */
const FIXTURE_QUOTAS: QuotaResult[] = [
  qOk({ fiveHourUtilizationPct: 0, sevenDayUtilizationPct: 23 }), // ken@x
  qOk({ sevenDayUtilizationPct: 100 }), // me@x, blocked
  qOk({ fiveHourUtilizationPct: 8, sevenDayUtilizationPct: 20 }), // pixsoul@x
];
64
+
65
/**
 * Create an `AuthBrokerClient` whose methods are all vitest mocks.
 * `listState` resolves to `FIXTURE_STATE`; the mutating calls echo their
 * arguments back. Entries in `over` replace the matching defaults.
 */
function mockClient(over: Partial<AuthBrokerClient> = {}): AuthBrokerClient {
  const defaults: AuthBrokerClient = {
    listState: vi.fn(async () => FIXTURE_STATE),
    setActive: vi.fn(async (label: string) => ({ active: label, fanned: ['carrie'] })),
    rmAccount: vi.fn(async (label: string) => ({ label })),
    refreshAccount: vi.fn(async (label: string) => ({ account: label })),
    setOverride: vi.fn(async (agent: string, account: string | null) => ({ agent, account })),
  };
  return { ...defaults, ...over };
}
75
+
76
// Seam between the legacy ASCII-table renderer and the Format 2
// health-grouped renderer: which one runs is decided by `liveQuotas`.
describe('renderShowText — Format 2 vs legacy', () => {
  it('falls back to legacy ASCII table when no liveQuotas given', () => {
    const rendered = renderShowText(FIXTURE_STATE, NOW_MS);

    // Legacy title and column headers are present...
    expect(rendered).toContain('<b>Auth — fleet snapshot</b>');
    for (const column of ['ACCOUNT', 'STATUS', 'EXPIRES']) {
      expect(rendered).toContain(column);
    }
    // ...and none of the Format 2 markers are.
    expect(rendered).not.toContain('🔋');
    expect(rendered).not.toContain('Recommendation:');
  });

  it('renders Format 2 when liveQuotas length matches accounts length', () => {
    const options = {
      liveQuotas: FIXTURE_QUOTAS,
      tz: 'UTC',
      liveProbedAtMs: NOW_MS,
    };
    const rendered = renderShowText(FIXTURE_STATE, NOW_MS, options);

    expect(rendered).toContain('🔋 <b>Auth — fleet status</b>');
    expect(rendered).toContain('Recommendation:');
    expect(rendered).toContain('🔴 <b>BLOCKED</b>');
    expect(rendered).toContain('🟢 <b>HEALTHY</b>');
    // Legacy ASCII column headers should be absent
    expect(rendered).not.toContain('ACCOUNT STATUS');
  });

  it('falls back to legacy when liveQuotas length disagrees with accounts (defensive)', () => {
    // Two quota results for three accounts: deliberately the wrong length.
    const tooFewQuotas = FIXTURE_QUOTAS.slice(0, 2);
    const rendered = renderShowText(FIXTURE_STATE, NOW_MS, {
      liveQuotas: tooFewQuotas,
    });

    expect(rendered).not.toContain('🔋');
    expect(rendered).toContain('ACCOUNT');
  });
});
109
+
110
// The inline keyboard must only be attached when a live quota probe
// succeeded; legacy callers and failed probes get text only.
describe('handleAuthCommand — keyboard attachment', () => {
  /** Admin context for agent `carrie` with a fresh mock client per call. */
  function makeCtx(overrides: Partial<AuthCommandContext> = {}): AuthCommandContext {
    const base: AuthCommandContext = {
      agentName: 'carrie',
      isAdmin: true,
      client: mockClient(),
      chatId: 'chat-1',
    };
    return { ...base, ...overrides };
  }

  it('attaches NO keyboard when liveQuotas is omitted (legacy callers)', async () => {
    const ctx = makeCtx();
    const reply = await handleAuthCommand({ kind: 'show' }, ctx);

    expect(reply.keyboard).toBeUndefined();
    expect(reply.text).toContain('ACCOUNT'); // legacy table
  });

  it('attaches a smart keyboard when liveQuotas yields one result per account', async () => {
    const ctx = makeCtx({ liveQuotas: async () => FIXTURE_QUOTAS, tz: 'UTC' });
    const reply = await handleAuthCommand({ kind: 'show' }, ctx);

    expect(reply.keyboard).toBeDefined();
    const buttonTexts = reply.keyboard!.flatMap((row) => row.map((button) => button.text));

    // Switch button should exist for ken@x (healthy, not active)
    expect(buttonTexts).toContain('Switch fleet → ken@x');
    // me@x is blocked, so it must NOT appear as a switch target
    expect(buttonTexts).not.toContain('Switch fleet → me@x');
    // Bottom row hardware
    for (const expected of ['↻ Refresh', '/usage', '+ Add']) {
      expect(buttonTexts).toContain(expected);
    }
  });

  it('attaches no keyboard when the live probe throws (graceful degrade)', async () => {
    // Probe that always rejects, standing in for a network failure.
    const failingProbe = async () => {
      throw new Error('network down');
    };
    const reply = await handleAuthCommand(
      { kind: 'show' },
      makeCtx({ liveQuotas: failingProbe }),
    );

    expect(reply.keyboard).toBeUndefined();
    expect(reply.text).toContain('ACCOUNT'); // legacy table fallback
  });
});