npm - switchroom - Versions diffs - 0.10.0 → 0.11.1 - Mend

switchroom 0.10.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/README.md +5 -4
package/dist/agent-scheduler/index.js +2 -2
package/dist/auth-broker/index.js +125 -3
package/dist/cli/drive-write-pretool.mjs +5436 -0
package/dist/cli/switchroom.js +231 -29
package/dist/host-control/main.js +2 -2
package/dist/vault/approvals/kernel-server.js +2 -2
package/dist/vault/broker/server.js +2 -2
package/package.json +1 -1
package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
package/telegram-plugin/admin-commands/index.ts +2 -0
package/telegram-plugin/auth-snapshot-format.ts +612 -0
package/telegram-plugin/auto-fallback-fleet.ts +215 -0
package/telegram-plugin/auto-fallback.ts +28 -301
package/telegram-plugin/dist/gateway/gateway.js +4314 -2143
package/telegram-plugin/fleet-fallback-gate.ts +105 -0
package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
package/telegram-plugin/gateway/approval-callback.ts +31 -3
package/telegram-plugin/gateway/auth-broker-client.ts +2 -0
package/telegram-plugin/gateway/auth-command.ts +131 -10
package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
package/telegram-plugin/gateway/boot-card.ts +1 -1
package/telegram-plugin/gateway/boot-probes.ts +6 -9
package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
package/telegram-plugin/gateway/gateway.ts +903 -173
package/telegram-plugin/gateway/hostd-dispatch.ts +137 -2
package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
package/telegram-plugin/gateway/ipc-server.ts +69 -0
package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
package/telegram-plugin/model-unavailable.ts +28 -12
package/telegram-plugin/silence-poke.ts +153 -1
package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
package/telegram-plugin/tests/boot-probes.test.ts +16 -18
package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
package/telegram-plugin/tests/silence-poke.test.ts +237 -0
package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
package/telegram-plugin/turn-flush-safety.ts +55 -1
package/telegram-plugin/uat/SETUP.md +16 -12
package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
package/telegram-plugin/tests/hostd-dispatch.test.ts +0 -129

package/telegram-plugin/silence-poke.ts CHANGED Viewed

@@ -45,6 +45,22 @@
 export type PokeLevel = 'soft' | 'firm'
+/** #1292: snapshot of an in-flight tool call, surfaced in the 300s
+ * framework-fallback message so the user sees the actual observable
+ * ("running Grep \"foo\" for 4m") instead of the dishonest generic
+ * "still working… no update in 5 min" when the agent is clearly busy
+ * grinding through tool calls. */
+export interface ToolSnapshot {
+  /** Bare tool name as it came off the wire (e.g. "Grep", "Read", "Bash"). */
+  name: string
+  /** Natural-language descriptor from `toolLabel()` if available (e.g. the
+   * query for Grep, basename for Read/Edit/Write, hostname for WebFetch),
+   * or null when no useful label could be derived. */
+  label: string | null
+  /** Time since this tool call started, in ms. */
+  durationMs: number
+}
 export interface SilencePokeState {
   /** Wall-clock ms of turn start. Silence clock zero-point when no outbound yet. */
   turnStartedAt: number
@@ -62,6 +78,16 @@ export interface SilencePokeState {
   fallbackFired: boolean
   /** Wall-clock ms of last poke fire — used for poke-success latency. */
   lastPokeFiredAt: number | null
+  /** #1292: in-flight tool calls keyed by toolUseId. Populated by
+   * `noteToolStart` on every parent-agent `tool_use` event the gateway
+   * sees and drained by `noteToolEnd` on the matching `tool_result`.
+   * Read only inside `tick()` when the 300s fallback fires — at that
+   * point we snapshot the entries (sorted by startedAt ascending) and
+   * include the longest-running one in the fallback message body.
+   * NOTE: presence of in-flight tools does NOT reset the silence
+   * clock — the design choice in this module's header is preserved.
+   * We only enrich the fallback TEXT, not the timing. */
+  inFlightTools: Map<string, { name: string; startedAt: number; label: string | null }>
 }
 export interface ThresholdsMs {
@@ -89,9 +115,19 @@ export interface FrameworkFallbackContext {
   chatId: string
   threadId: number | null
   /** Picked from lastThinkingAt: 'thinking' if a thinking event landed in
-   *  the last 30s of silence, else 'working'. */
+   *  the last 30s of silence, else 'working'. Note: 'working' is the
+   *  default base; when `inFlightTools` is non-empty the fallback text
+   *  uses the tool-aware wording instead of either 'working' / 'thinking'
+   *  (see `formatFrameworkFallbackText`). */
   fallbackKind: 'working' | 'thinking'
   silenceMs: number
+  /** #1292: snapshot of in-flight tool calls at the moment the fallback
+   *  fires, sorted by startedAt ascending. Empty when no tools were
+   *  in flight (e.g. agent genuinely silent, or all tools completed
+   *  faster than the 300s threshold). The format helper uses entry [0]
+   *  (longest-running) for the message body and "+ N more" when
+   *  length > 1. */
+  inFlightTools: ToolSnapshot[]
 }
 export type SilencePokeMetric =
@@ -141,6 +177,7 @@ export function startTurn(key: string, now: number): void {
     lastThinkingAt: null,
     fallbackFired: false,
     lastPokeFiredAt: null,
+    inFlightTools: new Map(),
   })
 }
@@ -205,6 +242,72 @@ export function noteThinking(key: string, now: number): void {
   s.lastThinkingAt = now
 }
+/**
+ * #1292: record the start of a tool call. Stored in `inFlightTools` keyed
+ * by `toolUseId` so a later `noteToolEnd` can drain the entry. Read only
+ * by `tick()` when the 300s fallback fires, where we snapshot the map
+ * into the fallback context so the user-visible message can name the
+ * actual observable (e.g. "running Grep \"foo\" for 4m") instead of the
+ * dishonest generic "still working… no update in 5 min".
+ *
+ * Safe to call repeatedly: a second call with the same toolUseId
+ * overwrites the existing entry. That lets a caller reuse the start-side
+ * API when a late label arrives, but note that `startedAt` is reset too,
+ * so the reported duration restarts. For label-only refreshes use
+ * `noteToolLabel` instead so the duration stays correct.
+ *
+ * No-op when the kill switch is on (state Map will be empty for this key).
+ */
+export function noteToolStart(
+  key: string,
+  toolUseId: string,
+  name: string,
+  label: string | null,
+  now: number,
+): void {
+  const s = state.get(key)
+  if (s == null) return
+  s.inFlightTools.set(toolUseId, { name, startedAt: now, label })
+}
+/**
+ * #1292: record completion of a tool call. Removes the entry from
+ * `inFlightTools`. Idempotent — calling on an unknown toolUseId is a
+ * no-op. Sub-second tools that start and end inside one poll interval
+ * are still safe because the map is only read inside `tick()` at the
+ * 300s fallback boundary; the churn never gets observed.
+ */
+export function noteToolEnd(
+  key: string,
+  toolUseId: string,
+  _now: number,
+): void {
+  const s = state.get(key)
+  if (s == null) return
+  s.inFlightTools.delete(toolUseId)
+}
+/**
+ * #1292: late label update for an in-flight tool. The tool-label sidecar
+ * (PreToolUse hook, polled every 250ms via `tool-label-sidecar.ts`) can
+ * publish a richer label some time after the `tool_use` event landed.
+ * When that arrives, refresh the entry in-place so the fallback message
+ * — if it fires later — picks up the better label.
+ *
+ * No-op when the toolUseId isn't tracked (e.g. tool already completed,
+ * or the start event was skipped because the tool is a Telegram surface).
+ */
+export function noteToolLabel(
+  key: string,
+  toolUseId: string,
+  label: string,
+): void {
+  const s = state.get(key)
+  if (s == null) return
+  const entry = s.inFlightTools.get(toolUseId)
+  if (entry == null) return
+  entry.label = label
+}
 /**
  * Drain any armed poke for ANY active turn and return the system-reminder
  * text to append. Returns null if nothing is armed.
@@ -273,14 +376,52 @@ export function formatPokeText(level: PokeLevel): string {
 export function formatFrameworkFallbackText(
   fallbackKind: 'working' | 'thinking',
   silenceMs: number,
+  inFlightTools: ToolSnapshot[] = [],
 ): string {
   const minutes = Math.max(1, Math.round(silenceMs / 60_000))
   const suffix = `(no update from agent in ${minutes} min)`
+  // #1292 case (a): tools in flight. Name the longest-running one
+  // (entry[0] — caller pre-sorts by startedAt ascending). Avoid the
+  // "still working" framing #1292 explicitly calls out as dishonest:
+  // the agent IS doing work, we can see the tool. Format:
+  //   running Grep "foo" for 4m (no update from agent in 5 min)
+  //   running Grep "foo" + 2 more for 4m (no update from agent in 5 min)
+  //   running Grep for 4m (no update from agent in 5 min)   <- null/empty label
+  if (inFlightTools.length > 0) {
+    const longest = inFlightTools[0]!
+    const dur = formatDurationShort(longest.durationMs)
+    const labelTail = longest.label && longest.label.length > 0
+      ? ` ${truncateLabel(longest.label)}`
+      : ''
+    const more = inFlightTools.length > 1
+      ? ` + ${inFlightTools.length - 1} more`
+      : ''
+    return `running ${longest.name}${labelTail}${more} for ${dur} ${suffix}`
+  }
   return fallbackKind === 'thinking'
     ? `still thinking… ${suffix}`
     : `still working… ${suffix}`
 }
+/** Compact m/s rendering for the fallback message. Anything under a
+ *  minute reads as `${s}s`, otherwise `${m}m`. Rounds to the nearest
+ *  whole unit, with halves rounding up — "4m" for 4m 29s, "5m" for
+ *  4m 30s. */
+function formatDurationShort(ms: number): string {
+  const totalSec = Math.max(0, Math.round(ms / 1000))
+  if (totalSec < 60) return `${totalSec}s`
+  const minutes = Math.round(totalSec / 60)
+  return `${minutes}m`
+}
+/** Telegram lines are short on mobile. Clip the label to keep the
+ *  fallback message readable. Truncation point is generous (60 chars)
+ *  because tool labels are pre-truncated by `toolLabel()` already. */
+function truncateLabel(label: string): string {
+  const MAX = 60
+  if (label.length <= MAX) return label
+  return label.slice(0, MAX - 1) + '…'
+}
 /**
  * Internal tick — iterates active states, arms pokes or fires fallback.
  * Exported as __tickForTests so suite can step the clock deterministically.
@@ -331,6 +472,16 @@ function tick(now: number): void {
       const recentThinking = s.lastThinkingAt != null
         && (now - s.lastThinkingAt) < 30_000
       const fallbackKind: 'working' | 'thinking' = recentThinking ? 'thinking' : 'working'
+      // #1292: snapshot in-flight tools at fire time, sorted by
+      // startedAt ascending so entry[0] is the longest-running.
+      // Pre-computed durations in ms; the formatter just renders.
+      const inFlightTools: ToolSnapshot[] = Array.from(s.inFlightTools.values())
+        .sort((a, b) => a.startedAt - b.startedAt)
+        .map(t => ({
+          name: t.name,
+          label: t.label,
+          durationMs: now - t.startedAt,
+        }))
       activeDeps.emitMetric({
         kind: 'silence_fallback_sent',
         key,
@@ -345,6 +496,7 @@ function tick(now: number): void {
           threadId,
           fallbackKind,
           silenceMs: silence,
+          inFlightTools,
         })
         if (r != null && typeof (r as Promise<void>).catch === 'function') {
           ;(r as Promise<void>).catch((err) => {

package/telegram-plugin/tests/auth-command-format2.test.ts ADDED Viewed

@@ -0,0 +1,156 @@
+/**
+ * Integration test for the Format 2 wiring through `renderShowText` +
+ * `handleAuthCommand`. The pure formatter has dedicated tests in
+ * auth-snapshot-format.test.ts; here we cover the seam between the
+ * legacy ASCII-table path and the new health-grouped path.
+ *
+ * Headline guarantees:
+ *
+ *   1. With no liveQuotas, renderShowText produces the legacy ASCII
+ *      table shape (back-compat preserved).
+ *   2. With liveQuotas matching state.accounts.length, renderShowText
+ *      produces the Format 2 health-grouped shape (Recommendation
+ *      footer present, ASCII column header absent).
+ *   3. handleAuthCommand attaches a keyboard ONLY when liveQuotas is
+ *      supplied AND yields one quota per account (no half-rendered
+ *      buttons under partial-failure).
+ *   4. The keyboard emitted by handleAuthCommand never references a
+ *      blocked or unknown-health account in a switch button (smart-
+ *      hide rule, integration variant of the unit test in
+ *      auth-snapshot-format.test.ts).
+ */
+import { describe, it, expect, vi } from 'vitest';
+import { renderShowText, handleAuthCommand } from '../gateway/auth-command.js';
+import type { AuthBrokerClient, AuthCommandContext } from '../gateway/auth-command.js';
+import type { ListStateData } from '../../src/auth/broker/client.js';
+import type { QuotaResult, QuotaUtilization } from '../quota-check.js';
+function quota(part: Partial<QuotaUtilization>): QuotaUtilization {
+  return {
+    fiveHourUtilizationPct: 0,
+    sevenDayUtilizationPct: 0,
+    fiveHourResetAt: null,
+    sevenDayResetAt: null,
+    representativeClaim: null,
+    overageStatus: null,
+    overageDisabledReason: null,
+    ...part,
+  };
+}
+function qOk(part: Partial<QuotaUtilization>): QuotaResult {
+  return { ok: true, data: quota(part) };
+}
+const NOW_MS = new Date('2026-05-15T00:53:00Z').getTime();
+const FIXTURE_STATE: ListStateData = {
+  active: 'pixsoul@x',
+  fallback_order: ['ken@x', 'me@x', 'pixsoul@x'],
+  accounts: [
+    { label: 'ken@x', exhausted: false },
+    { label: 'me@x', exhausted: false },
+    { label: 'pixsoul@x', exhausted: false },
+  ],
+  agents: [{ name: 'carrie', account: 'pixsoul@x', override: null }],
+  consumers: [],
+};
+const FIXTURE_QUOTAS: QuotaResult[] = [
+  qOk({ fiveHourUtilizationPct: 0, sevenDayUtilizationPct: 23 }),
+  qOk({ sevenDayUtilizationPct: 100 }), // blocked
+  qOk({ fiveHourUtilizationPct: 8, sevenDayUtilizationPct: 20 }),
+];
+function mockClient(over: Partial<AuthBrokerClient> = {}): AuthBrokerClient {
+  return {
+    listState: vi.fn(async () => FIXTURE_STATE),
+    setActive: vi.fn(async (label: string) => ({ active: label, fanned: ['carrie'] })),
+    rmAccount: vi.fn(async (label: string) => ({ label })),
+    refreshAccount: vi.fn(async (label: string) => ({ account: label })),
+    setOverride: vi.fn(async (agent: string, account: string | null) => ({ agent, account })),
+    ...over,
+  };
+}
+describe('renderShowText — Format 2 vs legacy', () => {
+  it('falls back to legacy ASCII table when no liveQuotas given', () => {
+    const out = renderShowText(FIXTURE_STATE, NOW_MS);
+    expect(out).toContain('<b>Auth — fleet snapshot</b>');
+    expect(out).toContain('ACCOUNT');
+    expect(out).toContain('STATUS');
+    expect(out).toContain('EXPIRES');
+    expect(out).not.toContain('🔋');
+    expect(out).not.toContain('Recommendation:');
+  });
+  it('renders Format 2 when liveQuotas length matches accounts length', () => {
+    const out = renderShowText(FIXTURE_STATE, NOW_MS, {
+      liveQuotas: FIXTURE_QUOTAS,
+      tz: 'UTC',
+      liveProbedAtMs: NOW_MS,
+    });
+    expect(out).toContain('🔋 <b>Auth — fleet status</b>');
+    expect(out).toContain('Recommendation:');
+    expect(out).toContain('🔴 <b>BLOCKED</b>');
+    expect(out).toContain('🟢 <b>HEALTHY</b>');
+    // Legacy ASCII column headers should be absent
+    expect(out).not.toContain('ACCOUNT     STATUS');
+  });
+  it('falls back to legacy when liveQuotas length disagrees with accounts (defensive)', () => {
+    const out = renderShowText(FIXTURE_STATE, NOW_MS, {
+      liveQuotas: FIXTURE_QUOTAS.slice(0, 2), // wrong length
+    });
+    expect(out).not.toContain('🔋');
+    expect(out).toContain('ACCOUNT');
+  });
+});
+describe('handleAuthCommand — keyboard attachment', () => {
+  function makeCtx(overrides: Partial<AuthCommandContext> = {}): AuthCommandContext {
+    return {
+      agentName: 'carrie',
+      isAdmin: true,
+      client: mockClient(),
+      chatId: 'chat-1',
+      ...overrides,
+    };
+  }
+  it('attaches NO keyboard when liveQuotas is omitted (legacy callers)', async () => {
+    const reply = await handleAuthCommand({ kind: 'show' }, makeCtx());
+    expect(reply.keyboard).toBeUndefined();
+    expect(reply.text).toContain('ACCOUNT'); // legacy table
+  });
+  it('attaches a smart keyboard when liveQuotas yields one result per account', async () => {
+    const reply = await handleAuthCommand(
+      { kind: 'show' },
+      makeCtx({ liveQuotas: async () => FIXTURE_QUOTAS, tz: 'UTC' }),
+    );
+    expect(reply.keyboard).toBeDefined();
+    const allButtonText = reply.keyboard!.flat().map((b) => b.text);
+    // Switch button should exist for ken@x (healthy, not active)
+    expect(allButtonText).toContain('Switch fleet → ken@x');
+    // me@x is blocked — must NOT appear as a switch target
+    expect(allButtonText).not.toContain('Switch fleet → me@x');
+    // Bottom-row utility buttons
+    expect(allButtonText).toContain('↻ Refresh');
+    expect(allButtonText).toContain('/usage');
+    expect(allButtonText).toContain('+ Add');
+  });
+  it('attaches no keyboard when the live probe throws (graceful degrade)', async () => {
+    const reply = await handleAuthCommand(
+      { kind: 'show' },
+      makeCtx({
+        liveQuotas: async () => {
+          throw new Error('network down');
+        },
+      }),
+    );
+    expect(reply.keyboard).toBeUndefined();
+    expect(reply.text).toContain('ACCOUNT'); // legacy table fallback
+  });
+});