npm - switchroom - Versions diffs - 0.14.0 → 0.14.1 - Mend

switchroom 0.14.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/dist/auth-broker/index.js +16 -1
package/dist/cli/switchroom.js +1082 -873
package/dist/host-control/main.js +1 -1
package/package.json +1 -1
package/profiles/_shared/telegram-style.md.hbs +1 -1
package/telegram-plugin/auth-snapshot-format.ts +47 -1
package/telegram-plugin/dist/gateway/gateway.js +967 -542
package/telegram-plugin/gateway/boot-card.ts +100 -0
package/telegram-plugin/gateway/config-snapshot.ts +274 -0
package/telegram-plugin/gateway/gateway.ts +221 -14
package/telegram-plugin/operator-events.ts +2 -10
package/telegram-plugin/quota-watch.ts +276 -0
package/telegram-plugin/tests/auth-snapshot-format.test.ts +133 -1
package/telegram-plugin/tests/boot-card-render.test.ts +93 -0
package/telegram-plugin/tests/config-snapshot.test.ts +409 -0
package/telegram-plugin/tests/operator-events.test.ts +12 -6
package/telegram-plugin/tests/quota-watch.test.ts +366 -0
package/telegram-plugin/tests/turn-flush-safety.test.ts +48 -0
package/telegram-plugin/turn-flush-safety.ts +47 -0
package/telegram-plugin/uat/assertions.ts +4 -4

package/telegram-plugin/tests/quota-watch.test.ts ADDED Viewed

@@ -0,0 +1,366 @@
+/**
+ * Unit tests for the proactive quota threshold-tier push helper (#E4).
+ * Mirrors the shape of credits-watch.test.ts. Covers:
+ *   - Pure decision logic across all transition-table cases
+ *   - State persistence round-trip
+ *   - Message body content sanity
+ */
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "fs";
+import { tmpdir } from "os";
+import { join } from "path";
+import {
+  evaluateQuotaWatchAccount,
+  loadQuotaWatchState,
+  saveQuotaWatchState,
+  patchQuotaWatchState,
+  emptyQuotaWatchState,
+  emptyAccountState,
+} from "../quota-watch.js";
+import type { AccountSnapshot } from "../auth-snapshot-format.js";
+import type { QuotaUtilization } from "../quota-check.js";
+// ── test fixtures ────────────────────────────────────────────────────────────
+const NOW = 1_780_000_000_000;
+/**
+ * Build a minimal QuotaUtilization at given utilization percentages.
+ *
+ * `fivePct` and `sevenPct` populate `fiveHourUtilizationPct` and
+ * `sevenDayUtilizationPct` respectively. The two optional reset dates fall
+ * back to `null` when omitted, and the claim / overage fields are always
+ * `null`.
+ */
+function makeQuota(
+  fivePct: number,
+  sevenPct: number,
+  fiveHourResetAt?: Date,
+  sevenDayResetAt?: Date,
+): QuotaUtilization {
+  return {
+    fiveHourUtilizationPct: fivePct,
+    sevenDayUtilizationPct: sevenPct,
+    fiveHourResetAt: fiveHourResetAt ?? null,
+    sevenDayResetAt: sevenDayResetAt ?? null,
+    representativeClaim: null,
+    overageStatus: null,
+    overageDisabledReason: null,
+  };
+}
+/**
+ * Build an AccountSnapshot with given quota.
+ *
+ * Pass `quota = null` for a snapshot that carries no quota data.
+ * `isActive` defaults to `false`.
+ */
+function makeSnap(
+  label: string,
+  quota: QuotaUtilization | null,
+  isActive = false,
+): AccountSnapshot {
+  return { label, isActive, quota };
+}
+const HEALTHY_SNAP = makeSnap("alice@example.com", makeQuota(30, 40)); // 5h = 30%, 7d = 40%
+const THROTTLING_5H = makeSnap("alice@example.com", makeQuota(85, 40)); // only the 5-hour figure is high
+const THROTTLING_7D = makeSnap("alice@example.com", makeQuota(40, 90)); // only the 7-day figure is high
+const BLOCKED_SNAP = makeSnap("alice@example.com", makeQuota(99.9, 99.9)); // both figures at 99.9%
+const UNKNOWN_SNAP = makeSnap("alice@example.com", null); // no quota data at all
+const PREV_NEVER_NOTIFIED = emptyAccountState(); // lastNotifiedHealth: null
+const PREV_WAS_HEALTHY = { lastNotifiedHealth: "healthy" as const, lastNotifiedAt: NOW - 1000 }; // notified 1s before NOW
+const PREV_WAS_THROTTLING = { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: NOW - 1000 }; // notified 1s before NOW
+// ── transition decision tests ────────────────────────────────────────────────
+describe("evaluateQuotaWatchAccount — transition table", () => {
+  it("healthy → healthy (never notified) skips", () => {
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: HEALTHY_SNAP,
+      prev: PREV_NEVER_NOTIFIED,
+      now: NOW,
+    });
+    expect(d.kind).toBe("skip");
+    if (d.kind !== "skip") return;
+    expect(d.reason).toBe("steady-state");
+  });
+  it("healthy → healthy (was healthy) skips", () => {
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: HEALTHY_SNAP,
+      prev: PREV_WAS_HEALTHY,
+      now: NOW,
+    });
+    expect(d.kind).toBe("skip");
+  });
+  it("healthy → throttling (5h) fires entered-throttling notification", () => {
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: THROTTLING_5H,
+      prev: PREV_NEVER_NOTIFIED,
+      now: NOW,
+    });
+    expect(d.kind).toBe("notify");
+    if (d.kind !== "notify") return;
+    expect(d.transition).toBe("entered-throttling");
+    expect(d.newAccountState.lastNotifiedHealth).toBe("throttling");
+    expect(d.newAccountState.lastNotifiedAt).toBe(NOW);
+  });
+  it("healthy → throttling (7d) fires entered-throttling notification", () => {
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: THROTTLING_7D,
+      prev: PREV_NEVER_NOTIFIED,
+      now: NOW,
+    });
+    expect(d.kind).toBe("notify");
+    if (d.kind !== "notify") return;
+    expect(d.transition).toBe("entered-throttling");
+  });
+  it("throttling → throttling skips (already notified)", () => {
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: THROTTLING_5H,
+      prev: PREV_WAS_THROTTLING,
+      now: NOW,
+    });
+    expect(d.kind).toBe("skip");
+    if (d.kind !== "skip") return;
+    expect(d.reason).toBe("steady-state");
+  });
+  it("throttling → healthy fires recovered-to-healthy notification", () => {
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: HEALTHY_SNAP,
+      prev: PREV_WAS_THROTTLING,
+      now: NOW,
+    });
+    expect(d.kind).toBe("notify");
+    if (d.kind !== "notify") return;
+    expect(d.transition).toBe("recovered-to-healthy");
+    expect(d.newAccountState.lastNotifiedHealth).toBe("healthy");
+    expect(d.newAccountState.lastNotifiedAt).toBe(NOW);
+  });
+  it("* → blocked skips (credits-watch domain)", () => {
+    const dFromHealthy = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: BLOCKED_SNAP,
+      prev: PREV_NEVER_NOTIFIED,
+      now: NOW,
+    });
+    expect(dFromHealthy.kind).toBe("skip");
+    if (dFromHealthy.kind !== "skip") return;
+    expect(dFromHealthy.reason).toBe("blocked-not-our-domain");
+  });
+  it("blocked → healthy skips (credits-watch domain)", () => {
+    // blocked → healthy: credits-watch handles the blocked recovery path. This watcher
+    // presumably never records 'blocked', so prev is the never-notified state — the same inputs as the first case above.
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: HEALTHY_SNAP,
+      prev: PREV_NEVER_NOTIFIED, // we were never tracking as throttling
+      now: NOW,
+    });
+    // healthy → healthy from null-prev should skip, not fire
+    expect(d.kind).toBe("skip");
+  });
+  it("unknown quota snap skips (probe failed)", () => {
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: UNKNOWN_SNAP,
+      prev: PREV_WAS_THROTTLING,
+      now: NOW,
+    });
+    expect(d.kind).toBe("skip");
+    if (d.kind !== "skip") return;
+    expect(d.reason).toBe("unknown-not-our-domain");
+  });
+  it("no duplicate: two consecutive polls in throttling state produce one notify then skip", () => {
+    // First poll: healthy → throttling → notify
+    const d1 = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: THROTTLING_5H,
+      prev: PREV_NEVER_NOTIFIED,
+      now: NOW,
+    });
+    expect(d1.kind).toBe("notify");
+    if (d1.kind !== "notify") return;
+    // Second poll: throttling → throttling → skip (prev is the state returned by the first poll)
+    const d2 = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: THROTTLING_5H,
+      prev: d1.newAccountState,
+      now: NOW + 15 * 60_000, // 15 minutes after the first poll
+    });
+    expect(d2.kind).toBe("skip");
+  });
+});
+// ── message content tests ────────────────────────────────────────────────────
+describe("evaluateQuotaWatchAccount — message content", () => {
+  it("throttling message contains account label and percentages", () => {
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: THROTTLING_5H,
+      prev: PREV_NEVER_NOTIFIED,
+      now: NOW,
+    });
+    expect(d.kind).toBe("notify");
+    if (d.kind !== "notify") return;
+    expect(d.message).toContain("alice@example.com");
+    expect(d.message).toContain("85%");
+    expect(d.message).toContain("40%");
+    expect(d.message).toContain("5-hour");
+  });
+  it("recovery message contains account label and percentages", () => {
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: HEALTHY_SNAP,
+      prev: PREV_WAS_THROTTLING,
+      now: NOW,
+    });
+    expect(d.kind).toBe("notify");
+    if (d.kind !== "notify") return;
+    expect(d.message).toContain("alice@example.com");
+    expect(d.message).toContain("Quota back in healthy range");
+    expect(d.message).toContain("30%"); // only the 5-hour figure is asserted here
+  });
+  it("throttling message HTML-escapes account label", () => {
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: makeSnap("<evil>@example.com", makeQuota(85, 40)),
+      prev: PREV_NEVER_NOTIFIED,
+      now: NOW,
+    });
+    expect(d.kind).toBe("notify");
+    if (d.kind !== "notify") return;
+    expect(d.message).toContain("&lt;evil&gt;");
+    expect(d.message).not.toContain("<evil>");
+  });
+  it("throttling message for active account mentions /auth use", () => {
+    const activeSnap = makeSnap("alice@example.com", makeQuota(85, 40), /* isActive */ true);
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: activeSnap,
+      prev: PREV_NEVER_NOTIFIED,
+      now: NOW,
+    });
+    expect(d.kind).toBe("notify");
+    if (d.kind !== "notify") return;
+    // Active account message should mention switching; only the "/auth" substring is asserted, not the full "/auth use" command
+    expect(d.message).toContain("/auth");
+  });
+  it("throttling message includes reset time when provided", () => {
+    const resetAt = new Date(NOW + 3 * 60 * 60_000); // 3 hours from now
+    const d = evaluateQuotaWatchAccount({
+      agentName: "lawgpt",
+      snap: makeSnap("alice@example.com", makeQuota(85, 40, resetAt)), // resetAt is the 5-hour reset; the 7-day reset stays null
+      prev: PREV_NEVER_NOTIFIED,
+      now: NOW,
+    });
+    expect(d.kind).toBe("notify");
+    if (d.kind !== "notify") return;
+    expect(d.message).toContain("refills in");
+  });
+});
+// ── state persistence tests ──────────────────────────────────────────────────
+describe("loadQuotaWatchState / saveQuotaWatchState — round-trip", () => {
+  let tmp: string; // fresh temp dir per test, removed in afterEach
+  beforeEach(() => {
+    tmp = mkdtempSync(join(tmpdir(), "quota-watch-"));
+  });
+  afterEach(() => {
+    rmSync(tmp, { recursive: true, force: true });
+  });
+  it("returns emptyQuotaWatchState when no file exists", () => {
+    expect(loadQuotaWatchState(tmp)).toEqual(emptyQuotaWatchState());
+  });
+  it("round-trips a saved state with multiple accounts", () => {
+    const state = {
+      "alice@example.com": { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1_780_000_000_000 },
+      "bob@example.com": { lastNotifiedHealth: null, lastNotifiedAt: 0 },
+    };
+    saveQuotaWatchState(tmp, state);
+    expect(loadQuotaWatchState(tmp)).toEqual(state);
+  });
+  it("falls back to empty on malformed JSON", () => {
+    mkdirSync(tmp, { recursive: true }); // tmp already exists from beforeEach, so this is a no-op
+    writeFileSync(join(tmp, "quota-watch.json"), "{broken");
+    expect(loadQuotaWatchState(tmp)).toEqual(emptyQuotaWatchState());
+  });
+  it("falls back to empty on shape mismatch (not an object)", () => {
+    writeFileSync(join(tmp, "quota-watch.json"), JSON.stringify([1, 2, 3]));
+    expect(loadQuotaWatchState(tmp)).toEqual(emptyQuotaWatchState());
+  });
+  it("drops malformed entries but preserves valid ones", () => {
+    writeFileSync(
+      join(tmp, "quota-watch.json"),
+      JSON.stringify({
+        "good@example.com": { lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 },
+        "bad@example.com": { lastNotifiedHealth: "invalid", lastNotifiedAt: "not-a-number" },
+      }),
+    );
+    const loaded = loadQuotaWatchState(tmp);
+    expect(loaded["good@example.com"]).toEqual({ lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 });
+    expect(loaded["bad@example.com"]).toBeUndefined();
+  });
+  it("creates the state dir on save (if it doesn't exist yet)", () => {
+    const fresh = join(tmp, "fresh-subdir"); // not created by beforeEach
+    saveQuotaWatchState(fresh, {}); // NOTE(review): if emptyQuotaWatchState() is also {}, the load below cannot tell a written file from a missing one — confirm
+    expect(loadQuotaWatchState(fresh)).toEqual({});
+  });
+});
+// ── patchQuotaWatchState tests ────────────────────────────────────────────────
+describe("patchQuotaWatchState", () => {
+  it("adds a new account entry without clobbering others", () => {
+    const current: ReturnType<typeof emptyQuotaWatchState> = {
+      "alice@example.com": { lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 },
+    };
+    const updated = patchQuotaWatchState(
+      current,
+      "bob@example.com",
+      { lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 },
+    );
+    expect(updated["alice@example.com"]).toEqual({ lastNotifiedHealth: "throttling", lastNotifiedAt: 1000 }); // existing entry untouched
+    expect(updated["bob@example.com"]).toEqual({ lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 });
+  });
+  it("updates an existing account entry", () => {
+    const current = {
+      "alice@example.com": { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1000 },
+    };
+    const updated = patchQuotaWatchState(
+      current,
+      "alice@example.com",
+      { lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 },
+    );
+    expect(updated["alice@example.com"]).toEqual({ lastNotifiedHealth: "healthy", lastNotifiedAt: 2000 });
+  });
+  it("does not mutate the original state object", () => {
+    const current = {
+      "alice@example.com": { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1000 },
+    };
+    patchQuotaWatchState(current, "bob@example.com", { lastNotifiedHealth: null, lastNotifiedAt: 0 }); // return value deliberately ignored
+    expect(current["bob@example.com"]).toBeUndefined(); // the input object must not have gained the new key
+  });
+});

package/telegram-plugin/tests/turn-flush-safety.test.ts CHANGED Viewed

@@ -17,9 +17,57 @@ import { describe, it, expect } from 'vitest'
 import {
   decideTurnFlush,
   isSilentFlushMarker,
+  isCompositeSilentNoise,
   isTurnFlushSafetyEnabled,
 } from '../turn-flush-safety.js'
+describe('isCompositeSilentNoise — Stop-hook re-prompt leak backstop', () => {
+  it('suppresses the observed leak "Sent.\\nNO_REPLY\\nNO_REPLY"', () => {
+    expect(isCompositeSilentNoise('Sent.\nNO_REPLY\nNO_REPLY')).toBe(true)
+  })
+  it('suppresses repeated bare markers and marker+confirmation variants', () => {
+    expect(isCompositeSilentNoise('NO_REPLY\nNO_REPLY')).toBe(true)
+    expect(isCompositeSilentNoise('Done\nNO_REPLY')).toBe(true)
+    expect(isCompositeSilentNoise('NO_REPLY\nHEARTBEAT_OK')).toBe(true)
+  })
+  it('requires at least one real marker — standalone confirmations still flush', () => {
+    // Conservative: no NO_REPLY/HEARTBEAT_OK present → NOT suppressed here,
+    // so we never silently drop a turn that wasn't already signalling silence.
+    expect(isCompositeSilentNoise('Sent.')).toBe(false)
+    expect(isCompositeSilentNoise('Done.\nOK')).toBe(false)
+  })
+  it('does NOT suppress real content glued to a marker', () => {
+    expect(
+      isCompositeSilentNoise('Here is the summary of the page.\nNO_REPLY'),
+    ).toBe(false)
+    expect(isCompositeSilentNoise('NO_REPLY\nThe answer is 42.')).toBe(false)
+  })
+  it('handles non-strings / empty safely', () => {
+    expect(isCompositeSilentNoise(undefined)).toBe(false)
+    expect(isCompositeSilentNoise('')).toBe(false)
+    expect(isCompositeSilentNoise('   \n  ')).toBe(false) // whitespace-only lines are dropped, leaving nothing to match
+  })
+})
+describe('decideTurnFlush — composite silent noise is skipped, not leaked', () => {
+  it('skips "Sent.\\nNO_REPLY\\nNO_REPLY" (the live clerk/test-harness leak)', () => {
+    const d = decideTurnFlush({
+      chatId: '12345',
+      replyCalled: false,
+      capturedText: ['Sent.', 'NO_REPLY', 'NO_REPLY'], // decideTurnFlush joins these with "\n" before checking
+    })
+    expect(d).toEqual({ kind: 'skip', reason: 'silent-marker' }) // composite noise reuses the 'silent-marker' reason
+  })
+  it('still flushes genuine trailing answer text', () => {
+    const d = decideTurnFlush({
+      chatId: '12345',
+      replyCalled: false,
+      capturedText: ['The page summarises three news stories.'],
+    })
+    expect(d.kind).toBe('flush')
+  })
+})
 describe('decideTurnFlush', () => {
   it('(a) does NOT flush when the reply tool was called', () => {
     const decision = decideTurnFlush({

package/telegram-plugin/turn-flush-safety.ts CHANGED Viewed

@@ -50,6 +50,48 @@ export function isSilentFlushMarker(text: string | undefined): boolean {
   return SILENT_MARKERS.has(trimmed.toUpperCase())
 }
+// Trivial end-of-turn confirmations the model emits as terminal text after
+// calling reply (e.g. "Sent." once the reply tool returns). On their own
+// they're harmless; the danger is when they're glued to silent markers
+// across Stop-hook re-prompt cycles into a composite blob like
+// "Sent.\nNO_REPLY\nNO_REPLY" — which `isSilentFlushMarker` can't match
+// (multi-line, over the length guard) so it leaks to chat. Entries are
+// upper-case because `isTrivialConfirmationLine` upper-cases before lookup.
+// See `isCompositeSilentNoise`.
+const TRIVIAL_CONFIRMATIONS = new Set(['SENT', 'DONE', 'OK', 'OKAY', 'ACK'])
+function isTrivialConfirmationLine(line: string): boolean { // true when `line` is one listed confirmation word, optionally followed by one non-word character
+  let t = line.trim()
+  if (t.length === 0 || t.length > 8) return false // length guard runs before the trailing character is stripped
+  if (/\W$/.test(t)) t = t.slice(0, -1) // strip a single trailing punct ("Sent."); \W matches any non-word character, and the result is not re-trimmed
+  return TRIVIAL_CONFIRMATIONS.has(t.toUpperCase()) // case-insensitive membership test
+}
+/**
+ * Recognise a multi-line composite that is *entirely* silent noise — every
+ * non-empty line is a silent marker (NO_REPLY / HEARTBEAT_OK) or a trivial
+ * confirmation ("Sent."), AND at least one line is a real silent marker.
+ *
+ * Backstop for the Stop-hook re-prompt leak: the model replies cleanly,
+ * emits a terminal "Sent.", gets re-prompted by the silent-end Stop hook,
+ * answers "NO_REPLY" one or more times, and the accumulated `capturedText`
+ * ("Sent.\nNO_REPLY\nNO_REPLY") flushes as a visible message because
+ * `isSilentFlushMarker` only matches a single sentinel. Requiring ≥1 hard
+ * marker keeps this conservative — a standalone "Sent." (no NO_REPLY) is NOT
+ * suppressed here, so we never silently drop a turn that wasn't already
+ * signalling "nothing to add".
+ *
+ * The text is split on "\n"; each line is trimmed and blank lines are
+ * ignored, so line order and repetition do not matter.
+ *
+ * @param text - Captured turn text, possibly multi-line. Non-string input
+ *   (e.g. `undefined`) returns `false`.
+ * @returns `true` only when at least one non-blank line is a silent marker
+ *   and every non-blank line is a silent marker or a trivial confirmation;
+ *   `false` for empty or whitespace-only text.
+ */
+export function isCompositeSilentNoise(text: string | undefined): boolean {
+  if (typeof text !== 'string') return false
+  const lines = text
+    .split('\n')
+    .map(l => l.trim())
+    .filter(l => l.length > 0)
+  if (lines.length === 0) return false // nothing but blank lines
+  const hasMarker = lines.some(l => isSilentFlushMarker(l))
+  if (!hasMarker) return false // confirmations alone are never suppressed
+  return lines.every(l => isSilentFlushMarker(l) || isTrivialConfirmationLine(l))
+}
 export type FlushDecision =
   | { kind: 'flush'; text: string }
   | { kind: 'skip'; reason: FlushSkipReason }
@@ -115,6 +157,11 @@ export function decideTurnFlush(input: FlushDecisionInput): FlushDecision {
   const joined = input.capturedText.join('\n').trim()
   if (joined.length === 0) return { kind: 'skip', reason: 'empty-text' }
   if (isSilentFlushMarker(joined)) return { kind: 'skip', reason: 'silent-marker' }
+  // Composite silent noise — e.g. "Sent.\nNO_REPLY\nNO_REPLY" accumulated
+  // across Stop-hook re-prompt cycles. The single-sentinel check above
+  // misses it (multi-line, over the length guard); without this the blob
+  // leaks to chat as a visible message.
+  if (isCompositeSilentNoise(joined)) return { kind: 'skip', reason: 'silent-marker' }
   return { kind: 'flush', text: joined }
 }

package/telegram-plugin/uat/assertions.ts CHANGED Viewed

@@ -337,10 +337,10 @@ export async function waitForCardPhase(
 /**
  * Detect the progress card's phase from its rendered text.
  *
- * The actual card render (telegram-plugin/progress-card.ts) uses
- * emoji markers in the header: `✅` for done, `❌` for errors, `⚙️`
- * while working (foreground), `🌀` for Background (parent done but
- * fleet still running, see #862 / status-card-design.md §Header),
+ * The actual card render uses emoji markers in the header: `✅` for
+ * done, `❌` for errors, `⚙️` while working (foreground), `🌀` for
+ * Background (parent done but fleet still running, see #862 /
+ * reference/conversational-pacing.md),
  * and `⏳` during the boot-card window. These markers are stable
  * enough to key on for UAT — finer parsing (checklist items,
  * sub-agent row content) is out of scope.