npm - switchroom - Versions diffs - 0.14.18 → 0.14.20 - Mend

switchroom 0.14.18 → 0.14.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/dist/agent-scheduler/index.js +6 -1
package/dist/auth-broker/index.js +6 -1
package/dist/cli/notion-write-pretool.mjs +6 -1
package/dist/cli/switchroom.js +48 -3
package/dist/host-control/main.js +6 -1
package/dist/vault/approvals/kernel-server.js +6 -1
package/dist/vault/broker/server.js +6 -1
package/package.json +1 -1
package/telegram-plugin/README.md +7 -3
package/telegram-plugin/bridge/bridge.ts +1 -1
package/telegram-plugin/dist/bridge/bridge.js +1 -1
package/telegram-plugin/dist/gateway/gateway.js +381 -153
package/telegram-plugin/dist/server.js +1 -1
package/telegram-plugin/gateway/coalesce-attachments.ts +70 -0
package/telegram-plugin/gateway/gateway.ts +296 -37
package/telegram-plugin/gateway/interrupt-defer.ts +100 -0
package/telegram-plugin/gateway/pending-inbound-buffer.ts +21 -4
package/telegram-plugin/status-reactions.ts +18 -0
package/telegram-plugin/tests/coalesce-attachments.test.ts +152 -0
package/telegram-plugin/tests/interrupt-defer.test.ts +147 -0
package/telegram-plugin/tests/pending-inbound-buffer.test.ts +36 -0
package/telegram-plugin/tests/status-reactions.test.ts +69 -0
package/telegram-plugin/tests/worker-activity-feed.test.ts +113 -0
package/telegram-plugin/uat/scenarios/jtbd-forwarded-burst-dm.test.ts +158 -0
package/telegram-plugin/worker-activity-feed.ts +54 -4

package/telegram-plugin/tests/worker-activity-feed.test.ts CHANGED Viewed

@@ -92,6 +92,42 @@ describe('renderWorkerActivity', () => {
     expect(out).toContain('⚠️ <b>Worker failed</b>')
   })
+  it('grows a narrative block when narrativeLines is present', () => {
+    const out = renderWorkerActivity(
+      view({
+        latestSummary: 'newest only — should be ignored',
+        narrativeLines: ['read the brief', 'scanned vendor A', 'scanned vendor B'],
+      }),
+    )
+    expect(out).toContain('↳ <i>read the brief</i>')
+    expect(out).toContain('↳ <i>scanned vendor A</i>')
+    expect(out).toContain('↳ <i>scanned vendor B</i>')
+    // The single-line latestSummary fallback is NOT used when a block is present.
+    expect(out).not.toContain('newest only')
+    // Three narrative lines → three ↳ lines.
+    expect(out.match(/↳/g) ?? []).toHaveLength(3)
+  })
+  it('falls back to latestSummary when narrativeLines is empty', () => {
+    const out = renderWorkerActivity(view({ narrativeLines: [], latestSummary: 'one line' }))
+    expect(out).toContain('↳ <i>one line</i>')
+    expect(out.match(/↳/g) ?? []).toHaveLength(1)
+  })
+  it('drops blank narrative lines from the block', () => {
+    const out = renderWorkerActivity(
+      view({ narrativeLines: ['kept', '   ', 'also kept'] }),
+    )
+    expect(out).toContain('↳ <i>kept</i>')
+    expect(out).toContain('↳ <i>also kept</i>')
+    expect(out.match(/↳/g) ?? []).toHaveLength(2)
+  })
+  it('escapes HTML inside narrative lines', () => {
+    const out = renderWorkerActivity(view({ narrativeLines: ['a <b>x</b> & y'] }))
+    expect(out).toContain('a &lt;b&gt;x&lt;/b&gt; &amp; y')
+  })
   it('escapes HTML in description, tool, arg, and summary', () => {
     const out = renderWorkerActivity(
       view({
@@ -246,6 +282,83 @@ describe('createWorkerActivityFeed', () => {
     expect(feed.size).toBe(0)
   })
+  it('accumulates distinct narrative lines into a growing block across ticks', async () => {
+    const bot = makeFakeBot()
+    let clock = 10_000
+    const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })
+    await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'read the brief' }))
+    expect(bot.sent).toHaveLength(1)
+    expect(bot.sent[0].text).toContain('↳ <i>read the brief</i>')
+    clock = 11_000
+    await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'scanned vendor A' }))
+    clock = 12_000
+    await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'scanned vendor B' }))
+    const last = bot.edits.at(-1)!
+    expect(last.text).toContain('↳ <i>read the brief</i>')
+    expect(last.text).toContain('↳ <i>scanned vendor A</i>')
+    expect(last.text).toContain('↳ <i>scanned vendor B</i>')
+    expect(last.text.match(/↳/g) ?? []).toHaveLength(3)
+  })
+  it('dedups a repeated narrative line so the block does not duplicate', async () => {
+    const bot = makeFakeBot()
+    let clock = 10_000
+    const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })
+    await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'same line' }))
+    // Repeated narrative but a changed tool count → body differs, edit fires,
+    // but the narrative block must not gain a duplicate line.
+    clock = 11_000
+    await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'same line' }))
+    const last = bot.edits.at(-1)!
+    expect(last.text.match(/↳/g) ?? []).toHaveLength(1)
+  })
+  it('caps the narrative block to the last 6 lines', async () => {
+    const bot = makeFakeBot()
+    let clock = 10_000
+    const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })
+    for (let i = 1; i <= 9; i++) {
+      clock += 1000
+      await feed.update('w1', 'chat', view({ toolCount: i, latestSummary: `line ${i}` }))
+    }
+    const last = bot.edits.at(-1)!
+    expect(last.text.match(/↳/g) ?? []).toHaveLength(6)
+    // Oldest lines evicted; newest retained.
+    expect(last.text).not.toContain('line 1')
+    expect(last.text).not.toContain('line 3')
+    expect(last.text).toContain('line 4')
+    expect(last.text).toContain('line 9')
+  })
+  it('grows the narrative even while throttled (line surfaces on next edit)', async () => {
+    const bot = makeFakeBot()
+    let clock = 10_000
+    const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 2500 })
+    await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'line A' }))
+    expect(bot.sent).toHaveLength(1)
+    // Throttled tick — no edit, but the line must still be accumulated.
+    clock = 11_000
+    await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'line B' }))
+    expect(bot.edits).toHaveLength(0)
+    // Past the throttle — the edit now carries BOTH lines.
+    clock = 13_000
+    await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'line C' }))
+    const last = bot.edits.at(-1)!
+    expect(last.text).toContain('↳ <i>line A</i>')
+    expect(last.text).toContain('↳ <i>line B</i>')
+    expect(last.text).toContain('↳ <i>line C</i>')
+  })
   it('forwards threadId as message_thread_id on send', async () => {
     const bot = makeFakeBot()
     let clock = 10_000

package/telegram-plugin/uat/scenarios/jtbd-forwarded-burst-dm.test.ts ADDED Viewed

@@ -0,0 +1,158 @@
+/**
+ * JTBD scenario — forwarded burst / split paste coalesces into ONE turn.
+ *
+ * Serves: `reference/steer-or-queue-mid-flight.md` — the "Forwarded
+ * burst / split paste" UAT prompt. When several messages land in quick
+ * succession from the same sender (a forward of 3-4 messages, or a long
+ * paste Telegram split into chunks), inbound coalescing must merge them
+ * into a SINGLE Claude turn with shared context — not reply to each
+ * fragment in isolation.
+ *
+ * This is the end-to-end gate for the A1 coalescing work shipped in
+ * v0.14.18 (#2007). The merge logic itself is covered by unit + fuzz
+ * tests (`inbound-coalesce.test.ts`, `pending-inbound-buffer.test.ts`),
+ * but only this scenario exercises the real inbound → gateway coalescer
+ * → claude → outbound path over a live Telegram chat.
+ *
+ * ## How the signal is constructed
+ *
+ * Naively asking the agent to "combine facts from several messages"
+ * does NOT distinguish coalesced from fanned-out: even when each
+ * message becomes its own turn, every later turn carries the PRIOR
+ * turns in its conversation history, so the model could still answer
+ * from history. History bleed makes a content-combination assertion
+ * useless as a coalescing probe.
+ *
+ * The distinguishing fact is whether a SINGLE turn saw all the parts in
+ * ONE incoming message. So we anchor the instruction on "this single
+ * message": three messages are fired near-simultaneously (so they land
+ * inside the default 500ms coalesce window), each carrying a distinct
+ * code token, and the instruction (in the last part) asks the agent to
+ * echo every token it received *in this one incoming message*.
+ *
+ *   - Coalesced  → the merged turn's single message contains ALPHA,
+ *     BRAVO and CHARLIE → the reply names all three.
+ *   - Fanned out → the message carrying the instruction contains only
+ *     its own token → the reply names just that one. (And the other
+ *     two tokens arrive as their own separate turns.)
+ *
+ * Tokens are deliberately odd uppercase strings so a substring match is
+ * unambiguous and won't collide with incidental words in the reply.
+ *
+ * Order-independence: the three sends are dispatched concurrently to
+ * guarantee they share a coalesce window, which means Telegram may
+ * deliver them in any order. We assert SET membership (all three
+ * present), never order.
+ */
+import { describe, it, expect } from "vitest";
+import { spinUp } from "../harness.js";
+import { pollUntil } from "../assertions.js";
+import type { ObservedMessage } from "../driver.js";
+const AGENT = "test-harness";
+// Distinctive code tokens — unlikely to appear incidentally in a reply.
+const TOKENS = ["ALPHA", "BRAVO", "CHARLIE"] as const;
+const BURST: string[] = [
+  `BURST-PROBE part 1 of 3. Code token: ${TOKENS[0]}.`,
+  `BURST-PROBE part 2 of 3. Code token: ${TOKENS[1]}.`,
+  `BURST-PROBE part 3 of 3. Code token: ${TOKENS[2]}. ` +
+    `Reply with ONLY the BURST-PROBE code tokens contained in THIS SINGLE ` +
+    `incoming message, slash-separated. If this message contains just one ` +
+    `token, reply with only that token.`,
+];
+// Generous budget: TTFO on the test-harness is ~7s warm; coalescing
+// adds the (sub-second) window plus normal model latency.
+const ANSWER_TIMEOUT_MS = 40_000;
+/**
+ * Lists the probe tokens that occur in `text`. Matching is a
+ * case-insensitive substring check (the text is upper-cased and the tokens
+ * are upper-case literals); results follow the declaration order of
+ * `TOKENS`, not the order of appearance in `text`.
+ */
+function tokensIn(text: string): string[] {
+  const haystack = text.toUpperCase();
+  const found: string[] = [];
+  for (const token of TOKENS) {
+    if (haystack.includes(token)) found.push(token);
+  }
+  return found;
+}
+describe("uat: forwarded burst / split paste coalesces into one turn", () => {
+  it(
+    "a 3-message burst is answered as ONE shared-context turn",
+    async () => {
+      const sc = await spinUp({ agent: AGENT });
+      try {
+        // Start observing BEFORE the burst — observeMessages only sees
+        // live updates, not history. Drain into a per-message-id map so
+        // streamed edits collapse to the message's latest text.
+        const latestById = new Map<number, ObservedMessage>();
+        const stream = sc.driver.observeMessages(sc.botUserId);
+        const consume = (async () => {
+          for await (const m of stream) {
+            if (m.senderUserId === sc.driverUserId) continue; // skip our own sends
+            latestById.set(m.messageId, m);
+          }
+        })();
+        let allThree: ObservedMessage | undefined;
+        try {
+          // Fire all three concurrently so they share one coalesce window.
+          // Concurrency (not serial awaits) is what keeps inter-arrival
+          // under the 500ms default — three serial round-trips could blow
+          // the window on a slow link.
+          await Promise.all(BURST.map((t) => sc.sendDM(t)));
+          // Wait until a single bot message names all three tokens — the
+          // proof that one turn saw the whole burst in one incoming
+          // message. A poll failure is mapped to `undefined` so the
+          // diagnostic error below can report what WAS observed.
+          allThree = await pollUntil(
+            () => {
+              for (const m of latestById.values()) {
+                if (tokensIn(m.text).length === TOKENS.length) return m;
+              }
+              return undefined;
+            },
+            { timeout: ANSWER_TIMEOUT_MS, interval: 500 },
+          ).catch(() => undefined);
+        } finally {
+          // Close the observer stream and drain the consumer on EVERY exit
+          // path. Without this, a rejected send would skip straight to
+          // tearDown and leave the for-await consumer dangling.
+          await stream[Symbol.asyncIterator]().return?.(undefined as never);
+          await consume;
+        }
+        const botMsgs = [...latestById.values()];
+        const tokenBearing = botMsgs.filter((m) => tokensIn(m.text).length > 0);
+        if (!allThree) {
+          const seen = tokenBearing
+            .map((m) => `#${m.messageId}=[${tokensIn(m.text).join(",")}]`)
+            .join(" ");
+          throw new Error(
+            `[forwarded-burst] No single bot reply named all of ` +
+              `${TOKENS.join("/")}. This is the coalescing regression: the ` +
+              `burst fanned out into separate turns so no turn saw the full ` +
+              `message. Token-bearing replies: ${seen || "(none)"}.`,
+          );
+        }
+        expect(tokensIn(allThree.text).sort()).toEqual([...TOKENS].sort());
+        // Forensic: a coalesced burst should produce ONE answer that
+        // names the tokens. Several token-bearing replies hint at a
+        // partial fan-out even though one of them happened to be
+        // complete — worth a warning before it becomes a hard failure.
+        if (tokenBearing.length > 1) {
+          console.warn(
+            `[forwarded-burst] ${tokenBearing.length} token-bearing replies ` +
+              `observed (expected 1). Possible partial fan-out: ` +
+              tokenBearing
+                .map((m) => `#${m.messageId}=[${tokensIn(m.text).join(",")}]`)
+                .join(" "),
+          );
+        } else {
+          console.log(
+            `[forwarded-burst] One shared-context reply named all tokens ` +
+              `(#${allThree.messageId}). Coalescing healthy.`,
+          );
+        }
+      } finally {
+        await sc.tearDown();
+      }
+    },
+    // Test-level timeout: the answer budget plus headroom for spin-up and
+    // tear-down.
+    ANSWER_TIMEOUT_MS + 20_000,
+  );
+});

package/telegram-plugin/worker-activity-feed.ts CHANGED Viewed

@@ -46,6 +46,14 @@ export interface WorkerActivityView {
   toolCount: number
   /** The worker's latest narrative line, if any (already capped upstream). */
   latestSummary: string
+  /**
+   * Accumulated narrative lines, oldest→newest, already deduped + capped by
+   * the feed manager. When present and non-empty, the render grows a block
+   * of `↳` lines (mirroring the main agent's live answer) instead of
+   * collapsing to the single `latestSummary` line. Absent/empty → the
+   * single-line fallback (back-compat for direct render callers).
+   */
+  narrativeLines?: string[]
   /** Wall-clock since dispatch, ms. */
   elapsedMs: number
   state: WorkerActivityState
@@ -68,6 +76,13 @@ export interface BotApiForWorkerFeed {
 const DESC_MAX = 80
 const TOOL_ARG_MAX = 64
 const SUMMARY_MAX = 100
+/**
+ * How many trailing narrative lines the live feed keeps visible. The feed
+ * grows like the main agent's answer but can't grow unbounded — Telegram
+ * caps message length and a wall of stale lines buries the live one. Six
+ * keeps recent context without dominating the chat.
+ */
+const NARRATIVE_MAX_LINES = 6
 /**
  * Render the worker-activity message body as Telegram HTML.
@@ -105,10 +120,23 @@ export function renderWorkerActivity(v: WorkerActivityView): string {
     activity = `<i>starting… (${elapsed})</i>`
   }
-  const summary = v.latestSummary.trim()
   const lines = [header, activity]
-  if (summary.length > 0) {
-    lines.push(`  ↳ <i>${escapeHtml(truncate(summary, SUMMARY_MAX))}</i>`)
+  // Growing narrative block when the manager has accumulated lines; the feed
+  // reads like the main agent's live answer rather than a single replaced
+  // status line. Fall back to the single latestSummary line otherwise.
+  const narrative = (v.narrativeLines ?? [])
+    .map((s) => s.trim())
+    .filter((s) => s.length > 0)
+  if (narrative.length > 0) {
+    for (const line of narrative) {
+      lines.push(`  ↳ <i>${escapeHtml(truncate(line, SUMMARY_MAX))}</i>`)
+    }
+  } else {
+    const summary = v.latestSummary.trim()
+    if (summary.length > 0) {
+      lines.push(`  ↳ <i>${escapeHtml(truncate(summary, SUMMARY_MAX))}</i>`)
+    }
   }
   return lines.join('\n')
 }
@@ -142,6 +170,12 @@ interface WorkerHandle {
   lastBody: string | null
   lastEditAt: number
   cooldownUntil: number
+  /**
+   * Accumulated narrative lines (oldest→newest), deduped against the
+   * immediately-preceding line and capped to NARRATIVE_MAX_LINES. Grows the
+   * live render so the feed reads like the main agent's answer.
+   */
+  narrative: string[]
   /** Per-worker serialization chain so ticks can't interleave sends. */
   chain: Promise<void>
 }
@@ -203,9 +237,24 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
     log(`worker-feed: ${label} 429 — backing off ${retryAfter}s`)
   }
+  /**
+   * Records the view's latest summary line on the handle's narrative.
+   *
+   * Blank summaries, and a summary identical to the most recently stored
+   * line, are ignored. Once the list grows past NARRATIVE_MAX_LINES the
+   * oldest entries are dropped so only the newest ones remain.
+   */
+  function accumulateNarrative(h: WorkerHandle, view: WorkerActivityView): void {
+    const incoming = view.latestSummary.trim()
+    // Only the immediately-preceding entry is compared — the watcher re-emits
+    // the same narrative across ticks while a tool runs; we only grow on change.
+    const previous = h.narrative[h.narrative.length - 1]
+    if (incoming.length === 0 || incoming === previous) return
+    h.narrative.push(incoming)
+    // Trim from the front so the newest lines survive the cap.
+    const overflow = h.narrative.length - NARRATIVE_MAX_LINES
+    if (overflow > 0) h.narrative.splice(0, overflow)
+  }
   async function doUpdate(h: WorkerHandle, view: WorkerActivityView): Promise<void> {
+    // Accumulate before any gate so a throttled/cooled-down tick still grows
+    // the narrative — the line surfaces on the next edit that does fire.
+    accumulateNarrative(h, view)
     if (nowFn() < h.cooldownUntil) return
-    const body = renderWorkerActivity(view)
+    const body = renderWorkerActivity({ ...view, narrativeLines: h.narrative })
     // First paint: hold off until the worker has run long enough to be
     // worth a message; trivial workers stay silent (handback covers them).
@@ -284,6 +333,7 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
           lastBody: null,
           lastEditAt: 0,
           cooldownUntil: 0,
+          narrative: [],
           chain: Promise.resolve(),
         }
         handles.set(agentId, h)