switchroom 0.14.19 → 0.14.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/dist/agent-scheduler/index.js +6 -1
  2. package/dist/auth-broker/index.js +6 -1
  3. package/dist/cli/notion-write-pretool.mjs +6 -1
  4. package/dist/cli/switchroom.js +17 -3
  5. package/dist/host-control/main.js +6 -1
  6. package/dist/vault/approvals/kernel-server.js +6 -1
  7. package/dist/vault/broker/server.js +6 -1
  8. package/package.json +2 -2
  9. package/telegram-plugin/README.md +7 -3
  10. package/telegram-plugin/bridge/bridge.ts +1 -1
  11. package/telegram-plugin/dist/bridge/bridge.js +1 -1
  12. package/telegram-plugin/dist/gateway/gateway.js +368 -153
  13. package/telegram-plugin/dist/server.js +1 -1
  14. package/telegram-plugin/gateway/coalesce-attachments.ts +79 -0
  15. package/telegram-plugin/gateway/gateway.ts +257 -39
  16. package/telegram-plugin/gateway/interrupt-defer.ts +106 -0
  17. package/telegram-plugin/gateway/pending-inbound-buffer.ts +21 -4
  18. package/telegram-plugin/tests/coalesce-attachments.test.ts +170 -0
  19. package/telegram-plugin/tests/interrupt-defer.test.ts +160 -0
  20. package/telegram-plugin/tests/pending-inbound-buffer.test.ts +36 -0
  21. package/telegram-plugin/tests/permission-verdict-resume-guard.test.ts +86 -0
  22. package/telegram-plugin/tests/worker-activity-feed.test.ts +127 -0
  23. package/telegram-plugin/uat/assertions.ts +53 -0
  24. package/telegram-plugin/uat/driver.ts +28 -0
  25. package/telegram-plugin/uat/feed-matcher.test.ts +80 -0
  26. package/telegram-plugin/uat/fixtures/album/blue.jpg +0 -0
  27. package/telegram-plugin/uat/fixtures/album/green.jpg +0 -0
  28. package/telegram-plugin/uat/fixtures/album/red.jpg +0 -0
  29. package/telegram-plugin/uat/scenarios/jtbd-album-coalescing-dm.test.ts +136 -0
  30. package/telegram-plugin/uat/scenarios/jtbd-forwarded-burst-dm.test.ts +158 -0
  31. package/telegram-plugin/uat/scenarios/jtbd-memory-survives-restart-dm.test.ts +17 -2
  32. package/telegram-plugin/worker-activity-feed.ts +65 -9
@@ -11,6 +11,59 @@
11
11
 
12
12
  import type { Driver, ObservedMessage, ObservedReaction } from "./driver.js";
13
13
 
/**
 * Canonical shape of a worker-activity-feed message (#2000) as rendered
 * in Telegram: a running header `🔧 Worker · …` that edits in place and
 * finalizes to `✅ Worker done · …` / `⚠️ Worker failed · …`. The feed is
 * default-on fleet-wide as of v0.14.19, so background sub-agent activity
 * now surfaces as its own bot message in any chat — including DMs whose
 * scenario only cares about the agent's conversational reply.
 *
 * Matching rules:
 *  - An emoji-prefixed header matches anywhere in the text; the glyph is
 *    distinctive enough on its own. The U+FE0F variation selector after
 *    the warning sign is optional, so the header is still recognised when
 *    the selector has been stripped.
 *  - A bare `Worker done` / `Worker failed` header (leading emoji lost)
 *    matches only at the start of a line. Without that anchor an ordinary
 *    reply such as "the background worker failed to start" would be
 *    classified as feed noise and skipped by reply matchers.
 *
 * Matching is case-insensitive. The regex carries no `g`/`y` flag, so
 * `.test()` is stateless and the shared instance is safe to reuse.
 *
 * Single source of truth; the worker-feed scenario asserts against this,
 * and recall/reply scenarios exclude it via {@link isWorkerFeedMessage}.
 */
export const WORKER_FEED_RE =
  /🔧\s*Worker|✅\s*Worker done|\u26A0\uFE0F?\s*Worker failed|^\s*Worker (?:done|failed)/im;
26
+
/**
 * Reports whether `m` is worker-activity-feed output rather than the
 * agent's own reply.
 *
 * Reply matchers use this to step over feed noise: without the filter an
 * `expectMessage(/\S/)` can latch onto the feed's first paint and miss
 * (or mis-time) the real answer. See #2000 / the memory-survives-restart
 * recall scenario.
 *
 * @param m Observed message; only its `text` is inspected.
 * @returns `true` when the text matches {@link WORKER_FEED_RE}.
 */
export function isWorkerFeedMessage(m: ObservedMessage): boolean {
  const { text } = m;
  return WORKER_FEED_RE.test(text);
}
37
+
/**
 * Shape of one rendered tool-activity-feed line, as produced by
 * `renderActivityFeed` (telegram-plugin/tool-activity-summary.ts):
 * `→ <label>` for the step in progress, `✓ <label>` for a finished step,
 * and `✓ +N earlier…` as the header of a long turn. Telegram strips the
 * bold/italic wrapping, so the observed text is the marker glyph, a
 * whitespace character, then the label.
 */
const ACTIVITY_FEED_LINE_RE = /^[→✓]\s/u;

/**
 * Reports whether `m` is the live tool-activity feed (the one-message list
 * of "what the agent is doing this turn") rather than the agent's reply.
 *
 * The message qualifies only when it has at least one non-blank line and
 * EVERY non-blank line (after trimming) is an activity line, so a real
 * reply that merely contains an arrow is never misclassified.
 *
 * Recall/reply scenarios must skip this in addition to
 * {@link isWorkerFeedMessage}: on a turn that uses tools, the feed paints
 * `→ Finding the right tool` as its own bot message before the real answer
 * lands, and an `expectMessage(/\S/)` would otherwise latch onto it.
 *
 * @param m Observed message; only its `text` is inspected.
 * @returns `true` for a pure activity-feed message, `false` otherwise
 *          (including blank/whitespace-only text).
 */
export function isActivityFeedMessage(m: ObservedMessage): boolean {
  let sawActivityLine = false;
  for (const rawLine of m.text.split("\n")) {
    const line = rawLine.trim();
    if (line.length === 0) continue; // blank lines neither qualify nor disqualify
    if (!ACTIVITY_FEED_LINE_RE.test(line)) return false; // one ordinary line => a reply
    sawActivityLine = true;
  }
  return sawActivityLine;
}
66
+
14
67
  export interface PollOptions {
15
68
  /** Hard deadline; the predicate must resolve truthy before this. */
16
69
  timeout: number;
@@ -646,6 +646,34 @@ export class Driver {
646
646
  return { messageId: sent.id };
647
647
  }
648
648
 
/**
 * Post several photos as one Telegram media group (an album) — the way a
 * forwarded album or a multi-image paste arrives.
 *
 * Exists to exercise the gateway's A2 multi-attachment coalescing: with
 * coalesce.max_attachments at its default of 10, the whole album is
 * expected to fold into ONE Claude turn (the agent sees image_path,
 * image_path_2, …).
 *
 * @param chatId     Target chat.
 * @param photoPaths Paths of the photos, in album order.
 * @param caption    Optional caption; attached to the first item only,
 *                   matching Telegram client behaviour. An empty string is
 *                   treated as "no caption".
 * @param opts       `replyTo` wins; `messageThreadId` is the fallback reply
 *                   target when `replyTo` is absent.
 * @returns Ids of every sent message, one per album item.
 */
async sendAlbum(
  chatId: number,
  photoPaths: string[],
  caption?: string,
  opts?: SendTextOptions,
): Promise<{ messageIds: number[] }> {
  const client = this.requireClient();
  const replyTo = opts?.replyTo ?? opts?.messageThreadId;

  const medias = photoPaths.map((photoPath, index) => {
    // Only the album's first item carries the caption.
    const extra = index === 0 && caption ? { caption } : undefined;
    return InputMedia.photo(photoPath, extra);
  });

  // Pass no params object at all when there is nothing to reply to.
  const sent = await client.sendMediaGroup(
    chatId,
    medias,
    replyTo ? { replyTo } : undefined,
  );

  const messageIds = sent.map((message) => message.id);
  return { messageIds };
}
676
+
649
677
  /**
650
678
  * Send or remove an emoji reaction on a target message. Used by the
651
679
  * UAT reaction-trigger scenario (#1074) to exercise the gateway's
@@ -0,0 +1,80 @@
1
+ import { describe, expect, it } from "bun:test";
2
+ import {
3
+ isActivityFeedMessage,
4
+ isWorkerFeedMessage,
5
+ WORKER_FEED_RE,
6
+ } from "./assertions.js";
7
+
// This suite pins the worker-activity-feed detector (#2000) that
// recall/reply scenarios rely on to skip feed noise. The live UAT it
// guards can't run in CI (needs sudo + a real Telegram session), so these
// cases are the CI-verifiable floor for the matcher's behavior.

/** Argument type of the detectors under test. */
type FeedArg = Parameters<typeof isWorkerFeedMessage>[0];

/** Build a minimal observed-message stand-in carrying only `text`. */
function feed(text: string): FeedArg {
  return { text } as FeedArg;
}
13
+
describe("isWorkerFeedMessage", () => {
  // Wrap `text` as a bot message and pin the detector's verdict on it.
  const expectVerdict = (text: string, isFeed: boolean): void => {
    expect(isWorkerFeedMessage(feed(text))).toBe(isFeed);
  };

  it("matches the running feed header", () => {
    expectVerdict("🔧 Worker · crawling changelog · 0:12", true);
  });

  it("matches the terminal done/failed recaps", () => {
    expectVerdict("✅ Worker done · 10 tools · 1:03", true);
    expectVerdict("⚠️ Worker failed · 3 tools", true);
  });

  it("matches a done/failed header even without the leading emoji", () => {
    expectVerdict("Worker done · 2 tools", true);
    expectVerdict("Worker failed mid-step", true);
  });

  it("does NOT match an ordinary agent reply", () => {
    expectVerdict("on it, pulling the logs now", false);
    expectVerdict("SWITCHROOM_UAT_MEM_DEADBEEFCAFE1234", false);
  });

  it("does NOT match a reply that merely mentions the word worker", () => {
    expectVerdict("I'll dispatch a worker to handle the crawl.", false);
  });

  it("exposes the regex for scenarios that assert on the feed directly", () => {
    const matched = WORKER_FEED_RE.test("🔧 Worker · x");
    expect(matched).toBe(true);
  });
});
46
+
describe("isActivityFeedMessage", () => {
  // Wrap `text` as a bot message and pin the detector's verdict on it.
  const expectVerdict = (text: string, isFeed: boolean): void => {
    expect(isActivityFeedMessage(feed(text))).toBe(isFeed);
  };

  it("matches the in-progress step line", () => {
    expectVerdict("→ Finding the right tool", true);
  });

  it("matches a multi-line feed (done steps + in-progress)", () => {
    expectVerdict("✓ Reading CLAUDE.md\n→ Searching memory", true);
  });

  it("matches the +N earlier header", () => {
    expectVerdict("✓ +3 earlier…\n✓ Reading CLAUDE.md\n→ Searching memory", true);
  });

  it("does NOT match an ordinary agent reply", () => {
    expectVerdict("on it, pulling the logs now", false);
    expectVerdict("SWITCHROOM_UAT_MEM_DEADBEEFCAFE1234", false);
  });

  it("does NOT match a reply that merely contains an arrow mid-text", () => {
    expectVerdict("The flow is request → response → render.", false);
  });

  it("does NOT match an empty message", () => {
    expectVerdict(" ", false);
  });
});
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Album-coalescing scenario — driver sends a 3-photo Telegram album
3
+ * (media_group) in one shot; the gateway's A2 multi-attachment
4
+ * coalescing (coalesce.max_attachments, default 10 since v0.14.21)
5
+ * MUST fold all three into a SINGLE Claude turn, so the agent sees
6
+ * image_path + image_path_2 + image_path_3 together and can report a
7
+ * count of 3.
8
+ *
9
+ * Regression gate for the default-on flip (#2021): before max_attachments
10
+ * defaulted to 10, an album bypassed coalescing (each part its own turn),
11
+ * so the agent would only ever see ONE image per turn and answer "1".
12
+ * A reply of "3" proves the album coalesced.
13
+ *
14
+ * Part of: https://github.com/switchroom/switchroom/issues/865
15
+ *
16
+ * ## How the signal is read robustly
17
+ *
18
+ * The agent's answer is a bare count, which would collide with incidental
19
+ * digits the chat is now full of by default: the pinned progress card's
20
+ * timer (`00:03`) and — since the worker feed went default-on fleet-wide
21
+ * (#2009 / v0.14.19) — worker-feed lines like `🔧 Worker · … · 0:12`.
22
+ * A "first bot message containing a digit" matcher would latch onto one of
23
+ * those and false-fail even when coalescing is healthy (see the
24
+ * memory-survives-restart matcher-flake note + `isWorkerFeedMessage`).
25
+ *
26
+ * Two defences, mirroring the sibling `jtbd-forwarded-burst-dm` gate:
27
+ * 1. Anchor the answer on a distinctive token — the agent is told to
28
+ * reply `IMAGECOUNT=<n>`. "IMAGECOUNT" never appears in a card or a
29
+ * worker-feed line, so the matcher cannot collide with their digits.
30
+ * 2. Drain observed messages into a per-id map (collapsing streamed
31
+ * edits to latest text), skip our own sends + worker-feed noise, and
32
+ * poll until the ANSWER token appears — rather than returning the
33
+ * first message that happens to match.
34
+ *
35
+ * - Coalesced → one turn sees 3 images → `IMAGECOUNT=3`.
36
+ * - Non-coalesced → the turn carrying the caption/question sees only its
37
+ * own (first) image → `IMAGECOUNT=1`. Surfaced as an explicit failure.
38
+ *
39
+ * Fixtures: three tiny solid-colour JPEGs under fixtures/album/, committed
40
+ * so the gate runs without a generation step. (Regenerate with
41
+ * `ffmpeg -f lavfi -i color=c=red:s=320x240 -frames:v 1 red.jpg`.)
42
+ */
43
+
44
+ import path from "node:path";
45
+ import { existsSync } from "node:fs";
46
+ import { describe, expect, it } from "vitest";
47
+ import { spinUp } from "../harness.js";
48
+ import { pollUntil, isWorkerFeedMessage } from "../assertions.js";
49
+ import type { ObservedMessage } from "../driver.js";
50
+
51
+ const AGENT = "test-harness";
52
+
53
+ const FIXTURE_DIR = path.resolve(__dirname, "..", "fixtures", "album");
54
+ const PHOTOS = ["red.jpg", "green.jpg", "blue.jpg"].map((f) =>
55
+ path.join(FIXTURE_DIR, f),
56
+ );
57
+
58
+ const CAPTION =
59
+ "I just sent you a photo album in a single message. Count the separate " +
60
+ "image files you received in THIS ONE incoming message and reply with " +
61
+ "ONLY the token IMAGECOUNT=<n> (e.g. IMAGECOUNT=3). Nothing else.";
62
+
63
+ // Warm TTFO on test-harness is ~7s; an album adds the (sub-second)
64
+ // coalesce window plus the model looking at three images.
65
+ const ANSWER_TIMEOUT_MS = 90_000;
66
+
/**
 * Extract `<n>` from the first `IMAGECOUNT=<n>` token found in `text`.
 *
 * Deliberately lenient about how the model formats the token: the
 * separator may be `=`, `:` or missing, whitespace is allowed on either
 * side of it, and the keyword is matched case-insensitively.
 *
 * @returns The parsed base-10 count, or `undefined` when no `IMAGECOUNT`
 *          keyword followed by digits is present.
 */
function imageCountIn(text: string): number | undefined {
  const hit = /IMAGECOUNT\s*[:=]?\s*(\d+)/i.exec(text);
  if (hit === null) return undefined;
  return Number.parseInt(hit[1], 10);
}
72
+
// Live UAT: sends a real 3-photo album to the agent and asserts on the
// count the agent reports back. Statement order below is load-bearing
// (observe -> send -> poll -> close observer -> assert).
describe("uat: album-coalescing DM round-trip", () => {
  it(
    "a 3-photo album folds into ONE turn — agent reports seeing 3 images",
    async () => {
      // Fail fast, with a pointer to the fix, when a fixture is missing —
      // before any agent is spun up.
      for (const p of PHOTOS) {
        if (!existsSync(p)) {
          throw new Error(
            `album fixture missing at ${p} — see scenario header to regenerate`,
          );
        }
      }
      const sc = await spinUp({ agent: AGENT });
      try {
        // Observe BEFORE sending — observeMessages only sees live updates.
        // Drain into a per-id map so streamed edits collapse to latest text;
        // skip our own sends and worker-feed noise so neither can satisfy
        // the count matcher.
        const latestById = new Map<number, ObservedMessage>();
        const stream = sc.driver.observeMessages(sc.botUserId);
        // Background drain; runs until the stream is closed further down.
        const consume = (async () => {
          for await (const m of stream) {
            if (m.senderUserId === sc.driverUserId) continue;
            if (isWorkerFeedMessage(m)) continue;
            latestById.set(m.messageId, m);
          }
        })();

        // NOTE(review): if this send rejects, the observer close below is
        // skipped and only tearDown() runs — confirm tearDown also ends the
        // stream, or move the close into a finally.
        await sc.driver.sendAlbum(sc.botUserId, PHOTOS, CAPTION);

        // Poll until a bot message carries an IMAGECOUNT token.
        // A rejected poll (presumably the timeout — confirm against
        // pollUntil) is mapped to `undefined` so the richer diagnostic
        // below is what gets reported.
        const answer = await pollUntil(
          () => {
            for (const m of latestById.values()) {
              if (imageCountIn(m.text) !== undefined) return m;
            }
            return undefined;
          },
          { timeout: ANSWER_TIMEOUT_MS, interval: 500 },
        ).catch(() => undefined);

        // Close the observer stream, then wait for the drain loop to exit.
        await stream[Symbol.asyncIterator]().return?.(undefined as never);
        await consume;

        if (!answer) {
          // List what the bot DID say (first 60 chars each) to make a
          // timeout diagnosable from the failure message alone.
          const seen = [...latestById.values()]
            .map((m) => `#${m.messageId}=${JSON.stringify(m.text.slice(0, 60))}`)
            .join(" ");
          throw new Error(
            `[album-coalescing] No bot reply carried an IMAGECOUNT token ` +
              `within ${ANSWER_TIMEOUT_MS}ms. Bot messages seen: ${seen || "(none)"}.`,
          );
        }

        const count = imageCountIn(answer.text);
        // Coalesced => 3. A non-coalescing gateway answers 1 (the question
        // rides photo #1; the other two parts spill to their own turns).
        // NOTE(review): 3 mirrors PHOTOS.length; keep the two in sync if
        // the fixture list ever changes.
        expect(count).toBe(3);
      } finally {
        await sc.tearDown();
      }
    },
    // Test-level timeout: answer budget plus headroom for spin-up/teardown.
    ANSWER_TIMEOUT_MS + 30_000,
  );
});
@@ -0,0 +1,158 @@
1
+ /**
2
+ * JTBD scenario — forwarded burst / split paste coalesces into ONE turn.
3
+ *
4
+ * Serves: `reference/steer-or-queue-mid-flight.md` — the "Forwarded
5
+ * burst / split paste" UAT prompt. When several messages land in quick
6
+ * succession from the same sender (a forward of 3-4 messages, or a long
7
+ * paste Telegram split into chunks), inbound coalescing must merge them
8
+ * into a SINGLE Claude turn with shared context — not reply to each
9
+ * fragment in isolation.
10
+ *
11
+ * This is the end-to-end gate for the A1 coalescing work shipped in
12
+ * v0.14.18 (#2007). The merge logic itself is covered by unit + fuzz
13
+ * tests (`inbound-coalesce.test.ts`, `pending-inbound-buffer.test.ts`),
14
+ * but only this scenario exercises the real inbound → gateway coalescer
15
+ * → claude → outbound path over a live Telegram chat.
16
+ *
17
+ * ## How the signal is constructed
18
+ *
19
+ * Naively asking the agent to "combine facts from several messages"
20
+ * does NOT distinguish coalesced from fanned-out: even when each
21
+ * message becomes its own turn, every later turn carries the PRIOR
22
+ * turns in its conversation history, so the model could still answer
23
+ * from history. History bleed makes a content-combination assertion
24
+ * useless as a coalescing probe.
25
+ *
26
+ * The distinguishing fact is whether a SINGLE turn saw all the parts in
27
+ * ONE incoming message. So we anchor the instruction on "this single
28
+ * message": three messages are fired near-simultaneously (so they land
29
+ * inside the default 500ms coalesce window), each carrying a distinct
30
+ * code token, and the instruction (in the last part) asks the agent to
31
+ * echo every token it received *in this one incoming message*.
32
+ *
33
+ * - Coalesced → the merged turn's single message contains ALPHA,
34
+ * BRAVO and CHARLIE → the reply names all three.
35
+ * - Fanned out → the message carrying the instruction contains only
36
+ * its own token → the reply names just that one. (And the other
37
+ * two tokens arrive as their own separate turns.)
38
+ *
39
+ * Tokens are deliberately odd uppercase strings so a substring match is
40
+ * unambiguous and won't collide with incidental words in the reply.
41
+ *
42
+ * Order-independence: the three sends are dispatched concurrently to
43
+ * guarantee they share a coalesce window, which means Telegram may
44
+ * deliver them in any order. We assert SET membership (all three
45
+ * present), never order.
46
+ */
47
+
48
+ import { describe, it, expect } from "vitest";
49
+ import { spinUp } from "../harness.js";
50
+ import { pollUntil } from "../assertions.js";
51
+ import type { ObservedMessage } from "../driver.js";
52
+
53
+ const AGENT = "test-harness";
54
+
55
+ // Distinctive code tokens — unlikely to appear incidentally in a reply.
56
+ const TOKENS = ["ALPHA", "BRAVO", "CHARLIE"] as const;
57
+
58
+ const BURST: string[] = [
59
+ `BURST-PROBE part 1 of 3. Code token: ${TOKENS[0]}.`,
60
+ `BURST-PROBE part 2 of 3. Code token: ${TOKENS[1]}.`,
61
+ `BURST-PROBE part 3 of 3. Code token: ${TOKENS[2]}. ` +
62
+ `Reply with ONLY the BURST-PROBE code tokens contained in THIS SINGLE ` +
63
+ `incoming message, slash-separated. If this message contains just one ` +
64
+ `token, reply with only that token.`,
65
+ ];
66
+
67
+ // Generous budget: TTFO on the test-harness is ~7s warm; coalescing
68
+ // adds the (sub-second) window plus normal model latency.
69
+ const ANSWER_TIMEOUT_MS = 40_000;
70
+
/**
 * List the probe tokens that occur in `text`.
 *
 * Matching is a case-insensitive substring test (the text is upper-cased
 * before comparison). The result follows the order of `TOKENS`, not the
 * order of appearance in `text`, and never contains duplicates.
 */
function tokensIn(text: string): string[] {
  const haystack = text.toUpperCase();
  const found: string[] = [];
  for (const token of TOKENS) {
    if (haystack.includes(token)) found.push(token);
  }
  return found;
}
75
+
// Live UAT: fires a 3-message burst at the agent and requires ONE bot
// reply to name all three tokens. Statement order below is load-bearing
// (observe -> send -> poll -> close observer -> assert).
describe("uat: forwarded burst / split paste coalesces into one turn", () => {
  it(
    "a 3-message burst is answered as ONE shared-context turn",
    async () => {
      const sc = await spinUp({ agent: AGENT });
      try {
        // Start observing BEFORE the burst — observeMessages only sees
        // live updates, not history. Drain into a per-message-id map so
        // streamed edits collapse to the message's latest text.
        const latestById = new Map<number, ObservedMessage>();
        const stream = sc.driver.observeMessages(sc.botUserId);
        // Background drain; runs until the stream is closed further down.
        const consume = (async () => {
          for await (const m of stream) {
            if (m.senderUserId === sc.driverUserId) continue; // skip our own sends
            latestById.set(m.messageId, m);
          }
        })();

        // Fire all three concurrently so they share one coalesce window.
        // Concurrency (not serial awaits) is what keeps inter-arrival
        // under the 500ms default — three serial round-trips could blow
        // the window on a slow link.
        // NOTE(review): if any send rejects, the observer close below is
        // skipped and only tearDown() runs — confirm tearDown also ends the
        // stream, or move the close into a finally.
        await Promise.all(BURST.map((t) => sc.sendDM(t)));

        // Wait until a single bot message names all three tokens — the
        // proof that one turn saw the whole burst in one incoming
        // message. A rejected poll (presumably the timeout — confirm
        // against pollUntil) is mapped to `undefined` so the richer
        // diagnostic below is what gets reported.
        const allThree = await pollUntil(
          () => {
            for (const m of latestById.values()) {
              if (tokensIn(m.text).length === TOKENS.length) return m;
            }
            return undefined;
          },
          { timeout: ANSWER_TIMEOUT_MS, interval: 500 },
        ).catch(() => undefined);

        // Close the observer stream, then wait for the drain loop to exit.
        await stream[Symbol.asyncIterator]().return?.(undefined as never);
        await consume;

        const botMsgs = [...latestById.values()];
        const tokenBearing = botMsgs.filter((m) => tokensIn(m.text).length > 0);

        if (!allThree) {
          // Report which tokens each reply DID carry so a fan-out is
          // visible straight from the failure message.
          const seen = tokenBearing
            .map((m) => `#${m.messageId}=[${tokensIn(m.text).join(",")}]`)
            .join(" ");
          throw new Error(
            `[forwarded-burst] No single bot reply named all of ` +
              `${TOKENS.join("/")}. This is the coalescing regression: the ` +
              `burst fanned out into separate turns so no turn saw the full ` +
              `message. Token-bearing replies: ${seen || "(none)"}.`,
          );
        }

        // Set membership only — delivery order of the burst is not fixed.
        expect(tokensIn(allThree.text).sort()).toEqual([...TOKENS].sort());

        // Forensic: a coalesced burst should produce ONE answer that
        // names the tokens. Several token-bearing replies hint at a
        // partial fan-out even though one of them happened to be
        // complete — worth a warning before it becomes a hard failure.
        if (tokenBearing.length > 1) {
          console.warn(
            `[forwarded-burst] ${tokenBearing.length} token-bearing replies ` +
              `observed (expected 1). Possible partial fan-out: ` +
              tokenBearing
                .map((m) => `#${m.messageId}=[${tokensIn(m.text).join(",")}]`)
                .join(" "),
          );
        } else {
          console.log(
            `[forwarded-burst] One shared-context reply named all tokens ` +
              `(#${allThree.messageId}). Coalescing healthy.`,
          );
        }
      } finally {
        await sc.tearDown();
      }
    },
    // Test-level timeout: answer budget plus headroom for spin-up/teardown.
    ANSWER_TIMEOUT_MS + 20_000,
  );
});
@@ -53,9 +53,24 @@ import { describe, it, expect, beforeAll } from "vitest";
53
53
  import { execSync } from "node:child_process";
54
54
  import { randomBytes } from "node:crypto";
55
55
  import { spinUp } from "../harness.js";
56
+ import { isActivityFeedMessage, isWorkerFeedMessage } from "../assertions.js";
57
+ import type { ObservedMessage } from "../driver.js";
56
58
 
57
59
  const AGENT = "test-harness";
58
60
 
// Predicate for "this bot message is the agent's actual reply".
//
// Two kinds of bot message are NOT the reply and have to be stepped over;
// otherwise a bare `/\S/` matcher latches onto them and the verbatim-token
// check fails against noise instead of catching a real memory miss:
//   1. The worker-activity feed (#2000, default-on since v0.14.19) — a
//      stray background sub-agent posts `🔧 Worker · …` into this DM.
//   2. The tool-activity feed — on a turn that uses tools (memory recall
//      does), `→ Finding the right tool` paints as its own message before
//      the real answer lands.
// So: the first non-empty bot message that is neither of those.
const isReply = (m: ObservedMessage): boolean => {
  if (!/\S/.test(m.text)) return false; // blank / whitespace-only
  if (isWorkerFeedMessage(m)) return false;
  return !isActivityFeedMessage(m);
};
73
+
59
74
  const RESTART_BUDGET_MS = 90_000;
60
75
  const CAPTURE_REPLY_BUDGET_MS = 60_000;
61
76
  const RECALL_REPLY_BUDGET_MS = 120_000;
@@ -139,7 +154,7 @@ const sudoOk = canShellSudo();
139
154
  `(This is a memory-survival UAT — store it via hindsight.)`,
140
155
  );
141
156
 
142
- const captureReply = await sc1.expectMessage(/\S/, {
157
+ const captureReply = await sc1.expectMessage(isReply, {
143
158
  from: "bot",
144
159
  timeout: CAPTURE_REPLY_BUDGET_MS,
145
160
  });
@@ -180,7 +195,7 @@ const sudoOk = canShellSudo();
180
195
  `Reply with the token only, no extra text.`,
181
196
  );
182
197
 
183
- const recallReply = await sc2.expectMessage(/\S/, {
198
+ const recallReply = await sc2.expectMessage(isReply, {
184
199
  from: "bot",
185
200
  timeout: RECALL_REPLY_BUDGET_MS + 5_000,
186
201
  });
@@ -25,15 +25,21 @@
25
25
  * - 429 cooldown + message_id drift resilience (re-post on stale edit),
26
26
  * - a forced terminal edit on `finish` regardless of throttle.
27
27
  *
28
- * The feed is gated to BACKGROUND workers and lives behind the
29
- * `SWITCHROOM_WORKER_ACTIVITY_FEED` flag — see the gateway wiring. The
30
- * watcher already drives the cues (it polls the worker jsonl directly,
31
- * so it keeps firing after the parent turn ends), which is why the feed
32
- * is fed from watcher callbacks rather than the bridge event stream.
28
+ * The feed is gated to BACKGROUND workers and is ON by default; set
29
+ * `SWITCHROOM_WORKER_ACTIVITY_FEED=0` to disable it — see the gateway
30
+ * wiring. The watcher already drives the cues (it polls the worker jsonl
31
+ * directly, so it keeps firing after the parent turn ends), which is why
32
+ * the feed is fed from watcher callbacks rather than the bridge event stream.
33
33
  */
34
34
 
35
35
  import { escapeHtml, formatDuration, truncate } from './card-format.js'
36
36
 
/**
 * Decide whether the worker-activity feed is enabled from the raw value
 * of `SWITCHROOM_WORKER_ACTIVITY_FEED`.
 *
 * The feed is ON by default: only the exact string `'0'` opts out. An
 * unset variable, an empty string, and any other value leave it enabled.
 */
export function isWorkerActivityFeedEnabled(envVal: string | undefined): boolean {
  const optedOut = envVal === '0'
  return !optedOut
}
42
+
37
43
  export type WorkerActivityState = 'running' | 'done' | 'failed'
38
44
 
39
45
  /** The render-relevant snapshot of a worker at one instant. */
@@ -46,6 +52,14 @@ export interface WorkerActivityView {
46
52
  toolCount: number
47
53
  /** The worker's latest narrative line, if any (already capped upstream). */
48
54
  latestSummary: string
55
+ /**
56
+ * Accumulated narrative lines, oldest→newest, already deduped + capped by
57
+ * the feed manager. When present and non-empty, the render grows a block
58
+ * of `↳` lines (mirroring the main agent's live answer) instead of
59
+ * collapsing to the single `latestSummary` line. Absent/empty → the
60
+ * single-line fallback (back-compat for direct render callers).
61
+ */
62
+ narrativeLines?: string[]
49
63
  /** Wall-clock since dispatch, ms. */
50
64
  elapsedMs: number
51
65
  state: WorkerActivityState
@@ -68,6 +82,13 @@ export interface BotApiForWorkerFeed {
68
82
  const DESC_MAX = 80
69
83
  const TOOL_ARG_MAX = 64
70
84
  const SUMMARY_MAX = 100
85
+ /**
86
+ * How many trailing narrative lines the live feed keeps visible. The feed
87
+ * grows like the main agent's answer but can't grow unbounded — Telegram
88
+ * caps message length and a wall of stale lines buries the live one. Six
89
+ * keeps recent context without dominating the chat.
90
+ */
91
+ const NARRATIVE_MAX_LINES = 6
71
92
 
72
93
  /**
73
94
  * Render the worker-activity message body as Telegram HTML.
@@ -105,10 +126,23 @@ export function renderWorkerActivity(v: WorkerActivityView): string {
105
126
  activity = `<i>starting… (${elapsed})</i>`
106
127
  }
107
128
 
108
- const summary = v.latestSummary.trim()
109
129
  const lines = [header, activity]
110
- if (summary.length > 0) {
111
- lines.push(` <i>${escapeHtml(truncate(summary, SUMMARY_MAX))}</i>`)
130
+
131
+ // Growing narrative block when the manager has accumulated lines; the feed
132
+ // reads like the main agent's live answer rather than a single replaced
133
+ // status line. Fall back to the single latestSummary line otherwise.
134
+ const narrative = (v.narrativeLines ?? [])
135
+ .map((s) => s.trim())
136
+ .filter((s) => s.length > 0)
137
+ if (narrative.length > 0) {
138
+ for (const line of narrative) {
139
+ lines.push(` ↳ <i>${escapeHtml(truncate(line, SUMMARY_MAX))}</i>`)
140
+ }
141
+ } else {
142
+ const summary = v.latestSummary.trim()
143
+ if (summary.length > 0) {
144
+ lines.push(` ↳ <i>${escapeHtml(truncate(summary, SUMMARY_MAX))}</i>`)
145
+ }
112
146
  }
113
147
  return lines.join('\n')
114
148
  }
@@ -142,6 +176,12 @@ interface WorkerHandle {
142
176
  lastBody: string | null
143
177
  lastEditAt: number
144
178
  cooldownUntil: number
179
+ /**
180
+ * Accumulated narrative lines (oldest→newest), deduped against the
181
+ * immediately-preceding line and capped to NARRATIVE_MAX_LINES. Grows the
182
+ * live render so the feed reads like the main agent's answer.
183
+ */
184
+ narrative: string[]
145
185
  /** Per-worker serialization chain so ticks can't interleave sends. */
146
186
  chain: Promise<void>
147
187
  }
@@ -203,9 +243,24 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
203
243
  log(`worker-feed: ${label} 429 — backing off ${retryAfter}s`)
204
244
  }
205
245
 
/**
 * Append the view's latest narrative line to the worker's running
 * narrative, mutating `h.narrative` in place.
 *
 * - Blank (whitespace-only) summaries are ignored.
 * - A line equal to the immediately-preceding one is ignored: the watcher
 *   re-emits the same narrative across ticks while a tool runs, and the
 *   block should only grow on change. Non-adjacent repeats are kept.
 * - The list is trimmed from the front so it never holds more than
 *   NARRATIVE_MAX_LINES entries.
 */
function accumulateNarrative(h: WorkerHandle, view: WorkerActivityView): void {
  const next = view.latestSummary.trim()
  const previous = h.narrative[h.narrative.length - 1]
  if (next.length === 0 || previous === next) return

  h.narrative.push(next)

  // Drop the oldest lines once the cap is exceeded.
  const overflow = h.narrative.length - NARRATIVE_MAX_LINES
  if (overflow > 0) {
    h.narrative.splice(0, overflow)
  }
}
257
+
206
258
  async function doUpdate(h: WorkerHandle, view: WorkerActivityView): Promise<void> {
259
+ // Accumulate before any gate so a throttled/cooled-down tick still grows
260
+ // the narrative — the line surfaces on the next edit that does fire.
261
+ accumulateNarrative(h, view)
207
262
  if (nowFn() < h.cooldownUntil) return
208
- const body = renderWorkerActivity(view)
263
+ const body = renderWorkerActivity({ ...view, narrativeLines: h.narrative })
209
264
 
210
265
  // First paint: hold off until the worker has run long enough to be
211
266
  // worth a message; trivial workers stay silent (handback covers them).
@@ -284,6 +339,7 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
284
339
  lastBody: null,
285
340
  lastEditAt: 0,
286
341
  cooldownUntil: 0,
342
+ narrative: [],
287
343
  chain: Promise.resolve(),
288
344
  }
289
345
  handles.set(agentId, h)