switchroom 0.14.20 → 0.14.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/dist/agent-scheduler/index.js +2 -3
  2. package/dist/auth-broker/index.js +2 -3
  3. package/dist/cli/notion-write-pretool.mjs +2 -3
  4. package/dist/cli/switchroom.js +16 -8
  5. package/dist/host-control/main.js +2 -3
  6. package/dist/vault/approvals/kernel-server.js +2 -3
  7. package/dist/vault/broker/server.js +2 -3
  8. package/package.json +3 -3
  9. package/profiles/_base/start.sh.hbs +11 -24
  10. package/profiles/_shared/telegram-style.md.hbs +2 -2
  11. package/profiles/default/CLAUDE.md.hbs +4 -1
  12. package/skills/switchroom-runtime/SKILL.md +6 -16
  13. package/telegram-plugin/agent-dir.ts +15 -0
  14. package/telegram-plugin/dist/gateway/gateway.js +655 -514
  15. package/telegram-plugin/gateway/coalesce-attachments.ts +9 -0
  16. package/telegram-plugin/gateway/gateway.ts +246 -83
  17. package/telegram-plugin/gateway/inbound-spool.ts +15 -0
  18. package/telegram-plugin/gateway/interrupt-defer.ts +6 -0
  19. package/telegram-plugin/gateway/resume-inbound-builder.ts +180 -0
  20. package/telegram-plugin/registry/turns-schema.ts +138 -33
  21. package/telegram-plugin/stream-reply-handler.ts +1 -11
  22. package/telegram-plugin/tests/agent-dir.test.ts +25 -0
  23. package/telegram-plugin/tests/coalesce-attachments.test.ts +24 -6
  24. package/telegram-plugin/tests/e2e.test.ts +2 -77
  25. package/telegram-plugin/tests/inbound-spool.test.ts +45 -0
  26. package/telegram-plugin/tests/interrupt-defer.test.ts +13 -0
  27. package/telegram-plugin/tests/multi-turn-continuity.test.ts +0 -1
  28. package/telegram-plugin/tests/outbound-ordering.test.ts +0 -1
  29. package/telegram-plugin/tests/parse-mode-rotation.test.ts +0 -1
  30. package/telegram-plugin/tests/permission-verdict-resume-guard.test.ts +86 -0
  31. package/telegram-plugin/tests/races.test.ts +0 -26
  32. package/telegram-plugin/tests/registry-turns.test.ts +106 -29
  33. package/telegram-plugin/tests/resume-inbound-builder.test.ts +182 -0
  34. package/telegram-plugin/tests/status-accent.test.ts +0 -1
  35. package/telegram-plugin/tests/stream-reply-error-paths.test.ts +0 -1
  36. package/telegram-plugin/tests/stream-reply-handler.test.ts +0 -24
  37. package/telegram-plugin/tests/streaming-e2e.test.ts +0 -1
  38. package/telegram-plugin/tests/streaming-orchestration.test.ts +0 -1
  39. package/telegram-plugin/tests/tool-activity-summary.test.ts +44 -0
  40. package/telegram-plugin/tests/turns-writer.test.ts +16 -6
  41. package/telegram-plugin/tests/worker-activity-feed.test.ts +14 -0
  42. package/telegram-plugin/tool-activity-summary.ts +55 -0
  43. package/telegram-plugin/uat/assertions.ts +53 -0
  44. package/telegram-plugin/uat/driver.ts +30 -0
  45. package/telegram-plugin/uat/feed-matcher.test.ts +80 -0
  46. package/telegram-plugin/uat/fixtures/album/blue.jpg +0 -0
  47. package/telegram-plugin/uat/fixtures/album/green.jpg +0 -0
  48. package/telegram-plugin/uat/fixtures/album/red.jpg +0 -0
  49. package/telegram-plugin/uat/scenarios/jtbd-album-coalescing-dm.test.ts +136 -0
  50. package/telegram-plugin/uat/scenarios/jtbd-memory-survives-restart-dm.test.ts +17 -2
  51. package/telegram-plugin/worker-activity-feed.ts +11 -5
  52. package/telegram-plugin/handoff-continuity.ts +0 -206
  53. package/telegram-plugin/tests/handoff-continuity.test.ts +0 -262
@@ -40,7 +40,6 @@ function makeDeps(
40
40
  markdownToHtml: (t) => realMarkdownToHtml(t),
41
41
  escapeMarkdownV2: (t) => t,
42
42
  repairEscapedWhitespace: (t) => t,
43
- takeHandoffPrefix: () => '',
44
43
  assertAllowedChat: () => {},
45
44
  resolveThreadId: (_, explicit) => (explicit != null ? Number(explicit) : undefined),
46
45
  disableLinkPreview: true,
@@ -35,7 +35,6 @@ function makeDeps(
35
35
  markdownToHtml: (t) => `<b>${t}</b>`,
36
36
  escapeMarkdownV2: (t) => `\\${t}\\`,
37
37
  repairEscapedWhitespace: (t) => t,
38
- takeHandoffPrefix: () => '',
39
38
  assertAllowedChat: () => {},
40
39
  resolveThreadId: (_, explicit) => (explicit != null ? Number(explicit) : undefined),
41
40
  disableLinkPreview: true,
@@ -104,29 +103,6 @@ describe('handleStreamReply', () => {
104
103
  expect(bot.api.sendMessage.mock.calls[0][2]?.parse_mode).toBeUndefined()
105
104
  })
106
105
 
107
- it('prepends handoff prefix on first chunk only', async () => {
108
- const state = makeState()
109
- const deps = makeDeps(bot, {
110
- takeHandoffPrefix: vi.fn<(fmt: string) => string>(() => '↩️ '),
111
- })
112
-
113
- // First call: prefix applied
114
- const p1 = handleStreamReply({ chat_id: '1', text: 'first' }, state, deps)
115
- await microtaskFlush()
116
- await p1
117
- // Prefix is prepended AFTER format rendering (it's already format-safe
118
- // because takeHandoffPrefix takes the format tag).
119
- expect(bot.api.sendMessage.mock.calls[0][1]).toBe('↩️ <b>first</b>')
120
-
121
- // Second call: handoff not consumed again
122
- vi.advanceTimersByTime(1000)
123
- const p2 = handleStreamReply({ chat_id: '1', text: 'second' }, state, deps)
124
- await microtaskFlush()
125
- await p2
126
- expect(bot.api.editMessageText.mock.calls[0][2]).toBe('<b>second</b>')
127
- expect(deps.takeHandoffPrefix).toHaveBeenCalledTimes(1)
128
- })
129
-
130
106
  it('throws when text exceeds 4096 (no silent id:pending)', async () => {
131
107
  // Pins the bug found in prod: a >4096-char text would hit draft-
132
108
  // stream's length guard, silently stop, and the handler would return
@@ -88,7 +88,6 @@ function setup(opts: { progressCardActive?: boolean } = {}): Fixture {
88
88
  markdownToHtml: (t) => `<b>${t}</b>`, // stream_reply: bold
89
89
  escapeMarkdownV2: (t) => t,
90
90
  repairEscapedWhitespace: (t) => t,
91
- takeHandoffPrefix: () => '',
92
91
  assertAllowedChat: () => {},
93
92
  resolveThreadId: () => undefined,
94
93
  disableLinkPreview: true,
@@ -407,7 +407,6 @@ function makeActivityDeps(
407
407
  markdownToHtml: (t) => t,
408
408
  escapeMarkdownV2: (t) => t,
409
409
  repairEscapedWhitespace: (t) => t,
410
- takeHandoffPrefix: () => '',
411
410
  assertAllowedChat: () => {},
412
411
  resolveThreadId: (_, explicit) => (explicit != null ? Number(explicit) : undefined),
413
412
  disableLinkPreview: true,
@@ -4,7 +4,9 @@ import {
4
4
  appendActivityLine,
5
5
  appendActivityLabel,
6
6
  renderActivityFeed,
7
+ renderActivityFeedWithNested,
7
8
  MIRROR_MAX_LINES,
9
+ NESTED_MAX_LINES,
8
10
  } from "../tool-activity-summary.js";
9
11
 
10
12
  describe("describeToolUse — friendly per-tool rendering (draft-mirror)", () => {
@@ -143,3 +145,45 @@ describe("appendActivityLabel — precomputed label feed (tool_label path)", ()
143
145
  expect(lines.length).toBe(2);
144
146
  });
145
147
  });
148
+
149
+ describe("renderActivityFeedWithNested — foreground sub-agent nesting (Model A)", () => {
150
+ it("with no child lines, is identical to the flat feed", () => {
151
+ const lines = ["Searching memory", "Delegating: review the migration"];
152
+ expect(renderActivityFeedWithNested(lines, [])).toBe(renderActivityFeed(lines));
153
+ // whitespace-only children also collapse to the flat feed
154
+ expect(renderActivityFeedWithNested(lines, [" ", ""])).toBe(renderActivityFeed(lines));
155
+ });
156
+
157
+ it("done-styles ALL parent lines and nests the child block (newest = bold →)", () => {
158
+ const parent = ["Searching memory", "Delegating: review the migration"];
159
+ const child = ["Reading schema.ts", "Looking for foreign keys"];
160
+ const out = renderActivityFeedWithNested(parent, child)!;
161
+ // Parent is blocked at the Task tool → none of its lines is the live step.
162
+ expect(out).toContain("<i>✓ Searching memory</i>");
163
+ expect(out).toContain("<i>✓ Delegating: review the migration</i>");
164
+ expect(out).not.toContain("<b>→ Delegating");
165
+ // The live → step is the newest nested child line; earlier child = italic.
166
+ expect(out).toContain(" ↳ <i>Reading schema.ts</i>");
167
+ expect(out).toContain(" ↳ <b>→ Looking for foreign keys</b>");
168
+ });
169
+
170
+ it("caps the nested block to NESTED_MAX_LINES with a '↳ +N earlier…' header", () => {
171
+ const child = Array.from({ length: NESTED_MAX_LINES + 3 }, (_, i) => `step ${i + 1}`);
172
+ const out = renderActivityFeedWithNested(["Delegating: x"], child)!;
173
+ expect(out).toContain(" ↳ <i>+3 earlier…</i>");
174
+ // newest nested line is the live → step
175
+ expect(out).toContain(` ↳ <b>→ step ${NESTED_MAX_LINES + 3}</b>`);
176
+ // the oldest (collapsed) lines are not rendered verbatim
177
+ expect(out).not.toContain("step 1<");
178
+ });
179
+
180
+ it("renders the child block even when the parent feed is empty", () => {
181
+ const out = renderActivityFeedWithNested([], ["Reading a.ts"]);
182
+ expect(out).toBe(" ↳ <b>→ Reading a.ts</b>");
183
+ });
184
+
185
+ it("HTML-escapes nested child text", () => {
186
+ const out = renderActivityFeedWithNested(["Delegating: x"], ["touch <a> & <b>"])!;
187
+ expect(out).toContain(" ↳ <b>→ touch &lt;a&gt; &amp; &lt;b&gt;</b>");
188
+ });
189
+ });
@@ -5,7 +5,7 @@
5
5
  * 1. Clean turn: insert + finalize → row has ended_via='stop', non-null
6
6
  * ended_at, correct previews.
7
7
  * 2. Mid-turn restart: insert without finalize, simulate gateway boot via
8
- * markOrphanedAsRestarted → row has ended_via='restart'.
8
+ * markOrphanedWithTimeoutClassification → row has ended_via='restart'.
9
9
  * 3. Multiple concurrent turns same chat: each row has a unique turn_key,
10
10
  * no cross-contamination.
11
11
  * 4. tool_call_count increments correctly for N tool_use events.
@@ -20,9 +20,19 @@ import {
20
20
  openTurnsDbInMemory,
21
21
  recordTurnStart,
22
22
  recordTurnEnd,
23
- markOrphanedAsRestarted,
23
+ markOrphanedWithTimeoutClassification,
24
24
  } from '../registry/turns-schema.js'
25
25
 
26
/**
 * Runs the boot reaper with no hang marker supplied (both marker fields are
 * null), which is how this suite models a gateway boot between turns. The
 * tests below expect every still-open turn to come back as a plain 'restart'.
 *
 * @param db Turns database handle (same type `recordTurnEnd` takes first).
 * @returns Whatever `markOrphanedWithTimeoutClassification` reports; the
 *   tests read its `reaped` count.
 */
function reapAsRestart(db: Parameters<typeof recordTurnEnd>[0]) {
  const noHangMarker = {
    markerTurnKey: null,
    markerAgeMs: null,
    hangThresholdMs: 300_000,
  }
  return markOrphanedWithTimeoutClassification(db, noHangMarker)
}
+ }
35
+
26
36
  // ---------------------------------------------------------------------------
27
37
  // 1. Clean turn
28
38
  // ---------------------------------------------------------------------------
@@ -110,7 +120,7 @@ describe('clean turn (Phase 1 #332)', () => {
110
120
  // ---------------------------------------------------------------------------
111
121
 
112
122
  describe('mid-turn restart (Phase 1 #332)', () => {
113
- it('insert without finalize, then markOrphanedAsRestarted → ended_via=restart', () => {
123
+ it('insert without finalize, then reaper → ended_via=restart', () => {
114
124
  const db = openTurnsDbInMemory()
115
125
 
116
126
  recordTurnStart(db, {
@@ -120,8 +130,8 @@ describe('mid-turn restart (Phase 1 #332)', () => {
120
130
  })
121
131
 
122
132
  // Simulate gateway boot reaper (same path as the real gateway boot).
123
- const swept = markOrphanedAsRestarted(db)
124
- expect(swept).toBe(1)
133
+ const swept = reapAsRestart(db)
134
+ expect(swept.reaped).toBe(1)
125
135
 
126
136
  const row = db
127
137
  .prepare('SELECT ended_via, ended_at FROM turns WHERE turn_key = ?')
@@ -141,7 +151,7 @@ describe('mid-turn restart (Phase 1 #332)', () => {
141
151
  recordTurnEnd(db, { turnKey: 'chat2:_:2001', endedVia: 'stop' })
142
152
  recordTurnStart(db, { turnKey: 'chat2:_:2002', chatId: 'chat2' })
143
153
 
144
- markOrphanedAsRestarted(db)
154
+ reapAsRestart(db)
145
155
 
146
156
  const clean = db
147
157
  .prepare('SELECT ended_via FROM turns WHERE turn_key = ?')
@@ -2,10 +2,24 @@ import { describe, it, expect } from 'vitest'
2
2
  import {
3
3
  renderWorkerActivity,
4
4
  createWorkerActivityFeed,
5
+ isWorkerActivityFeedEnabled,
5
6
  type WorkerActivityView,
6
7
  type BotApiForWorkerFeed,
7
8
  } from '../worker-activity-feed.js'
8
9
 
10
+ describe('isWorkerActivityFeedEnabled (default ON)', () => {
11
+ it('defaults to true when the env var is unset', () => {
12
+ expect(isWorkerActivityFeedEnabled(undefined)).toBe(true)
13
+ })
14
+ it('stays on for any value other than "0"', () => {
15
+ expect(isWorkerActivityFeedEnabled('1')).toBe(true)
16
+ expect(isWorkerActivityFeedEnabled('')).toBe(true)
17
+ })
18
+ it('only "0" disables it', () => {
19
+ expect(isWorkerActivityFeedEnabled('0')).toBe(false)
20
+ })
21
+ })
22
+
9
23
  function view(partial: Partial<WorkerActivityView> = {}): WorkerActivityView {
10
24
  return {
11
25
  description: 'research competitors',
@@ -216,6 +216,61 @@ export function renderActivityFeed(lines: string[]): string | null {
216
216
  return out.join("\n");
217
217
  }
218
218
 
219
+ // ─── Foreground sub-agent nesting (Model A) ─────────────────────────────────
220
+ //
221
+ // A foreground sub-agent (Task/Agent with no `run_in_background`) runs INSIDE
222
+ // the parent's turn — the parent is blocked at the Task tool until it returns.
223
+ // Rather than a separate message, its live steps nest under the parent's own
224
+ // activity feed: the gold-standard main-turn visibility applied one level
225
+ // down. The parent's lines render as done (the parent handed off; it isn't
226
+ // the active worker), and the sub-agent's recent narrative lines render as an
227
+ // indented `↳` block with the newest as the in-progress `→` step.
228
+
229
+ /** Trailing nested child lines kept visible (Telegram length + readability). */
230
+ export const NESTED_MAX_LINES = 4;
231
+ /** Hard cap on a single nested narrative line. */
232
+ const NESTED_LINE_MAX = 90;
233
+ /** Indent marker for a nested sub-agent step. */
234
+ const NESTED_PREFIX = " ↳ ";
235
+
236
/**
 * Render the parent activity feed with an active foreground sub-agent's steps
 * nested beneath it.
 *
 * - When `childLines` has no usable entries (empty, or whitespace-only after
 *   trimming) the result is exactly `renderActivityFeed(lines)`.
 * - Otherwise the trailing `MIRROR_MAX_LINES` parent lines are all done-styled
 *   (`✓`, italic) with a `✓ +N earlier…` header for the ones cut off, and the
 *   trailing `NESTED_MAX_LINES` child lines follow as an indented `↳` block:
 *   the newest is the bold `→` step, earlier ones are italic, and a
 *   `↳ +N earlier…` header reports how many were cut off.
 *
 * Child lines longer than `NESTED_LINE_MAX` UTF-16 units are truncated with an
 * ellipsis; the cut never lands inside a surrogate pair, so an emoji at the
 * boundary is dropped whole instead of leaving a lone surrogate in the output.
 *
 * @param lines Parent feed labels as plain text, oldest first.
 * @param childLines Sub-agent narrative lines as plain text, oldest first.
 * @returns Ready Telegram HTML (callers must NOT re-escape). Only the
 *   no-children path can yield null, via `renderActivityFeed`.
 */
export function renderActivityFeedWithNested(
  lines: string[],
  childLines: string[],
): string | null {
  const children = childLines.map((s) => s.trim()).filter((s) => s.length > 0);
  if (children.length === 0) return renderActivityFeed(lines);

  const out: string[] = [];

  // Parent block: every line is "done" because the live step is in the child block.
  const shownParent = lines.slice(-MIRROR_MAX_LINES);
  const hiddenParent = lines.length - shownParent.length;
  if (hiddenParent > 0) out.push(`<i>✓ +${hiddenParent} earlier…</i>`);
  for (const l of shownParent) out.push(`<i>✓ ${escapeFeedHtml(l)}</i>`);

  // Child block: keep only the newest NESTED_MAX_LINES lines.
  const shownChild = children.slice(-NESTED_MAX_LINES);
  const hiddenChild = children.length - shownChild.length;
  if (hiddenChild > 0) out.push(`${NESTED_PREFIX}<i>+${hiddenChild} earlier…</i>`);
  const lastChildIdx = shownChild.length - 1;
  shownChild.forEach((l, i) => {
    let t = l;
    if (l.length > NESTED_LINE_MAX) {
      let cut = NESTED_LINE_MAX - 1;
      // If the last kept unit is a high surrogate, its partner would be cut
      // off — back up one unit so the pair is dropped together.
      const lastKept = l.charCodeAt(cut - 1);
      if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;
      t = l.slice(0, cut) + "…";
    }
    const esc = escapeFeedHtml(t);
    out.push(
      i === lastChildIdx
        ? `${NESTED_PREFIX}<b>→ ${esc}</b>`
        : `${NESTED_PREFIX}<i>${esc}</i>`,
    );
  });

  // `children` is non-empty here, so `out` always holds at least one line.
  return out.join("\n");
}
273
+
219
274
  /**
220
275
  * Like appendActivityLine, but for a pre-computed label (from the
221
276
  * real-time PreToolUse sidecar / `tool_label` event) — the hook already
@@ -11,6 +11,59 @@
11
11
 
12
12
  import type { Driver, ObservedMessage, ObservedReaction } from "./driver.js";
13
13
 
14
/**
 * Canonical shape of a worker-activity-feed message (#2000) as rendered
 * in Telegram: a running header `🔧 Worker · …` that edits in place and
 * finalizes to `✅ Worker done · …` / `⚠️ Worker failed · …`. The feed is
 * default-on fleet-wide as of v0.14.19, so background sub-agent activity
 * now surfaces as its own bot message in any chat — including DMs whose
 * scenario only cares about the agent's conversational reply.
 *
 * Matching rules:
 *  - an emoji-prefixed header (`🔧 Worker`, `✅ Worker done`,
 *    `⚠️ Worker failed`, with or without the U+FE0F variation selector)
 *    matches anywhere in the text;
 *  - a bare `Worker done` / `Worker failed` (emoji lost) matches only at the
 *    start of a line, so an ordinary sentence such as "the worker failed,
 *    retrying" is not mistaken for feed noise.
 *
 * Single source of truth; the worker-feed scenario asserts against this,
 * and recall/reply scenarios exclude it via {@link isWorkerFeedMessage}.
 */
export const WORKER_FEED_RE =
  /🔧\s*Worker|(?:✅|\u26A0\uFE0F?)\s*Worker (?:done|failed)|^\s*Worker (?:done|failed)/imu;

/**
 * True when `m` is a worker-activity-feed message rather than the agent's
 * own reply. Use it to skip feed noise when matching for a turn's actual
 * answer — without it, an `expectMessage(/\S/)` can latch onto the feed's
 * first paint and miss (or mis-time) the real reply. See #2000 / the
 * memory-survives-restart recall scenario.
 *
 * @param m Any observed message; only its `text` is inspected.
 * @returns Whether `m.text` matches {@link WORKER_FEED_RE}.
 */
export function isWorkerFeedMessage(m: { readonly text: string }): boolean {
  return WORKER_FEED_RE.test(m.text);
}
37
+
38
/**
 * A single tool-activity-feed line as rendered by `renderActivityFeed` /
 * `renderActivityFeedWithNested` (telegram-plugin/tool-activity-summary.ts):
 * the in-progress step is `→ <label>`, finished steps are `✓ <label>`, a
 * long turn gets a `✓ +N earlier…` header, and a foreground sub-agent's
 * steps are nested as `↳ <label>` / `↳ → <label>` / `↳ +N earlier…`.
 * Telegram strips the bold/italic wrapping, so the observed text is just the
 * marker glyph + label. Lines are trimmed before matching, so the nested
 * block's leading indent does not matter.
 */
const ACTIVITY_FEED_LINE_RE = /^[→✓↳]\s/u;

/**
 * True when `m` is the live tool-activity feed (the one-message list of
 * "what the agent is doing this turn") rather than the agent's reply. A
 * message qualifies only when EVERY non-empty line is an activity line —
 * so a real reply that merely contains an arrow is never misclassified.
 *
 * Recall/reply scenarios must skip this in addition to
 * {@link isWorkerFeedMessage}: on a turn that uses tools, the feed paints
 * `→ Finding the right tool` as its own bot message before the real answer
 * lands, and an `expectMessage(/\S/)` would otherwise latch onto it.
 *
 * @param m Any observed message; only its `text` is inspected.
 * @returns false for an empty / whitespace-only message.
 */
export function isActivityFeedMessage(m: { readonly text: string }): boolean {
  const lines = m.text
    .split("\n")
    .map((l) => l.trim())
    .filter((l) => l.length > 0);
  // An empty message is not a feed — `every` on [] would otherwise say true.
  if (lines.length === 0) return false;
  return lines.every((l) => ACTIVITY_FEED_LINE_RE.test(l));
}
66
+
14
67
  export interface PollOptions {
15
68
  /** Hard deadline; the predicate must resolve truthy before this. */
16
69
  timeout: number;
@@ -646,6 +646,36 @@ export class Driver {
646
646
  return { messageId: sent.id };
647
647
  }
648
648
 
649
/**
 * Send a photo album (Telegram media_group) — multiple photos posted as
 * one group, the way a forwarded album or a multi-image paste arrives.
 * Exercises the gateway's A2 multi-attachment coalescing: with
 * coalesce.max_attachments default 10, the whole album folds into ONE
 * Claude turn (the agent sees image_path, image_path_2, …). The optional
 * caption rides on the first item, matching Telegram client behaviour.
 *
 * @param chatId Target chat.
 * @param photoPaths Local file paths, one per album item, in send order.
 * @param caption Optional caption; attached to the first photo only. An
 *   empty string is treated as "no caption".
 * @param opts Optional reply target; `replyTo` wins over `messageThreadId`.
 * @returns Every sent message id (one per album item).
 * @throws Error when `photoPaths` is empty — there is no album to send, and
 *   failing here gives a clearer message than a downstream API rejection.
 */
async sendAlbum(
  chatId: number,
  photoPaths: string[],
  caption?: string,
  opts?: SendTextOptions,
): Promise<{ messageIds: number[] }> {
  const c = this.requireClient();
  if (photoPaths.length === 0) {
    throw new Error("sendAlbum: photoPaths must contain at least one photo");
  }
  const replyTo = opts?.replyTo ?? opts?.messageThreadId;
  // mtcute reads a bare string as a file_id/URL; the `file:` scheme is
  // what forces an upload from local disk (see normalize-input-media).
  const medias = photoPaths.map((p, i) =>
    InputMedia.photo(`file:${p}`, i === 0 && caption ? { caption } : undefined),
  );
  const sent = await c.sendMediaGroup(
    chatId,
    medias,
    replyTo ? { replyTo } : undefined,
  );
  return { messageIds: sent.map((m) => m.id) };
}
678
+
649
679
  /**
650
680
  * Send or remove an emoji reaction on a target message. Used by the
651
681
  * UAT reaction-trigger scenario (#1074) to exercise the gateway's
@@ -0,0 +1,80 @@
1
+ import { describe, expect, it } from "bun:test";
2
+ import {
3
+ isActivityFeedMessage,
4
+ isWorkerFeedMessage,
5
+ WORKER_FEED_RE,
6
+ } from "./assertions.js";
7
+
8
+ // Pins the worker-activity-feed detector (#2000) used by recall/reply
9
+ // scenarios to skip feed noise. The live UAT it guards can't run in CI
10
+ // (needs sudo + a real Telegram session), so this is the CI-verifiable
11
+ // floor for the matcher's behavior.
12
+ const feed = (text: string) => ({ text }) as Parameters<typeof isWorkerFeedMessage>[0];
13
+
14
+ describe("isWorkerFeedMessage", () => {
15
+ it("matches the running feed header", () => {
16
+ expect(isWorkerFeedMessage(feed("🔧 Worker · crawling changelog · 0:12"))).toBe(true);
17
+ });
18
+
19
+ it("matches the terminal done/failed recaps", () => {
20
+ expect(isWorkerFeedMessage(feed("✅ Worker done · 10 tools · 1:03"))).toBe(true);
21
+ expect(isWorkerFeedMessage(feed("⚠️ Worker failed · 3 tools"))).toBe(true);
22
+ });
23
+
24
+ it("matches a done/failed header even without the leading emoji", () => {
25
+ expect(isWorkerFeedMessage(feed("Worker done · 2 tools"))).toBe(true);
26
+ expect(isWorkerFeedMessage(feed("Worker failed mid-step"))).toBe(true);
27
+ });
28
+
29
+ it("does NOT match an ordinary agent reply", () => {
30
+ expect(isWorkerFeedMessage(feed("on it, pulling the logs now"))).toBe(false);
31
+ expect(
32
+ isWorkerFeedMessage(feed("SWITCHROOM_UAT_MEM_DEADBEEFCAFE1234")),
33
+ ).toBe(false);
34
+ });
35
+
36
+ it("does NOT match a reply that merely mentions the word worker", () => {
37
+ expect(
38
+ isWorkerFeedMessage(feed("I'll dispatch a worker to handle the crawl.")),
39
+ ).toBe(false);
40
+ });
41
+
42
+ it("exposes the regex for scenarios that assert on the feed directly", () => {
43
+ expect(WORKER_FEED_RE.test("🔧 Worker · x")).toBe(true);
44
+ });
45
+ });
46
+
47
+ describe("isActivityFeedMessage", () => {
48
+ it("matches the in-progress step line", () => {
49
+ expect(isActivityFeedMessage(feed("→ Finding the right tool"))).toBe(true);
50
+ });
51
+
52
+ it("matches a multi-line feed (done steps + in-progress)", () => {
53
+ expect(
54
+ isActivityFeedMessage(feed("✓ Reading CLAUDE.md\n→ Searching memory")),
55
+ ).toBe(true);
56
+ });
57
+
58
+ it("matches the +N earlier header", () => {
59
+ expect(
60
+ isActivityFeedMessage(feed("✓ +3 earlier…\n✓ Reading CLAUDE.md\n→ Searching memory")),
61
+ ).toBe(true);
62
+ });
63
+
64
+ it("does NOT match an ordinary agent reply", () => {
65
+ expect(isActivityFeedMessage(feed("on it, pulling the logs now"))).toBe(false);
66
+ expect(
67
+ isActivityFeedMessage(feed("SWITCHROOM_UAT_MEM_DEADBEEFCAFE1234")),
68
+ ).toBe(false);
69
+ });
70
+
71
+ it("does NOT match a reply that merely contains an arrow mid-text", () => {
72
+ expect(
73
+ isActivityFeedMessage(feed("The flow is request → response → render.")),
74
+ ).toBe(false);
75
+ });
76
+
77
+ it("does NOT match an empty message", () => {
78
+ expect(isActivityFeedMessage(feed(" "))).toBe(false);
79
+ });
80
+ });
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Album-coalescing scenario — driver sends a 3-photo Telegram album
3
+ * (media_group) in one shot; the gateway's A2 multi-attachment
4
+ * coalescing (coalesce.max_attachments, default 10 since v0.14.21)
5
+ * MUST fold all three into a SINGLE Claude turn, so the agent sees
6
+ * image_path + image_path_2 + image_path_3 together and can report a
7
+ * count of 3.
8
+ *
9
+ * Regression gate for the default-on flip (#2021): before max_attachments
10
+ * defaulted to 10, an album bypassed coalescing (each part its own turn),
11
+ * so the agent would only ever see ONE image per turn and answer "1".
12
+ * A reply of "3" proves the album coalesced.
13
+ *
14
+ * Part of: https://github.com/switchroom/switchroom/issues/865
15
+ *
16
+ * ## How the signal is read robustly
17
+ *
18
+ * The agent's answer is a bare count, which would collide with incidental
19
+ * digits the chat is now full of by default: the pinned progress card's
20
+ * timer (`00:03`) and — since the worker feed went default-on fleet-wide
21
+ * (#2009 / v0.14.19) — worker-feed lines like `🔧 Worker · … · 0:12`.
22
+ * A "first bot message containing a digit" matcher would latch onto one of
23
+ * those and false-fail even when coalescing is healthy (see the
24
+ * memory-survives-restart matcher-flake note + `isWorkerFeedMessage`).
25
+ *
26
+ * Two defences, mirroring the sibling `jtbd-forwarded-burst-dm` gate:
27
+ * 1. Anchor the answer on a distinctive token — the agent is told to
28
+ * reply `IMAGECOUNT=<n>`. "IMAGECOUNT" never appears in a card or a
29
+ * worker-feed line, so the matcher cannot collide with their digits.
30
+ * 2. Drain observed messages into a per-id map (collapsing streamed
31
+ * edits to latest text), skip our own sends + worker-feed noise, and
32
+ * poll until the ANSWER token appears — rather than returning the
33
+ * first message that happens to match.
34
+ *
35
+ * - Coalesced → one turn sees 3 images → `IMAGECOUNT=3`.
36
+ * - Non-coalesced → the turn carrying the caption/question sees only its
37
+ * own (first) image → `IMAGECOUNT=1`. Surfaced as an explicit failure.
38
+ *
39
+ * Fixtures: three tiny solid-colour JPEGs under fixtures/album/, committed
40
+ * so the gate runs without a generation step. (Regenerate with
41
+ * `ffmpeg -f lavfi -i color=c=red:s=320x240 -frames:v 1 red.jpg`.)
42
+ */
43
+
44
+ import path from "node:path";
45
+ import { existsSync } from "node:fs";
46
+ import { describe, expect, it } from "vitest";
47
+ import { spinUp } from "../harness.js";
48
+ import { pollUntil, isWorkerFeedMessage } from "../assertions.js";
49
+ import type { ObservedMessage } from "../driver.js";
50
+
51
+ const AGENT = "test-harness";
52
+
53
+ const FIXTURE_DIR = path.resolve(__dirname, "..", "fixtures", "album");
54
+ const PHOTOS = ["red.jpg", "green.jpg", "blue.jpg"].map((f) =>
55
+ path.join(FIXTURE_DIR, f),
56
+ );
57
+
58
+ const CAPTION =
59
+ "I just sent you a photo album in a single message. Count the separate " +
60
+ "image files you received in THIS ONE incoming message and reply with " +
61
+ "ONLY the token IMAGECOUNT=<n> (e.g. IMAGECOUNT=3). Nothing else.";
62
+
63
+ // Warm TTFO on test-harness is ~7s; an album adds the (sub-second)
64
+ // coalesce window plus the model looking at three images.
65
+ const ANSWER_TIMEOUT_MS = 90_000;
66
+
67
/**
 * Extracts the count from the first `IMAGECOUNT=<n>` token in `text`.
 * Matching is case-insensitive and the separator may be `=`, `:`, or absent,
 * with optional whitespace on either side.
 *
 * @returns The parsed base-10 count, or undefined when no token with digits
 *   is present.
 */
function imageCountIn(text: string): number | undefined {
  const hit = /IMAGECOUNT\s*[:=]?\s*(\d+)/i.exec(text);
  if (hit === null) {
    return undefined;
  }
  const digits = hit[1];
  return Number.parseInt(digits, 10);
}
72
+
73
describe("uat: album-coalescing DM round-trip", () => {
  it(
    "a 3-photo album folds into ONE turn — agent reports seeing 3 images",
    async () => {
      // Fail fast, with a pointer to the fix, when a fixture is missing.
      for (const p of PHOTOS) {
        if (!existsSync(p)) {
          throw new Error(
            `album fixture missing at ${p} — see scenario header to regenerate`,
          );
        }
      }
      const sc = await spinUp({ agent: AGENT });
      try {
        // Observe BEFORE sending — observeMessages only sees live updates.
        // Drain into a per-id map so streamed edits collapse to latest text;
        // skip our own sends and worker-feed noise so neither can satisfy
        // the count matcher.
        const latestById = new Map<number, ObservedMessage>();
        const stream = sc.driver.observeMessages(sc.botUserId);
        const consume = (async () => {
          for await (const m of stream) {
            if (m.senderUserId === sc.driverUserId) continue;
            if (isWorkerFeedMessage(m)) continue;
            latestById.set(m.messageId, m);
          }
        })();

        // Send + poll inside try/finally so the observer is always closed
        // and drained — including when sendAlbum itself rejects — instead
        // of leaving the consumer loop running into tearDown.
        const answer = await (async () => {
          try {
            await sc.driver.sendAlbum(sc.botUserId, PHOTOS, CAPTION);

            // Poll until a bot message carries an IMAGECOUNT token. A
            // poll failure is mapped to undefined so the richer
            // diagnostic below is what surfaces.
            return await pollUntil(
              () => {
                for (const m of latestById.values()) {
                  if (imageCountIn(m.text) !== undefined) return m;
                }
                return undefined;
              },
              { timeout: ANSWER_TIMEOUT_MS, interval: 500 },
            ).catch(() => undefined);
          } finally {
            await stream[Symbol.asyncIterator]().return?.(undefined as never);
            await consume;
          }
        })();

        if (!answer) {
          const seen = [...latestById.values()]
            .map((m) => `#${m.messageId}=${JSON.stringify(m.text.slice(0, 60))}`)
            .join(" ");
          throw new Error(
            `[album-coalescing] No bot reply carried an IMAGECOUNT token ` +
              `within ${ANSWER_TIMEOUT_MS}ms. Bot messages seen: ${seen || "(none)"}.`,
          );
        }

        const count = imageCountIn(answer.text);
        // Coalesced => 3. A non-coalescing gateway answers 1 (the question
        // rides photo #1; the other two parts spill to their own turns).
        expect(count).toBe(3);
      } finally {
        await sc.tearDown();
      }
    },
    ANSWER_TIMEOUT_MS + 30_000,
  );
});
@@ -53,9 +53,24 @@ import { describe, it, expect, beforeAll } from "vitest";
53
53
  import { execSync } from "node:child_process";
54
54
  import { randomBytes } from "node:crypto";
55
55
  import { spinUp } from "../harness.js";
56
+ import { isActivityFeedMessage, isWorkerFeedMessage } from "../assertions.js";
57
+ import type { ObservedMessage } from "../driver.js";
56
58
 
57
59
  const AGENT = "test-harness";
58
60
 
61
/**
 * Predicate for "this bot message is the agent's actual reply".
 *
 * A bare `/\S/` matcher is not enough: it can latch onto feed noise, and the
 * verbatim-token check then fails against that noise instead of catching a
 * real memory miss. Two kinds of bot message are rejected:
 *   1. The worker-activity feed (#2000, default-on since v0.14.19) — a
 *      stray background sub-agent posts `🔧 Worker · …` into this DM.
 *   2. The tool-activity feed — on a turn that uses tools (memory recall
 *      does), `→ Finding the right tool` paints as its own message before
 *      the real answer lands.
 * Anything else with at least one non-whitespace character counts as a reply.
 */
const isReply = (m: ObservedMessage): boolean => {
  if (!/\S/.test(m.text)) return false;
  if (isWorkerFeedMessage(m)) return false;
  return !isActivityFeedMessage(m);
};
73
+
59
74
  const RESTART_BUDGET_MS = 90_000;
60
75
  const CAPTURE_REPLY_BUDGET_MS = 60_000;
61
76
  const RECALL_REPLY_BUDGET_MS = 120_000;
@@ -139,7 +154,7 @@ const sudoOk = canShellSudo();
139
154
  `(This is a memory-survival UAT — store it via hindsight.)`,
140
155
  );
141
156
 
142
- const captureReply = await sc1.expectMessage(/\S/, {
157
+ const captureReply = await sc1.expectMessage(isReply, {
143
158
  from: "bot",
144
159
  timeout: CAPTURE_REPLY_BUDGET_MS,
145
160
  });
@@ -180,7 +195,7 @@ const sudoOk = canShellSudo();
180
195
  `Reply with the token only, no extra text.`,
181
196
  );
182
197
 
183
- const recallReply = await sc2.expectMessage(/\S/, {
198
+ const recallReply = await sc2.expectMessage(isReply, {
184
199
  from: "bot",
185
200
  timeout: RECALL_REPLY_BUDGET_MS + 5_000,
186
201
  });
@@ -25,15 +25,21 @@
25
25
  * - 429 cooldown + message_id drift resilience (re-post on stale edit),
26
26
  * - a forced terminal edit on `finish` regardless of throttle.
27
27
  *
28
- * The feed is gated to BACKGROUND workers and lives behind the
29
- * `SWITCHROOM_WORKER_ACTIVITY_FEED` flag — see the gateway wiring. The
30
- * watcher already drives the cues (it polls the worker jsonl directly,
31
- * so it keeps firing after the parent turn ends), which is why the feed
32
- * is fed from watcher callbacks rather than the bridge event stream.
28
+ * The feed is gated to BACKGROUND workers and is ON by default; set
29
+ * `SWITCHROOM_WORKER_ACTIVITY_FEED=0` to disable it — see the gateway
30
+ * wiring. The watcher already drives the cues (it polls the worker jsonl
31
+ * directly, so it keeps firing after the parent turn ends), which is why
32
+ * the feed is fed from watcher callbacks rather than the bridge event stream.
33
33
  */
34
34
 
35
35
  import { escapeHtml, formatDuration, truncate } from './card-format.js'
36
36
 
37
/**
 * Decides whether the worker-activity feed is on, given the raw value of
 * `SWITCHROOM_WORKER_ACTIVITY_FEED`.
 *
 * The feed is opt-out: only the exact string `'0'` turns it off. An unset
 * variable, an empty string, or any other value leaves it on.
 *
 * @param envVal The environment variable's value, or undefined when unset.
 * @returns false only for `'0'`; true otherwise.
 */
export function isWorkerActivityFeedEnabled(envVal: string | undefined): boolean {
  const optedOut = envVal === '0'
  return !optedOut
}
42
+
37
43
  export type WorkerActivityState = 'running' | 'done' | 'failed'
38
44
 
39
45
  /** The render-relevant snapshot of a worker at one instant. */