switchroom 0.14.20 → 0.14.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +2 -3
- package/dist/auth-broker/index.js +2 -3
- package/dist/cli/notion-write-pretool.mjs +2 -3
- package/dist/cli/switchroom.js +16 -8
- package/dist/host-control/main.js +2 -3
- package/dist/vault/approvals/kernel-server.js +2 -3
- package/dist/vault/broker/server.js +2 -3
- package/package.json +3 -3
- package/profiles/_base/start.sh.hbs +11 -24
- package/profiles/_shared/telegram-style.md.hbs +2 -2
- package/profiles/default/CLAUDE.md.hbs +4 -1
- package/skills/switchroom-runtime/SKILL.md +6 -16
- package/telegram-plugin/agent-dir.ts +15 -0
- package/telegram-plugin/dist/gateway/gateway.js +655 -514
- package/telegram-plugin/gateway/coalesce-attachments.ts +9 -0
- package/telegram-plugin/gateway/gateway.ts +246 -83
- package/telegram-plugin/gateway/inbound-spool.ts +15 -0
- package/telegram-plugin/gateway/interrupt-defer.ts +6 -0
- package/telegram-plugin/gateway/resume-inbound-builder.ts +180 -0
- package/telegram-plugin/registry/turns-schema.ts +138 -33
- package/telegram-plugin/stream-reply-handler.ts +1 -11
- package/telegram-plugin/tests/agent-dir.test.ts +25 -0
- package/telegram-plugin/tests/coalesce-attachments.test.ts +24 -6
- package/telegram-plugin/tests/e2e.test.ts +2 -77
- package/telegram-plugin/tests/inbound-spool.test.ts +45 -0
- package/telegram-plugin/tests/interrupt-defer.test.ts +13 -0
- package/telegram-plugin/tests/multi-turn-continuity.test.ts +0 -1
- package/telegram-plugin/tests/outbound-ordering.test.ts +0 -1
- package/telegram-plugin/tests/parse-mode-rotation.test.ts +0 -1
- package/telegram-plugin/tests/permission-verdict-resume-guard.test.ts +86 -0
- package/telegram-plugin/tests/races.test.ts +0 -26
- package/telegram-plugin/tests/registry-turns.test.ts +106 -29
- package/telegram-plugin/tests/resume-inbound-builder.test.ts +182 -0
- package/telegram-plugin/tests/status-accent.test.ts +0 -1
- package/telegram-plugin/tests/stream-reply-error-paths.test.ts +0 -1
- package/telegram-plugin/tests/stream-reply-handler.test.ts +0 -24
- package/telegram-plugin/tests/streaming-e2e.test.ts +0 -1
- package/telegram-plugin/tests/streaming-orchestration.test.ts +0 -1
- package/telegram-plugin/tests/tool-activity-summary.test.ts +44 -0
- package/telegram-plugin/tests/turns-writer.test.ts +16 -6
- package/telegram-plugin/tests/worker-activity-feed.test.ts +14 -0
- package/telegram-plugin/tool-activity-summary.ts +55 -0
- package/telegram-plugin/uat/assertions.ts +53 -0
- package/telegram-plugin/uat/driver.ts +30 -0
- package/telegram-plugin/uat/feed-matcher.test.ts +80 -0
- package/telegram-plugin/uat/fixtures/album/blue.jpg +0 -0
- package/telegram-plugin/uat/fixtures/album/green.jpg +0 -0
- package/telegram-plugin/uat/fixtures/album/red.jpg +0 -0
- package/telegram-plugin/uat/scenarios/jtbd-album-coalescing-dm.test.ts +136 -0
- package/telegram-plugin/uat/scenarios/jtbd-memory-survives-restart-dm.test.ts +17 -2
- package/telegram-plugin/worker-activity-feed.ts +11 -5
- package/telegram-plugin/handoff-continuity.ts +0 -206
- package/telegram-plugin/tests/handoff-continuity.test.ts +0 -262
|
@@ -40,7 +40,6 @@ function makeDeps(
|
|
|
40
40
|
markdownToHtml: (t) => realMarkdownToHtml(t),
|
|
41
41
|
escapeMarkdownV2: (t) => t,
|
|
42
42
|
repairEscapedWhitespace: (t) => t,
|
|
43
|
-
takeHandoffPrefix: () => '',
|
|
44
43
|
assertAllowedChat: () => {},
|
|
45
44
|
resolveThreadId: (_, explicit) => (explicit != null ? Number(explicit) : undefined),
|
|
46
45
|
disableLinkPreview: true,
|
|
@@ -35,7 +35,6 @@ function makeDeps(
|
|
|
35
35
|
markdownToHtml: (t) => `<b>${t}</b>`,
|
|
36
36
|
escapeMarkdownV2: (t) => `\\${t}\\`,
|
|
37
37
|
repairEscapedWhitespace: (t) => t,
|
|
38
|
-
takeHandoffPrefix: () => '',
|
|
39
38
|
assertAllowedChat: () => {},
|
|
40
39
|
resolveThreadId: (_, explicit) => (explicit != null ? Number(explicit) : undefined),
|
|
41
40
|
disableLinkPreview: true,
|
|
@@ -104,29 +103,6 @@ describe('handleStreamReply', () => {
|
|
|
104
103
|
expect(bot.api.sendMessage.mock.calls[0][2]?.parse_mode).toBeUndefined()
|
|
105
104
|
})
|
|
106
105
|
|
|
107
|
-
it('prepends handoff prefix on first chunk only', async () => {
|
|
108
|
-
const state = makeState()
|
|
109
|
-
const deps = makeDeps(bot, {
|
|
110
|
-
takeHandoffPrefix: vi.fn<(fmt: string) => string>(() => '↩️ '),
|
|
111
|
-
})
|
|
112
|
-
|
|
113
|
-
// First call: prefix applied
|
|
114
|
-
const p1 = handleStreamReply({ chat_id: '1', text: 'first' }, state, deps)
|
|
115
|
-
await microtaskFlush()
|
|
116
|
-
await p1
|
|
117
|
-
// Prefix is prepended AFTER format rendering (it's already format-safe
|
|
118
|
-
// because takeHandoffPrefix takes the format tag).
|
|
119
|
-
expect(bot.api.sendMessage.mock.calls[0][1]).toBe('↩️ <b>first</b>')
|
|
120
|
-
|
|
121
|
-
// Second call: handoff not consumed again
|
|
122
|
-
vi.advanceTimersByTime(1000)
|
|
123
|
-
const p2 = handleStreamReply({ chat_id: '1', text: 'second' }, state, deps)
|
|
124
|
-
await microtaskFlush()
|
|
125
|
-
await p2
|
|
126
|
-
expect(bot.api.editMessageText.mock.calls[0][2]).toBe('<b>second</b>')
|
|
127
|
-
expect(deps.takeHandoffPrefix).toHaveBeenCalledTimes(1)
|
|
128
|
-
})
|
|
129
|
-
|
|
130
106
|
it('throws when text exceeds 4096 (no silent id:pending)', async () => {
|
|
131
107
|
// Pins the bug found in prod: a >4096-char text would hit draft-
|
|
132
108
|
// stream's length guard, silently stop, and the handler would return
|
|
@@ -88,7 +88,6 @@ function setup(opts: { progressCardActive?: boolean } = {}): Fixture {
|
|
|
88
88
|
markdownToHtml: (t) => `<b>${t}</b>`, // stream_reply: bold
|
|
89
89
|
escapeMarkdownV2: (t) => t,
|
|
90
90
|
repairEscapedWhitespace: (t) => t,
|
|
91
|
-
takeHandoffPrefix: () => '',
|
|
92
91
|
assertAllowedChat: () => {},
|
|
93
92
|
resolveThreadId: () => undefined,
|
|
94
93
|
disableLinkPreview: true,
|
|
@@ -407,7 +407,6 @@ function makeActivityDeps(
|
|
|
407
407
|
markdownToHtml: (t) => t,
|
|
408
408
|
escapeMarkdownV2: (t) => t,
|
|
409
409
|
repairEscapedWhitespace: (t) => t,
|
|
410
|
-
takeHandoffPrefix: () => '',
|
|
411
410
|
assertAllowedChat: () => {},
|
|
412
411
|
resolveThreadId: (_, explicit) => (explicit != null ? Number(explicit) : undefined),
|
|
413
412
|
disableLinkPreview: true,
|
|
@@ -4,7 +4,9 @@ import {
|
|
|
4
4
|
appendActivityLine,
|
|
5
5
|
appendActivityLabel,
|
|
6
6
|
renderActivityFeed,
|
|
7
|
+
renderActivityFeedWithNested,
|
|
7
8
|
MIRROR_MAX_LINES,
|
|
9
|
+
NESTED_MAX_LINES,
|
|
8
10
|
} from "../tool-activity-summary.js";
|
|
9
11
|
|
|
10
12
|
describe("describeToolUse — friendly per-tool rendering (draft-mirror)", () => {
|
|
@@ -143,3 +145,45 @@ describe("appendActivityLabel — precomputed label feed (tool_label path)", ()
|
|
|
143
145
|
expect(lines.length).toBe(2);
|
|
144
146
|
});
|
|
145
147
|
});
|
|
148
|
+
|
|
149
|
+
describe("renderActivityFeedWithNested — foreground sub-agent nesting (Model A)", () => {
|
|
150
|
+
it("with no child lines, is identical to the flat feed", () => {
|
|
151
|
+
const lines = ["Searching memory", "Delegating: review the migration"];
|
|
152
|
+
expect(renderActivityFeedWithNested(lines, [])).toBe(renderActivityFeed(lines));
|
|
153
|
+
// whitespace-only children also collapse to the flat feed
|
|
154
|
+
expect(renderActivityFeedWithNested(lines, [" ", ""])).toBe(renderActivityFeed(lines));
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
it("done-styles ALL parent lines and nests the child block (newest = bold →)", () => {
|
|
158
|
+
const parent = ["Searching memory", "Delegating: review the migration"];
|
|
159
|
+
const child = ["Reading schema.ts", "Looking for foreign keys"];
|
|
160
|
+
const out = renderActivityFeedWithNested(parent, child)!;
|
|
161
|
+
// Parent is blocked at the Task tool → none of its lines is the live step.
|
|
162
|
+
expect(out).toContain("<i>✓ Searching memory</i>");
|
|
163
|
+
expect(out).toContain("<i>✓ Delegating: review the migration</i>");
|
|
164
|
+
expect(out).not.toContain("<b>→ Delegating");
|
|
165
|
+
// The live → step is the newest nested child line; earlier child = italic.
|
|
166
|
+
expect(out).toContain(" ↳ <i>Reading schema.ts</i>");
|
|
167
|
+
expect(out).toContain(" ↳ <b>→ Looking for foreign keys</b>");
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
it("caps the nested block to NESTED_MAX_LINES with a '↳ +N earlier…' header", () => {
|
|
171
|
+
const child = Array.from({ length: NESTED_MAX_LINES + 3 }, (_, i) => `step ${i + 1}`);
|
|
172
|
+
const out = renderActivityFeedWithNested(["Delegating: x"], child)!;
|
|
173
|
+
expect(out).toContain(" ↳ <i>+3 earlier…</i>");
|
|
174
|
+
// newest nested line is the live → step
|
|
175
|
+
expect(out).toContain(` ↳ <b>→ step ${NESTED_MAX_LINES + 3}</b>`);
|
|
176
|
+
// the oldest (collapsed) lines are not rendered verbatim
|
|
177
|
+
expect(out).not.toContain("step 1<");
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
it("renders the child block even when the parent feed is empty", () => {
|
|
181
|
+
const out = renderActivityFeedWithNested([], ["Reading a.ts"]);
|
|
182
|
+
expect(out).toBe(" ↳ <b>→ Reading a.ts</b>");
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
it("HTML-escapes nested child text", () => {
  // Raw child text containing every character Telegram's HTML parse mode
  // treats as markup; the feed must emit entities, never the raw glyphs.
  const out = renderActivityFeedWithNested(["Delegating: x"], ["touch <a> & <b>"])!;
  // NOTE(review): assumes escapeFeedHtml maps `<`, `>` and `&` to
  // `&lt;`, `&gt;` and `&amp;` — confirm against its definition.
  expect(out).toContain(" ↳ <b>→ touch &lt;a&gt; &amp; &lt;b&gt;</b>");
});
|
|
189
|
+
});
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* 1. Clean turn: insert + finalize → row has ended_via='stop', non-null
|
|
6
6
|
* ended_at, correct previews.
|
|
7
7
|
* 2. Mid-turn restart: insert without finalize, simulate gateway boot via
|
|
8
|
-
*
|
|
8
|
+
* markOrphanedWithTimeoutClassification → row has ended_via='restart'.
|
|
9
9
|
* 3. Multiple concurrent turns same chat: each row has a unique turn_key,
|
|
10
10
|
* no cross-contamination.
|
|
11
11
|
* 4. tool_call_count increments correctly for N tool_use events.
|
|
@@ -20,9 +20,19 @@ import {
|
|
|
20
20
|
openTurnsDbInMemory,
|
|
21
21
|
recordTurnStart,
|
|
22
22
|
recordTurnEnd,
|
|
23
|
-
|
|
23
|
+
markOrphanedWithTimeoutClassification,
|
|
24
24
|
} from '../registry/turns-schema.js'
|
|
25
25
|
|
|
26
|
+
// Test helper: runs the orphan reaper with no live hang marker (both
// marker fields null), which is how the gateway invokes it between turns —
// so every still-open turn is classified as a clean 'restart' interrupt.
function reapAsRestart(db: Parameters<typeof recordTurnEnd>[0]) {
  const noLiveHangMarker = {
    markerTurnKey: null,
    markerAgeMs: null,
    hangThresholdMs: 300_000,
  };
  return markOrphanedWithTimeoutClassification(db, noLiveHangMarker);
}
|
|
35
|
+
|
|
26
36
|
// ---------------------------------------------------------------------------
|
|
27
37
|
// 1. Clean turn
|
|
28
38
|
// ---------------------------------------------------------------------------
|
|
@@ -110,7 +120,7 @@ describe('clean turn (Phase 1 #332)', () => {
|
|
|
110
120
|
// ---------------------------------------------------------------------------
|
|
111
121
|
|
|
112
122
|
describe('mid-turn restart (Phase 1 #332)', () => {
|
|
113
|
-
it('insert without finalize, then
|
|
123
|
+
it('insert without finalize, then reaper → ended_via=restart', () => {
|
|
114
124
|
const db = openTurnsDbInMemory()
|
|
115
125
|
|
|
116
126
|
recordTurnStart(db, {
|
|
@@ -120,8 +130,8 @@ describe('mid-turn restart (Phase 1 #332)', () => {
|
|
|
120
130
|
})
|
|
121
131
|
|
|
122
132
|
// Simulate gateway boot reaper (same path as the real gateway boot).
|
|
123
|
-
const swept =
|
|
124
|
-
expect(swept).toBe(1)
|
|
133
|
+
const swept = reapAsRestart(db)
|
|
134
|
+
expect(swept.reaped).toBe(1)
|
|
125
135
|
|
|
126
136
|
const row = db
|
|
127
137
|
.prepare('SELECT ended_via, ended_at FROM turns WHERE turn_key = ?')
|
|
@@ -141,7 +151,7 @@ describe('mid-turn restart (Phase 1 #332)', () => {
|
|
|
141
151
|
recordTurnEnd(db, { turnKey: 'chat2:_:2001', endedVia: 'stop' })
|
|
142
152
|
recordTurnStart(db, { turnKey: 'chat2:_:2002', chatId: 'chat2' })
|
|
143
153
|
|
|
144
|
-
|
|
154
|
+
reapAsRestart(db)
|
|
145
155
|
|
|
146
156
|
const clean = db
|
|
147
157
|
.prepare('SELECT ended_via FROM turns WHERE turn_key = ?')
|
|
@@ -2,10 +2,24 @@ import { describe, it, expect } from 'vitest'
|
|
|
2
2
|
import {
|
|
3
3
|
renderWorkerActivity,
|
|
4
4
|
createWorkerActivityFeed,
|
|
5
|
+
isWorkerActivityFeedEnabled,
|
|
5
6
|
type WorkerActivityView,
|
|
6
7
|
type BotApiForWorkerFeed,
|
|
7
8
|
} from '../worker-activity-feed.js'
|
|
8
9
|
|
|
10
|
+
describe('isWorkerActivityFeedEnabled (default ON)', () => {
|
|
11
|
+
it('defaults to true when the env var is unset', () => {
|
|
12
|
+
expect(isWorkerActivityFeedEnabled(undefined)).toBe(true)
|
|
13
|
+
})
|
|
14
|
+
it('stays on for any value other than "0"', () => {
|
|
15
|
+
expect(isWorkerActivityFeedEnabled('1')).toBe(true)
|
|
16
|
+
expect(isWorkerActivityFeedEnabled('')).toBe(true)
|
|
17
|
+
})
|
|
18
|
+
it('only "0" disables it', () => {
|
|
19
|
+
expect(isWorkerActivityFeedEnabled('0')).toBe(false)
|
|
20
|
+
})
|
|
21
|
+
})
|
|
22
|
+
|
|
9
23
|
function view(partial: Partial<WorkerActivityView> = {}): WorkerActivityView {
|
|
10
24
|
return {
|
|
11
25
|
description: 'research competitors',
|
|
@@ -216,6 +216,61 @@ export function renderActivityFeed(lines: string[]): string | null {
|
|
|
216
216
|
return out.join("\n");
|
|
217
217
|
}
|
|
218
218
|
|
|
219
|
+
// ─── Foreground sub-agent nesting (Model A) ─────────────────────────────────
|
|
220
|
+
//
|
|
221
|
+
// A foreground sub-agent (Task/Agent with no `run_in_background`) runs INSIDE
|
|
222
|
+
// the parent's turn — the parent is blocked at the Task tool until it returns.
|
|
223
|
+
// Rather than a separate message, its live steps nest under the parent's own
|
|
224
|
+
// activity feed: the gold-standard main-turn visibility applied one level
|
|
225
|
+
// down. The parent's lines render as done (the parent handed off; it isn't
|
|
226
|
+
// the active worker), and the sub-agent's recent narrative lines render as an
|
|
227
|
+
// indented `↳` block with the newest as the in-progress `→` step.
|
|
228
|
+
|
|
229
|
+
/** Trailing nested child lines kept visible (Telegram length + readability). */
|
|
230
|
+
export const NESTED_MAX_LINES = 4;
|
|
231
|
+
/** Hard cap on a single nested narrative line. */
|
|
232
|
+
const NESTED_LINE_MAX = 90;
|
|
233
|
+
/** Indent marker for a nested sub-agent step. */
|
|
234
|
+
const NESTED_PREFIX = " ↳ ";
|
|
235
|
+
|
|
236
|
+
/**
 * Render the parent activity feed with a foreground sub-agent's steps nested
 * underneath it.
 *
 * Child lines are trimmed and blank ones dropped first; if nothing remains
 * the result is exactly `renderActivityFeed(lines)`. Otherwise:
 *  - the trailing `MIRROR_MAX_LINES` parent lines are all rendered as done
 *    (`✓`, italic), preceded by a `✓ +N earlier…` row when older parent lines
 *    were cut off;
 *  - the trailing `NESTED_MAX_LINES` child lines are rendered behind
 *    `NESTED_PREFIX`, the newest one bold with a `→` marker and the earlier
 *    ones italic, preceded by a `+N earlier…` row when older child lines were
 *    cut off. Each child line is clipped to `NESTED_LINE_MAX` characters
 *    (ending in `…`) before escaping.
 *
 * @param lines      Parent feed lines, oldest first (plain text).
 * @param childLines Sub-agent narrative lines, oldest first (plain text).
 * @returns Telegram HTML ready to send (callers must NOT re-escape), or
 *          null when there is nothing to show.
 */
export function renderActivityFeedWithNested(
  lines: string[],
  childLines: string[],
): string | null {
  // Normalise the child feed: trim every entry and drop the blank ones.
  const nested: string[] = [];
  for (const raw of childLines) {
    const trimmed = raw.trim();
    if (trimmed.length > 0) nested.push(trimmed);
  }
  // No real child activity → plain flat feed.
  if (nested.length === 0) return renderActivityFeed(lines);

  // Parent rows: every visible line is done-styled, because the live step
  // is rendered inside the nested block instead.
  const parentTail = lines.slice(-MIRROR_MAX_LINES);
  const parentOverflow = lines.length - parentTail.length;
  const parentRows = parentTail.map((line) => `<i>✓ ${escapeFeedHtml(line)}</i>`);
  if (parentOverflow > 0) {
    parentRows.unshift(`<i>✓ +${parentOverflow} earlier…</i>`);
  }

  // Child rows: newest line is the in-progress step, the rest are italic.
  const childTail = nested.slice(-NESTED_MAX_LINES);
  const childOverflow = nested.length - childTail.length;
  const newestIdx = childTail.length - 1;
  const childRows = childTail.map((line, idx) => {
    const clipped =
      line.length > NESTED_LINE_MAX ? line.slice(0, NESTED_LINE_MAX - 1) + "…" : line;
    const safe = escapeFeedHtml(clipped);
    if (idx === newestIdx) return `${NESTED_PREFIX}<b>→ ${safe}</b>`;
    return `${NESTED_PREFIX}<i>${safe}</i>`;
  });
  if (childOverflow > 0) {
    childRows.unshift(`${NESTED_PREFIX}<i>+${childOverflow} earlier…</i>`);
  }

  const rendered = [...parentRows, ...childRows];
  return rendered.length > 0 ? rendered.join("\n") : null;
}
|
|
273
|
+
|
|
219
274
|
/**
|
|
220
275
|
* Like appendActivityLine, but for a pre-computed label (from the
|
|
221
276
|
* real-time PreToolUse sidecar / `tool_label` event) — the hook already
|
|
@@ -11,6 +11,59 @@
|
|
|
11
11
|
|
|
12
12
|
import type { Driver, ObservedMessage, ObservedReaction } from "./driver.js";
|
|
13
13
|
|
|
14
|
+
/**
 * Pattern for a worker-activity-feed message (#2000) as it appears in
 * Telegram: the running header `🔧 Worker · …`, which is edited in place and
 * ends as `✅ Worker done · …` or `⚠️ Worker failed · …`. The bare phrases
 * `Worker done` / `Worker failed` also match, and matching is
 * case-insensitive and unanchored.
 *
 * The feed is default-on fleet-wide as of v0.14.19, so background sub-agent
 * activity shows up as its own bot message in any chat — DMs included, even
 * when the scenario only cares about the conversational reply.
 *
 * Single source of truth: the worker-feed scenario asserts against it and
 * recall/reply scenarios filter with {@link isWorkerFeedMessage}.
 */
export const WORKER_FEED_RE = /🔧\s*Worker|✅\s*Worker done|⚠️\s*Worker failed|Worker (?:done|failed)/i;

/**
 * Tells a worker-activity-feed message apart from the agent's own reply.
 *
 * Scenarios use it to drop feed noise when looking for a turn's real answer;
 * otherwise an `expectMessage(/\S/)` can latch onto the feed's first paint
 * and miss (or mis-time) the actual reply. See #2000 / the
 * memory-survives-restart recall scenario.
 *
 * @param m Observed message; only its `text` is inspected.
 * @returns true when `WORKER_FEED_RE` matches anywhere in the text.
 */
export function isWorkerFeedMessage(m: ObservedMessage): boolean {
  const firstMatchAt = m.text.search(WORKER_FEED_RE);
  return firstMatchAt !== -1;
}
|
|
37
|
+
|
|
38
|
+
/**
 * A single tool-activity-feed line as it is observed in Telegram.
 *
 * `renderActivityFeed` (telegram-plugin/tool-activity-summary.ts) emits
 * `→ <label>` for the in-progress step, `✓ <label>` for finished steps and a
 * `✓ +N earlier…` header on long turns. `renderActivityFeedWithNested` adds
 * an indented sub-agent block whose lines all start with `↳`
 * (`↳ <label>`, `↳ → <label>`, `↳ +N earlier…`). Telegram strips the
 * bold/italic wrapping, so the observed text is just the leading glyph plus
 * the label — hence the pattern only checks the first glyph of a trimmed
 * line.
 */
const ACTIVITY_FEED_LINE_RE = /^[→✓↳]\s/u;

/**
 * True when `m` is the live tool-activity feed (the one-message list of
 * "what the agent is doing this turn") rather than the agent's reply.
 *
 * The text is split on newlines, each line is trimmed and blank lines are
 * ignored. The message qualifies only when at least one line remains and
 * EVERY remaining line is an activity line (top-level `→` / `✓`, or a nested
 * `↳` sub-agent line) — so a real reply that merely contains an arrow
 * somewhere is never misclassified.
 *
 * Recall/reply scenarios must skip this in addition to
 * {@link isWorkerFeedMessage}: on a turn that uses tools, the feed paints
 * `→ Finding the right tool` as its own bot message before the real answer
 * lands, and an `expectMessage(/\S/)` would otherwise latch onto it.
 *
 * @param m Observed message; only its `text` is inspected.
 * @returns true for a non-empty message made up solely of feed lines.
 */
export function isActivityFeedMessage(m: ObservedMessage): boolean {
  const lines = m.text
    .split("\n")
    .map((l) => l.trim())
    .filter((l) => l.length > 0);
  // An empty / whitespace-only message is not a feed (and `every` on an
  // empty array would otherwise report true).
  if (lines.length === 0) return false;
  return lines.every((l) => ACTIVITY_FEED_LINE_RE.test(l));
}
|
|
66
|
+
|
|
14
67
|
export interface PollOptions {
|
|
15
68
|
/** Hard deadline; the predicate must resolve truthy before this. */
|
|
16
69
|
timeout: number;
|
|
@@ -646,6 +646,36 @@ export class Driver {
|
|
|
646
646
|
return { messageId: sent.id };
|
|
647
647
|
}
|
|
648
648
|
|
|
649
|
+
/**
 * Post several photos as a single Telegram album (media_group), the way a
 * forwarded album or a multi-image paste arrives.
 *
 * Used to exercise the gateway's A2 multi-attachment coalescing: with the
 * default `coalesce.max_attachments` of 10 the whole album is expected to
 * fold into ONE Claude turn (the agent sees image_path, image_path_2, …).
 *
 * @param chatId     Target chat.
 * @param photoPaths Local file paths, one per album item, in send order.
 * @param caption    Optional caption; attached to the first item only,
 *                   matching Telegram client behaviour.
 * @param opts       Optional send options; `replyTo` wins over
 *                   `messageThreadId` when both are set.
 * @returns The id of every sent message (one per album item).
 */
async sendAlbum(
  chatId: number,
  photoPaths: string[],
  caption?: string,
  opts?: SendTextOptions,
): Promise<{ messageIds: number[] }> {
  const client = this.requireClient();

  // mtcute treats a bare string as a file_id/URL; the `file:` scheme is what
  // forces an upload from local disk (see normalize-input-media).
  const medias = photoPaths.map((photoPath, index) =>
    InputMedia.photo(
      `file:${photoPath}`,
      index === 0 && caption ? { caption } : undefined,
    ),
  );

  // Only pass send params when there is actually something to reply to.
  const replyTo = opts?.replyTo ?? opts?.messageThreadId;
  const sendParams = replyTo ? { replyTo } : undefined;

  const sent = await client.sendMediaGroup(chatId, medias, sendParams);
  const messageIds = sent.map((message) => message.id);
  return { messageIds };
}
|
|
678
|
+
|
|
649
679
|
/**
|
|
650
680
|
* Send or remove an emoji reaction on a target message. Used by the
|
|
651
681
|
* UAT reaction-trigger scenario (#1074) to exercise the gateway's
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { describe, expect, it } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
isActivityFeedMessage,
|
|
4
|
+
isWorkerFeedMessage,
|
|
5
|
+
WORKER_FEED_RE,
|
|
6
|
+
} from "./assertions.js";
|
|
7
|
+
|
|
8
|
+
// Pins the worker-activity-feed detector (#2000) used by recall/reply
|
|
9
|
+
// scenarios to skip feed noise. The live UAT it guards can't run in CI
|
|
10
|
+
// (needs sudo + a real Telegram session), so this is the CI-verifiable
|
|
11
|
+
// floor for the matcher's behavior.
|
|
12
|
+
const feed = (text: string) => ({ text }) as Parameters<typeof isWorkerFeedMessage>[0];
|
|
13
|
+
|
|
14
|
+
describe("isWorkerFeedMessage", () => {
|
|
15
|
+
it("matches the running feed header", () => {
|
|
16
|
+
expect(isWorkerFeedMessage(feed("🔧 Worker · crawling changelog · 0:12"))).toBe(true);
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
it("matches the terminal done/failed recaps", () => {
|
|
20
|
+
expect(isWorkerFeedMessage(feed("✅ Worker done · 10 tools · 1:03"))).toBe(true);
|
|
21
|
+
expect(isWorkerFeedMessage(feed("⚠️ Worker failed · 3 tools"))).toBe(true);
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it("matches a done/failed header even without the leading emoji", () => {
|
|
25
|
+
expect(isWorkerFeedMessage(feed("Worker done · 2 tools"))).toBe(true);
|
|
26
|
+
expect(isWorkerFeedMessage(feed("Worker failed mid-step"))).toBe(true);
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
it("does NOT match an ordinary agent reply", () => {
|
|
30
|
+
expect(isWorkerFeedMessage(feed("on it, pulling the logs now"))).toBe(false);
|
|
31
|
+
expect(
|
|
32
|
+
isWorkerFeedMessage(feed("SWITCHROOM_UAT_MEM_DEADBEEFCAFE1234")),
|
|
33
|
+
).toBe(false);
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
it("does NOT match a reply that merely mentions the word worker", () => {
|
|
37
|
+
expect(
|
|
38
|
+
isWorkerFeedMessage(feed("I'll dispatch a worker to handle the crawl.")),
|
|
39
|
+
).toBe(false);
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
it("exposes the regex for scenarios that assert on the feed directly", () => {
|
|
43
|
+
expect(WORKER_FEED_RE.test("🔧 Worker · x")).toBe(true);
|
|
44
|
+
});
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
describe("isActivityFeedMessage", () => {
|
|
48
|
+
it("matches the in-progress step line", () => {
|
|
49
|
+
expect(isActivityFeedMessage(feed("→ Finding the right tool"))).toBe(true);
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
it("matches a multi-line feed (done steps + in-progress)", () => {
|
|
53
|
+
expect(
|
|
54
|
+
isActivityFeedMessage(feed("✓ Reading CLAUDE.md\n→ Searching memory")),
|
|
55
|
+
).toBe(true);
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
it("matches the +N earlier header", () => {
|
|
59
|
+
expect(
|
|
60
|
+
isActivityFeedMessage(feed("✓ +3 earlier…\n✓ Reading CLAUDE.md\n→ Searching memory")),
|
|
61
|
+
).toBe(true);
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
it("does NOT match an ordinary agent reply", () => {
|
|
65
|
+
expect(isActivityFeedMessage(feed("on it, pulling the logs now"))).toBe(false);
|
|
66
|
+
expect(
|
|
67
|
+
isActivityFeedMessage(feed("SWITCHROOM_UAT_MEM_DEADBEEFCAFE1234")),
|
|
68
|
+
).toBe(false);
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
it("does NOT match a reply that merely contains an arrow mid-text", () => {
|
|
72
|
+
expect(
|
|
73
|
+
isActivityFeedMessage(feed("The flow is request → response → render.")),
|
|
74
|
+
).toBe(false);
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
it("does NOT match an empty message", () => {
|
|
78
|
+
expect(isActivityFeedMessage(feed(" "))).toBe(false);
|
|
79
|
+
});
|
|
80
|
+
});
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Album-coalescing scenario — driver sends a 3-photo Telegram album
|
|
3
|
+
* (media_group) in one shot; the gateway's A2 multi-attachment
|
|
4
|
+
* coalescing (coalesce.max_attachments, default 10 since v0.14.21)
|
|
5
|
+
* MUST fold all three into a SINGLE Claude turn, so the agent sees
|
|
6
|
+
* image_path + image_path_2 + image_path_3 together and can report a
|
|
7
|
+
* count of 3.
|
|
8
|
+
*
|
|
9
|
+
* Regression gate for the default-on flip (#2021): before max_attachments
|
|
10
|
+
* defaulted to 10, an album bypassed coalescing (each part its own turn),
|
|
11
|
+
* so the agent would only ever see ONE image per turn and answer "1".
|
|
12
|
+
* A reply of "3" proves the album coalesced.
|
|
13
|
+
*
|
|
14
|
+
* Part of: https://github.com/switchroom/switchroom/issues/865
|
|
15
|
+
*
|
|
16
|
+
* ## How the signal is read robustly
|
|
17
|
+
*
|
|
18
|
+
* The agent's answer is a bare count, which would collide with incidental
|
|
19
|
+
* digits the chat is now full of by default: the pinned progress card's
|
|
20
|
+
* timer (`00:03`) and — since the worker feed went default-on fleet-wide
|
|
21
|
+
* (#2009 / v0.14.19) — worker-feed lines like `🔧 Worker · … · 0:12`.
|
|
22
|
+
* A "first bot message containing a digit" matcher would latch onto one of
|
|
23
|
+
* those and false-fail even when coalescing is healthy (see the
|
|
24
|
+
* memory-survives-restart matcher-flake note + `isWorkerFeedMessage`).
|
|
25
|
+
*
|
|
26
|
+
* Two defences, mirroring the sibling `jtbd-forwarded-burst-dm` gate:
|
|
27
|
+
* 1. Anchor the answer on a distinctive token — the agent is told to
|
|
28
|
+
* reply `IMAGECOUNT=<n>`. "IMAGECOUNT" never appears in a card or a
|
|
29
|
+
* worker-feed line, so the matcher cannot collide with their digits.
|
|
30
|
+
* 2. Drain observed messages into a per-id map (collapsing streamed
|
|
31
|
+
* edits to latest text), skip our own sends + worker-feed noise, and
|
|
32
|
+
* poll until the ANSWER token appears — rather than returning the
|
|
33
|
+
* first message that happens to match.
|
|
34
|
+
*
|
|
35
|
+
* - Coalesced → one turn sees 3 images → `IMAGECOUNT=3`.
|
|
36
|
+
* - Non-coalesced → the turn carrying the caption/question sees only its
|
|
37
|
+
* own (first) image → `IMAGECOUNT=1`. Surfaced as an explicit failure.
|
|
38
|
+
*
|
|
39
|
+
* Fixtures: three tiny solid-colour JPEGs under fixtures/album/, committed
|
|
40
|
+
* so the gate runs without a generation step. (Regenerate with
|
|
41
|
+
* `ffmpeg -f lavfi -i color=c=red:s=320x240 -frames:v 1 red.jpg`.)
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
import path from "node:path";
|
|
45
|
+
import { existsSync } from "node:fs";
|
|
46
|
+
import { describe, expect, it } from "vitest";
|
|
47
|
+
import { spinUp } from "../harness.js";
|
|
48
|
+
import { pollUntil, isWorkerFeedMessage } from "../assertions.js";
|
|
49
|
+
import type { ObservedMessage } from "../driver.js";
|
|
50
|
+
|
|
51
|
+
const AGENT = "test-harness";
|
|
52
|
+
|
|
53
|
+
const FIXTURE_DIR = path.resolve(__dirname, "..", "fixtures", "album");
|
|
54
|
+
const PHOTOS = ["red.jpg", "green.jpg", "blue.jpg"].map((f) =>
|
|
55
|
+
path.join(FIXTURE_DIR, f),
|
|
56
|
+
);
|
|
57
|
+
|
|
58
|
+
const CAPTION =
|
|
59
|
+
"I just sent you a photo album in a single message. Count the separate " +
|
|
60
|
+
"image files you received in THIS ONE incoming message and reply with " +
|
|
61
|
+
"ONLY the token IMAGECOUNT=<n> (e.g. IMAGECOUNT=3). Nothing else.";
|
|
62
|
+
|
|
63
|
+
// Warm TTFO on test-harness is ~7s; an album adds the (sub-second)
|
|
64
|
+
// coalesce window plus the model looking at three images.
|
|
65
|
+
const ANSWER_TIMEOUT_MS = 90_000;
|
|
66
|
+
|
|
67
|
+
// Extract <n> from the first `IMAGECOUNT=<n>` token in `text`. Matching is
// case-insensitive, the separator may be `=`, `:` or absent, and whitespace
// around it is tolerated. Returns undefined when no token with digits exists.
function imageCountIn(text: string): number | undefined {
  const found = /IMAGECOUNT\s*[:=]?\s*(\d+)/i.exec(text);
  if (!found) return undefined;
  const digits = found[1];
  return Number.parseInt(digits, 10);
}
|
|
72
|
+
|
|
73
|
+
describe("uat: album-coalescing DM round-trip", () => {
|
|
74
|
+
it(
|
|
75
|
+
"a 3-photo album folds into ONE turn — agent reports seeing 3 images",
|
|
76
|
+
async () => {
|
|
77
|
+
for (const p of PHOTOS) {
|
|
78
|
+
if (!existsSync(p)) {
|
|
79
|
+
throw new Error(
|
|
80
|
+
`album fixture missing at ${p} — see scenario header to regenerate`,
|
|
81
|
+
);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
const sc = await spinUp({ agent: AGENT });
|
|
85
|
+
try {
|
|
86
|
+
// Observe BEFORE sending — observeMessages only sees live updates.
|
|
87
|
+
// Drain into a per-id map so streamed edits collapse to latest text;
|
|
88
|
+
// skip our own sends and worker-feed noise so neither can satisfy
|
|
89
|
+
// the count matcher.
|
|
90
|
+
const latestById = new Map<number, ObservedMessage>();
|
|
91
|
+
const stream = sc.driver.observeMessages(sc.botUserId);
|
|
92
|
+
const consume = (async () => {
|
|
93
|
+
for await (const m of stream) {
|
|
94
|
+
if (m.senderUserId === sc.driverUserId) continue;
|
|
95
|
+
if (isWorkerFeedMessage(m)) continue;
|
|
96
|
+
latestById.set(m.messageId, m);
|
|
97
|
+
}
|
|
98
|
+
})();
|
|
99
|
+
|
|
100
|
+
await sc.driver.sendAlbum(sc.botUserId, PHOTOS, CAPTION);
|
|
101
|
+
|
|
102
|
+
// Poll until a bot message carries an IMAGECOUNT token.
|
|
103
|
+
const answer = await pollUntil(
|
|
104
|
+
() => {
|
|
105
|
+
for (const m of latestById.values()) {
|
|
106
|
+
if (imageCountIn(m.text) !== undefined) return m;
|
|
107
|
+
}
|
|
108
|
+
return undefined;
|
|
109
|
+
},
|
|
110
|
+
{ timeout: ANSWER_TIMEOUT_MS, interval: 500 },
|
|
111
|
+
).catch(() => undefined);
|
|
112
|
+
|
|
113
|
+
await stream[Symbol.asyncIterator]().return?.(undefined as never);
|
|
114
|
+
await consume;
|
|
115
|
+
|
|
116
|
+
if (!answer) {
|
|
117
|
+
const seen = [...latestById.values()]
|
|
118
|
+
.map((m) => `#${m.messageId}=${JSON.stringify(m.text.slice(0, 60))}`)
|
|
119
|
+
.join(" ");
|
|
120
|
+
throw new Error(
|
|
121
|
+
`[album-coalescing] No bot reply carried an IMAGECOUNT token ` +
|
|
122
|
+
`within ${ANSWER_TIMEOUT_MS}ms. Bot messages seen: ${seen || "(none)"}.`,
|
|
123
|
+
);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
const count = imageCountIn(answer.text);
|
|
127
|
+
// Coalesced => 3. A non-coalescing gateway answers 1 (the question
|
|
128
|
+
// rides photo #1; the other two parts spill to their own turns).
|
|
129
|
+
expect(count).toBe(3);
|
|
130
|
+
} finally {
|
|
131
|
+
await sc.tearDown();
|
|
132
|
+
}
|
|
133
|
+
},
|
|
134
|
+
ANSWER_TIMEOUT_MS + 30_000,
|
|
135
|
+
);
|
|
136
|
+
});
|
|
@@ -53,9 +53,24 @@ import { describe, it, expect, beforeAll } from "vitest";
|
|
|
53
53
|
import { execSync } from "node:child_process";
|
|
54
54
|
import { randomBytes } from "node:crypto";
|
|
55
55
|
import { spinUp } from "../harness.js";
|
|
56
|
+
import { isActivityFeedMessage, isWorkerFeedMessage } from "../assertions.js";
|
|
57
|
+
import type { ObservedMessage } from "../driver.js";
|
|
56
58
|
|
|
57
59
|
const AGENT = "test-harness";
|
|
58
60
|
|
|
61
|
+
// Two classes of bot message are NOT the agent's reply and must be
|
|
62
|
+
// skipped, or a bare `/\S/` matcher latches onto them and the
|
|
63
|
+
// verbatim-token check fails against noise instead of catching a real
|
|
64
|
+
// memory miss:
|
|
65
|
+
// 1. The worker-activity feed (#2000, default-on since v0.14.19) — a
|
|
66
|
+
// stray background sub-agent posts `🔧 Worker · …` into this DM.
|
|
67
|
+
// 2. The tool-activity feed — on a turn that uses tools (memory recall
|
|
68
|
+
// does), `→ Finding the right tool` paints as its own message before
|
|
69
|
+
// the real answer lands.
|
|
70
|
+
// Match the first non-empty bot reply that is neither.
|
|
71
|
+
const isReply = (m: ObservedMessage): boolean => {
  // Blank messages are never a reply.
  if (!/\S/.test(m.text)) return false;
  // Worker-activity feed noise is not the agent's answer.
  if (isWorkerFeedMessage(m)) return false;
  // Neither is the tool-activity feed; anything else counts as the reply.
  return !isActivityFeedMessage(m);
};
|
|
73
|
+
|
|
59
74
|
const RESTART_BUDGET_MS = 90_000;
|
|
60
75
|
const CAPTURE_REPLY_BUDGET_MS = 60_000;
|
|
61
76
|
const RECALL_REPLY_BUDGET_MS = 120_000;
|
|
@@ -139,7 +154,7 @@ const sudoOk = canShellSudo();
|
|
|
139
154
|
`(This is a memory-survival UAT — store it via hindsight.)`,
|
|
140
155
|
);
|
|
141
156
|
|
|
142
|
-
const captureReply = await sc1.expectMessage(
|
|
157
|
+
const captureReply = await sc1.expectMessage(isReply, {
|
|
143
158
|
from: "bot",
|
|
144
159
|
timeout: CAPTURE_REPLY_BUDGET_MS,
|
|
145
160
|
});
|
|
@@ -180,7 +195,7 @@ const sudoOk = canShellSudo();
|
|
|
180
195
|
`Reply with the token only, no extra text.`,
|
|
181
196
|
);
|
|
182
197
|
|
|
183
|
-
const recallReply = await sc2.expectMessage(
|
|
198
|
+
const recallReply = await sc2.expectMessage(isReply, {
|
|
184
199
|
from: "bot",
|
|
185
200
|
timeout: RECALL_REPLY_BUDGET_MS + 5_000,
|
|
186
201
|
});
|
|
@@ -25,15 +25,21 @@
|
|
|
25
25
|
* - 429 cooldown + message_id drift resilience (re-post on stale edit),
|
|
26
26
|
* - a forced terminal edit on `finish` regardless of throttle.
|
|
27
27
|
*
|
|
28
|
-
* The feed is gated to BACKGROUND workers and
|
|
29
|
-
* `SWITCHROOM_WORKER_ACTIVITY_FEED`
|
|
30
|
-
* watcher already drives the cues (it polls the worker jsonl
|
|
31
|
-
* so it keeps firing after the parent turn ends), which is why
|
|
32
|
-
* is fed from watcher callbacks rather than the bridge event stream.
|
|
28
|
+
* The feed is gated to BACKGROUND workers and is ON by default; set
|
|
29
|
+
* `SWITCHROOM_WORKER_ACTIVITY_FEED=0` to disable it — see the gateway
|
|
30
|
+
* wiring. The watcher already drives the cues (it polls the worker jsonl
|
|
31
|
+
* directly, so it keeps firing after the parent turn ends), which is why
|
|
32
|
+
* the feed is fed from watcher callbacks rather than the bridge event stream.
|
|
33
33
|
*/
|
|
34
34
|
|
|
35
35
|
import { escapeHtml, formatDuration, truncate } from './card-format.js'
|
|
36
36
|
|
|
37
|
+
/**
 * Decide whether the worker-activity feed is enabled from the raw value of
 * SWITCHROOM_WORKER_ACTIVITY_FEED. The feed is ON by default: only the exact
 * string '0' switches it off; unset, empty and every other value leave it on.
 */
export function isWorkerActivityFeedEnabled(envVal: string | undefined): boolean {
  if (envVal === '0') return false
  return true
}
|
|
42
|
+
|
|
37
43
|
export type WorkerActivityState = 'running' | 'done' | 'failed'
|
|
38
44
|
|
|
39
45
|
/** The render-relevant snapshot of a worker at one instant. */
|