switchroom 0.14.26 → 0.14.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. package/dist/cli/switchroom.js +20 -4
  2. package/dist/host-control/main.js +2 -2
  3. package/package.json +1 -1
  4. package/telegram-plugin/bridge/bridge.ts +15 -0
  5. package/telegram-plugin/card-format.ts +65 -0
  6. package/telegram-plugin/dist/bridge/bridge.js +14 -0
  7. package/telegram-plugin/dist/gateway/gateway.js +2201 -1748
  8. package/telegram-plugin/dist/server.js +14 -0
  9. package/telegram-plugin/gateway/gateway.ts +458 -13
  10. package/telegram-plugin/gateway/worker-feed-dispatch.ts +1 -1
  11. package/telegram-plugin/history.ts +16 -4
  12. package/telegram-plugin/permission-title.ts +48 -0
  13. package/telegram-plugin/secret-detect/patterns.ts +8 -0
  14. package/telegram-plugin/secret-detect/redact.ts +76 -0
  15. package/telegram-plugin/tests/card-format.test.ts +96 -0
  16. package/telegram-plugin/tests/gateway-outbound-redact.test.ts +80 -0
  17. package/telegram-plugin/tests/gateway-request-secret.test.ts +78 -0
  18. package/telegram-plugin/tests/history.test.ts +59 -0
  19. package/telegram-plugin/tests/permission-title.test.ts +68 -0
  20. package/telegram-plugin/tests/permission-verdict-resume-guard.test.ts +35 -0
  21. package/telegram-plugin/tests/secret-detect-sanctum.test.ts +115 -0
  22. package/telegram-plugin/tests/worker-activity-feed.test.ts +110 -51
  23. package/telegram-plugin/uat/assertions.ts +8 -6
  24. package/telegram-plugin/uat/feed-matcher.test.ts +14 -8
  25. package/telegram-plugin/uat/scenarios/jtbd-request-secret-dm.test.ts +101 -0
  26. package/telegram-plugin/uat/scenarios/jtbd-worker-activity-feed-dm.test.ts +17 -6
  27. package/telegram-plugin/worker-activity-feed.ts +84 -46
@@ -0,0 +1,115 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import { detectSecrets } from '../secret-detect/index.js'
3
+ import { redact } from '../secret-detect/redact.js'
4
+
5
+ /**
6
+ * Regression for the 2026-06-01 incident: a live Laravel Sanctum / Coolify
7
+ * personal-access token (`<id>|<40-char base62>`, e.g. `17|aB3d…`) pasted by
8
+ * a USER into chat slipped every existing pattern and persisted in plaintext.
9
+ *
10
+ * Diagnosis: the inbound gate (gateway handleInbound) already runs
11
+ * `detectSecrets` at ingest, fail-closed, BEFORE recordInbound + IPC dispatch
12
+ * — so the leak was NOT an inbound bypass and NOT render-time masking. It was
13
+ * pure PATTERN COVERAGE: no rule matched the `<id>|<token>` shape. These tests
14
+ * pin the new `laravel_sanctum_token` rule and the redactor that backs both
15
+ * the history-store persistence (both directions) and the issues pipeline.
16
+ */
17
+
18
+ // Build token fixtures by concatenation so the source file never contains a
19
+ // contiguous secret-shaped literal (repo Push Protection / no-pii lint).
20
+ const ID = '17'
21
+ const BODY40 = 'aB3dE6fH9j'.repeat(4) // 40 base62 chars — Sanctum's Str::random(40)
22
+ const SANCTUM = `${ID}|${BODY40}` // e.g. 17|aB3dE6fH9j…
23
+
24
+ describe('laravel/coolify sanctum token detection', () => {
25
+ it('detects <id>|<40-char> as a high-confidence laravel_sanctum_token', () => {
26
+ const hits = detectSecrets(`here is the token: ${SANCTUM}`)
27
+ const hit = hits.find((d) => d.rule_id === 'laravel_sanctum_token')
28
+ expect(hit).toBeDefined()
29
+ expect(hit!.matched_text).toBe(SANCTUM)
30
+ expect(hit!.confidence).toBe('high')
31
+ expect(hit!.suppressed).toBe(false)
32
+ })
33
+
34
+ it('satisfies the exact inbound-gate predicate (high + not suppressed)', () => {
35
+ // This is verbatim the condition gateway handleInbound uses to decide to
36
+ // delete + defer-to-vault: if it is truthy, the pasted token is
37
+ // intercepted before recordInbound() and the IPC broadcast to the agent.
38
+ const detections = detectSecrets(SANCTUM)
39
+ const gateHit = detections.find((d) => d.confidence === 'high' && !d.suppressed)
40
+ expect(gateHit).toBeDefined()
41
+ })
42
+
43
+ it('redact() masks the token in place, preserving surrounding prose', () => {
44
+ const out = redact(`new token: ${SANCTUM}\nuse it for the deploy API`)
45
+ expect(out).not.toContain(SANCTUM)
46
+ expect(out).not.toContain(BODY40)
47
+ expect(out).toContain('[REDACTED:laravel_sanctum_token]')
48
+ // Surrounding prose is untouched.
49
+ expect(out).toContain('new token:')
50
+ expect(out).toContain('use it for the deploy API')
51
+ })
52
+
53
+ it('covers longer-than-40 token bodies', () => {
54
+ const longTok = `42|${'Xy7Zk2Qp9w'.repeat(6)}` // 60 base62 chars
55
+ const hits = detectSecrets(longTok)
56
+ expect(hits.find((d) => d.rule_id === 'laravel_sanctum_token')?.matched_text).toBe(longTok)
57
+ })
58
+
59
+ it('catches a token mid-sentence and masks only the token', () => {
60
+ const out = redact(`use ${SANCTUM} when deploying, ok?`)
61
+ expect(out).not.toContain(SANCTUM)
62
+ expect(out).toContain('use ')
63
+ expect(out).toContain(' when deploying, ok?')
64
+ })
65
+
66
+ it('catches multiple tokens in one message (redact right-to-left loop)', () => {
67
+ const second = `88|${'mK2pR7vT4x'.repeat(4)}` // distinct <id>|<40 base62>
68
+ const detected = detectSecrets(`old ${SANCTUM} new ${second}`)
69
+ .filter((d) => d.rule_id === 'laravel_sanctum_token')
70
+ expect(detected).toHaveLength(2)
71
+ const out = redact(`old ${SANCTUM} new ${second}`)
72
+ expect(out).not.toContain(SANCTUM)
73
+ expect(out).not.toContain(second)
74
+ expect(out).not.toContain(BODY40)
75
+ })
76
+
77
+ describe('false-positive guards', () => {
78
+ it('does NOT match a pipe-joined value under the 40-char floor', () => {
79
+ const short = `1|${'abcDEF123'}` // 9 chars after the pipe
80
+ expect(detectSecrets(short).some((d) => d.rule_id === 'laravel_sanctum_token')).toBe(false)
81
+ })
82
+
83
+ it('does NOT match exactly 39 base62 chars (one under the floor)', () => {
84
+ const justUnder = `17|${BODY40.slice(0, 39)}`
85
+ expect(
86
+ detectSecrets(justUnder).some((d) => d.rule_id === 'laravel_sanctum_token'),
87
+ ).toBe(false)
88
+ })
89
+
90
+ it('does NOT match a number with no pipe-delimited token', () => {
91
+ expect(
92
+ detectSecrets('order 17 shipped 40 items').some(
93
+ (d) => d.rule_id === 'laravel_sanctum_token',
94
+ ),
95
+ ).toBe(false)
96
+ })
97
+
98
+ it('does NOT match a spaced markdown table cell', () => {
99
+ // Table cells are `| value |` with spaces around the pipe; the Sanctum
100
+ // shape has the token immediately adjacent to the pipe.
101
+ const table = `| 17 | ${BODY40} |`
102
+ expect(
103
+ detectSecrets(table).some((d) => d.rule_id === 'laravel_sanctum_token'),
104
+ ).toBe(false)
105
+ })
106
+ })
107
+
108
+ it('suppresses a token sitting next to an example/fixture marker', () => {
109
+ // A documented example must not trigger a destructive delete+vault.
110
+ const hits = detectSecrets(`example token: ${SANCTUM}`)
111
+ const hit = hits.find((d) => d.rule_id === 'laravel_sanctum_token')
112
+ expect(hit).toBeDefined()
113
+ expect(hit!.suppressed).toBe(true)
114
+ })
115
+ })
@@ -69,72 +69,137 @@ function makeFakeBot(): FakeBot {
69
69
  // ─── renderWorkerActivity (pure) ─────────────────────────────────────────────
70
70
 
71
71
  describe('renderWorkerActivity', () => {
72
- it('renders running header + tool activity line + summary', () => {
72
+ /** Count of step-feed bullets (`✓` done + `→` in-progress) in a body. */
73
+ const stepCount = (s: string) => (s.match(/[✓→]/g) ?? []).length
74
+
75
+ it('renders the native header + running status + step feed', () => {
73
76
  const out = renderWorkerActivity(view())
74
- expect(out).toContain('🔧 <b>Worker</b> · <i>research competitors</i>')
75
- expect(out).toContain(' <code>Bash</code> grep -r pricing')
76
- expect(out).toContain('(3 tools · ')
77
- expect(out).toContain('↳ <i>scanning vendor pages</i>')
77
+ expect(out).toContain('🛠 <b>Worker</b> · <i>research competitors</i>')
78
+ expect(out).toContain('running · ')
79
+ expect(out).toContain('3 tools')
80
+ // No narrativeLines → the latestSummary surfaces as the newest `→` step.
81
+ expect(out).toContain('<b>→ scanning vendor pages</b>')
82
+ // The old tool/arg chrome is gone.
83
+ expect(out).not.toContain('⚡')
84
+ expect(out).not.toContain('<code>')
78
85
  })
79
86
 
80
- it('shows a "starting…" line when no tool has run yet', () => {
87
+ it('shows a "starting…" line when no step has run yet', () => {
81
88
  const out = renderWorkerActivity(view({ lastTool: null, latestSummary: '' }))
82
- expect(out).toContain('🔧 <b>Worker</b>')
89
+ expect(out).toContain('🛠 <b>Worker</b>')
83
90
  expect(out).toContain('starting…')
84
- expect(out).not.toContain('')
91
+ expect(out).not.toContain('')
85
92
  })
86
93
 
87
- it('omits the summary line when latestSummary is blank', () => {
94
+ it('falls back to starting… when the summary is blank', () => {
88
95
  const out = renderWorkerActivity(view({ latestSummary: ' ' }))
89
- expect(out).not.toContain('')
96
+ expect(out).toContain('starting…')
97
+ expect(stepCount(out)).toBe(0)
90
98
  })
91
99
 
92
100
  it('uses singular "tool" for a single tool call', () => {
93
101
  const out = renderWorkerActivity(view({ toolCount: 1 }))
94
- expect(out).toContain('(1 tool · ')
102
+ expect(out).toContain('1 tool')
103
+ expect(out).not.toContain('1 tools')
95
104
  })
96
105
 
97
- it('renders a done terminal recap', () => {
98
- const out = renderWorkerActivity(view({ state: 'done', toolCount: 5 }))
99
- expect(out).toContain('✅ <b>Worker done</b> · <i>research competitors</i>')
100
- expect(out).toContain('5 tools · ')
101
- expect(out).not.toContain('')
106
+ it('renders a done terminal recap with a rule + cleaned result', () => {
107
+ const out = renderWorkerActivity(
108
+ view({ state: 'done', toolCount: 5, latestSummary: 'PR #21 opened' }),
109
+ )
110
+ expect(out).toContain('🛠 <b>Worker</b> · <i>research competitors</i>')
111
+ expect(out).toContain('finished · completed · 5 tools · ')
112
+ expect(out).toContain('─────')
113
+ expect(out).toContain('✅ <i>PR #21 opened</i>')
102
114
  })
103
115
 
104
116
  it('renders a failed terminal recap', () => {
105
- const out = renderWorkerActivity(view({ state: 'failed' }))
106
- expect(out).toContain('⚠️ <b>Worker failed</b>')
117
+ const out = renderWorkerActivity(view({ state: 'failed', latestSummary: 'blew up' }))
118
+ expect(out).toContain('finished · failed · ')
119
+ expect(out).toContain('⚠️ <i>blew up</i>')
120
+ })
121
+
122
+ it('omits the rule + result line when the terminal result is empty', () => {
123
+ const out = renderWorkerActivity(view({ state: 'done', latestSummary: ' ' }))
124
+ expect(out).toContain('finished · completed · ')
125
+ expect(out).not.toContain('─────')
107
126
  })
108
127
 
109
- it('grows a narrative block when narrativeLines is present', () => {
128
+ it('grows a step feed when narrativeLines is present (prior ✓, newest →)', () => {
110
129
  const out = renderWorkerActivity(
111
130
  view({
112
131
  latestSummary: 'newest only — should be ignored',
113
132
  narrativeLines: ['read the brief', 'scanned vendor A', 'scanned vendor B'],
114
133
  }),
115
134
  )
116
- expect(out).toContain('<i>read the brief</i>')
117
- expect(out).toContain('<i>scanned vendor A</i>')
118
- expect(out).toContain(' <i>scanned vendor B</i>')
135
+ expect(out).toContain('<i>✓ read the brief</i>')
136
+ expect(out).toContain('<i>✓ scanned vendor A</i>')
137
+ expect(out).toContain('<b>→ scanned vendor B</b>')
119
138
  // The single-line latestSummary fallback is NOT used when a block is present.
120
139
  expect(out).not.toContain('newest only')
121
- // Three narrative lines → three ↳ lines.
122
- expect(out.match(/↳/g) ?? []).toHaveLength(3)
140
+ expect(stepCount(out)).toBe(3)
123
141
  })
124
142
 
125
143
  it('falls back to latestSummary when narrativeLines is empty', () => {
126
144
  const out = renderWorkerActivity(view({ narrativeLines: [], latestSummary: 'one line' }))
127
- expect(out).toContain(' <i>one line</i>')
128
- expect(out.match(/↳/g) ?? []).toHaveLength(1)
145
+ expect(out).toContain('<b>→ one line</b>')
146
+ expect(stepCount(out)).toBe(1)
129
147
  })
130
148
 
131
- it('drops blank narrative lines from the block', () => {
149
+ it('drops blank narrative lines from the feed', () => {
150
+ const out = renderWorkerActivity(view({ narrativeLines: ['kept', ' ', 'also kept'] }))
151
+ expect(out).toContain('<i>✓ kept</i>')
152
+ expect(out).toContain('<b>→ also kept</b>')
153
+ expect(stepCount(out)).toBe(2)
154
+ })
155
+
156
+ it('shows an overflow header when the feed exceeds the cap', () => {
157
+ const lines = Array.from({ length: 9 }, (_, i) => `step ${i + 1}`)
158
+ const out = renderWorkerActivity(view({ narrativeLines: lines }))
159
+ expect(out).toContain('<i>✓ +3 earlier…</i>')
160
+ expect(out).not.toContain('step 1')
161
+ expect(out).toContain('<i>✓ step 4</i>')
162
+ expect(out).toContain('<b>→ step 9</b>')
163
+ // 6 visible step lines (the overflow header is not itself a step).
164
+ expect(out.match(/step \d/g) ?? []).toHaveLength(6)
165
+ })
166
+
167
+ it('strips Markdown markup from narrative + description + result', () => {
168
+ const running = renderWorkerActivity(
169
+ view({
170
+ description: '**Build** the `sync`',
171
+ narrativeLines: ['- ran the **full** suite', '`git push`'],
172
+ }),
173
+ )
174
+ expect(running).toContain('🛠 <b>Worker</b> · <i>Build the sync</i>')
175
+ expect(running).toContain('ran the full suite')
176
+ expect(running).toContain('git push')
177
+ expect(running).not.toContain('**')
178
+ expect(running).not.toContain('`')
179
+
180
+ const done = renderWorkerActivity(
181
+ view({ state: 'done', latestSummary: '## Done\n\n**PR #21** opened\n\n---\n`merged`' }),
182
+ )
183
+ expect(done).toContain('Done PR #21 opened merged')
184
+ expect(done).not.toContain('**')
185
+ expect(done).not.toContain('`')
186
+ // The card's own divider is the box-drawing rule, never a raw `---`.
187
+ expect(done).not.toMatch(/(^|\n)\s*-{3,}\s*(\n|$)/)
188
+ })
189
+
190
+ it('renders a multi-line narrative entry as one clean step (no raw ## leak)', () => {
191
+ // Screenshot regression: a running-card step whose narrative entry is
192
+ // itself multi-line ("Done.\n\n## Summary\n…"). The heading marker must
193
+ // not leak and the step must collapse to a single visual line.
132
194
  const out = renderWorkerActivity(
133
- view({ narrativeLines: ['kept', ' ', 'also kept'] }),
195
+ view({
196
+ narrativeLines: ['Done.\n\n## Summary\n\nFixed the bug where a bad password logged you out'],
197
+ latestSummary: '',
198
+ }),
134
199
  )
135
- expect(out).toContain(' <i>kept</i>')
136
- expect(out).toContain('↳ <i>also kept</i>')
137
- expect(out.match(/↳/g) ?? []).toHaveLength(2)
200
+ expect(out).not.toContain('## Summary')
201
+ expect(out).not.toContain('\n\n')
202
+ expect(out).toContain('Done. Summary Fixed the bug where a bad password logged you out')
138
203
  })
139
204
 
140
205
  it('escapes HTML inside narrative lines', () => {
@@ -142,17 +207,11 @@ describe('renderWorkerActivity', () => {
142
207
  expect(out).toContain('a &lt;b&gt;x&lt;/b&gt; &amp; y')
143
208
  })
144
209
 
145
- it('escapes HTML in description, tool, arg, and summary', () => {
210
+ it('escapes HTML in description and summary', () => {
146
211
  const out = renderWorkerActivity(
147
- view({
148
- description: 'a <b>bold</b> task',
149
- lastTool: { name: 'Ba<sh', sanitisedArg: 'x & y' },
150
- latestSummary: 'a > b',
151
- }),
212
+ view({ description: 'a <b>bold</b> task', latestSummary: 'a > b' }),
152
213
  )
153
214
  expect(out).toContain('a &lt;b&gt;bold&lt;/b&gt; task')
154
- expect(out).toContain('Ba&lt;sh')
155
- expect(out).toContain('x &amp; y')
156
215
  expect(out).toContain('a &gt; b')
157
216
  })
158
217
  })
@@ -206,7 +265,7 @@ describe('createWorkerActivityFeed', () => {
206
265
  clock = 13_000 // +3000 since last edit > 2500
207
266
  await feed.update('w1', 'chat', view({ toolCount: 3 }))
208
267
  expect(bot.edits).toHaveLength(1)
209
- expect(bot.edits[0].text).toContain('(3 tools · ')
268
+ expect(bot.edits[0].text).toContain('3 tools')
210
269
  })
211
270
 
212
271
  it('forces a terminal edit on finish, skipping the throttle', async () => {
@@ -219,7 +278,7 @@ describe('createWorkerActivityFeed', () => {
219
278
  clock = 10_500 // well within the throttle window
220
279
  await feed.finish('w1', view({ state: 'done', toolCount: 5 }))
221
280
  expect(bot.edits).toHaveLength(1)
222
- expect(bot.edits[0].text).toContain('✅ <b>Worker done</b>')
281
+ expect(bot.edits[0].text).toContain('finished · completed · 5 tools')
223
282
  // finish forgets the worker.
224
283
  expect(feed.has('w1')).toBe(false)
225
284
  expect(feed.size).toBe(0)
@@ -303,7 +362,7 @@ describe('createWorkerActivityFeed', () => {
303
362
 
304
363
  await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'read the brief' }))
305
364
  expect(bot.sent).toHaveLength(1)
306
- expect(bot.sent[0].text).toContain(' <i>read the brief</i>')
365
+ expect(bot.sent[0].text).toContain('<b>→ read the brief</b>')
307
366
 
308
367
  clock = 11_000
309
368
  await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'scanned vendor A' }))
@@ -311,10 +370,10 @@ describe('createWorkerActivityFeed', () => {
311
370
  await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'scanned vendor B' }))
312
371
 
313
372
  const last = bot.edits.at(-1)!
314
- expect(last.text).toContain('<i>read the brief</i>')
315
- expect(last.text).toContain('<i>scanned vendor A</i>')
316
- expect(last.text).toContain(' <i>scanned vendor B</i>')
317
- expect(last.text.match(/↳/g) ?? []).toHaveLength(3)
373
+ expect(last.text).toContain('<i>✓ read the brief</i>')
374
+ expect(last.text).toContain('<i>✓ scanned vendor A</i>')
375
+ expect(last.text).toContain('<b>→ scanned vendor B</b>')
376
+ expect(last.text.match(/[✓→]/g) ?? []).toHaveLength(3)
318
377
  })
319
378
 
320
379
  it('dedups a repeated narrative line so the block does not duplicate', async () => {
@@ -329,7 +388,7 @@ describe('createWorkerActivityFeed', () => {
329
388
  await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'same line' }))
330
389
 
331
390
  const last = bot.edits.at(-1)!
332
- expect(last.text.match(/↳/g) ?? []).toHaveLength(1)
391
+ expect(last.text.match(/[✓→]/g) ?? []).toHaveLength(1)
333
392
  })
334
393
 
335
394
  it('caps the narrative block to the last 6 lines', async () => {
@@ -343,7 +402,7 @@ describe('createWorkerActivityFeed', () => {
343
402
  }
344
403
 
345
404
  const last = bot.edits.at(-1)!
346
- expect(last.text.match(/↳/g) ?? []).toHaveLength(6)
405
+ expect(last.text.match(/[✓→]/g) ?? []).toHaveLength(6)
347
406
  // Oldest lines evicted; newest retained.
348
407
  expect(last.text).not.toContain('line 1')
349
408
  expect(last.text).not.toContain('line 3')
@@ -368,9 +427,9 @@ describe('createWorkerActivityFeed', () => {
368
427
  clock = 13_000
369
428
  await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'line C' }))
370
429
  const last = bot.edits.at(-1)!
371
- expect(last.text).toContain('<i>line A</i>')
372
- expect(last.text).toContain('<i>line B</i>')
373
- expect(last.text).toContain(' <i>line C</i>')
430
+ expect(last.text).toContain('<i>✓ line A</i>')
431
+ expect(last.text).toContain('<i>✓ line B</i>')
432
+ expect(last.text).toContain('<b>→ line C</b>')
374
433
  })
375
434
 
376
435
  it('forwards threadId as message_thread_id on send', async () => {
@@ -13,16 +13,18 @@ import type { Driver, ObservedMessage, ObservedReaction } from "./driver.js";
13
13
 
14
14
  /**
15
15
  * Canonical shape of a worker-activity-feed message (#2000) as rendered
16
- * in Telegram: a running header `🔧 Worker · …` that edits in place and
17
- * finalizes to `✅ Worker done · …` / `⚠️ Worker failed · …`. The feed is
18
- * default-on fleet-wide as of v0.14.19, so background sub-agent activity
19
- * now surfaces as its own bot message in any chat — including DMs whose
20
- * scenario only cares about the agent's conversational reply.
16
+ * in Telegram: a native card with a `🛠 Worker · …` header that edits in
17
+ * place (running) and finalizes to a `finished · completed/failed · …`
18
+ * status line. The header is present in every state, so it's the primary
19
+ * key. The feed is default-on fleet-wide as of v0.14.19, so background
20
+ * sub-agent activity surfaces as its own bot message in any chat —
21
+ * including DMs whose scenario only cares about the agent's conversational
22
+ * reply.
21
23
  *
22
24
  * Single source of truth; the worker-feed scenario asserts against this,
23
25
  * and recall/reply scenarios exclude it via {@link isWorkerFeedMessage}.
24
26
  */
25
- export const WORKER_FEED_RE = /🔧\s*Worker|✅\s*Worker done|⚠️\s*Worker failed|Worker (?:done|failed)/i;
27
+ export const WORKER_FEED_RE = /🛠[️]?\s*Worker\b|finished\s*·\s*(?:completed|failed)/i;
26
28
 
27
29
  /**
28
30
  * True when `m` is a worker-activity-feed message rather than the agent's
@@ -13,17 +13,23 @@ const feed = (text: string) => ({ text }) as Parameters<typeof isWorkerFeedMessa
13
13
 
14
14
  describe("isWorkerFeedMessage", () => {
15
15
  it("matches the running feed header", () => {
16
- expect(isWorkerFeedMessage(feed("🔧 Worker · crawling changelog · 0:12"))).toBe(true);
16
+ expect(isWorkerFeedMessage(feed("🛠 Worker · crawling changelog"))).toBe(true);
17
+ expect(
18
+ isWorkerFeedMessage(feed("🛠 Worker · crawling changelog\nrunning · 00:12 · 4 tools")),
19
+ ).toBe(true);
17
20
  });
18
21
 
19
- it("matches the terminal done/failed recaps", () => {
20
- expect(isWorkerFeedMessage(feed("✅ Worker done · 10 tools · 1:03"))).toBe(true);
21
- expect(isWorkerFeedMessage(feed("⚠️ Worker failed · 3 tools"))).toBe(true);
22
+ it("matches the terminal finished status (completed/failed)", () => {
23
+ expect(
24
+ isWorkerFeedMessage(feed("🛠 Worker · crawl\nfinished · completed · 10 tools · 01:03")),
25
+ ).toBe(true);
26
+ expect(
27
+ isWorkerFeedMessage(feed("🛠 Worker · crawl\nfinished · failed · 3 tools · 00:08")),
28
+ ).toBe(true);
22
29
  });
23
30
 
24
- it("matches a done/failed header even without the leading emoji", () => {
25
- expect(isWorkerFeedMessage(feed("Worker done · 2 tools"))).toBe(true);
26
- expect(isWorkerFeedMessage(feed("Worker failed mid-step"))).toBe(true);
31
+ it("matches the finished status even on its own line (header-stripped edge)", () => {
32
+ expect(isWorkerFeedMessage(feed("finished · completed · 2 tools · 00:30"))).toBe(true);
27
33
  });
28
34
 
29
35
  it("does NOT match an ordinary agent reply", () => {
@@ -40,7 +46,7 @@ describe("isWorkerFeedMessage", () => {
40
46
  });
41
47
 
42
48
  it("exposes the regex for scenarios that assert on the feed directly", () => {
43
- expect(WORKER_FEED_RE.test("🔧 Worker · x")).toBe(true);
49
+ expect(WORKER_FEED_RE.test("🛠 Worker · x")).toBe(true);
44
50
  });
45
51
  });
46
52
 
@@ -0,0 +1,101 @@
1
+ /**
2
+ * End-to-end UAT for the agent-initiated `request_secret` flow (#2045).
3
+ *
4
+ * The upstream fix behind the 2026-06-01 incident: an agent must never ask
5
+ * the user to paste a secret into chat. Instead it calls `request_secret`,
6
+ * the operator taps [Provide securely] and sends the value once, and the
7
+ * gateway deletes the message + writes it straight to the vault — the raw
8
+ * value never lands in history/logs and is never returned to the agent.
9
+ *
10
+ * This round-trips through real Telegram + real broker + real agent and
11
+ * asserts the FINAL state: (a) the secure card renders with the right
12
+ * button, (b) after the operator provides the value the bot confirms a
13
+ * vault save, (c) the value actually landed in the vault (host-side read),
14
+ * and (d) the raw value never reappears in a bot message.
15
+ *
16
+ * **Skipped by default.** To unskip:
17
+ *
18
+ * 1. Standard UAT preflight (`uat/SETUP.md` §5-6) — test-harness agent live
19
+ * (running build WITH request_secret, #2045), driver session auth'd,
20
+ * env vars set.
21
+ * 2. `SWITCHROOM_VAULT_PASSPHRASE` in env (read-back asserts the value
22
+ * landed; under telegram-id approval mode the save itself is
23
+ * posture-attested and needs no passphrase).
24
+ * 3. Remove `describe.skip`.
25
+ *
26
+ * Cleanup is operator-side: `switchroom vault rm uat/req-secret-target`.
27
+ */
28
+
29
+ import { execFileSync } from "node:child_process";
30
+ import { describe, expect, it } from "vitest";
31
+ import { spinUp } from "../harness.js";
32
+
33
+ const TARGET_KEY = "uat/req-secret-target";
34
+ // Built at runtime so the source never holds a contiguous secret literal.
35
+ const PROVIDED_VALUE = "sentinel-2045-" + "Ab3xK9pQ".repeat(2);
36
+
37
+ function hostVaultGet(key: string): string {
38
+ try {
39
+ return execFileSync("switchroom", ["vault", "get", key], {
40
+ encoding: "utf-8",
41
+ env: { ...process.env },
42
+ }).trim();
43
+ } catch {
44
+ return "";
45
+ }
46
+ }
47
+
48
+ describe.skip("uat: request_secret end-to-end (#2045)", () => {
49
+ it(
50
+ "agent calls request_secret → operator provides → value vaulted, never echoed",
51
+ async () => {
52
+ const sc = await spinUp({ agent: "test-harness" });
53
+ try {
54
+ // 1. Prompt the agent to call request_secret for a key it lacks.
55
+ // (We don't fire the card from the driver — the point is to
56
+ // cover the agent → gateway → capture → vault path.)
57
+ await sc.sendDM(
58
+ `I need an API token but it is NOT in your vault. Use your ` +
59
+ `request_secret MCP tool with key="${TARGET_KEY}", ` +
60
+ `reason="UAT for #2045" to ask me for it securely. Do NOT ask me ` +
61
+ `to paste it as a normal message.`,
62
+ );
63
+
64
+ // 2. The secure card (request_secret renders "needs a secret").
65
+ const card = await sc.expectMessage(/needs a secret/i, {
66
+ from: "bot",
67
+ timeout: 60_000,
68
+ });
69
+
70
+ // 3. It must carry a [Provide securely] button.
71
+ const kb = await sc.driver.getKeyboard(sc.botUserId, card.messageId);
72
+ expect(kb).not.toBeNull();
73
+ const provide = kb!
74
+ .flat()
75
+ .find((b) => b.callbackData !== undefined && /provide/i.test(b.text));
76
+ expect(provide, "card should have a [Provide securely] button").toBeDefined();
77
+
78
+ // 4. Tap Provide → the card arms capture + prompts for the value.
79
+ await sc.driver.pressButton(sc.botUserId, card.messageId, provide!.callbackData!);
80
+ await sc.expectMessage(/Send the value for/i, { from: "bot", timeout: 15_000 });
81
+
82
+ // 5. Send the value. The gateway deletes it + writes to the vault.
83
+ await sc.sendDM(PROVIDED_VALUE);
84
+ const confirm = await sc.expectMessage(/saved as/i, {
85
+ from: "bot",
86
+ timeout: 30_000,
87
+ });
88
+
89
+ // 6. The confirmation references the key, NOT the raw value.
90
+ expect(confirm.text).toContain(`vault:${TARGET_KEY}`);
91
+ expect(confirm.text).not.toContain(PROVIDED_VALUE);
92
+
93
+ // 7. Load-bearing: the value actually landed in the vault.
94
+ expect(hostVaultGet(TARGET_KEY)).toBe(PROVIDED_VALUE);
95
+ } finally {
96
+ await sc.tearDown();
97
+ }
98
+ },
99
+ 300_000,
100
+ );
101
+ });
@@ -12,12 +12,14 @@
12
12
  * sleep/echo work, so it narrates between tools and the feed can paint
13
13
  * + edit), then asserts:
14
14
  *
15
- * 1. a worker-feed message appears (🔧 Worker · …), distinct from the
15
+ * 1. a worker-feed message appears (🛠 Worker · …), distinct from the
16
16
  * parent's ack reply — proving background activity surfaces after
17
17
  * the parent turn closed;
18
18
  * 2. the message edits in place while work is in flight (body changes
19
19
  * across a window) — proving it's live, not a one-shot post;
20
- * 3. it finalizes to the terminal recap (✅ Worker done · … / N tools).
20
+ * 3. it finalizes to the terminal recap (finished · completed · N tools);
21
+ * 4. the native card never leaks raw Markdown (no `**`, backticks, or
22
+ * ASCII `---` rule) — the #94-class regression guard.
21
23
  *
22
24
  * It logs every observed body so a human can read the real rendered UX.
23
25
  *
@@ -53,10 +55,11 @@ const BG_DISPATCH_PROMPT =
53
55
  `brief reply saying you've kicked off the background worker so I can ` +
54
56
  `watch its progress.`;
55
57
 
56
- // The feed header rendered in Telegram: "🔧 Worker · <desc>" (running)
57
- // or "✅ Worker done · …" / "⚠️ Worker failed · …" (terminal).
58
- const WORKER_FEED_RE = /🔧\s*Worker|Worker done|Worker failed|⚡/i;
59
- const WORKER_DONE_RE = /✅\s*Worker done|⚠️\s*Worker failed/i;
58
+ // The feed header rendered in Telegram: "🛠 Worker · <desc>" with a
59
+ // "running · …" status (running) or "finished · completed/failed · …"
60
+ // (terminal).
61
+ const WORKER_FEED_RE = /🛠\s*Worker|running\s*·|finished\s*·/i;
62
+ const WORKER_DONE_RE = /finished\s*·\s*(completed|failed)/i;
60
63
 
61
64
  describe("uat: live worker-activity feed (#2000)", () => {
62
65
  it(
@@ -116,6 +119,14 @@ describe("uat: live worker-activity feed (#2000)", () => {
116
119
  expect(doneText!).toMatch(/tools?|tool ·/i);
117
120
  // Did the body actually move between first paint and terminal?
118
121
  expect(doneText).not.toBe(before);
122
+ // #94-class regression guard: the native card strips worker Markdown,
123
+ // so the rendered body must never carry raw `**`, backticks, or an
124
+ // ASCII `---` rule (the finished divider is the box-drawing `─────`).
125
+ expect(doneText!, "raw ** leaked into the card").not.toMatch(/\*\*/);
126
+ expect(doneText!, "raw backtick leaked into the card").not.toContain("`");
127
+ expect(doneText!, "raw --- rule leaked into the card").not.toMatch(
128
+ /(^|\n)\s*-{3,}\s*(\n|$)/,
129
+ );
119
130
  } finally {
120
131
  await sc.tearDown();
121
132
  }