switchroom 0.14.26 → 0.14.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +20 -4
- package/dist/host-control/main.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/bridge/bridge.ts +15 -0
- package/telegram-plugin/card-format.ts +65 -0
- package/telegram-plugin/dist/bridge/bridge.js +14 -0
- package/telegram-plugin/dist/gateway/gateway.js +2201 -1748
- package/telegram-plugin/dist/server.js +14 -0
- package/telegram-plugin/gateway/gateway.ts +458 -13
- package/telegram-plugin/gateway/worker-feed-dispatch.ts +1 -1
- package/telegram-plugin/history.ts +16 -4
- package/telegram-plugin/permission-title.ts +48 -0
- package/telegram-plugin/secret-detect/patterns.ts +8 -0
- package/telegram-plugin/secret-detect/redact.ts +76 -0
- package/telegram-plugin/tests/card-format.test.ts +96 -0
- package/telegram-plugin/tests/gateway-outbound-redact.test.ts +80 -0
- package/telegram-plugin/tests/gateway-request-secret.test.ts +78 -0
- package/telegram-plugin/tests/history.test.ts +59 -0
- package/telegram-plugin/tests/permission-title.test.ts +68 -0
- package/telegram-plugin/tests/permission-verdict-resume-guard.test.ts +35 -0
- package/telegram-plugin/tests/secret-detect-sanctum.test.ts +115 -0
- package/telegram-plugin/tests/worker-activity-feed.test.ts +110 -51
- package/telegram-plugin/uat/assertions.ts +8 -6
- package/telegram-plugin/uat/feed-matcher.test.ts +14 -8
- package/telegram-plugin/uat/scenarios/jtbd-request-secret-dm.test.ts +101 -0
- package/telegram-plugin/uat/scenarios/jtbd-worker-activity-feed-dm.test.ts +17 -6
- package/telegram-plugin/worker-activity-feed.ts +84 -46
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { detectSecrets } from '../secret-detect/index.js'
|
|
3
|
+
import { redact } from '../secret-detect/redact.js'
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Regression for the 2026-06-01 incident: a live Laravel Sanctum / Coolify
|
|
7
|
+
* personal-access token (`<id>|<40-char base62>`, e.g. `17|aB3d…`) pasted by
|
|
8
|
+
* a USER into chat slipped every existing pattern and persisted in plaintext.
|
|
9
|
+
*
|
|
10
|
+
* Diagnosis: the inbound gate (gateway handleInbound) already runs
|
|
11
|
+
* `detectSecrets` at ingest, fail-closed, BEFORE recordInbound + IPC dispatch
|
|
12
|
+
* — so the leak was NOT an inbound bypass and NOT render-time masking. It was
|
|
13
|
+
* pure PATTERN COVERAGE: no rule matched the `<id>|<token>` shape. These tests
|
|
14
|
+
* pin the new `laravel_sanctum_token` rule and the redactor that backs both
|
|
15
|
+
* the history-store persistence (both directions) and the issues pipeline.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
// Build token fixtures by concatenation so the source file never contains a
|
|
19
|
+
// contiguous secret-shaped literal (repo Push Protection / no-pii lint).
|
|
20
|
+
const ID = '17'
|
|
21
|
+
const BODY40 = 'aB3dE6fH9j'.repeat(4) // 40 base62 chars — Sanctum's Str::random(40)
|
|
22
|
+
const SANCTUM = `${ID}|${BODY40}` // e.g. 17|aB3dE6fH9j…
|
|
23
|
+
|
|
24
|
+
describe('laravel/coolify sanctum token detection', () => {
|
|
25
|
+
it('detects <id>|<40-char> as a high-confidence laravel_sanctum_token', () => {
|
|
26
|
+
const hits = detectSecrets(`here is the token: ${SANCTUM}`)
|
|
27
|
+
const hit = hits.find((d) => d.rule_id === 'laravel_sanctum_token')
|
|
28
|
+
expect(hit).toBeDefined()
|
|
29
|
+
expect(hit!.matched_text).toBe(SANCTUM)
|
|
30
|
+
expect(hit!.confidence).toBe('high')
|
|
31
|
+
expect(hit!.suppressed).toBe(false)
|
|
32
|
+
})
|
|
33
|
+
|
|
34
|
+
it('satisfies the exact inbound-gate predicate (high + not suppressed)', () => {
|
|
35
|
+
// This is verbatim the condition gateway handleInbound uses to decide to
|
|
36
|
+
// delete + defer-to-vault: if it is truthy, the pasted token is
|
|
37
|
+
// intercepted before recordInbound() and the IPC broadcast to the agent.
|
|
38
|
+
const detections = detectSecrets(SANCTUM)
|
|
39
|
+
const gateHit = detections.find((d) => d.confidence === 'high' && !d.suppressed)
|
|
40
|
+
expect(gateHit).toBeDefined()
|
|
41
|
+
})
|
|
42
|
+
|
|
43
|
+
it('redact() masks the token in place, preserving surrounding prose', () => {
|
|
44
|
+
const out = redact(`new token: ${SANCTUM}\nuse it for the deploy API`)
|
|
45
|
+
expect(out).not.toContain(SANCTUM)
|
|
46
|
+
expect(out).not.toContain(BODY40)
|
|
47
|
+
expect(out).toContain('[REDACTED:laravel_sanctum_token]')
|
|
48
|
+
// Surrounding prose is untouched.
|
|
49
|
+
expect(out).toContain('new token:')
|
|
50
|
+
expect(out).toContain('use it for the deploy API')
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
it('covers longer-than-40 token bodies', () => {
|
|
54
|
+
const longTok = `42|${'Xy7Zk2Qp9w'.repeat(6)}` // 60 base62 chars
|
|
55
|
+
const hits = detectSecrets(longTok)
|
|
56
|
+
expect(hits.find((d) => d.rule_id === 'laravel_sanctum_token')?.matched_text).toBe(longTok)
|
|
57
|
+
})
|
|
58
|
+
|
|
59
|
+
it('catches a token mid-sentence and masks only the token', () => {
|
|
60
|
+
const out = redact(`use ${SANCTUM} when deploying, ok?`)
|
|
61
|
+
expect(out).not.toContain(SANCTUM)
|
|
62
|
+
expect(out).toContain('use ')
|
|
63
|
+
expect(out).toContain(' when deploying, ok?')
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
it('catches multiple tokens in one message (redact right-to-left loop)', () => {
|
|
67
|
+
const second = `88|${'mK2pR7vT4x'.repeat(4)}` // distinct <id>|<40 base62>
|
|
68
|
+
const detected = detectSecrets(`old ${SANCTUM} new ${second}`)
|
|
69
|
+
.filter((d) => d.rule_id === 'laravel_sanctum_token')
|
|
70
|
+
expect(detected).toHaveLength(2)
|
|
71
|
+
const out = redact(`old ${SANCTUM} new ${second}`)
|
|
72
|
+
expect(out).not.toContain(SANCTUM)
|
|
73
|
+
expect(out).not.toContain(second)
|
|
74
|
+
expect(out).not.toContain(BODY40)
|
|
75
|
+
})
|
|
76
|
+
|
|
77
|
+
describe('false-positive guards', () => {
|
|
78
|
+
it('does NOT match a pipe-joined value under the 40-char floor', () => {
|
|
79
|
+
const short = `1|${'abcDEF123'}` // 9 chars after the pipe
|
|
80
|
+
expect(detectSecrets(short).some((d) => d.rule_id === 'laravel_sanctum_token')).toBe(false)
|
|
81
|
+
})
|
|
82
|
+
|
|
83
|
+
it('does NOT match exactly 39 base62 chars (one under the floor)', () => {
|
|
84
|
+
const justUnder = `17|${BODY40.slice(0, 39)}`
|
|
85
|
+
expect(
|
|
86
|
+
detectSecrets(justUnder).some((d) => d.rule_id === 'laravel_sanctum_token'),
|
|
87
|
+
).toBe(false)
|
|
88
|
+
})
|
|
89
|
+
|
|
90
|
+
it('does NOT match a number with no pipe-delimited token', () => {
|
|
91
|
+
expect(
|
|
92
|
+
detectSecrets('order 17 shipped 40 items').some(
|
|
93
|
+
(d) => d.rule_id === 'laravel_sanctum_token',
|
|
94
|
+
),
|
|
95
|
+
).toBe(false)
|
|
96
|
+
})
|
|
97
|
+
|
|
98
|
+
it('does NOT match a spaced markdown table cell', () => {
|
|
99
|
+
// Table cells are `| value |` with spaces around the pipe; the Sanctum
|
|
100
|
+
// shape has the token immediately adjacent to the pipe.
|
|
101
|
+
const table = `| 17 | ${BODY40} |`
|
|
102
|
+
expect(
|
|
103
|
+
detectSecrets(table).some((d) => d.rule_id === 'laravel_sanctum_token'),
|
|
104
|
+
).toBe(false)
|
|
105
|
+
})
|
|
106
|
+
})
|
|
107
|
+
|
|
108
|
+
it('suppresses a token sitting next to an example/fixture marker', () => {
|
|
109
|
+
// A documented example must not trigger a destructive delete+vault.
|
|
110
|
+
const hits = detectSecrets(`example token: ${SANCTUM}`)
|
|
111
|
+
const hit = hits.find((d) => d.rule_id === 'laravel_sanctum_token')
|
|
112
|
+
expect(hit).toBeDefined()
|
|
113
|
+
expect(hit!.suppressed).toBe(true)
|
|
114
|
+
})
|
|
115
|
+
})
|
|
@@ -69,72 +69,137 @@ function makeFakeBot(): FakeBot {
|
|
|
69
69
|
// ─── renderWorkerActivity (pure) ─────────────────────────────────────────────
|
|
70
70
|
|
|
71
71
|
describe('renderWorkerActivity', () => {
|
|
72
|
-
|
|
72
|
+
/** Count of step-feed bullets (`✓` done + `→` in-progress) in a body. */
|
|
73
|
+
const stepCount = (s: string) => (s.match(/[✓→]/g) ?? []).length
|
|
74
|
+
|
|
75
|
+
it('renders the native header + running status + step feed', () => {
|
|
73
76
|
const out = renderWorkerActivity(view())
|
|
74
|
-
expect(out).toContain('
|
|
75
|
-
expect(out).toContain('
|
|
76
|
-
expect(out).toContain('
|
|
77
|
-
|
|
77
|
+
expect(out).toContain('🛠 <b>Worker</b> · <i>research competitors</i>')
|
|
78
|
+
expect(out).toContain('running · ')
|
|
79
|
+
expect(out).toContain('3 tools')
|
|
80
|
+
// No narrativeLines → the latestSummary surfaces as the newest `→` step.
|
|
81
|
+
expect(out).toContain('<b>→ scanning vendor pages</b>')
|
|
82
|
+
// The old tool/arg chrome is gone.
|
|
83
|
+
expect(out).not.toContain('⚡')
|
|
84
|
+
expect(out).not.toContain('<code>')
|
|
78
85
|
})
|
|
79
86
|
|
|
80
|
-
it('shows a "starting…" line when no
|
|
87
|
+
it('shows a "starting…" line when no step has run yet', () => {
|
|
81
88
|
const out = renderWorkerActivity(view({ lastTool: null, latestSummary: '' }))
|
|
82
|
-
expect(out).toContain('
|
|
89
|
+
expect(out).toContain('🛠 <b>Worker</b>')
|
|
83
90
|
expect(out).toContain('starting…')
|
|
84
|
-
expect(out).not.toContain('
|
|
91
|
+
expect(out).not.toContain('→')
|
|
85
92
|
})
|
|
86
93
|
|
|
87
|
-
it('
|
|
94
|
+
it('falls back to starting… when the summary is blank', () => {
|
|
88
95
|
const out = renderWorkerActivity(view({ latestSummary: ' ' }))
|
|
89
|
-
expect(out).
|
|
96
|
+
expect(out).toContain('starting…')
|
|
97
|
+
expect(stepCount(out)).toBe(0)
|
|
90
98
|
})
|
|
91
99
|
|
|
92
100
|
it('uses singular "tool" for a single tool call', () => {
|
|
93
101
|
const out = renderWorkerActivity(view({ toolCount: 1 }))
|
|
94
|
-
expect(out).toContain('
|
|
102
|
+
expect(out).toContain('1 tool')
|
|
103
|
+
expect(out).not.toContain('1 tools')
|
|
95
104
|
})
|
|
96
105
|
|
|
97
|
-
it('renders a done terminal recap', () => {
|
|
98
|
-
const out = renderWorkerActivity(
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
expect(out).
|
|
106
|
+
it('renders a done terminal recap with a rule + cleaned result', () => {
|
|
107
|
+
const out = renderWorkerActivity(
|
|
108
|
+
view({ state: 'done', toolCount: 5, latestSummary: 'PR #21 opened' }),
|
|
109
|
+
)
|
|
110
|
+
expect(out).toContain('🛠 <b>Worker</b> · <i>research competitors</i>')
|
|
111
|
+
expect(out).toContain('finished · completed · 5 tools · ')
|
|
112
|
+
expect(out).toContain('─────')
|
|
113
|
+
expect(out).toContain('✅ <i>PR #21 opened</i>')
|
|
102
114
|
})
|
|
103
115
|
|
|
104
116
|
it('renders a failed terminal recap', () => {
|
|
105
|
-
const out = renderWorkerActivity(view({ state: 'failed' }))
|
|
106
|
-
expect(out).toContain('
|
|
117
|
+
const out = renderWorkerActivity(view({ state: 'failed', latestSummary: 'blew up' }))
|
|
118
|
+
expect(out).toContain('finished · failed · ')
|
|
119
|
+
expect(out).toContain('⚠️ <i>blew up</i>')
|
|
120
|
+
})
|
|
121
|
+
|
|
122
|
+
it('omits the rule + result line when the terminal result is empty', () => {
|
|
123
|
+
const out = renderWorkerActivity(view({ state: 'done', latestSummary: ' ' }))
|
|
124
|
+
expect(out).toContain('finished · completed · ')
|
|
125
|
+
expect(out).not.toContain('─────')
|
|
107
126
|
})
|
|
108
127
|
|
|
109
|
-
it('grows a
|
|
128
|
+
it('grows a step feed when narrativeLines is present (prior ✓, newest →)', () => {
|
|
110
129
|
const out = renderWorkerActivity(
|
|
111
130
|
view({
|
|
112
131
|
latestSummary: 'newest only — should be ignored',
|
|
113
132
|
narrativeLines: ['read the brief', 'scanned vendor A', 'scanned vendor B'],
|
|
114
133
|
}),
|
|
115
134
|
)
|
|
116
|
-
expect(out).toContain('
|
|
117
|
-
expect(out).toContain('
|
|
118
|
-
expect(out).toContain('
|
|
135
|
+
expect(out).toContain('<i>✓ read the brief</i>')
|
|
136
|
+
expect(out).toContain('<i>✓ scanned vendor A</i>')
|
|
137
|
+
expect(out).toContain('<b>→ scanned vendor B</b>')
|
|
119
138
|
// The single-line latestSummary fallback is NOT used when a block is present.
|
|
120
139
|
expect(out).not.toContain('newest only')
|
|
121
|
-
|
|
122
|
-
expect(out.match(/↳/g) ?? []).toHaveLength(3)
|
|
140
|
+
expect(stepCount(out)).toBe(3)
|
|
123
141
|
})
|
|
124
142
|
|
|
125
143
|
it('falls back to latestSummary when narrativeLines is empty', () => {
|
|
126
144
|
const out = renderWorkerActivity(view({ narrativeLines: [], latestSummary: 'one line' }))
|
|
127
|
-
expect(out).toContain('
|
|
128
|
-
expect(out
|
|
145
|
+
expect(out).toContain('<b>→ one line</b>')
|
|
146
|
+
expect(stepCount(out)).toBe(1)
|
|
129
147
|
})
|
|
130
148
|
|
|
131
|
-
it('drops blank narrative lines from the
|
|
149
|
+
it('drops blank narrative lines from the feed', () => {
|
|
150
|
+
const out = renderWorkerActivity(view({ narrativeLines: ['kept', ' ', 'also kept'] }))
|
|
151
|
+
expect(out).toContain('<i>✓ kept</i>')
|
|
152
|
+
expect(out).toContain('<b>→ also kept</b>')
|
|
153
|
+
expect(stepCount(out)).toBe(2)
|
|
154
|
+
})
|
|
155
|
+
|
|
156
|
+
it('shows an overflow header when the feed exceeds the cap', () => {
|
|
157
|
+
const lines = Array.from({ length: 9 }, (_, i) => `step ${i + 1}`)
|
|
158
|
+
const out = renderWorkerActivity(view({ narrativeLines: lines }))
|
|
159
|
+
expect(out).toContain('<i>✓ +3 earlier…</i>')
|
|
160
|
+
expect(out).not.toContain('step 1')
|
|
161
|
+
expect(out).toContain('<i>✓ step 4</i>')
|
|
162
|
+
expect(out).toContain('<b>→ step 9</b>')
|
|
163
|
+
// 6 visible step lines (the overflow header is not itself a step).
|
|
164
|
+
expect(out.match(/step \d/g) ?? []).toHaveLength(6)
|
|
165
|
+
})
|
|
166
|
+
|
|
167
|
+
it('strips Markdown markup from narrative + description + result', () => {
|
|
168
|
+
const running = renderWorkerActivity(
|
|
169
|
+
view({
|
|
170
|
+
description: '**Build** the `sync`',
|
|
171
|
+
narrativeLines: ['- ran the **full** suite', '`git push`'],
|
|
172
|
+
}),
|
|
173
|
+
)
|
|
174
|
+
expect(running).toContain('🛠 <b>Worker</b> · <i>Build the sync</i>')
|
|
175
|
+
expect(running).toContain('ran the full suite')
|
|
176
|
+
expect(running).toContain('git push')
|
|
177
|
+
expect(running).not.toContain('**')
|
|
178
|
+
expect(running).not.toContain('`')
|
|
179
|
+
|
|
180
|
+
const done = renderWorkerActivity(
|
|
181
|
+
view({ state: 'done', latestSummary: '## Done\n\n**PR #21** opened\n\n---\n`merged`' }),
|
|
182
|
+
)
|
|
183
|
+
expect(done).toContain('Done PR #21 opened merged')
|
|
184
|
+
expect(done).not.toContain('**')
|
|
185
|
+
expect(done).not.toContain('`')
|
|
186
|
+
// The card's own divider is the box-drawing rule, never a raw `---`.
|
|
187
|
+
expect(done).not.toMatch(/(^|\n)\s*-{3,}\s*(\n|$)/)
|
|
188
|
+
})
|
|
189
|
+
|
|
190
|
+
it('renders a multi-line narrative entry as one clean step (no raw ## leak)', () => {
|
|
191
|
+
// Screenshot regression: a running-card step whose narrative entry is
|
|
192
|
+
// itself multi-line ("Done.\n\n## Summary\n…"). The heading marker must
|
|
193
|
+
// not leak and the step must collapse to a single visual line.
|
|
132
194
|
const out = renderWorkerActivity(
|
|
133
|
-
view({
|
|
195
|
+
view({
|
|
196
|
+
narrativeLines: ['Done.\n\n## Summary\n\nFixed the bug where a bad password logged you out'],
|
|
197
|
+
latestSummary: '',
|
|
198
|
+
}),
|
|
134
199
|
)
|
|
135
|
-
expect(out).toContain('
|
|
136
|
-
expect(out).toContain('
|
|
137
|
-
expect(out.
|
|
200
|
+
expect(out).not.toContain('## Summary')
|
|
201
|
+
expect(out).not.toContain('\n\n')
|
|
202
|
+
expect(out).toContain('Done. Summary Fixed the bug where a bad password logged you out')
|
|
138
203
|
})
|
|
139
204
|
|
|
140
205
|
it('escapes HTML inside narrative lines', () => {
|
|
@@ -142,17 +207,11 @@ describe('renderWorkerActivity', () => {
|
|
|
142
207
|
expect(out).toContain('a <b>x</b> & y')
|
|
143
208
|
})
|
|
144
209
|
|
|
145
|
-
it('escapes HTML in description
|
|
210
|
+
it('escapes HTML in description and summary', () => {
|
|
146
211
|
const out = renderWorkerActivity(
|
|
147
|
-
view({
|
|
148
|
-
description: 'a <b>bold</b> task',
|
|
149
|
-
lastTool: { name: 'Ba<sh', sanitisedArg: 'x & y' },
|
|
150
|
-
latestSummary: 'a > b',
|
|
151
|
-
}),
|
|
212
|
+
view({ description: 'a <b>bold</b> task', latestSummary: 'a > b' }),
|
|
152
213
|
)
|
|
153
214
|
expect(out).toContain('a <b>bold</b> task')
|
|
154
|
-
expect(out).toContain('Ba<sh')
|
|
155
|
-
expect(out).toContain('x & y')
|
|
156
215
|
expect(out).toContain('a > b')
|
|
157
216
|
})
|
|
158
217
|
})
|
|
@@ -206,7 +265,7 @@ describe('createWorkerActivityFeed', () => {
|
|
|
206
265
|
clock = 13_000 // +3000 since last edit > 2500
|
|
207
266
|
await feed.update('w1', 'chat', view({ toolCount: 3 }))
|
|
208
267
|
expect(bot.edits).toHaveLength(1)
|
|
209
|
-
expect(bot.edits[0].text).toContain('
|
|
268
|
+
expect(bot.edits[0].text).toContain('3 tools')
|
|
210
269
|
})
|
|
211
270
|
|
|
212
271
|
it('forces a terminal edit on finish, skipping the throttle', async () => {
|
|
@@ -219,7 +278,7 @@ describe('createWorkerActivityFeed', () => {
|
|
|
219
278
|
clock = 10_500 // well within the throttle window
|
|
220
279
|
await feed.finish('w1', view({ state: 'done', toolCount: 5 }))
|
|
221
280
|
expect(bot.edits).toHaveLength(1)
|
|
222
|
-
expect(bot.edits[0].text).toContain('
|
|
281
|
+
expect(bot.edits[0].text).toContain('finished · completed · 5 tools')
|
|
223
282
|
// finish forgets the worker.
|
|
224
283
|
expect(feed.has('w1')).toBe(false)
|
|
225
284
|
expect(feed.size).toBe(0)
|
|
@@ -303,7 +362,7 @@ describe('createWorkerActivityFeed', () => {
|
|
|
303
362
|
|
|
304
363
|
await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'read the brief' }))
|
|
305
364
|
expect(bot.sent).toHaveLength(1)
|
|
306
|
-
expect(bot.sent[0].text).toContain('
|
|
365
|
+
expect(bot.sent[0].text).toContain('<b>→ read the brief</b>')
|
|
307
366
|
|
|
308
367
|
clock = 11_000
|
|
309
368
|
await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'scanned vendor A' }))
|
|
@@ -311,10 +370,10 @@ describe('createWorkerActivityFeed', () => {
|
|
|
311
370
|
await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'scanned vendor B' }))
|
|
312
371
|
|
|
313
372
|
const last = bot.edits.at(-1)!
|
|
314
|
-
expect(last.text).toContain('
|
|
315
|
-
expect(last.text).toContain('
|
|
316
|
-
expect(last.text).toContain('
|
|
317
|
-
expect(last.text.match(
|
|
373
|
+
expect(last.text).toContain('<i>✓ read the brief</i>')
|
|
374
|
+
expect(last.text).toContain('<i>✓ scanned vendor A</i>')
|
|
375
|
+
expect(last.text).toContain('<b>→ scanned vendor B</b>')
|
|
376
|
+
expect(last.text.match(/[✓→]/g) ?? []).toHaveLength(3)
|
|
318
377
|
})
|
|
319
378
|
|
|
320
379
|
it('dedups a repeated narrative line so the block does not duplicate', async () => {
|
|
@@ -329,7 +388,7 @@ describe('createWorkerActivityFeed', () => {
|
|
|
329
388
|
await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'same line' }))
|
|
330
389
|
|
|
331
390
|
const last = bot.edits.at(-1)!
|
|
332
|
-
expect(last.text.match(
|
|
391
|
+
expect(last.text.match(/[✓→]/g) ?? []).toHaveLength(1)
|
|
333
392
|
})
|
|
334
393
|
|
|
335
394
|
it('caps the narrative block to the last 6 lines', async () => {
|
|
@@ -343,7 +402,7 @@ describe('createWorkerActivityFeed', () => {
|
|
|
343
402
|
}
|
|
344
403
|
|
|
345
404
|
const last = bot.edits.at(-1)!
|
|
346
|
-
expect(last.text.match(
|
|
405
|
+
expect(last.text.match(/[✓→]/g) ?? []).toHaveLength(6)
|
|
347
406
|
// Oldest lines evicted; newest retained.
|
|
348
407
|
expect(last.text).not.toContain('line 1')
|
|
349
408
|
expect(last.text).not.toContain('line 3')
|
|
@@ -368,9 +427,9 @@ describe('createWorkerActivityFeed', () => {
|
|
|
368
427
|
clock = 13_000
|
|
369
428
|
await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'line C' }))
|
|
370
429
|
const last = bot.edits.at(-1)!
|
|
371
|
-
expect(last.text).toContain('
|
|
372
|
-
expect(last.text).toContain('
|
|
373
|
-
expect(last.text).toContain('
|
|
430
|
+
expect(last.text).toContain('<i>✓ line A</i>')
|
|
431
|
+
expect(last.text).toContain('<i>✓ line B</i>')
|
|
432
|
+
expect(last.text).toContain('<b>→ line C</b>')
|
|
374
433
|
})
|
|
375
434
|
|
|
376
435
|
it('forwards threadId as message_thread_id on send', async () => {
|
|
@@ -13,16 +13,18 @@ import type { Driver, ObservedMessage, ObservedReaction } from "./driver.js";
|
|
|
13
13
|
|
|
14
14
|
/**
|
|
15
15
|
* Canonical shape of a worker-activity-feed message (#2000) as rendered
|
|
16
|
-
* in Telegram: a
|
|
17
|
-
* finalizes to
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
16
|
+
* in Telegram: a native card with a `🛠 Worker · …` header that edits in
|
|
17
|
+
* place (running) and finalizes to a `finished · completed/failed · …`
|
|
18
|
+
* status line. The header is present in every state, so it's the primary
|
|
19
|
+
* key. The feed is default-on fleet-wide as of v0.14.19, so background
|
|
20
|
+
* sub-agent activity surfaces as its own bot message in any chat —
|
|
21
|
+
* including DMs whose scenario only cares about the agent's conversational
|
|
22
|
+
* reply.
|
|
21
23
|
*
|
|
22
24
|
* Single source of truth; the worker-feed scenario asserts against this,
|
|
23
25
|
* and recall/reply scenarios exclude it via {@link isWorkerFeedMessage}.
|
|
24
26
|
*/
|
|
25
|
-
export const WORKER_FEED_RE =
|
|
27
|
+
export const WORKER_FEED_RE = /🛠[️]?\s*Worker\b|finished\s*·\s*(?:completed|failed)/i;
|
|
26
28
|
|
|
27
29
|
/**
|
|
28
30
|
* True when `m` is a worker-activity-feed message rather than the agent's
|
|
@@ -13,17 +13,23 @@ const feed = (text: string) => ({ text }) as Parameters<typeof isWorkerFeedMessa
|
|
|
13
13
|
|
|
14
14
|
describe("isWorkerFeedMessage", () => {
|
|
15
15
|
it("matches the running feed header", () => {
|
|
16
|
-
expect(isWorkerFeedMessage(feed("
|
|
16
|
+
expect(isWorkerFeedMessage(feed("🛠 Worker · crawling changelog"))).toBe(true);
|
|
17
|
+
expect(
|
|
18
|
+
isWorkerFeedMessage(feed("🛠 Worker · crawling changelog\nrunning · 00:12 · 4 tools")),
|
|
19
|
+
).toBe(true);
|
|
17
20
|
});
|
|
18
21
|
|
|
19
|
-
it("matches the terminal
|
|
20
|
-
expect(
|
|
21
|
-
|
|
22
|
+
it("matches the terminal finished status (completed/failed)", () => {
|
|
23
|
+
expect(
|
|
24
|
+
isWorkerFeedMessage(feed("🛠 Worker · crawl\nfinished · completed · 10 tools · 01:03")),
|
|
25
|
+
).toBe(true);
|
|
26
|
+
expect(
|
|
27
|
+
isWorkerFeedMessage(feed("🛠 Worker · crawl\nfinished · failed · 3 tools · 00:08")),
|
|
28
|
+
).toBe(true);
|
|
22
29
|
});
|
|
23
30
|
|
|
24
|
-
it("matches
|
|
25
|
-
expect(isWorkerFeedMessage(feed("
|
|
26
|
-
expect(isWorkerFeedMessage(feed("Worker failed mid-step"))).toBe(true);
|
|
31
|
+
it("matches the finished status even on its own line (header-stripped edge)", () => {
|
|
32
|
+
expect(isWorkerFeedMessage(feed("finished · completed · 2 tools · 00:30"))).toBe(true);
|
|
27
33
|
});
|
|
28
34
|
|
|
29
35
|
it("does NOT match an ordinary agent reply", () => {
|
|
@@ -40,7 +46,7 @@ describe("isWorkerFeedMessage", () => {
|
|
|
40
46
|
});
|
|
41
47
|
|
|
42
48
|
it("exposes the regex for scenarios that assert on the feed directly", () => {
|
|
43
|
-
expect(WORKER_FEED_RE.test("
|
|
49
|
+
expect(WORKER_FEED_RE.test("🛠 Worker · x")).toBe(true);
|
|
44
50
|
});
|
|
45
51
|
});
|
|
46
52
|
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* End-to-end UAT for the agent-initiated `request_secret` flow (#2045).
|
|
3
|
+
*
|
|
4
|
+
* The upstream fix behind the 2026-06-01 incident: an agent must never ask
|
|
5
|
+
* the user to paste a secret into chat. Instead it calls `request_secret`,
|
|
6
|
+
* the operator taps [Provide securely] and sends the value once, and the
|
|
7
|
+
* gateway deletes the message + writes it straight to the vault — the raw
|
|
8
|
+
* value never lands in history/logs and is never returned to the agent.
|
|
9
|
+
*
|
|
10
|
+
* This round-trips through real Telegram + real broker + real agent and
|
|
11
|
+
* asserts the FINAL state: (a) the secure card renders with the right
|
|
12
|
+
* button, (b) after the operator provides the value the bot confirms a
|
|
13
|
+
* vault save, (c) the value actually landed in the vault (host-side read),
|
|
14
|
+
* and (d) the raw value never reappears in a bot message.
|
|
15
|
+
*
|
|
16
|
+
* **Skipped by default.** To unskip:
|
|
17
|
+
*
|
|
18
|
+
* 1. Standard UAT preflight (`uat/SETUP.md` §5-6) — test-harness agent live
|
|
19
|
+
* (running build WITH request_secret, #2045), driver session auth'd,
|
|
20
|
+
* env vars set.
|
|
21
|
+
* 2. `SWITCHROOM_VAULT_PASSPHRASE` in env (read-back asserts the value
|
|
22
|
+
* landed; under telegram-id approval mode the save itself is
|
|
23
|
+
* posture-attested and needs no passphrase).
|
|
24
|
+
* 3. Remove `describe.skip`.
|
|
25
|
+
*
|
|
26
|
+
* Cleanup is operator-side: `switchroom vault rm uat/req-secret-target`.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import { execFileSync } from "node:child_process";
|
|
30
|
+
import { describe, expect, it } from "vitest";
|
|
31
|
+
import { spinUp } from "../harness.js";
|
|
32
|
+
|
|
33
|
+
const TARGET_KEY = "uat/req-secret-target";
|
|
34
|
+
// Built at runtime so the source never holds a contiguous secret literal.
|
|
35
|
+
const PROVIDED_VALUE = "sentinel-2045-" + "Ab3xK9pQ".repeat(2);
|
|
36
|
+
|
|
37
|
+
function hostVaultGet(key: string): string {
|
|
38
|
+
try {
|
|
39
|
+
return execFileSync("switchroom", ["vault", "get", key], {
|
|
40
|
+
encoding: "utf-8",
|
|
41
|
+
env: { ...process.env },
|
|
42
|
+
}).trim();
|
|
43
|
+
} catch {
|
|
44
|
+
return "";
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
describe.skip("uat: request_secret end-to-end (#2045)", () => {
|
|
49
|
+
it(
|
|
50
|
+
"agent calls request_secret → operator provides → value vaulted, never echoed",
|
|
51
|
+
async () => {
|
|
52
|
+
const sc = await spinUp({ agent: "test-harness" });
|
|
53
|
+
try {
|
|
54
|
+
// 1. Prompt the agent to call request_secret for a key it lacks.
|
|
55
|
+
// (We don't fire the card from the driver — the point is to
|
|
56
|
+
// cover the agent → gateway → capture → vault path.)
|
|
57
|
+
await sc.sendDM(
|
|
58
|
+
`I need an API token but it is NOT in your vault. Use your ` +
|
|
59
|
+
`request_secret MCP tool with key="${TARGET_KEY}", ` +
|
|
60
|
+
`reason="UAT for #2045" to ask me for it securely. Do NOT ask me ` +
|
|
61
|
+
`to paste it as a normal message.`,
|
|
62
|
+
);
|
|
63
|
+
|
|
64
|
+
// 2. The secure card (request_secret renders "needs a secret").
|
|
65
|
+
const card = await sc.expectMessage(/needs a secret/i, {
|
|
66
|
+
from: "bot",
|
|
67
|
+
timeout: 60_000,
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
// 3. It must carry a [Provide securely] button.
|
|
71
|
+
const kb = await sc.driver.getKeyboard(sc.botUserId, card.messageId);
|
|
72
|
+
expect(kb).not.toBeNull();
|
|
73
|
+
const provide = kb!
|
|
74
|
+
.flat()
|
|
75
|
+
.find((b) => b.callbackData !== undefined && /provide/i.test(b.text));
|
|
76
|
+
expect(provide, "card should have a [Provide securely] button").toBeDefined();
|
|
77
|
+
|
|
78
|
+
// 4. Tap Provide → the card arms capture + prompts for the value.
|
|
79
|
+
await sc.driver.pressButton(sc.botUserId, card.messageId, provide!.callbackData!);
|
|
80
|
+
await sc.expectMessage(/Send the value for/i, { from: "bot", timeout: 15_000 });
|
|
81
|
+
|
|
82
|
+
// 5. Send the value. The gateway deletes it + writes to the vault.
|
|
83
|
+
await sc.sendDM(PROVIDED_VALUE);
|
|
84
|
+
const confirm = await sc.expectMessage(/saved as/i, {
|
|
85
|
+
from: "bot",
|
|
86
|
+
timeout: 30_000,
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
// 6. The confirmation references the key, NOT the raw value.
|
|
90
|
+
expect(confirm.text).toContain(`vault:${TARGET_KEY}`);
|
|
91
|
+
expect(confirm.text).not.toContain(PROVIDED_VALUE);
|
|
92
|
+
|
|
93
|
+
// 7. Load-bearing: the value actually landed in the vault.
|
|
94
|
+
expect(hostVaultGet(TARGET_KEY)).toBe(PROVIDED_VALUE);
|
|
95
|
+
} finally {
|
|
96
|
+
await sc.tearDown();
|
|
97
|
+
}
|
|
98
|
+
},
|
|
99
|
+
300_000,
|
|
100
|
+
);
|
|
101
|
+
});
|
|
@@ -12,12 +12,14 @@
|
|
|
12
12
|
* sleep/echo work, so it narrates between tools and the feed can paint
|
|
13
13
|
* + edit), then asserts:
|
|
14
14
|
*
|
|
15
|
-
* 1. a worker-feed message appears (
|
|
15
|
+
* 1. a worker-feed message appears (🛠 Worker · …), distinct from the
|
|
16
16
|
* parent's ack reply — proving background activity surfaces after
|
|
17
17
|
* the parent turn closed;
|
|
18
18
|
* 2. the message edits in place while work is in flight (body changes
|
|
19
19
|
* across a window) — proving it's live, not a one-shot post;
|
|
20
|
-
* 3. it finalizes to the terminal recap (
|
|
20
|
+
* 3. it finalizes to the terminal recap (finished · completed · N tools);
|
|
21
|
+
* 4. the native card never leaks raw Markdown (no `**`, backticks, or
|
|
22
|
+
* ASCII `---` rule) — the #94-class regression guard.
|
|
21
23
|
*
|
|
22
24
|
* It logs every observed body so a human can read the real rendered UX.
|
|
23
25
|
*
|
|
@@ -53,10 +55,11 @@ const BG_DISPATCH_PROMPT =
|
|
|
53
55
|
`brief reply saying you've kicked off the background worker so I can ` +
|
|
54
56
|
`watch its progress.`;
|
|
55
57
|
|
|
56
|
-
// The feed header rendered in Telegram: "
|
|
57
|
-
//
|
|
58
|
-
|
|
59
|
-
const
|
|
58
|
+
// The feed header rendered in Telegram: "🛠 Worker · <desc>" with a
|
|
59
|
+
// "running · …" status (running) or "finished · completed/failed · …"
|
|
60
|
+
// (terminal).
|
|
61
|
+
const WORKER_FEED_RE = /🛠\s*Worker|running\s*·|finished\s*·/i;
|
|
62
|
+
const WORKER_DONE_RE = /finished\s*·\s*(completed|failed)/i;
|
|
60
63
|
|
|
61
64
|
describe("uat: live worker-activity feed (#2000)", () => {
|
|
62
65
|
it(
|
|
@@ -116,6 +119,14 @@ describe("uat: live worker-activity feed (#2000)", () => {
|
|
|
116
119
|
expect(doneText!).toMatch(/tools?|tool ·/i);
|
|
117
120
|
// Did the body actually move between first paint and terminal?
|
|
118
121
|
expect(doneText).not.toBe(before);
|
|
122
|
+
// #94-class regression guard: the native card strips worker Markdown,
|
|
123
|
+
// so the rendered body must never carry raw `**`, backticks, or an
|
|
124
|
+
// ASCII `---` rule (the finished divider is the box-drawing `─────`).
|
|
125
|
+
expect(doneText!, "raw ** leaked into the card").not.toMatch(/\*\*/);
|
|
126
|
+
expect(doneText!, "raw backtick leaked into the card").not.toContain("`");
|
|
127
|
+
expect(doneText!, "raw --- rule leaked into the card").not.toMatch(
|
|
128
|
+
/(^|\n)\s*-{3,}\s*(\n|$)/,
|
|
129
|
+
);
|
|
119
130
|
} finally {
|
|
120
131
|
await sc.tearDown();
|
|
121
132
|
}
|