npm - switchroom - Versions diffs - 0.14.26 → 0.14.28 - Mend

switchroom 0.14.26 → 0.14.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/dist/cli/switchroom.js +20 -4
package/dist/host-control/main.js +2 -2
package/package.json +1 -1
package/telegram-plugin/bridge/bridge.ts +15 -0
package/telegram-plugin/card-format.ts +65 -0
package/telegram-plugin/dist/bridge/bridge.js +14 -0
package/telegram-plugin/dist/gateway/gateway.js +2201 -1748
package/telegram-plugin/dist/server.js +14 -0
package/telegram-plugin/gateway/gateway.ts +458 -13
package/telegram-plugin/gateway/worker-feed-dispatch.ts +1 -1
package/telegram-plugin/history.ts +16 -4
package/telegram-plugin/permission-title.ts +48 -0
package/telegram-plugin/secret-detect/patterns.ts +8 -0
package/telegram-plugin/secret-detect/redact.ts +76 -0
package/telegram-plugin/tests/card-format.test.ts +96 -0
package/telegram-plugin/tests/gateway-outbound-redact.test.ts +80 -0
package/telegram-plugin/tests/gateway-request-secret.test.ts +78 -0
package/telegram-plugin/tests/history.test.ts +59 -0
package/telegram-plugin/tests/permission-title.test.ts +68 -0
package/telegram-plugin/tests/permission-verdict-resume-guard.test.ts +35 -0
package/telegram-plugin/tests/secret-detect-sanctum.test.ts +115 -0
package/telegram-plugin/tests/worker-activity-feed.test.ts +110 -51
package/telegram-plugin/uat/assertions.ts +8 -6
package/telegram-plugin/uat/feed-matcher.test.ts +14 -8
package/telegram-plugin/uat/scenarios/jtbd-request-secret-dm.test.ts +101 -0
package/telegram-plugin/uat/scenarios/jtbd-worker-activity-feed-dm.test.ts +17 -6
package/telegram-plugin/worker-activity-feed.ts +84 -46

package/telegram-plugin/tests/secret-detect-sanctum.test.ts ADDED Viewed

@@ -0,0 +1,115 @@
+import { describe, it, expect } from 'vitest'
+import { detectSecrets } from '../secret-detect/index.js'
+import { redact } from '../secret-detect/redact.js'
+/**
+ * Regression for the 2026-06-01 incident: a live Laravel Sanctum / Coolify
+ * personal-access token (`<id>|<40-char base62>`, e.g. `17|aB3d…`) pasted by
+ * a USER into chat slipped every existing pattern and persisted in plaintext.
+ *
+ * Diagnosis: the inbound gate (gateway handleInbound) already runs
+ * `detectSecrets` at ingest, fail-closed, BEFORE recordInbound + IPC dispatch
+ * — so the leak was NOT an inbound bypass and NOT render-time masking. It was
+ * pure PATTERN COVERAGE: no rule matched the `<id>|<token>` shape. These tests
+ * pin the new `laravel_sanctum_token` rule and the redactor that backs both
+ * the history-store persistence (both directions) and the issues pipeline.
+ */
+// Build token fixtures by concatenation so the source file never contains a
+// contiguous secret-shaped literal (repo Push Protection / no-pii lint).
+const ID = '17'
+const BODY40 = 'aB3dE6fH9j'.repeat(4) // 40 base62 chars — Sanctum's Str::random(40)
+const SANCTUM = `${ID}|${BODY40}` // e.g. 17|aB3dE6fH9j…
+describe('laravel/coolify sanctum token detection', () => {
+  it('detects <id>|<40-char> as a high-confidence laravel_sanctum_token', () => {
+    const hits = detectSecrets(`here is the token: ${SANCTUM}`)
+    const hit = hits.find((d) => d.rule_id === 'laravel_sanctum_token')
+    expect(hit).toBeDefined()
+    expect(hit!.matched_text).toBe(SANCTUM)
+    expect(hit!.confidence).toBe('high')
+    expect(hit!.suppressed).toBe(false)
+  })
+  it('satisfies the exact inbound-gate predicate (high + not suppressed)', () => {
+    // This is verbatim the condition gateway handleInbound uses to decide to
+    // delete + defer-to-vault: if it is truthy, the pasted token is
+    // intercepted before recordInbound() and the IPC broadcast to the agent.
+    const detections = detectSecrets(SANCTUM)
+    const gateHit = detections.find((d) => d.confidence === 'high' && !d.suppressed)
+    expect(gateHit).toBeDefined()
+  })
+  it('redact() masks the token in place, preserving surrounding prose', () => {
+    const out = redact(`new token: ${SANCTUM}\nuse it for the deploy API`)
+    expect(out).not.toContain(SANCTUM)
+    expect(out).not.toContain(BODY40)
+    expect(out).toContain('[REDACTED:laravel_sanctum_token]')
+    // Surrounding prose is untouched.
+    expect(out).toContain('new token:')
+    expect(out).toContain('use it for the deploy API')
+  })
+  it('covers longer-than-40 token bodies', () => {
+    const longTok = `42|${'Xy7Zk2Qp9w'.repeat(6)}` // 60 base62 chars
+    const hits = detectSecrets(longTok)
+    expect(hits.find((d) => d.rule_id === 'laravel_sanctum_token')?.matched_text).toBe(longTok)
+  })
+  it('catches a token mid-sentence and masks only the token', () => {
+    const out = redact(`use ${SANCTUM} when deploying, ok?`)
+    expect(out).not.toContain(SANCTUM)
+    expect(out).toContain('use ')
+    expect(out).toContain(' when deploying, ok?')
+  })
+  it('catches multiple tokens in one message (redact right-to-left loop)', () => {
+    const second = `88|${'mK2pR7vT4x'.repeat(4)}` // distinct <id>|<40 base62>
+    const detected = detectSecrets(`old ${SANCTUM} new ${second}`)
+      .filter((d) => d.rule_id === 'laravel_sanctum_token')
+    expect(detected).toHaveLength(2)
+    const out = redact(`old ${SANCTUM} new ${second}`)
+    expect(out).not.toContain(SANCTUM)
+    expect(out).not.toContain(second)
+    expect(out).not.toContain(BODY40)
+  })
+  describe('false-positive guards', () => {
+    it('does NOT match a pipe-joined value under the 40-char floor', () => {
+      const short = `1|${'abcDEF123'}` // 9 chars after the pipe
+      expect(detectSecrets(short).some((d) => d.rule_id === 'laravel_sanctum_token')).toBe(false)
+    })
+    it('does NOT match exactly 39 base62 chars (one under the floor)', () => {
+      const justUnder = `17|${BODY40.slice(0, 39)}`
+      expect(
+        detectSecrets(justUnder).some((d) => d.rule_id === 'laravel_sanctum_token'),
+      ).toBe(false)
+    })
+    it('does NOT match a number with no pipe-delimited token', () => {
+      expect(
+        detectSecrets('order 17 shipped 40 items').some(
+          (d) => d.rule_id === 'laravel_sanctum_token',
+        ),
+      ).toBe(false)
+    })
+    it('does NOT match a spaced markdown table cell', () => {
+      // Table cells are `| value |` with spaces around the pipe; the Sanctum
+      // shape has the token immediately adjacent to the pipe.
+      const table = `| 17 | ${BODY40} |`
+      expect(
+        detectSecrets(table).some((d) => d.rule_id === 'laravel_sanctum_token'),
+      ).toBe(false)
+    })
+  })
+  it('suppresses a token sitting next to an example/fixture marker', () => {
+    // A documented example must not trigger a destructive delete+vault.
+    const hits = detectSecrets(`example token: ${SANCTUM}`)
+    const hit = hits.find((d) => d.rule_id === 'laravel_sanctum_token')
+    expect(hit).toBeDefined()
+    expect(hit!.suppressed).toBe(true)
+  })
+})

package/telegram-plugin/tests/worker-activity-feed.test.ts CHANGED Viewed

@@ -69,72 +69,137 @@ function makeFakeBot(): FakeBot {
 // ─── renderWorkerActivity (pure) ─────────────────────────────────────────────
 describe('renderWorkerActivity', () => {
-  it('renders running header + tool activity line + summary', () => {
+  /** Count of step-feed bullets (`✓` done + `→` in-progress) in a body. */
+  const stepCount = (s: string) => (s.match(/[✓→]/g) ?? []).length
+  it('renders the native header + running status + step feed', () => {
     const out = renderWorkerActivity(view())
-    expect(out).toContain('🔧 <b>Worker</b> · <i>research competitors</i>')
-    expect(out).toContain('⚡ <code>Bash</code> grep -r pricing')
-    expect(out).toContain('(3 tools · ')
-    expect(out).toContain('↳ <i>scanning vendor pages</i>')
+    expect(out).toContain('🛠 <b>Worker</b> · <i>research competitors</i>')
+    expect(out).toContain('running · ')
+    expect(out).toContain('3 tools')
+    // No narrativeLines → the latestSummary surfaces as the newest `→` step.
+    expect(out).toContain('<b>→ scanning vendor pages</b>')
+    // The old tool/arg chrome is gone.
+    expect(out).not.toContain('⚡')
+    expect(out).not.toContain('<code>')
   })
-  it('shows a "starting…" line when no tool has run yet', () => {
+  it('shows a "starting…" line when no step has run yet', () => {
     const out = renderWorkerActivity(view({ lastTool: null, latestSummary: '' }))
-    expect(out).toContain('🔧 <b>Worker</b>')
+    expect(out).toContain('🛠 <b>Worker</b>')
     expect(out).toContain('starting…')
-    expect(out).not.toContain('⚡')
+    expect(out).not.toContain('→')
   })
-  it('omits the summary line when latestSummary is blank', () => {
+  it('falls back to starting… when the summary is blank', () => {
     const out = renderWorkerActivity(view({ latestSummary: '   ' }))
-    expect(out).not.toContain('↳')
+    expect(out).toContain('starting…')
+    expect(stepCount(out)).toBe(0)
   })
   it('uses singular "tool" for a single tool call', () => {
     const out = renderWorkerActivity(view({ toolCount: 1 }))
-    expect(out).toContain('(1 tool · ')
+    expect(out).toContain('1 tool')
+    expect(out).not.toContain('1 tools')
   })
-  it('renders a done terminal recap', () => {
-    const out = renderWorkerActivity(view({ state: 'done', toolCount: 5 }))
-    expect(out).toContain('✅ <b>Worker done</b> · <i>research competitors</i>')
-    expect(out).toContain('5 tools · ')
-    expect(out).not.toContain('⚡')
+  it('renders a done terminal recap with a rule + cleaned result', () => {
+    const out = renderWorkerActivity(
+      view({ state: 'done', toolCount: 5, latestSummary: 'PR #21 opened' }),
+    )
+    expect(out).toContain('🛠 <b>Worker</b> · <i>research competitors</i>')
+    expect(out).toContain('finished · completed · 5 tools · ')
+    expect(out).toContain('─────')
+    expect(out).toContain('✅ <i>PR #21 opened</i>')
   })
   it('renders a failed terminal recap', () => {
-    const out = renderWorkerActivity(view({ state: 'failed' }))
-    expect(out).toContain('⚠️ <b>Worker failed</b>')
+    const out = renderWorkerActivity(view({ state: 'failed', latestSummary: 'blew up' }))
+    expect(out).toContain('finished · failed · ')
+    expect(out).toContain('⚠️ <i>blew up</i>')
+  })
+  it('omits the rule + result line when the terminal result is empty', () => {
+    const out = renderWorkerActivity(view({ state: 'done', latestSummary: '   ' }))
+    expect(out).toContain('finished · completed · ')
+    expect(out).not.toContain('─────')
   })
-  it('grows a narrative block when narrativeLines is present', () => {
+  it('grows a step feed when narrativeLines is present (prior ✓, newest →)', () => {
     const out = renderWorkerActivity(
       view({
         latestSummary: 'newest only — should be ignored',
         narrativeLines: ['read the brief', 'scanned vendor A', 'scanned vendor B'],
       }),
     )
-    expect(out).toContain('↳ <i>read the brief</i>')
-    expect(out).toContain('↳ <i>scanned vendor A</i>')
-    expect(out).toContain('↳ <i>scanned vendor B</i>')
+    expect(out).toContain('<i>✓ read the brief</i>')
+    expect(out).toContain('<i>✓ scanned vendor A</i>')
+    expect(out).toContain('<b>→ scanned vendor B</b>')
     // The single-line latestSummary fallback is NOT used when a block is present.
     expect(out).not.toContain('newest only')
-    // Three narrative lines → three ↳ lines.
-    expect(out.match(/↳/g) ?? []).toHaveLength(3)
+    expect(stepCount(out)).toBe(3)
   })
   it('falls back to latestSummary when narrativeLines is empty', () => {
     const out = renderWorkerActivity(view({ narrativeLines: [], latestSummary: 'one line' }))
-    expect(out).toContain('↳ <i>one line</i>')
-    expect(out.match(/↳/g) ?? []).toHaveLength(1)
+    expect(out).toContain('<b>→ one line</b>')
+    expect(stepCount(out)).toBe(1)
   })
-  it('drops blank narrative lines from the block', () => {
+  it('drops blank narrative lines from the feed', () => {
+    const out = renderWorkerActivity(view({ narrativeLines: ['kept', '   ', 'also kept'] }))
+    expect(out).toContain('<i>✓ kept</i>')
+    expect(out).toContain('<b>→ also kept</b>')
+    expect(stepCount(out)).toBe(2)
+  })
+  it('shows an overflow header when the feed exceeds the cap', () => {
+    const lines = Array.from({ length: 9 }, (_, i) => `step ${i + 1}`)
+    const out = renderWorkerActivity(view({ narrativeLines: lines }))
+    expect(out).toContain('<i>✓ +3 earlier…</i>')
+    expect(out).not.toContain('step 1')
+    expect(out).toContain('<i>✓ step 4</i>')
+    expect(out).toContain('<b>→ step 9</b>')
+    // 6 visible step lines (the overflow header is not itself a step).
+    expect(out.match(/step \d/g) ?? []).toHaveLength(6)
+  })
+  it('strips Markdown markup from narrative + description + result', () => {
+    const running = renderWorkerActivity(
+      view({
+        description: '**Build** the `sync`',
+        narrativeLines: ['- ran the **full** suite', '`git push`'],
+      }),
+    )
+    expect(running).toContain('🛠 <b>Worker</b> · <i>Build the sync</i>')
+    expect(running).toContain('ran the full suite')
+    expect(running).toContain('git push')
+    expect(running).not.toContain('**')
+    expect(running).not.toContain('`')
+    const done = renderWorkerActivity(
+      view({ state: 'done', latestSummary: '## Done\n\n**PR #21** opened\n\n---\n`merged`' }),
+    )
+    expect(done).toContain('Done PR #21 opened merged')
+    expect(done).not.toContain('**')
+    expect(done).not.toContain('`')
+    // The card's own divider is the box-drawing rule, never a raw `---`.
+    expect(done).not.toMatch(/(^|\n)\s*-{3,}\s*(\n|$)/)
+  })
+  it('renders a multi-line narrative entry as one clean step (no raw ## leak)', () => {
+    // Screenshot regression: a running-card step whose narrative entry is
+    // itself multi-line ("Done.\n\n## Summary\n…"). The heading marker must
+    // not leak and the step must collapse to a single visual line.
     const out = renderWorkerActivity(
-      view({ narrativeLines: ['kept', '   ', 'also kept'] }),
+      view({
+        narrativeLines: ['Done.\n\n## Summary\n\nFixed the bug where a bad password logged you out'],
+        latestSummary: '',
+      }),
     )
-    expect(out).toContain('↳ <i>kept</i>')
-    expect(out).toContain('↳ <i>also kept</i>')
-    expect(out.match(/↳/g) ?? []).toHaveLength(2)
+    expect(out).not.toContain('## Summary')
+    expect(out).not.toContain('\n\n')
+    expect(out).toContain('Done. Summary Fixed the bug where a bad password logged you out')
   })
   it('escapes HTML inside narrative lines', () => {
@@ -142,17 +207,11 @@ describe('renderWorkerActivity', () => {
     expect(out).toContain('a &lt;b&gt;x&lt;/b&gt; &amp; y')
   })
-  it('escapes HTML in description, tool, arg, and summary', () => {
+  it('escapes HTML in description and summary', () => {
     const out = renderWorkerActivity(
-      view({
-        description: 'a <b>bold</b> task',
-        lastTool: { name: 'Ba<sh', sanitisedArg: 'x & y' },
-        latestSummary: 'a > b',
-      }),
+      view({ description: 'a <b>bold</b> task', latestSummary: 'a > b' }),
     )
     expect(out).toContain('a &lt;b&gt;bold&lt;/b&gt; task')
-    expect(out).toContain('Ba&lt;sh')
-    expect(out).toContain('x &amp; y')
     expect(out).toContain('a &gt; b')
   })
 })
@@ -206,7 +265,7 @@ describe('createWorkerActivityFeed', () => {
     clock = 13_000 // +3000 since last edit > 2500
     await feed.update('w1', 'chat', view({ toolCount: 3 }))
     expect(bot.edits).toHaveLength(1)
-    expect(bot.edits[0].text).toContain('(3 tools · ')
+    expect(bot.edits[0].text).toContain('3 tools')
   })
   it('forces a terminal edit on finish, skipping the throttle', async () => {
@@ -219,7 +278,7 @@ describe('createWorkerActivityFeed', () => {
     clock = 10_500 // well within the throttle window
     await feed.finish('w1', view({ state: 'done', toolCount: 5 }))
     expect(bot.edits).toHaveLength(1)
-    expect(bot.edits[0].text).toContain('✅ <b>Worker done</b>')
+    expect(bot.edits[0].text).toContain('finished · completed · 5 tools')
     // finish forgets the worker.
     expect(feed.has('w1')).toBe(false)
     expect(feed.size).toBe(0)
@@ -303,7 +362,7 @@ describe('createWorkerActivityFeed', () => {
     await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'read the brief' }))
     expect(bot.sent).toHaveLength(1)
-    expect(bot.sent[0].text).toContain('↳ <i>read the brief</i>')
+    expect(bot.sent[0].text).toContain('<b>→ read the brief</b>')
     clock = 11_000
     await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'scanned vendor A' }))
@@ -311,10 +370,10 @@ describe('createWorkerActivityFeed', () => {
     await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'scanned vendor B' }))
     const last = bot.edits.at(-1)!
-    expect(last.text).toContain('↳ <i>read the brief</i>')
-    expect(last.text).toContain('↳ <i>scanned vendor A</i>')
-    expect(last.text).toContain('↳ <i>scanned vendor B</i>')
-    expect(last.text.match(/↳/g) ?? []).toHaveLength(3)
+    expect(last.text).toContain('<i>✓ read the brief</i>')
+    expect(last.text).toContain('<i>✓ scanned vendor A</i>')
+    expect(last.text).toContain('<b>→ scanned vendor B</b>')
+    expect(last.text.match(/[✓→]/g) ?? []).toHaveLength(3)
   })
   it('dedups a repeated narrative line so the block does not duplicate', async () => {
@@ -329,7 +388,7 @@ describe('createWorkerActivityFeed', () => {
     await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'same line' }))
     const last = bot.edits.at(-1)!
-    expect(last.text.match(/↳/g) ?? []).toHaveLength(1)
+    expect(last.text.match(/[✓→]/g) ?? []).toHaveLength(1)
   })
   it('caps the narrative block to the last 6 lines', async () => {
@@ -343,7 +402,7 @@ describe('createWorkerActivityFeed', () => {
     }
     const last = bot.edits.at(-1)!
-    expect(last.text.match(/↳/g) ?? []).toHaveLength(6)
+    expect(last.text.match(/[✓→]/g) ?? []).toHaveLength(6)
     // Oldest lines evicted; newest retained.
     expect(last.text).not.toContain('line 1')
     expect(last.text).not.toContain('line 3')
@@ -368,9 +427,9 @@ describe('createWorkerActivityFeed', () => {
     clock = 13_000
     await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'line C' }))
     const last = bot.edits.at(-1)!
-    expect(last.text).toContain('↳ <i>line A</i>')
-    expect(last.text).toContain('↳ <i>line B</i>')
-    expect(last.text).toContain('↳ <i>line C</i>')
+    expect(last.text).toContain('<i>✓ line A</i>')
+    expect(last.text).toContain('<i>✓ line B</i>')
+    expect(last.text).toContain('<b>→ line C</b>')
   })
   it('forwards threadId as message_thread_id on send', async () => {

package/telegram-plugin/uat/assertions.ts CHANGED Viewed

@@ -13,16 +13,18 @@ import type { Driver, ObservedMessage, ObservedReaction } from "./driver.js";
 /**
  * Canonical shape of a worker-activity-feed message (#2000) as rendered
- * in Telegram: a running header `🔧 Worker · …` that edits in place and
- * finalizes to `✅ Worker done · …` / `⚠️ Worker failed · …`. The feed is
- * default-on fleet-wide as of v0.14.19, so background sub-agent activity
- * now surfaces as its own bot message in any chat — including DMs whose
- * scenario only cares about the agent's conversational reply.
+ * in Telegram: a native card with a `🛠 Worker · …` header that edits in
+ * place (running) and finalizes to a `finished · completed/failed · …`
+ * status line. The header is present in every state, so it's the primary
+ * key. The feed is default-on fleet-wide as of v0.14.19, so background
+ * sub-agent activity surfaces as its own bot message in any chat —
+ * including DMs whose scenario only cares about the agent's conversational
+ * reply.
  *
  * Single source of truth; the worker-feed scenario asserts against this,
  * and recall/reply scenarios exclude it via {@link isWorkerFeedMessage}.
  */
-export const WORKER_FEED_RE = /🔧\s*Worker|✅\s*Worker done|⚠️\s*Worker failed|Worker (?:done|failed)/i;
+export const WORKER_FEED_RE = /🛠[️]?\s*Worker\b|finished\s*·\s*(?:completed|failed)/i;
 /**
  * True when `m` is a worker-activity-feed message rather than the agent's

package/telegram-plugin/uat/feed-matcher.test.ts CHANGED Viewed

@@ -13,17 +13,23 @@ const feed = (text: string) => ({ text }) as Parameters<typeof isWorkerFeedMessa
 describe("isWorkerFeedMessage", () => {
   it("matches the running feed header", () => {
-    expect(isWorkerFeedMessage(feed("🔧 Worker · crawling changelog · 0:12"))).toBe(true);
+    expect(isWorkerFeedMessage(feed("🛠 Worker · crawling changelog"))).toBe(true);
+    expect(
+      isWorkerFeedMessage(feed("🛠 Worker · crawling changelog\nrunning · 00:12 · 4 tools")),
+    ).toBe(true);
   });
-  it("matches the terminal done/failed recaps", () => {
-    expect(isWorkerFeedMessage(feed("✅ Worker done · 10 tools · 1:03"))).toBe(true);
-    expect(isWorkerFeedMessage(feed("⚠️ Worker failed · 3 tools"))).toBe(true);
+  it("matches the terminal finished status (completed/failed)", () => {
+    expect(
+      isWorkerFeedMessage(feed("🛠 Worker · crawl\nfinished · completed · 10 tools · 01:03")),
+    ).toBe(true);
+    expect(
+      isWorkerFeedMessage(feed("🛠 Worker · crawl\nfinished · failed · 3 tools · 00:08")),
+    ).toBe(true);
   });
-  it("matches a done/failed header even without the leading emoji", () => {
-    expect(isWorkerFeedMessage(feed("Worker done · 2 tools"))).toBe(true);
-    expect(isWorkerFeedMessage(feed("Worker failed mid-step"))).toBe(true);
+  it("matches the finished status even on its own line (header-stripped edge)", () => {
+    expect(isWorkerFeedMessage(feed("finished · completed · 2 tools · 00:30"))).toBe(true);
   });
   it("does NOT match an ordinary agent reply", () => {
@@ -40,7 +46,7 @@ describe("isWorkerFeedMessage", () => {
   });
   it("exposes the regex for scenarios that assert on the feed directly", () => {
-    expect(WORKER_FEED_RE.test("🔧 Worker · x")).toBe(true);
+    expect(WORKER_FEED_RE.test("🛠 Worker · x")).toBe(true);
   });
 });

package/telegram-plugin/uat/scenarios/jtbd-request-secret-dm.test.ts ADDED Viewed

@@ -0,0 +1,101 @@
+/**
+ * End-to-end UAT for the agent-initiated `request_secret` flow (#2045).
+ *
+ * The upstream fix behind the 2026-06-01 incident: an agent must never ask
+ * the user to paste a secret into chat. Instead it calls `request_secret`,
+ * the operator taps [Provide securely] and sends the value once, and the
+ * gateway deletes the message + writes it straight to the vault — the raw
+ * value never lands in history/logs and is never returned to the agent.
+ *
+ * This round-trips through real Telegram + real broker + real agent and
+ * asserts the FINAL state: (a) the secure card renders with the right
+ * button, (b) after the operator provides the value the bot confirms a
+ * vault save, (c) the value actually landed in the vault (host-side read),
+ * and (d) the raw value never reappears in a bot message.
+ *
+ * **Skipped by default.** To unskip:
+ *
+ * 1. Standard UAT preflight (`uat/SETUP.md` §5-6) — test-harness agent live
+ *    (running build WITH request_secret, #2045), driver session auth'd,
+ *    env vars set.
+ * 2. `SWITCHROOM_VAULT_PASSPHRASE` in env (read-back asserts the value
+ *    landed; under telegram-id approval mode the save itself is
+ *    posture-attested and needs no passphrase).
+ * 3. Remove `describe.skip`.
+ *
+ * Cleanup is operator-side: `switchroom vault rm uat/req-secret-target`.
+ */
+import { execFileSync } from "node:child_process";
+import { describe, expect, it } from "vitest";
+import { spinUp } from "../harness.js";
+const TARGET_KEY = "uat/req-secret-target";
+// Built at runtime so the source never holds a contiguous secret literal.
+const PROVIDED_VALUE = "sentinel-2045-" + "Ab3xK9pQ".repeat(2);
+function hostVaultGet(key: string): string {
+  try {
+    return execFileSync("switchroom", ["vault", "get", key], {
+      encoding: "utf-8",
+      env: { ...process.env },
+    }).trim();
+  } catch {
+    return "";
+  }
+}
+describe.skip("uat: request_secret end-to-end (#2045)", () => {
+  it(
+    "agent calls request_secret → operator provides → value vaulted, never echoed",
+    async () => {
+      const sc = await spinUp({ agent: "test-harness" });
+      try {
+        // 1. Prompt the agent to call request_secret for a key it lacks.
+        //    (We don't fire the card from the driver — the point is to
+        //    cover the agent → gateway → capture → vault path.)
+        await sc.sendDM(
+          `I need an API token but it is NOT in your vault. Use your ` +
+          `request_secret MCP tool with key="${TARGET_KEY}", ` +
+          `reason="UAT for #2045" to ask me for it securely. Do NOT ask me ` +
+          `to paste it as a normal message.`,
+        );
+        // 2. The secure card (request_secret renders "needs a secret").
+        const card = await sc.expectMessage(/needs a secret/i, {
+          from: "bot",
+          timeout: 60_000,
+        });
+        // 3. It must carry a [Provide securely] button.
+        const kb = await sc.driver.getKeyboard(sc.botUserId, card.messageId);
+        expect(kb).not.toBeNull();
+        const provide = kb!
+          .flat()
+          .find((b) => b.callbackData !== undefined && /provide/i.test(b.text));
+        expect(provide, "card should have a [Provide securely] button").toBeDefined();
+        // 4. Tap Provide → the card arms capture + prompts for the value.
+        await sc.driver.pressButton(sc.botUserId, card.messageId, provide!.callbackData!);
+        await sc.expectMessage(/Send the value for/i, { from: "bot", timeout: 15_000 });
+        // 5. Send the value. The gateway deletes it + writes to the vault.
+        await sc.sendDM(PROVIDED_VALUE);
+        const confirm = await sc.expectMessage(/saved as/i, {
+          from: "bot",
+          timeout: 30_000,
+        });
+        // 6. The confirmation references the key, NOT the raw value.
+        expect(confirm.text).toContain(`vault:${TARGET_KEY}`);
+        expect(confirm.text).not.toContain(PROVIDED_VALUE);
+        // 7. Load-bearing: the value actually landed in the vault.
+        expect(hostVaultGet(TARGET_KEY)).toBe(PROVIDED_VALUE);
+      } finally {
+        await sc.tearDown();
+      }
+    },
+    300_000,
+  );
+});

package/telegram-plugin/uat/scenarios/jtbd-worker-activity-feed-dm.test.ts CHANGED Viewed

@@ -12,12 +12,14 @@
  * sleep/echo work, so it narrates between tools and the feed can paint
  * + edit), then asserts:
  *
- *   1. a worker-feed message appears (🔧 Worker · …), distinct from the
+ *   1. a worker-feed message appears (🛠 Worker · …), distinct from the
  *      parent's ack reply — proving background activity surfaces after
  *      the parent turn closed;
  *   2. the message edits in place while work is in flight (body changes
  *      across a window) — proving it's live, not a one-shot post;
- *   3. it finalizes to the terminal recap (✅ Worker done · … / N tools).
+ *   3. it finalizes to the terminal recap (finished · completed · N tools);
+ *   4. the native card never leaks raw Markdown (no `**`, backticks, or
+ *      ASCII `---` rule) — the #94-class regression guard.
  *
  * It logs every observed body so a human can read the real rendered UX.
  *
@@ -53,10 +55,11 @@ const BG_DISPATCH_PROMPT =
   `brief reply saying you've kicked off the background worker so I can ` +
   `watch its progress.`;
-// The feed header rendered in Telegram: "🔧 Worker · <desc>" (running)
-// or "✅ Worker done · …" / "⚠️ Worker failed · …" (terminal).
-const WORKER_FEED_RE = /🔧\s*Worker|Worker done|Worker failed|⚡/i;
-const WORKER_DONE_RE = /✅\s*Worker done|⚠️\s*Worker failed/i;
+// The feed header rendered in Telegram: "🛠 Worker · <desc>" with a
+// "running · …" status (running) or "finished · completed/failed · …"
+// (terminal).
+const WORKER_FEED_RE = /🛠\s*Worker|running\s*·|finished\s*·/i;
+const WORKER_DONE_RE = /finished\s*·\s*(completed|failed)/i;
 describe("uat: live worker-activity feed (#2000)", () => {
   it(
@@ -116,6 +119,14 @@ describe("uat: live worker-activity feed (#2000)", () => {
         expect(doneText!).toMatch(/tools?|tool ·/i);
         // Did the body actually move between first paint and terminal?
         expect(doneText).not.toBe(before);
+        // #94-class regression guard: the native card strips worker Markdown,
+        // so the rendered body must never carry raw `**`, backticks, or an
+        // ASCII `---` rule (the finished divider is the box-drawing `─────`).
+        expect(doneText!, "raw ** leaked into the card").not.toMatch(/\*\*/);
+        expect(doneText!, "raw backtick leaked into the card").not.toContain("`");
+        expect(doneText!, "raw --- rule leaked into the card").not.toMatch(
+          /(^|\n)\s*-{3,}\s*(\n|$)/,
+        );
       } finally {
         await sc.tearDown();
       }