npm - switchroom - Versions diffs - 0.15.44 → 0.16.4 - Mend

switchroom 0.15.44 → 0.16.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (150) hide show

package/dist/agent-scheduler/index.js +122 -88
package/dist/auth-broker/index.js +463 -177
package/dist/cli/autoaccept-poll.js +4842 -35
package/dist/cli/drive-write-pretool.mjs +17 -14
package/dist/cli/notion-write-pretool.mjs +117 -86
package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
package/dist/cli/self-improve-stop.mjs +428 -0
package/dist/cli/skill-validate-pretool.mjs +72 -72
package/dist/cli/switchroom.js +3249 -1241
package/dist/cli/ui/index.html +1 -1
package/dist/host-control/main.js +2833 -355
package/dist/vault/approvals/kernel-server.js +7482 -7439
package/dist/vault/broker/server.js +11315 -11272
package/examples/minimal.yaml +1 -0
package/examples/switchroom.yaml +1 -0
package/package.json +3 -3
package/profiles/_base/start.sh.hbs +88 -1
package/profiles/_shared/execution-discipline.md.hbs +18 -0
package/profiles/default/CLAUDE.md.hbs +3 -22
package/telegram-plugin/.claude-plugin/plugin.json +2 -2
package/telegram-plugin/answer-stream-flag.ts +12 -49
package/telegram-plugin/answer-stream.ts +5 -150
package/telegram-plugin/auth-snapshot-format.ts +280 -48
package/telegram-plugin/auto-fallback-fleet.ts +44 -1
package/telegram-plugin/context-exhaustion.ts +12 -0
package/telegram-plugin/demo-mask.ts +154 -0
package/telegram-plugin/dist/bridge/bridge.js +167 -124
package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
package/telegram-plugin/dist/server.js +215 -172
package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
package/telegram-plugin/draft-stream.ts +47 -410
package/telegram-plugin/final-answer-detect.ts +17 -12
package/telegram-plugin/fleet-fallback-resume.ts +131 -0
package/telegram-plugin/format.ts +56 -19
package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
package/telegram-plugin/gateway/auth-command.ts +70 -14
package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
package/telegram-plugin/gateway/current-turn-map.ts +188 -0
package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
package/telegram-plugin/gateway/effort-command.ts +8 -3
package/telegram-plugin/gateway/emission-authority.ts +369 -0
package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
package/telegram-plugin/gateway/gateway.ts +1837 -291
package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
package/telegram-plugin/gateway/represent-guard.ts +72 -0
package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
package/telegram-plugin/gateway/status-surface-log.ts +14 -3
package/telegram-plugin/history.ts +33 -11
package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
package/telegram-plugin/issues-card.ts +4 -0
package/telegram-plugin/model-unavailable.ts +124 -0
package/telegram-plugin/narrative-dedup.ts +69 -0
package/telegram-plugin/over-ping-safety-net.ts +70 -4
package/telegram-plugin/package.json +3 -3
package/telegram-plugin/pending-work-progress.ts +12 -0
package/telegram-plugin/permission-rule.ts +32 -5
package/telegram-plugin/permission-title.ts +152 -9
package/telegram-plugin/quota-check.ts +13 -0
package/telegram-plugin/quota-watch.ts +135 -7
package/telegram-plugin/registry/turns-schema.test.ts +24 -0
package/telegram-plugin/registry/turns-schema.ts +9 -0
package/telegram-plugin/runtime-metrics.ts +13 -0
package/telegram-plugin/session-tail.ts +96 -11
package/telegram-plugin/silence-poke.ts +170 -24
package/telegram-plugin/slot-banner-driver.ts +3 -0
package/telegram-plugin/status-no-truncate.ts +44 -0
package/telegram-plugin/status-reactions.ts +20 -3
package/telegram-plugin/stream-controller.ts +4 -23
package/telegram-plugin/stream-reply-handler.ts +6 -24
package/telegram-plugin/streaming-metrics.ts +91 -0
package/telegram-plugin/subagent-watcher.ts +212 -66
package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
package/telegram-plugin/tests/answer-stream.test.ts +2 -411
package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
package/telegram-plugin/tests/demo-mask.test.ts +127 -0
package/telegram-plugin/tests/draft-stream.test.ts +0 -827
package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
package/telegram-plugin/tests/feed-survival.test.ts +526 -0
package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
package/telegram-plugin/tests/history.test.ts +60 -0
package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
package/telegram-plugin/tests/permission-rule.test.ts +17 -0
package/telegram-plugin/tests/permission-title.test.ts +206 -17
package/telegram-plugin/tests/quota-watch.test.ts +252 -9
package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
package/telegram-plugin/tests/represent-guard.test.ts +162 -0
package/telegram-plugin/tests/session-tail.test.ts +147 -3
package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
package/telegram-plugin/tests/telegram-format.test.ts +101 -6
package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
package/telegram-plugin/tests/tool-labels.test.ts +67 -0
package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
package/telegram-plugin/tests/welcome-text.test.ts +32 -3
package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
package/telegram-plugin/tool-activity-summary.ts +375 -58
package/telegram-plugin/turn-liveness-floor.ts +240 -0
package/telegram-plugin/uat/assertions.ts +115 -0
package/telegram-plugin/uat/driver.ts +68 -0
package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
package/telegram-plugin/welcome-text.ts +13 -1
package/telegram-plugin/worker-activity-feed.ts +157 -82
package/telegram-plugin/draft-transport.ts +0 -122
package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
package/telegram-plugin/tests/draft-transport.test.ts +0 -211

package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts ADDED Viewed

@@ -0,0 +1,202 @@
+/**
+ * JTBD: "the reply is last" + "a conversational turn opens no card" — the
+ * CI-enforced form of the deterministic emission invariants (design
+ * `docs/message-emission-determinism.md` §11; #2556).
+ *
+ * Four cases, each pulled from server SEND-ORDER history (`driver.getHistory`)
+ * so a post-reply card that landed before any live observer started is still
+ * caught. The ordering assertion is the SCOPED one (§6): within a single
+ * foreground turn, no activity-card / worker-feed surface opens after that
+ * turn's reply — NOT a naive "answer has the max message_id" (that would
+ * false-positive on a legitimate later background / represent / error surface).
+ * `assertReplyIsLast` filters to the activity/answer lanes of the SAME turn.
+ *
+ *   1. Conversational, zero-tool ("Reply with only: pong") — NO activity card
+ *      opens in this turn at all (lever 5 base case / G1, the triplication).
+ *   2. Tool-heavy (a REAL_WORK activity-surface prompt) — a card opened AND no
+ *      card for this turn sits below the substantive reply (lever 1 / races
+ *      A/B/E).
+ *   3. Short-pinging final ("Reply 'Done!' then write one memory") — the
+ *      currently-reordering case; green only once lever 2 lands (G5).
+ *   4. Two-turn backstop — a prompt that ends a turn without a qualifying reply
+ *      (forcing the silent-end re-prompt); no card opens below the final answer
+ *      across the re-prompt boundary (G3/C). Needs `getHistory`.
+ *
+ * Runs under CI `uat-gate`; the full live MTProto run needs the test-harness
+ * agent + a vault session, so locally this self-skips green (no driver).
+ */
+import { describe, it, expect, beforeAll } from "vitest";
+import { spinUp, type Scenario } from "../harness.js";
+import {
+  assertReplyIsLast,
+  isAnswer,
+  isActivityFeedMessage,
+  isWorkerFeedMessage,
+} from "../assertions.js";
+import { collectTurn } from "../real-work-prompts.js";
+import type { ObservedMessage } from "../driver.js";
+/** Per-case overall budget. */
+const TURN_BUDGET_MS = 130_000;
+/** History pull depth — comfortably covers a multi-surface two-turn exchange. */
+const HISTORY_LIMIT = 80;
+describe("uat: reply-is-last + conversational-turn-opens-no-card (DM)", () => {
+  let sc: Scenario | null = null;
+  beforeAll(async () => {
+    try {
+      sc = await spinUp({ agent: "test-harness" });
+      await sc.driver.primeDialogs();
+    } catch (err) {
+      console.warn(
+        `[reply-is-last] no live driver — self-skipping green: ${(err as Error).message}`,
+      );
+      sc = null;
+    }
+  });
+  // Case 1 — conversational, zero-tool: NO activity card opens at all.
+  it(
+    "case 1: a conversational 0-tool turn opens NO activity card (lever 5 / G1)",
+    async () => {
+      if (sc == null) return; // self-skip green
+      const { driver, botUserId, driverUserId } = sc;
+      const obs = await collectTurn(
+        driver,
+        botUserId,
+        driverUserId,
+        "Reply with only this exact word and nothing else, using no tools at all: pong",
+        { timeoutMs: TURN_BUDGET_MS, minAnswerChars: 1, settleMs: 8_000 },
+      );
+      expect(obs.answer, "the pong answer must land").not.toBeNull();
+      // Pull send-order history and confirm: no activity-card surface opened in
+      // this turn. We scope to surfaces at/after the answer's turn by reusing
+      // assertReplyIsLast, AND additionally assert the live collector saw no
+      // activity feed for this minimal turn.
+      const history = await driver.getHistory(botUserId, HISTORY_LIMIT);
+      assertReplyIsLast(history, driverUserId, { turn: obs.answer! });
+      // The strong G1 assertion: a 0-tool conversational turn must produce no
+      // activity feed at all (neither open nor edit).
+      expect(
+        obs.sawActivityFeed,
+        "a 0-tool conversational turn must not open an activity card (the triplication)",
+      ).toBe(false);
+    },
+    TURN_BUDGET_MS + 30_000,
+  );
+  // Case 2 — tool-heavy: a card opens, but none below the substantive reply.
+  it(
+    "case 2: a tool-heavy turn opens a card but none below the reply (lever 1 / races A/B/E)",
+    async () => {
+      if (sc == null) return; // self-skip green
+      const { driver, botUserId, driverUserId } = sc;
+      const obs = await collectTurn(
+        driver,
+        botUserId,
+        driverUserId,
+        "Use your Bash tool to run `uname -a`, then tell me in one sentence what " +
+          "operating system this machine is running. Keep the answer substantive " +
+          "(a few sentences explaining what the output means).",
+        { timeoutMs: TURN_BUDGET_MS, minAnswerChars: 60, settleMs: 8_000 },
+      );
+      if (obs.answer == null) {
+        console.warn("[reply-is-last] case 2 INCONCLUSIVE — no answer landed in budget.");
+        return;
+      }
+      if (!obs.sawActivityFeed && !obs.sawWorkerFeed) {
+        console.warn(
+          "[reply-is-last] case 2 INCONCLUSIVE — the agent answered without a " +
+            "tool feed; the reorder vector this guards was not exercised.",
+        );
+        return;
+      }
+      const history = await driver.getHistory(botUserId, HISTORY_LIMIT);
+      // The scoped invariant: no activity/worker-feed surface for this turn
+      // lands below the substantive reply.
+      assertReplyIsLast(history, driverUserId, { turn: obs.answer });
+    },
+    TURN_BUDGET_MS + 30_000,
+  );
+  // Case 3 — short-pinging final: the case the lever-2 ordering fix makes pass.
+  it(
+    "case 3: a short-pinging final stays last even with post-reply tool work (lever 2 / G5)",
+    async () => {
+      if (sc == null) return; // self-skip green
+      const { driver, botUserId, driverUserId } = sc;
+      const obs = await collectTurn(
+        driver,
+        botUserId,
+        driverUserId,
+        "Reply with only the single word 'Done!' (with the exclamation mark) — " +
+          "then, AFTER that reply, save a one-line memory noting you completed " +
+          "this test. The short reply is your final answer.",
+        // The "Done!" reply is short; accept it as the answer.
+        { timeoutMs: TURN_BUDGET_MS, minAnswerChars: 1, settleMs: 10_000 },
+      );
+      if (obs.answer == null) {
+        console.warn("[reply-is-last] case 3 INCONCLUSIVE — no answer landed in budget.");
+        return;
+      }
+      const history = await driver.getHistory(botUserId, HISTORY_LIMIT);
+      // The post-'Done!' memory write must NOT have reopened a card BELOW the
+      // reply. Before lever 2 this reorders (the named G5 residual); after, the
+      // card is finalized before the send and stays above.
+      assertReplyIsLast(history, driverUserId, { turn: obs.answer });
+    },
+    TURN_BUDGET_MS + 30_000,
+  );
+  // Case 4 — two-turn backstop: no card below the final answer across the
+  // silent-end re-prompt boundary.
+  it(
+    "case 4: no card opens below the final answer across a re-prompt boundary (G3/C)",
+    async () => {
+      if (sc == null) return; // self-skip green
+      const { driver, botUserId, driverUserId } = sc;
+      // A prompt that nudges the model to write its answer as prose first
+      // (no reply tool) — forcing the silent-end re-prompt, then a real answer
+      // on the re-prompted turn. The model's exact path isn't forceable, so the
+      // ordering assertion is the durable part; the re-prompt is best-effort.
+      const obs = await collectTurn(
+        driver,
+        botUserId,
+        driverUserId,
+        "Think out loud briefly, then give me a thorough multi-sentence answer " +
+          "(at least 220 characters) explaining what a Telegram supergroup is and " +
+          "how forum topics work inside one.",
+        { timeoutMs: TURN_BUDGET_MS, minAnswerChars: 200, settleMs: 12_000 },
+      );
+      if (obs.answer == null) {
+        console.warn("[reply-is-last] case 4 INCONCLUSIVE — no answer landed in budget.");
+        return;
+      }
+      // Pull full send-order history (the re-prompt may have produced a second
+      // card before the live observer in collectTurn caught it) and assert the
+      // final answer's turn has no feed surface below it.
+      const history = await driver.getHistory(botUserId, HISTORY_LIMIT);
+      assertReplyIsLast(history, driverUserId, { turn: obs.answer });
+      // Sanity: the answer is a genuine answer-lane message (not a feed).
+      expect(isAnswer(obs.answer, driverUserId)).toBe(true);
+      expect(isActivityFeedMessage(obs.answer)).toBe(false);
+      expect(isWorkerFeedMessage(obs.answer)).toBe(false);
+    },
+    TURN_BUDGET_MS + 30_000,
+  );
+});

package/telegram-plugin/uat/scenarios/reactions-dm.test.ts CHANGED Viewed

@@ -10,36 +10,27 @@
  * of the bot's final reply — otherwise the user looks at their
  * inbound, sees it still wearing 🤔, and asks "you done?").
  *
- * History: this scenario was previously `describe.skip` with a
- * rationale that the pinned progress card "renders INSTEAD of
- * reactions". The card was retired in #1126; the card-vs-reaction
- * branch in the gateway is dead. We can now exercise the full
- * lifecycle end-to-end without the two-agent split.
- *
  * What we assert (in priority order):
  *
- *  1. Within the turn, the driver sees AT LEAST ONE `+` reaction
- *     op (the L1 "I'm alive" signal). Fast turns may collapse
- *     intermediate states, so we only require *one* add, not a
- *     specific emoji.
+ *  1. Within the turn, the bot places AT LEAST ONE reaction on the
+ *     inbound message (the L1 "I'm alive" signal). We poll via
+ *     `driver.pollReactions()` rather than subscribing to push
+ *     events — Telegram does not deliver `updateMessageReactions`
+ *     push events to the human account when a bot sets a reaction
+ *     in a DM (fixes #2502).
  *  2. By the time the bot has sent a final reply (+ a short tail
- *     for Telegram to deliver the terminal-emoji replace), the
- *     LAST observed `+` op is in the `done` set (`👍 / 💯 / 🎉`).
- *
- * Why "last `+` op wins" rather than `expectReaction(['👍'])` with
- * a literal sequence: `setMessageReaction` REPLACES the prior emoji
- * atomically. mtcute's update stream can deliver the replace as a
- * `-prev` followed by a `+next`, or as a single coalesced event,
- * depending on server batching. The "last add wins" shape matches
- * the production semantics — whatever's *currently* on the message
- * is what the user actually sees.
+ *     for Telegram to apply the terminal-emoji replace), the reaction
+ *     on the inbound message is in the `done` set (`👍 / 💯 / 🎉`).
  *
- * The observer must be attached BEFORE the reply lands so we
- * capture the queued / working reactions, not just the terminal
- * one. Pattern: `observeReactions` immediately after `sendDM`
- * returns the messageId, drain into a trail array while we wait
- * for the reply, then run a short tail to catch the terminal
- * after the reply.
+ * Polling strategy:
+ *   - Poll every `POLL_INTERVAL_MS` until a terminal-done emoji
+ *     appears OR the bot has replied AND `TAIL_AFTER_REPLY_MS` has
+ *     elapsed. Bail immediately on reply-timeout so CI doesn't burn
+ *     the full 90s safety ceiling.
+ *   - After the reply arrives, keep polling through the tail window
+ *     so the terminal emoji (👍) has time to replace the working
+ *     emoji (👀/🤔). In practice the replace happens within 1-2s
+ *     of the reply on a healthy bot; the 8s ceiling absorbs jitter.
  *
  * Requires the same env as `smoke-dm-reply.test.ts` (see
  * `uat/SETUP.md` §6).
@@ -49,16 +40,11 @@ import { describe, expect, it } from "vitest";
 import { spinUp } from "../harness.js";
 const TERMINAL_DONE_EMOJI = new Set(["👍", "💯", "🎉"]);
+const POLL_INTERVAL_MS = 1_000;
 const TAIL_AFTER_REPLY_MS = 8_000;
 const INBOUND = (): string => `uat-reactions ${new Date().toISOString()}`;
-interface ObservedOp {
-  emoji: string;
-  op: "+" | "-";
-  at: number;
-}
 describe("uat: reaction lifecycle on driver DM", () => {
   it(
     "driver sees an alive reaction, then a terminal-done emoji by reply tail",
@@ -67,71 +53,91 @@ describe("uat: reaction lifecycle on driver DM", () => {
       try {
         const sent = await sc.sendDM(INBOUND());
-        // Attach the observer immediately so the queued (👀) and
-        // working reactions don't fire before the listener exists.
-        const trail: ObservedOp[] = [];
-        const iter = sc.driver
-          .observeReactions(sc.botUserId, { messageId: sent.messageId })
-          [Symbol.asyncIterator]();
-        let pump: Promise<void> | null = null;
-        let stopPump = false;
-        pump = (async () => {
-          while (!stopPump) {
-            const next = await iter.next();
-            if (next.done === true) return;
-            trail.push({
-              emoji: next.value.emoji,
-              op: next.value.op,
-              at: Date.now(),
-            });
-          }
-        })();
+        // Poll the reaction state on the sent message. We use polling
+        // rather than `observeReactions` because Telegram does not
+        // deliver `updateMessageReactions` push updates to user accounts
+        // when a bot sets a reaction in a DM — see module docblock.
+        const reactionHistory: string[][] = [];
+        let replyReceived = false;
+        let replyReceivedAt = 0;
-        try {
-          // Wait for the bot's reply (any content). Gives the L1
-          // lifecycle time to traverse queued → working → done.
-          const reply = await sc.expectMessage(/\S/, {
-            from: "bot",
-            timeout: 60_000,
+        // Wait for the bot's reply (any content). We start polling
+        // concurrently so we capture intermediate reactions during
+        // the turn.
+        let replyTimedOut = false;
+        const replyPromise = sc
+          .expectMessage(/\S/, { from: "bot", timeout: 60_000 })
+          .then((reply) => {
+            expect(reply.text.length).toBeGreaterThan(0);
+            replyReceived = true;
+            replyReceivedAt = Date.now();
+            return reply;
+          })
+          .catch((err: unknown) => {
+            // expectMessage timeout → mark so the poll loop exits immediately
+            // instead of burning the full 90s safety ceiling on CI failure.
+            replyTimedOut = true;
+            throw err;
           });
-          expect(reply.text.length).toBeGreaterThan(0);
-          // Tail after the reply for Telegram to deliver the
-          // terminal-emoji replace. In practice <1s on a healthy bot;
-          // 8s ceiling absorbs server batching jitter.
-          await new Promise((resolve) =>
-            setTimeout(resolve, TAIL_AFTER_REPLY_MS),
-          );
-        } finally {
-          stopPump = true;
-          await iter.return?.();
-          if (pump) {
-            await pump.catch(() => {
-              /* generator return triggers rejection on pending iter.next() — ignore */
-            });
+        // Polling loop: sample reactions while waiting for the reply,
+        // then continue for TAIL_AFTER_REPLY_MS after the reply lands.
+        const poll = async (): Promise<void> => {
+          const deadline = Date.now() + 90_000; // safety ceiling
+          while (Date.now() < deadline) {
+            // Bail early if the reply timed out — no point polling further.
+            if (replyTimedOut) break;
+            const emojis = await sc.driver.pollReactions(
+              sc.botUserId,
+              sent.messageId,
+            );
+            if (emojis.length > 0) {
+              reactionHistory.push([...emojis]);
+            }
+            if (
+              replyReceived &&
+              Date.now() - replyReceivedAt >= TAIL_AFTER_REPLY_MS
+            ) {
+              break;
+            }
+            await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
           }
-        }
+        };
+        // Run both concurrently; wait for both to settle.
+        await Promise.all([replyPromise, poll()]);
-        // L1 alive signal: at least one `+` op landed during the turn.
-        const adds = trail.filter((o) => o.op === "+");
+        // L1 alive signal: at least one non-empty reaction set was
+        // observed during the turn.
+        const allSeen = reactionHistory.flat();
+        const uniqueSeen = [...new Set(allSeen)];
         expect(
-          adds.length,
-          `expected at least one reaction-add during the turn, got 0. ` +
-            `Full trail: ${trail.map((o) => `${o.op}${o.emoji}`).join(" ") || "(empty)"}`,
+          reactionHistory.length,
+          `expected at least one reaction poll to show a reaction during the ` +
+            `turn, but all ${reactionHistory.length > 0 ? "polls returned nothing with emojis" : "polls returned empty"}. ` +
+            `History snapshots: ${reactionHistory.map((s) => `[${s.join(",")}]`).join(" ") || "(none)"}`,
         ).toBeGreaterThan(0);
-        // L1 terminal: the LAST `+` op should be a terminal-done emoji.
-        // Extra `-` ops after the final `+` are tolerated (Telegram
-        // sometimes emits a bare clean-up `-`); the last `+` is what
-        // the user actually sees.
-        const lastAdd = adds[adds.length - 1];
+        // L1 terminal: the LAST non-empty snapshot should contain a
+        // terminal-done emoji. `setMessageReaction` replaces atomically,
+        // so the last snapshot holds whatever is currently on the message.
+        const lastSnapshot = reactionHistory[reactionHistory.length - 1];
+        // The bot uses setMessageReaction (replace, not append) — exactly one
+        // emoji should be set at any time. Assert the invariant so we catch
+        // accidental multi-emoji states, then check the terminal-done value.
+        expect(
+          lastSnapshot.length,
+          `expected exactly 1 reaction in the final snapshot, got [${lastSnapshot.join(",")}]. ` +
+            `All snapshots: ${reactionHistory.map((s) => `[${s.join(",")}]`).join(" ")}`,
+        ).toBe(1);
+        const lastEmoji = lastSnapshot[0];
         expect(
-          TERMINAL_DONE_EMOJI.has(lastAdd.emoji),
-          `expected last reaction-add to be one of ${[
+          TERMINAL_DONE_EMOJI.has(lastEmoji),
+          `expected last reaction to be one of ${[
             ...TERMINAL_DONE_EMOJI,
-          ].join(", ")}, got ${lastAdd.emoji}. Full trail: ${trail
-            .map((o) => `${o.op}${o.emoji}`)
-            .join(" ")}`,
+          ].join(", ")}, got ${lastEmoji}. ` +
+            `All snapshots: ${reactionHistory.map((s) => `[${s.join(",")}]`).join(" ")}. ` +
+            `Unique emojis seen: ${uniqueSeen.join(", ") || "(none)"}`,
         ).toBe(true);
       } finally {
         await sc.tearDown();

package/telegram-plugin/welcome-text.ts CHANGED Viewed

@@ -11,6 +11,8 @@
  * monospace inline and avoid Telegram treating them as markdown.
  */
+import { maskUsername } from "./demo-mask.js";
 export type AuthSummary = {
   authenticated: boolean;
   subscription_type: string | null;
@@ -198,10 +200,19 @@ const STATUS_DOT: Record<StatusProbeRow['status'], string> = {
 export function statusPairedText(params: {
   user: string;
   meta: AgentMetadata;
+  /**
+   * Demo mode (the `/status demo` suffix). When true the paired-user tag
+   * (`@handle` or numeric sender id) is run through `maskUsername` so a
+   * screen recording shows a stable fake `@demo_user…` handle instead of
+   * the operator's real Telegram identity. Off by default — the agent /
+   * model / health / audit topology below is NOT masked (out of scope).
+   */
+  demo?: boolean;
 }): string {
   const { user, meta } = params;
+  const shownUser = params.demo ? maskUsername(user) : user;
   const lines = [
-    `Paired as ${escapeHtml(user)}.`,
+    `Paired as ${escapeHtml(shownUser)}.`,
     ``,
     `Agent: ${formatAgentLine(meta)}`,
     `Auth: ${formatAuthLine(meta.auth)}`,
@@ -327,6 +338,7 @@ export const TELEGRAM_MENU_COMMANDS = [
   { command: "effort", description: "Show or switch the reasoning effort" },
   { command: "doctor", description: "Health check (deps, services, MCP)" },
   { command: "usage", description: "Pro/Max plan quota (5h + 7d windows)" },
+  { command: "whoami", description: "This agent's sandbox: tools, MCP, vault key-names" },
   // Vault — secrets + capability grants. /vault is a top-level command
   // dispatching subcommands (list, get, set, delete, status, unlock, lock,
   // grant, grants). Surfaced in the menu so mobile users can tap-to-pick