switchroom 0.15.44 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/dist/agent-scheduler/index.js +122 -88
  2. package/dist/auth-broker/index.js +463 -177
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +17 -14
  5. package/dist/cli/notion-write-pretool.mjs +117 -86
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/skill-validate-pretool.mjs +72 -72
  9. package/dist/cli/switchroom.js +3249 -1241
  10. package/dist/cli/ui/index.html +1 -1
  11. package/dist/host-control/main.js +2833 -355
  12. package/dist/vault/approvals/kernel-server.js +7482 -7439
  13. package/dist/vault/broker/server.js +11315 -11272
  14. package/examples/minimal.yaml +1 -0
  15. package/examples/switchroom.yaml +1 -0
  16. package/package.json +3 -3
  17. package/profiles/_base/start.sh.hbs +88 -1
  18. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  19. package/profiles/default/CLAUDE.md.hbs +3 -22
  20. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  21. package/telegram-plugin/answer-stream-flag.ts +12 -49
  22. package/telegram-plugin/answer-stream.ts +5 -150
  23. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  24. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  25. package/telegram-plugin/context-exhaustion.ts +12 -0
  26. package/telegram-plugin/demo-mask.ts +154 -0
  27. package/telegram-plugin/dist/bridge/bridge.js +167 -124
  28. package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
  29. package/telegram-plugin/dist/server.js +215 -172
  30. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  31. package/telegram-plugin/draft-stream.ts +47 -410
  32. package/telegram-plugin/final-answer-detect.ts +17 -12
  33. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  34. package/telegram-plugin/format.ts +56 -19
  35. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  36. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  37. package/telegram-plugin/gateway/auth-command.ts +70 -14
  38. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  39. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  40. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  41. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  42. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  43. package/telegram-plugin/gateway/effort-command.ts +8 -3
  44. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  45. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  46. package/telegram-plugin/gateway/gateway.ts +1837 -291
  47. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  48. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  49. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  50. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  51. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  52. package/telegram-plugin/history.ts +33 -11
  53. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  54. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  55. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  56. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  57. package/telegram-plugin/issues-card.ts +4 -0
  58. package/telegram-plugin/model-unavailable.ts +124 -0
  59. package/telegram-plugin/narrative-dedup.ts +69 -0
  60. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  61. package/telegram-plugin/package.json +3 -3
  62. package/telegram-plugin/pending-work-progress.ts +12 -0
  63. package/telegram-plugin/permission-rule.ts +32 -5
  64. package/telegram-plugin/permission-title.ts +152 -9
  65. package/telegram-plugin/quota-check.ts +13 -0
  66. package/telegram-plugin/quota-watch.ts +135 -7
  67. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  68. package/telegram-plugin/registry/turns-schema.ts +9 -0
  69. package/telegram-plugin/runtime-metrics.ts +13 -0
  70. package/telegram-plugin/session-tail.ts +96 -11
  71. package/telegram-plugin/silence-poke.ts +170 -24
  72. package/telegram-plugin/slot-banner-driver.ts +3 -0
  73. package/telegram-plugin/status-no-truncate.ts +44 -0
  74. package/telegram-plugin/status-reactions.ts +20 -3
  75. package/telegram-plugin/stream-controller.ts +4 -23
  76. package/telegram-plugin/stream-reply-handler.ts +6 -24
  77. package/telegram-plugin/streaming-metrics.ts +91 -0
  78. package/telegram-plugin/subagent-watcher.ts +212 -66
  79. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  80. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  81. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  82. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  83. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  84. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  85. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  86. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  87. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  88. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  89. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  90. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  91. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  92. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  93. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  94. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  95. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  96. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  97. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  98. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  99. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  100. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  101. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  102. package/telegram-plugin/tests/history.test.ts +60 -0
  103. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  104. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  105. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  106. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  107. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  108. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  109. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  110. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  111. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  112. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  113. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  114. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  115. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  116. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  117. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  118. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  119. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  120. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  121. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  122. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  123. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  124. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  125. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  126. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  127. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  128. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  129. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  130. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  131. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  132. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  133. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  134. package/telegram-plugin/tool-activity-summary.ts +375 -58
  135. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  136. package/telegram-plugin/uat/assertions.ts +115 -0
  137. package/telegram-plugin/uat/driver.ts +68 -0
  138. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  139. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  145. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  146. package/telegram-plugin/welcome-text.ts +13 -1
  147. package/telegram-plugin/worker-activity-feed.ts +157 -82
  148. package/telegram-plugin/draft-transport.ts +0 -122
  149. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  150. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -0,0 +1,202 @@
1
+ /**
2
+ * JTBD: "the reply is last" + "a conversational turn opens no card" — the
3
+ * CI-enforced form of the deterministic emission invariants (design
4
+ * `docs/message-emission-determinism.md` §11; #2556).
5
+ *
6
+ * Four cases, each pulled from server SEND-ORDER history (`driver.getHistory`)
7
+ * so a post-reply card that landed before any live observer started is still
8
+ * caught. The ordering assertion is the SCOPED one (§6): within a single
9
+ * foreground turn, no activity-card / worker-feed surface opens after that
10
+ * turn's reply — NOT a naive "answer has the max message_id" (that would
11
+ * false-positive on a legitimate later background / represent / error surface).
12
+ * `assertReplyIsLast` filters to the activity/answer lanes of the SAME turn.
13
+ *
14
+ * 1. Conversational, zero-tool ("Reply with only: pong") — NO activity card
15
+ * opens in this turn at all (lever 5 base case / G1, the triplication).
16
+ * 2. Tool-heavy (a REAL_WORK activity-surface prompt) — a card opened AND no
17
+ * card for this turn sits below the substantive reply (lever 1 / races
18
+ * A/B/E).
19
+ * 3. Short-pinging final ("Reply 'Done!' then write one memory") — the
20
+ * currently-reordering case; green only once lever 2 lands (G5).
21
+ * 4. Two-turn backstop — a prompt that ends a turn without a qualifying reply
22
+ * (forcing the silent-end re-prompt); no card opens below the final answer
23
+ * across the re-prompt boundary (G3/C). Needs `getHistory`.
24
+ *
25
+ * Runs under CI `uat-gate`; the full live MTProto run needs the test-harness
26
+ * agent + a vault session, so locally this self-skips green (no driver).
27
+ */
28
+ import { describe, it, expect, beforeAll } from "vitest";
29
+ import { spinUp, type Scenario } from "../harness.js";
30
+ import {
31
+ assertReplyIsLast,
32
+ isAnswer,
33
+ isActivityFeedMessage,
34
+ isWorkerFeedMessage,
35
+ } from "../assertions.js";
36
+ import { collectTurn } from "../real-work-prompts.js";
37
+ import type { ObservedMessage } from "../driver.js";
38
+
39
+ /** Per-case overall budget. */
40
+ const TURN_BUDGET_MS = 130_000;
41
+ /** History pull depth — comfortably covers a multi-surface two-turn exchange. */
42
+ const HISTORY_LIMIT = 80;
43
+
44
+ describe("uat: reply-is-last + conversational-turn-opens-no-card (DM)", () => {
45
+ let sc: Scenario | null = null;
46
+
47
+ beforeAll(async () => {
48
+ try {
49
+ sc = await spinUp({ agent: "test-harness" });
50
+ await sc.driver.primeDialogs();
51
+ } catch (err) {
52
+ console.warn(
53
+ `[reply-is-last] no live driver — self-skipping green: ${(err as Error).message}`,
54
+ );
55
+ sc = null;
56
+ }
57
+ });
58
+
59
+ // Case 1 — conversational, zero-tool: NO activity card opens at all.
60
+ it(
61
+ "case 1: a conversational 0-tool turn opens NO activity card (lever 5 / G1)",
62
+ async () => {
63
+ if (sc == null) return; // self-skip green
64
+ const { driver, botUserId, driverUserId } = sc;
65
+
66
+ const obs = await collectTurn(
67
+ driver,
68
+ botUserId,
69
+ driverUserId,
70
+ "Reply with only this exact word and nothing else, using no tools at all: pong",
71
+ { timeoutMs: TURN_BUDGET_MS, minAnswerChars: 1, settleMs: 8_000 },
72
+ );
73
+
74
+ expect(obs.answer, "the pong answer must land").not.toBeNull();
75
+
76
+ // Pull send-order history and confirm: no activity-card surface opened in
77
+ // this turn. We scope to surfaces at/after the answer's turn by reusing
78
+ // assertReplyIsLast, AND additionally assert the live collector saw no
79
+ // activity feed for this minimal turn.
80
+ const history = await driver.getHistory(botUserId, HISTORY_LIMIT);
81
+ assertReplyIsLast(history, driverUserId, { turn: obs.answer! });
82
+
83
+ // The strong G1 assertion: a 0-tool conversational turn must produce no
84
+ // activity feed at all (neither open nor edit).
85
+ expect(
86
+ obs.sawActivityFeed,
87
+ "a 0-tool conversational turn must not open an activity card (the triplication)",
88
+ ).toBe(false);
89
+ },
90
+ TURN_BUDGET_MS + 30_000,
91
+ );
92
+
93
+ // Case 2 — tool-heavy: a card opens, but none below the substantive reply.
94
+ it(
95
+ "case 2: a tool-heavy turn opens a card but none below the reply (lever 1 / races A/B/E)",
96
+ async () => {
97
+ if (sc == null) return; // self-skip green
98
+ const { driver, botUserId, driverUserId } = sc;
99
+
100
+ const obs = await collectTurn(
101
+ driver,
102
+ botUserId,
103
+ driverUserId,
104
+ "Use your Bash tool to run `uname -a`, then tell me in one sentence what " +
105
+ "operating system this machine is running. Keep the answer substantive " +
106
+ "(a few sentences explaining what the output means).",
107
+ { timeoutMs: TURN_BUDGET_MS, minAnswerChars: 60, settleMs: 8_000 },
108
+ );
109
+
110
+ if (obs.answer == null) {
111
+ console.warn("[reply-is-last] case 2 INCONCLUSIVE — no answer landed in budget.");
112
+ return;
113
+ }
114
+ if (!obs.sawActivityFeed && !obs.sawWorkerFeed) {
115
+ console.warn(
116
+ "[reply-is-last] case 2 INCONCLUSIVE — the agent answered without a " +
117
+ "tool feed; the reorder vector this guards was not exercised.",
118
+ );
119
+ return;
120
+ }
121
+
122
+ const history = await driver.getHistory(botUserId, HISTORY_LIMIT);
123
+ // The scoped invariant: no activity/worker-feed surface for this turn
124
+ // lands below the substantive reply.
125
+ assertReplyIsLast(history, driverUserId, { turn: obs.answer });
126
+ },
127
+ TURN_BUDGET_MS + 30_000,
128
+ );
129
+
130
+ // Case 3 — short-pinging final: the case the lever-2 ordering fix makes pass.
131
+ it(
132
+ "case 3: a short-pinging final stays last even with post-reply tool work (lever 2 / G5)",
133
+ async () => {
134
+ if (sc == null) return; // self-skip green
135
+ const { driver, botUserId, driverUserId } = sc;
136
+
137
+ const obs = await collectTurn(
138
+ driver,
139
+ botUserId,
140
+ driverUserId,
141
+ "Reply with only the single word 'Done!' (with the exclamation mark) — " +
142
+ "then, AFTER that reply, save a one-line memory noting you completed " +
143
+ "this test. The short reply is your final answer.",
144
+ // The "Done!" reply is short; accept it as the answer.
145
+ { timeoutMs: TURN_BUDGET_MS, minAnswerChars: 1, settleMs: 10_000 },
146
+ );
147
+
148
+ if (obs.answer == null) {
149
+ console.warn("[reply-is-last] case 3 INCONCLUSIVE — no answer landed in budget.");
150
+ return;
151
+ }
152
+
153
+ const history = await driver.getHistory(botUserId, HISTORY_LIMIT);
154
+ // The post-'Done!' memory write must NOT have reopened a card BELOW the
155
+ // reply. Before lever 2 this reorders (the named G5 residual); after, the
156
+ // card is finalized before the send and stays above.
157
+ assertReplyIsLast(history, driverUserId, { turn: obs.answer });
158
+ },
159
+ TURN_BUDGET_MS + 30_000,
160
+ );
161
+
162
+ // Case 4 — two-turn backstop: no card below the final answer across the
163
+ // silent-end re-prompt boundary.
164
+ it(
165
+ "case 4: no card opens below the final answer across a re-prompt boundary (G3/C)",
166
+ async () => {
167
+ if (sc == null) return; // self-skip green
168
+ const { driver, botUserId, driverUserId } = sc;
169
+
170
+ // A prompt that nudges the model to write its answer as prose first
171
+ // (no reply tool) — forcing the silent-end re-prompt, then a real answer
172
+ // on the re-prompted turn. The model's exact path isn't forceable, so the
173
+ // ordering assertion is the durable part; the re-prompt is best-effort.
174
+ const obs = await collectTurn(
175
+ driver,
176
+ botUserId,
177
+ driverUserId,
178
+ "Think out loud briefly, then give me a thorough multi-sentence answer " +
179
+ "(at least 220 characters) explaining what a Telegram supergroup is and " +
180
+ "how forum topics work inside one.",
181
+ { timeoutMs: TURN_BUDGET_MS, minAnswerChars: 200, settleMs: 12_000 },
182
+ );
183
+
184
+ if (obs.answer == null) {
185
+ console.warn("[reply-is-last] case 4 INCONCLUSIVE — no answer landed in budget.");
186
+ return;
187
+ }
188
+
189
+ // Pull full send-order history (the re-prompt may have produced a second
190
+ // card before the live observer in collectTurn caught it) and assert the
191
+ // final answer's turn has no feed surface below it.
192
+ const history = await driver.getHistory(botUserId, HISTORY_LIMIT);
193
+ assertReplyIsLast(history, driverUserId, { turn: obs.answer });
194
+
195
+ // Sanity: the answer is a genuine answer-lane message (not a feed).
196
+ expect(isAnswer(obs.answer, driverUserId)).toBe(true);
197
+ expect(isActivityFeedMessage(obs.answer)).toBe(false);
198
+ expect(isWorkerFeedMessage(obs.answer)).toBe(false);
199
+ },
200
+ TURN_BUDGET_MS + 30_000,
201
+ );
202
+ });
@@ -10,36 +10,27 @@
10
10
  * of the bot's final reply — otherwise the user looks at their
11
11
  * inbound, sees it still wearing 🤔, and asks "you done?").
12
12
  *
13
- * History: this scenario was previously `describe.skip` with a
14
- * rationale that the pinned progress card "renders INSTEAD of
15
- * reactions". The card was retired in #1126; the card-vs-reaction
16
- * branch in the gateway is dead. We can now exercise the full
17
- * lifecycle end-to-end without the two-agent split.
18
- *
19
13
  * What we assert (in priority order):
20
14
  *
21
- * 1. Within the turn, the driver sees AT LEAST ONE `+` reaction
22
- * op (the L1 "I'm alive" signal). Fast turns may collapse
23
- * intermediate states, so we only require *one* add, not a
24
- * specific emoji.
15
+ * 1. Within the turn, the bot places AT LEAST ONE reaction on the
16
+ * inbound message (the L1 "I'm alive" signal). We poll via
17
+ * `driver.pollReactions()` rather than subscribing to push
18
+ * events — Telegram does not deliver `updateMessageReactions`
19
+ * push events to the human account when a bot sets a reaction
20
+ * in a DM (fixes #2502).
25
21
  * 2. By the time the bot has sent a final reply (+ a short tail
26
- * for Telegram to deliver the terminal-emoji replace), the
27
- * LAST observed `+` op is in the `done` set (`👍 / 💯 / 🎉`).
28
- *
29
- * Why "last `+` op wins" rather than `expectReaction(['👍'])` with
30
- * a literal sequence: `setMessageReaction` REPLACES the prior emoji
31
- * atomically. mtcute's update stream can deliver the replace as a
32
- * `-prev` followed by a `+next`, or as a single coalesced event,
33
- * depending on server batching. The "last add wins" shape matches
34
- * the production semantics — whatever's *currently* on the message
35
- * is what the user actually sees.
22
+ * for Telegram to apply the terminal-emoji replace), the reaction
23
+ * on the inbound message is in the `done` set (`👍 / 💯 / 🎉`).
36
24
  *
37
- * The observer must be attached BEFORE the reply lands so we
38
- * capture the queued / working reactions, not just the terminal
39
- * one. Pattern: `observeReactions` immediately after `sendDM`
40
- * returns the messageId, drain into a trail array while we wait
41
- * for the reply, then run a short tail to catch the terminal
42
- * after the reply.
25
+ * Polling strategy:
26
+ * - Poll every `POLL_INTERVAL_MS` until the bot has replied AND
27
+ * `TAIL_AFTER_REPLY_MS` has elapsed since that reply (the loop does
28
+ * not stop early on a terminal-done emoji). Bail immediately on
29
+ * reply-timeout so CI doesn't burn the full 90s safety ceiling.
30
+ * - After the reply arrives, keep polling through the tail window
31
+ * so the terminal emoji (👍) has time to replace the working
32
+ * emoji (👀/🤔). In practice the replace happens within 1-2s
33
+ * of the reply on a healthy bot; the 8s ceiling absorbs jitter.
43
34
  *
44
35
  * Requires the same env as `smoke-dm-reply.test.ts` (see
45
36
  * `uat/SETUP.md` §6).
@@ -49,16 +40,11 @@ import { describe, expect, it } from "vitest";
49
40
  import { spinUp } from "../harness.js";
50
41
 
51
42
  const TERMINAL_DONE_EMOJI = new Set(["👍", "💯", "🎉"]);
43
+ const POLL_INTERVAL_MS = 1_000;
52
44
  const TAIL_AFTER_REPLY_MS = 8_000;
53
45
 
54
46
  const INBOUND = (): string => `uat-reactions ${new Date().toISOString()}`;
55
47
 
56
- interface ObservedOp {
57
- emoji: string;
58
- op: "+" | "-";
59
- at: number;
60
- }
61
-
62
48
  describe("uat: reaction lifecycle on driver DM", () => {
63
49
  it(
64
50
  "driver sees an alive reaction, then a terminal-done emoji by reply tail",
@@ -67,71 +53,91 @@ describe("uat: reaction lifecycle on driver DM", () => {
67
53
  try {
68
54
  const sent = await sc.sendDM(INBOUND());
69
55
 
70
- // Attach the observer immediately so the queued (👀) and
71
- // working reactions don't fire before the listener exists.
72
- const trail: ObservedOp[] = [];
73
- const iter = sc.driver
74
- .observeReactions(sc.botUserId, { messageId: sent.messageId })
75
- [Symbol.asyncIterator]();
76
- let pump: Promise<void> | null = null;
77
- let stopPump = false;
78
- pump = (async () => {
79
- while (!stopPump) {
80
- const next = await iter.next();
81
- if (next.done === true) return;
82
- trail.push({
83
- emoji: next.value.emoji,
84
- op: next.value.op,
85
- at: Date.now(),
86
- });
87
- }
88
- })();
56
+ // Poll the reaction state on the sent message. We use polling
57
+ // rather than `observeReactions` because Telegram does not
58
+ // deliver `updateMessageReactions` push updates to user accounts
59
+ // when a bot sets a reaction in a DM — see module docblock.
60
+ const reactionHistory: string[][] = [];
61
+ let replyReceived = false;
62
+ let replyReceivedAt = 0;
89
63
 
90
- try {
91
- // Wait for the bot's reply (any content). Gives the L1
92
- // lifecycle time to traverse queued → working → done.
93
- const reply = await sc.expectMessage(/\S/, {
94
- from: "bot",
95
- timeout: 60_000,
64
+ // Wait for the bot's reply (any content). We start polling
65
+ // concurrently so we capture intermediate reactions during
66
+ // the turn.
67
+ let replyTimedOut = false;
68
+ const replyPromise = sc
69
+ .expectMessage(/\S/, { from: "bot", timeout: 60_000 })
70
+ .then((reply) => {
71
+ expect(reply.text.length).toBeGreaterThan(0);
72
+ replyReceived = true;
73
+ replyReceivedAt = Date.now();
74
+ return reply;
75
+ })
76
+ .catch((err: unknown) => {
77
+ // expectMessage timeout → mark so the poll loop exits immediately
78
+ // instead of burning the full 90s safety ceiling on CI failure.
79
+ replyTimedOut = true;
80
+ throw err;
96
81
  });
97
- expect(reply.text.length).toBeGreaterThan(0);
98
82
 
99
- // Tail after the reply for Telegram to deliver the
100
- // terminal-emoji replace. In practice <1s on a healthy bot;
101
- // 8s ceiling absorbs server batching jitter.
102
- await new Promise((resolve) =>
103
- setTimeout(resolve, TAIL_AFTER_REPLY_MS),
104
- );
105
- } finally {
106
- stopPump = true;
107
- await iter.return?.();
108
- if (pump) {
109
- await pump.catch(() => {
110
- /* generator return triggers rejection on pending iter.next() — ignore */
111
- });
83
+ // Polling loop: sample reactions while waiting for the reply,
84
+ // then continue for TAIL_AFTER_REPLY_MS after the reply lands.
85
+ const poll = async (): Promise<void> => {
86
+ const deadline = Date.now() + 90_000; // safety ceiling
87
+ while (Date.now() < deadline) {
88
+ // Bail early if the reply timed out — no point polling further.
89
+ if (replyTimedOut) break;
90
+ const emojis = await sc.driver.pollReactions(
91
+ sc.botUserId,
92
+ sent.messageId,
93
+ );
94
+ if (emojis.length > 0) {
95
+ reactionHistory.push([...emojis]);
96
+ }
97
+ if (
98
+ replyReceived &&
99
+ Date.now() - replyReceivedAt >= TAIL_AFTER_REPLY_MS
100
+ ) {
101
+ break;
102
+ }
103
+ await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
112
104
  }
113
- }
105
+ };
106
+
107
+ // Run both concurrently; wait for both to settle.
108
+ await Promise.all([replyPromise, poll()]);
114
109
 
115
- // L1 alive signal: at least one `+` op landed during the turn.
116
- const adds = trail.filter((o) => o.op === "+");
110
+ // L1 alive signal: at least one non-empty reaction set was
111
+ // observed during the turn.
112
+ const allSeen = reactionHistory.flat();
113
+ const uniqueSeen = [...new Set(allSeen)];
117
114
  expect(
118
- adds.length,
119
- `expected at least one reaction-add during the turn, got 0. ` +
120
- `Full trail: ${trail.map((o) => `${o.op}${o.emoji}`).join(" ") || "(empty)"}`,
115
+ reactionHistory.length,
116
+ `expected at least one reaction poll to show a reaction during the ` +
117
+ `turn, but all ${reactionHistory.length > 0 ? "polls returned nothing with emojis" : "polls returned empty"}. ` +
118
+ `History snapshots: ${reactionHistory.map((s) => `[${s.join(",")}]`).join(" ") || "(none)"}`,
121
119
  ).toBeGreaterThan(0);
122
120
 
123
- // L1 terminal: the LAST `+` op should be a terminal-done emoji.
124
- // Extra `-` ops after the final `+` are tolerated (Telegram
125
- // sometimes emits a bare clean-up `-`); the last `+` is what
126
- // the user actually sees.
127
- const lastAdd = adds[adds.length - 1];
121
+ // L1 terminal: the LAST non-empty snapshot should contain a
122
+ // terminal-done emoji. `setMessageReaction` replaces atomically,
123
+ // so the last snapshot holds whatever is currently on the message.
124
+ const lastSnapshot = reactionHistory[reactionHistory.length - 1];
125
+ // The bot uses setMessageReaction (replace, not append) — exactly one
126
+ // emoji should be set at any time. Assert the invariant so we catch
127
+ // accidental multi-emoji states, then check the terminal-done value.
128
+ expect(
129
+ lastSnapshot.length,
130
+ `expected exactly 1 reaction in the final snapshot, got [${lastSnapshot.join(",")}]. ` +
131
+ `All snapshots: ${reactionHistory.map((s) => `[${s.join(",")}]`).join(" ")}`,
132
+ ).toBe(1);
133
+ const lastEmoji = lastSnapshot[0];
128
134
  expect(
129
- TERMINAL_DONE_EMOJI.has(lastAdd.emoji),
130
- `expected last reaction-add to be one of ${[
135
+ TERMINAL_DONE_EMOJI.has(lastEmoji),
136
+ `expected last reaction to be one of ${[
131
137
  ...TERMINAL_DONE_EMOJI,
132
- ].join(", ")}, got ${lastAdd.emoji}. Full trail: ${trail
133
- .map((o) => `${o.op}${o.emoji}`)
134
- .join(" ")}`,
138
+ ].join(", ")}, got ${lastEmoji}. ` +
139
+ `All snapshots: ${reactionHistory.map((s) => `[${s.join(",")}]`).join(" ")}. ` +
140
+ `Unique emojis seen: ${uniqueSeen.join(", ") || "(none)"}`,
135
141
  ).toBe(true);
136
142
  } finally {
137
143
  await sc.tearDown();
@@ -11,6 +11,8 @@
11
11
  * monospace inline and avoid Telegram treating them as markdown.
12
12
  */
13
13
 
14
+ import { maskUsername } from "./demo-mask.js";
15
+
14
16
  export type AuthSummary = {
15
17
  authenticated: boolean;
16
18
  subscription_type: string | null;
@@ -198,10 +200,19 @@ const STATUS_DOT: Record<StatusProbeRow['status'], string> = {
198
200
  export function statusPairedText(params: {
199
201
  user: string;
200
202
  meta: AgentMetadata;
203
+ /**
204
+ * Demo mode (the `/status demo` suffix). When true the paired-user tag
205
+ * (`@handle` or numeric sender id) is run through `maskUsername` so a
206
+ * screen recording shows a stable fake `@demo_user…` handle instead of
207
+ * the operator's real Telegram identity. Off by default — the agent /
208
+ * model / health / audit topology below is NOT masked (out of scope).
209
+ */
210
+ demo?: boolean;
201
211
  }): string {
202
212
  const { user, meta } = params;
213
+ const shownUser = params.demo ? maskUsername(user) : user;
203
214
  const lines = [
204
- `Paired as ${escapeHtml(user)}.`,
215
+ `Paired as ${escapeHtml(shownUser)}.`,
205
216
  ``,
206
217
  `Agent: ${formatAgentLine(meta)}`,
207
218
  `Auth: ${formatAuthLine(meta.auth)}`,
@@ -327,6 +338,7 @@ export const TELEGRAM_MENU_COMMANDS = [
327
338
  { command: "effort", description: "Show or switch the reasoning effort" },
328
339
  { command: "doctor", description: "Health check (deps, services, MCP)" },
329
340
  { command: "usage", description: "Pro/Max plan quota (5h + 7d windows)" },
341
+ { command: "whoami", description: "This agent's sandbox: tools, MCP, vault key-names" },
330
342
  // Vault — secrets + capability grants. /vault is a top-level command
331
343
  // dispatching subcommands (list, get, set, delete, status, unlock, lock,
332
344
  // grant, grants). Surfaced in the menu so mobile users can tap-to-pick