switchroom 0.15.45 → 0.16.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +122 -88
- package/dist/auth-broker/index.js +463 -177
- package/dist/cli/autoaccept-poll.js +4842 -35
- package/dist/cli/drive-write-pretool.mjs +17 -14
- package/dist/cli/notion-write-pretool.mjs +117 -86
- package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
- package/dist/cli/self-improve-stop.mjs +428 -0
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +3158 -1178
- package/dist/host-control/main.js +2833 -355
- package/dist/vault/approvals/kernel-server.js +7479 -7439
- package/dist/vault/broker/server.js +11312 -11272
- package/examples/minimal.yaml +1 -0
- package/examples/switchroom.yaml +1 -0
- package/package.json +3 -3
- package/profiles/_base/start.sh.hbs +88 -1
- package/profiles/_shared/execution-discipline.md.hbs +18 -0
- package/profiles/default/CLAUDE.md.hbs +0 -19
- package/telegram-plugin/.claude-plugin/plugin.json +2 -2
- package/telegram-plugin/answer-stream-flag.ts +12 -49
- package/telegram-plugin/answer-stream.ts +5 -150
- package/telegram-plugin/auth-snapshot-format.ts +280 -48
- package/telegram-plugin/auto-fallback-fleet.ts +44 -1
- package/telegram-plugin/context-exhaustion.ts +12 -0
- package/telegram-plugin/demo-mask.ts +154 -0
- package/telegram-plugin/dist/bridge/bridge.js +167 -124
- package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
- package/telegram-plugin/dist/server.js +215 -172
- package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
- package/telegram-plugin/draft-stream.ts +47 -410
- package/telegram-plugin/final-answer-detect.ts +17 -12
- package/telegram-plugin/fleet-fallback-resume.ts +131 -0
- package/telegram-plugin/format.ts +56 -19
- package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
- package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
- package/telegram-plugin/gateway/auth-command.ts +70 -14
- package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
- package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
- package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
- package/telegram-plugin/gateway/current-turn-map.ts +188 -0
- package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
- package/telegram-plugin/gateway/effort-command.ts +8 -3
- package/telegram-plugin/gateway/emission-authority.ts +369 -0
- package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
- package/telegram-plugin/gateway/gateway.ts +1837 -291
- package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
- package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
- package/telegram-plugin/gateway/represent-guard.ts +72 -0
- package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
- package/telegram-plugin/gateway/status-surface-log.ts +14 -3
- package/telegram-plugin/history.ts +33 -11
- package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
- package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
- package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
- package/telegram-plugin/issues-card.ts +4 -0
- package/telegram-plugin/model-unavailable.ts +124 -0
- package/telegram-plugin/narrative-dedup.ts +69 -0
- package/telegram-plugin/over-ping-safety-net.ts +70 -4
- package/telegram-plugin/package.json +3 -3
- package/telegram-plugin/pending-work-progress.ts +12 -0
- package/telegram-plugin/permission-rule.ts +32 -5
- package/telegram-plugin/permission-title.ts +152 -9
- package/telegram-plugin/quota-check.ts +13 -0
- package/telegram-plugin/quota-watch.ts +135 -7
- package/telegram-plugin/registry/turns-schema.test.ts +24 -0
- package/telegram-plugin/registry/turns-schema.ts +9 -0
- package/telegram-plugin/runtime-metrics.ts +13 -0
- package/telegram-plugin/session-tail.ts +96 -11
- package/telegram-plugin/silence-poke.ts +170 -24
- package/telegram-plugin/slot-banner-driver.ts +3 -0
- package/telegram-plugin/status-no-truncate.ts +44 -0
- package/telegram-plugin/status-reactions.ts +20 -3
- package/telegram-plugin/stream-controller.ts +4 -23
- package/telegram-plugin/stream-reply-handler.ts +6 -24
- package/telegram-plugin/streaming-metrics.ts +91 -0
- package/telegram-plugin/subagent-watcher.ts +212 -66
- package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
- package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
- package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
- package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
- package/telegram-plugin/tests/answer-stream.test.ts +2 -411
- package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
- package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
- package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
- package/telegram-plugin/tests/demo-mask.test.ts +127 -0
- package/telegram-plugin/tests/draft-stream.test.ts +0 -827
- package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
- package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
- package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
- package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
- package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
- package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
- package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
- package/telegram-plugin/tests/feed-survival.test.ts +526 -0
- package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
- package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
- package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
- package/telegram-plugin/tests/history.test.ts +60 -0
- package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
- package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
- package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
- package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
- package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
- package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
- package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
- package/telegram-plugin/tests/permission-rule.test.ts +17 -0
- package/telegram-plugin/tests/permission-title.test.ts +206 -17
- package/telegram-plugin/tests/quota-watch.test.ts +252 -9
- package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
- package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
- package/telegram-plugin/tests/represent-guard.test.ts +162 -0
- package/telegram-plugin/tests/session-tail.test.ts +147 -3
- package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
- package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
- package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
- package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
- package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
- package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
- package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
- package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
- package/telegram-plugin/tests/telegram-format.test.ts +101 -6
- package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
- package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
- package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
- package/telegram-plugin/tests/tool-labels.test.ts +67 -0
- package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
- package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
- package/telegram-plugin/tests/welcome-text.test.ts +32 -3
- package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
- package/telegram-plugin/tool-activity-summary.ts +375 -58
- package/telegram-plugin/turn-liveness-floor.ts +240 -0
- package/telegram-plugin/uat/assertions.ts +115 -0
- package/telegram-plugin/uat/driver.ts +68 -0
- package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
- package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
- package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
- package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
- package/telegram-plugin/welcome-text.ts +13 -1
- package/telegram-plugin/worker-activity-feed.ts +157 -82
- package/telegram-plugin/draft-transport.ts +0 -122
- package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
- package/telegram-plugin/tests/draft-transport.test.ts +0 -211
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JTBD: "the reply is last" + "a conversational turn opens no card" — the
|
|
3
|
+
* CI-enforced form of the deterministic emission invariants (design
|
|
4
|
+
* `docs/message-emission-determinism.md` §11; #2556).
|
|
5
|
+
*
|
|
6
|
+
* Four cases, each pulled from server SEND-ORDER history (`driver.getHistory`)
|
|
7
|
+
* so a post-reply card that landed before any live observer started is still
|
|
8
|
+
* caught. The ordering assertion is the SCOPED one (§6): within a single
|
|
9
|
+
* foreground turn, no activity-card / worker-feed surface opens after that
|
|
10
|
+
* turn's reply — NOT a naive "answer has the max message_id" (that would
|
|
11
|
+
* false-positive on a legitimate later background / represent / error surface).
|
|
12
|
+
* `assertReplyIsLast` filters to the activity/answer lanes of the SAME turn.
|
|
13
|
+
*
|
|
14
|
+
* 1. Conversational, zero-tool ("Reply with only: pong") — NO activity card
|
|
15
|
+
* opens in this turn at all (lever 5 base case / G1, the triplication).
|
|
16
|
+
* 2. Tool-heavy (a REAL_WORK activity-surface prompt) — a card opened AND no
|
|
17
|
+
* card for this turn sits below the substantive reply (lever 1 / races
|
|
18
|
+
* A/B/E).
|
|
19
|
+
* 3. Short-pinging final ("Reply 'Done!' then write one memory") — the
|
|
20
|
+
* currently-reordering case; green only once lever 2 lands (G5).
|
|
21
|
+
* 4. Two-turn backstop — a prompt that ends a turn without a qualifying reply
|
|
22
|
+
* (forcing the silent-end re-prompt); no card opens below the final answer
|
|
23
|
+
* across the re-prompt boundary (G3/C). Needs `getHistory`.
|
|
24
|
+
*
|
|
25
|
+
* Runs under CI `uat-gate`; the full live MTProto run needs the test-harness
|
|
26
|
+
* agent + a vault session, so locally this self-skips green (no driver).
|
|
27
|
+
*/
|
|
28
|
+
import { describe, it, expect, beforeAll } from "vitest";
|
|
29
|
+
import { spinUp, type Scenario } from "../harness.js";
|
|
30
|
+
import {
|
|
31
|
+
assertReplyIsLast,
|
|
32
|
+
isAnswer,
|
|
33
|
+
isActivityFeedMessage,
|
|
34
|
+
isWorkerFeedMessage,
|
|
35
|
+
} from "../assertions.js";
|
|
36
|
+
import { collectTurn } from "../real-work-prompts.js";
|
|
37
|
+
import type { ObservedMessage } from "../driver.js";
|
|
38
|
+
|
|
39
|
+
/** Per-case overall budget. */
|
|
40
|
+
const TURN_BUDGET_MS = 130_000;
|
|
41
|
+
/** History pull depth — comfortably covers a multi-surface two-turn exchange. */
|
|
42
|
+
const HISTORY_LIMIT = 80;
|
|
43
|
+
|
|
44
|
+
describe("uat: reply-is-last + conversational-turn-opens-no-card (DM)", () => {
|
|
45
|
+
let sc: Scenario | null = null;
|
|
46
|
+
|
|
47
|
+
beforeAll(async () => {
|
|
48
|
+
try {
|
|
49
|
+
sc = await spinUp({ agent: "test-harness" });
|
|
50
|
+
await sc.driver.primeDialogs();
|
|
51
|
+
} catch (err) {
|
|
52
|
+
console.warn(
|
|
53
|
+
`[reply-is-last] no live driver — self-skipping green: ${(err as Error).message}`,
|
|
54
|
+
);
|
|
55
|
+
sc = null;
|
|
56
|
+
}
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
// Case 1 — conversational, zero-tool: NO activity card opens at all.
|
|
60
|
+
it(
|
|
61
|
+
"case 1: a conversational 0-tool turn opens NO activity card (lever 5 / G1)",
|
|
62
|
+
async () => {
|
|
63
|
+
if (sc == null) return; // self-skip green
|
|
64
|
+
const { driver, botUserId, driverUserId } = sc;
|
|
65
|
+
|
|
66
|
+
const obs = await collectTurn(
|
|
67
|
+
driver,
|
|
68
|
+
botUserId,
|
|
69
|
+
driverUserId,
|
|
70
|
+
"Reply with only this exact word and nothing else, using no tools at all: pong",
|
|
71
|
+
{ timeoutMs: TURN_BUDGET_MS, minAnswerChars: 1, settleMs: 8_000 },
|
|
72
|
+
);
|
|
73
|
+
|
|
74
|
+
expect(obs.answer, "the pong answer must land").not.toBeNull();
|
|
75
|
+
|
|
76
|
+
// Pull send-order history and confirm: no activity-card surface opened in
|
|
77
|
+
// this turn. We scope to surfaces at/after the answer's turn by reusing
|
|
78
|
+
// assertReplyIsLast, AND additionally assert the live collector saw no
|
|
79
|
+
// activity feed for this minimal turn.
|
|
80
|
+
const history = await driver.getHistory(botUserId, HISTORY_LIMIT);
|
|
81
|
+
assertReplyIsLast(history, driverUserId, { turn: obs.answer! });
|
|
82
|
+
|
|
83
|
+
// The strong G1 assertion: a 0-tool conversational turn must produce no
|
|
84
|
+
// activity feed at all (neither open nor edit).
|
|
85
|
+
expect(
|
|
86
|
+
obs.sawActivityFeed,
|
|
87
|
+
"a 0-tool conversational turn must not open an activity card (the triplication)",
|
|
88
|
+
).toBe(false);
|
|
89
|
+
},
|
|
90
|
+
TURN_BUDGET_MS + 30_000,
|
|
91
|
+
);
|
|
92
|
+
|
|
93
|
+
// Case 2 — tool-heavy: a card opens, but none below the substantive reply.
|
|
94
|
+
it(
|
|
95
|
+
"case 2: a tool-heavy turn opens a card but none below the reply (lever 1 / races A/B/E)",
|
|
96
|
+
async () => {
|
|
97
|
+
if (sc == null) return; // self-skip green
|
|
98
|
+
const { driver, botUserId, driverUserId } = sc;
|
|
99
|
+
|
|
100
|
+
const obs = await collectTurn(
|
|
101
|
+
driver,
|
|
102
|
+
botUserId,
|
|
103
|
+
driverUserId,
|
|
104
|
+
"Use your Bash tool to run `uname -a`, then tell me in one sentence what " +
|
|
105
|
+
"operating system this machine is running. Keep the answer substantive " +
|
|
106
|
+
"(a few sentences explaining what the output means).",
|
|
107
|
+
{ timeoutMs: TURN_BUDGET_MS, minAnswerChars: 60, settleMs: 8_000 },
|
|
108
|
+
);
|
|
109
|
+
|
|
110
|
+
if (obs.answer == null) {
|
|
111
|
+
console.warn("[reply-is-last] case 2 INCONCLUSIVE — no answer landed in budget.");
|
|
112
|
+
return;
|
|
113
|
+
}
|
|
114
|
+
if (!obs.sawActivityFeed && !obs.sawWorkerFeed) {
|
|
115
|
+
console.warn(
|
|
116
|
+
"[reply-is-last] case 2 INCONCLUSIVE — the agent answered without a " +
|
|
117
|
+
"tool feed; the reorder vector this guards was not exercised.",
|
|
118
|
+
);
|
|
119
|
+
return;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
const history = await driver.getHistory(botUserId, HISTORY_LIMIT);
|
|
123
|
+
// The scoped invariant: no activity/worker-feed surface for this turn
|
|
124
|
+
// lands below the substantive reply.
|
|
125
|
+
assertReplyIsLast(history, driverUserId, { turn: obs.answer });
|
|
126
|
+
},
|
|
127
|
+
TURN_BUDGET_MS + 30_000,
|
|
128
|
+
);
|
|
129
|
+
|
|
130
|
+
// Case 3 — short-pinging final: the case the lever-2 ordering fix makes pass.
|
|
131
|
+
it(
|
|
132
|
+
"case 3: a short-pinging final stays last even with post-reply tool work (lever 2 / G5)",
|
|
133
|
+
async () => {
|
|
134
|
+
if (sc == null) return; // self-skip green
|
|
135
|
+
const { driver, botUserId, driverUserId } = sc;
|
|
136
|
+
|
|
137
|
+
const obs = await collectTurn(
|
|
138
|
+
driver,
|
|
139
|
+
botUserId,
|
|
140
|
+
driverUserId,
|
|
141
|
+
"Reply with only the single word 'Done!' (with the exclamation mark) — " +
|
|
142
|
+
"then, AFTER that reply, save a one-line memory noting you completed " +
|
|
143
|
+
"this test. The short reply is your final answer.",
|
|
144
|
+
// The "Done!" reply is short; accept it as the answer.
|
|
145
|
+
{ timeoutMs: TURN_BUDGET_MS, minAnswerChars: 1, settleMs: 10_000 },
|
|
146
|
+
);
|
|
147
|
+
|
|
148
|
+
if (obs.answer == null) {
|
|
149
|
+
console.warn("[reply-is-last] case 3 INCONCLUSIVE — no answer landed in budget.");
|
|
150
|
+
return;
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
const history = await driver.getHistory(botUserId, HISTORY_LIMIT);
|
|
154
|
+
// The post-'Done!' memory write must NOT have reopened a card BELOW the
|
|
155
|
+
// reply. Before lever 2 this reorders (the named G5 residual); after, the
|
|
156
|
+
// card is finalized before the send and stays above.
|
|
157
|
+
assertReplyIsLast(history, driverUserId, { turn: obs.answer });
|
|
158
|
+
},
|
|
159
|
+
TURN_BUDGET_MS + 30_000,
|
|
160
|
+
);
|
|
161
|
+
|
|
162
|
+
// Case 4 — two-turn backstop: no card below the final answer across the
|
|
163
|
+
// silent-end re-prompt boundary.
|
|
164
|
+
it(
|
|
165
|
+
"case 4: no card opens below the final answer across a re-prompt boundary (G3/C)",
|
|
166
|
+
async () => {
|
|
167
|
+
if (sc == null) return; // self-skip green
|
|
168
|
+
const { driver, botUserId, driverUserId } = sc;
|
|
169
|
+
|
|
170
|
+
// A prompt that nudges the model to write its answer as prose first
|
|
171
|
+
// (no reply tool) — forcing the silent-end re-prompt, then a real answer
|
|
172
|
+
// on the re-prompted turn. The model's exact path isn't forceable, so the
|
|
173
|
+
// ordering assertion is the durable part; the re-prompt is best-effort.
|
|
174
|
+
const obs = await collectTurn(
|
|
175
|
+
driver,
|
|
176
|
+
botUserId,
|
|
177
|
+
driverUserId,
|
|
178
|
+
"Think out loud briefly, then give me a thorough multi-sentence answer " +
|
|
179
|
+
"(at least 220 characters) explaining what a Telegram supergroup is and " +
|
|
180
|
+
"how forum topics work inside one.",
|
|
181
|
+
{ timeoutMs: TURN_BUDGET_MS, minAnswerChars: 200, settleMs: 12_000 },
|
|
182
|
+
);
|
|
183
|
+
|
|
184
|
+
if (obs.answer == null) {
|
|
185
|
+
console.warn("[reply-is-last] case 4 INCONCLUSIVE — no answer landed in budget.");
|
|
186
|
+
return;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// Pull full send-order history (the re-prompt may have produced a second
|
|
190
|
+
// card before the live observer in collectTurn caught it) and assert the
|
|
191
|
+
// final answer's turn has no feed surface below it.
|
|
192
|
+
const history = await driver.getHistory(botUserId, HISTORY_LIMIT);
|
|
193
|
+
assertReplyIsLast(history, driverUserId, { turn: obs.answer });
|
|
194
|
+
|
|
195
|
+
// Sanity: the answer is a genuine answer-lane message (not a feed).
|
|
196
|
+
expect(isAnswer(obs.answer, driverUserId)).toBe(true);
|
|
197
|
+
expect(isActivityFeedMessage(obs.answer)).toBe(false);
|
|
198
|
+
expect(isWorkerFeedMessage(obs.answer)).toBe(false);
|
|
199
|
+
},
|
|
200
|
+
TURN_BUDGET_MS + 30_000,
|
|
201
|
+
);
|
|
202
|
+
});
|
|
@@ -10,36 +10,27 @@
|
|
|
10
10
|
* of the bot's final reply — otherwise the user looks at their
|
|
11
11
|
* inbound, sees it still wearing 🤔, and asks "you done?").
|
|
12
12
|
*
|
|
13
|
-
* History: this scenario was previously `describe.skip` with a
|
|
14
|
-
* rationale that the pinned progress card "renders INSTEAD of
|
|
15
|
-
* reactions". The card was retired in #1126; the card-vs-reaction
|
|
16
|
-
* branch in the gateway is dead. We can now exercise the full
|
|
17
|
-
* lifecycle end-to-end without the two-agent split.
|
|
18
|
-
*
|
|
19
13
|
* What we assert (in priority order):
|
|
20
14
|
*
|
|
21
|
-
* 1. Within the turn, the
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
15
|
+
* 1. Within the turn, the bot places AT LEAST ONE reaction on the
|
|
16
|
+
* inbound message (the L1 "I'm alive" signal). We poll via
|
|
17
|
+
* `driver.pollReactions()` rather than subscribing to push
|
|
18
|
+
* events — Telegram does not deliver `updateMessageReactions`
|
|
19
|
+
* push events to the human account when a bot sets a reaction
|
|
20
|
+
* in a DM (fixes #2502).
|
|
25
21
|
* 2. By the time the bot has sent a final reply (+ a short tail
|
|
26
|
-
* for Telegram to
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
* Why "last `+` op wins" rather than `expectReaction(['👍'])` with
|
|
30
|
-
* a literal sequence: `setMessageReaction` REPLACES the prior emoji
|
|
31
|
-
* atomically. mtcute's update stream can deliver the replace as a
|
|
32
|
-
* `-prev` followed by a `+next`, or as a single coalesced event,
|
|
33
|
-
* depending on server batching. The "last add wins" shape matches
|
|
34
|
-
* the production semantics — whatever's *currently* on the message
|
|
35
|
-
* is what the user actually sees.
|
|
22
|
+
* for Telegram to apply the terminal-emoji replace), the reaction
|
|
23
|
+
* on the inbound message is in the `done` set (`👍 / 💯 / 🎉`).
|
|
36
24
|
*
|
|
37
|
-
*
|
|
38
|
-
*
|
|
39
|
-
*
|
|
40
|
-
*
|
|
41
|
-
*
|
|
42
|
-
*
|
|
25
|
+
* Polling strategy:
|
|
26
|
+
* - Poll every `POLL_INTERVAL_MS` until the bot has replied AND
|
|
27
|
+
* `TAIL_AFTER_REPLY_MS` has elapsed; seeing a terminal-done emoji
|
|
28
|
+
* alone does not end the loop. Bail immediately on reply-timeout
|
|
29
|
+
* so CI doesn't burn the full 90s safety ceiling.
|
|
30
|
+
* - After the reply arrives, keep polling through the tail window
|
|
31
|
+
* so the terminal emoji (👍) has time to replace the working
|
|
32
|
+
* emoji (👀/🤔). In practice the replace happens within 1-2s
|
|
33
|
+
* of the reply on a healthy bot; the 8s ceiling absorbs jitter.
|
|
43
34
|
*
|
|
44
35
|
* Requires the same env as `smoke-dm-reply.test.ts` (see
|
|
45
36
|
* `uat/SETUP.md` §6).
|
|
@@ -49,16 +40,11 @@ import { describe, expect, it } from "vitest";
|
|
|
49
40
|
import { spinUp } from "../harness.js";
|
|
50
41
|
|
|
51
42
|
const TERMINAL_DONE_EMOJI = new Set(["👍", "💯", "🎉"]);
|
|
43
|
+
const POLL_INTERVAL_MS = 1_000;
|
|
52
44
|
const TAIL_AFTER_REPLY_MS = 8_000;
|
|
53
45
|
|
|
54
46
|
const INBOUND = (): string => `uat-reactions ${new Date().toISOString()}`;
|
|
55
47
|
|
|
56
|
-
interface ObservedOp {
|
|
57
|
-
emoji: string;
|
|
58
|
-
op: "+" | "-";
|
|
59
|
-
at: number;
|
|
60
|
-
}
|
|
61
|
-
|
|
62
48
|
describe("uat: reaction lifecycle on driver DM", () => {
|
|
63
49
|
it(
|
|
64
50
|
"driver sees an alive reaction, then a terminal-done emoji by reply tail",
|
|
@@ -67,71 +53,91 @@ describe("uat: reaction lifecycle on driver DM", () => {
|
|
|
67
53
|
try {
|
|
68
54
|
const sent = await sc.sendDM(INBOUND());
|
|
69
55
|
|
|
70
|
-
//
|
|
71
|
-
//
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
let
|
|
77
|
-
let stopPump = false;
|
|
78
|
-
pump = (async () => {
|
|
79
|
-
while (!stopPump) {
|
|
80
|
-
const next = await iter.next();
|
|
81
|
-
if (next.done === true) return;
|
|
82
|
-
trail.push({
|
|
83
|
-
emoji: next.value.emoji,
|
|
84
|
-
op: next.value.op,
|
|
85
|
-
at: Date.now(),
|
|
86
|
-
});
|
|
87
|
-
}
|
|
88
|
-
})();
|
|
56
|
+
// Poll the reaction state on the sent message. We use polling
|
|
57
|
+
// rather than `observeReactions` because Telegram does not
|
|
58
|
+
// deliver `updateMessageReactions` push updates to user accounts
|
|
59
|
+
// when a bot sets a reaction in a DM — see module docblock.
|
|
60
|
+
const reactionHistory: string[][] = [];
|
|
61
|
+
let replyReceived = false;
|
|
62
|
+
let replyReceivedAt = 0;
|
|
89
63
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
64
|
+
// Wait for the bot's reply (any content). We start polling
|
|
65
|
+
// concurrently so we capture intermediate reactions during
|
|
66
|
+
// the turn.
|
|
67
|
+
let replyTimedOut = false;
|
|
68
|
+
const replyPromise = sc
|
|
69
|
+
.expectMessage(/\S/, { from: "bot", timeout: 60_000 })
|
|
70
|
+
.then((reply) => {
|
|
71
|
+
expect(reply.text.length).toBeGreaterThan(0);
|
|
72
|
+
replyReceived = true;
|
|
73
|
+
replyReceivedAt = Date.now();
|
|
74
|
+
return reply;
|
|
75
|
+
})
|
|
76
|
+
.catch((err: unknown) => {
|
|
77
|
+
// expectMessage timeout → mark so the poll loop exits immediately
|
|
78
|
+
// instead of burning the full 90s safety ceiling on CI failure.
|
|
79
|
+
replyTimedOut = true;
|
|
80
|
+
throw err;
|
|
96
81
|
});
|
|
97
|
-
expect(reply.text.length).toBeGreaterThan(0);
|
|
98
82
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
83
|
+
// Polling loop: sample reactions while waiting for the reply,
|
|
84
|
+
// then continue for TAIL_AFTER_REPLY_MS after the reply lands.
|
|
85
|
+
const poll = async (): Promise<void> => {
|
|
86
|
+
const deadline = Date.now() + 90_000; // safety ceiling
|
|
87
|
+
while (Date.now() < deadline) {
|
|
88
|
+
// Bail early if the reply timed out — no point polling further.
|
|
89
|
+
if (replyTimedOut) break;
|
|
90
|
+
const emojis = await sc.driver.pollReactions(
|
|
91
|
+
sc.botUserId,
|
|
92
|
+
sent.messageId,
|
|
93
|
+
);
|
|
94
|
+
if (emojis.length > 0) {
|
|
95
|
+
reactionHistory.push([...emojis]);
|
|
96
|
+
}
|
|
97
|
+
if (
|
|
98
|
+
replyReceived &&
|
|
99
|
+
Date.now() - replyReceivedAt >= TAIL_AFTER_REPLY_MS
|
|
100
|
+
) {
|
|
101
|
+
break;
|
|
102
|
+
}
|
|
103
|
+
await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
|
|
112
104
|
}
|
|
113
|
-
}
|
|
105
|
+
};
|
|
106
|
+
|
|
107
|
+
// Run both concurrently; wait for both to settle.
|
|
108
|
+
await Promise.all([replyPromise, poll()]);
|
|
114
109
|
|
|
115
|
-
// L1 alive signal: at least one
|
|
116
|
-
|
|
110
|
+
// L1 alive signal: at least one non-empty reaction set was
|
|
111
|
+
// observed during the turn.
|
|
112
|
+
const allSeen = reactionHistory.flat();
|
|
113
|
+
const uniqueSeen = [...new Set(allSeen)];
|
|
117
114
|
expect(
|
|
118
|
-
|
|
119
|
-
`expected at least one reaction
|
|
120
|
-
`
|
|
115
|
+
reactionHistory.length,
|
|
116
|
+
`expected at least one reaction poll to show a reaction during the ` +
|
|
117
|
+
`turn, but all ${reactionHistory.length > 0 ? "polls returned nothing with emojis" : "polls returned empty"}. ` +
|
|
118
|
+
`History snapshots: ${reactionHistory.map((s) => `[${s.join(",")}]`).join(" ") || "(none)"}`,
|
|
121
119
|
).toBeGreaterThan(0);
|
|
122
120
|
|
|
123
|
-
// L1 terminal: the LAST
|
|
124
|
-
//
|
|
125
|
-
//
|
|
126
|
-
|
|
127
|
-
|
|
121
|
+
// L1 terminal: the LAST non-empty snapshot should contain a
|
|
122
|
+
// terminal-done emoji. `setMessageReaction` replaces atomically,
|
|
123
|
+
// so the last snapshot holds whatever is currently on the message.
|
|
124
|
+
const lastSnapshot = reactionHistory[reactionHistory.length - 1];
|
|
125
|
+
// The bot uses setMessageReaction (replace, not append) — exactly one
|
|
126
|
+
// emoji should be set at any time. Assert the invariant so we catch
|
|
127
|
+
// accidental multi-emoji states, then check the terminal-done value.
|
|
128
|
+
expect(
|
|
129
|
+
lastSnapshot.length,
|
|
130
|
+
`expected exactly 1 reaction in the final snapshot, got [${lastSnapshot.join(",")}]. ` +
|
|
131
|
+
`All snapshots: ${reactionHistory.map((s) => `[${s.join(",")}]`).join(" ")}`,
|
|
132
|
+
).toBe(1);
|
|
133
|
+
const lastEmoji = lastSnapshot[0];
|
|
128
134
|
expect(
|
|
129
|
-
TERMINAL_DONE_EMOJI.has(
|
|
130
|
-
`expected last reaction
|
|
135
|
+
TERMINAL_DONE_EMOJI.has(lastEmoji),
|
|
136
|
+
`expected last reaction to be one of ${[
|
|
131
137
|
...TERMINAL_DONE_EMOJI,
|
|
132
|
-
].join(", ")}, got ${
|
|
133
|
-
.map((
|
|
134
|
-
.join(" ")}`,
|
|
138
|
+
].join(", ")}, got ${lastEmoji}. ` +
|
|
139
|
+
`All snapshots: ${reactionHistory.map((s) => `[${s.join(",")}]`).join(" ")}. ` +
|
|
140
|
+
`Unique emojis seen: ${uniqueSeen.join(", ") || "(none)"}`,
|
|
135
141
|
).toBe(true);
|
|
136
142
|
} finally {
|
|
137
143
|
await sc.tearDown();
|
|
@@ -11,6 +11,8 @@
|
|
|
11
11
|
* monospace inline and avoid Telegram treating them as markdown.
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
|
+
import { maskUsername } from "./demo-mask.js";
|
|
15
|
+
|
|
14
16
|
export type AuthSummary = {
|
|
15
17
|
authenticated: boolean;
|
|
16
18
|
subscription_type: string | null;
|
|
@@ -198,10 +200,19 @@ const STATUS_DOT: Record<StatusProbeRow['status'], string> = {
|
|
|
198
200
|
export function statusPairedText(params: {
|
|
199
201
|
user: string;
|
|
200
202
|
meta: AgentMetadata;
|
|
203
|
+
/**
|
|
204
|
+
* Demo mode (the `/status demo` suffix). When true the paired-user tag
|
|
205
|
+
* (`@handle` or numeric sender id) is run through `maskUsername` so a
|
|
206
|
+
* screen recording shows a stable fake `@demo_user…` handle instead of
|
|
207
|
+
* the operator's real Telegram identity. Off by default — the agent /
|
|
208
|
+
* model / health / audit topology below is NOT masked (out of scope).
|
|
209
|
+
*/
|
|
210
|
+
demo?: boolean;
|
|
201
211
|
}): string {
|
|
202
212
|
const { user, meta } = params;
|
|
213
|
+
const shownUser = params.demo ? maskUsername(user) : user;
|
|
203
214
|
const lines = [
|
|
204
|
-
`Paired as ${escapeHtml(
|
|
215
|
+
`Paired as ${escapeHtml(shownUser)}.`,
|
|
205
216
|
``,
|
|
206
217
|
`Agent: ${formatAgentLine(meta)}`,
|
|
207
218
|
`Auth: ${formatAuthLine(meta.auth)}`,
|
|
@@ -327,6 +338,7 @@ export const TELEGRAM_MENU_COMMANDS = [
|
|
|
327
338
|
{ command: "effort", description: "Show or switch the reasoning effort" },
|
|
328
339
|
{ command: "doctor", description: "Health check (deps, services, MCP)" },
|
|
329
340
|
{ command: "usage", description: "Pro/Max plan quota (5h + 7d windows)" },
|
|
341
|
+
{ command: "whoami", description: "This agent's sandbox: tools, MCP, vault key-names" },
|
|
330
342
|
// Vault — secrets + capability grants. /vault is a top-level command
|
|
331
343
|
// dispatching subcommands (list, get, set, delete, status, unlock, lock,
|
|
332
344
|
// grant, grants). Surfaced in the menu so mobile users can tap-to-pick
|