switchroom 0.15.45 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. package/dist/agent-scheduler/index.js +122 -88
  2. package/dist/auth-broker/index.js +463 -177
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +17 -14
  5. package/dist/cli/notion-write-pretool.mjs +117 -86
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/skill-validate-pretool.mjs +72 -72
  9. package/dist/cli/switchroom.js +3158 -1178
  10. package/dist/host-control/main.js +2833 -355
  11. package/dist/vault/approvals/kernel-server.js +7479 -7439
  12. package/dist/vault/broker/server.js +11312 -11272
  13. package/examples/minimal.yaml +1 -0
  14. package/examples/switchroom.yaml +1 -0
  15. package/package.json +3 -3
  16. package/profiles/_base/start.sh.hbs +88 -1
  17. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  18. package/profiles/default/CLAUDE.md.hbs +0 -19
  19. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  20. package/telegram-plugin/answer-stream-flag.ts +12 -49
  21. package/telegram-plugin/answer-stream.ts +5 -150
  22. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  23. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  24. package/telegram-plugin/context-exhaustion.ts +12 -0
  25. package/telegram-plugin/demo-mask.ts +154 -0
  26. package/telegram-plugin/dist/bridge/bridge.js +167 -124
  27. package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
  28. package/telegram-plugin/dist/server.js +215 -172
  29. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  30. package/telegram-plugin/draft-stream.ts +47 -410
  31. package/telegram-plugin/final-answer-detect.ts +17 -12
  32. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  33. package/telegram-plugin/format.ts +56 -19
  34. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  35. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  36. package/telegram-plugin/gateway/auth-command.ts +70 -14
  37. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  38. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  39. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  40. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  41. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  42. package/telegram-plugin/gateway/effort-command.ts +8 -3
  43. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  44. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  45. package/telegram-plugin/gateway/gateway.ts +1837 -291
  46. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  47. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  48. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  49. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  50. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  51. package/telegram-plugin/history.ts +33 -11
  52. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  53. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  54. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  55. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  56. package/telegram-plugin/issues-card.ts +4 -0
  57. package/telegram-plugin/model-unavailable.ts +124 -0
  58. package/telegram-plugin/narrative-dedup.ts +69 -0
  59. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  60. package/telegram-plugin/package.json +3 -3
  61. package/telegram-plugin/pending-work-progress.ts +12 -0
  62. package/telegram-plugin/permission-rule.ts +32 -5
  63. package/telegram-plugin/permission-title.ts +152 -9
  64. package/telegram-plugin/quota-check.ts +13 -0
  65. package/telegram-plugin/quota-watch.ts +135 -7
  66. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  67. package/telegram-plugin/registry/turns-schema.ts +9 -0
  68. package/telegram-plugin/runtime-metrics.ts +13 -0
  69. package/telegram-plugin/session-tail.ts +96 -11
  70. package/telegram-plugin/silence-poke.ts +170 -24
  71. package/telegram-plugin/slot-banner-driver.ts +3 -0
  72. package/telegram-plugin/status-no-truncate.ts +44 -0
  73. package/telegram-plugin/status-reactions.ts +20 -3
  74. package/telegram-plugin/stream-controller.ts +4 -23
  75. package/telegram-plugin/stream-reply-handler.ts +6 -24
  76. package/telegram-plugin/streaming-metrics.ts +91 -0
  77. package/telegram-plugin/subagent-watcher.ts +212 -66
  78. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  79. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  80. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  81. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  82. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  83. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  84. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  85. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  86. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  87. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  88. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  89. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  90. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  91. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  92. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  93. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  94. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  95. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  96. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  97. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  98. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  99. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  100. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  101. package/telegram-plugin/tests/history.test.ts +60 -0
  102. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  103. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  104. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  105. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  106. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  107. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  108. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  109. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  110. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  111. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  112. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  113. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  114. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  115. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  116. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  117. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  118. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  119. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  120. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  121. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  122. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  123. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  124. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  125. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  126. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  127. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  128. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  129. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  130. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  131. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  132. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  133. package/telegram-plugin/tool-activity-summary.ts +375 -58
  134. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  135. package/telegram-plugin/uat/assertions.ts +115 -0
  136. package/telegram-plugin/uat/driver.ts +68 -0
  137. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  138. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  139. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  145. package/telegram-plugin/welcome-text.ts +13 -1
  146. package/telegram-plugin/worker-activity-feed.ts +157 -82
  147. package/telegram-plugin/draft-transport.ts +0 -122
  148. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  149. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -0,0 +1,240 @@
1
+ /**
2
+ * turn-liveness-floor.ts — the mid-turn liveness floor decision (issue #2527).
3
+ *
4
+ * The conversational-pacing safety net (`silence-poke.ts`) only ever sent a
5
+ * user-visible TEXT signal at its 300s framework fallback, and DEFERRED even
6
+ * that while the agent was "legitimately working" (an in-flight tool) on the
7
+ * rationale that the live activity feed renders the work. But the feed only
8
+ * exists for BACKGROUND sub-agents — a FOREGROUND turn grinding through
9
+ * minutes of silent Bash/Read/restart calls has no feed, so the user sees
10
+ * only the ambient 👀 and reasonably reads it as "done" (the documented
11
+ * #2527 evidence: a 6-minute silent diagnose with "Status?" asked twice).
12
+ *
13
+ * This module is the missing floor: a code-owned, fire-once-per-turn interim
14
+ * that fires PRECISELY BECAUSE the turn is busy-but-silent — the exact
15
+ * inversion of silence-poke's "busy ⇒ suppress" defer. It is a pure decision
16
+ * (the gateway owns the actual send, through the same path a model reply
17
+ * takes) so the policy is unit-testable in isolation.
18
+ *
19
+ * Design contract: `reference/rfcs/turn-liveness-primitive.md`.
20
+ * Job: `reference/jobs/know-what-my-agent-is-doing.md`.
21
+ *
22
+ * Keyed on LOOP ROLE, never chat type — so a DM and a forum-supergroup
23
+ * topic get identical guarantees (surface parity by construction), and the
24
+ * floor binds only the `user` role (a `system`/cron turn's silence is
25
+ * legitimate; a `sub-agent`'s liveness is carried by the parent turn).
26
+ */
27
+
28
/**
 * Provenance of a turn — the one discriminator every liveness decision keys on.
 *
 * It is stamped a single time at enqueue and only read afterwards, standing in
 * for the scattered `chatType !== 'private'` / `chatId == null` /
 * `source === 'cron'` predicates. A role says who the turn runs for, not what
 * kind of agent runs it: a research worker and a nested sub-agent are BOTH
 * `sub-agent`, so adding a new agent *type* never adds a role.
 */
export type LoopRole =
  | 'user'
  | 'sub-agent'
  | 'system'
35
+
36
/**
 * Enqueue `source` values that mark a turn as system-initiated: no human is
 * waiting at that instant, so silence is legitimate. Every other turn — a
 * plain human DM (no source at all) or a sub-agent handback that continues
 * user-facing work — counts as `user` and owes the never-silent guarantee.
 *
 * Deliberately an allow-list: only known scheduled/wake sources are `system`,
 * so a source nobody has seen before lands on `user` (and gets liveness)
 * instead of silently opting out of the floor.
 */
const SYSTEM_SOURCES = new Set(['cron', 'wake', 'schedule', 'scheduler', 'timer', 'heartbeat'])

/**
 * Derive the loop role for a MAIN-session turn from its enqueue envelope
 * (`<channel ... source="cron" ...>`). The gateway never creates a turn atom
 * for a sub-agent — those terminate on `SubagentStop` — so the result is only
 * ever `user` or `system`.
 *
 * Parsing is modelled on `silent-end-scan.mjs:parseChannelEnvelope`, but is
 * stricter about what counts as the `source` attribute (see the inline note).
 * NOTE(review): that sibling parser is not visible from this file — keep the
 * two in step if it needs the same tightening.
 *
 * @param rawContent The raw enqueued content; anything that is not a string
 *   (including `null` / `undefined`) is treated as a plain user turn.
 * @returns `'system'` only when the envelope carries a `source` listed in
 *   `SYSTEM_SOURCES`; `'user'` in every other case.
 */
export function deriveTurnRole(rawContent: string | null | undefined): Exclude<LoopRole, 'sub-agent'> {
  if (typeof rawContent !== 'string') return 'user'
  // Both names must be whole tokens: whitespace is required right after
  // `channel` and right before `source`. A bare `\bsource=` also matched
  // look-alike attributes such as `data-source="cron"` (and `<channel[^>]*`
  // matched `<channels …>`), which could opt a human turn out of the floor —
  // the opposite of the conservative "unknown ⇒ user" default.
  const match = rawContent.match(/<channel\s(?:[^>]*\s)?source="([^"]+)"/)
  const source = match ? match[1] : null
  if (source != null && SYSTEM_SOURCES.has(source)) return 'system'
  return 'user'
}
61
+
62
/** Everything `decideMidTurnFloor` needs to reach a verdict; all values are resolved by the caller. */
export interface MidTurnFloorInput {
  /** Kill switch — the caller passes the result of `midTurnFloorEnabled()`. */
  enabled: boolean
  /** Loop role of the turn. Only a `user` turn is bound by the floor. */
  role: LoopRole
  /** True once a substantive answer has already reached the user this turn. */
  finalAnswerDelivered: boolean
  /** Milliseconds since the last user-visible outbound (or since turn start when there was none). */
  silenceMs: number
  /** Floor threshold — the floor may fire at or after this much silence (default 45s). */
  floorThresholdMs: number
  /** The 300s fallback threshold — at or above it the fallback owns the beat. */
  fallbackThresholdMs: number
  /**
   * True while the agent is demonstrably working (in-flight tool / dispatched
   * sub-agent / open ask_user). The floor fires only in that state; a genuinely
   * silent or wedged turn is left to the 300s fallback.
   */
  legitimatelyWorking: boolean
  /** Fire-once latch: true when the floor has already fired during this turn. */
  alreadyFired: boolean
  /**
   * Set for a user "Status?" inbound that arrives mid-turn. The user asked
   * explicitly, so the threshold and the working check are bypassed — while
   * role / delivery / fire-once / enabled are still honoured.
   */
  force?: boolean
}

/** Verdict of the floor decision; a `skip` carries a machine-readable reason. */
export type MidTurnFloorDecision =
  | { kind: 'fire' }
  | { kind: 'skip'; reason: string }

/**
 * Pure decision: should the mid-turn liveness floor fire right now?
 *
 * It fires once per `user` turn when the turn has been working in silence for
 * at least the floor threshold but is still below the 300s fallback window, or
 * immediately for a forced "Status?" poke. Every other outcome is a `skip`
 * whose `reason` names the first gate that rejected the turn, so telemetry can
 * observe the decision. Gate order is significant: it decides which reason is
 * reported when several gates would reject.
 *
 * @param input Snapshot of the turn state (see `MidTurnFloorInput`).
 * @returns `{ kind: 'fire' }`, or `{ kind: 'skip', reason }`.
 */
export function decideMidTurnFloor(input: MidTurnFloorInput): MidTurnFloorDecision {
  const skip = (reason: string): MidTurnFloorDecision => ({ kind: 'skip', reason })

  // --- Gates that hold even for a forced poke -------------------------------
  if (!input.enabled) return skip('disabled')
  if (input.alreadyFired) return skip('already-fired')
  // Only the user role is bound: system/cron silence is legitimate, and a
  // sub-agent's liveness is carried by its parent turn.
  if (input.role !== 'user') return skip('non-user-role')
  // A real answer already reached the user, so there is nothing to bridge.
  if (input.finalAnswerDelivered) return skip('answer-delivered')

  // --- Forced poke: the user asked "Status?", so timing + working are moot --
  if (input.force === true) return { kind: 'fire' }

  // --- Timed path -----------------------------------------------------------
  if (input.silenceMs < input.floorThresholdMs) return skip('below-threshold')
  // From the 300s mark on, the loud fallback owns the beat; the floor is only
  // the quiet early one.
  if (input.silenceMs >= input.fallbackThresholdMs) return skip('fallback-window')
  // A silent turn that is NOT busy is a wedge — the fallback's job (it
  // unwedges; the floor does not).
  if (!input.legitimatelyWorking) return skip('not-working')

  return { kind: 'fire' }
}
120
+
121
/** Inputs to `decideTerminalReason`. */
export interface TerminalReasonInput {
  /** Kill switch for the role-aware terminal honesty (#2527). */
  enabled: boolean
  /** Loop role of the turn being finalized. */
  role: LoopRole
  /** True when a substantive answer reached the user during this turn. */
  finalAnswerDelivered: boolean
}

/**
 * Pure decision: the terminal reaction a turn finalizes to.
 *
 * `'undelivered'` (😐, the gentle non-celebratory terminal) is returned for
 * exactly one combination: the honesty gate is on, the turn is a `user` turn,
 * and it ended without a delivered answer — the #2527 "thumbs-up false done"
 * fix. Every other combination (gate off, a delivered user turn, or any
 * system/sub-agent turn, whose silence is legitimate) stays `'done'` (👍).
 *
 * @param input Gate flag, role and delivery state of the finished turn.
 * @returns `'undelivered'` or `'done'`.
 */
export function decideTerminalReason(input: TerminalReasonInput): 'done' | 'undelivered' {
  const userLeftWithoutAnswer =
    input.enabled && input.role === 'user' && !input.finalAnswerDelivered
  return userLeftWithoutAnswer ? 'undelivered' : 'done'
}
145
+
146
/**
 * Default floor threshold, in milliseconds: how much busy-silence elapses
 * before the first liveness beat fires. 45s is comfortably past a normal short
 * turn and well under the 300s wedge fallback. Overridable via env at the
 * gateway.
 */
export const DEFAULT_FLOOR_THRESHOLD_MS = 45_000
150
+
151
/**
 * Kill switch for the mid-turn floor.
 *
 * The floor is ON unless `SWITCHROOM_TG_LIVENESS_FLOOR` is set to one of
 * `0` / `false` / `off` / `no` (surrounding whitespace and letter case are
 * ignored), which turns it off without a rebuild. The variable is read on
 * every call rather than cached so tests can toggle the env without reloading
 * the module.
 *
 * @returns `false` only for one of the recognised "off" spellings.
 */
export function midTurnFloorEnabled(): boolean {
  const raw = process.env.SWITCHROOM_TG_LIVENESS_FLOOR
  // Unset means "use the default", and the default is ON.
  if (raw == null) return true
  const offSpellings = ['0', 'false', 'off', 'no']
  return !offSpellings.includes(raw.trim().toLowerCase())
}
162
+
163
/**
 * Parse a positive-integer ms window from an env value, or 0 when unset / empty /
 * non-numeric / 0 / negative / non-finite. A fractional value is truncated
 * toward zero, so anything below 1ms also yields 0.
 *
 * Now backs the post-answer background-agent liveness STALENESS CAP
 * (`SWITCHROOM_POST_ANSWER_LIVENESS_STALE_MS`): the gateway reads
 * `parsePostAnswerLivenessMs(env) || 30_000`, so 0 (unset/invalid) falls back to
 * a default-ON 30s cap, and a positive override wins. The `feedHeartbeatTick`
 * post-answer branch uses that cap (via `evaluatePostAnswerLiveness`) to stop
 * re-rendering the "background agent still working" card once the worker's last
 * advance goes stale — mirroring the pre-answer `FEED_LIVENESS_OPEN_MS` recency
 * cap. (This helper previously parsed the dormant `SWITCHROOM_POST_ANSWER_LIVENESS_MS`
 * Item-3 escape hatch, whose gate was removed.)
 *
 * Extracted as a pure function so the parse contract is unit-testable (gateway.ts
 * is not importable in isolation — top-level side effects). Mirrors
 * `parseVisibleAnswerStreamEnabled`'s pattern.
 *
 * @param raw The raw env string, or `undefined` when the variable is unset.
 * @returns A whole number of milliseconds `>= 1`, or `0` for "no valid value".
 */
export function parsePostAnswerLivenessMs(raw: string | undefined): number {
  if (!raw) return 0
  // Truncate so the result honours the documented integer contract. Without
  // this a value such as "0.5" was returned verbatim, beat the caller's
  // `|| default` fallback, and produced a sub-millisecond cap.
  const wholeMs = Math.trunc(Number(raw))
  return Number.isFinite(wholeMs) && wholeMs > 0 ? wholeMs : 0
}
185
+
186
/** Outcome of one post-answer liveness evaluation. */
export type PostAnswerLivenessVerdict =
  /** No watcher activity after the answer (idle gap) — stay silent. */
  | 'idle'
  /** The last activity is older than the cap (worker finished / went quiet) — stop emitting. */
  | 'stale'
  /** Genuine in-flight post-answer activity — render the liveness card. */
  | 'emit'

/** Inputs to `evaluatePostAnswerLiveness`. */
export interface PostAnswerLivenessInput {
  /** `turn.subagentActivityAt` — the watcher's last post-answer advance, or undefined. */
  subagentActivityAt: number | undefined
  /** `turn.finalAnswerDeliveredAt` — when the substantive final landed (undefined is read as 0). */
  finalAnswerDeliveredAt: number | undefined
  /** Wall-clock now (injected so tests control time). */
  now: number
  /** Staleness cap in ms; a value `<= 0` disables the cap. */
  staleCapMs: number
}

/**
 * Post-answer background-agent liveness — pure decision (Fix 2 / #2587
 * supersede, concern 3 staleness cap).
 *
 * The `feedHeartbeatTick` post-answer branch re-renders a "background agent
 * still working" card every FEED_HEARTBEAT_TICK_MS while a sub-agent/workflow
 * watcher keeps advancing `turn.subagentActivityAt` AFTER the substantive final
 * answer, and consults this gate on each tick. Two guards are encoded, checked
 * in this order:
 *
 * - **idle-gap suppression** — with no watcher activity after the answer
 *   (`subagentActivityAt` unset, or not later than the answer time) the verdict
 *   is `'idle'`, so the reply-is-last invariant holds for genuinely idle turns.
 * - **staleness cap** (concern 3) — once `now - subagentActivityAt` reaches
 *   `staleCapMs` (the worker's `onFinish` froze the timestamp and no new step
 *   arrived) the verdict is `'stale'`. Without it the card kept re-rendering
 *   `state:'running'` with an ever-growing `elapsed` long after the worker had
 *   terminated. This mirrors the pre-answer `FEED_LIVENESS_OPEN_MS` recency cap.
 *
 * `staleCapMs <= 0` switches the cap off (idle-gap suppression still applies);
 * the gateway parses a positive default, so the cap is ON by default.
 *
 * Kept as a pure function so the lifecycle (emit in the post-answer /
 * pre-teardown window, stop once stale) is unit-testable without instantiating
 * the gateway IIFE, whose top-level side effects make it un-importable in
 * isolation.
 *
 * @param input Timestamps, the injected clock and the cap.
 * @returns `'idle'`, `'stale'` or `'emit'`.
 */
export function evaluatePostAnswerLiveness(input: PostAnswerLivenessInput): PostAnswerLivenessVerdict {
  const lastAdvance = input.subagentActivityAt
  const answeredAt = input.finalAnswerDeliveredAt ?? 0

  // Idle gap: nothing surfaced after the answer, so keep the reply last.
  if (lastAdvance == null || lastAdvance <= answeredAt) return 'idle'

  // Staleness cap: only consulted when enabled (positive).
  const capReached = input.staleCapMs > 0 && input.now - lastAdvance >= input.staleCapMs
  return capReached ? 'stale' : 'emit'
}
@@ -66,6 +66,121 @@ export function isActivityFeedMessage(m: ObservedMessage): boolean {
66
66
  return lines.every((l) => ACTIVITY_FEED_LINE_RE.test(l));
67
67
  }
68
68
 
69
/**
 * Decide whether `m` is the agent's actual answer — the foreground reply.
 *
 * A message qualifies when all of the following hold: it was not sent by the
 * driver (so it came from the bot side), it is an original send rather than an
 * edit, it is neither a worker-feed nor an activity-feed surface, and its text
 * is non-empty after trimming. Promoted here from the cross-surface fuzz
 * scenario so `assertReplyIsLast` and any scenario share a single definition of
 * "this is the answer lane".
 *
 * @param m The observed message to classify.
 * @param driverUserId User id of the test driver account.
 * @returns `true` only when every condition above is met.
 */
export function isAnswer(m: ObservedMessage, driverUserId: number): boolean {
  // Identity checks come first; the feed classifiers are consulted only for an
  // un-edited message that the driver did not send.
  if (m.senderUserId === driverUserId) return false;
  if (m.edited) return false;
  if (isWorkerFeedMessage(m) || isActivityFeedMessage(m)) return false;
  return m.text.trim().length > 0;
}
85
+
86
/** Options for `assertReplyIsLast`. */
export interface ReplyIsLastOptions {
  /**
   * The answer message that has to be last in its foreground turn. The turn is
   * the half-open window `[turn.messageId, nextDriverMessageId)`: everything
   * from the answer up to, but excluding, the NEXT message the driver sent.
   * Activity/worker-feed surfaces inside the window belong to this turn; a
   * legitimately later surface (a background worker card, an
   * obligation-represent nudge, an error envelope, or the next turn's feed)
   * lies OUTSIDE it and is correctly NOT flagged.
   *
   * Pass the answer `ObservedMessage` returned by `expectMessage` (or one
   * pulled from `getHistory`).
   */
  turn: ObservedMessage;
}

/**
 * Assert the scoped "reply is last" invariant (design §6/§11): within one
 * foreground turn, NO activity-card / worker-feed surface opens AFTER that
 * turn's reply.
 *
 * `history` is expected to be a server-send-order pull (`driver.getHistory`),
 * which also contains surfaces that landed before any live observer started —
 * needed to catch a post-reply card across a re-prompt boundary.
 *
 * This is deliberately NOT a naive "the answer has the max message_id" check:
 * legitimate background / represent / error surfaces land later and would
 * false-positive. Only feed surfaces (`isActivityFeedMessage` /
 * `isWorkerFeedMessage`) inside the window of the SAME foreground turn
 * (`opts.turn`) are considered.
 *
 * @param history Messages of the chat; only `messageId`, `senderUserId` and
 *   `text` are read here.
 * @param driverUserId User id of the test driver; its next message closes the turn.
 * @param opts Carries the answer message that anchors the turn window.
 * @throws Error naming every activity/worker-feed message of this turn whose
 *   message_id is higher than the reply's.
 */
export function assertReplyIsLast(
  history: ObservedMessage[],
  driverUserId: number,
  opts: ReplyIsLastOptions,
): void {
  const answer = opts.turn;
  const replyId = answer.messageId;

  // Exclusive upper bound of the turn: the smallest id among driver (user)
  // messages sent strictly after the reply, or null when the driver has not
  // spoken again. Anything at/after it belongs to a later turn.
  let turnEnd: number | null = null;
  for (const m of history) {
    const isLaterDriverMessage = m.senderUserId === driverUserId && m.messageId > replyId;
    if (!isLaterDriverMessage) continue;
    if (turnEnd == null || m.messageId < turnEnd) turnEnd = m.messageId;
  }

  // Offenders are feed surfaces strictly after the reply and still inside the
  // window. The feed classifiers run only for messages that pass both bounds.
  const offenders: ObservedMessage[] = [];
  for (const m of history) {
    const afterReply = m.messageId > replyId;
    const beforeTurnEnd = turnEnd == null || m.messageId < turnEnd;
    if (!(afterReply && beforeTurnEnd)) continue;
    if (isActivityFeedMessage(m) || isWorkerFeedMessage(m)) offenders.push(m);
  }

  if (offenders.length === 0) return;

  const detail = offenders
    .map((m) => `msg=${m.messageId} ${JSON.stringify(m.text.slice(0, 60))}`)
    .join("; ");
  throw new Error(
    `assertReplyIsLast: an activity/feed surface opened AFTER the reply ` +
      `(answer msg=${answer.messageId}) in the same foreground turn: ${detail}`,
  );
}
157
+
158
/**
 * Assert NOTIFICATION OWNERSHIP (R8 / PR-2 — design
 * `docs/message-emission-determinism.md` §over-ping): the turn's SUBSTANTIVE
 * answer must have buzzed the device, i.e. it must NOT be silent. mtcute
 * surfaces Telegram's `silent` flag on every message
 * (`ObservedMessage.silent`, set from the sender's `disable_notification`).
 *
 * The residual this guards against came from the bare "first ping wins" rule:
 * an interim ack pinged first and claimed the turn's single ping slot, so the
 * later substantive answer was downgraded to silent — "the reply is last but
 * the phone never buzzed for the answer." After PR-2 the answer UPGRADES over
 * the ack's slot and arrives non-silent.
 *
 * @param answer The substantive answer message to check.
 * @throws Error when `answer.silent` is truthy. Throwing (instead of returning
 *   `false`) lets a scenario read as a plain assertion; the message id and a
 *   text preview are included for triage.
 */
export function assertAnswerPinged(answer: ObservedMessage): void {
  if (!answer.silent) return;

  const preview = JSON.stringify(answer.text.slice(0, 80));
  throw new Error(
    `assertAnswerPinged: the substantive answer arrived SILENT (no device ping) ` +
      `— an earlier ack-ping downgraded the answer (R8 / PR-2 regression). ` +
      `answer msg=${answer.messageId} ${preview}`,
  );
}
183
+
69
184
  export interface PollOptions {
70
185
  /** Hard deadline; the predicate must resolve truthy before this. */
71
186
  timeout: number;
@@ -368,6 +368,15 @@ export class Driver {
368
368
  * - Custom emojis (`reactionCustomEmoji`) are skipped — scenarios
369
369
  * that need them aren't in scope and parsing them would require
370
370
  * resolving the document id to an alias.
371
+ *
372
+ * **DM / bot-reaction limitation:** when a BOT calls
373
+ * `setMessageReaction` on a DM, Telegram's MTProto server does NOT
374
+ * deliver `updateMessageReactions` to the human user's account.
375
+ * The server only delivers `updateBotMessageReaction` to the BOT's
376
+ * own update stream, so `onRawUpdate` never fires for the driver.
377
+ * For DM bot-reaction assertions, use {@link pollReactions} instead
378
+ * — it calls `messages.getMessagesReactions` directly (pull, not
379
+ * push) to read the current reaction set on any message.
371
380
  */
372
381
  observeReactions(
373
382
  chatId: number,
@@ -581,6 +590,65 @@ export class Driver {
581
590
  return toObserved(msg, false);
582
591
  }
583
592
 
593
/**
 * Pull a window of chat history and return it in SERVER SEND-ORDER (ascending
 * message_id — the actual on-screen order).
 *
 * This is the Phase-3 helper the `observeMessages` doc-comment refers to.
 * Unlike the new+edit observer stream it is a pull, so it sees ALL messages
 * already in the chat, including ones that landed before the observer
 * started — required to assert ordering across a re-prompt boundary
 * (`jtbd-reply-is-last-dm`, design §11 cases 3 & 4).
 *
 * mtcute's `getHistory` returns newest-first by default (and `reverse=true`
 * needs an offset, returning [] otherwise), so the page is fetched as-is and
 * then sorted ascending by message_id to recover send-order. Each entry is
 * mapped to the same `ObservedMessage` shape the assertions/predicates consume.
 *
 * @param chatId Chat to read from.
 * @param limit Maximum number of messages requested from the client (default 100).
 * @returns The fetched messages, oldest first.
 */
async getHistory(
  chatId: number,
  limit = 100,
): Promise<ObservedMessage[]> {
  const client = this.requireClient();
  const fetched = await client.getHistory(chatId, { limit });
  // `false` is the second argument `toObserved` receives for every pulled message.
  const observed = fetched.map((msg) => toObserved(msg, false));
  observed.sort((lhs, rhs) => lhs.messageId - rhs.messageId);
  return observed;
}
616
+
617
/**
 * Read the emoji reactions currently set on one message, via a direct
 * `messages.getMessagesReactions` MTProto call.
 *
 * Unlike {@link observeReactions} this is a **pull** — it does not depend on
 * push updates from the Telegram server. That makes it the right verification
 * method for DM bot-reaction scenarios: when a bot calls `setMessageReaction`
 * on a DM, Telegram does not deliver `updateMessageReactions` to the user
 * account, but the reaction IS queryable through this API.
 *
 * Custom-emoji reactions are left out: their document id cannot be shown as a
 * string without resolving it.
 *
 * @param chatId Chat that contains the message.
 * @param messageId Id of the message whose reactions are read.
 * @returns The emoji strings on the message (e.g. `["👍"]`, `["👀", "🤔"]`).
 *   The array is empty when no reactions are set, or when
 *   `getMessageReactionsById` yields nothing for the message (deleted / not
 *   visible).
 */
async pollReactions(chatId: number, messageId: number): Promise<string[]> {
  const client = this.requireClient();
  const perMessage = await client.getMessageReactionsById(chatId, [messageId]);
  const entry = perMessage[0];
  if (!entry) return [];
  // Keep plain-emoji reactions only; a non-string `emoji` is a custom-emoji
  // document id, which is skipped.
  return Array.from(entry.reactions, (reaction) => reaction.emoji).filter(
    (emoji): emoji is string => typeof emoji === "string",
  );
}
651
+
584
652
  /**
585
653
  * Fetch the inline keyboard attached to a bot message, if any.
586
654
  * Returns `null` for messages without an inline_keyboard (or with