npm - switchroom - Versions diffs - 0.15.45 → 0.16.4 - Mend

switchroom 0.15.45 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (149) hide show

package/dist/agent-scheduler/index.js +122 -88
package/dist/auth-broker/index.js +463 -177
package/dist/cli/autoaccept-poll.js +4842 -35
package/dist/cli/drive-write-pretool.mjs +17 -14
package/dist/cli/notion-write-pretool.mjs +117 -86
package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
package/dist/cli/self-improve-stop.mjs +428 -0
package/dist/cli/skill-validate-pretool.mjs +72 -72
package/dist/cli/switchroom.js +3158 -1178
package/dist/host-control/main.js +2833 -355
package/dist/vault/approvals/kernel-server.js +7479 -7439
package/dist/vault/broker/server.js +11312 -11272
package/examples/minimal.yaml +1 -0
package/examples/switchroom.yaml +1 -0
package/package.json +3 -3
package/profiles/_base/start.sh.hbs +88 -1
package/profiles/_shared/execution-discipline.md.hbs +18 -0
package/profiles/default/CLAUDE.md.hbs +0 -19
package/telegram-plugin/.claude-plugin/plugin.json +2 -2
package/telegram-plugin/answer-stream-flag.ts +12 -49
package/telegram-plugin/answer-stream.ts +5 -150
package/telegram-plugin/auth-snapshot-format.ts +280 -48
package/telegram-plugin/auto-fallback-fleet.ts +44 -1
package/telegram-plugin/context-exhaustion.ts +12 -0
package/telegram-plugin/demo-mask.ts +154 -0
package/telegram-plugin/dist/bridge/bridge.js +167 -124
package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
package/telegram-plugin/dist/server.js +215 -172
package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
package/telegram-plugin/draft-stream.ts +47 -410
package/telegram-plugin/final-answer-detect.ts +17 -12
package/telegram-plugin/fleet-fallback-resume.ts +131 -0
package/telegram-plugin/format.ts +56 -19
package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
package/telegram-plugin/gateway/auth-command.ts +70 -14
package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
package/telegram-plugin/gateway/current-turn-map.ts +188 -0
package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
package/telegram-plugin/gateway/effort-command.ts +8 -3
package/telegram-plugin/gateway/emission-authority.ts +369 -0
package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
package/telegram-plugin/gateway/gateway.ts +1837 -291
package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
package/telegram-plugin/gateway/represent-guard.ts +72 -0
package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
package/telegram-plugin/gateway/status-surface-log.ts +14 -3
package/telegram-plugin/history.ts +33 -11
package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
package/telegram-plugin/issues-card.ts +4 -0
package/telegram-plugin/model-unavailable.ts +124 -0
package/telegram-plugin/narrative-dedup.ts +69 -0
package/telegram-plugin/over-ping-safety-net.ts +70 -4
package/telegram-plugin/package.json +3 -3
package/telegram-plugin/pending-work-progress.ts +12 -0
package/telegram-plugin/permission-rule.ts +32 -5
package/telegram-plugin/permission-title.ts +152 -9
package/telegram-plugin/quota-check.ts +13 -0
package/telegram-plugin/quota-watch.ts +135 -7
package/telegram-plugin/registry/turns-schema.test.ts +24 -0
package/telegram-plugin/registry/turns-schema.ts +9 -0
package/telegram-plugin/runtime-metrics.ts +13 -0
package/telegram-plugin/session-tail.ts +96 -11
package/telegram-plugin/silence-poke.ts +170 -24
package/telegram-plugin/slot-banner-driver.ts +3 -0
package/telegram-plugin/status-no-truncate.ts +44 -0
package/telegram-plugin/status-reactions.ts +20 -3
package/telegram-plugin/stream-controller.ts +4 -23
package/telegram-plugin/stream-reply-handler.ts +6 -24
package/telegram-plugin/streaming-metrics.ts +91 -0
package/telegram-plugin/subagent-watcher.ts +212 -66
package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
package/telegram-plugin/tests/answer-stream.test.ts +2 -411
package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
package/telegram-plugin/tests/demo-mask.test.ts +127 -0
package/telegram-plugin/tests/draft-stream.test.ts +0 -827
package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
package/telegram-plugin/tests/feed-survival.test.ts +526 -0
package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
package/telegram-plugin/tests/history.test.ts +60 -0
package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
package/telegram-plugin/tests/permission-rule.test.ts +17 -0
package/telegram-plugin/tests/permission-title.test.ts +206 -17
package/telegram-plugin/tests/quota-watch.test.ts +252 -9
package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
package/telegram-plugin/tests/represent-guard.test.ts +162 -0
package/telegram-plugin/tests/session-tail.test.ts +147 -3
package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
package/telegram-plugin/tests/telegram-format.test.ts +101 -6
package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
package/telegram-plugin/tests/tool-labels.test.ts +67 -0
package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
package/telegram-plugin/tests/welcome-text.test.ts +32 -3
package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
package/telegram-plugin/tool-activity-summary.ts +375 -58
package/telegram-plugin/turn-liveness-floor.ts +240 -0
package/telegram-plugin/uat/assertions.ts +115 -0
package/telegram-plugin/uat/driver.ts +68 -0
package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
package/telegram-plugin/welcome-text.ts +13 -1
package/telegram-plugin/worker-activity-feed.ts +157 -82
package/telegram-plugin/draft-transport.ts +0 -122
package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
package/telegram-plugin/tests/draft-transport.test.ts +0 -211

package/telegram-plugin/turn-liveness-floor.ts ADDED Viewed

@@ -0,0 +1,240 @@
+/**
+ * turn-liveness-floor.ts — the mid-turn liveness floor decision (issue #2527).
+ *
+ * The conversational-pacing safety net (`silence-poke.ts`) only ever sent a
+ * user-visible TEXT signal at its 300s framework fallback, and DEFERRED even
+ * that while the agent was "legitimately working" (an in-flight tool) on the
+ * rationale that the live activity feed renders the work. But the feed only
+ * exists for BACKGROUND sub-agents — a FOREGROUND turn grinding through
+ * minutes of silent Bash/Read/restart calls has no feed, so the user sees
+ * only the ambient 👀 and reasonably reads it as "done" (the documented
+ * #2527 evidence: a 6-minute silent diagnose with "Status?" asked twice).
+ *
+ * This module is the missing floor: a code-owned, fire-once-per-turn interim
+ * that fires PRECISELY BECAUSE the turn is busy-but-silent — the exact
+ * inversion of silence-poke's "busy ⇒ suppress" defer. It is a pure decision
+ * (the gateway owns the actual send, through the same path a model reply
+ * takes) so the policy is unit-testable in isolation.
+ *
+ * Design contract: `reference/rfcs/turn-liveness-primitive.md`.
+ * Job: `reference/jobs/know-what-my-agent-is-doing.md`.
+ *
+ * Keyed on LOOP ROLE, never chat type — so a DM and a forum-supergroup
+ * topic get identical guarantees (surface parity by construction), and the
+ * floor binds only the `user` role (a `system`/cron turn's silence is
+ * legitimate; a `sub-agent`'s liveness is carried by the parent turn).
+ */
+/**
+ * Provenance of a turn, expressed as one discriminator. It is stamped a
+ * single time at enqueue and consulted everywhere afterwards, taking the
+ * place of the previously scattered `chatType !== 'private'` /
+ * `chatId == null` / `source === 'cron'` checks.
+ *
+ * New agent *types* do not introduce new roles: a research worker and a
+ * nested sub-agent both map to `sub-agent`.
+ */
+export type LoopRole =
+  | 'user'
+  | 'sub-agent'
+  | 'system'
+/**
+ * Enqueue `source` values that classify a turn as system-initiated — nobody
+ * is waiting on it at that moment, so staying silent is legitimate. Any other
+ * value (a plain human DM with no source, or a sub-agent handback continuing
+ * user-facing work) is treated as `user` and owes the never-silent guarantee.
+ *
+ * The list is deliberately conservative: only known scheduled/wake sources
+ * appear here, so an unrecognised source falls through to `user` and keeps
+ * its liveness rather than quietly opting out of the floor.
+ */
+const SYSTEM_SOURCES = new Set<string>([
+  'cron',
+  'wake',
+  'schedule',
+  'scheduler',
+  'timer',
+  'heartbeat',
+])
+/**
+ * Work out the loop role of a MAIN-session turn from its enqueue envelope
+ * (`<channel ... source="cron" ...>`). Only `user` or `system` can come back:
+ * the gateway never creates a turn atom for a sub-agent (those terminate on
+ * `SubagentStop`). Parsing mirrors `silent-end-scan.mjs:parseChannelEnvelope`.
+ *
+ * @param rawContent The raw enqueued content; non-string input yields `user`.
+ * @returns `system` when the envelope's `source` attribute is one of
+ *          `SYSTEM_SOURCES`, otherwise `user`.
+ *
+ * NOTE(review): the pattern is unanchored, so any `<channel … source="…">`
+ * tag found in the content is honoured — confirm the envelope is always the
+ * first such tag.
+ */
+export function deriveTurnRole(rawContent: string | null | undefined): Exclude<LoopRole, 'sub-agent'> {
+  if (typeof rawContent === 'string') {
+    const envelope = /<channel[^>]*\bsource="([^"]+)"/.exec(rawContent)
+    const source = envelope?.[1]
+    if (source != null && SYSTEM_SOURCES.has(source)) return 'system'
+  }
+  return 'user'
+}
+/**
+ * Snapshot of turn state that `decideMidTurnFloor` evaluates. Every field is
+ * resolved by the caller beforehand, so the decision function itself reads no
+ * clocks, environment, or gateway state.
+ */
+export interface MidTurnFloorInput {
+  /** Kill switch — `midTurnFloorEnabled()` resolved by the caller. */
+  enabled: boolean
+  /** The turn's loop role. The floor binds ONLY `user`. */
+  role: LoopRole
+  /** Whether a substantive answer has already reached the user this turn. */
+  finalAnswerDelivered: boolean
+  /** ms since the last user-visible outbound (or turn start if none). */
+  silenceMs: number
+  /** Floor threshold — fire at/after this much silence (default 45s). */
+  floorThresholdMs: number
+  /** The 300s fallback threshold — above it, the fallback owns the beat. */
+  fallbackThresholdMs: number
+  /** Whether the agent is demonstrably working (in-flight tool / dispatched
+   *  sub-agent / open ask_user). The floor fires only when working — a
+   *  genuinely silent/wedged turn is the 300s fallback's job. */
+  legitimatelyWorking: boolean
+  /** Whether the floor has already fired once this turn (fire-once latch). */
+  alreadyFired: boolean
+  /** When true (a user "Status?" mid-turn inbound), bypass the threshold and
+   *  the working check — the user explicitly asked, so answer immediately —
+   *  but still honour role / delivery / fire-once / enabled. */
+  force?: boolean
+}
+/**
+ * Result of `decideMidTurnFloor`: either `fire`, or `skip` carrying a short
+ * machine-readable `reason` string naming the check that declined.
+ */
+export type MidTurnFloorDecision =
+  | { kind: 'fire' }
+  | { kind: 'skip'; reason: string }
+/**
+ * Pure decision for the mid-turn liveness floor: fire now, or skip?
+ *
+ * The checks run in a fixed order and the first one that declines wins:
+ *   1. `disabled`         — kill switch is off.
+ *   2. `already-fired`    — the fire-once latch is set for this turn.
+ *   3. `non-user-role`    — only `user` turns are bound; system/cron silence
+ *                           is legitimate and a sub-agent's liveness belongs
+ *                           to its parent turn.
+ *   4. `answer-delivered` — the user already saw a real answer.
+ * A forced poke (`force === true`, the user asked "Status?") fires as soon
+ * as those four pass. Otherwise three more checks apply:
+ *   5. `below-threshold`  — silence is shorter than the floor threshold.
+ *   6. `fallback-window`  — silence is at/above the fallback threshold, where
+ *                           the loud fallback owns the beat.
+ *   7. `not-working`      — the turn is not demonstrably busy; a genuinely
+ *                           silent turn is a wedge for the fallback to handle.
+ *
+ * @param input Caller-resolved turn state.
+ * @returns `{ kind: 'fire' }`, or `{ kind: 'skip', reason }` with one of the
+ *          reason strings above so the outcome is observable in telemetry.
+ */
+export function decideMidTurnFloor(input: MidTurnFloorInput): MidTurnFloorDecision {
+  const skip = (reason: string): MidTurnFloorDecision => ({ kind: 'skip', reason })
+  const { enabled, alreadyFired, role, finalAnswerDelivered } = input
+  // Gates that apply to every path, including a forced poke.
+  if (!enabled) return skip('disabled')
+  if (alreadyFired) return skip('already-fired')
+  if (role !== 'user') return skip('non-user-role')
+  if (finalAnswerDelivered) return skip('answer-delivered')
+  // Timing + working gates are bypassed only by an explicit forced poke.
+  if (input.force !== true) {
+    const { silenceMs, floorThresholdMs, fallbackThresholdMs } = input
+    if (silenceMs < floorThresholdMs) return skip('below-threshold')
+    if (silenceMs >= fallbackThresholdMs) return skip('fallback-window')
+    if (!input.legitimatelyWorking) return skip('not-working')
+  }
+  return { kind: 'fire' }
+}
+/**
+ * Caller-resolved state that `decideTerminalReason` uses to pick the terminal
+ * reaction a turn finalizes to.
+ */
+export interface TerminalReasonInput {
+  /** Kill switch for the role-aware terminal honesty (#2527). */
+  enabled: boolean
+  /** The turn's loop role. */
+  role: LoopRole
+  /** Whether a substantive answer reached the user this turn. */
+  finalAnswerDelivered: boolean
+}
+/**
+ * Pure decision: the terminal reaction a turn finalizes to.
+ *
+ * Returns `'undelivered'` (😐, the gentle non-celebratory terminal) in exactly
+ * one situation: the honesty gate is enabled, the turn's role is `user`, and
+ * no answer was delivered — the #2527 "thumbs-up false done" fix. Every other
+ * combination (gate disabled, any system/sub-agent turn, or a delivered user
+ * turn) returns `'done'` (👍); a `system`/cron turn's silence is legitimate.
+ *
+ * @param input Gate flag, loop role, and delivery state for the turn.
+ * @returns `'undelivered'` or `'done'` as described above.
+ */
+export function decideTerminalReason(input: TerminalReasonInput): 'done' | 'undelivered' {
+  const owesAnswer = input.enabled && input.role === 'user' && !input.finalAnswerDelivered
+  return owesAnswer ? 'undelivered' : 'done'
+}
+/** Default floor threshold, in milliseconds — fire a first liveness beat
+ *  after this much busy-silence. 45s is comfortably past a normal short turn
+ *  and well under the 300s wedge fallback. Overridable via env at the gateway. */
+export const DEFAULT_FLOOR_THRESHOLD_MS = 45_000
+/**
+ * Kill switch for the mid-turn floor, read from the
+ * `SWITCHROOM_TG_LIVENESS_FLOOR` environment variable.
+ *
+ * The floor is ON unless the variable is set to `0`, `false`, `off`, or `no`
+ * (compared after trimming whitespace and lower-casing). An unset variable,
+ * an empty string, or any other value leaves it enabled, so it can be turned
+ * off without a rebuild. The environment is re-read on every call so tests
+ * can toggle it without reloading the module.
+ *
+ * @returns `false` only for one of the four disabling values.
+ */
+export function midTurnFloorEnabled(): boolean {
+  const raw = process.env.SWITCHROOM_TG_LIVENESS_FLOOR
+  if (raw == null) return true
+  switch (raw.trim().toLowerCase()) {
+    case '0':
+    case 'false':
+    case 'off':
+    case 'no':
+      return false
+    default:
+      return true
+  }
+}
+/**
+ * Parse a millisecond window from an env value. Returns the parsed number
+ * when it is finite and strictly positive, and 0 otherwise (unset, empty,
+ * non-numeric, zero, negative, or non-finite). Parsing uses `Number(raw)`,
+ * so a fractional value such as `"1.5"` is returned as-is rather than rounded.
+ *
+ * It backs the post-answer background-agent liveness STALENESS CAP
+ * (`SWITCHROOM_POST_ANSWER_LIVENESS_STALE_MS`): the gateway reads
+ * `parsePostAnswerLivenessMs(env) || 30_000`, so a 0 result falls back to a
+ * default-ON 30s cap while a positive override wins. The `feedHeartbeatTick`
+ * post-answer branch feeds that cap to `evaluatePostAnswerLiveness` to stop
+ * re-rendering the "background agent still working" card once the worker's
+ * last advance goes stale — mirroring the pre-answer `FEED_LIVENESS_OPEN_MS`
+ * recency cap. (The helper formerly parsed the dormant
+ * `SWITCHROOM_POST_ANSWER_LIVENESS_MS` Item-3 escape hatch, whose gate was
+ * removed; the parse semantics are unchanged.)
+ *
+ * Kept as a pure function so the parse contract is unit-testable (gateway.ts
+ * is not importable in isolation — top-level side effects). Mirrors
+ * `parseVisibleAnswerStreamEnabled`'s pattern.
+ *
+ * @param raw The raw environment value, or `undefined` when unset.
+ * @returns The positive finite window in ms, or 0.
+ */
+export function parsePostAnswerLivenessMs(raw: string | undefined): number {
+  // Unset and empty string are both "no override".
+  if (!raw) return 0
+  const parsed = Number(raw)
+  if (!Number.isFinite(parsed)) return 0
+  return parsed > 0 ? parsed : 0
+}
+/** Verdict produced by `evaluatePostAnswerLiveness` for one heartbeat tick. */
+export type PostAnswerLivenessVerdict =
+  /** No post-answer watcher activity (idle gap) — stay silent. */
+  | 'idle'
+  /** Activity has gone stale (worker finished / went quiet) — stop emitting. */
+  | 'stale'
+  /** Genuine in-flight post-answer activity — render the liveness card. */
+  | 'emit'
+/** Caller-resolved timestamps and cap consumed by `evaluatePostAnswerLiveness`. */
+export interface PostAnswerLivenessInput {
+  /** `turn.subagentActivityAt` — the watcher's last post-answer advance, or undefined. */
+  subagentActivityAt: number | undefined
+  /** `turn.finalAnswerDeliveredAt` — when the substantive final landed (undefined ⇒ 0). */
+  finalAnswerDeliveredAt: number | undefined
+  /** Wall-clock now (injected for tests). */
+  now: number
+  /** Staleness cap in ms; `<= 0` disables the cap. */
+  staleCapMs: number
+}
+/**
+ * Post-answer background-agent liveness — pure decision (Fix 2 / #2587 supersede,
+ * concern 3 staleness cap).
+ *
+ * The `feedHeartbeatTick` post-answer branch re-renders a "background agent still
+ * working" card every FEED_HEARTBEAT_TICK_MS while a sub-agent/workflow watcher
+ * keeps advancing `turn.subagentActivityAt` AFTER the substantive final answer.
+ * This function is the gate it consults each tick. It encodes BOTH guards:
+ *
+ *   - **idle-gap suppression** — when no watcher activity arrived after the
+ *     answer (`subagentActivityAt` unset, or ≤ the answer time), stay silent so
+ *     the reply-is-last invariant holds for genuinely-idle turns; and
+ *   - **staleness cap** (concern 3) — once the worker's activity has gone stale
+ *     (`now - subagentActivityAt >= staleCapMs`, i.e. its `onFinish` froze the
+ *     timestamp and no new step has arrived), STOP emitting. Without this the
+ *     post-answer card kept re-rendering `state:'running'` with an
+ *     ever-growing `elapsed` forever, long after the worker terminated. This
+ *     mirrors the pre-answer `FEED_LIVENESS_OPEN_MS` recency cap, which bounds
+ *     the open window the same way.
+ *
+ * `staleCapMs <= 0` disables the cap (idle-gap suppression still applies) — but
+ * the gateway parses a positive default, so the cap is ON by default.
+ *
+ * Extracted as a pure function so the lifecycle (emit in the post-answer /
+ * pre-teardown window, stop once stale) is unit-testable without instantiating
+ * the gateway IIFE (top-level side effects make it un-importable in isolation).
+ *
+ * @param input Watcher/answer timestamps, the current time, and the cap.
+ * @returns `'idle'` when nothing advanced after the answer, `'stale'` when the
+ *          last advance is at least `staleCapMs` old, otherwise `'emit'`.
+ */
+export function evaluatePostAnswerLiveness(input: PostAnswerLivenessInput): PostAnswerLivenessVerdict {
+  const { subagentActivityAt, finalAnswerDeliveredAt, now, staleCapMs } = input
+  // A missing answer timestamp is treated as time 0.
+  const answeredAt = finalAnswerDeliveredAt ?? 0
+  // idle-gap: nothing surfaced after the answer → silent (reply-is-last preserved).
+  if (subagentActivityAt == null || subagentActivityAt <= answeredAt) return 'idle'
+  // staleness cap: the worker's last advance is older than the cap → stop emitting.
+  if (staleCapMs > 0 && now - subagentActivityAt >= staleCapMs) return 'stale'
+  return 'emit'
+}

package/telegram-plugin/uat/assertions.ts CHANGED Viewed

@@ -66,6 +66,121 @@ export function isActivityFeedMessage(m: ObservedMessage): boolean {
   return lines.every((l) => ACTIVITY_FEED_LINE_RE.test(l));
 }
+/**
+ * Decide whether `m` is the agent's actual answer (the foreground reply).
+ * All of the following must hold: it was not sent by the driver, it is an
+ * original send rather than an edit, it is neither a worker-feed nor an
+ * activity-feed surface, and its text is non-empty after trimming.
+ *
+ * Promoted here from the cross-surface fuzz scenario so that scenarios can
+ * share a single definition of "this is the answer lane".
+ *
+ * @param m The observed message to classify.
+ * @param driverUserId User id of the test driver account.
+ * @returns `true` when `m` qualifies as an answer.
+ */
+export function isAnswer(m: ObservedMessage, driverUserId: number): boolean {
+  if (m.senderUserId === driverUserId) return false;
+  if (m.edited) return false;
+  if (isWorkerFeedMessage(m)) return false;
+  if (isActivityFeedMessage(m)) return false;
+  return m.text.trim().length > 0;
+}
+/** Options for `assertReplyIsLast`: identifies which turn's reply is checked. */
+export interface ReplyIsLastOptions {
+  /**
+   * The answer message that must be last in its foreground turn. The turn is
+   * scoped as the half-open window `[turn.messageId, nextDriverMessageId)` —
+   * i.e. everything from the answer up to (but excluding) the NEXT message the
+   * driver sent. Activity/worker-feed surfaces inside that window belong to
+   * this turn; a legitimately later surface (a background worker card, an
+   * obligation-represent nudge, an error envelope, or the next turn's feed)
+   * sits OUTSIDE it and is correctly NOT flagged.
+   *
+   * Pass the answer ObservedMessage returned by `expectMessage` (or pulled
+   * from `getHistory`).
+   */
+  turn: ObservedMessage;
+}
+/**
+ * Assert the scoped "reply is last" invariant (design §6/§11): inside one
+ * foreground turn, NO activity-card / worker-feed surface may appear AFTER
+ * that turn's reply. It works on a server-send-order `history` pull
+ * (`driver.getHistory`), so it also sees surfaces that landed before any live
+ * observer started — needed to catch a post-reply card across a re-prompt
+ * boundary.
+ *
+ * This is deliberately NOT a naive "answer has the max message_id" check:
+ * legitimate background / represent / error surfaces land later and would
+ * false-positive. The turn window runs from the reply (`opts.turn`) up to,
+ * but excluding, the first driver message with a higher id; only messages in
+ * that window matching `isActivityFeedMessage` / `isWorkerFeedMessage` count.
+ *
+ * @param history Messages to inspect.
+ * @param driverUserId User id of the test driver; its messages mark turn boundaries.
+ * @param opts Carries the reply (`turn`) that must be last.
+ * @throws Error listing each offending feed message (id plus the first 60
+ *         characters of its text) when one has a HIGHER message_id than the
+ *         reply inside the turn window.
+ */
+export function assertReplyIsLast(
+  history: ObservedMessage[],
+  driverUserId: number,
+  opts: ReplyIsLastOptions,
+): void {
+  const replyId = opts.turn.messageId;
+  // Exclusive upper bound of the turn: the lowest driver message id above the
+  // reply, or null when the driver has not sent anything since.
+  let windowEnd: number | null = null;
+  for (const m of history) {
+    if (m.senderUserId !== driverUserId || m.messageId <= replyId) continue;
+    if (windowEnd == null || m.messageId < windowEnd) windowEnd = m.messageId;
+  }
+  // Feed surfaces strictly after the reply and still inside the turn window.
+  const lateSurfaces: ObservedMessage[] = [];
+  for (const m of history) {
+    const afterReply = m.messageId > replyId;
+    const beforeNextPrompt = windowEnd == null || m.messageId < windowEnd;
+    if (!afterReply || !beforeNextPrompt) continue;
+    if (isActivityFeedMessage(m) || isWorkerFeedMessage(m)) lateSurfaces.push(m);
+  }
+  if (lateSurfaces.length === 0) return;
+  const detail = lateSurfaces
+    .map((m) => `msg=${m.messageId} ${JSON.stringify(m.text.slice(0, 60))}`)
+    .join("; ");
+  throw new Error(
+    `assertReplyIsLast: an activity/feed surface opened AFTER the reply ` +
+      `(answer msg=${replyId}) in the same foreground turn: ${detail}`,
+  );
+}
+/**
+ * Assert NOTIFICATION OWNERSHIP (R8 / PR-2 — design
+ * `docs/message-emission-determinism.md` §over-ping): the turn's SUBSTANTIVE
+ * answer must have buzzed the device, i.e. it must NOT be silent. mtcute
+ * exposes Telegram's `silent` flag on every message
+ * (`ObservedMessage.silent`, set from the sender's `disable_notification`).
+ *
+ * It covers the gap left by the bare "first ping wins" rule: when an interim
+ * ack pinged first and claimed the turn's single ping slot, the later
+ * substantive answer used to be downgraded to silent — "the reply is last
+ * but the phone never buzzed for the answer." After PR-2 the answer UPGRADES
+ * over the ack's slot and arrives non-silent.
+ *
+ * @param answer The substantive answer message to check.
+ * @throws Error when `answer.silent` is truthy. Throwing (instead of
+ *         returning false) lets a scenario read as a plain assertion; the
+ *         message id and the first 80 characters of the text are included
+ *         for triage.
+ */
+export function assertAnswerPinged(answer: ObservedMessage): void {
+  if (!answer.silent) return;
+  const preview = JSON.stringify(answer.text.slice(0, 80));
+  throw new Error(
+    `assertAnswerPinged: the substantive answer arrived SILENT (no device ping) ` +
+      `— an earlier ack-ping downgraded the answer (R8 / PR-2 regression). ` +
+      `answer msg=${answer.messageId} ${preview}`,
+  );
+}
 export interface PollOptions {
   /** Hard deadline; the predicate must resolve truthy before this. */
   timeout: number;

package/telegram-plugin/uat/driver.ts CHANGED Viewed

@@ -368,6 +368,15 @@ export class Driver {
    * - Custom emojis (`reactionCustomEmoji`) are skipped — scenarios
    *   that need them aren't in scope and parsing them would require
    *   resolving the document id to an alias.
+   *
+   * **DM / bot-reaction limitation:** when a BOT calls
+   * `setMessageReaction` on a DM, Telegram's MTProto server does NOT
+   * deliver `updateMessageReactions` to the human user's account.
+   * The server only delivers `updateBotMessageReaction` to the BOT's
+   * own update stream, so `onRawUpdate` never fires for the driver.
+   * For DM bot-reaction assertions, use {@link pollReactions} instead
+   * — it calls `messages.getMessagesReactions` directly (pull, not
+   * push) to read the current reaction set on any message.
    */
   observeReactions(
     chatId: number,
@@ -581,6 +590,65 @@ export class Driver {
     return toObserved(msg, false);
   }
+  /**
+   * Pull a window of chat history and return it in SERVER SEND-ORDER
+   * (ascending message_id — the actual on-screen order). This is the Phase-3
+   * helper the `observeMessages` doc-comment refers to: unlike the new+edit
+   * observer stream it is a pull, so it sees ALL messages already in the
+   * chat, including ones that landed before the observer started — required
+   * to assert ordering across a re-prompt boundary (`jtbd-reply-is-last-dm`,
+   * design §11 cases 3 & 4).
+   *
+   * mtcute's `getHistory` returns newest-first by default (and `reverse=true`
+   * needs an offset, returning [] otherwise), so the result is fetched and
+   * then sorted ascending by message_id to recover send-order. Each entry is
+   * mapped to the `ObservedMessage` shape the assertions/predicates consume.
+   *
+   * @param chatId Chat whose history is pulled.
+   * @param limit Maximum number of messages requested (default 100).
+   * @returns The observed messages, lowest message_id first.
+   */
+  async getHistory(
+    chatId: number,
+    limit = 100,
+  ): Promise<ObservedMessage[]> {
+    const client = this.requireClient();
+    const fetched = await client.getHistory(chatId, { limit });
+    const observed = fetched.map((msg) => toObserved(msg, false));
+    observed.sort((earlier, later) => earlier.messageId - later.messageId);
+    return observed;
+  }
+  /**
+   * Read the emoji reactions currently set on a message via a direct
+   * `messages.getMessagesReactions` MTProto call.
+   *
+   * In contrast to {@link observeReactions} this is a **pull** — it does not
+   * rely on push updates from the Telegram server. That makes it the right
+   * verification method for DM bot-reaction scenarios: when a bot calls
+   * `setMessageReaction` on a DM, Telegram does not deliver
+   * `updateMessageReactions` to the user account, but the reaction IS
+   * queryable through this API.
+   *
+   * Custom-emoji reactions are left out (their documentId can't be shown as
+   * a string without resolving it).
+   *
+   * @param chatId Chat containing the message.
+   * @param messageId Message whose reactions are read.
+   * @returns The emoji strings on the message (e.g. `["👍"]`,
+   *          `["👀", "🤔"]`); an empty array when none are set or when
+   *          `getMessageReactionsById` yields no entry for the message
+   *          (deleted / not visible).
+   */
+  async pollReactions(chatId: number, messageId: number): Promise<string[]> {
+    const client = this.requireClient();
+    const perMessage = await client.getMessageReactionsById(chatId, [messageId]);
+    const current = perMessage[0];
+    if (!current) return [];
+    // Keep string emojis only; a Long (custom emoji document id) can't be
+    // stringified cheaply, so those entries are dropped.
+    return Array.from(current.reactions, (rc) => rc.emoji).filter(
+      (emoji): emoji is string => typeof emoji === "string",
+    );
+  }
   /**
    * Fetch the inline keyboard attached to a bot message, if any.
    * Returns `null` for messages without an inline_keyboard (or with