npm - switchroom - Versions diffs - 0.15.45 → 0.16.5 - Mend

switchroom 0.15.45 → 0.16.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (150)

package/dist/agent-scheduler/index.js +56 -15
package/dist/auth-broker/index.js +383 -97
package/dist/cli/autoaccept-poll.js +4842 -35
package/dist/cli/drive-write-pretool.mjs +7 -4
package/dist/cli/notion-write-pretool.mjs +35 -4
package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
package/dist/cli/self-improve-stop.mjs +428 -0
package/dist/cli/switchroom.js +2894 -841
package/dist/host-control/main.js +2685 -207
package/dist/vault/approvals/kernel-server.js +7453 -7413
package/dist/vault/broker/server.js +11428 -11388
package/examples/minimal.yaml +1 -0
package/examples/switchroom.yaml +1 -0
package/package.json +3 -3
package/profiles/_base/start.sh.hbs +97 -1
package/profiles/_shared/execution-discipline.md.hbs +18 -0
package/profiles/default/CLAUDE.md.hbs +0 -19
package/telegram-plugin/.claude-plugin/plugin.json +2 -2
package/telegram-plugin/answer-stream-flag.ts +12 -49
package/telegram-plugin/answer-stream.ts +5 -150
package/telegram-plugin/auth-snapshot-format.ts +280 -48
package/telegram-plugin/auto-fallback-fleet.ts +44 -1
package/telegram-plugin/context-exhaustion.ts +12 -0
package/telegram-plugin/demo-mask.ts +154 -0
package/telegram-plugin/dist/bridge/bridge.js +55 -12
package/telegram-plugin/dist/gateway/gateway.js +2938 -977
package/telegram-plugin/dist/server.js +55 -12
package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
package/telegram-plugin/draft-stream.ts +47 -410
package/telegram-plugin/final-answer-detect.ts +17 -12
package/telegram-plugin/fleet-fallback-resume.ts +131 -0
package/telegram-plugin/format.ts +56 -19
package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
package/telegram-plugin/gateway/auth-command.ts +70 -14
package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
package/telegram-plugin/gateway/current-turn-map.ts +188 -0
package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
package/telegram-plugin/gateway/effort-command.ts +8 -3
package/telegram-plugin/gateway/emission-authority.ts +369 -0
package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
package/telegram-plugin/gateway/gateway.ts +1857 -292
package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
package/telegram-plugin/gateway/model-command.ts +115 -4
package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
package/telegram-plugin/gateway/represent-guard.ts +72 -0
package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
package/telegram-plugin/gateway/status-surface-log.ts +14 -3
package/telegram-plugin/history.ts +33 -11
package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
package/telegram-plugin/issues-card.ts +4 -0
package/telegram-plugin/model-unavailable.ts +124 -0
package/telegram-plugin/narrative-dedup.ts +69 -0
package/telegram-plugin/over-ping-safety-net.ts +70 -4
package/telegram-plugin/package.json +3 -3
package/telegram-plugin/pending-work-progress.ts +12 -0
package/telegram-plugin/permission-rule.ts +32 -5
package/telegram-plugin/permission-title.ts +152 -9
package/telegram-plugin/quota-check.ts +13 -0
package/telegram-plugin/quota-watch.ts +135 -7
package/telegram-plugin/registry/turns-schema.test.ts +24 -0
package/telegram-plugin/registry/turns-schema.ts +9 -0
package/telegram-plugin/runtime-metrics.ts +13 -0
package/telegram-plugin/session-tail.ts +96 -11
package/telegram-plugin/silence-poke.ts +170 -24
package/telegram-plugin/slot-banner-driver.ts +3 -0
package/telegram-plugin/status-no-truncate.ts +44 -0
package/telegram-plugin/status-reactions.ts +20 -3
package/telegram-plugin/stream-controller.ts +4 -23
package/telegram-plugin/stream-reply-handler.ts +6 -24
package/telegram-plugin/streaming-metrics.ts +91 -0
package/telegram-plugin/subagent-watcher.ts +212 -66
package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
package/telegram-plugin/tests/answer-stream.test.ts +2 -411
package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
package/telegram-plugin/tests/demo-mask.test.ts +127 -0
package/telegram-plugin/tests/draft-stream.test.ts +0 -827
package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
package/telegram-plugin/tests/feed-survival.test.ts +526 -0
package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
package/telegram-plugin/tests/history.test.ts +60 -0
package/telegram-plugin/tests/model-command.test.ts +134 -0
package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
package/telegram-plugin/tests/permission-rule.test.ts +17 -0
package/telegram-plugin/tests/permission-title.test.ts +206 -17
package/telegram-plugin/tests/quota-watch.test.ts +252 -9
package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
package/telegram-plugin/tests/represent-guard.test.ts +162 -0
package/telegram-plugin/tests/session-tail.test.ts +147 -3
package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
package/telegram-plugin/tests/telegram-format.test.ts +101 -6
package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
package/telegram-plugin/tests/tool-labels.test.ts +67 -0
package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
package/telegram-plugin/tests/welcome-text.test.ts +32 -3
package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
package/telegram-plugin/tool-activity-summary.ts +375 -58
package/telegram-plugin/turn-liveness-floor.ts +240 -0
package/telegram-plugin/uat/assertions.ts +115 -0
package/telegram-plugin/uat/driver.ts +68 -0
package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
package/telegram-plugin/welcome-text.ts +13 -1
package/telegram-plugin/worker-activity-feed.ts +157 -82
package/telegram-plugin/draft-transport.ts +0 -122
package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
package/telegram-plugin/tests/draft-transport.test.ts +0 -211

package/telegram-plugin/draft-stream.ts (changed)

@@ -11,69 +11,37 @@
  *
  * This is what makes the experience feel responsive without burning
  * Telegram's 1-edit-per-second-per-message rate limit. The latest delta
- * always lands within ~1s, with at most one outstanding API call.
+ * always lands within ~1s (or ~400ms in DMs), with at most one outstanding
+ * API call.
  *
  * In our model-driven architecture (no inference hooks), the controller
  * is driven by the model calling stream_reply(text, done) multiple times
- * during a long task. First call → sendMessage (or sendMessageDraft in DMs).
- * Subsequent calls → throttled editMessageText (or sendMessageDraft). done=true
- * → flush, materialize as a fresh sendMessage (push notification), clear draft.
+ * during a long task. First call → sendMessage. Subsequent calls →
+ * throttled editMessageText. done=true → flush, finalize.
  *
- * Transport selection:
- *   - previewTransport: "auto" (default) — use draft in DMs only
- *   - previewTransport: "draft"           — always use draft (if API available)
- *   - previewTransport: "message"         — always use sendMessage/editMessageText
- *
- * Forum topics (message_thread_id set) force message transport because
- * sendMessageDraft does not support threads. The caller (stream-controller.ts)
- * handles this by passing previewTransport: "message" for threaded chats.
+ * The draft transport (sendMessageDraft) has been permanently retired —
+ * all streams use sendMessage + editMessageText (the in-place engine).
+ * See PR fix/retire-draft-transport for the removal rationale.
  */
-import {
-  shouldFallbackFromDraftTransport,
-  allocateDraftId,
-  isDraft429,
-  extractDraft429RetryAfterSecs,
-} from './draft-transport.js'
 const TELEGRAM_MAX_CHARS = 4096
-// PR B: transport-aware defaults.
-//   Draft transport (DMs): 300 ms — drafts are ephemeral and don't share
-//     editMessageText's per-message rate cap, so we can refresh much faster.
-//     300 ms feels live without burning bandwidth.
-//   Message transport (groups / forums / draft API absent): 1000 ms — must
-//     respect Telegram's "1 edit/sec/message" practical ceiling.
+// Throttle defaults for the in-place engine.
+//   DM chats: 400 ms — slightly more responsive than groups while staying
+//     well under Telegram's practical ~1 edit/sec/message ceiling. This
+//     replaces the legacy 300 ms draft default: drafts were ephemeral and
+//     didn't share the editMessageText rate cap, but in-place edits do, so
+//     300 ms would routinely hit the limit. 400 ms keeps DM streaming
+//     noticeably snappier than the group default without rate-limit pressure.
+//   Group/forum chats: 1000 ms — must respect Telegram's
+//     "1 edit/sec/message" practical ceiling.
 // Both defaults can be overridden per-stream via `config.throttleMs` (which
 // is itself wired from `channels.telegram.stream_throttle_ms` in the agent
 // yaml, via the SWITCHROOM_TG_STREAM_THROTTLE_MS env var the gateway reads).
-const DEFAULT_DRAFT_THROTTLE_MS = 300
-const DEFAULT_MESSAGE_THROTTLE_MS = 1000
+const DEFAULT_DM_THROTTLE_MS = 400
+const DEFAULT_GROUP_THROTTLE_MS = 1000
 const MIN_THROTTLE_MS = 250
-// PR C — sendMessageDraft 30-second ephemeral persist-chain.
-//
-// Telegram's sendMessageDraft preview expires after 30 seconds. Long
-// LLM turns blow past that, leaving the user staring at a stale draft.
-// To stay live for arbitrary-length turns: at ~25s of accumulated
-// draft streaming (or when the unpersisted chunk approaches 4000 chars
-// — the per-message length cap with safety margin), fire a real
-// sendMessage with the current chunk. This persists what the user has
-// seen so far as a real message (with push notification). Then we
-// allocate a fresh draft_id and continue streaming the next chunk
-// into a new ephemeral preview. The model still sees a single
-// continuous turn; the user sees a CHAIN of persisted messages, each
-// up to ~25s / ~4000 chars, separated by live previews.
-//
-// At done=true / finalize(), the LAST unpersisted chunk is fired via
-// sendMessage so the final state of the response is durable.
-//
-// These triggers fire on top of the normal throttle loop — i.e., the
-// persist boundary is checked just before each draft fire, not on a
-// separate timer. This keeps the loop simple and avoids fighting with
-// the in-flight promise.
-const PERSIST_INTERVAL_MS = 25_000
-const PERSIST_SAFETY_CHAR_LIMIT = 4000
 /**
  * Send the first message in a stream. Receives the rendered text plus a
  * thread_id (forum topic) and returns the new Telegram message_id.
@@ -85,20 +53,8 @@ export type StreamSendFn = (text: string) => Promise<number>
  */
 export type StreamEditFn = (messageId: number, text: string) => Promise<void>
-/**
- * Optional sendMessageDraft callback. When present and the transport is
- * "draft", this is called instead of sendMessage/editMessageText.
- * Signature mirrors Telegram's sendMessageDraft Bot API method.
- */
-export type StreamDraftFn = (
-  chatId: string,
-  draftId: number,
-  text: string,
-  params?: { message_thread_id?: number },
-) => Promise<unknown>
 export interface DraftStreamConfig {
-  /** Throttle window in ms. Floored at 250. Default 1000. */
+  /** Throttle window in ms. Floored at 250. Default 400 for DMs, 1000 for groups. */
   throttleMs?: number
   /**
    * Maximum total characters before hard-stopping the stream. Default 4096
@@ -116,51 +72,21 @@ export interface DraftStreamConfig {
    *
    * Default 0 (no pre-send debounce — first update fires immediately).
    * Only affects the first send; subsequent edits use throttleMs.
-   *
-   * NOTE: This debounce only applies to message transport. Draft transport
-   * fires immediately on the first update because drafts are ephemeral —
-   * the throttle/flush loop already collapses bursts into 1 API call/sec
-   * via throttleMs.
    */
   idleMs?: number
   /**
-   * Transport selector.
-   * - "auto" (default): use draft transport when isPrivateChat=true AND
-   *   sendMessageDraft is provided; otherwise use message transport.
-   * - "draft": always prefer draft (falls back to message if sendMessageDraft absent).
-   * - "message": always use sendMessage/editMessageText.
-   */
-  previewTransport?: 'auto' | 'message' | 'draft'
-  /**
-   * True if the current chat is a private DM. Used by "auto" transport to
-   * decide whether to activate draft. Has no effect when previewTransport
-   * is "draft" or "message".
+   * True if the current chat is a private DM. Used to select the throttle
+   * default (400 ms for DMs vs 1000 ms for groups) when `throttleMs` is
+   * not explicitly provided. Has no effect when `throttleMs` is set.
    */
   isPrivateChat?: boolean
   /**
-   * sendMessageDraft callback. When absent, the stream falls back to
-   * sendMessage/editMessageText regardless of previewTransport.
-   */
-  sendMessageDraft?: StreamDraftFn
-  /**
-   * The Telegram chat id string — required when sendMessageDraft is provided,
-   * so the draft can be cleared on finalize.
+   * The Telegram chat id string — used for diagnostic traces.
    */
   chatId?: string
-  /**
-   * PR C — persist-chain interval override. Default 25_000 ms. Lower
-   * for tests; production should leave default.
-   */
-  persistIntervalMs?: number
-  /**
-   * PR C — persist-chain size threshold override (chars). Default 4000.
-   * Lower for tests so the size-trigger can fire on small text without
-   * colliding with the 4096-char maxChars hard-stop.
-   */
-  persistSizeLimit?: number
   /** Optional logger for debugging. Receives one string per event. */
   log?: (msg: string) => void
-  /** Optional warning logger. Used for transport fallback notices. */
+  /** Optional warning logger. Used for fallback notices. */
   warn?: (msg: string) => void
   /**
    * If set, the stream is initialized as if a previous send had landed
@@ -172,8 +98,8 @@ export interface DraftStreamConfig {
    * sendMessage. This closes the "done=true → activeDraftStreams entry
    * deleted → next emit creates fresh sendMessage" duplicate-message
    * class (issue #626). The not-found fallback at the edit site
-   * (line ~280: re-send on `MESSAGE_ID_INVALID`) gracefully handles a
-   * stale id — the bad edit fails once, then a fresh send fires.
+   * (re-send on `MESSAGE_ID_INVALID`) gracefully handles a stale id —
+   * the bad edit fails once, then a fresh send fires.
    */
   initialMessageId?: number | null
 }
@@ -205,92 +131,31 @@ export interface DraftStreamHandle {
  *
  * The first update() call invokes `send` to create the message. All
  * subsequent calls invoke `edit` against the captured message_id.
- *
- * When sendMessageDraft is provided (and transport allows it), intermediate
- * updates use the draft API instead of sendMessage/editMessageText. On
- * finalize(), a real sendMessage is sent for push notification, then the
- * draft is cleared best-effort.
+ * All streaming uses the sendMessage + editMessageText in-place engine.
  */
 export function createDraftStream(
   send: StreamSendFn,
   edit: StreamEditFn,
   config: DraftStreamConfig = {},
 ): DraftStreamHandle {
-  // PR B: transport-aware default — the actual transport resolves a few
-  // lines below, so we replicate the prefersDraft check here. An
-  // explicit `config.throttleMs` (from the operator yaml or the
-  // caller) wins.
-  const _willPreferDraft =
-    (config.previewTransport ?? 'auto') === 'draft' ||
-    ((config.previewTransport ?? 'auto') === 'auto' && config.isPrivateChat === true)
-  const _defaultForTransport = _willPreferDraft && config.sendMessageDraft != null
-    ? DEFAULT_DRAFT_THROTTLE_MS
-    : DEFAULT_MESSAGE_THROTTLE_MS
-  const throttleMs = Math.max(MIN_THROTTLE_MS, config.throttleMs ?? _defaultForTransport)
-  // PR C: persist-chain config overrides (testability — production
-  // leaves defaults at 25 s / 4000 chars).
-  const persistIntervalMs = config.persistIntervalMs ?? PERSIST_INTERVAL_MS
-  const persistSizeLimit = config.persistSizeLimit ?? PERSIST_SAFETY_CHAR_LIMIT
+  // Select throttle default: DMs get 400 ms (more responsive), groups get 1000 ms.
+  // An explicit `config.throttleMs` (from the operator yaml or the caller) always wins.
+  const _defaultThrottle = config.isPrivateChat === true
+    ? DEFAULT_DM_THROTTLE_MS
+    : DEFAULT_GROUP_THROTTLE_MS
+  const throttleMs = Math.max(MIN_THROTTLE_MS, config.throttleMs ?? _defaultThrottle)
   const maxChars = config.maxChars ?? TELEGRAM_MAX_CHARS
   const idleMs = Math.max(0, config.idleMs ?? 0)
   const log = config.log
   const warn = config.warn
-  const draftApi = config.sendMessageDraft
   const chatId = config.chatId ?? ''
-  // Resolve transport
-  const requestedTransport = config.previewTransport ?? 'auto'
-  const prefersDraft =
-    requestedTransport === 'draft'
-      ? true
-      : requestedTransport === 'message'
-        ? false
-        : (config.isPrivateChat === true) // 'auto': DM only
-  // Footgun guard: caller asked for "auto" + provided sendMessageDraft but
-  // forgot isPrivateChat. They almost certainly wanted draft in DMs but will
-  // silently get message transport everywhere. Warn so the bug is visible.
-  if (
-    requestedTransport === 'auto'
-    && draftApi != null
-    && config.isPrivateChat === undefined
-  ) {
-    warn?.('draft-stream: previewTransport="auto" with sendMessageDraft but isPrivateChat undefined — defaulting to message transport')
-  }
-  // Use draft transport only if we have the API
-  let usesDraftTransport = prefersDraft && draftApi != null
-  let draftId: number | undefined = usesDraftTransport
-    ? allocateDraftId()
-    : undefined
-  if (prefersDraft && !usesDraftTransport) {
-    warn?.('draft-stream: sendMessageDraft unavailable; falling back to sendMessage/editMessageText')
-  }
   // Stream-start trace — always-on, structured for grep + aggregation.
-  // Resolves WHY the chosen transport landed (req=auto|draft|message;
-  // dm=true|false|undef; api=available|absent). Gates the rest of the
-  // sendMessageDraft alignment PR sequence: without this we can't tell
-  // a draft-routing regression from a config-toggle change.
-  // Kill switch: SWITCHROOM_STREAM_TRACES=0.
   if (process.env.SWITCHROOM_STREAM_TRACES !== '0') {
-    const reason = usesDraftTransport
-      ? 'draft'
-      : requestedTransport === 'message'
-        ? 'explicit-message'
-        : requestedTransport === 'draft' && draftApi == null
-          ? 'draft-requested-but-no-api'
-          : !prefersDraft
-            ? 'auto-non-dm'
-            : 'fallback'
-    const draftIdPart = draftId != null ? ` draftId=${draftId}` : ''
     process.stderr.write(
-      `gw-trace stream-start transport=${usesDraftTransport ? 'draft' : 'message'} ` +
-        `reason=${reason} req=${requestedTransport} ` +
+      `gw-trace stream-start transport=message ` +
         `dm=${config.isPrivateChat === undefined ? 'undef' : String(config.isPrivateChat)} ` +
-        `api=${draftApi != null ? 'available' : 'absent'} ` +
-        `throttleMs=${throttleMs}${draftIdPart} ` +
+        `throttleMs=${throttleMs} ` +
         `chatId=${chatId || '-'}\n`,
     )
   }
@@ -300,29 +165,11 @@ export function createDraftStream(
   let lastSentText: string | null = null
   let lastSentAt = 0
   let inFlight: Promise<void> | null = null
-  // PR A observability — per-stream fire counters for the stream-end
-  // trace. draftFires/editFires/sendFires let the aggregator distinguish
-  // "stream used 80% draft + 20% edit fallback" vs "all edits, draft
-  // never fired". `firstFireAtMs` is the latency from stream-start to
-  // first wire send (matches TTFO sub-component for a single stream).
+  // Observability — per-stream fire counters for the stream-end trace.
   const streamStartedAt = Date.now()
   let firstFireAtMs: number | null = null
-  let draftFires = 0
   let editFires = 0
   let sendFires = 0
-  let fallbackFires = 0
-  // PR C — persist-chain state. `persistedTextLen` is the offset into
-  // the full cumulative model text that has already been committed to
-  // a real Telegram message via `sendMessage`. Subsequent draft fires
-  // send only the slice from `persistedTextLen` onward (the
-  // unpersisted tail). `currentChunkStartedAt` is when the CURRENT
-  // chunk (since last persist boundary) started streaming — drives
-  // the 25-second persist trigger. `persistChainFires` counts how
-  // many chunks have been persisted in this stream (always 0 for
-  // message-transport streams, only ticks for draft-transport).
-  let persistedTextLen = 0
-  let currentChunkStartedAt: number | null = null
-  let persistChainFires = 0
   let scheduledTimer: ReturnType<typeof setTimeout> | null = null
   let final = false
   let stopped = false
@@ -339,84 +186,6 @@ export function createDraftStream(
     }
   }
-  async function sendViaDraft(textToSend: string): Promise<boolean> {
-    if (!draftApi || draftId == null) return false
-    // PR C: draft sees only the unpersisted tail. If the model produced
-    // text BEYOND what's already been committed to a real sendMessage,
-    // that tail is what the user sees in the live preview. When the
-    // tail is empty (model hasn't added anything new since persist),
-    // there's nothing to draft — the draft was cleared at persist time.
-    const draftText = textToSend.slice(persistedTextLen)
-    if (draftText.length === 0) {
-      // Treat as success — no work to do, dedup will skip on next call.
-      return true
-    }
-    try {
-      const result = await draftApi(chatId, draftId, draftText)
-      // PR D: sendMessageDraft is documented to return `true` on success.
-      // A non-true (or missing) return is a soft failure — Telegram
-      // accepted the call but the draft didn't land. Fall back to
-      // message transport for the rest of this stream so the user still
-      // sees the content. This catches API surface changes + edge cases
-      // not covered by `shouldFallbackFromDraftTransport`'s regex.
-      if (result !== true && result !== undefined) {
-        // Some grammY wrappers strip the bool and return undefined on
-        // success; treat ONLY explicitly-falsy returns as failure to
-        // avoid false-positive fallback. true / undefined → success.
-        if (result === false || result === null) {
-          warn?.(
-            `draft-stream: sendMessageDraft returned non-true (${JSON.stringify(result)}) — falling back to message transport`,
-          )
-          fallbackFires++
-          usesDraftTransport = false
-          draftId = undefined
-          return false
-        }
-      }
-      if (firstFireAtMs == null) firstFireAtMs = Date.now() - streamStartedAt
-      // Mark the start of THIS chunk's persist window on first fire of
-      // each chunk (after the previous persist boundary).
-      if (currentChunkStartedAt == null) currentChunkStartedAt = Date.now()
-      draftFires++
-      log?.(`stream → draft (id: ${draftId}, ${draftText.length} chars tail)`)
-      return true
-    } catch (err) {
-      // PR D: dedicated 429 path. Telegram rate-limits sendMessageDraft
-      // independently from sendMessage/editMessageText. On 429:
-      //   - extract `retry_after`
-      //   - fall back to message transport for the rest of this stream
-      //   - bump `lastSentAt` so the throttle window absorbs the
-      //     retry_after delay — prevents the message-transport
-      //     fallback from immediately firing and getting 429'd too
-      //     (Telegram's per-chat rate cap is shared across methods).
-      const retryAfterSecs = extractDraft429RetryAfterSecs(err)
-      if (retryAfterSecs != null && isDraft429(err)) {
-        warn?.(
-          `draft-stream: sendMessageDraft 429 (retry_after=${retryAfterSecs}s) — falling back to message transport + backoff`,
-        )
-        fallbackFires++
-        usesDraftTransport = false
-        draftId = undefined
-        // Push lastSentAt forward so the NEXT flush waits at least
-        // `retry_after` seconds before the message-transport send.
-        // The throttle math at update() / schedule() compares
-        // `Date.now() - lastSentAt >= throttleMs`, so by moving
-        // lastSentAt forward we delay the next fire.
-        lastSentAt = Date.now() + retryAfterSecs * 1000 - throttleMs
-        return false
-      }
-      if (shouldFallbackFromDraftTransport(err)) {
-        const msg = err instanceof Error ? err.message : String(err)
-        warn?.(`draft-stream: sendMessageDraft rejected — falling back to sendMessage/editMessageText (${msg})`)
-        fallbackFires++
-        usesDraftTransport = false
-        draftId = undefined
-        return false
-      }
-      throw err
-    }
-  }
   async function flush(): Promise<void> {
     if (stopped) {
       notifyWaiters()
@@ -435,99 +204,16 @@ export function createDraftStream(
       return
     }
-    // PR C — persist-chain trigger check. Runs BEFORE the maxChars
-    // hard-stop so we can chunk large outputs across multiple
-    // sendMessage calls instead of dropping them. Only the draft
-    // path needs this; message transport edits the same id forever
-    // and the 4096-char cap is a real terminal stop there.
-    //
-    // The trigger fires when EITHER the current chunk has been
-    // streaming for ≥25s OR the unpersisted tail is approaching the
-    // 4000-char message length cap. On fire: send the chunk via
-    // real sendMessage, bump persistedTextLen, allocate a fresh
-    // draftId, reset the chunk window. The subsequent normal-flow
-    // draft fire below sends only the (now-empty or post-persist) tail.
-    if (usesDraftTransport && currentChunkStartedAt != null) {
-      const elapsed = Date.now() - currentChunkStartedAt
-      const tailLen = textToSend.length - persistedTextLen
-      const sizeApproaching = tailLen >= persistSizeLimit
-      const timeElapsed = elapsed >= persistIntervalMs
-      if ((timeElapsed || sizeApproaching) && tailLen > 0) {
-        const chunk = textToSend.slice(persistedTextLen)
-        try {
-          const newMsgId = await send(chunk)
-          messageId = newMsgId
-          persistedTextLen = textToSend.length
-          draftId = allocateDraftId()
-          currentChunkStartedAt = null
-          persistChainFires++
-          // PR follow-up: persist-chain's bare send() bypasses
-          // sendViaMessage's increment, same shape as the finalize-
-          // materialize bug. Without this, streams that cross the
-          // 25s / 4000-char boundary would under-report `sends` by
-          // the chain count in stream-end.
-          sendFires++
-          if (process.env.SWITCHROOM_STREAM_TRACES !== '0') {
-            process.stderr.write(
-              `gw-trace stream-persist chunk_chars=${chunk.length} ` +
-                `elapsed=${elapsed} reason=${timeElapsed ? 'time' : 'size'} ` +
-                `newMsgId=${newMsgId} newDraftId=${draftId} ` +
-                `chatId=${chatId || '-'}\n`,
-            )
-          }
-          log?.(`stream → persisted chunk (id: ${newMsgId}, ${chunk.length} chars, reason=${timeElapsed ? 'time' : 'size'})`)
-        } catch (err) {
-          // Persist failed — log and continue. The next flush re-
-          // evaluates the trigger and re-fires.
-          //
-          // Edge case (accepted as v1 ceiling): if `send(chunk)`
-          // actually LANDED on Telegram but the response/ack was lost
-          // (network blip), the retry will double-persist — the user
-          // sees the same chunk twice as two separate sendMessages.
-          // Telegram doesn't expose a sendMessage idempotency key. The
-          // user-visible artifact is "duplicate chunk", not data loss,
-          // and observed rate of lost-ACK is rare. PR D follow-up
-          // could add a per-chunk hash dedup on retry.
-          warn?.(
-            `draft-stream: persist sendMessage failed — chunk stays in draft (${err instanceof Error ? err.message : String(err)})`,
-          )
-        }
-      }
-    }
-    // Edge case: if the model RETRACTS cumulative text (rare — most
-    // LLM streams are strict-extension), `textToSend.length` may be
-    // less than `persistedTextLen`. `slice(persistedTextLen)` returns
-    // "" and the persist trigger's `tailLen > 0` guard short-circuits,
-    // so we silently skip. The live preview goes stale until the model
-    // re-extends past `persistedTextLen`. No crash, no double-send.
-    // Tolerated as the failure mode is benign and the cause is upstream.
-    // Hard-stop check — applies to the sendable size (full text for
-    // message transport, post-persist tail for draft transport). After
-    // a successful persist, the tail resets so this won't fire even
-    // for huge cumulative texts in the draft path.
-    const sendableLen = usesDraftTransport
-      ? textToSend.length - persistedTextLen
-      : textToSend.length
-    if (sendableLen > maxChars) {
-      log?.(`stream stopped: ${usesDraftTransport ? 'tail' : 'text'} exceeds ${maxChars} chars`)
+    // Hard-stop check
+    if (textToSend.length > maxChars) {
+      log?.(`stream stopped: text exceeds ${maxChars} chars`)
       stopped = true
       notifyWaiters()
       return
     }
     try {
-      if (usesDraftTransport) {
-        const ok = await sendViaDraft(textToSend)
-        if (!ok) {
-          // Draft failed with a permanent error → fell back to message transport.
-          // Replay this text via message transport.
-          await sendViaMessage(textToSend)
-        }
-      } else {
-        await sendViaMessage(textToSend)
-      }
+      await sendViaMessage(textToSend)
       lastSentText = textToSend
       lastSentAt = Date.now()
     } catch (err) {
@@ -601,9 +287,9 @@ export function createDraftStream(
       // Pre-send idle debounce: for the FIRST send of a stream, optionally
       // defer by idleMs so a burst of update() calls collapses into one
       // send. Each incoming update resets the timer. Once the initial
-      // send has landed (messageId != null OR draft has fired), this path
-      // is skipped and the regular throttle kicks in.
-      if (idleMs > 0 && messageId == null && !usesDraftTransport && inFlight == null) {
+      // send has landed (messageId != null), this path is skipped and
+      // the regular throttle kicks in.
+      if (idleMs > 0 && messageId == null && inFlight == null) {
         if (scheduledTimer != null) clearTimeout(scheduledTimer)
         scheduledTimer = setTimeout(() => {
           scheduledTimer = null
@@ -656,63 +342,14 @@ export function createDraftStream(
         await flush()
       }
-      // Draft transport: materialize as a real sendMessage for push
-      // notification, then clear the draft best-effort.
-      //
-      // PR C: with the persist-chain in play, earlier chunks may
-      // already be persisted as their own sendMessages. We materialize
-      // ONLY the unpersisted tail here — otherwise the user gets a
-      // duplicate of the prior chunks at turn end.
-      if (usesDraftTransport && draftApi != null) {
-        const fullText = lastSentText ?? ''
-        const textToMaterialize = fullText.slice(persistedTextLen)
-        if (textToMaterialize.length > 0) {
-          try {
-            messageId = await send(textToMaterialize)
-            persistedTextLen = fullText.length
-            // PR follow-up: bump sendFires so the stream-end trace
-            // reflects the finalize-materialize sendMessage call. Pre-
-            // this fix, the counter under-reported by 1 for every
-            // draft-transport stream that produced a non-empty reply:
-            // gw-trace stream-end showed `drafts=N sends=0` even
-            // though sendMessage HAD fired (visible in tg-post lines).
-            sendFires++
-            log?.(`stream → materialized tail (id: ${messageId}, ${textToMaterialize.length} chars)`)
-          } catch (err) {
-            warn?.(`draft-stream: materialize sendMessage failed: ${err instanceof Error ? err.message : String(err)}`)
-          }
-          // Clear draft best-effort (cosmetic — Telegram input area cleanup)
-          if (draftId != null) {
-            try {
-              await draftApi(chatId, draftId, '')
-            } catch {
-              // Best-effort — ignore failures
-            }
-          }
-        } else if (draftId != null) {
-          // Whole text already persisted via the chain — just clear the
-          // current draft so the input area isn't left with stale
-          // preview content.
-          try {
-            await draftApi(chatId, draftId, '')
-          } catch {
-            // Best-effort — ignore
-          }
-        }
-      }
       log?.(`stream finalized (id: ${messageId})`)
-      // Stream-end trace — pairs with stream-start. `drafts`/`edits`/
-      // `sends` lets the aggregator see the transport ratio per stream;
-      // `firstFireMs` is the per-stream send latency component of TTFO;
-      // `chars` is the final committed text length.
+      // Stream-end trace — pairs with stream-start.
       if (process.env.SWITCHROOM_STREAM_TRACES !== '0') {
         const durationMs = Date.now() - streamStartedAt
         process.stderr.write(
-          `gw-trace stream-end transport=${usesDraftTransport ? 'draft' : 'message'} ` +
-            `drafts=${draftFires} sends=${sendFires} edits=${editFires} ` +
-            `fallbacks=${fallbackFires} persists=${persistChainFires} ` +
+          `gw-trace stream-end transport=message ` +
+            `sends=${sendFires} edits=${editFires} ` +
             `firstFireMs=${firstFireAtMs ?? -1} durationMs=${durationMs} ` +
             `chars=${(lastSentText ?? '').length} ` +
             `chatId=${chatId || '-'}\n`,

package/telegram-plugin/final-answer-detect.ts CHANGED Viewed

@@ -3,9 +3,9 @@
  *
  * Background. An agent often ends a turn with its real answer as plain
  * assistant transcript text instead of a `reply` / `stream_reply` tool
- * call. The gateway renders that transcript as a live Telegram draft
- * (`sendMessageDraft`) and, at turn_end, retracts the draft — so the
- * answer is never finalized and the user watches it vanish (#1664).
+ * call. The gateway renders that transcript via the answer-lane stream
+ * and, at turn_end, retracts the preview — so the answer is never
+ * finalized and the user watches it vanish (#1664).
  *
  * The gateway's `replyCalled` flag flips on the FIRST reply / stream_reply
  * tool use and stays true for the rest of the turn. It cannot distinguish
@@ -23,9 +23,9 @@
  * gateway is a multi-thousand-line module that's expensive to import in a
  * test. See `telegram-plugin/tests/final-answer-detect.test.ts`.
  *
- * The fix re-prompts the model; it never materializes the draft into a
- * message (`reference/principles.md`: the model communicates, the
- * framework is the safety net). So a false "interim" classification is
+ * The fix re-prompts the model; it never silently drops the answer
+ * (`reference/principles.md`: the model communicates, the framework is
+ * the safety net). So a false "interim" classification is
  * cheap (one extra re-prompt) and a false "final" classification is the
  * dangerous one (a real answer left undelivered) — the length backstop
  * exists to make the dangerous miss rare.
@@ -103,12 +103,17 @@ export function isFinalAnswerReply(input: FinalAnswerReplyInput): boolean {
  * otherwise the silent-end re-prompt would spuriously fire and the agent
  * would re-deliver a duplicate / garbled answer.
  *
- * Residual: a reply that is genuinely the final answer yet is BOTH short
- * (<200 chars) AND pinging (e.g. "Done!") is indistinguishable here from
- * an ack, so post-answer housekeeping after it still re-opens the feed.
- * That is much rarer than the housekeeping-after-long-answer case this
- * predicate protects, and is kill-switchable via
- * `SWITCHROOM_FEED_REOPEN_AFTER_ACK=0`.
+ * Residual (pre-existing, predates PR-2 — a conscious accept, no regression):
+ * a reply that is genuinely the final answer yet is BOTH short (<200 chars)
+ * AND pinging (e.g. "Done!") is indistinguishable here from an ack. So when
+ * such an answer arrives AFTER an ack has already pinged this turn, it
+ * classifies as an ack and its ping is suppressed (and post-answer
+ * housekeeping after it still re-opens the feed). PR-2's slot-ownership
+ * upgrade does NOT rescue this case — the upgrade only fires for a
+ * *substantive* answer, and this answer reads as non-substantive by the
+ * ≥200-char test. That is much rarer than the housekeeping-after-long-answer
+ * case this predicate protects, and the feed-reopen half is kill-switchable
+ * via `SWITCHROOM_FEED_REOPEN_AFTER_ACK=0`.
  */
 export function isSubstantiveFinalReply(input: FinalAnswerReplyInput): boolean {
   if (input.done === true) return true