npm - switchroom - Versions diffs - 0.12.29 → 0.13.0 - Mend

switchroom 0.12.29 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/dist/agent-scheduler/index.js +81 -80
package/dist/auth-broker/index.js +81 -80
package/dist/cli/drive-write-pretool.mjs +10 -10
package/dist/cli/skill-validate-pretool.mjs +72 -72
package/dist/cli/switchroom.js +361 -357
package/dist/host-control/main.js +100 -99
package/dist/vault/approvals/kernel-server.js +83 -82
package/dist/vault/broker/server.js +84 -83
package/package.json +1 -1
package/telegram-plugin/dist/bridge/bridge.js +112 -112
package/telegram-plugin/dist/gateway/gateway.js +326 -204
package/telegram-plugin/dist/server.js +160 -160
package/telegram-plugin/draft-stream.ts +287 -11
package/telegram-plugin/draft-transport.ts +50 -0
package/telegram-plugin/gateway/gateway.ts +26 -1
package/telegram-plugin/stream-reply-handler.ts +3 -1
package/telegram-plugin/tests/draft-stream.test.ts +453 -0
package/telegram-plugin/tests/draft-transport.test.ts +70 -0

package/telegram-plugin/draft-stream.ts CHANGED Viewed

@@ -32,12 +32,48 @@
 import {
   shouldFallbackFromDraftTransport,
   allocateDraftId,
+  isDraft429,
+  extractDraft429RetryAfterSecs,
 } from './draft-transport.js'
 const TELEGRAM_MAX_CHARS = 4096
-const DEFAULT_THROTTLE_MS = 1000
+// PR B: transport-aware defaults.
+//   Draft transport (DMs): 300 ms — drafts are ephemeral and don't share
+//     editMessageText's per-message rate cap, so we can refresh much faster.
+//     300 ms feels live without burning bandwidth.
+//   Message transport (groups / forums / draft API absent): 1000 ms — must
+//     respect Telegram's "1 edit/sec/message" practical ceiling.
+// Both defaults can be overridden per-stream via `config.throttleMs` (which
+// is itself wired from `channels.telegram.stream_throttle_ms` in the agent
+// yaml, via the SWITCHROOM_TG_STREAM_THROTTLE_MS env var the gateway reads).
+const DEFAULT_DRAFT_THROTTLE_MS = 300
+const DEFAULT_MESSAGE_THROTTLE_MS = 1000
 const MIN_THROTTLE_MS = 250
+// PR C — sendMessageDraft 30-second ephemeral persist-chain.
+//
+// Telegram's sendMessageDraft preview expires after 30 seconds. Long
+// LLM turns blow past that, leaving the user staring at a stale draft.
+// To stay live for arbitrary-length turns: at ~25s of accumulated
+// draft streaming (or when the unpersisted chunk approaches 4000 chars
+// — the per-message length cap with safety margin), fire a real
+// sendMessage with the current chunk. This persists what the user has
+// seen so far as a real message (with push notification). Then we
+// allocate a fresh draft_id and continue streaming the next chunk
+// into a new ephemeral preview. The model still sees a single
+// continuous turn; the user sees a CHAIN of persisted messages, each
+// up to ~25s / ~4000 chars, separated by live previews.
+//
+// At done=true / finalize(), the LAST unpersisted chunk is fired via
+// sendMessage so the final state of the response is durable.
+//
+// These triggers fire on top of the normal throttle loop — i.e., the
+// persist boundary is checked just before each draft fire, not on a
+// separate timer. This keeps the loop simple and avoids fighting with
+// the in-flight promise.
+const PERSIST_INTERVAL_MS = 25_000
+const PERSIST_SAFETY_CHAR_LIMIT = 4000
 /**
  * Send the first message in a stream. Receives the rendered text plus a
  * thread_id (forum topic) and returns the new Telegram message_id.
@@ -111,6 +147,17 @@ export interface DraftStreamConfig {
    * so the draft can be cleared on finalize.
    */
   chatId?: string
+  /**
+   * PR C — persist-chain interval override. Default 25_000 ms. Lower
+   * for tests; production should leave default.
+   */
+  persistIntervalMs?: number
+  /**
+   * PR C — persist-chain size threshold override (chars). Default 4000.
+   * Lower for tests so the size-trigger can fire on small text without
+   * colliding with the 4096-char maxChars hard-stop.
+   */
+  persistSizeLimit?: number
   /** Optional logger for debugging. Receives one string per event. */
   log?: (msg: string) => void
   /** Optional warning logger. Used for transport fallback notices. */
@@ -169,7 +216,21 @@ export function createDraftStream(
   edit: StreamEditFn,
   config: DraftStreamConfig = {},
 ): DraftStreamHandle {
-  const throttleMs = Math.max(MIN_THROTTLE_MS, config.throttleMs ?? DEFAULT_THROTTLE_MS)
+  // PR B: transport-aware default — the actual transport resolves a few
+  // lines below, so we replicate the prefersDraft check here. An
+  // explicit `config.throttleMs` (from the operator yaml or the
+  // caller) wins.
+  const _willPreferDraft =
+    (config.previewTransport ?? 'auto') === 'draft' ||
+    ((config.previewTransport ?? 'auto') === 'auto' && config.isPrivateChat === true)
+  const _defaultForTransport = _willPreferDraft && config.sendMessageDraft != null
+    ? DEFAULT_DRAFT_THROTTLE_MS
+    : DEFAULT_MESSAGE_THROTTLE_MS
+  const throttleMs = Math.max(MIN_THROTTLE_MS, config.throttleMs ?? _defaultForTransport)
+  // PR C: persist-chain config overrides (testability — production
+  // leaves defaults at 25 s / 4000 chars).
+  const persistIntervalMs = config.persistIntervalMs ?? PERSIST_INTERVAL_MS
+  const persistSizeLimit = config.persistSizeLimit ?? PERSIST_SAFETY_CHAR_LIMIT
   const maxChars = config.maxChars ?? TELEGRAM_MAX_CHARS
   const idleMs = Math.max(0, config.idleMs ?? 0)
   const log = config.log
@@ -207,11 +268,61 @@ export function createDraftStream(
     warn?.('draft-stream: sendMessageDraft unavailable; falling back to sendMessage/editMessageText')
   }
+  // Stream-start trace — always-on, structured for grep + aggregation.
+  // Resolves WHY the chosen transport landed (req=auto|draft|message;
+  // dm=true|false|undef; api=available|absent). Gates the rest of the
+  // sendMessageDraft alignment PR sequence: without this we can't tell
+  // a draft-routing regression from a config-toggle change.
+  // Kill switch: SWITCHROOM_STREAM_TRACES=0.
+  if (process.env.SWITCHROOM_STREAM_TRACES !== '0') {
+    const reason = usesDraftTransport
+      ? 'draft'
+      : requestedTransport === 'message'
+        ? 'explicit-message'
+        : requestedTransport === 'draft' && draftApi == null
+          ? 'draft-requested-but-no-api'
+          : !prefersDraft
+            ? 'auto-non-dm'
+            : 'fallback'
+    const draftIdPart = draftId != null ? ` draftId=${draftId}` : ''
+    process.stderr.write(
+      `gw-trace stream-start transport=${usesDraftTransport ? 'draft' : 'message'} ` +
+        `reason=${reason} req=${requestedTransport} ` +
+        `dm=${config.isPrivateChat === undefined ? 'undef' : String(config.isPrivateChat)} ` +
+        `api=${draftApi != null ? 'available' : 'absent'} ` +
+        `throttleMs=${throttleMs}${draftIdPart} ` +
+        `chatId=${chatId || '-'}\n`,
+    )
+  }
   let messageId: number | null = config.initialMessageId ?? null
   let pendingText: string | null = null
   let lastSentText: string | null = null
   let lastSentAt = 0
   let inFlight: Promise<void> | null = null
+  // PR A observability — per-stream fire counters for the stream-end
+  // trace. draftFires/editFires/sendFires let the aggregator distinguish
+  // "stream used 80% draft + 20% edit fallback" vs "all edits, draft
+  // never fired". `firstFireAtMs` is the latency from stream-start to
+  // first wire send (matches TTFO sub-component for a single stream).
+  const streamStartedAt = Date.now()
+  let firstFireAtMs: number | null = null
+  let draftFires = 0
+  let editFires = 0
+  let sendFires = 0
+  let fallbackFires = 0
+  // PR C — persist-chain state. `persistedTextLen` is the offset into
+  // the full cumulative model text that has already been committed to
+  // a real Telegram message via `sendMessage`. Subsequent draft fires
+  // send only the slice from `persistedTextLen` onward (the
+  // unpersisted tail). `currentChunkStartedAt` is when the CURRENT
+  // chunk (since last persist boundary) started streaming — drives
+  // the 25-second persist trigger. `persistChainFires` counts how
+  // many chunks have been persisted in this stream (always 0 for
+  // message-transport streams, only ticks for draft-transport).
+  let persistedTextLen = 0
+  let currentChunkStartedAt: number | null = null
+  let persistChainFires = 0
   let scheduledTimer: ReturnType<typeof setTimeout> | null = null
   let final = false
   let stopped = false
@@ -230,14 +341,74 @@ export function createDraftStream(
   async function sendViaDraft(textToSend: string): Promise<boolean> {
     if (!draftApi || draftId == null) return false
+    // PR C: draft sees only the unpersisted tail. If the model produced
+    // text BEYOND what's already been committed to a real sendMessage,
+    // that tail is what the user sees in the live preview. When the
+    // tail is empty (model hasn't added anything new since persist),
+    // there's nothing to draft — the draft was cleared at persist time.
+    const draftText = textToSend.slice(persistedTextLen)
+    if (draftText.length === 0) {
+      // Treat as success — no work to do, dedup will skip on next call.
+      return true
+    }
     try {
-      await draftApi(chatId, draftId, textToSend)
-      log?.(`stream → draft (id: ${draftId}, ${textToSend.length} chars)`)
+      const result = await draftApi(chatId, draftId, draftText)
+      // PR D: sendMessageDraft is documented to return `true` on success.
+      // An explicit `false` / `null` return is a soft failure — Telegram
+      // accepted the call but the draft didn't land. Fall back to
+      // message transport for the rest of this stream so the user still
+      // sees the content. This catches API surface changes + edge cases
+      // not covered by `shouldFallbackFromDraftTransport`'s regex.
+      if (result !== true && result !== undefined) {
+        // Some grammY wrappers strip the bool and return undefined on
+        // success; treat ONLY explicitly-falsy returns as failure to
+        // avoid false-positive fallback. true / undefined → success.
+        if (result === false || result === null) {
+          warn?.(
+            `draft-stream: sendMessageDraft returned non-true (${JSON.stringify(result)}) — falling back to message transport`,
+          )
+          fallbackFires++
+          usesDraftTransport = false
+          draftId = undefined
+          return false
+        }
+      }
+      if (firstFireAtMs == null) firstFireAtMs = Date.now() - streamStartedAt
+      // Mark the start of THIS chunk's persist window on first fire of
+      // each chunk (after the previous persist boundary).
+      if (currentChunkStartedAt == null) currentChunkStartedAt = Date.now()
+      draftFires++
+      log?.(`stream → draft (id: ${draftId}, ${draftText.length} chars tail)`)
       return true
     } catch (err) {
+      // PR D: dedicated 429 path. Telegram rate-limits sendMessageDraft
+      // independently from sendMessage/editMessageText. On 429:
+      //   - extract `retry_after`
+      //   - fall back to message transport for the rest of this stream
+      //   - bump `lastSentAt` so the throttle window absorbs the
+      //     retry_after delay — prevents the message-transport
+      //     fallback from immediately firing and getting 429'd too
+      //     (Telegram's per-chat rate cap is shared across methods).
+      const retryAfterSecs = extractDraft429RetryAfterSecs(err)
+      if (retryAfterSecs != null && isDraft429(err)) {
+        warn?.(
+          `draft-stream: sendMessageDraft 429 (retry_after=${retryAfterSecs}s) — falling back to message transport + backoff`,
+        )
+        fallbackFires++
+        usesDraftTransport = false
+        draftId = undefined
+        // Push lastSentAt forward so the NEXT flush waits at least
+        // `retry_after` seconds before the message-transport send.
+        // The throttle math at update() / schedule() compares
+        // `Date.now() - lastSentAt >= throttleMs`, so by moving
+        // lastSentAt forward we delay the next fire.
+        lastSentAt = Date.now() + retryAfterSecs * 1000 - throttleMs
+        return false
+      }
       if (shouldFallbackFromDraftTransport(err)) {
         const msg = err instanceof Error ? err.message : String(err)
         warn?.(`draft-stream: sendMessageDraft rejected — falling back to sendMessage/editMessageText (${msg})`)
+        fallbackFires++
         usesDraftTransport = false
         draftId = undefined
         return false
@@ -264,8 +435,77 @@ export function createDraftStream(
       return
     }
-    if (textToSend.length > maxChars) {
-      log?.(`stream stopped: text exceeds ${maxChars} chars`)
+    // PR C — persist-chain trigger check. Runs BEFORE the maxChars
+    // hard-stop so we can chunk large outputs across multiple
+    // sendMessage calls instead of dropping them. Only the draft
+    // path needs this; message transport edits the same id forever
+    // and the 4096-char cap is a real terminal stop there.
+    //
+    // The trigger fires when EITHER the current chunk has been
+    // streaming for ≥25s OR the unpersisted tail is approaching the
+    // 4000-char message length cap. On fire: send the chunk via
+    // real sendMessage, bump persistedTextLen, allocate a fresh
+    // draftId, reset the chunk window. The subsequent normal-flow
+    // draft fire below sends only the (now-empty or post-persist) tail.
+    if (usesDraftTransport && currentChunkStartedAt != null) {
+      const elapsed = Date.now() - currentChunkStartedAt
+      const tailLen = textToSend.length - persistedTextLen
+      const sizeApproaching = tailLen >= persistSizeLimit
+      const timeElapsed = elapsed >= persistIntervalMs
+      if ((timeElapsed || sizeApproaching) && tailLen > 0) {
+        const chunk = textToSend.slice(persistedTextLen)
+        try {
+          const newMsgId = await send(chunk)
+          messageId = newMsgId
+          persistedTextLen = textToSend.length
+          draftId = allocateDraftId()
+          currentChunkStartedAt = null
+          persistChainFires++
+          if (process.env.SWITCHROOM_STREAM_TRACES !== '0') {
+            process.stderr.write(
+              `gw-trace stream-persist chunk_chars=${chunk.length} ` +
+                `elapsed=${elapsed} reason=${timeElapsed ? 'time' : 'size'} ` +
+                `newMsgId=${newMsgId} newDraftId=${draftId} ` +
+                `chatId=${chatId || '-'}\n`,
+            )
+          }
+          log?.(`stream → persisted chunk (id: ${newMsgId}, ${chunk.length} chars, reason=${timeElapsed ? 'time' : 'size'})`)
+        } catch (err) {
+          // Persist failed — log and continue. The next flush re-
+          // evaluates the trigger and re-fires.
+          //
+          // Edge case (accepted as v1 ceiling): if `send(chunk)`
+          // actually LANDED on Telegram but the response/ack was lost
+          // (network blip), the retry will double-persist — the user
+          // sees the same chunk twice as two separate sendMessages.
+          // Telegram doesn't expose a sendMessage idempotency key. The
+          // user-visible artifact is "duplicate chunk", not data loss,
+          // and observed rate of lost-ACK is rare. PR D follow-up
+          // could add a per-chunk hash dedup on retry.
+          warn?.(
+            `draft-stream: persist sendMessage failed — chunk stays in draft (${err instanceof Error ? err.message : String(err)})`,
+          )
+        }
+      }
+    }
+    // Edge case: if the model RETRACTS cumulative text (rare — most
+    // LLM streams are strict-extension), `textToSend.length` may be
+    // less than `persistedTextLen`. `slice(persistedTextLen)` returns
+    // "" and the persist trigger's `tailLen > 0` guard short-circuits,
+    // so we silently skip. The live preview goes stale until the model
+    // re-extends past `persistedTextLen`. No crash, no double-send.
+    // Tolerated as the failure mode is benign and the cause is upstream.
+    // Hard-stop check — applies to the sendable size (full text for
+    // message transport, post-persist tail for draft transport). After
+    // a successful persist, the tail resets so this won't fire even
+    // for huge cumulative texts in the draft path.
+    const sendableLen = usesDraftTransport
+      ? textToSend.length - persistedTextLen
+      : textToSend.length
+    if (sendableLen > maxChars) {
+      log?.(`stream stopped: ${usesDraftTransport ? 'tail' : 'text'} exceeds ${maxChars} chars`)
       stopped = true
       notifyWaiters()
       return
@@ -309,9 +549,13 @@ export function createDraftStream(
   async function sendViaMessage(textToSend: string): Promise<void> { // message transport: one real message, then edits in place
     if (messageId == null) { // no message exists for this stream yet → create it and remember its id
       messageId = await send(textToSend)
+      if (firstFireAtMs == null) firstFireAtMs = Date.now() - streamStartedAt // recorded once, only after the send resolved
+      sendFires++
       log?.(`stream → sent (id: ${messageId}, ${textToSend.length} chars)`)
     } else {
       await edit(messageId, textToSend)
+      if (firstFireAtMs == null) firstFireAtMs = Date.now() - streamStartedAt // an edit can be the first fire when messageId was pre-set (e.g. initialMessageId)
+      editFires++
       log?.(`stream → edited (id: ${messageId}, ${textToSend.length} chars)`)
     }
   }
@@ -406,14 +650,21 @@ export function createDraftStream(
         await flush()
       }
-      // Draft transport: materialize as a real sendMessage for push notification,
-      // then clear the draft best-effort.
+      // Draft transport: materialize as a real sendMessage for push
+      // notification, then clear the draft best-effort.
+      //
+      // PR C: with the persist-chain in play, earlier chunks may
+      // already be persisted as their own sendMessages. We materialize
+      // ONLY the unpersisted tail here — otherwise the user gets a
+      // duplicate of the prior chunks at turn end.
       if (usesDraftTransport && draftApi != null) {
-        const textToMaterialize = lastSentText
-        if (textToMaterialize) {
+        const fullText = lastSentText ?? ''
+        const textToMaterialize = fullText.slice(persistedTextLen)
+        if (textToMaterialize.length > 0) {
           try {
             messageId = await send(textToMaterialize)
-            log?.(`stream → materialized (id: ${messageId}, ${textToMaterialize.length} chars)`)
+            persistedTextLen = fullText.length
+            log?.(`stream → materialized tail (id: ${messageId}, ${textToMaterialize.length} chars)`)
           } catch (err) {
             warn?.(`draft-stream: materialize sendMessage failed: ${err instanceof Error ? err.message : String(err)}`)
           }
@@ -425,10 +676,35 @@ export function createDraftStream(
               // Best-effort — ignore failures
             }
           }
+        } else if (draftId != null) {
+          // Whole text already persisted via the chain — just clear the
+          // current draft so the input area isn't left with stale
+          // preview content.
+          try {
+            await draftApi(chatId, draftId, '')
+          } catch {
+            // Best-effort — ignore
+          }
         }
       }
       log?.(`stream finalized (id: ${messageId})`)
+      // Stream-end trace — pairs with stream-start. `drafts`/`edits`/
+      // `sends` lets the aggregator see the transport ratio per stream;
+      // `firstFireMs` is the per-stream send latency component of TTFO;
+      // `chars` is the final committed text length.
+      if (process.env.SWITCHROOM_STREAM_TRACES !== '0') {
+        const durationMs = Date.now() - streamStartedAt
+        process.stderr.write(
+          `gw-trace stream-end transport=${usesDraftTransport ? 'draft' : 'message'} ` +
+            `drafts=${draftFires} sends=${sendFires} edits=${editFires} ` +
+            `fallbacks=${fallbackFires} persists=${persistChainFires} ` +
+            `firstFireMs=${firstFireAtMs ?? -1} durationMs=${durationMs} ` +
+            `chars=${(lastSentText ?? '').length} ` +
+            `chatId=${chatId || '-'}\n`,
+        )
+      }
     },
     getMessageId(): number | null {

package/telegram-plugin/draft-transport.ts CHANGED Viewed

@@ -34,6 +34,56 @@ export function shouldFallbackFromDraftTransport(err: unknown): boolean {
   return DRAFT_METHOD_UNAVAILABLE_RE.test(text) || DRAFT_CHAT_UNSUPPORTED_RE.test(text)
 }
+/**
+ * PR D — extract the `retry_after` seconds from a grammY 429 error.
+ * Returns null when the error isn't a 429 (or has no usable retry_after).
+ *
+ * Shared with `issues-card.ts:extractRetryAfterSecs`. Duck-typed on the
+ * documented grammY `GrammyError` shape (`error_code`, `parameters`) to keep
+ * this module test-friendly without importing `GrammyError` directly.
+ */
+export function extractDraft429RetryAfterSecs(err: unknown): number | null {
+  if (err == null || typeof err !== 'object') return null // strings / primitives are never treated as a 429
+  const e = err as { error_code?: unknown; parameters?: { retry_after?: unknown } }
+  if (e.error_code !== 429) return null // strict compare: a string "429" does not match
+  const ra = e.parameters?.retry_after
+  if (typeof ra === 'number' && Number.isFinite(ra) && ra > 0) return ra // only finite, strictly positive numbers count
+  return null
+}
+/**
+ * PR D — was this a 429 from `sendMessageDraft` specifically? Lets
+ * draft-stream tell "draft is rate-limited" (transient, back off this
+ * stream) apart from a non-429 send error (handled separately by
+ * `shouldFallbackFromDraftTransport`). Both fall back to message
+ * transport, but the 429 case ALSO bumps the throttle window to honor
+ * Telegram's `retry_after` so the fallback doesn't immediately re-429.
+ *
+ * @param err - Caught value; duck-typed, no `GrammyError` import needed.
+ * @returns true only for a 429 carrying a usable `retry_after` whose
+ *   `method`, `message` (Error) or `description` names `sendMessageDraft`.
+ */
+export function isDraft429(err: unknown): boolean {
+  if (extractDraft429RetryAfterSecs(err) == null) return false // not a 429 object, or no positive retry_after
+  // grammY GrammyError carries the method name in its `method` field.
+  // Best-effort: match either the structured method or the error text.
+  if (typeof err === 'object' && err != null && 'method' in err) {
+    const m = (err as { method?: unknown }).method
+    if (typeof m === 'string' && /sendMessageDraft/i.test(m)) return true
+  }
+  const text =
+    typeof err === 'string' // NOTE(review): unreachable — the guard above already returned false for non-objects
+      ? err
+      : err instanceof Error
+        ? err.message
+        : typeof err === 'object' && err != null && 'description' in err
+          ? typeof (err as { description: unknown }).description === 'string'
+            ? (err as { description: string }).description
+            : ''
+          : ''
+  return /sendMessageDraft/i.test(text) // case-insensitive substring match on the chosen text
+}
 /**
  * Symbol-keyed shared counter for draft-id allocation across concurrent
  * streams (mirrors openclaw's getDraftStreamState). Using Symbol.for ensures

package/telegram-plugin/gateway/gateway.ts CHANGED Viewed

@@ -2786,6 +2786,17 @@ function postLegacyBanner(
 // short-circuit to no-ops at runtime. `progressDriver` is typed `any`
 // so TS doesn't resolve `progressDriver?.X` to `never`.
 const streamMode = process.env.SWITCHROOM_TG_STREAM_MODE ?? 'checklist'
+// PR B: per-agent stream throttle override via channels.telegram.stream_throttle_ms.
+// When unset, draft-stream.ts applies transport-aware defaults (300 ms draft,
+// 1000 ms message). Evaluated once at module load; empty, negative or
+// non-numeric values yield undefined so the per-transport default wins. See
+// `src/agents/scaffold.ts` `channelsToEnv()` for the yaml → env wiring.
+const STREAM_THROTTLE_MS_OVERRIDE: number | undefined = (() => {
+  const raw = process.env.SWITCHROOM_TG_STREAM_THROTTLE_MS
+  if (raw == null || raw === '') return undefined // unset or empty → no override
+  const n = Number.parseInt(raw, 10) // NOTE(review): parseInt is lenient — "300ms" parses as 300 and "1.5" as 1
+  return Number.isFinite(n) && n >= 0 ? n : undefined // 0 is accepted here; draft-stream's MIN_THROTTLE_MS clamp then applies
+})()
 const TURN_FLUSH_SAFETY_ENABLED = isTurnFlushSafetyEnabled()
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
 const progressDriver: any = null
@@ -4471,7 +4482,13 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
       recordOutbound,
       ...(HISTORY_ENABLED ? { getLatestInboundMessageId } : {}),
       writeError: (line) => process.stderr.write(line),
-      throttleMs: 600,
+      // PR B: drop the legacy 600 ms compromise. When the operator sets
+      // `channels.telegram.stream_throttle_ms` in yaml, the env override
+      // wins; otherwise draft-stream's transport-aware default fires
+      // (300 ms draft / 1000 ms message). With no override the
+      // `throttleMs` key is omitted entirely (conditional spread), so
+      // draft-stream's `config.throttleMs ?? default` picks the default.
+      ...(STREAM_THROTTLE_MS_OVERRIDE != null ? { throttleMs: STREAM_THROTTLE_MS_OVERRIDE } : {}),
       progressCardActive: streamMode === 'checklist',
     },
   )
@@ -6418,6 +6435,14 @@ function handlePtyActivity(text: string): void {
       historyEnabled: false,
       recordOutbound,
       writeError: (line) => process.stderr.write(line),
+      // PR B note: this is the PTY-activity stream, NOT the LLM
+      // stream_reply path. PTY drives many tiny partials as a TUI
+      // re-renders; 600 ms is a deliberate compromise tuned for the
+      // PTY flicker characteristics, not LLM token cadence. The
+      // transport-aware defaults (300/1000) deliberately do NOT
+      // apply here. If you change this, also check
+      // telegram-plugin/pty-partial-handler.ts:159 which has the
+      // same value for the same reason.
       throttleMs: 600,
     },
   ).catch((err) => {

package/telegram-plugin/stream-reply-handler.ts CHANGED Viewed

@@ -514,7 +514,9 @@ export async function handleStreamReply(
       threadId,
       parseMode,
       disableLinkPreview: deps.disableLinkPreview,
-      throttleMs: deps.throttleMs ?? 600,
+      // PR B: omit `throttleMs` when the caller didn't override, so draft-stream's
+      // transport-aware default (300 ms draft / 1000 ms message) wins.
+      ...(deps.throttleMs != null ? { throttleMs: deps.throttleMs } : {}),
       retry: deps.retry,
       ...(replyToMessageId != null ? { replyToMessageId } : {}),
       ...(args.quote_text != null && replyToMessageId != null ? { quoteText: args.quote_text } : {}),