switchroom 0.12.29 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,12 +32,48 @@
32
32
  import {
33
33
  shouldFallbackFromDraftTransport,
34
34
  allocateDraftId,
35
+ isDraft429,
36
+ extractDraft429RetryAfterSecs,
35
37
  } from './draft-transport.js'
36
38
 
37
39
  const TELEGRAM_MAX_CHARS = 4096
38
- const DEFAULT_THROTTLE_MS = 1000
40
+ // PR B: transport-aware defaults.
41
+ // Draft transport (DMs): 300 ms — drafts are ephemeral and don't share
42
+ // editMessageText's per-message rate cap, so we can refresh much faster.
43
+ // 300 ms feels live without burning bandwidth.
44
+ // Message transport (groups / forums / draft API absent): 1000 ms — must
45
+ // respect Telegram's "1 edit/sec/message" practical ceiling.
46
+ // Both defaults can be overridden per-stream via `config.throttleMs` (which
47
+ // is itself wired from `channels.telegram.stream_throttle_ms` in the agent
48
+ // yaml, via the SWITCHROOM_TG_STREAM_THROTTLE_MS env var the gateway reads).
49
+ const DEFAULT_DRAFT_THROTTLE_MS = 300
50
+ const DEFAULT_MESSAGE_THROTTLE_MS = 1000
39
51
  const MIN_THROTTLE_MS = 250
40
52
 
53
+ // PR C — sendMessageDraft 30-second ephemeral persist-chain.
54
+ //
55
+ // Telegram's sendMessageDraft preview expires after 30 seconds. Long
56
+ // LLM turns blow past that, leaving the user staring at a stale draft.
57
+ // To stay live for arbitrary-length turns: at ~25s of accumulated
58
+ // draft streaming (or when the unpersisted chunk approaches 4000 chars
59
+ // — the per-message length cap with safety margin), fire a real
60
+ // sendMessage with the current chunk. This persists what the user has
61
+ // seen so far as a real message (with push notification). Then we
62
+ // allocate a fresh draft_id and continue streaming the next chunk
63
+ // into a new ephemeral preview. The model still sees a single
64
+ // continuous turn; the user sees a CHAIN of persisted messages, each
65
+ // up to ~25s / ~4000 chars, separated by live previews.
66
+ //
67
+ // At done=true / finalize(), the LAST unpersisted chunk is fired via
68
+ // sendMessage so the final state of the response is durable.
69
+ //
70
+ // These triggers fire on top of the normal throttle loop — i.e., the
71
+ // persist boundary is checked just before each draft fire, not on a
72
+ // separate timer. This keeps the loop simple and avoids fighting with
73
+ // the in-flight promise.
74
+ const PERSIST_INTERVAL_MS = 25_000
75
+ const PERSIST_SAFETY_CHAR_LIMIT = 4000
76
+
41
77
  /**
42
78
  * Send the first message in a stream. Receives the rendered text plus a
43
79
  * thread_id (forum topic) and returns the new Telegram message_id.
@@ -111,6 +147,17 @@ export interface DraftStreamConfig {
111
147
  * so the draft can be cleared on finalize.
112
148
  */
113
149
  chatId?: string
150
+ /**
151
+ * PR C — persist-chain interval override. Default 25_000 ms. Lower
152
+ * for tests; production should leave default.
153
+ */
154
+ persistIntervalMs?: number
155
+ /**
156
+ * PR C — persist-chain size threshold override (chars). Default 4000.
157
+ * Lower for tests so the size-trigger can fire on small text without
158
+ * colliding with the 4096-char maxChars hard-stop.
159
+ */
160
+ persistSizeLimit?: number
114
161
  /** Optional logger for debugging. Receives one string per event. */
115
162
  log?: (msg: string) => void
116
163
  /** Optional warning logger. Used for transport fallback notices. */
@@ -169,7 +216,21 @@ export function createDraftStream(
169
216
  edit: StreamEditFn,
170
217
  config: DraftStreamConfig = {},
171
218
  ): DraftStreamHandle {
172
- const throttleMs = Math.max(MIN_THROTTLE_MS, config.throttleMs ?? DEFAULT_THROTTLE_MS)
219
+ // PR B: transport-aware default — the actual transport resolves a few
220
+ // lines below, so we replicate the prefersDraft check here. An
221
+ // explicit `config.throttleMs` (from the operator yaml or the
222
+ // caller) wins.
223
+ const _willPreferDraft =
224
+ (config.previewTransport ?? 'auto') === 'draft' ||
225
+ ((config.previewTransport ?? 'auto') === 'auto' && config.isPrivateChat === true)
226
+ const _defaultForTransport = _willPreferDraft && config.sendMessageDraft != null
227
+ ? DEFAULT_DRAFT_THROTTLE_MS
228
+ : DEFAULT_MESSAGE_THROTTLE_MS
229
+ const throttleMs = Math.max(MIN_THROTTLE_MS, config.throttleMs ?? _defaultForTransport)
230
+ // PR C: persist-chain config overrides (testability — production
231
+ // leaves defaults at 25 s / 4000 chars).
232
+ const persistIntervalMs = config.persistIntervalMs ?? PERSIST_INTERVAL_MS
233
+ const persistSizeLimit = config.persistSizeLimit ?? PERSIST_SAFETY_CHAR_LIMIT
173
234
  const maxChars = config.maxChars ?? TELEGRAM_MAX_CHARS
174
235
  const idleMs = Math.max(0, config.idleMs ?? 0)
175
236
  const log = config.log
@@ -207,11 +268,61 @@ export function createDraftStream(
207
268
  warn?.('draft-stream: sendMessageDraft unavailable; falling back to sendMessage/editMessageText')
208
269
  }
209
270
 
271
+ // Stream-start trace — always-on, structured for grep + aggregation.
272
+ // Resolves WHY the chosen transport landed (req=auto|draft|message;
273
+ // dm=true|false|undef; api=available|absent). Gates the rest of the
274
+ // sendMessageDraft alignment PR sequence: without this we can't tell
275
+ // a draft-routing regression from a config-toggle change.
276
+ // Kill switch: SWITCHROOM_STREAM_TRACES=0.
277
+ if (process.env.SWITCHROOM_STREAM_TRACES !== '0') {
278
+ const reason = usesDraftTransport
279
+ ? 'draft'
280
+ : requestedTransport === 'message'
281
+ ? 'explicit-message'
282
+ : requestedTransport === 'draft' && draftApi == null
283
+ ? 'draft-requested-but-no-api'
284
+ : !prefersDraft
285
+ ? 'auto-non-dm'
286
+ : 'fallback'
287
+ const draftIdPart = draftId != null ? ` draftId=${draftId}` : ''
288
+ process.stderr.write(
289
+ `gw-trace stream-start transport=${usesDraftTransport ? 'draft' : 'message'} ` +
290
+ `reason=${reason} req=${requestedTransport} ` +
291
+ `dm=${config.isPrivateChat === undefined ? 'undef' : String(config.isPrivateChat)} ` +
292
+ `api=${draftApi != null ? 'available' : 'absent'} ` +
293
+ `throttleMs=${throttleMs}${draftIdPart} ` +
294
+ `chatId=${chatId || '-'}\n`,
295
+ )
296
+ }
297
+
210
298
  let messageId: number | null = config.initialMessageId ?? null
211
299
  let pendingText: string | null = null
212
300
  let lastSentText: string | null = null
213
301
  let lastSentAt = 0
214
302
  let inFlight: Promise<void> | null = null
303
+ // PR A observability — per-stream fire counters for the stream-end
304
+ // trace. draftFires/editFires/sendFires let the aggregator distinguish
305
+ // "stream used 80% draft + 20% edit fallback" vs "all edits, draft
306
+ // never fired". `firstFireAtMs` is the latency from stream-start to
307
+ // first wire send (matches TTFO sub-component for a single stream).
308
+ const streamStartedAt = Date.now()
309
+ let firstFireAtMs: number | null = null
310
+ let draftFires = 0
311
+ let editFires = 0
312
+ let sendFires = 0
313
+ let fallbackFires = 0
314
+ // PR C — persist-chain state. `persistedTextLen` is the offset into
315
+ // the full cumulative model text that has already been committed to
316
+ // a real Telegram message via `sendMessage`. Subsequent draft fires
317
+ // send only the slice from `persistedTextLen` onward (the
318
+ // unpersisted tail). `currentChunkStartedAt` is when the CURRENT
319
+ // chunk (since last persist boundary) started streaming — drives
320
+ // the 25-second persist trigger. `persistChainFires` counts how
321
+ // many chunks have been persisted in this stream (always 0 for
322
+ // message-transport streams, only ticks for draft-transport).
323
+ let persistedTextLen = 0
324
+ let currentChunkStartedAt: number | null = null
325
+ let persistChainFires = 0
215
326
  let scheduledTimer: ReturnType<typeof setTimeout> | null = null
216
327
  let final = false
217
328
  let stopped = false
@@ -230,14 +341,74 @@ export function createDraftStream(
230
341
 
231
342
  async function sendViaDraft(textToSend: string): Promise<boolean> {
232
343
  if (!draftApi || draftId == null) return false
344
+ // PR C: draft sees only the unpersisted tail. If the model produced
345
+ // text BEYOND what's already been committed to a real sendMessage,
346
+ // that tail is what the user sees in the live preview. When the
347
+ // tail is empty (model hasn't added anything new since persist),
348
+ // there's nothing to draft — persist already switched to a fresh draft_id.
349
+ const draftText = textToSend.slice(persistedTextLen)
350
+ if (draftText.length === 0) {
351
+ // Treat as success — no work to do, dedup will skip on next call.
352
+ return true
353
+ }
233
354
  try {
234
- await draftApi(chatId, draftId, textToSend)
235
- log?.(`stream draft (id: ${draftId}, ${textToSend.length} chars)`)
355
+ const result = await draftApi(chatId, draftId, draftText)
356
+ // PR D: sendMessageDraft is documented to return `true` on success.
357
+ // An explicitly falsy return (false / null) is a soft failure — Telegram
358
+ // accepted the call but the draft didn't land. Fall back to
359
+ // message transport for the rest of this stream so the user still
360
+ // sees the content. This catches API surface changes + edge cases
361
+ // not covered by `shouldFallbackFromDraftTransport`'s regex.
362
+ if (result !== true && result !== undefined) {
363
+ // Some grammY wrappers strip the bool and return undefined on
364
+ // success; treat ONLY explicitly-falsy returns as failure to
365
+ // avoid false-positive fallback. true / undefined → success.
366
+ if (result === false || result === null) {
367
+ warn?.(
368
+ `draft-stream: sendMessageDraft returned non-true (${JSON.stringify(result)}) — falling back to message transport`,
369
+ )
370
+ fallbackFires++
371
+ usesDraftTransport = false
372
+ draftId = undefined
373
+ return false
374
+ }
375
+ }
376
+ if (firstFireAtMs == null) firstFireAtMs = Date.now() - streamStartedAt
377
+ // Mark the start of THIS chunk's persist window on first fire of
378
+ // each chunk (after the previous persist boundary).
379
+ if (currentChunkStartedAt == null) currentChunkStartedAt = Date.now()
380
+ draftFires++
381
+ log?.(`stream → draft (id: ${draftId}, ${draftText.length} chars tail)`)
236
382
  return true
237
383
  } catch (err) {
384
+ // PR D: dedicated 429 path. Telegram rate-limits sendMessageDraft
385
+ // independently from sendMessage/editMessageText. On 429:
386
+ // - extract `retry_after`
387
+ // - fall back to message transport for the rest of this stream
388
+ // - bump `lastSentAt` so the throttle window absorbs the
389
+ // retry_after delay — prevents the message-transport
390
+ // fallback from immediately firing and getting 429'd too
391
+ // (Telegram's per-chat rate cap is shared across methods).
392
+ const retryAfterSecs = extractDraft429RetryAfterSecs(err)
393
+ if (retryAfterSecs != null && isDraft429(err)) {
394
+ warn?.(
395
+ `draft-stream: sendMessageDraft 429 (retry_after=${retryAfterSecs}s) — falling back to message transport + backoff`,
396
+ )
397
+ fallbackFires++
398
+ usesDraftTransport = false
399
+ draftId = undefined
400
+ // Push lastSentAt forward so the NEXT flush waits at least
401
+ // `retry_after` seconds before the message-transport send.
402
+ // The throttle math at update() / schedule() compares
403
+ // `Date.now() - lastSentAt >= throttleMs`, so by moving
404
+ // lastSentAt forward we delay the next fire.
405
+ lastSentAt = Date.now() + retryAfterSecs * 1000 - throttleMs
406
+ return false
407
+ }
238
408
  if (shouldFallbackFromDraftTransport(err)) {
239
409
  const msg = err instanceof Error ? err.message : String(err)
240
410
  warn?.(`draft-stream: sendMessageDraft rejected — falling back to sendMessage/editMessageText (${msg})`)
411
+ fallbackFires++
241
412
  usesDraftTransport = false
242
413
  draftId = undefined
243
414
  return false
@@ -264,8 +435,77 @@ export function createDraftStream(
264
435
  return
265
436
  }
266
437
 
267
- if (textToSend.length > maxChars) {
268
- log?.(`stream stopped: text exceeds ${maxChars} chars`)
438
+ // PR C — persist-chain trigger check. Runs BEFORE the maxChars
439
+ // hard-stop so we can chunk large outputs across multiple
440
+ // sendMessage calls instead of dropping them. Only the draft
441
+ // path needs this; message transport edits the same id forever
442
+ // and the 4096-char cap is a real terminal stop there.
443
+ //
444
+ // The trigger fires when EITHER the current chunk has been
445
+ // streaming for ≥25s OR the unpersisted tail is approaching the
446
+ // 4000-char message length cap. On fire: send the chunk via
447
+ // real sendMessage, bump persistedTextLen, allocate a fresh
448
+ // draftId, reset the chunk window. The subsequent normal-flow
449
+ // draft fire below sends only the (now-empty or post-persist) tail.
450
+ if (usesDraftTransport && currentChunkStartedAt != null) {
451
+ const elapsed = Date.now() - currentChunkStartedAt
452
+ const tailLen = textToSend.length - persistedTextLen
453
+ const sizeApproaching = tailLen >= persistSizeLimit
454
+ const timeElapsed = elapsed >= persistIntervalMs
455
+ if ((timeElapsed || sizeApproaching) && tailLen > 0) {
456
+ const chunk = textToSend.slice(persistedTextLen)
457
+ try {
458
+ const newMsgId = await send(chunk)
459
+ messageId = newMsgId
460
+ persistedTextLen = textToSend.length
461
+ draftId = allocateDraftId()
462
+ currentChunkStartedAt = null
463
+ persistChainFires++
464
+ if (process.env.SWITCHROOM_STREAM_TRACES !== '0') {
465
+ process.stderr.write(
466
+ `gw-trace stream-persist chunk_chars=${chunk.length} ` +
467
+ `elapsed=${elapsed} reason=${timeElapsed ? 'time' : 'size'} ` +
468
+ `newMsgId=${newMsgId} newDraftId=${draftId} ` +
469
+ `chatId=${chatId || '-'}\n`,
470
+ )
471
+ }
472
+ log?.(`stream → persisted chunk (id: ${newMsgId}, ${chunk.length} chars, reason=${timeElapsed ? 'time' : 'size'})`)
473
+ } catch (err) {
474
+ // Persist failed — log and continue. The next flush re-
475
+ // evaluates the trigger and re-fires.
476
+ //
477
+ // Edge case (accepted as v1 ceiling): if `send(chunk)`
478
+ // actually LANDED on Telegram but the response/ack was lost
479
+ // (network blip), the retry will double-persist — the user
480
+ // sees the same chunk twice as two separate sendMessages.
481
+ // Telegram doesn't expose a sendMessage idempotency key. The
482
+ // user-visible artifact is "duplicate chunk", not data loss,
483
+ // and observed rate of lost-ACK is rare. PR D follow-up
484
+ // could add a per-chunk hash dedup on retry.
485
+ warn?.(
486
+ `draft-stream: persist sendMessage failed — chunk stays in draft (${err instanceof Error ? err.message : String(err)})`,
487
+ )
488
+ }
489
+ }
490
+ }
491
+
492
+ // Edge case: if the model RETRACTS cumulative text (rare — most
493
+ // LLM streams are strict-extension), `textToSend.length` may be
494
+ // less than `persistedTextLen`. `slice(persistedTextLen)` returns
495
+ // "" and the persist trigger's `tailLen > 0` guard short-circuits,
496
+ // so we silently skip. The live preview goes stale until the model
497
+ // re-extends past `persistedTextLen`. No crash, no double-send.
498
+ // Tolerated as the failure mode is benign and the cause is upstream.
499
+
500
+ // Hard-stop check — applies to the sendable size (full text for
501
+ // message transport, post-persist tail for draft transport). After
502
+ // a successful persist, the tail resets so this won't fire even
503
+ // for huge cumulative texts in the draft path.
504
+ const sendableLen = usesDraftTransport
505
+ ? textToSend.length - persistedTextLen
506
+ : textToSend.length
507
+ if (sendableLen > maxChars) {
508
+ log?.(`stream stopped: ${usesDraftTransport ? 'tail' : 'text'} exceeds ${maxChars} chars`)
269
509
  stopped = true
270
510
  notifyWaiters()
271
511
  return
@@ -309,9 +549,13 @@ export function createDraftStream(
309
549
  async function sendViaMessage(textToSend: string): Promise<void> {
310
550
  if (messageId == null) {
311
551
  messageId = await send(textToSend)
552
+ if (firstFireAtMs == null) firstFireAtMs = Date.now() - streamStartedAt
553
+ sendFires++
312
554
  log?.(`stream → sent (id: ${messageId}, ${textToSend.length} chars)`)
313
555
  } else {
314
556
  await edit(messageId, textToSend)
557
+ if (firstFireAtMs == null) firstFireAtMs = Date.now() - streamStartedAt
558
+ editFires++
315
559
  log?.(`stream → edited (id: ${messageId}, ${textToSend.length} chars)`)
316
560
  }
317
561
  }
@@ -406,14 +650,21 @@ export function createDraftStream(
406
650
  await flush()
407
651
  }
408
652
 
409
- // Draft transport: materialize as a real sendMessage for push notification,
410
- // then clear the draft best-effort.
653
+ // Draft transport: materialize as a real sendMessage for push
654
+ // notification, then clear the draft best-effort.
655
+ //
656
+ // PR C: with the persist-chain in play, earlier chunks may
657
+ // already be persisted as their own sendMessages. We materialize
658
+ // ONLY the unpersisted tail here — otherwise the user gets a
659
+ // duplicate of the prior chunks at turn end.
411
660
  if (usesDraftTransport && draftApi != null) {
412
- const textToMaterialize = lastSentText
413
- if (textToMaterialize) {
661
+ const fullText = lastSentText ?? ''
662
+ const textToMaterialize = fullText.slice(persistedTextLen)
663
+ if (textToMaterialize.length > 0) {
414
664
  try {
415
665
  messageId = await send(textToMaterialize)
416
- log?.(`stream materialized (id: ${messageId}, ${textToMaterialize.length} chars)`)
666
+ persistedTextLen = fullText.length
667
+ log?.(`stream → materialized tail (id: ${messageId}, ${textToMaterialize.length} chars)`)
417
668
  } catch (err) {
418
669
  warn?.(`draft-stream: materialize sendMessage failed: ${err instanceof Error ? err.message : String(err)}`)
419
670
  }
@@ -425,10 +676,35 @@ export function createDraftStream(
425
676
  // Best-effort — ignore failures
426
677
  }
427
678
  }
679
+ } else if (draftId != null) {
680
+ // Whole text already persisted via the chain — just clear the
681
+ // current draft so the input area isn't left with stale
682
+ // preview content.
683
+ try {
684
+ await draftApi(chatId, draftId, '')
685
+ } catch {
686
+ // Best-effort — ignore
687
+ }
428
688
  }
429
689
  }
430
690
 
431
691
  log?.(`stream finalized (id: ${messageId})`)
692
+
693
+ // Stream-end trace — pairs with stream-start. `drafts`/`edits`/
694
+ // `sends` lets the aggregator see the transport ratio per stream;
695
+ // `firstFireMs` is the per-stream send latency component of TTFO;
696
+ // `chars` is the final committed text length.
697
+ if (process.env.SWITCHROOM_STREAM_TRACES !== '0') {
698
+ const durationMs = Date.now() - streamStartedAt
699
+ process.stderr.write(
700
+ `gw-trace stream-end transport=${usesDraftTransport ? 'draft' : 'message'} ` +
701
+ `drafts=${draftFires} sends=${sendFires} edits=${editFires} ` +
702
+ `fallbacks=${fallbackFires} persists=${persistChainFires} ` +
703
+ `firstFireMs=${firstFireAtMs ?? -1} durationMs=${durationMs} ` +
704
+ `chars=${(lastSentText ?? '').length} ` +
705
+ `chatId=${chatId || '-'}\n`,
706
+ )
707
+ }
432
708
  },
433
709
 
434
710
  getMessageId(): number | null {
@@ -34,6 +34,56 @@ export function shouldFallbackFromDraftTransport(err: unknown): boolean {
34
34
  return DRAFT_METHOD_UNAVAILABLE_RE.test(text) || DRAFT_CHAT_UNSUPPORTED_RE.test(text)
35
35
  }
36
36
 
37
+ /**
38
+ * PR D — extract the `retry_after` seconds from a grammY 429 error.
39
+ * Returns null when the error isn't a 429 (or has no retry_after).
40
+ *
41
+ * Shared with `issues-card.ts:extractRetryAfterSecs`. Duck-typed on the
42
+ * documented grammY `GrammyError` shape to keep this module
43
+ * test-friendly without importing `GrammyError` directly.
44
+ */
45
+ export function extractDraft429RetryAfterSecs(err: unknown): number | null {
46
+ if (err == null || typeof err !== 'object') return null
47
+ const e = err as { error_code?: unknown; parameters?: { retry_after?: unknown } }
48
+ if (e.error_code !== 429) return null
49
+ const ra = e.parameters?.retry_after
50
+ if (typeof ra === 'number' && Number.isFinite(ra) && ra > 0) return ra
51
+ return null
52
+ }
53
+
54
+ /**
55
+ * PR D — was this a 429 from `sendMessageDraft` specifically? Used by
56
+ * draft-stream to differentiate "draft is rate-limited" (transient,
57
+ * just back off this stream) from a non-429 send error (handled
58
+ * separately by `shouldFallbackFromDraftTransport`).
59
+ *
60
+ * Both cases trigger fallback to message transport for the rest of
61
+ * the stream, but the 429 case ALSO bumps the throttle window to
62
+ * honor Telegram's `retry_after` — so the message-transport fallback
63
+ * doesn't immediately fire a fresh send before Telegram's cooldown
64
+ * elapses and re-429s.
65
+ */
66
+ export function isDraft429(err: unknown): boolean {
67
+ if (extractDraft429RetryAfterSecs(err) == null) return false
68
+ // grammY GrammyError carries the method name in its `method` field.
69
+ // Best-effort: match either the structured method or the error text.
70
+ if (typeof err === 'object' && err != null && 'method' in err) {
71
+ const m = (err as { method?: unknown }).method
72
+ if (typeof m === 'string' && /sendMessageDraft/i.test(m)) return true
73
+ }
74
+ const text =
75
+ typeof err === 'string'
76
+ ? err
77
+ : err instanceof Error
78
+ ? err.message
79
+ : typeof err === 'object' && err != null && 'description' in err
80
+ ? typeof (err as { description: unknown }).description === 'string'
81
+ ? (err as { description: string }).description
82
+ : ''
83
+ : ''
84
+ return /sendMessageDraft/i.test(text)
85
+ }
86
+
37
87
  /**
38
88
  * Symbol-keyed shared counter for draft-id allocation across concurrent
39
89
  * streams (mirrors openclaw's getDraftStreamState). Using Symbol.for ensures
@@ -2786,6 +2786,17 @@ function postLegacyBanner(
2786
2786
  // short-circuit to no-ops at runtime. `progressDriver` is typed `any`
2787
2787
  // so TS doesn't resolve `progressDriver?.X` to `never`.
2788
2788
  const streamMode = process.env.SWITCHROOM_TG_STREAM_MODE ?? 'checklist'
2789
+ // PR B: per-agent stream throttle override via channels.telegram.stream_throttle_ms.
2790
+ // When unset, draft-stream.ts applies transport-aware defaults (300 ms draft,
2791
+ // 1000 ms message). Parsed once at boot; sub-zero / NaN values fall back to
2792
+ // undefined so the per-transport default wins. See `src/agents/scaffold.ts`
2793
+ // `channelsToEnv()` for the yaml → env wiring.
2794
+ const STREAM_THROTTLE_MS_OVERRIDE: number | undefined = (() => {
2795
+ const raw = process.env.SWITCHROOM_TG_STREAM_THROTTLE_MS
2796
+ if (raw == null || raw === '') return undefined
2797
+ const n = Number.parseInt(raw, 10)
2798
+ return Number.isFinite(n) && n >= 0 ? n : undefined
2799
+ })()
2789
2800
  const TURN_FLUSH_SAFETY_ENABLED = isTurnFlushSafetyEnabled()
2790
2801
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
2791
2802
  const progressDriver: any = null
@@ -4471,7 +4482,13 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
4471
4482
  recordOutbound,
4472
4483
  ...(HISTORY_ENABLED ? { getLatestInboundMessageId } : {}),
4473
4484
  writeError: (line) => process.stderr.write(line),
4474
- throttleMs: 600,
4485
+ // PR B: drop the legacy 600 ms compromise. When the operator sets
4486
+ // `channels.telegram.stream_throttle_ms` in yaml, the env override
4487
+ // wins; otherwise draft-stream's transport-aware default fires
4488
+ // (300 ms draft / 1000 ms message). With no override, the `throttleMs`
4489
+ // key is omitted here entirely, so the value stays undefined
4490
+ // downstream and draft-stream's own default applies.
4491
+ ...(STREAM_THROTTLE_MS_OVERRIDE != null ? { throttleMs: STREAM_THROTTLE_MS_OVERRIDE } : {}),
4475
4492
  progressCardActive: streamMode === 'checklist',
4476
4493
  },
4477
4494
  )
@@ -6418,6 +6435,14 @@ function handlePtyActivity(text: string): void {
6418
6435
  historyEnabled: false,
6419
6436
  recordOutbound,
6420
6437
  writeError: (line) => process.stderr.write(line),
6438
+ // PR B note: this is the PTY-activity stream, NOT the LLM
6439
+ // stream_reply path. PTY drives many tiny partials as a TUI
6440
+ // re-renders; 600 ms is a deliberate compromise tuned for the
6441
+ // PTY flicker characteristics, not LLM token cadence. The
6442
+ // transport-aware defaults (300/1000) deliberately do NOT
6443
+ // apply here. If you change this, also check
6444
+ // telegram-plugin/pty-partial-handler.ts:159 which has the
6445
+ // same value for the same reason.
6421
6446
  throttleMs: 600,
6422
6447
  },
6423
6448
  ).catch((err) => {
@@ -514,7 +514,9 @@ export async function handleStreamReply(
514
514
  threadId,
515
515
  parseMode,
516
516
  disableLinkPreview: deps.disableLinkPreview,
517
- throttleMs: deps.throttleMs ?? 600,
517
+ // PR B: pass undefined when caller didn't override, so draft-stream's
518
+ // transport-aware default (300 ms draft / 1000 ms message) wins.
519
+ ...(deps.throttleMs != null ? { throttleMs: deps.throttleMs } : {}),
518
520
  retry: deps.retry,
519
521
  ...(replyToMessageId != null ? { replyToMessageId } : {}),
520
522
  ...(args.quote_text != null && replyToMessageId != null ? { quoteText: args.quote_text } : {}),