switchroom 0.12.29 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,12 +32,48 @@
32
32
  import {
33
33
  shouldFallbackFromDraftTransport,
34
34
  allocateDraftId,
35
+ isDraft429,
36
+ extractDraft429RetryAfterSecs,
35
37
  } from './draft-transport.js'
36
38
 
37
39
  const TELEGRAM_MAX_CHARS = 4096
38
- const DEFAULT_THROTTLE_MS = 1000
40
+ // PR B: transport-aware defaults.
41
+ // Draft transport (DMs): 300 ms — drafts are ephemeral and don't share
42
+ // editMessageText's per-message rate cap, so we can refresh much faster.
43
+ // 300 ms feels live without burning bandwidth.
44
+ // Message transport (groups / forums / draft API absent): 1000 ms — must
45
+ // respect Telegram's "1 edit/sec/message" practical ceiling.
46
+ // Both defaults can be overridden per-stream via `config.throttleMs` (which
47
+ // is itself wired from `channels.telegram.stream_throttle_ms` in the agent
48
+ // yaml, via the SWITCHROOM_TG_STREAM_THROTTLE_MS env var the gateway reads).
49
+ const DEFAULT_DRAFT_THROTTLE_MS = 300
50
+ const DEFAULT_MESSAGE_THROTTLE_MS = 1000
39
51
  const MIN_THROTTLE_MS = 250
40
52
 
53
+ // PR C — sendMessageDraft 30-second ephemeral persist-chain.
54
+ //
55
+ // Telegram's sendMessageDraft preview expires after 30 seconds. Long
56
+ // LLM turns blow past that, leaving the user staring at a stale draft.
57
+ // To stay live for arbitrary-length turns: at ~25s of accumulated
58
+ // draft streaming (or when the unpersisted chunk approaches 4000 chars
59
+ // — the per-message length cap with safety margin), fire a real
60
+ // sendMessage with the current chunk. This persists what the user has
61
+ // seen so far as a real message (with push notification). Then we
62
+ // allocate a fresh draft_id and continue streaming the next chunk
63
+ // into a new ephemeral preview. The model still sees a single
64
+ // continuous turn; the user sees a CHAIN of persisted messages, each
65
+ // up to ~25s / ~4000 chars, separated by live previews.
66
+ //
67
+ // At done=true / finalize(), the LAST unpersisted chunk is fired via
68
+ // sendMessage so the final state of the response is durable.
69
+ //
70
+ // These triggers fire on top of the normal throttle loop — i.e., the
71
+ // persist boundary is checked just before each draft fire, not on a
72
+ // separate timer. This keeps the loop simple and avoids fighting with
73
+ // the in-flight promise.
74
+ const PERSIST_INTERVAL_MS = 25_000
75
+ const PERSIST_SAFETY_CHAR_LIMIT = 4000
76
+
41
77
  /**
42
78
  * Send the first message in a stream. Receives the rendered text plus a
43
79
  * thread_id (forum topic) and returns the new Telegram message_id.
@@ -111,6 +147,17 @@ export interface DraftStreamConfig {
111
147
  * so the draft can be cleared on finalize.
112
148
  */
113
149
  chatId?: string
150
+ /**
151
+ * PR C — persist-chain interval override. Default 25_000 ms. Lower
152
+ * for tests; production should leave default.
153
+ */
154
+ persistIntervalMs?: number
155
+ /**
156
+ * PR C — persist-chain size threshold override (chars). Default 4000.
157
+ * Lower for tests so the size-trigger can fire on small text without
158
+ * colliding with the 4096-char maxChars hard-stop.
159
+ */
160
+ persistSizeLimit?: number
114
161
  /** Optional logger for debugging. Receives one string per event. */
115
162
  log?: (msg: string) => void
116
163
  /** Optional warning logger. Used for transport fallback notices. */
@@ -169,7 +216,21 @@ export function createDraftStream(
169
216
  edit: StreamEditFn,
170
217
  config: DraftStreamConfig = {},
171
218
  ): DraftStreamHandle {
172
- const throttleMs = Math.max(MIN_THROTTLE_MS, config.throttleMs ?? DEFAULT_THROTTLE_MS)
219
+ // PR B: transport-aware default — the actual transport resolves a few
220
+ // lines below, so we replicate the prefersDraft check here. An
221
+ // explicit `config.throttleMs` (from the operator yaml or the
222
+ // caller) wins.
223
+ const _willPreferDraft =
224
+ (config.previewTransport ?? 'auto') === 'draft' ||
225
+ ((config.previewTransport ?? 'auto') === 'auto' && config.isPrivateChat === true)
226
+ const _defaultForTransport = _willPreferDraft && config.sendMessageDraft != null
227
+ ? DEFAULT_DRAFT_THROTTLE_MS
228
+ : DEFAULT_MESSAGE_THROTTLE_MS
229
+ const throttleMs = Math.max(MIN_THROTTLE_MS, config.throttleMs ?? _defaultForTransport)
230
+ // PR C: persist-chain config overrides (testability — production
231
+ // leaves defaults at 25 s / 4000 chars).
232
+ const persistIntervalMs = config.persistIntervalMs ?? PERSIST_INTERVAL_MS
233
+ const persistSizeLimit = config.persistSizeLimit ?? PERSIST_SAFETY_CHAR_LIMIT
173
234
  const maxChars = config.maxChars ?? TELEGRAM_MAX_CHARS
174
235
  const idleMs = Math.max(0, config.idleMs ?? 0)
175
236
  const log = config.log
@@ -207,11 +268,61 @@ export function createDraftStream(
207
268
  warn?.('draft-stream: sendMessageDraft unavailable; falling back to sendMessage/editMessageText')
208
269
  }
209
270
 
271
+ // Stream-start trace — always-on, structured for grep + aggregation.
272
+ // Resolves WHY the chosen transport landed (req=auto|draft|message;
273
+ // dm=true|false|undef; api=available|absent). Gates the rest of the
274
+ // sendMessageDraft alignment PR sequence: without this we can't tell
275
+ // a draft-routing regression from a config-toggle change.
276
+ // Kill switch: SWITCHROOM_STREAM_TRACES=0.
277
+ if (process.env.SWITCHROOM_STREAM_TRACES !== '0') {
278
+ const reason = usesDraftTransport
279
+ ? 'draft'
280
+ : requestedTransport === 'message'
281
+ ? 'explicit-message'
282
+ : requestedTransport === 'draft' && draftApi == null
283
+ ? 'draft-requested-but-no-api'
284
+ : !prefersDraft
285
+ ? 'auto-non-dm'
286
+ : 'fallback'
287
+ const draftIdPart = draftId != null ? ` draftId=${draftId}` : ''
288
+ process.stderr.write(
289
+ `gw-trace stream-start transport=${usesDraftTransport ? 'draft' : 'message'} ` +
290
+ `reason=${reason} req=${requestedTransport} ` +
291
+ `dm=${config.isPrivateChat === undefined ? 'undef' : String(config.isPrivateChat)} ` +
292
+ `api=${draftApi != null ? 'available' : 'absent'} ` +
293
+ `throttleMs=${throttleMs}${draftIdPart} ` +
294
+ `chatId=${chatId || '-'}\n`,
295
+ )
296
+ }
297
+
210
298
  let messageId: number | null = config.initialMessageId ?? null
211
299
  let pendingText: string | null = null
212
300
  let lastSentText: string | null = null
213
301
  let lastSentAt = 0
214
302
  let inFlight: Promise<void> | null = null
303
+ // PR A observability — per-stream fire counters for the stream-end
304
+ // trace. draftFires/editFires/sendFires let the aggregator distinguish
305
+ // "stream used 80% draft + 20% edit fallback" vs "all edits, draft
306
+ // never fired". `firstFireAtMs` is the latency from stream-start to
307
+ // first wire send (matches TTFO sub-component for a single stream).
308
+ const streamStartedAt = Date.now()
309
+ let firstFireAtMs: number | null = null
310
+ let draftFires = 0
311
+ let editFires = 0
312
+ let sendFires = 0
313
+ let fallbackFires = 0
314
+ // PR C — persist-chain state. `persistedTextLen` is the offset into
315
+ // the full cumulative model text that has already been committed to
316
+ // a real Telegram message via `sendMessage`. Subsequent draft fires
317
+ // send only the slice from `persistedTextLen` onward (the
318
+ // unpersisted tail). `currentChunkStartedAt` is when the CURRENT
319
+ // chunk (since last persist boundary) started streaming — drives
320
+ // the 25-second persist trigger. `persistChainFires` counts how
321
+ // many chunks have been persisted in this stream (always 0 for
322
+ // message-transport streams, only ticks for draft-transport).
323
+ let persistedTextLen = 0
324
+ let currentChunkStartedAt: number | null = null
325
+ let persistChainFires = 0
215
326
  let scheduledTimer: ReturnType<typeof setTimeout> | null = null
216
327
  let final = false
217
328
  let stopped = false
@@ -230,14 +341,74 @@ export function createDraftStream(
230
341
 
231
342
  async function sendViaDraft(textToSend: string): Promise<boolean> {
232
343
  if (!draftApi || draftId == null) return false
344
+ // PR C: draft sees only the unpersisted tail. If the model produced
345
+ // text BEYOND what's already been committed to a real sendMessage,
346
+ // that tail is what the user sees in the live preview. When the
347
+ // tail is empty (model hasn't added anything new since persist),
348
+ // there's nothing to draft — the draft was cleared at persist time.
349
+ const draftText = textToSend.slice(persistedTextLen)
350
+ if (draftText.length === 0) {
351
+ // Treat as success — no work to do, dedup will skip on next call.
352
+ return true
353
+ }
233
354
  try {
234
- await draftApi(chatId, draftId, textToSend)
235
- log?.(`stream draft (id: ${draftId}, ${textToSend.length} chars)`)
355
+ const result = await draftApi(chatId, draftId, draftText)
356
+ // PR D: sendMessageDraft is documented to return `true` on success.
357
+ // A non-true (or missing) return is a soft failure — Telegram
358
+ // accepted the call but the draft didn't land. Fall back to
359
+ // message transport for the rest of this stream so the user still
360
+ // sees the content. This catches API surface changes + edge cases
361
+ // not covered by `shouldFallbackFromDraftTransport`'s regex.
362
+ if (result !== true && result !== undefined) {
363
+ // Some grammY wrappers strip the bool and return undefined on
364
+ // success; treat ONLY explicitly-falsy returns as failure to
365
+ // avoid false-positive fallback. true / undefined → success.
366
+ if (result === false || result === null) {
367
+ warn?.(
368
+ `draft-stream: sendMessageDraft returned non-true (${JSON.stringify(result)}) — falling back to message transport`,
369
+ )
370
+ fallbackFires++
371
+ usesDraftTransport = false
372
+ draftId = undefined
373
+ return false
374
+ }
375
+ }
376
+ if (firstFireAtMs == null) firstFireAtMs = Date.now() - streamStartedAt
377
+ // Mark the start of THIS chunk's persist window on first fire of
378
+ // each chunk (after the previous persist boundary).
379
+ if (currentChunkStartedAt == null) currentChunkStartedAt = Date.now()
380
+ draftFires++
381
+ log?.(`stream → draft (id: ${draftId}, ${draftText.length} chars tail)`)
236
382
  return true
237
383
  } catch (err) {
384
+ // PR D: dedicated 429 path. Telegram rate-limits sendMessageDraft
385
+ // independently from sendMessage/editMessageText. On 429:
386
+ // - extract `retry_after`
387
+ // - fall back to message transport for the rest of this stream
388
+ // - bump `lastSentAt` so the throttle window absorbs the
389
+ // retry_after delay — prevents the message-transport
390
+ // fallback from immediately firing and getting 429'd too
391
+ // (Telegram's per-chat rate cap is shared across methods).
392
+ const retryAfterSecs = extractDraft429RetryAfterSecs(err)
393
+ if (retryAfterSecs != null && isDraft429(err)) {
394
+ warn?.(
395
+ `draft-stream: sendMessageDraft 429 (retry_after=${retryAfterSecs}s) — falling back to message transport + backoff`,
396
+ )
397
+ fallbackFires++
398
+ usesDraftTransport = false
399
+ draftId = undefined
400
+ // Push lastSentAt forward so the NEXT flush waits at least
401
+ // `retry_after` seconds before the message-transport send.
402
+ // The throttle math at update() / schedule() compares
403
+ // `Date.now() - lastSentAt >= throttleMs`, so by moving
404
+ // lastSentAt forward we delay the next fire.
405
+ lastSentAt = Date.now() + retryAfterSecs * 1000 - throttleMs
406
+ return false
407
+ }
238
408
  if (shouldFallbackFromDraftTransport(err)) {
239
409
  const msg = err instanceof Error ? err.message : String(err)
240
410
  warn?.(`draft-stream: sendMessageDraft rejected — falling back to sendMessage/editMessageText (${msg})`)
411
+ fallbackFires++
241
412
  usesDraftTransport = false
242
413
  draftId = undefined
243
414
  return false
@@ -264,8 +435,83 @@ export function createDraftStream(
264
435
  return
265
436
  }
266
437
 
267
- if (textToSend.length > maxChars) {
268
- log?.(`stream stopped: text exceeds ${maxChars} chars`)
438
+ // PR C — persist-chain trigger check. Runs BEFORE the maxChars
439
+ // hard-stop so we can chunk large outputs across multiple
440
+ // sendMessage calls instead of dropping them. Only the draft
441
+ // path needs this; message transport edits the same id forever
442
+ // and the 4096-char cap is a real terminal stop there.
443
+ //
444
+ // The trigger fires when EITHER the current chunk has been
445
+ // streaming for ≥25s OR the unpersisted tail is approaching the
446
+ // 4000-char message length cap. On fire: send the chunk via
447
+ // real sendMessage, bump persistedTextLen, allocate a fresh
448
+ // draftId, reset the chunk window. The subsequent normal-flow
449
+ // draft fire below sends only the (now-empty or post-persist) tail.
450
+ if (usesDraftTransport && currentChunkStartedAt != null) {
451
+ const elapsed = Date.now() - currentChunkStartedAt
452
+ const tailLen = textToSend.length - persistedTextLen
453
+ const sizeApproaching = tailLen >= persistSizeLimit
454
+ const timeElapsed = elapsed >= persistIntervalMs
455
+ if ((timeElapsed || sizeApproaching) && tailLen > 0) {
456
+ const chunk = textToSend.slice(persistedTextLen)
457
+ try {
458
+ const newMsgId = await send(chunk)
459
+ messageId = newMsgId
460
+ persistedTextLen = textToSend.length
461
+ draftId = allocateDraftId()
462
+ currentChunkStartedAt = null
463
+ persistChainFires++
464
+ // PR follow-up: persist-chain's bare send() bypasses
465
+ // sendViaMessage's increment, same shape as the finalize-
466
+ // materialize bug. Without this, streams that cross the
467
+ // 25s / 4000-char boundary would under-report `sends` by
468
+ // the chain count in stream-end.
469
+ sendFires++
470
+ if (process.env.SWITCHROOM_STREAM_TRACES !== '0') {
471
+ process.stderr.write(
472
+ `gw-trace stream-persist chunk_chars=${chunk.length} ` +
473
+ `elapsed=${elapsed} reason=${timeElapsed ? 'time' : 'size'} ` +
474
+ `newMsgId=${newMsgId} newDraftId=${draftId} ` +
475
+ `chatId=${chatId || '-'}\n`,
476
+ )
477
+ }
478
+ log?.(`stream → persisted chunk (id: ${newMsgId}, ${chunk.length} chars, reason=${timeElapsed ? 'time' : 'size'})`)
479
+ } catch (err) {
480
+ // Persist failed — log and continue. The next flush re-
481
+ // evaluates the trigger and re-fires.
482
+ //
483
+ // Edge case (accepted as v1 ceiling): if `send(chunk)`
484
+ // actually LANDED on Telegram but the response/ack was lost
485
+ // (network blip), the retry will double-persist — the user
486
+ // sees the same chunk twice as two separate sendMessages.
487
+ // Telegram doesn't expose a sendMessage idempotency key. The
488
+ // user-visible artifact is "duplicate chunk", not data loss,
489
+ // and observed rate of lost-ACK is rare. PR D follow-up
490
+ // could add a per-chunk hash dedup on retry.
491
+ warn?.(
492
+ `draft-stream: persist sendMessage failed — chunk stays in draft (${err instanceof Error ? err.message : String(err)})`,
493
+ )
494
+ }
495
+ }
496
+ }
497
+
498
+ // Edge case: if the model RETRACTS cumulative text (rare — most
499
+ // LLM streams are strict-extension), `textToSend.length` may be
500
+ // less than `persistedTextLen`. `slice(persistedTextLen)` returns
501
+ // "" and the persist trigger's `tailLen > 0` guard short-circuits,
502
+ // so we silently skip. The live preview goes stale until the model
503
+ // re-extends past `persistedTextLen`. No crash, no double-send.
504
+ // Tolerated as the failure mode is benign and the cause is upstream.
505
+
506
+ // Hard-stop check — applies to the sendable size (full text for
507
+ // message transport, post-persist tail for draft transport). After
508
+ // a successful persist, the tail resets so this won't fire even
509
+ // for huge cumulative texts in the draft path.
510
+ const sendableLen = usesDraftTransport
511
+ ? textToSend.length - persistedTextLen
512
+ : textToSend.length
513
+ if (sendableLen > maxChars) {
514
+ log?.(`stream stopped: ${usesDraftTransport ? 'tail' : 'text'} exceeds ${maxChars} chars`)
269
515
  stopped = true
270
516
  notifyWaiters()
271
517
  return
@@ -309,9 +555,13 @@ export function createDraftStream(
309
555
  async function sendViaMessage(textToSend: string): Promise<void> {
310
556
  if (messageId == null) {
311
557
  messageId = await send(textToSend)
558
+ if (firstFireAtMs == null) firstFireAtMs = Date.now() - streamStartedAt
559
+ sendFires++
312
560
  log?.(`stream → sent (id: ${messageId}, ${textToSend.length} chars)`)
313
561
  } else {
314
562
  await edit(messageId, textToSend)
563
+ if (firstFireAtMs == null) firstFireAtMs = Date.now() - streamStartedAt
564
+ editFires++
315
565
  log?.(`stream → edited (id: ${messageId}, ${textToSend.length} chars)`)
316
566
  }
317
567
  }
@@ -406,14 +656,28 @@ export function createDraftStream(
406
656
  await flush()
407
657
  }
408
658
 
409
- // Draft transport: materialize as a real sendMessage for push notification,
410
- // then clear the draft best-effort.
659
+ // Draft transport: materialize as a real sendMessage for push
660
+ // notification, then clear the draft best-effort.
661
+ //
662
+ // PR C: with the persist-chain in play, earlier chunks may
663
+ // already be persisted as their own sendMessages. We materialize
664
+ // ONLY the unpersisted tail here — otherwise the user gets a
665
+ // duplicate of the prior chunks at turn end.
411
666
  if (usesDraftTransport && draftApi != null) {
412
- const textToMaterialize = lastSentText
413
- if (textToMaterialize) {
667
+ const fullText = lastSentText ?? ''
668
+ const textToMaterialize = fullText.slice(persistedTextLen)
669
+ if (textToMaterialize.length > 0) {
414
670
  try {
415
671
  messageId = await send(textToMaterialize)
416
- log?.(`stream materialized (id: ${messageId}, ${textToMaterialize.length} chars)`)
672
+ persistedTextLen = fullText.length
673
+ // PR follow-up: bump sendFires so the stream-end trace
674
+ // reflects the finalize-materialize sendMessage call. Pre-
675
+ // this fix, the counter under-reported by 1 for every
676
+ // draft-transport stream that produced a non-empty reply:
677
+ // gw-trace stream-end showed `drafts=N sends=0` even
678
+ // though sendMessage HAD fired (visible in tg-post lines).
679
+ sendFires++
680
+ log?.(`stream → materialized tail (id: ${messageId}, ${textToMaterialize.length} chars)`)
417
681
  } catch (err) {
418
682
  warn?.(`draft-stream: materialize sendMessage failed: ${err instanceof Error ? err.message : String(err)}`)
419
683
  }
@@ -425,10 +689,35 @@ export function createDraftStream(
425
689
  // Best-effort — ignore failures
426
690
  }
427
691
  }
692
+ } else if (draftId != null) {
693
+ // Whole text already persisted via the chain — just clear the
694
+ // current draft so the input area isn't left with stale
695
+ // preview content.
696
+ try {
697
+ await draftApi(chatId, draftId, '')
698
+ } catch {
699
+ // Best-effort — ignore
700
+ }
428
701
  }
429
702
  }
430
703
 
431
704
  log?.(`stream finalized (id: ${messageId})`)
705
+
706
+ // Stream-end trace — pairs with stream-start. `drafts`/`edits`/
707
+ // `sends` lets the aggregator see the transport ratio per stream;
708
+ // `firstFireMs` is the per-stream send latency component of TTFO;
709
+ // `chars` is the final committed text length.
710
+ if (process.env.SWITCHROOM_STREAM_TRACES !== '0') {
711
+ const durationMs = Date.now() - streamStartedAt
712
+ process.stderr.write(
713
+ `gw-trace stream-end transport=${usesDraftTransport ? 'draft' : 'message'} ` +
714
+ `drafts=${draftFires} sends=${sendFires} edits=${editFires} ` +
715
+ `fallbacks=${fallbackFires} persists=${persistChainFires} ` +
716
+ `firstFireMs=${firstFireAtMs ?? -1} durationMs=${durationMs} ` +
717
+ `chars=${(lastSentText ?? '').length} ` +
718
+ `chatId=${chatId || '-'}\n`,
719
+ )
720
+ }
432
721
  },
433
722
 
434
723
  getMessageId(): number | null {
@@ -34,6 +34,56 @@ export function shouldFallbackFromDraftTransport(err: unknown): boolean {
34
34
  return DRAFT_METHOD_UNAVAILABLE_RE.test(text) || DRAFT_CHAT_UNSUPPORTED_RE.test(text)
35
35
  }
36
36
 
37
+ /**
38
+ * PR D — extract the `retry_after` seconds from a grammY 429 error.
39
+ * Returns null when the error isn't a 429 (or has no retry_after).
40
+ *
41
+ * Shared with `issues-card.ts:extractRetryAfterSecs`. Duck-typed on the
42
+ * documented grammY `GrammyError` shape to keep this module
43
+ * test-friendly without importing `GrammyError` directly.
44
+ */
45
+ export function extractDraft429RetryAfterSecs(err: unknown): number | null {
46
+ if (err == null || typeof err !== 'object') return null
47
+ const e = err as { error_code?: unknown; parameters?: { retry_after?: unknown } }
48
+ if (e.error_code !== 429) return null
49
+ const ra = e.parameters?.retry_after
50
+ if (typeof ra === 'number' && Number.isFinite(ra) && ra > 0) return ra
51
+ return null
52
+ }
53
+
/**
 * PR D — was this a 429 from `sendMessageDraft` specifically? Used by
 * draft-stream to differentiate "draft is rate-limited" (transient,
 * just back off this stream) from a non-429 send error (handled
 * separately by `shouldFallbackFromDraftTransport`).
 *
 * Both cases trigger fallback to message transport for the rest of
 * the stream, but the 429 case ALSO bumps the throttle window to
 * honor Telegram's `retry_after` — so the message-transport fallback
 * doesn't immediately fire a fresh send before Telegram's cooldown
 * elapses and re-429s.
 *
 * @param err - Any thrown value.
 * @returns `true` only when `extractDraft429RetryAfterSecs(err)` yields a
 *   value AND the error names `sendMessageDraft` (case-insensitive) in its
 *   string `method` field, its `Error.message`, or its string
 *   `description` field.
 */
export function isDraft429(err: unknown): boolean {
  // The extractor returns non-null only for non-null objects, so past this
  // guard `err` is known to be an object. That makes a string-typed `err`
  // impossible here, which is why no string branch exists below.
  if (extractDraft429RetryAfterSecs(err) == null) return false

  const draftMethodRe = /sendMessageDraft/i
  const e = err as { method?: unknown; description?: unknown }

  // grammY GrammyError carries the method name in its `method` field.
  if (typeof e.method === 'string' && draftMethodRe.test(e.method)) return true

  // Best-effort fallback: look for the method name in the error text.
  const text =
    err instanceof Error
      ? err.message
      : typeof e.description === 'string'
        ? e.description
        : ''
  return draftMethodRe.test(text)
}
86
+
37
87
  /**
38
88
  * Symbol-keyed shared counter for draft-id allocation across concurrent
39
89
  * streams (mirrors openclaw's getDraftStreamState). Using Symbol.for ensures