switchroom 0.12.29 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1278,32 +1278,62 @@ function streamKey(chatId: string, threadId?: number | null): string {
1278
1278
  return chatKey(chatId, threadId)
1279
1279
  }
1280
1280
 
1281
- function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1282
- // Phase 2b: turn end. The key was registered via setTurnStarted when
1283
- // the inbound arrived; purge is the canonical turn-end signal.
1284
- //
1285
- // outboundEmitted: read from the explicit `endingTurn` parameter when
1286
- // provided (canonical path via endCurrentTurnAtomic module-scope
1287
- // currentTurn is already null by the time we get here), falling back
1288
- // to `currentTurn?.replyCalled` for the legacy callsites that haven't
1289
- // been threaded yet (sibling-key purges, restart-init cleanup).
1290
- // Without this explicit-turn handoff the shadow trace would report
1291
- // outboundEmitted=false on every replied turn (the dominant happy
1292
- // path), producing strictly worse data than the blind `true` it
1293
- // replaced. Invariant #5's `lastOutboundAt` correctness depends on
1294
- // this signal being accurate.
1295
- const outboundEmitted = endingTurn != null
1296
- ? endingTurn.replyCalled === true
1297
- : currentTurn?.replyCalled === true
1298
- shadowEmit({ kind: 'turnEnd', key: key as _ChatKey, at: Date.now(), outboundEmitted })
1281
+ /**
1282
+ * Reaction-state cleanup: status-reaction controller + msg-id map
1283
+ * entries + active-reaction file removal. PURE reaction-cleanup, no turn-end semantics:
1284
+ * - does NOT emit shadow `turnEnd`
1285
+ * - does NOT clear `activeTurnStartedAt` (turn-active marker)
1286
+ * - does NOT fire the model-idle restart/flush gate
1287
+ *
1288
+ * Called from mid-turn signals like `endStatusReaction` (post-reply-tool,
1289
+ * post-stream-reply-finalize) where the 👍 transition fires but the
1290
+ * turn is still active. Per #1603 audit step 2: the reply tool was
1291
+ * previously calling `purgeReactionTracking` here, which fired premature
1292
+ * shadow `turnEnd` events and cleared `activeTurnStartedAt` mid-turn —
1293
+ * the latter would trigger the model-idle restart probe and
1294
+ * pendingInbound flush as if claude had gone idle.
1295
+ */
1296
+ function clearReactionState(key: string): void {
1299
1297
  const msgInfo = activeReactionMsgIds.get(key)
1300
1298
  activeStatusReactions.delete(key)
1301
1299
  activeReactionMsgIds.delete(key)
1302
- activeTurnStartedAt.delete(key)
1303
1300
  if (msgInfo) {
1304
1301
  const agentDir = resolveAgentDirFromEnv()
1305
1302
  if (agentDir != null) removeActiveReaction(agentDir, msgInfo.chatId, msgInfo.messageId)
1306
1303
  }
1304
+ }
1305
+
1306
+ function purgeReactionTracking(
1307
+ key: string,
1308
+ endingTurn?: CurrentTurn,
1309
+ outboundEmittedOverride?: boolean,
1310
+ ): void {
1311
+ // Phase 2b: turn end. The key was registered via setTurnStarted when
1312
+ // the inbound arrived; purge is the canonical turn-end signal.
1313
+ //
1314
+ // outboundEmitted derivation, in precedence order:
1315
+ // 1. Explicit `outboundEmittedOverride` (e.g. silence-poke
1316
+ // framework fallback FORCES false because the 5-min fallback
1317
+ // firing proves visible delivery never happened — regardless of
1318
+ // whatever `replyCalled` the wedged turn object carries).
1319
+ // 2. `endingTurn.replyCalled` when the canonical caller threads
1320
+ // the authoritative turn (endCurrentTurnAtomic path; module-scope
1321
+ // currentTurn is already null by the time we get here).
1322
+ // 3. `currentTurn?.replyCalled` fallback for the (now-vanishing)
1323
+ // legacy callsites. Without the explicit-turn handoff the shadow
1324
+ // trace would report outboundEmitted=false on every replied
1325
+ // turn (the dominant happy path), producing strictly worse data
1326
+ // than the blind `true` it replaced. Invariant #5's
1327
+ // `lastOutboundAt` correctness depends on this signal being
1328
+ // accurate.
1329
+ const outboundEmitted = outboundEmittedOverride !== undefined
1330
+ ? outboundEmittedOverride
1331
+ : endingTurn != null
1332
+ ? endingTurn.replyCalled === true
1333
+ : currentTurn?.replyCalled === true
1334
+ shadowEmit({ kind: 'turnEnd', key: key as _ChatKey, at: Date.now(), outboundEmitted })
1335
+ clearReactionState(key)
1336
+ activeTurnStartedAt.delete(key)
1307
1337
 
1308
1338
  // If no more active turns and a restart is pending, perform it now.
1309
1339
  //
@@ -1593,12 +1623,24 @@ async function resolveCompactCard(
1593
1623
  }
1594
1624
 
1595
1625
  function endStatusReaction(chatId: string, threadId: number | undefined, outcome: 'done' | 'error'): void {
1626
+ // Mid-turn signal: the reply tool fired, or stream_reply finalized,
1627
+ // and the status-reaction needs to transition to its terminal emoji
1628
+ // (👍 / ⚠️). The turn itself is still active — the canonical turn-end
1629
+ // signal is `endCurrentTurnAtomic(turn)`, which runs later via the
1630
+ // turn_end handler / context-exhaust path / silent-marker path.
1631
+ //
1632
+ // Pre-#1603 audit step 2 (this commit), this called
1633
+ // `purgeReactionTracking(key)` directly, which would fire shadow
1634
+ // `turnEnd` and clear the turn-active marker mid-turn — the latter
1635
+ // triggering the model-idle restart probe + pendingInbound flush as
1636
+ // if claude had gone idle. Use `clearReactionState` to only do the
1637
+ // reaction-cleanup work.
1596
1638
  const key = statusKey(chatId, threadId)
1597
1639
  const ctrl = activeStatusReactions.get(key)
1598
1640
  if (!ctrl) return
1599
1641
  if (outcome === 'done') ctrl.setDone()
1600
1642
  else ctrl.setError()
1601
- purgeReactionTracking(key)
1643
+ clearReactionState(key)
1602
1644
  }
1603
1645
 
1604
1646
  function resolveThreadId(chat_id: string, explicit?: string | number | null): number | undefined {
@@ -2786,6 +2828,17 @@ function postLegacyBanner(
2786
2828
  // short-circuit to no-ops at runtime. `progressDriver` is typed `any`
2787
2829
  // so TS doesn't resolve `progressDriver?.X` to `never`.
2788
2830
  const streamMode = process.env.SWITCHROOM_TG_STREAM_MODE ?? 'checklist'
2831
+ // PR B: per-agent stream throttle override via channels.telegram.stream_throttle_ms.
2832
+ // When unset, draft-stream.ts applies transport-aware defaults (300 ms draft,
2833
+ // 1000 ms message). Parsed once at boot; negative or unparseable values fall
2834
+ // back to undefined so the per-transport default wins. See `src/agents/scaffold.ts`
2835
+ // `channelsToEnv()` for the yaml → env wiring.
2836
+ const STREAM_THROTTLE_MS_OVERRIDE: number | undefined = (() => {
2837
+ const raw = process.env.SWITCHROOM_TG_STREAM_THROTTLE_MS
2838
+ if (raw == null || raw === '') return undefined
2839
+ const n = Number.parseInt(raw, 10)
2840
+ return Number.isFinite(n) && n >= 0 ? n : undefined
2841
+ })()
2789
2842
  const TURN_FLUSH_SAFETY_ENABLED = isTurnFlushSafetyEnabled()
2790
2843
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
2791
2844
  const progressDriver: any = null
@@ -3082,7 +3135,15 @@ silencePoke.startTimer({
3082
3135
  // Drop silence-poke state and clear turn-active so the next inbound
3083
3136
  // for this chat starts a fresh turn instead of queueing forever.
3084
3137
  silencePoke.endTurn(fbKey)
3085
- purgeReactionTracking(fbKey)
3138
+ // PR 3b step 5 (#1603 audit): force outboundEmitted=false. The
3139
+ // framework fallback fires precisely because visible delivery
3140
+ // didn't happen in 5 min — `wedgedTurn.replyCalled` may have been
3141
+ // set during the turn (e.g. reply tool invoked but Telegram side
3142
+ // never confirmed delivery), but from the user's perspective no
3143
+ // outbound landed. The state machine's `noteOutbound` effect
3144
+ // must NOT fire for this path. Pass `undefined` for endingTurn
3145
+ // and `false` as the explicit override.
3146
+ purgeReactionTracking(fbKey, undefined, false)
3086
3147
  // Defense-in-depth: the fallback's purgeReactionTracking above
3087
3148
  // clears the canonical statusKey(chatId, threadId) for fbKey
3088
3149
  // only. activeTurnStartedAt can hold sibling entries for the
@@ -3095,10 +3156,14 @@ silencePoke.startTimer({
3095
3156
  // purger. Multi-chat-safe — only touches keys for fbChatId, so
3096
3157
  // #1546's intentional cross-chat safety guard is preserved.
3097
3158
  // See turn-state-purge.ts.
3159
+ //
3160
+ // Same `outboundEmitted=false` rationale as the bare call above —
3161
+ // wrap the purger so every sibling-key purge emits a fallback
3162
+ // shadow turnEnd with the truthful "no visible delivery" signal.
3098
3163
  const fbExtraPurge = purgeStaleTurnsForChat(
3099
3164
  fbChatId,
3100
3165
  activeTurnStartedAt.keys(),
3101
- purgeReactionTracking,
3166
+ (k) => purgeReactionTracking(k, undefined, false),
3102
3167
  )
3103
3168
  // Null `currentTurn` if it's still pointing at the wedged turn —
3104
3169
  // when claude eventually fires a late `turn_end` for this session
@@ -4471,7 +4536,13 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
4471
4536
  recordOutbound,
4472
4537
  ...(HISTORY_ENABLED ? { getLatestInboundMessageId } : {}),
4473
4538
  writeError: (line) => process.stderr.write(line),
4474
- throttleMs: 600,
4539
+ // PR B: drop the legacy 600 ms compromise. When the operator sets
4540
+ // `channels.telegram.stream_throttle_ms` in yaml, the env override
4541
+ // wins; otherwise draft-stream's transport-aware default fires
4542
+ // (300 ms draft / 1000 ms message). Omitting `throttleMs` is the
4543
+ // signal — handleStreamReply forwards it only when it is set, so
4544
+ // an unset value reaches draft-stream, where the default applies.
4545
+ ...(STREAM_THROTTLE_MS_OVERRIDE != null ? { throttleMs: STREAM_THROTTLE_MS_OVERRIDE } : {}),
4475
4546
  progressCardActive: streamMode === 'checklist',
4476
4547
  },
4477
4548
  )
@@ -5811,7 +5882,10 @@ function handleSessionEvent(ev: SessionEvent): void {
5811
5882
  const ceKey = statusKey(chatId, threadId)
5812
5883
  const ctrl = activeStatusReactions.get(ceKey)
5813
5884
  if (ctrl) ctrl.setError()
5814
- purgeReactionTracking(ceKey)
5885
+ // Duplicate-emit removed (#1603 audit, step 1): the canonical
5886
+ // endCurrentTurnAtomic(turn) call at line ~5851 below already
5887
+ // invokes purgeReactionTracking on the same ceKey. The bare
5888
+ // call here was firing a second shadow `turnEnd` per traversal.
5815
5889
  // Surfaced during CC-5 investigation (`docs/status-ask-cause-classes.md`):
5816
5890
  // the context-exhaust bail path teardown was missing
5817
5891
  // `silencePoke.endTurn(key)`. Without it, the silence-poke state for
@@ -5969,7 +6043,10 @@ function handleSessionEvent(ev: SessionEvent): void {
5969
6043
  // Fall through to normal state cleanup (ctrl.setDone, purge, etc.)
5970
6044
  // but skip the regular closeProgressLane so we don't re-finalize.
5971
6045
  if (ctrl) ctrl.setDone()
5972
- purgeReactionTracking(statusKey(chatId, threadId))
6046
+ // Duplicate-emit removed (#1603 audit, step 1): endCurrentTurnAtomic(turn)
6047
+ // at line ~6049 below invokes purgeReactionTracking on the same key
6048
+ // (statusKey(chatId, threadId)). The bare call here was firing a
6049
+ // second shadow `turnEnd` per silent-marker traversal.
5973
6050
  // Match the normal turn_end path's telemetry so silent-marker turns
5974
6051
  // still appear in turn-duration graphs.
5975
6052
  {
@@ -6110,7 +6187,15 @@ function handleSessionEvent(ev: SessionEvent): void {
6110
6187
  // mirroring this contract — so reply-only turns transition
6111
6188
  // to terminal 👍 in their own success path rather than
6112
6189
  // relying on this dedup heuristic.
6113
- purgeReactionTracking(statusKey(backstopChatId, backstopThreadId))
6190
+ //
6191
+ // PR 3b step 3 (#1603 audit): thread the captured `turn`
6192
+ // explicitly. `endCurrentTurnAtomic(turn)` ran at line ~6120
6193
+ // before this IIFE started, so `currentTurn === null` by
6194
+ // now — without an explicit endingTurn argument, the shadow
6195
+ // trace would read `outboundEmitted=false` for this dedup
6196
+ // path even though `recentCount > 0` proves the reply tool
6197
+ // did fire (turn.replyCalled === true).
6198
+ purgeReactionTracking(statusKey(backstopChatId, backstopThreadId), turn)
6114
6199
  return
6115
6200
  }
6116
6201
  } catch {}
@@ -6238,14 +6323,35 @@ function handleSessionEvent(ev: SessionEvent): void {
6238
6323
  process.stderr.write(`telegram gateway: turn-flush send failed: ${(err as Error).message}\n`)
6239
6324
  if (backstopCtrl) backstopCtrl.setError()
6240
6325
  } finally {
6241
- purgeReactionTracking(statusKey(backstopChatId, backstopThreadId))
6326
+ // PR 3b step 3 (#1603 audit): thread the captured `turn`
6327
+ // explicitly. The turn-flush backstop runs inside this IIFE
6328
+ // after `endCurrentTurnAtomic(turn)` already nulled
6329
+ // `currentTurn` at line ~6120. Without threading, the shadow
6330
+ // trace would derive `outboundEmitted` from
6331
+ // `currentTurn?.replyCalled === true` → false. For the turn-flush path
6332
+ // `turn.replyCalled` is `false` regardless (the model didn't
6333
+ // call the reply tool — the gateway backstop did the work),
6334
+ // so the threaded value matches the existing fallback here.
6335
+ // But pinning the source via the captured turn matches the
6336
+ // canonical pattern and survives any future change to how
6337
+ // `currentTurn` is sequenced.
6338
+ purgeReactionTracking(statusKey(backstopChatId, backstopThreadId), turn)
6242
6339
  }
6243
6340
  })()
6244
6341
  return
6245
6342
  }
6246
6343
 
6247
6344
  if (ctrl) ctrl.setDone()
6248
- purgeReactionTracking(statusKey(chatId, threadId))
6345
+ // Duplicate-emit removed (#1603 audit, step 4 — the audit's
6346
+ // original "route through endCurrentTurnAtomic" recommendation
6347
+ // missed that this same code path already calls
6348
+ // `endCurrentTurnAtomic(turn)` ~90 lines below at line ~6412
6349
+ // on the same key — `chatId === turn.sessionChatId` and
6350
+ // `threadId === turn.sessionThreadId` per the bindings at
6351
+ // ~5946-5947. Removing this bare call closes the last duplicate
6352
+ // shadow-`turnEnd` emit on the dominant happy-path turn-end
6353
+ // tail; the canonical primitive below still fires the single
6354
+ // authoritative turnEnd with the threaded turn).
6249
6355
  {
6250
6356
  const sKey = streamKey(chatId, threadId)
6251
6357
  const turnDurationMs = turn.startedAt > 0 ? Date.now() - turn.startedAt : 0
@@ -6418,6 +6524,14 @@ function handlePtyActivity(text: string): void {
6418
6524
  historyEnabled: false,
6419
6525
  recordOutbound,
6420
6526
  writeError: (line) => process.stderr.write(line),
6527
+ // PR B note: this is the PTY-activity stream, NOT the LLM
6528
+ // stream_reply path. PTY drives many tiny partials as a TUI
6529
+ // re-renders; 600 ms is a deliberate compromise tuned for the
6530
+ // PTY flicker characteristics, not LLM token cadence. The
6531
+ // transport-aware defaults (300/1000) deliberately do NOT
6532
+ // apply here. If you change this, also check
6533
+ // telegram-plugin/pty-partial-handler.ts:159 which has the
6534
+ // same value for the same reason.
6421
6535
  throttleMs: 600,
6422
6536
  },
6423
6537
  ).catch((err) => {
@@ -514,7 +514,9 @@ export async function handleStreamReply(
514
514
  threadId,
515
515
  parseMode,
516
516
  disableLinkPreview: deps.disableLinkPreview,
517
- throttleMs: deps.throttleMs ?? 600,
517
+ // PR B: pass undefined when caller didn't override, so draft-stream's
518
+ // transport-aware default (300 ms draft / 1000 ms message) wins.
519
+ ...(deps.throttleMs != null ? { throttleMs: deps.throttleMs } : {}),
518
520
  retry: deps.retry,
519
521
  ...(replyToMessageId != null ? { replyToMessageId } : {}),
520
522
  ...(args.quote_text != null && replyToMessageId != null ? { quoteText: args.quote_text } : {}),