switchroom 0.13.13 → 0.13.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -76,6 +76,7 @@ import {
76
76
  import { emitRuntimeMetric } from '../runtime-metrics.js'
77
77
  import { classifyInbound } from '../inbound-classifier.js'
78
78
  import * as silencePoke from '../silence-poke.js'
79
+ import * as pendingProgress from '../pending-work-progress.js'
79
80
  import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
80
81
  import { isFinalAnswerReply } from '../final-answer-detect.js'
81
82
  import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
@@ -2854,6 +2855,42 @@ const STREAM_THROTTLE_MS_OVERRIDE: number | undefined = (() => {
2854
2855
  return Number.isFinite(n) && n >= 0 ? n : undefined
2855
2856
  })()
2856
2857
  const TURN_FLUSH_SAFETY_ENABLED = isTurnFlushSafetyEnabled()
2858
+
2859
+ // #869-Phase1 / openclaw-pattern. When SET, the answer-lane stream
2860
+ // (telegram-plugin/answer-stream.ts) renders the model's transcript
2861
+ // text as a USER-VISIBLE edit-in-place message instead of writing to
2862
+ // Telegram's invisible compose-box draft (which is the default and
2863
+ // supports the #1664 "retract + re-prompt" contract). With this flag
2864
+ // on:
2865
+ // 1. createAnswerStream is instantiated without `sendMessageDraft`,
2866
+ // so it falls back to `sendMessage` + `editMessageText` for a
2867
+ // real chat-timeline message (`answer-stream.ts:212-214`).
2868
+ // 2. minInitialChars is set to 1 — the first text chunk pushes a
2869
+ // visible message immediately (TTFO under 5s for short turns).
2870
+ // 3. At turn_end, if the model never called reply / stream_reply
2871
+ // AND the streamed message has substantive captured text, the
2872
+ // gateway DOES NOT retract (which would delete a user-visible
2873
+ // message the user has been reading live); it calls
2874
+ // `stream.stop()` to freeze the current text as the final
2875
+ // answer, records the message in dedup + history, and marks
2876
+ // `turn.finalAnswerDelivered = true` so the #1664 silent-end
2877
+ // re-prompt does not fire. Turn-flush is suppressed for this
2878
+ // branch — its job (deliver captured text) is structurally
2879
+ // already done by the visible stream.
2880
+ // 4. The reply-tool / stream_reply path is unchanged — when the
2881
+ // model uses an explicit reply tool the prior streamed message
2882
+ // is retracted (delete) and the reply takes over as before.
2883
+ // Trade-off: a stream-as-final-answer turn does NOT push a device
2884
+ // notification (Telegram does not notify on edits, and we choose
2885
+ // not to send a duplicate fresh message for the ping). For short
2886
+ // turns where the user is actively watching, this is the right
2887
+ // shape — they see the answer materialise live. For longer waits,
2888
+ // the cross-turn pending-progress system (#1445/#1669) is the
2889
+ // canonical surface and DOES ping at the appropriate boundaries.
2890
+ // Default OFF; flip per-agent via env to canary the new behaviour.
2891
+ const ANSWER_STREAM_VISIBLE_ENABLED =
2892
+ process.env.SWITCHROOM_VISIBLE_ANSWER_STREAM === '1'
2893
+ || process.env.SWITCHROOM_VISIBLE_ANSWER_STREAM === 'true'
2857
2894
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
2858
2895
  const progressDriver: any = null
2859
2896
  const unpinProgressCardForChat: ((chatId: string, threadId: number | undefined) => void) | null = null
@@ -3149,6 +3186,7 @@ silencePoke.startTimer({
3149
3186
  // Drop silence-poke state and clear turn-active so the next inbound
3150
3187
  // for this chat starts a fresh turn instead of queueing forever.
3151
3188
  silencePoke.endTurn(fbKey)
3189
+ pendingProgress.noteTurnEnd(fbKey)
3152
3190
  purgeReactionTracking(fbKey)
3153
3191
  // Defense-in-depth: the fallback's purgeReactionTracking above
3154
3192
  // clears the canonical statusKey(chatId, threadId) for fbKey
@@ -3206,6 +3244,34 @@ silencePoke.startTimer({
3206
3244
  },
3207
3245
  })
3208
3246
 
3247
+ // #1445 cross-turn pending-async ambient. When a turn ends after the
3248
+ // model dispatched background async work (Agent / Task / Bash run-in-
3249
+ // background) and the model has stopped speaking, keep editing the
3250
+ // model's last reply in place at 60s intervals so the user sees
3251
+ // ambient liveness during the wait. Edits are silent, never spawn a
3252
+ // new pinged message, and stop the moment the user re-engages or the
3253
+ // model synthesises a handback. The full design rationale lives in
3254
+ // `pending-work-progress.ts`'s header docblock. Kill switch:
3255
+ // `SWITCHROOM_DISABLE_PENDING_PROGRESS=1`.
3256
+ pendingProgress.startTimer({
3257
+ editMessage: async (ctx) => {
3258
+ await swallowingApiCall(
3259
+ () =>
3260
+ lockedBot.api.editMessageText(
3261
+ ctx.chatId,
3262
+ ctx.messageId,
3263
+ ctx.newText,
3264
+ ),
3265
+ {
3266
+ chat_id: ctx.chatId,
3267
+ verb: 'pending-progress-edit',
3268
+ ...(ctx.threadId != null ? { threadId: ctx.threadId } : {}),
3269
+ },
3270
+ )
3271
+ },
3272
+ emitMetric: (event) => emitRuntimeMetric(event),
3273
+ })
3274
+
3209
3275
  // Per-agent buffer for synthetic inbounds the gateway couldn't deliver
3210
3276
  // because the bridge wasn't connected at send-time. Drained on
3211
3277
  // bridge-register so a fresh client picks up missed wake-ups before
@@ -3578,6 +3644,22 @@ const ipcServer: IpcServer = createIpcServer({
3578
3644
  label.length > 0 ? label : null,
3579
3645
  Date.now(),
3580
3646
  )
3647
+ // #1445 cross-turn pending-async ambient. Mark the chat as
3648
+ // having dispatched background work this turn so a turn_end
3649
+ // that follows activates the edit-in-place ambient line.
3650
+ // Covers `Agent` / `Task` (the harness-managed async path
3651
+ // — handback channel turn clears it) and `Bash` with
3652
+ // run_in_background:true (model is expected to poll
3653
+ // BashOutput; the ambient ticks until next inbound or the
3654
+ // 30-min budget cap).
3655
+ const evInput = ev.input as { run_in_background?: boolean } | undefined
3656
+ if (
3657
+ ev.toolName === 'Agent'
3658
+ || ev.toolName === 'Task'
3659
+ || (ev.toolName === 'Bash' && evInput?.run_in_background === true)
3660
+ ) {
3661
+ pendingProgress.noteAsyncDispatch(key)
3662
+ }
3581
3663
  }
3582
3664
  } else if (ev.kind === 'tool_result') {
3583
3665
  // #1292: drain the in-flight entry. Idempotent on unknown ids
@@ -4391,6 +4473,22 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
4391
4473
  }
4392
4474
  }
4393
4475
 
4476
+ // #1445 cross-turn pending-async ambient. Capture the last text
4477
+ // chunk as the anchor — if this turn ends with a pending async
4478
+ // dispatch, the framework edits THIS message in place every 60s
4479
+ // with a `— still working (Nm)` suffix until the user re-engages.
4480
+ // Multi-chunk replies: anchor is the LAST chunk (edits append to
4481
+ // the visually-trailing message; earlier chunks are left intact).
4482
+ if (sentIds.length === chunks.length && chunks.length > 0) {
4483
+ const anchorMsgId = sentIds[chunks.length - 1]
4484
+ if (typeof anchorMsgId === 'number') {
4485
+ pendingProgress.noteOutbound(statusKey(chat_id, threadId), {
4486
+ messageId: anchorMsgId,
4487
+ text: chunks[chunks.length - 1],
4488
+ })
4489
+ }
4490
+ }
4491
+
4394
4492
  // #273: when files is 2-10 photos, batch them into a single
4395
4493
  // sendMediaGroup album rather than N separate sendPhoto calls. The
4396
4494
  // user's device fires one notification for the album instead of N
@@ -4715,6 +4813,15 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
4715
4813
  const sChatId = args.chat_id as string
4716
4814
  const sThreadId = args.message_thread_id != null ? Number(args.message_thread_id) : undefined
4717
4815
  outboundDedup.record(sChatId, sThreadId, args.text as string, Date.now())
4816
+ // #1445 cross-turn pending-async ambient. The terminal stream_reply
4817
+ // (done=true) is the user-visible anchor for any cross-turn wait
4818
+ // that follows. Capture it so if this turn ends with a pending
4819
+ // async dispatch, the framework edits THIS message in place at
4820
+ // intervals.
4821
+ pendingProgress.noteOutbound(statusKey(sChatId, sThreadId), {
4822
+ messageId: result.messageId,
4823
+ text: args.text as string,
4824
+ })
4718
4825
  }
4719
4826
  // #1664 — mark the turn's final answer as delivered. For stream_reply a
4720
4827
  // call with done=true IS the final answer by definition (the model
@@ -5728,6 +5835,25 @@ function handleSessionEvent(ev: SessionEvent): void {
5728
5835
  // Drain any orphaned typing-wrap entries left over from a crashed
5729
5836
  // prior turn before resetting focus.
5730
5837
  typingWrapper.drainAll()
5838
+ if (ev.chatId) {
5839
+ // #1445 cross-turn pending-async ambient — backstop for the
5840
+ // `handleInbound` path's `clearPending('inbound')`. The
5841
+ // inbound path covers real user messages, but synthesised
5842
+ // wakes (subagent-handback channel turn, cron fires, vault
5843
+ // grant resumes, restart markers) push directly to
5844
+ // `pendingInboundBuffer` and bypass `handleInbound`. The
5845
+ // `enqueue` session-event fires for EVERY fresh turn atom
5846
+ // regardless of source — clearing here drops any prior turn's
5847
+ // ambient before the new turn's `noteOutbound` lands. The
5848
+ // call is idempotent so it's safe to fire in addition to the
5849
+ // inbound-path clear (for the real-inbound case, this is a
5850
+ // no-op because state was already deleted by then).
5851
+ const enqThreadId = ev.threadId != null ? Number(ev.threadId) : undefined
5852
+ pendingProgress.clearPending(
5853
+ statusKey(ev.chatId, enqThreadId),
5854
+ 'handback',
5855
+ )
5856
+ }
5731
5857
  if (ev.chatId) {
5732
5858
  // Issue #195: if a previous turn left an answer-lane stream open
5733
5859
  // (rapid steer/queue), force it to a new generation so its in-flight
@@ -5896,7 +6022,13 @@ function handleSessionEvent(ev: SessionEvent): void {
5896
6022
  chatId: turn.sessionChatId,
5897
6023
  isPrivateChat: turn.isDm,
5898
6024
  threadId: turn.sessionThreadId,
5899
- sendMessageDraft: sendMessageDraftFn,
6025
+ // #869-Phase1 visible-answer-stream: omit the draft API so
6026
+ // the lane uses the real sendMessage / editMessageText path
6027
+ // and edits a user-visible chat-timeline message instead
6028
+ // of the invisible compose-box draft.
6029
+ ...(ANSWER_STREAM_VISIBLE_ENABLED
6030
+ ? { minInitialChars: 1 }
6031
+ : { sendMessageDraft: sendMessageDraftFn }),
5900
6032
  // #1075: route through robustApiCall so flood-wait,
5901
6033
  // benign-400, and THREAD_NOT_FOUND are handled uniformly
5902
6034
  // instead of crashing the answer-stream loop on a deleted
@@ -6045,6 +6177,7 @@ function handleSessionEvent(ev: SessionEvent): void {
6045
6177
  // full message above). Match the pattern used at the regular
6046
6178
  // turn-end path (line ~5039) and the wedged-turn path (~5290).
6047
6179
  silencePoke.endTurn(ceKey)
6180
+ pendingProgress.noteTurnEnd(ceKey)
6048
6181
  // Issue #195: tear down the answer-lane stream on context-exhaustion
6049
6182
  // bail-out. The user is being told the session needs /restart, so any
6050
6183
  // partially-streamed answer would be misleading.
@@ -6098,20 +6231,71 @@ function handleSessionEvent(ev: SessionEvent): void {
6098
6231
  // (regression for short no-tool replies). Order matters here: this
6099
6232
  // call must come before the retract/null block.
6100
6233
  preambleSuppressor.flushNow()
6101
- // #656: always retract the answer-lane stream at turn_end. Turn-flush
6102
- // (gateway.ts ~3475) is the sole canonical emitter for no-reply turns —
6103
- // it runs markdownToHtml and records to outboundDedup. Materializing
6104
- // here would race turn-flush and post raw model text (no HTML conv).
6234
+ // #656: by default we ALWAYS retract the answer-lane stream at
6235
+ // turn_end. Turn-flush is the canonical emitter for no-reply
6236
+ // turns; materialising here would race it and post raw model
6237
+ // text (no HTML conv).
6238
+ //
6239
+ // #869-Phase1 override: when `ANSWER_STREAM_VISIBLE_ENABLED` is
6240
+ // on, the stream is rendering a USER-VISIBLE message in the
6241
+ // chat timeline. Retracting (delete) destroys content the user
6242
+ // has been reading live — the worst possible UX flicker. So
6243
+ // when the stream is the de-facto final answer (model never
6244
+ // called reply, captured text is substantive) we instead call
6245
+ // `stream.stop()` to freeze it as the final state, record the
6246
+ // outbound for history + dedup, mark the turn answered, and
6247
+ // suppress the turn-flush IIFE downstream.
6248
+ let streamFinalizedAsAnswer = false
6105
6249
  if (turn?.answerStream != null) {
6106
6250
  const stream = turn.answerStream
6107
- turn.answerStream = null
6108
- void stream.retract().catch((err) => {
6251
+ const streamedMsgId = stream.messageId()
6252
+ const streamedFinalText = turn.capturedText.join('').trim()
6253
+ if (
6254
+ ANSWER_STREAM_VISIBLE_ENABLED
6255
+ && !turn.replyCalled
6256
+ && streamedMsgId != null
6257
+ && streamedFinalText.length > 0
6258
+ ) {
6259
+ turn.answerStream = null
6260
+ stream.stop()
6261
+ streamFinalizedAsAnswer = true
6262
+ turn.finalAnswerDelivered = true
6263
+ // Record as canonical outbound so retries dedup against it
6264
+ // and the SQLite history can surface it. Mirrors the
6265
+ // hooks that turn-flush and reply both run.
6266
+ try {
6267
+ outboundDedup.record(
6268
+ turn.sessionChatId,
6269
+ turn.sessionThreadId,
6270
+ streamedFinalText,
6271
+ Date.now(),
6272
+ )
6273
+ } catch { /* best-effort */ }
6274
+ if (HISTORY_ENABLED) {
6275
+ try {
6276
+ recordOutbound({
6277
+ chat_id: turn.sessionChatId,
6278
+ thread_id: turn.sessionThreadId ?? null,
6279
+ message_ids: [streamedMsgId],
6280
+ texts: [streamedFinalText],
6281
+ })
6282
+ } catch { /* best-effort */ }
6283
+ }
6109
6284
  process.stderr.write(
6110
- `telegram gateway: answer-stream retract failed: ${
6111
- err instanceof Error ? err.message : String(err)
6112
- }\n`,
6285
+ `telegram gateway: answer-stream finalized as answer ` +
6286
+ `chat=${turn.sessionChatId} msg=${streamedMsgId} ` +
6287
+ `chars=${streamedFinalText.length}\n`,
6113
6288
  )
6114
- })
6289
+ } else {
6290
+ turn.answerStream = null
6291
+ void stream.retract().catch((err) => {
6292
+ process.stderr.write(
6293
+ `telegram gateway: answer-stream retract failed: ${
6294
+ err instanceof Error ? err.message : String(err)
6295
+ }\n`,
6296
+ )
6297
+ })
6298
+ }
6115
6299
  }
6116
6300
  if (turn == null) return
6117
6301
  const chatId = turn.sessionChatId
@@ -6123,12 +6307,19 @@ function handleSessionEvent(ev: SessionEvent): void {
6123
6307
  // surface to recover from. The decideTurnFlush 'empty-text'
6124
6308
  // path now relies on capturedText alone.
6125
6309
 
6126
- const flushDecision = decideTurnFlush({
6127
- chatId: turn.sessionChatId,
6128
- replyCalled: turn.replyCalled,
6129
- capturedText: turn.capturedText,
6130
- flushEnabled: TURN_FLUSH_SAFETY_ENABLED,
6131
- })
6310
+ // #869-Phase1: when the answer-stream finalised as the answer
6311
+ // above, skip the turn-flush IIFE entirely — its job (deliver
6312
+ // captured text) is already done by the visible stream, and
6313
+ // running it would race a duplicate fresh-sendMessage against
6314
+ // the user-visible edited message.
6315
+ const flushDecision = streamFinalizedAsAnswer
6316
+ ? ({ kind: 'skip', reason: 'reply-called' } as ReturnType<typeof decideTurnFlush>)
6317
+ : decideTurnFlush({
6318
+ chatId: turn.sessionChatId,
6319
+ replyCalled: turn.replyCalled,
6320
+ capturedText: turn.capturedText,
6321
+ flushEnabled: TURN_FLUSH_SAFETY_ENABLED,
6322
+ })
6132
6323
  if (flushDecision.kind === 'skip' && flushDecision.reason !== 'reply-called') {
6133
6324
  process.stderr.write(
6134
6325
  `telegram gateway: turn-flush skipped — reason=${flushDecision.reason}\n`,
@@ -6230,6 +6421,7 @@ function handleSessionEvent(ev: SessionEvent): void {
6230
6421
  try { removeTurnActiveMarker(STATE_DIR) } catch { /* best-effort */ }
6231
6422
  signalTracker.clear(tKey)
6232
6423
  silencePoke.endTurn(tKey)
6424
+ pendingProgress.noteTurnEnd(tKey)
6233
6425
  }
6234
6426
  lastPtyPreviewByChat.delete(statusKey(chatId, threadId))
6235
6427
  pendingPtyPartial = null
@@ -6304,6 +6496,7 @@ function handleSessionEvent(ev: SessionEvent): void {
6304
6496
  const tKey = statusKey(chatId, threadId)
6305
6497
  signalTracker.clear(tKey)
6306
6498
  silencePoke.endTurn(tKey)
6499
+ pendingProgress.noteTurnEnd(tKey)
6307
6500
  }
6308
6501
 
6309
6502
  void (async () => {
@@ -6550,6 +6743,7 @@ function handleSessionEvent(ev: SessionEvent): void {
6550
6743
  }
6551
6744
  signalTracker.clear(tKey)
6552
6745
  silencePoke.endTurn(tKey)
6746
+ pendingProgress.noteTurnEnd(tKey)
6553
6747
  }
6554
6748
  lastPtyPreviewByChat.delete(statusKey(chatId, threadId))
6555
6749
  pendingPtyPartial = null
@@ -7772,6 +7966,18 @@ async function handleInbound(
7772
7966
  // the framework can nudge the model if it goes quiet past the
7773
7967
  // soft / firm thresholds.
7774
7968
  silencePoke.startTurn(statusKey(chat_id, messageThreadId), Date.now())
7969
+ // #1445 cross-turn pending-async ambient. A new turn starting
7970
+ // from a real user inbound (synthesised wakes bypass this path) is the
7971
+ // signal that the model is about to re-engage — clear any
7972
+ // pending-progress edits anchored to the *prior* turn's
7973
+ // outbound so the framework stops talking over the new turn.
7974
+ // clearPending drops the per-key state outright, so the new
7975
+ // turn's `tool_use(Agent|Task|Bash bg)` + outbound are captured
7976
+ // afresh via `noteAsyncDispatch` / `noteOutbound`.
7977
+ pendingProgress.clearPending(
7978
+ statusKey(chat_id, messageThreadId),
7979
+ 'inbound',
7980
+ )
7775
7981
  // Human-feel UX: hold a continuous `typing…` indicator for the
7776
7982
  // WHOLE turn, not just the split-second a reply is transmitted.
7777
7983
  // A person you message shows as typing the entire time they