switchroom 0.13.15 → 0.13.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47331,8 +47331,8 @@ var {
47331
47331
  } = import__.default;
47332
47332
 
47333
47333
  // src/build-info.ts
47334
- var VERSION = "0.13.15";
47335
- var COMMIT_SHA = "bc0b5540";
47334
+ var VERSION = "0.13.17";
47335
+ var COMMIT_SHA = "84eb8ad9";
47336
47336
 
47337
47337
  // src/cli/agent.ts
47338
47338
  init_source();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "switchroom",
3
- "version": "0.13.15",
3
+ "version": "0.13.17",
4
4
  "description": "Run Claude Code 24/7 on your Claude Pro/Max subscription over Telegram. Open-source alternative to OpenClaw and NanoClaw — no API keys.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -37034,6 +37034,56 @@ function emitRuntimeMetric(event) {
37034
37034
  captureEvent(event.kind, { ...event, ts: wrapped.ts });
37035
37035
  }
37036
37036
 
37037
+ // over-ping-safety-net.ts
37038
+ function decideOverPing(input) {
37039
+ if (!input.modelRequestedPing) {
37040
+ return { suppress: false, claimSlot: false, sinceFirstPingMs: null };
37041
+ }
37042
+ if (input.firstPingAt != null) {
37043
+ return {
37044
+ suppress: true,
37045
+ claimSlot: false,
37046
+ sinceFirstPingMs: input.nowMs - input.firstPingAt
37047
+ };
37048
+ }
37049
+ return { suppress: false, claimSlot: true, sinceFirstPingMs: null };
37050
+ }
37051
+
37052
+ // silent-reply-anchor.ts
37053
+ var TELEGRAM_MSG_CAP = 4000;
37054
+ function enabled2() {
37055
+ const v = process.env.SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT;
37056
+ return !(v === "1" || v === "true");
37057
+ }
37058
+ function decideSilentReplyAnchor(input) {
37059
+ if (!enabled2()) {
37060
+ return { kind: "fresh", becomesAnchor: false };
37061
+ }
37062
+ if (!input.effectivelySilent) {
37063
+ return { kind: "fresh", becomesAnchor: false };
37064
+ }
37065
+ if (input.hasFiles || input.hasButtons) {
37066
+ return { kind: "fresh", becomesAnchor: false };
37067
+ }
37068
+ if (input.newReplyText.trim().length === 0) {
37069
+ return { kind: "fresh", becomesAnchor: false };
37070
+ }
37071
+ if (input.anchorMessageId == null) {
37072
+ return { kind: "fresh", becomesAnchor: true };
37073
+ }
37074
+ const merged = `${input.anchorText}
37075
+
37076
+ ${input.newReplyText}`;
37077
+ if (merged.length > TELEGRAM_MSG_CAP) {
37078
+ return { kind: "fresh", becomesAnchor: true };
37079
+ }
37080
+ return {
37081
+ kind: "edit-anchor",
37082
+ messageId: input.anchorMessageId,
37083
+ mergedText: merged
37084
+ };
37085
+ }
37086
+
37037
37087
  // inbound-classifier.ts
37038
37088
  var STATUS_QUERY_PATTERNS = [
37039
37089
  /^\?+$/,
@@ -37300,12 +37350,12 @@ function startTimer(deps) {
37300
37350
  var EDIT_INTERVAL_MS = 60000;
37301
37351
  var POLL_INTERVAL_MS = 5000;
37302
37352
  var MAX_LIFETIME_MS = 30 * 60000;
37303
- var TELEGRAM_MSG_CAP = 4000;
37353
+ var TELEGRAM_MSG_CAP2 = 4000;
37304
37354
  var SUFFIX_RE = /\n\n\u2014 still working \(\d+m\)$/;
37305
37355
  var stateByKey = new Map;
37306
37356
  var timer2 = null;
37307
37357
  var activeDeps2 = null;
37308
- function enabled2() {
37358
+ function enabled3() {
37309
37359
  const v = process.env.SWITCHROOM_DISABLE_PENDING_PROGRESS;
37310
37360
  return !(v === "1" || v === "true");
37311
37361
  }
@@ -37327,19 +37377,19 @@ function ensure(key) {
37327
37377
  return s;
37328
37378
  }
37329
37379
  function noteAsyncDispatch(key) {
37330
- if (!enabled2())
37380
+ if (!enabled3())
37331
37381
  return;
37332
37382
  ensure(key).pending = true;
37333
37383
  }
37334
37384
  function noteOutbound3(key, opts) {
37335
- if (!enabled2())
37385
+ if (!enabled3())
37336
37386
  return;
37337
37387
  const s = ensure(key);
37338
37388
  s.anchorMessageId = opts.messageId;
37339
37389
  s.anchorOriginalText = opts.text.replace(SUFFIX_RE, "");
37340
37390
  }
37341
37391
  function noteTurnEnd(key) {
37342
- if (!enabled2())
37392
+ if (!enabled3())
37343
37393
  return;
37344
37394
  const s = stateByKey.get(key);
37345
37395
  if (s == null)
@@ -37369,7 +37419,7 @@ function clearPending(key, reason) {
37369
37419
  });
37370
37420
  }
37371
37421
  function startTimer2(deps) {
37372
- if (!enabled2())
37422
+ if (!enabled3())
37373
37423
  return;
37374
37424
  if (timer2 != null)
37375
37425
  return;
@@ -37409,7 +37459,7 @@ function tick2(now) {
37409
37459
 
37410
37460
  \u2014 still working (${minutes}m)`;
37411
37461
  const newText = s.anchorOriginalText + suffix;
37412
- if (newText.length > TELEGRAM_MSG_CAP) {
37462
+ if (newText.length > TELEGRAM_MSG_CAP2) {
37413
37463
  s.lastEditAt = now;
37414
37464
  continue;
37415
37465
  }
@@ -44586,9 +44636,9 @@ function transition(state3, event) {
44586
44636
 
44587
44637
  // gateway/inbound-delivery-machine-shadow.ts
44588
44638
  var state3 = initialState();
44589
- var enabled3 = process.env.SWITCHROOM_DELIVERY_MACHINE_SHADOW !== "0";
44639
+ var enabled4 = process.env.SWITCHROOM_DELIVERY_MACHINE_SHADOW !== "0";
44590
44640
  function shadowEmit(event) {
44591
- if (!enabled3)
44641
+ if (!enabled4)
44592
44642
  return [];
44593
44643
  try {
44594
44644
  const result = transition(state3, event);
@@ -44646,12 +44696,12 @@ function redeliverBufferedInbound2(buffer, agent, send, spool) {
44646
44696
  }
44647
44697
 
44648
44698
  // gateway/inbound-delivery-machine-dispatch.ts
44649
- var enabled4 = process.env.SWITCHROOM_DELIVERY_MACHINE_CUTOVER !== "0";
44699
+ var enabled5 = process.env.SWITCHROOM_DELIVERY_MACHINE_CUTOVER !== "0";
44650
44700
  function isDispatchEnabled() {
44651
- return enabled4;
44701
+ return enabled5;
44652
44702
  }
44653
44703
  function dispatchEffects(effects, ctx) {
44654
- if (!enabled4)
44704
+ if (!enabled5)
44655
44705
  return;
44656
44706
  for (const effect of effects) {
44657
44707
  dispatchOne(effect, ctx);
@@ -48154,10 +48204,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
48154
48204
  }
48155
48205
 
48156
48206
  // ../src/build-info.ts
48157
- var VERSION = "0.13.15";
48158
- var COMMIT_SHA = "bc0b5540";
48159
- var COMMIT_DATE = "2026-05-23T02:55:43Z";
48160
- var LATEST_PR = 1673;
48207
+ var VERSION = "0.13.17";
48208
+ var COMMIT_SHA = "84eb8ad9";
48209
+ var COMMIT_DATE = "2026-05-23T05:24:12Z";
48210
+ var LATEST_PR = 1678;
48161
48211
  var COMMITS_AHEAD_OF_TAG = 0;
48162
48212
 
48163
48213
  // gateway/boot-version.ts
@@ -50617,7 +50667,30 @@ async function executeReply(args) {
50617
50667
  const configParseMode = access.parseMode ?? "html";
50618
50668
  const format = args.format ?? configParseMode;
50619
50669
  const disableLinkPreview = args.disable_web_page_preview != null ? Boolean(args.disable_web_page_preview) : access.disableLinkPreview ?? true;
50620
- const disableNotification = args.disable_notification === true;
50670
+ let disableNotification = args.disable_notification === true;
50671
+ {
50672
+ const turn2 = currentTurn;
50673
+ if (turn2 != null) {
50674
+ const now = Date.now();
50675
+ const decision = decideOverPing({
50676
+ modelRequestedPing: !disableNotification,
50677
+ firstPingAt: turn2.firstPingAt,
50678
+ nowMs: now
50679
+ });
50680
+ if (decision.suppress) {
50681
+ process.stderr.write(`telegram gateway: reply over-ping safety net \u2014 ` + `downgrading disable_notification:false \u2192 true ` + `(chat=${chat_id} thread=${args.message_thread_id ?? "-"} firstPingAt=${turn2.firstPingAt} sinceFirstPing_ms=${decision.sinceFirstPingMs})
50682
+ `);
50683
+ emitRuntimeMetric({
50684
+ kind: "over_ping_suppressed",
50685
+ key: statusKey(chat_id, args.message_thread_id != null ? Number(args.message_thread_id) : undefined),
50686
+ sinceFirstPingMs: decision.sinceFirstPingMs ?? 0
50687
+ });
50688
+ disableNotification = true;
50689
+ } else if (decision.claimSlot) {
50690
+ turn2.firstPingAt = now;
50691
+ }
50692
+ }
50693
+ }
50621
50694
  const tg = access.telegraph;
50622
50695
  const tgThreshold = tg?.threshold ?? 3000;
50623
50696
  if (tg?.enabled && files.length === 0 && text.length > tgThreshold) {
@@ -50711,6 +50784,56 @@ ${url}`;
50711
50784
  previewMessageId = null;
50712
50785
  }
50713
50786
  startTypingLoop(chat_id);
50787
+ let silentAnchorEditDone = false;
50788
+ {
50789
+ const turn2 = currentTurn;
50790
+ if (turn2 != null && chunks.length === 1) {
50791
+ const decision = decideSilentReplyAnchor({
50792
+ effectivelySilent: disableNotification,
50793
+ anchorMessageId: turn2.silentAnchorMessageId,
50794
+ anchorText: turn2.silentAnchorText,
50795
+ newReplyText: effectiveText,
50796
+ hasFiles: files.length > 0,
50797
+ hasButtons: replyMarkup != null
50798
+ });
50799
+ if (decision.kind === "edit-anchor") {
50800
+ const editParams = {
50801
+ link_preview_options: { is_disabled: disableLinkPreview }
50802
+ };
50803
+ if (parseMode != null)
50804
+ editParams.parse_mode = parseMode;
50805
+ if (threadId != null)
50806
+ editParams.message_thread_id = threadId;
50807
+ try {
50808
+ await robustApiCall(() => lockedBot.api.editMessageText(chat_id, decision.messageId, decision.mergedText, editParams), {
50809
+ chat_id,
50810
+ verb: "reply.silent-anchor-edit",
50811
+ ...threadId != null ? { threadId } : {}
50812
+ });
50813
+ turn2.silentAnchorText = decision.mergedText;
50814
+ sentIds.push(decision.messageId);
50815
+ logOutbound("edit", chat_id, decision.messageId, decision.mergedText.length, "silent-anchor-merge");
50816
+ process.stderr.write(`telegram gateway: silent-reply auto-edit \u2014 ` + `chat=${chat_id} anchor=${decision.messageId} merged_len=${decision.mergedText.length}
50817
+ `);
50818
+ silentAnchorEditDone = true;
50819
+ } catch (err) {
50820
+ process.stderr.write(`telegram gateway: silent-reply auto-edit failed, falling back to fresh send: ${err instanceof Error ? err.message : String(err)}
50821
+ `);
50822
+ }
50823
+ }
50824
+ }
50825
+ }
50826
+ if (silentAnchorEditDone) {
50827
+ stopTypingLoop(chat_id);
50828
+ return {
50829
+ content: [
50830
+ {
50831
+ type: "text",
50832
+ text: `edited (id: ${sentIds[0]})`
50833
+ }
50834
+ ]
50835
+ };
50836
+ }
50714
50837
  try {
50715
50838
  for (let i = 0;i < chunks.length; i++) {
50716
50839
  const shouldReplyTo = reply_to != null && replyMode !== "off" && (replyMode === "all" || i === 0);
@@ -50812,6 +50935,13 @@ ${url}`;
50812
50935
  });
50813
50936
  }
50814
50937
  }
50938
+ if (chunks.length === 1 && disableNotification && files.length === 0 && replyMarkup == null && sentIds.length === 1) {
50939
+ const turn2 = currentTurn;
50940
+ if (turn2 != null) {
50941
+ turn2.silentAnchorMessageId = sentIds[0];
50942
+ turn2.silentAnchorText = effectiveText;
50943
+ }
50944
+ }
50815
50945
  const allPhotos = files.length >= 2 && files.length <= 10 && files.every((f) => PHOTO_EXTS.has(extname(f).toLowerCase()));
50816
50946
  const replyParams = reply_to != null && replyMode !== "off" ? { reply_parameters: { message_id: reply_to } } : {};
50817
50947
  if (allPhotos) {
@@ -51766,6 +51896,9 @@ function handleSessionEvent(ev) {
51766
51896
  gatewayReceiveAt: startedAt,
51767
51897
  replyCalled: false,
51768
51898
  finalAnswerDelivered: false,
51899
+ firstPingAt: null,
51900
+ silentAnchorMessageId: null,
51901
+ silentAnchorText: "",
51769
51902
  capturedText: [],
51770
51903
  orphanedReplyTimeoutId: null,
51771
51904
  registryKey: null,
@@ -74,6 +74,8 @@ import {
74
74
  shutdownAnalytics,
75
75
  } from '../analytics-posthog.js'
76
76
  import { emitRuntimeMetric } from '../runtime-metrics.js'
77
+ import { decideOverPing } from '../over-ping-safety-net.js'
78
+ import { decideSilentReplyAnchor } from '../silent-reply-anchor.js'
77
79
  import { classifyInbound } from '../inbound-classifier.js'
78
80
  import * as silencePoke from '../silence-poke.js'
79
81
  import * as pendingProgress from '../pending-work-progress.js'
@@ -1206,6 +1208,27 @@ type CurrentTurn = {
1206
1208
  // even though `replyCalled` is true — the #1664 case where the real answer
1207
1209
  // ended up as plain transcript text rendered into an ephemeral draft.
1208
1210
  finalAnswerDelivered: boolean
1211
+ // #1675 (over-ping safety net): wall-clock ms of the first reply
1212
+ // this turn that landed with `disable_notification: false` (a real
1213
+ // device ping). The conversational-pacing contract
1214
+ // (`reference/conversational-pacing.md` beat 5) says EXACTLY ONE
1215
+ // ping per turn — the final answer. When the model violates that
1216
+ // (sends a substantive answer pinged + a wrap-up "Delivered…" or
1217
+ // meta-narration also pinged), subsequent reply calls with
1218
+ // `disable_notification: false` are auto-downgraded to silent by
1219
+ // the framework. Null until the first ping lands. Reset on every
1220
+ // fresh-turn enqueue.
1221
+ firstPingAt: number | null
1222
+ // #1677 silent-reply auto-edit. The first silent reply of a turn
1223
+ // captures `silentAnchorMessageId` + `silentAnchorText`; subsequent
1224
+ // silent replies in the SAME turn editMessageText that anchor
1225
+ // (appending with paragraph-break separator). Net visual: one
1226
+ // growing silent bubble instead of N stacked silent bubbles.
1227
+ // Cleared by turn-atom replacement on enqueue. See
1228
+ // `telegram-plugin/silent-reply-anchor.ts` for the pure
1229
+ // `decideSilentReplyAnchor` predicate.
1230
+ silentAnchorMessageId: number | null
1231
+ silentAnchorText: string
1209
1232
  capturedText: string[]
1210
1233
  orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
1211
1234
  registryKey: string | null
@@ -4208,7 +4231,58 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
4208
4231
  // so only the final answer pings the device. Default false (pings) so
4209
4232
  // existing call-sites and the typical "final answer" reply keep their
4210
4233
  // current behaviour without an explicit flag.
4211
- const disableNotification = args.disable_notification === true
4234
+ let disableNotification = args.disable_notification === true
4235
+
4236
+ // #1675 over-ping safety net. The conversational-pacing contract
4237
+ // (`reference/conversational-pacing.md` beat 5) says EXACTLY ONE
4238
+ // device ping per turn — the final answer. The model sometimes
4239
+ // violates this by sending a substantive answer pinged + a wrap-up
4240
+ // ("Delivered all three steps…", "Sent.", or meta-narration) ALSO
4241
+ // pinged. Both messages then fire notifications. The fleet UAT on
4242
+ // 2026-05-23 reproduced this (Step 3 + Delivered both pinged, two
4243
+ // beeps for a turn that should have produced one). Framework owns
4244
+ // the safety net: once the turn has emitted ONE pinged reply, every
4245
+ // subsequent reply call in the same turn auto-downgrades to silent
4246
+ // (disable_notification: true). Model intent ("I want this loud")
4247
+ // is honoured for the first ping; subsequent pings are demoted with
4248
+ // a stderr log so operators can see the safety net engage.
4249
+ //
4250
+ // The slot is claimed BEFORE the actual send to keep the logic
4251
+ // sequential — a send that fails part-way leaves firstPingAt set
4252
+ // and subsequent pings would be silenced. Acceptable trade-off (a
4253
+ // failed first ping is an edge case; the alternative — claim after
4254
+ // send — races concurrent reply calls).
4255
+ {
4256
+ const turn = currentTurn
4257
+ if (turn != null) {
4258
+ const now = Date.now()
4259
+ const decision = decideOverPing({
4260
+ modelRequestedPing: !disableNotification,
4261
+ firstPingAt: turn.firstPingAt,
4262
+ nowMs: now,
4263
+ })
4264
+ if (decision.suppress) {
4265
+ process.stderr.write(
4266
+ `telegram gateway: reply over-ping safety net — ` +
4267
+ `downgrading disable_notification:false → true ` +
4268
+ `(chat=${chat_id} thread=${args.message_thread_id ?? '-'} ` +
4269
+ `firstPingAt=${turn.firstPingAt} sinceFirstPing_ms=${decision.sinceFirstPingMs})\n`,
4270
+ )
4271
+ // Observability: surface to the unified runtime-metrics
4272
+ // fan-out so the cadence dashboard can track fleet-wide
4273
+ // over-ping rate (leading indicator of model pacing drift).
4274
+ emitRuntimeMetric({
4275
+ kind: 'over_ping_suppressed',
4276
+ key: statusKey(chat_id, args.message_thread_id != null
4277
+ ? Number(args.message_thread_id) : undefined),
4278
+ sinceFirstPingMs: decision.sinceFirstPingMs ?? 0,
4279
+ })
4280
+ disableNotification = true
4281
+ } else if (decision.claimSlot) {
4282
+ turn.firstPingAt = now
4283
+ }
4284
+ }
4285
+ }
4212
4286
 
4213
4287
  // Telegraph publish (#579). When the reply text is long enough AND
4214
4288
  // the agent has telegraph enabled in access.json, publish to
@@ -4354,6 +4428,91 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
4354
4428
 
4355
4429
  startTypingLoop(chat_id)
4356
4430
 
4431
+ // #1677 silent-reply auto-edit. Consecutive silent replies within
4432
+ // a turn edit a single anchor message instead of stacking new
4433
+ // bubbles. We branch BEFORE the chunk loop so the single-chunk
4434
+ // common case takes an editMessageText path; everything else
4435
+ // (multi-chunk, ping, files, buttons) falls through to fresh send
4436
+ // and either captures a new anchor or doesn't, per the predicate.
4437
+ let silentAnchorEditDone = false
4438
+ {
4439
+ const turn = currentTurn
4440
+ if (turn != null && chunks.length === 1) {
4441
+ const decision = decideSilentReplyAnchor({
4442
+ effectivelySilent: disableNotification,
4443
+ anchorMessageId: turn.silentAnchorMessageId,
4444
+ anchorText: turn.silentAnchorText,
4445
+ newReplyText: effectiveText,
4446
+ hasFiles: files.length > 0,
4447
+ hasButtons: replyMarkup != null,
4448
+ })
4449
+ if (decision.kind === 'edit-anchor') {
4450
+ const editParams: {
4451
+ parse_mode?: 'HTML' | 'MarkdownV2'
4452
+ message_thread_id?: number
4453
+ link_preview_options?: { is_disabled: boolean }
4454
+ } = {
4455
+ link_preview_options: { is_disabled: disableLinkPreview },
4456
+ }
4457
+ if (parseMode != null) editParams.parse_mode = parseMode
4458
+ if (threadId != null) editParams.message_thread_id = threadId
4459
+ try {
4460
+ await robustApiCall(
4461
+ () =>
4462
+ lockedBot.api.editMessageText(
4463
+ chat_id,
4464
+ decision.messageId,
4465
+ decision.mergedText,
4466
+ editParams,
4467
+ ),
4468
+ {
4469
+ chat_id,
4470
+ verb: 'reply.silent-anchor-edit',
4471
+ ...(threadId != null ? { threadId } : {}),
4472
+ },
4473
+ )
4474
+ turn.silentAnchorText = decision.mergedText
4475
+ sentIds.push(decision.messageId)
4476
+ logOutbound(
4477
+ 'edit',
4478
+ chat_id,
4479
+ decision.messageId,
4480
+ decision.mergedText.length,
4481
+ 'silent-anchor-merge',
4482
+ )
4483
+ process.stderr.write(
4484
+ `telegram gateway: silent-reply auto-edit — ` +
4485
+ `chat=${chat_id} anchor=${decision.messageId} ` +
4486
+ `merged_len=${decision.mergedText.length}\n`,
4487
+ )
4488
+ silentAnchorEditDone = true
4489
+ } catch (err) {
4490
+ // Edit failed (e.g. message deleted, rate limit exhausted,
4491
+ // parse error). Fall through to fresh-send below — the
4492
+ // anchor will be overwritten by whatever lands.
4493
+ process.stderr.write(
4494
+ `telegram gateway: silent-reply auto-edit failed, ` +
4495
+ `falling back to fresh send: ${err instanceof Error ? err.message : String(err)}\n`,
4496
+ )
4497
+ }
4498
+ }
4499
+ }
4500
+ }
4501
+
4502
+ if (silentAnchorEditDone) {
4503
+ // Skip the chunk loop entirely — the anchor edit IS the send.
4504
+ // Match the normal exit path: stop typing, then return.
4505
+ stopTypingLoop(chat_id)
4506
+ return {
4507
+ content: [
4508
+ {
4509
+ type: 'text',
4510
+ text: `edited (id: ${sentIds[0]})`,
4511
+ },
4512
+ ],
4513
+ }
4514
+ }
4515
+
4357
4516
  try {
4358
4517
  for (let i = 0; i < chunks.length; i++) {
4359
4518
  const shouldReplyTo =
@@ -4489,6 +4648,27 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
4489
4648
  }
4490
4649
  }
4491
4650
 
4651
+ // #1677 silent-reply auto-edit — anchor capture for the FIRST
4652
+ // silent reply of a turn (or the silent reply that replaced the
4653
+ // anchor on overflow). Only captures for the single-chunk,
4654
+ // silent, no-files, no-buttons happy path; the edit-anchor path
4655
+ // earlier in this function handles SUBSEQUENT silent replies by
4656
+ // editing. The next silent reply this turn will see the captured
4657
+ // anchor and edit it in place.
4658
+ if (
4659
+ chunks.length === 1
4660
+ && disableNotification
4661
+ && files.length === 0
4662
+ && replyMarkup == null
4663
+ && sentIds.length === 1
4664
+ ) {
4665
+ const turn = currentTurn
4666
+ if (turn != null) {
4667
+ turn.silentAnchorMessageId = sentIds[0]!
4668
+ turn.silentAnchorText = effectiveText
4669
+ }
4670
+ }
4671
+
4492
4672
  // #273: when files is 2-10 photos, batch them into a single
4493
4673
  // sendMediaGroup album rather than N separate sendPhoto calls. The
4494
4674
  // user's device fires one notification for the album instead of N
@@ -5877,6 +6057,9 @@ function handleSessionEvent(ev: SessionEvent): void {
5877
6057
  gatewayReceiveAt: startedAt,
5878
6058
  replyCalled: false,
5879
6059
  finalAnswerDelivered: false,
6060
+ firstPingAt: null,
6061
+ silentAnchorMessageId: null,
6062
+ silentAnchorText: '',
5880
6063
  capturedText: [],
5881
6064
  orphanedReplyTimeoutId: null,
5882
6065
  registryKey: null,
@@ -0,0 +1,80 @@
1
+ /**
2
+ * over-ping-safety-net.ts — pure decision predicate for #1674's
3
+ * "at-most-one device-ping per turn" framework safety net.
4
+ *
5
+ * Background. `reference/conversational-pacing.md` beat 5 is
6
+ * explicit: the model should deliver the answer as a fresh `reply`
7
+ * omitting `disable_notification` (i.e. pinging the device once).
8
+ * EXACTLY ONE ping per turn. The model occasionally violates this
9
+ * — fleet UAT 2026-05-23 reproduced a substantive Step 3 answer
10
+ * pinged + a wrap-up "Delivered all three steps with a wrap-up
11
+ * summary." ALSO pinged, two device beeps for a turn that should
12
+ * have produced one.
13
+ *
14
+ * This module is the framework safety net. The IO lives in the
15
+ * gateway's `executeReply` (mutate `turn.firstPingAt`, emit log +
16
+ * runtime-metric, override `disableNotification`); keeping the
17
+ * *decision* pure makes the predicate unit-testable without
18
+ * standing up a gateway.
19
+ *
20
+ * Contract:
21
+ * - When the model requested a ping (`!disable_notification`) AND
22
+ * the current turn already had a ping land (`firstPingAt != null`),
23
+ * the decision says SUPPRESS — the caller downgrades to silent.
24
+ * - When the model requested a ping AND no prior ping this turn,
25
+ * the decision says CLAIM the slot — caller sets `firstPingAt`.
26
+ * - When the model requested silent, this module is a no-op.
27
+ *
28
+ * The slot is claimed BEFORE the actual send (caller responsibility).
29
+ * Trade-off documented inline in `gateway.ts:executeReply`.
30
+ */
31
+
32
+ export interface OverPingDecisionInput {
33
+ /** True iff the model requested a device ping
34
+ * (`disable_notification:false` or omitted, since the default is to
35
+ * ping per Telegram Bot API). The caller computes this from the
36
+ * inbound `args.disable_notification === true` check. */
37
+ modelRequestedPing: boolean
38
+ /** Wall-clock ms of the FIRST ping this turn, or null if no ping
39
+ * has landed yet. Caller threads this through from
40
+ * `CurrentTurn.firstPingAt`. */
41
+ firstPingAt: number | null
42
+ /** Deterministic clock for tests; defaults to Date.now() in callers. */
43
+ nowMs: number
44
+ }
45
+
46
+ export interface OverPingDecision {
47
+ /** True iff the caller should override `disableNotification` to
48
+ * `true` (i.e. send this reply silently). Implies a contract
49
+ * violation by the model — caller should log + emit a metric. */
50
+ suppress: boolean
51
+ /** True iff the caller should claim the slot —
52
+ * `turn.firstPingAt = nowMs`. Mutually exclusive with `suppress`. */
53
+ claimSlot: boolean
54
+ /** When `suppress` is true, how long the first ping has been
55
+ * "active" (ms since `firstPingAt`). Caller surfaces this in the
56
+ * log + metric for forensic analysis (e.g. tight rapid double-pings
57
+ * vs delayed wrap-ups). Null otherwise. */
58
+ sinceFirstPingMs: number | null
59
+ }
60
+
61
+ /**
62
+ * Pure decision: should the framework suppress this reply's ping?
63
+ * No mutation, no IO, deterministic under a fixed `nowMs`.
64
+ */
65
+ export function decideOverPing(input: OverPingDecisionInput): OverPingDecision {
66
+ if (!input.modelRequestedPing) {
67
+ // Model already chose silent — nothing for the safety net to do.
68
+ return { suppress: false, claimSlot: false, sinceFirstPingMs: null }
69
+ }
70
+ if (input.firstPingAt != null) {
71
+ // Slot already claimed by an earlier ping this turn — suppress.
72
+ return {
73
+ suppress: true,
74
+ claimSlot: false,
75
+ sinceFirstPingMs: input.nowMs - input.firstPingAt,
76
+ }
77
+ }
78
+ // First ping this turn — let it through and claim the slot.
79
+ return { suppress: false, claimSlot: true, sinceFirstPingMs: null }
80
+ }
@@ -124,6 +124,24 @@ export type RuntimeMetricEvent =
124
124
  elapsedMs?: number
125
125
  reason?: string
126
126
  }
127
+ /**
128
+ * #1674 over-ping safety net engaged. Fires when a `reply` call
129
+ * arrived with `disable_notification: false` AND the current turn
130
+ * already had a pinged reply land — the framework downgraded this
131
+ * call to silent to honour beat 5's "EXACTLY ONE ping per turn"
132
+ * contract. Each event is a model contract violation the safety
133
+ * net caught. A high rate per agent means the model is
134
+ * systematically over-pinging — prompt drift or training
135
+ * regression worth investigating.
136
+ *
137
+ * key → `<chatId>:<threadIdOrEmpty>` (the statusKey shape)
138
+ * sinceFirstPingMs → time since the FIRST ping landed this turn
139
+ */
140
+ | {
141
+ kind: 'over_ping_suppressed'
142
+ key: string
143
+ sinceFirstPingMs: number
144
+ }
127
145
 
128
146
  /**
129
147
  * The JSONL sink lives under the runtime state dir so it's per-agent
@@ -0,0 +1,142 @@
1
+ /**
2
+ * silent-reply-anchor.ts — pure decision predicate for the
3
+ * "consecutive silent replies edit one growing message" UX fix.
4
+ *
5
+ * Background. Modern Claude 2.1.x on this fleet implements
6
+ * conversational pacing (`reference/conversational-pacing.md` beats
7
+ * 1 + 3 + 5) by calling the `reply` MCP tool multiple times in a
8
+ * turn — a silent ack, silent per-step updates, and one pinged
9
+ * final answer. The over-ping safety net (#1674) caps the
10
+ * notifications at one. But the user still SEES N separate chat
11
+ * bubbles for the silent replies, which reads as visual spam even
12
+ * when no device pings. The operator's original complaint was
13
+ * exactly this shape:
14
+ *
15
+ * "I would like more regular process updates, where it edits a
16
+ * status message in place vs spamming multiple messages."
17
+ *
18
+ * Fix: consecutive silent replies within a turn EDIT a single
19
+ * anchor message instead of each sending a fresh bubble. The
20
+ * model's intent (silent mid-turn updates) is honoured; the
21
+ * framework controls the visual placement (one growing bubble,
22
+ * not many). Final pinged reply lands as a separate fresh bubble
23
+ * (it's the final answer; the silent anchor is the preamble).
24
+ *
25
+ * Net visual for a multi-step turn:
26
+ * pre-fix: 4 bubbles (silent ack + 2 silent steps + 1 pinged final)
27
+ * post-fix: 2 bubbles (1 silent anchor with all 3 thoughts + 1 pinged final)
28
+ *
29
+ * Pinged replies always fresh-send. Reply-tool calls with files
30
+ * or button keyboards bypass the anchor (fresh send) because the
31
+ * edit path can't merge those cleanly.
32
+ *
33
+ * Accumulation format: `${anchorText}\n\n${newReplyText}` —
34
+ * blank-line paragraph separator. Reads naturally as the model
35
+ * "thinking out loud" with paragraph breaks per thought.
36
+ *
37
+ * Kill switch: `SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT=1` — turns
38
+ * the auto-edit off; reverts to per-reply fresh send.
39
+ */
40
+
41
+ /** Conservative cap on merged anchor text, kept below Telegram's 4096-character message text limit. A merge longer than this starts a fresh anchor instead. */
42
+ export const TELEGRAM_MSG_CAP = 4000
43
+
44
+ export interface SilentReplyAnchorDecisionInput {
45
+ /** True when this reply will be sent with `disable_notification: true`
46
+ * — either the model asked for silent (a beat 1/3 update) or the
47
+ * over-ping safety net downgraded a repeat ping to silent. This
48
+ * predicate sees the EFFECTIVE flag (after that downgrade), not
49
+ * the raw model intent. */
50
+ effectivelySilent: boolean
51
+ /** Telegram message id of the current silent anchor, or null when
52
+ * no silent anchor has been set this turn. */
53
+ anchorMessageId: number | null
54
+ /** Text content of the current anchor (accumulated). Empty when
55
+ * no anchor exists. */
56
+ anchorText: string
57
+ /** Text content of the incoming reply, BEFORE any anchor merge. */
58
+ newReplyText: string
59
+ /** True if the incoming reply has attached files (photos,
60
+ * documents, etc). Anchor merge bypassed when true — edits
61
+ * can't add media to an existing text message. */
62
+ hasFiles: boolean
63
+ /** True if the incoming reply has an inline keyboard. Anchor
64
+ * merge bypassed when true — keyboard semantics across edits
65
+ * are too easy to get wrong, and the markup is rare enough
66
+ * that fresh-send is the safer default. */
67
+ hasButtons: boolean
68
+ }
69
+
70
+ /**
71
+ * What the caller should do with this reply.
72
+ *
73
+ * - `kind: 'fresh'` — send a normal new message; if it should
74
+ * become the next anchor (silent + no attachments), the caller
75
+ * captures its message_id after send and sets the anchor.
76
+ *
77
+ * - `kind: 'edit-anchor'` — DO NOT send; edit the existing
78
+ * anchor message with `mergedText` as the new content. The
79
+ * caller updates `anchor.text = mergedText` after a successful
80
+ * edit. messageId is the anchor's existing id.
81
+ */
82
+ export type SilentReplyAnchorDecision =
83
+ | { kind: 'fresh'; becomesAnchor: boolean }
84
+ | { kind: 'edit-anchor'; messageId: number; mergedText: string }
85
+
86
+ function enabled(): boolean {
87
+ const v = process.env.SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT
88
+ return !(v === '1' || v === 'true')
89
+ }
90
+
91
+ /**
92
+ * Decide whether to merge this reply into an existing silent
93
+ * anchor or fresh-send. Pure: no IO, no mutation, kill-switch
94
+ * checked per call.
95
+ */
96
+ export function decideSilentReplyAnchor(
97
+ input: SilentReplyAnchorDecisionInput,
98
+ ): SilentReplyAnchorDecision {
99
+ // Kill switch disengages the whole mechanism — every reply
100
+ // falls through to fresh-send with no anchor capture.
101
+ if (!enabled()) {
102
+ return { kind: 'fresh', becomesAnchor: false }
103
+ }
104
+
105
+ // Pinged replies never merge — they're the final answer bubble,
106
+ // semantically distinct from the silent preamble.
107
+ if (!input.effectivelySilent) {
108
+ return { kind: 'fresh', becomesAnchor: false }
109
+ }
110
+
111
+ // Files / buttons bypass the anchor — edit-text can't merge
112
+ // media, and keyboards across edits are a foot-gun.
113
+ if (input.hasFiles || input.hasButtons) {
114
+ return { kind: 'fresh', becomesAnchor: false }
115
+ }
116
+
117
+ // Empty body — let the caller's existing validation handle it.
118
+ // We treat as fresh-but-don't-anchor so a downstream "drop empty"
119
+ // doesn't leave a stale anchor pointer.
120
+ if (input.newReplyText.trim().length === 0) {
121
+ return { kind: 'fresh', becomesAnchor: false }
122
+ }
123
+
124
+ // No anchor yet this turn → this reply BECOMES the anchor.
125
+ if (input.anchorMessageId == null) {
126
+ return { kind: 'fresh', becomesAnchor: true }
127
+ }
128
+
129
+ // Anchor exists → try to merge. The merge format is paragraph-
130
+ // break separation. If the merged result would exceed the
131
+ // Telegram text cap, give up on the anchor and start fresh —
132
+ // the new reply becomes a new anchor.
133
+ const merged = `${input.anchorText}\n\n${input.newReplyText}`
134
+ if (merged.length > TELEGRAM_MSG_CAP) {
135
+ return { kind: 'fresh', becomesAnchor: true }
136
+ }
137
+ return {
138
+ kind: 'edit-anchor',
139
+ messageId: input.anchorMessageId,
140
+ mergedText: merged,
141
+ }
142
+ }
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Unit suite for #1674's over-ping safety net predicate.
3
+ * Pins the decision logic in isolation from the gateway's
4
+ * `executeReply` IO so a future refactor can't silently regress.
5
+ */
6
+
7
+ import { describe, expect, it } from 'vitest'
8
+
9
+ import { decideOverPing } from '../over-ping-safety-net.js'
10
+
11
+ describe('decideOverPing — at-most-one-ping-per-turn safety net', () => {
12
+ it('lets the FIRST ping through and tells caller to claim the slot', () => {
13
+ const d = decideOverPing({
14
+ modelRequestedPing: true,
15
+ firstPingAt: null,
16
+ nowMs: 1_000,
17
+ })
18
+ expect(d.suppress).toBe(false)
19
+ expect(d.claimSlot).toBe(true)
20
+ expect(d.sinceFirstPingMs).toBeNull()
21
+ })
22
+
23
+ it('SUPPRESSES subsequent ping in the same turn and reports elapsed', () => {
24
+ const d = decideOverPing({
25
+ modelRequestedPing: true,
26
+ firstPingAt: 1_000,
27
+ nowMs: 4_500,
28
+ })
29
+ expect(d.suppress).toBe(true)
30
+ expect(d.claimSlot).toBe(false)
31
+ expect(d.sinceFirstPingMs).toBe(3_500)
32
+ })
33
+
34
+ it('is a no-op when the model already requested silent (regardless of slot state)', () => {
35
+ // No prior ping
36
+ const d1 = decideOverPing({
37
+ modelRequestedPing: false,
38
+ firstPingAt: null,
39
+ nowMs: 1_000,
40
+ })
41
+ expect(d1).toEqual({ suppress: false, claimSlot: false, sinceFirstPingMs: null })
42
+
43
+ // Prior ping already landed — silent reply still no-op, NOT claimed
44
+ const d2 = decideOverPing({
45
+ modelRequestedPing: false,
46
+ firstPingAt: 1_000,
47
+ nowMs: 5_000,
48
+ })
49
+ expect(d2).toEqual({ suppress: false, claimSlot: false, sinceFirstPingMs: null })
50
+ })
51
+
52
+ it('handles the edge case where firstPingAt equals nowMs (instant double-call)', () => {
53
+ // Same-tick double-fire: the second call comes in with firstPingAt
54
+ // exactly at nowMs. Elapsed is 0; suppress fires.
55
+ const d = decideOverPing({
56
+ modelRequestedPing: true,
57
+ firstPingAt: 1_000,
58
+ nowMs: 1_000,
59
+ })
60
+ expect(d.suppress).toBe(true)
61
+ expect(d.claimSlot).toBe(false)
62
+ expect(d.sinceFirstPingMs).toBe(0)
63
+ })
64
+
65
+ it('reports large elapsed deltas honestly (late wrap-up after long work)', () => {
66
+ // Real-world reproducer pattern: substantive answer pings at +30s,
67
+ // wrap-up "Delivered all three steps…" pings at +36s. The safety
68
+ // net catches the second; sinceFirstPingMs reflects the 6s gap.
69
+ const d = decideOverPing({
70
+ modelRequestedPing: true,
71
+ firstPingAt: 30_000,
72
+ nowMs: 36_000,
73
+ })
74
+ expect(d.suppress).toBe(true)
75
+ expect(d.sinceFirstPingMs).toBe(6_000)
76
+ })
77
+
78
+ it('claim-vs-suppress is mutually exclusive', () => {
79
+ // Defensive invariant — no caller path should ever see both flags
80
+ // true at once.
81
+ const cases: Array<{
82
+ modelRequestedPing: boolean
83
+ firstPingAt: number | null
84
+ nowMs: number
85
+ }> = [
86
+ { modelRequestedPing: true, firstPingAt: null, nowMs: 100 },
87
+ { modelRequestedPing: true, firstPingAt: 50, nowMs: 100 },
88
+ { modelRequestedPing: false, firstPingAt: null, nowMs: 100 },
89
+ { modelRequestedPing: false, firstPingAt: 50, nowMs: 100 },
90
+ ]
91
+ for (const c of cases) {
92
+ const d = decideOverPing(c)
93
+ expect(d.suppress && d.claimSlot).toBe(false)
94
+ }
95
+ })
96
+ })
@@ -0,0 +1,178 @@
1
+ /**
2
+ * Unit suite for #1677 silent-reply auto-edit predicate.
3
+ */
4
+
5
+ import { afterEach, beforeEach, describe, expect, it } from 'vitest'
6
+
7
+ import {
8
+ TELEGRAM_MSG_CAP,
9
+ decideSilentReplyAnchor,
10
+ } from '../silent-reply-anchor.js'
11
+
12
+ describe('decideSilentReplyAnchor — silent replies edit a single growing anchor', () => {
13
+ beforeEach(() => {
14
+ delete process.env.SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT
15
+ })
16
+ afterEach(() => {
17
+ delete process.env.SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT
18
+ })
19
+
20
+ it('first silent reply this turn becomes the anchor (fresh send + capture)', () => {
21
+ const d = decideSilentReplyAnchor({
22
+ effectivelySilent: true,
23
+ anchorMessageId: null,
24
+ anchorText: '',
25
+ newReplyText: 'on it — checking the calendar',
26
+ hasFiles: false,
27
+ hasButtons: false,
28
+ })
29
+ expect(d).toEqual({ kind: 'fresh', becomesAnchor: true })
30
+ })
31
+
32
+ it('subsequent silent reply edits the anchor with paragraph-break merge', () => {
33
+ const d = decideSilentReplyAnchor({
34
+ effectivelySilent: true,
35
+ anchorMessageId: 12345,
36
+ anchorText: 'on it — checking the calendar',
37
+ newReplyText: 'Step 1: hostname is example-host',
38
+ hasFiles: false,
39
+ hasButtons: false,
40
+ })
41
+ expect(d).toEqual({
42
+ kind: 'edit-anchor',
43
+ messageId: 12345,
44
+ mergedText:
45
+ 'on it — checking the calendar\n\nStep 1: hostname is example-host',
46
+ })
47
+ })
48
+
49
+ it('third and beyond silent replies keep accumulating onto the same anchor', () => {
50
+ // Simulate the multi-step pattern: ack → step1 → step2 → step3.
51
+ // After two prior accumulations the anchor reads as three paragraphs.
52
+ const d = decideSilentReplyAnchor({
53
+ effectivelySilent: true,
54
+ anchorMessageId: 12345,
55
+ anchorText: 'on it\n\nStep 1: hostname\n\nStep 2: OS family',
56
+ newReplyText: 'Step 3: CPU',
57
+ hasFiles: false,
58
+ hasButtons: false,
59
+ })
60
+ expect(d.kind).toBe('edit-anchor')
61
+ if (d.kind === 'edit-anchor') {
62
+ expect(d.mergedText).toBe(
63
+ 'on it\n\nStep 1: hostname\n\nStep 2: OS family\n\nStep 3: CPU',
64
+ )
65
+ }
66
+ })
67
+
68
+ it('pinged (effectivelySilent=false) reply NEVER merges — fresh send', () => {
69
+ const d = decideSilentReplyAnchor({
70
+ effectivelySilent: false,
71
+ anchorMessageId: 12345,
72
+ anchorText: 'on it\n\nSteps done',
73
+ newReplyText: 'Final answer here',
74
+ hasFiles: false,
75
+ hasButtons: false,
76
+ })
77
+ expect(d).toEqual({ kind: 'fresh', becomesAnchor: false })
78
+ })
79
+
80
+ it('files attached → fresh send (anchor cannot absorb media)', () => {
81
+ const d = decideSilentReplyAnchor({
82
+ effectivelySilent: true,
83
+ anchorMessageId: 12345,
84
+ anchorText: 'on it',
85
+ newReplyText: 'here is the chart',
86
+ hasFiles: true,
87
+ hasButtons: false,
88
+ })
89
+ expect(d).toEqual({ kind: 'fresh', becomesAnchor: false })
90
+ })
91
+
92
+ it('button keyboard → fresh send (keyboard semantics across edits is a foot-gun)', () => {
93
+ const d = decideSilentReplyAnchor({
94
+ effectivelySilent: true,
95
+ anchorMessageId: 12345,
96
+ anchorText: 'on it',
97
+ newReplyText: 'choose one:',
98
+ hasFiles: false,
99
+ hasButtons: true,
100
+ })
101
+ expect(d).toEqual({ kind: 'fresh', becomesAnchor: false })
102
+ })
103
+
104
+ it('empty reply body → fresh send + DO NOT become anchor', () => {
105
+ // The caller has its own empty-text validation; we just avoid
106
+ // leaving a dangling anchor pointer if the empty reply
107
+ // accidentally goes through.
108
+ const d = decideSilentReplyAnchor({
109
+ effectivelySilent: true,
110
+ anchorMessageId: null,
111
+ anchorText: '',
112
+ newReplyText: ' ',
113
+ hasFiles: false,
114
+ hasButtons: false,
115
+ })
116
+ expect(d).toEqual({ kind: 'fresh', becomesAnchor: false })
117
+ })
118
+
119
+ it('overflow: merged text > TELEGRAM_MSG_CAP → fresh send + start new anchor', () => {
120
+ const huge = 'x'.repeat(TELEGRAM_MSG_CAP - 10)
121
+ const d = decideSilentReplyAnchor({
122
+ effectivelySilent: true,
123
+ anchorMessageId: 12345,
124
+ anchorText: huge,
125
+ newReplyText: 'short tail',
126
+ hasFiles: false,
127
+ hasButtons: false,
128
+ })
129
+ // Merged would be huge + "\n\n" + "short tail" → exceeds cap.
130
+ expect(d).toEqual({ kind: 'fresh', becomesAnchor: true })
131
+ })
132
+
133
+ it('kill switch — `SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT=1` short-circuits to fresh send for every reply', () => {
134
+ process.env.SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT = '1'
135
+ const d = decideSilentReplyAnchor({
136
+ effectivelySilent: true,
137
+ anchorMessageId: 12345,
138
+ anchorText: 'on it',
139
+ newReplyText: 'Step 1',
140
+ hasFiles: false,
141
+ hasButtons: false,
142
+ })
143
+ expect(d).toEqual({ kind: 'fresh', becomesAnchor: false })
144
+ })
145
+
146
+ it('kill switch accepts string "true" too', () => {
147
+ process.env.SWITCHROOM_DISABLE_SILENT_REPLY_AUTOEDIT = 'true'
148
+ const d = decideSilentReplyAnchor({
149
+ effectivelySilent: true,
150
+ anchorMessageId: null,
151
+ anchorText: '',
152
+ newReplyText: 'on it',
153
+ hasFiles: false,
154
+ hasButtons: false,
155
+ })
156
+ expect(d).toEqual({ kind: 'fresh', becomesAnchor: false })
157
+ })
158
+
159
+ it('borderline merge — exactly at the cap is accepted (boundary inclusive)', () => {
160
+ // Aim merged.length === TELEGRAM_MSG_CAP exactly.
161
+ // separator is "\n\n" (2 chars). anchor + separator + new === cap.
162
+ const newReplyText = 'tail'
163
+ const anchorLen = TELEGRAM_MSG_CAP - newReplyText.length - 2
164
+ const anchor = 'a'.repeat(anchorLen)
165
+ const d = decideSilentReplyAnchor({
166
+ effectivelySilent: true,
167
+ anchorMessageId: 12345,
168
+ anchorText: anchor,
169
+ newReplyText,
170
+ hasFiles: false,
171
+ hasButtons: false,
172
+ })
173
+ expect(d.kind).toBe('edit-anchor')
174
+ if (d.kind === 'edit-anchor') {
175
+ expect(d.mergedText.length).toBe(TELEGRAM_MSG_CAP)
176
+ }
177
+ })
178
+ })
@@ -1,59 +1,80 @@
1
1
  /**
2
- * Visible answer-stream UAT for the openclaw-pattern TTFO fix
3
- * (#869 Phase 1 narrow scope).
2
+ * Conversational pacing UAT — measures the END-TO-END user-perceived
3
+ * turn UX on a multi-step prompt.
4
4
  *
5
- * Validates that when `SWITCHROOM_VISIBLE_ANSWER_STREAM=1` is set on
6
- * the target agent, the framework auto-renders the model's transcript
7
- * text as a user-visible edit-in-place message starting within ~5s of
8
- * inbound instead of writing to Telegram's invisible compose-box
9
- * draft (the default #1664 behaviour).
5
+ * Original framing was "validate the visible-answer-stream path
6
+ * activates." Live research on test-harness with the
7
+ * `SWITCHROOM_VISIBLE_ANSWER_STREAM=1` flag showed that modern Claude
8
+ * 2.1.x on this fleet does NOT emit transcript text events between
9
+ * tool calls — it consistently calls the `reply` MCP tool directly
10
+ * for every user-visible chunk (beat 1 ack, then per-step beat 3
11
+ * updates). So the visible-answer-stream code path (which renders
12
+ * `text` session events into a chat-timeline message) doesn't
13
+ * activate; the answer-stream lane stays idle while the model uses
14
+ * `reply` calls instead.
10
15
  *
11
- * ## Required setup
16
+ * That's actually FINE — the model is correctly following the
17
+ * five-beat conversational-pacing contract (`reference/conversational-
18
+ * pacing.md`): one silent ack at the start, silent updates per step,
19
+ * one pinged final answer. This UAT now validates THAT — the pacing
20
+ * the user actually experiences — rather than the answer-stream code
21
+ * path specifically.
12
22
  *
13
- * The target agent (default `test-harness`) MUST have
14
- * `SWITCHROOM_VISIBLE_ANSWER_STREAM=1` in its container environment.
15
- * Without that env var the scenario will (correctly) fail — the
16
- * default behaviour writes to a draft the mtcute driver cannot see.
23
+ * The flag `SWITCHROOM_VISIBLE_ANSWER_STREAM=1` is still set on
24
+ * test-harness for ongoing observation; if a future model version
25
+ * starts emitting transcript text, the lane will surface it visibly
26
+ * instead of writing to the invisible compose-box draft (the prior
27
+ * default).
17
28
  *
18
29
  * ## What this asserts
19
30
  *
20
- * 1. The first user-visible bot output (fresh `sendMessage`) lands
21
- * within `VISIBLE_TTFO_BUDGET_MS` (default 8 s) of the inbound.
22
- * Today's median TTFO across the fleet is 17–69 s; the visible
23
- * lane should drop it well under 10 s for any reply long enough
24
- * to emit a text chunk.
25
- * 2. The initial fresh message is silent (the answer-stream emits
26
- * with `disable_notification: true` so mid-turn edits never ping).
27
- * 3. Subsequent edits land on the SAME message_id — single in-place
28
- * surface, not a chain of pinged sends.
29
- * 4. At least one edit growth event happens between first send and
30
- * turn-end (the streaming property — TTFO is fast, then content
31
- * grows live).
31
+ * 1. First user-visible bot message lands within `TTFO_BUDGET_MS`
32
+ * (default 15 s) of the inbound — covers beat 1 ack OR straight-
33
+ * to-content depending on the model's pacing choice.
34
+ * 2. Multiple distinct bot messages land per turn for the multi-
35
+ * step prompt — proving the model isn't collapsing everything
36
+ * into a single pinged dump.
37
+ * 3. All but at most one message is silent (`disable_notification:
38
+ * true`). Only the final answer should ping — anything earlier
39
+ * pinging is a beat-3 contract violation.
32
40
  *
33
- * The captured trail is dumped to console for forensic inspection
34
- * regardless of pass/fail.
41
+ * ## Wall-clock budget
35
42
  *
36
- * Wall-clock budget: ~90 s.
43
+ * ~90 s.
37
44
  */
38
45
 
39
46
  import { describe, expect, it } from "vitest";
40
47
  import { spinUp } from "../harness.js";
41
48
  import type { ObservedMessage } from "../driver.js";
42
49
 
43
- const VISIBLE_TTFO_BUDGET_MS = 8_000;
50
+ const TTFO_BUDGET_MS = 15_000;
44
51
  const OVERALL_DEADLINE_MS = 90_000;
45
- const QUIESCENCE_MS = 8_000;
46
-
47
- // Prompt engineered to make the model emit a multi-sentence answer
48
- // over a few seconds — long enough that the streaming behaviour
49
- // is observable, short enough that turn-flush isn't tempted to fire.
50
- // Deliberately does NOT instruct the model to call `reply` — we want
51
- // to exercise the transcript-only path that the visible-answer-stream
52
- // covers.
52
+ const QUIESCENCE_MS = 12_000;
53
+
54
+ // Multi-step investigation prompt — designed to make the model emit
55
+ // transcript text BETWEEN tool calls, which is the assistant-content
56
+ // `text` block shape session-tail surfaces via the `text` event the
57
+ // answer-stream lane consumes. With the visible-answer-stream flag
58
+ // ON, those text events should become user-visible edit-in-place
59
+ // chat-timeline updates.
60
+ //
61
+ // We choose a research-style task because that pattern reliably
62
+ // emits `text` chunks (the model thinks out loud between Read /
63
+ // Bash steps) on most Claude versions. A pure-answer prompt (the
64
+ // previous version of this scenario) tended to make modern Claude
65
+ // jump straight to a single `reply` tool-call with no intermediate
66
+ // text — exercising the wrong path.
53
67
  const PROMPT =
54
- `Please give a four-sentence overview of how Linux page-cache ` +
55
- `interacts with mmap on a typical x86_64 server. Reply in a single ` +
56
- `message, with substantive prose. No code blocks.`;
68
+ `Investigate this step by step:\n\n` +
69
+ `1. Read \`/etc/hostname\` and tell me what host this is — write a ` +
70
+ `sentence about it.\n` +
71
+ `2. Then read \`/etc/os-release\` and tell me what OS family / version.\n` +
72
+ `3. Then read \`/proc/cpuinfo\` (head it), and tell me the CPU model + ` +
73
+ `core count.\n` +
74
+ `4. Wrap up with a one-line summary of all three.\n\n` +
75
+ `Between each step, narrate what you're finding in plain prose ` +
76
+ `(not just bullet outputs). Don't batch all your observations into ` +
77
+ `one final reply — talk as you investigate.`;
57
78
 
58
79
  interface TrailEntry {
59
80
  relMs: number;
@@ -68,9 +89,9 @@ function pad(s: string, n: number): string {
68
89
  return s.length >= n ? s : s + " ".repeat(n - s.length);
69
90
  }
70
91
 
71
- describe("uat: visible answer-stream model transcript renders live (#869 Phase 1)", () => {
92
+ describe("uat: conversational pacing on a multi-step turn", () => {
72
93
  it(
73
- "first fresh message lands within VISIBLE_TTFO_BUDGET_MS; subsequent edits grow it in place",
94
+ "first message lands within TTFO_BUDGET_MS; multiple silent messages; final answer pings",
74
95
  async () => {
75
96
  const sc = await spinUp({ agent: "test-harness" });
76
97
  try {
@@ -137,79 +158,45 @@ describe("uat: visible answer-stream — model transcript renders live (#869 Pha
137
158
  }
138
159
  console.log("=================================================\n");
139
160
 
140
- // ── Regression assertions ─────────────────────────────────
141
-
142
- const fresh = trail.filter((e) => e.kind === "fresh");
143
- const edits = trail.filter((e) => e.kind === "edit");
161
+ // ── Pacing assertions ─────────────────────────────────────
144
162
 
145
- // (1) at least one fresh message landed
163
+ // (1) at least one bot message landed
146
164
  expect(
147
- fresh.length,
148
- `no fresh bot replies observed — either the agent isn't ` +
149
- `responding OR the visible-answer-stream flag is OFF ` +
150
- `(SWITCHROOM_VISIBLE_ANSWER_STREAM not set on the target ` +
151
- `agent's container env). Re-check the agent's compose ` +
152
- `environment.`,
165
+ trail.length,
166
+ `no bot replies observed — the agent isn't responding.`,
153
167
  ).toBeGreaterThanOrEqual(1);
154
168
 
155
- // (2) first fresh landed within the TTFO budget
156
- const ttfoMs = fresh[0].relMs;
169
+ // (2) first message landed within TTFO budget
170
+ const ttfoMs = trail[0].relMs;
157
171
  expect(
158
172
  ttfoMs,
159
- `TTFO ${ttfoMs}ms exceeded the visible-answer-stream ` +
160
- `budget of ${VISIBLE_TTFO_BUDGET_MS}ms. Either the model ` +
161
- `was unusually slow to emit its first text chunk, OR the ` +
162
- `visible answer-stream is not active. Default behaviour ` +
163
- `(invisible draft) would never have surfaced a fresh ` +
164
- `message at all, so the most likely cause is model latency.`,
165
- ).toBeLessThanOrEqual(VISIBLE_TTFO_BUDGET_MS);
166
-
167
- // (3) first fresh message was silent (mid-turn edits don't ping)
168
- expect(
169
- fresh[0].silent,
170
- `the first fresh message pinged the user — answer-stream ` +
171
- `should send silently (disable_notification:true). A ping ` +
172
- `here means an explicit \`reply\` tool may have fired instead.`,
173
- ).toBe(true);
173
+ `TTFO ${ttfoMs}ms exceeded the budget of ${TTFO_BUDGET_MS}ms.`,
174
+ ).toBeLessThanOrEqual(TTFO_BUDGET_MS);
174
175
 
175
- // (4) at least one in-place EDIT landed on the same messageId
176
- // (this is the "live streaming" assertion — TTFO is fast AND
177
- // content grows on the same surface, not a chain of new sends).
178
- const sameAnchorEdits = edits.filter(
179
- (e) => e.messageId === firstAnchorMsgId,
180
- );
176
+ // (3) multiple messages landed — proves the model is pacing,
177
+ // not dumping a single big reply
181
178
  expect(
182
- sameAnchorEdits.length,
183
- `no in-place edits to the anchor message landed — the model ` +
184
- `either replied in a single shot (very short answer) or ` +
185
- `the streaming path isn't running. Edits observed: ` +
186
- `${edits.length}, on anchor: ${sameAnchorEdits.length}.`,
187
- ).toBeGreaterThanOrEqual(1);
188
-
189
- // (5) every edit is silent (Telegram edits don't push, but
190
- // we double-check via mtcute's flag in case the framework
191
- // ever swaps to a fresh-send pattern by accident)
192
- const loudEdits = edits.filter((e) => !e.silent);
179
+ trail.length,
180
+ `only ${trail.length} message(s) observed — the model ` +
181
+ `collapsed this multi-step prompt into a single dump. ` +
182
+ `Beat 3 pacing (per-step updates) requires multiple ` +
183
+ `messages. Either the model didn't follow the prompt ` +
184
+ `or quiescence bailed early.`,
185
+ ).toBeGreaterThanOrEqual(2);
186
+
187
+ // (4) at most one message pinged the user — beat-3 contract
188
+ // says only the FINAL answer pings; mid-turn updates pass
189
+ // `disable_notification: true`.
190
+ const pingedMessages = trail.filter((e) => !e.silent);
193
191
  expect(
194
- loudEdits.length,
195
- `${loudEdits.length} edit(s) pinged the device.`,
196
- ).toBe(0);
197
-
198
- // (6) text length grows monotonically on the anchor (streaming
199
- // by construction — once content is on the anchor, it only
200
- // accumulates)
201
- const anchorTrail = trail.filter(
202
- (e) => e.messageId === firstAnchorMsgId,
203
- );
204
- for (let i = 1; i < anchorTrail.length; i++) {
205
- expect(
206
- anchorTrail[i].textLength,
207
- `anchor message #${firstAnchorMsgId} text shrank between ` +
208
- `events ${i - 1} (len=${anchorTrail[i - 1].textLength}) ` +
209
- `and ${i} (len=${anchorTrail[i].textLength}) — ` +
210
- `streaming text should only grow.`,
211
- ).toBeGreaterThanOrEqual(anchorTrail[i - 1].textLength);
212
- }
192
+ pingedMessages.length,
193
+ `${pingedMessages.length} message(s) pinged the device — ` +
194
+ `the conversational-pacing contract allows AT MOST 1 ` +
195
+ `(the final answer). Mid-turn updates must be silent. ` +
196
+ `Pinged messages at: ${pingedMessages
197
+ .map((m) => `+${(m.relMs / 1000).toFixed(0)}s`)
198
+ .join(", ")}`,
199
+ ).toBeLessThanOrEqual(1);
213
200
  } finally {
214
201
  await sc.tearDown();
215
202
  }