switchroom 0.14.57 → 0.14.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -94,7 +94,7 @@ import { classifyInbound } from '../inbound-classifier.js'
94
94
  import * as silencePoke from '../silence-poke.js'
95
95
  import * as pendingProgress from '../pending-work-progress.js'
96
96
  import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
97
- import { isFinalAnswerReply } from '../final-answer-detect.js'
97
+ import { isFinalAnswerReply, isSubstantiveFinalReply } from '../final-answer-detect.js'
98
98
  import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
99
99
  import { parseVisibleAnswerStreamEnabled } from '../answer-stream-flag.js'
100
100
  import { type SessionEvent } from '../session-tail.js'
@@ -278,10 +278,16 @@ import { handleRequestDriveApproval } from './drive-write-approval.js'
278
278
  import { handleRequestMs365Approval } from './ms365-write-approval.js'
279
279
  import { buildDiffPreviewCard } from './diff-preview-card.js'
280
280
  import { createPendingInboundBuffer, redeliverBufferedInbound, idleDrainTick } from './pending-inbound-buffer.js'
281
+ import {
282
+ ObligationLedger,
283
+ buildObligationRepresentInbound,
284
+ obligationEscalationText,
285
+ } from './obligation-ledger.js'
281
286
  import { createInboundSpool } from './inbound-spool.js'
282
287
  import { purgeStaleTurnsForChat } from './turn-state-purge.js'
283
288
  import { decideInboundDelivery } from './inbound-delivery-gate.js'
284
289
  import { mayDrainBufferedInbound, shouldArmNoReplyDrain } from './serialize-drain-gate.js'
290
+ import { decideFeedReopen } from './feed-reopen-gate.js'
285
291
  import { resolveAnswerThreadId } from './answer-thread-resolve.js'
286
292
  import {
287
293
  createDeliveryQueue,
@@ -1379,6 +1385,20 @@ const DELIVERY_CONFIRM_TIMEOUT_MS =
1379
1385
  const DELIVERY_CONFIRM_SWEEP_MS = 5_000
1380
1386
  const deliveryQueue = createDeliveryQueue<InboundMessage>()
1381
1387
 
1388
+ // ─── Deterministic delivery-obligation ledger (systems-analysis PR2) ──────────
1389
+ // An inbound is an OBLIGATION (keyed origin_turn_id) that is OPEN at receipt and
1390
+ // CLOSED only by an observable substantive reply resolving to that origin —
1391
+ // never the model's words. An open obligation that survives a turn boundary is
1392
+ // re-presented (bounded) until it closes, so a message the model read but never
1393
+ // answered (the marko 715 drop) cannot be silently lost. ADDITIVE + flagged: it
1394
+ // runs ALONGSIDE the existing acks/spool/buffer (PR3 retires the redundant
1395
+ // pieces). Default OFF — the canary turns it on (713/715 interleave UAT) before
1396
+ // any fleet activation. When off, every hook below is a no-op → zero change.
1397
+ const OBLIGATION_LEDGER_ENABLED = process.env.SWITCHROOM_OBLIGATION_LEDGER === '1'
1398
+ const OBLIGATION_REPRESENT_MAX = 2
1399
+ const OBLIGATION_SWEEP_MS = 5_000
1400
+ const obligationLedger = new ObligationLedger(OBLIGATION_REPRESENT_MAX)
1401
+
1382
1402
  // ─── Serialize-until-replied (multitopic reply-routing) ───────────────────
1383
1403
  // Component 1 (deliver-before-drain gate). A buffered cross-topic inbound
1384
1404
  // drains ONLY after the just-ended turn delivered its reply to its own
@@ -1419,6 +1439,35 @@ const TOPIC_FRAMING_ENABLED =
1419
1439
  // → no placeholder (the 👀 ack reaction still fires). Delete-on-answer.
1420
1440
  const QUEUED_STATUS_UX_ENABLED =
1421
1441
  process.env.SWITCHROOM_QUEUED_STATUS_UX !== '0'
1442
+ // Feed-reopen-after-ack. When a tool label arrives for a turn already
1443
+ // marked finalAnswerDelivered, the model is still WORKING — so the earlier
1444
+ // "final" reply was an interim ACK (an ack-first reply pings or runs ≥200
1445
+ // chars, both of which isFinalAnswerReply classifies as final). Re-open the
1446
+ // live activity feed for the post-ack work instead of dropping the label.
1447
+ // Kill switch off (=0) → legacy behaviour: the label is dropped and the
1448
+ // post-ack feed stays dark. See `feed-reopen-gate.ts` for the rationale and
1449
+ // the finalAnswerDelivered-consumer interactions.
1450
+ const FEED_REOPEN_AFTER_ACK_ENABLED =
1451
+ process.env.SWITCHROOM_FEED_REOPEN_AFTER_ACK !== '0'
1452
+
1453
+ // Activity-feed heartbeat (PR1). The feed is pull-only — it only re-renders on
1454
+ // a tool_label event, so a long single step that emits no new label leaves the
1455
+ // feed frozen on "→ doing X" for tens of seconds (the marko ~26s freeze). The
1456
+ // heartbeat re-renders the live feed every FEED_HEARTBEAT_TICK_MS with a
1457
+ // climbing " · Ns" elapsed on the in-progress line, but only once the current
1458
+ // step has run >= FEED_HEARTBEAT_MIN_STALE_MS (so a normally-advancing feed is
1459
+ // untouched). Kill switch: SWITCHROOM_FEED_HEARTBEAT=0. Default on.
1460
+ const FEED_HEARTBEAT_ENABLED = process.env.SWITCHROOM_FEED_HEARTBEAT !== '0'
1461
+ const FEED_HEARTBEAT_TICK_MS = 6_000
1462
+ const FEED_HEARTBEAT_MIN_STALE_MS = 6_000
1463
+
1464
/**
 * Format an elapsed duration as the compact suffix shown on the live feed's
 * in-progress line, e.g. "18s" or "1m05s".
 *
 * Minutes are never rolled over into hours: 3_600_000 ms renders as "60m00s".
 *
 * @param ms Elapsed wall-clock milliseconds. Negative or non-finite input
 *   (for example a `Date.now()` delta taken across a backwards clock
 *   adjustment) is clamped to zero so the feed never shows "-1s" or "NaNs".
 * @returns Whole seconds below one minute ("18s"); otherwise minutes followed
 *   by zero-padded seconds ("1m05s").
 */
function formatFeedElapsed(ms: number): string {
  // Clamp first: Date.now() is not monotonic, so a delta can go negative.
  const totalSeconds = Number.isFinite(ms) && ms > 0 ? Math.floor(ms / 1000) : 0
  if (totalSeconds < 60) return `${totalSeconds}s`
  const minutes = Math.floor(totalSeconds / 60)
  const seconds = String(totalSeconds % 60).padStart(2, '0')
  return `${minutes}m${seconds}s`
}
1422
1471
 
1423
1472
  /**
1424
1473
  * Authoritative "is a turn in flight?" for every gate that previously
@@ -1552,6 +1601,20 @@ type CurrentTurn = {
1552
1601
  // even though `replyCalled` is true — the #1664 case where the real answer
1553
1602
  // ended up as plain transcript text rendered into an ephemeral draft.
1554
1603
  finalAnswerDelivered: boolean
1604
+ // Feed-reopen-after-ack refinement — whether the reply that set
1605
+ // `finalAnswerDelivered` was a *substantive* final answer (stream
1606
+ // `done`, or ≥200 chars) as opposed to a short pinging interim ACK.
1607
+ // Set via `isSubstantiveFinalReply` at every site that sets
1608
+ // `finalAnswerDelivered = true`. The tool_label handler re-opens the
1609
+ // live activity feed ONLY when `finalAnswerDelivered && !finalAnswer-
1610
+ // Substantive` (the prior "final" was an ack). After a genuine final
1611
+ // answer this stays true, so routine post-answer housekeeping (memory
1612
+ // write / TodoWrite / Bash — non-surface tools that reach the handler)
1613
+ // does NOT reopen and does NOT reset `finalAnswerDelivered`, which would
1614
+ // otherwise spuriously trip the silent-end re-prompt → duplicate answer.
1615
+ // Reset to false on every fresh-turn enqueue alongside
1616
+ // `finalAnswerDelivered`.
1617
+ finalAnswerSubstantive: boolean
1555
1618
  // #1675 (over-ping safety net): wall-clock ms of the first reply
1556
1619
  // this turn that landed with `disable_notification: false` (a real
1557
1620
  // device ping). The conversational-pacing contract
@@ -1628,6 +1691,12 @@ type CurrentTurn = {
1628
1691
  activityInFlight: Promise<void> | null
1629
1692
  activityPendingRender: string | null
1630
1693
  activityLastSentRender: string | null
1694
+ // Wall-clock anchor for the newest in-progress feed step — set each time a
1695
+ // tool_label re-renders the feed. The heartbeat (`feedHeartbeatTick`) reads
1696
+ // it to show a climbing " · Ns" elapsed on the live line so a long single
1697
+ // step that emits no new label doesn't read as frozen (the feed is otherwise
1698
+ // pull-only). undefined until the first label of the turn renders.
1699
+ lastToolLabelAt?: number
1631
1700
  // Accumulating friendly-action feed for this turn. Each non-surface
1632
1701
  // tool_label appends a line via `appendActivityLabel`; the feed renders
1633
1702
  // (via `renderActivityFeed`) as a capped chronological list into the
@@ -1702,6 +1771,69 @@ function findTurnByOriginId(originTurnId: string | null | undefined): CurrentTur
1702
1771
  return recentTurnsById.get(originTurnId) ?? null
1703
1772
  }
1704
1773
 
1774
/**
 * PR2 obligation-ledger CLOSE. Call this when a SUBSTANTIVE final answer lands
 * (callers gate on `finalAnswerSubstantive`, the #2141 signal) — never for a
 * bare interim ack.
 *
 * The obligation discharged is the one for the SAME origin the answer routes
 * to: the turn matching the `origin_turn_id` the model echoed, else the live
 * turn. So 713's reply closes 713's obligation even after currentTurn flipped
 * to 715, and 715 stays open until ITS own substantive answer. An ack does NOT
 * close, so ack-then-ghost is re-presented rather than re-dropped.
 * `turn.turnId` equals the obligation's origin id (both are
 * deriveTurnId(chat, thread, messageId) of the same inbound).
 *
 * No-op unless the ledger flag is on.
 *
 * NOTE residual: a genuinely SHORT answer (<200 chars, not a stream-done)
 * reads as non-substantive and won't close → a bounded re-ask (≤2) then one
 * operator-visible nudge — the accepted double-ask tradeoff, measured in the
 * canary.
 *
 * @param args Raw reply-tool arguments; only `origin_turn_id` is read, and
 *   only when it is a string.
 * @param liveTurn The turn the reply is being attributed to, used as the
 *   fallback close target when no echoed origin resolves.
 */
function closeObligationOnSubstantiveReply(
  args: Record<string, unknown>,
  liveTurn: CurrentTurn | null | undefined,
): void {
  if (!OBLIGATION_LEDGER_ENABLED) return
  // `args` is untyped tool input: narrow at runtime instead of asserting, so a
  // non-string origin_turn_id is treated as "not echoed" rather than passed on.
  const rawOriginId = args.origin_turn_id
  const echoedOriginId = typeof rawOriginId === 'string' ? rawOriginId : undefined
  const echoedTurn = findTurnByOriginId(echoedOriginId)
  const target = obligationLedger.resolveCloseTarget(echoedTurn?.turnId, liveTurn?.turnId)
  if (target != null) obligationLedger.close(target)
}
1797
+
1798
/**
 * PR2 obligation-ledger OPEN. Registers a fresh user inbound as an unanswered
 * obligation at the moment of receipt.
 *
 * Must be invoked BEFORE the buffer-until-idle / deliver split so that a
 * mid-turn cross-topic inbound (the 715 case: buffered while another turn
 * runs) is tracked as well. Drain paths re-deliver buffered inbounds via
 * sendToAgent rather than handleInbound, which makes handleInbound the ONLY
 * place that sees every fresh inbound.
 *
 * Uses the same gate as delivery-tracking: only real user turns are tracked.
 * Synthetic / steering / `!` interrupt / empty inbounds are excluded, and
 * deriveTurnId returning null is treated as "nothing to track". The ledger
 * call is `openIfAbsent`, so opening at buffer-time and again at a later
 * delivery is safe.
 *
 * No-op unless the ledger flag is on.
 *
 * @param inboundMsg The inbound message as received.
 * @param gate Classification of the inbound already computed by the caller.
 */
function openObligationFromInbound(
  inboundMsg: InboundMessage,
  gate: { isSteering: boolean; isInterrupt: boolean; effectiveText: string },
): void {
  if (!OBLIGATION_LEDGER_ENABLED) return

  const { isSteering, isInterrupt, effectiveText } = gate
  const tracked = shouldTrackDelivery({
    isSteering,
    isInterrupt,
    hasSource: inboundMsg.meta?.source != null,
    effectiveText,
  })
  if (!tracked) return

  const { chatId, threadId, messageId } = inboundMsg
  const originTurnId = deriveTurnId(chatId, threadId, messageId)
  if (originTurnId == null) return

  obligationLedger.openIfAbsent({
    originTurnId,
    chatId,
    threadId,
    messageId,
    text: inboundMsg.text ?? '',
    openedAt: Date.now(),
  })
}
1836
+
1705
1837
  /**
1706
1838
  * Component 5 — post a "Queued — replying in another topic first" status
1707
1839
  * into a cross-topic buffered message's OWN topic. Fire-and-forget through
@@ -4675,6 +4807,51 @@ const inboundSpool = STATIC
4675
4807
  },
4676
4808
  })
4677
4809
  const pendingInboundBuffer = createPendingInboundBuffer({ spool: inboundSpool })
4810
+
4811
// PR2 obligation-ledger idle sweep. An OPEN obligation is re-presented only at
// a CLEAN idle — no turn in flight AND an empty inbound buffer — so the
// existing buffer-drain has already had its turn and whatever is still OPEN is
// "delivered but never answered" (the 715 gap). Re-presenting pushes a
// synthetic must-answer inbound through the SAME buffer→drain path (idempotent
// OPEN via meta.source; reuses tested delivery). Bounded: once the re-present
// budget is exhausted, escalate to ONE operator-visible "did I miss this?" and
// close — no loop. No-op unless the flag is on; gated on the same idle
// predicate as the drains.
function obligationSweep(): void {
  if (!OBLIGATION_LEDGER_ENABLED) return
  if (!obligationLedger.hasOpen()) return
  // A turn is running — let it finish/answer first.
  if (turnInFlightForGate()) return
  const agentName = process.env.SWITCHROOM_AGENT_NAME ?? ''
  // The existing drain runs first; waiting here avoids a double-present.
  if (pendingInboundBuffer.depth(agentName) > 0) return

  const verdict = obligationLedger.decideAtIdle()
  const pending = verdict.obligation
  if (verdict.action === 'none' || pending == null) return

  if (verdict.action !== 'represent') {
    // Escalate — close FIRST so the loop ends even if the send fails.
    obligationLedger.close(pending.originTurnId)
    process.stderr.write(
      `telegram gateway: obligation escalated (exhausted ${OBLIGATION_REPRESENT_MAX} re-presents) origin=${pending.originTurnId}\n`,
    )
    const threadId = pending.threadId
    void robustApiCall(
      () =>
        bot.api.sendMessage(
          pending.chatId,
          obligationEscalationText(pending),
          threadId != null ? { message_thread_id: threadId } : {},
        ),
      {
        chat_id: pending.chatId,
        ...(threadId != null ? { threadId } : {}),
        verb: 'obligation.escalate',
      },
    ).catch((err) => {
      process.stderr.write(`telegram gateway: obligation escalation send failed: ${err}\n`)
    })
    return
  }

  // Re-present: queue the synthetic inbound, then record the attempt.
  pendingInboundBuffer.push(agentName, buildObligationRepresentInbound(pending, Date.now()))
  const attempt = obligationLedger.markRepresented(pending.originTurnId)
  process.stderr.write(
    `telegram gateway: obligation re-presented origin=${pending.originTurnId} attempt=${attempt}/${OBLIGATION_REPRESENT_MAX}\n`,
  )
}
// Arm the periodic sweep only outside STATIC mode and with the ledger enabled;
// unref() keeps the timer from holding the process open.
if (!STATIC && OBLIGATION_LEDGER_ENABLED) {
  const sweepTimer = setInterval(obligationSweep, OBLIGATION_SWEEP_MS)
  sweepTimer.unref()
}
4854
+
4678
4855
  // Honest-restart-resume: inject the boot resume/report inbound built by the
4679
4856
  // registry classifier above. When the spool exists we only PUT it (the
4680
4857
  // boot-replay loop below pulls it into the in-memory buffer exactly once via
@@ -6305,6 +6482,13 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
6305
6482
  })
6306
6483
  ) {
6307
6484
  turn.finalAnswerDelivered = true
6485
+ // Feed-reopen refinement: a substantive merged silent-anchor
6486
+ // answer must NOT re-open the feed on post-answer housekeeping.
6487
+ turn.finalAnswerSubstantive = isSubstantiveFinalReply({
6488
+ text: decision.mergedText,
6489
+ disableNotification,
6490
+ })
6491
+ if (turn.finalAnswerSubstantive) closeObligationOnSubstantiveReply(args, turn)
6308
6492
  }
6309
6493
  outboundDedup.record(
6310
6494
  chat_id,
@@ -6644,6 +6828,10 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
6644
6828
  // end re-prompt from spuriously firing on a delivered final.
6645
6829
  if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
6646
6830
  turn.finalAnswerDelivered = true
6831
+ // Feed-reopen refinement: track whether this final was substantive
6832
+ // (≥200 chars or stream-done — not a short pinging ack) so post-answer
6833
+ // housekeeping tool work does NOT re-open the feed / trip silent-end.
6834
+ turn.finalAnswerSubstantive = isSubstantiveFinalReply({ text: rawText, disableNotification })
6647
6835
  // #1728: release the buffer gate + emit terminal 👍. Mid-turn
6648
6836
  // acks bypass this branch and remain non-events for the
6649
6837
  // reaction (preserves #1713). The full turn-state teardown
@@ -6651,6 +6839,9 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
6651
6839
  // the `turn_end` handler when it lands; this only fires the
6652
6840
  // observable side effects that #1718 deferred unconditionally.
6653
6841
  finalizeStatusReaction(chat_id, threadId, 'done')
6842
+ // PR2: close this origin's obligation on a SUBSTANTIVE final answer
6843
+ // (after finalize so the reaction guard test's anchor window is stable).
6844
+ if (turn.finalAnswerSubstantive) closeObligationOnSubstantiveReply(args, turn)
6654
6845
  }
6655
6846
  // v0.13.30 follow-up — release the buffer gate on EVERY reply
6656
6847
  // finalize, not just on `isFinalAnswerReply`. The narrow
@@ -6987,6 +7178,15 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
6987
7178
  })
6988
7179
  ) {
6989
7180
  turn.finalAnswerDelivered = true
7181
+ // Feed-reopen refinement: a stream_reply done=true (or a ≥200-char
7182
+ // chunk) is substantive; a short pinging non-done chunk is an ack. Only
7183
+ // the latter should re-open the feed on subsequent post-answer work.
7184
+ turn.finalAnswerSubstantive = isSubstantiveFinalReply({
7185
+ text: (args.text as string | undefined) ?? '',
7186
+ disableNotification: args.disable_notification === true,
7187
+ done: args.done === true,
7188
+ })
7189
+ if (turn.finalAnswerSubstantive) closeObligationOnSubstantiveReply(args, turn)
6990
7190
  // #1744 follow-up — stream_reply edge case. The first-emit gate at
6991
7191
  // L5178 only clears silent-end state on the FIRST emit of a stream.
6992
7192
  // If a stream's first emit was ack-shaped (disable_notification:true,
@@ -8374,12 +8574,12 @@ const FOREGROUND_SUBAGENT_ACCUM_MAX = 12
8374
8574
  * order; the single-sub-agent common case nests precisely under its
8375
8575
  * Delegating line.
8376
8576
  */
8377
function composeTurnActivity(turn: CurrentTurn, final = false, liveSuffix = ''): string | null {
  // Concatenate every foreground sub-agent narrative, in the order
  // `foregroundSubAgents.values()` yields them, into one flat list of nested
  // lines. `final` and `liveSuffix` are forwarded to the renderer untouched.
  const nestedLines: string[] = Array.from(turn.foregroundSubAgents.values()).flatMap(
    (narrative) => [...narrative],
  )
  return renderActivityFeedWithNested(turn.mirrorLines, nestedLines, final, liveSuffix)
}
8384
8584
 
8385
8585
  /**
@@ -8452,6 +8652,35 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
8452
8652
  }
8453
8653
  }
8454
8654
 
8655
/**
 * Heartbeat tick (PR1). Keeps the live activity feed visibly advancing while a
 * single long step runs without emitting a new tool_label: the feed is
 * re-rendered with a climbing " · Ns" elapsed suffix on the in-progress line,
 * through the SAME single-flight drain path the tool_label handler uses (no
 * separate transport, no race).
 *
 * Does nothing unless there is a current turn with a live feed message whose
 * newest labelled step is at least FEED_HEARTBEAT_MIN_STALE_MS old. It also
 * stands down once the final answer has landed (the feed is handing off) and
 * after the turn ends (activityMessageId nulled by clearActivitySummary).
 * Deterministic + framework-owned — never depends on the model.
 */
function feedHeartbeatTick(): void {
  const liveTurn = currentTurn
  // No turn, or no live feed yet / feed already cleared.
  if (liveTurn == null || liveTurn.activityMessageId == null) return
  // Feed has been handed off to the answer.
  if (liveTurn.finalAnswerDelivered) return
  // Feed is not being driven by a labelled step.
  const stepStartedAt = liveTurn.lastToolLabelAt
  if (stepStartedAt == null) return

  const staleForMs = Date.now() - stepStartedAt
  // Step is fresh; the feed is advancing on its own.
  if (staleForMs < FEED_HEARTBEAT_MIN_STALE_MS) return

  const elapsedSuffix = ` · ${formatFeedElapsed(staleForMs)}`
  const nextRender = composeTurnActivity(liveTurn, false, elapsedSuffix)
  if (nextRender == null) return

  liveTurn.activityPendingRender = nextRender
  // Single-flight: only start a drain when none is already running.
  if (liveTurn.activityInFlight == null) {
    liveTurn.activityInFlight = drainActivitySummary(liveTurn)
  }
}
// Arm the heartbeat only outside STATIC mode and when not disabled by its
// kill switch; unref() keeps the timer from holding the process open.
if (!STATIC && FEED_HEARTBEAT_ENABLED) {
  const heartbeatTimer = setInterval(feedHeartbeatTick, FEED_HEARTBEAT_TICK_MS)
  heartbeatTimer.unref()
}
8683
+
8455
8684
  /**
8456
8685
  * Reconcile the activity summary when the model's reply tool takes over as the
8457
8686
  * authoritative surface. Awaits any in-flight render so we don't race a stale
@@ -8577,6 +8806,7 @@ function handleSessionEvent(ev: SessionEvent): void {
8577
8806
  gatewayReceiveAt: startedAt,
8578
8807
  replyCalled: false,
8579
8808
  finalAnswerDelivered: false,
8809
+ finalAnswerSubstantive: false,
8580
8810
  firstPingAt: null,
8581
8811
  silentAnchorMessageId: null,
8582
8812
  silentAnchorText: '',
@@ -8794,9 +9024,59 @@ function handleSessionEvent(ev: SessionEvent): void {
8794
9024
  // the FINAL answer would re-`sendMessage` a fresh feed below it (flicker).
8795
9025
  // Safe ordering: `tool_label` is real-time (PreToolUse, ~250ms) while
8796
9026
  // `finalAnswerDelivered` is set from executeReply on the final answer.
8797
- if (turn.finalAnswerDelivered) return
9027
+ //
9028
+ // Feed-reopen-after-ack: a tool label here means the model is STILL
9029
+ // working. If the turn was already marked finalAnswerDelivered, the
9030
+ // "final" reply MIGHT have been an interim ACK ("on it, checking
9031
+ // Brevo…" pings, classified final by isFinalAnswerReply), so the
9032
+ // post-ack work had no live feed — the gate above dropped every label.
9033
+ //
9034
+ // ACK-ONLY refinement: finalAnswerDelivered latches true for BOTH a
9035
+ // short pinging ack AND a substantive answer. Reopening unconditionally
9036
+ // is harmful after a GENUINE final answer — routine post-answer
9037
+ // housekeeping (memory write / TodoWrite / Bash; non-surface tools that
9038
+ // reach here) would reset finalAnswerDelivered=false and trip the
9039
+ // silent-end re-prompt (NOT zero-outbound gated) → duplicate answer. So
9040
+ // reopen ONLY when the prior final was a short ack
9041
+ // (finalAnswerSubstantive=false). When it was substantive, drop the
9042
+ // label (legacy gate) so the genuine final stays delivered.
9043
+ //
9044
+ // On reopen: reclassify the interim ack — the turn has NOT delivered its
9045
+ // final answer while still doing tool work. Reset the flag and clear
9046
+ // activityMessageId so a FRESH feed message opens below the ack, then
9047
+ // proceed normally. When the model's REAL final answer lands,
9048
+ // executeReply / stream_reply re-set finalAnswerDelivered=true (and
9049
+ // finalAnswerSubstantive) and the feed gates off again. The reset keeps
9050
+ // the #2137 serialize gate HOLDING the next topic mid-work (next-topic
9051
+ // liveness is the bounded no-reply timer's job) and lets the silent-end
9052
+ // re-prompt fire if the turn ends on only an ack.
9053
+ // Kill switch SWITCHROOM_FEED_REOPEN_AFTER_ACK=0 → legacy `return`.
9054
+ if (turn.finalAnswerDelivered) {
9055
+ // decideFeedReopen returns dropLabel (legacy return) or the reset
9056
+ // deltas: finalAnswerDelivered→false (the turn has NOT delivered its
9057
+ // final answer while still doing tool work), activityMessageId→null
9058
+ // (a FRESH feed message opens below the ack), activityLastSentRender
9059
+ // →null (so the drain loop's `pending !== lastSent` guard never
9060
+ // mistakes the fresh render for the ack's finalized one and skips it).
9061
+ const reopen = decideFeedReopen({
9062
+ finalAnswerDelivered: turn.finalAnswerDelivered,
9063
+ // ACK-ONLY: reopen only when the prior final was a short ack, not a
9064
+ // substantive answer — otherwise post-answer housekeeping would
9065
+ // reset finalAnswerDelivered and trip the silent-end re-prompt.
9066
+ finalAnswerSubstantive: turn.finalAnswerSubstantive,
9067
+ enabled: FEED_REOPEN_AFTER_ACK_ENABLED,
9068
+ })
9069
+ if (reopen.dropLabel) return
9070
+ turn.finalAnswerDelivered = reopen.reset!.finalAnswerDelivered
9071
+ turn.activityMessageId = reopen.reset!.activityMessageId
9072
+ turn.activityLastSentRender = reopen.reset!.activityLastSentRender
9073
+ }
8798
9074
  const rendered = appendActivityLabel(turn.mirrorLines, ev.label)
8799
9075
  if (rendered != null) {
9076
+ // A new tool label = a new live step → re-anchor the heartbeat clock so
9077
+ // the " · Ns" elapsed restarts from this step (and the feed itself just
9078
+ // advanced, so it isn't stale).
9079
+ turn.lastToolLabelAt = Date.now()
8800
9080
  // Recompose so any active foreground sub-agent's nested block (Model A)
8801
9081
  // is preserved when the parent appends its own step. composeTurnActivity
8802
9082
  // == the flat render when no foreground sub-agent is active.
@@ -9148,6 +9428,11 @@ function handleSessionEvent(ev: SessionEvent): void {
9148
9428
  turn.answerStream = null
9149
9429
  streamFinalizedAsAnswer = true
9150
9430
  turn.finalAnswerDelivered = true
9431
+ // Feed-reopen refinement: the stream is being finalized as the
9432
+ // turn's answer (the model's terminal text), i.e. done=true by
9433
+ // construction → substantive. Post-answer housekeeping must NOT
9434
+ // re-open the feed.
9435
+ turn.finalAnswerSubstantive = true
9151
9436
  // Capture the old streamed message_id BEFORE materialize so
9152
9437
  // we can delete it after the fresh ping send. materialize()
9153
9438
  // overwrites `streamMsgId` internally with the new send's id;
@@ -9424,6 +9709,12 @@ function handleSessionEvent(ev: SessionEvent): void {
9424
9709
  // it keeps the captured `turn` atom internally consistent for any
9425
9710
  // future reader.)
9426
9711
  turn.finalAnswerDelivered = true
9712
+ // Feed-reopen refinement: turn-flush delivers the model's terminal
9713
+ // transcript text as the genuine answer (not an ack). Default to
9714
+ // substantive so a late tool label does NOT re-open the feed / trip
9715
+ // the silent-end re-prompt. (Belt-and-braces, like the set above —
9716
+ // this branch returns before any further tool_label can arrive.)
9717
+ turn.finalAnswerSubstantive = true
9427
9718
 
9428
9719
  // #654 deterministic double-message fix. Hand off the pinned
9429
9720
  // progress card BEFORE state reset so the driver doesn't keep
@@ -11336,6 +11627,15 @@ async function handleInbound(
11336
11627
  return
11337
11628
  }
11338
11629
 
11630
+ // PR2 obligation-ledger OPEN — BEFORE the buffer-until-idle / deliver split so
11631
+ // a mid-turn cross-topic inbound (the 715 case) is tracked whether it is
11632
+ // buffered or delivered now. Idempotent + gated; no-op when the flag is off.
11633
+ openObligationFromInbound(inboundMsg, {
11634
+ isSteering,
11635
+ isInterrupt: interrupt.isInterrupt,
11636
+ effectiveText,
11637
+ })
11638
+
11339
11639
  if (
11340
11640
  decideInboundDelivery({
11341
11641
  turnInFlight: turnInFlightAtReceipt,
@@ -0,0 +1,216 @@
1
+ /**
2
+ * Deterministic delivery-obligation ledger (systems-analysis PR2).
3
+ *
4
+ * The framework today tracks whether an inbound was DELIVERED (read by claude),
5
+ * never whether it was ANSWERED — so a message claude reads but never replies to
6
+ * (the marko msg-715 verbal-deferral drop) is silently lost. This ledger adds
7
+ * the one missing invariant, and it is model-INDEPENDENT by construction:
8
+ *
9
+ * An inbound is an OBLIGATION keyed by its origin_turn_id. It is OPEN the
10
+ * moment the message is received, and CLOSED only by an observable framework
11
+ * event — a reply-tool call whose resolved target equals that origin_turn_id
12
+ * AND that carries a substantive answer (not a bare interim ack). The engine
13
+ * may go idle only when no obligation is OPEN; an OPEN obligation that
14
+ * survives a turn boundary is re-presented as a fresh must-answer turn until
15
+ * it closes, bounded so a mis-close degrades to ONE operator-visible nudge
16
+ * rather than an infinite re-ask loop.
17
+ *
18
+ * This file is PURE state + decisions — no Telegram, no claude, no timers. The
19
+ * gateway owns OPEN/CLOSE/re-present I/O and calls in here. Pure ⇒ unit-testable
20
+ * (see tests/obligation-ledger.test.ts), the seam the analysis demanded.
21
+ *
22
+ * The close event (substantive reply resolving to origin) is observed by the
23
+ * framework, never the model's narration/promise — that is the whole point: the
24
+ * 715 "I'll handle thread 3 as its own turn" does NOT close the obligation.
25
+ */
26
+
27
+ import type { InboundMessage } from './ipc-protocol.js'
28
+
29
/** Caller-supplied facts needed to open an obligation for a received inbound. */
export interface ObligationInput {
  /** deriveTurnId(chat, thread, messageId) — the stable identity. */
  originTurnId: string
  chatId: string
  threadId?: number
  messageId: number
  /** Original inbound text (may be truncated by the caller for re-presentation). */
  text: string
  /** Wall-clock ms the obligation was first opened. */
  openedAt: number
}

/**
 * A tracked, still-unanswered inbound. Everything captured at open time is
 * immutable; only the re-presentation counter changes afterwards.
 */
export interface Obligation extends Readonly<ObligationInput> {
  /** How many times it has been re-presented (0 on first open). */
  representCount: number
}

/** What the gateway should do for the oldest open obligation at an idle boundary. */
export type LedgerAction = 'none' | 'represent' | 'escalate'

/** Outcome of an idle-boundary decision; `obligation` is absent when `action` is 'none'. */
export interface LedgerDecision {
  action: LedgerAction
  obligation?: Obligation
}
59
+
60
/**
 * In-memory set of OPEN delivery obligations, keyed by origin turn id.
 *
 * Pure state + decisions: the methods here only read/update the internal map
 * and return values; the caller performs any I/O (re-presenting, escalating)
 * and then reports back via `markRepresented` / `close`.
 */
export class ObligationLedger {
  /** Re-presentation bound used when none (or an unusable one) is supplied. */
  private static readonly DEFAULT_MAX_REPRESENTS = 2

  /** Open obligations by originTurnId. Map iteration order is insertion order. */
  private readonly open = new Map<string, Obligation>()

  /** Re-presentations allowed before `decideAtIdle` switches to 'escalate'. */
  private readonly maxRepresents: number

  /**
   * @param maxRepresents max re-presentations before escalating to an
   *   operator-visible nudge instead of re-asking again. Default 2. A value
   *   that cannot act as a bound (NaN, ±Infinity, negative) falls back to the
   *   default: with NaN or Infinity the `representCount >= maxRepresents`
   *   check could never become true and the re-ask loop would be unbounded.
   */
  constructor(maxRepresents = 2) {
    this.maxRepresents =
      Number.isFinite(maxRepresents) && maxRepresents >= 0
        ? maxRepresents
        : ObligationLedger.DEFAULT_MAX_REPRESENTS
  }

  /**
   * Open an obligation if not already tracked. Idempotent on originTurnId — a
   * message that is buffered AND later enqueued opens once, keeping the first
   * entry (its openedAt and accumulated representCount).
   *
   * @returns true if newly opened, false if it was already tracked.
   */
  openIfAbsent(input: ObligationInput): boolean {
    if (this.open.has(input.originTurnId)) return false
    // Store a copy so later mutation of the caller's object cannot leak in.
    this.open.set(input.originTurnId, { ...input, representCount: 0 })
    return true
  }

  /**
   * Close by origin id.
   *
   * @returns true if an obligation was open and is now closed; false for a
   *   null/undefined id or an id that is not open.
   */
  close(originTurnId: string | null | undefined): boolean {
    if (originTurnId == null) return false
    return this.open.delete(originTurnId)
  }

  /** Whether the given origin id currently has an open obligation. */
  isOpen(originTurnId: string): boolean {
    return this.open.has(originTurnId)
  }

  /** Whether any obligation is open. */
  hasOpen(): boolean {
    return this.open.size > 0
  }

  /** Number of open obligations. */
  size(): number {
    return this.open.size
  }

  /**
   * Open obligations, oldest first. For introspection/tests. The array is
   * fresh, but its elements are the ledger's own entries (not copies).
   */
  list(): Obligation[] {
    return [...this.open.values()].sort((a, b) => a.openedAt - b.openedAt)
  }

  /**
   * The oldest open obligation, or undefined. On equal openedAt the one opened
   * first wins (strict `<` over insertion-ordered iteration).
   */
  private oldest(): Obligation | undefined {
    let best: Obligation | undefined
    for (const o of this.open.values()) {
      if (best === undefined || o.openedAt < best.openedAt) best = o
    }
    return best
  }

  /**
   * Decide what to do at an idle boundary (caller guarantees: no turn in flight
   * AND the inbound buffer is empty — so the existing buffer-drain has already
   * had its turn and anything still OPEN is "delivered but unanswered"). PURE —
   * does not mutate. The caller performs the side effect then calls
   * markRepresented / close accordingly.
   *
   * - 'none'      → no open obligation; the agent may idle.
   * - 'represent' → re-present `obligation` as a fresh must-answer turn.
   * - 'escalate'  → it has already been re-presented maxRepresents times; send
   *                 ONE operator-visible "did I miss this?" and close it
   *                 (caller calls close) rather than loop forever.
   */
  decideAtIdle(): LedgerDecision {
    const o = this.oldest()
    if (o === undefined) return { action: 'none' }
    if (o.representCount >= this.maxRepresents) return { action: 'escalate', obligation: o }
    return { action: 'represent', obligation: o }
  }

  /**
   * Decide which obligation a substantive reply discharges — DETERMINISTICALLY,
   * holding for any model behavior:
   *  - `echoedTurnId` (the model echoed origin_turn_id back) → authoritative;
   *    returned as-is (close() is then a no-op if it isn't actually open).
   *  - else, the live turn's obligation ONLY when UNAMBIGUOUS — exactly one
   *    obligation is open and it is the live turn's. With >1 open and no echo
   *    we cannot tell which one the reply answered; closing the live turn's
   *    could silently drop the other. So nothing is closed → the real target
   *    stays open and is re-presented (a bounded double-ask), never
   *    wrong-closed.
   *
   * @returns the id to close, or null when nothing should be closed.
   */
  resolveCloseTarget(
    echoedTurnId: string | null | undefined,
    liveTurnId: string | null | undefined,
  ): string | null {
    if (echoedTurnId != null) return echoedTurnId
    if (liveTurnId != null && this.open.size === 1 && this.open.has(liveTurnId)) return liveTurnId
    return null
  }

  /**
   * Record that an obligation was just re-presented (bumps representCount).
   *
   * @returns the new count, or 0 if the id is not open.
   */
  markRepresented(originTurnId: string): number {
    const o = this.open.get(originTurnId)
    if (o === undefined) return 0
    o.representCount += 1
    return o.representCount
  }
}
162
+
163
/** Original message preview length for re-presentation (mirrors resume builder). */
const REPRESENT_PREVIEW_MAX = 200

/**
 * Shorten `text` to at most REPRESENT_PREVIEW_MAX UTF-16 code units, ending in
 * '…' when anything was cut. Text at or under the limit is returned unchanged.
 *
 * The cut never lands between the two halves of a surrogate pair: a plain
 * `slice` could keep a high surrogate without its low half (e.g. half an
 * emoji), leaving a lone surrogate in the outgoing message. In that case the
 * dangling half is dropped, so the preview is one unit shorter.
 */
function truncateObligationPreview(text: string): string {
  if (text.length <= REPRESENT_PREVIEW_MAX) return text
  let end = REPRESENT_PREVIEW_MAX - 1
  const lastKept = text.charCodeAt(end - 1)
  // 0xD800–0xDBFF is the high-surrogate range; its partner would be cut off.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1
  return text.slice(0, end) + '…'
}

/**
 * Build the synthetic inbound that RE-PRESENTS an open obligation as a fresh
 * must-answer turn. Carries the obligation's original message_id (so the
 * reply-quote and origin routing land in the right place) and origin_turn_id in
 * meta (so the model's reply resolves back to THIS obligation → the close event
 * matches). `source: obligation_represent` marks it synthetic, so it is NOT
 * delivery-tracked and does NOT open a fresh obligation (the original stays
 * open until a substantive reply closes it). Pure — the gateway injects it via
 * the existing buffer→drain path. Context restoration (inject vs pointer) is a
 * separate layer; here we point at get_recent_messages and quote the original.
 *
 * @param o   the open obligation to re-present; not mutated.
 * @param now timestamp to stamp on the synthetic inbound (`ts`).
 * @returns the inbound message; `meta.represent_count` is the count this
 *   re-presentation will reach (current representCount + 1), as a string.
 */
export function buildObligationRepresentInbound(o: Obligation, now: number): InboundMessage {
  const preview = truncateObligationPreview(o.text)
  const topicClause = o.threadId != null ? ' in this topic' : ''
  return {
    type: 'inbound',
    chatId: o.chatId,
    // Only include threadId when the obligation has one.
    ...(o.threadId != null ? { threadId: o.threadId } : {}),
    messageId: o.messageId,
    user: 'switchroom',
    userId: 0,
    ts: now,
    text:
      `You have an earlier message${topicClause} that you started but never actually ` +
      `answered (you may have set it aside mid-work): "${preview}". Answer it now via the ` +
      `reply tool — deliver the real answer, don't just acknowledge it. If you've lost the ` +
      `surrounding context, call get_recent_messages for this chat${topicClause} first. ` +
      `That quoted text may be only the first ~200 characters of the original.`,
    meta: {
      source: 'obligation_represent',
      origin_turn_id: o.originTurnId,
      represent_count: String(o.representCount + 1),
    },
  }
}

/**
 * Build the operator-visible escalation message text, used when an obligation
 * has been re-presented maxRepresents times without closing — rather than loop
 * forever (the new failure mode this trades silent-drop for), surface ONE
 * honest "did I miss this?" and close it.
 *
 * @param o the obligation being escalated; not mutated.
 * @returns the message text, quoting a truncated preview of the original.
 */
export function obligationEscalationText(o: Obligation): string {
  const preview = truncateObligationPreview(o.text)
  return (
    `⚠️ I may have missed an earlier message and I'm not sure I answered it: ` +
    `"${preview}". If you still need it, please re-send.`
  )
}