npm - polygram - Versions diffs - 0.12.0-rc.8 → 0.12.0 - Mend

polygram 0.12.0-rc.8 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

package/config.example.json +4 -3
package/lib/claude-bin.js +14 -1
package/lib/compaction-warn.js +59 -0
package/lib/context-usage.js +93 -0
package/lib/db.js +1 -1
package/lib/error/classify.js +33 -10
package/lib/feedback/session-feedback.js +91 -0
package/lib/handlers/abort.js +87 -40
package/lib/handlers/autosteer.js +4 -0
package/lib/handlers/config-callback.js +25 -6
package/lib/handlers/config-ui.js +39 -10
package/lib/handlers/dispatcher.js +83 -0
package/lib/handlers/download.js +101 -58
package/lib/handlers/drop-redeliver.js +69 -0
package/lib/handlers/edit-correction.js +2 -0
package/lib/handlers/edit-redelivery.js +136 -0
package/lib/handlers/gate-inbound.js +188 -0
package/lib/handlers/questions.js +289 -0
package/lib/handlers/redeliver.js +122 -0
package/lib/handlers/slash-commands.js +43 -30
package/lib/history-preload.js +6 -0
package/lib/history.js +7 -1
package/lib/model-costs.js +4 -0
package/lib/process/channels-bridge-protocol.js +22 -1
package/lib/process/channels-bridge.mjs +128 -7
package/lib/process/channels-tool-dispatcher.js +105 -12
package/lib/process/cli-process.js +1277 -70
package/lib/process/hook-event-tail.js +7 -0
package/lib/process/hook-settings.js +7 -0
package/lib/process/process.js +22 -0
package/lib/process-guard.js +57 -1
package/lib/process-manager.js +120 -35
package/lib/questions/questions.js +187 -0
package/lib/questions/store.js +105 -0
package/lib/rewind/execute.js +89 -0
package/lib/rewind/fork.js +112 -0
package/lib/rewind/rewind.js +174 -0
package/lib/sdk/callbacks.js +165 -167
package/lib/session-key.js +29 -0
package/lib/telegram/album-reactions.js +50 -0
package/lib/telegram/parse.js +9 -2
package/lib/telegram/typing.js +17 -2
package/lib/tmux/startup-gate.js +44 -14
package/migrations/012-pending-questions.sql +30 -0
package/package.json +1 -1
package/polygram.js +224 -78

package/lib/process/cli-process.js CHANGED Viewed

@@ -53,6 +53,8 @@ const { createHookTail } = require('./hook-event-tail');
 // /private/tmp drift — one of the original Music-topic failures).
 const { DEFAULT_ATTACHMENT_BASE } = require('./channels-tool-dispatcher');
 const { resolveFileCaps } = require('../attachments');
+const { resolveCompactionWarnConfig } = require('../compaction-warn');
+const { readContextTokens, contextPct } = require('../context-usage');
 const { runStartupGate } = require('../tmux/startup-gate');
 const { POLYGRAM_DISPLAY_HINT } = require('../telegram/display-hint');
@@ -70,6 +72,30 @@ const DEFAULT_MCP_READY_TIMEOUT_MS = 5_000;
 // Mirrors rc.41 H4 stopGraceMs from tmux backend. 2s default = same as tmux.
 const DEFAULT_STOP_GRACE_MS = 2_000;
 const DEFAULT_TURN_QUIET_MS = 2_000;     // after first reply, wait this long for more before resolving turn
+// 0.13 D1 rung 2 (docs/0.13-channels-lifecycle-design.md §3 D1): once a turn has
+// ≥1 delivered reply AND the hook stream is live, the turn finalizes when the
+// session's whole ACTIVITY surface (hook events + the pane "esc to interrupt"
+// thinking heartbeat + bridge tool calls + replies) goes quiet for this long.
+// Calibrated against the busy-phase inter-activity gap: the pane heartbeat fires
+// on the 5s pong tick while a turn is pending, so a live claude can never be
+// "activity-quiet" — only a truly ended (or hook-and-pane-dead) tail is.
+const DEFAULT_ACTIVITY_QUIET_MS = 18_000;
+// 0.13 D2 (P3): InputLedger windows. dropConfirm = how long after the trigger
+// cycle's end an unseen/unacked non-primary entry may still be picked up as a
+// claude-side next cycle before it is declared dropped (late seen/ack cancels).
+// deliveryWatchdog = the primary pickup window: a dispatched primary with no
+// UPS and ZERO session activity gets one idempotent re-write, then (still
+// nothing) a bridge teardown onto the existing recovery path.
+const DEFAULT_DROP_CONFIRM_MS = 20_000;
+const DEFAULT_DELIVERY_WATCHDOG_MS = 10_000;
+const INPUT_LEDGER_CAP = 64;
+// 0.13 D1 P1 seen-slice: parse the pickup turn_id out of the UserPromptSubmit
+// prompt. Anchored on the RAW `<channel ` tag prefix — the bridge body-escape
+// (channels-bridge.mjs escapeChannelBody) turns every user-authored `<` into
+// `&lt;`, so a raw tag prefix is bridge-authored by construction and a pasted/
+// spoofed `turn_id="…"` in message body text can never mark a pending seen.
+// (Envelope shape verified from prod JSONL + the P0 spike — Q1.)
+const UPS_ENVELOPE_TURN_ID_RE = /<channel\s[^>]*turn_id="([0-9a-f-]{36})"/g;
 const DEFAULT_TURN_TIMEOUT_MS = 600_000; // 10 min idle cap (resets on each reply — Review F#13)
 const DEFAULT_TURN_ABSOLUTE_MS = 1_800_000; // 30 min absolute wall-clock ceiling (no reset)
 const DEFAULT_INTERRUPT_GRACE_MS = 5_000; // after Ctrl-C, wait this long for Claude to ack before synthesizing 'interrupted'
@@ -96,7 +122,10 @@ const DEFAULT_QUEUE_CAP = 50;                // Parity P2: match SDK/tmux pendin
 // catalog when new dialogs are observed in production.
 const SESSION_AGE_PROMPT_RE = /Resuming the full session[\s\S]*Resume from summary/i;
 const MID_TURN_PROMPTS = [
-  { name: 'session-age', regex: SESSION_AGE_PROMPT_RE, action: 'enter' },
+  // Review F2 (resume-dialog fix): bare Enter selects the pre-selected
+  // "Resume from summary" — which literally runs /compact. Navigate to
+  // "Resume full session as-is" instead, same as the startup-gate trigger.
+  { name: 'session-age', regex: SESSION_AGE_PROMPT_RE, action: 'keys', keys: ['Down', 'Enter'] },
 ];
 // 0.12 Phase 3.2 (Finding 0.1.A): rc.45 esc-to-interrupt liveness heartbeat.
@@ -111,6 +140,27 @@ const MID_TURN_PROMPTS = [
 // hook process.
 const STREAMING_HINT_RE = /esc to interrupt/i;
+// 0.12.0 background-work lifecycle: claude's TUI mode line shows a live
+// background-shell COUNT while a `run_in_background:true` Bash outlives its turn,
+// e.g. `⏵⏵ bypass permissions on · 1 shell · ← for agents · ↓ to manage`.
+// Confirmed on claude 2.1.158 (P0 spike — docs/0.12.0-background-work-lifecycle-
+// plan.md): the count is always-present in the viewport mode line while shells run
+// and clears IN-PLACE within ~3s when they exit (no stale scrollback).
+//
+// MODE-INDEPENDENT (prod regression fix, 2026-06-04): the original regex anchored
+// on "auto mode on", but EVERY shumorobot session runs "⏵⏵ bypass permissions on"
+// — the spike happened to be captured in auto mode. So the detector never matched
+// in prod and bg-work-status fired zero times. Anchor instead on the `⏵⏵` mode-
+// line glyph (present in auto / bypass / accept-edits modes alike); only the mode
+// label between it and `· N shell` varies. Still matched only against the captured
+// TAIL so a scrolled-off history line never trips it. R1: re-validate on each
+// pinned-claude bump (glyph + `N shell` wording).
+const BACKGROUND_SHELL_RE = /⏵⏵[^\n]*·\s*(\d+)\s+shells?\b/i;
+// How long a detached background shell may run AFTER its turn resolved (claude
+// idle) before the stall-watchdog fires one read-only self-check. Override via
+// the constructor (tests use a small value).
+const DEFAULT_BG_WORK_STALL_MS = 600_000; // 10 min
 // 0.12 Phase 3.3 (Q1 resolution): heuristic for "looks like an unknown
 // interactive prompt." Match common prompt shapes that don't appear in
 // MID_TURN_PROMPTS — operator gets a telemetry event so they can decide
@@ -118,6 +168,17 @@ const STREAMING_HINT_RE = /esc to interrupt/i;
 // — false positives surface as no-op telemetry, false negatives surface
 // as the idle-ceiling timeout (~10min).
 const UNKNOWN_PROMPT_HEURISTIC_RE = /(\?\s*$|\(y\/N\)|Yes\/No|❯\s|^\s*[12345]\.\s)/im;
+// rc.14: a previous rc (rc.11) had a BRIDGE_DEAD_RE here that matched the pane
+// line "server:polygram-bridge  no MCP server configured with that name" and
+// treated it as a dead bridge to recover from. That was a MISDIAGNOSIS: this
+// line is a BENIGN, persistent banner that `--dangerously-load-development-
+// channels` + `--strict-mcp-config` prints on EVERY healthy session — the
+// channel still delivers messages and the reply tool still works (reproduced
+// 2026-06-01 with a test MCP server that demonstrably functions). The pane
+// matcher therefore false-fired ~5s into every channels turn and KILLED
+// healthy sessions (the Music-topic "mid-turn detach" regression). Real bridge
+// loss is caught by the socket-close path (bridgeServer 'bridge-disconnected'
+// → _handleBridgeDisconnected). There is no reliable pane signal — removed.
 // Per-pattern rate limit so a dialog that lingers across multiple polls
 // doesn't spam sendControl/event emissions. Aligned with the 5s poll cadence.
 const MID_TURN_DEDUP_WINDOW_MS = 30_000;
@@ -157,8 +218,12 @@ class CliProcess extends Process {
     mcpReadyTimeoutMs = DEFAULT_MCP_READY_TIMEOUT_MS,
     stopGraceMs = DEFAULT_STOP_GRACE_MS,
     turnQuietMs = DEFAULT_TURN_QUIET_MS,
+    activityQuietMs = DEFAULT_ACTIVITY_QUIET_MS,
+    dropConfirmMs = DEFAULT_DROP_CONFIRM_MS,
+    deliveryWatchdogMs = DEFAULT_DELIVERY_WATCHDOG_MS,
     turnTimeoutMs = DEFAULT_TURN_TIMEOUT_MS,
     turnAbsoluteMs = DEFAULT_TURN_ABSOLUTE_MS,
+    bgWorkStallMs = DEFAULT_BG_WORK_STALL_MS,
     interruptGraceMs = DEFAULT_INTERRUPT_GRACE_MS,
     maxRepliesPerTurn = DEFAULT_MAX_REPLIES_PER_TURN,
     queueCap = DEFAULT_QUEUE_CAP,        // Parity P2
@@ -188,8 +253,12 @@ class CliProcess extends Process {
     this.mcpReadyTimeoutMs = mcpReadyTimeoutMs;
     this.stopGraceMs = stopGraceMs;
     this.turnQuietMs = turnQuietMs;
+    this.activityQuietMs = activityQuietMs;
+    this.dropConfirmMs = dropConfirmMs;
+    this.deliveryWatchdogMs = deliveryWatchdogMs;
     this.turnTimeoutMs = turnTimeoutMs;
     this.turnAbsoluteMs = turnAbsoluteMs;
+    this.bgWorkStallMs = bgWorkStallMs;
     this.interruptGraceMs = interruptGraceMs;
     this.maxRepliesPerTurn = maxRepliesPerTurn;
     this.queueCap = queueCap;
@@ -213,6 +282,15 @@ class CliProcess extends Process {
     // interval fires bridge-disconnected if too much time elapses.
     this.lastPongAt = 0;
     this.pongWatchdog = null;
+    // 0.12.0 background-work stall-watchdog state. `_bgWorkSince` = when a live
+    // background shell was first observed while idle (null = none); reset only
+    // when the shell count returns to 0. `_bgWorkEscalations` caps the watchdog
+    // at one read-only self-check per continuous background-work window.
+    this._bgWorkSince = null;
+    this._bgWorkEscalations = 0;
+    // Visibility (Use 3): whether a "⏳ working in background" status message is
+    // currently shown, so we emit exactly one running→cleared pair per window.
+    this._bgWorkStatusShown = false;
     // Review P2 ADV-6: token-bucket rate limit on Claude's reply tool calls.
     // Without this, a prompt-injected or runaway Claude can fire reply() 1000×
     // in a tight loop, flooding TG + saturating the daemon event loop.
@@ -236,6 +314,7 @@ class CliProcess extends Process {
     // doesn't re-invoke the dispatcher → duplicate TG send. Set is bounded
     // to RECENT_TOOL_CALL_LIMIT entries via FIFO eviction.
     this.recentToolCallIds = new Set();
+    this.recentToolCallResults = new Map();   // tool_call_id → message_id (0.13: replay on re-ACK)
     this.recentToolCallOrder = [];   // FIFO bound
     // Review F#17: per-pattern last-fired timestamp for the mid-turn dialog
     // watchdog. Dedups within MID_TURN_DEDUP_WINDOW_MS so a lingering dialog
@@ -253,8 +332,32 @@ class CliProcess extends Process {
     this.recentContentHashes = new Map();   // key → expiryTs
     this.contentDedupWindowMs = 60_000;
-    // pending turn(s): turn_id → { resolve, reject, replies: [], quietTimer, hardTimer, startedAt }
+    // pending turn(s): turn_id → { resolve, reject, replies: [], seen, quietTimer,
+    // hardTimer, absoluteTimer, _activityQuietTimer, startedAt }
     this.pendingTurns = new Map();
+    // 0.13 D1: activity bookkeeping for the finalizer ladder. _lastHookEventAt
+    // feeds the rung-2 telemetry (hook-stalled discrimination); _lastActivityAt
+    // is the broader surface (hooks + pane heartbeat + bridge tool calls).
+    this._lastHookEventAt = 0;
+    this._lastActivityAt = 0;
+    // 0.13 D2: the InputLedger — every user-shaped input written to the bridge
+    // gets an observable lifecycle: written → seen → resolved | dropped |
+    // superseded | fold-suspected. Pre-P3, injectUserMessage minted a turn_id
+    // that never escaped the function (fold/new-turn/drop indistinguishable —
+    // seam S4; the #14 msg-2385 drop was invisible by construction).
+    // turn_id → { turnId, source, msgId, chatId, writtenAt, state, _dropTimer,
+    //             _watchdogTimer, _rewritten }
+    this.inputLedger = new Map();
+    // Set whenever a reply carried the consumed_turn_ids contract field —
+    // the Tier 2C "contract observed" discriminator (P0 spike: incidental
+    // echo is trigger-only; without the contract a fold is indistinguishable
+    // from a drop, and auto-redelivering folds double-answers the common case).
+    this._lastAckFieldAt = 0;
+    // 0.12 interactive questions: tool_call_ids of `ask` calls awaiting an answer.
+    // While non-empty, the keep-alive interval resets the turn's idle ceiling (an
+    // idle `ask` fires no tool hooks, so _extendQuietOnToolActivity wouldn't run).
+    this._openQuestions = new Set();
+    this._questionKeepAliveTimer = null;
     // File-send outbound cap (bot → user). Safe cloud default; overwritten in
     // _spawnTmuxClaude with the backend/chat-resolved value before any turn.
@@ -494,9 +597,15 @@ class CliProcess extends Process {
     // after this.
     const topicConfig = opts.threadId && opts.chatConfig?.topics?.[opts.threadId];
     const agent  = topicConfig?.agent  || opts.chatConfig?.agent  || opts.agent;
-    const model  = topicConfig?.model  || opts.chatConfig?.model  || opts.model;
-    const effort = topicConfig?.effort || opts.chatConfig?.effort || opts.effort;
+    const model  = this._resolveModel(opts);
+    const effort = this._resolveEffort(opts);
     const resolvedCwd = topicConfig?.cwd || opts.chatConfig?.cwd || opts.cwd;
+    // Record the spawn-time model/effort. cli has no live model/effort swap
+    // (they are spawn-time --model / --effort flags), so getOrSpawn detects a
+    // /model or /effort drift against these and reloads — --resume preserves
+    // the conversation, the new flag takes effect. See wouldReloadFor.
+    this.model = model;
+    this.effort = effort;
     // File-send outbound cap (bot → user). Backend-derived (cloud 50MB vs
     // local Bot API server 2GB via opts.localApi) with per-topic/chat
@@ -510,6 +619,14 @@ class CliProcess extends Process {
       override: _capOverride,
     }).outBytes;
+    // 0.12.0-rc.13: per-chat/topic compaction warning (default OFF). Same
+    // topic→chat precedence as the file cap above. When enabled, the channels
+    // backend warns the chat as context fills (propose /compact at a break)
+    // and on auto-compaction (the event that detaches the bridge mid-turn).
+    const _compactionWarnRaw = topicConfig?.compactionWarnings ?? opts.chatConfig?.compactionWarnings;
+    this.compactionWarn = resolveCompactionWarnConfig({ compactionWarnings: _compactionWarnRaw });
+    this._compactionWarned = false;  // proactive warn-once per climb; reset on PostCompact
     // Parity audit P8 + rc.8 fs-guard (2026-05-26 shumorobot Music topic):
     // `--session-id <id>` creates a NEW claude session with that id;
     // `--resume <id>` resumes the EXISTING conversation. Lazy-respawn after
@@ -637,6 +754,44 @@ class CliProcess extends Process {
       'as normal — only the FINAL user-visible message needs to go through',
       'the reply tool.',
       '',
+      'When you call `reply`, ALWAYS set `consumed_turn_ids` to the turn_id',
+      'attribute of EVERY <channel> message you are answering or have received',
+      'since your last reply — including mid-turn follow-ups you absorbed into',
+      'the current answer. polygram uses it to confirm follow-up delivery;',
+      'omitting it can cause a follow-up to be re-sent to you.',
+      '',
+      '### Staying responsive on a long task',
+      '',
+      'The user cannot see you working — no live typing reaches them. For any task',
+      'that takes more than a few seconds, send a SHORT status first via `reply`',
+      '(it returns a `message_id`), then call `mcp__polygram-bridge__edit_message`',
+      'with that `message_id` to update the SAME bubble as you make progress,',
+      'finishing with the result. One evolving message beats silence or a flood of',
+      'new ones.',
+      '',
+      'Write status in PLAIN, friendly language about what you are doing FOR THE',
+      'USER — never tool names or mechanics. Say "Checking your config now…", not',
+      '"Running Bash" or "Calling Read". If the final answer is long, send it as a',
+      'fresh `reply` rather than an edit (an edit is one single message bubble).',
+      '',
+      // TEMPORARY mitigation (2026-06-08 Shumabit@UMI wedge): AskUserQuestion opens
+      // a blocking TUI selection widget the channel can't answer → the session
+      // parks until manually Esc'd. REMOVE this whole rule when the rich
+      // question→Telegram-keyboard feature ships (see docs design); claude should
+      // then use the native question tool again. Tracked so it isn't forgotten.
+      '### Asking the user a question / offering choices — HARD RULE',
+      '',
+      'NEVER use the AskUserQuestion tool or any interactive menu / selection',
+      'widget. They open a blocking terminal prompt the user on Telegram CANNOT',
+      'see or navigate — it silently wedges the entire session until it is manually',
+      'cleared. (Rich tap-to-answer choices are coming; until then this is a hard rule.)',
+      '',
+      'To ask a multiple-choice question, a confirmation, or yes/no, call the',
+      '`mcp__polygram-bridge__ask` tool — it renders tap-to-answer inline buttons',
+      '(supports multiSelect via `multiSelect:true` and a free-text answer via',
+      '`allowOther:true`) and returns the user\'s selection(s) as the tool result.',
+      'Prefer `ask` over a typed numbered list whenever you are offering choices.',
+      '',
       '### Sending FILES (tracks, images, docs) to the user',
       '',
       'The `mcp__polygram-bridge__reply` tool takes an optional `files` array of',
@@ -712,6 +867,20 @@ class CliProcess extends Process {
       cwd: resolvedCwd || opts.cwd || process.cwd(),
       command: this.claudeBin,
       args: claudeArgs,
+      envExtras: {
+        // Resume-dialog suppression (docs/0.13-resume-dialog-fix-spec.md B1):
+        // claude's session-age "resume-return" dialog fires when sessionAge ≥
+        // this many minutes AND est. tokens ≥ CLAUDE_CODE_RESUME_TOKEN_THRESHOLD
+        // (defaults 70 / 1e5, binary-verified on 2.1.158). Its pre-selected
+        // option literally runs /compact — silently compacting every aged
+        // --resume (and breaking the /model "conversation kept" guarantee).
+        // A huge threshold (1 year) means the dialog never triggers and resume
+        // is always full-session-as-is. Per-process env — the operator's own
+        // interactive claude is untouched. Belt-and-braces: the session-age
+        // gate trigger below still navigates to "full" if a future binary bump
+        // renames this var.
+        CLAUDE_CODE_RESUME_THRESHOLD_MINUTES: '525600',
+      },
     });
     // Dialog handling (Phase 0 finding) — poll capture-pane and Enter through:
@@ -728,24 +897,46 @@ class CliProcess extends Process {
    * lives in the shared helper.
    */
   async _handleStartupDialogs(tmuxName) {
-    await runStartupGate({
+    const gateResult = await runStartupGate({
       runner: this.runner,
       tmuxName,
       triggers: [
         // Dev-channels confirmation — always fires under
         // --dangerously-load-development-channels.
         { name: 'dev-channels', regex: /WARNING: Loading development channels/i, key: 'Enter' },
-        // Workspace trust prompt — fires on first-time cwd or untrusted.
-        { name: 'trust',        regex: /trust the files in this folder/i,        key: 'Enter' },
-        // Review F#12: session-age "Resume from summary?" prompt — fires on
-        // aged sessions (claude treats older session JSONLs differently).
-        // Tmux backend dismisses with Enter at tmux-process.js:2637 onward;
-        // mirror that here so an aged channels session doesn't hang the
-        // handshake until CHANNELS_HANDSHAKE_TIMEOUT (15s) → dead chat
-        // requiring manual /reset.
-        { name: 'session-age',  regex: SESSION_AGE_PROMPT_RE, key: 'Enter' },
+        // Workspace trust prompt — fires on first-time cwd or untrusted. claude
+        // 2.1.158 renders "Quick safety check: Is this a project you created or
+        // one you trust? … ❯ 1. Yes, I trust this folder" (Enter confirms the
+        // pre-selected "trust" option). The older "trust the files in this folder"
+        // wording is kept for back-compat; both anchor on "trust … this folder".
+        { name: 'trust',        regex: /trust (?:the files in )?this folder/i,    key: 'Enter' },
+        // Review F#12 + 2026-06-11 resume-dialog fix: session-age
+        // "resume-return" prompt on aged sessions. Bare Enter selects the
+        // pre-selected "Resume from summary" — which literally runs /compact
+        // on the resumed session (silent context degradation; the original
+        // F#12 dismissal compacted every aged resume). Navigate to option 2
+        // "Resume full session as-is" instead. This is the FALLBACK path:
+        // spawn env (CLAUDE_CODE_RESUME_THRESHOLD_MINUTES above) suppresses
+        // the dialog entirely; this trigger firing at all means suppression
+        // failed (upstream renamed the env var?) — surfaced via the
+        // session-age-dialog-fallback event below.
+        { name: 'session-age',  regex: SESSION_AGE_PROMPT_RE, keys: ['Down', 'Enter'] },
       ],
-      readySignal: /Listening for channel messages from: server:polygram-bridge/i,
+      // 2.1.173 reworked the channels UI banner (live-captured 2026-06-11):
+      // "Channels (experimental) messages from server:polygram-bridge inject
+      // directly in this session · …". Keep the 2.1.158 text too so a
+      // POLYGRAM_CLAUDE_BIN override to an older binary still gates correctly.
+      //
+      // 2026-06-12 (caught by the cancel-cheap E2E before prod): in 2.1.173
+      // the banner lives in a COLLAPSIBLE notice list — with ≥3 notices the
+      // pane shows "+N more · /status" and the banner is hidden, stalling a
+      // banner-only gate into a false CHANNELS_DIALOG_TIMEOUT. An interactive
+      // prompt footer ("(shift+tab to cycle)" / "? for shortcuts") with no
+      // pending dialog is equally READY: the gate's job is dialog navigation;
+      // channel liveness is separately guaranteed by mcp-ready (send() gate)
+      // + the delivery watchdog. Dialog panes render "Enter to confirm"
+      // instead of the footer, so the footer can't match mid-dialog.
+      readySignal: /(?:Listening for channel messages from:|Channels \(experimental\) messages from) server:polygram-bridge|shift\+tab to cycle|\? for shortcuts/i,
       timeoutCode: 'CHANNELS_DIALOG_TIMEOUT',
       // Progress-aware gate (shumorobot General incident 2026-05-30): a
       // cold spawn that's mid-download (runtime fetch, "24%" progress bar)
@@ -754,11 +945,32 @@ class CliProcess extends Process {
       // actively-changing pane (download bar, dialog nav) keeps resetting
       // the stall clock and rides out to the ready signal. deadlineMs stays
       // the absolute backstop. stallMs (default 60s) of zero pane activity = genuinely wedged.
-      stallMs: this.startupGateStallMs ?? 30_000,
+      // Stall = pane rendered then went static (genuinely wedged). 60s, not
+      // 30s: some topics' TUIs cold-render slowly (Music ~45s, slow MCP
+      // startup) — 30s was too tight and false-aborted them. Blank panes
+      // don't arm the stall timer at all now (see runStartupGate), so this
+      // only bounds a TUI that rendered and then truly hung.
+      stallMs: this.startupGateStallMs ?? 60_000,
       deadlineMs: this.startupGateDeadlineMs ?? 180_000,
+      // Review F4: fire-time, NOT gate-resolution — the 2026-06-10 incident
+      // matched session-age and THEN died (TMUX_SESSION_GONE), which a
+      // success-path check would miss. The dialog appearing AT ALL means the
+      // env suppression (CLAUDE_CODE_RESUME_THRESHOLD_MINUTES in
+      // _spawnTmuxClaude) stopped working — almost certainly an upstream
+      // rename on a binary bump. The gate handles it (full resume picked);
+      // this makes the regression visible.
+      onTrigger: (name) => {
+        if (name !== 'session-age') return;
+        this.logger.warn?.(
+          `[${this.label}] channels: session-age resume dialog appeared despite env suppression — ` +
+          'check CLAUDE_CODE_RESUME_THRESHOLD_MINUTES against the pinned claude binary',
+        );
+        this._logEvent('session-age-dialog-fallback', { tmux_name: tmuxName, phase: 'startup-gate' });
+      },
       logger: this.logger,
       label: `${this.label}:startup-gate`,
     });
+    return gateResult;
   }
   // 0.12 Phase 1.6: TWO-handshake gate. The original implementation only
@@ -922,7 +1134,61 @@ class CliProcess extends Process {
       this.logger.warn?.(
         `[${this.label}] channels: duplicate tool_call_id=${msg.tool_call_id} — re-ACKing without dispatch`,
       );
-      this._writeToBridge({ kind: 'tool_ack', tool_call_id: msg.tool_call_id, ok: true });
+      // 0.13: replay the cached message_id so a retried reply keeps its edit handle
+      // (re-ACKing without it would null the handle → progressive status silently breaks).
+      this._writeToBridge({ kind: 'tool_ack', tool_call_id: msg.tool_call_id, ok: true, message_id: this.recentToolCallResults.get(msg.tool_call_id) ?? null });
+      return;
+    }
+    // 0.13 D1: any bridge tool call is same-session activity (the reply tool's
+    // own delivery additionally notes activity via _recordReplyForPendingTurn,
+    // but Pre/PostToolUse hook lag is 250ms–5s — the socket message is the
+    // earliest truthful signal claude is working).
+    this._noteActivity('bridge-tool');
+    // 0.13 D2 Tier 2C: the consumed_turn_ids contract field — claude
+    // acknowledges every <channel> message this reply covers (incl. folds the
+    // incidental turn_id echo can't express; the reply schema carries ONE
+    // turn_id). Acked entries can never be declared dropped.
+    //
+    // SECURITY (review 2026-06-12): gate the ack on chat_id matching this
+    // session. The chat_id check lives further down (after dedup/rate-limit);
+    // without this guard a reply carrying a FOREIGN chat_id but naming the live
+    // turn here would mark it resolved/_consumedAcked + arm the finalizer —
+    // "delivered" though nothing reached this chat. The actual reject still
+    // happens at the chat_id guard below.
+    const chatIdMatches = this.chatId == null || String(args.chat_id) === String(this.chatId);
+    if (chatIdMatches && Array.isArray(args.consumed_turn_ids) && args.consumed_turn_ids.length) {
+      this._ledgerAckConsumed(args.consumed_turn_ids.filter((x) => typeof x === 'string'));
+    } else if (chatIdMatches && msg.name === 'reply' && 'consumed_turn_ids' in args) {
+      this._lastAckFieldAt = Date.now();   // field present but empty — contract observed
+    }
+    // 0.12 interactive questions: `ask` is a BLOCKING tool whose answer rides back
+    // on a `question_answer` message (NOT tool_ack). Skip the reply-only paths
+    // (content-dedup, rate-limit, the reply dispatcher) — just guard chat_id and
+    // emit so polygram renders the keyboard; the answer is written later via
+    // writeQuestionAnswer(). claude is now idle waiting on the result, so start a
+    // keep-alive that resets the turn's idle ceiling (no tool hooks fire meanwhile).
+    if (msg.name === 'ask') {
+      if (this.chatId != null && args.chat_id != null && String(args.chat_id) !== String(this.chatId)) {
+        this._writeToBridge({ kind: 'question_answer', tool_call_id: msg.tool_call_id, result: { cancelled: true, error: 'chat_id mismatch' } });
+        return;
+      }
+      this._openQuestions.add(msg.tool_call_id);
+      this._startQuestionKeepAlive();
+      // 0.13 D1: waiting-on-user — claude is legitimately silent, so the
+      // activity-quiet finalize must not run down while the keyboard is up.
+      this._suspendActivityQuiet();
+      this.emit('question-asked', {
+        sessionKey: this.sessionKey,
+        chatId: this.chatId,
+        threadId: this.threadId,
+        turnId: args.turn_id || null,
+        toolCallId: msg.tool_call_id,
+        questions: Array.isArray(args.questions) ? args.questions : [],
+        backend: this.backend,
+      });
       return;
     }
@@ -931,15 +1197,15 @@ class CliProcess extends Process {
     // an isError ack). Window-based so legit repeat sends eventually pass.
     if (msg.name === 'reply' && typeof args.text === 'string' && args.chat_id != null) {
       const dedupKey = this._buildContentDedupKey(args.chat_id, args.text);
-      const expiry = this.recentContentHashes.get(dedupKey);
+      const entry = this.recentContentHashes.get(dedupKey);   // { expiry, message_id }
       const nowDedup = Date.now();
       // Evict stale entries opportunistically (avoids unbounded growth).
       if (this.recentContentHashes.size > 64) {
-        for (const [k, ts] of this.recentContentHashes) {
-          if (ts < nowDedup) this.recentContentHashes.delete(k);
+        for (const [k, e] of this.recentContentHashes) {
+          if (e.expiry < nowDedup) this.recentContentHashes.delete(k);
         }
       }
-      if (expiry && expiry > nowDedup) {
+      if (entry && entry.expiry > nowDedup) {
         this.logger.warn?.(
           `[${this.label}] channels: duplicate content within ${this.contentDedupWindowMs}ms ` +
           `(new tool_call_id=${msg.tool_call_id}, hash=${dedupKey.slice(-12)}) — re-ACKing without dispatch`,
@@ -949,7 +1215,9 @@ class CliProcess extends Process {
           chat_id: args.chat_id,
           window_ms: this.contentDedupWindowMs,
         });
-        this._writeToBridge({ kind: 'tool_ack', tool_call_id: msg.tool_call_id, ok: true });
+        // 0.13: replay the ORIGINAL bubble's message_id so a retried identical reply
+        // keeps its edit handle (the slow-ack-retry case progressive status targets).
+        this._writeToBridge({ kind: 'tool_ack', tool_call_id: msg.tool_call_id, ok: true, message_id: entry.message_id ?? null });
         return;
       }
     }
@@ -992,6 +1260,34 @@ class CliProcess extends Process {
       return;
     }
+    // Dropped-"4" fix A2 (docs/0.13-resume-dialog-fix-spec.md): resolve the
+    // reply's originating TG message so the dispatcher has a target for solo
+    // reactions (and reply-quoting). Resolution order strictly mirrors
+    // _recordReplyForPendingTurn so quote/reaction attribution can never
+    // disagree with reply attribution: echoed turn_id → InputLedger entry's
+    // msgId (registered at send/inject time); no echo → the single pending
+    // turn's ledger entry. Anything else stays null — an unattributable
+    // reply must never react to / quote an unrelated message.
+    //
+    // Review F1: quote only the FIRST delivered reply per turn. On SDK,
+    // deliverReplies fires once per turn → one quote; the channels dispatcher
+    // fires per reply tool call, and an N-reply turn must not produce N
+    // bubbles all quoting the same user message.
+    let sourceMsgId = null;
+    let sourceEntry = null;
+    if (args.turn_id && this.inputLedger.has(args.turn_id)) {
+      sourceEntry = this.inputLedger.get(args.turn_id);
+    } else if (this.pendingTurns.size === 1) {
+      const [[onlyTurnId]] = this.pendingTurns;
+      sourceEntry = this.inputLedger.get(onlyTurnId) || null;
+    }
+    if (sourceEntry && !sourceEntry._quoteUsed) {
+      // Review F6: ledger stores msgId stringified; every other delivery call
+      // site passes numeric message_id — coerce rather than lean on TG leniency.
+      const n = Number(sourceEntry.msgId);
+      sourceMsgId = Number.isFinite(n) && n > 0 ? n : null;
+    }
     let result;
     try {
       result = await this.toolDispatcher({
@@ -1001,6 +1297,8 @@ class CliProcess extends Process {
         toolName: msg.name,
         text: args.text,
         files: args.files,
+        messageId: args.message_id,         // 0.13: edit_message target bubble
+        sourceMsgId,                        // reaction/quote target (A2)
         sessionCwd: this.sessionCwd,        // P0 #2: dispatcher uses this to allowlist file roots
         maxOutboundFileBytes: this.maxOutboundFileBytes, // backend/chat-derived upload cap
       });
@@ -1009,18 +1307,28 @@ class CliProcess extends Process {
       return;
     }
-    this._writeToBridge({ kind: 'tool_ack', tool_call_id: msg.tool_call_id, ok: !!result?.ok, error: result?.error });
+    // Review F1: the quote target is spent once a reply actually delivered
+    // with it. A FAILED delivery doesn't consume it — the retry still quotes.
+    if (msg.name === 'reply' && result?.ok && sourceMsgId != null && sourceEntry) {
+      sourceEntry._quoteUsed = true;
+    }
+    // 0.13: carry the delivered message_id back so the bridge hands it to claude
+    // (reply → edit_message progressive status).
+    this._writeToBridge({ kind: 'tool_ack', tool_call_id: msg.tool_call_id, ok: !!result?.ok, error: result?.error, message_id: result?.message_id });
     // P1 #7: remember the tool_call_id so duplicates re-ACK without dispatch.
     // Only cache on SUCCESS — failed calls should be retryable (transient TG
     // outage etc).
     if (result?.ok && msg.tool_call_id) {
       this.recentToolCallIds.add(msg.tool_call_id);
+      this.recentToolCallResults.set(msg.tool_call_id, result.message_id ?? null);   // 0.13: for re-ACK replay
       this.recentToolCallOrder.push(msg.tool_call_id);
       // FIFO eviction at cap
       while (this.recentToolCallOrder.length > RECENT_TOOL_CALL_LIMIT) {
         const evicted = this.recentToolCallOrder.shift();
         this.recentToolCallIds.delete(evicted);
+        this.recentToolCallResults.delete(evicted);
       }
     }
@@ -1028,7 +1336,9 @@ class CliProcess extends Process {
     // NEW tool_call_id still dedups. TTL-based via expiry timestamp.
     if (result?.ok && msg.name === 'reply' && typeof args.text === 'string' && args.chat_id != null) {
       const dedupKey = this._buildContentDedupKey(args.chat_id, args.text);
-      this.recentContentHashes.set(dedupKey, Date.now() + this.contentDedupWindowMs);
+      // 0.13: store the delivered message_id alongside the expiry so a deduped retry
+      // can replay it (keeps claude's edit handle for progressive status).
+      this.recentContentHashes.set(dedupKey, { expiry: Date.now() + this.contentDedupWindowMs, message_id: result.message_id ?? null });
     }
     // Review #16 + C9: only record the reply for pending-turn resolution when
@@ -1050,6 +1360,24 @@ class CliProcess extends Process {
    * @param {string|undefined} replyTurnId — echoed from Claude's reply tool args
    */
   _recordReplyForPendingTurn(text, replyTurnId) {
+    // 0.13 D2 (S5 tightening): a reply echoing a KNOWN ledgered turn_id that is
+    // NOT the current pending is a LATE reply from an earlier cycle (post-
+    // finalize tails, fireUserMessage cycles, ask wrap-ups). Pre-P3 the
+    // ==1 fallback below bound it into whatever pending exists now — the live
+    // misattribution path the design's §1.4 corollary names. Correlate it,
+    // resolve its entry, and route it as already-delivered instead.
+    if (replyTurnId && !this.pendingTurns.has(replyTurnId) && this.inputLedger.has(replyTurnId)) {
+      const lEntry = this.inputLedger.get(replyTurnId);
+      this._ledgerTransition(replyTurnId, 'resolved');
+      this._logEvent('cli-late-reply-correlated', { turn_id: replyTurnId, source: lEntry.source });
+      this.emit('autonomous-assistant-message', {
+        text,
+        sessionId: this.claudeSessionId,
+        backend: this.backend,
+        alreadyDelivered: true,
+      });
+      return;
+    }
     let target = null;
     if (replyTurnId && this.pendingTurns.has(replyTurnId)) {
       // Canonical path: Claude echoed the turn_id we sent.
@@ -1116,6 +1444,26 @@ class CliProcess extends Process {
     }
     target.replies.push(text);
+    target.replyCount = (target.replyCount || 0) + 1;
+    if (this._sawHookStream) {
+      // 0.13 D1: a delivered reply is ACTIVITY — rung 2 (activity-quiet) owns
+      // the finalize; the reply-quiet window never arms on hooks-live sessions.
+      // The chatty-claude cap (Review P1 #12) no longer instant-resolves a turn
+      // claude may still be working (that was seam S1's third premature-finalize
+      // trigger); past the cap, rung 2 + the ceilings govern — and a ceiling on
+      // a replied turn now RESOLVES with its replies (see fireTimeout).
+      if (target.replyCount === this.maxRepliesPerTurn) {
+        this.logger.warn?.(
+          `[${this.label}] cli: ${target.replyCount} replies in single turn — deferring to activity-quiet (cap=${this.maxRepliesPerTurn})`,
+        );
+        this._logEvent('cli-reply-cap-noted', { reply_count: target.replyCount });
+      }
+      this._noteActivity('reply');
+      return;
+    }
+    // ── Legacy (rung 3, hook stream never came up): pre-D1 path, byte-identical ──
     // Review F#13: each reply is "activity" — reset the idle ceiling so a
     // 15-min legit turn (PDF analysis, multi-file refactor) replying every
     // minute doesn't get killed at the 10-min wall-clock. The absoluteTimer
@@ -1132,7 +1480,6 @@ class CliProcess extends Process {
     // hang. After N reply tool calls in a single turn, resolve immediately on
     // the NEXT reply without waiting for the quiet window. N defaults to 20
     // which is plenty for normal multi-message replies but caps runaway chains.
-    target.replyCount = (target.replyCount || 0) + 1;
     if (target.quietTimer) clearTimeout(target.quietTimer);
     if (target.replyCount >= this.maxRepliesPerTurn) {
       // Skip the quiet-window — resolve right away with whatever we've got.
@@ -1145,6 +1492,318 @@ class CliProcess extends Process {
     }
   }
+  // ─── 0.13 D2: InputLedger ──────────────────────────────────────────
+  /**
+   * Register a freshly written input in the InputLedger.
+   *
+   * The record starts in state 'written' with both timer slots empty. When the
+   * ledger grows past INPUT_LEDGER_CAP exactly one entry is evicted: the first
+   * entry (in iteration order) whose state is neither 'written' nor 'seen',
+   * or, when every entry is still in one of those two states, the first entry.
+   *
+   * @param {*} turnId — turn identifier used as the ledger key
+   * @param {{source?: *, msgId?: *}} [meta] — origin tag, plus the originating
+   *   message id (stored stringified; null when absent)
+   */
+  _ledgerAdd(turnId, { source, msgId = null } = {}) {
+    const record = {
+      turnId,
+      source,
+      msgId: msgId == null ? null : String(msgId),
+      chatId: this.chatId,
+      writtenAt: Date.now(),
+      state: 'written',
+      _dropTimer: null,
+      _watchdogTimer: null,
+      _rewritten: false,
+    };
+    this.inputLedger.set(turnId, record);
+    if (this.inputLedger.size <= INPUT_LEDGER_CAP) return;
+    // Over the cap: prefer a terminal entry; fall back to the first one seen.
+    let evictId = null;
+    for (const [candidateId, candidate] of this.inputLedger) {
+      const stillLive = candidate.state === 'written' || candidate.state === 'seen';
+      if (!stillLive) {
+        evictId = candidateId;
+        break;
+      }
+      if (!evictId) evictId = candidateId;
+    }
+    if (evictId) this._ledgerDelete(evictId);
+  }
+  /** Remove a ledger entry, cancelling whichever of its timers are armed. Unknown ids are a no-op. */
+  _ledgerDelete(turnId) {
+    const entry = this.inputLedger.get(turnId);
+    if (entry) {
+      for (const handle of [entry._dropTimer, entry._watchdogTimer]) {
+        if (handle) clearTimeout(handle);
+      }
+      this.inputLedger.delete(turnId);
+    }
+  }
+  /**
+   * Move a ledger entry to `state` and disarm both of its timers, so an entry
+   * that has been seen/resolved can never drop or be re-written afterwards.
+   * Unknown ids are ignored.
+   */
+  _ledgerTransition(turnId, state) {
+    const entry = this.inputLedger.get(turnId);
+    if (!entry) return;
+    entry.state = state;
+    for (const slot of ['_dropTimer', '_watchdogTimer']) {
+      if (!entry[slot]) continue;
+      clearTimeout(entry[slot]);
+      entry[slot] = null;
+    }
+  }
+  /**
+   * Tier 2C: a reply carried consumed_turn_ids — acknowledge every known id.
+   *
+   * Stamps `_lastAckFieldAt`, then for each id:
+   *   - resolves its ledger entry (if present and not already 'resolved') and
+   *     logs 'cli-input-acked';
+   *   - if a pending turn exists under that id, flags it `_consumedAcked` and
+   *     arms its activity-quiet window.
+   *
+   * @param {Iterable<*>} ids — turn ids named by the reply
+   */
+  _ledgerAckConsumed(ids) {
+    this._lastAckFieldAt = Date.now();
+    for (const ackedId of ids) {
+      const ledgerEntry = this.inputLedger.get(ackedId);
+      const needsResolve = Boolean(ledgerEntry) && ledgerEntry.state !== 'resolved';
+      if (needsResolve) {
+        this._ledgerTransition(ackedId, 'resolved');
+        this._logEvent('cli-input-acked', { turn_id: ackedId, source: ledgerEntry.source });
+      }
+      // False-timeout case (UMI 2026-06-11 19:49): claude answers a
+      // primary+fold in ONE reply but echoes the FOLD's turn_id. The reply is
+      // then routed through late-reply correlation and the PRIMARY pending
+      // absorbs nothing, although this ack names it. Flag the primary as
+      // consumed so the finalizer rungs treat it as replied rather than
+      // rejecting it at a ceiling after the user already got the answer.
+      const turn = this.pendingTurns.get(ackedId);
+      if (!turn) continue;
+      turn._consumedAcked = true;
+      // The flag is what makes the turn eligible for rung 2, and the turn's
+      // last _noteActivity ran before it was set — arm here, or a quiet tail
+      // would never re-arm and the turn would sit until a ceiling.
+      this._armActivityQuiet(ackedId, turn);
+    }
+  }
+  /** Disarm the drop-confirm and watchdog timers of every ledger entry; the entries themselves stay. */
+  _clearLedgerTimers() {
+    for (const [, entry] of this.inputLedger) {
+      for (const slot of ['_dropTimer', '_watchdogTimer']) {
+        if (!entry[slot]) continue;
+        clearTimeout(entry[slot]);
+        entry[slot] = null;
+      }
+    }
+  }
+  /**
+   * D2 drop detection: start a drop-confirm timer on every ledger entry that
+   * is still 'written', is not a primary, and has no drop timer running yet.
+   *
+   * The confirm window exists because a non-folded inject may legitimately
+   * queue claude-side and be picked up as the NEXT cycle; only entries nobody
+   * picks up or acknowledges within `dropConfirmMs` reach _dropConfirmFire
+   * still 'written'. Primaries are excluded — the pending lifecycle and the
+   * delivery watchdog govern them.
+   */
+  _armDropConfirmSweep() {
+    for (const [id, entry] of this.inputLedger) {
+      const eligible = entry.state === 'written'
+        && entry.source !== 'primary'
+        && !entry._dropTimer;
+      if (!eligible) continue;
+      const timer = setTimeout(() => this._dropConfirmFire(id), this.dropConfirmMs);
+      entry._dropTimer = timer;
+      timer.unref?.();
+    }
+  }
+  /**
+   * Drop-confirm timer callback for one ledger entry.
+   *
+   * Does nothing unless the entry still exists in state 'written'. Otherwise
+   * the entry ends in exactly one state:
+   *   - 'resolved'       — source is 'system' or 'inject' (never auto-redelivered);
+   *   - 'superseded'     — a primary written later is already seen/resolved, so
+   *                        redelivering this one would double-answer the intent;
+   *   - 'fold-suspected' — no consumed_turn_ids ack was recorded at or after
+   *                        this entry's write, so a fold is indistinguishable
+   *                        from a drop (telemetry only);
+   *   - 'dropped'        — none of the above; also emits 'input-dropped'.
+   */
+  _dropConfirmFire(turnId) {
+    const entry = this.inputLedger.get(turnId);
+    if (!entry || entry.state !== 'written') return;
+    entry._dropTimer = null;
+    const { source, msgId, chatId, writtenAt } = entry;
+    // System/anonymous pushes resolve quietly.
+    if (source === 'system' || source === 'inject') {
+      this._ledgerTransition(turnId, 'resolved');
+      this._logEvent('cli-input-unconfirmed', { turn_id: turnId, source });
+      return;
+    }
+    // Supersession: the user re-sent / moved on after this entry was written.
+    const superseded = [...this.inputLedger.values()].some((other) => (
+      other.source === 'primary'
+      && other.writtenAt > writtenAt
+      && (other.state === 'seen' || other.state === 'resolved')
+    ));
+    if (superseded) {
+      this._ledgerTransition(turnId, 'superseded');
+      this._logEvent('input-superseded', { turn_id: turnId, msg_id: msgId });
+      return;
+    }
+    // Contract discriminator: `>=` on purpose — an ack in the same millisecond
+    // as the write still proves the model honoured the ack contract.
+    const contractObserved = this._lastAckFieldAt >= writtenAt;
+    if (!contractObserved) {
+      this._ledgerTransition(turnId, 'fold-suspected');
+      this._logEvent('input-fold-suspected', { turn_id: turnId, msg_id: msgId, source });
+      return;
+    }
+    this._ledgerTransition(turnId, 'dropped');
+    this._logEvent('input-dropped', { turn_id: turnId, msg_id: msgId, source });
+    this.emit('input-dropped', { turnId, msgId, chatId, source });
+  }
+  /**
+   * D2 primary-delivery watchdog (KI-drop's missing half — the channel-bind
+   * race drops a user_msg before claude's subscription is live). Fire logic
+   * (implemented in _deliveryWatchdogFire):
+   *   - entry seen / turn settled → done (timer was already cancelled).
+   *   - ANY session activity since dispatch (hooks, pane heartbeat, bridge
+   *     tool calls) → claude is busy (likely a foreign cycle; the queued
+   *     pickup is legitimately deferred) → extend, NEVER re-write (round-2
+   *     panel: re-writes against a busy session double-prompt it).
+   *   - total silence → ONE re-write of the SAME envelope (idempotent:
+   *     never seen + zero activity ⇒ claude never had it — the rc.25
+   *     argument, properly scoped); still silence after that → bridge
+   *     teardown onto the existing bridge-disconnected recovery path.
+   *
+   * Arms (or re-arms) the timer for `turnId`; a no-op when the ledger has no
+   * entry for it.
+   *
+   * @param {*} turnId — ledger key of the primary input
+   * @param {object} pending — the pending-turn record handed back to the fire handler
+   */
+  _armDeliveryWatchdog(turnId, pending) {
+    const entry = this.inputLedger.get(turnId);
+    if (!entry) return;
+    // Never stack two watchdogs on one entry: the slot holds a single handle,
+    // so an overwritten live timer could no longer be cancelled by
+    // _ledgerTransition / _ledgerDelete and would fire a second, parallel
+    // re-write/escalate chain. Clearing an already-fired handle is harmless.
+    if (entry._watchdogTimer) clearTimeout(entry._watchdogTimer);
+    entry._watchdogTimer = setTimeout(() => this._deliveryWatchdogFire(turnId, pending), this.deliveryWatchdogMs);
+    entry._watchdogTimer.unref?.();
+  }
+  /**
+   * Delivery-watchdog timer callback.
+   *
+   * Bails out when the ledger entry is gone or no longer 'written', or when
+   * the turn is no longer pending. Otherwise:
+   *   - activity recorded at/after the entry's write → re-arm (extend) only;
+   *   - first silent expiry → log 'cli-delivery-rewrite', re-send the stored
+   *     user_msg payload (when the pending has one) and re-arm;
+   *   - second silent expiry → log the escalation and destroy the bridge
+   *     connection, when the bridge server offers `destroyConnection`.
+   */
+  _deliveryWatchdogFire(turnId, pending) {
+    const entry = this.inputLedger.get(turnId);
+    if (!entry || entry.state !== 'written') return;
+    if (!this.pendingTurns.has(turnId)) return;       // settled some other way
+    entry._watchdogTimer = null;
+    const latestSignal = Math.max(this._lastActivityAt, this._lastHookEventAt);
+    const busy = latestSignal >= entry.writtenAt && latestSignal > 0;
+    if (busy) {
+      // Claude is doing something — extend the window, never re-write.
+      this._armDeliveryWatchdog(turnId, pending);
+      return;
+    }
+    if (entry._rewritten) {
+      // The single re-write was already spent and the session is still silent.
+      this._logEvent('cli-delivery-watchdog-escalate', { turn_id: turnId });
+      if (this.bridgeServer?.destroyConnection) this.bridgeServer.destroyConnection();
+      return;
+    }
+    entry._rewritten = true;
+    this._logEvent('cli-delivery-rewrite', { turn_id: turnId });
+    const envelope = pending._userMsgPayload;
+    if (envelope) this._writeToBridge(envelope);
+    this._armDeliveryWatchdog(turnId, pending);
+  }
+  /**
+   * 0.13 D1: record same-session activity — the heartbeat of the finalizer
+   * ladder (docs/0.13-channels-lifecycle-design.md §3 D1), superseding the
+   * 0.12 `_extendQuietOnToolActivity` point fix.
+   *
+   * Stamps `_lastActivityAt`, then for every pending turn:
+   *   1. Restarts the idle ceiling (`hardTimer`) when one is armed, so a long
+   *      tool-heavy turn isn't idle-killed. The restart uses
+   *      `this.turnTimeoutMs`.
+   *   2. Hooks live (`_sawHookStream`): cancels an in-flight stop grace (a
+   *      lagged Stop from a foreign cycle is proven stale by the activity),
+   *      drops any legacy reply-quiet timer, and re-arms the activity-quiet
+   *      window (rung 2).
+   *   3. Hook stream never came up (rung 3): leaves a pending stop grace
+   *      untouched; otherwise restarts an already-armed reply-quiet timer.
+   *
+   * Documented callers: every hook event except Stop, the pane "esc to
+   * interrupt" thinking heartbeat, bridge tool calls, delivered replies, the
+   * question keep-alive, and question answers.
+   *
+   * @param {string} [source='activity'] — label forwarded to the
+   *   stop-grace-cancel telemetry
+   */
+  _noteActivity(source = 'activity') {
+    this._lastActivityAt = Date.now();
+    for (const [id, turn] of this.pendingTurns) {
+      // Idle ceiling: activity IS activity.
+      if (turn.hardTimer) {
+        clearTimeout(turn.hardTimer);
+        turn.hardTimer = setTimeout(() => turn._fireTimeout?.('idle'), this.turnTimeoutMs);
+      }
+      if (!this._sawHookStream) {
+        // Legacy path. A stop grace in flight (resolveTurn's wait-for-Stop) is
+        // never revived or cancelled by activity; only an armed quiet window
+        // is pushed out.
+        if (!turn._stopGracePending && turn.quietTimer) {
+          clearTimeout(turn.quietTimer);
+          turn.quietTimer = setTimeout(() => this._resolveTurn(id), this.turnQuietMs);
+        }
+        continue;
+      }
+      if (turn._stopGracePending) this._cancelStopGrace(id, turn, source);
+      if (turn.quietTimer) {
+        clearTimeout(turn.quietTimer);
+        turn.quietTimer = null;
+      }
+      this._armActivityQuiet(id, turn);
+    }
+  }
+  /**
+   * D1 rung 2: arm or refresh the activity-quiet finalize timer of one pending
+   * turn. Nothing is armed unless all of these hold:
+   *   - the hook stream is live;
+   *   - the turn has at least one reply, OR it was seen and consumed-acked
+   *     (the answer rode a sibling turn_id — same eligibility as the fire site);
+   *   - no question is open (waiting on the user suspends the clock);
+   *   - no stop grace is in flight for this turn.
+   */
+  _armActivityQuiet(turnId, pending) {
+    if (!this._sawHookStream) return;
+    const hasReply = Boolean(pending.replies) && pending.replies.length !== 0;
+    const ackedElsewhere = pending.seen === true && pending._consumedAcked === true;
+    if (!hasReply && !ackedElsewhere) return;
+    if (this._openQuestions.size > 0) return;
+    if (pending._stopGracePending) return;
+    if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer);
+    const quietTimer = setTimeout(() => this._activityQuietFinalize(turnId), this.activityQuietMs);
+    pending._activityQuietTimer = quietTimer;
+    quietTimer.unref?.();
+  }
+  /**
+   * D1: suspend rung 2 for every pending turn (an `ask` just opened — we are
+   * waiting on the user). Clears and nulls each armed activity-quiet timer.
+   */
+  _suspendActivityQuiet() {
+    for (const turn of this.pendingTurns.values()) {
+      if (!turn._activityQuietTimer) continue;
+      clearTimeout(turn._activityQuietTimer);
+      turn._activityQuietTimer = null;
+    }
+  }
+  /**
+   * D1 rung 2 fire: the whole activity surface (hooks + pane heartbeat + bridge
+   * tool calls) has been quiet for activityQuietMs on a replied turn — the tail
+   * is over (Stop was lost, foreign, or the hook stream died mid-session; the
+   * pre-D1 `_sawHookStream` one-way boolean left that last class with NO
+   * finalizer until a 10-min TURN_TIMEOUT *rejection* after a delivered answer).
+   *
+   * Re-checks eligibility at fire time and returns without finalizing when the
+   * turn is gone, a stop grace is in flight, a question is open, or the turn
+   * has neither a reply nor a seen + consumed-ack marker. Otherwise logs the
+   * finalize (plus a stalled-stream event when the last hook event is at least
+   * activityQuietMs old) and hands the turn to _finalizeTurn.
+   *
+   * @param {*} turnId — key of the pending turn to finalize
+   */
+  _activityQuietFinalize(turnId) {
+    const pending = this.pendingTurns.get(turnId);
+    if (!pending) return;
+    if (pending._stopGracePending) return;
+    if (this._openQuestions.size > 0) return;          // re-check at fire time
+    // Read the reply count once, tolerating a missing `replies` array: the
+    // eligibility test below accepts a reply-less, consumed-acked turn, so the
+    // telemetry payload must not dereference `replies` unguarded.
+    const replyCount = pending.replies?.length ?? 0;
+    // Eligibility: ≥1 bound reply, OR seen + consumed-acked (the answer went
+    // out under a sibling turn_id — fold-id echo; see _ledgerAckConsumed).
+    const consumedAcked = pending.seen === true && pending._consumedAcked === true;
+    if (replyCount === 0 && !consumedAcked) return;
+    const lastHookAgeMs = this._lastHookEventAt ? Date.now() - this._lastHookEventAt : null;
+    this._logEvent('cli-activity-quiet-finalize', {
+      turn_id: turnId,
+      reply_count: replyCount,
+      consumed_acked: consumedAcked,
+      last_hook_age_ms: lastHookAgeMs,
+      had_stop: !!pending._stopHookData,
+    });
+    if (lastHookAgeMs != null && lastHookAgeMs >= this.activityQuietMs) {
+      // A previously-live hook stream went quiet enough that rung 2 (not an
+      // attributed Stop) ended the turn — the soak's mid-session-death signal.
+      this._logEvent('cli-hook-stream-stalled', { turn_id: turnId, last_hook_age_ms: lastHookAgeMs });
+    }
+    this._finalizeTurn(turnId);
+  }
+  /**
+   * D1 rung 1: an attributed Stop (the pending was `seen` at pickup, or has
+   * ≥1 turn_id-bound reply) finalizes through a short grace window.
+   *
+   * Stores the Stop payload on the pending, marks the grace as in flight,
+   * disarms any activity-quiet timer, and schedules the finalize after
+   * `stopGraceMs`. Same-session activity on a hooks-live session cancels the
+   * grace (via _noteActivity → _cancelStopGrace).
+   *
+   * @param {*} turnId — key of the pending turn
+   * @param {object} pending — the pending-turn record
+   * @param {*} info — Stop hook data, kept as `pending._stopHookData`
+   */
+  _beginAttributedStopGrace(turnId, pending, info) {
+    pending._stopHookData = info;
+    pending._stopGracePending = true;
+    if (pending._activityQuietTimer) {
+      clearTimeout(pending._activityQuietTimer);
+      pending._activityQuietTimer = null;
+    }
+    const onGraceElapsed = () => {
+      pending._stopGraceTimer = null;
+      pending._stopGracePending = false;
+      const replyCount = pending.replies?.length || 0;
+      this._logEvent('cli-turn-resolved-by-stop', {
+        turn_id: turnId,
+        reply_count: replyCount,
+        via_text_fallback: replyCount === 0,
+        attributed: pending.seen === true ? 'seen' : 'reply-bound',
+        session_id: this.claudeSessionId,
+      });
+      this._finalizeTurn(turnId);
+    };
+    const graceTimer = setTimeout(onGraceElapsed, this.stopGraceMs);
+    pending._stopGraceTimer = graceTimer;
+    graceTimer.unref?.();
+  }
+  /**
+   * D1: cancel a stop grace (rung 1 stale Stop, or a superseded legacy grace).
+   * Clears the grace timer, detaches the pending's 'stop-hook' listener if it
+   * has one, resets the in-flight flag, and logs 'cli-stop-grace-cancelled'
+   * with the activity `source` that triggered the cancel.
+   */
+  _cancelStopGrace(turnId, pending, source) {
+    const { _stopGraceTimer: graceTimer, _onStop: stopListener } = pending;
+    if (graceTimer) {
+      clearTimeout(graceTimer);
+      pending._stopGraceTimer = null;
+    }
+    if (stopListener) {
+      this.off('stop-hook', stopListener);
+      pending._onStop = null;
+    }
+    pending._stopGracePending = false;
+    this._logEvent('cli-stop-grace-cancelled', { turn_id: turnId, source });
+  }
   // 0.12 Phase 1.7 (Finding 0.1.A): two-step turn resolution.
   //   _resolveTurn — entry point called by channel-result OR quiet-window
   //                  expiry. Schedules a stopGraceMs window during which
@@ -1223,6 +1882,9 @@ class CliProcess extends Process {
     if (pending.hardTimer) clearTimeout(pending.hardTimer);
     if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
     if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
+    if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer);   // 0.13 D1
+    if (pending._onStop) { this.off('stop-hook', pending._onStop); pending._onStop = null; }
+    const hadReplyToolCalls = pending.replies.length > 0;
     let text = pending.replies.join('\n\n');
     // 0.12 Phase 1.7 fallback: if no reply tool calls landed (claude ended
     // the turn without calling mcp__polygram-bridge__reply), use the Stop
@@ -1240,12 +1902,17 @@ class CliProcess extends Process {
     // to appear free in dashboards.
     const result = {
       text,
-      // Review F#2: dispatcher has ALREADY delivered text to Telegram on each
-      // reply tool call (incremental real-time UX is the channels delivery
-      // model). polygram.js's post-pm.send pipeline must short-circuit its
-      // streamer.finalize / deliverReplies branch — otherwise every turn
-      // delivers twice. Logging + DB transcript still use result.text.
-      alreadyDelivered: true,
+      // Review F#2: when claude used reply tool calls, the dispatcher ALREADY
+      // delivered that text to Telegram incrementally — polygram.js must
+      // short-circuit its deliverReplies branch or every turn delivers twice.
+      // BUT a turn finalized via the Stop fallback (no reply tool calls — the
+      // stuck-turn case) has delivered NOTHING; marking it alreadyDelivered
+      // would resolve the turn silently and the user still sees nothing. So
+      // only claim already-delivered when reply tool calls actually fired —
+      // or when claude ACKED consuming this turn in a sibling reply
+      // (consumed_turn_ids; the fold-id-echo case): re-sending the Stop
+      // fallback there would duplicate the delivered answer.
+      alreadyDelivered: hadReplyToolCalls || pending._consumedAcked === true,
       sessionId: this.claudeSessionId,
       cost: null,             // Channels protocol doesn't expose per-turn cost
       duration,
@@ -1261,6 +1928,12 @@ class CliProcess extends Process {
       },
     };
     this.inFlight = this.pendingTurns.size > 0;
+    // 0.13 D2: the finalized cycle resolves its own ledger entry; any
+    // non-primary entries still 'written' enter the drop-confirm window
+    // (a late next-cycle pickup or ack cancels; otherwise dropped /
+    // fold-suspected / superseded — see _dropConfirmFire).
+    this._ledgerTransition(turnId, 'resolved');
+    this._armDropConfirmSweep();
     pending.resolve(result);
     this.emit('result', { subtype: 'success' }, { streamText: text });
     this.emit('idle');
@@ -1310,6 +1983,9 @@ class CliProcess extends Process {
       if (oldest.quietTimer) clearTimeout(oldest.quietTimer);
       if (oldest.hardTimer) clearTimeout(oldest.hardTimer);
       if (oldest.absoluteTimer) clearTimeout(oldest.absoluteTimer);
+      if (oldest._stopGraceTimer) clearTimeout(oldest._stopGraceTimer);
+      if (oldest._activityQuietTimer) clearTimeout(oldest._activityQuietTimer);   // 0.13 D1
+      if (oldest._onStop) this.off('stop-hook', oldest._onStop);
       const dropErr = new Error('queue overflow — oldest pending evicted');
       dropErr.code = 'QUEUE_OVERFLOW';
       try { oldest.reject(dropErr); } catch {}
@@ -1348,6 +2024,15 @@ class CliProcess extends Process {
       const fireTimeout = (reason) => {
         if (!this.pendingTurns.has(turnId)) return;
         const pending = this.pendingTurns.get(turnId);
+        // 0.13 D1 (S9): unblock any open ask FIRST — claude must never stay
+        // hung on a question whose turn we are about to end. The card cleanup
+        // stays with the question sweep; this only resolves the blocking tool.
+        if (this._openQuestions.size > 0) {
+          for (const tc of [...this._openQuestions]) {
+            this._logEvent('cli-question-timedout-at-ceiling', { tool_call_id: tc, reason });
+            try { this.writeQuestionAnswer(tc, { timedout: true }); } catch { /* best-effort */ }
+          }
+        }
         this.pendingTurns.delete(turnId);
         const idx = this.pendingQueue.findIndex(e => e.turnId === turnId);
         if (idx >= 0) this.pendingQueue.splice(idx, 1);
@@ -1355,8 +2040,44 @@ class CliProcess extends Process {
         if (pending.hardTimer) clearTimeout(pending.hardTimer);
         if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
         if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
+        if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer);
+        if (pending._onStop) this.off('stop-hook', pending._onStop);
         this.inFlight = this.pendingTurns.size > 0;
         const turnTimeoutMs = reason === 'absolute' ? this.turnAbsoluteMs : (opts.maxTurnMs || this.turnTimeoutMs);
+        // 0.13 D1 ceiling-resolve: a ceiling expiring on a turn with delivered
+        // replies RESOLVES it — the user already has their answer; rejecting
+        // would send a scary timeout error AFTER a successful reply (round-2
+        // panel finding: the v2 soak gate contradicted the design's own
+        // ask-timeout-then-ceiling path). TURN_TIMEOUT rejection is reserved
+        // for turns with ZERO delivered replies. Consumed-acked counts as
+        // replied: the answer rode a sibling turn_id (fold-id echo — the UMI
+        // 2026-06-11 19:49 false ⏱; see _ledgerAckConsumed).
+        if ((pending.replies?.length || 0) > 0
+            || (pending.seen === true && pending._consumedAcked === true)) {
+          this._logEvent('cli-turn-ceiling-resolved', {
+            reason, turnTimeoutMs, reply_count: pending.replies?.length || 0,
+            consumed_acked: pending._consumedAcked === true,
+          });
+          this.emit('idle');
+          resolve({
+            text: pending.replies.join('\n\n'),
+            alreadyDelivered: true,
+            sessionId: this.claudeSessionId,
+            cost: null,
+            duration: Date.now() - pending.startedAt,
+            error: null,
+            metrics: {
+              inputTokens: null, outputTokens: null,
+              cacheCreationTokens: null, cacheReadTokens: null,
+              numAssistantMessages: pending.replies.length,
+              numToolUses: null,
+              resultSubtype: 'success',
+            },
+          });
+          return;
+        }
         this.emit('turn-timeout', {
           reason,
           turnTimeoutMs,
@@ -1372,24 +2093,43 @@ class CliProcess extends Process {
       const pending = {
         resolve, reject,
         replies: [],
+        // 0.13 D1: pickup marker — set when a UserPromptSubmit prompt carries
+        // this turn's envelope (the seen-slice). Rung 1's Stop attribution.
+        seen: false,
         quietTimer: null,
-        // hardTimer = idle ceiling. Resets on each reply in
-        // _recordReplyForPendingTurn so a chatty turn (replies every 60s)
-        // doesn't get killed at 10 min wall-clock.
+        _activityQuietTimer: null,
+        // hardTimer = idle ceiling. Resets on any activity (_noteActivity)
+        // so a chatty or tool-heavy turn isn't killed at 10 min wall-clock.
         hardTimer: setTimeout(() => fireTimeout('idle'), opts.maxTurnMs || this.turnTimeoutMs),
         // absoluteTimer = wall-clock ceiling. Does NOT reset. Bounds true
         // runaways. 30 min default — high enough that legitimate
         // multi-step refactors complete, low enough to catch infinite
         // chatter.
         absoluteTimer: setTimeout(() => fireTimeout('absolute'), this.turnAbsoluteMs),
-        // Review F#13: attach fireTimeout so _recordReplyForPendingTurn can
-        // reset the idle timer (creates a fresh setTimeout with the same
-        // reject closure).
+        // Review F#13: attach fireTimeout so activity can reset the idle
+        // timer (creates a fresh setTimeout with the same closure).
         _fireTimeout: fireTimeout,
         startedAt: Date.now(),
       };
       this.pendingTurns.set(turnId, pending);
+      // 0.13 D1 (§1.4): the single-active-cycle invariant is enforced by the
+      // daemon's stdinLock (held across the full turn) — CliProcess can't see
+      // the lock, so a second concurrent pending means a caller bypassed the
+      // contract. Loud assertion telemetry; the drop-rather-than-misattribute
+      // defenses (reply routing, Stop attribution) remain the failure mode.
+      if (this.pendingTurns.size > 1) {
+        this.logger.warn?.(
+          `[${this.label}] cli: ${this.pendingTurns.size} concurrent pending turns — stdinLock contract violated upstream`,
+        );
+        this._logEvent('cli-multi-pending-assert', { pending_count: this.pendingTurns.size });
+      }
+      // 0.13 D2: ledger the primary + keep the exact envelope for the delivery
+      // watchdog's idempotent re-write (the pending owns it — no text in the
+      // ledger, events stay content-free per L13).
+      this._ledgerAdd(turnId, { source: 'primary', msgId: opts.context?.sourceMsgId });
       // Review F#18: bridge-disconnect TOCTOU. The bridgeReady check at
       // top of send() can race the bridge socket close. If the bridge
       // dies between check and write, _writeToBridge silently no-ops (it
@@ -1397,14 +2137,16 @@ class CliProcess extends Process {
       // pending entry sits with no live bridge until hardTimer (10 min).
       // Pass the actual write result back and reject immediately on
       // failure so the caller sees a fast, code-tagged error.
-      const wrote = this._writeToBridge({
+      pending._userMsgPayload = {
         kind: 'user_msg',
         turn_id: turnId,
         text: prompt,
         chat_id: this.chatId,
         user: opts.context?.user || '',
         msg_id: opts.context?.sourceMsgId || '',
-      });
+      };
+      const wrote = this._writeToBridge(pending._userMsgPayload);
+      if (wrote) this._armDeliveryWatchdog(turnId, pending);
       if (!wrote) {
         this.pendingTurns.delete(turnId);
         const qIdx = this.pendingQueue.findIndex(e => e.turnId === turnId);
@@ -1423,6 +2165,13 @@ class CliProcess extends Process {
   async interrupt() {
     if (this.closed) return;
     if (!this.tmuxSession) return;
+    // Cancel-cheap C2 (spec Finding 7): a cancel is already in flight — a
+    // SECOND C-c would land at the now-idle prompt, which is claude's exit
+    // chord ("press ctrl+c again to exit") and would convert the cheap cancel
+    // into an accidental process exit. Also: resetting the grace timer would
+    // DELAY the synthetic resolution for a user double-tapping "stop".
+    // Idempotent no-op instead.
+    if (this._interruptGraceTimer) return;
     // tmux SIGINT — hard interrupt for the running turn.
     try {
       await this.runner.sendControl(this.tmuxSession, 'C-c');
@@ -1433,18 +2182,47 @@ class CliProcess extends Process {
     this.emit('interrupt-applied', { backend: this.backend });
     this._logEvent('interrupt-applied', {});
+    // Cancel-cheap C1 — the spec's O2 BLOCKER: the cancelled work's inputs
+    // must never re-deliver. The grace below synthesizes the resolution
+    // WITHOUT _finalizeTurn, so without this, an autosteer/fold entry stays
+    // 'written' and a LATER cycle-end sweep declares it dropped →
+    // drop-redeliver re-injects the user's CANCELLED message minutes later.
+    // 'cancelled' is terminal: the sweep only targets 'written', and
+    // _ledgerTransition clears the entry's drop/watchdog timers.
+    for (const [id, e] of this.inputLedger) {
+      if (e.state === 'written' || e.state === 'seen') {
+        this._ledgerTransition(id, 'cancelled');
+        this._logEvent('cli-input-cancelled', { turn_id: id, source: e.source });
+      }
+    }
     // Review P3 C8: after Ctrl-C, Claude may or may not call reply with an
     // "I was interrupted" message. If it doesn't (5s grace), resolve pending
     // turns with subtype 'interrupted' instead of letting them wait the full
-    // 10-min hardTimer. The grace window is reset if a new interrupt fires.
-    if (this._interruptGraceTimer) clearTimeout(this._interruptGraceTimer);
+    // 10-min hardTimer.
+    //
+    // C4 BLOCKER (review 2026-06-12): SNAPSHOT the turns that were in flight at
+    // C-c time and resolve ONLY those. The cancelled turn often finalizes
+    // cleanly DURING the grace (claude acks the C-c) and the user then starts a
+    // NEW turn — the "stop, then redirect" flow cheap-cancel exists for. Without
+    // the snapshot the stale grace iterated pendingTurns LIVE and silently
+    // resolved that fresh turn as 'interrupted' (lost). send() doesn't clear the
+    // grace, so the snapshot is the fix.
+    const interruptedTurnIds = new Set(this.pendingTurns.keys());
     this._interruptGraceTimer = setTimeout(() => {
       let resolvedAny = false;
       for (const [turnId, pending] of this.pendingTurns) {
+        if (!interruptedTurnIds.has(turnId)) continue;   // only the turns in flight at C-c
         // Synthesize an interrupted resolution: empty text, 'interrupted' subtype.
+        // Cancel-cheap C3: clear ALL per-pending machinery (mirrors
+        // _finalizeTurn) — stray timers/listeners on the kept-warm proc are
+        // exactly what the cheap-cancel design must not leak.
         if (pending.quietTimer) clearTimeout(pending.quietTimer);
         if (pending.hardTimer) clearTimeout(pending.hardTimer);
-      if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
+        if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
+        if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
+        if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer);
+        if (pending._onStop) { this.off('stop-hook', pending._onStop); pending._onStop = null; }
         this.pendingTurns.delete(turnId);
         const qIdx = this.pendingQueue.findIndex(e => e.turnId === turnId);
         if (qIdx >= 0) this.pendingQueue.splice(qIdx, 1);
@@ -1502,7 +2280,7 @@ class CliProcess extends Process {
    */
   async probeBusyState() {
     const base = {
-      busy: false, streaming: false,
+      busy: false, streaming: false, backgroundShell: false, shellCount: 0,
       inFlight: this.inFlight, pendingTurns: this.pendingTurns.size,
       captured: false, paneTail: null,
     };
@@ -1518,10 +2296,23 @@ class CliProcess extends Process {
     }
     if (!pane) return base;
     const streaming = STREAMING_HINT_RE.test(pane);
+    // Background-shell count from the TUI mode line. Match only the captured
+    // TAIL (the mode line lives at the bottom of the viewport) so a `· N shell ·`
+    // string scrolled off into history can't trip a stale false-positive — see
+    // BACKGROUND_SHELL_RE. A detached `run_in_background` Bash that outlived its
+    // turn shows here even while claude is idle and not streaming.
+    const m = pane.slice(-400).match(BACKGROUND_SHELL_RE);
+    const shellCount = m ? parseInt(m[1], 10) : 0;
+    const backgroundShell = shellCount > 0;
     return {
       ...base,
+      // `busy` stays streaming-only — it is the abort path's "is claude working a
+      // turn" signal and must not change behaviour. Background-shell liveness is a
+      // separate axis the stall-watchdog reads via `backgroundShell`/`shellCount`.
       busy: streaming,
       streaming,
+      backgroundShell,
+      shellCount,
       captured: true,
       paneTail: pane.slice(-200),
     };
@@ -1533,6 +2324,137 @@ class CliProcess extends Process {
     return busy;
   }
+  /**
+   * Does this session have a detached background shell running RIGHT NOW — a
+   * `run_in_background` Bash that may have outlived its turn? Thin probe over
+   * probeBusyState's background-shell signal; the stall-watchdog's input.
+   * @returns {Promise<{live:boolean, count:number}>}
+   */
+  async hasLiveBackgroundWork() {
+    const { backgroundShell, shellCount } = await this.probeBusyState();
+    return { live: backgroundShell, count: shellCount };
+  }
+  /**
+   * LRU eviction pin (0.12.0 spec). Cached read of `_bgWorkSince` — the idle bg-work
+   * watchdog state maintained by `_pollBackgroundWork` on the ≤5s pong tick. Non-null ⟺ a
+   * detached background shell has been observed while idle. No time cap: a job that runs for
+   * hours stays pinned (elapsed time can't tell "slow-but-progressing" from "stuck"). Cheap,
+   * sync — safe to call from `_evictLRU`.
+   * @returns {boolean}
+   */
+  hasActiveBackgroundWork() {
+    return this._bgWorkSince !== null;
+  }
+  /**
+   * Resolve the model / effort for a spawn context using the topic→chat→
+   * fallback precedence (mirrors the spawn path). Single source of truth shared
+   * by start() (which records this.model / this.effort) and wouldReloadFor()
+   * (which compares the current config to those spawn-time values).
+   */
+  _resolveModel(opts) {
+    const topicConfig = opts.threadId && opts.chatConfig?.topics?.[opts.threadId];
+    return topicConfig?.model || opts.chatConfig?.model || opts.model;
+  }
+  _resolveEffort(opts) {
+    const topicConfig = opts.threadId && opts.chatConfig?.topics?.[opts.threadId];
+    return topicConfig?.effort || opts.chatConfig?.effort || opts.effort;
+  }
+  /**
+   * getOrSpawn calls this before reusing a warm proc. cli can't hot-swap model
+   * or effort (spawn-time flags), so when the resolved config has drifted from
+   * what we spawned with AND we are idle, the proc must be killed + cold-
+   * respawned (--resume keeps the conversation; the new --model / --effort takes
+   * effect). In-flight → false: fold the message into the running turn; the
+   * drift reloads on the next idle dispatch. SDK procs apply model live and do
+   * NOT implement this method, so process-manager only reloads when it exists.
+   * @returns {boolean}
+   */
+  wouldReloadFor(spawnContext) {
+    if (this.inFlight || this.closed) return false;
+    return this._resolveModel(spawnContext) !== this.model
+        || this._resolveEffort(spawnContext) !== this.effort;
+  }
+  /**
+   * 0.13 D1 (S9): LRU eviction pin — a session blocked on an open `ask` must
+   * not be evicted (the question, and claude's blocked cycle, would die with
+   * it). Belt-and-braces: with D1 the turn stays inFlight through the wait.
+   */
+  hasOpenQuestions() {
+    return this._openQuestions.size > 0;
+  }
+  /**
+   * Stall-watchdog for detached background work (0.12.0 background-work
+   * lifecycle, shumorobot Music 7h frozen-Chrome download). Runs on the
+   * pongWatchdog 5s tick but ONLY while the session is IDLE (pendingTurns===0) —
+   * the mirror of _pollMidTurnDialogs, which only runs DURING turns. When a
+   * `run_in_background` Bash outlives its turn and keeps running while claude is
+   * idle for > bgWorkStallMs, nothing tells the agent or user whether it's
+   * progressing or stuck. One read-only self-check re-invokes the agent to
+   * diagnose — via `fireUserMessage`, NOT `injectUserMessage` (which no-ops when
+   * !inFlight, the exact idle state here). Read-only framing matters: the agent
+   * runs bypassPermissions, so an open-ended "fix it" could background another
+   * hung shell unattended.
+   *
+   * Exactly one self-check per continuous background-work window (capped by
+   * `_bgWorkEscalations`); the window resets only when the shell count returns to
+   * 0. Never throws — swallows its own errors so the pong watchdog stays clean.
+   */
+  async _pollBackgroundWork() {
+    if (this.closed || !this.bridgeReady) return;
+    // Only watch while idle. An active turn means the agent is engaged
+    // (_pollMidTurnDialogs owns that path). Crucially we do NOT reset the clock
+    // here — the same shell is still running, so the window persists across a
+    // brief self-check turn rather than restarting and re-pinging every window.
+    if (this.pendingTurns.size > 0) return;
+    let live = false;
+    let count = 0;
+    try {
+      ({ live, count } = await this.hasLiveBackgroundWork());
+    } catch (err) {
+      this.logger.warn?.(`[${this.label}] channels: bg-work probe failed: ${err.message}`);
+      return;
+    }
+    if (!live) {
+      if (this._bgWorkSince !== null) {
+        this._logEvent('cli-bg-work-cleared', { idle_ms: Date.now() - this._bgWorkSince });
+        // Visibility: tear down the status indicator once work clears.
+        if (this._bgWorkStatusShown) {
+          this.emit('bg-work-status', { state: 'cleared' });
+          this._bgWorkStatusShown = false;
+        }
+      }
+      this._bgWorkSince = null;
+      this._bgWorkEscalations = 0;
+      return;
+    }
+    if (this._bgWorkSince === null) {
+      // First idle observation of a live background shell — start the clock AND
+      // raise the visibility indicator so a long job reads as working, not stuck.
+      this._bgWorkSince = Date.now();
+      this._bgWorkEscalations = 0;
+      this._logEvent('cli-bg-work-detected', { shell_count: count });
+      this.emit('bg-work-status', { state: 'running', count });
+      this._bgWorkStatusShown = true;
+      return;
+    }
+    const idleMs = Date.now() - this._bgWorkSince;
+    if (idleMs < this.bgWorkStallMs || this._bgWorkEscalations >= 1) return;
+    const mins = Math.max(1, Math.round(idleMs / 60000));
+    const prompt =
+      `⏳ A background job has been running ~${mins} min with no update. `
+      + `Check its status and report whether it's progressing or stuck. `
+      + `Do NOT start new work, re-run it, or kill anything — report only.`;
+    const fired = this.fireUserMessage(prompt);
+    this._bgWorkEscalations = 1;
+    this._logEvent('cli-bg-work-stall-selfcheck', { idle_ms: idleMs, shell_count: count, fired });
+  }
   async kill(reason = 'kill') {
     if (this.closed) return;
     // Parity P19: re-entry guard for concurrent kill() calls. Mirrors
@@ -1606,6 +2528,22 @@ class CliProcess extends Process {
   _handleHookEvent(ev) {
     if (!ev || typeof ev !== 'object') return;
+    // rc.16 observability: emit once when the FIRST hook event arrives for
+    // this session, confirming the claude→ndjson→tail pipeline is actually
+    // flowing. The 2026-06-02 stuck turn had a session whose hook ndjson was
+    // 0 bytes — claude emitted no hooks polygram could see, so no Stop ever
+    // arrived to finalize the turn. Without this signal that's invisible: a
+    // turn that hangs with NO `cli-hook-stream-live` for its session means the
+    // hook pipeline is dead for it (distinct from "Stop fired but wasn't
+    // acted on", which `cli-turn-resolved-by-stop` now covers).
+    if (!this._sawHookStream) {
+      this._sawHookStream = true;
+      this._logEvent('cli-hook-stream-live', {
+        session_id: this.claudeSessionId,
+        first_event: ev.type,
+      });
+    }
     // 0.12 Phase 1.8 (Finding 0.4.A): per-event lag measurement.
     // polygram_received_at_ms is stamped by the helper subprocess at write
     // time; subtracting from Date.now() gives the helper-write → tail-emit
@@ -1627,11 +2565,57 @@ class CliProcess extends Process {
       });
     }
+    // 0.13 D1: every hook event is same-session ACTIVITY for the finalizer
+    // ladder (generalizes the 2026-06-08 WA-topic fix, which only extended on
+    // Pre/PostToolUse) — EXCEPT Stop, which is a terminal signal, not work:
+    // noting it as activity would cancel its own attribution grace. parse-error
+    // and unknown are excluded too (stream noise is not evidence of work).
+    if (ev.type === 'Stop') {
+      this._lastHookEventAt = Date.now();
+    } else if (ev.type && ev.type !== 'parse-error' && ev.type !== 'unknown') {
+      this._lastHookEventAt = Date.now();
+      this._noteActivity(`hook:${ev.type}`);
+    }
     switch (ev.type) {
       case 'UserPromptSubmit':
+        // 0.13 D1 seen-slice: the UPS prompt carries the bridge-authored
+        // <channel turn_id="…"> envelope (P0 spike Q1) — parse it (anchored on
+        // the raw tag prefix, see UPS_ENVELOPE_TURN_ID_RE) and mark the
+        // matching pending as picked-up. `seen` is what lets rung 1 tell this
+        // cycle's Stop from a foreign cycle's. Never log prompt content (L13).
+        let anchorMsgId = null;
+        if (typeof ev.prompt === 'string' && ev.prompt) {
+          for (const m of ev.prompt.matchAll(UPS_ENVELOPE_TURN_ID_RE)) {
+            const seenPending = this.pendingTurns.get(m[1]);
+            if (seenPending && seenPending.seen !== true) {
+              seenPending.seen = true;
+              this._logEvent('cli-ups-seen', { turn_id: m[1] });
+            }
+            // 0.13 D2: pickup transitions the ledger entry too — for injected
+            // (no-pending) inputs this is THE fold/next-cycle signal that
+            // cancels drop detection; for primaries it cancels the delivery
+            // watchdog. A late pickup (queued inject becoming the next cycle)
+            // landing inside the drop-confirm window cancels it here.
+            const lEntry = this.inputLedger.get(m[1]);
+            if (lEntry) {
+              if (lEntry.state === 'written' || lEntry.state === 'fold-suspected') {
+                this._ledgerTransition(m[1], 'seen');
+                if (!seenPending) this._logEvent('cli-ups-seen', { turn_id: m[1] });
+              }
+              // 0.13 D3: the picked-up message anchors the cycle's visuals.
+              if (anchorMsgId == null && lEntry.msgId != null) anchorMsgId = lEntry.msgId;
+            }
+          }
+        }
         this.emit('turn-start', {
           backend: this.backend,
           sessionId: this.claudeSessionId,
+          // 0.13 D3: lets the session feedback controller distinguish a
+          // normal turn (has pending — per-turn visuals own it) from an
+          // autonomous/injected cycle (no pending — the controller's job).
+          hasPending: this.pendingTurns.size > 0,
+          anchorMsgId,
         });
         return;
@@ -1714,15 +2698,94 @@ class CliProcess extends Process {
         return;
       }
-      case 'Stop':
-        // Phase 1.7 (TODO) will use this as the authoritative turn-end
-        // signal with stopGraceMs. For now: pass through as 'stop-hook'
-        // event so the resolver in Phase 1.7 can subscribe.
-        this.emit('stop-hook', {
+      case 'Stop': {
+        // 0.13 D1 rung 1: Stop ends the turn ONLY when the ending cycle is
+        // attributable to it. Stop carries no turn_id, and claude-side cycles
+        // polygram never registered a pending for are routine (/compact +
+        // bg-work self-checks via fireUserMessage, ScheduleWakeup cycles, a
+        // non-folded inject running as its own cycle — the P0 spike confirmed
+        // such cycles DO fire Stop). Pre-D1 the rc.16 branch finalized the
+        // single pending on ANY Stop — a foreign cycle's Stop could close a
+        // queued, never-picked-up user turn and deliver the FOREIGN cycle's
+        // last_assistant_message as its answer (seam S5's Stop-identity gap).
+        const info = {
           stopHookActive: ev.stopHookActive,
           lastAssistantMessage: ev.lastAssistantMessage,
           backend: this.backend,
-        });
+        };
+        // Legacy (rung 3) turns already resolving via a reply quiet-window
+        // consume this via their per-turn onStop listener (the text-fallback
+        // rescue inside _resolveTurn). Emit first so that path runs
+        // synchronously before the attribution branch below.
+        this.emit('stop-hook', info);
+        // A stop-hook-forced continuation means the cycle is, by definition,
+        // NOT over — never finalize on it. (Unobserved in 30d of prod data;
+        // cheap insurance per the design's round-2 review.)
+        if (ev.stopHookActive === true) {
+          this._logEvent('cli-stop-hook-active-ignored', { pending_count: this.pendingTurns.size });
+          return;
+        }
+        if (this.pendingTurns.size === 1) {
+          const [turnId, p] = [...this.pendingTurns.entries()][0];
+          if (!p._stopGracePending) {
+            const attributed = p.seen === true || (p.replies?.length || 0) > 0;
+            if (attributed) {
+              // Finalize through a short grace; any same-session activity
+              // inside it proves this Stop was stale/foreign (lagged ndjson
+              // delivery) and cancels — the turn falls back to rung 2.
+              this._beginAttributedStopGrace(turnId, p, info);
+            } else {
+              // Never picked up (no UPS-seen) and never replied — this Stop
+              // belongs to a foreign cycle. Ignore it loudly; the pending
+              // ends via its own pickup→Stop, rung 2, or the ceilings.
+              this._logEvent('cli-stop-foreign', {
+                turn_id: turnId,
+                session_id: this.claudeSessionId,
+              });
+            }
+          }
+        } else if (this.pendingTurns.size > 1) {
+          // Can't attribute Stop to one of several concurrent turns — surface
+          // it so a turn that waited for its grace timer (instead of resolving
+          // on Stop) is explained in the events DB.
+          this._logEvent('cli-stop-unattributed', { pending_count: this.pendingTurns.size });
+        }
+        // 0.12.0-rc.13 proactive compaction warning: on turn-end, if enabled
+        // for this chat and not already warned this climb, sample context
+        // occupancy from the transcript and warn (propose /compact) BEFORE
+        // claude auto-compacts mid-turn and detaches the bridge. Fire-and-
+        // forget — transcript IO must never block the stop path.
+        if (this.compactionWarn?.enabled && !this._compactionWarned && ev.transcriptPath) {
+          this._maybeProactiveCompactionWarn(ev.transcriptPath);
+        }
+        return;
+      }
+      case 'PreCompact':
+        // 0.12.0-rc.13: auto-compaction is the event that detaches the
+        // channels MCP bridge mid-turn. Record it; and on the dangerous AUTO
+        // case (manual /compact is the user's own deliberate action — never
+        // nag), emit a reactive warning the chat layer posts. The proactive
+        // warning (on Stop) tries to PREVENT this; this is the backstop.
+        this._logEvent('cli-compaction-imminent', { trigger: ev.trigger });
+        if (this.compactionWarn?.enabled && ev.trigger === 'auto') {
+          this.emit('compaction-warn', {
+            kind: 'reactive',
+            trigger: 'auto',
+            sessionId: this.claudeSessionId,
+            backend: this.backend,
+          });
+        }
+        return;
+      case 'PostCompact':
+        // Context just dropped — re-arm the proactive warn-once so the next
+        // climb can warn again.
+        this._compactionWarned = false;
+        this._logEvent('cli-compaction-done', { trigger: ev.trigger });
         return;
       case 'Notification':
@@ -1761,15 +2824,22 @@ class CliProcess extends Process {
         {
           const requestId = ev.toolUseId || `hook-notification-${Date.now()}`;
           const toolName = ev.toolName;
-          const toolInput = this._formatToolInputForApproval(
-            ev.prompt || null,
-            // Use the structured tool_input as the "preview" — it's
-            // already structured by claude rather than truncated to
-            // 200 chars like the channels bridge perm_req does.
-            typeof ev.toolInput === 'string'
-              ? ev.toolInput
-              : JSON.stringify(ev.toolInput || {}),
-          );
+          // Finding #11 fix: pass the STRUCTURED tool_input through. makeCanUseTool
+          // matches gated patterns via matchesAnyPattern, which reads
+          // input.command (Bash) / input.url (WebFetch) — a formatted STRING
+          // makes those undefined so a gated `Bash(rm *)` never matches and the
+          // tool is allowed with NO approval card (silent gating bypass). The
+          // hook Notification payload carries structured tool_input, so forward
+          // it as-is; the approval card (approvalCardText) renders a structured
+          // object fine — same shape the SDK canUseTool path already uses. Fall
+          // back to the formatted-string preview only if claude sent no
+          // structured tool_input (degenerate — tool needs perm but no input).
+          const toolInput = (ev.toolInput && typeof ev.toolInput === 'object')
+            ? ev.toolInput
+            : this._formatToolInputForApproval(
+                ev.prompt || null,
+                typeof ev.toolInput === 'string' ? ev.toolInput : JSON.stringify(ev.toolInput || {}),
+              );
           this.emit('approval-required', {
             id: requestId,
             toolName,
@@ -1843,11 +2913,11 @@ class CliProcess extends Process {
    * landing just before the disconnect would otherwise leave a stray
    * timer on the dead instance).
    */
-  _handleBridgeDisconnected() {
+  _handleBridgeDisconnected(reason = 'socket-close') {
     this.bridgeReady = false;
     this.mcpReady = false;
     if (this.closed) return;
-    this.logger.warn?.(`[${this.label}] channels: bridge disconnected unexpectedly`);
+    this.logger.warn?.(`[${this.label}] channels: bridge disconnected unexpectedly (${reason})`);
     // L6: clear the interrupt grace timer alongside the rest of the lifecycle.
     if (this._interruptGraceTimer) {
       clearTimeout(this._interruptGraceTimer);
@@ -1859,6 +2929,7 @@ class CliProcess extends Process {
       if (pending.hardTimer) clearTimeout(pending.hardTimer);
       if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
       if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
+      if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer);   // 0.13 D1
       // L5: remove the per-turn stop-hook listener (this path bypasses
       // Process.kill()'s removeAllListeners).
       if (pending._onStop) this.off('stop-hook', pending._onStop);
@@ -1869,14 +2940,24 @@ class CliProcess extends Process {
     this.pendingTurns.clear();
     this.pendingQueue.length = 0;
     this.inFlight = false;
+    // 0.12: drop the interactive-question keep-alive here too, for parity with
+    // _doKill — pm reacts to 'bridge-disconnected' by killing us anyway, but don't
+    // depend on that ordering to stop the 60s interval / clear the open set.
+    this._stopQuestionKeepAlive();
+    this._openQuestions.clear();
+    this._clearLedgerTimers();       // 0.13 D2
     this.emit('bridge-disconnected');
-    this._logEvent('bridge-disconnected', { reason: 'socket-close' });
+    this._logEvent('bridge-disconnected', { reason });
   }
   async _doKill(reason) {
     this.closed = true;
     this.inFlight = false;
+    this._stopQuestionKeepAlive();   // 0.12: drop the interactive-question keep-alive
+    this._openQuestions.clear();
+    this._clearLedgerTimers();       // 0.13 D2
     if (this.pingTimer) {
       clearInterval(this.pingTimer);
       this.pingTimer = null;
@@ -1896,6 +2977,7 @@ class CliProcess extends Process {
       if (pending.hardTimer) clearTimeout(pending.hardTimer);
       if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
       if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
+      if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer);   // 0.13 D1
       if (pending._onStop) this.off('stop-hook', pending._onStop); // L5
       const err = new Error(`session killed: ${reason}`);
       err.code = 'KILLED';
@@ -1995,9 +3077,15 @@ class CliProcess extends Process {
    * @param {string|number} [opts.msgId] — inbound Telegram msg_id, passed through to the bridge so claude's next reply can echo it via turn_id
    * @returns {boolean}
    */
-  injectUserMessage({ content, priority = 'next', shouldQuery, msgId } = {}) {
+  injectUserMessage({ content, priority = 'next', shouldQuery, msgId, source = 'inject' } = {}) {
     if (this.closed) return false;
     if (!this.inFlight) return false;                // base contract: no live turn → caller falls through
+    // C5 (review 2026-06-12): a cancel is in flight (interrupt grace armed) —
+    // inFlight is still true until the grace fires, but merging a follow-up into
+    // work the user just stopped is wrong AND leaks a fresh 'written' ledger
+    // entry the cancel-loop already passed (later re-delivery). Refuse so the
+    // caller queues it as a fresh primary turn instead.
+    if (this._interruptGraceTimer) return false;
     if (!this.bridgeReady) return false;
     if (typeof content !== 'string' || !content) return false;
@@ -2026,9 +3114,14 @@ class CliProcess extends Process {
       this.emit('inject-fail', { err: 'bridge write failed', source: 'inject' });
       return false;
     }
+    // 0.13 D2: the injected turn_id is LEDGERED — pre-P3 it never escaped this
+    // function, making fold/new-turn/drop indistinguishable (seam S4).
+    this._ledgerAdd(turnId, { source, msgId });
     this._logEvent('inject-user-message', {
       session_key: this.sessionKey,
       chat_id: this.chatId,
+      turn_id: turnId,
+      source,
       priority: priority ?? null,
       should_query: shouldQuery ?? null,
       text_len: safeContent.length,
@@ -2045,7 +3138,8 @@ class CliProcess extends Process {
   /**
    * Review AC7: fire-and-forget user-message into the bridge. Polygram's
-   * slash-command paths (/compact, /reload) use this to push a user-shaped
+   * /compact path, the boot-time compact-replay, and the bg-work stall
+   * self-check use this to push a user-shaped
    * prompt without registering a pending turn. SDK/tmux implement this
    * differently per backend; channels just writes a user_msg to the bridge
    * with a fresh turn_id (which has no listener — so any reply Claude sends
@@ -2059,6 +3153,7 @@ class CliProcess extends Process {
     if (typeof text !== 'string' || text.length === 0) return false;
     if (this.closed || !this.bridgeReady) return false;
     const turnId = crypto.randomUUID();
+    this._ledgerAdd(turnId, { source: 'system' });   // 0.13 D2: visible, never redelivered
     this._writeToBridge({
       kind: 'user_msg',
       turn_id: turnId,
@@ -2092,6 +3187,7 @@ class CliProcess extends Process {
       if (pending.hardTimer) clearTimeout(pending.hardTimer);
       if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
       if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer); // L5
+      if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer);   // 0.13 D1
       if (pending._onStop) this.off('stop-hook', pending._onStop);        // L5
       const err = new Error(`session reset: ${reason}`);
       err.code = 'RESET';
@@ -2237,6 +3333,49 @@ class CliProcess extends Process {
     this._writeToBridge({ kind: 'perm_verdict', request_id: requestId, behavior });
   }
+  // ─── interactive questions (0.12 ask) ─────────────────────────────
+  /**
+   * Hand a question's answer back to the blocking `ask` tool call. `result` is
+   * {answers:[...]} | {cancelled:true} | {timedout:true}. Stops the keep-alive
+   * once no questions remain open. Called by pm.answerQuestion (from the handler).
+   */
+  writeQuestionAnswer(toolCallId, result) {
+    this._openQuestions.delete(toolCallId);
+    const noneLeft = this._openQuestions.size === 0;
+    if (noneLeft) this._stopQuestionKeepAlive();
+    const wrote = this._writeToBridge({ kind: 'question_answer', tool_call_id: toolCallId, result: result ?? {} });
+    // Re-light progress: claude is about to resume working on the answer. The per-turn reactor
+    // cleared when claude posted its reply + asked, and no tool hooks fired during the wait, so
+    // it stayed cleared — the post-answer work was invisible ("why don't I see it working after
+    // submit?", hire topic 2026-06-09). On a REAL answer (cancelled/timeout END the turn → let
+    // the normal teardown clear), signal polygram to re-arm the turn's working reaction.
+    if (noneLeft && result && !result.cancelled && !result.timedout) {
+      this.emit('question-resumed');
+    }
+    // 0.13 D1: the wait is over either way — restart the activity clock so a
+    // replied turn's rung-2 finalize resumes (real answer: claude works on;
+    // cancelled/timedout: claude wraps up — rung 2 then ends the tail cleanly).
+    if (noneLeft) this._noteActivity('question-answered');
+    return wrote;
+  }
+  _startQuestionKeepAlive() {
+    if (this._questionKeepAliveTimer) return;
+    this._questionKeepAliveTimer = setInterval(() => {
+      if (this._openQuestions.size === 0) { this._stopQuestionKeepAlive(); return; }
+      // claude is idle waiting on the answer → no tool hooks → reset the idle
+      // ceiling so the turn isn't killed mid-question. (Rung 2 is suspended
+      // while a question is open, so this only feeds the hardTimer.)
+      this._noteActivity('question-keepalive');
+    }, 60_000);
+    this._questionKeepAliveTimer.unref?.();
+  }
+  _stopQuestionKeepAlive() {
+    if (this._questionKeepAliveTimer) { clearInterval(this._questionKeepAliveTimer); this._questionKeepAliveTimer = null; }
+  }
   // ─── socket plumbing ──────────────────────────────────────────────
   _writeToBridge(obj) {
@@ -2280,6 +3419,11 @@ class CliProcess extends Process {
       this._pollMidTurnDialogs().catch((err) => {
         this.logger.warn?.(`[${this.label}] channels: mid-turn poll failed: ${err.message}`);
       });
+      // 0.12.0 background-work lifecycle: idle-side stall-watchdog, the mirror of
+      // _pollMidTurnDialogs (which only runs during turns). Fire-and-forget.
+      this._pollBackgroundWork().catch((err) => {
+        this.logger.warn?.(`[${this.label}] channels: bg-work poll failed: ${err.message}`);
+      });
     }, PONG_CHECK_INTERVAL_MS);
     this.pongWatchdog.unref?.();
   }
@@ -2305,9 +3449,46 @@ class CliProcess extends Process {
    * Extracted as a separate async method so unit tests can drive it
    * directly without waiting for the setInterval tick.
    */
+  /**
+   * 0.12.0-rc.13: proactive compaction warning. Read the transcript's current
+   * context occupancy and, if past the per-chat threshold, emit a
+   * 'compaction-warn' the chat layer turns into "you're ~N% full, run
+   * /compact" — giving the user a window to compact on their terms BEFORE
+   * claude auto-compacts mid-turn (which detaches the channels bridge). Warns
+   * once per climb (this._compactionWarned), re-armed on PostCompact.
+   * Fire-and-forget: swallows its own errors so transcript IO never breaks
+   * the turn-end path.
+   */
+  async _maybeProactiveCompactionWarn(transcriptPath) {
+    try {
+      if (!this.compactionWarn?.enabled || this._compactionWarned) return;
+      const usage = await readContextTokens(transcriptPath);
+      if (!usage) return;
+      const pct = contextPct(usage.total) * 100;
+      if (pct < this.compactionWarn.thresholdPct) return;
+      if (this._compactionWarned) return;   // re-check after the async gap
+      this._compactionWarned = true;
+      this.emit('compaction-warn', {
+        kind: 'proactive',
+        pct: Math.round(pct),
+        totalTokens: usage.total,
+        sessionId: this.claudeSessionId,
+        backend: this.backend,
+      });
+    } catch (err) {
+      this.logger.warn?.(`[${this.label}] compaction-warn sample failed: ${err.message}`);
+    }
+  }
   async _pollMidTurnDialogs() {
     if (this.closed) return;
     if (this.pendingTurns.size === 0) return;        // no work to do when idle
+    // 0.12 interactive questions: while an `ask` is open claude sits idle at the
+    // prompt waiting on the tool result — so the pane shows no "esc to interrupt"
+    // and the question's own echoed text (a "?"/numbered list/"Yes/No") would
+    // false-trip the unknown-prompt heuristic + starve the STALL heartbeat. The
+    // keyboard lives on Telegram; suppress the pane watchdog while a question is open.
+    if (this._openQuestions.size > 0) return;
     if (!this.tmuxSession) return;                   // pre-spawn / post-kill
     if (typeof this.runner?.captureWide !== 'function') return;
@@ -2323,6 +3504,15 @@ class CliProcess extends Process {
     }
     if (!pane) return;
+    // rc.14: removed the rc.11 pane-based "dead bridge" detection here. It
+    // matched the BENIGN banner "server:polygram-bridge  no MCP server
+    // configured with that name" — a cosmetic line that
+    // `--dangerously-load-development-channels` + `--strict-mcp-config` prints
+    // on EVERY healthy session (channel still delivers; reply tool still
+    // works). The matcher false-fired ~5s into every channels turn and killed
+    // healthy sessions. Real bridge loss is the socket-close path
+    // (_handleBridgeDisconnected), not anything observable in the pane.
     const now = Date.now();
     // 0.12 Phase 3.2: liveness heartbeat. The TUI prints "esc to interrupt"
@@ -2333,6 +3523,11 @@ class CliProcess extends Process {
     // resets a timer; safe to fire on every poll while claude is busy.
     if (STREAMING_HINT_RE.test(pane)) {
       this.emit('thinking');
+      // 0.13 D1: the pane heartbeat is ACTIVITY for the finalizer ladder —
+      // pure-thinking stretches fire ZERO hooks for 45s+ (that is this
+      // heartbeat's whole reason to exist), so a hook-only quiet clock would
+      // finalize a replied turn mid-thought (round-2 panel finding).
+      this._noteActivity('pane-thinking');
     }
     let matchedKnownPrompt = false;
@@ -2359,16 +3554,28 @@ class CliProcess extends Process {
         pending_count: this.pendingTurns.size,
       });
-      if (prompt.action === 'enter') {
-        try {
-          await this.runner.sendControl(this.tmuxSession, 'Enter');
-        } catch (err) {
-          this.logger.warn?.(
-            `[${this.label}] cli: mid-turn dismiss-Enter failed for ${prompt.name}: ${err.message}`,
-          );
+      if (prompt.action === 'enter' || prompt.action === 'keys') {
+        // 'keys' sends a navigation sequence (e.g. Down,Enter to pick a
+        // non-default dialog option); 'enter' stays the single-key dismissal.
+        const keySeq = prompt.action === 'keys' ? prompt.keys : ['Enter'];
+        for (let ki = 0; ki < keySeq.length; ki++) {
+          if (ki > 0) await new Promise(r => setTimeout(r, 120));   // Ink can swallow same-batch keys
+          try {
+            await this.runner.sendControl(this.tmuxSession, keySeq[ki]);
+          } catch (err) {
+            this.logger.warn?.(
+              `[${this.label}] cli: mid-turn ${keySeq[ki]} failed for ${prompt.name}: ${err.message}`,
+            );
+          }
         }
       }
       // 'emit-only': telemetry-only; operator decides next step.
+      // Resume-dialog fix: the session-age dialog escaping to MID-TURN means
+      // env suppression failed AND the startup gate didn't see it — same
+      // soak-queryable event kind as the startup-gate fallback.
+      if (prompt.name === 'session-age') {
+        this._logEvent('session-age-dialog-fallback', { tmux_name: this.tmuxSession, phase: 'mid-turn' });
+      }
     }
     // 0.12 Phase 3.3 (Q1 resolution): unknown-prompt heuristic. If the pane