polygram 0.12.0-rc.8 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.example.json +4 -3
- package/lib/claude-bin.js +14 -1
- package/lib/compaction-warn.js +59 -0
- package/lib/context-usage.js +93 -0
- package/lib/db.js +1 -1
- package/lib/error/classify.js +33 -10
- package/lib/feedback/session-feedback.js +91 -0
- package/lib/handlers/abort.js +87 -40
- package/lib/handlers/autosteer.js +4 -0
- package/lib/handlers/config-callback.js +25 -6
- package/lib/handlers/config-ui.js +39 -10
- package/lib/handlers/dispatcher.js +83 -0
- package/lib/handlers/download.js +101 -58
- package/lib/handlers/drop-redeliver.js +69 -0
- package/lib/handlers/edit-correction.js +2 -0
- package/lib/handlers/edit-redelivery.js +136 -0
- package/lib/handlers/gate-inbound.js +188 -0
- package/lib/handlers/questions.js +289 -0
- package/lib/handlers/redeliver.js +122 -0
- package/lib/handlers/slash-commands.js +43 -30
- package/lib/history-preload.js +6 -0
- package/lib/history.js +7 -1
- package/lib/model-costs.js +4 -0
- package/lib/process/channels-bridge-protocol.js +22 -1
- package/lib/process/channels-bridge.mjs +128 -7
- package/lib/process/channels-tool-dispatcher.js +105 -12
- package/lib/process/cli-process.js +1277 -70
- package/lib/process/hook-event-tail.js +7 -0
- package/lib/process/hook-settings.js +7 -0
- package/lib/process/process.js +22 -0
- package/lib/process-guard.js +57 -1
- package/lib/process-manager.js +120 -35
- package/lib/questions/questions.js +187 -0
- package/lib/questions/store.js +105 -0
- package/lib/rewind/execute.js +89 -0
- package/lib/rewind/fork.js +112 -0
- package/lib/rewind/rewind.js +174 -0
- package/lib/sdk/callbacks.js +165 -167
- package/lib/session-key.js +29 -0
- package/lib/telegram/album-reactions.js +50 -0
- package/lib/telegram/parse.js +9 -2
- package/lib/telegram/typing.js +17 -2
- package/lib/tmux/startup-gate.js +44 -14
- package/migrations/012-pending-questions.sql +30 -0
- package/package.json +1 -1
- package/polygram.js +224 -78
|
@@ -53,6 +53,8 @@ const { createHookTail } = require('./hook-event-tail');
|
|
|
53
53
|
// /private/tmp drift — one of the original Music-topic failures).
|
|
54
54
|
const { DEFAULT_ATTACHMENT_BASE } = require('./channels-tool-dispatcher');
|
|
55
55
|
const { resolveFileCaps } = require('../attachments');
|
|
56
|
+
const { resolveCompactionWarnConfig } = require('../compaction-warn');
|
|
57
|
+
const { readContextTokens, contextPct } = require('../context-usage');
|
|
56
58
|
const { runStartupGate } = require('../tmux/startup-gate');
|
|
57
59
|
const { POLYGRAM_DISPLAY_HINT } = require('../telegram/display-hint');
|
|
58
60
|
|
|
@@ -70,6 +72,30 @@ const DEFAULT_MCP_READY_TIMEOUT_MS = 5_000;
|
|
|
70
72
|
// Mirrors rc.41 H4 stopGraceMs from tmux backend. 2s default = same as tmux.
|
|
71
73
|
const DEFAULT_STOP_GRACE_MS = 2_000;
|
|
72
74
|
const DEFAULT_TURN_QUIET_MS = 2_000; // after first reply, wait this long for more before resolving turn
|
|
75
|
+
// 0.13 D1 rung 2 (docs/0.13-channels-lifecycle-design.md §3 D1): once a turn has
|
|
76
|
+
// ≥1 delivered reply AND the hook stream is live, the turn finalizes when the
|
|
77
|
+
// session's whole ACTIVITY surface (hook events + the pane "esc to interrupt"
|
|
78
|
+
// thinking heartbeat + bridge tool calls + replies) goes quiet for this long.
|
|
79
|
+
// Calibrated against the busy-phase inter-activity gap: the pane heartbeat fires
|
|
80
|
+
// on the 5s pong tick while a turn is pending, so a live claude can never be
|
|
81
|
+
// "activity-quiet" — only a truly ended (or hook-and-pane-dead) tail is.
|
|
82
|
+
const DEFAULT_ACTIVITY_QUIET_MS = 18_000;
|
|
83
|
+
// 0.13 D2 (P3): InputLedger windows. dropConfirm = how long after the trigger
|
|
84
|
+
// cycle's end an unseen/unacked non-primary entry may still be picked up as a
|
|
85
|
+
// claude-side next cycle before it is declared dropped (late seen/ack cancels).
|
|
86
|
+
// deliveryWatchdog = the primary pickup window: a dispatched primary with no
|
|
87
|
+
// UPS and ZERO session activity gets one idempotent re-write, then (still
|
|
88
|
+
// nothing) a bridge teardown onto the existing recovery path.
|
|
89
|
+
const DEFAULT_DROP_CONFIRM_MS = 20_000;
|
|
90
|
+
const DEFAULT_DELIVERY_WATCHDOG_MS = 10_000;
|
|
91
|
+
const INPUT_LEDGER_CAP = 64;
|
|
92
|
+
// 0.13 D1 P1 seen-slice: parse the pickup turn_id out of the UserPromptSubmit
|
|
93
|
+
// prompt. Anchored on the RAW `<channel ` tag prefix — the bridge body-escape
|
|
94
|
+
// (channels-bridge.mjs escapeChannelBody) turns every user-authored `<` into
|
|
95
|
+
// `&lt;`, so a raw tag prefix is bridge-authored by construction and a pasted/
|
|
96
|
+
// spoofed `turn_id="…"` in message body text can never mark a pending seen.
|
|
97
|
+
// (Envelope shape verified from prod JSONL + the P0 spike — Q1.)
|
|
98
|
+
const UPS_ENVELOPE_TURN_ID_RE = /<channel\s[^>]*turn_id="([0-9a-f-]{36})"/g;
|
|
73
99
|
const DEFAULT_TURN_TIMEOUT_MS = 600_000; // 10 min idle cap (resets on each reply — Review F#13)
|
|
74
100
|
const DEFAULT_TURN_ABSOLUTE_MS = 1_800_000; // 30 min absolute wall-clock ceiling (no reset)
|
|
75
101
|
const DEFAULT_INTERRUPT_GRACE_MS = 5_000; // after Ctrl-C, wait this long for Claude to ack before synthesizing 'interrupted'
|
|
@@ -96,7 +122,10 @@ const DEFAULT_QUEUE_CAP = 50; // Parity P2: match SDK/tmux pendin
|
|
|
96
122
|
// catalog when new dialogs are observed in production.
|
|
97
123
|
const SESSION_AGE_PROMPT_RE = /Resuming the full session[\s\S]*Resume from summary/i;
|
|
98
124
|
const MID_TURN_PROMPTS = [
|
|
99
|
-
|
|
125
|
+
// Review F2 (resume-dialog fix): bare Enter selects the pre-selected
|
|
126
|
+
// "Resume from summary" — which literally runs /compact. Navigate to
|
|
127
|
+
// "Resume full session as-is" instead, same as the startup-gate trigger.
|
|
128
|
+
{ name: 'session-age', regex: SESSION_AGE_PROMPT_RE, action: 'keys', keys: ['Down', 'Enter'] },
|
|
100
129
|
];
|
|
101
130
|
|
|
102
131
|
// 0.12 Phase 3.2 (Finding 0.1.A): rc.45 esc-to-interrupt liveness heartbeat.
|
|
@@ -111,6 +140,27 @@ const MID_TURN_PROMPTS = [
|
|
|
111
140
|
// hook process.
|
|
112
141
|
const STREAMING_HINT_RE = /esc to interrupt/i;
|
|
113
142
|
|
|
143
|
+
// 0.12.0 background-work lifecycle: claude's TUI mode line shows a live
|
|
144
|
+
// background-shell COUNT while a `run_in_background:true` Bash outlives its turn,
|
|
145
|
+
// e.g. `⏵⏵ bypass permissions on · 1 shell · ← for agents · ↓ to manage`.
|
|
146
|
+
// Confirmed on claude 2.1.158 (P0 spike — docs/0.12.0-background-work-lifecycle-
|
|
147
|
+
// plan.md): the count is always-present in the viewport mode line while shells run
|
|
148
|
+
// and clears IN-PLACE within ~3s when they exit (no stale scrollback).
|
|
149
|
+
//
|
|
150
|
+
// MODE-INDEPENDENT (prod regression fix, 2026-06-04): the original regex anchored
|
|
151
|
+
// on "auto mode on", but EVERY shumorobot session runs "⏵⏵ bypass permissions on"
|
|
152
|
+
// — the spike happened to be captured in auto mode. So the detector never matched
|
|
153
|
+
// in prod and bg-work-status fired zero times. Anchor instead on the `⏵⏵` mode-
|
|
154
|
+
// line glyph (present in auto / bypass / accept-edits modes alike); only the mode
|
|
155
|
+
// label between it and `· N shell` varies. Still matched only against the captured
|
|
156
|
+
// TAIL so a scrolled-off history line never trips it. R1: re-validate on each
|
|
157
|
+
// pinned-claude bump (glyph + `N shell` wording).
|
|
158
|
+
const BACKGROUND_SHELL_RE = /⏵⏵[^\n]*·\s*(\d+)\s+shells?\b/i;
|
|
159
|
+
// How long a detached background shell may run AFTER its turn resolved (claude
|
|
160
|
+
// idle) before the stall-watchdog fires one read-only self-check. Override via
|
|
161
|
+
// the constructor (tests use a small value).
|
|
162
|
+
const DEFAULT_BG_WORK_STALL_MS = 600_000; // 10 min
|
|
163
|
+
|
|
114
164
|
// 0.12 Phase 3.3 (Q1 resolution): heuristic for "looks like an unknown
|
|
115
165
|
// interactive prompt." Match common prompt shapes that don't appear in
|
|
116
166
|
// MID_TURN_PROMPTS — operator gets a telemetry event so they can decide
|
|
@@ -118,6 +168,17 @@ const STREAMING_HINT_RE = /esc to interrupt/i;
|
|
|
118
168
|
// — false positives surface as no-op telemetry, false negatives surface
|
|
119
169
|
// as the idle-ceiling timeout (~10min).
|
|
120
170
|
const UNKNOWN_PROMPT_HEURISTIC_RE = /(\?\s*$|\(y\/N\)|Yes\/No|❯\s|^\s*[12345]\.\s)/im;
|
|
171
|
+
// rc.14: a previous rc (rc.11) had a BRIDGE_DEAD_RE here that matched the pane
|
|
172
|
+
// line "server:polygram-bridge no MCP server configured with that name" and
|
|
173
|
+
// treated it as a dead bridge to recover from. That was a MISDIAGNOSIS: this
|
|
174
|
+
// line is a BENIGN, persistent banner that `--dangerously-load-development-
|
|
175
|
+
// channels` + `--strict-mcp-config` prints on EVERY healthy session — the
|
|
176
|
+
// channel still delivers messages and the reply tool still works (reproduced
|
|
177
|
+
// 2026-06-01 with a test MCP server that demonstrably functions). The pane
|
|
178
|
+
// matcher therefore false-fired ~5s into every channels turn and KILLED
|
|
179
|
+
// healthy sessions (the Music-topic "mid-turn detach" regression). Real bridge
|
|
180
|
+
// loss is caught by the socket-close path (bridgeServer 'bridge-disconnected'
|
|
181
|
+
// → _handleBridgeDisconnected). There is no reliable pane signal — removed.
|
|
121
182
|
// Per-pattern rate limit so a dialog that lingers across multiple polls
|
|
122
183
|
// doesn't spam sendControl/event emissions. Aligned with the 5s poll cadence.
|
|
123
184
|
const MID_TURN_DEDUP_WINDOW_MS = 30_000;
|
|
@@ -157,8 +218,12 @@ class CliProcess extends Process {
|
|
|
157
218
|
mcpReadyTimeoutMs = DEFAULT_MCP_READY_TIMEOUT_MS,
|
|
158
219
|
stopGraceMs = DEFAULT_STOP_GRACE_MS,
|
|
159
220
|
turnQuietMs = DEFAULT_TURN_QUIET_MS,
|
|
221
|
+
activityQuietMs = DEFAULT_ACTIVITY_QUIET_MS,
|
|
222
|
+
dropConfirmMs = DEFAULT_DROP_CONFIRM_MS,
|
|
223
|
+
deliveryWatchdogMs = DEFAULT_DELIVERY_WATCHDOG_MS,
|
|
160
224
|
turnTimeoutMs = DEFAULT_TURN_TIMEOUT_MS,
|
|
161
225
|
turnAbsoluteMs = DEFAULT_TURN_ABSOLUTE_MS,
|
|
226
|
+
bgWorkStallMs = DEFAULT_BG_WORK_STALL_MS,
|
|
162
227
|
interruptGraceMs = DEFAULT_INTERRUPT_GRACE_MS,
|
|
163
228
|
maxRepliesPerTurn = DEFAULT_MAX_REPLIES_PER_TURN,
|
|
164
229
|
queueCap = DEFAULT_QUEUE_CAP, // Parity P2
|
|
@@ -188,8 +253,12 @@ class CliProcess extends Process {
|
|
|
188
253
|
this.mcpReadyTimeoutMs = mcpReadyTimeoutMs;
|
|
189
254
|
this.stopGraceMs = stopGraceMs;
|
|
190
255
|
this.turnQuietMs = turnQuietMs;
|
|
256
|
+
this.activityQuietMs = activityQuietMs;
|
|
257
|
+
this.dropConfirmMs = dropConfirmMs;
|
|
258
|
+
this.deliveryWatchdogMs = deliveryWatchdogMs;
|
|
191
259
|
this.turnTimeoutMs = turnTimeoutMs;
|
|
192
260
|
this.turnAbsoluteMs = turnAbsoluteMs;
|
|
261
|
+
this.bgWorkStallMs = bgWorkStallMs;
|
|
193
262
|
this.interruptGraceMs = interruptGraceMs;
|
|
194
263
|
this.maxRepliesPerTurn = maxRepliesPerTurn;
|
|
195
264
|
this.queueCap = queueCap;
|
|
@@ -213,6 +282,15 @@ class CliProcess extends Process {
|
|
|
213
282
|
// interval fires bridge-disconnected if too much time elapses.
|
|
214
283
|
this.lastPongAt = 0;
|
|
215
284
|
this.pongWatchdog = null;
|
|
285
|
+
// 0.12.0 background-work stall-watchdog state. `_bgWorkSince` = when a live
|
|
286
|
+
// background shell was first observed while idle (null = none); reset only
|
|
287
|
+
// when the shell count returns to 0. `_bgWorkEscalations` caps the watchdog
|
|
288
|
+
// at one read-only self-check per continuous background-work window.
|
|
289
|
+
this._bgWorkSince = null;
|
|
290
|
+
this._bgWorkEscalations = 0;
|
|
291
|
+
// Visibility (Use 3): whether a "⏳ working in background" status message is
|
|
292
|
+
// currently shown, so we emit exactly one running→cleared pair per window.
|
|
293
|
+
this._bgWorkStatusShown = false;
|
|
216
294
|
// Review P2 ADV-6: token-bucket rate limit on Claude's reply tool calls.
|
|
217
295
|
// Without this, a prompt-injected or runaway Claude can fire reply() 1000×
|
|
218
296
|
// in a tight loop, flooding TG + saturating the daemon event loop.
|
|
@@ -236,6 +314,7 @@ class CliProcess extends Process {
|
|
|
236
314
|
// doesn't re-invoke the dispatcher → duplicate TG send. Set is bounded
|
|
237
315
|
// to RECENT_TOOL_CALL_LIMIT entries via FIFO eviction.
|
|
238
316
|
this.recentToolCallIds = new Set();
|
|
317
|
+
this.recentToolCallResults = new Map(); // tool_call_id → message_id (0.13: replay on re-ACK)
|
|
239
318
|
this.recentToolCallOrder = []; // FIFO bound
|
|
240
319
|
// Review F#17: per-pattern last-fired timestamp for the mid-turn dialog
|
|
241
320
|
// watchdog. Dedups within MID_TURN_DEDUP_WINDOW_MS so a lingering dialog
|
|
@@ -253,8 +332,32 @@ class CliProcess extends Process {
|
|
|
253
332
|
this.recentContentHashes = new Map(); // key → expiryTs
|
|
254
333
|
this.contentDedupWindowMs = 60_000;
|
|
255
334
|
|
|
256
|
-
// pending turn(s): turn_id → { resolve, reject, replies: [],
|
|
335
|
+
// pending turn(s): turn_id → { resolve, reject, replies: [], seen, quietTimer,
|
|
336
|
+
// hardTimer, absoluteTimer, _activityQuietTimer, startedAt }
|
|
257
337
|
this.pendingTurns = new Map();
|
|
338
|
+
// 0.13 D1: activity bookkeeping for the finalizer ladder. _lastHookEventAt
|
|
339
|
+
// feeds the rung-2 telemetry (hook-stalled discrimination); _lastActivityAt
|
|
340
|
+
// is the broader surface (hooks + pane heartbeat + bridge tool calls).
|
|
341
|
+
this._lastHookEventAt = 0;
|
|
342
|
+
this._lastActivityAt = 0;
|
|
343
|
+
// 0.13 D2: the InputLedger — every user-shaped input written to the bridge
|
|
344
|
+
// gets an observable lifecycle: written → seen → resolved | dropped |
|
|
345
|
+
// superseded | fold-suspected. Pre-P3, injectUserMessage minted a turn_id
|
|
346
|
+
// that never escaped the function (fold/new-turn/drop indistinguishable —
|
|
347
|
+
// seam S4; the #14 msg-2385 drop was invisible by construction).
|
|
348
|
+
// turn_id → { turnId, source, msgId, chatId, writtenAt, state, _dropTimer,
|
|
349
|
+
// _watchdogTimer, _rewritten }
|
|
350
|
+
this.inputLedger = new Map();
|
|
351
|
+
// Set whenever a reply carried the consumed_turn_ids contract field —
|
|
352
|
+
// the Tier 2C "contract observed" discriminator (P0 spike: incidental
|
|
353
|
+
// echo is trigger-only; without the contract a fold is indistinguishable
|
|
354
|
+
// from a drop, and auto-redelivering folds double-answers the common case).
|
|
355
|
+
this._lastAckFieldAt = 0;
|
|
356
|
+
// 0.12 interactive questions: tool_call_ids of `ask` calls awaiting an answer.
|
|
357
|
+
// While non-empty, the keep-alive interval resets the turn's idle ceiling (an
|
|
358
|
+
// idle `ask` fires no tool hooks, so _extendQuietOnToolActivity wouldn't run).
|
|
359
|
+
this._openQuestions = new Set();
|
|
360
|
+
this._questionKeepAliveTimer = null;
|
|
258
361
|
|
|
259
362
|
// File-send outbound cap (bot → user). Safe cloud default; overwritten in
|
|
260
363
|
// _spawnTmuxClaude with the backend/chat-resolved value before any turn.
|
|
@@ -494,9 +597,15 @@ class CliProcess extends Process {
|
|
|
494
597
|
// after this.
|
|
495
598
|
const topicConfig = opts.threadId && opts.chatConfig?.topics?.[opts.threadId];
|
|
496
599
|
const agent = topicConfig?.agent || opts.chatConfig?.agent || opts.agent;
|
|
497
|
-
const model =
|
|
498
|
-
const effort =
|
|
600
|
+
const model = this._resolveModel(opts);
|
|
601
|
+
const effort = this._resolveEffort(opts);
|
|
499
602
|
const resolvedCwd = topicConfig?.cwd || opts.chatConfig?.cwd || opts.cwd;
|
|
603
|
+
// Record the spawn-time model/effort. cli has no live model/effort swap
|
|
604
|
+
// (they are spawn-time --model / --effort flags), so getOrSpawn detects a
|
|
605
|
+
// /model or /effort drift against these and reloads — --resume preserves
|
|
606
|
+
// the conversation, the new flag takes effect. See wouldReloadFor.
|
|
607
|
+
this.model = model;
|
|
608
|
+
this.effort = effort;
|
|
500
609
|
|
|
501
610
|
// File-send outbound cap (bot → user). Backend-derived (cloud 50MB vs
|
|
502
611
|
// local Bot API server 2GB via opts.localApi) with per-topic/chat
|
|
@@ -510,6 +619,14 @@ class CliProcess extends Process {
|
|
|
510
619
|
override: _capOverride,
|
|
511
620
|
}).outBytes;
|
|
512
621
|
|
|
622
|
+
// 0.12.0-rc.13: per-chat/topic compaction warning (default OFF). Same
|
|
623
|
+
// topic→chat precedence as the file cap above. When enabled, the channels
|
|
624
|
+
// backend warns the chat as context fills (propose /compact at a break)
|
|
625
|
+
// and on auto-compaction (the event that detaches the bridge mid-turn).
|
|
626
|
+
const _compactionWarnRaw = topicConfig?.compactionWarnings ?? opts.chatConfig?.compactionWarnings;
|
|
627
|
+
this.compactionWarn = resolveCompactionWarnConfig({ compactionWarnings: _compactionWarnRaw });
|
|
628
|
+
this._compactionWarned = false; // proactive warn-once per climb; reset on PostCompact
|
|
629
|
+
|
|
513
630
|
// Parity audit P8 + rc.8 fs-guard (2026-05-26 shumorobot Music topic):
|
|
514
631
|
// `--session-id <id>` creates a NEW claude session with that id;
|
|
515
632
|
// `--resume <id>` resumes the EXISTING conversation. Lazy-respawn after
|
|
@@ -637,6 +754,44 @@ class CliProcess extends Process {
|
|
|
637
754
|
'as normal — only the FINAL user-visible message needs to go through',
|
|
638
755
|
'the reply tool.',
|
|
639
756
|
'',
|
|
757
|
+
'When you call `reply`, ALWAYS set `consumed_turn_ids` to the turn_id',
|
|
758
|
+
'attribute of EVERY <channel> message you are answering or have received',
|
|
759
|
+
'since your last reply — including mid-turn follow-ups you absorbed into',
|
|
760
|
+
'the current answer. polygram uses it to confirm follow-up delivery;',
|
|
761
|
+
'omitting it can cause a follow-up to be re-sent to you.',
|
|
762
|
+
'',
|
|
763
|
+
'### Staying responsive on a long task',
|
|
764
|
+
'',
|
|
765
|
+
'The user cannot see you working — no live typing reaches them. For any task',
|
|
766
|
+
'that takes more than a few seconds, send a SHORT status first via `reply`',
|
|
767
|
+
'(it returns a `message_id`), then call `mcp__polygram-bridge__edit_message`',
|
|
768
|
+
'with that `message_id` to update the SAME bubble as you make progress,',
|
|
769
|
+
'finishing with the result. One evolving message beats silence or a flood of',
|
|
770
|
+
'new ones.',
|
|
771
|
+
'',
|
|
772
|
+
'Write status in PLAIN, friendly language about what you are doing FOR THE',
|
|
773
|
+
'USER — never tool names or mechanics. Say "Checking your config now…", not',
|
|
774
|
+
'"Running Bash" or "Calling Read". If the final answer is long, send it as a',
|
|
775
|
+
'fresh `reply` rather than an edit (an edit is one single message bubble).',
|
|
776
|
+
'',
|
|
777
|
+
// TEMPORARY mitigation (2026-06-08 Shumabit@UMI wedge): AskUserQuestion opens
|
|
778
|
+
// a blocking TUI selection widget the channel can't answer → the session
|
|
779
|
+
// parks until manually Esc'd. REMOVE this whole rule when the rich
|
|
780
|
+
// question→Telegram-keyboard feature ships (see docs design); claude should
|
|
781
|
+
// then use the native question tool again. Tracked so it isn't forgotten.
|
|
782
|
+
'### Asking the user a question / offering choices — HARD RULE',
|
|
783
|
+
'',
|
|
784
|
+
'NEVER use the AskUserQuestion tool or any interactive menu / selection',
|
|
785
|
+
'widget. They open a blocking terminal prompt the user on Telegram CANNOT',
|
|
786
|
+
'see or navigate — it silently wedges the entire session until it is manually',
|
|
787
|
+
'cleared. (Rich tap-to-answer choices are coming; until then this is a hard rule.)',
|
|
788
|
+
'',
|
|
789
|
+
'To ask a multiple-choice question, a confirmation, or yes/no, call the',
|
|
790
|
+
'`mcp__polygram-bridge__ask` tool — it renders tap-to-answer inline buttons',
|
|
791
|
+
'(supports multiSelect via `multiSelect:true` and a free-text answer via',
|
|
792
|
+
'`allowOther:true`) and returns the user\'s selection(s) as the tool result.',
|
|
793
|
+
'Prefer `ask` over a typed numbered list whenever you are offering choices.',
|
|
794
|
+
'',
|
|
640
795
|
'### Sending FILES (tracks, images, docs) to the user',
|
|
641
796
|
'',
|
|
642
797
|
'The `mcp__polygram-bridge__reply` tool takes an optional `files` array of',
|
|
@@ -712,6 +867,20 @@ class CliProcess extends Process {
|
|
|
712
867
|
cwd: resolvedCwd || opts.cwd || process.cwd(),
|
|
713
868
|
command: this.claudeBin,
|
|
714
869
|
args: claudeArgs,
|
|
870
|
+
envExtras: {
|
|
871
|
+
// Resume-dialog suppression (docs/0.13-resume-dialog-fix-spec.md B1):
|
|
872
|
+
// claude's session-age "resume-return" dialog fires when sessionAge ≥
|
|
873
|
+
// this many minutes AND est. tokens ≥ CLAUDE_CODE_RESUME_TOKEN_THRESHOLD
|
|
874
|
+
// (defaults 70 / 1e5, binary-verified on 2.1.158). Its pre-selected
|
|
875
|
+
// option literally runs /compact — silently compacting every aged
|
|
876
|
+
// --resume (and breaking the /model "conversation kept" guarantee).
|
|
877
|
+
// A huge threshold (1 year) means the dialog never triggers and resume
|
|
878
|
+
// is always full-session-as-is. Per-process env — the operator's own
|
|
879
|
+
// interactive claude is untouched. Belt-and-braces: the session-age
|
|
880
|
+
// gate trigger below still navigates to "full" if a future binary bump
|
|
881
|
+
// renames this var.
|
|
882
|
+
CLAUDE_CODE_RESUME_THRESHOLD_MINUTES: '525600',
|
|
883
|
+
},
|
|
715
884
|
});
|
|
716
885
|
|
|
717
886
|
// Dialog handling (Phase 0 finding) — poll capture-pane and Enter through:
|
|
@@ -728,24 +897,46 @@ class CliProcess extends Process {
|
|
|
728
897
|
* lives in the shared helper.
|
|
729
898
|
*/
|
|
730
899
|
async _handleStartupDialogs(tmuxName) {
|
|
731
|
-
await runStartupGate({
|
|
900
|
+
const gateResult = await runStartupGate({
|
|
732
901
|
runner: this.runner,
|
|
733
902
|
tmuxName,
|
|
734
903
|
triggers: [
|
|
735
904
|
// Dev-channels confirmation — always fires under
|
|
736
905
|
// --dangerously-load-development-channels.
|
|
737
906
|
{ name: 'dev-channels', regex: /WARNING: Loading development channels/i, key: 'Enter' },
|
|
738
|
-
// Workspace trust prompt — fires on first-time cwd or untrusted.
|
|
739
|
-
|
|
740
|
-
//
|
|
741
|
-
//
|
|
742
|
-
//
|
|
743
|
-
|
|
744
|
-
//
|
|
745
|
-
//
|
|
746
|
-
|
|
907
|
+
// Workspace trust prompt — fires on first-time cwd or untrusted. claude
|
|
908
|
+
// 2.1.158 renders "Quick safety check: Is this a project you created or
|
|
909
|
+
// one you trust? … ❯ 1. Yes, I trust this folder" (Enter confirms the
|
|
910
|
+
// pre-selected "trust" option). The older "trust the files in this folder"
|
|
911
|
+
// wording is kept for back-compat; both anchor on "trust … this folder".
|
|
912
|
+
{ name: 'trust', regex: /trust (?:the files in )?this folder/i, key: 'Enter' },
|
|
913
|
+
// Review F#12 + 2026-06-11 resume-dialog fix: session-age
|
|
914
|
+
// "resume-return" prompt on aged sessions. Bare Enter selects the
|
|
915
|
+
// pre-selected "Resume from summary" — which literally runs /compact
|
|
916
|
+
// on the resumed session (silent context degradation; the original
|
|
917
|
+
// F#12 dismissal compacted every aged resume). Navigate to option 2
|
|
918
|
+
// "Resume full session as-is" instead. This is the FALLBACK path:
|
|
919
|
+
// spawn env (CLAUDE_CODE_RESUME_THRESHOLD_MINUTES above) suppresses
|
|
920
|
+
// the dialog entirely; this trigger firing at all means suppression
|
|
921
|
+
// failed (upstream renamed the env var?) — surfaced via the
|
|
922
|
+
// session-age-dialog-fallback event below.
|
|
923
|
+
{ name: 'session-age', regex: SESSION_AGE_PROMPT_RE, keys: ['Down', 'Enter'] },
|
|
747
924
|
],
|
|
748
|
-
|
|
925
|
+
// 2.1.173 reworked the channels UI banner (live-captured 2026-06-11):
|
|
926
|
+
// "Channels (experimental) messages from server:polygram-bridge inject
|
|
927
|
+
// directly in this session · …". Keep the 2.1.158 text too so a
|
|
928
|
+
// POLYGRAM_CLAUDE_BIN override to an older binary still gates correctly.
|
|
929
|
+
//
|
|
930
|
+
// 2026-06-12 (caught by the cancel-cheap E2E before prod): in 2.1.173
|
|
931
|
+
// the banner lives in a COLLAPSIBLE notice list — with ≥3 notices the
|
|
932
|
+
// pane shows "+N more · /status" and the banner is hidden, stalling a
|
|
933
|
+
// banner-only gate into a false CHANNELS_DIALOG_TIMEOUT. An interactive
|
|
934
|
+
// prompt footer ("(shift+tab to cycle)" / "? for shortcuts") with no
|
|
935
|
+
// pending dialog is equally READY: the gate's job is dialog navigation;
|
|
936
|
+
// channel liveness is separately guaranteed by mcp-ready (send() gate)
|
|
937
|
+
// + the delivery watchdog. Dialog panes render "Enter to confirm"
|
|
938
|
+
// instead of the footer, so the footer can't match mid-dialog.
|
|
939
|
+
readySignal: /(?:Listening for channel messages from:|Channels \(experimental\) messages from) server:polygram-bridge|shift\+tab to cycle|\? for shortcuts/i,
|
|
749
940
|
timeoutCode: 'CHANNELS_DIALOG_TIMEOUT',
|
|
750
941
|
// Progress-aware gate (shumorobot General incident 2026-05-30): a
|
|
751
942
|
// cold spawn that's mid-download (runtime fetch, "24%" progress bar)
|
|
@@ -754,11 +945,32 @@ class CliProcess extends Process {
|
|
|
754
945
|
// actively-changing pane (download bar, dialog nav) keeps resetting
|
|
755
946
|
// the stall clock and rides out to the ready signal. deadlineMs stays
|
|
756
947
|
// the absolute backstop. stallMs (60s default) of zero pane activity = genuinely wedged.
|
|
757
|
-
|
|
948
|
+
// Stall = pane rendered then went static (genuinely wedged). 60s, not
|
|
949
|
+
// 30s: some topics' TUIs cold-render slowly (Music ~45s, slow MCP
|
|
950
|
+
// startup) — 30s was too tight and false-aborted them. Blank panes
|
|
951
|
+
// don't arm the stall timer at all now (see runStartupGate), so this
|
|
952
|
+
// only bounds a TUI that rendered and then truly hung.
|
|
953
|
+
stallMs: this.startupGateStallMs ?? 60_000,
|
|
758
954
|
deadlineMs: this.startupGateDeadlineMs ?? 180_000,
|
|
955
|
+
// Review F4: fire-time, NOT gate-resolution — the 2026-06-10 incident
|
|
956
|
+
// matched session-age and THEN died (TMUX_SESSION_GONE), which a
|
|
957
|
+
// success-path check would miss. The dialog appearing AT ALL means the
|
|
958
|
+
// env suppression (CLAUDE_CODE_RESUME_THRESHOLD_MINUTES in
|
|
959
|
+
// _spawnTmuxClaude) stopped working — almost certainly an upstream
|
|
960
|
+
// rename on a binary bump. The gate handles it (full resume picked);
|
|
961
|
+
// this makes the regression visible.
|
|
962
|
+
onTrigger: (name) => {
|
|
963
|
+
if (name !== 'session-age') return;
|
|
964
|
+
this.logger.warn?.(
|
|
965
|
+
`[${this.label}] channels: session-age resume dialog appeared despite env suppression — ` +
|
|
966
|
+
'check CLAUDE_CODE_RESUME_THRESHOLD_MINUTES against the pinned claude binary',
|
|
967
|
+
);
|
|
968
|
+
this._logEvent('session-age-dialog-fallback', { tmux_name: tmuxName, phase: 'startup-gate' });
|
|
969
|
+
},
|
|
759
970
|
logger: this.logger,
|
|
760
971
|
label: `${this.label}:startup-gate`,
|
|
761
972
|
});
|
|
973
|
+
return gateResult;
|
|
762
974
|
}
|
|
763
975
|
|
|
764
976
|
// 0.12 Phase 1.6: TWO-handshake gate. The original implementation only
|
|
@@ -922,7 +1134,61 @@ class CliProcess extends Process {
|
|
|
922
1134
|
this.logger.warn?.(
|
|
923
1135
|
`[${this.label}] channels: duplicate tool_call_id=${msg.tool_call_id} — re-ACKing without dispatch`,
|
|
924
1136
|
);
|
|
925
|
-
|
|
1137
|
+
// 0.13: replay the cached message_id so a retried reply keeps its edit handle
|
|
1138
|
+
// (re-ACKing without it would null the handle → progressive status silently breaks).
|
|
1139
|
+
this._writeToBridge({ kind: 'tool_ack', tool_call_id: msg.tool_call_id, ok: true, message_id: this.recentToolCallResults.get(msg.tool_call_id) ?? null });
|
|
1140
|
+
return;
|
|
1141
|
+
}
|
|
1142
|
+
|
|
1143
|
+
// 0.13 D1: any bridge tool call is same-session activity (the reply tool's
|
|
1144
|
+
// own delivery additionally notes activity via _recordReplyForPendingTurn,
|
|
1145
|
+
// but Pre/PostToolUse hook lag is 250ms–5s — the socket message is the
|
|
1146
|
+
// earliest truthful signal claude is working).
|
|
1147
|
+
this._noteActivity('bridge-tool');
|
|
1148
|
+
|
|
1149
|
+
// 0.13 D2 Tier 2C: the consumed_turn_ids contract field — claude
|
|
1150
|
+
// acknowledges every <channel> message this reply covers (incl. folds the
|
|
1151
|
+
// incidental turn_id echo can't express; the reply schema carries ONE
|
|
1152
|
+
// turn_id). Acked entries can never be declared dropped.
|
|
1153
|
+
//
|
|
1154
|
+
// SECURITY (review 2026-06-12): gate the ack on chat_id matching this
|
|
1155
|
+
// session. The chat_id check lives further down (after dedup/rate-limit);
|
|
1156
|
+
// without this guard a reply carrying a FOREIGN chat_id but naming the live
|
|
1157
|
+
// turn here would mark it resolved/_consumedAcked + arm the finalizer —
|
|
1158
|
+
// "delivered" though nothing reached this chat. The actual reject still
|
|
1159
|
+
// happens at the chat_id guard below.
|
|
1160
|
+
const chatIdMatches = this.chatId == null || String(args.chat_id) === String(this.chatId);
|
|
1161
|
+
if (chatIdMatches && Array.isArray(args.consumed_turn_ids) && args.consumed_turn_ids.length) {
|
|
1162
|
+
this._ledgerAckConsumed(args.consumed_turn_ids.filter((x) => typeof x === 'string'));
|
|
1163
|
+
} else if (chatIdMatches && msg.name === 'reply' && 'consumed_turn_ids' in args) {
|
|
1164
|
+
this._lastAckFieldAt = Date.now(); // field present but empty — contract observed
|
|
1165
|
+
}
|
|
1166
|
+
|
|
1167
|
+
// 0.12 interactive questions: `ask` is a BLOCKING tool whose answer rides back
|
|
1168
|
+
// on a `question_answer` message (NOT tool_ack). Skip the reply-only paths
|
|
1169
|
+
// (content-dedup, rate-limit, the reply dispatcher) — just guard chat_id and
|
|
1170
|
+
// emit so polygram renders the keyboard; the answer is written later via
|
|
1171
|
+
// writeQuestionAnswer(). claude is now idle waiting on the result, so start a
|
|
1172
|
+
// keep-alive that resets the turn's idle ceiling (no tool hooks fire meanwhile).
|
|
1173
|
+
if (msg.name === 'ask') {
|
|
1174
|
+
if (this.chatId != null && args.chat_id != null && String(args.chat_id) !== String(this.chatId)) {
|
|
1175
|
+
this._writeToBridge({ kind: 'question_answer', tool_call_id: msg.tool_call_id, result: { cancelled: true, error: 'chat_id mismatch' } });
|
|
1176
|
+
return;
|
|
1177
|
+
}
|
|
1178
|
+
this._openQuestions.add(msg.tool_call_id);
|
|
1179
|
+
this._startQuestionKeepAlive();
|
|
1180
|
+
// 0.13 D1: waiting-on-user — claude is legitimately silent, so the
|
|
1181
|
+
// activity-quiet finalize must not run down while the keyboard is up.
|
|
1182
|
+
this._suspendActivityQuiet();
|
|
1183
|
+
this.emit('question-asked', {
|
|
1184
|
+
sessionKey: this.sessionKey,
|
|
1185
|
+
chatId: this.chatId,
|
|
1186
|
+
threadId: this.threadId,
|
|
1187
|
+
turnId: args.turn_id || null,
|
|
1188
|
+
toolCallId: msg.tool_call_id,
|
|
1189
|
+
questions: Array.isArray(args.questions) ? args.questions : [],
|
|
1190
|
+
backend: this.backend,
|
|
1191
|
+
});
|
|
926
1192
|
return;
|
|
927
1193
|
}
|
|
928
1194
|
|
|
@@ -931,15 +1197,15 @@ class CliProcess extends Process {
|
|
|
931
1197
|
// an isError ack). Window-based so legit repeat sends eventually pass.
|
|
932
1198
|
if (msg.name === 'reply' && typeof args.text === 'string' && args.chat_id != null) {
|
|
933
1199
|
const dedupKey = this._buildContentDedupKey(args.chat_id, args.text);
|
|
934
|
-
const
|
|
1200
|
+
const entry = this.recentContentHashes.get(dedupKey); // { expiry, message_id }
|
|
935
1201
|
const nowDedup = Date.now();
|
|
936
1202
|
// Evict stale entries opportunistically (avoids unbounded growth).
|
|
937
1203
|
if (this.recentContentHashes.size > 64) {
|
|
938
|
-
for (const [k,
|
|
939
|
-
if (
|
|
1204
|
+
for (const [k, e] of this.recentContentHashes) {
|
|
1205
|
+
if (e.expiry < nowDedup) this.recentContentHashes.delete(k);
|
|
940
1206
|
}
|
|
941
1207
|
}
|
|
942
|
-
if (
|
|
1208
|
+
if (entry && entry.expiry > nowDedup) {
|
|
943
1209
|
this.logger.warn?.(
|
|
944
1210
|
`[${this.label}] channels: duplicate content within ${this.contentDedupWindowMs}ms ` +
|
|
945
1211
|
`(new tool_call_id=${msg.tool_call_id}, hash=${dedupKey.slice(-12)}) — re-ACKing without dispatch`,
|
|
@@ -949,7 +1215,9 @@ class CliProcess extends Process {
|
|
|
949
1215
|
chat_id: args.chat_id,
|
|
950
1216
|
window_ms: this.contentDedupWindowMs,
|
|
951
1217
|
});
|
|
952
|
-
|
|
1218
|
+
// 0.13: replay the ORIGINAL bubble's message_id so a retried identical reply
|
|
1219
|
+
// keeps its edit handle (the slow-ack-retry case progressive status targets).
|
|
1220
|
+
this._writeToBridge({ kind: 'tool_ack', tool_call_id: msg.tool_call_id, ok: true, message_id: entry.message_id ?? null });
|
|
953
1221
|
return;
|
|
954
1222
|
}
|
|
955
1223
|
}
|
|
@@ -992,6 +1260,34 @@ class CliProcess extends Process {
|
|
|
992
1260
|
return;
|
|
993
1261
|
}
|
|
994
1262
|
|
|
1263
|
+
// Dropped-"4" fix A2 (docs/0.13-resume-dialog-fix-spec.md): resolve the
|
|
1264
|
+
// reply's originating TG message so the dispatcher has a target for solo
|
|
1265
|
+
// reactions (and reply-quoting). Resolution order strictly mirrors
|
|
1266
|
+
// _recordReplyForPendingTurn so quote/reaction attribution can never
|
|
1267
|
+
// disagree with reply attribution: echoed turn_id → InputLedger entry's
|
|
1268
|
+
// msgId (registered at send/inject time); no echo → the single pending
|
|
1269
|
+
// turn's ledger entry. Anything else stays null — an unattributable
|
|
1270
|
+
// reply must never react to / quote an unrelated message.
|
|
1271
|
+
//
|
|
1272
|
+
// Review F1: quote only the FIRST delivered reply per turn. On SDK,
|
|
1273
|
+
// deliverReplies fires once per turn → one quote; the channels dispatcher
|
|
1274
|
+
// fires per reply tool call, and an N-reply turn must not produce N
|
|
1275
|
+
// bubbles all quoting the same user message.
|
|
1276
|
+
let sourceMsgId = null;
|
|
1277
|
+
let sourceEntry = null;
|
|
1278
|
+
if (args.turn_id && this.inputLedger.has(args.turn_id)) {
|
|
1279
|
+
sourceEntry = this.inputLedger.get(args.turn_id);
|
|
1280
|
+
} else if (this.pendingTurns.size === 1) {
|
|
1281
|
+
const [[onlyTurnId]] = this.pendingTurns;
|
|
1282
|
+
sourceEntry = this.inputLedger.get(onlyTurnId) || null;
|
|
1283
|
+
}
|
|
1284
|
+
if (sourceEntry && !sourceEntry._quoteUsed) {
|
|
1285
|
+
// Review F6: ledger stores msgId stringified; every other delivery call
|
|
1286
|
+
// site passes numeric message_id — coerce rather than lean on TG leniency.
|
|
1287
|
+
const n = Number(sourceEntry.msgId);
|
|
1288
|
+
sourceMsgId = Number.isFinite(n) && n > 0 ? n : null;
|
|
1289
|
+
}
|
|
1290
|
+
|
|
995
1291
|
let result;
|
|
996
1292
|
try {
|
|
997
1293
|
result = await this.toolDispatcher({
|
|
@@ -1001,6 +1297,8 @@ class CliProcess extends Process {
|
|
|
1001
1297
|
toolName: msg.name,
|
|
1002
1298
|
text: args.text,
|
|
1003
1299
|
files: args.files,
|
|
1300
|
+
messageId: args.message_id, // 0.13: edit_message target bubble
|
|
1301
|
+
sourceMsgId, // reaction/quote target (A2)
|
|
1004
1302
|
sessionCwd: this.sessionCwd, // P0 #2: dispatcher uses this to allowlist file roots
|
|
1005
1303
|
maxOutboundFileBytes: this.maxOutboundFileBytes, // backend/chat-derived upload cap
|
|
1006
1304
|
});
|
|
@@ -1009,18 +1307,28 @@ class CliProcess extends Process {
|
|
|
1009
1307
|
return;
|
|
1010
1308
|
}
|
|
1011
1309
|
|
|
1012
|
-
|
|
1310
|
+
// Review F1: the quote target is spent once a reply actually delivered
|
|
1311
|
+
// with it. A FAILED delivery doesn't consume it — the retry still quotes.
|
|
1312
|
+
if (msg.name === 'reply' && result?.ok && sourceMsgId != null && sourceEntry) {
|
|
1313
|
+
sourceEntry._quoteUsed = true;
|
|
1314
|
+
}
|
|
1315
|
+
|
|
1316
|
+
// 0.13: carry the delivered message_id back so the bridge hands it to claude
|
|
1317
|
+
// (reply → edit_message progressive status).
|
|
1318
|
+
this._writeToBridge({ kind: 'tool_ack', tool_call_id: msg.tool_call_id, ok: !!result?.ok, error: result?.error, message_id: result?.message_id });
|
|
1013
1319
|
|
|
1014
1320
|
// P1 #7: remember the tool_call_id so duplicates re-ACK without dispatch.
|
|
1015
1321
|
// Only cache on SUCCESS — failed calls should be retryable (transient TG
|
|
1016
1322
|
// outage etc).
|
|
1017
1323
|
if (result?.ok && msg.tool_call_id) {
|
|
1018
1324
|
this.recentToolCallIds.add(msg.tool_call_id);
|
|
1325
|
+
this.recentToolCallResults.set(msg.tool_call_id, result.message_id ?? null); // 0.13: for re-ACK replay
|
|
1019
1326
|
this.recentToolCallOrder.push(msg.tool_call_id);
|
|
1020
1327
|
// FIFO eviction at cap
|
|
1021
1328
|
while (this.recentToolCallOrder.length > RECENT_TOOL_CALL_LIMIT) {
|
|
1022
1329
|
const evicted = this.recentToolCallOrder.shift();
|
|
1023
1330
|
this.recentToolCallIds.delete(evicted);
|
|
1331
|
+
this.recentToolCallResults.delete(evicted);
|
|
1024
1332
|
}
|
|
1025
1333
|
}
|
|
1026
1334
|
|
|
@@ -1028,7 +1336,9 @@ class CliProcess extends Process {
|
|
|
1028
1336
|
// NEW tool_call_id still dedups. TTL-based via expiry timestamp.
|
|
1029
1337
|
if (result?.ok && msg.name === 'reply' && typeof args.text === 'string' && args.chat_id != null) {
|
|
1030
1338
|
const dedupKey = this._buildContentDedupKey(args.chat_id, args.text);
|
|
1031
|
-
|
|
1339
|
+
// 0.13: store the delivered message_id alongside the expiry so a deduped retry
|
|
1340
|
+
// can replay it (keeps claude's edit handle for progressive status).
|
|
1341
|
+
this.recentContentHashes.set(dedupKey, { expiry: Date.now() + this.contentDedupWindowMs, message_id: result.message_id ?? null });
|
|
1032
1342
|
}
|
|
1033
1343
|
|
|
1034
1344
|
// Review #16 + C9: only record the reply for pending-turn resolution when
|
|
@@ -1050,6 +1360,24 @@ class CliProcess extends Process {
|
|
|
1050
1360
|
* @param {string|undefined} replyTurnId — echoed from Claude's reply tool args
|
|
1051
1361
|
*/
|
|
1052
1362
|
_recordReplyForPendingTurn(text, replyTurnId) {
|
|
1363
|
+
// 0.13 D2 (S5 tightening): a reply echoing a KNOWN ledgered turn_id that is
|
|
1364
|
+
// NOT the current pending is a LATE reply from an earlier cycle (post-
|
|
1365
|
+
// finalize tails, fireUserMessage cycles, ask wrap-ups). Pre-P3 the
|
|
1366
|
+
// ==1 fallback below bound it into whatever pending exists now — the live
|
|
1367
|
+
// misattribution path the design's §1.4 corollary names. Correlate it,
|
|
1368
|
+
// resolve its entry, and route it as already-delivered instead.
|
|
1369
|
+
if (replyTurnId && !this.pendingTurns.has(replyTurnId) && this.inputLedger.has(replyTurnId)) {
|
|
1370
|
+
const lEntry = this.inputLedger.get(replyTurnId);
|
|
1371
|
+
this._ledgerTransition(replyTurnId, 'resolved');
|
|
1372
|
+
this._logEvent('cli-late-reply-correlated', { turn_id: replyTurnId, source: lEntry.source });
|
|
1373
|
+
this.emit('autonomous-assistant-message', {
|
|
1374
|
+
text,
|
|
1375
|
+
sessionId: this.claudeSessionId,
|
|
1376
|
+
backend: this.backend,
|
|
1377
|
+
alreadyDelivered: true,
|
|
1378
|
+
});
|
|
1379
|
+
return;
|
|
1380
|
+
}
|
|
1053
1381
|
let target = null;
|
|
1054
1382
|
if (replyTurnId && this.pendingTurns.has(replyTurnId)) {
|
|
1055
1383
|
// Canonical path: Claude echoed the turn_id we sent.
|
|
@@ -1116,6 +1444,26 @@ class CliProcess extends Process {
|
|
|
1116
1444
|
}
|
|
1117
1445
|
|
|
1118
1446
|
target.replies.push(text);
|
|
1447
|
+
target.replyCount = (target.replyCount || 0) + 1;
|
|
1448
|
+
|
|
1449
|
+
if (this._sawHookStream) {
|
|
1450
|
+
// 0.13 D1: a delivered reply is ACTIVITY — rung 2 (activity-quiet) owns
|
|
1451
|
+
// the finalize; the reply-quiet window never arms on hooks-live sessions.
|
|
1452
|
+
// The chatty-claude cap (Review P1 #12) no longer instant-resolves a turn
|
|
1453
|
+
// claude may still be working (that was seam S1's third premature-finalize
|
|
1454
|
+
// trigger); past the cap, rung 2 + the ceilings govern — and a ceiling on
|
|
1455
|
+
// a replied turn now RESOLVES with its replies (see fireTimeout).
|
|
1456
|
+
if (target.replyCount === this.maxRepliesPerTurn) {
|
|
1457
|
+
this.logger.warn?.(
|
|
1458
|
+
`[${this.label}] cli: ${target.replyCount} replies in single turn — deferring to activity-quiet (cap=${this.maxRepliesPerTurn})`,
|
|
1459
|
+
);
|
|
1460
|
+
this._logEvent('cli-reply-cap-noted', { reply_count: target.replyCount });
|
|
1461
|
+
}
|
|
1462
|
+
this._noteActivity('reply');
|
|
1463
|
+
return;
|
|
1464
|
+
}
|
|
1465
|
+
|
|
1466
|
+
// ── Legacy (rung 3, hook stream never came up): pre-D1 path, byte-identical ──
|
|
1119
1467
|
// Review F#13: each reply is "activity" — reset the idle ceiling so a
|
|
1120
1468
|
// 15-min legit turn (PDF analysis, multi-file refactor) replying every
|
|
1121
1469
|
// minute doesn't get killed at the 10-min wall-clock. The absoluteTimer
|
|
@@ -1132,7 +1480,6 @@ class CliProcess extends Process {
|
|
|
1132
1480
|
// hang. After N reply tool calls in a single turn, resolve immediately on
|
|
1133
1481
|
// the NEXT reply without waiting for the quiet window. N defaults to 20
|
|
1134
1482
|
// which is plenty for normal multi-message replies but caps runaway chains.
|
|
1135
|
-
target.replyCount = (target.replyCount || 0) + 1;
|
|
1136
1483
|
if (target.quietTimer) clearTimeout(target.quietTimer);
|
|
1137
1484
|
if (target.replyCount >= this.maxRepliesPerTurn) {
|
|
1138
1485
|
// Skip the quiet-window — resolve right away with whatever we've got.
|
|
@@ -1145,6 +1492,318 @@ class CliProcess extends Process {
|
|
|
1145
1492
|
}
|
|
1146
1493
|
}
|
|
1147
1494
|
|
|
1495
|
+
// ─── 0.13 D2: InputLedger ──────────────────────────────────────────
|
|
1496
|
+
|
|
1497
|
+
_ledgerAdd(turnId, { source, msgId = null } = {}) {
|
|
1498
|
+
this.inputLedger.set(turnId, {
|
|
1499
|
+
turnId,
|
|
1500
|
+
source,
|
|
1501
|
+
msgId: msgId != null ? String(msgId) : null,
|
|
1502
|
+
chatId: this.chatId,
|
|
1503
|
+
writtenAt: Date.now(),
|
|
1504
|
+
state: 'written',
|
|
1505
|
+
_dropTimer: null,
|
|
1506
|
+
_watchdogTimer: null,
|
|
1507
|
+
_rewritten: false,
|
|
1508
|
+
});
|
|
1509
|
+
// Bounded: prune terminal entries first, then the oldest.
|
|
1510
|
+
if (this.inputLedger.size > INPUT_LEDGER_CAP) {
|
|
1511
|
+
let victim = null;
|
|
1512
|
+
for (const [id, e] of this.inputLedger) {
|
|
1513
|
+
if (e.state !== 'written' && e.state !== 'seen') { victim = id; break; }
|
|
1514
|
+
if (!victim) victim = id;
|
|
1515
|
+
}
|
|
1516
|
+
if (victim) this._ledgerDelete(victim);
|
|
1517
|
+
}
|
|
1518
|
+
}
|
|
1519
|
+
|
|
1520
|
+
_ledgerDelete(turnId) {
|
|
1521
|
+
const e = this.inputLedger.get(turnId);
|
|
1522
|
+
if (!e) return;
|
|
1523
|
+
if (e._dropTimer) clearTimeout(e._dropTimer);
|
|
1524
|
+
if (e._watchdogTimer) clearTimeout(e._watchdogTimer);
|
|
1525
|
+
this.inputLedger.delete(turnId);
|
|
1526
|
+
}
|
|
1527
|
+
|
|
1528
|
+
/** Transition + cancel the entry's timers (a seen/resolved entry can never drop or re-write). */
|
|
1529
|
+
_ledgerTransition(turnId, state) {
|
|
1530
|
+
const e = this.inputLedger.get(turnId);
|
|
1531
|
+
if (!e) return;
|
|
1532
|
+
e.state = state;
|
|
1533
|
+
if (e._dropTimer) { clearTimeout(e._dropTimer); e._dropTimer = null; }
|
|
1534
|
+
if (e._watchdogTimer) { clearTimeout(e._watchdogTimer); e._watchdogTimer = null; }
|
|
1535
|
+
}
|
|
1536
|
+
|
|
1537
|
+
/** Tier 2C: a reply carried consumed_turn_ids — acknowledge every known id. */
|
|
1538
|
+
_ledgerAckConsumed(ids) {
|
|
1539
|
+
this._lastAckFieldAt = Date.now();
|
|
1540
|
+
for (const id of ids) {
|
|
1541
|
+
const e = this.inputLedger.get(id);
|
|
1542
|
+
if (e && e.state !== 'resolved') {
|
|
1543
|
+
this._ledgerTransition(id, 'resolved');
|
|
1544
|
+
this._logEvent('cli-input-acked', { turn_id: id, source: e.source });
|
|
1545
|
+
}
|
|
1546
|
+
// UMI 2026-06-11 19:49 false ⏱ timeout: when claude answers a
|
|
1547
|
+
// primary+fold in ONE reply but echoes the FOLD's turn_id, the reply
|
|
1548
|
+
// routes via late-reply correlation and the PRIMARY pending absorbs
|
|
1549
|
+
// nothing — yet this ack names the primary. Mark it consumed so the
|
|
1550
|
+
// finalizer rungs treat it as replied (resolve already-delivered)
|
|
1551
|
+
// instead of rejecting it at a ceiling AFTER the user got the answer.
|
|
1552
|
+
const pending = this.pendingTurns.get(id);
|
|
1553
|
+
if (pending) {
|
|
1554
|
+
pending._consumedAcked = true;
|
|
1555
|
+
// The ack itself flips rung-2 eligibility on — arm now. (The turn's
|
|
1556
|
+
// last _noteActivity ran BEFORE this flag was set, so without this
|
|
1557
|
+
// a quiet tail would never re-arm and the turn would sit until a
|
|
1558
|
+
// ceiling.)
|
|
1559
|
+
this._armActivityQuiet(id, pending);
|
|
1560
|
+
}
|
|
1561
|
+
}
|
|
1562
|
+
}
|
|
1563
|
+
|
|
1564
|
+
_clearLedgerTimers() {
|
|
1565
|
+
for (const e of this.inputLedger.values()) {
|
|
1566
|
+
if (e._dropTimer) { clearTimeout(e._dropTimer); e._dropTimer = null; }
|
|
1567
|
+
if (e._watchdogTimer) { clearTimeout(e._watchdogTimer); e._watchdogTimer = null; }
|
|
1568
|
+
}
|
|
1569
|
+
}
|
|
1570
|
+
|
|
1571
|
+
/**
|
|
1572
|
+
* D2 drop detection, armed at every cycle end for non-primary entries still
|
|
1573
|
+
* 'written'. The confirm window exists because a non-folded inject legally
|
|
1574
|
+
* queues claude-side and is picked up as the NEXT cycle (its UPS then
|
|
1575
|
+
* cancels this); only entries nobody ever picked up or acknowledged drop.
|
|
1576
|
+
*/
|
|
1577
|
+
_armDropConfirmSweep() {
|
|
1578
|
+
for (const [id, entry] of this.inputLedger) {
|
|
1579
|
+
if (entry.state !== 'written') continue;
|
|
1580
|
+
if (entry.source === 'primary') continue; // pending lifecycle + delivery watchdog govern primaries
|
|
1581
|
+
if (entry._dropTimer) continue;
|
|
1582
|
+
entry._dropTimer = setTimeout(() => this._dropConfirmFire(id), this.dropConfirmMs);
|
|
1583
|
+
entry._dropTimer.unref?.();
|
|
1584
|
+
}
|
|
1585
|
+
}
|
|
1586
|
+
|
|
1587
|
+
_dropConfirmFire(turnId) {
|
|
1588
|
+
const entry = this.inputLedger.get(turnId);
|
|
1589
|
+
if (!entry || entry.state !== 'written') return;
|
|
1590
|
+
entry._dropTimer = null;
|
|
1591
|
+
// System/anonymous pushes are never auto-redelivered — resolve quietly.
|
|
1592
|
+
if (entry.source === 'system' || entry.source === 'inject') {
|
|
1593
|
+
this._ledgerTransition(turnId, 'resolved');
|
|
1594
|
+
this._logEvent('cli-input-unconfirmed', { turn_id: turnId, source: entry.source });
|
|
1595
|
+
return;
|
|
1596
|
+
}
|
|
1597
|
+
// Supersession: the user re-sent / moved on — a newer primary was picked
|
|
1598
|
+
// up after this entry was written. Redelivering the stale one would
|
|
1599
|
+
// double-answer the same intent.
|
|
1600
|
+
for (const e of this.inputLedger.values()) {
|
|
1601
|
+
if (e.source === 'primary' && e.writtenAt > entry.writtenAt
|
|
1602
|
+
&& (e.state === 'seen' || e.state === 'resolved')) {
|
|
1603
|
+
this._ledgerTransition(turnId, 'superseded');
|
|
1604
|
+
this._logEvent('input-superseded', { turn_id: turnId, msg_id: entry.msgId });
|
|
1605
|
+
return;
|
|
1606
|
+
}
|
|
1607
|
+
}
|
|
1608
|
+
// Contract discriminator: if NO reply since this entry carried the
|
|
1609
|
+
// consumed_turn_ids field, the model ignored the contract this cycle — a
|
|
1610
|
+
// fold is then indistinguishable from a drop, and redelivering folds
|
|
1611
|
+
// double-answers the COMMON case (the inversion that killed the A1 spec).
|
|
1612
|
+
// Park as fold-suspected (telemetry; the soak's anomaly signal).
|
|
1613
|
+
if (!(this._lastAckFieldAt >= entry.writtenAt)) { // >= : same-ms ack still proves the contract mode
|
|
1614
|
+
this._ledgerTransition(turnId, 'fold-suspected');
|
|
1615
|
+
this._logEvent('input-fold-suspected', { turn_id: turnId, msg_id: entry.msgId, source: entry.source });
|
|
1616
|
+
return;
|
|
1617
|
+
}
|
|
1618
|
+
this._ledgerTransition(turnId, 'dropped');
|
|
1619
|
+
this._logEvent('input-dropped', { turn_id: turnId, msg_id: entry.msgId, source: entry.source });
|
|
1620
|
+
this.emit('input-dropped', {
|
|
1621
|
+
turnId, msgId: entry.msgId, chatId: entry.chatId, source: entry.source,
|
|
1622
|
+
});
|
|
1623
|
+
}
|
|
1624
|
+
|
|
1625
|
+
/**
|
|
1626
|
+
* D2 primary-delivery watchdog (KI-drop's missing half — the channel-bind
|
|
1627
|
+
* race drops a user_msg before claude's subscription is live). Fire logic:
|
|
1628
|
+
* - entry seen / turn settled → done (timer was already cancelled).
|
|
1629
|
+
* - ANY session activity since dispatch (hooks, pane heartbeat, bridge
|
|
1630
|
+
* tool calls) → claude is busy (likely a foreign cycle; the queued
|
|
1631
|
+
* pickup is legitimately deferred) → extend, NEVER re-write (round-2
|
|
1632
|
+
* panel: re-writes against a busy session double-prompt it).
|
|
1633
|
+
* - total silence → ONE re-write of the SAME envelope (idempotent:
|
|
1634
|
+
* never seen + zero activity ⇒ claude never had it — the rc.25
|
|
1635
|
+
* argument, properly scoped); still silence after that → bridge
|
|
1636
|
+
* teardown onto the existing bridge-disconnected recovery path.
|
|
1637
|
+
*/
|
|
1638
|
+
_armDeliveryWatchdog(turnId, pending) {
|
|
1639
|
+
const entry = this.inputLedger.get(turnId);
|
|
1640
|
+
if (!entry) return;
|
|
1641
|
+
entry._watchdogTimer = setTimeout(() => this._deliveryWatchdogFire(turnId, pending), this.deliveryWatchdogMs);
|
|
1642
|
+
entry._watchdogTimer.unref?.();
|
|
1643
|
+
}
|
|
1644
|
+
|
|
1645
|
+
_deliveryWatchdogFire(turnId, pending) {
|
|
1646
|
+
const entry = this.inputLedger.get(turnId);
|
|
1647
|
+
if (!entry || entry.state !== 'written') return;
|
|
1648
|
+
if (!this.pendingTurns.has(turnId)) return; // settled some other way
|
|
1649
|
+
entry._watchdogTimer = null;
|
|
1650
|
+
const activitySince = Math.max(this._lastActivityAt, this._lastHookEventAt) >= entry.writtenAt
|
|
1651
|
+
&& Math.max(this._lastActivityAt, this._lastHookEventAt) > 0;
|
|
1652
|
+
if (activitySince) {
|
|
1653
|
+
this._armDeliveryWatchdog(turnId, pending); // busy — extend the window
|
|
1654
|
+
return;
|
|
1655
|
+
}
|
|
1656
|
+
if (!entry._rewritten) {
|
|
1657
|
+
entry._rewritten = true;
|
|
1658
|
+
this._logEvent('cli-delivery-rewrite', { turn_id: turnId });
|
|
1659
|
+
if (pending._userMsgPayload) this._writeToBridge(pending._userMsgPayload);
|
|
1660
|
+
this._armDeliveryWatchdog(turnId, pending);
|
|
1661
|
+
return;
|
|
1662
|
+
}
|
|
1663
|
+
this._logEvent('cli-delivery-watchdog-escalate', { turn_id: turnId });
|
|
1664
|
+
if (this.bridgeServer?.destroyConnection) this.bridgeServer.destroyConnection();
|
|
1665
|
+
}
|
|
1666
|
+
|
|
1667
|
+
/**
|
|
1668
|
+
* 0.13 D1: note same-session activity — the heartbeat of the finalizer ladder
|
|
1669
|
+
* (docs/0.13-channels-lifecycle-design.md §3 D1). Supersedes the 0.12
|
|
1670
|
+
* `_extendQuietOnToolActivity` (the WA-topic point fix): instead of pushing a
|
|
1671
|
+
* 2s reply-quiet window around, activity now drives three things per pending:
|
|
1672
|
+
*
|
|
1673
|
+
* 1. The idle ceiling resets (pre-D1 semantics preserved — a long
|
|
1674
|
+
* tool-heavy turn isn't idle-killed).
|
|
1675
|
+
* 2. HOOKS-LIVE sessions: an attributed-Stop grace in flight is CANCELLED —
|
|
1676
|
+
* Stop arrives via the ndjson tail with 250ms–5s lag, so a foreign
|
|
1677
|
+
* cycle's lagged Stop can land after this turn's fast first pickup;
|
|
1678
|
+
* activity proves claude is still working and the Stop was stale. The
|
|
1679
|
+
* legacy reply-quiet timer (rung 3) is likewise superseded the moment
|
|
1680
|
+
* hooks go live mid-turn. The activity-quiet window (rung 2) re-arms.
|
|
1681
|
+
* 3. HOOK-NEVER-ALIVE sessions (rung 3): the pre-D1 reply-quiet re-arm,
|
|
1682
|
+
* byte-identical.
|
|
1683
|
+
*
|
|
1684
|
+
* Callers: every hook event except Stop, the pane "esc to interrupt"
|
|
1685
|
+
* thinking heartbeat, bridge tool calls, delivered replies, the question
|
|
1686
|
+
* keep-alive, and question answers.
|
|
1687
|
+
*/
|
|
1688
|
+
_noteActivity(source = 'activity') {
|
|
1689
|
+
this._lastActivityAt = Date.now();
|
|
1690
|
+
for (const [turnId, pending] of this.pendingTurns) {
|
|
1691
|
+
// Idle ceiling: activity IS activity.
|
|
1692
|
+
if (pending.hardTimer) {
|
|
1693
|
+
clearTimeout(pending.hardTimer);
|
|
1694
|
+
pending.hardTimer = setTimeout(() => pending._fireTimeout?.('idle'), this.turnTimeoutMs);
|
|
1695
|
+
}
|
|
1696
|
+
if (this._sawHookStream) {
|
|
1697
|
+
if (pending._stopGracePending) this._cancelStopGrace(turnId, pending, source);
|
|
1698
|
+
if (pending.quietTimer) { clearTimeout(pending.quietTimer); pending.quietTimer = null; }
|
|
1699
|
+
this._armActivityQuiet(turnId, pending);
|
|
1700
|
+
} else if (pending._stopGracePending) {
|
|
1701
|
+
// Legacy grace (resolveTurn's wait-for-Stop) — never revived/cancelled
|
|
1702
|
+
// by activity; identical to pre-D1.
|
|
1703
|
+
continue;
|
|
1704
|
+
} else if (pending.quietTimer) {
|
|
1705
|
+
clearTimeout(pending.quietTimer);
|
|
1706
|
+
pending.quietTimer = setTimeout(() => this._resolveTurn(turnId), this.turnQuietMs);
|
|
1707
|
+
}
|
|
1708
|
+
}
|
|
1709
|
+
}
|
|
1710
|
+
|
|
1711
|
+
/**
|
|
1712
|
+
* D1 rung 2: arm/refresh the activity-quiet finalize for one pending.
|
|
1713
|
+
* Preconditions: hooks live, ≥1 delivered reply (a reply-less turn ends via
|
|
1714
|
+
* rung 1 or the ceilings), no open question (waiting-on-user suspends the
|
|
1715
|
+
* clock — claude is legitimately silent), and no rung-1 grace in flight.
|
|
1716
|
+
*/
|
|
1717
|
+
_armActivityQuiet(turnId, pending) {
|
|
1718
|
+
if (!this._sawHookStream) return;
|
|
1719
|
+
// ≥1 reply, OR seen + consumed-acked (the answer rode a sibling turn_id —
|
|
1720
|
+
// fold-id echo; see _ledgerAckConsumed). Same eligibility as the fire site.
|
|
1721
|
+
if ((!pending.replies || pending.replies.length === 0)
|
|
1722
|
+
&& !(pending.seen === true && pending._consumedAcked === true)) return;
|
|
1723
|
+
if (this._openQuestions.size > 0) return;
|
|
1724
|
+
if (pending._stopGracePending) return;
|
|
1725
|
+
if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer);
|
|
1726
|
+
pending._activityQuietTimer = setTimeout(() => this._activityQuietFinalize(turnId), this.activityQuietMs);
|
|
1727
|
+
pending._activityQuietTimer.unref?.();
|
|
1728
|
+
}
|
|
1729
|
+
|
|
1730
|
+
/** D1: suspend rung 2 for all pendings (an `ask` just opened — waiting on the user). */
|
|
1731
|
+
_suspendActivityQuiet() {
|
|
1732
|
+
for (const [, pending] of this.pendingTurns) {
|
|
1733
|
+
if (pending._activityQuietTimer) {
|
|
1734
|
+
clearTimeout(pending._activityQuietTimer);
|
|
1735
|
+
pending._activityQuietTimer = null;
|
|
1736
|
+
}
|
|
1737
|
+
}
|
|
1738
|
+
}
|
|
1739
|
+
|
|
1740
|
+
/**
|
|
1741
|
+
* D1 rung 2 fire: the whole activity surface (hooks + pane heartbeat + bridge
|
|
1742
|
+
* tool calls) has been quiet for activityQuietMs on a replied turn — the tail
|
|
1743
|
+
* is over (Stop was lost, foreign, or the hook stream died mid-session; the
|
|
1744
|
+
* pre-D1 `_sawHookStream` one-way boolean left that last class with NO
|
|
1745
|
+
* finalizer until a 10-min TURN_TIMEOUT *rejection* after a delivered answer).
|
|
1746
|
+
*/
|
|
1747
|
+
_activityQuietFinalize(turnId) {
|
|
1748
|
+
const pending = this.pendingTurns.get(turnId);
|
|
1749
|
+
if (!pending) return;
|
|
1750
|
+
if (pending._stopGracePending) return;
|
|
1751
|
+
if (this._openQuestions.size > 0) return; // re-check at fire time
|
|
1752
|
+
// Eligibility: ≥1 bound reply, OR seen + consumed-acked (the answer went
|
|
1753
|
+
// out under a sibling turn_id — fold-id echo; see _ledgerAckConsumed).
|
|
1754
|
+
const consumedAcked = pending.seen === true && pending._consumedAcked === true;
|
|
1755
|
+
if ((!pending.replies || pending.replies.length === 0) && !consumedAcked) return;
|
|
1756
|
+
const lastHookAgeMs = this._lastHookEventAt ? Date.now() - this._lastHookEventAt : null;
|
|
1757
|
+
this._logEvent('cli-activity-quiet-finalize', {
|
|
1758
|
+
turn_id: turnId,
|
|
1759
|
+
reply_count: pending.replies.length,
|
|
1760
|
+
consumed_acked: consumedAcked,
|
|
1761
|
+
last_hook_age_ms: lastHookAgeMs,
|
|
1762
|
+
had_stop: !!pending._stopHookData,
|
|
1763
|
+
});
|
|
1764
|
+
if (lastHookAgeMs != null && lastHookAgeMs >= this.activityQuietMs) {
|
|
1765
|
+
// A previously-live hook stream went quiet enough that rung 2 (not an
|
|
1766
|
+
// attributed Stop) ended the turn — the soak's mid-session-death signal.
|
|
1767
|
+
this._logEvent('cli-hook-stream-stalled', { turn_id: turnId, last_hook_age_ms: lastHookAgeMs });
|
|
1768
|
+
}
|
|
1769
|
+
this._finalizeTurn(turnId);
|
|
1770
|
+
}
|
|
1771
|
+
|
|
1772
|
+
/**
|
|
1773
|
+
* D1 rung 1: an attributed Stop (the pending was `seen` at pickup, or has
|
|
1774
|
+
* ≥1 turn_id-bound reply) finalizes through a short grace that any
|
|
1775
|
+
* subsequent same-session activity cancels (see _noteActivity #2).
|
|
1776
|
+
*/
|
|
1777
|
+
_beginAttributedStopGrace(turnId, pending, info) {
|
|
1778
|
+
pending._stopHookData = info;
|
|
1779
|
+
pending._stopGracePending = true;
|
|
1780
|
+
if (pending._activityQuietTimer) {
|
|
1781
|
+
clearTimeout(pending._activityQuietTimer);
|
|
1782
|
+
pending._activityQuietTimer = null;
|
|
1783
|
+
}
|
|
1784
|
+
pending._stopGraceTimer = setTimeout(() => {
|
|
1785
|
+
pending._stopGraceTimer = null;
|
|
1786
|
+
pending._stopGracePending = false;
|
|
1787
|
+
this._logEvent('cli-turn-resolved-by-stop', {
|
|
1788
|
+
turn_id: turnId,
|
|
1789
|
+
reply_count: pending.replies?.length || 0,
|
|
1790
|
+
via_text_fallback: (pending.replies?.length || 0) === 0,
|
|
1791
|
+
attributed: pending.seen === true ? 'seen' : 'reply-bound',
|
|
1792
|
+
session_id: this.claudeSessionId,
|
|
1793
|
+
});
|
|
1794
|
+
this._finalizeTurn(turnId);
|
|
1795
|
+
}, this.stopGraceMs);
|
|
1796
|
+
pending._stopGraceTimer.unref?.();
|
|
1797
|
+
}
|
|
1798
|
+
|
|
1799
|
+
/** D1: cancel a stop-grace (rung 1 stale-Stop, or a superseded legacy grace). */
|
|
1800
|
+
_cancelStopGrace(turnId, pending, source) {
|
|
1801
|
+
if (pending._stopGraceTimer) { clearTimeout(pending._stopGraceTimer); pending._stopGraceTimer = null; }
|
|
1802
|
+
if (pending._onStop) { this.off('stop-hook', pending._onStop); pending._onStop = null; }
|
|
1803
|
+
pending._stopGracePending = false;
|
|
1804
|
+
this._logEvent('cli-stop-grace-cancelled', { turn_id: turnId, source });
|
|
1805
|
+
}
|
|
1806
|
+
|
|
1148
1807
|
// 0.12 Phase 1.7 (Finding 0.1.A): two-step turn resolution.
|
|
1149
1808
|
// _resolveTurn — entry point called by channel-result OR quiet-window
|
|
1150
1809
|
// expiry. Schedules a stopGraceMs window during which
|
|
@@ -1223,6 +1882,9 @@ class CliProcess extends Process {
|
|
|
1223
1882
|
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
1224
1883
|
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
1225
1884
|
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
|
|
1885
|
+
if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer); // 0.13 D1
|
|
1886
|
+
if (pending._onStop) { this.off('stop-hook', pending._onStop); pending._onStop = null; }
|
|
1887
|
+
const hadReplyToolCalls = pending.replies.length > 0;
|
|
1226
1888
|
let text = pending.replies.join('\n\n');
|
|
1227
1889
|
// 0.12 Phase 1.7 fallback: if no reply tool calls landed (claude ended
|
|
1228
1890
|
// the turn without calling mcp__polygram-bridge__reply), use the Stop
|
|
@@ -1240,12 +1902,17 @@ class CliProcess extends Process {
|
|
|
1240
1902
|
// to appear free in dashboards.
|
|
1241
1903
|
const result = {
|
|
1242
1904
|
text,
|
|
1243
|
-
// Review F#2:
|
|
1244
|
-
//
|
|
1245
|
-
//
|
|
1246
|
-
//
|
|
1247
|
-
//
|
|
1248
|
-
|
|
1905
|
+
// Review F#2: when claude used reply tool calls, the dispatcher ALREADY
|
|
1906
|
+
// delivered that text to Telegram incrementally — polygram.js must
|
|
1907
|
+
// short-circuit its deliverReplies branch or every turn delivers twice.
|
|
1908
|
+
// BUT a turn finalized via the Stop fallback (no reply tool calls — the
|
|
1909
|
+
// stuck-turn case) has delivered NOTHING; marking it alreadyDelivered
|
|
1910
|
+
// would resolve the turn silently and the user still sees nothing. So
|
|
1911
|
+
// only claim already-delivered when reply tool calls actually fired —
|
|
1912
|
+
// or when claude ACKED consuming this turn in a sibling reply
|
|
1913
|
+
// (consumed_turn_ids; the fold-id-echo case): re-sending the Stop
|
|
1914
|
+
// fallback there would duplicate the delivered answer.
|
|
1915
|
+
alreadyDelivered: hadReplyToolCalls || pending._consumedAcked === true,
|
|
1249
1916
|
sessionId: this.claudeSessionId,
|
|
1250
1917
|
cost: null, // Channels protocol doesn't expose per-turn cost
|
|
1251
1918
|
duration,
|
|
@@ -1261,6 +1928,12 @@ class CliProcess extends Process {
|
|
|
1261
1928
|
},
|
|
1262
1929
|
};
|
|
1263
1930
|
this.inFlight = this.pendingTurns.size > 0;
|
|
1931
|
+
// 0.13 D2: the finalized cycle resolves its own ledger entry; any
|
|
1932
|
+
// non-primary entries still 'written' enter the drop-confirm window
|
|
1933
|
+
// (a late next-cycle pickup or ack cancels; otherwise dropped /
|
|
1934
|
+
// fold-suspected / superseded — see _dropConfirmFire).
|
|
1935
|
+
this._ledgerTransition(turnId, 'resolved');
|
|
1936
|
+
this._armDropConfirmSweep();
|
|
1264
1937
|
pending.resolve(result);
|
|
1265
1938
|
this.emit('result', { subtype: 'success' }, { streamText: text });
|
|
1266
1939
|
this.emit('idle');
|
|
@@ -1310,6 +1983,9 @@ class CliProcess extends Process {
|
|
|
1310
1983
|
if (oldest.quietTimer) clearTimeout(oldest.quietTimer);
|
|
1311
1984
|
if (oldest.hardTimer) clearTimeout(oldest.hardTimer);
|
|
1312
1985
|
if (oldest.absoluteTimer) clearTimeout(oldest.absoluteTimer);
|
|
1986
|
+
if (oldest._stopGraceTimer) clearTimeout(oldest._stopGraceTimer);
|
|
1987
|
+
if (oldest._activityQuietTimer) clearTimeout(oldest._activityQuietTimer); // 0.13 D1
|
|
1988
|
+
if (oldest._onStop) this.off('stop-hook', oldest._onStop);
|
|
1313
1989
|
const dropErr = new Error('queue overflow — oldest pending evicted');
|
|
1314
1990
|
dropErr.code = 'QUEUE_OVERFLOW';
|
|
1315
1991
|
try { oldest.reject(dropErr); } catch {}
|
|
@@ -1348,6 +2024,15 @@ class CliProcess extends Process {
|
|
|
1348
2024
|
const fireTimeout = (reason) => {
|
|
1349
2025
|
if (!this.pendingTurns.has(turnId)) return;
|
|
1350
2026
|
const pending = this.pendingTurns.get(turnId);
|
|
2027
|
+
// 0.13 D1 (S9): unblock any open ask FIRST — claude must never stay
|
|
2028
|
+
// hung on a question whose turn we are about to end. The card cleanup
|
|
2029
|
+
// stays with the question sweep; this only resolves the blocking tool.
|
|
2030
|
+
if (this._openQuestions.size > 0) {
|
|
2031
|
+
for (const tc of [...this._openQuestions]) {
|
|
2032
|
+
this._logEvent('cli-question-timedout-at-ceiling', { tool_call_id: tc, reason });
|
|
2033
|
+
try { this.writeQuestionAnswer(tc, { timedout: true }); } catch { /* best-effort */ }
|
|
2034
|
+
}
|
|
2035
|
+
}
|
|
1351
2036
|
this.pendingTurns.delete(turnId);
|
|
1352
2037
|
const idx = this.pendingQueue.findIndex(e => e.turnId === turnId);
|
|
1353
2038
|
if (idx >= 0) this.pendingQueue.splice(idx, 1);
|
|
@@ -1355,8 +2040,44 @@ class CliProcess extends Process {
|
|
|
1355
2040
|
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
1356
2041
|
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
1357
2042
|
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
|
|
2043
|
+
if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer);
|
|
2044
|
+
if (pending._onStop) this.off('stop-hook', pending._onStop);
|
|
1358
2045
|
this.inFlight = this.pendingTurns.size > 0;
|
|
1359
2046
|
const turnTimeoutMs = reason === 'absolute' ? this.turnAbsoluteMs : (opts.maxTurnMs || this.turnTimeoutMs);
|
|
2047
|
+
|
|
2048
|
+
// 0.13 D1 ceiling-resolve: a ceiling expiring on a turn with delivered
|
|
2049
|
+
// replies RESOLVES it — the user already has their answer; rejecting
|
|
2050
|
+
// would send a scary timeout error AFTER a successful reply (round-2
|
|
2051
|
+
// panel finding: the v2 soak gate contradicted the design's own
|
|
2052
|
+
// ask-timeout-then-ceiling path). TURN_TIMEOUT rejection is reserved
|
|
2053
|
+
// for turns with ZERO delivered replies. Consumed-acked counts as
|
|
2054
|
+
// replied: the answer rode a sibling turn_id (fold-id echo — the UMI
|
|
2055
|
+
// 2026-06-11 19:49 false ⏱; see _ledgerAckConsumed).
|
|
2056
|
+
if ((pending.replies?.length || 0) > 0
|
|
2057
|
+
|| (pending.seen === true && pending._consumedAcked === true)) {
|
|
2058
|
+
this._logEvent('cli-turn-ceiling-resolved', {
|
|
2059
|
+
reason, turnTimeoutMs, reply_count: pending.replies?.length || 0,
|
|
2060
|
+
consumed_acked: pending._consumedAcked === true,
|
|
2061
|
+
});
|
|
2062
|
+
this.emit('idle');
|
|
2063
|
+
resolve({
|
|
2064
|
+
text: pending.replies.join('\n\n'),
|
|
2065
|
+
alreadyDelivered: true,
|
|
2066
|
+
sessionId: this.claudeSessionId,
|
|
2067
|
+
cost: null,
|
|
2068
|
+
duration: Date.now() - pending.startedAt,
|
|
2069
|
+
error: null,
|
|
2070
|
+
metrics: {
|
|
2071
|
+
inputTokens: null, outputTokens: null,
|
|
2072
|
+
cacheCreationTokens: null, cacheReadTokens: null,
|
|
2073
|
+
numAssistantMessages: pending.replies.length,
|
|
2074
|
+
numToolUses: null,
|
|
2075
|
+
resultSubtype: 'success',
|
|
2076
|
+
},
|
|
2077
|
+
});
|
|
2078
|
+
return;
|
|
2079
|
+
}
|
|
2080
|
+
|
|
1360
2081
|
this.emit('turn-timeout', {
|
|
1361
2082
|
reason,
|
|
1362
2083
|
turnTimeoutMs,
|
|
@@ -1372,24 +2093,43 @@ class CliProcess extends Process {
|
|
|
1372
2093
|
const pending = {
|
|
1373
2094
|
resolve, reject,
|
|
1374
2095
|
replies: [],
|
|
2096
|
+
// 0.13 D1: pickup marker — set when a UserPromptSubmit prompt carries
|
|
2097
|
+
// this turn's envelope (the seen-slice). Rung 1's Stop attribution.
|
|
2098
|
+
seen: false,
|
|
1375
2099
|
quietTimer: null,
|
|
1376
|
-
|
|
1377
|
-
//
|
|
1378
|
-
//
|
|
2100
|
+
_activityQuietTimer: null,
|
|
2101
|
+
// hardTimer = idle ceiling. Resets on any activity (_noteActivity)
|
|
2102
|
+
// so a chatty or tool-heavy turn isn't killed at 10 min wall-clock.
|
|
1379
2103
|
hardTimer: setTimeout(() => fireTimeout('idle'), opts.maxTurnMs || this.turnTimeoutMs),
|
|
1380
2104
|
// absoluteTimer = wall-clock ceiling. Does NOT reset. Bounds true
|
|
1381
2105
|
// runaways. 30 min default — high enough that legitimate
|
|
1382
2106
|
// multi-step refactors complete, low enough to catch infinite
|
|
1383
2107
|
// chatter.
|
|
1384
2108
|
absoluteTimer: setTimeout(() => fireTimeout('absolute'), this.turnAbsoluteMs),
|
|
1385
|
-
// Review F#13: attach fireTimeout so
|
|
1386
|
-
//
|
|
1387
|
-
// reject closure).
|
|
2109
|
+
// Review F#13: attach fireTimeout so activity can reset the idle
|
|
2110
|
+
// timer (creates a fresh setTimeout with the same closure).
|
|
1388
2111
|
_fireTimeout: fireTimeout,
|
|
1389
2112
|
startedAt: Date.now(),
|
|
1390
2113
|
};
|
|
1391
2114
|
this.pendingTurns.set(turnId, pending);
|
|
1392
2115
|
|
|
2116
|
+
// 0.13 D1 (§1.4): the single-active-cycle invariant is enforced by the
|
|
2117
|
+
// daemon's stdinLock (held across the full turn) — CliProcess can't see
|
|
2118
|
+
// the lock, so a second concurrent pending means a caller bypassed the
|
|
2119
|
+
// contract. Loud assertion telemetry; the drop-rather-than-misattribute
|
|
2120
|
+
// defenses (reply routing, Stop attribution) remain the failure mode.
|
|
2121
|
+
if (this.pendingTurns.size > 1) {
|
|
2122
|
+
this.logger.warn?.(
|
|
2123
|
+
`[${this.label}] cli: ${this.pendingTurns.size} concurrent pending turns — stdinLock contract violated upstream`,
|
|
2124
|
+
);
|
|
2125
|
+
this._logEvent('cli-multi-pending-assert', { pending_count: this.pendingTurns.size });
|
|
2126
|
+
}
|
|
2127
|
+
|
|
2128
|
+
// 0.13 D2: ledger the primary + keep the exact envelope for the delivery
|
|
2129
|
+
// watchdog's idempotent re-write (the pending owns it — no text in the
|
|
2130
|
+
// ledger, events stay content-free per L13).
|
|
2131
|
+
this._ledgerAdd(turnId, { source: 'primary', msgId: opts.context?.sourceMsgId });
|
|
2132
|
+
|
|
1393
2133
|
// Review F#18: bridge-disconnect TOCTOU. The bridgeReady check at
|
|
1394
2134
|
// top of send() can race the bridge socket close. If the bridge
|
|
1395
2135
|
// dies between check and write, _writeToBridge silently no-ops (it
|
|
@@ -1397,14 +2137,16 @@ class CliProcess extends Process {
|
|
|
1397
2137
|
// pending entry sits with no live bridge until hardTimer (10 min).
|
|
1398
2138
|
// Pass the actual write result back and reject immediately on
|
|
1399
2139
|
// failure so the caller sees a fast, code-tagged error.
|
|
1400
|
-
|
|
2140
|
+
pending._userMsgPayload = {
|
|
1401
2141
|
kind: 'user_msg',
|
|
1402
2142
|
turn_id: turnId,
|
|
1403
2143
|
text: prompt,
|
|
1404
2144
|
chat_id: this.chatId,
|
|
1405
2145
|
user: opts.context?.user || '',
|
|
1406
2146
|
msg_id: opts.context?.sourceMsgId || '',
|
|
1407
|
-
}
|
|
2147
|
+
};
|
|
2148
|
+
const wrote = this._writeToBridge(pending._userMsgPayload);
|
|
2149
|
+
if (wrote) this._armDeliveryWatchdog(turnId, pending);
|
|
1408
2150
|
if (!wrote) {
|
|
1409
2151
|
this.pendingTurns.delete(turnId);
|
|
1410
2152
|
const qIdx = this.pendingQueue.findIndex(e => e.turnId === turnId);
|
|
@@ -1423,6 +2165,13 @@ class CliProcess extends Process {
|
|
|
1423
2165
|
async interrupt() {
|
|
1424
2166
|
if (this.closed) return;
|
|
1425
2167
|
if (!this.tmuxSession) return;
|
|
2168
|
+
// Cancel-cheap C2 (spec Finding 7): a cancel is already in flight — a
|
|
2169
|
+
// SECOND C-c would land at the now-idle prompt, which is claude's exit
|
|
2170
|
+
// chord ("press ctrl+c again to exit") and would convert the cheap cancel
|
|
2171
|
+
// into an accidental process exit. Also: resetting the grace timer would
|
|
2172
|
+
// DELAY the synthetic resolution for a user double-tapping "stop".
|
|
2173
|
+
// Idempotent no-op instead.
|
|
2174
|
+
if (this._interruptGraceTimer) return;
|
|
1426
2175
|
// tmux SIGINT — hard interrupt for the running turn.
|
|
1427
2176
|
try {
|
|
1428
2177
|
await this.runner.sendControl(this.tmuxSession, 'C-c');
|
|
@@ -1433,18 +2182,47 @@ class CliProcess extends Process {
|
|
|
1433
2182
|
this.emit('interrupt-applied', { backend: this.backend });
|
|
1434
2183
|
this._logEvent('interrupt-applied', {});
|
|
1435
2184
|
|
|
2185
|
+
// Cancel-cheap C1 — the spec's O2 BLOCKER: the cancelled work's inputs
|
|
2186
|
+
// must never re-deliver. The grace below synthesizes the resolution
|
|
2187
|
+
// WITHOUT _finalizeTurn, so without this, an autosteer/fold entry stays
|
|
2188
|
+
// 'written' and a LATER cycle-end sweep declares it dropped →
|
|
2189
|
+
// drop-redeliver re-injects the user's CANCELLED message minutes later.
|
|
2190
|
+
// 'cancelled' is terminal: the sweep only targets 'written', and
|
|
2191
|
+
// _ledgerTransition clears the entry's drop/watchdog timers.
|
|
2192
|
+
for (const [id, e] of this.inputLedger) {
|
|
2193
|
+
if (e.state === 'written' || e.state === 'seen') {
|
|
2194
|
+
this._ledgerTransition(id, 'cancelled');
|
|
2195
|
+
this._logEvent('cli-input-cancelled', { turn_id: id, source: e.source });
|
|
2196
|
+
}
|
|
2197
|
+
}
|
|
2198
|
+
|
|
1436
2199
|
// Review P3 C8: after Ctrl-C, Claude may or may not call reply with an
|
|
1437
2200
|
// "I was interrupted" message. If it doesn't (5s grace), resolve pending
|
|
1438
2201
|
// turns with subtype 'interrupted' instead of letting them wait the full
|
|
1439
|
-
// 10-min hardTimer.
|
|
1440
|
-
|
|
2202
|
+
// 10-min hardTimer.
|
|
2203
|
+
//
|
|
2204
|
+
// C4 BLOCKER (review 2026-06-12): SNAPSHOT the turns that were in flight at
|
|
2205
|
+
// C-c time and resolve ONLY those. The cancelled turn often finalizes
|
|
2206
|
+
// cleanly DURING the grace (claude acks the C-c) and the user then starts a
|
|
2207
|
+
// NEW turn — the "stop, then redirect" flow cheap-cancel exists for. Without
|
|
2208
|
+
// the snapshot the stale grace iterated pendingTurns LIVE and silently
|
|
2209
|
+
// resolved that fresh turn as 'interrupted' (lost). send() doesn't clear the
|
|
2210
|
+
// grace, so the snapshot is the fix.
|
|
2211
|
+
const interruptedTurnIds = new Set(this.pendingTurns.keys());
|
|
1441
2212
|
this._interruptGraceTimer = setTimeout(() => {
|
|
1442
2213
|
let resolvedAny = false;
|
|
1443
2214
|
for (const [turnId, pending] of this.pendingTurns) {
|
|
2215
|
+
if (!interruptedTurnIds.has(turnId)) continue; // only the turns in flight at C-c
|
|
1444
2216
|
// Synthesize an interrupted resolution: empty text, 'interrupted' subtype.
|
|
2217
|
+
// Cancel-cheap C3: clear ALL per-pending machinery (mirrors
|
|
2218
|
+
// _finalizeTurn) — stray timers/listeners on the kept-warm proc are
|
|
2219
|
+
// exactly what the cheap-cancel design must not leak.
|
|
1445
2220
|
if (pending.quietTimer) clearTimeout(pending.quietTimer);
|
|
1446
2221
|
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
1447
|
-
|
|
2222
|
+
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
2223
|
+
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
|
|
2224
|
+
if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer);
|
|
2225
|
+
if (pending._onStop) { this.off('stop-hook', pending._onStop); pending._onStop = null; }
|
|
1448
2226
|
this.pendingTurns.delete(turnId);
|
|
1449
2227
|
const qIdx = this.pendingQueue.findIndex(e => e.turnId === turnId);
|
|
1450
2228
|
if (qIdx >= 0) this.pendingQueue.splice(qIdx, 1);
|
|
@@ -1502,7 +2280,7 @@ class CliProcess extends Process {
|
|
|
1502
2280
|
*/
|
|
1503
2281
|
async probeBusyState() {
|
|
1504
2282
|
const base = {
|
|
1505
|
-
busy: false, streaming: false,
|
|
2283
|
+
busy: false, streaming: false, backgroundShell: false, shellCount: 0,
|
|
1506
2284
|
inFlight: this.inFlight, pendingTurns: this.pendingTurns.size,
|
|
1507
2285
|
captured: false, paneTail: null,
|
|
1508
2286
|
};
|
|
@@ -1518,10 +2296,23 @@ class CliProcess extends Process {
|
|
|
1518
2296
|
}
|
|
1519
2297
|
if (!pane) return base;
|
|
1520
2298
|
const streaming = STREAMING_HINT_RE.test(pane);
|
|
2299
|
+
// Background-shell count from the TUI mode line. Match only the captured
|
|
2300
|
+
// TAIL (the mode line lives at the bottom of the viewport) so a `· N shell ·`
|
|
2301
|
+
// string scrolled off into history can't trip a stale false-positive — see
|
|
2302
|
+
// BACKGROUND_SHELL_RE. A detached `run_in_background` Bash that outlived its
|
|
2303
|
+
// turn shows here even while claude is idle and not streaming.
|
|
2304
|
+
const m = pane.slice(-400).match(BACKGROUND_SHELL_RE);
|
|
2305
|
+
const shellCount = m ? parseInt(m[1], 10) : 0;
|
|
2306
|
+
const backgroundShell = shellCount > 0;
|
|
1521
2307
|
return {
|
|
1522
2308
|
...base,
|
|
2309
|
+
// `busy` stays streaming-only — it is the abort path's "is claude working a
|
|
2310
|
+
// turn" signal and must not change behaviour. Background-shell liveness is a
|
|
2311
|
+
// separate axis the stall-watchdog reads via `backgroundShell`/`shellCount`.
|
|
1523
2312
|
busy: streaming,
|
|
1524
2313
|
streaming,
|
|
2314
|
+
backgroundShell,
|
|
2315
|
+
shellCount,
|
|
1525
2316
|
captured: true,
|
|
1526
2317
|
paneTail: pane.slice(-200),
|
|
1527
2318
|
};
|
|
@@ -1533,6 +2324,137 @@ class CliProcess extends Process {
|
|
|
1533
2324
|
return busy;
|
|
1534
2325
|
}
|
|
1535
2326
|
|
|
2327
|
+
/**
|
|
2328
|
+
* Does this session have a detached background shell running RIGHT NOW — a
|
|
2329
|
+
* `run_in_background` Bash that may have outlived its turn? Thin probe over
|
|
2330
|
+
* probeBusyState's background-shell signal; the stall-watchdog's input.
|
|
2331
|
+
* @returns {Promise<{live:boolean, count:number}>}
|
|
2332
|
+
*/
|
|
2333
|
+
async hasLiveBackgroundWork() {
|
|
2334
|
+
const { backgroundShell, shellCount } = await this.probeBusyState();
|
|
2335
|
+
return { live: backgroundShell, count: shellCount };
|
|
2336
|
+
}
|
|
2337
|
+
|
|
2338
|
+
/**
|
|
2339
|
+
* LRU eviction pin (0.12.0 spec). Cached read of `_bgWorkSince` — the idle bg-work
|
|
2340
|
+
* watchdog state maintained by `_pollBackgroundWork` on the ≤5s pong tick. Non-null ⟺ a
|
|
2341
|
+
* detached background shell has been observed while idle. No time cap: a job that runs for
|
|
2342
|
+
* hours stays pinned (elapsed time can't tell "slow-but-progressing" from "stuck"). Cheap,
|
|
2343
|
+
* sync — safe to call from `_evictLRU`.
|
|
2344
|
+
* @returns {boolean}
|
|
2345
|
+
*/
|
|
2346
|
+
hasActiveBackgroundWork() {
|
|
2347
|
+
return this._bgWorkSince !== null;
|
|
2348
|
+
}
|
|
2349
|
+
|
|
2350
|
+
/**
|
|
2351
|
+
* Resolve the model / effort for a spawn context using the topic→chat→
|
|
2352
|
+
* fallback precedence (mirrors the spawn path). Single source of truth shared
|
|
2353
|
+
* by start() (which records this.model / this.effort) and wouldReloadFor()
|
|
2354
|
+
* (which compares the current config to those spawn-time values).
|
|
2355
|
+
*/
|
|
2356
|
+
_resolveModel(opts) {
|
|
2357
|
+
const topicConfig = opts.threadId && opts.chatConfig?.topics?.[opts.threadId];
|
|
2358
|
+
return topicConfig?.model || opts.chatConfig?.model || opts.model;
|
|
2359
|
+
}
|
|
2360
|
+
|
|
2361
|
+
_resolveEffort(opts) {
|
|
2362
|
+
const topicConfig = opts.threadId && opts.chatConfig?.topics?.[opts.threadId];
|
|
2363
|
+
return topicConfig?.effort || opts.chatConfig?.effort || opts.effort;
|
|
2364
|
+
}
|
|
2365
|
+
|
|
2366
|
+
/**
|
|
2367
|
+
* getOrSpawn calls this before reusing a warm proc. cli can't hot-swap model
|
|
2368
|
+
* or effort (spawn-time flags), so when the resolved config has drifted from
|
|
2369
|
+
* what we spawned with AND we are idle, the proc must be killed + cold-
|
|
2370
|
+
* respawned (--resume keeps the conversation; the new --model / --effort takes
|
|
2371
|
+
* effect). In-flight → false: fold the message into the running turn; the
|
|
2372
|
+
* drift reloads on the next idle dispatch. SDK procs apply model live and do
|
|
2373
|
+
* NOT implement this method, so process-manager only reloads when it exists.
|
|
2374
|
+
* @returns {boolean}
|
|
2375
|
+
*/
|
|
2376
|
+
wouldReloadFor(spawnContext) {
|
|
2377
|
+
if (this.inFlight || this.closed) return false;
|
|
2378
|
+
return this._resolveModel(spawnContext) !== this.model
|
|
2379
|
+
|| this._resolveEffort(spawnContext) !== this.effort;
|
|
2380
|
+
}
|
|
2381
|
+
|
|
2382
|
+
/**
|
|
2383
|
+
* 0.13 D1 (S9): LRU eviction pin — a session blocked on an open `ask` must
|
|
2384
|
+
* not be evicted (the question, and claude's blocked cycle, would die with
|
|
2385
|
+
* it). Belt-and-braces: with D1 the turn stays inFlight through the wait.
|
|
2386
|
+
*/
|
|
2387
|
+
hasOpenQuestions() {
|
|
2388
|
+
return this._openQuestions.size > 0;
|
|
2389
|
+
}
|
|
2390
|
+
|
|
2391
|
+
/**
|
|
2392
|
+
* Stall-watchdog for detached background work (0.12.0 background-work
|
|
2393
|
+
* lifecycle, shumorobot Music 7h frozen-Chrome download). Runs on the
|
|
2394
|
+
* pongWatchdog 5s tick but ONLY while the session is IDLE (pendingTurns===0) —
|
|
2395
|
+
* the mirror of _pollMidTurnDialogs, which only runs DURING turns. When a
|
|
2396
|
+
* `run_in_background` Bash outlives its turn and keeps running while claude is
|
|
2397
|
+
* idle for > bgWorkStallMs, nothing tells the agent or user whether it's
|
|
2398
|
+
* progressing or stuck. One read-only self-check re-invokes the agent to
|
|
2399
|
+
* diagnose — via `fireUserMessage`, NOT `injectUserMessage` (which no-ops when
|
|
2400
|
+
* !inFlight, the exact idle state here). Read-only framing matters: the agent
|
|
2401
|
+
* runs bypassPermissions, so an open-ended "fix it" could background another
|
|
2402
|
+
* hung shell unattended.
|
|
2403
|
+
*
|
|
2404
|
+
* Exactly one self-check per continuous background-work window (capped by
|
|
2405
|
+
* `_bgWorkEscalations`); the window resets only when the shell count returns to
|
|
2406
|
+
* 0. Never throws — swallows its own errors so the pong watchdog stays clean.
|
|
2407
|
+
*/
|
|
2408
|
+
async _pollBackgroundWork() {
|
|
2409
|
+
if (this.closed || !this.bridgeReady) return;
|
|
2410
|
+
// Only watch while idle. An active turn means the agent is engaged
|
|
2411
|
+
// (_pollMidTurnDialogs owns that path). Crucially we do NOT reset the clock
|
|
2412
|
+
// here — the same shell is still running, so the window persists across a
|
|
2413
|
+
// brief self-check turn rather than restarting and re-pinging every window.
|
|
2414
|
+
if (this.pendingTurns.size > 0) return;
|
|
2415
|
+
let live = false;
|
|
2416
|
+
let count = 0;
|
|
2417
|
+
try {
|
|
2418
|
+
({ live, count } = await this.hasLiveBackgroundWork());
|
|
2419
|
+
} catch (err) {
|
|
2420
|
+
this.logger.warn?.(`[${this.label}] channels: bg-work probe failed: ${err.message}`);
|
|
2421
|
+
return;
|
|
2422
|
+
}
|
|
2423
|
+
if (!live) {
|
|
2424
|
+
if (this._bgWorkSince !== null) {
|
|
2425
|
+
this._logEvent('cli-bg-work-cleared', { idle_ms: Date.now() - this._bgWorkSince });
|
|
2426
|
+
// Visibility: tear down the status indicator once work clears.
|
|
2427
|
+
if (this._bgWorkStatusShown) {
|
|
2428
|
+
this.emit('bg-work-status', { state: 'cleared' });
|
|
2429
|
+
this._bgWorkStatusShown = false;
|
|
2430
|
+
}
|
|
2431
|
+
}
|
|
2432
|
+
this._bgWorkSince = null;
|
|
2433
|
+
this._bgWorkEscalations = 0;
|
|
2434
|
+
return;
|
|
2435
|
+
}
|
|
2436
|
+
if (this._bgWorkSince === null) {
|
|
2437
|
+
// First idle observation of a live background shell — start the clock AND
|
|
2438
|
+
// raise the visibility indicator so a long job reads as working, not stuck.
|
|
2439
|
+
this._bgWorkSince = Date.now();
|
|
2440
|
+
this._bgWorkEscalations = 0;
|
|
2441
|
+
this._logEvent('cli-bg-work-detected', { shell_count: count });
|
|
2442
|
+
this.emit('bg-work-status', { state: 'running', count });
|
|
2443
|
+
this._bgWorkStatusShown = true;
|
|
2444
|
+
return;
|
|
2445
|
+
}
|
|
2446
|
+
const idleMs = Date.now() - this._bgWorkSince;
|
|
2447
|
+
if (idleMs < this.bgWorkStallMs || this._bgWorkEscalations >= 1) return;
|
|
2448
|
+
const mins = Math.max(1, Math.round(idleMs / 60000));
|
|
2449
|
+
const prompt =
|
|
2450
|
+
`⏳ A background job has been running ~${mins} min with no update. `
|
|
2451
|
+
+ `Check its status and report whether it's progressing or stuck. `
|
|
2452
|
+
+ `Do NOT start new work, re-run it, or kill anything — report only.`;
|
|
2453
|
+
const fired = this.fireUserMessage(prompt);
|
|
2454
|
+
this._bgWorkEscalations = 1;
|
|
2455
|
+
this._logEvent('cli-bg-work-stall-selfcheck', { idle_ms: idleMs, shell_count: count, fired });
|
|
2456
|
+
}
|
|
2457
|
+
|
|
1536
2458
|
async kill(reason = 'kill') {
|
|
1537
2459
|
if (this.closed) return;
|
|
1538
2460
|
// Parity P19: re-entry guard for concurrent kill() calls. Mirrors
|
|
@@ -1606,6 +2528,22 @@ class CliProcess extends Process {
|
|
|
1606
2528
|
_handleHookEvent(ev) {
|
|
1607
2529
|
if (!ev || typeof ev !== 'object') return;
|
|
1608
2530
|
|
|
2531
|
+
// rc.16 observability: emit once when the FIRST hook event arrives for
|
|
2532
|
+
// this session, confirming the claude→ndjson→tail pipeline is actually
|
|
2533
|
+
// flowing. The 2026-06-02 stuck turn had a session whose hook ndjson was
|
|
2534
|
+
// 0 bytes — claude emitted no hooks polygram could see, so no Stop ever
|
|
2535
|
+
// arrived to finalize the turn. Without this signal that's invisible: a
|
|
2536
|
+
// turn that hangs with NO `cli-hook-stream-live` for its session means the
|
|
2537
|
+
// hook pipeline is dead for it (distinct from "Stop fired but wasn't
|
|
2538
|
+
// acted on", which `cli-turn-resolved-by-stop` now covers).
|
|
2539
|
+
if (!this._sawHookStream) {
|
|
2540
|
+
this._sawHookStream = true;
|
|
2541
|
+
this._logEvent('cli-hook-stream-live', {
|
|
2542
|
+
session_id: this.claudeSessionId,
|
|
2543
|
+
first_event: ev.type,
|
|
2544
|
+
});
|
|
2545
|
+
}
|
|
2546
|
+
|
|
1609
2547
|
// 0.12 Phase 1.8 (Finding 0.4.A): per-event lag measurement.
|
|
1610
2548
|
// polygram_received_at_ms is stamped by the helper subprocess at write
|
|
1611
2549
|
// time; subtracting from Date.now() gives the helper-write → tail-emit
|
|
@@ -1627,11 +2565,57 @@ class CliProcess extends Process {
|
|
|
1627
2565
|
});
|
|
1628
2566
|
}
|
|
1629
2567
|
|
|
2568
|
+
// 0.13 D1: every hook event is same-session ACTIVITY for the finalizer
|
|
2569
|
+
// ladder (generalizes the 2026-06-08 WA-topic fix, which only extended on
|
|
2570
|
+
// Pre/PostToolUse) — EXCEPT Stop, which is a terminal signal, not work:
|
|
2571
|
+
// noting it as activity would cancel its own attribution grace. parse-error
|
|
2572
|
+
// and unknown are excluded too (stream noise is not evidence of work).
|
|
2573
|
+
if (ev.type === 'Stop') {
|
|
2574
|
+
this._lastHookEventAt = Date.now();
|
|
2575
|
+
} else if (ev.type && ev.type !== 'parse-error' && ev.type !== 'unknown') {
|
|
2576
|
+
this._lastHookEventAt = Date.now();
|
|
2577
|
+
this._noteActivity(`hook:${ev.type}`);
|
|
2578
|
+
}
|
|
2579
|
+
|
|
1630
2580
|
switch (ev.type) {
|
|
1631
2581
|
case 'UserPromptSubmit':
|
|
2582
|
+
// 0.13 D1 seen-slice: the UPS prompt carries the bridge-authored
|
|
2583
|
+
// <channel turn_id="…"> envelope (P0 spike Q1) — parse it (anchored on
|
|
2584
|
+
// the raw tag prefix, see UPS_ENVELOPE_TURN_ID_RE) and mark the
|
|
2585
|
+
// matching pending as picked-up. `seen` is what lets rung 1 tell this
|
|
2586
|
+
// cycle's Stop from a foreign cycle's. Never log prompt content (L13).
|
|
2587
|
+
let anchorMsgId = null;
|
|
2588
|
+
if (typeof ev.prompt === 'string' && ev.prompt) {
|
|
2589
|
+
for (const m of ev.prompt.matchAll(UPS_ENVELOPE_TURN_ID_RE)) {
|
|
2590
|
+
const seenPending = this.pendingTurns.get(m[1]);
|
|
2591
|
+
if (seenPending && seenPending.seen !== true) {
|
|
2592
|
+
seenPending.seen = true;
|
|
2593
|
+
this._logEvent('cli-ups-seen', { turn_id: m[1] });
|
|
2594
|
+
}
|
|
2595
|
+
// 0.13 D2: pickup transitions the ledger entry too — for injected
|
|
2596
|
+
// (no-pending) inputs this is THE fold/next-cycle signal that
|
|
2597
|
+
// cancels drop detection; for primaries it cancels the delivery
|
|
2598
|
+
// watchdog. A late pickup (queued inject becoming the next cycle)
|
|
2599
|
+
// landing inside the drop-confirm window cancels it here.
|
|
2600
|
+
const lEntry = this.inputLedger.get(m[1]);
|
|
2601
|
+
if (lEntry) {
|
|
2602
|
+
if (lEntry.state === 'written' || lEntry.state === 'fold-suspected') {
|
|
2603
|
+
this._ledgerTransition(m[1], 'seen');
|
|
2604
|
+
if (!seenPending) this._logEvent('cli-ups-seen', { turn_id: m[1] });
|
|
2605
|
+
}
|
|
2606
|
+
// 0.13 D3: the picked-up message anchors the cycle's visuals.
|
|
2607
|
+
if (anchorMsgId == null && lEntry.msgId != null) anchorMsgId = lEntry.msgId;
|
|
2608
|
+
}
|
|
2609
|
+
}
|
|
2610
|
+
}
|
|
1632
2611
|
this.emit('turn-start', {
|
|
1633
2612
|
backend: this.backend,
|
|
1634
2613
|
sessionId: this.claudeSessionId,
|
|
2614
|
+
// 0.13 D3: lets the session feedback controller distinguish a
|
|
2615
|
+
// normal turn (has pending — per-turn visuals own it) from an
|
|
2616
|
+
// autonomous/injected cycle (no pending — the controller's job).
|
|
2617
|
+
hasPending: this.pendingTurns.size > 0,
|
|
2618
|
+
anchorMsgId,
|
|
1635
2619
|
});
|
|
1636
2620
|
return;
|
|
1637
2621
|
|
|
@@ -1714,15 +2698,94 @@ class CliProcess extends Process {
|
|
|
1714
2698
|
return;
|
|
1715
2699
|
}
|
|
1716
2700
|
|
|
1717
|
-
case 'Stop':
|
|
1718
|
-
//
|
|
1719
|
-
//
|
|
1720
|
-
//
|
|
1721
|
-
|
|
2701
|
+
case 'Stop': {
|
|
2702
|
+
// 0.13 D1 rung 1: Stop ends the turn ONLY when the ending cycle is
|
|
2703
|
+
// attributable to it. Stop carries no turn_id, and claude-side cycles
|
|
2704
|
+
// polygram never registered a pending for are routine (/compact +
|
|
2705
|
+
// bg-work self-checks via fireUserMessage, ScheduleWakeup cycles, a
|
|
2706
|
+
// non-folded inject running as its own cycle — the P0 spike confirmed
|
|
2707
|
+
// such cycles DO fire Stop). Pre-D1 the rc.16 branch finalized the
|
|
2708
|
+
// single pending on ANY Stop — a foreign cycle's Stop could close a
|
|
2709
|
+
// queued, never-picked-up user turn and deliver the FOREIGN cycle's
|
|
2710
|
+
// last_assistant_message as its answer (seam S5's Stop-identity gap).
|
|
2711
|
+
const info = {
|
|
1722
2712
|
stopHookActive: ev.stopHookActive,
|
|
1723
2713
|
lastAssistantMessage: ev.lastAssistantMessage,
|
|
1724
2714
|
backend: this.backend,
|
|
1725
|
-
}
|
|
2715
|
+
};
|
|
2716
|
+
// Legacy (rung 3) turns already resolving via a reply quiet-window
|
|
2717
|
+
// consume this via their per-turn onStop listener (the text-fallback
|
|
2718
|
+
// rescue inside _resolveTurn). Emit first so that path runs
|
|
2719
|
+
// synchronously before the attribution branch below.
|
|
2720
|
+
this.emit('stop-hook', info);
|
|
2721
|
+
|
|
2722
|
+
// A stop-hook-forced continuation means the cycle is, by definition,
|
|
2723
|
+
// NOT over — never finalize on it. (Unobserved in 30d of prod data;
|
|
2724
|
+
// cheap insurance per the design's round-2 review.)
|
|
2725
|
+
if (ev.stopHookActive === true) {
|
|
2726
|
+
this._logEvent('cli-stop-hook-active-ignored', { pending_count: this.pendingTurns.size });
|
|
2727
|
+
return;
|
|
2728
|
+
}
|
|
2729
|
+
|
|
2730
|
+
if (this.pendingTurns.size === 1) {
|
|
2731
|
+
const [turnId, p] = [...this.pendingTurns.entries()][0];
|
|
2732
|
+
if (!p._stopGracePending) {
|
|
2733
|
+
const attributed = p.seen === true || (p.replies?.length || 0) > 0;
|
|
2734
|
+
if (attributed) {
|
|
2735
|
+
// Finalize through a short grace; any same-session activity
|
|
2736
|
+
// inside it proves this Stop was stale/foreign (lagged ndjson
|
|
2737
|
+
// delivery) and cancels — the turn falls back to rung 2.
|
|
2738
|
+
this._beginAttributedStopGrace(turnId, p, info);
|
|
2739
|
+
} else {
|
|
2740
|
+
// Never picked up (no UPS-seen) and never replied — this Stop
|
|
2741
|
+
// belongs to a foreign cycle. Ignore it loudly; the pending
|
|
2742
|
+
// ends via its own pickup→Stop, rung 2, or the ceilings.
|
|
2743
|
+
this._logEvent('cli-stop-foreign', {
|
|
2744
|
+
turn_id: turnId,
|
|
2745
|
+
session_id: this.claudeSessionId,
|
|
2746
|
+
});
|
|
2747
|
+
}
|
|
2748
|
+
}
|
|
2749
|
+
} else if (this.pendingTurns.size > 1) {
|
|
2750
|
+
// Can't attribute Stop to one of several concurrent turns — surface
|
|
2751
|
+
// it so a turn that waited for its grace timer (instead of resolving
|
|
2752
|
+
// on Stop) is explained in the events DB.
|
|
2753
|
+
this._logEvent('cli-stop-unattributed', { pending_count: this.pendingTurns.size });
|
|
2754
|
+
}
|
|
2755
|
+
|
|
2756
|
+
// 0.12.0-rc.13 proactive compaction warning: on turn-end, if enabled
|
|
2757
|
+
// for this chat and not already warned this climb, sample context
|
|
2758
|
+
// occupancy from the transcript and warn (propose /compact) BEFORE
|
|
2759
|
+
// claude auto-compacts mid-turn and detaches the bridge. Fire-and-
|
|
2760
|
+
// forget — transcript IO must never block the stop path.
|
|
2761
|
+
if (this.compactionWarn?.enabled && !this._compactionWarned && ev.transcriptPath) {
|
|
2762
|
+
this._maybeProactiveCompactionWarn(ev.transcriptPath);
|
|
2763
|
+
}
|
|
2764
|
+
return;
|
|
2765
|
+
}
|
|
2766
|
+
|
|
2767
|
+
case 'PreCompact':
|
|
2768
|
+
// 0.12.0-rc.13: auto-compaction is the event that detaches the
|
|
2769
|
+
// channels MCP bridge mid-turn. Record it; and on the dangerous AUTO
|
|
2770
|
+
// case (manual /compact is the user's own deliberate action — never
|
|
2771
|
+
// nag), emit a reactive warning the chat layer posts. The proactive
|
|
2772
|
+
// warning (on Stop) tries to PREVENT this; this is the backstop.
|
|
2773
|
+
this._logEvent('cli-compaction-imminent', { trigger: ev.trigger });
|
|
2774
|
+
if (this.compactionWarn?.enabled && ev.trigger === 'auto') {
|
|
2775
|
+
this.emit('compaction-warn', {
|
|
2776
|
+
kind: 'reactive',
|
|
2777
|
+
trigger: 'auto',
|
|
2778
|
+
sessionId: this.claudeSessionId,
|
|
2779
|
+
backend: this.backend,
|
|
2780
|
+
});
|
|
2781
|
+
}
|
|
2782
|
+
return;
|
|
2783
|
+
|
|
2784
|
+
case 'PostCompact':
|
|
2785
|
+
// Context just dropped — re-arm the proactive warn-once so the next
|
|
2786
|
+
// climb can warn again.
|
|
2787
|
+
this._compactionWarned = false;
|
|
2788
|
+
this._logEvent('cli-compaction-done', { trigger: ev.trigger });
|
|
1726
2789
|
return;
|
|
1727
2790
|
|
|
1728
2791
|
case 'Notification':
|
|
@@ -1761,15 +2824,22 @@ class CliProcess extends Process {
|
|
|
1761
2824
|
{
|
|
1762
2825
|
const requestId = ev.toolUseId || `hook-notification-${Date.now()}`;
|
|
1763
2826
|
const toolName = ev.toolName;
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
2827
|
+
// Finding #11 fix: pass the STRUCTURED tool_input through. makeCanUseTool
|
|
2828
|
+
// matches gated patterns via matchesAnyPattern, which reads
|
|
2829
|
+
// input.command (Bash) / input.url (WebFetch) — a formatted STRING
|
|
2830
|
+
// makes those undefined so a gated `Bash(rm *)` never matches and the
|
|
2831
|
+
// tool is allowed with NO approval card (silent gating bypass). The
|
|
2832
|
+
// hook Notification payload carries structured tool_input, so forward
|
|
2833
|
+
// it as-is; the approval card (approvalCardText) renders a structured
|
|
2834
|
+
// object fine — same shape the SDK canUseTool path already uses. Fall
|
|
2835
|
+
// back to the formatted-string preview only if claude sent no
|
|
2836
|
+
// structured tool_input (degenerate — tool needs perm but no input).
|
|
2837
|
+
const toolInput = (ev.toolInput && typeof ev.toolInput === 'object')
|
|
2838
|
+
? ev.toolInput
|
|
2839
|
+
: this._formatToolInputForApproval(
|
|
2840
|
+
ev.prompt || null,
|
|
2841
|
+
typeof ev.toolInput === 'string' ? ev.toolInput : JSON.stringify(ev.toolInput || {}),
|
|
2842
|
+
);
|
|
1773
2843
|
this.emit('approval-required', {
|
|
1774
2844
|
id: requestId,
|
|
1775
2845
|
toolName,
|
|
@@ -1843,11 +2913,11 @@ class CliProcess extends Process {
|
|
|
1843
2913
|
* landing just before the disconnect would otherwise leave a stray
|
|
1844
2914
|
* timer on the dead instance).
|
|
1845
2915
|
*/
|
|
1846
|
-
_handleBridgeDisconnected() {
|
|
2916
|
+
_handleBridgeDisconnected(reason = 'socket-close') {
|
|
1847
2917
|
this.bridgeReady = false;
|
|
1848
2918
|
this.mcpReady = false;
|
|
1849
2919
|
if (this.closed) return;
|
|
1850
|
-
this.logger.warn?.(`[${this.label}] channels: bridge disconnected unexpectedly`);
|
|
2920
|
+
this.logger.warn?.(`[${this.label}] channels: bridge disconnected unexpectedly (${reason})`);
|
|
1851
2921
|
// L6: clear the interrupt grace timer alongside the rest of the lifecycle.
|
|
1852
2922
|
if (this._interruptGraceTimer) {
|
|
1853
2923
|
clearTimeout(this._interruptGraceTimer);
|
|
@@ -1859,6 +2929,7 @@ class CliProcess extends Process {
|
|
|
1859
2929
|
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
1860
2930
|
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
1861
2931
|
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
|
|
2932
|
+
if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer); // 0.13 D1
|
|
1862
2933
|
// L5: remove the per-turn stop-hook listener (this path bypasses
|
|
1863
2934
|
// Process.kill()'s removeAllListeners).
|
|
1864
2935
|
if (pending._onStop) this.off('stop-hook', pending._onStop);
|
|
@@ -1869,14 +2940,24 @@ class CliProcess extends Process {
|
|
|
1869
2940
|
this.pendingTurns.clear();
|
|
1870
2941
|
this.pendingQueue.length = 0;
|
|
1871
2942
|
this.inFlight = false;
|
|
2943
|
+
// 0.12: drop the interactive-question keep-alive here too, for parity with
|
|
2944
|
+
// _doKill — pm reacts to 'bridge-disconnected' by killing us anyway, but don't
|
|
2945
|
+
// depend on that ordering to stop the 60s interval / clear the open set.
|
|
2946
|
+
this._stopQuestionKeepAlive();
|
|
2947
|
+
this._openQuestions.clear();
|
|
2948
|
+
this._clearLedgerTimers(); // 0.13 D2
|
|
1872
2949
|
this.emit('bridge-disconnected');
|
|
1873
|
-
this._logEvent('bridge-disconnected', { reason
|
|
2950
|
+
this._logEvent('bridge-disconnected', { reason });
|
|
1874
2951
|
}
|
|
1875
2952
|
|
|
1876
2953
|
async _doKill(reason) {
|
|
1877
2954
|
this.closed = true;
|
|
1878
2955
|
this.inFlight = false;
|
|
1879
2956
|
|
|
2957
|
+
this._stopQuestionKeepAlive(); // 0.12: drop the interactive-question keep-alive
|
|
2958
|
+
this._openQuestions.clear();
|
|
2959
|
+
this._clearLedgerTimers(); // 0.13 D2
|
|
2960
|
+
|
|
1880
2961
|
if (this.pingTimer) {
|
|
1881
2962
|
clearInterval(this.pingTimer);
|
|
1882
2963
|
this.pingTimer = null;
|
|
@@ -1896,6 +2977,7 @@ class CliProcess extends Process {
|
|
|
1896
2977
|
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
1897
2978
|
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
1898
2979
|
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
|
|
2980
|
+
if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer); // 0.13 D1
|
|
1899
2981
|
if (pending._onStop) this.off('stop-hook', pending._onStop); // L5
|
|
1900
2982
|
const err = new Error(`session killed: ${reason}`);
|
|
1901
2983
|
err.code = 'KILLED';
|
|
@@ -1995,9 +3077,15 @@ class CliProcess extends Process {
|
|
|
1995
3077
|
* @param {string|number} [opts.msgId] — inbound Telegram msg_id, passed through to the bridge so claude's next reply can echo it via turn_id
|
|
1996
3078
|
* @returns {boolean}
|
|
1997
3079
|
*/
|
|
1998
|
-
injectUserMessage({ content, priority = 'next', shouldQuery, msgId } = {}) {
|
|
3080
|
+
injectUserMessage({ content, priority = 'next', shouldQuery, msgId, source = 'inject' } = {}) {
|
|
1999
3081
|
if (this.closed) return false;
|
|
2000
3082
|
if (!this.inFlight) return false; // base contract: no live turn → caller falls through
|
|
3083
|
+
// C5 (review 2026-06-12): a cancel is in flight (interrupt grace armed) —
|
|
3084
|
+
// inFlight is still true until the grace fires, but merging a follow-up into
|
|
3085
|
+
// work the user just stopped is wrong AND leaks a fresh 'written' ledger
|
|
3086
|
+
// entry that the cancel-loop has already passed (so it gets re-delivered later). Refuse so the
|
|
3087
|
+
// caller queues it as a fresh primary turn instead.
|
|
3088
|
+
if (this._interruptGraceTimer) return false;
|
|
2001
3089
|
if (!this.bridgeReady) return false;
|
|
2002
3090
|
if (typeof content !== 'string' || !content) return false;
|
|
2003
3091
|
|
|
@@ -2026,9 +3114,14 @@ class CliProcess extends Process {
|
|
|
2026
3114
|
this.emit('inject-fail', { err: 'bridge write failed', source: 'inject' });
|
|
2027
3115
|
return false;
|
|
2028
3116
|
}
|
|
3117
|
+
// 0.13 D2: the injected turn_id is LEDGERED — pre-P3 it never escaped this
|
|
3118
|
+
// function, making fold/new-turn/drop indistinguishable (seam S4).
|
|
3119
|
+
this._ledgerAdd(turnId, { source, msgId });
|
|
2029
3120
|
this._logEvent('inject-user-message', {
|
|
2030
3121
|
session_key: this.sessionKey,
|
|
2031
3122
|
chat_id: this.chatId,
|
|
3123
|
+
turn_id: turnId,
|
|
3124
|
+
source,
|
|
2032
3125
|
priority: priority ?? null,
|
|
2033
3126
|
should_query: shouldQuery ?? null,
|
|
2034
3127
|
text_len: safeContent.length,
|
|
@@ -2045,7 +3138,8 @@ class CliProcess extends Process {
|
|
|
2045
3138
|
|
|
2046
3139
|
/**
|
|
2047
3140
|
* Review AC7: fire-and-forget user-message into the bridge. Polygram's
|
|
2048
|
-
*
|
|
3141
|
+
* /compact path, the boot-time compact-replay, and the bg-work stall
|
|
3142
|
+
* self-check use this to push a user-shaped
|
|
2049
3143
|
* prompt without registering a pending turn. SDK/tmux implement this
|
|
2050
3144
|
* differently per backend; channels just writes a user_msg to the bridge
|
|
2051
3145
|
* with a fresh turn_id (which has no listener — so any reply Claude sends
|
|
@@ -2059,6 +3153,7 @@ class CliProcess extends Process {
|
|
|
2059
3153
|
if (typeof text !== 'string' || text.length === 0) return false;
|
|
2060
3154
|
if (this.closed || !this.bridgeReady) return false;
|
|
2061
3155
|
const turnId = crypto.randomUUID();
|
|
3156
|
+
this._ledgerAdd(turnId, { source: 'system' }); // 0.13 D2: visible, never redelivered
|
|
2062
3157
|
this._writeToBridge({
|
|
2063
3158
|
kind: 'user_msg',
|
|
2064
3159
|
turn_id: turnId,
|
|
@@ -2092,6 +3187,7 @@ class CliProcess extends Process {
|
|
|
2092
3187
|
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
2093
3188
|
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
2094
3189
|
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer); // L5
|
|
3190
|
+
if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer); // 0.13 D1
|
|
2095
3191
|
if (pending._onStop) this.off('stop-hook', pending._onStop); // L5
|
|
2096
3192
|
const err = new Error(`session reset: ${reason}`);
|
|
2097
3193
|
err.code = 'RESET';
|
|
@@ -2237,6 +3333,49 @@ class CliProcess extends Process {
|
|
|
2237
3333
|
this._writeToBridge({ kind: 'perm_verdict', request_id: requestId, behavior });
|
|
2238
3334
|
}
|
|
2239
3335
|
|
|
3336
|
+
// ─── interactive questions (0.12 ask) ─────────────────────────────
|
|
3337
|
+
|
|
3338
|
+
/**
|
|
3339
|
+
* Hand a question's answer back to the blocking `ask` tool call. `result` is
|
|
3340
|
+
* {answers:[...]} | {cancelled:true} | {timedout:true}. Stops the keep-alive
|
|
3341
|
+
* once no questions remain open. Called by pm.answerQuestion (from the handler).
|
|
3342
|
+
*/
|
|
3343
|
+
writeQuestionAnswer(toolCallId, result) {
|
|
3344
|
+
this._openQuestions.delete(toolCallId);
|
|
3345
|
+
const noneLeft = this._openQuestions.size === 0;
|
|
3346
|
+
if (noneLeft) this._stopQuestionKeepAlive();
|
|
3347
|
+
const wrote = this._writeToBridge({ kind: 'question_answer', tool_call_id: toolCallId, result: result ?? {} });
|
|
3348
|
+
// Re-light progress: claude is about to resume working on the answer. The per-turn reactor
|
|
3349
|
+
// cleared when claude posted its reply + asked, and no tool hooks fired during the wait, so
|
|
3350
|
+
// it stayed cleared — the post-answer work was invisible ("why don't I see it working after
|
|
3351
|
+
// submit?", hire topic 2026-06-09). On a REAL answer (cancelled/timedout END the turn → let
|
|
3352
|
+
// the normal teardown clear), signal polygram to re-arm the turn's working reaction.
|
|
3353
|
+
if (noneLeft && result && !result.cancelled && !result.timedout) {
|
|
3354
|
+
this.emit('question-resumed');
|
|
3355
|
+
}
|
|
3356
|
+
// 0.13 D1: the wait is over either way — restart the activity clock so a
|
|
3357
|
+
// replied turn's rung-2 finalize resumes (real answer: claude works on;
|
|
3358
|
+
// cancelled/timedout: claude wraps up — rung 2 then ends the tail cleanly).
|
|
3359
|
+
if (noneLeft) this._noteActivity('question-answered');
|
|
3360
|
+
return wrote;
|
|
3361
|
+
}
|
|
3362
|
+
|
|
3363
|
+
_startQuestionKeepAlive() {
|
|
3364
|
+
if (this._questionKeepAliveTimer) return;
|
|
3365
|
+
this._questionKeepAliveTimer = setInterval(() => {
|
|
3366
|
+
if (this._openQuestions.size === 0) { this._stopQuestionKeepAlive(); return; }
|
|
3367
|
+
// claude is idle waiting on the answer → no tool hooks → reset the idle
|
|
3368
|
+
// ceiling so the turn isn't killed mid-question. (Rung 2 is suspended
|
|
3369
|
+
// while a question is open, so this only feeds the hardTimer.)
|
|
3370
|
+
this._noteActivity('question-keepalive');
|
|
3371
|
+
}, 60_000);
|
|
3372
|
+
this._questionKeepAliveTimer.unref?.();
|
|
3373
|
+
}
|
|
3374
|
+
|
|
3375
|
+
_stopQuestionKeepAlive() {
|
|
3376
|
+
if (this._questionKeepAliveTimer) { clearInterval(this._questionKeepAliveTimer); this._questionKeepAliveTimer = null; }
|
|
3377
|
+
}
|
|
3378
|
+
|
|
2240
3379
|
// ─── socket plumbing ──────────────────────────────────────────────
|
|
2241
3380
|
|
|
2242
3381
|
_writeToBridge(obj) {
|
|
@@ -2280,6 +3419,11 @@ class CliProcess extends Process {
|
|
|
2280
3419
|
this._pollMidTurnDialogs().catch((err) => {
|
|
2281
3420
|
this.logger.warn?.(`[${this.label}] channels: mid-turn poll failed: ${err.message}`);
|
|
2282
3421
|
});
|
|
3422
|
+
// 0.12.0 background-work lifecycle: idle-side stall-watchdog, the mirror of
|
|
3423
|
+
// _pollMidTurnDialogs (which only runs during turns). Fire-and-forget.
|
|
3424
|
+
this._pollBackgroundWork().catch((err) => {
|
|
3425
|
+
this.logger.warn?.(`[${this.label}] channels: bg-work poll failed: ${err.message}`);
|
|
3426
|
+
});
|
|
2283
3427
|
}, PONG_CHECK_INTERVAL_MS);
|
|
2284
3428
|
this.pongWatchdog.unref?.();
|
|
2285
3429
|
}
|
|
@@ -2305,9 +3449,46 @@ class CliProcess extends Process {
|
|
|
2305
3449
|
* Extracted as a separate async method so unit tests can drive it
|
|
2306
3450
|
* directly without waiting for the setInterval tick.
|
|
2307
3451
|
*/
|
|
3452
|
+
/**
|
|
3453
|
+
* 0.12.0-rc.13: proactive compaction warning. Read the transcript's current
|
|
3454
|
+
* context occupancy and, if past the per-chat threshold, emit a
|
|
3455
|
+
* 'compaction-warn' event that the chat layer turns into "you're ~N% full, run
|
|
3456
|
+
* /compact" — giving the user a window to compact on their terms BEFORE
|
|
3457
|
+
* claude auto-compacts mid-turn (which detaches the channels bridge). Warns
|
|
3458
|
+
* once per climb (this._compactionWarned), re-armed on PostCompact.
|
|
3459
|
+
* Fire-and-forget: swallows its own errors so transcript IO never breaks
|
|
3460
|
+
* the turn-end path.
|
|
3461
|
+
*/
|
|
3462
|
+
async _maybeProactiveCompactionWarn(transcriptPath) {
|
|
3463
|
+
try {
|
|
3464
|
+
if (!this.compactionWarn?.enabled || this._compactionWarned) return;
|
|
3465
|
+
const usage = await readContextTokens(transcriptPath);
|
|
3466
|
+
if (!usage) return;
|
|
3467
|
+
const pct = contextPct(usage.total) * 100;
|
|
3468
|
+
if (pct < this.compactionWarn.thresholdPct) return;
|
|
3469
|
+
if (this._compactionWarned) return; // re-check after the async gap
|
|
3470
|
+
this._compactionWarned = true;
|
|
3471
|
+
this.emit('compaction-warn', {
|
|
3472
|
+
kind: 'proactive',
|
|
3473
|
+
pct: Math.round(pct),
|
|
3474
|
+
totalTokens: usage.total,
|
|
3475
|
+
sessionId: this.claudeSessionId,
|
|
3476
|
+
backend: this.backend,
|
|
3477
|
+
});
|
|
3478
|
+
} catch (err) {
|
|
3479
|
+
this.logger.warn?.(`[${this.label}] compaction-warn sample failed: ${err.message}`);
|
|
3480
|
+
}
|
|
3481
|
+
}
|
|
3482
|
+
|
|
2308
3483
|
async _pollMidTurnDialogs() {
|
|
2309
3484
|
if (this.closed) return;
|
|
2310
3485
|
if (this.pendingTurns.size === 0) return; // no work to do when idle
|
|
3486
|
+
// 0.12 interactive questions: while an `ask` is open claude sits idle at the
|
|
3487
|
+
// prompt waiting on the tool result — so the pane shows no "esc to interrupt"
|
|
3488
|
+
// and the question's own echoed text (a "?"/numbered list/"Yes/No") would
|
|
3489
|
+
// false-trip the unknown-prompt heuristic + starve the STALL heartbeat. The
|
|
3490
|
+
// keyboard lives on Telegram; suppress the pane watchdog while a question is open.
|
|
3491
|
+
if (this._openQuestions.size > 0) return;
|
|
2311
3492
|
if (!this.tmuxSession) return; // pre-spawn / post-kill
|
|
2312
3493
|
if (typeof this.runner?.captureWide !== 'function') return;
|
|
2313
3494
|
|
|
@@ -2323,6 +3504,15 @@ class CliProcess extends Process {
|
|
|
2323
3504
|
}
|
|
2324
3505
|
if (!pane) return;
|
|
2325
3506
|
|
|
3507
|
+
// rc.14: removed the rc.11 pane-based "dead bridge" detection here. It
|
|
3508
|
+
// matched the BENIGN banner "server:polygram-bridge no MCP server
|
|
3509
|
+
// configured with that name" — a cosmetic line that
|
|
3510
|
+
// `--dangerously-load-development-channels` + `--strict-mcp-config` prints
|
|
3511
|
+
// on EVERY healthy session (channel still delivers; reply tool still
|
|
3512
|
+
// works). The matcher false-fired ~5s into every channels turn and killed
|
|
3513
|
+
// healthy sessions. Real bridge loss is the socket-close path
|
|
3514
|
+
// (_handleBridgeDisconnected), not anything observable in the pane.
|
|
3515
|
+
|
|
2326
3516
|
const now = Date.now();
|
|
2327
3517
|
|
|
2328
3518
|
// 0.12 Phase 3.2: liveness heartbeat. The TUI prints "esc to interrupt"
|
|
@@ -2333,6 +3523,11 @@ class CliProcess extends Process {
|
|
|
2333
3523
|
// resets a timer; safe to fire on every poll while claude is busy.
|
|
2334
3524
|
if (STREAMING_HINT_RE.test(pane)) {
|
|
2335
3525
|
this.emit('thinking');
|
|
3526
|
+
// 0.13 D1: the pane heartbeat is ACTIVITY for the finalizer ladder —
|
|
3527
|
+
// pure-thinking stretches fire ZERO hooks for 45s+ (that is this
|
|
3528
|
+
// heartbeat's whole reason to exist), so a hook-only quiet clock would
|
|
3529
|
+
// finalize a replied turn mid-thought (round-2 panel finding).
|
|
3530
|
+
this._noteActivity('pane-thinking');
|
|
2336
3531
|
}
|
|
2337
3532
|
|
|
2338
3533
|
let matchedKnownPrompt = false;
|
|
@@ -2359,16 +3554,28 @@ class CliProcess extends Process {
|
|
|
2359
3554
|
pending_count: this.pendingTurns.size,
|
|
2360
3555
|
});
|
|
2361
3556
|
|
|
2362
|
-
if (prompt.action === 'enter') {
|
|
2363
|
-
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
3557
|
+
if (prompt.action === 'enter' || prompt.action === 'keys') {
|
|
3558
|
+
// 'keys' sends a navigation sequence (e.g. Down,Enter to pick a
|
|
3559
|
+
// non-default dialog option); 'enter' stays the single-key dismissal.
|
|
3560
|
+
const keySeq = prompt.action === 'keys' ? prompt.keys : ['Enter'];
|
|
3561
|
+
for (let ki = 0; ki < keySeq.length; ki++) {
|
|
3562
|
+
if (ki > 0) await new Promise(r => setTimeout(r, 120)); // Ink can swallow same-batch keys
|
|
3563
|
+
try {
|
|
3564
|
+
await this.runner.sendControl(this.tmuxSession, keySeq[ki]);
|
|
3565
|
+
} catch (err) {
|
|
3566
|
+
this.logger.warn?.(
|
|
3567
|
+
`[${this.label}] cli: mid-turn ${keySeq[ki]} failed for ${prompt.name}: ${err.message}`,
|
|
3568
|
+
);
|
|
3569
|
+
}
|
|
2369
3570
|
}
|
|
2370
3571
|
}
|
|
2371
3572
|
// 'emit-only': telemetry-only; operator decides next step.
|
|
3573
|
+
// Resume-dialog fix: the session-age dialog escaping to MID-TURN means
|
|
3574
|
+
// env suppression failed AND the startup gate didn't see it — same
|
|
3575
|
+
// soak-queryable event kind as the startup-gate fallback.
|
|
3576
|
+
if (prompt.name === 'session-age') {
|
|
3577
|
+
this._logEvent('session-age-dialog-fallback', { tmux_name: this.tmuxSession, phase: 'mid-turn' });
|
|
3578
|
+
}
|
|
2372
3579
|
}
|
|
2373
3580
|
|
|
2374
3581
|
// 0.12 Phase 3.3 (Q1 resolution): unknown-prompt heuristic. If the pane
|