polygram 0.12.0-rc.2 → 0.12.0-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -416,28 +416,7 @@ class CliProcess extends Process {
416
416
 
417
417
  this.bridgeServer.on('bridge-message', msg => this._handleBridgeMessage(msg));
418
418
 
419
- this.bridgeServer.on('bridge-disconnected', () => {
420
- this.bridgeReady = false;
421
- this.mcpReady = false;
422
- if (!this.closed) {
423
- this.logger.warn?.(`[${this.label}] channels: bridge disconnected unexpectedly`);
424
- // P1 #5: drain pendingTurns immediately so hardTimers don't run 10min.
425
- for (const [, pending] of this.pendingTurns) {
426
- if (pending.quietTimer) clearTimeout(pending.quietTimer);
427
- if (pending.hardTimer) clearTimeout(pending.hardTimer);
428
- if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
429
- if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
430
- const err = new Error('bridge disconnected');
431
- err.code = 'BRIDGE_DISCONNECTED';
432
- try { pending.reject(err); } catch {}
433
- }
434
- this.pendingTurns.clear();
435
- this.pendingQueue.length = 0;
436
- this.inFlight = false;
437
- this.emit('bridge-disconnected');
438
- this._logEvent('bridge-disconnected', { reason: 'socket-close' });
439
- }
440
- });
419
+ this.bridgeServer.on('bridge-disconnected', () => this._handleBridgeDisconnected());
441
420
 
442
421
  await this.bridgeServer.listen();
443
422
  }
@@ -540,6 +519,9 @@ class CliProcess extends Process {
540
519
  );
541
520
  }
542
521
  }
522
+ // Finding 0.12-M2: record the resume decision so _armHookTail (run
523
+ // after spawn) skips the prior session's still-on-disk hook ndjson.
524
+ this._resumedSession = canResume;
543
525
  if (agent) claudeArgs.push('--agent', agent);
544
526
  if (model) claudeArgs.unshift('--model', model);
545
527
  if (effort) claudeArgs.push('--effort', effort);
@@ -705,6 +687,15 @@ class CliProcess extends Process {
705
687
  ],
706
688
  readySignal: /Listening for channel messages from: server:polygram-bridge/i,
707
689
  timeoutCode: 'CHANNELS_DIALOG_TIMEOUT',
690
+ // Progress-aware gate (shumorobot General incident 2026-05-30): a
691
+ // cold spawn that's mid-download (runtime fetch, "24%" progress bar)
692
+ // is genuinely working and must NOT be killed by the blind 30s
693
+ // wall-clock. stallMs fails fast only when the pane is FROZEN; an
694
+ // actively-changing pane (download bar, dialog nav) keeps resetting
695
+ // the stall clock and rides out to the ready signal. deadlineMs stays
696
+ // the absolute backstop. 30s of zero pane activity = genuinely wedged.
697
+ stallMs: this.startupGateStallMs ?? 30_000,
698
+ deadlineMs: this.startupGateDeadlineMs ?? 180_000,
708
699
  logger: this.logger,
709
700
  label: `${this.label}:startup-gate`,
710
701
  });
@@ -849,15 +840,18 @@ class CliProcess extends Process {
849
840
  // rate-limit / chat-id-mismatch path. Live shumorobot 2026-05-26 23:44
850
841
  // observed 3+ "Called polygram-bridge" entries in the TUI pane with
851
842
  // ZERO OUT messages delivered to TG and zero warn-level diagnostics —
852
- // need to see args.text / args.chat_id / args.turn_id to know whether
853
- // claude is calling reply with empty text, wrong chat_id, or something
854
- // else entirely.
855
- this.logger.warn?.(
843
+ // need to see args.chat_id / args.turn_id to know whether claude is
844
+ // calling reply with empty text, wrong chat_id, or something else.
845
+ // L13: root-caused — demoted to debug and DROPPED text_head. Logging
846
+ // the first 80 chars of every reply at warn level leaked private chat
847
+ // content / file excerpts / secrets into the default log sink,
848
+ // unconditionally. name/chat_id/turn_id/text_len diagnose dispatch
849
+ // without exposing message content.
850
+ this.logger.debug?.(
856
851
  `[${this.label}] channels: tool-call name=${msg.name} ` +
857
852
  `chat_id=${JSON.stringify(args.chat_id)} ` +
858
853
  `turn_id=${JSON.stringify(args.turn_id)} ` +
859
- `text_len=${typeof args.text === 'string' ? args.text.length : 'non-string'} ` +
860
- `text_head=${JSON.stringify((args.text || '').slice(0, 80))}`,
854
+ `text_len=${typeof args.text === 'string' ? args.text.length : 'non-string'}`,
861
855
  );
862
856
 
863
857
  // Review P1 #7: idempotency. If we've already ACK'd this tool_call_id,
@@ -1122,13 +1116,27 @@ class CliProcess extends Process {
1122
1116
  this._finalizeTurn(turnId);
1123
1117
  };
1124
1118
  const onStop = (info) => {
1125
- // Capture the fallback text; the actual finalize call below will pick
1126
- // it up via pending._stopHookData.
1119
+ // Finding 0.12-M1: the Stop hook carries NO turn_id, and a single
1120
+ // global 'stop-hook' emission fires EVERY per-turn onStop listener.
1121
+ // When more than one turn is in stop-grace we cannot attribute this
1122
+ // Stop (or its last_assistant_message) to a specific turn — the
1123
+ // pre-fix code let one Stop finalize all grace-pending turns and
1124
+ // cross-attribute one turn's text to another (the exact class the
1125
+ // F#3 reply routing prevents). Mirror that drop-rather-than-
1126
+ // misattribute discipline: only consume the Stop when exactly ONE
1127
+ // turn is in grace; otherwise ignore it and let each turn finalize
1128
+ // on its own grace timer (each keeps its own reply text).
1129
+ let graceCount = 0;
1130
+ for (const p of this.pendingTurns.values()) if (p._stopGracePending) graceCount++;
1131
+ if (graceCount !== 1) return;
1127
1132
  pending._stopHookData = info;
1128
1133
  clearTimeout(pending._stopGraceTimer);
1129
1134
  pending._stopGraceTimer = null;
1130
1135
  finalize();
1131
1136
  };
1137
+ // L5: stash the closure so teardown paths that bypass Process.kill()'s
1138
+ // removeAllListeners (bridge-disconnect drain, resetSession) can off it.
1139
+ pending._onStop = onStop;
1132
1140
  pending._stopGraceTimer = setTimeout(finalize, this.stopGraceMs);
1133
1141
  // unref so a never-fired grace doesn't pin the event loop. In tests
1134
1142
  // where a CliProcess is created, send() is called, then the test
@@ -1415,17 +1423,18 @@ class CliProcess extends Process {
1415
1423
  this.logger.warn?.(`[${this.label}] _armHookTail: _hookNdjsonPath unset; hooks disabled. Phase 1.2 may have failed.`);
1416
1424
  return;
1417
1425
  }
1418
- // Fresh spawn: ndjson was just touched by writeHookFiles and is empty,
1419
- // so `skipExisting: false` (default) is correct. For lazy-respawn on
1420
- // existingSessionId, we currently re-run writeHookFiles which touches
1421
- // a NEW file with the same name (overwrite). If we ever switch to
1422
- // resume-without-touch, set skipExisting: true to avoid replaying
1423
- // stale events from the prior process — same pattern tmux uses on
1424
- // --resume per rc.42 #5.
1426
+ // Finding 0.12-M2: writeHookFiles opens the ndjson in APPEND mode
1427
+ // ('a') and never truncates, so on a --resume respawn the prior
1428
+ // session's hook lines are still on disk under the same path. Replaying
1429
+ // them re-drives the turn state machine from stale Stop/PreToolUse
1430
+ // events (a stale Stop can finalize the fresh turn). So skip existing
1431
+ // content when (and only when) this is a resumed session — the same
1432
+ // discipline the JSONL tail uses on --resume. A fresh spawn's ndjson is
1433
+ // empty, so skipExisting:false is correct there.
1425
1434
  this._hookTail = createHookTail({
1426
1435
  path: this._hookNdjsonPath,
1427
1436
  logger: this.logger,
1428
- skipExisting: false,
1437
+ skipExisting: this._resumedSession === true,
1429
1438
  });
1430
1439
  this._hookTail.on('event', (ev) => {
1431
1440
  try {
@@ -1465,25 +1474,18 @@ class CliProcess extends Process {
1465
1474
  // gates tag-out on median < 2s and p99 < 5s across the events DB.
1466
1475
  if (Number.isFinite(ev.receivedAtMs)) {
1467
1476
  const lagMs = Date.now() - ev.receivedAtMs;
1477
+ // L10: emit ONLY — the onHookLagSample callback owns the DB write
1478
+ // (CALLBACK_TO_EVENT → callbacks.js). Previously this ALSO wrote
1479
+ // directly via this.db.logEvent, double-persisting every sample and
1480
+ // inflating the Phase 1.8 soak-gate row count. Consistent with how
1481
+ // tool-result / subagent-start / subagent-done are handled (emit,
1482
+ // don't double-write).
1468
1483
  this.emit('hook-lag-sample', {
1469
1484
  hookEventName: ev.type,
1470
1485
  lagMs,
1471
1486
  toolName: ev.toolName || null,
1472
1487
  backend: this.backend,
1473
1488
  });
1474
- // Log to events DB if wired. db is optional (factory injects when
1475
- // available) — same pattern as the other parity-P1 _logEvent calls.
1476
- if (this.db?.logEvent) {
1477
- try {
1478
- this.db.logEvent('hook-lag-sample', {
1479
- session_key: this.sessionKey,
1480
- backend: this.backend,
1481
- hook_event_name: ev.type,
1482
- tool_name: ev.toolName || null,
1483
- lag_ms: lagMs,
1484
- });
1485
- } catch {}
1486
- }
1487
1489
  }
1488
1490
 
1489
1491
  switch (ev.type) {
@@ -1503,6 +1505,16 @@ class CliProcess extends Process {
1503
1505
  const subagentType = ev.toolInput?.subagent_type
1504
1506
  || ev.toolInput?.agent_type
1505
1507
  || 'general-purpose';
1508
+ // Finding 0.12-M4: SubagentStop carries agent_id/agent_type but
1509
+ // NOT the originating Agent tool_use_id, so without help the
1510
+ // subagent-start/subagent-done rows share no JOIN key (the
1511
+ // documented soak query on $.tool_use_id returns zero rows).
1512
+ // Track the in-flight Agent tool_use_id keyed by subagent type so
1513
+ // the paired SubagentStop below can stamp it onto subagent-done.
1514
+ (this._pendingSubagentStarts ||= []).push({
1515
+ agentType: subagentType,
1516
+ toolUseId: ev.toolUseId,
1517
+ });
1506
1518
  this.emit('subagent-start', {
1507
1519
  agentType: subagentType,
1508
1520
  // PreToolUse for Agent carries no agent_id (set later on
@@ -1541,14 +1553,27 @@ class CliProcess extends Process {
1541
1553
  });
1542
1554
  return;
1543
1555
 
1544
- case 'SubagentStop':
1556
+ case 'SubagentStop': {
1557
+ // Finding 0.12-M4: recover the originating Agent tool_use_id so the
1558
+ // subagent-start/subagent-done pair is JOINable. Prefer a match on
1559
+ // agent type (correct for parallel subagents of different types);
1560
+ // fall back to the oldest pending start when types don't line up.
1561
+ let subagentToolUseId = null;
1562
+ const pendingStarts = this._pendingSubagentStarts;
1563
+ if (pendingStarts && pendingStarts.length) {
1564
+ let idx = pendingStarts.findIndex(s => s.agentType === ev.agentType);
1565
+ if (idx < 0) idx = 0;
1566
+ subagentToolUseId = pendingStarts.splice(idx, 1)[0]?.toolUseId ?? null;
1567
+ }
1545
1568
  this.emit('subagent-done', {
1546
1569
  agentType: ev.agentType,
1547
1570
  agentId: ev.agentId,
1548
1571
  durationMs: ev.durationMs,
1572
+ toolUseId: subagentToolUseId,
1549
1573
  backend: this.backend,
1550
1574
  });
1551
1575
  return;
1576
+ }
1552
1577
 
1553
1578
  case 'Stop':
1554
1579
  // Phase 1.7 (TODO) will use this as the authoritative turn-end
@@ -1665,6 +1690,50 @@ class CliProcess extends Process {
1665
1690
  }
1666
1691
  }
1667
1692
 
1693
+ /**
1694
+ * Drain on unexpected bridge socket loss (claude crash, bridge crash,
1695
+ * EOF). Extracted from the inline 'bridge-disconnected' handler so the
1696
+ * teardown is testable and consistent with _doKill.
1697
+ *
1698
+ * Findings 0.12-L5 + L6: in addition to clearing the per-turn timers
1699
+ * and rejecting pendings (the original P1 #5 behavior), this now also
1700
+ * (L5) removes each turn's stop-hook listener — this drain does NOT go
1701
+ * through Process.kill()'s blanket removeAllListeners, so a turn torn
1702
+ * down mid-stop-grace would otherwise leak its onStop closure — and
1703
+ * (L6) clears _interruptGraceTimer, matching _doKill (a /stop verdict
1704
+ * landing just before the disconnect would otherwise leave a stray
1705
+ * timer on the dead instance).
1706
+ */
1707
+ _handleBridgeDisconnected() {
1708
+ this.bridgeReady = false;
1709
+ this.mcpReady = false;
1710
+ if (this.closed) return;
1711
+ this.logger.warn?.(`[${this.label}] channels: bridge disconnected unexpectedly`);
1712
+ // L6: clear the interrupt grace timer alongside the rest of the lifecycle.
1713
+ if (this._interruptGraceTimer) {
1714
+ clearTimeout(this._interruptGraceTimer);
1715
+ this._interruptGraceTimer = null;
1716
+ }
1717
+ // P1 #5: drain pendingTurns immediately so hardTimers don't run 10min.
1718
+ for (const [, pending] of this.pendingTurns) {
1719
+ if (pending.quietTimer) clearTimeout(pending.quietTimer);
1720
+ if (pending.hardTimer) clearTimeout(pending.hardTimer);
1721
+ if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
1722
+ if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
1723
+ // L5: remove the per-turn stop-hook listener (this path bypasses
1724
+ // Process.kill()'s removeAllListeners).
1725
+ if (pending._onStop) this.off('stop-hook', pending._onStop);
1726
+ const err = new Error('bridge disconnected');
1727
+ err.code = 'BRIDGE_DISCONNECTED';
1728
+ try { pending.reject(err); } catch {}
1729
+ }
1730
+ this.pendingTurns.clear();
1731
+ this.pendingQueue.length = 0;
1732
+ this.inFlight = false;
1733
+ this.emit('bridge-disconnected');
1734
+ this._logEvent('bridge-disconnected', { reason: 'socket-close' });
1735
+ }
1736
+
1668
1737
  async _doKill(reason) {
1669
1738
  this.closed = true;
1670
1739
  this.inFlight = false;
@@ -1688,6 +1757,7 @@ class CliProcess extends Process {
1688
1757
  if (pending.hardTimer) clearTimeout(pending.hardTimer);
1689
1758
  if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
1690
1759
  if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
1760
+ if (pending._onStop) this.off('stop-hook', pending._onStop); // L5
1691
1761
  const err = new Error(`session killed: ${reason}`);
1692
1762
  err.code = 'KILLED';
1693
1763
  pending.reject(err);
@@ -1876,6 +1946,8 @@ class CliProcess extends Process {
1876
1946
  if (pending.quietTimer) clearTimeout(pending.quietTimer);
1877
1947
  if (pending.hardTimer) clearTimeout(pending.hardTimer);
1878
1948
  if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
1949
+ if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer); // L5
1950
+ if (pending._onStop) this.off('stop-hook', pending._onStop); // L5
1879
1951
  const err = new Error(`session reset: ${reason}`);
1880
1952
  err.code = 'RESET';
1881
1953
  try { pending.reject(err); } catch {}
@@ -91,10 +91,6 @@ function _maybeWarnR12Migration({ rawPm, canonical, chatId, threadId, chatCfg, t
91
91
  * @param {number} [opts.queryCloseTimeoutMs]
92
92
  * @param {object} [opts.tmuxRunner] — required when ANY chat routes to 'cli'
93
93
  * @param {string} [opts.botName] — required when ANY chat routes to 'cli'
94
- * @param {object} [opts.pollScheduler] — DEPRECATED in 0.12 — was used by the
95
- * removed tmux backend to share one setInterval across all chats; CliProcess's
96
- * per-session pongWatchdog handles its own cadence. Param kept for caller
97
- * back-compat; ignored. Will be removed in 0.13.
98
94
  * @param {Function} [opts.toolDispatcher] — required when ANY chat routes to 'cli'.
99
95
  * async ({sessionKey, chatId, threadId, toolName, text, files}) => {ok, error?}.
100
96
  * Called when Claude's reply (or react/edit_message) tool fires inside a
@@ -113,7 +109,6 @@ function createProcessFactory({
113
109
  queryCloseTimeoutMs,
114
110
  tmuxRunner = null,
115
111
  botName = null,
116
- pollScheduler = null,
117
112
  toolDispatcher = null,
118
113
  channelsClaudeBin = null,
119
114
  } = {}) {
@@ -464,7 +464,10 @@ function createSdkCallbacks({
464
464
  const detail = {
465
465
  chat_id: getChatIdFromKey(sessionKey),
466
466
  session_key: sessionKey,
467
- backend: 'tmux',
467
+ // Finding 0.12-M3: tmux backend was deleted in 0.12; these hook
468
+ // handlers only ever fire on the CLI driver now — default to 'cli'
469
+ // (honor an explicit payload.backend if a caller ever sets one).
470
+ backend: payload?.backend ?? 'cli',
468
471
  hook_type: payload?.type ?? null,
469
472
  claude_session_id: payload?.sessionId ?? null,
470
473
  tool_name: payload?.toolName ?? null,
@@ -555,7 +558,7 @@ function createSdkCallbacks({
555
558
  logEvent('turn-timeout', {
556
559
  chat_id: getChatIdFromKey(sessionKey),
557
560
  session_key: sessionKey,
558
- backend: 'tmux',
561
+ backend: payload?.backend ?? 'cli', // Finding 0.12-M3
559
562
  turn_id: payload?.turnId ?? null,
560
563
  reason: payload?.reason ?? null,
561
564
  idle_ms: payload?.idleMs ?? null,
@@ -578,7 +581,7 @@ function createSdkCallbacks({
578
581
  logEvent('hook-tail-error', {
579
582
  chat_id: getChatIdFromKey(sessionKey),
580
583
  session_key: sessionKey,
581
- backend: 'tmux',
584
+ backend: payload?.backend ?? 'cli', // Finding 0.12-M3 (fires on the CLI hook tail)
582
585
  message: (payload?.message || '').slice(0, 200),
583
586
  path: payload?.path ?? null,
584
587
  claude_session_id: payload?.sessionId ?? null,
@@ -596,7 +599,7 @@ function createSdkCallbacks({
596
599
  logEvent('stop-hook-resolved', {
597
600
  chat_id: getChatIdFromKey(sessionKey),
598
601
  session_key: sessionKey,
599
- backend: 'tmux',
602
+ backend: payload?.backend ?? 'cli', // Finding 0.12-M3
600
603
  turn_id: payload?.turnId ?? null,
601
604
  claude_session_id: payload?.sessionId ?? null,
602
605
  });
@@ -614,7 +617,7 @@ function createSdkCallbacks({
614
617
  logEvent('session-age-prompt-dismissed', {
615
618
  chat_id: getChatIdFromKey(sessionKey),
616
619
  session_key: sessionKey,
617
- backend: 'tmux',
620
+ backend: payload?.backend ?? 'cli', // Finding 0.12-M3
618
621
  claude_session_id: payload?.sessionId ?? null,
619
622
  });
620
623
  } catch (err) {
@@ -680,7 +683,7 @@ function createSdkCallbacks({
680
683
  // ON json_extract(s.detail_json, '$.tool_use_id') =
681
684
  // json_extract(d.detail_json, '$.tool_use_id')
682
685
  // WHERE s.kind='subagent-start' AND d.kind='subagent-done';
683
- onSubagentStart: (sessionKey, payload /* , entry */) => {
686
+ onSubagentStart: (sessionKey, payload, entry) => {
684
687
  try {
685
688
  logEvent('subagent-start', {
686
689
  chat_id: getChatIdFromKey(sessionKey),
@@ -689,13 +692,23 @@ function createSdkCallbacks({
689
692
  agent_type: payload?.agentType ?? null,
690
693
  tool_use_id: payload?.toolUseId ?? null,
691
694
  });
695
+ // Findings L9/L14: drive the head reactor into the distinct SUBAGENT
696
+ // state so a running subagent shows 👾 rather than freezing on the
697
+ // prior tool's emoji. The plan promised this; previously the handler
698
+ // only persisted the DB row and never touched the reactor.
699
+ const r = entry?.pendingQueue?.[0]?.context?.reactor;
700
+ if (r) r.setState('SUBAGENT');
692
701
  } catch (err) {
693
702
  logger.error?.(`[${botName}] subagent-start handler: ${err.message}`);
694
703
  }
695
704
  },
696
705
 
697
- onSubagentDone: (sessionKey, payload /* , entry */) => {
706
+ onSubagentDone: (sessionKey, payload, entry) => {
698
707
  try {
708
+ // L9/L14: heartbeat at subagent end so the cascade/stall clock
709
+ // resets; the next tool's PreToolUse sets the following state.
710
+ const r = entry?.pendingQueue?.[0]?.context?.reactor;
711
+ if (r && typeof r.heartbeat === 'function') r.heartbeat();
699
712
  logEvent('subagent-done', {
700
713
  chat_id: getChatIdFromKey(sessionKey),
701
714
  session_key: sessionKey,
@@ -703,6 +716,11 @@ function createSdkCallbacks({
703
716
  agent_type: payload?.agentType ?? null,
704
717
  agent_id: payload?.agentId ?? null,
705
718
  duration_ms: payload?.durationMs ?? null,
719
+ // Finding 0.12-M4: persist the originating Agent tool_use_id so the
720
+ // documented subagent-start/subagent-done soak JOIN on
721
+ // $.tool_use_id matches (subagent-done's tool_use_id is recovered
722
+ // in cli-process.js from the paired Agent PreToolUse).
723
+ tool_use_id: payload?.toolUseId ?? null,
706
724
  });
707
725
  } catch (err) {
708
726
  logger.error?.(`[${botName}] subagent-done handler: ${err.message}`);
@@ -55,6 +55,11 @@ const STATES = {
55
55
  // mid-turn user message is buffered for the next PostToolBatch
56
56
  // injection.
57
57
  AUTOSTEERED: { label: 'autosteered', chain: ['✍', '👀'] },
58
+ // 0.12 (Findings L9/L14): distinct in-progress reaction for a running
59
+ // subagent (Agent PreToolUse → SubagentStop). Driven by onSubagentStart.
60
+ // Preferred 👾 (NOT 🤖 — 🤖 is REACTION_INVALID for bots, same class as
61
+ // the rc.37 🧐 bug); falls back to 🔥 then 🤔, all bot-usable.
62
+ SUBAGENT: { label: 'subagent', chain: ['👾', '🔥', '🤔'] },
58
63
  DONE: { label: 'done', chain: ['👍'] },
59
64
  ERROR: { label: 'error', chain: ['🤯', '🤔'] },
60
65
  STALL: { label: 'stall', chain: ['🥱', '🤔'] },
@@ -42,6 +42,7 @@
42
42
  const EventEmitter = require('events');
43
43
  const fs = require('fs');
44
44
  const path = require('path');
45
+ const { StringDecoder } = require('string_decoder');
45
46
 
46
47
  const DEFAULT_INTERVAL_MS = 100;
47
48
  // Slow safety-net poll when fs.watch is active. Catches any events
@@ -91,6 +92,13 @@ class LogTail extends EventEmitter {
91
92
  this.fs = fsOverride || fs;
92
93
  this._offset = 0;
93
94
  this._buf = '';
95
+ // L8: decode bytes through a StringDecoder so a multibyte UTF-8 char
96
+ // split across two read chunks (the 64KB DEFAULT_CHUNK_BYTES boundary)
97
+ // isn't corrupted into U+FFFD. The decoder holds an incomplete trailing
98
+ // sequence until the continuation bytes arrive on the next read. The
99
+ // hook ndjson carries large non-ASCII tool payloads, so this is
100
+ // load-bearing on the CliProcess observability path.
101
+ this._decoder = new StringDecoder('utf8');
94
102
  this._closed = false;
95
103
  this._timer = null;
96
104
  this._watcher = null;
@@ -260,7 +268,9 @@ class LogTail extends EventEmitter {
260
268
  const readSize = Math.min(remaining, buffer.length);
261
269
  const { bytesRead } = await fd.read(buffer, 0, readSize, this._offset + totalRead);
262
270
  if (bytesRead === 0) break;
263
- this._buf += buffer.slice(0, bytesRead).toString('utf8');
271
+ // L8: StringDecoder.write instead of per-chunk toString('utf8') so a
272
+ // multibyte char straddling the read boundary survives intact.
273
+ this._buf += this._decoder.write(buffer.subarray(0, bytesRead));
264
274
  totalRead += bytesRead;
265
275
  }
266
276
  this._offset += totalRead;
@@ -17,6 +17,19 @@
17
17
  * - if `readySignal` regex matches the captured pane content, resolve
18
18
  * - if `Date.now()` exceeds the deadline, throw with `err.code = timeoutCode`
19
19
  *
20
+ * Progress-aware (stall) deadline — `stallMs`:
21
+ * The blind wall-clock `deadlineMs` can't tell "claude is mid-download
22
+ * (24% progress bar, genuinely working)" from "claude is wedged". The
23
+ * shumorobot General incident (2026-05-30) killed a cold-spawn that was
24
+ * actively downloading the runtime. When `stallMs` is set, the gate
25
+ * tracks pane ACTIVITY: any change in captured pane content — or a
26
+ * trigger key being sent — resets a stall clock. The gate fails early
27
+ * (with `timeoutCode`) only after `stallMs` elapses with NO activity,
28
+ * i.e. the pane is frozen. `deadlineMs` remains an absolute backstop so
29
+ * a pane that animates forever but never reaches `readySignal` still
30
+ * terminates. When `stallMs` is omitted (default), behavior is the pure
31
+ * `deadlineMs` wall-clock exactly as before.
32
+ *
20
33
  * Each trigger is one-shot per gate run (tracked by `name` in a Set).
21
34
  *
22
35
  * Caller supplies:
@@ -40,7 +53,10 @@ const DEFAULT_SETTLE_MS = 500;
40
53
  * @param {string} opts.tmuxName — tmux session name to poll
41
54
  * @param {Array<{name:string, regex:RegExp, key:string}>} opts.triggers
42
55
  * @param {RegExp} opts.readySignal — match → resolve
43
- * @param {number} [opts.deadlineMs=30000]
56
+ * @param {number} [opts.deadlineMs=30000] — absolute backstop
57
+ * @param {number} [opts.stallMs] — if set, fail after this much
58
+ * wall-clock with NO pane activity (progress-aware). Omit for pure
59
+ * wall-clock behavior.
44
60
  * @param {number} [opts.pollMs=300]
45
61
  * @param {number} [opts.settleMs=500]
46
62
  * @param {string} [opts.timeoutCode='TUI_STARTUP_TIMEOUT']
@@ -54,6 +70,7 @@ async function runStartupGate({
54
70
  triggers = [],
55
71
  readySignal,
56
72
  deadlineMs = DEFAULT_DEADLINE_MS,
73
+ stallMs,
57
74
  pollMs = DEFAULT_POLL_MS,
58
75
  settleMs = DEFAULT_SETTLE_MS,
59
76
  timeoutCode = 'TUI_STARTUP_TIMEOUT',
@@ -70,6 +87,7 @@ async function runStartupGate({
70
87
 
71
88
  const startedAt = Date.now();
72
89
  const deadline = startedAt + deadlineMs;
90
+ const stallEnabled = Number.isFinite(stallMs) && stallMs > 0;
73
91
  const seen = new Set();
74
92
  const matchedTriggers = [];
75
93
  // rc.4: remember the most recent successful pane snapshot. If the gate
@@ -78,8 +96,30 @@ async function runStartupGate({
78
96
  // this, "claude exits code 0 after dev-channels Enter" surfaces as a
79
97
  // 30-second `can't find pane` spam with no diagnostic about WHY.
80
98
  let lastPane = null;
99
+ // Progress-aware gate: timestamp of the last observed pane CHANGE (or
100
+ // trigger send). Seeded to startedAt so a pane that's frozen from the
101
+ // very first capture still trips stallMs. Only consulted when
102
+ // stallEnabled.
103
+ let lastActivityAt = startedAt;
81
104
 
82
105
  while (Date.now() < deadline) {
106
+ // Stall check (progress-aware): the pane has been doing nothing for
107
+ // stallMs. Distinct from the absolute deadline — fires early so a
108
+ // wedged TUI fails fast, while an actively-progressing one (download
109
+ // bar, dialog navigation) keeps resetting lastActivityAt below.
110
+ if (stallEnabled && Date.now() - lastActivityAt >= stallMs) {
111
+ const err = new Error(
112
+ `[${label}] startup gate saw no pane activity for ${stallMs}ms for ${tmuxName} ` +
113
+ `(matched: ${matchedTriggers.length ? matchedTriggers.join(', ') : 'none'}). ` +
114
+ `Pane appears wedged. Last pane content:\n` +
115
+ _formatPaneTail(lastPane),
116
+ );
117
+ err.code = timeoutCode;
118
+ err.lastPane = lastPane;
119
+ err.matchedTriggers = matchedTriggers;
120
+ err.reason = 'stall';
121
+ throw err;
122
+ }
83
123
  let pane;
84
124
  try {
85
125
  pane = await runner.captureWide(tmuxName);
@@ -107,6 +147,10 @@ async function runStartupGate({
107
147
  await new Promise(r => setTimeout(r, settleMs));
108
148
  continue;
109
149
  }
150
+ // Progress signal: any change in pane content is activity → reset the
151
+ // stall clock. A captureWide that returns the SAME bytes is NOT
152
+ // activity (a frozen download bar at 24% reads identically each poll).
153
+ if (pane !== lastPane) lastActivityAt = Date.now();
110
154
  lastPane = pane;
111
155
 
112
156
  // Walk triggers in declaration order — first match (and not yet seen) wins
@@ -122,6 +166,10 @@ async function runStartupGate({
122
166
  seen.add(trigger.name);
123
167
  matchedTriggers.push(trigger.name);
124
168
  matched = true;
169
+ // Sending a key is activity — navigating the TUI counts as progress
170
+ // even if the pre-transition pane text was static (e.g. a dialog we
171
+ // just answered). Reset the stall clock so we don't fail mid-nav.
172
+ lastActivityAt = Date.now();
125
173
  // Settle window so the TUI transitions out of the dialog before next poll
126
174
  await new Promise(r => setTimeout(r, settleMs));
127
175
  break;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.12.0-rc.2",
3
+ "version": "0.12.0-rc.4",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc/client.js",
6
6
  "bin": {
package/polygram.js CHANGED
@@ -51,7 +51,6 @@ const { extractAssistantText } = require('./lib/process/sdk-process');
51
51
  const { createChannelsToolDispatcher } = require('./lib/process/channels-tool-dispatcher');
52
52
  const { createTmuxRunner } = require('./lib/tmux/tmux-runner');
53
53
  const { sweepTmuxOrphans } = require('./lib/tmux/orphan-sweep');
54
- const { PollScheduler } = require('./lib/tmux/poll-scheduler');
55
54
  // rc.42: autosteer-buffer module deleted. Native SDK priority push
56
55
  // (pm.injectUserMessage) replaces the buffer + PostToolBatch detour.
57
56
  const { createAutosteeredRefs } = require('./lib/autosteered-refs');
@@ -2244,19 +2243,13 @@ async function main() {
2244
2243
  const binCheck = verifyPinnedClaudeBin(CLAUDE_CLI_PINNED_VERSION);
2245
2244
  if (binCheck.ok) {
2246
2245
  console.log(
2247
- `[polygram] tmux backend pinned to claude CLI v${CLAUDE_CLI_PINNED_VERSION}: ${binCheck.path}`,
2246
+ `[polygram] CliProcess pinned to claude CLI v${CLAUDE_CLI_PINNED_VERSION}: ${binCheck.path}`,
2248
2247
  );
2249
2248
  pinnedClaudeBin = binCheck.path;
2250
2249
  } else {
2251
2250
  console.warn(`[polygram] WARNING: ${binCheck.reason}`);
2252
2251
  }
2253
2252
  }
2254
- // O1 optimization: shared poll-tick scheduler. N TmuxProcess
2255
- // instances share ONE setInterval instead of spawning N independent
2256
- // setTimeout chains. Idle when no chats are in flight (zero timers
2257
- // running). Configurable via config.bot.tmuxPollIntervalMs.
2258
- const tmuxPollIntervalMs = config.bot?.tmuxPollIntervalMs || 250;
2259
- const pollScheduler = new PollScheduler({ intervalMs: tmuxPollIntervalMs });
2260
2253
  // 0.11.0: channels backend wiring. Used when a chat opts in via
2261
2254
  // `pm: 'channels'` config. Falls back to SDK gracefully if the pinned
2262
2255
  // claude binary isn't present (see factory.js — channelsClaudeBin
@@ -2282,7 +2275,6 @@ async function main() {
2282
2275
  logger: console,
2283
2276
  tmuxRunner,
2284
2277
  botName: BOT_NAME,
2285
- pollScheduler,
2286
2278
  // channels backend
2287
2279
  toolDispatcher: channelsToolDispatcher,
2288
2280
  channelsClaudeBin,