polygram 0.12.0-rc.2 → 0.12.0-rc.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/process/cli-process.js +124 -52
- package/lib/process/factory.js +0 -5
- package/lib/sdk/callbacks.js +25 -7
- package/lib/telegram/reactions.js +5 -0
- package/lib/tmux/log-tail.js +11 -1
- package/lib/tmux/startup-gate.js +49 -1
- package/package.json +1 -1
- package/polygram.js +1 -9
package/lib/process/cli-process.js
CHANGED
|
@@ -416,28 +416,7 @@ class CliProcess extends Process {
|
|
|
416
416
|
|
|
417
417
|
this.bridgeServer.on('bridge-message', msg => this._handleBridgeMessage(msg));
|
|
418
418
|
|
|
419
|
-
this.bridgeServer.on('bridge-disconnected', () =>
|
|
420
|
-
this.bridgeReady = false;
|
|
421
|
-
this.mcpReady = false;
|
|
422
|
-
if (!this.closed) {
|
|
423
|
-
this.logger.warn?.(`[${this.label}] channels: bridge disconnected unexpectedly`);
|
|
424
|
-
// P1 #5: drain pendingTurns immediately so hardTimers don't run 10min.
|
|
425
|
-
for (const [, pending] of this.pendingTurns) {
|
|
426
|
-
if (pending.quietTimer) clearTimeout(pending.quietTimer);
|
|
427
|
-
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
428
|
-
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
429
|
-
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
|
|
430
|
-
const err = new Error('bridge disconnected');
|
|
431
|
-
err.code = 'BRIDGE_DISCONNECTED';
|
|
432
|
-
try { pending.reject(err); } catch {}
|
|
433
|
-
}
|
|
434
|
-
this.pendingTurns.clear();
|
|
435
|
-
this.pendingQueue.length = 0;
|
|
436
|
-
this.inFlight = false;
|
|
437
|
-
this.emit('bridge-disconnected');
|
|
438
|
-
this._logEvent('bridge-disconnected', { reason: 'socket-close' });
|
|
439
|
-
}
|
|
440
|
-
});
|
|
419
|
+
this.bridgeServer.on('bridge-disconnected', () => this._handleBridgeDisconnected());
|
|
441
420
|
|
|
442
421
|
await this.bridgeServer.listen();
|
|
443
422
|
}
|
|
@@ -540,6 +519,9 @@ class CliProcess extends Process {
|
|
|
540
519
|
);
|
|
541
520
|
}
|
|
542
521
|
}
|
|
522
|
+
// Finding 0.12-M2: record the resume decision so _armHookTail (run
|
|
523
|
+
// after spawn) skips the prior session's still-on-disk hook ndjson.
|
|
524
|
+
this._resumedSession = canResume;
|
|
543
525
|
if (agent) claudeArgs.push('--agent', agent);
|
|
544
526
|
if (model) claudeArgs.unshift('--model', model);
|
|
545
527
|
if (effort) claudeArgs.push('--effort', effort);
|
|
@@ -705,6 +687,15 @@ class CliProcess extends Process {
|
|
|
705
687
|
],
|
|
706
688
|
readySignal: /Listening for channel messages from: server:polygram-bridge/i,
|
|
707
689
|
timeoutCode: 'CHANNELS_DIALOG_TIMEOUT',
|
|
690
|
+
// Progress-aware gate (shumorobot General incident 2026-05-30): a
|
|
691
|
+
// cold spawn that's mid-download (runtime fetch, "24%" progress bar)
|
|
692
|
+
// is genuinely working and must NOT be killed by the blind 30s
|
|
693
|
+
// wall-clock. stallMs fails fast only when the pane is FROZEN; an
|
|
694
|
+
// actively-changing pane (download bar, dialog nav) keeps resetting
|
|
695
|
+
// the stall clock and rides out to the ready signal. deadlineMs stays
|
|
696
|
+
// the absolute backstop. 30s of zero pane activity = genuinely wedged.
|
|
697
|
+
stallMs: this.startupGateStallMs ?? 30_000,
|
|
698
|
+
deadlineMs: this.startupGateDeadlineMs ?? 180_000,
|
|
708
699
|
logger: this.logger,
|
|
709
700
|
label: `${this.label}:startup-gate`,
|
|
710
701
|
});
|
|
@@ -849,15 +840,18 @@ class CliProcess extends Process {
|
|
|
849
840
|
// rate-limit / chat-id-mismatch path. Live shumorobot 2026-05-26 23:44
|
|
850
841
|
// observed 3+ "Called polygram-bridge" entries in the TUI pane with
|
|
851
842
|
// ZERO OUT messages delivered to TG and zero warn-level diagnostics —
|
|
852
|
-
// need to see args.
|
|
853
|
-
//
|
|
854
|
-
//
|
|
855
|
-
|
|
843
|
+
// need to see args.chat_id / args.turn_id to know whether claude is
|
|
844
|
+
// calling reply with empty text, wrong chat_id, or something else.
|
|
845
|
+
// L13: root-caused — demoted to debug and DROPPED text_head. Logging
|
|
846
|
+
// the first 80 chars of every reply at warn level leaked private chat
|
|
847
|
+
// content / file excerpts / secrets into the default log sink,
|
|
848
|
+
// unconditionally. name/chat_id/turn_id/text_len diagnose dispatch
|
|
849
|
+
// without exposing message content.
|
|
850
|
+
this.logger.debug?.(
|
|
856
851
|
`[${this.label}] channels: tool-call name=${msg.name} ` +
|
|
857
852
|
`chat_id=${JSON.stringify(args.chat_id)} ` +
|
|
858
853
|
`turn_id=${JSON.stringify(args.turn_id)} ` +
|
|
859
|
-
`text_len=${typeof args.text === 'string' ? args.text.length : 'non-string'} ` +
|
|
860
|
-
`text_head=${JSON.stringify((args.text || '').slice(0, 80))}`,
|
|
854
|
+
`text_len=${typeof args.text === 'string' ? args.text.length : 'non-string'}`,
|
|
861
855
|
);
|
|
862
856
|
|
|
863
857
|
// Review P1 #7: idempotency. If we've already ACK'd this tool_call_id,
|
|
@@ -1122,13 +1116,27 @@ class CliProcess extends Process {
|
|
|
1122
1116
|
this._finalizeTurn(turnId);
|
|
1123
1117
|
};
|
|
1124
1118
|
const onStop = (info) => {
|
|
1125
|
-
//
|
|
1126
|
-
//
|
|
1119
|
+
// Finding 0.12-M1: the Stop hook carries NO turn_id, and a single
|
|
1120
|
+
// global 'stop-hook' emission fires EVERY per-turn onStop listener.
|
|
1121
|
+
// When more than one turn is in stop-grace we cannot attribute this
|
|
1122
|
+
// Stop (or its last_assistant_message) to a specific turn — the
|
|
1123
|
+
// pre-fix code let one Stop finalize all grace-pending turns and
|
|
1124
|
+
// cross-attribute one turn's text to another (the exact class the
|
|
1125
|
+
// F#3 reply routing prevents). Mirror that drop-rather-than-
|
|
1126
|
+
// misattribute discipline: only consume the Stop when exactly ONE
|
|
1127
|
+
// turn is in grace; otherwise ignore it and let each turn finalize
|
|
1128
|
+
// on its own grace timer (each keeps its own reply text).
|
|
1129
|
+
let graceCount = 0;
|
|
1130
|
+
for (const p of this.pendingTurns.values()) if (p._stopGracePending) graceCount++;
|
|
1131
|
+
if (graceCount !== 1) return;
|
|
1127
1132
|
pending._stopHookData = info;
|
|
1128
1133
|
clearTimeout(pending._stopGraceTimer);
|
|
1129
1134
|
pending._stopGraceTimer = null;
|
|
1130
1135
|
finalize();
|
|
1131
1136
|
};
|
|
1137
|
+
// L5: stash the closure so teardown paths that bypass Process.kill()'s
|
|
1138
|
+
// removeAllListeners (bridge-disconnect drain, resetSession) can off it.
|
|
1139
|
+
pending._onStop = onStop;
|
|
1132
1140
|
pending._stopGraceTimer = setTimeout(finalize, this.stopGraceMs);
|
|
1133
1141
|
// unref so a never-fired grace doesn't pin the event loop. In tests
|
|
1134
1142
|
// where a CliProcess is created, send() is called, then the test
|
|
@@ -1415,17 +1423,18 @@ class CliProcess extends Process {
|
|
|
1415
1423
|
this.logger.warn?.(`[${this.label}] _armHookTail: _hookNdjsonPath unset; hooks disabled. Phase 1.2 may have failed.`);
|
|
1416
1424
|
return;
|
|
1417
1425
|
}
|
|
1418
|
-
//
|
|
1419
|
-
//
|
|
1420
|
-
//
|
|
1421
|
-
//
|
|
1422
|
-
//
|
|
1423
|
-
//
|
|
1424
|
-
// --resume
|
|
1426
|
+
// Finding 0.12-M2: writeHookFiles opens the ndjson in APPEND mode
|
|
1427
|
+
// ('a') and never truncates, so on a --resume respawn the prior
|
|
1428
|
+
// session's hook lines are still on disk under the same path. Replaying
|
|
1429
|
+
// them re-drives the turn state machine from stale Stop/PreToolUse
|
|
1430
|
+
// events (a stale Stop can finalize the fresh turn). So skip existing
|
|
1431
|
+
// content when (and only when) this is a resumed session — the same
|
|
1432
|
+
// discipline the JSONL tail uses on --resume. A fresh spawn's ndjson is
|
|
1433
|
+
// empty, so skipExisting:false is correct there.
|
|
1425
1434
|
this._hookTail = createHookTail({
|
|
1426
1435
|
path: this._hookNdjsonPath,
|
|
1427
1436
|
logger: this.logger,
|
|
1428
|
-
skipExisting:
|
|
1437
|
+
skipExisting: this._resumedSession === true,
|
|
1429
1438
|
});
|
|
1430
1439
|
this._hookTail.on('event', (ev) => {
|
|
1431
1440
|
try {
|
|
@@ -1465,25 +1474,18 @@ class CliProcess extends Process {
|
|
|
1465
1474
|
// gates tag-out on median < 2s and p99 < 5s across the events DB.
|
|
1466
1475
|
if (Number.isFinite(ev.receivedAtMs)) {
|
|
1467
1476
|
const lagMs = Date.now() - ev.receivedAtMs;
|
|
1477
|
+
// L10: emit ONLY — the onHookLagSample callback owns the DB write
|
|
1478
|
+
// (CALLBACK_TO_EVENT → callbacks.js). Previously this ALSO wrote
|
|
1479
|
+
// directly via this.db.logEvent, double-persisting every sample and
|
|
1480
|
+
// inflating the Phase 1.8 soak-gate row count. Consistent with how
|
|
1481
|
+
// tool-result / subagent-start / subagent-done are handled (emit,
|
|
1482
|
+
// don't double-write).
|
|
1468
1483
|
this.emit('hook-lag-sample', {
|
|
1469
1484
|
hookEventName: ev.type,
|
|
1470
1485
|
lagMs,
|
|
1471
1486
|
toolName: ev.toolName || null,
|
|
1472
1487
|
backend: this.backend,
|
|
1473
1488
|
});
|
|
1474
|
-
// Log to events DB if wired. db is optional (factory injects when
|
|
1475
|
-
// available) — same pattern as the other parity-P1 _logEvent calls.
|
|
1476
|
-
if (this.db?.logEvent) {
|
|
1477
|
-
try {
|
|
1478
|
-
this.db.logEvent('hook-lag-sample', {
|
|
1479
|
-
session_key: this.sessionKey,
|
|
1480
|
-
backend: this.backend,
|
|
1481
|
-
hook_event_name: ev.type,
|
|
1482
|
-
tool_name: ev.toolName || null,
|
|
1483
|
-
lag_ms: lagMs,
|
|
1484
|
-
});
|
|
1485
|
-
} catch {}
|
|
1486
|
-
}
|
|
1487
1489
|
}
|
|
1488
1490
|
|
|
1489
1491
|
switch (ev.type) {
|
|
@@ -1503,6 +1505,16 @@ class CliProcess extends Process {
|
|
|
1503
1505
|
const subagentType = ev.toolInput?.subagent_type
|
|
1504
1506
|
|| ev.toolInput?.agent_type
|
|
1505
1507
|
|| 'general-purpose';
|
|
1508
|
+
// Finding 0.12-M4: SubagentStop carries agent_id/agent_type but
|
|
1509
|
+
// NOT the originating Agent tool_use_id, so without help the
|
|
1510
|
+
// subagent-start/subagent-done rows share no JOIN key (the
|
|
1511
|
+
// documented soak query on $.tool_use_id returns zero rows).
|
|
1512
|
+
// Track the in-flight Agent tool_use_id keyed by subagent type so
|
|
1513
|
+
// the paired SubagentStop below can stamp it onto subagent-done.
|
|
1514
|
+
(this._pendingSubagentStarts ||= []).push({
|
|
1515
|
+
agentType: subagentType,
|
|
1516
|
+
toolUseId: ev.toolUseId,
|
|
1517
|
+
});
|
|
1506
1518
|
this.emit('subagent-start', {
|
|
1507
1519
|
agentType: subagentType,
|
|
1508
1520
|
// PreToolUse for Agent carries no agent_id (set later on
|
|
@@ -1541,14 +1553,27 @@ class CliProcess extends Process {
|
|
|
1541
1553
|
});
|
|
1542
1554
|
return;
|
|
1543
1555
|
|
|
1544
|
-
case 'SubagentStop':
|
|
1556
|
+
case 'SubagentStop': {
|
|
1557
|
+
// Finding 0.12-M4: recover the originating Agent tool_use_id so the
|
|
1558
|
+
// subagent-start/subagent-done pair is JOINable. Prefer a match on
|
|
1559
|
+
// agent type (correct for parallel subagents of different types);
|
|
1560
|
+
// fall back to the oldest pending start when types don't line up.
|
|
1561
|
+
let subagentToolUseId = null;
|
|
1562
|
+
const pendingStarts = this._pendingSubagentStarts;
|
|
1563
|
+
if (pendingStarts && pendingStarts.length) {
|
|
1564
|
+
let idx = pendingStarts.findIndex(s => s.agentType === ev.agentType);
|
|
1565
|
+
if (idx < 0) idx = 0;
|
|
1566
|
+
subagentToolUseId = pendingStarts.splice(idx, 1)[0]?.toolUseId ?? null;
|
|
1567
|
+
}
|
|
1545
1568
|
this.emit('subagent-done', {
|
|
1546
1569
|
agentType: ev.agentType,
|
|
1547
1570
|
agentId: ev.agentId,
|
|
1548
1571
|
durationMs: ev.durationMs,
|
|
1572
|
+
toolUseId: subagentToolUseId,
|
|
1549
1573
|
backend: this.backend,
|
|
1550
1574
|
});
|
|
1551
1575
|
return;
|
|
1576
|
+
}
|
|
1552
1577
|
|
|
1553
1578
|
case 'Stop':
|
|
1554
1579
|
// Phase 1.7 (TODO) will use this as the authoritative turn-end
|
|
@@ -1665,6 +1690,50 @@ class CliProcess extends Process {
|
|
|
1665
1690
|
}
|
|
1666
1691
|
}
|
|
1667
1692
|
|
|
1693
|
+
/**
|
|
1694
|
+
* Drain on unexpected bridge socket loss (claude crash, bridge crash,
|
|
1695
|
+
* EOF). Extracted from the inline 'bridge-disconnected' handler so the
|
|
1696
|
+
* teardown is testable and consistent with _doKill.
|
|
1697
|
+
*
|
|
1698
|
+
* Findings 0.12-L5 + L6: in addition to clearing the per-turn timers
|
|
1699
|
+
* and rejecting pendings (the original P1 #5 behavior), this now also
|
|
1700
|
+
* (L5) removes each turn's stop-hook listener — this drain does NOT go
|
|
1701
|
+
* through Process.kill()'s blanket removeAllListeners, so a turn torn
|
|
1702
|
+
* down mid-stop-grace would otherwise leak its onStop closure — and
|
|
1703
|
+
* (L6) clears _interruptGraceTimer, matching _doKill (a /stop verdict
|
|
1704
|
+
* landing just before the disconnect would otherwise leave a stray
|
|
1705
|
+
* timer on the dead instance).
|
|
1706
|
+
*/
|
|
1707
|
+
_handleBridgeDisconnected() {
|
|
1708
|
+
this.bridgeReady = false;
|
|
1709
|
+
this.mcpReady = false;
|
|
1710
|
+
if (this.closed) return;
|
|
1711
|
+
this.logger.warn?.(`[${this.label}] channels: bridge disconnected unexpectedly`);
|
|
1712
|
+
// L6: clear the interrupt grace timer alongside the rest of the lifecycle.
|
|
1713
|
+
if (this._interruptGraceTimer) {
|
|
1714
|
+
clearTimeout(this._interruptGraceTimer);
|
|
1715
|
+
this._interruptGraceTimer = null;
|
|
1716
|
+
}
|
|
1717
|
+
// P1 #5: drain pendingTurns immediately so hardTimers don't run 10min.
|
|
1718
|
+
for (const [, pending] of this.pendingTurns) {
|
|
1719
|
+
if (pending.quietTimer) clearTimeout(pending.quietTimer);
|
|
1720
|
+
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
1721
|
+
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
1722
|
+
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
|
|
1723
|
+
// L5: remove the per-turn stop-hook listener (this path bypasses
|
|
1724
|
+
// Process.kill()'s removeAllListeners).
|
|
1725
|
+
if (pending._onStop) this.off('stop-hook', pending._onStop);
|
|
1726
|
+
const err = new Error('bridge disconnected');
|
|
1727
|
+
err.code = 'BRIDGE_DISCONNECTED';
|
|
1728
|
+
try { pending.reject(err); } catch {}
|
|
1729
|
+
}
|
|
1730
|
+
this.pendingTurns.clear();
|
|
1731
|
+
this.pendingQueue.length = 0;
|
|
1732
|
+
this.inFlight = false;
|
|
1733
|
+
this.emit('bridge-disconnected');
|
|
1734
|
+
this._logEvent('bridge-disconnected', { reason: 'socket-close' });
|
|
1735
|
+
}
|
|
1736
|
+
|
|
1668
1737
|
async _doKill(reason) {
|
|
1669
1738
|
this.closed = true;
|
|
1670
1739
|
this.inFlight = false;
|
|
@@ -1688,6 +1757,7 @@ class CliProcess extends Process {
|
|
|
1688
1757
|
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
1689
1758
|
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
1690
1759
|
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
|
|
1760
|
+
if (pending._onStop) this.off('stop-hook', pending._onStop); // L5
|
|
1691
1761
|
const err = new Error(`session killed: ${reason}`);
|
|
1692
1762
|
err.code = 'KILLED';
|
|
1693
1763
|
pending.reject(err);
|
|
@@ -1876,6 +1946,8 @@ class CliProcess extends Process {
|
|
|
1876
1946
|
if (pending.quietTimer) clearTimeout(pending.quietTimer);
|
|
1877
1947
|
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
1878
1948
|
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
1949
|
+
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer); // L5
|
|
1950
|
+
if (pending._onStop) this.off('stop-hook', pending._onStop); // L5
|
|
1879
1951
|
const err = new Error(`session reset: ${reason}`);
|
|
1880
1952
|
err.code = 'RESET';
|
|
1881
1953
|
try { pending.reject(err); } catch {}
|
package/lib/process/factory.js
CHANGED
|
@@ -91,10 +91,6 @@ function _maybeWarnR12Migration({ rawPm, canonical, chatId, threadId, chatCfg, t
|
|
|
91
91
|
* @param {number} [opts.queryCloseTimeoutMs]
|
|
92
92
|
* @param {object} [opts.tmuxRunner] — required when ANY chat routes to 'cli'
|
|
93
93
|
* @param {string} [opts.botName] — required when ANY chat routes to 'cli'
|
|
94
|
-
* @param {object} [opts.pollScheduler] — DEPRECATED in 0.12 — was used by the
|
|
95
|
-
* removed tmux backend to share one setInterval across all chats; CliProcess's
|
|
96
|
-
* per-session pongWatchdog handles its own cadence. Param kept for caller
|
|
97
|
-
* back-compat; ignored. Will be removed in 0.13.
|
|
98
94
|
* @param {Function} [opts.toolDispatcher] — required when ANY chat routes to 'cli'.
|
|
99
95
|
* async ({sessionKey, chatId, threadId, toolName, text, files}) => {ok, error?}.
|
|
100
96
|
* Called when Claude's reply (or react/edit_message) tool fires inside a
|
|
@@ -113,7 +109,6 @@ function createProcessFactory({
|
|
|
113
109
|
queryCloseTimeoutMs,
|
|
114
110
|
tmuxRunner = null,
|
|
115
111
|
botName = null,
|
|
116
|
-
pollScheduler = null,
|
|
117
112
|
toolDispatcher = null,
|
|
118
113
|
channelsClaudeBin = null,
|
|
119
114
|
} = {}) {
|
package/lib/sdk/callbacks.js
CHANGED
|
@@ -464,7 +464,10 @@ function createSdkCallbacks({
|
|
|
464
464
|
const detail = {
|
|
465
465
|
chat_id: getChatIdFromKey(sessionKey),
|
|
466
466
|
session_key: sessionKey,
|
|
467
|
-
|
|
467
|
+
// Finding 0.12-M3: tmux backend was deleted in 0.12; these hook
|
|
468
|
+
// handlers only ever fire on the CLI driver now — default to 'cli'
|
|
469
|
+
// (honor an explicit payload.backend if a caller ever sets one).
|
|
470
|
+
backend: payload?.backend ?? 'cli',
|
|
468
471
|
hook_type: payload?.type ?? null,
|
|
469
472
|
claude_session_id: payload?.sessionId ?? null,
|
|
470
473
|
tool_name: payload?.toolName ?? null,
|
|
@@ -555,7 +558,7 @@ function createSdkCallbacks({
|
|
|
555
558
|
logEvent('turn-timeout', {
|
|
556
559
|
chat_id: getChatIdFromKey(sessionKey),
|
|
557
560
|
session_key: sessionKey,
|
|
558
|
-
backend: '
|
|
561
|
+
backend: payload?.backend ?? 'cli', // Finding 0.12-M3
|
|
559
562
|
turn_id: payload?.turnId ?? null,
|
|
560
563
|
reason: payload?.reason ?? null,
|
|
561
564
|
idle_ms: payload?.idleMs ?? null,
|
|
@@ -578,7 +581,7 @@ function createSdkCallbacks({
|
|
|
578
581
|
logEvent('hook-tail-error', {
|
|
579
582
|
chat_id: getChatIdFromKey(sessionKey),
|
|
580
583
|
session_key: sessionKey,
|
|
581
|
-
backend: '
|
|
584
|
+
backend: payload?.backend ?? 'cli', // Finding 0.12-M3 (fires on the CLI hook tail)
|
|
582
585
|
message: (payload?.message || '').slice(0, 200),
|
|
583
586
|
path: payload?.path ?? null,
|
|
584
587
|
claude_session_id: payload?.sessionId ?? null,
|
|
@@ -596,7 +599,7 @@ function createSdkCallbacks({
|
|
|
596
599
|
logEvent('stop-hook-resolved', {
|
|
597
600
|
chat_id: getChatIdFromKey(sessionKey),
|
|
598
601
|
session_key: sessionKey,
|
|
599
|
-
backend: '
|
|
602
|
+
backend: payload?.backend ?? 'cli', // Finding 0.12-M3
|
|
600
603
|
turn_id: payload?.turnId ?? null,
|
|
601
604
|
claude_session_id: payload?.sessionId ?? null,
|
|
602
605
|
});
|
|
@@ -614,7 +617,7 @@ function createSdkCallbacks({
|
|
|
614
617
|
logEvent('session-age-prompt-dismissed', {
|
|
615
618
|
chat_id: getChatIdFromKey(sessionKey),
|
|
616
619
|
session_key: sessionKey,
|
|
617
|
-
backend: '
|
|
620
|
+
backend: payload?.backend ?? 'cli', // Finding 0.12-M3
|
|
618
621
|
claude_session_id: payload?.sessionId ?? null,
|
|
619
622
|
});
|
|
620
623
|
} catch (err) {
|
|
@@ -680,7 +683,7 @@ function createSdkCallbacks({
|
|
|
680
683
|
// ON json_extract(s.detail_json, '$.tool_use_id') =
|
|
681
684
|
// json_extract(d.detail_json, '$.tool_use_id')
|
|
682
685
|
// WHERE s.kind='subagent-start' AND d.kind='subagent-done';
|
|
683
|
-
onSubagentStart: (sessionKey, payload) => {
|
|
686
|
+
onSubagentStart: (sessionKey, payload, entry) => {
|
|
684
687
|
try {
|
|
685
688
|
logEvent('subagent-start', {
|
|
686
689
|
chat_id: getChatIdFromKey(sessionKey),
|
|
@@ -689,13 +692,23 @@ function createSdkCallbacks({
|
|
|
689
692
|
agent_type: payload?.agentType ?? null,
|
|
690
693
|
tool_use_id: payload?.toolUseId ?? null,
|
|
691
694
|
});
|
|
695
|
+
// Findings L9/L14: drive the head reactor into the distinct SUBAGENT
|
|
696
|
+
// state so a running subagent shows 👾 rather than freezing on the
|
|
697
|
+
// prior tool's emoji. The plan promised this; previously the handler
|
|
698
|
+
// only persisted the DB row and never touched the reactor.
|
|
699
|
+
const r = entry?.pendingQueue?.[0]?.context?.reactor;
|
|
700
|
+
if (r) r.setState('SUBAGENT');
|
|
692
701
|
} catch (err) {
|
|
693
702
|
logger.error?.(`[${botName}] subagent-start handler: ${err.message}`);
|
|
694
703
|
}
|
|
695
704
|
},
|
|
696
705
|
|
|
697
|
-
onSubagentDone: (sessionKey, payload) => {
|
|
706
|
+
onSubagentDone: (sessionKey, payload, entry) => {
|
|
698
707
|
try {
|
|
708
|
+
// L9/L14: heartbeat at subagent end so the cascade/stall clock
|
|
709
|
+
// resets; the next tool's PreToolUse sets the following state.
|
|
710
|
+
const r = entry?.pendingQueue?.[0]?.context?.reactor;
|
|
711
|
+
if (r && typeof r.heartbeat === 'function') r.heartbeat();
|
|
699
712
|
logEvent('subagent-done', {
|
|
700
713
|
chat_id: getChatIdFromKey(sessionKey),
|
|
701
714
|
session_key: sessionKey,
|
|
@@ -703,6 +716,11 @@ function createSdkCallbacks({
|
|
|
703
716
|
agent_type: payload?.agentType ?? null,
|
|
704
717
|
agent_id: payload?.agentId ?? null,
|
|
705
718
|
duration_ms: payload?.durationMs ?? null,
|
|
719
|
+
// Finding 0.12-M4: persist the originating Agent tool_use_id so the
|
|
720
|
+
// documented subagent-start/subagent-done soak JOIN on
|
|
721
|
+
// $.tool_use_id matches (subagent-done's tool_use_id is recovered
|
|
722
|
+
// in cli-process.js from the paired Agent PreToolUse).
|
|
723
|
+
tool_use_id: payload?.toolUseId ?? null,
|
|
706
724
|
});
|
|
707
725
|
} catch (err) {
|
|
708
726
|
logger.error?.(`[${botName}] subagent-done handler: ${err.message}`);
|
package/lib/telegram/reactions.js
CHANGED
|
@@ -55,6 +55,11 @@ const STATES = {
|
|
|
55
55
|
// mid-turn user message is buffered for the next PostToolBatch
|
|
56
56
|
// injection.
|
|
57
57
|
AUTOSTEERED: { label: 'autosteered', chain: ['✍', '👀'] },
|
|
58
|
+
// 0.12 (Findings L9/L14): distinct in-progress reaction for a running
|
|
59
|
+
// subagent (Agent PreToolUse → SubagentStop). Driven by onSubagentStart.
|
|
60
|
+
// Preferred 👾 (NOT 🤖 — 🤖 is REACTION_INVALID for bots, same class as
|
|
61
|
+
// the rc.37 🧐 bug); falls back to 🔥 then 🤔, all bot-usable.
|
|
62
|
+
SUBAGENT: { label: 'subagent', chain: ['👾', '🔥', '🤔'] },
|
|
58
63
|
DONE: { label: 'done', chain: ['👍'] },
|
|
59
64
|
ERROR: { label: 'error', chain: ['🤯', '🤔'] },
|
|
60
65
|
STALL: { label: 'stall', chain: ['🥱', '🤔'] },
|
package/lib/tmux/log-tail.js
CHANGED
|
@@ -42,6 +42,7 @@
|
|
|
42
42
|
const EventEmitter = require('events');
|
|
43
43
|
const fs = require('fs');
|
|
44
44
|
const path = require('path');
|
|
45
|
+
const { StringDecoder } = require('string_decoder');
|
|
45
46
|
|
|
46
47
|
const DEFAULT_INTERVAL_MS = 100;
|
|
47
48
|
// Slow safety-net poll when fs.watch is active. Catches any events
|
|
@@ -91,6 +92,13 @@ class LogTail extends EventEmitter {
|
|
|
91
92
|
this.fs = fsOverride || fs;
|
|
92
93
|
this._offset = 0;
|
|
93
94
|
this._buf = '';
|
|
95
|
+
// L8: decode bytes through a StringDecoder so a multibyte UTF-8 char
|
|
96
|
+
// split across two read chunks (the 64KB DEFAULT_CHUNK_BYTES boundary)
|
|
97
|
+
// isn't corrupted into U+FFFD. The decoder holds an incomplete trailing
|
|
98
|
+
// sequence until the continuation bytes arrive on the next read. The
|
|
99
|
+
// hook ndjson carries large non-ASCII tool payloads, so this is
|
|
100
|
+
// load-bearing on the CliProcess observability path.
|
|
101
|
+
this._decoder = new StringDecoder('utf8');
|
|
94
102
|
this._closed = false;
|
|
95
103
|
this._timer = null;
|
|
96
104
|
this._watcher = null;
|
|
@@ -260,7 +268,9 @@ class LogTail extends EventEmitter {
|
|
|
260
268
|
const readSize = Math.min(remaining, buffer.length);
|
|
261
269
|
const { bytesRead } = await fd.read(buffer, 0, readSize, this._offset + totalRead);
|
|
262
270
|
if (bytesRead === 0) break;
|
|
263
|
-
|
|
271
|
+
// L8: StringDecoder.write instead of per-chunk toString('utf8') so a
|
|
272
|
+
// multibyte char straddling the read boundary survives intact.
|
|
273
|
+
this._buf += this._decoder.write(buffer.subarray(0, bytesRead));
|
|
264
274
|
totalRead += bytesRead;
|
|
265
275
|
}
|
|
266
276
|
this._offset += totalRead;
|
package/lib/tmux/startup-gate.js
CHANGED
|
@@ -17,6 +17,19 @@
|
|
|
17
17
|
* - if `readySignal` regex matches the captured pane content, resolve
|
|
18
18
|
* - if `Date.now()` exceeds the deadline, throw with `err.code = timeoutCode`
|
|
19
19
|
*
|
|
20
|
+
* Progress-aware (stall) deadline — `stallMs`:
|
|
21
|
+
* The blind wall-clock `deadlineMs` can't tell "claude is mid-download
|
|
22
|
+
* (24% progress bar, genuinely working)" from "claude is wedged". The
|
|
23
|
+
* shumorobot General incident (2026-05-30) killed a cold-spawn that was
|
|
24
|
+
* actively downloading the runtime. When `stallMs` is set, the gate
|
|
25
|
+
* tracks pane ACTIVITY: any change in captured pane content — or a
|
|
26
|
+
* trigger key being sent — resets a stall clock. The gate fails early
|
|
27
|
+
* (with `timeoutCode`) only after `stallMs` elapses with NO activity,
|
|
28
|
+
* i.e. the pane is frozen. `deadlineMs` remains an absolute backstop so
|
|
29
|
+
* a pane that animates forever but never reaches `readySignal` still
|
|
30
|
+
* terminates. When `stallMs` is omitted (default), behavior is the pure
|
|
31
|
+
* `deadlineMs` wall-clock exactly as before.
|
|
32
|
+
*
|
|
20
33
|
* Each trigger is one-shot per gate run (tracked by `name` in a Set).
|
|
21
34
|
*
|
|
22
35
|
* Caller supplies:
|
|
@@ -40,7 +53,10 @@ const DEFAULT_SETTLE_MS = 500;
|
|
|
40
53
|
* @param {string} opts.tmuxName — tmux session name to poll
|
|
41
54
|
* @param {Array<{name:string, regex:RegExp, key:string}>} opts.triggers
|
|
42
55
|
* @param {RegExp} opts.readySignal — match → resolve
|
|
43
|
-
* @param {number} [opts.deadlineMs=30000]
|
|
56
|
+
* @param {number} [opts.deadlineMs=30000] — absolute backstop
|
|
57
|
+
* @param {number} [opts.stallMs] — if set, fail after this much
|
|
58
|
+
* wall-clock with NO pane activity (progress-aware). Omit for pure
|
|
59
|
+
* wall-clock behavior.
|
|
44
60
|
* @param {number} [opts.pollMs=300]
|
|
45
61
|
* @param {number} [opts.settleMs=500]
|
|
46
62
|
* @param {string} [opts.timeoutCode='TUI_STARTUP_TIMEOUT']
|
|
@@ -54,6 +70,7 @@ async function runStartupGate({
|
|
|
54
70
|
triggers = [],
|
|
55
71
|
readySignal,
|
|
56
72
|
deadlineMs = DEFAULT_DEADLINE_MS,
|
|
73
|
+
stallMs,
|
|
57
74
|
pollMs = DEFAULT_POLL_MS,
|
|
58
75
|
settleMs = DEFAULT_SETTLE_MS,
|
|
59
76
|
timeoutCode = 'TUI_STARTUP_TIMEOUT',
|
|
@@ -70,6 +87,7 @@ async function runStartupGate({
|
|
|
70
87
|
|
|
71
88
|
const startedAt = Date.now();
|
|
72
89
|
const deadline = startedAt + deadlineMs;
|
|
90
|
+
const stallEnabled = Number.isFinite(stallMs) && stallMs > 0;
|
|
73
91
|
const seen = new Set();
|
|
74
92
|
const matchedTriggers = [];
|
|
75
93
|
// rc.4: remember the most recent successful pane snapshot. If the gate
|
|
@@ -78,8 +96,30 @@ async function runStartupGate({
|
|
|
78
96
|
// this, "claude exits code 0 after dev-channels Enter" surfaces as a
|
|
79
97
|
// 30-second `can't find pane` spam with no diagnostic about WHY.
|
|
80
98
|
let lastPane = null;
|
|
99
|
+
// Progress-aware gate: timestamp of the last observed pane CHANGE (or
|
|
100
|
+
// trigger send). Seeded to startedAt so a pane that's frozen from the
|
|
101
|
+
// very first capture still trips stallMs. Only consulted when
|
|
102
|
+
// stallEnabled.
|
|
103
|
+
let lastActivityAt = startedAt;
|
|
81
104
|
|
|
82
105
|
while (Date.now() < deadline) {
|
|
106
|
+
// Stall check (progress-aware): the pane has been doing nothing for
|
|
107
|
+
// stallMs. Distinct from the absolute deadline — fires early so a
|
|
108
|
+
// wedged TUI fails fast, while an actively-progressing one (download
|
|
109
|
+
// bar, dialog navigation) keeps resetting lastActivityAt below.
|
|
110
|
+
if (stallEnabled && Date.now() - lastActivityAt >= stallMs) {
|
|
111
|
+
const err = new Error(
|
|
112
|
+
`[${label}] startup gate saw no pane activity for ${stallMs}ms for ${tmuxName} ` +
|
|
113
|
+
`(matched: ${matchedTriggers.length ? matchedTriggers.join(', ') : 'none'}). ` +
|
|
114
|
+
`Pane appears wedged. Last pane content:\n` +
|
|
115
|
+
_formatPaneTail(lastPane),
|
|
116
|
+
);
|
|
117
|
+
err.code = timeoutCode;
|
|
118
|
+
err.lastPane = lastPane;
|
|
119
|
+
err.matchedTriggers = matchedTriggers;
|
|
120
|
+
err.reason = 'stall';
|
|
121
|
+
throw err;
|
|
122
|
+
}
|
|
83
123
|
let pane;
|
|
84
124
|
try {
|
|
85
125
|
pane = await runner.captureWide(tmuxName);
|
|
@@ -107,6 +147,10 @@ async function runStartupGate({
|
|
|
107
147
|
await new Promise(r => setTimeout(r, settleMs));
|
|
108
148
|
continue;
|
|
109
149
|
}
|
|
150
|
+
// Progress signal: any change in pane content is activity → reset the
|
|
151
|
+
// stall clock. A captureWide that returns the SAME bytes is NOT
|
|
152
|
+
// activity (a frozen download bar at 24% reads identically each poll).
|
|
153
|
+
if (pane !== lastPane) lastActivityAt = Date.now();
|
|
110
154
|
lastPane = pane;
|
|
111
155
|
|
|
112
156
|
// Walk triggers in declaration order — first match (and not yet seen) wins
|
|
@@ -122,6 +166,10 @@ async function runStartupGate({
|
|
|
122
166
|
seen.add(trigger.name);
|
|
123
167
|
matchedTriggers.push(trigger.name);
|
|
124
168
|
matched = true;
|
|
169
|
+
// Sending a key is activity — navigating the TUI counts as progress
|
|
170
|
+
// even if the pre-transition pane text was static (e.g. a dialog we
|
|
171
|
+
// just answered). Reset the stall clock so we don't fail mid-nav.
|
|
172
|
+
lastActivityAt = Date.now();
|
|
125
173
|
// Settle window so the TUI transitions out of the dialog before next poll
|
|
126
174
|
await new Promise(r => setTimeout(r, settleMs));
|
|
127
175
|
break;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "polygram",
|
|
3
|
-
"version": "0.12.0-rc.2",
|
|
3
|
+
"version": "0.12.0-rc.4",
|
|
4
4
|
"description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
|
|
5
5
|
"main": "lib/ipc/client.js",
|
|
6
6
|
"bin": {
|
package/polygram.js
CHANGED
|
@@ -51,7 +51,6 @@ const { extractAssistantText } = require('./lib/process/sdk-process');
|
|
|
51
51
|
const { createChannelsToolDispatcher } = require('./lib/process/channels-tool-dispatcher');
|
|
52
52
|
const { createTmuxRunner } = require('./lib/tmux/tmux-runner');
|
|
53
53
|
const { sweepTmuxOrphans } = require('./lib/tmux/orphan-sweep');
|
|
54
|
-
const { PollScheduler } = require('./lib/tmux/poll-scheduler');
|
|
55
54
|
// rc.42: autosteer-buffer module deleted. Native SDK priority push
|
|
56
55
|
// (pm.injectUserMessage) replaces the buffer + PostToolBatch detour.
|
|
57
56
|
const { createAutosteeredRefs } = require('./lib/autosteered-refs');
|
|
@@ -2244,19 +2243,13 @@ async function main() {
|
|
|
2244
2243
|
const binCheck = verifyPinnedClaudeBin(CLAUDE_CLI_PINNED_VERSION);
|
|
2245
2244
|
if (binCheck.ok) {
|
|
2246
2245
|
console.log(
|
|
2247
|
-
`[polygram]
|
|
2246
|
+
`[polygram] CliProcess pinned to claude CLI v${CLAUDE_CLI_PINNED_VERSION}: ${binCheck.path}`,
|
|
2248
2247
|
);
|
|
2249
2248
|
pinnedClaudeBin = binCheck.path;
|
|
2250
2249
|
} else {
|
|
2251
2250
|
console.warn(`[polygram] WARNING: ${binCheck.reason}`);
|
|
2252
2251
|
}
|
|
2253
2252
|
}
|
|
2254
|
-
// O1 optimization: shared poll-tick scheduler. N TmuxProcess
|
|
2255
|
-
// instances share ONE setInterval instead of spawning N independent
|
|
2256
|
-
// setTimeout chains. Idle when no chats are in flight (zero timers
|
|
2257
|
-
// running). Configurable via config.bot.tmuxPollIntervalMs.
|
|
2258
|
-
const tmuxPollIntervalMs = config.bot?.tmuxPollIntervalMs || 250;
|
|
2259
|
-
const pollScheduler = new PollScheduler({ intervalMs: tmuxPollIntervalMs });
|
|
2260
2253
|
// 0.11.0: channels backend wiring. Used when a chat opts in via
|
|
2261
2254
|
// `pm: 'channels'` config. Falls back to SDK gracefully if the pinned
|
|
2262
2255
|
// claude binary isn't present (see factory.js — channelsClaudeBin
|
|
@@ -2282,7 +2275,6 @@ async function main() {
|
|
|
2282
2275
|
logger: console,
|
|
2283
2276
|
tmuxRunner,
|
|
2284
2277
|
botName: BOT_NAME,
|
|
2285
|
-
pollScheduler,
|
|
2286
2278
|
// channels backend
|
|
2287
2279
|
toolDispatcher: channelsToolDispatcher,
|
|
2288
2280
|
channelsClaudeBin,
|