polygram 0.10.0-rc.40 → 0.10.0-rc.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/lib/process/hook-event-tail.js +16 -5
- package/lib/process/tmux-process.js +263 -28
- package/lib/process-manager.js +21 -0
- package/lib/sdk/callbacks.js +61 -0
- package/package.json +1 -1
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://anthropic.com/claude-code/plugin.schema.json",
|
|
3
3
|
"name": "polygram",
|
|
4
|
-
"version": "0.10.0-rc.40",
|
|
4
|
+
"version": "0.10.0-rc.42",
|
|
5
5
|
"description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands plus history (transcript queries) and polygram-send (out-of-turn IPC sends with file-upload validation) skills.",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"telegram",
|
|
@@ -121,15 +121,26 @@ function pipeHookParser(tail) {
|
|
|
121
121
|
|
|
122
122
|
/**
|
|
123
123
|
* One-shot helper: build a LogTail at the given path with the
|
|
124
|
-
* H1-typical config (watch mode,
|
|
125
|
-
*
|
|
126
|
-
*
|
|
124
|
+
* H1-typical config (watch mode), wire the hook parser, and return
|
|
125
|
+
* it. Caller calls `.start()` and `.on('event', ...)`.
|
|
126
|
+
*
|
|
127
|
+
* `skipExisting`:
|
|
128
|
+
* - false (default) for a FRESH spawn — the ndjson was just
|
|
129
|
+
* touched at start time and is empty, so any future write IS a
|
|
130
|
+
* new event.
|
|
131
|
+
* - true for a `--resume` spawn — `writeHookFiles` uses 'a' mode
|
|
132
|
+
* (append) and never truncates, so the prior session's hook
|
|
133
|
+
* events are still on disk. Without skipExisting they replay
|
|
134
|
+
* into the fresh process, arming a Stop synth against the
|
|
135
|
+
* fresh turn (H4) and heartbeating it (H3) from stale events.
|
|
136
|
+
* rc.42 #5 (review-driven): mirror what `_armSessionLogTail`
|
|
137
|
+
* already does for the JSONL tail.
|
|
127
138
|
*/
|
|
128
|
-
function createHookTail({ path: filePath, logger = console } = {}) {
|
|
139
|
+
function createHookTail({ path: filePath, skipExisting = false, logger = console } = {}) {
|
|
129
140
|
const tail = new LogTail({
|
|
130
141
|
path: filePath,
|
|
131
142
|
intervalMs: 50,
|
|
132
|
-
skipExisting
|
|
143
|
+
skipExisting,
|
|
133
144
|
useWatch: 'auto',
|
|
134
145
|
logger,
|
|
135
146
|
});
|
|
@@ -202,6 +202,14 @@ const DEFAULT_QUIESCE_MS = 500; // require READY for this long before decl
|
|
|
202
202
|
// wedge-detection delay is bounded.
|
|
203
203
|
const DEFAULT_HARD_BACKSTOP_MS = 4 * 60 * 60_000; // 4 hours
|
|
204
204
|
const IDLE_POLL_INTERVAL_MS = 30_000; // 30 s
|
|
205
|
+
// 0.10.0 H4 — `Stop` hook as authoritative turn-done.
|
|
206
|
+
// The Stop hook fires when claude finishes responding — same
|
|
207
|
+
// semantic as the JSONL `result` event. Both should land within
|
|
208
|
+
// ms of each other; this grace gives JSONL a chance to win (full
|
|
209
|
+
// result data: subtype, stopReason, all the metadata) before the
|
|
210
|
+
// Stop hook synthesizes a fallback settle. If the JSONL stream is
|
|
211
|
+
// broken or stuck, Stop carries the turn to completion alone.
|
|
212
|
+
const DEFAULT_STOP_GRACE_MS = 2_000; // 2 s
|
|
205
213
|
|
|
206
214
|
// B8 (slow-MCP readiness): how long the claude `--debug-file` log must
|
|
207
215
|
// have had NO new bytes appended before the startup is considered
|
|
@@ -251,6 +259,7 @@ class TmuxProcess extends Process {
|
|
|
251
259
|
readyTimeoutMs = DEFAULT_READY_TIMEOUT_MS,
|
|
252
260
|
turnTimeoutMs = DEFAULT_TURN_TIMEOUT_MS,
|
|
253
261
|
hardBackstopMs = DEFAULT_HARD_BACKSTOP_MS,
|
|
262
|
+
stopGraceMs = DEFAULT_STOP_GRACE_MS,
|
|
254
263
|
pollMs = DEFAULT_POLL_MS,
|
|
255
264
|
quiesceMs = DEFAULT_QUIESCE_MS,
|
|
256
265
|
lateGraceMs = 1500,
|
|
@@ -293,8 +302,25 @@ class TmuxProcess extends Process {
|
|
|
293
302
|
|
|
294
303
|
// Tunables
|
|
295
304
|
this.readyTimeoutMs = readyTimeoutMs;
|
|
305
|
+
// rc.42 #7 (review-driven): validate timer config at construction
|
|
306
|
+
// so a misconfigured process fails loud here instead of silently
|
|
307
|
+
// mid-turn (NaN → setInterval ≈1 ms spin; 0/negative → instant
|
|
308
|
+
// idle-timeout).
|
|
309
|
+
for (const [name, v] of [
|
|
310
|
+
['turnTimeoutMs', turnTimeoutMs],
|
|
311
|
+
['hardBackstopMs', hardBackstopMs],
|
|
312
|
+
['stopGraceMs', stopGraceMs],
|
|
313
|
+
]) {
|
|
314
|
+
if (!Number.isFinite(v) || v < 0) {
|
|
315
|
+
throw Object.assign(
|
|
316
|
+
new TypeError(`TmuxProcess: ${name} must be a finite non-negative number (got ${v})`),
|
|
317
|
+
{ code: 'TMUX_INVALID_TIMEOUT_CONFIG', field: name, value: v },
|
|
318
|
+
);
|
|
319
|
+
}
|
|
320
|
+
}
|
|
296
321
|
this.turnTimeoutMs = turnTimeoutMs;
|
|
297
322
|
this.hardBackstopMs = hardBackstopMs;
|
|
323
|
+
this.stopGraceMs = stopGraceMs;
|
|
298
324
|
this.pollMs = pollMs;
|
|
299
325
|
this.quiesceMs = quiesceMs;
|
|
300
326
|
this.readyDebugQuietMs = readyDebugQuietMs;
|
|
@@ -635,8 +661,10 @@ class TmuxProcess extends Process {
|
|
|
635
661
|
this._armSessionLogTail({ resuming: Boolean(ctx.existingSessionId) });
|
|
636
662
|
// H1 — same-pattern hook tail. Only arm when the settings
|
|
637
663
|
// write succeeded above (otherwise there's nothing to tail).
|
|
664
|
+
// rc.42 #5: on `--resume`, pass skipExisting through so
|
|
665
|
+
// prior-process hook events aren't replayed into this turn.
|
|
638
666
|
if (this._hookNdjsonPath) {
|
|
639
|
-
this._armHookTail();
|
|
667
|
+
this._armHookTail({ resuming: Boolean(ctx.existingSessionId) });
|
|
640
668
|
}
|
|
641
669
|
|
|
642
670
|
// G6 — block until TUI is responsive.
|
|
@@ -767,7 +795,22 @@ class TmuxProcess extends Process {
|
|
|
767
795
|
this.inFlight = true;
|
|
768
796
|
turn.state = 'pasted';
|
|
769
797
|
turn.startedAt = this._now();
|
|
770
|
-
|
|
798
|
+
// rc.42 #7 (review-driven): validate the resolved turnTimeoutMs.
|
|
799
|
+
// NaN would coerce setInterval cadence to ≈1 ms (spin-loop);
|
|
800
|
+
// 0 or negative would trip the idle-ceiling on the first poll.
|
|
801
|
+
// Neither is reachable via current defaults, but a config override
|
|
802
|
+
// can produce them. Fall back to the instance default (already
|
|
803
|
+
// validated as a finite positive number at construction time).
|
|
804
|
+
const rawTimeoutMs = turn.opts.timeoutMs || this.turnTimeoutMs;
|
|
805
|
+
const turnTimeoutMs = (Number.isFinite(rawTimeoutMs) && rawTimeoutMs > 0)
|
|
806
|
+
? rawTimeoutMs
|
|
807
|
+
: this.turnTimeoutMs;
|
|
808
|
+
if (turnTimeoutMs !== rawTimeoutMs) {
|
|
809
|
+
this.logger.warn?.(
|
|
810
|
+
`[${this.label}] invalid turn timeoutMs (${rawTimeoutMs}); `
|
|
811
|
+
+ `falling back to ${turnTimeoutMs} ms`,
|
|
812
|
+
);
|
|
813
|
+
}
|
|
771
814
|
// Internal turn-done signal — settled by _flushActiveGroup when
|
|
772
815
|
// this turn's group is flushed on a terminal `result`.
|
|
773
816
|
turn.resultPromise = new Promise((resolve) => { turn.settleResult = resolve; });
|
|
@@ -835,15 +878,46 @@ class TmuxProcess extends Process {
|
|
|
835
878
|
// outstanding (subsumes B10 — capture can no
|
|
836
879
|
// longer settle a turn mid-subagent, so the old
|
|
837
880
|
// nested re-wait is unnecessary)
|
|
838
|
-
// - timeout :
|
|
839
|
-
//
|
|
881
|
+
// - timeout : EITHER idle-ceiling poller (#5a) OR
|
|
882
|
+
// hard-backstop setTimeout (#5b) — see H3
|
|
883
|
+
// in `_awaitSettle`. The `reason` field on
|
|
884
|
+
// the outcome carries which racer fired so
|
|
885
|
+
// operators can distinguish a wedged-silent
|
|
886
|
+
// turn (idle-ceiling) from a 4-hour runaway
|
|
887
|
+
// tool loop (hard-backstop).
|
|
840
888
|
const outcome = await this._awaitSettle(turn, { turnTimeoutMs, confirmP });
|
|
841
889
|
|
|
842
890
|
if (outcome.kind === 'submit-fail') throw outcome.err;
|
|
843
891
|
if (outcome.kind === 'timeout') {
|
|
892
|
+
// rc.42 #1 (review-driven): thread the racer-specific
|
|
893
|
+
// `reason` + observed `idleMs` onto the thrown Error AND
|
|
894
|
+
// emit a `turn-timeout` event (mirrors sdk-process.js's
|
|
895
|
+
// pattern at line 532) so the events DB records WHICH
|
|
896
|
+
// racer fired. Pre-rc.42 the diagnostic value of H3 was
|
|
897
|
+
// silently dropped — operators couldn't distinguish a
|
|
898
|
+
// wedged-silent subagent (idle-ceiling) from a 4-hour
|
|
899
|
+
// runaway tool loop (hard-backstop).
|
|
900
|
+
this.logger.warn?.(
|
|
901
|
+
`[${this.label}] turn timeout (${outcome.reason || 'unknown'}`
|
|
902
|
+
+ `${outcome.idleMs != null ? `, idle ${Math.round(outcome.idleMs)} ms` : ''})`,
|
|
903
|
+
);
|
|
904
|
+
this.emit('turn-timeout', {
|
|
905
|
+
turnId: turn.turnId,
|
|
906
|
+
reason: outcome.reason || null,
|
|
907
|
+
idleMs: outcome.idleMs ?? null,
|
|
908
|
+
turnTimeoutMs,
|
|
909
|
+
hardBackstopMs: this.hardBackstopMs,
|
|
910
|
+
sessionId: this.claudeSessionId,
|
|
911
|
+
backend: 'tmux',
|
|
912
|
+
});
|
|
844
913
|
throw Object.assign(
|
|
845
914
|
new Error('TmuxProcess: turn did not complete in time'),
|
|
846
|
-
{
|
|
915
|
+
{
|
|
916
|
+
code: 'TMUX_TURN_TIMEOUT',
|
|
917
|
+
tmuxName: this.tmuxName,
|
|
918
|
+
reason: outcome.reason || null,
|
|
919
|
+
idleMs: outcome.idleMs ?? null,
|
|
920
|
+
},
|
|
847
921
|
);
|
|
848
922
|
}
|
|
849
923
|
|
|
@@ -867,6 +941,20 @@ class TmuxProcess extends Process {
|
|
|
867
941
|
resultSubtype = outcome.ev.subtype || 'success';
|
|
868
942
|
stopReason = outcome.ev.stopReason || null;
|
|
869
943
|
if (outcome.ev.sessionId) this.claudeSessionId = outcome.ev.sessionId;
|
|
944
|
+
// rc.42 #15 (review-driven): if the settle came via the H4
|
|
945
|
+
// Stop-hook synth (not the JSONL `result`), surface that
|
|
946
|
+
// distinction. Track it on `resolvedVia` so the result event
|
|
947
|
+
// downstream consumers see the provenance, and emit a
|
|
948
|
+
// `stop-hook-resolved` event for forensic count of how often
|
|
949
|
+
// Stop actually rescued a JSONL-stuck turn.
|
|
950
|
+
if (outcome.ev.via === 'stop-hook') {
|
|
951
|
+
resolvedVia = 'stop-hook';
|
|
952
|
+
this.emit('stop-hook-resolved', {
|
|
953
|
+
turnId: turn.turnId,
|
|
954
|
+
sessionId: this.claudeSessionId,
|
|
955
|
+
backend: 'tmux',
|
|
956
|
+
});
|
|
957
|
+
}
|
|
870
958
|
// R10: a genuinely-empty terminal `result` — end_turn, no
|
|
871
959
|
// reply text, AND no tool ran this turn — is the agent
|
|
872
960
|
// producing literally nothing (a thinking-only terminal
|
|
@@ -1038,7 +1126,12 @@ class TmuxProcess extends Process {
|
|
|
1038
1126
|
* TMUX_SUBMIT_FAILED (B7)
|
|
1039
1127
|
* { kind: 'quiesced' } — capture-pane idle AND the predicate
|
|
1040
1128
|
* says it is SAFE to conclude
|
|
1041
|
-
* { kind: 'timeout'
|
|
1129
|
+
* { kind: 'timeout',
|
|
1130
|
+
* reason: 'idle-ceiling' — H3 idle-poller (#5a)
|
|
1131
|
+
* | 'hard-backstop' — H3 absolute backstop (#5b)
|
|
1132
|
+
* | 'idle-poller-error' — defensive: throw inside the
|
|
1133
|
+
* idle-poller callback (rc.42 #3)
|
|
1134
|
+
* idleMs? } — observed idle for idle-ceiling
|
|
1042
1135
|
*
|
|
1043
1136
|
* The structural win over the old race:
|
|
1044
1137
|
* - B7 gate: capture quiescence is ignored until
|
|
@@ -1139,9 +1232,21 @@ class TmuxProcess extends Process {
|
|
|
1139
1232
|
Math.min(IDLE_POLL_INTERVAL_MS, Math.floor(turnTimeoutMs / 4)),
|
|
1140
1233
|
);
|
|
1141
1234
|
idlePoller = setInterval(() => {
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1235
|
+
// rc.42 #3 (review-driven): try/catch around the body.
|
|
1236
|
+
// Adaptive poll cadence can fire as often as every 50 ms;
|
|
1237
|
+
// a repeating throw here would trip process-guard's
|
|
1238
|
+
// 100-in-5s sliding-window panicExit(2) and kill the
|
|
1239
|
+
// daemon. Catch + finish-on-error fails fast instead.
|
|
1240
|
+
try {
|
|
1241
|
+
const idleMs = this._now() - turn.lastActivityAt;
|
|
1242
|
+
if (idleMs >= turnTimeoutMs) {
|
|
1243
|
+
finish({ kind: 'timeout', reason: 'idle-ceiling', idleMs });
|
|
1244
|
+
}
|
|
1245
|
+
} catch (err) {
|
|
1246
|
+
this.logger.warn?.(
|
|
1247
|
+
`[${this.label}] idle-poller error: ${err.message}`,
|
|
1248
|
+
);
|
|
1249
|
+
finish({ kind: 'timeout', reason: 'idle-poller-error' });
|
|
1145
1250
|
}
|
|
1146
1251
|
}, pollIntervalMs);
|
|
1147
1252
|
idlePoller.unref?.();
|
|
@@ -1154,10 +1259,19 @@ class TmuxProcess extends Process {
|
|
|
1154
1259
|
// from turn start.
|
|
1155
1260
|
const backstopRemaining = Math.max(
|
|
1156
1261
|
0, (turn.startedAt + this.hardBackstopMs) - this._now());
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1262
|
+
// rc.42 #3 (review-driven): try/catch in the one-shot
|
|
1263
|
+
// setTimeout callback. Symmetric with the idle-poller +
|
|
1264
|
+
// Stop-synth fixes — protects against any future change
|
|
1265
|
+
// that introduces a throw surface here.
|
|
1266
|
+
hardBackstopTimer = setTimeout(() => {
|
|
1267
|
+
try {
|
|
1268
|
+
finish({ kind: 'timeout', reason: 'hard-backstop' });
|
|
1269
|
+
} catch (err) {
|
|
1270
|
+
this.logger.warn?.(
|
|
1271
|
+
`[${this.label}] hard-backstop error: ${err.message}`,
|
|
1272
|
+
);
|
|
1273
|
+
}
|
|
1274
|
+
}, backstopRemaining);
|
|
1161
1275
|
hardBackstopTimer.unref?.();
|
|
1162
1276
|
});
|
|
1163
1277
|
}
|
|
@@ -1178,6 +1292,15 @@ class TmuxProcess extends Process {
|
|
|
1178
1292
|
// rejects in `_runTurn`, never resolving `resultPromise`) would
|
|
1179
1293
|
// otherwise leave a dangling Map entry. Defensive + cheap.
|
|
1180
1294
|
if (turn?.token) this._submitConfirms.delete(turn.token);
|
|
1295
|
+
// rc.42 #6 (review-driven): clear the H4 Stop-synth setTimeout
|
|
1296
|
+
// when the turn retires through any other path. Idempotency
|
|
1297
|
+
// makes a post-retirement settleResult call harmless, but the
|
|
1298
|
+
// timer handle would otherwise sit in the event loop for up to
|
|
1299
|
+
// `stopGraceMs` past the turn's death.
|
|
1300
|
+
if (turn?._stopSynthTimer) {
|
|
1301
|
+
clearTimeout(turn._stopSynthTimer);
|
|
1302
|
+
turn._stopSynthTimer = null;
|
|
1303
|
+
}
|
|
1181
1304
|
const qi = this.pendingQueue.indexOf(turn);
|
|
1182
1305
|
if (qi >= 0) this.pendingQueue.splice(qi, 1);
|
|
1183
1306
|
this._dropFromActiveGroup(turn);
|
|
@@ -1585,23 +1708,43 @@ class TmuxProcess extends Process {
|
|
|
1585
1708
|
*
|
|
1586
1709
|
* See docs/0.10.0-tmux-hook-observability.md.
|
|
1587
1710
|
*/
|
|
1588
|
-
_armHookTail() {
|
|
1711
|
+
_armHookTail({ resuming = false } = {}) {
|
|
1589
1712
|
if (this._hookTail) return; // idempotent
|
|
1590
1713
|
if (!this._hookNdjsonPath) {
|
|
1591
1714
|
this.logger.warn?.(`[${this.label}] _armHookTail: no ndjson path, skipping`);
|
|
1592
1715
|
return;
|
|
1593
1716
|
}
|
|
1594
|
-
|
|
1717
|
+
// rc.42 #5 (review-driven): on `--resume`, the per-session hook
|
|
1718
|
+
// ndjson kept by `writeHookFiles` (opened in append mode) still
|
|
1719
|
+
// carries the prior process's events. `skipExisting:true`
|
|
1720
|
+
// mirrors `_armSessionLogTail`'s handling so historic Stop
|
|
1721
|
+
// events don't replay into the fresh turn (would arm a synth
|
|
1722
|
+
// settle on a freshly-pasted prompt with stale text) and stale
|
|
1723
|
+
// heartbeats don't reset the new turn's idle clock.
|
|
1724
|
+
const tail = createHookTail({
|
|
1725
|
+
path: this._hookNdjsonPath,
|
|
1726
|
+
skipExisting: resuming,
|
|
1727
|
+
logger: this.logger,
|
|
1728
|
+
});
|
|
1595
1729
|
tail.on('event', (ev) => this._handleHookEvent(ev));
|
|
1596
1730
|
tail.on('error', (err) => {
|
|
1597
1731
|
this.logger.warn?.(`[${this.label}] hook-tail error: ${err.message}`);
|
|
1732
|
+
// rc.42 #8 (review-driven): make the tail-degradation
|
|
1733
|
+
// observable so msg-884-shaped silent regressions surface in
|
|
1734
|
+
// the events DB instead of just the daemon log.
|
|
1735
|
+
this.emit('hook-tail-error', {
|
|
1736
|
+
message: err.message,
|
|
1737
|
+
path: this._hookNdjsonPath,
|
|
1738
|
+
sessionId: this.claudeSessionId,
|
|
1739
|
+
backend: 'tmux',
|
|
1740
|
+
});
|
|
1598
1741
|
});
|
|
1599
1742
|
tail.start();
|
|
1600
1743
|
this._hookTail = tail;
|
|
1601
1744
|
}
|
|
1602
1745
|
|
|
1603
1746
|
/**
|
|
1604
|
-
* Hook-event handler.
|
|
1747
|
+
* Hook-event handler. Four roles, layered over time:
|
|
1605
1748
|
*
|
|
1606
1749
|
* H1 (rc.36) — emit `hook-event` so polygram persists each event
|
|
1607
1750
|
* to the events DB; observer-only.
|
|
@@ -1615,25 +1758,107 @@ class TmuxProcess extends Process {
|
|
|
1615
1758
|
* the structural fix for the msg-884 incident (49-min
|
|
1616
1759
|
* SoundCloud subagent killed at the 30-min wall-clock while
|
|
1617
1760
|
* demonstrably alive).
|
|
1618
|
-
*
|
|
1619
|
-
*
|
|
1761
|
+
* H4 (rc.41) — `Stop` hook is an authoritative turn-done signal.
|
|
1762
|
+
* If JSONL `result` doesn't fire within `stopGraceMs`,
|
|
1763
|
+
* synthesize a settle from the Stop payload so a broken or
|
|
1764
|
+
* stuck JSONL stream can't strand a finished turn. Promise-
|
|
1765
|
+
* resolve idempotency means JSONL still wins when both fire.
|
|
1620
1766
|
*
|
|
1621
1767
|
* Parse errors and unknown event shapes are intentionally still
|
|
1622
1768
|
* forwarded — observer-only metrics for stream-reliability soak.
|
|
1623
1769
|
*/
|
|
1624
1770
|
_handleHookEvent(ev) {
|
|
1625
|
-
//
|
|
1626
|
-
//
|
|
1627
|
-
//
|
|
1628
|
-
//
|
|
1629
|
-
//
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1771
|
+
// rc.42 #2 (review-driven): wrap the whole body in try/catch.
|
|
1772
|
+
// pipeHookParser emits 'event' synchronously inside LogTail's
|
|
1773
|
+
// `for (const line of parts)` loop in _readNew; a throw here
|
|
1774
|
+
// would propagate back into that loop AFTER _offset is already
|
|
1775
|
+
// advanced past the unread lines, silently dropping every
|
|
1776
|
+
// remaining line in the batch. With H3 making hook events
|
|
1777
|
+
// load-bearing for liveness, lost events cause false idle
|
|
1778
|
+
// timeouts. Catch + warn + continue keeps the rest of the
|
|
1779
|
+
// batch flowing.
|
|
1780
|
+
try {
|
|
1781
|
+
// H3: every hook event (except the diagnostic types) is liveness
|
|
1782
|
+
// evidence. Heartbeat every turn we can identify as in-flight so
|
|
1783
|
+
// the idle-ceiling poller resets. We don't differentiate by event
|
|
1784
|
+
// type — even Notification or UserPromptSubmit prove claude is
|
|
1785
|
+
// active in this session.
|
|
1786
|
+
//
|
|
1787
|
+
// Two scopes are searched (deduped via Set): active group turns
|
|
1788
|
+
// (the steady state once `user-message` has landed) AND the
|
|
1789
|
+
// pendingQueue head (the PRE-active window between turn start
|
|
1790
|
+
// and the first `user-message`). Hook events can fire in either
|
|
1791
|
+
// window — e.g. `UserPromptSubmit` arrives just after claude
|
|
1792
|
+
// receives the paste but BEFORE the `user-message` is echoed
|
|
1793
|
+
// back into the JSONL. Without the pendingQueue fallback, that
|
|
1794
|
+
// window leaves the turn un-heartbeated and the idle poller
|
|
1795
|
+
// could fire on a turn that's actively starting up.
|
|
1796
|
+
if (ev?.type && ev.type !== 'parse-error' && ev.type !== 'unknown') {
|
|
1797
|
+
const turns = new Set(this._activeGroup?.turns || []);
|
|
1798
|
+
const head = this.pendingQueue[0];
|
|
1799
|
+
if (head) turns.add(head);
|
|
1800
|
+
for (const t of turns) {
|
|
1801
|
+
this._heartbeat(t, `hook:${ev.type}`);
|
|
1802
|
+
}
|
|
1634
1803
|
}
|
|
1804
|
+
// H4: Stop hook → synthesize a settle for the primary turn after
|
|
1805
|
+
// a grace, so JSONL `result` (which carries richer metadata)
|
|
1806
|
+
// wins when both fire. If JSONL never arrives — broken stream,
|
|
1807
|
+
// stuck parser — the Stop synth settles the turn instead of
|
|
1808
|
+
// stranding it. Idempotent: a later JSONL settleResult call is
|
|
1809
|
+
// a no-op once the promise has resolved.
|
|
1810
|
+
//
|
|
1811
|
+
// rc.42 #6 (review-driven): per-primary `_stopSynthScheduled`
|
|
1812
|
+
// guard + stored timer ref so kill()/`_finishTurn` can clear
|
|
1813
|
+
// the pending synth. Without these, repeated Stop events
|
|
1814
|
+
// accumulate N independent timers (rare in production, but a
|
|
1815
|
+
// possible memory leak), and a synth scheduled against a
|
|
1816
|
+
// primary that retires via another path (timeout, interrupt)
|
|
1817
|
+
// fires post-mortem against a freed promise. Idempotency
|
|
1818
|
+
// makes both harmless TODAY; defensive future-proofing.
|
|
1819
|
+
if (ev?.type === 'Stop') {
|
|
1820
|
+
const primary = (this._activeGroup?.turns || [])
|
|
1821
|
+
.find((t) => t.kind === 'primary');
|
|
1822
|
+
if (primary
|
|
1823
|
+
&& typeof primary.settleResult === 'function'
|
|
1824
|
+
&& !primary._stopSynthScheduled) {
|
|
1825
|
+
const synth = {
|
|
1826
|
+
text: primary.text || ev.lastAssistantMessage || '',
|
|
1827
|
+
subtype: 'success',
|
|
1828
|
+
stopReason: 'stop_hook',
|
|
1829
|
+
sessionId: this.claudeSessionId,
|
|
1830
|
+
via: 'stop-hook',
|
|
1831
|
+
};
|
|
1832
|
+
primary._stopSynthScheduled = true;
|
|
1833
|
+
// rc.42 #3 (review-driven): try/catch in the timer callback.
|
|
1834
|
+
// settleResult is a Promise resolver (cannot throw under
|
|
1835
|
+
// current spec), but a future refactor where settleResult
|
|
1836
|
+
// gates on instance state could; the surrounding setTimeout
|
|
1837
|
+
// has no recovery path otherwise.
|
|
1838
|
+
primary._stopSynthTimer = setTimeout(() => {
|
|
1839
|
+
try {
|
|
1840
|
+
// Recheck the turn is still in a state where the synth
|
|
1841
|
+
// is meaningful — if `_finishTurn` already retired it,
|
|
1842
|
+
// settleResult is idempotent but skipping is cleaner.
|
|
1843
|
+
if (typeof primary.settleResult === 'function') {
|
|
1844
|
+
primary.settleResult(synth);
|
|
1845
|
+
}
|
|
1846
|
+
} catch (err) {
|
|
1847
|
+
this.logger.warn?.(
|
|
1848
|
+
`[${this.label}] Stop-synth settle error: ${err.message}`,
|
|
1849
|
+
);
|
|
1850
|
+
}
|
|
1851
|
+
}, this.stopGraceMs);
|
|
1852
|
+
primary._stopSynthTimer.unref?.();
|
|
1853
|
+
}
|
|
1854
|
+
}
|
|
1855
|
+
this.emit('hook-event', ev);
|
|
1856
|
+
} catch (err) {
|
|
1857
|
+
this.logger.warn?.(
|
|
1858
|
+
`[${this.label}] _handleHookEvent error (${ev?.type || 'unknown'}): `
|
|
1859
|
+
+ `${err.message}`,
|
|
1860
|
+
);
|
|
1635
1861
|
}
|
|
1636
|
-
this.emit('hook-event', ev);
|
|
1637
1862
|
}
|
|
1638
1863
|
|
|
1639
1864
|
_handleSessionEvent(ev) {
|
|
@@ -2959,6 +3184,16 @@ class TmuxProcess extends Process {
|
|
|
2959
3184
|
for (const finish of [...this._submitConfirms.values()]) {
|
|
2960
3185
|
try { finish(); } catch { /* swallow */ }
|
|
2961
3186
|
}
|
|
3187
|
+
// rc.42 #6 (review-driven): drop pending H4 Stop-synth timers
|
|
3188
|
+
// across every turn the ledger still holds. Symmetric with the
|
|
3189
|
+
// _finishTurn cleanup — kill() bypasses _finishTurn for the
|
|
3190
|
+
// drainQueue'd turns, so do it here.
|
|
3191
|
+
for (const turn of this._ledger) {
|
|
3192
|
+
if (turn?._stopSynthTimer) {
|
|
3193
|
+
try { clearTimeout(turn._stopSynthTimer); } catch { /* swallow */ }
|
|
3194
|
+
turn._stopSynthTimer = null;
|
|
3195
|
+
}
|
|
3196
|
+
}
|
|
2962
3197
|
if (this._sessionLogTail) {
|
|
2963
3198
|
try { this._sessionLogTail.close(); } catch { /* swallow */ }
|
|
2964
3199
|
this._sessionLogTail = null;
|
package/lib/process-manager.js
CHANGED
|
@@ -98,6 +98,27 @@ const CALLBACK_TO_EVENT = {
|
|
|
98
98
|
// unification). SDK backend never emits — hooks are tmux-specific.
|
|
99
99
|
// See docs/0.10.0-tmux-hook-observability.md.
|
|
100
100
|
onHookEvent: 'hook-event',
|
|
101
|
+
// 0.10.0 rc.42 (review-driven #1): tmux backend turn-timeout event.
|
|
102
|
+
// Mirrors sdk-process.js's `_logEvent('turn-timeout', ...)` so both
|
|
103
|
+
// backends emit the same diagnostic. Payload distinguishes
|
|
104
|
+
// `idle-ceiling` vs `hard-backstop` (the H3 racers) so operators can
|
|
105
|
+
// tell a wedged-silent subagent from a runaway tool loop.
|
|
106
|
+
onTurnTimeout: 'turn-timeout',
|
|
107
|
+
// 0.10.0 rc.42 (review-driven #8): tmux backend hook-tail
|
|
108
|
+
// degradation event. The hook ndjson is load-bearing for H3 idle
|
|
109
|
+
// heartbeats; a persistently broken tail silently resurrects
|
|
110
|
+
// msg-884-class kills. Emitting the event surfaces the degradation
|
|
111
|
+
// in the events DB so it's visible in forensics, not just
|
|
112
|
+
// logger.warn.
|
|
113
|
+
onHookTailError: 'hook-tail-error',
|
|
114
|
+
// 0.10.0 rc.42 (review-driven #15): tmux backend stop-hook-resolved
|
|
115
|
+
// event. Fires when a turn settled via the H4 Stop-hook synth path
|
|
116
|
+
// instead of the canonical JSONL `result` (i.e. JSONL was broken or
|
|
117
|
+
// stuck and Stop rescued the turn). The synth's `via: 'stop-hook'`
|
|
118
|
+
// field was previously dead — only the tests read it. Persisting
|
|
119
|
+
// the event lets the soak count how often H4 actually fires its
|
|
120
|
+
// rescue contract.
|
|
121
|
+
onStopHookResolved: 'stop-hook-resolved',
|
|
101
122
|
};
|
|
102
123
|
|
|
103
124
|
class ProcessManager {
|
package/lib/sdk/callbacks.js
CHANGED
|
@@ -459,6 +459,67 @@ function createSdkCallbacks({
|
|
|
459
459
|
}
|
|
460
460
|
},
|
|
461
461
|
|
|
462
|
+
// 0.10.0 rc.42 #1: tmux backend turn-timeout observability.
|
|
463
|
+
// H3 introduced two timeout racers (idle-ceiling, hard-backstop)
|
|
464
|
+
// but their `reason`/`idleMs` were silently dropped at the throw
|
|
465
|
+
// site, so the events DB couldn't distinguish a wedged-silent
|
|
466
|
+
// subagent (msg-884 shape) from a 4-hour runaway tool loop. The
|
|
467
|
+
// handler persists the distinguisher.
|
|
468
|
+
onTurnTimeout: (sessionKey, payload /* , entry */) => {
|
|
469
|
+
try {
|
|
470
|
+
logEvent('turn-timeout', {
|
|
471
|
+
chat_id: getChatIdFromKey(sessionKey),
|
|
472
|
+
session_key: sessionKey,
|
|
473
|
+
backend: 'tmux',
|
|
474
|
+
turn_id: payload?.turnId ?? null,
|
|
475
|
+
reason: payload?.reason ?? null,
|
|
476
|
+
idle_ms: payload?.idleMs ?? null,
|
|
477
|
+
turn_timeout_ms: payload?.turnTimeoutMs ?? null,
|
|
478
|
+
hard_backstop_ms: payload?.hardBackstopMs ?? null,
|
|
479
|
+
claude_session_id: payload?.sessionId ?? null,
|
|
480
|
+
});
|
|
481
|
+
} catch (err) {
|
|
482
|
+
logger.error?.(`[${botName}] turn-timeout handler: ${err.message}`);
|
|
483
|
+
}
|
|
484
|
+
},
|
|
485
|
+
|
|
486
|
+
// 0.10.0 rc.42 #8: tmux backend hook-tail error observability.
|
|
487
|
+
// Persistent failures of the hook ndjson tail degrade H3 idle-
|
|
488
|
+
// ceiling accuracy and H4 Stop-synth coverage with no surface
|
|
489
|
+
// signal. Record one event per error so post-mortem can correlate
|
|
490
|
+
// unexpected idle-timeouts to a broken tail.
|
|
491
|
+
onHookTailError: (sessionKey, payload /* , entry */) => {
|
|
492
|
+
try {
|
|
493
|
+
logEvent('hook-tail-error', {
|
|
494
|
+
chat_id: getChatIdFromKey(sessionKey),
|
|
495
|
+
session_key: sessionKey,
|
|
496
|
+
backend: 'tmux',
|
|
497
|
+
message: (payload?.message || '').slice(0, 200),
|
|
498
|
+
path: payload?.path ?? null,
|
|
499
|
+
claude_session_id: payload?.sessionId ?? null,
|
|
500
|
+
});
|
|
501
|
+
} catch (err) {
|
|
502
|
+
logger.error?.(`[${botName}] hook-tail-error handler: ${err.message}`);
|
|
503
|
+
}
|
|
504
|
+
},
|
|
505
|
+
|
|
506
|
+
// 0.10.0 rc.42 #15: H4 Stop-hook synth fired and won the race
|
|
507
|
+
// against JSONL `result` (or JSONL never landed). Forensic count
|
|
508
|
+
// of how often Stop actually rescues a stuck JSONL stream.
|
|
509
|
+
onStopHookResolved: (sessionKey, payload /* , entry */) => {
|
|
510
|
+
try {
|
|
511
|
+
logEvent('stop-hook-resolved', {
|
|
512
|
+
chat_id: getChatIdFromKey(sessionKey),
|
|
513
|
+
session_key: sessionKey,
|
|
514
|
+
backend: 'tmux',
|
|
515
|
+
turn_id: payload?.turnId ?? null,
|
|
516
|
+
claude_session_id: payload?.sessionId ?? null,
|
|
517
|
+
});
|
|
518
|
+
} catch (err) {
|
|
519
|
+
logger.error?.(`[${botName}] stop-hook-resolved handler: ${err.message}`);
|
|
520
|
+
}
|
|
521
|
+
},
|
|
522
|
+
|
|
462
523
|
onInjectFail: (sessionKey, payload /* , entry */) => {
|
|
463
524
|
try {
|
|
464
525
|
const msgId = payload?.msgId;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "polygram",
|
|
3
|
-
"version": "0.10.0-rc.40",
|
|
3
|
+
"version": "0.10.0-rc.42",
|
|
4
4
|
"description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
|
|
5
5
|
"main": "lib/ipc/client.js",
|
|
6
6
|
"bin": {
|