polygram 0.10.0-rc.39 → 0.10.0-rc.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
3
3
  "name": "polygram",
4
- "version": "0.10.0-rc.39",
4
+ "version": "0.10.0-rc.40",
5
5
  "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands plus history (transcript queries) and polygram-send (out-of-turn IPC sends with file-upload validation) skills.",
6
6
  "keywords": [
7
7
  "telegram",
@@ -186,6 +186,22 @@ const DEFAULT_READY_TIMEOUT_MS = 120_000;
186
186
  const DEFAULT_TURN_TIMEOUT_MS = 5 * 60_000;
187
187
  const DEFAULT_POLL_MS = 250;
188
188
  const DEFAULT_QUIESCE_MS = 500; // require READY for this long before declaring done
189
+ // 0.10.0 H3 — hook-fed idle ceiling + hard backstop.
190
+ // `turnTimeoutMs` — IDLE ceiling (default 5 min). A turn is
191
+ // wedged only if there's no activity
192
+ // (JSONL events, capture-pane signals,
193
+ // OR hook events) for this long. A
194
+ // healthy long subagent firing hooks
195
+ // every few seconds never trips this.
196
+ // `hardBackstopMs` — absolute backstop against pathological
197
+ // infinite tool loops. Default 4h.
198
+ // `IDLE_POLL_INTERVAL_MS` — how often the idle poller in
199
+ // _awaitSettle wakes to check the
200
+ // accumulated idle. Coarse enough to be
201
+ // cheap, fine enough that the perceived
202
+ // wedge-detection delay is bounded.
203
+ const DEFAULT_HARD_BACKSTOP_MS = 4 * 60 * 60_000; // 4 hours
204
+ const IDLE_POLL_INTERVAL_MS = 30_000; // 30 s
189
205
 
190
206
  // B8 (slow-MCP readiness): how long the claude `--debug-file` log must
191
207
  // have had NO new bytes appended before the startup is considered
@@ -234,6 +250,7 @@ class TmuxProcess extends Process {
234
250
  sleepFn, nowFn,
235
251
  readyTimeoutMs = DEFAULT_READY_TIMEOUT_MS,
236
252
  turnTimeoutMs = DEFAULT_TURN_TIMEOUT_MS,
253
+ hardBackstopMs = DEFAULT_HARD_BACKSTOP_MS,
237
254
  pollMs = DEFAULT_POLL_MS,
238
255
  quiesceMs = DEFAULT_QUIESCE_MS,
239
256
  lateGraceMs = 1500,
@@ -277,6 +294,7 @@ class TmuxProcess extends Process {
277
294
  // Tunables
278
295
  this.readyTimeoutMs = readyTimeoutMs;
279
296
  this.turnTimeoutMs = turnTimeoutMs;
297
+ this.hardBackstopMs = hardBackstopMs;
280
298
  this.pollMs = pollMs;
281
299
  this.quiesceMs = quiesceMs;
282
300
  this.readyDebugQuietMs = readyDebugQuietMs;
@@ -1044,11 +1062,13 @@ class TmuxProcess extends Process {
1044
1062
  const abortP = new Promise((resolve) => { signalAbort = resolve; });
1045
1063
  return new Promise((resolve) => {
1046
1064
  let done = false;
1047
- let deadlineTimer = null;
1065
+ let idlePoller = null;
1066
+ let hardBackstopTimer = null;
1048
1067
  const finish = (outcome) => {
1049
1068
  if (done) return;
1050
1069
  done = true;
1051
- if (deadlineTimer) clearTimeout(deadlineTimer);
1070
+ if (idlePoller) clearInterval(idlePoller);
1071
+ if (hardBackstopTimer) clearTimeout(hardBackstopTimer);
1052
1072
  // Release the capture-pane poll loop (and, with a shared
1053
1073
  // PollScheduler, its refcount) even when a non-capture outcome
1054
1074
  // won — mirrors the old `finally { signalAbort() }`.
@@ -1076,7 +1096,7 @@ class TmuxProcess extends Process {
1076
1096
  try {
1077
1097
  buf = await this._awaitTurnComplete({ timeoutMs: turnTimeoutMs, abortP });
1078
1098
  } catch {
1079
- return; // capture's own timeout — the W1 deadline (#5) settles
1099
+ return; // capture's own timeout — the idle poller (#5a) settles
1080
1100
  }
1081
1101
  if (buf === ABORT_SENTINEL) return; // released by another outcome
1082
1102
  // B7 gate: a paste that never submitted leaves the pane idle
@@ -1088,20 +1108,57 @@ class TmuxProcess extends Process {
1088
1108
  if (turn.token && !turn.submitConfirmed) return;
1089
1109
  // B10 gate: a tool or subagent is in flight — the main pane is
1090
1110
  // quiescent because the agent is WORKING, not done. Ignore
1091
- // capture; settle via JSONL `result` (or W1) when the work
1092
- // returns.
1111
+ // capture; settle via JSONL `result` (or the idle/backstop
1112
+ // racers below) when the work returns.
1093
1113
  if (turn.outstandingTools.size > 0
1094
1114
  || turn.outstandingSubagents.size > 0) return;
1095
1115
  finish({ kind: 'quiesced' });
1096
1116
  })();
1097
1117
 
1098
- // 5. W1 absolute deadline — one timer, not a racer. `unref` so
1099
- // it never keeps the process alive on its own.
1100
- deadlineTimer = setTimeout(
1101
- () => finish({ kind: 'timeout' }),
1102
- turnTimeoutMs,
1118
+ // 5a. Idle-ceiling poller (H3, rc.40). The old W1 was an
1119
+ // ABSOLUTE setTimeout — it killed any turn that ran longer
1120
+ // than turnTimeoutMs, regardless of whether the turn was
1121
+ // making progress (msg 884: 49-min SoundCloud subagent
1122
+ // killed at 30 min while demonstrably alive). H3 inverts:
1123
+ // `turnTimeoutMs` is now the IDLE ceiling. A turn is
1124
+ // wedged only if NO activity (JSONL events, capture-pane
1125
+ // stream signals, OR hook events — see _handleHookEvent)
1126
+ // for `turnTimeoutMs`. Every active signal heartbeats
1127
+ // `turn.lastActivityAt`, resetting the clock implicitly.
1128
+ //
1129
+ // Poll cadence (30 s) is the worst-case detection delay
1130
+ // past the configured ceiling. Cheap.
1131
+ //
1132
+ // Poll cadence is ADAPTIVE: 30 s in production (where
1133
+ // `turnTimeoutMs` is minutes), but capped at
1134
+ // ~`turnTimeoutMs / 4` with a 50 ms floor so test configs
1135
+ // with small `turnTimeoutMs` (e.g. R7's 60 ms wedge test)
1136
+ // still detect idle inside the test's own assertion budget.
1137
+ const pollIntervalMs = Math.max(
1138
+ 50,
1139
+ Math.min(IDLE_POLL_INTERVAL_MS, Math.floor(turnTimeoutMs / 4)),
1140
+ );
1141
+ idlePoller = setInterval(() => {
1142
+ const idleMs = this._now() - turn.lastActivityAt;
1143
+ if (idleMs >= turnTimeoutMs) {
1144
+ finish({ kind: 'timeout', reason: 'idle-ceiling', idleMs });
1145
+ }
1146
+ }, pollIntervalMs);
1147
+ idlePoller.unref?.();
1148
+
1149
+ // 5b. Hard backstop (H3, rc.40). Absolute deadline against a
1150
+ // pathological infinite tool loop that DOES fire hooks
1151
+ // continuously and so never trips the idle ceiling. Default
1152
+ // 4h is far beyond any legitimate single-turn runtime,
1153
+ // even a multi-hour rate-limited SoundCloud crawl. Counted
1154
+ // from turn start.
1155
+ const backstopRemaining = Math.max(
1156
+ 0, (turn.startedAt + this.hardBackstopMs) - this._now());
1157
+ hardBackstopTimer = setTimeout(
1158
+ () => finish({ kind: 'timeout', reason: 'hard-backstop' }),
1159
+ backstopRemaining,
1103
1160
  );
1104
- deadlineTimer.unref?.();
1161
+ hardBackstopTimer.unref?.();
1105
1162
  });
1106
1163
  }
1107
1164
 
@@ -1544,20 +1601,38 @@ class TmuxProcess extends Process {
1544
1601
  }
1545
1602
 
1546
1603
  /**
1547
- * Observer-only hook-event handler. Persists each event for the
1548
- * H1 soak (so the trajectory can be inspected against real Music
1549
- * traffic) and emits a `hook-event` event so process-manager's
1550
- * `onHookEvent` callback writes it to the events DB.
1604
+ * Hook-event handler. Three roles, layered over time:
1551
1605
  *
1552
- * No `turn.*` field consumes hook signals in H1. The next phases
1553
- * (see hook-observability doc):
1554
- * H2 — reactor wiring (kills the fear).
1555
- * H3 — predicate progress + W1 retirement.
1556
- * H4 — `Stop` as authoritative completion.
1606
+ * H1 (rc.36) — emit `hook-event` so polygram persists each event
1607
+ * to the events DB; observer-only.
1608
+ * H2 (rc.38) — sdk/callbacks.js extends onHookEvent to route to
1609
+ * the reactor (PreToolUse → setState, PostToolUse / SubagentStop
1610
+ * / Notification → heartbeat). Kills the fear escalation.
1611
+ * H3 (rc.40) — hook events count as PREDICATE-side liveness too:
1612
+ * every hook event heartbeats the active group's turns so the
1613
+ * idle-ceiling poller in `_awaitSettle` doesn't fire on a long
1614
+ * healthy subagent that is communicating via hooks. THIS is
1615
+ * the structural fix for the msg-884 incident (49-min
1616
+ * SoundCloud subagent killed at the 30-min wall-clock while
1617
+ * demonstrably alive).
1618
+ *
1619
+ * H4 — `Stop` as authoritative completion (still pending).
1620
+ *
1621
+ * Parse errors and unknown event shapes are intentionally still
1622
+ * forwarded — observer-only metrics for stream-reliability soak.
1557
1623
  */
1558
1624
  _handleHookEvent(ev) {
1559
- // Parse errors and unknown event shapes are intentionally
1560
- // forwarded — H1 measures how often they fire on real traffic.
1625
+ // H3: every hook event (except the diagnostic types) is liveness
1626
+ // evidence. Heartbeat every turn in the active group so the
1627
+ // idle-ceiling poller resets. We don't differentiate by event
1628
+ // type — even Notification or UserPromptSubmit prove claude is
1629
+ // active in this session.
1630
+ if (ev?.type && ev.type !== 'parse-error' && ev.type !== 'unknown') {
1631
+ const turns = this._activeGroup?.turns || [];
1632
+ for (const t of turns) {
1633
+ this._heartbeat(t, `hook:${ev.type}`);
1634
+ }
1635
+ }
1561
1636
  this.emit('hook-event', ev);
1562
1637
  }
1563
1638
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.10.0-rc.39",
3
+ "version": "0.10.0-rc.40",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc/client.js",
6
6
  "bin": {