polygram 0.16.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/claude-bin.js CHANGED
@@ -3,6 +3,7 @@
3
3
  const os = require('os');
4
4
  const path = require('path');
5
5
  const fs = require('fs');
6
+ const { execFileSync } = require('child_process');
6
7
 
7
8
  // 0.12 Phase 4: moved from lib/process/tmux-process.js into the helper module
8
9
  // that consumes it, so the constant survives TmuxProcess deletion. CliProcess
@@ -41,9 +42,13 @@ const CLAUDE_CLI_PINNED_VERSION = '2.1.173';
41
42
  * lands — so a $PATH spawn silently drifts (shumorobot 2026-05-16:
42
43
  * CLI auto-updated 2.1.142 → 2.1.143 between deploys).
43
44
  *
44
- * Spawning the ABSOLUTE versioned path is immune to that: the
45
- * updater only ADDS new version files, it never overwrites an
46
- * existing one. `versions/2.1.142` stays byte-identical forever.
45
+ * Spawning the ABSOLUTE versioned path avoids the symlink-drift, but is
46
+ * NOT immune to the updater: claude keeps only the ~3 newest versions
47
+ * and PRUNES (deletes) the rest. Once the pin falls out of the top 3 the
48
+ * pinned path is a dead file → every cli spawn exits in ~14ms (prod
49
+ * outages 2026-06-21/22). So `verifyPinnedClaudeBin` (point-in-time check)
50
+ * is not enough; `ensureVendoredClaudeBin` (below, 0.17) keeps a
51
+ * polygram-owned copy the pruner can't touch.
47
52
  */
48
53
 
49
54
  /**
@@ -92,4 +97,150 @@ function verifyPinnedClaudeBin(version) {
92
97
  }
93
98
  }
94
99
 
95
- module.exports = { resolvePinnedClaudeBin, verifyPinnedClaudeBin, CLAUDE_CLI_PINNED_VERSION };
100
+ // ─── 0.17: vendored pinned binary (immune to claude's auto-pruner) ──────────
101
+ //
102
+ // claude's updater deletes all but the ~3 newest versions, so the pinned
103
+ // version eventually vanishes from ~/.local/share/claude/versions and every
104
+ // cli spawn dies. We can't fall forward (the cli backend reads version-specific
105
+ // TUI internals). Fix: polygram keeps its OWN copy of the exact pinned binary
106
+ // in a dir the pruner never touches, and spawns from there. Once vendored it
107
+ // never depends on the system copy or the network again.
108
+
109
/**
 * Location of the polygram-owned directory that holds vendored claude
 * binaries.
 *
 * The default lives under ~/.local/share/polygram (XDG data): claude's pruner
 * only touches ~/.local/share/claude/versions, and `npm i -g polygram` only
 * replaces the package dir, so the vendored copy survives both.
 *
 * @returns {string} POLYGRAM_CLAUDE_VENDOR_DIR when it is set to a non-empty
 *   value, otherwise <home>/.local/share/polygram/claude-bin.
 */
function vendorDir() {
  const configured = process.env.POLYGRAM_CLAUDE_VENDOR_DIR;
  if (configured) return configured;
  const segments = ['.local', 'share', 'polygram', 'claude-bin'];
  return path.join(os.homedir(), ...segments);
}
119
+
120
/**
 * Is `p` a regular file the current process may execute?
 *
 * An X_OK access check alone also succeeds for directories (their execute bit
 * means "searchable"), which would let a directory pass as a runnable binary.
 * Requiring a regular file closes that gap; symlinks are followed by statSync.
 *
 * @param {string} p - path to test
 * @returns {boolean} true only for an existing, executable regular file; any
 *   error (missing path, bad argument, permission denied) yields false.
 */
function isExecutable(p) {
  try {
    if (!fs.statSync(p).isFile()) return false;
    fs.accessSync(p, fs.constants.X_OK);
    return true;
  } catch {
    return false;
  }
}
123
+
124
/**
 * Atomically install an executable copy of `src` at `dst`.
 *
 * Copies to a uniquely named temp file in the SAME directory as `dst`
 * (`<dst>.tmp.<pid>.<ts>`), marks it 0o755, then renames it over `dst`, so a
 * reader never observes a partially written binary.
 *
 * If any step fails the temp file is removed (best effort) before the original
 * error is rethrown, so a failed copy does not leave a stray temp file behind.
 *
 * @param {string} src - existing file to copy
 * @param {string} dst - final path; its directory must already exist
 * @throws the underlying fs error when the copy, chmod or rename fails
 */
function _atomicCopyExec(src, dst) {
  const tmp = `${dst}.tmp.${process.pid}.${Date.now()}`;
  try {
    fs.copyFileSync(src, tmp);
    fs.chmodSync(tmp, 0o755);
    fs.renameSync(tmp, dst);
  } catch (err) {
    // Cleanup must never mask the real failure.
    try { fs.rmSync(tmp, { force: true }); } catch { /* best effort */ }
    throw err;
  }
}
131
+
132
/**
 * Remove vendored binaries that are not the live version, plus temp copies
 * that are provably stale.
 *
 * Rules, in order:
 *  - `keepVersion` itself is never touched.
 *  - A version-shaped name (`N.N.N`) other than `keepVersion` is removed.
 *  - A temp copy (`N.N.N.tmp.<pid>.<ts>`) is removed ONLY when the creation
 *    timestamp embedded in its name is more than an hour old. A CONCURRENT
 *    boot (a multi-bot host shares this dir) may be mid-copy into a recent
 *    temp file; deleting that would ENOENT its rename, so recent temp files
 *    are always left alone. Without the age-based reap, a temp file orphaned
 *    by a crash mid-copy would stay on disk forever.
 *  - Anything else is ignored, so a misconfigured vendor dir can't nuke
 *    unrelated files.
 *
 * Best effort throughout: an unreadable directory is a no-op and a failed
 * removal is only logged.
 *
 * @param {string} dir - vendor directory to sweep
 * @param {string} keepVersion - version name that must survive
 * @param {{ warn?: Function }} [logger] - optional logger for removal failures
 */
function _gcVendored(dir, keepVersion, logger) {
  // Far longer than any real copy; the install step itself is capped well below this.
  const STALE_TMP_MS = 60 * 60 * 1000;
  let entries = [];
  try { entries = fs.readdirSync(dir); } catch { return; }
  const now = Date.now();
  for (const name of entries) {
    if (name === keepVersion) continue;
    let removable = /^\d+\.\d+\.\d+$/.test(name);
    if (!removable) {
      const tmpMatch = /^\d+\.\d+\.\d+\.tmp\.\d+\.(\d+)$/.exec(name);
      removable = tmpMatch !== null && now - Number(tmpMatch[1]) > STALE_TMP_MS;
    }
    if (!removable) continue;
    try { fs.rmSync(path.join(dir, name), { force: true }); } catch (e) {
      logger?.warn?.(`[claude-bin] vendor GC: could not remove ${name}: ${e.message}`);
    }
  }
}
151
+
152
/**
 * Ensure a polygram-owned copy of the pinned claude binary exists and return
 * its path.
 *
 * Resolution order:
 *  1. POLYGRAM_CLAUDE_BIN, when set, wins outright (non-standard installs /
 *     CI / tests) and is only checked for executability.
 *  2. An already-vendored copy (fast path).
 *  3. Copy from the system install (POLYGRAM_CLAUDE_VERSIONS_DIR or
 *     ~/.local/share/claude/versions/<version>).
 *  4. Run `<installer> install <version>` synchronously, then copy.
 *
 * Old vendored versions are garbage-collected only AFTER the copy for
 * `version` has been confirmed executable, so a bad copy never costs the
 * previously vendored files.
 *
 * Never throws for the handled failure modes; they are reported via
 * `{ ok: false, reason }`.
 *
 * @param {string} version - exact claude version to vendor (non-empty string)
 * @param {{ logger?: object }} [opts]
 * @returns {{ ok: boolean, path: string, vendored?: boolean, reason?: string }}
 */
function ensureVendoredClaudeBin(version, { logger = console } = {}) {
  // Explicit override wins, unchanged — non-standard installs / CI / tests.
  const override = process.env.POLYGRAM_CLAUDE_BIN;
  if (override) {
    return isExecutable(override)
      ? { ok: true, path: override, vendored: false }
      : { ok: false, path: override, reason: `POLYGRAM_CLAUDE_BIN=${override} not executable` };
  }

  // An empty version would join to the vendor dir itself; a non-string would
  // throw inside path.join. Report both as an ordinary failure instead.
  if (typeof version !== 'string' || version.length === 0) {
    return { ok: false, path: '', reason: `invalid claude version: ${String(version)}` };
  }

  const dir = vendorDir();
  const vendored = path.join(dir, version);

  // Fast path: already vendored.
  if (isExecutable(vendored)) {
    _gcVendored(dir, version, logger);
    return { ok: true, path: vendored, vendored: true };
  }

  // Need to obtain it. Ensure the dir exists.
  try { fs.mkdirSync(dir, { recursive: true }); } catch (e) {
    return { ok: false, path: vendored, reason: `cannot create vendor dir ${dir}: ${e.message}` };
  }

  const versionsDir = process.env.POLYGRAM_CLAUDE_VERSIONS_DIR
    || path.join(os.homedir(), '.local', 'share', 'claude', 'versions');
  const systemPath = path.join(versionsDir, version);

  // Install the exact version first when the system copy is absent.
  let installedNow = false;
  if (!isExecutable(systemPath)) {
    // If POLYGRAM_CLAUDE_INSTALL_BIN is set, use it VERBATIM (no fallback — an
    // explicit override that's wrong must fail loudly, not silently shell out to
    // a different claude). Otherwise prefer ~/.local/bin/claude, else PATH.
    let installerBin = process.env.POLYGRAM_CLAUDE_INSTALL_BIN;
    if (!installerBin) {
      const localBin = path.join(os.homedir(), '.local', 'bin', 'claude');
      installerBin = isExecutable(localBin) ? localBin : 'claude';
    }
    logger?.warn?.(`[claude-bin] pinned claude v${version} absent from ${systemPath}; installing via ${installerBin}…`);
    try {
      // Synchronous: blocks boot until the install completes. Rare (deploys
      // pre-install the pin → the plain copy path is the norm). On the VPS
      // polygram boots DETACHED in tmux (Type=oneshot start-sessions.sh), so
      // this block is NOT gated by systemd's TimeoutStartSec; on the Mac launchd
      // has no hard start-timeout. Timeout kept under the VPS unit's 120s anyway.
      execFileSync(installerBin, ['install', version], { timeout: 110_000, stdio: 'ignore' });
    } catch (e) {
      return {
        ok: false, path: vendored,
        reason: `claude v${version} not present and \`claude install ${version}\` failed (${e.message}). `
          + 'Install it manually or set POLYGRAM_CLAUDE_BIN.',
      };
    }
    if (!isExecutable(systemPath)) {
      return { ok: false, path: vendored, reason: `claude install ${version} ran but ${systemPath} still missing` };
    }
    installedNow = true;
  }

  // Copy the (pre-existing or freshly installed) system binary into the vendor dir.
  try {
    _atomicCopyExec(systemPath, vendored);
    logger?.log?.(installedNow
      ? `[claude-bin] installed + vendored claude v${version} → ${vendored}`
      : `[claude-bin] vendored claude v${version} ← ${systemPath} → ${vendored}`);
  } catch (e) {
    return {
      ok: false, path: vendored,
      reason: installedNow
        ? `copy after install failed: ${e.message}`
        : `copy ${systemPath} → ${vendored} failed: ${e.message}`,
    };
  }

  // Verify BEFORE collecting older versions.
  if (!isExecutable(vendored)) {
    return { ok: false, path: vendored, reason: `vendored copy ${vendored} is not executable after copy` };
  }
  _gcVendored(dir, version, logger);
  return { ok: true, path: vendored, vendored: true };
}
239
+
240
+ module.exports = {
241
+ resolvePinnedClaudeBin,
242
+ verifyPinnedClaudeBin,
243
+ ensureVendoredClaudeBin,
244
+ vendorDir,
245
+ CLAUDE_CLI_PINNED_VERSION,
246
+ };
@@ -317,6 +317,13 @@ mcp.setRequestHandler(ListToolsRequestSchema, async () => {
317
317
  turn_id: { type: 'string', description: 'Echo of turn_id from inbound channel meta (required for correct turn routing).' },
318
318
  text: { type: 'string', description: 'Message body (markdown ok).' },
319
319
  files: { type: 'array', items: { type: 'string' }, description: 'Optional absolute file paths to attach.' },
320
+ interim: {
321
+ type: 'boolean',
322
+ description: 'Set true ONLY for a short status/progress update on a long task '
323
+ + '(e.g. "Looking into that now…"). An interim reply is shown to the user but is '
324
+ + 'NOT the turn\'s answer — you MUST still deliver the real result as a later reply '
325
+ + 'with interim omitted/false in the SAME turn. NEVER end a turn on an interim reply.',
326
+ },
320
327
  // 0.13 D2 Tier 2C: the fold-acknowledgment contract. The single turn_id
321
328
  // field can't express a combined reply that covers a mid-turn follow-up
322
329
  // (P0 spike Q-B: claude echoes only the trigger id) — this array can.
@@ -776,14 +776,21 @@ class CliProcess extends Process {
776
776
  '',
777
777
  'So once you are clearly into multi-step work — you have run a couple of tool',
778
778
  'calls without replying, or the request plainly needs research / several steps —',
779
- 'send a SHORT one-line status via `reply` (it returns a `message_id`), then use',
780
- '`mcp__polygram-bridge__edit_message` on that SAME `message_id` to update the',
781
- 'bubble as you progress. `edit_message` is for INTERIM status ONLY.',
779
+ 'send a SHORT one-line status via `reply` WITH `interim: true` (it returns a',
780
+ '`message_id`), then use `mcp__polygram-bridge__edit_message` on that SAME',
781
+ '`message_id` to update the bubble as you progress. `edit_message` is for',
782
+ 'INTERIM status ONLY.',
782
783
  '',
783
- 'Deliver the FINAL answer as a fresh `reply`, never as an edit: a fresh reply',
784
- 'notifies the user and carries `consumed_turn_ids`; an edit does neither. If you',
785
- 'no longer have the status bubble\'s message_id, just send a fresh `reply` ',
786
- 'never guess an id.',
784
+ 'HARD RULE a status is a MID-TURN update, NOT the end of work. After an',
785
+ 'interim reply you MUST keep working in the SAME turn and deliver the real',
786
+ 'result. NEVER end your turn on a status / "give me a couple min" / "looking',
787
+ 'into it" reply with no result behind it — that leaves the user staring at a',
788
+ 'promise with nothing delivered. Do the work, then answer.',
789
+ '',
790
+ 'Deliver the FINAL answer as a fresh `reply` with interim omitted/false, never',
791
+ 'as an edit: a fresh reply notifies the user and carries `consumed_turn_ids`; an',
792
+ 'edit does neither. If you no longer have the status bubble\'s message_id, just',
793
+ 'send a fresh `reply` — never guess an id.',
787
794
  '',
788
795
  'If you will finish in one or two tool calls, just answer — no status bubble.',
789
796
  'Status is for work that takes time, not for quick answers (do not spam it).',
@@ -1320,6 +1327,7 @@ class CliProcess extends Process {
1320
1327
  threadId: this.threadId,
1321
1328
  toolName: msg.name,
1322
1329
  text: args.text,
1330
+ interim: args.interim === true, // status/progress reply — not the turn's answer
1323
1331
  files: args.files,
1324
1332
  messageId: args.message_id, // 0.13: edit_message target bubble
1325
1333
  sourceMsgId, // reaction/quote target (A2)
@@ -1373,7 +1381,7 @@ class CliProcess extends Process {
1373
1381
  // fall back to the SINGLE pending turn if exactly one exists, else the
1374
1382
  // oldest pending — log a warning either way so we can audit drift.
1375
1383
  if (msg.name === 'reply' && result?.ok && typeof args.text === 'string' && args.text.length > 0) {
1376
- this._recordReplyForPendingTurn(args.text, args.turn_id);
1384
+ this._recordReplyForPendingTurn(args.text, args.turn_id, args.interim === true);
1377
1385
  }
1378
1386
  }
1379
1387
 
@@ -1382,8 +1390,10 @@ class CliProcess extends Process {
1382
1390
  *
1383
1391
  * @param {string} text
1384
1392
  * @param {string|undefined} replyTurnId — echoed from Claude's reply tool args
1393
+ * @param {boolean} interim — true for a status/progress reply (`interim:true`),
1394
+ * which is NOT the turn's answer. A reply is FINAL by default (fail-safe).
1385
1395
  */
1386
- _recordReplyForPendingTurn(text, replyTurnId) {
1396
+ _recordReplyForPendingTurn(text, replyTurnId, interim = false) {
1387
1397
  // 0.13 D2 (S5 tightening): a reply echoing a KNOWN ledgered turn_id that is
1388
1398
  // NOT the current pending is a LATE reply from an earlier cycle (post-
1389
1399
  // finalize tails, fireUserMessage cycles, ask wrap-ups). Pre-P3 the
@@ -1469,6 +1479,12 @@ class CliProcess extends Process {
1469
1479
 
1470
1480
  target.replies.push(text);
1471
1481
  target.replyCount = (target.replyCount || 0) + 1;
1482
+ // A status/progress reply (`interim:true`) is delivered but is NOT the turn's
1483
+ // answer — track it so the finalizer can tell an interim-only turn (a promise
1484
+ // like "give me a couple min") from a delivered result, and so the ceilings
1485
+ // keep extending it as still-working rather than resolving it as done.
1486
+ // docs/progress-is-not-turn-end-spec.md
1487
+ if (interim) target._interimReplyCount = (target._interimReplyCount || 0) + 1;
1472
1488
 
1473
1489
  if (this._sawHookStream) {
1474
1490
  // 0.13 D1: a delivered reply is ACTIVITY — rung 2 (activity-quiet) owns
@@ -1756,9 +1772,11 @@ class CliProcess extends Process {
1756
1772
  */
1757
1773
  _armActivityQuiet(turnId, pending) {
1758
1774
  if (!this._sawHookStream) return;
1759
- // ≥1 reply, OR seen + consumed-acked (the answer rode a sibling turn_id —
1760
- // fold-id echo; see _ledgerAckConsumed). Same eligibility as the fire site.
1761
- if ((!pending.replies || pending.replies.length === 0)
1775
+ // ≥1 FINAL reply, OR seen + consumed-acked (the answer rode a sibling turn_id —
1776
+ // fold-id echo; see _ledgerAckConsumed). Same eligibility as the fire site. An
1777
+ // interim-only turn (status promise, no final reply) is NOT eligible — it must
1778
+ // keep working, not quiet-finalize as done. docs/progress-is-not-turn-end-spec.md
1779
+ if (!this._turnHasFinalReply(pending)
1762
1780
  && !(pending.seen === true && pending._consumedAcked === true)) return;
1763
1781
  if (this._openQuestions.size > 0) return;
1764
1782
  if (pending._stopGracePending) return;
@@ -1792,7 +1810,7 @@ class CliProcess extends Process {
1792
1810
  // Eligibility: ≥1 bound reply, OR seen + consumed-acked (the answer went
1793
1811
  // out under a sibling turn_id — fold-id echo; see _ledgerAckConsumed).
1794
1812
  const consumedAcked = pending.seen === true && pending._consumedAcked === true;
1795
- if ((!pending.replies || pending.replies.length === 0) && !consumedAcked) return;
1813
+ if (!this._turnHasFinalReply(pending) && !consumedAcked) return;
1796
1814
  const lastHookAgeMs = this._lastHookEventAt ? Date.now() - this._lastHookEventAt : null;
1797
1815
  this._logEvent('cli-activity-quiet-finalize', {
1798
1816
  turn_id: turnId,
@@ -1910,6 +1928,81 @@ class CliProcess extends Process {
1910
1928
  this.on('stop-hook', onStop);
1911
1929
  }
1912
1930
 
1931
+ /**
1932
+ * Has this turn delivered a FINAL (non-interim) reply? A reply is final by
1933
+ * default; only `interim:true` status replies don't count. A turn whose only
1934
+ * output is a status promise has NOT delivered its answer. Used by the
1935
+ * finalizer and the absolute checkpoint so an interim-only turn is treated as
1936
+ * still-working (keep extending / deliver the produced result), not as done.
1937
+ */
1938
+ _turnHasFinalReply(pending) {
1939
+ return (pending?.replies?.length || 0) > (pending?._interimReplyCount || 0);
1940
+ }
1941
+
1942
+ /**
1943
+ * Compute the {text, alreadyDelivered} a resolving turn delivers, honoring the
1944
+ * interim-reply rules. Shared by BOTH resolve paths — `_finalizeTurn` (Stop /
1945
+ * activity-quiet) AND the `fireTimeout` ceiling-resolve — so neither drops the
1946
+ * produced answer of an interim-only turn. docs/progress-is-not-turn-end-spec.md
1947
+ *
1948
+ * - a FINAL reply landed → its text was already delivered incrementally
1949
+ * (polygram.js short-circuits) → alreadyDelivered.
1950
+ * - zero replies → 0.12/0.13 Stop-fallback: deliver last_assistant_message
1951
+ * unless a consuming sibling already carried it (consumed-ack).
1952
+ * - interim-only (status promise, no final) → deliver the produced final answer
1953
+ * (last_assistant_message) if it exists and is distinct from the status / a
1954
+ * sibling's text; otherwise leave the status (nothing more to send).
1955
+ */
1956
+ _resolveTurnDelivery(pending, turnId) {
1957
+ const norm = (s) => (s || '').trim();
1958
+ const interimText = pending.replies.join('\n\n');
1959
+ const fallbackText = pending._stopHookData?.lastAssistantMessage || '';
1960
+
1961
+ if (this._turnHasFinalReply(pending)) {
1962
+ return { text: interimText, alreadyDelivered: true };
1963
+ }
1964
+ if (pending.replies.length === 0) {
1965
+ // 0.12 Phase 1.7 fallback: no reply tool call landed — use the Stop hook's
1966
+ // last_assistant_message so the user isn't left with silence (rc.41 H4).
1967
+ const usedStopFallback = !!fallbackText;
1968
+ const text = usedStopFallback ? fallbackText : '';
1969
+ if (usedStopFallback) {
1970
+ this.logger.warn?.(`[${this.label}] cli: turn finalized via stop-hook fallback (no reply tool call); text_len=${text.length}`);
1971
+ }
1972
+ // A _consumedAcked turn is "already delivered" ONLY when the consuming sibling
1973
+ // reply actually carried THIS text — not merely an ack (prod 2026-06-13: a
1974
+ // "Researching now…" ack then the real answer as Stop-fallback was suppressed
1975
+ // and dropped for 5h20m). docs/0.13-consumed-ack-stop-fallback-drop-spec.md
1976
+ const consumedCoversFallback = !usedStopFallback || norm(text) === norm(pending._consumedByText);
1977
+ const alreadyDelivered = pending._consumedAcked === true && consumedCoversFallback;
1978
+ if (usedStopFallback && pending._consumedAcked === true && !consumedCoversFallback) {
1979
+ this.logger.warn?.(`[${this.label}] cli: consumed-ack did NOT cover the Stop-fallback answer — delivering rescued text (len=${text.length})`);
1980
+ this._logEvent('cli-consumed-ack-fallback-rescued', {
1981
+ turn_id: turnId, session_key: this.sessionKey, backend: this.backend,
1982
+ rescued_len: text.length, ack_len: norm(pending._consumedByText).length,
1983
+ });
1984
+ }
1985
+ return { text, alreadyDelivered };
1986
+ }
1987
+ // Interim-only: the turn delivered ONLY status/progress promises ("give me a
1988
+ // couple min") and never a final reply. If claude produced a substantive final
1989
+ // answer as its last assistant message — distinct from the status, and not text a
1990
+ // consuming sibling already delivered — DELIVER it (the status bubbles are already
1991
+ // on screen, so send the FINAL only). Else leave the status; don't re-deliver it.
1992
+ const interimRescue = !!fallbackText
1993
+ && norm(fallbackText) !== norm(interimText)
1994
+ && norm(fallbackText) !== norm(pending._consumedByText);
1995
+ if (interimRescue) {
1996
+ this.logger.warn?.(`[${this.label}] cli: interim-only turn — delivering the produced final answer the status promise didn't (len=${fallbackText.length})`);
1997
+ this._logEvent('cli-interim-only-final-rescued', {
1998
+ turn_id: turnId, session_key: this.sessionKey, backend: this.backend,
1999
+ rescued_len: fallbackText.length, interim_count: pending.replies.length,
2000
+ });
2001
+ return { text: fallbackText, alreadyDelivered: false };
2002
+ }
2003
+ return { text: interimText, alreadyDelivered: true };
2004
+ }
2005
+
1913
2006
  _finalizeTurn(turnId) {
1914
2007
  const pending = this.pendingTurns.get(turnId);
1915
2008
  if (!pending) return;
@@ -1924,39 +2017,7 @@ class CliProcess extends Process {
1924
2017
  if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
1925
2018
  if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer); // 0.13 D1
1926
2019
  if (pending._onStop) { this.off('stop-hook', pending._onStop); pending._onStop = null; }
1927
- const hadReplyToolCalls = pending.replies.length > 0;
1928
- let text = pending.replies.join('\n\n');
1929
- // 0.12 Phase 1.7 fallback: if no reply tool calls landed (claude ended
1930
- // the turn without calling mcp__polygram-bridge__reply), use the Stop
1931
- // hook's last_assistant_message as the text. Same rescue pattern rc.41
1932
- // H4 uses on tmux backend when JSONL stream is broken.
1933
- const usedStopFallback = !text && !!pending._stopHookData?.lastAssistantMessage;
1934
- if (usedStopFallback) {
1935
- text = pending._stopHookData.lastAssistantMessage;
1936
- this.logger.warn?.(`[${this.label}] cli: turn finalized via stop-hook fallback (no reply tool call); text_len=${text.length}`);
1937
- }
1938
- // A _consumedAcked turn is "already delivered" ONLY when the consuming
1939
- // sibling reply actually carried THIS text — not merely an ack. Prod
1940
- // 2026-06-13 (Shumabit@UMI/37): the consuming reply was a 294-char
1941
- // "Researching now…" ack, then the real answer arrived as Stop-fallback
1942
- // text. Suppressing it (alreadyDelivered=true → polygram.js short-circuit)
1943
- // dropped the answer silently for 5h20m. Only suppress when the rescued
1944
- // text matches what the sibling delivered.
1945
- // docs/0.13-consumed-ack-stop-fallback-drop-spec.md
1946
- const norm = (s) => (s || '').trim();
1947
- const consumedCoversFallback = !usedStopFallback || norm(text) === norm(pending._consumedByText);
1948
- const alreadyDelivered = hadReplyToolCalls
1949
- || (pending._consumedAcked === true && consumedCoversFallback);
1950
- if (usedStopFallback && pending._consumedAcked === true && !consumedCoversFallback) {
1951
- this.logger.warn?.(`[${this.label}] cli: consumed-ack did NOT cover the Stop-fallback answer — delivering rescued text (len=${text.length})`);
1952
- this._logEvent('cli-consumed-ack-fallback-rescued', {
1953
- turn_id: turnId,
1954
- session_key: this.sessionKey,
1955
- backend: this.backend,
1956
- rescued_len: text.length,
1957
- ack_len: norm(pending._consumedByText).length,
1958
- });
1959
- }
2020
+ const { text, alreadyDelivered } = this._resolveTurnDelivery(pending, turnId);
1960
2021
  const duration = Date.now() - pending.startedAt;
1961
2022
  // Review AC4: cost=null + metrics-tokens=null signal "unmeasured-subscription"
1962
2023
  // (channels protocol doesn't expose per-turn cost or token breakdowns).
@@ -2007,7 +2068,13 @@ class CliProcess extends Process {
2007
2068
  // claude copied in to send don't accumulate on disk across turns. Only
2008
2069
  // when fully idle, so a file staged for a still-pending concurrent turn
2009
2070
  // isn't yanked mid-send.
2010
- if (this.pendingTurns.size === 0) this._purgeStagingDir();
2071
+ if (this.pendingTurns.size === 0) {
2072
+ this._purgeStagingDir();
2073
+ // B3: fully idle — drop any in-flight sub-agent bookkeeping so a lost
2074
+ // SubagentStop can't leak a stale count (a stuck "working" hold) into the
2075
+ // next turn. Safe only when no turn is pending (it's proc-wide state).
2076
+ this._pendingSubagentStarts = [];
2077
+ }
2011
2078
  }
2012
2079
 
2013
2080
  /**
@@ -2129,14 +2196,20 @@ class CliProcess extends Process {
2129
2196
  // 2026-06-11 19:49 false ⏱; see _ledgerAckConsumed).
2130
2197
  if ((pending.replies?.length || 0) > 0
2131
2198
  || (pending.seen === true && pending._consumedAcked === true)) {
2199
+ // Interim-aware: an interim-only turn delivers its PRODUCED final answer
2200
+ // here too (not the status promise) — the same rescue as _finalizeTurn, so
2201
+ // the answer isn't dropped when the turn resolves at a ceiling rather than
2202
+ // via Stop. docs/progress-is-not-turn-end-spec.md
2203
+ const { text, alreadyDelivered } = this._resolveTurnDelivery(pending, turnId);
2132
2204
  this._logEvent('cli-turn-ceiling-resolved', {
2133
2205
  reason, turnTimeoutMs, reply_count: pending.replies?.length || 0,
2134
2206
  consumed_acked: pending._consumedAcked === true,
2207
+ interim_only: !this._turnHasFinalReply(pending),
2135
2208
  });
2136
2209
  this.emit('idle');
2137
2210
  resolve({
2138
- text: pending.replies.join('\n\n'),
2139
- alreadyDelivered: true,
2211
+ text,
2212
+ alreadyDelivered,
2140
2213
  sessionId: this.claudeSessionId,
2141
2214
  cost: null,
2142
2215
  duration: Date.now() - pending.startedAt,
@@ -2378,8 +2451,11 @@ class CliProcess extends Process {
2378
2451
  async _checkpointAbsolute(turnId) {
2379
2452
  if (!this.pendingTurns.has(turnId)) return;
2380
2453
  let pending = this.pendingTurns.get(turnId);
2381
- // Replied turn (or consumed-acked): the ceiling RESOLVES it, never extends.
2382
- if ((pending.replies?.length || 0) > 0
2454
+ // Turn with a FINAL reply (or consumed-acked): the ceiling RESOLVES it, never
2455
+ // extends. An interim-only turn (status promise, no final reply) is still
2456
+ // working — fall through to the busy-aware probe so it extends, not resolves.
2457
+ // docs/progress-is-not-turn-end-spec.md
2458
+ if (this._turnHasFinalReply(pending)
2383
2459
  || (pending.seen === true && pending._consumedAcked === true)) {
2384
2460
  pending._fireTimeout('absolute');
2385
2461
  return;
@@ -2394,7 +2470,7 @@ class CliProcess extends Process {
2394
2470
  // now would resurrect a settling turn (spurious "still working" right as the
2395
2471
  // real answer lands). It will finalize through its own quiet/grace path.
2396
2472
  if (pending._stopGracePending
2397
- || (pending.replies?.length || 0) > 0
2473
+ || this._turnHasFinalReply(pending)
2398
2474
  || (pending.seen === true && pending._consumedAcked === true)) return;
2399
2475
  const now = Date.now();
2400
2476
  const elapsed = now - pending.startedAt;
@@ -2830,6 +2906,9 @@ class CliProcess extends Process {
2830
2906
  // SubagentStop). We still emit the start event so the reactor
2831
2907
  // can transition into SUBAGENT state immediately.
2832
2908
  toolUseId: ev.toolUseId,
2909
+ // B3: in-flight sub-agent count so the reactor holds a "working" face
2910
+ // (suppresses the 🥱/😨 decay) until the LAST sub-agent finishes.
2911
+ inFlight: this._pendingSubagentStarts.length,
2833
2912
  backend: this.backend,
2834
2913
  });
2835
2914
  return;
@@ -2879,6 +2958,9 @@ class CliProcess extends Process {
2879
2958
  agentId: ev.agentId,
2880
2959
  durationMs: ev.durationMs,
2881
2960
  toolUseId: subagentToolUseId,
2961
+ // B3: remaining in-flight sub-agents (post-decrement). 0 ⇒ the reactor
2962
+ // resumes the normal stall/freeze cascade.
2963
+ inFlight: this._pendingSubagentStarts.length,
2882
2964
  backend: this.backend,
2883
2965
  });
2884
2966
  return;
@@ -787,7 +787,13 @@ function createSdkCallbacks({
787
787
  // prior tool's emoji. The plan promised this; previously the handler
788
788
  // only persisted the DB row and never touched the reactor.
789
789
  const r = entry?.pendingQueue?.[0]?.context?.reactor;
790
- if (r) r.setState('SUBAGENT');
790
+ if (r) {
791
+ r.setState('SUBAGENT');
792
+ // B3: hold a "working" face for the whole sub-agent run — the quiet
793
+ // stretch between its tool hooks is expected, not a stall, so suppress
794
+ // the 🥱/😨 decay until it finishes. docs/progress-is-not-turn-end-spec.md
795
+ if (typeof r.setWorkInFlight === 'function') r.setWorkInFlight(true);
796
+ }
791
797
  } catch (err) {
792
798
  logger.error?.(`[${botName}] subagent-start handler: ${err.message}`);
793
799
  }
@@ -798,7 +804,12 @@ function createSdkCallbacks({
798
804
  // L9/L14: heartbeat at subagent end so the cascade/stall clock
799
805
  // resets; the next tool's PreToolUse sets the following state.
800
806
  const r = entry?.pendingQueue?.[0]?.context?.reactor;
801
- if (r && typeof r.heartbeat === 'function') r.heartbeat();
807
+ if (r) {
808
+ // B3: release the working-hold only when the LAST sub-agent finishes
809
+ // (inFlight === 0) — nested/parallel sub-agents keep it held.
810
+ if (typeof r.setWorkInFlight === 'function') r.setWorkInFlight((payload?.inFlight ?? 0) > 0);
811
+ if (typeof r.heartbeat === 'function') r.heartbeat();
812
+ }
802
813
  logEvent('subagent-done', {
803
814
  chat_id: getChatIdFromKey(sessionKey),
804
815
  session_key: sessionKey,
@@ -226,6 +226,9 @@ function createReactionManager({
226
226
  // Chaining all applies through `applyChain` guarantees they're sent
227
227
  // to Telegram in setState() invocation order.
228
228
  let applyChain = Promise.resolve();
229
+ // B3: set true while a sub-agent / background work is in flight — suppresses the
230
+ // stall/freeze decay so a working-but-quiet turn never shows 🥱/😨.
231
+ let workInFlight = false;
229
232
  // States the auto-stall path may transition to. Once we've already
230
233
  // shown STALL or TIMEOUT we don't downgrade or rearm — only an
231
234
  // explicit setState() call (Claude resumed) can move us forward.
@@ -330,6 +333,10 @@ function createReactionManager({
330
333
  const armStallTimers = () => {
331
334
  clearStallTimers();
332
335
  if (stopped) return;
336
+ // B3: while a sub-agent (or background work) is genuinely in flight, a quiet
337
+ // stretch is EXPECTED — the turn is working, not stalled. Don't arm the
338
+ // 🥱/😨 decay; hold the current working face until work drains.
339
+ if (workInFlight) return;
333
340
  if (!STALL_PROMOTABLE.has(currentState)) return;
334
341
  stallTimer = setTimeout(() => {
335
342
  stallTimer = null;
@@ -432,6 +439,7 @@ function createReactionManager({
432
439
 
433
440
  const stop = () => {
434
441
  stopped = true;
442
+ workInFlight = false; // B3: defense-in-depth if a reactor is ever reused
435
443
  if (pendingTimer) { clearTimeout(pendingTimer); pendingTimer = null; }
436
444
  clearStallTimers();
437
445
  clearDeepeningTimers();
@@ -452,11 +460,24 @@ function createReactionManager({
452
460
  armStallTimers();
453
461
  };
454
462
 
463
// B3: record whether work (a sub-agent / background shell) is currently in
// flight. While it is, silence between tool hooks is expected, so any pending
// stall/freeze decay is cancelled; once work drains, the stall timers are
// re-armed. A call that does not change the flag is a no-op.
// docs/progress-is-not-turn-end-spec.md
const setWorkInFlight = (active) => {
  const wanted = !!active;
  if (wanted === workInFlight) return;
  workInFlight = wanted;
  if (wanted) {
    clearStallTimers(); // cancel any pending 🥱/😨 decay
  } else {
    armStallTimers(); // work drained — resume the cascade
  }
};
474
+
455
475
  return {
456
476
  setState,
457
477
  clear,
458
478
  stop,
459
479
  heartbeat,
480
+ setWorkInFlight,
460
481
  // Introspection for tests:
461
482
  get currentState() { return currentState; },
462
483
  get currentEmoji() { return currentEmoji; },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.16.0",
3
+ "version": "0.17.1",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc/client.js",
6
6
  "bin": {
package/polygram.js CHANGED
@@ -2484,12 +2484,16 @@ async function main() {
2484
2484
  // 0.11.0: binCheck reused for channels backend wiring below.
2485
2485
  let pinnedClaudeBin = null;
2486
2486
  {
2487
- const { CLAUDE_CLI_PINNED_VERSION } = require('./lib/claude-bin');
2488
- const { verifyPinnedClaudeBin } = require('./lib/claude-bin');
2489
- const binCheck = verifyPinnedClaudeBin(CLAUDE_CLI_PINNED_VERSION);
2487
+ // 0.17: vendor a polygram-owned copy of the pinned binary so claude's
2488
+ // auto-pruner (keeps only ~3 newest, deletes the rest) can't take cli chats
2489
+ // down. Spawns from ~/.local/share/polygram/claude-bin/<version>, immune to
2490
+ // pruning. Self-heals on boot (copy from the system install, else install).
2491
+ const { CLAUDE_CLI_PINNED_VERSION, ensureVendoredClaudeBin } = require('./lib/claude-bin');
2492
+ const binCheck = ensureVendoredClaudeBin(CLAUDE_CLI_PINNED_VERSION, { logger: console });
2490
2493
  if (binCheck.ok) {
2491
2494
  console.log(
2492
- `[polygram] CliProcess pinned to claude CLI v${CLAUDE_CLI_PINNED_VERSION}: ${binCheck.path}`,
2495
+ `[polygram] CliProcess pinned to claude CLI v${CLAUDE_CLI_PINNED_VERSION}: ${binCheck.path}`
2496
+ + `${binCheck.vendored ? ' (vendored)' : ''}`,
2493
2497
  );
2494
2498
  pinnedClaudeBin = binCheck.path;
2495
2499
  } else {