polygram 0.17.3 → 0.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -119,11 +119,19 @@ function resolveToolAck(toolCallId, ok, error, messageId) {
119
119
  }
120
120
 
121
121
  // ─── 0.12 interactive questions: `ask` blocks for the user's answer ──
122
- // Separate from tool_ack: a question can take MINUTES (the daemon-side 30-min
123
- // question timeout — aligned to the turn absolute cap — resolves it with {timedout}
124
- // before this hard ceiling, which sits just above as the last-resort backstop).
122
+ // Separate from tool_ack: a question waits for the user, possibly for hours. The
123
+ // DAEMON owns the lifecycle — it resolves the ask with the user's answer, or sweeps
124
+ // it {timedout} at its configured question timeout (POLYGRAM_QUESTION_TIMEOUT_MS,
125
+ // default 24h). This local timer is ONLY a last-resort backstop for the narrow case
126
+ // where the daemon stays connected but never calls back; it sits a margin ABOVE the
127
+ // daemon timeout so the daemon always resolves first (with the proper user-facing
128
+ // message). It must track the daemon value — a hardcoded 32min here once fired long
129
+ // before the 24h wait, resolving {timedout} on a question the user answered an hour
130
+ // later (0.17.5).
125
131
  const pendingQuestions = new Map() // tool_call_id → { resolve, timer }
126
- const QUESTION_ANSWER_TIMEOUT_MS = 32 * 60 * 1000
132
+ const QUESTION_BACKSTOP_MARGIN_MS = 5 * 60 * 1000
133
+ const DAEMON_QUESTION_TIMEOUT_MS = Number(process.env.POLYGRAM_QUESTION_TIMEOUT_MS) || (24 * 60 * 60 * 1000)
134
+ const QUESTION_ANSWER_TIMEOUT_MS = DAEMON_QUESTION_TIMEOUT_MS + QUESTION_BACKSTOP_MARGIN_MS
127
135
 
128
136
  function awaitQuestionAnswer(toolCallId) {
129
137
  return new Promise((resolve) => {
@@ -48,6 +48,10 @@ const { Process, UnsupportedOperationError } = require('./process');
48
48
  const { ChannelsBridgeServer } = require('./channels-bridge-server');
49
49
  const { writeHookFiles, removeHookFiles } = require('./hook-settings');
50
50
  const { createHookTail } = require('./hook-event-tail');
51
+ // Single source of truth for the question wait: the daemon owns the question
52
+ // lifecycle (answer or {timedout} sweep), and we pass this to the bridge so its
53
+ // last-resort `ask` backstop sits ABOVE it instead of undercutting it.
54
+ const { DEFAULT_TIMEOUT_MS: QUESTION_TIMEOUT_MS } = require('../questions/store');
51
55
  // File-send staging: reuse the dispatcher's allowlist root so the dir we
52
56
  // create exactly matches the realpath the validator accepts (no /tmp vs
53
57
  // /private/tmp drift — one of the original Music-topic failures).
@@ -343,6 +347,9 @@ class CliProcess extends Process {
343
347
  // is the broader surface (hooks + pane heartbeat + bridge tool calls).
344
348
  this._lastHookEventAt = 0;
345
349
  this._lastActivityAt = 0;
350
+ // Monotonic count of work hooks (all but the terminal Stop) — the rung-2
351
+ // no-reply backstop snapshots it at Stop capture to detect a later resume.
352
+ this._workHookSeq = 0;
346
353
  // 0.13 D2: the InputLedger — every user-shaped input written to the bridge
347
354
  // gets an observable lifecycle: written → seen → resolved | dropped |
348
355
  // superseded | fold-suspected. Pre-P3, injectUserMessage minted a turn_id
@@ -553,13 +560,25 @@ class CliProcess extends Process {
553
560
  await this.bridgeServer.listen();
554
561
  }
555
562
 
556
- async _spawnTmuxClaude({ tmuxName, opts }) {
557
- const bridgeEnv = {
558
- POLYGRAM_SESSION_KEY: this.sessionKey,
559
- POLYGRAM_SOCK: this.sockPath,
560
- POLYGRAM_SOCK_SECRET: this.sockSecret,
561
- POLYGRAM_CLAUDE_SESSION_ID: this.claudeSessionId,
563
+ /**
564
+ * Env for the spawned channels-bridge MCP subprocess. POLYGRAM_QUESTION_TIMEOUT_MS
565
+ * tells the bridge our question wait so its last-resort `ask` backstop sits ABOVE
566
+ * it — without it the bridge fell back to a hardcoded 32min that fired long before
567
+ * the daemon's 24h wait, so a question the user answered an hour later was already
568
+ * resolved {timedout}. Extracted (pure) so the alignment is unit-testable.
569
+ */
570
+ _bridgeEnv() {
571
+ return {
572
+ POLYGRAM_SESSION_KEY: this.sessionKey,
573
+ POLYGRAM_SOCK: this.sockPath,
574
+ POLYGRAM_SOCK_SECRET: this.sockSecret,
575
+ POLYGRAM_CLAUDE_SESSION_ID: this.claudeSessionId,
576
+ POLYGRAM_QUESTION_TIMEOUT_MS: String(QUESTION_TIMEOUT_MS),
562
577
  };
578
+ }
579
+
580
+ async _spawnTmuxClaude({ tmuxName, opts }) {
581
+ const bridgeEnv = this._bridgeEnv();
563
582
  const mcpConfig = {
564
583
  mcpServers: {
565
584
  'polygram-bridge': {
@@ -1764,20 +1783,41 @@ class CliProcess extends Process {
1764
1783
  }
1765
1784
  }
1766
1785
 
1786
+ /**
1787
+ * Is this turn eligible for the rung-2 activity-quiet finalize? Eligible when the
1788
+ * answer is already captured where a finalize can deliver it:
1789
+ * - a delivered FINAL reply (it went out incrementally), OR
1790
+ * - seen + consumed-acked (the answer rode a sibling turn_id — fold-id echo;
1791
+ * see _ledgerAckConsumed), OR
1792
+ * - an attributed Stop captured the answer AND no work hook has fired since
1793
+ * (_workHookSeq unchanged from the capture) — i.e. claude is genuinely done,
1794
+ * not resumed into more work. A reply-less turn's only finalizer is its Stop grace;
1795
+ * when a pane-thinking heartbeat cancels that grace (the turn's own residual
1796
+ * "esc to interrupt"), this is the backstop that still delivers the captured
1797
+ * last_assistant_message instead of orphaning to the idle ceiling. The
1798
+ * hook-recency check withdraws eligibility the moment claude resumes (a resume
1799
+ * emits PreToolUse/etc. that increments _workHookSeq past the capture), so a
1800
+ * stale early Stop can't finalize over a still-working turn — that also covers
1801
+ * an in-flight sub-agent, which emits work hooks after any boundary Stop.
1802
+ * An interim-only turn with no captured answer stays ineligible (it must keep working).
1803
+ */
1804
+ _activityQuietEligible(pending) {
1805
+ if (this._turnHasFinalReply(pending)) return true;
1806
+ if (pending.seen === true && pending._consumedAcked === true) return true;
1807
+ if (pending._stopHookData
1808
+ && (this._workHookSeq || 0) === (pending._stopHookDataSeq || 0)) return true;
1809
+ return false;
1810
+ }
1811
+
1767
1812
  /**
1768
1813
  * D1 rung 2: arm/refresh the activity-quiet finalize for one pending.
1769
- * Preconditions: hooks live, ≥1 delivered reply (a reply-less turn ends via
1770
- * rung 1 or the ceilings), no open question (waiting-on-user suspends the
1771
- * clock — claude is legitimately silent), and no rung-1 grace in flight.
1814
+ * Preconditions: hooks live, the answer is captured (see _activityQuietEligible),
1815
+ * no open question (waiting-on-user suspends the clock — claude is legitimately
1816
+ * silent), and no rung-1 grace in flight.
1772
1817
  */
1773
1818
  _armActivityQuiet(turnId, pending) {
1774
1819
  if (!this._sawHookStream) return;
1775
- // ≥1 FINAL reply, OR seen + consumed-acked (the answer rode a sibling turn_id —
1776
- // fold-id echo; see _ledgerAckConsumed). Same eligibility as the fire site. An
1777
- // interim-only turn (status promise, no final reply) is NOT eligible — it must
1778
- // keep working, not quiet-finalize as done. docs/progress-is-not-turn-end-spec.md
1779
- if (!this._turnHasFinalReply(pending)
1780
- && !(pending.seen === true && pending._consumedAcked === true)) return;
1820
+ if (!this._activityQuietEligible(pending)) return;
1781
1821
  if (this._openQuestions.size > 0) return;
1782
1822
  if (pending._stopGracePending) return;
1783
1823
  if (pending._activityQuietTimer) clearTimeout(pending._activityQuietTimer);
@@ -1797,20 +1837,19 @@ class CliProcess extends Process {
1797
1837
 
1798
1838
  /**
1799
1839
  * D1 rung 2 fire: the whole activity surface (hooks + pane heartbeat + bridge
1800
- * tool calls) has been quiet for activityQuietMs on a replied turn — the tail
1801
- * is over (Stop was lost, foreign, or the hook stream died mid-session; the
1802
- * pre-D1 `_sawHookStream` one-way boolean left that last class with NO
1803
- * finalizer until a 10-min TURN_TIMEOUT *rejection* after a delivered answer).
1840
+ * tool calls) has been quiet for activityQuietMs and the answer is captured (a
1841
+ * delivered reply, a consumed-ack, or an attributed Stop — see
1842
+ * _activityQuietEligible). The tail is over (Stop was lost, foreign, the hook
1843
+ * stream died mid-session, or — the no-reply case — the Stop grace was cancelled
1844
+ * by a pane-thinking heartbeat racing the Stop's own residual streaming hint).
1804
1845
  */
1805
1846
  _activityQuietFinalize(turnId) {
1806
1847
  const pending = this.pendingTurns.get(turnId);
1807
1848
  if (!pending) return;
1808
1849
  if (pending._stopGracePending) return;
1809
1850
  if (this._openQuestions.size > 0) return; // re-check at fire time
1810
- // Eligibility: ≥1 bound reply, OR seen + consumed-acked (the answer went
1811
- // out under a sibling turn_id — fold-id echo; see _ledgerAckConsumed).
1851
+ if (!this._activityQuietEligible(pending)) return;
1812
1852
  const consumedAcked = pending.seen === true && pending._consumedAcked === true;
1813
- if (!this._turnHasFinalReply(pending) && !consumedAcked) return;
1814
1853
  const lastHookAgeMs = this._lastHookEventAt ? Date.now() - this._lastHookEventAt : null;
1815
1854
  this._logEvent('cli-activity-quiet-finalize', {
1816
1855
  turn_id: turnId,
@@ -1819,6 +1858,16 @@ class CliProcess extends Process {
1819
1858
  last_hook_age_ms: lastHookAgeMs,
1820
1859
  had_stop: !!pending._stopHookData,
1821
1860
  });
1861
+ // The no-reply rescue: a reply-less, not-consumed-acked turn finalizing here
1862
+ // qualified ONLY via its captured Stop — i.e. it would have orphaned to the idle
1863
+ // ceiling before this backstop existed. Distinct event so the soak can count it.
1864
+ if (!this._turnHasFinalReply(pending) && !consumedAcked) {
1865
+ this._logEvent('cli-noreply-stop-rescued', {
1866
+ turn_id: turnId,
1867
+ last_hook_age_ms: lastHookAgeMs,
1868
+ text_len: (pending._stopHookData?.lastAssistantMessage || '').length,
1869
+ });
1870
+ }
1822
1871
  if (lastHookAgeMs != null && lastHookAgeMs >= this.activityQuietMs) {
1823
1872
  // A previously-live hook stream went quiet enough that rung 2 (not an
1824
1873
  // attributed Stop) ended the turn — the soak's mid-session-death signal.
@@ -1827,13 +1876,25 @@ class CliProcess extends Process {
1827
1876
  this._finalizeTurn(turnId);
1828
1877
  }
1829
1878
 
1879
+ /**
1880
+ * Capture a Stop hook's data on a pending, recording the work-hook count AT capture.
1881
+ * The rung-2 no-reply backstop (_activityQuietEligible) compares the live _workHookSeq
1882
+ * against this snapshot to tell "claude is done" (no work hook since the Stop) from
1883
+ * "claude resumed" (a later work hook bumped the count). A monotonic counter — not a
1884
+ * timestamp — so a Stop and a resume hook landing in the same millisecond still differ.
1885
+ */
1886
+ _captureStopHookData(pending, info) {
1887
+ pending._stopHookData = info;
1888
+ pending._stopHookDataSeq = this._workHookSeq || 0;
1889
+ }
1890
+
1830
1891
  /**
1831
1892
  * D1 rung 1: an attributed Stop (the pending was `seen` at pickup, or has
1832
1893
  * ≥1 turn_id-bound reply) finalizes through a short grace that any
1833
1894
  * subsequent same-session activity cancels (see _noteActivity #2).
1834
1895
  */
1835
1896
  _beginAttributedStopGrace(turnId, pending, info) {
1836
- pending._stopHookData = info;
1897
+ this._captureStopHookData(pending, info);
1837
1898
  pending._stopGracePending = true;
1838
1899
  if (pending._activityQuietTimer) {
1839
1900
  clearTimeout(pending._activityQuietTimer);
@@ -1932,7 +1993,7 @@ class CliProcess extends Process {
1932
1993
  let graceCount = 0;
1933
1994
  for (const p of this.pendingTurns.values()) if (p._stopGracePending) graceCount++;
1934
1995
  if (graceCount !== 1) return;
1935
- pending._stopHookData = info;
1996
+ this._captureStopHookData(pending, info);
1936
1997
  clearTimeout(pending._stopGraceTimer);
1937
1998
  pending._stopGraceTimer = null;
1938
1999
  finalize();
@@ -2186,14 +2247,20 @@ class CliProcess extends Process {
2186
2247
  const fireTimeout = (reason, probeResult = null) => {
2187
2248
  if (!this.pendingTurns.has(turnId)) return;
2188
2249
  const pending = this.pendingTurns.get(turnId);
2189
- // 0.13 D1 (S9): unblock any open ask FIRST — claude must never stay
2190
- // hung on a question whose turn we are about to end. The card cleanup
2191
- // stays with the question sweep; this only resolves the blocking tool.
2250
+ // A question waits for the user: while an `ask` is open the turn must NOT
2251
+ // time out and die mid-question. Defer — re-arm the absolute checkpoint and
2252
+ // keep waiting; the question store's long safety backstop is the only bound
2253
+ // (a truly-abandoned question eventually expires {timedout}). Pre-0.17.4 this
2254
+ // force-answered {timedout} at the ~30-min ceiling and killed the turn.
2255
+ // docs/progress-is-not-turn-end-spec.md
2192
2256
  if (this._openQuestions.size > 0) {
2193
- for (const tc of [...this._openQuestions]) {
2194
- this._logEvent('cli-question-timedout-at-ceiling', { tool_call_id: tc, reason });
2195
- try { this.writeQuestionAnswer(tc, { timedout: true }); } catch { /* best-effort */ }
2196
- }
2257
+ this._logEvent('cli-question-wait-extended', { reason, open_count: this._openQuestions.size });
2258
+ // Reached via the idle hardTimer too — clear any still-armed absoluteTimer
2259
+ // before re-arming so we don't orphan a ref-holding handle teardown can't see.
2260
+ if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
2261
+ pending.absoluteTimer = setTimeout(() => this._checkpointAbsolute(turnId), this.turnAbsoluteMs);
2262
+ pending.absoluteTimer.unref?.();
2263
+ return;
2197
2264
  }
2198
2265
  this.pendingTurns.delete(turnId);
2199
2266
  const idx = this.pendingQueue.findIndex(e => e.turnId === turnId);
@@ -2476,6 +2543,15 @@ class CliProcess extends Process {
2476
2543
  async _checkpointAbsolute(turnId) {
2477
2544
  if (!this.pendingTurns.has(turnId)) return;
2478
2545
  let pending = this.pendingTurns.get(turnId);
2546
+ // A question is open → the turn is waiting on the USER, not stalled. Don't probe
2547
+ // or time out: re-arm and keep waiting (the question store's long backstop is the
2548
+ // bound). docs/progress-is-not-turn-end-spec.md
2549
+ if (this._openQuestions.size > 0) {
2550
+ this._logEvent('cli-question-wait-extended', { reason: 'absolute-checkpoint', open_count: this._openQuestions.size });
2551
+ pending.absoluteTimer = setTimeout(() => this._checkpointAbsolute(turnId), this.turnAbsoluteMs);
2552
+ pending.absoluteTimer.unref?.();
2553
+ return;
2554
+ }
2479
2555
  // Turn with a FINAL reply (or consumed-acked): the ceiling RESOLVES it, never
2480
2556
  // extends. An interim-only turn (status promise, no final reply) is still
2481
2557
  // working — fall through to the busy-aware probe so it extends, not resolves.
@@ -2861,6 +2937,10 @@ class CliProcess extends Process {
2861
2937
  this._lastHookEventAt = Date.now();
2862
2938
  } else if (ev.type && ev.type !== 'parse-error' && ev.type !== 'unknown') {
2863
2939
  this._lastHookEventAt = Date.now();
2940
+ // Monotonic count of WORK hooks (everything but the terminal Stop). The rung-2
2941
+ // no-reply backstop snapshots this at Stop capture; a later increment means
2942
+ // claude resumed work, withdrawing the stale Stop's finalize eligibility.
2943
+ this._workHookSeq = (this._workHookSeq || 0) + 1;
2864
2944
  this._noteActivity(`hook:${ev.type}`);
2865
2945
  }
2866
2946
 
@@ -3043,7 +3123,7 @@ class CliProcess extends Process {
3043
3123
  // sub-agent: refresh the captured last_assistant_message so the
3044
3124
  // eventual finalize delivers the LATEST produced answer (claude's real
3045
3125
  // end-of-work text), not the boundary Stop's stale/partial text.
3046
- p._stopHookData = info;
3126
+ this._captureStopHookData(p, info);
3047
3127
  }
3048
3128
  } else if (this.pendingTurns.size > 1) {
3049
3129
  // Can't attribute Stop to one of several concurrent turns — surface
@@ -3493,6 +3573,11 @@ class CliProcess extends Process {
3493
3573
  try { pending.reject(err); } catch {}
3494
3574
  }
3495
3575
  this.pendingTurns.clear();
3576
+ // Drop interactive-question state too (parity with _doKill /
3577
+ // _handleBridgeDisconnected) — else the 60s keep-alive interval leaks and
3578
+ // _openQuestions is left stale on the reset session.
3579
+ this._stopQuestionKeepAlive();
3580
+ this._openQuestions.clear();
3496
3581
  // Now drain pendingQueue. Skip matching turnIds (already counted), reject
3497
3582
  // the rest (entries pushed by callers other than this.send — contract
3498
3583
  // test, tmux/sdk pm callback path).
@@ -11,12 +11,13 @@
11
11
 
12
12
  const { newToken, tokensEqual } = require('../approvals/store');
13
13
 
14
- // Option A (2026-06-09): don't expire a question before the turn that's blocking on
15
- // it can. A blocking `ask` can live at most the 30-min turn ABSOLUTE cap
16
- // (DEFAULT_TURN_ABSOLUTE_MS) the keep-alive resets the idle cap but not the absolute
17
- // so align here. The user answers any time within the turn's life, not an arbitrary
18
- // 8-min window. (Truly-unbounded "answer hours later" needs the non-blocking redesign.)
19
- const DEFAULT_TIMEOUT_MS = 30 * 60 * 1000;
14
+ // A question waits for the user — the turn no longer times out while an `ask` is open
15
+ // (cli-process defers its ceilings during a question wait, docs/progress-is-not-turn-end-spec.md),
16
+ // so this is only the long SAFETY BACKSTOP: a forgotten/abandoned question eventually
17
+ // expires {timedout} instead of pinning the session forever. Generous (a full day) so a
18
+ // real user answering hours later is never cut off; tune via the `questionTimeoutMs` config
19
+ // if a chat needs shorter/longer.
20
+ const DEFAULT_TIMEOUT_MS = 24 * 60 * 60 * 1000;
20
21
 
21
22
  function createQuestionStore(rawDb, now = () => Date.now()) {
22
23
  const insertStmt = rawDb.prepare(`
@@ -351,6 +351,9 @@ function createSdkCallbacks({
351
351
  ctx?.typing?.resume?.();
352
352
  const r = ctx?.reactor;
353
353
  if (r && typeof r.setState === 'function') {
354
+ // 0.17.4: release the question-wait hold (a concurrent sub-agent hold, if
355
+ // any, keeps its own — owner-scoped so they don't stomp each other).
356
+ if (typeof r.setWorkInFlight === 'function') r.setWorkInFlight(false, 'question');
354
357
  r.setState('THINKING');
355
358
  logEvent('question-resumed', { chat_id: getChatIdFromKey(sessionKey), session_key: sessionKey });
356
359
  }
@@ -367,6 +370,13 @@ function createSdkCallbacks({
367
370
  // loop) alive through the whole wait, so without this pause every
368
371
  // ask-wait would show continuous typing. Guarded no-op on dead turns.
369
372
  try { entry?.pendingQueue?.[0]?.context?.typing?.pause?.(); } catch { /* guarded */ }
373
+ // 0.17.4: hold the reaction through the question wait — it's waiting on the
374
+ // USER, not stalled, so don't let it decay to the 🥱/😨 stall faces (reuses
375
+ // the B3 work-in-flight hold). Released on the answer in onQuestionResumed.
376
+ try {
377
+ const r = entry?.pendingQueue?.[0]?.context?.reactor;
378
+ if (r && typeof r.setWorkInFlight === 'function') r.setWorkInFlight(true, 'question');
379
+ } catch { /* guarded */ }
370
380
  if (typeof renderQuestion !== 'function') return;
371
381
  await renderQuestion({ sessionKey, ...payload });
372
382
  } catch (err) {
@@ -792,7 +802,7 @@ function createSdkCallbacks({
792
802
  // B3: hold a "working" face for the whole sub-agent run — the quiet
793
803
  // stretch between its tool hooks is expected, not a stall, so suppress
794
804
  // the 🥱/😨 decay until it finishes. docs/progress-is-not-turn-end-spec.md
795
- if (typeof r.setWorkInFlight === 'function') r.setWorkInFlight(true);
805
+ if (typeof r.setWorkInFlight === 'function') r.setWorkInFlight(true, 'subagent');
796
806
  }
797
807
  } catch (err) {
798
808
  logger.error?.(`[${botName}] subagent-start handler: ${err.message}`);
@@ -807,7 +817,7 @@ function createSdkCallbacks({
807
817
  if (r) {
808
818
  // B3: release the working-hold only when the LAST sub-agent finishes
809
819
  // (inFlight === 0) — nested/parallel sub-agents keep it held.
810
- if (typeof r.setWorkInFlight === 'function') r.setWorkInFlight((payload?.inFlight ?? 0) > 0);
820
+ if (typeof r.setWorkInFlight === 'function') r.setWorkInFlight((payload?.inFlight ?? 0) > 0, 'subagent');
811
821
  if (typeof r.heartbeat === 'function') r.heartbeat();
812
822
  }
813
823
  logEvent('subagent-done', {
@@ -226,9 +226,11 @@ function createReactionManager({
226
226
  // Chaining all applies through `applyChain` guarantees they're sent
227
227
  // to Telegram in setState() invocation order.
228
228
  let applyChain = Promise.resolve();
229
- // B3: set true while a sub-agent / background work is in flight — suppresses the
230
- // stall/freeze decay so a working-but-quiet turn never shows 🥱/😨.
231
- let workInFlight = false;
229
+ // B3 / 0.17.4: independent "hold the reaction, suppress the 🥱/😨 decay" owners —
230
+ // a sub-agent run AND an open question can each hold concurrently. A boolean would
231
+ // let one release while the other still needs the hold (review MUST-FIX), so track
232
+ // the set of active owners; the decay is suppressed while ANY owner holds.
233
+ const workOwners = new Set();
232
234
  // States the auto-stall path may transition to. Once we've already
233
235
  // shown STALL or TIMEOUT we don't downgrade or rearm — only an
234
236
  // explicit setState() call (Claude resumed) can move us forward.
@@ -333,10 +335,10 @@ function createReactionManager({
333
335
  const armStallTimers = () => {
334
336
  clearStallTimers();
335
337
  if (stopped) return;
336
- // B3: while a sub-agent (or background work) is genuinely in flight, a quiet
337
- // stretch is EXPECTED — the turn is working, not stalled. Don't arm the
338
- // 🥱/😨 decay; hold the current working face until work drains.
339
- if (workInFlight) return;
338
+ // B3 / 0.17.4: while any owner holds (a sub-agent in flight, or an open question
339
+ // waiting on the user), a quiet stretch is EXPECTED — not stalled. Don't arm the
340
+ // 🥱/😨 decay; hold the current face until every owner releases.
341
+ if (workOwners.size > 0) return;
340
342
  if (!STALL_PROMOTABLE.has(currentState)) return;
341
343
  stallTimer = setTimeout(() => {
342
344
  stallTimer = null;
@@ -439,7 +441,7 @@ function createReactionManager({
439
441
 
440
442
  const stop = () => {
441
443
  stopped = true;
442
- workInFlight = false; // B3: defense-in-depth if a reactor is ever reused
444
+ workOwners.clear(); // B3: defense-in-depth if a reactor is ever reused
443
445
  if (pendingTimer) { clearTimeout(pendingTimer); pendingTimer = null; }
444
446
  clearStallTimers();
445
447
  clearDeepeningTimers();
@@ -460,16 +462,18 @@ function createReactionManager({
460
462
  armStallTimers();
461
463
  };
462
464
 
463
- // B3: mark whether work (a sub-agent / background shell) is in flight. While
464
- // active, the silence between tool hooks is expected, so the stall/freeze decay
465
- // is suppressed and the reactor holds its working face. When work drains, the
466
- // normal cascade resumes from now. docs/progress-is-not-turn-end-spec.md
467
- const setWorkInFlight = (active) => {
468
- const next = !!active;
469
- if (next === workInFlight) return;
470
- workInFlight = next;
471
- if (workInFlight) clearStallTimers(); // cancel any pending 🥱/😨 decay
472
- else armStallTimers(); // work drained — resume the cascade
465
+ // B3 / 0.17.4: a named owner ('subagent', 'question', …) holds/releases the
466
+ // reaction. While ANY owner holds, the silence is expected (work in flight, or
467
+ // waiting on the user), so the stall/freeze decay is suppressed and the reactor
468
+ // holds its face. The cascade resumes only when the LAST owner releases. A boolean
469
+ // couldn't represent two concurrent owners. docs/progress-is-not-turn-end-spec.md
470
+ const setWorkInFlight = (active, owner = 'default') => {
471
+ const wasHeld = workOwners.size > 0;
472
+ if (active) workOwners.add(owner); else workOwners.delete(owner);
473
+ const isHeld = workOwners.size > 0;
474
+ if (isHeld === wasHeld) return;
475
+ if (isHeld) clearStallTimers(); // first owner → cancel any pending 🥱/😨 decay
476
+ else armStallTimers(); // last owner released → resume the cascade
473
477
  };
474
478
 
475
479
  return {
@@ -133,8 +133,15 @@ function startTyping({
133
133
  // tearing the loop down; resume() restarts immediately (the answer landed,
134
134
  // claude is working again). Attached to the stop function so every existing
135
135
  // `stopTyping()` call site keeps working unchanged.
136
- stop.pause = () => { paused = true; };
136
+ stop.pause = () => {
137
+ if (paused) return;
138
+ paused = true;
139
+ // 0.17.4: instrument the question pause/resume so "typing disappeared after I
140
+ // answered" is diagnosable (typing pings themselves aren't logged).
141
+ onEvent?.({ kind: 'typing-state', chat_id: key, detail: { state: 'paused' } });
142
+ };
137
143
  stop.resume = () => {
144
+ onEvent?.({ kind: 'typing-state', chat_id: key, detail: { state: 'resume-called', stopped } });
138
145
  if (stopped) return;
139
146
  paused = false;
140
147
  tick();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.17.3",
3
+ "version": "0.17.5",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc/client.js",
6
6
  "bin": {