polygram 0.10.0-rc.20 → 0.10.0-rc.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
3
3
  "name": "polygram",
4
- "version": "0.10.0-rc.20",
4
+ "version": "0.10.0-rc.22",
5
5
  "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands plus history (transcript queries) and polygram-send (out-of-turn IPC sends with file-upload validation) skills.",
6
6
  "keywords": [
7
7
  "telegram",
@@ -95,4 +95,100 @@ function getClaudeSessionId(db, sessionKey) {
95
95
  return row?.claude_session_id || null;
96
96
  }
97
97
 
98
- module.exports = { migrateJsonToDb, getClaudeSessionId, countSessions };
98
+ // ─── S2: session-config drift ────────────────────────────────────────
99
+ //
100
+ // A stored `sessions` row records the config the claude session was
101
+ // SPAWNED under (agent / cwd / pm_backend). Those three are
102
+ // spawn-identity: they are baked into the process at spawn time —
103
+ // `--agent`, the tmux/SDK working dir, the backend class — and cannot
104
+ // be changed on a live session. If the chat/topic config has drifted
105
+ // from the stored row, `--resume`-ing the old session forces claude
106
+ // to run under a config it was never built for. shumorobot
107
+ // 2026-05-17 22:03, topic :3: the row was agent=shumabit / cwd=$HOME
108
+ // / sdk (created before the Music topic got its per-topic override);
109
+ // resuming it under agent=music-curation:music-curator /
110
+ // cwd=.../Music/rekordbox / tmux left the TUI never signalling ready.
111
+ //
112
+ // model + effort are deliberately EXCLUDED from the invalidating set.
113
+ // They are NOT spawn-identity: a live `/model` or `/effort` change is
114
+ // pushed into the running session by `pm.setModel` /
115
+ // `pm.applyFlagSettings` with no respawn (lib/handlers/slash-commands.js,
116
+ // lib/handlers/config-callback.js). Including them here would
117
+ // destructively drop the whole session — discarding all context — on
118
+ // every model switch, double-handling what the live-apply path
119
+ // already covers cleanly. The stored model/effort columns are
120
+ // informational, not identity.
121
+ const SPAWN_IDENTITY_FIELDS = ['agent', 'cwd', 'pm_backend'];
122
+
123
+ /**
124
+ * Decide whether a stored session can be resumed for the next spawn,
125
+ * or whether config drift means it must be dropped and re-spawned
126
+ * fresh.
127
+ *
128
+ * On drift the stale session is dropped here (via `db.clearSessionId`) — so the very next spawn
129
+ * mints a fresh claude_session_id under the correct config and the
130
+ * `onInit` callback re-upserts the row. This self-heals every
131
+ * pre-migration stale row across all chats with no manual SQL.
132
+ *
133
+ * @param {object|null} db — DB handle (null → fresh spawn)
134
+ * @param {string} sessionKey
135
+ * @param {object} resolved — freshly-resolved spawn config
136
+ * @param {string} [resolved.agent]
137
+ * @param {string} [resolved.cwd]
138
+ * @param {string} [resolved.backend] — 'sdk' | 'tmux' (resolved by
139
+ * process/factory.js pickBackend); compared to the row's pm_backend
140
+ * @returns {{ existingSessionId: string|null, drift: object|null }}
141
+ * existingSessionId — pass to start() for --resume, or null for a
142
+ * fresh spawn (no stored row, or drift dropped it)
143
+ * drift — null when no drift; otherwise { fields, before, after }
144
+ * for the `session-config-drift` telemetry event
145
+ */
146
+ function resolveSessionForSpawn(db, sessionKey, resolved = {}) {
147
+ if (!db) return { existingSessionId: null, drift: null };
148
+ const row = db.getSession(sessionKey);
149
+ if (!row || !row.claude_session_id) {
150
+ return { existingSessionId: null, drift: null };
151
+ }
152
+
153
+ // Normalise: falsy values (undefined, null, '') collapse to null, so
154
+ // a field that is missing on both sides compares equal (no drift).
155
+ const after = {
156
+ agent: resolved.agent || null,
157
+ cwd: resolved.cwd || null,
158
+ pm_backend: resolved.backend || null,
159
+ };
160
+ const before = {
161
+ agent: row.agent || null,
162
+ cwd: row.cwd || null,
163
+ pm_backend: row.pm_backend || null,
164
+ };
165
+ const drifted = SPAWN_IDENTITY_FIELDS.filter((f) => {
166
+ // If the resolved config does not specify a field, do not treat
167
+ // it as drift — we have nothing to compare against.
168
+ if (after[f] == null) return false;
169
+ return before[f] !== after[f];
170
+ });
171
+
172
+ if (drifted.length === 0) {
173
+ return { existingSessionId: row.claude_session_id, drift: null };
174
+ }
175
+
176
+ // Drift: drop the stale row so the next spawn is fresh + correct.
177
+ db.clearSessionId(sessionKey);
178
+ return {
179
+ existingSessionId: null,
180
+ drift: {
181
+ fields: drifted,
182
+ before: { ...before, claude_session_id: row.claude_session_id },
183
+ after,
184
+ },
185
+ };
186
+ }
187
+
188
+ module.exports = {
189
+ migrateJsonToDb,
190
+ getClaudeSessionId,
191
+ resolveSessionForSpawn,
192
+ countSessions,
193
+ SPAWN_IDENTITY_FIELDS,
194
+ };
@@ -41,13 +41,37 @@ function createHandleAbort({
41
41
 
42
42
  const threadId = msg.message_thread_id?.toString();
43
43
  const sessionKey = getSessionKey(chatId, threadId, chatConfig);
44
- const hadActive = pm.has(sessionKey) && !!pm.get(sessionKey)?.inFlight;
44
+ const proc = pm.has(sessionKey) ? pm.get(sessionKey) : null;
45
+ const hadActive = !!proc?.inFlight;
45
46
 
46
47
  // Mark BEFORE killing: the 'close' event fires almost immediately
47
48
  // after interrupt, and the surrounding handleMessage's catch
48
49
  // needs to see the flag to skip the generic error-reply.
49
50
  if (hadActive) markSessionAborted(sessionKey);
50
51
 
52
+ // Bug 1 (incident 2026-05-18): "Stop" was turn-scoped — it only
53
+ // looked at an in-flight TURN. But the agent can leave a DETACHED
54
+ // background shell running (a `run_in_background:true` Bash) that
55
+ // outlives the turn; the tmux TUI shows an `N shell` indicator.
56
+ // When there is no live turn, check for such a shell and stop it
57
+ // so "Stop" acts truthfully instead of replying "Nothing to stop"
58
+ // while work is still churning. tmux-only — the SDK Process has no
59
+ // hasBackgroundShell()/killBackgroundShells(); the typeof guards
60
+ // make this a no-op there.
61
+ let killedBackgroundShell = false;
62
+ if (!hadActive && proc
63
+ && typeof proc.hasBackgroundShell === 'function'
64
+ && typeof proc.killBackgroundShells === 'function') {
65
+ try {
66
+ if (await proc.hasBackgroundShell()) {
67
+ markSessionAborted(sessionKey);
68
+ killedBackgroundShell = await proc.killBackgroundShells();
69
+ }
70
+ } catch (err) {
71
+ logger.error?.(`[${botName}] background-shell stop failed: ${err.message}`);
72
+ }
73
+ }
74
+
51
75
  // SDK abort: interrupt() + drainQueue(). interrupt() cancels
52
76
  // the in-flight turn at SDK level WITHOUT tearing down the
53
77
  // Query (cheap to reuse for the user's next message);
@@ -62,6 +86,7 @@ function createHandleAbort({
62
86
  logEvent('abort-requested', {
63
87
  chat_id: chatId, user_id: msg.from?.id || null,
64
88
  had_active: hadActive,
89
+ killed_background_shell: killedBackgroundShell,
65
90
  trigger: cleanText.slice(0, 40),
66
91
  });
67
92
 
@@ -69,10 +94,23 @@ function createHandleAbort({
69
94
  // detection is crude but reliable for ru/en.
70
95
  const lang = /[а-яё]/i.test(cleanText) ? 'ru' : 'en';
71
96
  const strs = {
72
- en: { stopped: 'Stopped.', nothing: 'Nothing to stop.' },
73
- ru: { stopped: 'Остановлено.', nothing: 'Нечего останавливать.' },
97
+ en: {
98
+ stopped: 'Stopped.',
99
+ bgStopped: 'Stopped the background task.',
100
+ nothing: 'Nothing to stop.',
101
+ },
102
+ ru: {
103
+ stopped: 'Остановлено.',
104
+ bgStopped: 'Фоновая задача остановлена.',
105
+ nothing: 'Нечего останавливать.',
106
+ },
74
107
  }[lang];
75
- const reply = hadActive ? strs.stopped : strs.nothing;
108
+ // Truthful ack: a stopped in-flight turn → "Stopped"; a stopped
109
+ // background shell → "Stopped the background task"; neither →
110
+ // "Nothing to stop".
111
+ const reply = hadActive ? strs.stopped
112
+ : killedBackgroundShell ? strs.bgStopped
113
+ : strs.nothing;
76
114
  try {
77
115
  await tg(bot, 'sendMessage', {
78
116
  chat_id: chatId, text: reply,
@@ -88,6 +88,16 @@ const DEFAULT_CONTEXT_WINDOW = 200_000;
88
88
  const READY_HINTS_RE = /\?\s+for shortcuts|accept edits on|bypass permissions on/;
89
89
  const STREAMING_HINT_RE = /esc to interrupt/;
90
90
 
91
+ // Bug 1 (incident 2026-05-18): when the agent leaves a detached
92
+ // background shell running (a `run_in_background:true` Bash), the
93
+ // claude TUI shows a background-shell count in the pane. Verified
94
+ // against claude 2.1.142 — two forms:
95
+ // - the bottom hint line: "… · 1 shell · ↓ to manage"
96
+ // - the status line: "✻ Baked for 5s · 1 shell still running"
97
+ // Both carry "<N> shell(s)". polygram's turn-scoped Stop is blind to
98
+ // these; this regex lets the abort handler see them.
99
+ const BG_SHELL_RE = /\b\d+\s+shells?\b/;
100
+
91
101
  // L1 fix (spike leftover): the claude TUI shows its welcome banner
92
102
  // WITH a ready hint at the bottom during startup — before the user's
93
103
  // prompt has been processed:
@@ -135,7 +145,17 @@ const TUI_BANNER_RE = /▐▛███▜▌|▝▜█████▛▘/;
135
145
  // The optional `❯` cursor in [^\S\n]*(?:❯[^\S\n]+)?1\. is still
136
146
  // bounded to the line containing `1.`, so the security property
137
147
  // holds — only a real menu line satisfies it.
138
- const APPROVAL_PROMPT_RE = /Do you want to (?:proceed|do this|continue)\??[\s\S]{0,400}?(?:^|\n)[^\S\n]*(?:❯[^\S\n]+)?1\.\s+/im;
148
+ //
149
+ // 2026-05-18 incident fix: the verb after "Do you want to" varies by
150
+ // tool — Bash → "do this", Write → "create CLAUDE.md", Edit → "make
151
+ // this edit", etc. A `proceed|do this|continue` whitelist missed
152
+ // "create" and hung the Music topic for 7+ min with no approval card.
153
+ // Match the STRUCTURE, not a verb whitelist: a "Do you want to …?"
154
+ // question (verb is a bounded wildcard, single-line — no newline so
155
+ // it can't swallow past the question) followed within the bounded
156
+ // window by the numbered menu. The verb was never the security
157
+ // control — the required `1.` menu line is, and it is unchanged.
158
+ const APPROVAL_PROMPT_RE = /Do you want to [^\n?]{1,80}\??[\s\S]{0,400}?(?:^|\n)[^\S\n]*(?:❯[^\S\n]+)?1\.\s+/im;
139
159
  // Pull the tool name + raw arg snippet from the line preceding the
140
160
  // approval prompt. Capture-pane preserves the ⏺ marker.
141
161
  const TOOL_INVOCATION_RE = /⏺\s+([A-Za-z_]\w*)\s*\((.*?)\)\s*$/m;
@@ -389,21 +409,60 @@ class TmuxProcess extends Process {
389
409
  envExtras: ctx.envExtras || {},
390
410
  });
391
411
 
392
- // v9: tail the per-session JSONL file (the REAL structured-event
393
- // channel — v9 probe showed --debug-file emits only infra noise).
394
- // Path is deterministic once we have cwd + sessionId. The file
395
- // may not exist for ~100ms after spawn; LogTail tolerates ENOENT.
396
- this._cwd = cwd;
397
- this._armSessionLogTail({ resuming: Boolean(ctx.existingSessionId) });
398
-
399
- // G6 — block until TUI is responsive.
400
- await this._waitForReady();
401
- this.emit('init', {
402
- session_id: this.claudeSessionId,
403
- label: this.label,
404
- backend: 'tmux',
405
- tmux_name: this.tmuxName,
406
- });
412
+ // SPAWN-LIFECYCLE FIX (shumorobot 2026-05-17 22:03, topic :3):
413
+ // `spawn()` resolving means the tmux session NAME now exists on
414
+ // the host. From here on, ANY failure — readiness timeout, a
415
+ // wedged capture-pane, an `init` listener throwing — must tear
416
+ // that session down before propagating, or the orphan lingers
417
+ // and every retry's `tmux new-session -s <same-name>` fails
418
+ // "duplicate session". A transient first-spawn failure would
419
+ // otherwise become a PERMANENT wedge for the chat/topic until a
420
+ // human kills the orphan. `sessionCreated` is the seam that
421
+ // distinguishes "spawn() itself failed (no session — nothing to
422
+ // kill)" from "session created, a later step failed (must
423
+ // kill)". This is a spawn-lifecycle bug, independent of the
424
+ // turn-ledger concurrency rewrite.
425
+ const sessionCreated = true;
426
+
427
+ try {
428
+ // v9: tail the per-session JSONL file (the REAL structured-
429
+ // event channel — v9 probe showed --debug-file emits only
430
+ // infra noise). Path is deterministic once we have cwd +
431
+ // sessionId. The file may not exist for ~100ms after spawn;
432
+ // LogTail tolerates ENOENT.
433
+ this._cwd = cwd;
434
+ this._armSessionLogTail({ resuming: Boolean(ctx.existingSessionId) });
435
+
436
+ // G6 — block until TUI is responsive.
437
+ await this._waitForReady();
438
+ this.emit('init', {
439
+ session_id: this.claudeSessionId,
440
+ label: this.label,
441
+ backend: 'tmux',
442
+ tmux_name: this.tmuxName,
443
+ });
444
+ } catch (err) {
445
+ // Post-spawn failure — the session exists but is unusable.
446
+ // Kill it so a retry gets a clean name. Best-effort: the
447
+ // runner's killSession already swallows its own errors, but
448
+ // guard anyway so a kill failure can never mask the real
449
+ // spawn error. Also tear down the just-armed JSONL tail so it
450
+ // doesn't leak a watcher against a dead session.
451
+ if (sessionCreated) {
452
+ if (this._sessionLogTail) {
453
+ try { this._sessionLogTail.close(); } catch { /* swallow */ }
454
+ this._sessionLogTail = null;
455
+ }
456
+ try {
457
+ await this.runner.killSession(this.tmuxName);
458
+ } catch (killErr) {
459
+ this.logger.warn?.(
460
+ `[${this.label}] start() cleanup killSession failed: ${killErr.message}`,
461
+ );
462
+ }
463
+ }
464
+ throw err;
465
+ }
407
466
  })();
408
467
 
409
468
  try {
@@ -493,6 +552,12 @@ class TmuxProcess extends Process {
493
552
  // Internal turn-done signal — settled by _flushActiveGroup when
494
553
  // this turn's group is flushed on a terminal `result`.
495
554
  turn.resultPromise = new Promise((resolve) => { turn.settleResult = resolve; });
555
+ // Bug 3: interrupt signal. `interrupt()` settles `signalInterrupt`
556
+ // to end this turn's race promptly — without it, an interrupted
557
+ // turn whose tool was killed by C-c writes no JSONL `result` and
558
+ // shows no capture-pane completion the race recognises, so
559
+ // `_runTurn` would hang until the absolute `turnTimeoutMs`.
560
+ turn.interruptP = new Promise((resolve) => { turn.signalInterrupt = resolve; });
496
561
 
497
562
  try {
498
563
  // rc.13.1: pasteAndEnter holds a per-session async lock around
@@ -557,6 +622,7 @@ class TmuxProcess extends Process {
557
622
  turn.resultPromise.then((ev) => ({ kind: 'jsonl', ev })),
558
623
  captureRaceP,
559
624
  turnDeadlineP,
625
+ turn.interruptP.then(() => ({ kind: 'interrupt' })),
560
626
  ]);
561
627
 
562
628
  // If capture-pane won but the turn used a tool, the agent is
@@ -564,10 +630,14 @@ class TmuxProcess extends Process {
564
630
  // tool calls. Wait for the real terminal result from JSONL, but
565
631
  // keep the absolute deadline armed so a JSONL `result` that
566
632
  // never arrives still fails the turn rather than hanging it.
633
+ // The interrupt signal still wins here too — Bug 3: an
634
+ // interrupted tool turn writes no terminal JSONL `result`, so
635
+ // without this racer it would hang to `turnTimeoutMs`.
567
636
  if (winner.kind === 'capture' && turn.toolUsedThisTurn) {
568
637
  winner = await Promise.race([
569
638
  turn.resultPromise.then((ev) => ({ kind: 'jsonl', ev })),
570
639
  turnDeadlineP,
640
+ turn.interruptP.then(() => ({ kind: 'interrupt' })),
571
641
  ]);
572
642
  }
573
643
  } finally {
@@ -581,7 +651,18 @@ class TmuxProcess extends Process {
581
651
  let text;
582
652
  let resultSubtype = 'success';
583
653
  let stopReason = null;
584
- if (winner.kind === 'jsonl') {
654
+ if (winner.kind === 'interrupt') {
655
+ // Bug 3: `interrupt()` ended the turn. C-c was sent to the
656
+ // TUI; the turn stops here instead of hanging until the
657
+ // absolute `turnTimeoutMs`. Deliver whatever partial text the
658
+ // agent streamed before the interrupt (may be empty) with an
659
+ // explicit `interrupted` subtype so polygram's caller can tell
660
+ // a stopped turn apart from a clean completion.
661
+ turn.interrupted = true;
662
+ text = turn.text || '';
663
+ resultSubtype = 'interrupted';
664
+ stopReason = 'interrupted';
665
+ } else if (winner.kind === 'jsonl') {
585
666
  text = turn.text || winner.ev.text || '';
586
667
  resultSubtype = winner.ev.subtype || 'success';
587
668
  stopReason = winner.ev.stopReason || null;
@@ -757,6 +838,10 @@ class TmuxProcess extends Process {
757
838
  startedAt: 0,
758
839
  resolve: null, reject: null, callerPromise: null,
759
840
  settleResult: null, resultPromise: null,
841
+ // Bug 3: settled by `interrupt()` to make a live turn's
842
+ // `_runTurn` race end promptly instead of hanging until
843
+ // `turnTimeoutMs`. Armed at the top of `_runTurn`.
844
+ signalInterrupt: null, interruptP: null, interrupted: false,
760
845
  };
761
846
  }
762
847
 
@@ -1469,10 +1554,78 @@ class TmuxProcess extends Process {
1469
1554
  this.logger.error?.(`[${this.label}] interrupt: ${err.message}`);
1470
1555
  return false;
1471
1556
  }
1557
+ // Bug 3: C-c stops the agent's work in the TUI, but an interrupted
1558
+ // turn (especially a tool turn) writes no terminal JSONL `result`
1559
+ // and shows no capture-pane completion `_runTurn`'s race
1560
+ // recognises — so `_runTurn` would hang until the absolute
1561
+ // `turnTimeoutMs`. Settle the running turn's interrupt signal so
1562
+ // its race ends NOW. The running primary turn is `pendingQueue[0]`
1563
+ // in state 'pasted'/'streaming'.
1564
+ const running = this.pendingQueue.find(
1565
+ (t) => t.state === 'pasted' || t.state === 'streaming',
1566
+ );
1567
+ if (running && typeof running.signalInterrupt === 'function') {
1568
+ running.signalInterrupt();
1569
+ }
1472
1570
  this.emit('interrupt-applied', { backend: 'tmux' });
1473
1571
  return true;
1474
1572
  }
1475
1573
 
1574
+ /**
1575
+ * Bug 1: report whether the TUI currently shows a running
1576
+ * background shell (a detached `run_in_background:true` Bash). This
1577
+ * is work that outlives the turn — polygram's turn-scoped Stop is
1578
+ * blind to it. Reads the pane bottom for the `N shell` indicator.
1579
+ * @returns {Promise<boolean>}
1580
+ */
1581
+ async hasBackgroundShell() {
1582
+ if (this.closed) return false;
1583
+ try {
1584
+ const buf = await this.runner.captureWide(this.tmuxName, { lines: 80 });
1585
+ // The indicator lives in the bottom few lines of the pane.
1586
+ return BG_SHELL_RE.test(String(buf || '').slice(-2000));
1587
+ } catch (err) {
1588
+ this.logger.error?.(`[${this.label}] hasBackgroundShell: ${err.message}`);
1589
+ return false;
1590
+ }
1591
+ }
1592
+
1593
+ /**
1594
+ * Bug 1: stop every running background shell via the TUI's
1595
+ * background-task panel. Sequence verified against claude 2.1.142:
1596
+ * `/bashes` + Enter opens the "Shell details" panel (legend
1597
+ * "Esc/Enter/Space to close · x to stop"); `x` stops the shell;
1598
+ * Esc closes the panel. Repeats while a shell remains, bounded so a
1599
+ * stuck panel can't loop forever.
1600
+ *
1601
+ * @returns {Promise<boolean>} true if no background shell remains
1602
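+ * after the attempt (all stopped, or none was running); false when the process is closed, a tmux command throws, or a shell is still shown after the bounded rounds.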
1603
+ */
1604
+ async killBackgroundShells() {
1605
+ if (this.closed) return false;
1606
+ const maxRounds = 8; // bound — one round per shell, plus slack
1607
+ for (let round = 0; round < maxRounds; round += 1) {
1608
+ if (!(await this.hasBackgroundShell())) return true;
1609
+ try {
1610
+ // Open the background-task panel.
1611
+ await this.runner.pasteText(this.tmuxName, '/bashes');
1612
+ await this.runner.sendControl(this.tmuxName, 'Enter');
1613
+ await this._sleep(this.pollMs * 4 + 200);
1614
+ // Stop the shell shown in the Shell-details panel.
1615
+ await this.runner.sendControl(this.tmuxName, 'x');
1616
+ await this._sleep(this.pollMs * 4 + 200);
1617
+ // Close the panel.
1618
+ await this.runner.sendControl(this.tmuxName, 'Escape');
1619
+ await this._sleep(this.pollMs * 2 + 100);
1620
+ } catch (err) {
1621
+ this.logger.error?.(`[${this.label}] killBackgroundShells: ${err.message}`);
1622
+ return false;
1623
+ }
1624
+ }
1625
+ // Bounded out — report the residual state honestly.
1626
+ return !(await this.hasBackgroundShell());
1627
+ }
1628
+
1476
1629
  async setModel(model) {
1477
1630
  if (this.closed || !model) return false;
1478
1631
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.10.0-rc.20",
3
+ "version": "0.10.0-rc.22",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc/client.js",
6
6
  "bin": {
package/polygram.js CHANGED
@@ -24,7 +24,9 @@ const fs = require('fs');
24
24
  const path = require('path');
25
25
  const processGuard = require('./lib/process-guard');
26
26
  const dbClient = require('./lib/db');
27
- const { migrateJsonToDb, getClaudeSessionId } = require('./lib/db/sessions');
27
+ const {
28
+ migrateJsonToDb, getClaudeSessionId, resolveSessionForSpawn,
29
+ } = require('./lib/db/sessions');
28
30
  const { buildPrompt } = require('./lib/prompt');
29
31
  const { filterAttachments } = require('./lib/attachments');
30
32
  // 0.9.0: SDK ProcessManager is the only pm. CLI pm
@@ -38,7 +40,7 @@ const { filterAttachments } = require('./lib/attachments');
38
40
  // per-session mechanics. The pre-0.10.0 monolithic ProcessManagerSdk
39
41
  // is deleted; SdkProcess inherits its per-entry guts.
40
42
  const { ProcessManager } = require('./lib/process-manager');
41
- const { createProcessFactory } = require('./lib/process/factory');
43
+ const { createProcessFactory, pickBackend } = require('./lib/process/factory');
42
44
  const { extractAssistantText } = require('./lib/process/sdk-process');
43
45
  const { createTmuxRunner } = require('./lib/tmux/tmux-runner');
44
46
  const { sweepTmuxOrphans } = require('./lib/tmux/orphan-sweep');
@@ -396,12 +398,56 @@ function buildSpawnContext(sessionKey) {
396
398
  const chatConfig = config.chats[chatId];
397
399
  if (!chatConfig) return null;
398
400
  const threadId = sessionKey.includes(':') ? sessionKey.split(':')[1] : null;
401
+
402
+ // S2: a stored session is valid ONLY for the config it was spawned
403
+ // under. agent / cwd / pm_backend are spawn-identity — baked into
404
+ // the process at spawn time, never mutable on a live session.
405
+ // Resolve them the same way the backends do (topic override merged
406
+ // over chat-level) and compare to the stored `sessions` row. On
407
+ // drift, resolveSessionForSpawn drops the stale row and returns
408
+ // existingSessionId:null → the spawn starts fresh under the correct
409
+ // config instead of `--resume`-ing a stale one. This self-heals the
410
+ // pre-per-topic-config rows (e.g. shumorobot's Music topic :3,
411
+ // stored agent=shumabit / cwd=$HOME / sdk vs the current
412
+ // music-curation:music-curator / .../Music/rekordbox / tmux).
413
+ // model/effort are NOT compared — they apply live via setModel /
414
+ // applyFlagSettings with no respawn.
415
+ //
416
+ // The drift check runs only at COLD spawn (no warm process). A warm
417
+ // process already runs under its spawn-time config; getOrSpawn
418
+ // returns it without using this context, so dropping its row here
419
+ // would be premature — defer to the next cold spawn.
420
+ const isColdSpawn = !pm || !pm.has(sessionKey) || pm.get(sessionKey)?.closed;
421
+ let existingSessionId;
422
+ if (isColdSpawn) {
423
+ const topicConfig = getTopicConfig(chatConfig, threadId || null);
424
+ const resolved = {
425
+ agent: topicConfig.agent || chatConfig.agent || null,
426
+ cwd: topicConfig.cwd || chatConfig.cwd || null,
427
+ backend: pickBackend({ config, chatId, threadId: threadId || null }),
428
+ };
429
+ const r = resolveSessionForSpawn(db, sessionKey, resolved);
430
+ existingSessionId = r.existingSessionId;
431
+ if (r.drift) {
432
+ logEvent('session-config-drift', {
433
+ chat_id: chatId,
434
+ thread_id: threadId || null,
435
+ session_key: sessionKey,
436
+ fields: r.drift.fields,
437
+ before: r.drift.before,
438
+ after: r.drift.after,
439
+ });
440
+ }
441
+ } else {
442
+ existingSessionId = getClaudeSessionId(db, sessionKey);
443
+ }
444
+
399
445
  return {
400
446
  chatConfig,
401
447
  chatId,
402
448
  threadId: threadId || null,
403
449
  label: getSessionLabel(chatConfig, threadId),
404
- existingSessionId: getClaudeSessionId(db, sessionKey),
450
+ existingSessionId,
405
451
  };
406
452
  }
407
453
 
@@ -1333,6 +1379,16 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1333
1379
  await sendInlineStickers();
1334
1380
  await sendInlineReactions();
1335
1381
  await cleanupArchivedBubbles();
1382
+ // Bug 2 (incident 2026-05-18): this streamed-success branch
1383
+ // returns BEFORE the rc.10 deferred-clear block at the
1384
+ // bottom of the handler — so a turn that streamed its reply
1385
+ // never cleared the reactor. If the turn went quiet
1386
+ // mid-stream long enough to trip STALL (🥱), the emoji
1387
+ // stuck. reactor.stop() in the finally only kills timers,
1388
+ // not the visible reaction. Clear here, mirroring the
1389
+ // rc.10 block — AFTER delivery so there's no visual gap.
1390
+ reactor.clear().catch(() => {});
1391
+ clearAutosteeredReactions(sessionKey).catch(() => {});
1336
1392
  console.log(`[${label}] ${elapsed}s | ${result.text.length} chars | streamed | ${chatConfig.model}/${chatConfig.effort} | $${result.cost?.toFixed(4) || '?'}`);
1337
1393
  markReplied();
1338
1394
  return;
@@ -1380,6 +1436,18 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1380
1436
  await sendInlineStickers();
1381
1437
  await sendInlineReactions();
1382
1438
  await cleanupArchivedBubbles();
1439
+ // Bug 2 (incident 2026-05-18): same gap as the finalEditOk
1440
+ // branch above — this streamed-redeliver path returns before
1441
+ // the rc.10 deferred-clear block, so the reactor would stay
1442
+ // stuck. Clear it (and autosteered ✍) here, after delivery —
1443
+ // but ONLY on a clean delivery. When r.failed.length>0 the
1444
+ // ERROR state (😨) was set above as the "look here" signal
1445
+ // for the partial-delivery failure; clearing it would wipe
1446
+ // that signal, so leave the reactor as-is in that case.
1447
+ if (r.failed.length === 0) {
1448
+ reactor.clear().catch(() => {});
1449
+ }
1450
+ clearAutosteeredReactions(sessionKey).catch(() => {});
1383
1451
  console.log(`[${label}] ${elapsed}s | ${result.text.length} chars | streamed-redeliver(${reason}, ${chunks.length} chunks${r.failed.length ? `, ${r.failed.length} failed` : ''}) | ${chatConfig.model}/${chatConfig.effort} | $${result.cost?.toFixed(4) || '?'}`);
1384
1452
  markReplied();
1385
1453
  return;