polygram 0.12.0-rc.18 → 0.12.0-rc.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -362,6 +362,29 @@ class CliProcess extends Process {
362
362
  this._startPingLoop();
363
363
  } catch (err) {
364
364
  await this._teardownOnStartFailure();
365
+ // Self-heal an unresumable stale session. A `--resume` spawn that dies
366
+ // during startup means the persisted session can't be resumed cleanly.
367
+ // The production case (shumorobot general chat, session minted 2026-05-27):
368
+ // claude resumes a week-old session → renders the aged-session "Resuming
369
+ // the full session… Resume from summary?" dialog → the gate confirms it →
370
+ // the /compact resume exits code 0 → the gate reports TMUX_SESSION_GONE.
371
+ // Retrying the SAME id loops forever (the chat sat dead for days). Clear it
372
+ // and respawn ONCE with a fresh session: a fresh spawn has no aged-session
373
+ // dialog and starts clean — exactly what made the Music topic healthy once
374
+ // its cwd change minted a new session. The `_freshRetry` guard means a
375
+ // genuine fresh-spawn failure throws instead of looping. onInit re-upserts
376
+ // the fresh id on success, so the dead id leaves the DB without extra wiring.
377
+ if (this._shouldRefreshAfterStartupFailure(err, opts)) {
378
+ this._logEvent('cli-stale-session-reset', {
379
+ stale_session_id: opts.existingSessionId,
380
+ code: err.code,
381
+ });
382
+ this.logger.warn?.(
383
+ `[${this.label}] channels: startup failed resuming ${opts.existingSessionId} `
384
+ + `(${err.code}) — clearing stale session, respawning fresh.`,
385
+ );
386
+ return this.start({ ...opts, existingSessionId: null, _freshRetry: true });
387
+ }
365
388
  throw err;
366
389
  }
367
390
 
@@ -415,6 +438,27 @@ class CliProcess extends Process {
415
438
  }
416
439
  }
417
440
 
441
+ /**
442
+ * Decide whether a start() failure warrants a one-shot fresh-session
443
+ * retry. True only when ALL hold:
444
+ * - we were RESUMING a session (`opts.existingSessionId` set) — a fresh
445
+ * spawn that fails is a real failure, not a poisoned resume;
446
+ * - we haven't already retried (`_freshRetry` guard) — prevents an
447
+ * infinite respawn loop if the fresh spawn also dies;
448
+ * - the failure is a startup-gate death: claude exited
449
+ * (`TMUX_SESSION_GONE`) or the dialog gate timed out
450
+ * (`CHANNELS_DIALOG_TIMEOUT`) — the signature of an unresumable aged
451
+ * session (the "Resume from summary?" /compact path that exits code 0).
452
+ * Any other error (bridge-handshake timeout on a healthy session, config
453
+ * error, etc.) throws as-is.
454
+ */
455
+ _shouldRefreshAfterStartupFailure(err, opts) {
456
+ const code = err && err.code;
457
+ return !!(opts && opts.existingSessionId)
458
+ && !(opts && opts._freshRetry)
459
+ && (code === 'TMUX_SESSION_GONE' || code === 'CHANNELS_DIALOG_TIMEOUT');
460
+ }
461
+
418
462
  /**
419
463
  * M1 refactor: socket-server lifecycle delegated to ChannelsBridgeServer.
420
464
  * This class wires the event surface (bridge-ready, bridge-message,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.12.0-rc.18",
3
+ "version": "0.12.0-rc.19",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc/client.js",
6
6
  "bin": {