polygram 0.12.0-rc.19 → 0.12.0-rc.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/error/classify.js +12 -0
- package/lib/handlers/dispatcher.js +20 -0
- package/lib/process/cli-process.js +0 -44
- package/package.json +1 -1
package/lib/error/classify.js
CHANGED
|
@@ -195,6 +195,18 @@ const CODES = {
|
|
|
195
195
|
isTransient: false,
|
|
196
196
|
autoRecover: null,
|
|
197
197
|
},
|
|
198
|
+
// TMUX_SESSION_GONE: claude exited during spawn so the tmux session vanished
|
|
199
|
+
// before the channel went live (the startup-gate's captureWide hit "can't
|
|
200
|
+
// find pane"). Usual cause: an unresumable aged session whose "Resume from
|
|
201
|
+
// summary?" /compact exits code 0. The dispatcher poison-clears the session
|
|
202
|
+
// on this code, so a resend genuinely starts fresh and works — hence the
|
|
203
|
+
// calm "send it again" copy instead of the old raw "[startup-gate]…" leak.
|
|
204
|
+
TMUX_SESSION_GONE: {
|
|
205
|
+
kind: 'tmuxSessionGone',
|
|
206
|
+
userMessage: '🔄 That chat got stuck starting up, so I reset it. Send your message again and I\'ll pick it up fresh.',
|
|
207
|
+
isTransient: false,
|
|
208
|
+
autoRecover: null,
|
|
209
|
+
},
|
|
198
210
|
// TURN_TIMEOUT: 10-min wall-clock cap on a single channels turn. Mirror
|
|
199
211
|
// of the tmux wall-clock ceiling — typically a runaway, not a wedge.
|
|
200
212
|
// Not transient (auto-retry would just runaway again).
|
|
package/lib/handlers/dispatcher.js
CHANGED
|
@@ -178,6 +178,26 @@ function createDispatcher({
|
|
|
178
178
|
aborted: wasAborted || undefined,
|
|
179
179
|
replay: isReplay || undefined,
|
|
180
180
|
});
|
|
181
|
+
// Startup-gate death (claude exited during spawn / the dialog gate timed
|
|
182
|
+
// out) of a likely-aged RESUMED session — the persisted claude_session_id
|
|
183
|
+
// can't be resumed cleanly (shumorobot general chat 2026-06-01→03: a
|
|
184
|
+
// week-old session renders claude's "Resume from summary?" dialog whose
|
|
185
|
+
// /compact resume exits code 0 → TMUX_SESSION_GONE → the chat re-resumes
|
|
186
|
+
// the same dead id on every message, stuck for days). Poison-clear so the
|
|
187
|
+
// NEXT message spawns a FRESH session — same recovery the auto-resume path
|
|
188
|
+
// does for BRIDGE_DISCONNECTED below. clearSessionId is a no-op DELETE when
|
|
189
|
+
// there's no row (a genuine fresh-spawn failure), so this is safe; and
|
|
190
|
+
// unlike an in-process recursive retry it never reuses a closed instance.
|
|
191
|
+
if ((err.code === 'TMUX_SESSION_GONE' || err.code === 'CHANNELS_DIALOG_TIMEOUT')
|
|
192
|
+
&& typeof db.clearSessionId === 'function') {
|
|
193
|
+
dbWrite(
|
|
194
|
+
() => db.clearSessionId(sessionKey),
|
|
195
|
+
`clearSessionId: poisoned by ${err.code} on startup`,
|
|
196
|
+
);
|
|
197
|
+
logEvent('session-reset-after-startup-gate', {
|
|
198
|
+
chat_id: chatId, session_key: sessionKey, msg_id: msg?.message_id, code: err.code,
|
|
199
|
+
});
|
|
200
|
+
}
|
|
181
201
|
// rc.55: surface replay failures with a meaningful message.
|
|
182
202
|
// Pre-rc.55 any boot-replay turn that failed for ANY reason
|
|
183
203
|
// was silently dropped. The rc.51-onward boot-replay path is
|
|
package/lib/process/cli-process.js
CHANGED
|
@@ -362,29 +362,6 @@ class CliProcess extends Process {
|
|
|
362
362
|
this._startPingLoop();
|
|
363
363
|
} catch (err) {
|
|
364
364
|
await this._teardownOnStartFailure();
|
|
365
|
-
// Self-heal an unresumable stale session. A `--resume` spawn that dies
|
|
366
|
-
// during startup means the persisted session can't be resumed cleanly.
|
|
367
|
-
// The production case (shumorobot general chat, session minted 2026-05-27):
|
|
368
|
-
// claude resumes a week-old session → renders the aged-session "Resuming
|
|
369
|
-
// the full session… Resume from summary?" dialog → the gate confirms it →
|
|
370
|
-
// the /compact resume exits code 0 → the gate reports TMUX_SESSION_GONE.
|
|
371
|
-
// Retrying the SAME id loops forever (the chat sat dead for days). Clear it
|
|
372
|
-
// and respawn ONCE with a fresh session: a fresh spawn has no aged-session
|
|
373
|
-
// dialog and starts clean — exactly what made the Music topic healthy once
|
|
374
|
-
// its cwd change minted a new session. The `_freshRetry` guard means a
|
|
375
|
-
// genuine fresh-spawn failure throws instead of looping. onInit re-upserts
|
|
376
|
-
// the fresh id on success, so the dead id leaves the DB without extra wiring.
|
|
377
|
-
if (this._shouldRefreshAfterStartupFailure(err, opts)) {
|
|
378
|
-
this._logEvent('cli-stale-session-reset', {
|
|
379
|
-
stale_session_id: opts.existingSessionId,
|
|
380
|
-
code: err.code,
|
|
381
|
-
});
|
|
382
|
-
this.logger.warn?.(
|
|
383
|
-
`[${this.label}] channels: startup failed resuming ${opts.existingSessionId} `
|
|
384
|
-
+ `(${err.code}) — clearing stale session, respawning fresh.`,
|
|
385
|
-
);
|
|
386
|
-
return this.start({ ...opts, existingSessionId: null, _freshRetry: true });
|
|
387
|
-
}
|
|
388
365
|
throw err;
|
|
389
366
|
}
|
|
390
367
|
|
|
@@ -438,27 +415,6 @@ class CliProcess extends Process {
|
|
|
438
415
|
}
|
|
439
416
|
}
|
|
440
417
|
|
|
441
|
-
/**
|
|
442
|
-
* Decide whether a start() failure warrants a one-shot fresh-session
|
|
443
|
-
* retry. True only when ALL hold:
|
|
444
|
-
* - we were RESUMING a session (`opts.existingSessionId` set) — a fresh
|
|
445
|
-
* spawn that fails is a real failure, not a poisoned resume;
|
|
446
|
-
* - we haven't already retried (`_freshRetry` guard) — prevents an
|
|
447
|
-
* infinite respawn loop if the fresh spawn also dies;
|
|
448
|
-
* - the failure is a startup-gate death: claude exited
|
|
449
|
-
* (`TMUX_SESSION_GONE`) or the dialog gate timed out
|
|
450
|
-
* (`CHANNELS_DIALOG_TIMEOUT`) — the signature of an unresumable aged
|
|
451
|
-
* session (the "Resume from summary?" /compact path that exits code 0).
|
|
452
|
-
* Any other error (bridge-handshake timeout on a healthy session, config
|
|
453
|
-
* error, etc.) throws as-is.
|
|
454
|
-
*/
|
|
455
|
-
_shouldRefreshAfterStartupFailure(err, opts) {
|
|
456
|
-
const code = err && err.code;
|
|
457
|
-
return !!(opts && opts.existingSessionId)
|
|
458
|
-
&& !(opts && opts._freshRetry)
|
|
459
|
-
&& (code === 'TMUX_SESSION_GONE' || code === 'CHANNELS_DIALOG_TIMEOUT');
|
|
460
|
-
}
|
|
461
|
-
|
|
462
418
|
/**
|
|
463
419
|
* M1 refactor: socket-server lifecycle delegated to ChannelsBridgeServer.
|
|
464
420
|
* This class wires the event surface (bridge-ready, bridge-message,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "polygram",
|
|
3
|
-
"version": "0.12.0-rc.19",
|
|
3
|
+
"version": "0.12.0-rc.20",
|
|
4
4
|
"description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
|
|
5
5
|
"main": "lib/ipc/client.js",
|
|
6
6
|
"bin": {
|