polygram 0.8.0-rc.52 → 0.8.0-rc.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
3
3
  "name": "polygram",
4
- "version": "0.8.0-rc.52",
4
+ "version": "0.8.0-rc.54",
5
5
  "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands and a history skill.",
6
6
  "keywords": [
7
7
  "telegram",
@@ -0,0 +1,101 @@
1
+ /**
2
+ * rc.54: auto-resume on 300s no-activity timeout.
3
+ *
4
+ * Background — the rc.54 incident pattern:
5
+ * When polygram's per-turn watchdog fires "Timeout: 300s idle with
6
+ * no Claude activity", the running SDK Query is torn down and the
7
+ * user gets the friendly "⏳ I went quiet too long without finishing.
8
+ * Try resending or simplifying." message. The session_id is preserved,
9
+ * so the *next* user message resumes context — but the work the user
10
+ * was waiting for is dropped on the floor.
11
+ *
12
+ * Most timeouts are wedged tool calls (long Bash, hanging MCP, stuck
13
+ * subagent). The wedged subprocess is dead by the time the watchdog
14
+ * fires; a fresh resume of the same session_id will spawn a clean
15
+ * Query and Claude has full prior context to continue.
16
+ *
17
+ * What this module provides: a per-session cooldown tracker so we
18
+ * don't auto-resume in a tight loop when the wedge is permanent.
19
+ *
20
+ * - markAttempt(sessionKey) — record we just tried an auto-resume
21
+ * - isInCooldown(sessionKey) — true if we attempted within the
22
+ * cooldown window (default 10 min). Caller skips auto-resume and
23
+ * falls back to the existing user-facing timeout reply.
24
+ * - clear(sessionKey) — drop the timestamp (e.g. a successful turn
25
+ * completed since the auto-resume — we're back to healthy).
26
+ */
27
+
28
+ 'use strict';
29
+
30
+ const DEFAULT_COOLDOWN_MS = 10 * 60 * 1000; // 10 min
31
+
32
/**
 * Build a per-session cooldown tracker for auto-resume attempts.
 *
 * The tracker stores, per session key, the timestamp of the most recent
 * auto-resume attempt. Entries whose cooldown has elapsed are evicted
 * lazily (on lookup and whenever a new attempt is recorded) so the map
 * only ever holds sessions that are actually inside their cooldown
 * window, instead of growing for the lifetime of the process.
 *
 * @param {object} [options]
 * @param {number} [options.cooldownMs=DEFAULT_COOLDOWN_MS] - Length of
 *   the cooldown window in milliseconds.
 * @param {() => number} [options.now=Date.now] - Clock source returning
 *   a millisecond timestamp; injectable for tests.
 * @returns {{
 *   isInCooldown(sessionKey: string): boolean,
 *   markAttempt(sessionKey: string): void,
 *   clear(sessionKey: string): void,
 *   reset(): void,
 *   _size(): number,
 *   _get(sessionKey: string): (number|undefined)
 * }}
 */
function createAutoResumeTracker({ cooldownMs = DEFAULT_COOLDOWN_MS, now = Date.now } = {}) {
  const lastAttemptAt = new Map();

  // An entry is expired once a full cooldown window has elapsed since it
  // was recorded. This is the exact complement of the in-cooldown test.
  const isExpired = (recordedAt, currentTime) => currentTime - recordedAt >= cooldownMs;

  return {
    /**
     * Returns true if the most recent attempt for this sessionKey was
     * within `cooldownMs` ago. Use to gate further auto-resume
     * attempts when a wedge keeps recurring. An entry found to be
     * expired is dropped so it does not linger in memory.
     */
    isInCooldown(sessionKey) {
      const ts = lastAttemptAt.get(sessionKey);
      if (ts == null) return false;
      if (now() - ts < cooldownMs) return true;
      lastAttemptAt.delete(sessionKey);
      return false;
    },

    /**
     * Record an auto-resume attempt. Call BEFORE dispatching the
     * resumed turn so a fast follow-up timeout can still see this
     * session is in cooldown. Also sweeps expired entries for other
     * sessions; attempts are rare, so the linear sweep is cheap.
     */
    markAttempt(sessionKey) {
      const currentTime = now();
      // Deleting from a Map while iterating it is well-defined in JS.
      for (const [key, recordedAt] of lastAttemptAt) {
        if (isExpired(recordedAt, currentTime)) lastAttemptAt.delete(key);
      }
      lastAttemptAt.set(sessionKey, currentTime);
    },

    /**
     * Clear the cooldown for a session, signalling the session is
     * healthy again. Without this, a session that auto-resumed once
     * would be locked out of future auto-resumes for the full cooldown
     * window even after recovery.
     */
    clear(sessionKey) {
      lastAttemptAt.delete(sessionKey);
    },

    /**
     * Reset all tracked sessions (e.g. on daemon reload or in tests).
     */
    reset() {
      lastAttemptAt.clear();
    },

    // Test hooks
    _size() { return lastAttemptAt.size; },
    _get(sessionKey) { return lastAttemptAt.get(sessionKey); },
  };
}
78
+
79
/**
 * Decide whether an error is a candidate for auto-resume.
 *
 * Gates:
 *   - error message matches the 300s no-activity timeout pattern
 *     (NOT the wall-clock ceiling — that's usually a runaway, not
 *     a wedge; resuming might just run away again)
 *   - NOT user-aborted (the user explicitly /stop'd; never resume)
 *   - NOT a boot-replay (the user typed this minutes ago and moved
 *     on; resuming now is more confusing than helpful)
 *   - NOT during shutdown (boot replay will pick it up)
 *
 * @param {object} [ctx] - Omitting it entirely is treated as "no error".
 * @param {unknown} [ctx.error] - Error object or message string of the failed turn.
 * @param {boolean} [ctx.aborted] - True when the user aborted the turn.
 * @param {boolean} [ctx.replay] - True when the turn was a boot replay.
 * @param {boolean} [ctx.shuttingDown] - True when the daemon is shutting down.
 * @returns {boolean} True only when every gate passes.
 */
function isAutoResumable({ error, aborted, replay, shuttingDown } = {}) {
  if (aborted || replay || shuttingDown) return false;
  // Accept either an Error-like object or a bare string; anything
  // falsy collapses to '' and therefore never matches.
  const msg = String(error?.message || error || '');
  return /idle with no Claude activity/i.test(msg);
}
96
+
97
+ module.exports = {
98
+ createAutoResumeTracker,
99
+ isAutoResumable,
100
+ DEFAULT_COOLDOWN_MS,
101
+ };
@@ -28,36 +28,35 @@
28
28
  const TELEGRAM_TABLE_WIDTH_BUDGET = 40;
29
29
 
30
30
  const POLYGRAM_DISPLAY_HINT = [
31
- '## Telegram display constraints',
31
+ '## Telegram display rules',
32
32
  '',
33
- 'Your replies are sent to Telegram. The user reads them on phone or desktop.',
33
+ 'Your replies render in the Telegram client. Phone is the design target.',
34
34
  '',
35
- '**Tables:** Telegram renders markdown tables as monospace `<pre>` blocks.',
36
- `On mobile portrait, lines wrap after ~${TELEGRAM_TABLE_WIDTH_BUDGET} chars and look broken.`,
35
+ '### Tables HARD RULE',
37
36
  '',
38
- '- Use a markdown table when **every** rendered row (including separators',
39
- ` and padding) fits in ${TELEGRAM_TABLE_WIDTH_BUDGET} chars or fewer.`,
40
- '- If any row would exceed that budget, **drop the table** and switch to',
41
- ' vertical "row blocks": one entity per paragraph, **bold** headline,',
42
- ' then `Field: value` per data point. Example:',
37
+ `Before emitting any markdown table, count the longest row in characters (including pipes \`|\`, padding, and separator dashes). If that row is longer than ${TELEGRAM_TABLE_WIDTH_BUDGET}, you MUST NOT emit a table. Use row blocks instead.`,
43
38
  '',
44
- ' ```',
45
- ' **Mini dress Keen → Black dress mini**',
46
- ' COGS: ฿546 → ฿1144 (2.1×)',
47
- ' Margin: 84.8% → 77% ↓',
39
+ 'This applies even when the user is on desktop. Tables don\'t scroll horizontally on mobile; they wrap and become unreadable. Row blocks always work on every surface.',
48
40
  '',
49
- ' **Tank top Sway Top voluminous cotton**',
50
- ' COGS: ฿360 → ฿947 (2.6×)',
51
- ' Margin: 78.7% → 73% ↓',
52
- ' ```',
41
+ '**Row block format:** one entity per paragraph, **bold** headline, then `Field: value` lines.',
53
42
  '',
54
- '- Decide row-by-row before emitting; do not start a wide table assuming',
55
- ' the user can scroll.',
43
+ '```',
44
+ '**Mini dress Keen → Black dress mini**',
45
+ 'COGS: ฿546 → ฿1144 (2.1×)',
46
+ 'Margin: 84.8% → 77% ↓',
47
+ '',
48
+ '**Tank top Sway → Top voluminous cotton**',
49
+ 'COGS: ฿360 → ฿947 (2.6×)',
50
+ 'Margin: 78.7% → 73% ↓',
51
+ '```',
52
+ '',
53
+ 'Do NOT start a wide table assuming the user can scroll. Decide BEFORE you start writing the first `|` whether all rows will fit. If unsure, use row blocks — they\'re always safe.',
54
+ '',
55
+ '### Other Telegram quirks',
56
56
  '',
57
- 'Other Telegram quirks:',
58
57
  '- Headers `#`, `##`, `###` render as plain text — use **bold** for emphasis.',
59
58
  '- Horizontal rules render as a thin divider line.',
60
- '- Long replies stream in chunks, so prefer concise structure over walls of text.',
59
+ '- Long replies stream in chunks; prefer concise structure over walls of text.',
61
60
  ].join('\n');
62
61
 
63
62
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.8.0-rc.52",
3
+ "version": "0.8.0-rc.54",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc-client.js",
6
6
  "bin": {
package/polygram.js CHANGED
@@ -65,6 +65,7 @@ const { redactBotToken } = require('./lib/net-errors');
65
65
  const { createReactionManager, classifyToolName } = require('./lib/status-reactions');
66
66
  const { createMediaGroupBuffer } = require('./lib/media-group-buffer');
67
67
  const { classify: classifyError, isTransientHttpError } = require('./lib/error-classify');
68
+ const { createAutoResumeTracker, isAutoResumable } = require('./lib/auto-resume');
68
69
  const {
69
70
  createStore: createApprovalsStore,
70
71
  matchesAnyPattern: matchesApprovalPattern,
@@ -1118,6 +1119,94 @@ const abortGrace = createAbortGrace();
1118
1119
  function markSessionAborted(sessionKey) { abortGrace.mark(sessionKey); }
1119
1120
  function isSessionRecentlyAborted(sessionKey) { return abortGrace.isRecent(sessionKey); }
1120
1121
 
1122
+ // rc.54: per-session cooldown for auto-resume on 300s no-activity
1123
+ // timeout. Without the cooldown, a permanently-wedged tool would
1124
+ // trigger an infinite resume → timeout → resume loop.
1125
+ const autoResumeTracker = createAutoResumeTracker();
1126
+
1127
/**
 * rc.54: spawn a fresh Query resuming the same session_id and ask
 * Claude to continue the timed-out work. The killed pm Query has
 * already torn down the wedged subprocess (via pm.kill on timeout);
 * getOrSpawnForChat creates a new entry that picks up the saved
 * session_id from `sessions` table and sets `--resume <id>` on the
 * SDK Options. The continuation message tells Claude what happened
 * and that it has full prior context to keep going.
 *
 * @param {string} sessionKey - Session identifier handed to sendToProcess.
 * @param {number|string} chatId - Telegram chat to reply in.
 * @param {object} originalMsg - The user message whose turn timed out;
 *   `message_id` and `message_thread_id` are read from it.
 * @param {object} bot - Bot handle passed through to `tg` / `deliverReplies`.
 * @returns {Promise<string>} The result text (already sent to chat).
 * @throws {Error} On any failure of the resumed turn or its delivery;
 *   the caller writes the auto-resume-failed event and falls back to
 *   the standard timeout reply.
 */
async function attemptAutoResume(sessionKey, chatId, originalMsg, bot) {
  const threadId = originalMsg.message_thread_id || null;

  // 1. Tell the user we're auto-resuming so they don't think nothing
  //    happened. Threaded under the original user message.
  await tg(bot, 'sendMessage', {
    chat_id: chatId,
    text: '🔁 Auto-resuming after timeout — continuing where the previous turn left off.',
    reply_parameters: { message_id: originalMsg.message_id },
    ...(threadId && { message_thread_id: threadId }),
  }, { source: 'auto-resume-indicator', botName: BOT_NAME }).catch((sendErr) => {
    // Indicator is informational; don't fail the whole resume on it.
    // Guard the property read: a null/undefined rejection value would
    // otherwise throw inside this handler and abort the resume.
    console.error(`[${sessionKey}] auto-resume indicator send failed: ${sendErr?.message ?? sendErr}`);
  });

  // 2. Continuation prompt. Plain text — no XML wrapper. The SDK
  //    Query resumes the saved session_id, so Claude has full prior
  //    transcript context including its own partially-streamed text
  //    and tool calls. We just need to tell it WHAT happened and
  //    that it should pick up where it left off.
  const continuation = '[polygram] Your previous turn timed out at 300s with no Claude activity (likely a wedged tool call — long Bash, hanging MCP, or stuck subagent). Continue from where you left off; do not restart from scratch. If the same operation would just hang again, abort it and tell me.';

  // 3. No-op streamer + reactor. We don't need to stream the resume
  //    turn's response (we'll send it as one message at the end). pm
  //    invokes streamer/reactor methods only when present; passing
  //    minimal stubs keeps pm happy.
  const noopStreamer = {
    onChunk: async () => {},
    forceNewMessage: () => {},
    finalize: async () => ({ streamed: false }),
    flushDraft: async () => {},
    discard: async () => {},
  };
  const noopReactor = {
    setState: () => {},
    heartbeat: () => {},
    clear: async () => {},
    stop: () => {},
  };

  const result = await sendToProcess(sessionKey, continuation, {
    streamer: noopStreamer,
    reactor: noopReactor,
    sourceMsgId: originalMsg.message_id,
    threadId,
    onFirstStream: () => {},
  });

  if (result?.error) {
    throw new Error(`auto-resume turn errored: ${String(result.error).slice(0, 200)}`);
  }
  if (!result?.text) {
    throw new Error('auto-resume turn produced no text');
  }

  // 4. Send the continuation reply as regular Telegram message(s),
  //    threaded under the original user message. Reuse the existing
  //    chunked-delivery + markdown-formatting primitives.
  const chunks = chunkMarkdownText(result.text, TG_MAX_LEN);
  await deliverReplies({
    bot,
    send: (b, method, params, m) => tg(b, method, params, m),
    chatId,
    threadId,
    chunks,
    replyToMessageId: originalMsg.message_id,
    meta: { source: 'auto-resume-reply', botName: BOT_NAME },
    logger: { error: (m) => console.error(`[${sessionKey}] auto-resume deliver: ${m}`) },
  });

  return result.text;
}
1209
+
1121
1210
  // Called by bot.on('message') for every regular (non-admin, non-pair)
1122
1211
  // message. Runs handleMessage in a fire-and-forget manner with centralised
1123
1212
  // error handling. Replaces the old processQueue loop.
@@ -1170,6 +1259,49 @@ function dispatchHandleMessage(sessionKey, chatId, msg, bot) {
1170
1259
  // re-dispatch it on next start)
1171
1260
  // - user just /stop'd (already saw their abort acknowledgement)
1172
1261
  if (!wasAborted && !isReplay && !isShuttingDown) {
1262
+ // rc.54: auto-resume on 300s no-activity timeout. Spawn a fresh
1263
+ // Query resuming the same session_id and inject a continuation
1264
+ // nudge. This recovers from wedged tool calls (long Bash, hung
1265
+ // MCP, stuck subagent) that polygram's watchdog catches but
1266
+ // currently leaves the user stranded with "try resending".
1267
+ // Skipped when the failed turn was ITSELF an auto-resume
1268
+ // (msg._isAutoResume) to prevent recursion; per-session
1269
+ // cooldown blocks tight loops on permanent wedges.
1270
+ const isResumeTurn = msg._isAutoResume === true;
1271
+ const resumable = !isResumeTurn && isAutoResumable({
1272
+ error: err, aborted: wasAborted, replay: isReplay, shuttingDown: isShuttingDown,
1273
+ });
1274
+ if (resumable && !autoResumeTracker.isInCooldown(sessionKey)) {
1275
+ autoResumeTracker.markAttempt(sessionKey);
1276
+ logEvent('auto-resume-attempted', {
1277
+ chat_id: chatId, session_key: sessionKey, msg_id: msg.message_id,
1278
+ original_error: err.message?.slice(0, 200),
1279
+ });
1280
+ attemptAutoResume(sessionKey, chatId, msg, bot)
1281
+ .then(() => {
1282
+ logEvent('auto-resume-success', {
1283
+ chat_id: chatId, session_key: sessionKey, msg_id: msg.message_id,
1284
+ });
1285
+ autoResumeTracker.clear(sessionKey);
1286
+ })
1287
+ .catch((resumeErr) => {
1288
+ console.error(`[${sessionKey}] auto-resume failed: ${resumeErr?.message}`);
1289
+ logEvent('auto-resume-failed', {
1290
+ chat_id: chatId, session_key: sessionKey, msg_id: msg.message_id,
1291
+ error: resumeErr?.message?.slice(0, 200),
1292
+ });
1293
+ // Fall back to the original error reply so the user isn't
1294
+ // left with just the 🔁 indicator and no answer.
1295
+ const fallbackText = errorReplyText(err);
1296
+ if (fallbackText) {
1297
+ tg(bot, 'sendMessage', {
1298
+ chat_id: chatId, text: fallbackText,
1299
+ reply_parameters: { message_id: msg.message_id },
1300
+ }, { source: 'error-reply', botName: BOT_NAME }).catch(() => {});
1301
+ }
1302
+ });
1303
+ return;
1304
+ }
1173
1305
  // 0.7.7: errorReplyText may return null when the classifier
1174
1306
  // says "suppress reply" (e.g. INTERRUPTED inside abort grace —
1175
1307
  // user already saw their /stop ack). Skip the send call in