polygram 0.4.11 → 0.4.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/lib/db.js +59 -1
- package/lib/process-manager.js +46 -1
- package/migrations/006-inbound-handler-status.sql +27 -0
- package/package.json +1 -1
- package/polygram.js +169 -26
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://anthropic.com/claude-code/plugin.schema.json",
|
|
3
3
|
"name": "polygram",
|
|
4
|
-
"version": "0.4.11",
|
|
4
|
+
"version": "0.4.13",
|
|
5
5
|
"description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands and a history skill.",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"telegram",
|
package/lib/db.js
CHANGED
|
@@ -8,7 +8,7 @@ const fs = require('fs');
|
|
|
8
8
|
const path = require('path');
|
|
9
9
|
const Database = require('better-sqlite3');
|
|
10
10
|
|
|
11
|
-
const SCHEMA_VERSION = 5;
|
|
11
|
+
const SCHEMA_VERSION = 6;
|
|
12
12
|
|
|
13
13
|
function open(dbPath) {
|
|
14
14
|
const db = new Database(dbPath);
|
|
@@ -300,6 +300,64 @@ function wrap(db) {
|
|
|
300
300
|
ON CONFLICT(bot_name) DO UPDATE SET last_update_id = excluded.last_update_id, ts = excluded.ts
|
|
301
301
|
`).run(botName, lastUpdateId, Date.now());
|
|
302
302
|
},
|
|
303
|
+
|
|
304
|
+
// Inbound handler lifecycle — see migrations/006-inbound-handler-status.sql.
|
|
305
|
+
// Called by handleMessage as the turn progresses. Used by boot replay to
|
|
306
|
+
// find work that was interrupted by a crash or restart.
|
|
307
|
+
setInboundHandlerStatus({ chat_id, msg_id, status }) {
|
|
308
|
+
return db.prepare(`
|
|
309
|
+
UPDATE messages SET handler_status = ?
|
|
310
|
+
WHERE chat_id = ? AND msg_id = ? AND direction = 'in'
|
|
311
|
+
`).run(status, chat_id, msg_id);
|
|
312
|
+
},
|
|
313
|
+
|
|
314
|
+
// Find inbound messages that were being processed when polygram stopped.
|
|
315
|
+
// Scoped by bot_name via the chat_id → config mapping, so each bot only
|
|
316
|
+
// replays its own turns on boot. Only rows with ts newer than
|
|
317
|
+
// now - olderThanMs (default 30 min) match, so we never resurrect ancient messages after a long outage.
|
|
318
|
+
getReplayCandidates({ chatIds, olderThanMs = 30 * 60 * 1000, limit = 100 } = {}) {
|
|
319
|
+
if (!Array.isArray(chatIds) || chatIds.length === 0) return [];
|
|
320
|
+
const cutoff = Date.now() - olderThanMs;
|
|
321
|
+
const placeholders = chatIds.map(() => '?').join(',');
|
|
322
|
+
return db.prepare(`
|
|
323
|
+
SELECT id, chat_id, thread_id, msg_id, user, user_id, text, reply_to_id,
|
|
324
|
+
attachments_json, ts, handler_status
|
|
325
|
+
FROM messages
|
|
326
|
+
WHERE direction = 'in'
|
|
327
|
+
AND handler_status IN ('dispatched', 'processing', 'replay-pending')
|
|
328
|
+
AND chat_id IN (${placeholders})
|
|
329
|
+
AND ts > ?
|
|
330
|
+
ORDER BY ts ASC
|
|
331
|
+
LIMIT ?
|
|
332
|
+
`).all(...chatIds, cutoff, limit);
|
|
333
|
+
},
|
|
334
|
+
|
|
335
|
+
// Dedupe check: did we already send an outbound reply to this inbound?
|
|
336
|
+
// Prevents double-processing if a redelivered/replayed message has
|
|
337
|
+
// already been answered.
|
|
338
|
+
hasOutboundReplyTo({ chat_id, msg_id }) {
|
|
339
|
+
const row = db.prepare(`
|
|
340
|
+
SELECT 1 FROM messages
|
|
341
|
+
WHERE chat_id = ? AND direction = 'out' AND reply_to_id = ? AND status = 'sent'
|
|
342
|
+
LIMIT 1
|
|
343
|
+
`).get(chat_id, msg_id);
|
|
344
|
+
return !!row;
|
|
345
|
+
},
|
|
346
|
+
|
|
347
|
+
// On shutdown, mark any inbound rows still in-flight so the boot replay
|
|
348
|
+
// knows to pick them up. The update is scoped to one bot via `botName`;
|
|
349
|
+
// `since` is an optional epoch-ms lower bound on ts (default: 30 minutes
|
|
350
|
+
// ago), so only recent dispatched/processing rows for that bot are marked.
|
|
351
|
+
markReplayPending({ botName, since }) {
|
|
352
|
+
const cutoff = since ?? Date.now() - 30 * 60 * 1000;
|
|
353
|
+
return db.prepare(`
|
|
354
|
+
UPDATE messages SET handler_status = 'replay-pending'
|
|
355
|
+
WHERE direction = 'in'
|
|
356
|
+
AND handler_status IN ('dispatched', 'processing')
|
|
357
|
+
AND bot_name = ?
|
|
358
|
+
AND ts > ?
|
|
359
|
+
`).run(botName, cutoff);
|
|
360
|
+
},
|
|
303
361
|
};
|
|
304
362
|
}
|
|
305
363
|
|
package/lib/process-manager.js
CHANGED
|
@@ -100,11 +100,52 @@ class ProcessManager {
|
|
|
100
100
|
}
|
|
101
101
|
if (this.procs.size >= this.cap) {
|
|
102
102
|
const evicted = await this.evictLRU();
|
|
103
|
-
if (!evicted)
|
|
103
|
+
if (!evicted) {
|
|
104
|
+
// All sessions are in-flight — wait for one to drain, then retry.
|
|
105
|
+
// Waiters are held in `this._lruWaiters` FIFO and signalled when any
|
|
106
|
+
// pending queue empties (see _maybeSignalLruWaiter).
|
|
107
|
+
await this._awaitLruSlot();
|
|
108
|
+
// After waking, try the whole path again — the evictLRU may now
|
|
109
|
+
// succeed, or an existing session may have been spawned for this key.
|
|
110
|
+
return this.getOrSpawn(sessionKey, spawnContext);
|
|
111
|
+
}
|
|
104
112
|
}
|
|
105
113
|
return this._spawn(sessionKey, spawnContext);
|
|
106
114
|
}
|
|
107
115
|
|
|
116
|
+
// Hold a promise pair per waiter. _maybeSignalLruWaiter shifts the oldest
|
|
117
|
+
// waiter when a slot might have freed up. Each waiter has its own timer
|
|
118
|
+
// that rejects with 'LRU wait timeout' if no slot appears in time.
|
|
119
|
+
_awaitLruSlot({ timeoutMs = 5 * 60_000 } = {}) {
|
|
120
|
+
if (!this._lruWaiters) this._lruWaiters = [];
|
|
121
|
+
return new Promise((resolve, reject) => {
|
|
122
|
+
const waiter = { resolve, reject };
|
|
123
|
+
const timer = setTimeout(() => {
|
|
124
|
+
const idx = this._lruWaiters.indexOf(waiter);
|
|
125
|
+
if (idx !== -1) this._lruWaiters.splice(idx, 1);
|
|
126
|
+
this._logEvent('lru-wait-timeout', { cap: this.cap, queued_waiters: this._lruWaiters.length });
|
|
127
|
+
reject(new Error(`LRU wait timeout after ${timeoutMs / 1000}s`));
|
|
128
|
+
}, timeoutMs);
|
|
129
|
+
waiter.timer = timer;
|
|
130
|
+
this._lruWaiters.push(waiter);
|
|
131
|
+
this._logEvent('lru-wait', { cap: this.cap, queued_waiters: this._lruWaiters.length });
|
|
132
|
+
});
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
_maybeSignalLruWaiter() {
|
|
136
|
+
if (!this._lruWaiters || this._lruWaiters.length === 0) return;
|
|
137
|
+
// Only signal if there's actually capacity now (a session went idle
|
|
138
|
+
// or closed). Otherwise keep waiters sleeping for the next chance.
|
|
139
|
+
let hasIdle = false;
|
|
140
|
+
for (const v of this.procs.values()) {
|
|
141
|
+
if (!v.inFlight) { hasIdle = true; break; }
|
|
142
|
+
}
|
|
143
|
+
if (!hasIdle && this.procs.size >= this.cap) return;
|
|
144
|
+
const w = this._lruWaiters.shift();
|
|
145
|
+
clearTimeout(w.timer);
|
|
146
|
+
w.resolve();
|
|
147
|
+
}
|
|
148
|
+
|
|
108
149
|
async evictLRU() {
|
|
109
150
|
let victim = null;
|
|
110
151
|
for (const [k, v] of this.procs) {
|
|
@@ -267,6 +308,8 @@ class ProcessManager {
|
|
|
267
308
|
entry.pendingQueue[0].activate();
|
|
268
309
|
} else {
|
|
269
310
|
entry.inFlight = false;
|
|
311
|
+
// An entry just went idle → an LRU waiter might be able to run now.
|
|
312
|
+
this._maybeSignalLruWaiter();
|
|
270
313
|
// Graceful drain-and-respawn: if caller asked for a respawn
|
|
271
314
|
// (e.g. /model change) and we just emptied the queue, kill now
|
|
272
315
|
// and fire onRespawn so the caller can post confirmation.
|
|
@@ -293,6 +336,8 @@ class ProcessManager {
|
|
|
293
336
|
p.reject(new Error(`Process exited (code ${code})`));
|
|
294
337
|
}
|
|
295
338
|
this.procs.delete(sessionKey);
|
|
339
|
+
// A slot freed up → maybe an LRU waiter can run now.
|
|
340
|
+
this._maybeSignalLruWaiter();
|
|
296
341
|
if (code !== 0 && ctx.existingSessionId && this.db?.clearSessionId) {
|
|
297
342
|
this._logEvent('resume-fail', { session_key: sessionKey, session_id: ctx.existingSessionId, code });
|
|
298
343
|
try { this.db.clearSessionId(sessionKey); } catch (err) {
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
-- Track the lifecycle state of inbound message processing so a polygram
|
|
2
|
+
-- restart (SIGTERM, crash) can replay any turns that were in progress.
|
|
3
|
+
--
|
|
4
|
+
-- The existing `status` column on messages already tracks OUTBOUND state
|
|
5
|
+
-- ('pending' / 'sent' / 'failed'). Rather than overload it with inbound
|
|
6
|
+
-- semantics, add a dedicated column.
|
|
7
|
+
--
|
|
8
|
+
-- States for inbound:
|
|
9
|
+
-- received — row inserted by recordInbound, nothing else has happened
|
|
10
|
+
-- dispatched — handleMessage started (attachment download, voice, format)
|
|
11
|
+
-- processing — pm.send has written the prompt to claude's stdin
|
|
12
|
+
-- replied — outbound reply was sent successfully
|
|
13
|
+
-- replay-pending — marked by graceful shutdown to be replayed on next boot
|
|
14
|
+
--
|
|
15
|
+
-- NULL is valid (for historical rows inserted before this migration).
|
|
16
|
+
--
|
|
17
|
+
-- The boot replay loop scans for rows where:
|
|
18
|
+
-- direction = 'in'
|
|
19
|
+
-- AND handler_status IN ('dispatched', 'processing', 'replay-pending')
|
|
20
|
+
-- AND ts > now() - REPLAY_WINDOW_MS (default 30 min — anything older is stale)
|
|
21
|
+
-- and re-dispatches them.
|
|
22
|
+
|
|
23
|
+
ALTER TABLE messages ADD COLUMN handler_status TEXT;
|
|
24
|
+
|
|
25
|
+
CREATE INDEX IF NOT EXISTS idx_messages_handler_status
|
|
26
|
+
ON messages(handler_status, ts)
|
|
27
|
+
WHERE handler_status IS NOT NULL;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "polygram",
|
|
3
|
-
"version": "0.4.11",
|
|
3
|
+
"version": "0.4.13",
|
|
4
4
|
"description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
|
|
5
5
|
"main": "lib/ipc-client.js",
|
|
6
6
|
"bin": {
|
package/polygram.js
CHANGED
|
@@ -562,15 +562,29 @@ async function sendToProcess(sessionKey, prompt, context = {}) {
|
|
|
562
562
|
const CONCURRENT_WARN_THRESHOLD = 20;
|
|
563
563
|
const inFlightHandlers = new Map(); // sessionKey → count
|
|
564
564
|
|
|
565
|
-
// Sessions the operator just /stop'd (or natural-language "стоп").
|
|
566
|
-
//
|
|
567
|
-
//
|
|
568
|
-
//
|
|
569
|
-
//
|
|
570
|
-
|
|
565
|
+
// Sessions the operator just /stop'd (or natural-language "стоп"). Keyed
|
|
566
|
+
// by sessionKey → timestamp of abort. ANY pending that rejects within
|
|
567
|
+
// ABORT_GRACE_MS of the mark is considered abort-caused — its generic
|
|
568
|
+
// error reply is suppressed and the streamer warning is skipped.
|
|
569
|
+
//
|
|
570
|
+
// Timestamp model (vs the earlier "delete after first read" Set) fixes
|
|
571
|
+
// the case where multiple pendings were in-flight at abort time: all of
|
|
572
|
+
// them reject with "Process killed", all of them should be silent, not
|
|
573
|
+
// just the first one.
|
|
574
|
+
const ABORT_GRACE_MS = 15_000;
|
|
575
|
+
const abortedSessions = new Map();
|
|
571
576
|
|
|
572
577
|
function markSessionAborted(sessionKey) {
|
|
573
|
-
abortedSessions.add(sessionKey);
|
|
578
|
+
abortedSessions.set(sessionKey, Date.now());
|
|
579
|
+
// Sweep old entries opportunistically.
|
|
580
|
+
for (const [k, ts] of abortedSessions) {
|
|
581
|
+
if (Date.now() - ts > ABORT_GRACE_MS * 2) abortedSessions.delete(k);
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
function isSessionRecentlyAborted(sessionKey) {
|
|
586
|
+
const ts = abortedSessions.get(sessionKey);
|
|
587
|
+
return ts != null && (Date.now() - ts) < ABORT_GRACE_MS;
|
|
574
588
|
}
|
|
575
589
|
|
|
576
590
|
// Called by bot.on('message') for every regular (non-admin, non-pair)
|
|
@@ -586,9 +600,15 @@ function dispatchHandleMessage(sessionKey, chatId, msg, bot) {
|
|
|
586
600
|
}), 'log queue-depth-warning');
|
|
587
601
|
}
|
|
588
602
|
handleMessage(sessionKey, chatId, msg, bot).catch((err) => {
|
|
589
|
-
const wasAborted = abortedSessions.has(sessionKey);
|
|
590
|
-
if (wasAborted) abortedSessions.delete(sessionKey);
|
|
603
|
+
const wasAborted = isSessionRecentlyAborted(sessionKey);
|
|
591
604
|
console.error(`[${sessionKey}] Error:`, err.message);
|
|
605
|
+
// Mark the row as 'failed' so boot replay doesn't re-dispatch it.
|
|
606
|
+
// Exception: aborted sessions → 'aborted' (same — not replayable).
|
|
607
|
+
// Shutdown case handled separately in the SIGTERM handler.
|
|
608
|
+
dbWrite(() => db.setInboundHandlerStatus({
|
|
609
|
+
chat_id: chatId, msg_id: msg.message_id,
|
|
610
|
+
status: wasAborted ? 'aborted' : 'failed',
|
|
611
|
+
}), 'set handler_status=failed/aborted');
|
|
592
612
|
dbWrite(() => db.logEvent('handler-error', {
|
|
593
613
|
chat_id: chatId, session_key: sessionKey,
|
|
594
614
|
msg_id: msg?.message_id,
|
|
@@ -964,6 +984,12 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
|
|
|
964
984
|
const chatConfig = config.chats[chatId];
|
|
965
985
|
if (!chatConfig) return;
|
|
966
986
|
|
|
987
|
+
// Mark the inbound row as 'dispatched' so the boot replay loop knows
|
|
988
|
+
// this turn started. Cleared to 'replied' (or 'failed') when done.
|
|
989
|
+
dbWrite(() => db.setInboundHandlerStatus({
|
|
990
|
+
chat_id: chatId, msg_id: msg.message_id, status: 'dispatched',
|
|
991
|
+
}), 'set handler_status=dispatched');
|
|
992
|
+
|
|
967
993
|
const text = msg.text || msg.caption || '';
|
|
968
994
|
const threadId = msg.message_thread_id;
|
|
969
995
|
const threadIdStr = threadId?.toString() || null;
|
|
@@ -1269,7 +1295,11 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
|
|
|
1269
1295
|
reactor.setState('ERROR');
|
|
1270
1296
|
if (!result.text) return;
|
|
1271
1297
|
} else {
|
|
1272
|
-
|
|
1298
|
+
// Clear the progress reaction instead of stamping 👍 — the reply
|
|
1299
|
+
// bubble itself is the "done" signal and a permanent thumbs-up on
|
|
1300
|
+
// every answered message is chat noise (plus triggers reaction
|
|
1301
|
+
// notifications for other group members).
|
|
1302
|
+
reactor.clear().catch(() => {});
|
|
1273
1303
|
}
|
|
1274
1304
|
|
|
1275
1305
|
if (!result.text || result.text === 'NO_REPLY') return;
|
|
@@ -1334,25 +1364,32 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
|
|
|
1334
1364
|
}
|
|
1335
1365
|
|
|
1336
1366
|
console.log(`[${label}] ${elapsed}s | ${result.text.length} chars | ${chatConfig.model}/${chatConfig.effort} | $${result.cost?.toFixed(4) || '?'}`);
|
|
1367
|
+
// Success: mark the inbound row 'replied' so boot replay doesn't
|
|
1368
|
+
// pick it up again on restart.
|
|
1369
|
+
dbWrite(() => db.setInboundHandlerStatus({
|
|
1370
|
+
chat_id: chatId, msg_id: msg.message_id, status: 'replied',
|
|
1371
|
+
}), 'set handler_status=replied');
|
|
1337
1372
|
} catch (err) {
|
|
1338
|
-
//
|
|
1339
|
-
|
|
1340
|
-
//
|
|
1341
|
-
//
|
|
1342
|
-
|
|
1343
|
-
|
|
1373
|
+
// If the user just aborted this session, silently finalise the stream
|
|
1374
|
+
// without the scary "⚠ stream interrupted" banner. The user has already
|
|
1375
|
+
// seen their "Остановлено." ack; adding a warning to the partial bubble
|
|
1376
|
+
// just reads as "something crashed".
|
|
1377
|
+
const abortedByUser = isSessionRecentlyAborted(sessionKey);
|
|
1378
|
+
if (abortedByUser) {
|
|
1379
|
+
await streamer.finalize('').catch(() => {});
|
|
1380
|
+
// Leave reaction as-is — no 🤯 / 😨; user asked for stop.
|
|
1344
1381
|
} else {
|
|
1345
|
-
|
|
1382
|
+
await streamer.finalize('', { errorSuffix: 'stream interrupted' }).catch(() => {});
|
|
1383
|
+
if (/wall-clock ceiling|idle with no Claude activity/i.test(err?.message || '')) {
|
|
1384
|
+
reactor.setState('TIMEOUT');
|
|
1385
|
+
} else {
|
|
1386
|
+
reactor.setState('ERROR');
|
|
1387
|
+
}
|
|
1346
1388
|
}
|
|
1347
1389
|
throw err;
|
|
1348
1390
|
} finally {
|
|
1349
1391
|
stopTyping();
|
|
1350
|
-
// streamer is per-turn and not stored in any session Map in 0.4.8
|
|
1351
|
-
// Give the reactor a beat to flush the terminal state (DONE/ERROR/TIMEOUT
|
|
1352
|
-
// bypass throttle so this is instant in practice; the stop() below
|
|
1353
|
-
// guards against any late transition leaking after the turn ends).
|
|
1354
1392
|
reactor.stop();
|
|
1355
|
-
// reactor is per-turn and not stored in any session Map in 0.4.8
|
|
1356
1393
|
}
|
|
1357
1394
|
}
|
|
1358
1395
|
|
|
@@ -1935,22 +1972,69 @@ async function main() {
|
|
|
1935
1972
|
|
|
1936
1973
|
bot = createBot(config.bot.token);
|
|
1937
1974
|
|
|
1938
|
-
|
|
1975
|
+
// Graceful shutdown: stop accepting new inbound, drain in-flight pendings
|
|
1976
|
+
// up to SHUTDOWN_DRAIN_MS, then mark anything still unfinished so boot
|
|
1977
|
+
// replay picks it up. Prevents "Sorry, I couldn't process that message"
|
|
1978
|
+
// from showing on every restart.
|
|
1979
|
+
const SHUTDOWN_DRAIN_MS = 30_000;
|
|
1980
|
+
let shuttingDown = false;
|
|
1981
|
+
const shutdown = async () => {
|
|
1982
|
+
if (shuttingDown) return;
|
|
1983
|
+
shuttingDown = true;
|
|
1939
1984
|
console.log('\nShutting down...');
|
|
1985
|
+
// 1. Stop accepting new inbound first so nothing new queues behind the drain.
|
|
1940
1986
|
if (bot && bot._stop) bot._stop();
|
|
1987
|
+
|
|
1988
|
+
// 2. Drain in-flight handlers. Wait for inFlightHandlers to empty or
|
|
1989
|
+
// SHUTDOWN_DRAIN_MS to elapse. pm handlers resolve naturally when
|
|
1990
|
+
// result events arrive; the dispatcher's .finally decrements.
|
|
1991
|
+
const drainStart = Date.now();
|
|
1992
|
+
while (inFlightHandlers.size > 0) {
|
|
1993
|
+
if (Date.now() - drainStart >= SHUTDOWN_DRAIN_MS) break;
|
|
1994
|
+
await new Promise((r) => setTimeout(r, 100));
|
|
1995
|
+
}
|
|
1996
|
+
const drainElapsed = Date.now() - drainStart;
|
|
1997
|
+
let remaining = 0;
|
|
1998
|
+
for (const n of inFlightHandlers.values()) remaining += n;
|
|
1999
|
+
|
|
2000
|
+
// 3. Anything still in-flight → mark in DB as replay-pending so the
|
|
2001
|
+
// next polygram boot re-dispatches it. User never sees an error.
|
|
2002
|
+
if (remaining > 0 && db) {
|
|
2003
|
+
try {
|
|
2004
|
+
const res = db.markReplayPending({ botName: BOT_NAME });
|
|
2005
|
+
dbWrite(() => db.logEvent('shutdown-drain', {
|
|
2006
|
+
bot: BOT_NAME,
|
|
2007
|
+
in_flight: remaining,
|
|
2008
|
+
replay_marked: res?.changes ?? 0,
|
|
2009
|
+
elapsed_ms: drainElapsed,
|
|
2010
|
+
}), 'log shutdown-drain');
|
|
2011
|
+
console.log(`[shutdown] drained ${drainElapsed}ms, ${remaining} still in-flight, ${res?.changes ?? 0} rows marked replay-pending`);
|
|
2012
|
+
} catch (err) {
|
|
2013
|
+
console.error(`[shutdown] markReplayPending failed: ${err.message}`);
|
|
2014
|
+
}
|
|
2015
|
+
} else if (db) {
|
|
2016
|
+
dbWrite(() => db.logEvent('shutdown-drain', {
|
|
2017
|
+
bot: BOT_NAME,
|
|
2018
|
+
in_flight: 0,
|
|
2019
|
+
elapsed_ms: drainElapsed,
|
|
2020
|
+
}), 'log shutdown-drain');
|
|
2021
|
+
console.log(`[shutdown] clean drain in ${drainElapsed}ms`);
|
|
2022
|
+
}
|
|
2023
|
+
|
|
2024
|
+
// 4. Remaining shutdown: approvals sweeper, IPC, resolve hook waiters,
|
|
2025
|
+
// kill pm subprocesses, close DB.
|
|
1941
2026
|
if (approvalSweepTimer) clearInterval(approvalSweepTimer);
|
|
1942
2027
|
if (ipcCloser) ipcCloser.close().catch(() => {});
|
|
1943
2028
|
try { fs.unlinkSync(ipcServer.secretPathFor(BOT_NAME)); } catch {}
|
|
1944
|
-
// Resolve any blocked hook waiters so Claude processes don't hang.
|
|
1945
2029
|
for (const list of approvalWaiters.values()) {
|
|
1946
2030
|
for (const fn of list) { try { fn('cancelled', 'polygram shutting down'); } catch {} }
|
|
1947
2031
|
}
|
|
1948
2032
|
approvalWaiters.clear();
|
|
1949
|
-
if (pm) pm.shutdown().catch(() => {});
|
|
2033
|
+
if (pm) await pm.shutdown().catch(() => {});
|
|
1950
2034
|
if (db) {
|
|
1951
2035
|
try { db.logEvent('polygram-stop'); db.raw.close(); } catch {}
|
|
1952
2036
|
}
|
|
1953
|
-
setTimeout(() => process.exit(0),
|
|
2037
|
+
setTimeout(() => process.exit(0), 100);
|
|
1954
2038
|
};
|
|
1955
2039
|
process.on('SIGINT', shutdown);
|
|
1956
2040
|
process.on('SIGTERM', shutdown);
|
|
@@ -1975,6 +2059,65 @@ async function main() {
|
|
|
1975
2059
|
}
|
|
1976
2060
|
approvalSweepTimer = startApprovalSweeper();
|
|
1977
2061
|
|
|
2062
|
+
// Boot replay: re-dispatch any inbound turns that were interrupted by
|
|
2063
|
+
// the previous polygram's shutdown or crash. These are rows marked
|
|
2064
|
+
// 'dispatched', 'processing', or 'replay-pending' (set by the SIGTERM
|
|
2065
|
+
// handler) — all within the last 30 min so we don't resurrect ancient
|
|
2066
|
+
// work. Dedupe against already-sent outbound replies in case the
|
|
2067
|
+
// previous instance DID answer before dying.
|
|
2068
|
+
try {
|
|
2069
|
+
const chatIds = Object.keys(config.chats);
|
|
2070
|
+
if (chatIds.length > 0) {
|
|
2071
|
+
const candidates = db.getReplayCandidates({ chatIds });
|
|
2072
|
+
let replayed = 0;
|
|
2073
|
+
let skipped = 0;
|
|
2074
|
+
for (const row of candidates) {
|
|
2075
|
+
if (db.hasOutboundReplyTo({ chat_id: row.chat_id, msg_id: row.msg_id })) {
|
|
2076
|
+
// Already replied — just mark so we don't look at it again.
|
|
2077
|
+
db.setInboundHandlerStatus({
|
|
2078
|
+
chat_id: row.chat_id, msg_id: row.msg_id, status: 'replied',
|
|
2079
|
+
});
|
|
2080
|
+
skipped += 1;
|
|
2081
|
+
continue;
|
|
2082
|
+
}
|
|
2083
|
+
// Reconstruct a minimal grammy-like Message object. Enough for
|
|
2084
|
+
// dispatchRegularMessage (mention detect, abort, admin cmds,
|
|
2085
|
+
// shouldHandle, enqueue). Attachments carry file_ids so the
|
|
2086
|
+
// normal download path re-fetches on replay.
|
|
2087
|
+
const reconstructed = {
|
|
2088
|
+
chat: { id: Number(row.chat_id), type: row.chat_id.startsWith('-') ? 'supergroup' : 'private' },
|
|
2089
|
+
message_id: row.msg_id,
|
|
2090
|
+
from: { id: row.user_id, first_name: row.user },
|
|
2091
|
+
text: row.text || '',
|
|
2092
|
+
date: Math.floor(row.ts / 1000),
|
|
2093
|
+
...(row.thread_id && { message_thread_id: Number(row.thread_id) }),
|
|
2094
|
+
...(row.reply_to_id && { reply_to_message: { message_id: row.reply_to_id } }),
|
|
2095
|
+
};
|
|
2096
|
+
// Attach already-extracted attachments via the media-group shortcut
|
|
2097
|
+
// field so extractAttachments picks them up without re-parsing
|
|
2098
|
+
// grammy fields that don't exist on this reconstructed object.
|
|
2099
|
+
if (row.attachments_json) {
|
|
2100
|
+
try {
|
|
2101
|
+
reconstructed._mergedAttachments = JSON.parse(row.attachments_json);
|
|
2102
|
+
} catch {}
|
|
2103
|
+
}
|
|
2104
|
+
const chatConfig = config.chats[row.chat_id];
|
|
2105
|
+
if (!chatConfig) { skipped += 1; continue; }
|
|
2106
|
+
const sessionKey = getSessionKey(row.chat_id, row.thread_id, chatConfig);
|
|
2107
|
+
dispatchHandleMessage(sessionKey, row.chat_id, reconstructed, bot);
|
|
2108
|
+
replayed += 1;
|
|
2109
|
+
}
|
|
2110
|
+
if (candidates.length > 0) {
|
|
2111
|
+
console.log(`[replay] ${replayed} turns re-dispatched, ${skipped} skipped (already replied or no chat config)`);
|
|
2112
|
+
dbWrite(() => db.logEvent('replay-on-boot', {
|
|
2113
|
+
bot: BOT_NAME, replayed, skipped, total: candidates.length,
|
|
2114
|
+
}), 'log replay-on-boot');
|
|
2115
|
+
}
|
|
2116
|
+
}
|
|
2117
|
+
} catch (err) {
|
|
2118
|
+
console.error(`[replay] boot replay failed: ${err.message}`);
|
|
2119
|
+
}
|
|
2120
|
+
|
|
1978
2121
|
console.log(`[${BOT_NAME}] Starting...`);
|
|
1979
2122
|
const pollPromise = pollBot(bot).catch(err => {
|
|
1980
2123
|
console.error(`[${BOT_NAME}] Fatal:`, err.message);
|