polygram 0.5.11 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/db.js +133 -9
- package/migrations/007-attachments-table.sql +89 -0
- package/migrations/008-drop-attachments-json.sql +12 -0
- package/package.json +1 -1
- package/polygram.js +125 -38
- package/scripts/split-db.js +23 -2
package/lib/db.js
CHANGED
|
@@ -8,7 +8,7 @@ const fs = require('fs');
|
|
|
8
8
|
const path = require('path');
|
|
9
9
|
const Database = require('better-sqlite3');
|
|
10
10
|
|
|
11
|
-
const SCHEMA_VERSION =
|
|
11
|
+
const SCHEMA_VERSION = 8;
|
|
12
12
|
|
|
13
13
|
function open(dbPath) {
|
|
14
14
|
const db = new Database(dbPath);
|
|
@@ -56,14 +56,16 @@ function runMigrations(db, migrationsDir) {
|
|
|
56
56
|
}
|
|
57
57
|
|
|
58
58
|
function wrap(db) {
|
|
59
|
+
// 0.6.1: attachments_json column dropped (migration 008). All attachment
|
|
60
|
+
// data lives in the per-attachment table now (see attachments stmts below).
|
|
59
61
|
const insertMessageStmt = db.prepare(`
|
|
60
62
|
INSERT INTO messages (
|
|
61
63
|
chat_id, thread_id, msg_id, user, user_id, text, reply_to_id,
|
|
62
|
-
direction, source, bot_name,
|
|
64
|
+
direction, source, bot_name, session_id,
|
|
63
65
|
model, effort, turn_id, status, error, cost_usd, ts
|
|
64
66
|
) VALUES (
|
|
65
67
|
@chat_id, @thread_id, @msg_id, @user, @user_id, @text, @reply_to_id,
|
|
66
|
-
@direction, @source, @bot_name, @
|
|
68
|
+
@direction, @source, @bot_name, @session_id,
|
|
67
69
|
@model, @effort, @turn_id, @status, @error, @cost_usd, @ts
|
|
68
70
|
)
|
|
69
71
|
ON CONFLICT(chat_id, msg_id) DO UPDATE SET
|
|
@@ -121,8 +123,7 @@ function wrap(db) {
|
|
|
121
123
|
|
|
122
124
|
const setMessageTextStmt = db.prepare(`
|
|
123
125
|
UPDATE messages
|
|
124
|
-
SET text = @text
|
|
125
|
-
attachments_json = COALESCE(@attachments_json, attachments_json)
|
|
126
|
+
SET text = @text
|
|
126
127
|
WHERE chat_id = @chat_id AND msg_id = @msg_id
|
|
127
128
|
`);
|
|
128
129
|
|
|
@@ -174,7 +175,6 @@ function wrap(db) {
|
|
|
174
175
|
direction: row.direction || 'in',
|
|
175
176
|
source: row.source || 'polygram',
|
|
176
177
|
bot_name: row.bot_name || null,
|
|
177
|
-
attachments_json: row.attachments_json || null,
|
|
178
178
|
session_id: row.session_id || null,
|
|
179
179
|
model: row.model || null,
|
|
180
180
|
effort: row.effort || null,
|
|
@@ -240,12 +240,11 @@ function wrap(db) {
|
|
|
240
240
|
return getMessageStmt.get(String(chatId), msgId);
|
|
241
241
|
},
|
|
242
242
|
|
|
243
|
-
setMessageText({ chat_id, msg_id, text
|
|
243
|
+
setMessageText({ chat_id, msg_id, text }) {
|
|
244
244
|
return setMessageTextStmt.run({
|
|
245
245
|
chat_id: String(chat_id),
|
|
246
246
|
msg_id,
|
|
247
247
|
text: text ?? '',
|
|
248
|
-
attachments_json,
|
|
249
248
|
});
|
|
250
249
|
},
|
|
251
250
|
|
|
@@ -324,7 +323,7 @@ function wrap(db) {
|
|
|
324
323
|
const placeholders = chatIds.map(() => '?').join(',');
|
|
325
324
|
return db.prepare(`
|
|
326
325
|
SELECT id, chat_id, thread_id, msg_id, user, user_id, text, reply_to_id,
|
|
327
|
-
|
|
326
|
+
ts, handler_status
|
|
328
327
|
FROM messages
|
|
329
328
|
WHERE direction = 'in'
|
|
330
329
|
AND handler_status IN ('dispatched', 'processing', 'replay-pending')
|
|
@@ -361,6 +360,131 @@ function wrap(db) {
|
|
|
361
360
|
AND ts > ?
|
|
362
361
|
`).run(botName, cutoff);
|
|
363
362
|
},
|
|
363
|
+
|
|
364
|
+
// ─── Attachments (migration 007, polygram 0.6.0) ──────────────────
|
|
365
|
+
//
|
|
366
|
+
// Replaces the messages.attachments_json blob. Each attachment is its
|
|
367
|
+
// own row with lifecycle (`pending` → `downloaded` | `failed`),
|
|
368
|
+
// searchable by chat / kind / time. recordInbound now inserts these
|
|
369
|
+
// alongside the message in a transaction; downloadAttachments updates
|
|
370
|
+
// status as it processes each file. See docs/attachments-table.md.
|
|
371
|
+
|
|
372
|
+
insertAttachment({
|
|
373
|
+
message_id, chat_id, msg_id, thread_id, bot_name,
|
|
374
|
+
file_id, file_unique_id, kind, name, mime_type, size_bytes,
|
|
375
|
+
ts,
|
|
376
|
+
}) {
|
|
377
|
+
return db.prepare(`
|
|
378
|
+
INSERT INTO attachments (
|
|
379
|
+
message_id, chat_id, msg_id, thread_id, bot_name,
|
|
380
|
+
file_id, file_unique_id, kind, name, mime_type, size_bytes,
|
|
381
|
+
download_status, ts
|
|
382
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', ?)
|
|
383
|
+
`).run(
|
|
384
|
+
message_id,
|
|
385
|
+
String(chat_id),
|
|
386
|
+
msg_id,
|
|
387
|
+
thread_id ? String(thread_id) : null,
|
|
388
|
+
bot_name || null,
|
|
389
|
+
file_id,
|
|
390
|
+
file_unique_id || null,
|
|
391
|
+
kind,
|
|
392
|
+
name || null,
|
|
393
|
+
mime_type || null,
|
|
394
|
+
size_bytes ?? null,
|
|
395
|
+
ts || Date.now(),
|
|
396
|
+
);
|
|
397
|
+
},
|
|
398
|
+
|
|
399
|
+
markAttachmentDownloaded(id, { local_path, size_bytes }) {
|
|
400
|
+
return db.prepare(`
|
|
401
|
+
UPDATE attachments
|
|
402
|
+
SET download_status = 'downloaded',
|
|
403
|
+
local_path = ?,
|
|
404
|
+
size_bytes = COALESCE(?, size_bytes),
|
|
405
|
+
download_error = NULL
|
|
406
|
+
WHERE id = ?
|
|
407
|
+
`).run(local_path, size_bytes ?? null, id);
|
|
408
|
+
},
|
|
409
|
+
|
|
410
|
+
markAttachmentFailed(id, error) {
|
|
411
|
+
return db.prepare(`
|
|
412
|
+
UPDATE attachments
|
|
413
|
+
SET download_status = 'failed',
|
|
414
|
+
download_error = ?
|
|
415
|
+
WHERE id = ?
|
|
416
|
+
`).run(String(error || 'unknown').slice(0, 500), id);
|
|
417
|
+
},
|
|
418
|
+
|
|
419
|
+
setAttachmentTranscription(id, text) {
|
|
420
|
+
return db.prepare(`
|
|
421
|
+
UPDATE attachments SET transcription = ? WHERE id = ?
|
|
422
|
+
`).run(text || null, id);
|
|
423
|
+
},
|
|
424
|
+
|
|
425
|
+
getAttachmentsByMessage(message_id) {
|
|
426
|
+
return db.prepare(`
|
|
427
|
+
SELECT id, message_id, chat_id, msg_id, thread_id, bot_name,
|
|
428
|
+
file_id, file_unique_id, kind, name, mime_type, size_bytes,
|
|
429
|
+
local_path, download_status, download_error, transcription, ts
|
|
430
|
+
FROM attachments
|
|
431
|
+
WHERE message_id = ?
|
|
432
|
+
ORDER BY id ASC
|
|
433
|
+
`).all(message_id);
|
|
434
|
+
},
|
|
435
|
+
|
|
436
|
+
// Rich filter for ops queries. All filters are optional; with no filters
|
|
437
|
+
// returns the most recent 100. Caller can paginate via since/until.
|
|
438
|
+
searchAttachments({
|
|
439
|
+
chat_id = null,
|
|
440
|
+
kind = null,
|
|
441
|
+
status = null,
|
|
442
|
+
since = null,
|
|
443
|
+
until = null,
|
|
444
|
+
limit = 100,
|
|
445
|
+
} = {}) {
|
|
446
|
+
const where = [];
|
|
447
|
+
const args = [];
|
|
448
|
+
if (chat_id !== null) { where.push('chat_id = ?'); args.push(String(chat_id)); }
|
|
449
|
+
if (kind !== null) { where.push('kind = ?'); args.push(kind); }
|
|
450
|
+
if (status !== null) { where.push('download_status = ?'); args.push(status); }
|
|
451
|
+
if (since !== null) { where.push('ts >= ?'); args.push(Number(since)); }
|
|
452
|
+
if (until !== null) { where.push('ts <= ?'); args.push(Number(until)); }
|
|
453
|
+
const sql = `
|
|
454
|
+
SELECT id, message_id, chat_id, msg_id, thread_id, bot_name,
|
|
455
|
+
file_id, file_unique_id, kind, name, mime_type, size_bytes,
|
|
456
|
+
local_path, download_status, download_error, transcription, ts
|
|
457
|
+
FROM attachments
|
|
458
|
+
${where.length ? 'WHERE ' + where.join(' AND ') : ''}
|
|
459
|
+
ORDER BY ts DESC
|
|
460
|
+
LIMIT ?
|
|
461
|
+
`;
|
|
462
|
+
args.push(Number(limit));
|
|
463
|
+
return db.prepare(sql).all(...args);
|
|
464
|
+
},
|
|
465
|
+
|
|
466
|
+
// Look up the messages.id auto-pk for an inbound message. Used by
|
|
467
|
+
// recordInbound to FK attachments to the just-inserted message even
|
|
468
|
+
// when an ON-CONFLICT update happened (lastInsertRowid is not meaningful in that
|
|
469
|
+
// case, so we can't rely on the run-result alone).
|
|
470
|
+
getInboundMessageId({ chat_id, msg_id }) {
|
|
471
|
+
const row = db.prepare(`
|
|
472
|
+
SELECT id FROM messages WHERE chat_id = ? AND msg_id = ? AND direction = 'in'
|
|
473
|
+
`).get(String(chat_id), msg_id);
|
|
474
|
+
return row ? row.id : null;
|
|
475
|
+
},
|
|
476
|
+
|
|
477
|
+
listFailedAttachments({ since = null, limit = 100 } = {}) {
|
|
478
|
+
const cutoff = since ?? Date.now() - 24 * 60 * 60 * 1000;
|
|
479
|
+
return db.prepare(`
|
|
480
|
+
SELECT id, message_id, chat_id, msg_id, kind, name, mime_type,
|
|
481
|
+
download_error, ts
|
|
482
|
+
FROM attachments
|
|
483
|
+
WHERE download_status = 'failed' AND ts >= ?
|
|
484
|
+
ORDER BY ts DESC
|
|
485
|
+
LIMIT ?
|
|
486
|
+
`).all(cutoff, limit);
|
|
487
|
+
},
|
|
364
488
|
};
|
|
365
489
|
}
|
|
366
490
|
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
-- Replace the messages.attachments_json blob with a real table so we can
|
|
2
|
+
-- query, search, and track lifecycle per attachment. Design doc:
|
|
3
|
+
-- docs/attachments-table.md.
|
|
4
|
+
--
|
|
5
|
+
-- This migration creates the table + indexes and backfills from existing
|
|
6
|
+
-- attachments_json. The column itself is NOT dropped here — kept as a
|
|
7
|
+
-- safety net for one minor release. A follow-up migration drops it once
|
|
8
|
+
-- we're confident reads/writes have fully moved over.
|
|
9
|
+
--
|
|
10
|
+
-- For backfilled rows we set download_status='downloaded' (we know they
|
|
11
|
+
-- went through to disk historically; recreating failure state isn't
|
|
12
|
+
-- useful for old data). local_path is copied from the blob's `path` field when present (NULL otherwise) — the deterministic
|
|
13
|
+
-- on-disk location can be re-derived from msg_id + file_unique_id at
|
|
14
|
+
-- read time if a caller needs it; the existing inbox/<chat_id>/...
|
|
15
|
+
-- filename convention is unchanged.
|
|
16
|
+
|
|
17
|
+
CREATE TABLE IF NOT EXISTS attachments (
|
|
18
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
19
|
+
message_id INTEGER NOT NULL,
|
|
20
|
+
chat_id TEXT NOT NULL,
|
|
21
|
+
msg_id INTEGER NOT NULL,
|
|
22
|
+
thread_id TEXT,
|
|
23
|
+
bot_name TEXT,
|
|
24
|
+
file_id TEXT NOT NULL,
|
|
25
|
+
file_unique_id TEXT,
|
|
26
|
+
kind TEXT NOT NULL,
|
|
27
|
+
name TEXT,
|
|
28
|
+
mime_type TEXT,
|
|
29
|
+
size_bytes INTEGER,
|
|
30
|
+
local_path TEXT,
|
|
31
|
+
download_status TEXT NOT NULL CHECK(download_status IN ('pending','downloaded','failed')),
|
|
32
|
+
download_error TEXT,
|
|
33
|
+
transcription TEXT,
|
|
34
|
+
ts INTEGER NOT NULL,
|
|
35
|
+
FOREIGN KEY (message_id) REFERENCES messages(id) ON DELETE CASCADE
|
|
36
|
+
);
|
|
37
|
+
|
|
38
|
+
CREATE INDEX IF NOT EXISTS idx_attachments_chat_ts
|
|
39
|
+
ON attachments(chat_id, ts);
|
|
40
|
+
|
|
41
|
+
CREATE INDEX IF NOT EXISTS idx_attachments_kind_ts
|
|
42
|
+
ON attachments(kind, ts);
|
|
43
|
+
|
|
44
|
+
-- Narrow index: only the small set we actually want to query for retries
|
|
45
|
+
-- and dashboards. Skips the bulk 'downloaded' rows.
|
|
46
|
+
CREATE INDEX IF NOT EXISTS idx_attachments_status
|
|
47
|
+
ON attachments(download_status, ts)
|
|
48
|
+
WHERE download_status != 'downloaded';
|
|
49
|
+
|
|
50
|
+
CREATE INDEX IF NOT EXISTS idx_attachments_message
|
|
51
|
+
ON attachments(message_id);
|
|
52
|
+
|
|
53
|
+
-- Narrow: file_unique_id is NULL for some historical rows (Telegram doesn't
|
|
54
|
+
-- always populate it for old messages); only index the populated ones.
|
|
55
|
+
CREATE INDEX IF NOT EXISTS idx_attachments_unique_id
|
|
56
|
+
ON attachments(file_unique_id)
|
|
57
|
+
WHERE file_unique_id IS NOT NULL;
|
|
58
|
+
|
|
59
|
+
-- Backfill from messages.attachments_json. Only rows that aren't already
|
|
60
|
+
-- represented (idempotent — re-running this migration on a partially-
|
|
61
|
+
-- migrated DB doesn't double-insert).
|
|
62
|
+
--
|
|
63
|
+
-- attachments_json shape: array of objects, each with kind, name,
|
|
64
|
+
-- mime_type, size, file_id, file_unique_id, and optionally path and transcription.
|
|
65
|
+
-- Pre-0.5.x rows may not have file_unique_id. We pull what's there and
|
|
66
|
+
-- leave the rest NULL.
|
|
67
|
+
INSERT INTO attachments (
|
|
68
|
+
message_id, chat_id, msg_id, thread_id, bot_name,
|
|
69
|
+
file_id, file_unique_id, kind, name, mime_type, size_bytes,
|
|
70
|
+
local_path, download_status, transcription, ts
|
|
71
|
+
)
|
|
72
|
+
SELECT
|
|
73
|
+
m.id, m.chat_id, m.msg_id, m.thread_id, m.bot_name,
|
|
74
|
+
COALESCE(json_extract(att.value, '$.file_id'), ''),
|
|
75
|
+
json_extract(att.value, '$.file_unique_id'),
|
|
76
|
+
COALESCE(json_extract(att.value, '$.kind'), 'document'),
|
|
77
|
+
json_extract(att.value, '$.name'),
|
|
78
|
+
json_extract(att.value, '$.mime_type'),
|
|
79
|
+
json_extract(att.value, '$.size'),
|
|
80
|
+
json_extract(att.value, '$.path'),
|
|
81
|
+
'downloaded',
|
|
82
|
+
json_extract(att.value, '$.transcription.text'),
|
|
83
|
+
m.ts
|
|
84
|
+
FROM messages m, json_each(m.attachments_json) att
|
|
85
|
+
WHERE m.attachments_json IS NOT NULL
|
|
86
|
+
AND m.direction = 'in'
|
|
87
|
+
AND NOT EXISTS (
|
|
88
|
+
SELECT 1 FROM attachments a WHERE a.message_id = m.id
|
|
89
|
+
);
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
-- Drop the legacy messages.attachments_json column. 0.6.0's migration 007
|
|
2
|
+
-- created the per-attachment table and backfilled from this column; the
|
|
3
|
+
-- column was kept for one release (0.6.0) as a safety net. polygram 0.6.1 reads
|
|
4
|
+
-- exclusively from the attachments table now, so the column is dead code
|
|
5
|
+
-- on the schema side and can go.
|
|
6
|
+
--
|
|
7
|
+
-- SQLite supports ALTER TABLE DROP COLUMN since 3.35 (well below
|
|
8
|
+
-- better-sqlite3's bundled SQLite). The op rewrites the table in place,
|
|
9
|
+
-- which is fine — `messages` is small enough that a one-time rewrite at
|
|
10
|
+
-- migration time is cheaper than carrying the column around forever.
|
|
11
|
+
|
|
12
|
+
ALTER TABLE messages DROP COLUMN attachments_json;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "polygram",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.6.2",
|
|
4
4
|
"description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
|
|
5
5
|
"main": "lib/ipc-client.js",
|
|
6
6
|
"bin": {
|
package/polygram.js
CHANGED
|
@@ -206,23 +206,48 @@ function recordInbound(msg) {
|
|
|
206
206
|
const user = msg.from?.first_name || msg.from?.username || null;
|
|
207
207
|
const attachments = extractAttachments(msg);
|
|
208
208
|
const chatConfig = config.chats[chatId];
|
|
209
|
+
const ts = (msg.date || Math.floor(Date.now() / 1000)) * 1000;
|
|
209
210
|
|
|
210
|
-
dbWrite(() =>
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
211
|
+
dbWrite(() => {
|
|
212
|
+
db.insertMessage({
|
|
213
|
+
chat_id: chatId,
|
|
214
|
+
thread_id: threadId,
|
|
215
|
+
msg_id: msg.message_id,
|
|
216
|
+
user,
|
|
217
|
+
user_id: msg.from?.id || null,
|
|
218
|
+
text: msg.text || msg.caption || '',
|
|
219
|
+
reply_to_id: msg.reply_to_message?.message_id || null,
|
|
220
|
+
direction: 'in',
|
|
221
|
+
source: 'polygram',
|
|
222
|
+
bot_name: BOT_NAME,
|
|
223
|
+
model: chatConfig?.model || null,
|
|
224
|
+
effort: chatConfig?.effort || null,
|
|
225
|
+
ts,
|
|
226
|
+
});
|
|
227
|
+
|
|
228
|
+
if (!attachments.length) return;
|
|
229
|
+
// Look up the just-inserted (or ON-CONFLICT-updated) message row id
|
|
230
|
+
// so attachments can FK to it. lastInsertRowid is unreliable across
|
|
231
|
+
// the upsert path; an explicit lookup is cheap and always correct.
|
|
232
|
+
const messageId = db.getInboundMessageId({ chat_id: chatId, msg_id: msg.message_id });
|
|
233
|
+
if (!messageId) return;
|
|
234
|
+
for (const att of attachments) {
|
|
235
|
+
db.insertAttachment({
|
|
236
|
+
message_id: messageId,
|
|
237
|
+
chat_id: chatId,
|
|
238
|
+
msg_id: msg.message_id,
|
|
239
|
+
thread_id: threadId,
|
|
240
|
+
bot_name: BOT_NAME,
|
|
241
|
+
file_id: att.file_id,
|
|
242
|
+
file_unique_id: att.file_unique_id,
|
|
243
|
+
kind: att.kind,
|
|
244
|
+
name: att.name,
|
|
245
|
+
mime_type: att.mime_type,
|
|
246
|
+
size_bytes: att.size,
|
|
247
|
+
ts,
|
|
248
|
+
});
|
|
249
|
+
}
|
|
250
|
+
}, `insert inbound ${chatId}/${msg.message_id}`);
|
|
226
251
|
}
|
|
227
252
|
|
|
228
253
|
|
|
@@ -344,30 +369,54 @@ async function transcribeVoiceAttachments(downloaded, { chatId, msgId, label, bo
|
|
|
344
369
|
}
|
|
345
370
|
}));
|
|
346
371
|
|
|
347
|
-
// Persist transcription
|
|
348
|
-
//
|
|
349
|
-
//
|
|
372
|
+
// Persist transcription:
|
|
373
|
+
// - Per-attachment: setAttachmentTranscription stores the full
|
|
374
|
+
// transcription object (text + language + duration + provider) as
|
|
375
|
+
// JSON in the attachments.transcription column. buildVoiceTags
|
|
376
|
+
// parses it back when building the prompt.
|
|
377
|
+
// - Message-level: setMessageText updates messages.text with the
|
|
378
|
+
// combined transcript so FTS finds "what Maria said" via the
|
|
379
|
+
// normal chat search path.
|
|
350
380
|
const successful = targets.filter((a) => a.transcription?.text);
|
|
351
381
|
if (!successful.length) return;
|
|
382
|
+
for (const a of successful) {
|
|
383
|
+
if (a.id != null) {
|
|
384
|
+
dbWrite(() => db.setAttachmentTranscription(a.id, JSON.stringify(a.transcription)),
|
|
385
|
+
`setAttachmentTranscription ${a.id}`);
|
|
386
|
+
}
|
|
387
|
+
}
|
|
352
388
|
const combinedText = successful.map((a) => a.transcription.text).join(' ').trim();
|
|
353
|
-
const attJson = JSON.stringify(downloaded.map((a) => ({
|
|
354
|
-
kind: a.kind, name: a.name, mime_type: a.mime_type, size: a.size,
|
|
355
|
-
path: a.path, file_unique_id: a.file_unique_id,
|
|
356
|
-
transcription: a.transcription || null,
|
|
357
|
-
})));
|
|
358
389
|
dbWrite(() => db.setMessageText({
|
|
359
|
-
chat_id: chatId, msg_id: msgId,
|
|
360
|
-
text: combinedText, attachments_json: attJson,
|
|
390
|
+
chat_id: chatId, msg_id: msgId, text: combinedText,
|
|
361
391
|
}), 'persist voice transcription');
|
|
362
392
|
}
|
|
363
393
|
|
|
364
|
-
|
|
365
|
-
|
|
394
|
+
// 0.6.0: takes attachment ROW objects from the DB (not raw extracted
|
|
395
|
+
// metadata). Each row has an `id` so we can mark status as we go.
|
|
396
|
+
// On replay: a row with status='downloaded' and a local_path that's
|
|
397
|
+
// still on disk is reused without re-fetching. Anything else (failed,
|
|
398
|
+
// missing file, never downloaded) hits Telegram's CDN.
|
|
399
|
+
async function downloadAttachments(bot, token, chatId, msg, rows) {
|
|
400
|
+
if (!rows.length) return [];
|
|
366
401
|
const chatDir = path.join(INBOX_DIR, String(chatId));
|
|
367
402
|
fs.mkdirSync(chatDir, { recursive: true });
|
|
368
403
|
|
|
369
404
|
const results = [];
|
|
370
|
-
for (const att of
|
|
405
|
+
for (const att of rows) {
|
|
406
|
+
// Reuse path: row already says downloaded AND a non-empty file is on disk.
|
|
407
|
+
if (att.download_status === 'downloaded' && att.local_path) {
|
|
408
|
+
try {
|
|
409
|
+
if (fs.statSync(att.local_path).size > 0) {
|
|
410
|
+
results.push({
|
|
411
|
+
...att,
|
|
412
|
+
path: att.local_path,
|
|
413
|
+
size: att.size_bytes || 0,
|
|
414
|
+
error: null,
|
|
415
|
+
});
|
|
416
|
+
continue;
|
|
417
|
+
}
|
|
418
|
+
} catch { /* fall through to refetch */ }
|
|
419
|
+
}
|
|
371
420
|
try {
|
|
372
421
|
const fileInfo = await bot.api.getFile(att.file_id);
|
|
373
422
|
if (!fileInfo?.file_path) throw new Error('no file_path from getFile');
|
|
@@ -412,18 +461,22 @@ async function downloadAttachments(bot, token, chatId, msg, attachments) {
|
|
|
412
461
|
console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (race: already on disk)`);
|
|
413
462
|
}
|
|
414
463
|
}
|
|
415
|
-
results.push({ ...att, path: localPath, size: att.
|
|
464
|
+
results.push({ ...att, path: localPath, size: att.size_bytes || buf.length, error: null });
|
|
416
465
|
console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (${buf.length} bytes) → ${localPath}`);
|
|
466
|
+
dbWrite(() => db.markAttachmentDownloaded(att.id, {
|
|
467
|
+
local_path: localPath, size_bytes: att.size_bytes || buf.length,
|
|
468
|
+
}), `markAttachmentDownloaded ${att.id}`);
|
|
417
469
|
} catch (err) {
|
|
418
470
|
// Don't drop the attachment silently — push it through with the
|
|
419
471
|
// failure noted. buildAttachmentTags renders this as
|
|
420
472
|
// <attachment-failed reason="..." /> so claude tells the user
|
|
421
473
|
// "I couldn't see your <kind>" instead of pretending it received
|
|
422
|
-
// text only.
|
|
423
|
-
// so claude got the prompt as if no attachment was sent.
|
|
474
|
+
// text only.
|
|
424
475
|
const reason = (err.message || 'unknown').slice(0, 200);
|
|
425
476
|
console.error(`[attach] download failed for ${att.name}: ${reason}`);
|
|
426
477
|
results.push({ ...att, path: null, error: reason });
|
|
478
|
+
dbWrite(() => db.markAttachmentFailed(att.id, reason),
|
|
479
|
+
`markAttachmentFailed ${att.id}`);
|
|
427
480
|
}
|
|
428
481
|
}
|
|
429
482
|
return results;
|
|
@@ -1399,7 +1452,39 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
|
|
|
1399
1452
|
dbWrite(() => db.logEvent('attachment-skipped', { chat_id: chatId, msg_id: msg.message_id, name: att.name, reason }), 'log attachment-skipped');
|
|
1400
1453
|
}
|
|
1401
1454
|
const token = config.bot?.token || '';
|
|
1402
|
-
|
|
1455
|
+
|
|
1456
|
+
// 0.6.0: pull persisted attachment rows (recordInbound inserted them
|
|
1457
|
+
// upstream). Filter to the ones that survived filterAttachments.
|
|
1458
|
+
// Replays / reconstructed messages may not have inserted rows yet —
|
|
1459
|
+
// for that path we fall back to the in-memory `accepted` list. Both
|
|
1460
|
+
// shapes have the same fields downloadAttachments consumes (kind,
|
|
1461
|
+
// file_id, file_unique_id, name, mime_type) plus optionally `id` /
|
|
1462
|
+
// `download_status` / `local_path` for the row variant.
|
|
1463
|
+
const messageId = db.getInboundMessageId({ chat_id: chatId, msg_id: msg.message_id });
|
|
1464
|
+
const allRows = messageId ? db.getAttachmentsByMessage(messageId) : [];
|
|
1465
|
+
const acceptedKeys = new Set(accepted.map((a) => a.file_unique_id || a.file_id));
|
|
1466
|
+
let downloadInputs;
|
|
1467
|
+
if (allRows.length) {
|
|
1468
|
+
downloadInputs = allRows.filter((r) => acceptedKeys.has(r.file_unique_id || r.file_id));
|
|
1469
|
+
} else {
|
|
1470
|
+
// Fallback for replayed turns where rows weren't persisted: synthesize
|
|
1471
|
+
// row-like objects so downloadAttachments treats them as never-tried.
|
|
1472
|
+
downloadInputs = accepted.map((a) => ({
|
|
1473
|
+
...a, id: null, size_bytes: a.size,
|
|
1474
|
+
download_status: 'pending', local_path: null,
|
|
1475
|
+
}));
|
|
1476
|
+
}
|
|
1477
|
+
const downloaded = downloadInputs.length
|
|
1478
|
+
? await downloadAttachments(bot, token, chatId, msg, downloadInputs)
|
|
1479
|
+
: [];
|
|
1480
|
+
// Decode JSON-encoded transcription on enriched rows so buildVoiceTags
|
|
1481
|
+
// can read .text/.language/.duration_sec/.provider directly.
|
|
1482
|
+
for (const a of downloaded) {
|
|
1483
|
+
if (typeof a.transcription === 'string' && a.transcription) {
|
|
1484
|
+
try { a.transcription = JSON.parse(a.transcription); }
|
|
1485
|
+
catch { /* leave as string */ }
|
|
1486
|
+
}
|
|
1487
|
+
}
|
|
1403
1488
|
if (rejected.length) {
|
|
1404
1489
|
const summary = rejected.map(({ att, reason }) => `${att.name}: ${reason}`).join('; ');
|
|
1405
1490
|
try {
|
|
@@ -2339,13 +2424,15 @@ async function main() {
|
|
|
2339
2424
|
...(row.thread_id && { message_thread_id: Number(row.thread_id) }),
|
|
2340
2425
|
...(row.reply_to_id && { reply_to_message: { message_id: row.reply_to_id } }),
|
|
2341
2426
|
};
|
|
2342
|
-
// Attach already-
|
|
2427
|
+
// Attach already-recorded attachments via the media-group shortcut
|
|
2343
2428
|
// field so extractAttachments picks them up without re-parsing
|
|
2344
2429
|
// grammy fields that don't exist on this reconstructed object.
|
|
2345
|
-
|
|
2346
|
-
|
|
2347
|
-
|
|
2348
|
-
|
|
2430
|
+
const attRows = db.getAttachmentsByMessage(row.id);
|
|
2431
|
+
if (attRows.length) {
|
|
2432
|
+
reconstructed._mergedAttachments = attRows.map((a) => ({
|
|
2433
|
+
kind: a.kind, name: a.name, mime_type: a.mime_type,
|
|
2434
|
+
size: a.size_bytes, file_id: a.file_id, file_unique_id: a.file_unique_id,
|
|
2435
|
+
}));
|
|
2349
2436
|
}
|
|
2350
2437
|
const chatConfig = config.chats[row.chat_id];
|
|
2351
2438
|
if (!chatConfig) { skipped += 1; continue; }
|
package/scripts/split-db.js
CHANGED
|
@@ -148,18 +148,39 @@ function copy(src, dst, bot, chatToBot) {
|
|
|
148
148
|
const rows = src.raw.prepare(
|
|
149
149
|
`SELECT * FROM messages WHERE chat_id IN (${ph(chatIds.length)}) OR bot_name = ?`,
|
|
150
150
|
).all(...chatIds, bot);
|
|
151
|
+
// 0.6.1: messages.attachments_json column was dropped (migration 008).
|
|
152
|
+
// Per-attachment rows live in the `attachments` table now and are
|
|
153
|
+
// copied separately below.
|
|
151
154
|
const ins = dst.raw.prepare(`
|
|
152
155
|
INSERT OR IGNORE INTO messages
|
|
153
156
|
(id, chat_id, thread_id, msg_id, user, user_id, text, reply_to_id,
|
|
154
|
-
direction, source, bot_name,
|
|
157
|
+
direction, source, bot_name, session_id,
|
|
155
158
|
model, effort, turn_id, status, error, cost_usd, ts, edited_ts)
|
|
156
159
|
VALUES
|
|
157
160
|
(@id, @chat_id, @thread_id, @msg_id, @user, @user_id, @text, @reply_to_id,
|
|
158
|
-
@direction, @source, @bot_name, @
|
|
161
|
+
@direction, @source, @bot_name, @session_id,
|
|
159
162
|
@model, @effort, @turn_id, @status, @error, @cost_usd, @ts, @edited_ts)
|
|
160
163
|
`);
|
|
161
164
|
for (const r of rows) { if (ins.run(r).changes) stats.messages++; }
|
|
162
165
|
|
|
166
|
+
// Copy per-attachment rows for the copied chats (matched by chat_id only). FK target
|
|
167
|
+
// exists since messages were inserted in the same transaction.
|
|
168
|
+
const arows = src.raw.prepare(
|
|
169
|
+
`SELECT * FROM attachments WHERE chat_id IN (${ph(chatIds.length)})`,
|
|
170
|
+
).all(...chatIds);
|
|
171
|
+
const aIns = dst.raw.prepare(`
|
|
172
|
+
INSERT OR IGNORE INTO attachments
|
|
173
|
+
(id, message_id, chat_id, msg_id, thread_id, bot_name,
|
|
174
|
+
file_id, file_unique_id, kind, name, mime_type, size_bytes,
|
|
175
|
+
local_path, download_status, download_error, transcription, ts)
|
|
176
|
+
VALUES
|
|
177
|
+
(@id, @message_id, @chat_id, @msg_id, @thread_id, @bot_name,
|
|
178
|
+
@file_id, @file_unique_id, @kind, @name, @mime_type, @size_bytes,
|
|
179
|
+
@local_path, @download_status, @download_error, @transcription, @ts)
|
|
180
|
+
`);
|
|
181
|
+
stats.attachments = 0;
|
|
182
|
+
for (const r of arows) { if (aIns.run(r).changes) stats.attachments++; }
|
|
183
|
+
|
|
163
184
|
const srows = src.raw.prepare(`SELECT * FROM sessions WHERE chat_id IN (${ph(chatIds.length)})`).all(...chatIds);
|
|
164
185
|
const sins = dst.raw.prepare(`
|
|
165
186
|
INSERT OR REPLACE INTO sessions
|