polygram 0.5.10 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/db.js CHANGED
@@ -8,7 +8,7 @@ const fs = require('fs');
8
8
  const path = require('path');
9
9
  const Database = require('better-sqlite3');
10
10
 
11
- const SCHEMA_VERSION = 6;
11
+ const SCHEMA_VERSION = 7;
12
12
 
13
13
  function open(dbPath) {
14
14
  const db = new Database(dbPath);
@@ -361,6 +361,131 @@ function wrap(db) {
361
361
  AND ts > ?
362
362
  `).run(botName, cutoff);
363
363
  },
364
+
365
+ // ─── Attachments (migration 007, polygram 0.6.0) ──────────────────
366
+ //
367
+ // Replaces the messages.attachments_json blob. Each attachment is its
368
+ // own row with lifecycle (`pending` → `downloaded` | `failed`),
369
+ // searchable by chat / kind / time. recordInbound now inserts these
370
+ // alongside the message in a transaction; downloadAttachments updates
371
+ // status as it processes each file. See docs/attachments-table.md.
372
+
373
+ insertAttachment({
374
+ message_id, chat_id, msg_id, thread_id, bot_name,
375
+ file_id, file_unique_id, kind, name, mime_type, size_bytes,
376
+ ts,
377
+ }) {
378
+ return db.prepare(`
379
+ INSERT INTO attachments (
380
+ message_id, chat_id, msg_id, thread_id, bot_name,
381
+ file_id, file_unique_id, kind, name, mime_type, size_bytes,
382
+ download_status, ts
383
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', ?)
384
+ `).run(
385
+ message_id,
386
+ String(chat_id),
387
+ msg_id,
388
+ thread_id ? String(thread_id) : null,
389
+ bot_name || null,
390
+ file_id,
391
+ file_unique_id || null,
392
+ kind,
393
+ name || null,
394
+ mime_type || null,
395
+ size_bytes ?? null,
396
+ ts || Date.now(),
397
+ );
398
+ },
399
+
400
+ markAttachmentDownloaded(id, { local_path, size_bytes }) {
401
+ return db.prepare(`
402
+ UPDATE attachments
403
+ SET download_status = 'downloaded',
404
+ local_path = ?,
405
+ size_bytes = COALESCE(?, size_bytes),
406
+ download_error = NULL
407
+ WHERE id = ?
408
+ `).run(local_path, size_bytes ?? null, id);
409
+ },
410
+
411
+ markAttachmentFailed(id, error) {
412
+ return db.prepare(`
413
+ UPDATE attachments
414
+ SET download_status = 'failed',
415
+ download_error = ?
416
+ WHERE id = ?
417
+ `).run(String(error || 'unknown').slice(0, 500), id);
418
+ },
419
+
420
+ setAttachmentTranscription(id, text) {
421
+ return db.prepare(`
422
+ UPDATE attachments SET transcription = ? WHERE id = ?
423
+ `).run(text || null, id);
424
+ },
425
+
426
+ getAttachmentsByMessage(message_id) {
427
+ return db.prepare(`
428
+ SELECT id, message_id, chat_id, msg_id, thread_id, bot_name,
429
+ file_id, file_unique_id, kind, name, mime_type, size_bytes,
430
+ local_path, download_status, download_error, transcription, ts
431
+ FROM attachments
432
+ WHERE message_id = ?
433
+ ORDER BY id ASC
434
+ `).all(message_id);
435
+ },
436
+
437
+ // Rich filter for ops queries. All filters are optional; with no filters
438
+ // returns the most recent 100. Caller can paginate via since/until.
439
+ searchAttachments({
440
+ chat_id = null,
441
+ kind = null,
442
+ status = null,
443
+ since = null,
444
+ until = null,
445
+ limit = 100,
446
+ } = {}) {
447
+ const where = [];
448
+ const args = [];
449
+ if (chat_id !== null) { where.push('chat_id = ?'); args.push(String(chat_id)); }
450
+ if (kind !== null) { where.push('kind = ?'); args.push(kind); }
451
+ if (status !== null) { where.push('download_status = ?'); args.push(status); }
452
+ if (since !== null) { where.push('ts >= ?'); args.push(Number(since)); }
453
+ if (until !== null) { where.push('ts <= ?'); args.push(Number(until)); }
454
+ const sql = `
455
+ SELECT id, message_id, chat_id, msg_id, thread_id, bot_name,
456
+ file_id, file_unique_id, kind, name, mime_type, size_bytes,
457
+ local_path, download_status, download_error, transcription, ts
458
+ FROM attachments
459
+ ${where.length ? 'WHERE ' + where.join(' AND ') : ''}
460
+ ORDER BY ts DESC
461
+ LIMIT ?
462
+ `;
463
+ args.push(Number(limit));
464
+ return db.prepare(sql).all(...args);
465
+ },
466
+
467
+ // Look up the messages.id auto-pk for an inbound message. Used by
468
+ // recordInbound to FK attachments to the just-inserted message even
469
+ // when an ON-CONFLICT update happened (lastInsertRowid is 0 in that
470
+ // case, so we can't rely on the run-result alone).
471
+ getInboundMessageId({ chat_id, msg_id }) {
472
+ const row = db.prepare(`
473
+ SELECT id FROM messages WHERE chat_id = ? AND msg_id = ? AND direction = 'in'
474
+ `).get(String(chat_id), msg_id);
475
+ return row ? row.id : null;
476
+ },
477
+
478
+ listFailedAttachments({ since = null, limit = 100 } = {}) {
479
+ const cutoff = since ?? Date.now() - 24 * 60 * 60 * 1000;
480
+ return db.prepare(`
481
+ SELECT id, message_id, chat_id, msg_id, kind, name, mime_type,
482
+ download_error, ts
483
+ FROM attachments
484
+ WHERE download_status = 'failed' AND ts >= ?
485
+ ORDER BY ts DESC
486
+ LIMIT ?
487
+ `).all(cutoff, limit);
488
+ },
364
489
  };
365
490
  }
366
491
 
package/lib/prompt.js CHANGED
@@ -121,9 +121,17 @@ function buildChannelAttrs({ chatId, msgId, user, userId, ts, threadId, topicNam
121
121
 
122
122
  function buildAttachmentTags(attachments) {
123
123
  if (!attachments?.length) return '';
124
- return attachments.map((a) =>
125
- `<attachment kind="${xmlEscape(a.kind)}" name="${xmlEscape(a.name)}" mime="${xmlEscape(a.mime_type)}" size="${a.size || 0}" path="${xmlEscape(a.path || '')}" />`
126
- ).join('\n');
124
+ // Failed downloads (no `path`, has `error`) get a separate tag so claude
125
+ // can mention them to the user instead of pretending nothing was sent.
126
+ // The actual failure reason is included so claude can offer a useful
127
+ // recovery hint ("looks like the file is too large", "Telegram CDN had
128
+ // a 410 — could you resend?").
129
+ return attachments.map((a) => {
130
+ if (a.error || !a.path) {
131
+ return `<attachment-failed kind="${xmlEscape(a.kind)}" name="${xmlEscape(a.name)}" mime="${xmlEscape(a.mime_type)}" reason="${xmlEscape(a.error || 'no local path')}" />`;
132
+ }
133
+ return `<attachment kind="${xmlEscape(a.kind)}" name="${xmlEscape(a.name)}" mime="${xmlEscape(a.mime_type)}" size="${a.size || 0}" path="${xmlEscape(a.path)}" />`;
134
+ }).join('\n');
127
135
  }
128
136
 
129
137
  function buildVoiceTags(attachments) {
@@ -0,0 +1,89 @@
1
-- Replace the messages.attachments_json blob with a real table so we can
-- query, search, and track lifecycle per attachment. Design doc:
-- docs/attachments-table.md.
--
-- This migration creates the table + indexes and backfills from existing
-- attachments_json. The column itself is NOT dropped here — kept as a
-- safety net for one minor release. A follow-up migration drops it once
-- we're confident reads/writes have fully moved over.
--
-- For backfilled rows we set download_status='downloaded' (we know they
-- went through to disk historically; recreating failure state isn't
-- useful for old data). local_path is copied from the blob's `path`
-- field when one was recorded and is NULL otherwise; in that case the
-- deterministic on-disk location can be re-derived from msg_id +
-- file_unique_id at read time if a caller needs it; the existing
-- inbox/<chat_id>/... filename convention is unchanged.

CREATE TABLE IF NOT EXISTS attachments (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  message_id INTEGER NOT NULL,
  chat_id TEXT NOT NULL,
  msg_id INTEGER NOT NULL,
  thread_id TEXT,
  bot_name TEXT,
  file_id TEXT NOT NULL,
  file_unique_id TEXT,
  kind TEXT NOT NULL,
  name TEXT,
  mime_type TEXT,
  size_bytes INTEGER,
  local_path TEXT,
  download_status TEXT NOT NULL CHECK(download_status IN ('pending','downloaded','failed')),
  download_error TEXT,
  transcription TEXT,
  ts INTEGER NOT NULL,
  FOREIGN KEY (message_id) REFERENCES messages(id) ON DELETE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_attachments_chat_ts
  ON attachments(chat_id, ts);

CREATE INDEX IF NOT EXISTS idx_attachments_kind_ts
  ON attachments(kind, ts);

-- Narrow index: only the small set we actually want to query for retries
-- and dashboards. Skips the bulk 'downloaded' rows.
CREATE INDEX IF NOT EXISTS idx_attachments_status
  ON attachments(download_status, ts)
  WHERE download_status != 'downloaded';

CREATE INDEX IF NOT EXISTS idx_attachments_message
  ON attachments(message_id);

-- Narrow: file_unique_id is NULL for some historical rows (Telegram doesn't
-- always populate it for old messages); only index the populated ones.
CREATE INDEX IF NOT EXISTS idx_attachments_unique_id
  ON attachments(file_unique_id)
  WHERE file_unique_id IS NOT NULL;

-- Backfill from messages.attachments_json. Only rows that aren't already
-- represented (idempotent — re-running this migration on a partially-
-- migrated DB doesn't double-insert).
--
-- attachments_json shape: array of objects, each with kind, name,
-- mime_type, size, file_id, file_unique_id, and optionally path and
-- transcription. Pre-0.5.x rows may not have file_unique_id. We pull
-- what's there and leave the rest NULL.
--
-- transcription is copied as the whole JSON object (json_extract returns
-- the JSON text for an object), not just its `.text`: the 0.6.0 runtime
-- writes JSON.stringify(transcription) into this column and JSON.parse()s
-- it on read, so backfilled rows must use the same encoding.
--
-- Robustness: a blob that is not valid JSON is treated as an empty array
-- instead of aborting the whole migration, and array elements that are
-- not objects are skipped.
INSERT INTO attachments (
  message_id, chat_id, msg_id, thread_id, bot_name,
  file_id, file_unique_id, kind, name, mime_type, size_bytes,
  local_path, download_status, transcription, ts
)
SELECT
  m.id, m.chat_id, m.msg_id, m.thread_id, m.bot_name,
  COALESCE(json_extract(att.value, '$.file_id'), ''),
  json_extract(att.value, '$.file_unique_id'),
  COALESCE(json_extract(att.value, '$.kind'), 'document'),
  json_extract(att.value, '$.name'),
  json_extract(att.value, '$.mime_type'),
  json_extract(att.value, '$.size'),
  json_extract(att.value, '$.path'),
  'downloaded',
  json_extract(att.value, '$.transcription'),
  m.ts
FROM messages m,
     json_each(
       CASE WHEN json_valid(m.attachments_json)
            THEN m.attachments_json
            ELSE '[]'
       END
     ) att
WHERE m.attachments_json IS NOT NULL
  AND m.direction = 'in'
  AND att.type = 'object'
  AND NOT EXISTS (
    SELECT 1 FROM attachments a WHERE a.message_id = m.id
  );
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.5.10",
3
+ "version": "0.6.0",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc-client.js",
6
6
  "bin": {
package/polygram.js CHANGED
@@ -206,23 +206,52 @@ function recordInbound(msg) {
206
206
  const user = msg.from?.first_name || msg.from?.username || null;
207
207
  const attachments = extractAttachments(msg);
208
208
  const chatConfig = config.chats[chatId];
209
+ const ts = (msg.date || Math.floor(Date.now() / 1000)) * 1000;
209
210
 
210
- dbWrite(() => db.insertMessage({
211
- chat_id: chatId,
212
- thread_id: threadId,
213
- msg_id: msg.message_id,
214
- user,
215
- user_id: msg.from?.id || null,
216
- text: msg.text || msg.caption || '',
217
- reply_to_id: msg.reply_to_message?.message_id || null,
218
- direction: 'in',
219
- source: 'polygram',
220
- bot_name: BOT_NAME,
221
- attachments_json: attachments.length ? JSON.stringify(attachments) : null,
222
- model: chatConfig?.model || null,
223
- effort: chatConfig?.effort || null,
224
- ts: (msg.date || Math.floor(Date.now() / 1000)) * 1000,
225
- }), `insert inbound ${chatId}/${msg.message_id}`);
211
+ dbWrite(() => {
212
+ db.insertMessage({
213
+ chat_id: chatId,
214
+ thread_id: threadId,
215
+ msg_id: msg.message_id,
216
+ user,
217
+ user_id: msg.from?.id || null,
218
+ text: msg.text || msg.caption || '',
219
+ reply_to_id: msg.reply_to_message?.message_id || null,
220
+ direction: 'in',
221
+ source: 'polygram',
222
+ bot_name: BOT_NAME,
223
+ // attachments_json kept temporarily as a fallback during the 0.6.0
224
+ // migration window; per-attachment rows below are the source of
225
+ // truth. Will be dropped in a follow-up minor.
226
+ attachments_json: attachments.length ? JSON.stringify(attachments) : null,
227
+ model: chatConfig?.model || null,
228
+ effort: chatConfig?.effort || null,
229
+ ts,
230
+ });
231
+
232
+ if (!attachments.length) return;
233
+ // Look up the just-inserted (or ON-CONFLICT-updated) message row id
234
+ // so attachments can FK to it. lastInsertRowid is unreliable across
235
+ // the upsert path; an explicit lookup is cheap and always correct.
236
+ const messageId = db.getInboundMessageId({ chat_id: chatId, msg_id: msg.message_id });
237
+ if (!messageId) return;
238
+ for (const att of attachments) {
239
+ db.insertAttachment({
240
+ message_id: messageId,
241
+ chat_id: chatId,
242
+ msg_id: msg.message_id,
243
+ thread_id: threadId,
244
+ bot_name: BOT_NAME,
245
+ file_id: att.file_id,
246
+ file_unique_id: att.file_unique_id,
247
+ kind: att.kind,
248
+ name: att.name,
249
+ mime_type: att.mime_type,
250
+ size_bytes: att.size,
251
+ ts,
252
+ });
253
+ }
254
+ }, `insert inbound ${chatId}/${msg.message_id}`);
226
255
  }
227
256
 
228
257
 
@@ -344,30 +373,57 @@ async function transcribeVoiceAttachments(downloaded, { chatId, msgId, label, bo
344
373
  }
345
374
  }));
346
375
 
347
- // Persist transcription into the inbound row so FTS search finds it.
348
- // Combine all successful transcriptions into `text` and mirror the
349
- // transcription data back into attachments_json.
376
+ // Persist transcription:
377
+ // - Per-attachment: setAttachmentTranscription stores the full
378
+ // transcription object (text + language + duration + provider) as
379
+ // JSON in the attachments.transcription column. buildVoiceTags
380
+ // parses it back when building the prompt.
381
+ // - Message-level: setMessageText updates messages.text with the
382
+ // combined transcript so FTS finds "what Maria said" via the
383
+ // normal chat search path. attachments_json is left as-is (will
384
+ // be dropped in a future minor; per-attachment row is the source
385
+ // of truth).
350
386
  const successful = targets.filter((a) => a.transcription?.text);
351
387
  if (!successful.length) return;
388
+ for (const a of successful) {
389
+ if (a.id != null) {
390
+ dbWrite(() => db.setAttachmentTranscription(a.id, JSON.stringify(a.transcription)),
391
+ `setAttachmentTranscription ${a.id}`);
392
+ }
393
+ }
352
394
  const combinedText = successful.map((a) => a.transcription.text).join(' ').trim();
353
- const attJson = JSON.stringify(downloaded.map((a) => ({
354
- kind: a.kind, name: a.name, mime_type: a.mime_type, size: a.size,
355
- path: a.path, file_unique_id: a.file_unique_id,
356
- transcription: a.transcription || null,
357
- })));
358
395
  dbWrite(() => db.setMessageText({
359
396
  chat_id: chatId, msg_id: msgId,
360
- text: combinedText, attachments_json: attJson,
397
+ text: combinedText, attachments_json: null,
361
398
  }), 'persist voice transcription');
362
399
  }
363
400
 
364
- async function downloadAttachments(bot, token, chatId, msg, attachments) {
365
- if (!attachments.length) return [];
401
+ // 0.6.0: takes attachment ROW objects from the DB (not raw extracted
402
+ // metadata). Each row has an `id` so we can mark status as we go.
403
+ // On replay: a row with status='downloaded' and a local_path that's
404
+ // still on disk is reused without re-fetching. Anything else (failed,
405
+ // missing file, never downloaded) hits Telegram's CDN.
406
+ async function downloadAttachments(bot, token, chatId, msg, rows) {
407
+ if (!rows.length) return [];
366
408
  const chatDir = path.join(INBOX_DIR, String(chatId));
367
409
  fs.mkdirSync(chatDir, { recursive: true });
368
410
 
369
411
  const results = [];
370
- for (const att of attachments) {
412
+ for (const att of rows) {
413
+ // Reuse path: row already says downloaded AND the file is on disk.
414
+ if (att.download_status === 'downloaded' && att.local_path) {
415
+ try {
416
+ if (fs.statSync(att.local_path).size > 0) {
417
+ results.push({
418
+ ...att,
419
+ path: att.local_path,
420
+ size: att.size_bytes || 0,
421
+ error: null,
422
+ });
423
+ continue;
424
+ }
425
+ } catch { /* fall through to refetch */ }
426
+ }
371
427
  try {
372
428
  const fileInfo = await bot.api.getFile(att.file_id);
373
429
  if (!fileInfo?.file_path) throw new Error('no file_path from getFile');
@@ -412,10 +468,22 @@ async function downloadAttachments(bot, token, chatId, msg, attachments) {
412
468
  console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (race: already on disk)`);
413
469
  }
414
470
  }
415
- results.push({ ...att, path: localPath, size: att.size || buf.length });
471
+ results.push({ ...att, path: localPath, size: att.size_bytes || buf.length, error: null });
416
472
  console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (${buf.length} bytes) → ${localPath}`);
473
+ dbWrite(() => db.markAttachmentDownloaded(att.id, {
474
+ local_path: localPath, size_bytes: att.size_bytes || buf.length,
475
+ }), `markAttachmentDownloaded ${att.id}`);
417
476
  } catch (err) {
418
- console.error(`[attach] download failed for ${att.name}: ${err.message}`);
477
+ // Don't drop the attachment silently — push it through with the
478
+ // failure noted. buildAttachmentTags renders this as
479
+ // <attachment-failed reason="..." /> so claude tells the user
480
+ // "I couldn't see your <kind>" instead of pretending it received
481
+ // text only.
482
+ const reason = (err.message || 'unknown').slice(0, 200);
483
+ console.error(`[attach] download failed for ${att.name}: ${reason}`);
484
+ results.push({ ...att, path: null, error: reason });
485
+ dbWrite(() => db.markAttachmentFailed(att.id, reason),
486
+ `markAttachmentFailed ${att.id}`);
419
487
  }
420
488
  }
421
489
  return results;
@@ -1391,7 +1459,39 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1391
1459
  dbWrite(() => db.logEvent('attachment-skipped', { chat_id: chatId, msg_id: msg.message_id, name: att.name, reason }), 'log attachment-skipped');
1392
1460
  }
1393
1461
  const token = config.bot?.token || '';
1394
- const downloaded = accepted.length ? await downloadAttachments(bot, token, chatId, msg, accepted) : [];
1462
+
1463
+ // 0.6.0: pull persisted attachment rows (recordInbound inserted them
1464
+ // upstream). Filter to the ones that survived filterAttachments.
1465
+ // Replays / reconstructed messages may not have inserted rows yet —
1466
+ // for that path we fall back to the in-memory `accepted` list. Both
1467
+ // shapes have the same fields downloadAttachments consumes (kind,
1468
+ // file_id, file_unique_id, name, mime_type) plus optionally `id` /
1469
+ // `download_status` / `local_path` for the row variant.
1470
+ const messageId = db.getInboundMessageId({ chat_id: chatId, msg_id: msg.message_id });
1471
+ const allRows = messageId ? db.getAttachmentsByMessage(messageId) : [];
1472
+ const acceptedKeys = new Set(accepted.map((a) => a.file_unique_id || a.file_id));
1473
+ let downloadInputs;
1474
+ if (allRows.length) {
1475
+ downloadInputs = allRows.filter((r) => acceptedKeys.has(r.file_unique_id || r.file_id));
1476
+ } else {
1477
+ // Fallback for replayed turns where rows weren't persisted: synthesize
1478
+ // row-like objects so downloadAttachments treats them as never-tried.
1479
+ downloadInputs = accepted.map((a) => ({
1480
+ ...a, id: null, size_bytes: a.size,
1481
+ download_status: 'pending', local_path: null,
1482
+ }));
1483
+ }
1484
+ const downloaded = downloadInputs.length
1485
+ ? await downloadAttachments(bot, token, chatId, msg, downloadInputs)
1486
+ : [];
1487
+ // Decode JSON-encoded transcription on enriched rows so buildVoiceTags
1488
+ // can read .text/.language/.duration_sec/.provider directly.
1489
+ for (const a of downloaded) {
1490
+ if (typeof a.transcription === 'string' && a.transcription) {
1491
+ try { a.transcription = JSON.parse(a.transcription); }
1492
+ catch { /* leave as string */ }
1493
+ }
1494
+ }
1395
1495
  if (rejected.length) {
1396
1496
  const summary = rejected.map(({ att, reason }) => `${att.name}: ${reason}`).join('; ');
1397
1497
  try {
@@ -2331,13 +2431,21 @@ async function main() {
2331
2431
  ...(row.thread_id && { message_thread_id: Number(row.thread_id) }),
2332
2432
  ...(row.reply_to_id && { reply_to_message: { message_id: row.reply_to_id } }),
2333
2433
  };
2334
- // Attach already-extracted attachments via the media-group shortcut
2434
+ // Attach already-recorded attachments via the media-group shortcut
2335
2435
  // field so extractAttachments picks them up without re-parsing
2336
2436
  // grammy fields that don't exist on this reconstructed object.
2337
- if (row.attachments_json) {
2338
- try {
2339
- reconstructed._mergedAttachments = JSON.parse(row.attachments_json);
2340
- } catch {}
2437
+ // 0.6.0: read from the per-attachment table; fall back to the
2438
+ // legacy attachments_json blob for rows inserted before migration
2439
+ // 007 ran (covers the small window during the upgrade).
2440
+ const attRows = db.getAttachmentsByMessage(row.id);
2441
+ if (attRows.length) {
2442
+ reconstructed._mergedAttachments = attRows.map((a) => ({
2443
+ kind: a.kind, name: a.name, mime_type: a.mime_type,
2444
+ size: a.size_bytes, file_id: a.file_id, file_unique_id: a.file_unique_id,
2445
+ }));
2446
+ } else if (row.attachments_json) {
2447
+ try { reconstructed._mergedAttachments = JSON.parse(row.attachments_json); }
2448
+ catch {}
2341
2449
  }
2342
2450
  const chatConfig = config.chats[row.chat_id];
2343
2451
  if (!chatConfig) { skipped += 1; continue; }