polygram 0.6.6 → 0.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -6
- package/package.json +1 -1
- package/polygram.js +108 -85
package/README.md
CHANGED
|
@@ -49,8 +49,18 @@ ergonomics while running on top of `claude` CLI.
|
|
|
49
49
|
- **Voice transcription.** OpenAI Whisper API or local `whisper.cpp`,
|
|
50
50
|
selectable per bot. Transcriptions land in `messages.text` so FTS
|
|
51
51
|
finds them.
|
|
52
|
+
- **Per-attachment table** (`attachments`, since 0.6.0) with download
|
|
53
|
+
lifecycle (`pending` → `downloaded` | `failed`), per-attachment
|
|
54
|
+
transcription, and `chat_id`/`kind`/`status` indexes for ops queries.
|
|
55
|
+
Replaces the older `attachments_json` blob — query "all PDFs Maria
|
|
56
|
+
sent last week" without scanning every message. Failed downloads
|
|
57
|
+
surface to Claude as `<attachment-failed reason="..." />` so the
|
|
58
|
+
user gets a real explanation, not silence.
|
|
52
59
|
- **Content-addressed attachment storage** via Telegram's `file_unique_id`.
|
|
53
|
-
Same photo forwarded twice = one file on disk.
|
|
60
|
+
Same photo forwarded twice = one file on disk. Multi-photo albums
|
|
61
|
+
(Telegram delivers each photo as a separate message sharing
|
|
62
|
+
`media_group_id`) coalesce into one logical turn so Claude sees the
|
|
63
|
+
whole album, not just the first photo.
|
|
54
64
|
- **Prompt-injection hardening.** User text wrapped in `<untrusted-input>`
|
|
55
65
|
with xml-escape; attributes use `"`. A partner typing
|
|
56
66
|
`</channel><system>...` sees it as literal text in the prompt.
|
|
@@ -59,6 +69,22 @@ ergonomics while running on top of `claude` CLI.
|
|
|
59
69
|
- **Step-level streaming replies** (optional per bot). Telegram message
|
|
60
70
|
edits on each assistant step as Claude works through tool calls and
|
|
61
71
|
reasoning.
|
|
72
|
+
- **Crash-resilient handler lifecycle.** Inbound rows track a
|
|
73
|
+
`handler_status` (received → dispatched → replied | failed |
|
|
74
|
+
replay-pending). On graceful shutdown, in-flight turns are marked
|
|
75
|
+
for replay; on next boot the daemon re-dispatches anything within a
|
|
76
|
+
3-minute window, deduped against already-sent outbound replies.
|
|
77
|
+
One-shot guard prevents replay loops.
|
|
78
|
+
- **Contextual error replies.** Idle timeouts, wall-clock ceilings, and
|
|
79
|
+
process crashes each get a distinct user-facing message with a
|
|
80
|
+
recovery hint, not a generic "something went wrong." Restarts and
|
|
81
|
+
user-issued aborts don't fire the apology at all.
|
|
82
|
+
- **Abort detection in natural language** (`stop`, `cancel`, `wait`,
|
|
83
|
+
`стоп`, `отмена`, `хватит`, ...) plus the slash forms (`/stop`,
|
|
84
|
+
`/abort`, `/cancel`). First-sentence match catches "Stop. I'll ask
|
|
85
|
+
in another session." too. Scoped to the user's own session, so an
|
|
86
|
+
abort in one topic never disturbs sibling topics under
|
|
87
|
+
`isolateTopics`.
|
|
62
88
|
|
|
63
89
|
## Relation to existing projects
|
|
64
90
|
|
|
@@ -133,7 +159,7 @@ Output:
|
|
|
133
159
|
|
|
134
160
|
```
|
|
135
161
|
✅ config — bot found, 4 chat(s), admin=68861949
|
|
136
|
-
✅ db — schema
|
|
162
|
+
✅ db — schema v8
|
|
137
163
|
✅ ipc — socket responsive, bot=my-bot
|
|
138
164
|
✅ telegram — @my_bot (My Bot)
|
|
139
165
|
✅ recent-errors — no failure events in last 24h
|
|
@@ -325,7 +351,7 @@ foreign-chat clicks are rejected. Default-deny on IPC error.
|
|
|
325
351
|
## Development
|
|
326
352
|
|
|
327
353
|
```bash
|
|
328
|
-
npm test #
|
|
354
|
+
npm test # 470 tests, 110 suites, node:test, no external services
|
|
329
355
|
npm start -- --bot my-bot
|
|
330
356
|
npm run split-db -- --config config.json --dry-run
|
|
331
357
|
npm run ipc-smoke -- my-bot
|
|
@@ -357,7 +383,11 @@ tests/*.test.js node:test
|
|
|
357
383
|
- Claude Code only. No abstraction over other AIs.
|
|
358
384
|
- macOS LaunchAgent plists included; Linux systemd units are not (easy
|
|
359
385
|
to adapt).
|
|
360
|
-
-
|
|
386
|
+
- On FileVault-on macOS, the daemon's LaunchAgents fire via shumabit's
|
|
387
|
+
own GUI login — there's no auto-start without the keychain being
|
|
388
|
+
unlocked, so a one-time Fast User Switch into the daemon's user
|
|
389
|
+
after each reboot is the supported pattern. See
|
|
390
|
+
`skills/infrastructure/SKILL.md` in the source repo for details.
|
|
361
391
|
|
|
362
392
|
## Roadmap
|
|
363
393
|
|
|
@@ -365,8 +395,8 @@ tests/*.test.js node:test
|
|
|
365
395
|
unknown chats.
|
|
366
396
|
- Approvals phase 2: deny-with-reason, per-user quotas.
|
|
367
397
|
- Voice phase 2: `/replay-voice` to re-transcribe with a language hint.
|
|
368
|
-
-
|
|
369
|
-
|
|
398
|
+
- Per-attachment ops queries wired into `/polygram:*` slash commands
|
|
399
|
+
(search by chat/kind/time, list failed downloads).
|
|
370
400
|
|
|
371
401
|
## Licence
|
|
372
402
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "polygram",
|
|
3
|
-
"version": "0.6.6",
|
|
3
|
+
"version": "0.6.7",
|
|
4
4
|
"description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
|
|
5
5
|
"main": "lib/ipc-client.js",
|
|
6
6
|
"bin": {
|
package/polygram.js
CHANGED
|
@@ -406,102 +406,125 @@ async function transcribeVoiceAttachments(downloaded, { chatId, msgId, label, bo
|
|
|
406
406
|
}), 'persist voice transcription');
|
|
407
407
|
}
|
|
408
408
|
|
|
409
|
+
// Bounded concurrency for parallel fetches. A 10-photo album used to be
|
|
410
|
+
// 10× per-photo latency (each `await fetch` was serial); now in-flight
|
|
411
|
+
// downloads are capped to a small pool. Telegram's per-bot rate limit is
|
|
412
|
+
// ~30 req/s, so 6 concurrent fetches is comfortably under and keeps the
|
|
413
|
+
// happy path responsive without burning sockets on a 100-file edge case.
|
|
414
|
+
const ATTACHMENT_DOWNLOAD_CONCURRENCY = 6;
|
|
415
|
+
|
|
416
|
+
// Per-attachment download. Pure function over (att, deps) → result. Pulled
|
|
417
|
+
// out of the loop so downloadAttachments can run several in parallel.
|
|
418
|
+
async function downloadOneAttachment(bot, token, chatId, msg, chatDir, att) {
|
|
419
|
+
// Reuse path: row already says downloaded AND the file is on disk.
|
|
420
|
+
if (att.download_status === 'downloaded' && att.local_path) {
|
|
421
|
+
try {
|
|
422
|
+
if (fs.statSync(att.local_path).size > 0) {
|
|
423
|
+
return { ...att, path: att.local_path, size: att.size_bytes || 0, error: null };
|
|
424
|
+
}
|
|
425
|
+
} catch { /* fall through to refetch */ }
|
|
426
|
+
}
|
|
427
|
+
try {
|
|
428
|
+
const fileInfo = await bot.api.getFile(att.file_id);
|
|
429
|
+
if (!fileInfo?.file_path) throw new Error('no file_path from getFile');
|
|
430
|
+
const url = `https://api.telegram.org/file/bot${token}/${fileInfo.file_path}`;
|
|
431
|
+
const res = await fetch(url);
|
|
432
|
+
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
|
433
|
+
// Defense in depth: re-check size at download time. Telegram can
|
|
434
|
+
// omit file_size from the Message, or its value may not match what
|
|
435
|
+
// the CDN actually serves. Trust Content-Length and fall back to
|
|
436
|
+
// buffering with a ceiling.
|
|
437
|
+
const cl = parseInt(res.headers.get('content-length') || '0', 10);
|
|
438
|
+
if (cl > MAX_FILE_BYTES) {
|
|
439
|
+
throw new Error(`content-length ${cl} exceeds per-file cap ${MAX_FILE_BYTES}`);
|
|
440
|
+
}
|
|
441
|
+
const buf = Buffer.from(await res.arrayBuffer());
|
|
442
|
+
if (buf.length > MAX_FILE_BYTES) {
|
|
443
|
+
throw new Error(`body ${buf.length} bytes exceeds per-file cap ${MAX_FILE_BYTES}`);
|
|
444
|
+
}
|
|
445
|
+
const safeName = sanitizeFilename(att.name);
|
|
446
|
+
// Embed file_unique_id so two attachments with the same msg_id+name
|
|
447
|
+
// (album, resend) can't silently overwrite each other. Telegram
|
|
448
|
+
// guarantees file_unique_id is stable and globally unique per file.
|
|
449
|
+
const uniq = att.file_unique_id ? `-${att.file_unique_id}` : '';
|
|
450
|
+
const localName = `${msg.message_id}${uniq}-${safeName}`;
|
|
451
|
+
const localPath = path.join(chatDir, localName);
|
|
452
|
+
// Atomic write: create a temp with the unique PID+timestamp suffix,
|
|
453
|
+
// fill it, then rename to the canonical name. A crash mid-write leaves
|
|
454
|
+
// a `.tmp.*` file (swept later) rather than a truncated canonical file
|
|
455
|
+
// that the EEXIST dedup branch would happily serve on next request.
|
|
456
|
+
if (fs.existsSync(localPath)) {
|
|
457
|
+
console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (already on disk, reusing)`);
|
|
458
|
+
} else {
|
|
459
|
+
const tmpPath = `${localPath}.tmp.${process.pid}.${Date.now()}`;
|
|
460
|
+
try {
|
|
461
|
+
fs.writeFileSync(tmpPath, buf, { flag: 'wx' });
|
|
462
|
+
fs.renameSync(tmpPath, localPath);
|
|
463
|
+
} catch (e) {
|
|
464
|
+
// Clean up stray tmp on any failure; if the rename fell through
|
|
465
|
+
// because another process beat us, EEXIST on the target is fine.
|
|
466
|
+
try { fs.unlinkSync(tmpPath); } catch {}
|
|
467
|
+
if (e.code !== 'EEXIST') throw e;
|
|
468
|
+
console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (race: already on disk)`);
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (${buf.length} bytes) → ${localPath}`);
|
|
472
|
+
dbWrite(() => db.markAttachmentDownloaded(att.id, {
|
|
473
|
+
local_path: localPath, size_bytes: att.size_bytes || buf.length,
|
|
474
|
+
}), `markAttachmentDownloaded ${att.id}`);
|
|
475
|
+
return { ...att, path: localPath, size: att.size_bytes || buf.length, error: null };
|
|
476
|
+
} catch (err) {
|
|
477
|
+
// Don't drop the attachment silently — push it through with the
|
|
478
|
+
// failure noted. buildAttachmentTags renders this as
|
|
479
|
+
// <attachment-failed reason="..." /> so claude tells the user
|
|
480
|
+
// "I couldn't see your <kind>" instead of pretending it received
|
|
481
|
+
// text only.
|
|
482
|
+
//
|
|
483
|
+
// Token redaction: the fetch URL embeds bot${TOKEN} (Telegram CDN
|
|
484
|
+
// requirement) and some undici/network error variants stringify
|
|
485
|
+
// the request including the URL into err.message. Persisting that
|
|
486
|
+
// raw to attachments.download_error or stderr would leak the bot
|
|
487
|
+
// token to anyone with DB or log access. Strip any `bot<token>`
|
|
488
|
+
// pattern from the reason before storing/logging.
|
|
489
|
+
const raw = (err.message || 'unknown').slice(0, 200);
|
|
490
|
+
const reason = raw.replace(/bot\d+:[A-Za-z0-9_-]+/g, 'bot<redacted>');
|
|
491
|
+
console.error(`[attach] download failed for ${att.name}: ${reason}`);
|
|
492
|
+
dbWrite(() => db.markAttachmentFailed(att.id, reason),
|
|
493
|
+
`markAttachmentFailed ${att.id}`);
|
|
494
|
+
return { ...att, path: null, error: reason };
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
|
|
409
498
|
// 0.6.0: takes attachment ROW objects from the DB (not raw extracted
|
|
410
499
|
// metadata). Each row has an `id` so we can mark status as we go.
|
|
411
500
|
// On replay: a row with status='downloaded' and a local_path that's
|
|
412
501
|
// still on disk is reused without re-fetching. Anything else (failed,
|
|
413
502
|
// missing file, never downloaded) hits Telegram's CDN.
|
|
503
|
+
//
|
|
504
|
+
// 0.6.7: parallel fetches with bounded concurrency. The inner work is
|
|
505
|
+
// stateless per-attachment (only writes go to DB / disk via paths
|
|
506
|
+
// keyed on file_unique_id, so two parallel downloads can't collide).
|
|
507
|
+
// Order of `results` is preserved by writing into a fixed-size array
|
|
508
|
+
// at the original index — important so the prompt sees attachments in
|
|
509
|
+
// the same order the user sent them in an album.
|
|
414
510
|
async function downloadAttachments(bot, token, chatId, msg, rows) {
|
|
415
511
|
if (!rows.length) return [];
|
|
416
512
|
const chatDir = path.join(INBOX_DIR, String(chatId));
|
|
417
513
|
fs.mkdirSync(chatDir, { recursive: true });
|
|
418
514
|
|
|
419
|
-
const results = [];
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
size: att.size_bytes || 0,
|
|
429
|
-
error: null,
|
|
430
|
-
});
|
|
431
|
-
continue;
|
|
432
|
-
}
|
|
433
|
-
} catch { /* fall through to refetch */ }
|
|
434
|
-
}
|
|
435
|
-
try {
|
|
436
|
-
const fileInfo = await bot.api.getFile(att.file_id);
|
|
437
|
-
if (!fileInfo?.file_path) throw new Error('no file_path from getFile');
|
|
438
|
-
const url = `https://api.telegram.org/file/bot${token}/${fileInfo.file_path}`;
|
|
439
|
-
const res = await fetch(url);
|
|
440
|
-
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
|
441
|
-
// Defense in depth: re-check size at download time. Telegram can
|
|
442
|
-
// omit file_size from the Message, or its value may not match what
|
|
443
|
-
// the CDN actually serves. Trust Content-Length and fall back to
|
|
444
|
-
// buffering with a ceiling.
|
|
445
|
-
const cl = parseInt(res.headers.get('content-length') || '0', 10);
|
|
446
|
-
if (cl > MAX_FILE_BYTES) {
|
|
447
|
-
throw new Error(`content-length ${cl} exceeds per-file cap ${MAX_FILE_BYTES}`);
|
|
448
|
-
}
|
|
449
|
-
const buf = Buffer.from(await res.arrayBuffer());
|
|
450
|
-
if (buf.length > MAX_FILE_BYTES) {
|
|
451
|
-
throw new Error(`body ${buf.length} bytes exceeds per-file cap ${MAX_FILE_BYTES}`);
|
|
515
|
+
const results = new Array(rows.length);
|
|
516
|
+
let cursor = 0;
|
|
517
|
+
const workers = Array.from(
|
|
518
|
+
{ length: Math.min(ATTACHMENT_DOWNLOAD_CONCURRENCY, rows.length) },
|
|
519
|
+
async () => {
|
|
520
|
+
while (true) {
|
|
521
|
+
const idx = cursor++;
|
|
522
|
+
if (idx >= rows.length) return;
|
|
523
|
+
results[idx] = await downloadOneAttachment(bot, token, chatId, msg, chatDir, rows[idx]);
|
|
452
524
|
}
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
// guarantees file_unique_id is stable and globally unique per file.
|
|
457
|
-
const uniq = att.file_unique_id ? `-${att.file_unique_id}` : '';
|
|
458
|
-
const localName = `${msg.message_id}${uniq}-${safeName}`;
|
|
459
|
-
const localPath = path.join(chatDir, localName);
|
|
460
|
-
// Atomic write: create a temp with the unique PID+timestamp suffix,
|
|
461
|
-
// fill it, then rename to the canonical name. A crash mid-write leaves
|
|
462
|
-
// a `.tmp.*` file (swept later) rather than a truncated canonical file
|
|
463
|
-
// that the EEXIST dedup branch would happily serve on next request.
|
|
464
|
-
if (fs.existsSync(localPath)) {
|
|
465
|
-
console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (already on disk, reusing)`);
|
|
466
|
-
} else {
|
|
467
|
-
const tmpPath = `${localPath}.tmp.${process.pid}.${Date.now()}`;
|
|
468
|
-
try {
|
|
469
|
-
fs.writeFileSync(tmpPath, buf, { flag: 'wx' });
|
|
470
|
-
fs.renameSync(tmpPath, localPath);
|
|
471
|
-
} catch (e) {
|
|
472
|
-
// Clean up stray tmp on any failure; if the rename fell through
|
|
473
|
-
// because another process beat us, EEXIST on the target is fine.
|
|
474
|
-
try { fs.unlinkSync(tmpPath); } catch {}
|
|
475
|
-
if (e.code !== 'EEXIST') throw e;
|
|
476
|
-
console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (race: already on disk)`);
|
|
477
|
-
}
|
|
478
|
-
}
|
|
479
|
-
results.push({ ...att, path: localPath, size: att.size_bytes || buf.length, error: null });
|
|
480
|
-
console.log(`[attach] ${chatId} ← ${att.kind} ${safeName} (${buf.length} bytes) → ${localPath}`);
|
|
481
|
-
dbWrite(() => db.markAttachmentDownloaded(att.id, {
|
|
482
|
-
local_path: localPath, size_bytes: att.size_bytes || buf.length,
|
|
483
|
-
}), `markAttachmentDownloaded ${att.id}`);
|
|
484
|
-
} catch (err) {
|
|
485
|
-
// Don't drop the attachment silently — push it through with the
|
|
486
|
-
// failure noted. buildAttachmentTags renders this as
|
|
487
|
-
// <attachment-failed reason="..." /> so claude tells the user
|
|
488
|
-
// "I couldn't see your <kind>" instead of pretending it received
|
|
489
|
-
// text only.
|
|
490
|
-
//
|
|
491
|
-
// Token redaction: the fetch URL embeds bot${TOKEN} (Telegram CDN
|
|
492
|
-
// requirement) and some undici/network error variants stringify
|
|
493
|
-
// the request including the URL into err.message. Persisting that
|
|
494
|
-
// raw to attachments.download_error or stderr would leak the bot
|
|
495
|
-
// token to anyone with DB or log access. Strip any `bot<token>`
|
|
496
|
-
// pattern from the reason before storing/logging.
|
|
497
|
-
const raw = (err.message || 'unknown').slice(0, 200);
|
|
498
|
-
const reason = raw.replace(/bot\d+:[A-Za-z0-9_-]+/g, 'bot<redacted>');
|
|
499
|
-
console.error(`[attach] download failed for ${att.name}: ${reason}`);
|
|
500
|
-
results.push({ ...att, path: null, error: reason });
|
|
501
|
-
dbWrite(() => db.markAttachmentFailed(att.id, reason),
|
|
502
|
-
`markAttachmentFailed ${att.id}`);
|
|
503
|
-
}
|
|
504
|
-
}
|
|
525
|
+
},
|
|
526
|
+
);
|
|
527
|
+
await Promise.all(workers);
|
|
505
528
|
return results;
|
|
506
529
|
}
|
|
507
530
|
|