bloby-bot 0.70.13 → 0.71.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +223 -45
- package/dist-bloby/assets/{bloby-CU9KhQdP.js → bloby-es6cZJzs.js} +6 -6
- package/dist-bloby/assets/globals-DBqwNiJV.css +2 -0
- package/dist-bloby/assets/{highlighted-body-OFNGDK62-D0Tm_wgU.js → highlighted-body-OFNGDK62-8PiOHw9p.js} +1 -1
- package/dist-bloby/assets/mermaid-GHXKKRXX-BJWX8urU.js +1 -0
- package/dist-bloby/assets/{onboard-GfjHF9nm.js → onboard-BKgy17OU.js} +1 -1
- package/dist-bloby/bloby.html +3 -3
- package/dist-bloby/onboard.html +3 -3
- package/package.json +2 -3
- package/scripts/install +141 -34
- package/scripts/install.ps1 +111 -15
- package/scripts/install.sh +141 -34
- package/shared/config.ts +37 -2
- package/supervisor/channels/manager.ts +68 -33
- package/supervisor/channels/telegram.ts +57 -16
- package/supervisor/channels/types.ts +4 -1
- package/supervisor/channels/whatsapp.ts +57 -10
- package/supervisor/chat/src/components/Chat/AudioBubble.tsx +1 -1
- package/supervisor/chat/src/components/Chat/AuthedImage.tsx +16 -3
- package/supervisor/chat/src/components/Chat/BlobyImageCard.tsx +2 -2
- package/supervisor/chat/src/components/Chat/ImageLightbox.tsx +25 -8
- package/supervisor/chat/src/components/Chat/InputBar.tsx +62 -7
- package/supervisor/chat/src/components/Chat/MessageBubble.tsx +37 -18
- package/supervisor/chat/src/components/Chat/MessageList.tsx +3 -3
- package/supervisor/chat/src/hooks/useChat.ts +52 -0
- package/supervisor/chat/src/lib/authedFile.ts +24 -12
- package/supervisor/file-saver.ts +92 -19
- package/supervisor/harnesses/attachment-policy.ts +111 -0
- package/supervisor/harnesses/claude.ts +62 -15
- package/supervisor/harnesses/codex.ts +69 -43
- package/supervisor/harnesses/pi/index.ts +84 -49
- package/supervisor/harnesses/pi/providers/humanize-error.ts +25 -0
- package/supervisor/harnesses/pi/providers/stream-anthropic.ts +8 -0
- package/supervisor/harnesses/pi/providers/stream-google.ts +5 -0
- package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +15 -6
- package/supervisor/harnesses/pi/providers/types.ts +18 -1
- package/supervisor/harnesses/pi/session.ts +28 -1
- package/supervisor/index.ts +57 -16
- package/supervisor/widget.js +19 -5
- package/worker/db.ts +2 -0
- package/dist-bloby/assets/globals-DlPtwiZL.css +0 -2
- package/dist-bloby/assets/mermaid-GHXKKRXX-B95J3s3s.js +0 -1
- package/supervisor/public/headphones_spritesheet.webp +0 -0
- package/supervisor/public/spritesheet.webp +0 -0
- /package/dist-bloby/assets/{globals-mGpojCOe.js → globals-DN3F0CQE.js} +0 -0
|
@@ -29,7 +29,7 @@ import { AlexaChannel } from './alexa.js';
|
|
|
29
29
|
import { TelegramChannel, type TelegramInbound } from './telegram.js';
|
|
30
30
|
import type { ChannelConfig, ChannelProvider, ChannelStatus, ChannelType, InboundMessage, InboundMessageAttachment, RoutingTarget, SenderRole } from './types.js';
|
|
31
31
|
import type { AgentAttachment } from '../bloby-agent.js';
|
|
32
|
-
import { saveAttachment, type SavedFile } from '../file-saver.js';
|
|
32
|
+
import { saveAttachment, MAX_ATTACHMENTS_PER_MESSAGE, MAX_TOTAL_ATTACHMENT_BYTES, type SavedFile } from '../file-saver.js';
|
|
33
33
|
import type { WAMessageKey } from '@whiskeysockets/baileys';
|
|
34
34
|
|
|
35
35
|
const MAX_CONCURRENT_AGENTS = 5;
|
|
@@ -37,16 +37,34 @@ const MAX_BUFFER_MESSAGES = 30;
|
|
|
37
37
|
const DEBOUNCE_MS = 4000; // 4s — wait for the user to finish typing
|
|
38
38
|
|
|
39
39
|
/** Persist channel-inbound attachments to disk so harnesses that consume file
|
|
40
|
-
* paths (Codex's `localImage`) can see them.
|
|
41
|
-
* attachment is dropped —
|
|
42
|
-
|
|
43
|
-
|
|
40
|
+
* paths (Codex's `localImage`) can see them. Per-file failures are logged and that
|
|
41
|
+
* attachment is dropped — one oversize/corrupt file can't abort the whole message,
|
|
42
|
+
* and text-only delivery still goes through. Bounded by MAX_ATTACHMENTS_PER_MESSAGE
|
|
43
|
+
* (count) and MAX_TOTAL_ATTACHMENT_BYTES (decoded bytes) so a single message can't
|
|
44
|
+
* flood the disk; saveAttachment itself caps each file's size. */
|
|
45
|
+
function saveInboundAttachments(attachments?: AgentAttachment[]): { saved: SavedFile[]; accepted: AgentAttachment[] } {
|
|
46
|
+
if (!attachments?.length) return { saved: [], accepted: [] };
|
|
47
|
+
const capped = attachments.slice(0, MAX_ATTACHMENTS_PER_MESSAGE);
|
|
48
|
+
if (attachments.length > capped.length) {
|
|
49
|
+
log.warn(`[channels] Dropping ${attachments.length - capped.length} inbound attachment(s) over the per-message cap (${MAX_ATTACHMENTS_PER_MESSAGE})`);
|
|
50
|
+
}
|
|
44
51
|
const saved: SavedFile[] = [];
|
|
45
|
-
|
|
46
|
-
|
|
52
|
+
// The raw attachments that actually saved within budget — handed to the harness so the
|
|
53
|
+
// model inlines exactly what got persisted + shown in chat (no over-cap divergence).
|
|
54
|
+
const accepted: AgentAttachment[] = [];
|
|
55
|
+
let totalBytes = 0;
|
|
56
|
+
for (const att of capped) {
|
|
57
|
+
// Estimate decoded size from the base64 length (×3/4) before writing so a burst of
|
|
58
|
+
// mid-size files can't blow the per-message byte budget in aggregate.
|
|
59
|
+
totalBytes += Math.floor((att.data?.length || 0) * 0.75);
|
|
60
|
+
if (totalBytes > MAX_TOTAL_ATTACHMENT_BYTES) {
|
|
61
|
+
log.warn('[channels] Per-message attachment byte budget exceeded — dropping remaining inbound attachments');
|
|
62
|
+
break;
|
|
63
|
+
}
|
|
64
|
+
try { saved.push(saveAttachment(att)); accepted.push(att); }
|
|
47
65
|
catch (err: any) { log.warn(`[channels] Failed to save inbound attachment: ${err.message}`); }
|
|
48
66
|
}
|
|
49
|
-
return saved;
|
|
67
|
+
return { saved, accepted };
|
|
50
68
|
}
|
|
51
69
|
|
|
52
70
|
interface ChannelManagerOpts {
|
|
@@ -144,8 +162,8 @@ export class ChannelManager {
|
|
|
144
162
|
if (channelConfigs?.whatsapp?.enabled && !this.providers.has('whatsapp')) {
|
|
145
163
|
log.info('[channels] Initializing WhatsApp channel...');
|
|
146
164
|
const whatsapp = new WhatsAppChannel(
|
|
147
|
-
(sender, senderName, text, fromMe, isSelfChat, chatJid, isGroup,
|
|
148
|
-
const attachments =
|
|
165
|
+
(sender, senderName, text, fromMe, isSelfChat, chatJid, isGroup, media, inboundKey) => {
|
|
166
|
+
const attachments = media?.map((att) => ({ type: att.type, mediaType: att.mediaType, data: att.data, name: att.name }));
|
|
149
167
|
this.handleInboundMessage('whatsapp', sender, senderName, text, fromMe, isSelfChat, chatJid, isGroup, attachments, inboundKey);
|
|
150
168
|
},
|
|
151
169
|
(status) => this.handleStatusChange(status),
|
|
@@ -211,7 +229,7 @@ export class ChannelManager {
|
|
|
211
229
|
}
|
|
212
230
|
}
|
|
213
231
|
const isOwner = !!ownerUserId && msg.fromUserId === String(ownerUserId);
|
|
214
|
-
const attachments = msg.
|
|
232
|
+
const attachments = msg.attachments?.map((att) => ({ type: att.type, mediaType: att.mediaType, data: att.data, name: att.name }));
|
|
215
233
|
// Sanitize the attacker-controlled display name so it can't fake a `[Telegram | … | admin]`
|
|
216
234
|
// context tag or inject newlines into the agent's context.
|
|
217
235
|
const safeName = msg.senderName ? msg.senderName.replace(/[\[\]|\r\n]/g, ' ').slice(0, 64).trim() || undefined : undefined;
|
|
@@ -236,8 +254,8 @@ export class ChannelManager {
|
|
|
236
254
|
if (provider?.getStatus().connected) return;
|
|
237
255
|
if (!provider) {
|
|
238
256
|
const whatsapp = new WhatsAppChannel(
|
|
239
|
-
(sender, senderName, text, fromMe, isSelfChat, chatJid, isGroup,
|
|
240
|
-
const attachments =
|
|
257
|
+
(sender, senderName, text, fromMe, isSelfChat, chatJid, isGroup, media, inboundKey) => {
|
|
258
|
+
const attachments = media?.map((att) => ({ type: att.type, mediaType: att.mediaType, data: att.data, name: att.name }));
|
|
241
259
|
this.handleInboundMessage('whatsapp', sender, senderName, text, fromMe, isSelfChat, chatJid, isGroup, attachments, inboundKey);
|
|
242
260
|
},
|
|
243
261
|
(status) => this.handleStatusChange(status),
|
|
@@ -992,12 +1010,34 @@ export class ChannelManager {
|
|
|
992
1010
|
const channelTag = `[${this.channelLabel(msg.channel)} | ${msg.sender} | ${earlyRoleTag}]\n`;
|
|
993
1011
|
const displayContent = channelTag + rawDisplay;
|
|
994
1012
|
|
|
1013
|
+
// Convert inbound attachments to agent format and persist them to disk BEFORE the
|
|
1014
|
+
// user-message persist/broadcast — so the StoredAttachment array (filePath-based, served
|
|
1015
|
+
// at /api/files/<relPath>) can ride along in meta.attachments + chat:sync. Without this
|
|
1016
|
+
// the agent sees the media but the chat shows nothing (live or after refresh). An image
|
|
1017
|
+
// keeps an auto-generated name; a file uses the channel-provided filename. (Mirrors the
|
|
1018
|
+
// PWA path in supervisor/index.ts.)
|
|
1019
|
+
const agentAttachments: AgentAttachment[] | undefined = msg.attachments?.map((att) => ({
|
|
1020
|
+
type: att.type,
|
|
1021
|
+
name: att.type === 'image'
|
|
1022
|
+
? `${msg.channel}_image.${att.mediaType.split('/')[1] || 'jpg'}`
|
|
1023
|
+
: (att.name || `${msg.channel}_file`),
|
|
1024
|
+
mediaType: att.mediaType,
|
|
1025
|
+
data: att.data,
|
|
1026
|
+
}));
|
|
1027
|
+
// Save to disk so providers that consume file paths (Codex → localImage) can see the
|
|
1028
|
+
// attachment. Claude consumes raw base64 from `agentAttachments` directly, but the
|
|
1029
|
+
// on-disk copy is what the chat UI renders (filePath → /api/files/<relPath>).
|
|
1030
|
+
const { saved: savedFiles, accepted: acceptedAttachments } = saveInboundAttachments(agentAttachments);
|
|
1031
|
+
const storedAtts = savedFiles.map((f) => ({ type: f.type, name: f.name, mediaType: f.mediaType, filePath: f.relPath }));
|
|
1032
|
+
|
|
995
1033
|
// Save user message to DB
|
|
996
1034
|
try {
|
|
1035
|
+
const userMeta: any = { model, channel: msg.channel };
|
|
1036
|
+
if (storedAtts.length) userMeta.attachments = JSON.stringify(storedAtts);
|
|
997
1037
|
await workerApi(`/api/conversations/${convId}/messages`, 'POST', {
|
|
998
1038
|
role: 'user',
|
|
999
1039
|
content: displayContent,
|
|
1000
|
-
meta:
|
|
1040
|
+
meta: userMeta,
|
|
1001
1041
|
});
|
|
1002
1042
|
} catch (err: any) {
|
|
1003
1043
|
log.warn(`[channels] DB persist error: ${err.message}`);
|
|
@@ -1006,7 +1046,12 @@ export class ChannelManager {
|
|
|
1006
1046
|
// Broadcast to chat clients (mirroring)
|
|
1007
1047
|
broadcastBloby('chat:sync', {
|
|
1008
1048
|
conversationId: convId,
|
|
1009
|
-
message: {
|
|
1049
|
+
message: {
|
|
1050
|
+
role: 'user',
|
|
1051
|
+
content: displayContent,
|
|
1052
|
+
timestamp: new Date().toISOString(),
|
|
1053
|
+
attachments: storedAtts.length ? storedAtts : undefined,
|
|
1054
|
+
},
|
|
1010
1055
|
});
|
|
1011
1056
|
|
|
1012
1057
|
// Fetch names and recent messages
|
|
@@ -1033,18 +1078,6 @@ export class ChannelManager {
|
|
|
1033
1078
|
// Channel context — same tag we already prepended to the stored display content
|
|
1034
1079
|
const channelContext = channelTag;
|
|
1035
1080
|
|
|
1036
|
-
// Convert inbound attachments to agent format
|
|
1037
|
-
const agentAttachments: AgentAttachment[] | undefined = msg.attachments?.map((att) => ({
|
|
1038
|
-
type: 'image' as const,
|
|
1039
|
-
name: `whatsapp_image.${att.mediaType.split('/')[1] || 'jpg'}`,
|
|
1040
|
-
mediaType: att.mediaType,
|
|
1041
|
-
data: att.data,
|
|
1042
|
-
}));
|
|
1043
|
-
// Save to disk so providers that consume file paths (Codex → localImage)
|
|
1044
|
-
// can see the attachment. Claude consumes raw base64 from `agentAttachments`
|
|
1045
|
-
// directly, but the on-disk copy is still useful for the path mention.
|
|
1046
|
-
const savedFiles = saveInboundAttachments(agentAttachments);
|
|
1047
|
-
|
|
1048
1081
|
// Show "typing..." in the correct chat
|
|
1049
1082
|
this.startTyping(msg.channel, msg.rawSender);
|
|
1050
1083
|
|
|
@@ -1127,7 +1160,7 @@ export class ChannelManager {
|
|
|
1127
1160
|
assistantBufferKey: msg.role === 'assistant' ? `${msg.channel}:${msg.sender}` : undefined,
|
|
1128
1161
|
inboundKey: msg.inboundKey,
|
|
1129
1162
|
};
|
|
1130
|
-
this.pushWithRouting(convId, target, channelContent,
|
|
1163
|
+
this.pushWithRouting(convId, target, channelContent, acceptedAttachments, savedFiles);
|
|
1131
1164
|
}
|
|
1132
1165
|
|
|
1133
1166
|
/** Synchronously handle an Alexa utterance: push into the shared conversation,
|
|
@@ -1338,14 +1371,16 @@ export class ChannelManager {
|
|
|
1338
1371
|
|
|
1339
1372
|
const channelContext = `[${this.channelLabel(msg.channel)} | ${msg.sender} | customer${msg.senderName ? ` | ${msg.senderName}` : ''}]\n`;
|
|
1340
1373
|
|
|
1341
|
-
// Convert inbound attachments to agent format
|
|
1374
|
+
// Convert inbound attachments to agent format (image → auto-name; file → channel filename)
|
|
1342
1375
|
const agentAttachments: AgentAttachment[] | undefined = msg.attachments?.map((att) => ({
|
|
1343
|
-
type:
|
|
1344
|
-
name:
|
|
1376
|
+
type: att.type,
|
|
1377
|
+
name: att.type === 'image'
|
|
1378
|
+
? `${msg.channel}_image.${att.mediaType.split('/')[1] || 'jpg'}`
|
|
1379
|
+
: (att.name || `${msg.channel}_file`),
|
|
1345
1380
|
mediaType: att.mediaType,
|
|
1346
1381
|
data: att.data,
|
|
1347
1382
|
}));
|
|
1348
|
-
const savedFiles = saveInboundAttachments(agentAttachments);
|
|
1383
|
+
const { saved: savedFiles, accepted: acceptedAttachments } = saveInboundAttachments(agentAttachments);
|
|
1349
1384
|
|
|
1350
1385
|
// Stable convId per customer (not per message)
|
|
1351
1386
|
const convId = `channel-${agentKey}`;
|
|
@@ -1404,7 +1439,7 @@ export class ChannelManager {
|
|
|
1404
1439
|
this.processQueue();
|
|
1405
1440
|
}
|
|
1406
1441
|
},
|
|
1407
|
-
|
|
1442
|
+
acceptedAttachments,
|
|
1408
1443
|
savedFiles,
|
|
1409
1444
|
{ botName, humanName },
|
|
1410
1445
|
recentMessages,
|
|
@@ -20,10 +20,14 @@ const POLL_TIMEOUT_S = 25; // long-poll hold time
|
|
|
20
20
|
const MAX_MESSAGE_CHARS = 4096; // Telegram hard limit per sendMessage
|
|
21
21
|
const TYPING_REFRESH_MS = 5_000; // Telegram "typing" expires ~5s
|
|
22
22
|
|
|
23
|
-
/**
|
|
24
|
-
|
|
23
|
+
/** Media attachment extracted from an inbound Telegram message.
|
|
24
|
+
* `type: 'image'` → inline vision; `type: 'file'` → a document the agent reads from disk. */
|
|
25
|
+
export interface TelegramMediaAttachment {
|
|
26
|
+
type: 'image' | 'file';
|
|
25
27
|
mediaType: string;
|
|
26
28
|
data: string; // base64
|
|
29
|
+
/** Original filename — present for documents, absent for photos. */
|
|
30
|
+
name?: string;
|
|
27
31
|
}
|
|
28
32
|
|
|
29
33
|
/** Normalized inbound message handed to the ChannelManager. */
|
|
@@ -37,7 +41,7 @@ export interface TelegramInbound {
|
|
|
37
41
|
text: string;
|
|
38
42
|
isGroup: boolean;
|
|
39
43
|
messageId?: number;
|
|
40
|
-
|
|
44
|
+
attachments?: TelegramMediaAttachment[];
|
|
41
45
|
}
|
|
42
46
|
|
|
43
47
|
export type OnTelegramMessage = (msg: TelegramInbound) => void;
|
|
@@ -236,13 +240,27 @@ export class TelegramChannel implements ChannelProvider {
|
|
|
236
240
|
: (from.username || undefined);
|
|
237
241
|
|
|
238
242
|
let rawText: string = message.text || message.caption || '';
|
|
239
|
-
const
|
|
243
|
+
const attachments: TelegramMediaAttachment[] = [];
|
|
240
244
|
|
|
241
|
-
// Photo: download the largest available size.
|
|
245
|
+
// Photo: download the largest available size. Derive the real mediaType from the CDN
|
|
246
|
+
// file extension (Telegram stores PNG/JPEG/WebP as-is) — default to image/jpeg only when unknown.
|
|
242
247
|
if (Array.isArray(message.photo) && message.photo.length > 0) {
|
|
243
248
|
const largest = message.photo[message.photo.length - 1];
|
|
244
249
|
const img = await this.downloadFile(largest.file_id).catch(() => null);
|
|
245
|
-
if (img)
|
|
250
|
+
if (img) attachments.push({ type: 'image', mediaType: mimeFromPath(img.filePath, 'image/jpeg'), data: img.buffer.toString('base64') });
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
// Document: download the binary and forward as a file the agent reads from disk.
|
|
254
|
+
if (message.document?.file_id) {
|
|
255
|
+
const doc = await this.downloadFile(message.document.file_id).catch(() => null);
|
|
256
|
+
if (doc) {
|
|
257
|
+
attachments.push({
|
|
258
|
+
type: 'file',
|
|
259
|
+
mediaType: message.document.mime_type || mimeFromPath(doc.filePath, 'application/octet-stream'),
|
|
260
|
+
data: doc.buffer.toString('base64'),
|
|
261
|
+
name: message.document.file_name || undefined,
|
|
262
|
+
});
|
|
263
|
+
}
|
|
246
264
|
}
|
|
247
265
|
|
|
248
266
|
// Voice note / audio: download + transcribe.
|
|
@@ -252,9 +270,9 @@ export class TelegramChannel implements ChannelProvider {
|
|
|
252
270
|
await this.sendMessage(chatId, 'Voice transcription is off — add an OpenAI API key in your Bloby chat settings (the three-dots menu) to enable it.');
|
|
253
271
|
return;
|
|
254
272
|
}
|
|
255
|
-
const
|
|
256
|
-
if (
|
|
257
|
-
const transcript = await this.transcribe(
|
|
273
|
+
const got = await this.downloadFile(voice.file_id).catch(() => null);
|
|
274
|
+
if (got) {
|
|
275
|
+
const transcript = await this.transcribe(got.buffer.toString('base64')).catch(() => null);
|
|
258
276
|
if (transcript) {
|
|
259
277
|
rawText = transcript;
|
|
260
278
|
log.info(`[telegram] Transcribed voice: "${rawText.slice(0, 80)}"`);
|
|
@@ -265,12 +283,23 @@ export class TelegramChannel implements ChannelProvider {
|
|
|
265
283
|
}
|
|
266
284
|
}
|
|
267
285
|
|
|
268
|
-
|
|
269
|
-
|
|
286
|
+
// Nothing usable extracted. If the message DID carry media we couldn't handle
|
|
287
|
+
// (sticker, video, location, contact, …), tell the user instead of dropping it silently.
|
|
288
|
+
if (!rawText && attachments.length === 0) {
|
|
289
|
+
const hadUnsupportedMedia = !!(message.sticker || message.video || message.video_note ||
|
|
290
|
+
message.animation || message.location || message.contact || message.poll || message.dice);
|
|
291
|
+
if (hadUnsupportedMedia) {
|
|
292
|
+
await this.sendMessage(chatId, "Sorry, I can't read that type of message yet — try sending text, a photo, or a document.");
|
|
293
|
+
}
|
|
294
|
+
return;
|
|
295
|
+
}
|
|
296
|
+
if (!rawText && attachments.length > 0) {
|
|
297
|
+
rawText = attachments.some((a) => a.type === 'image') ? '(image)' : '(document)';
|
|
298
|
+
}
|
|
270
299
|
|
|
271
300
|
const text = escapeMessageText(rawText);
|
|
272
301
|
|
|
273
|
-
log.info(`[telegram] Message from ${fromUserId} (chat=${chatId}, group=${isGroup},
|
|
302
|
+
log.info(`[telegram] Message from ${fromUserId} (chat=${chatId}, group=${isGroup}, media=${attachments.length}): ${text.slice(0, 80)}`);
|
|
274
303
|
|
|
275
304
|
this.onMessage({
|
|
276
305
|
chatId,
|
|
@@ -279,12 +308,13 @@ export class TelegramChannel implements ChannelProvider {
|
|
|
279
308
|
text,
|
|
280
309
|
isGroup,
|
|
281
310
|
messageId: message.message_id,
|
|
282
|
-
|
|
311
|
+
attachments: attachments.length > 0 ? attachments : undefined,
|
|
283
312
|
});
|
|
284
313
|
}
|
|
285
314
|
|
|
286
|
-
/** Resolve a Telegram file_id to its bytes (getFile → download from the file CDN).
|
|
287
|
-
|
|
315
|
+
/** Resolve a Telegram file_id to its bytes (getFile → download from the file CDN).
|
|
316
|
+
* Also returns the CDN file_path so callers can derive an extension/mediaType. */
|
|
317
|
+
private async downloadFile(fileId: string): Promise<{ buffer: Buffer; filePath: string } | null> {
|
|
288
318
|
const file = await this.call('getFile', { file_id: fileId });
|
|
289
319
|
const filePath = file?.file_path;
|
|
290
320
|
if (!filePath) return null;
|
|
@@ -292,7 +322,7 @@ export class TelegramChannel implements ChannelProvider {
|
|
|
292
322
|
if (!r.ok) throw new Error(`file download HTTP ${r.status}`);
|
|
293
323
|
const buf = Buffer.from(await r.arrayBuffer());
|
|
294
324
|
log.info(`[telegram] Downloaded file (${Math.round(buf.length / 1024)}KB)`);
|
|
295
|
-
return buf;
|
|
325
|
+
return { buffer: buf, filePath };
|
|
296
326
|
}
|
|
297
327
|
|
|
298
328
|
/** Call a Bot API method, returning `result` or throwing on `ok:false`. */
|
|
@@ -336,6 +366,17 @@ function sleep(ms: number): Promise<void> {
|
|
|
336
366
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
337
367
|
}
|
|
338
368
|
|
|
369
|
+
/** Best-effort mime type from a file path's extension; returns `fallback` when unknown.
|
|
370
|
+
* Used for Telegram photos/documents where the API doesn't always supply a mime_type. */
|
|
371
|
+
function mimeFromPath(filePath: string | undefined, fallback: string): string {
|
|
372
|
+
const ext = (filePath?.split('.').pop() || '').toLowerCase();
|
|
373
|
+
const map: Record<string, string> = {
|
|
374
|
+
png: 'image/png', jpg: 'image/jpeg', jpeg: 'image/jpeg', gif: 'image/gif', webp: 'image/webp',
|
|
375
|
+
pdf: 'application/pdf', zip: 'application/zip', txt: 'text/plain', csv: 'text/csv', json: 'application/json',
|
|
376
|
+
};
|
|
377
|
+
return map[ext] || fallback;
|
|
378
|
+
}
|
|
379
|
+
|
|
339
380
|
/** Split a long message into <=limit-char chunks, preferring newline boundaries. */
|
|
340
381
|
function splitMessage(text: string, limit: number): string[] {
|
|
341
382
|
if (text.length <= limit) return [text];
|
|
@@ -23,9 +23,12 @@ export interface ChannelConfig {
|
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
export interface InboundMessageAttachment {
|
|
26
|
-
|
|
26
|
+
/** 'image' → inline vision block; 'file' → any document the agent reads from disk. */
|
|
27
|
+
type: 'image' | 'file';
|
|
27
28
|
mediaType: string;
|
|
28
29
|
data: string; // base64
|
|
30
|
+
/** Original filename when the channel provides one (WhatsApp/Telegram documents). */
|
|
31
|
+
name?: string;
|
|
29
32
|
}
|
|
30
33
|
|
|
31
34
|
export interface InboundMessage {
|
|
@@ -24,16 +24,21 @@ import type { ChannelProvider, ChannelStatus, ChannelType } from './types.js';
|
|
|
24
24
|
|
|
25
25
|
const AUTH_DIR = path.join(DATA_DIR, 'channels', 'whatsapp', 'auth');
|
|
26
26
|
|
|
27
|
-
/**
|
|
28
|
-
|
|
27
|
+
/** Media attachment extracted from a WhatsApp message.
|
|
28
|
+
* `type: 'image'` → inline vision; `type: 'file'` → a document the agent reads from disk. */
|
|
29
|
+
export interface WhatsAppMediaAttachment {
|
|
30
|
+
type: 'image' | 'file';
|
|
29
31
|
mediaType: string;
|
|
30
32
|
data: string; // base64
|
|
33
|
+
/** Original filename — present for documents (WhatsApp supplies it), absent for images. */
|
|
34
|
+
name?: string;
|
|
31
35
|
}
|
|
32
36
|
|
|
33
37
|
/** Callback when a new message arrives.
|
|
34
38
|
* - sender: who sent it (phone JID, translated from LID where possible)
|
|
35
39
|
* - chatJid: the conversation identifier (group JID for groups, peer JID for 1:1) — reply to this
|
|
36
40
|
* - isGroup: true when the chat is a WhatsApp group (@g.us)
|
|
41
|
+
* - media: image and/or document attachments extracted from the message
|
|
37
42
|
* - inboundKey: original Baileys message key — used to react/quote/ack the user's message
|
|
38
43
|
*/
|
|
39
44
|
export type OnWhatsAppMessage = (
|
|
@@ -44,7 +49,7 @@ export type OnWhatsAppMessage = (
|
|
|
44
49
|
isSelfChat: boolean,
|
|
45
50
|
chatJid: string,
|
|
46
51
|
isGroup: boolean,
|
|
47
|
-
|
|
52
|
+
media?: WhatsAppMediaAttachment[],
|
|
48
53
|
inboundKey?: WAMessageKey,
|
|
49
54
|
) => void;
|
|
50
55
|
|
|
@@ -576,7 +581,7 @@ export class WhatsAppChannel implements ChannelProvider {
|
|
|
576
581
|
|
|
577
582
|
// Extract text — or transcribe audio if it's a voice note
|
|
578
583
|
let rawText = this.extractText(msg.message);
|
|
579
|
-
const
|
|
584
|
+
const media: WhatsAppMediaAttachment[] = [];
|
|
580
585
|
|
|
581
586
|
// Download image if present
|
|
582
587
|
if (this.isImageMessage(msg.message)) {
|
|
@@ -584,13 +589,32 @@ export class WhatsAppChannel implements ChannelProvider {
|
|
|
584
589
|
const buffer = await downloadMediaMessage(msg, 'buffer', {}) as Buffer;
|
|
585
590
|
const mimeType = this.getImageMimeType(msg.message) || 'image/jpeg';
|
|
586
591
|
const base64 = buffer.toString('base64');
|
|
587
|
-
|
|
592
|
+
media.push({ type: 'image', mediaType: mimeType, data: base64 });
|
|
588
593
|
log.info(`[whatsapp] Downloaded image (${Math.round(buffer.length / 1024)}KB, ${mimeType})`);
|
|
589
594
|
} catch (err: any) {
|
|
590
595
|
log.warn(`[whatsapp] Image download failed: ${err.message}`);
|
|
591
596
|
}
|
|
592
597
|
}
|
|
593
598
|
|
|
599
|
+
// Download document if present (PDF, docx, zip, etc.) — the binary is downloaded
|
|
600
|
+
// here (the caption, if any, is already covered by extractText above).
|
|
601
|
+
const docInfo = this.getDocumentInfo(msg.message);
|
|
602
|
+
if (docInfo) {
|
|
603
|
+
try {
|
|
604
|
+
const buffer = await downloadMediaMessage(msg, 'buffer', {}) as Buffer;
|
|
605
|
+
const base64 = buffer.toString('base64');
|
|
606
|
+
media.push({
|
|
607
|
+
type: 'file',
|
|
608
|
+
mediaType: docInfo.mimetype || 'application/octet-stream',
|
|
609
|
+
data: base64,
|
|
610
|
+
name: docInfo.fileName,
|
|
611
|
+
});
|
|
612
|
+
log.info(`[whatsapp] Downloaded document (${Math.round(buffer.length / 1024)}KB, ${docInfo.mimetype || 'unknown'}, ${docInfo.fileName || 'unnamed'})`);
|
|
613
|
+
} catch (err: any) {
|
|
614
|
+
log.warn(`[whatsapp] Document download failed: ${err.message}`);
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
|
|
594
618
|
if (!rawText && this.isAudioMessage(msg.message)) {
|
|
595
619
|
// Voice note / audio — download and transcribe
|
|
596
620
|
if (!this.transcribe) {
|
|
@@ -616,11 +640,11 @@ export class WhatsAppChannel implements ChannelProvider {
|
|
|
616
640
|
}
|
|
617
641
|
}
|
|
618
642
|
|
|
619
|
-
// Skip if no text AND no
|
|
643
|
+
// Skip if no text AND no media; otherwise default text for media-only
|
|
620
644
|
// messages. Collapsing both branches also narrows `rawText` to `string`.
|
|
621
645
|
if (!rawText) {
|
|
622
|
-
if (
|
|
623
|
-
rawText = '(image)';
|
|
646
|
+
if (media.length === 0) continue;
|
|
647
|
+
rawText = media.some((m) => m.type === 'image') ? '(image)' : '(document)';
|
|
624
648
|
}
|
|
625
649
|
|
|
626
650
|
// Escape special characters to prevent prompt injection via message content
|
|
@@ -663,7 +687,7 @@ export class WhatsAppChannel implements ChannelProvider {
|
|
|
663
687
|
const ownsParticipant = !participant || participantResolved === this.ownPhoneJid;
|
|
664
688
|
const isSelfChat = !isGroup && ownsChat && ownsParticipant;
|
|
665
689
|
|
|
666
|
-
log.info(`[whatsapp] Message from ${sender} (chat=${chatJid}, group=${isGroup}, fromMe=${fromMe}, selfChat=${isSelfChat},
|
|
690
|
+
log.info(`[whatsapp] Message from ${sender} (chat=${chatJid}, group=${isGroup}, fromMe=${fromMe}, selfChat=${isSelfChat}, media=${media.length}): ${text.slice(0, 80)}`);
|
|
667
691
|
|
|
668
692
|
this.onMessage(
|
|
669
693
|
sender,
|
|
@@ -673,7 +697,7 @@ export class WhatsAppChannel implements ChannelProvider {
|
|
|
673
697
|
isSelfChat,
|
|
674
698
|
chatJid,
|
|
675
699
|
isGroup,
|
|
676
|
-
|
|
700
|
+
media.length > 0 ? media : undefined,
|
|
677
701
|
msg.key,
|
|
678
702
|
);
|
|
679
703
|
}
|
|
@@ -692,6 +716,10 @@ export class WhatsAppChannel implements ChannelProvider {
|
|
|
692
716
|
if (message.imageMessage?.caption) return message.imageMessage.caption;
|
|
693
717
|
if (message.videoMessage?.caption) return message.videoMessage.caption;
|
|
694
718
|
if (message.documentMessage?.caption) return message.documentMessage.caption;
|
|
719
|
+
// Captioned documents arrive wrapped in documentWithCaptionMessage.
|
|
720
|
+
if (message.documentWithCaptionMessage?.message?.documentMessage?.caption) {
|
|
721
|
+
return message.documentWithCaptionMessage.message.documentMessage.caption;
|
|
722
|
+
}
|
|
695
723
|
|
|
696
724
|
// View-once wrappers
|
|
697
725
|
if (message.viewOnceMessage?.message) return this.extractText(message.viewOnceMessage.message);
|
|
@@ -729,6 +757,25 @@ export class WhatsAppChannel implements ChannelProvider {
|
|
|
729
757
|
return null;
|
|
730
758
|
}
|
|
731
759
|
|
|
760
|
+
/** Extract document metadata (mimetype + fileName) from a message, unwrapping the
|
|
761
|
+
* common containers. Returns null when there is no document.
|
|
762
|
+
*
|
|
763
|
+
* WhatsApp wraps a captioned document in `documentWithCaptionMessage.message.documentMessage`
|
|
764
|
+
* while a bare document is `documentMessage` directly — both must resolve. (The actual binary
|
|
765
|
+
* is fetched via downloadMediaMessage on the outer `msg`, which Baileys unwraps itself.) */
|
|
766
|
+
private getDocumentInfo(message: any): { mimetype?: string; fileName?: string } | null {
|
|
767
|
+
if (!message) return null;
|
|
768
|
+
const doc =
|
|
769
|
+
message.documentMessage ||
|
|
770
|
+
message.documentWithCaptionMessage?.message?.documentMessage ||
|
|
771
|
+
message.viewOnceMessage?.message?.documentMessage ||
|
|
772
|
+
message.viewOnceMessageV2?.message?.documentMessage ||
|
|
773
|
+
message.ephemeralMessage?.message?.documentMessage ||
|
|
774
|
+
message.ephemeralMessage?.message?.documentWithCaptionMessage?.message?.documentMessage;
|
|
775
|
+
if (!doc) return null;
|
|
776
|
+
return { mimetype: doc.mimetype || undefined, fileName: doc.fileName || undefined };
|
|
777
|
+
}
|
|
778
|
+
|
|
732
779
|
/** Check if a message contains audio (voice note or audio file) */
|
|
733
780
|
private isAudioMessage(message: any): boolean {
|
|
734
781
|
if (!message) return false;
|
|
@@ -23,7 +23,7 @@ export default function AudioBubble({ audioData }: Props) {
|
|
|
23
23
|
// Historical audio is stored as an /api/files/* path, which a native Audio element
|
|
24
24
|
// can't fetch (the Bearer token can't ride on the request) — resolve it to a blob URL
|
|
25
25
|
// fetched with auth. data: URLs (freshly-recorded clips) pass straight through.
|
|
26
|
-
const resolvedAudioUrl = useAuthedFileUrl(audioData);
|
|
26
|
+
const { url: resolvedAudioUrl } = useAuthedFileUrl(audioData);
|
|
27
27
|
|
|
28
28
|
// Create Audio element once the source URL is ready
|
|
29
29
|
useEffect(() => {
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { ImageOff } from 'lucide-react';
|
|
1
2
|
import { useAuthedFileUrl } from '../../lib/authedFile';
|
|
2
3
|
|
|
3
4
|
interface Props {
|
|
@@ -11,11 +12,23 @@ interface Props {
|
|
|
11
12
|
* An `<img>` for `/api/files/*` attachments. The file is fetched with the auth token
|
|
12
13
|
* (see `useAuthedFileUrl`) and rendered from a blob URL, because a native `<img src>`
|
|
13
14
|
* request can't carry the Bearer token that `/api/files` now requires. While the fetch
|
|
14
|
-
* is in flight
|
|
15
|
-
*
|
|
15
|
+
* is in flight a subtle pulsing placeholder is shown in its place so the layout doesn't
|
|
16
|
+
* jump; if it failed (deleted / 401 / 5xx) a "broken image" fallback is shown instead.
|
|
16
17
|
*/
|
|
17
18
|
export default function AuthedImage({ src, alt, className, onClick }: Props) {
|
|
18
|
-
const resolvedSrc = useAuthedFileUrl(src);
|
|
19
|
+
const { url: resolvedSrc, status } = useAuthedFileUrl(src);
|
|
20
|
+
|
|
21
|
+
if (status === 'error') {
|
|
22
|
+
return (
|
|
23
|
+
<div
|
|
24
|
+
className={`${className ?? ''} flex items-center justify-center bg-black/10 text-muted-foreground/50`}
|
|
25
|
+
onClick={onClick}
|
|
26
|
+
title={alt || 'Image not found'}
|
|
27
|
+
>
|
|
28
|
+
<ImageOff className="h-5 w-5" />
|
|
29
|
+
</div>
|
|
30
|
+
);
|
|
31
|
+
}
|
|
19
32
|
|
|
20
33
|
if (!resolvedSrc) {
|
|
21
34
|
return <div className={`${className ?? ''} bg-white/10 animate-pulse`} onClick={onClick} />;
|
|
@@ -18,7 +18,7 @@ export default function BlobyImageCard({ src, alt }: Props) {
|
|
|
18
18
|
// `src` may be a same-origin /api/files/* path (needs the auth token, which a native
|
|
19
19
|
// <img> can't send) or an external URL — useAuthedFileUrl only fetches+authes the
|
|
20
20
|
// former and passes external URLs through untouched (so the token never leaves origin).
|
|
21
|
-
const resolvedSrc = useAuthedFileUrl(src);
|
|
21
|
+
const { url: resolvedSrc, status } = useAuthedFileUrl(src);
|
|
22
22
|
|
|
23
23
|
const handleDownload = async () => {
|
|
24
24
|
try {
|
|
@@ -37,7 +37,7 @@ export default function BlobyImageCard({ src, alt }: Props) {
|
|
|
37
37
|
}
|
|
38
38
|
};
|
|
39
39
|
|
|
40
|
-
if (failed) {
|
|
40
|
+
if (failed || status === 'error') {
|
|
41
41
|
return (
|
|
42
42
|
<div className="my-2 flex items-center gap-2.5 px-3.5 py-2.5 rounded-xl border border-border/30 bg-black/10 text-muted-foreground/50 text-xs">
|
|
43
43
|
<ImageOff className="h-4 w-4 shrink-0" />
|
|
@@ -1,17 +1,26 @@
|
|
|
1
1
|
import { useCallback, useEffect } from 'react';
|
|
2
2
|
import { motion, AnimatePresence } from 'framer-motion';
|
|
3
|
-
import { ChevronLeft, ChevronRight, X, Download } from 'lucide-react';
|
|
3
|
+
import { ChevronLeft, ChevronRight, X, Download, ImageOff } from 'lucide-react';
|
|
4
4
|
import { authFetch } from '../../lib/auth';
|
|
5
5
|
import { useAuthedFileUrl } from '../../lib/authedFile';
|
|
6
6
|
|
|
7
|
+
/** One lightbox entry: the (possibly data:/`/api/files`) URL plus the human filename,
|
|
8
|
+
* so downloads and alt text use the real name rather than a URL stamp. */
|
|
9
|
+
export interface LightboxImage {
|
|
10
|
+
url: string;
|
|
11
|
+
name?: string;
|
|
12
|
+
}
|
|
13
|
+
|
|
7
14
|
interface Props {
|
|
8
|
-
images:
|
|
15
|
+
images: LightboxImage[];
|
|
9
16
|
index: number;
|
|
10
17
|
onClose: () => void;
|
|
11
18
|
onNavigate: (index: number) => void;
|
|
12
19
|
}
|
|
13
20
|
|
|
14
21
|
export default function ImageLightbox({ images, index, onClose, onNavigate }: Props) {
|
|
22
|
+
const current = images[index];
|
|
23
|
+
|
|
15
24
|
const goPrev = useCallback(() => {
|
|
16
25
|
if (index > 0) onNavigate(index - 1);
|
|
17
26
|
}, [index, onNavigate]);
|
|
@@ -22,7 +31,7 @@ export default function ImageLightbox({ images, index, onClose, onNavigate }: Pr
|
|
|
22
31
|
|
|
23
32
|
// /api/files/* needs the auth token, which a native <img src> can't send — resolve
|
|
24
33
|
// the currently-shown image to a blob URL fetched with the Authorization header.
|
|
25
|
-
const resolvedSrc = useAuthedFileUrl(
|
|
34
|
+
const { url: resolvedSrc, status } = useAuthedFileUrl(current?.url);
|
|
26
35
|
|
|
27
36
|
useEffect(() => {
|
|
28
37
|
const handleKey = (e: KeyboardEvent) => {
|
|
@@ -50,18 +59,18 @@ export default function ImageLightbox({ images, index, onClose, onNavigate }: Pr
|
|
|
50
59
|
onClick={async (e) => {
|
|
51
60
|
e.stopPropagation();
|
|
52
61
|
try {
|
|
53
|
-
const res = await authFetch(
|
|
62
|
+
const res = await authFetch(current.url);
|
|
54
63
|
const blob = await res.blob();
|
|
55
64
|
const url = URL.createObjectURL(blob);
|
|
56
65
|
const a = document.createElement('a');
|
|
57
66
|
a.href = url;
|
|
58
|
-
a.download =
|
|
67
|
+
a.download = current.name || current.url.split('/').pop() || 'image';
|
|
59
68
|
document.body.appendChild(a);
|
|
60
69
|
a.click();
|
|
61
70
|
document.body.removeChild(a);
|
|
62
71
|
URL.revokeObjectURL(url);
|
|
63
72
|
} catch {
|
|
64
|
-
window.open(
|
|
73
|
+
window.open(current.url, '_blank');
|
|
65
74
|
}
|
|
66
75
|
}}
|
|
67
76
|
className="p-2 rounded-full bg-white/10 hover:bg-white/20 transition-colors text-white"
|
|
@@ -105,10 +114,18 @@ export default function ImageLightbox({ images, index, onClose, onNavigate }: Pr
|
|
|
105
114
|
)}
|
|
106
115
|
|
|
107
116
|
{/* Image */}
|
|
108
|
-
{
|
|
117
|
+
{status === 'error' ? (
|
|
118
|
+
<div
|
|
119
|
+
className="flex flex-col items-center gap-2 rounded-lg bg-white/5 px-8 py-10 text-white/50"
|
|
120
|
+
onClick={(e) => e.stopPropagation()}
|
|
121
|
+
>
|
|
122
|
+
<ImageOff className="h-10 w-10" />
|
|
123
|
+
<span className="text-sm">{current?.name || 'Image not found'}</span>
|
|
124
|
+
</div>
|
|
125
|
+
) : resolvedSrc ? (
|
|
109
126
|
<img
|
|
110
127
|
src={resolvedSrc}
|
|
111
|
-
alt=
|
|
128
|
+
alt={current?.name || ''}
|
|
112
129
|
className="max-h-[85vh] max-w-[90vw] object-contain rounded-lg"
|
|
113
130
|
onClick={(e) => e.stopPropagation()}
|
|
114
131
|
/>
|