bloby-bot 0.40.0 → 0.41.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -116,6 +116,7 @@ export class ChannelManager {
|
|
|
116
116
|
},
|
|
117
117
|
(status) => this.handleStatusChange(status),
|
|
118
118
|
(audioBase64) => this.transcribeAudio(audioBase64),
|
|
119
|
+
(fromMe, isSelfChat, isGroup) => this.shouldProcessWhatsAppAudio(fromMe, isSelfChat, isGroup),
|
|
119
120
|
);
|
|
120
121
|
this.providers.set('whatsapp', whatsapp);
|
|
121
122
|
|
|
@@ -140,6 +141,7 @@ export class ChannelManager {
|
|
|
140
141
|
},
|
|
141
142
|
(status) => this.handleStatusChange(status),
|
|
142
143
|
(audioBase64) => this.transcribeAudio(audioBase64),
|
|
144
|
+
(fromMe, isSelfChat, isGroup) => this.shouldProcessWhatsAppAudio(fromMe, isSelfChat, isGroup),
|
|
143
145
|
);
|
|
144
146
|
this.providers.set('whatsapp', whatsapp);
|
|
145
147
|
provider = whatsapp;
|
|
@@ -419,6 +421,35 @@ export class ChannelManager {
|
|
|
419
421
|
return config.channels?.[channel];
|
|
420
422
|
}
|
|
421
423
|
|
|
424
|
+
/**
 * Gate for inbound WhatsApp audio: reports whether the audio should be transcribed.
 *
 * Intended to apply the same filtering as handleInboundMessage, so no Whisper
 * call is spent (and no "Whisper not enabled" reply gives the bot away) on a
 * message that would be dropped later anyway.
 *
 * A voice note cannot carry an `@bloby` text trigger, so in assistant mode —
 * exactly like channel mode — only the admin's own self-chat audio qualifies.
 *
 * @param fromMe - true when the message was sent from the bot's own account
 * @param isSelfChat - true when the chat is the account's chat with itself
 * @param isGroup - true when the message arrived in a group chat
 * @returns true when the audio should be transcribed, false to skip it silently
 */
private shouldProcessWhatsAppAudio(fromMe: boolean, isSelfChat: boolean, isGroup: boolean): boolean {
  const cfg = this.getChannelConfig('whatsapp');
  if (!cfg) {
    // No WhatsApp configuration at all — nothing to process.
    return false;
  }

  // An unset mode falls back to 'channel'.
  const activeMode = cfg.mode || 'channel';

  // Group audio is rejected in channel mode, and in any mode unless groups are allowed.
  if (isGroup && (activeMode === 'channel' || !cfg.allowGroups)) {
    return false;
  }

  switch (activeMode) {
    case 'channel':
    case 'assistant':
      // Only the owner's self-chat audio is treated as a command.
      return fromMe && isSelfChat;
    case 'business':
      // Everything passes except our own outbound audio outside the self-chat.
      return !fromMe || isSelfChat;
    default:
      // Unrecognised mode: stay silent.
      return false;
  }
}
|
|
452
|
+
|
|
422
453
|
/** Handle an incoming message from any channel — debounces rapid messages from the same sender.
|
|
423
454
|
*
|
|
424
455
|
* Per-mode behavior is decided here. To add a new mode: extend the gating block below
|
|
@@ -48,6 +48,16 @@ export type OnWhatsAppMessage = (
|
|
|
48
48
|
/** Callback to transcribe audio via whisper */
|
|
49
49
|
export type TranscribeFn = (audioBase64: string) => Promise<string | null>;
|
|
50
50
|
|
|
51
|
+
/**
 * Predicate consulted before an audio message is transcribed.
 *
 * A `false` result tells the channel to drop the audio quietly: no Whisper
 * call is made and no "Whisper not enabled" reply is sent. This keeps the bot
 * from revealing itself in modes where the message would be discarded
 * downstream regardless.
 *
 * @param fromMe - true when the message was sent from the bot's own account
 * @param isSelfChat - true when the chat is the account's chat with itself
 * @param isGroup - true when the message arrived in a group chat
 * @returns true to go ahead with transcription, false to skip the audio
 */
export type ShouldTranscribeAudioFn = (fromMe: boolean, isSelfChat: boolean, isGroup: boolean) => boolean;
|
|
60
|
+
|
|
51
61
|
export class WhatsAppChannel implements ChannelProvider {
|
|
52
62
|
readonly type: ChannelType = 'whatsapp';
|
|
53
63
|
|
|
@@ -58,6 +68,7 @@ export class WhatsAppChannel implements ChannelProvider {
|
|
|
58
68
|
private onMessage: OnWhatsAppMessage;
|
|
59
69
|
private onStatusChange: (status: ChannelStatus) => void;
|
|
60
70
|
private transcribe: TranscribeFn | null = null;
|
|
71
|
+
private shouldTranscribeAudio: ShouldTranscribeAudioFn | null = null;
|
|
61
72
|
private reconnectTimer: ReturnType<typeof setTimeout> | null = null;
|
|
62
73
|
private intentionalDisconnect = false;
|
|
63
74
|
|
|
@@ -76,10 +87,12 @@ export class WhatsAppChannel implements ChannelProvider {
|
|
|
76
87
|
onMessage: OnWhatsAppMessage,
|
|
77
88
|
onStatusChange: (status: ChannelStatus) => void,
|
|
78
89
|
transcribe?: TranscribeFn,
|
|
90
|
+
shouldTranscribeAudio?: ShouldTranscribeAudioFn,
|
|
79
91
|
) {
|
|
80
92
|
this.onMessage = onMessage;
|
|
81
93
|
this.onStatusChange = onStatusChange;
|
|
82
94
|
this.transcribe = transcribe || null;
|
|
95
|
+
this.shouldTranscribeAudio = shouldTranscribeAudio || null;
|
|
83
96
|
}
|
|
84
97
|
|
|
85
98
|
async connect(): Promise<void> {
|
|
@@ -441,6 +454,29 @@ export class WhatsAppChannel implements ChannelProvider {
|
|
|
441
454
|
continue;
|
|
442
455
|
}
|
|
443
456
|
|
|
457
|
+
// Resolve sender/chat identity up front so audio gating can consult mode/role.
|
|
458
|
+
const fromMe = msg.key.fromMe || false;
|
|
459
|
+
const rawSender = msg.key.remoteJid || '';
|
|
460
|
+
const participant = msg.key.participant || '';
|
|
461
|
+
const isGroup = rawSender.endsWith('@g.us');
|
|
462
|
+
|
|
463
|
+
// chatJid: where to reply (group JID for groups, peer JID otherwise).
|
|
464
|
+
const chatJid = rawSender;
|
|
465
|
+
|
|
466
|
+
// The actual sender JID:
|
|
467
|
+
// - groups: always `participant` (remoteJid is the group)
|
|
468
|
+
// - 1:1: `participant` if Baileys provided one (newer protocol), else remoteJid
|
|
469
|
+
const actualSender = isGroup
|
|
470
|
+
? participant || rawSender
|
|
471
|
+
: (participant || rawSender);
|
|
472
|
+
|
|
473
|
+
// Translate LID JIDs to phone JIDs (only handles our own LID)
|
|
474
|
+
const sender = this.translateJid(actualSender);
|
|
475
|
+
const pushName = msg.pushName || undefined;
|
|
476
|
+
|
|
477
|
+
// Self-chat: only meaningful for 1:1 — remoteJid is our own number AND no participant.
|
|
478
|
+
const isSelfChat = !isGroup && !participant && this.ownPhoneJid !== null && this.translateJid(rawSender) === this.ownPhoneJid;
|
|
479
|
+
|
|
444
480
|
// Extract text — or transcribe audio if it's a voice note
|
|
445
481
|
let rawText = this.extractText(msg.message);
|
|
446
482
|
const images: WhatsAppImageAttachment[] = [];
|
|
@@ -459,6 +495,13 @@ export class WhatsAppChannel implements ChannelProvider {
|
|
|
459
495
|
}
|
|
460
496
|
|
|
461
497
|
if (!rawText && this.isAudioMessage(msg.message)) {
|
|
498
|
+
// Mode-aware gate: don't transcribe (and don't reveal the bot with a
|
|
499
|
+
// "Whisper not enabled" reply) when the message would be filtered out
|
|
500
|
+
// downstream — e.g. a friend's voice note in assistant mode.
|
|
501
|
+
if (this.shouldTranscribeAudio && !this.shouldTranscribeAudio(fromMe, isSelfChat, isGroup)) {
|
|
502
|
+
log.info(`[whatsapp] Audio skipped by mode gate (fromMe=${fromMe}, selfChat=${isSelfChat}, group=${isGroup})`);
|
|
503
|
+
continue;
|
|
504
|
+
}
|
|
462
505
|
// Voice note / audio — download and transcribe
|
|
463
506
|
if (!this.transcribe) {
|
|
464
507
|
log.info('[whatsapp] Audio message received but no transcribe function configured — skipping');
|
|
@@ -494,28 +537,6 @@ export class WhatsAppChannel implements ChannelProvider {
|
|
|
494
537
|
// Escape special characters to prevent prompt injection via message content
|
|
495
538
|
const text = this.escapeMessageText(rawText);
|
|
496
539
|
|
|
497
|
-
const fromMe = msg.key.fromMe || false;
|
|
498
|
-
const rawSender = msg.key.remoteJid || '';
|
|
499
|
-
const participant = msg.key.participant || '';
|
|
500
|
-
const isGroup = rawSender.endsWith('@g.us');
|
|
501
|
-
|
|
502
|
-
// chatJid: where to reply (group JID for groups, peer JID otherwise).
|
|
503
|
-
const chatJid = rawSender;
|
|
504
|
-
|
|
505
|
-
// The actual sender JID:
|
|
506
|
-
// - groups: always `participant` (remoteJid is the group)
|
|
507
|
-
// - 1:1: `participant` if Baileys provided one (newer protocol), else remoteJid
|
|
508
|
-
const actualSender = isGroup
|
|
509
|
-
? participant || rawSender
|
|
510
|
-
: (participant || rawSender);
|
|
511
|
-
|
|
512
|
-
// Translate LID JIDs to phone JIDs (only handles our own LID)
|
|
513
|
-
const sender = this.translateJid(actualSender);
|
|
514
|
-
const pushName = msg.pushName || undefined;
|
|
515
|
-
|
|
516
|
-
// Self-chat: only meaningful for 1:1 — remoteJid is our own number AND no participant.
|
|
517
|
-
const isSelfChat = !isGroup && !participant && this.ownPhoneJid !== null && this.translateJid(rawSender) === this.ownPhoneJid;
|
|
518
|
-
|
|
519
540
|
log.info(`[whatsapp] Message from ${sender} (chat=${chatJid}, group=${isGroup}, fromMe=${fromMe}, selfChat=${isSelfChat}, images=${images.length}): ${text.slice(0, 80)}`);
|
|
520
541
|
|
|
521
542
|
this.onMessage(sender, pushName, text, fromMe, isSelfChat, chatJid, isGroup, images.length > 0 ? images : undefined);
|