bloby-bot 0.40.0 → 0.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bloby-bot",
3
- "version": "0.40.0",
3
+ "version": "0.41.0",
4
4
  "releaseNotes": [
5
5
  "1. # voice note (PTT bubble)",
6
6
  "2. # audio file + caption",
@@ -116,6 +116,7 @@ export class ChannelManager {
116
116
  },
117
117
  (status) => this.handleStatusChange(status),
118
118
  (audioBase64) => this.transcribeAudio(audioBase64),
119
+ (fromMe, isSelfChat, isGroup) => this.shouldProcessWhatsAppAudio(fromMe, isSelfChat, isGroup),
119
120
  );
120
121
  this.providers.set('whatsapp', whatsapp);
121
122
 
@@ -140,6 +141,7 @@ export class ChannelManager {
140
141
  },
141
142
  (status) => this.handleStatusChange(status),
142
143
  (audioBase64) => this.transcribeAudio(audioBase64),
144
+ (fromMe, isSelfChat, isGroup) => this.shouldProcessWhatsAppAudio(fromMe, isSelfChat, isGroup),
143
145
  );
144
146
  this.providers.set('whatsapp', whatsapp);
145
147
  provider = whatsapp;
@@ -419,6 +421,35 @@ export class ChannelManager {
419
421
  return config.channels?.[channel];
420
422
  }
421
423
 
424
+ /** Decide whether an inbound WhatsApp audio is worth transcribing.
425
+ * Mirrors the gates in handleInboundMessage so we don't burn Whisper calls
426
+ * (or, worse, leak the bot via "Whisper not enabled" replies) on messages
427
+ * that would be filtered out anyway.
428
+ *
429
+ * Audio carries no `@bloby` text trigger, so in assistant mode we only
430
+ * transcribe when the audio is admin's self-chat command. */
431
+ private shouldProcessWhatsAppAudio(fromMe: boolean, isSelfChat: boolean, isGroup: boolean): boolean {
432
+ const channelConfig = this.getChannelConfig('whatsapp');
433
+ if (!channelConfig) return false;
434
+
435
+ const mode = channelConfig.mode || 'channel';
436
+
437
+ // Group gating mirrors handleInboundMessage.
438
+ if (isGroup) {
439
+ if (mode === 'channel') return false;
440
+ if (!channelConfig.allowGroups) return false;
441
+ }
442
+
443
+ if (mode === 'channel') return fromMe && isSelfChat;
444
+ if (mode === 'assistant') return fromMe && isSelfChat;
445
+ if (mode === 'business') {
446
+ // Outbound non-self-chat messages are filtered out — same as handleInboundMessage.
447
+ if (fromMe && !isSelfChat) return false;
448
+ return true;
449
+ }
450
+ return false;
451
+ }
452
+
422
453
  /** Handle an incoming message from any channel — debounces rapid messages from the same sender.
423
454
  *
424
455
  * Per-mode behavior is decided here. To add a new mode: extend the gating block below
@@ -48,6 +48,16 @@ export type OnWhatsAppMessage = (
48
48
  /** Callback to transcribe audio via whisper */
49
49
  export type TranscribeFn = (audioBase64: string) => Promise<string | null>;
50
50
 
51
+ /** Callback that decides whether an audio message warrants transcription.
52
+ * Returning false makes the channel silently skip the audio (no Whisper call,
53
+ * no "Whisper not enabled" reply) — used to avoid leaking the bot in modes
54
+ * where the message would be filtered out downstream anyway. */
55
+ export type ShouldTranscribeAudioFn = (
56
+ fromMe: boolean,
57
+ isSelfChat: boolean,
58
+ isGroup: boolean,
59
+ ) => boolean;
60
+
51
61
  export class WhatsAppChannel implements ChannelProvider {
52
62
  readonly type: ChannelType = 'whatsapp';
53
63
 
@@ -58,6 +68,7 @@ export class WhatsAppChannel implements ChannelProvider {
58
68
  private onMessage: OnWhatsAppMessage;
59
69
  private onStatusChange: (status: ChannelStatus) => void;
60
70
  private transcribe: TranscribeFn | null = null;
71
+ private shouldTranscribeAudio: ShouldTranscribeAudioFn | null = null;
61
72
  private reconnectTimer: ReturnType<typeof setTimeout> | null = null;
62
73
  private intentionalDisconnect = false;
63
74
 
@@ -76,10 +87,12 @@ export class WhatsAppChannel implements ChannelProvider {
76
87
  onMessage: OnWhatsAppMessage,
77
88
  onStatusChange: (status: ChannelStatus) => void,
78
89
  transcribe?: TranscribeFn,
90
+ shouldTranscribeAudio?: ShouldTranscribeAudioFn,
79
91
  ) {
80
92
  this.onMessage = onMessage;
81
93
  this.onStatusChange = onStatusChange;
82
94
  this.transcribe = transcribe || null;
95
+ this.shouldTranscribeAudio = shouldTranscribeAudio || null;
83
96
  }
84
97
 
85
98
  async connect(): Promise<void> {
@@ -441,6 +454,29 @@ export class WhatsAppChannel implements ChannelProvider {
441
454
  continue;
442
455
  }
443
456
 
457
+ // Resolve sender/chat identity up front so audio gating can consult mode/role.
458
+ const fromMe = msg.key.fromMe || false;
459
+ const rawSender = msg.key.remoteJid || '';
460
+ const participant = msg.key.participant || '';
461
+ const isGroup = rawSender.endsWith('@g.us');
462
+
463
+ // chatJid: where to reply (group JID for groups, peer JID otherwise).
464
+ const chatJid = rawSender;
465
+
466
+ // The actual sender JID:
467
+ // - groups: always `participant` (remoteJid is the group)
468
+ // - 1:1: `participant` if Baileys provided one (newer protocol), else remoteJid
469
+ const actualSender = isGroup
470
+ ? participant || rawSender
471
+ : (participant || rawSender);
472
+
473
+ // Translate LID JIDs to phone JIDs (only handles our own LID)
474
+ const sender = this.translateJid(actualSender);
475
+ const pushName = msg.pushName || undefined;
476
+
477
+ // Self-chat: only meaningful for 1:1 — remoteJid is our own number AND no participant.
478
+ const isSelfChat = !isGroup && !participant && this.ownPhoneJid !== null && this.translateJid(rawSender) === this.ownPhoneJid;
479
+
444
480
  // Extract text — or transcribe audio if it's a voice note
445
481
  let rawText = this.extractText(msg.message);
446
482
  const images: WhatsAppImageAttachment[] = [];
@@ -459,6 +495,13 @@ export class WhatsAppChannel implements ChannelProvider {
459
495
  }
460
496
 
461
497
  if (!rawText && this.isAudioMessage(msg.message)) {
498
+ // Mode-aware gate: don't transcribe (and don't reveal the bot with a
499
+ // "Whisper not enabled" reply) when the message would be filtered out
500
+ // downstream — e.g. a friend's voice note in assistant mode.
501
+ if (this.shouldTranscribeAudio && !this.shouldTranscribeAudio(fromMe, isSelfChat, isGroup)) {
502
+ log.info(`[whatsapp] Audio skipped by mode gate (fromMe=${fromMe}, selfChat=${isSelfChat}, group=${isGroup})`);
503
+ continue;
504
+ }
462
505
  // Voice note / audio — download and transcribe
463
506
  if (!this.transcribe) {
464
507
  log.info('[whatsapp] Audio message received but no transcribe function configured — skipping');
@@ -494,28 +537,6 @@ export class WhatsAppChannel implements ChannelProvider {
494
537
  // Escape special characters to prevent prompt injection via message content
495
538
  const text = this.escapeMessageText(rawText);
496
539
 
497
- const fromMe = msg.key.fromMe || false;
498
- const rawSender = msg.key.remoteJid || '';
499
- const participant = msg.key.participant || '';
500
- const isGroup = rawSender.endsWith('@g.us');
501
-
502
- // chatJid: where to reply (group JID for groups, peer JID otherwise).
503
- const chatJid = rawSender;
504
-
505
- // The actual sender JID:
506
- // - groups: always `participant` (remoteJid is the group)
507
- // - 1:1: `participant` if Baileys provided one (newer protocol), else remoteJid
508
- const actualSender = isGroup
509
- ? participant || rawSender
510
- : (participant || rawSender);
511
-
512
- // Translate LID JIDs to phone JIDs (only handles our own LID)
513
- const sender = this.translateJid(actualSender);
514
- const pushName = msg.pushName || undefined;
515
-
516
- // Self-chat: only meaningful for 1:1 — remoteJid is our own number AND no participant.
517
- const isSelfChat = !isGroup && !participant && this.ownPhoneJid !== null && this.translateJid(rawSender) === this.ownPhoneJid;
518
-
519
540
  log.info(`[whatsapp] Message from ${sender} (chat=${chatJid}, group=${isGroup}, fromMe=${fromMe}, selfChat=${isSelfChat}, images=${images.length}): ${text.slice(0, 80)}`);
520
541
 
521
542
  this.onMessage(sender, pushName, text, fromMe, isSelfChat, chatJid, isGroup, images.length > 0 ? images : undefined);