fluxy-bot 0.15.10 → 0.15.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "fluxy-bot",
3
- "version": "0.15.10",
3
+ "version": "0.15.11",
4
4
  "releaseNotes": [
5
5
  "1. react router implemented",
6
6
  "2. new workspace design",
@@ -71,6 +71,7 @@ export class ChannelManager {
71
71
  const whatsapp = new WhatsAppChannel(
72
72
  (sender, senderName, text, fromMe, isSelfChat) => this.handleInboundMessage('whatsapp', sender, senderName, text, fromMe, isSelfChat),
73
73
  (status) => this.handleStatusChange(status),
74
+ (audioBase64) => this.transcribeAudio(audioBase64),
74
75
  );
75
76
  this.providers.set('whatsapp', whatsapp);
76
77
 
@@ -91,6 +92,7 @@ export class ChannelManager {
91
92
  const whatsapp = new WhatsAppChannel(
92
93
  (sender, senderName, text, fromMe, isSelfChat) => this.handleInboundMessage('whatsapp', sender, senderName, text, fromMe, isSelfChat),
93
94
  (status) => this.handleStatusChange(status),
95
+ (audioBase64) => this.transcribeAudio(audioBase64),
94
96
  );
95
97
  this.providers.set('whatsapp', whatsapp);
96
98
  provider = whatsapp;
@@ -122,6 +124,14 @@ export class ChannelManager {
122
124
  await provider.sendMessage(to, text);
123
125
  }
124
126
 
127
+ /** Show "typing..." indicator in a chat */
128
+ startTyping(channel: ChannelType, jid: string): void {
129
+ const provider = this.providers.get(channel);
130
+ if (provider && 'startTyping' in provider) {
131
+ (provider as WhatsAppChannel).startTyping(jid);
132
+ }
133
+ }
134
+
125
135
  /** Get status of all channels */
126
136
  getStatuses(): ChannelStatus[] {
127
137
  return Array.from(this.providers.values()).map((p) => p.getStatus());
@@ -320,6 +330,9 @@ export class ChannelManager {
320
330
  // Channel context — tells the agent this is a WhatsApp message, respond naturally
321
331
  const channelContext = `[WhatsApp | ${msg.sender} | admin]\n`;
322
332
 
333
+ // Show "typing..." while the agent processes
334
+ this.startTyping(msg.channel, msg.rawSender);
335
+
323
336
  startFluxyAgentQuery(
324
337
  convId,
325
338
  channelContext + msg.text,
@@ -416,6 +429,9 @@ export class ChannelManager {
416
429
 
417
430
  this.activeAgents.set(agentKey, { sender: msg.sender, channel: msg.channel });
418
431
 
432
+ // Show "typing..." while the agent processes
433
+ this.startTyping(msg.channel, msg.rawSender);
434
+
419
435
  // Build an enriched script prompt with customer memory if available
420
436
  let enrichedScript = scriptPrompt;
421
437
  if (customerMemory && enrichedScript) {
@@ -453,6 +469,21 @@ export class ChannelManager {
453
469
  );
454
470
  }
455
471
 
472
+ /** Transcribe audio via the existing whisper endpoint */
473
+ private async transcribeAudio(audioBase64: string): Promise<string | null> {
474
+ try {
475
+ const result = await this.opts.workerApi('/api/whisper/transcribe', 'POST', { audio: audioBase64 });
476
+ if (result.error) {
477
+ log.warn(`[channels] Whisper error: ${result.error}`);
478
+ return null;
479
+ }
480
+ return result.transcript || null;
481
+ } catch (err: any) {
482
+ log.warn(`[channels] Whisper transcription failed: ${err.message}`);
483
+ return null;
484
+ }
485
+ }
486
+
456
487
  /** Load SCRIPT.md from the active skill configured for this channel */
457
488
  private loadActiveScript(channelConfig: ChannelConfig): string | undefined {
458
489
  const skillName = channelConfig.skill;
@@ -7,6 +7,7 @@ import makeWASocket, {
7
7
  useMultiFileAuthState,
8
8
  makeCacheableSignalKeyStore,
9
9
  fetchLatestWaWebVersion,
10
+ downloadMediaMessage,
10
11
  DisconnectReason,
11
12
  Browsers,
12
13
  type WASocket,
@@ -25,6 +26,9 @@ const AUTH_DIR = path.join(DATA_DIR, 'channels', 'whatsapp', 'auth');
25
26
  /** Callback when a new message arrives */
26
27
  export type OnWhatsAppMessage = (sender: string, senderName: string | undefined, text: string, fromMe: boolean, isSelfChat: boolean) => void;
27
28
 
29
+ /** Callback to transcribe audio via whisper */
30
+ export type TranscribeFn = (audioBase64: string) => Promise<string | null>;
31
+
28
32
  export class WhatsAppChannel implements ChannelProvider {
29
33
  readonly type: ChannelType = 'whatsapp';
30
34
 
@@ -34,9 +38,16 @@ export class WhatsAppChannel implements ChannelProvider {
34
38
  private qrSvg: string | null = null;
35
39
  private onMessage: OnWhatsAppMessage;
36
40
  private onStatusChange: (status: ChannelStatus) => void;
41
+ private transcribe: TranscribeFn | null = null;
37
42
  private reconnectTimer: ReturnType<typeof setTimeout> | null = null;
38
43
  private intentionalDisconnect = false;
39
44
 
45
+ /** IDs of messages we sent — used to prevent echo loops */
46
+ private sentMessageIds = new Set<string>();
47
+ private readonly MAX_SENT_IDS = 100;
48
+ /** Active typing indicator intervals per chat JID */
49
+ private typingIntervals = new Map<string, ReturnType<typeof setInterval>>();
50
+
40
51
  /** Maps LID JIDs to phone JIDs (WhatsApp uses LIDs internally for self-chat) */
41
52
  private lidToPhoneMap = new Map<string, string>();
42
53
  /** Our own phone JID (number@s.whatsapp.net) */
@@ -45,9 +56,11 @@ export class WhatsAppChannel implements ChannelProvider {
45
56
  constructor(
46
57
  onMessage: OnWhatsAppMessage,
47
58
  onStatusChange: (status: ChannelStatus) => void,
59
+ transcribe?: TranscribeFn,
48
60
  ) {
49
61
  this.onMessage = onMessage;
50
62
  this.onStatusChange = onStatusChange;
63
+ this.transcribe = transcribe || null;
51
64
  }
52
65
 
53
66
  async connect(): Promise<void> {
@@ -65,6 +78,9 @@ export class WhatsAppChannel implements ChannelProvider {
65
78
  this.sock.end(undefined);
66
79
  this.sock = null;
67
80
  }
81
+ // Clear all typing intervals
82
+ for (const interval of this.typingIntervals.values()) clearInterval(interval);
83
+ this.typingIntervals.clear();
68
84
  this.connected = false;
69
85
  this.qrData = null;
70
86
  this.qrSvg = null;
@@ -78,8 +94,52 @@ export class WhatsAppChannel implements ChannelProvider {
78
94
  }
79
95
  // Normalize: ensure JID format (number@s.whatsapp.net)
80
96
  const jid = to.includes('@') ? to : `${to.replace(/[^0-9]/g, '')}@s.whatsapp.net`;
81
- await this.sock.sendMessage(jid, { text });
82
- log.info(`[whatsapp] Sent message to ${jid}`);
97
+
98
+ // Clear typing indicator before sending
99
+ this.stopTyping(jid);
100
+
101
+ const result = await this.sock.sendMessage(jid, { text });
102
+
103
+ // Track sent message ID to prevent echo loops
104
+ if (result?.key?.id) {
105
+ this.trackSentId(result.key.id);
106
+ }
107
+
108
+ log.info(`[whatsapp] Sent message to ${jid} (id=${result?.key?.id || 'unknown'})`);
109
+ }
110
+
111
+ /** Show "typing..." indicator in a chat. Re-sends every 20s to keep it visible. */
112
+ startTyping(jid: string): void {
113
+ if (!this.sock || !this.connected) return;
114
+ // Clear any existing interval for this chat
115
+ this.stopTyping(jid);
116
+
117
+ const send = () => {
118
+ this.sock?.sendPresenceUpdate('composing', jid).catch(() => {});
119
+ };
120
+
121
+ send(); // immediate
122
+ this.typingIntervals.set(jid, setInterval(send, 20_000)); // refresh every 20s
123
+ }
124
+
125
+ /** Clear "typing..." indicator in a chat */
126
+ stopTyping(jid: string): void {
127
+ const interval = this.typingIntervals.get(jid);
128
+ if (interval) {
129
+ clearInterval(interval);
130
+ this.typingIntervals.delete(jid);
131
+ }
132
+ this.sock?.sendPresenceUpdate('paused', jid).catch(() => {});
133
+ }
134
+
135
+ /** Track a sent message ID, evicting oldest when at capacity */
136
+ private trackSentId(id: string) {
137
+ this.sentMessageIds.add(id);
138
+ if (this.sentMessageIds.size > this.MAX_SENT_IDS) {
139
+ // Delete the first (oldest) entry
140
+ const first = this.sentMessageIds.values().next().value;
141
+ if (first) this.sentMessageIds.delete(first);
142
+ }
83
143
  }
84
144
 
85
145
  getStatus(): ChannelStatus {
@@ -206,6 +266,8 @@ export class WhatsAppChannel implements ChannelProvider {
206
266
  this.qrData = null;
207
267
  this.qrSvg = null;
208
268
  this.buildLidMap();
269
+ // Set presence to unavailable so the phone doesn't show "online" constantly
270
+ sock.sendPresenceUpdate('unavailable').catch(() => {});
209
271
  log.ok(`[whatsapp] Connected as ${sock.user?.id}`);
210
272
  this.emitStatus();
211
273
  }
@@ -244,18 +306,47 @@ export class WhatsAppChannel implements ChannelProvider {
244
306
  if (msg.key.remoteJid === 'status@broadcast') continue;
245
307
  if (!msg.message) continue;
246
308
 
247
- // Extract text from various message types
248
- const text = this.extractText(msg.message);
249
- if (!text) continue;
309
+ // Echo prevention: skip messages we sent ourselves
310
+ if (msg.key.id && this.sentMessageIds.has(msg.key.id)) {
311
+ this.sentMessageIds.delete(msg.key.id);
312
+ continue;
313
+ }
314
+
315
+ // Extract text — or transcribe audio if it's a voice note
316
+ let rawText = this.extractText(msg.message);
317
+
318
+ if (!rawText && this.isAudioMessage(msg.message)) {
319
+ // Voice note / audio — download and transcribe
320
+ if (!this.transcribe) {
321
+ log.info('[whatsapp] Audio message received but no transcribe function configured — skipping');
322
+ continue;
323
+ }
324
+ try {
325
+ const buffer = await downloadMediaMessage(msg, 'buffer', {}) as Buffer;
326
+ const base64 = buffer.toString('base64');
327
+ log.info(`[whatsapp] Transcribing audio (${Math.round(buffer.length / 1024)}KB)...`);
328
+ const transcript = await this.transcribe(base64);
329
+ if (!transcript) {
330
+ log.warn('[whatsapp] Transcription returned empty — skipping');
331
+ continue;
332
+ }
333
+ rawText = transcript;
334
+ log.info(`[whatsapp] Transcribed: "${rawText.slice(0, 80)}"`);
335
+ } catch (err: any) {
336
+ log.warn(`[whatsapp] Audio transcription failed: ${err.message}`);
337
+ continue;
338
+ }
339
+ }
340
+
341
+ if (!rawText) continue;
342
+
343
+ // Escape special characters to prevent prompt injection via message content
344
+ const text = this.escapeMessageText(rawText);
250
345
 
251
346
  const fromMe = msg.key.fromMe || false;
252
347
  const rawSender = msg.key.remoteJid || '';
253
348
  const participant = msg.key.participant || '';
254
349
 
255
- // Debug: log all available sender fields to find the actual sender
256
- log.info(`[whatsapp] DEBUG msg.key: remoteJid=${rawSender} fromMe=${fromMe} participant=${participant}`);
257
- log.info(`[whatsapp] DEBUG pushName=${msg.pushName || 'none'} verifiedBizName=${(msg as any).verifiedBizName || 'none'}`);
258
-
259
350
  // The actual sender JID — use participant if available (newer protocol), fallback to remoteJid
260
351
  const actualSender = participant || rawSender;
261
352
 
@@ -302,6 +393,25 @@ export class WhatsAppChannel implements ChannelProvider {
302
393
  return null;
303
394
  }
304
395
 
396
+ /** Check if a message contains audio (voice note or audio file) */
397
+ private isAudioMessage(message: any): boolean {
398
+ if (!message) return false;
399
+ if (message.audioMessage) return true;
400
+ // Check inside wrappers
401
+ if (message.viewOnceMessage?.message?.audioMessage) return true;
402
+ if (message.viewOnceMessageV2?.message?.audioMessage) return true;
403
+ if (message.ephemeralMessage?.message?.audioMessage) return true;
404
+ return false;
405
+ }
406
+
407
+ /** Escape message text to prevent prompt injection via special characters */
408
+ private escapeMessageText(text: string): string {
409
+ return text
410
+ .replace(/\[WhatsApp\s*\|/gi, '(WhatsApp|') // prevent faking channel context tags
411
+ .replace(/\[Telegram\s*\|/gi, '(Telegram|')
412
+ .replace(/\[\s*(admin|customer)\s*\]/gi, '($1)'); // prevent faking role tags
413
+ }
414
+
305
415
  private emitStatus() {
306
416
  this.onStatusChange(this.getStatus());
307
417
  }