whatsapp-pi 1.0.67 → 1.0.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -21
- package/package.json +4 -2
- package/src/i18n.ts +14 -0
- package/src/services/audio.service.ts +133 -90
- package/src/services/storage-path.ts +18 -12
- package/src/services/whisper-cpp-audio.transcriber.ts +179 -0
- package/whatsapp-pi.ts +15 -14
package/README.md
CHANGED
|
@@ -26,7 +26,7 @@ Pi is a powerful agentic AI coding assistant that operates in your terminal. Thi
|
|
|
26
26
|
- **Group-Only Mode**: Bind the agent to a single WhatsApp group with `--whatsapp-group`
|
|
27
27
|
- **Media Support**:
|
|
28
28
|
- **Vision Analysis**: Automatically forwards WhatsApp images to Pi for analysis.
|
|
29
|
-
- **Audio Transcription**: Transcribes voice notes
|
|
29
|
+
- **Audio Transcription**: Transcribes voice notes locally with Whisper.cpp (`whisper-cpp-node`); `ffmpeg` is used to convert WhatsApp audio to 16 kHz mono WAV first.
|
|
30
30
|
- **Document Handling**: Downloads and stores documents (PDF, text) for agent access; PDFs include a bounded text preview when readable.
|
|
31
31
|
|
|
32
32
|
## Prerequisites
|
|
@@ -59,11 +59,15 @@ See the [Pi documentation](https://pi.dev/docs/latest) for full setup, providers
|
|
|
59
59
|
|
|
60
60
|
### Audio Transcription
|
|
61
61
|
|
|
62
|
-
|
|
62
|
+
Audio transcription uses `whisper-cpp-node` and `ffmpeg`.
|
|
63
|
+
|
|
64
|
+
Install dependencies:
|
|
63
65
|
```bash
|
|
64
|
-
|
|
66
|
+
npm install
|
|
65
67
|
```
|
|
66
68
|
|
|
69
|
+
Make sure `ffmpeg` is available in PATH.
|
|
70
|
+
|
|
67
71
|
PDF documents are parsed locally and do not require extra system utilities.
|
|
68
72
|
If a PDF cannot be parsed automatically, it is still saved and forwarded with a clear fallback notice.
|
|
69
73
|
|
|
@@ -109,7 +113,7 @@ npm install
|
|
|
109
113
|
pi -e whatsapp-pi.ts
|
|
110
114
|
```
|
|
111
115
|
|
|
112
|
-
For verbose mode (shows Baileys trace logs for debugging):
|
|
116
|
+
For verbose mode (shows Baileys trace logs and audio timing logs for debugging):
|
|
113
117
|
```bash
|
|
114
118
|
pi -e whatsapp-pi.ts --verbose
|
|
115
119
|
```
|
|
@@ -138,7 +142,7 @@ The extension registers the following tools that the Pi agent can call:
|
|
|
138
142
|
| `get_wa_conversation_history` | read-only | Get the most recent messages with a given `senderNumber` (accepts `+E164`, raw digits, or a JID). Supports `limit`. |
|
|
139
143
|
| `check_wa_new_messages` | read-only | List conversations whose most recent message is incoming (i.e. waiting for a reply). Supports `sinceTimestamp` (ms epoch). |
|
|
140
144
|
|
|
141
|
-
The three read-only tools query the local recents store at `~/.pi/agent/
|
|
145
|
+
The three read-only tools query the local recents store at `~/.pi/agent/extensions/whatsapp-pi/recents/recents.json`. They never touch the network and do not mark messages as read.
|
|
142
146
|
|
|
143
147
|
## WhatsApp Numbers and JIDs
|
|
144
148
|
|
|
@@ -168,8 +172,6 @@ The three read-only tools query the local recents store at `~/.pi/agent/extensio
|
|
|
168
172
|
- **Add Group** - Add a WhatsApp group JID to the allowed groups list (format: 120363012345@g.us)
|
|
169
173
|
- **Select a group** - Open a submenu with **History**, **Send Message**, **Print Group JID**, **Add Alias**, **Remove Alias**, **Remove Group**, and **Back**
|
|
170
174
|
|
|
171
|
-
- **Back** - Return to main menu
|
|
172
|
-
|
|
173
175
|
### Recents Management
|
|
174
176
|
- **History** - Open full message history for that conversation
|
|
175
177
|
- **Send Message** - Send a new message without Pi suffix
|
|
@@ -187,7 +189,6 @@ Send these commands directly in WhatsApp to control the agent session:
|
|
|
187
189
|
|
|
188
190
|
```
|
|
189
191
|
src/
|
|
190
|
-
├── models/ # Type definitions
|
|
191
192
|
├── services/ # Core services (WhatsApp, Session, Recents, Media)
|
|
192
193
|
└── ui/ # Menu handlers and TUI views
|
|
193
194
|
|
|
@@ -202,17 +203,10 @@ Run tests:
|
|
|
202
203
|
npm test
|
|
203
204
|
```
|
|
204
205
|
|
|
205
|
-
##
|
|
206
|
-
|
|
207
|
-
### Recent Feature Updates (2026-05)
|
|
206
|
+
## Notes
|
|
208
207
|
|
|
209
|
-
-
|
|
210
|
-
-
|
|
211
|
-
-
|
|
212
|
-
-
|
|
213
|
-
-
|
|
214
|
-
- **Media Support**: Images are forwarded for vision analysis, audio is transcribed with Whisper, and PDFs are saved under `./.pi-data/whatsapp/documents/` with local text preview when available.
|
|
215
|
-
- **Session Handling**: Saved state, allow list, and startup reconnects are restored automatically when available.
|
|
216
|
-
- **Intelligent Message Filtering**: Messages ending with `π` are ignored to prevent bot loops.
|
|
217
|
-
- **Storage Management**: Persistent data lives under `.pi-data/` plus the recents store in the user home directory.
|
|
218
|
-
- **Improved Test Coverage (v1.0.59)**: Added unit tests for the `message_end` auto-reply handler, covering the happy path, disconnected guard, role guard, send failure, thrown exceptions, and the `send_wa_message` dedup flag. Fixed a Windows path separator bug in the recents service test suite.
|
|
208
|
+
- `--whatsapp-pi-online` auto-connects when credentials already exist.
|
|
209
|
+
- `--whatsapp-group <jid>` binds Pi to one WhatsApp group.
|
|
210
|
+
- Media handling is local: images for vision, audio via Whisper.cpp + ffmpeg, documents stored under `.pi-data/whatsapp/documents/`.
|
|
211
|
+
- Recents/history live in `~/.pi/agent/extensions/whatsapp-pi/recents/recents.json`.
|
|
212
|
+
- Session state, allow lists, and startup reconnects are persisted locally.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "whatsapp-pi",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.69",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "WhatsApp integration extension for Pi",
|
|
6
6
|
"main": "whatsapp-pi.ts",
|
|
@@ -33,9 +33,11 @@
|
|
|
33
33
|
},
|
|
34
34
|
"dependencies": {
|
|
35
35
|
"@llamaindex/liteparse": "^1.5.3",
|
|
36
|
+
"@sinclair/typebox": "^0.27.10",
|
|
36
37
|
"baileys": "^6.7.21",
|
|
37
38
|
"pino": "^10.3.1",
|
|
38
|
-
"qrcode-terminal": "^0.12.0"
|
|
39
|
+
"qrcode-terminal": "^0.12.0",
|
|
40
|
+
"whisper-cpp-node": "^0.2.12"
|
|
39
41
|
},
|
|
40
42
|
"devDependencies": {
|
|
41
43
|
"@eslint/js": "^9.39.4",
|
package/src/i18n.ts
CHANGED
|
@@ -220,6 +220,13 @@ const fallback = {
|
|
|
220
220
|
"incoming.media.documentDownloadFailed": "[WhatsApp-Pi] Failed to download document:",
|
|
221
221
|
"incoming.media.documentDownloadFailedText": "[Document: {fileName} (download failed)]",
|
|
222
222
|
"audio.emptyTranscription": "[Empty transcription]",
|
|
223
|
+
"audio.phase.download": "download",
|
|
224
|
+
"audio.phase.write": "write file",
|
|
225
|
+
"audio.phase.convert": "convert",
|
|
226
|
+
"audio.phase.whisper": "whisper",
|
|
227
|
+
"audio.phase.read": "read transcription",
|
|
228
|
+
"audio.phase.total": "total",
|
|
229
|
+
"audio.phaseTiming": "[WhatsApp-Pi] Audio {phase}: {duration} ms",
|
|
223
230
|
"audio.transcriptionError": "[AudioService] Transcription error:",
|
|
224
231
|
"audio.transcriptionErrorResult": "[Transcription error: {error}]",
|
|
225
232
|
"baileys.filter.failedDecrypt": "Failed to decrypt message with any known session",
|
|
@@ -393,6 +400,13 @@ const translations: Record<Locale, Partial<Record<Key, string>>> = {
|
|
|
393
400
|
"session.manager.failedDeleteAuthState": "Falha ao excluir estado de autenticação:",
|
|
394
401
|
"incoming.media.audioTranscribing": "[WhatsApp-Pi] Transcrevendo áudio de {pushName}...",
|
|
395
402
|
"incoming.media.audioTranscribed": "[Áudio transcrito]: {transcription}",
|
|
403
|
+
"audio.phase.download": "baixar",
|
|
404
|
+
"audio.phase.write": "salvar arquivo",
|
|
405
|
+
"audio.phase.convert": "converter",
|
|
406
|
+
"audio.phase.whisper": "transcrever",
|
|
407
|
+
"audio.phase.read": "ler transcrição",
|
|
408
|
+
"audio.phase.total": "total",
|
|
409
|
+
"audio.phaseTiming": "[WhatsApp-Pi] Áudio {phase}: {duration} ms",
|
|
396
410
|
"incoming.media.imageDownloading": "[WhatsApp-Pi] Baixando imagem de {pushName}...",
|
|
397
411
|
"incoming.media.imageDownloaded": "[WhatsApp-Pi] Imagem baixada. MIME: {imageMimeType} (original: {rawMime}), Tamanho: {size} bytes",
|
|
398
412
|
"incoming.media.imageDownloadFailed": "[WhatsApp-Pi] Falha ao baixar imagem:",
|
|
@@ -1,90 +1,133 @@
|
|
|
1
|
-
import { downloadContentFromMessage } from 'baileys';
|
|
2
|
-
import {
|
|
3
|
-
import { promisify } from 'node:util';
|
|
4
|
-
import { writeFile, mkdir } from 'node:fs/promises';
|
|
5
|
-
import { join } from 'node:path';
|
|
6
|
-
import { existsSync } from 'node:fs';
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
1
|
+
import { downloadContentFromMessage } from 'baileys';
import { execFile } from 'node:child_process';
import { existsSync } from 'node:fs';
import { mkdir, unlink, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { promisify } from 'node:util';
import { createStoragePaths } from './storage-path.js';
import { WhatsAppPiLogger } from './whatsapp-pi.logger.js';
import { tryCreateWhisperCppAudioTranscriber, type AudioTranscriber } from './whisper-cpp-audio.transcriber.js';
import { t } from '../i18n.js';
|
|
11
|
+
|
|
12
|
+
/** Promise-returning `execFile`; ffmpeg is spawned directly, without a shell. */
const execFileAsync = promisify(execFile);

/** The subset of the logger this service writes to. */
type AudioLogger = Pick<WhatsAppPiLogger, 'log' | 'error'>;

/** Pipeline stages that get a timing log line. */
type AudioPhase = 'download' | 'write' | 'convert' | 'whisper' | 'total';

/**
 * Downloads a WhatsApp audio message, converts it to 16 kHz mono PCM WAV with
 * ffmpeg and transcribes it through the injected (or auto-created) transcriber.
 */
export class AudioService {
  private readonly mediaDir = createStoragePaths().mediaDir;
  private readonly logger: AudioLogger;
  private readonly whisperCppTranscriber: AudioTranscriber | null;
  private readonly ffmpegCommands = process.platform === 'win32' ? ['ffmpeg', 'ffmpeg.exe'] : ['ffmpeg'];

  /**
   * @param logger Receives the per-phase timing lines.
   * @param whisperCppTranscriber Transcriber to use. `undefined` means "try to
   *   create the whisper.cpp one"; `null` explicitly disables transcription.
   */
  constructor(logger: AudioLogger = new WhatsAppPiLogger(false), whisperCppTranscriber?: AudioTranscriber | null) {
    this.logger = logger;
    this.whisperCppTranscriber = whisperCppTranscriber === undefined
      ? tryCreateWhisperCppAudioTranscriber(logger)
      : whisperCppTranscriber;

    if (!existsSync(this.mediaDir)) {
      // Best effort only: transcribe() creates the directory again before writing.
      mkdir(this.mediaDir, { recursive: true }).catch(() => {});
    }
  }

  /**
   * Transcribes one audio message.
   *
   * Never rejects: any failure is logged and returned as the localized
   * `audio.transcriptionErrorResult` string.
   *
   * @param audioMessage Message payload handed to `downloadContentFromMessage`.
   * @returns The trimmed transcription, the localized "empty transcription"
   *   placeholder, or the localized error text.
   */
  async transcribe(audioMessage: any): Promise<string> {
    const totalStart = Date.now();
    let wavToRemove: string | undefined;

    try {
      const filename = `audio_${Date.now()}`;
      const inputPath = join(this.mediaDir, `${filename}.ogg`);
      const wavPath = join(this.mediaDir, `${filename}.wav`);

      const buffer = await this.measurePhase('download', async () => {
        const stream = await downloadContentFromMessage(audioMessage, 'audio');
        // Collect first, concatenate once: avoids re-copying the buffer per chunk.
        const chunks: Buffer[] = [];

        for await (const chunk of stream) {
          chunks.push(chunk);
        }

        return Buffer.concat(chunks);
      });

      await this.measurePhase('write', async () => {
        // The constructor's mkdir is fire-and-forget, so make sure the directory exists now.
        await mkdir(this.mediaDir, { recursive: true });
        await writeFile(inputPath, buffer);
      });

      // From here on ffmpeg may have created the intermediate WAV.
      wavToRemove = wavPath;
      await this.measurePhase('convert', async () => {
        await this.convertToWav(inputPath, wavPath);
      });

      const whisperCppTranscriber = this.whisperCppTranscriber;
      if (!whisperCppTranscriber) {
        throw new Error('whisper-cpp-node unavailable');
      }

      return await this.measurePhase('whisper', async () => {
        const transcription = await whisperCppTranscriber.transcribe(wavPath);
        const text = String(transcription ?? '').trim();
        return text || t('audio.emptyTranscription');
      });
    } catch (error) {
      console.error(t('audio.transcriptionError'), error);
      return t('audio.transcriptionErrorResult', { error: error instanceof Error ? error.message : String(error) });
    } finally {
      this.logger.log(t('audio.phaseTiming', { phase: t('audio.phase.total'), duration: Date.now() - totalStart }));

      // The WAV is only a conversion artifact; remove it so it does not pile up.
      // The downloaded .ogg is intentionally left in the media directory.
      if (wavToRemove) {
        await unlink(wavToRemove).catch(() => undefined);
      }
    }
  }

  /** Runs `action` and logs how long it took, whether it succeeded or threw. */
  private async measurePhase<T>(phase: Exclude<AudioPhase, 'total'>, action: () => Promise<T>): Promise<T> {
    const start = Date.now();

    try {
      return await action();
    } finally {
      this.logger.log(t('audio.phaseTiming', { phase: this.getPhaseLabel(phase), duration: Date.now() - start }));
    }
  }

  /** Maps a phase id to its localized label. */
  private getPhaseLabel(phase: Exclude<AudioPhase, 'total'>): string {
    switch (phase) {
      case 'download':
        return t('audio.phase.download');
      case 'write':
        return t('audio.phase.write');
      case 'convert':
        return t('audio.phase.convert');
      case 'whisper':
        return t('audio.phase.whisper');
    }
  }

  /**
   * Converts `inputPath` to a 16 kHz, mono, 16-bit PCM WAV at `outputPath`.
   *
   * Tries every candidate in `ffmpegCommands`; moves on to the next one only
   * when the failure means "command not found", otherwise rethrows immediately.
   *
   * @throws The last "not found" error when no candidate could be started.
   */
  private async convertToWav(inputPath: string, outputPath: string): Promise<void> {
    const args = ['-y', '-i', inputPath, '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', outputPath];
    let lastError: unknown;

    for (const command of this.ffmpegCommands) {
      try {
        await execFileAsync(command, args, { windowsHide: true });
        return;
      } catch (error) {
        lastError = error;
        if (!this.isMissingFfmpegCommand(error)) {
          throw error;
        }
      }
    }

    throw lastError instanceof Error ? lastError : new Error('ffmpeg unavailable');
  }

  /**
   * Tells whether `error` means the ffmpeg executable could not be found.
   *
   * `execFile` spawns without a shell, so a missing binary surfaces as a spawn
   * error with `code === 'ENOENT'`. The numeric codes (127 for POSIX shells,
   * 9009 for cmd.exe) and the message patterns are kept for shell-wrapped setups.
   */
  private isMissingFfmpegCommand(error: unknown): boolean {
    if (!(error instanceof Error)) {
      return false;
    }

    const anyError = error as Error & { code?: number | string; stderr?: string };
    const message = `${anyError.message}\n${anyError.stderr ?? ''}`;

    return anyError.code === 'ENOENT'
      || anyError.code === 127
      || anyError.code === 9009
      || /not found|not recognized/i.test(message);
  }
}
|
|
@@ -3,11 +3,11 @@ import { homedir } from 'os';
|
|
|
3
3
|
import { join } from 'path';
|
|
4
4
|
|
|
5
5
|
/**
 * Default storage root for the extension:
 * `<home>/.pi/agent/extensions/whatsapp-pi`.
 */
export function getDefaultStorageRoot(): string {
  const segments = ['.pi', 'agent', 'extensions', 'whatsapp-pi'];
  return join(homedir(), ...segments);
}
|
|
8
8
|
|
|
9
9
|
/**
 * Default legacy storage root: `<home>/.pi/agent/extension/whatsapp-pi`.
 *
 * NOTE(review): the `extension` segment is singular here, unlike the current
 * root; presumably that is the directory an earlier release used — confirm.
 */
export function getDefaultLegacyStorageRoot(): string {
  const segments = ['.pi', 'agent', 'extension', 'whatsapp-pi'];
  return join(homedir(), ...segments);
}
|
|
12
12
|
|
|
13
13
|
export interface StoragePaths {
|
|
@@ -71,20 +71,26 @@ async function copyEntry(source: string, target: string) {
|
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
/**
 * Copies the top-level entries of every existing legacy storage directory into
 * `paths.root`.
 *
 * Candidates are `paths.legacyRoot` and `<home>/.pi/whatsapp-pi`, skipping empty
 * values, duplicates and anything equal to `paths.root`. The target root is
 * created even when no candidate exists.
 *
 * @returns `true` when at least one legacy directory existed and was processed.
 */
export async function migrateLegacyStorage(paths: Pick<StoragePaths, 'root' | 'legacyRoot'>): Promise<boolean> {
  const candidates: string[] = [];
  for (const candidate of [paths.legacyRoot, join(homedir(), '.pi', 'whatsapp-pi')]) {
    if (candidate && candidate !== paths.root && !candidates.includes(candidate)) {
      candidates.push(candidate);
    }
  }

  await mkdir(paths.root, { recursive: true });

  let copiedAny = false;
  for (const legacyRoot of candidates) {
    if (await pathExists(legacyRoot)) {
      const entries = await readdir(legacyRoot, { withFileTypes: true });
      for (const { name } of entries) {
        await copyEntry(join(legacyRoot, name), join(paths.root, name));
      }
      copiedAny = true;
    }
  }

  return copiedAny;
}
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import { createRequire } from 'node:module';
|
|
2
|
+
import { dirname, join } from 'node:path';
|
|
3
|
+
import { createWriteStream } from 'node:fs';
|
|
4
|
+
import { mkdir, stat, unlink } from 'node:fs/promises';
|
|
5
|
+
import https from 'node:https';
|
|
6
|
+
import { createStoragePaths } from './storage-path.js';
|
|
7
|
+
import type { WhatsAppPiLogger } from './whatsapp-pi.logger.js';
|
|
8
|
+
|
|
9
|
+
/** Logger surface used by this module. */
type AudioLogger = Pick<WhatsAppPiLogger, 'log' | 'error'>;

/** Handle returned by `createWhisperContext`; `free` is optional on it. */
type WhisperContext = {
  free?: () => void;
};

/** Result of `transcribeAsync`: segments are either 3-string tuples or `{ text }` objects. */
type WhisperResult = {
  segments?: Array<[string, string, string] | { text?: string }>;
};

/** The part of the `whisper-cpp-node` API this file relies on. */
type WhisperModule = {
  createWhisperContext: (options: { model: string; use_gpu?: boolean; no_prints?: boolean }) => WhisperContext;
  transcribeAsync: (context: WhisperContext, options: Record<string, unknown>) => Promise<WhisperResult>;
};

/** Source of the model file fetched when none is present locally. */
const DEFAULT_MODEL_URL = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin';
/** File name the model is stored under. */
const MODEL_FILENAME = 'ggml-base.bin';

// Module-level caches: the loaded library, the live context, and the model path it was built from.
let whisperModule: WhisperModule | undefined;
let cachedContext: WhisperContext | undefined;
let cachedModelPath: string | undefined;

/** Turns an audio file on disk into text. */
export interface AudioTranscriber {
  transcribe(inputPath: string): Promise<string>;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Loads `whisper-cpp-node` once and caches it for later calls.
 *
 * The package is pulled in through `createRequire` so that a missing or broken
 * install becomes a catchable error instead of failing at module import time.
 *
 * @throws Error with the "not installed" hint when the module cannot be
 *   resolved, or with the underlying reason for any other load failure.
 */
function loadWhisperModule(): WhisperModule {
  if (whisperModule) {
    return whisperModule;
  }

  const require = createRequire(import.meta.url);
  try {
    whisperModule = require('whisper-cpp-node') as WhisperModule;
  } catch (error) {
    const code = error instanceof Error ? (error as Error & { code?: unknown }).code : undefined;
    if (code === 'MODULE_NOT_FOUND') {
      throw new Error('whisper-cpp-node not installed. Run npm install.');
    }

    // Any other failure means the package is present but could not be loaded;
    // keep the real reason instead of blaming a missing install.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`whisper-cpp-node failed to load: ${reason}`);
  }

  return whisperModule;
}
|
|
47
|
+
|
|
48
|
+
/** Location of the model file: `<storage root>/whisper/models/<MODEL_FILENAME>`. */
function getModelPath(): string {
  const storageRoot = createStoragePaths().root;
  return join(storageRoot, 'whisper', 'models', MODEL_FILENAME);
}
|
|
52
|
+
|
|
53
|
+
/**
 * Downloads `url` to `targetPath` over HTTPS, following redirects.
 *
 * The parent directory is created and any existing file at `targetPath` is
 * removed first. On failure the partially written file is deleted.
 *
 * @param url Address to fetch.
 * @param targetPath Destination file.
 * @param redirectsLeft How many more redirects may be followed.
 * @throws Error on a non-200 final status, too many redirects, an interrupted
 *   transfer, or any request / file-system error.
 */
async function downloadFile(url: string, targetPath: string, redirectsLeft = 5): Promise<void> {
  await mkdir(dirname(targetPath), { recursive: true });
  await unlink(targetPath).catch(() => undefined);

  let redirectUrl: string | undefined;

  try {
    // Resolves with the next URL for a redirect, or undefined once the body is fully on disk.
    redirectUrl = await new Promise<string | undefined>((resolve, reject) => {
      let settled = false;
      let file: ReturnType<typeof createWriteStream> | undefined;
      let fileClosed = false;

      const succeed = (nextUrl?: string) => {
        if (settled) {
          return;
        }
        settled = true;
        resolve(nextUrl);
      };

      const fail = (error: Error) => {
        if (settled) {
          return;
        }
        settled = true;
        request.destroy();

        if (file && !fileClosed) {
          // Reject only after the descriptor is released, so the caller can delete the partial file.
          file.once('close', () => reject(error));
          file.destroy();
        } else {
          reject(error);
        }
      };

      const request = https.get(url, (response) => {
        const { statusCode } = response;
        const { location } = response.headers;

        if (statusCode && statusCode >= 300 && statusCode < 400 && location) {
          // No file has been opened yet, so there is nothing for the next attempt to race against.
          response.resume();
          succeed(new URL(location, url).toString());
          return;
        }

        if (statusCode !== 200) {
          response.resume(); // drain the body so the socket is released
          fail(new Error(`Model download failed: HTTP ${statusCode}`));
          return;
        }

        const output = createWriteStream(targetPath);
        file = output;

        output.on('error', fail);
        output.once('close', () => {
          fileClosed = true;
          if (output.writableFinished && response.complete) {
            succeed();
          } else {
            fail(new Error('Model download interrupted'));
          }
        });

        // pipe() does not forward source errors; without these an aborted
        // transfer would leave the promise pending forever.
        response.on('error', fail);
        response.once('close', () => {
          if (!response.complete) {
            fail(new Error('Model download interrupted'));
          }
        });

        response.pipe(output);
      });

      request.on('error', fail);
    });
  } catch (error) {
    await unlink(targetPath).catch(() => undefined);
    throw error;
  }

  if (redirectUrl !== undefined) {
    if (redirectsLeft <= 0) {
      throw new Error('Model download failed: too many redirects');
    }
    await downloadFile(redirectUrl, targetPath, redirectsLeft - 1);
  }
}
|
|
101
|
+
|
|
102
|
+
/**
 * Returns the model path, downloading the model first when the file is missing
 * or empty.
 *
 * @param logger Receives a line when a download is started.
 */
async function ensureWhisperModel(logger: AudioLogger): Promise<string> {
  const modelPath = getModelPath();

  let modelPresent = false;
  try {
    modelPresent = (await stat(modelPath)).size > 0;
  } catch {
    // stat failed: treat as "no model yet"
    modelPresent = false;
  }

  if (!modelPresent) {
    logger.log(`[WhatsApp-Pi] Whisper.cpp model download: ${modelPath}`);
    await downloadFile(DEFAULT_MODEL_URL, modelPath);
  }

  return modelPath;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Builds a whisper.cpp context for the model at `modelPath`, with GPU use and
 * library prints turned off.
 */
async function createContext(modelPath: string, logger: AudioLogger): Promise<WhisperContext> {
  const createWhisperContext = loadWhisperModule().createWhisperContext;
  logger.log('[WhatsApp-Pi] Whisper.cpp context init');

  const contextOptions = { model: modelPath, use_gpu: false, no_prints: true };
  return createWhisperContext(contextOptions);
}
|
|
128
|
+
|
|
129
|
+
/** In-flight initialization shared by concurrent callers of `ensureContext`. */
let pendingContextInit: Promise<WhisperContext> | undefined;

/**
 * Returns the cached whisper.cpp context, creating it (and fetching the model)
 * on first use or when the model path has changed.
 *
 * Callers that arrive while an initialization is running share its result, so
 * the model is downloaded and the context created only once.
 */
async function ensureContext(logger: AudioLogger): Promise<WhisperContext> {
  if (!pendingContextInit) {
    pendingContextInit = (async () => {
      const modelPath = await ensureWhisperModel(logger);

      let context = cachedContext;
      if (!context || cachedModelPath !== modelPath) {
        context?.free?.();
        // Drop the stale reference first: if creation throws, a freed context must not stay cached.
        cachedContext = undefined;
        cachedModelPath = undefined;

        context = await createContext(modelPath, logger);
        cachedContext = context;
        cachedModelPath = modelPath;
      }

      return context;
    })().finally(() => {
      pendingContextInit = undefined;
    });
  }

  return pendingContextInit;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Joins the text of all segments into one space-separated string.
 *
 * A tuple segment contributes its third element, an object segment its `text`.
 * Blank pieces are skipped; a missing result or segment list yields `''`.
 */
function extractText(result: WhisperResult): string {
  const pieces: string[] = [];

  for (const segment of result?.segments ?? []) {
    const raw = Array.isArray(segment) ? segment[2] : segment.text;
    const cleaned = String(raw ?? '').trim();
    if (cleaned) {
      pieces.push(cleaned);
    }
  }

  return pieces.join(' ').trim();
}
|
|
149
|
+
|
|
150
|
+
/**
 * Creates a whisper.cpp-backed transcriber, or returns `null` when the
 * `whisper-cpp-node` module cannot be loaded.
 *
 * The model and context are prepared lazily on the first `transcribe` call.
 *
 * @param logger Receives progress lines and the reason when loading fails.
 * @param language Language code passed to whisper.cpp; language detection
 *   stays disabled. Defaults to `'pt'`.
 */
export function tryCreateWhisperCppAudioTranscriber(logger: AudioLogger, language = 'pt'): AudioTranscriber | null {
  try {
    loadWhisperModule();
  } catch (error) {
    // Report why transcription is disabled instead of failing silently.
    const reason = error instanceof Error ? error.message : String(error);
    logger.error(`[WhatsApp-Pi] Whisper.cpp unavailable: ${reason}`);
    return null;
  }

  return {
    async transcribe(inputPath: string): Promise<string> {
      const context = await ensureContext(logger);
      const { transcribeAsync } = loadWhisperModule();
      logger.log('[WhatsApp-Pi] Whisper.cpp transcribe');

      const result = await transcribeAsync(context, {
        fname_inp: inputPath,
        language,
        no_timestamps: true,
        no_context: false,
        detect_language: false
      });

      return extractText(result);
    }
  };
}
|
|
174
|
+
|
|
175
|
+
/** Frees the cached whisper.cpp context, if any, and clears the cached context and model path. */
export function freeWhisperCppContext() {
  if (cachedContext?.free != null) {
    cachedContext.free();
  }

  cachedModelPath = undefined;
  cachedContext = undefined;
}
|
package/whatsapp-pi.ts
CHANGED
|
@@ -43,8 +43,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
43
43
|
const sessionManager = new SessionManager();
|
|
44
44
|
const whatsappService = new WhatsAppService(sessionManager);
|
|
45
45
|
const recentsService = new RecentsService(sessionManager);
|
|
46
|
-
const audioService = new AudioService();
|
|
47
46
|
const logger = new WhatsAppPiLogger(false);
|
|
47
|
+
const audioService = new AudioService(logger);
|
|
48
48
|
const incomingMediaService = new IncomingMediaService(audioService, logger);
|
|
49
49
|
const menuHandler = new MenuHandler(whatsappService, sessionManager, recentsService);
|
|
50
50
|
let _ctx: ExtensionContext | undefined;
|
|
@@ -315,20 +315,21 @@ export default function (pi: ExtensionAPI) {
|
|
|
315
315
|
};
|
|
316
316
|
}
|
|
317
317
|
|
|
318
|
-
const
|
|
318
|
+
const message = params.message ?? '';
|
|
319
|
+
const formattedMessage = message
|
|
319
320
|
.split('\n')
|
|
320
|
-
.map((line) => ` ${line}`)
|
|
321
|
+
.map((line: string) => ` ${line}`)
|
|
321
322
|
.join('\n');
|
|
322
323
|
|
|
323
|
-
|
|
324
|
+
logger.log([
|
|
324
325
|
t("log.outgoing.title"),
|
|
325
|
-
t("log.outgoing.to", { jid:
|
|
326
|
+
t("log.outgoing.to", { jid: resolvedJid }),
|
|
326
327
|
t("log.outgoing.message"),
|
|
327
328
|
formattedMessage
|
|
328
329
|
].join('\n'));
|
|
329
330
|
|
|
330
331
|
const outboundJid = whatsappService.resolveOutboundRecipientJid(resolvedJid);
|
|
331
|
-
const result = await whatsappService.sendMessage(outboundJid,
|
|
332
|
+
const result = await whatsappService.sendMessage(outboundJid, message);
|
|
332
333
|
|
|
333
334
|
if (result.success) {
|
|
334
335
|
// Mark that tool already sent to this JID — prevents message_end from re-sending
|
|
@@ -336,20 +337,20 @@ export default function (pi: ExtensionAPI) {
|
|
|
336
337
|
await recentsService.recordMessage({
|
|
337
338
|
messageId: result.messageId!,
|
|
338
339
|
senderNumber: toRecentSenderNumber(outboundJid),
|
|
339
|
-
text:
|
|
340
|
+
text: message,
|
|
340
341
|
direction: 'outgoing',
|
|
341
342
|
timestamp: Date.now()
|
|
342
343
|
});
|
|
343
|
-
|
|
344
|
+
logger.log([
|
|
344
345
|
t("log.result.title"),
|
|
345
|
-
t("log.outgoing.to", { jid:
|
|
346
|
+
t("log.outgoing.to", { jid: resolvedJid }),
|
|
346
347
|
t("log.result.status.sent"),
|
|
347
348
|
t("log.result.messageId", { messageId: result.messageId ?? t("log.unknownMessageId") })
|
|
348
349
|
].join('\n'));
|
|
349
350
|
} else {
|
|
350
|
-
|
|
351
|
+
logger.log([
|
|
351
352
|
t("log.result.title"),
|
|
352
|
-
t("log.outgoing.to", { jid:
|
|
353
|
+
t("log.outgoing.to", { jid: resolvedJid }),
|
|
353
354
|
t("log.result.status.failed"),
|
|
354
355
|
t("log.result.error", { error: result.error ?? t("log.unknownError") })
|
|
355
356
|
].join('\n'));
|
|
@@ -388,9 +389,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
388
389
|
// Create sender with the socket
|
|
389
390
|
const sender = new ReactionSender(socket as any);
|
|
390
391
|
const result = await sender.sendReaction({
|
|
391
|
-
jid: params.jid,
|
|
392
|
-
messageId: params.messageId,
|
|
393
|
-
emoji: params.emoji
|
|
392
|
+
jid: params.jid ?? '',
|
|
393
|
+
messageId: params.messageId ?? '',
|
|
394
|
+
emoji: params.emoji ?? ''
|
|
394
395
|
});
|
|
395
396
|
|
|
396
397
|
return {
|