agentgui 1.0.195 → 1.0.196
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/speech.js +72 -1
- package/package.json +1 -1
package/lib/speech.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { createRequire } from 'module';
|
|
2
2
|
import fs from 'fs';
|
|
3
3
|
import path from 'path';
|
|
4
|
+
import http from 'http';
|
|
4
5
|
import { fileURLToPath } from 'url';
|
|
5
6
|
|
|
6
7
|
// ES modules have no ambient `require`; build one anchored at this file so the
// module can still load packages through `require(...)`.
const require = createRequire(import.meta.url);
|
|
@@ -12,6 +13,63 @@ const serverTTS = require('webtalk/server-tts');
|
|
|
12
13
|
|
|
13
14
|
// Extra voice directories (the `voices` folder under ROOT) that this module
// passes to every serverTTS call which accepts a directory list.
const EXTRA_VOICE_DIRS = [path.join(ROOT, 'voices')];
|
|
14
15
|
|
|
16
|
+
// Built-in voice catalogue that getVoices() always lists first.
// Each tuple is [id, display name, gender]; every entry shares the same accent.
const POCKET_TTS_VOICES = [
  ['default', 'Default', 'female'],
  ['alba', 'Alba', 'female'],
  ['marius', 'Marius', 'male'],
  ['javert', 'Javert', 'male'],
  ['jean', 'Jean', 'male'],
  ['fantine', 'Fantine', 'female'],
  ['cosette', 'Cosette', 'female'],
  ['eponine', 'Eponine', 'female'],
  ['azelma', 'Azelma', 'female'],
].map(([id, name, gender]) => ({ id, name, gender, accent: 'French' }));

// Ids of every built-in voice except the 'default' placeholder.
const PREDEFINED_IDS = new Set();
for (const { id } of POCKET_TTS_VOICES) {
  if (id !== 'default') {
    PREDEFINED_IDS.add(id);
  }
}

// Local port that synthesizeDirect() sends its HTTP requests to.
const POCKET_PORT = 8787;

// Evaluated once at module load: true when the upstream voice list contains no
// non-custom 'alba' entry. synthesize()/synthesizeStream() use this flag to
// route predefined voices through synthesizeDirect() instead of serverTTS.
const needsPatch = serverTTS
  .getVoices(EXTRA_VOICE_DIRS)
  .every((voice) => voice.id !== 'alba' || voice.isCustom);
|
|
32
|
+
|
|
33
|
+
/**
 * Synthesizes speech by POSTing a multipart form directly to the local
 * pocket-tts HTTP endpoint (`http://127.0.0.1:${POCKET_PORT}/tts`).
 *
 * Voice selection:
 *  - when serverTTS.findVoiceFile() resolves a file for `voiceId`, the file's
 *    bytes are uploaded in the `voice_wav` field;
 *  - otherwise, when `voiceId` is one of PREDEFINED_IDS, the id is sent in the
 *    `voice_url` field;
 *  - otherwise only the `text` field is sent.
 *
 * Every failure (voice lookup, file read, connection error, timeout, non-200
 * status, response aborted mid-body) surfaces as a rejection of the returned
 * promise; nothing is thrown synchronously.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} [voiceId] - Voice identifier.
 * @returns {Promise<Buffer>} The raw body of the 200 response.
 */
async function synthesizeDirect(text, voiceId) {
  const voicePath = serverTTS.findVoiceFile(voiceId, EXTRA_VOICE_DIRS);
  const isPredefined = Boolean(voiceId) && PREDEFINED_IDS.has(voiceId);
  const boundary = `----PocketTTS${Date.now()}`;

  const parts = [
    `--${boundary}\r\nContent-Disposition: form-data; name="text"\r\n\r\n${text}\r\n`,
  ];
  if (voicePath) {
    // Async read: a synchronous read here would stall the event loop per request.
    const data = await fs.promises.readFile(voicePath);
    const name = path.basename(voicePath);
    parts.push(
      `--${boundary}\r\nContent-Disposition: form-data; name="voice_wav"; filename="${name}"\r\nContent-Type: audio/wav\r\n\r\n`,
      data,
      '\r\n',
    );
  } else if (isPredefined) {
    parts.push(`--${boundary}\r\nContent-Disposition: form-data; name="voice_url"\r\n\r\n${voiceId}\r\n`);
  }
  parts.push(`--${boundary}--\r\n`);

  // Content-Length must be a byte count, so size the assembled Buffer rather
  // than any intermediate string.
  const body = Buffer.concat(parts.map((part) => (Buffer.isBuffer(part) ? part : Buffer.from(part))));

  return new Promise((resolve, reject) => {
    const req = http.request({
      hostname: '127.0.0.1',
      port: POCKET_PORT,
      path: '/tts',
      method: 'POST',
      headers: {
        'Content-Type': `multipart/form-data; boundary=${boundary}`,
        'Content-Length': body.length,
      },
      timeout: 60000,
    }, (res) => {
      const chunks = [];
      // Without this listener a connection dropped mid-response never fires
      // 'end', which would leave the promise pending forever.
      res.on('error', reject);
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('end', () => {
        const payload = Buffer.concat(chunks);
        if (res.statusCode !== 200) {
          // Decode once over the whole body so multi-byte characters that
          // straddle chunk boundaries are not corrupted in the message.
          reject(new Error(`pocket-tts HTTP ${res.statusCode}: ${payload.toString()}`));
          return;
        }
        resolve(payload);
      });
    });
    req.on('error', reject);
    // 'timeout' only signals idleness; destroying with an error tears the
    // socket down and rejects through the 'error' handler above.
    req.on('timeout', () => req.destroy(new Error('pocket-tts timeout')));
    req.end(body);
  });
}
|
|
72
|
+
|
|
15
73
|
/**
 * Hands the audio buffer to serverSTT.transcribe() and returns its result
 * unchanged.
 *
 * @param {*} audioBuffer - Audio payload, forwarded as-is.
 * @returns {*} Whatever serverSTT.transcribe() returns.
 */
function transcribe(audioBuffer) {
  const transcription = serverSTT.transcribe(audioBuffer);
  return transcription;
}
|
|
@@ -21,15 +79,28 @@ function getSTT() {
|
|
|
21
79
|
}
|
|
22
80
|
|
|
23
81
|
/**
 * Synthesizes `text` with the requested voice.
 *
 * When the module-level `needsPatch` flag is set and `voiceId` is one of
 * PREDEFINED_IDS, the request goes through synthesizeDirect(); every other
 * case is delegated to serverTTS.synthesize() with EXTRA_VOICE_DIRS.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} [voiceId] - Voice identifier.
 * @returns {*} The result of whichever backend handled the request.
 */
function synthesize(text, voiceId) {
  const routeDirect = needsPatch && voiceId && PREDEFINED_IDS.has(voiceId);
  return routeDirect
    ? synthesizeDirect(text, voiceId)
    : serverTTS.synthesize(text, voiceId, EXTRA_VOICE_DIRS);
}
|
|
26
87
|
|
|
27
88
|
/**
 * Streaming counterpart of synthesize().
 *
 * When `needsPatch` is set and `voiceId` is one of PREDEFINED_IDS, returns an
 * async generator that splits `text` with serverTTS.splitSentences() and
 * yields the synthesizeDirect() result for each sentence, one at a time and in
 * order. Otherwise the call is delegated to serverTTS.synthesizeStream().
 *
 * @param {string} text - Text to synthesize.
 * @param {string} [voiceId] - Voice identifier.
 * @returns {*} An async generator, or serverTTS.synthesizeStream()'s result.
 */
function synthesizeStream(text, voiceId) {
  const routeDirect = needsPatch && voiceId && PREDEFINED_IDS.has(voiceId);
  if (!routeDirect) {
    return serverTTS.synthesizeStream(text, voiceId, EXTRA_VOICE_DIRS);
  }

  // Sentence splitting happens lazily, on the first pull from the generator.
  async function* streamSentences() {
    for (const sentence of serverTTS.splitSentences(text)) {
      const audio = await synthesizeDirect(sentence, voiceId);
      yield audio;
    }
  }

  return streamSentences();
}
|
|
30
99
|
|
|
31
100
|
/**
 * Lists the selectable voices: the built-in POCKET_TTS_VOICES first, followed
 * by the upstream voices flagged `isCustom`. Upstream entries that are not
 * custom are dropped.
 *
 * @returns {Array<object>} A new array of voice descriptors.
 */
function getVoices() {
  const customVoices = serverTTS
    .getVoices(EXTRA_VOICE_DIRS)
    .filter((voice) => voice.isCustom);
  return POCKET_TTS_VOICES.concat(customVoices);
}
|
|
34
105
|
|
|
35
106
|
function getStatus() {
|