agentgui 1.0.192 → 1.0.193
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/lib/speech.js +7 -95
- package/package.json +1 -1
package/lib/speech.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { createRequire } from 'module';
|
|
2
2
|
import fs from 'fs';
|
|
3
3
|
import path from 'path';
|
|
4
|
-
import http from 'http';
|
|
5
4
|
import { fileURLToPath } from 'url';
|
|
6
5
|
|
|
7
6
|
const require = createRequire(import.meta.url);
|
|
@@ -12,64 +11,6 @@ const serverSTT = require('webtalk/server-stt');
|
|
|
12
11
|
const serverTTS = require('webtalk/server-tts');
|
|
13
12
|
|
|
14
13
|
const EXTRA_VOICE_DIRS = [path.join(ROOT, 'voices')];
|
|
15
|
-
const TTS_PORT = 8787;
|
|
16
|
-
|
|
17
|
-
const TTS_CACHE_MAX = 10 * 1024 * 1024;
|
|
18
|
-
let cacheBytes = 0;
|
|
19
|
-
const cache = new Map();
|
|
20
|
-
const inflight = new Map();
|
|
21
|
-
|
|
22
|
-
function resolveVoice(voiceId) {
|
|
23
|
-
if (!voiceId || voiceId === 'default') return null;
|
|
24
|
-
return serverTTS.findVoiceFile(voiceId, EXTRA_VOICE_DIRS);
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
function cachePut(key, buf) {
|
|
28
|
-
if (cache.has(key)) { cacheBytes -= cache.get(key).length; cache.delete(key); }
|
|
29
|
-
while (cacheBytes + buf.length > TTS_CACHE_MAX && cache.size > 0) {
|
|
30
|
-
const oldest = cache.keys().next().value;
|
|
31
|
-
cacheBytes -= cache.get(oldest).length;
|
|
32
|
-
cache.delete(oldest);
|
|
33
|
-
}
|
|
34
|
-
cache.set(key, buf);
|
|
35
|
-
cacheBytes += buf.length;
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
function sendToPocket(text, voicePath) {
|
|
39
|
-
return new Promise((resolve, reject) => {
|
|
40
|
-
const boundary = '----PocketTTS' + Date.now();
|
|
41
|
-
const parts = [];
|
|
42
|
-
parts.push(`--${boundary}\r\nContent-Disposition: form-data; name="text"\r\n\r\n${text}\r\n`);
|
|
43
|
-
if (voicePath) {
|
|
44
|
-
const data = fs.readFileSync(voicePath);
|
|
45
|
-
const name = path.basename(voicePath);
|
|
46
|
-
parts.push(`--${boundary}\r\nContent-Disposition: form-data; name="voice_wav"; filename="${name}"\r\nContent-Type: audio/wav\r\n\r\n`);
|
|
47
|
-
parts.push(data);
|
|
48
|
-
parts.push('\r\n');
|
|
49
|
-
}
|
|
50
|
-
parts.push(`--${boundary}--\r\n`);
|
|
51
|
-
const body = Buffer.concat(parts.map(p => Buffer.isBuffer(p) ? p : Buffer.from(p)));
|
|
52
|
-
const req = http.request({
|
|
53
|
-
hostname: '127.0.0.1', port: TTS_PORT, path: '/tts', method: 'POST',
|
|
54
|
-
headers: { 'Content-Type': `multipart/form-data; boundary=${boundary}`, 'Content-Length': body.length },
|
|
55
|
-
timeout: 60000,
|
|
56
|
-
}, res => {
|
|
57
|
-
if (res.statusCode !== 200) {
|
|
58
|
-
let e = '';
|
|
59
|
-
res.on('data', d => e += d);
|
|
60
|
-
res.on('end', () => reject(new Error(`pocket-tts HTTP ${res.statusCode}: ${e}`)));
|
|
61
|
-
return;
|
|
62
|
-
}
|
|
63
|
-
const chunks = [];
|
|
64
|
-
res.on('data', d => chunks.push(d));
|
|
65
|
-
res.on('end', () => resolve(Buffer.concat(chunks)));
|
|
66
|
-
});
|
|
67
|
-
req.on('error', reject);
|
|
68
|
-
req.on('timeout', () => { req.destroy(); reject(new Error('pocket-tts timeout')); });
|
|
69
|
-
req.write(body);
|
|
70
|
-
req.end();
|
|
71
|
-
});
|
|
72
|
-
}
|
|
73
14
|
|
|
74
15
|
function transcribe(audioBuffer) {
|
|
75
16
|
return serverSTT.transcribe(audioBuffer);
|
|
@@ -79,37 +20,12 @@ function getSTT() {
|
|
|
79
20
|
return serverSTT.getSTT();
|
|
80
21
|
}
|
|
81
22
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
if (!status.ready) throw new Error('pocket-tts not healthy');
|
|
85
|
-
const key = (voiceId || 'default') + ':' + text;
|
|
86
|
-
const cached = cache.get(key);
|
|
87
|
-
if (cached) { cache.delete(key); cache.set(key, cached); return cached; }
|
|
88
|
-
const existing = inflight.get(key);
|
|
89
|
-
if (existing) return existing;
|
|
90
|
-
const promise = (async () => {
|
|
91
|
-
const voicePath = resolveVoice(voiceId);
|
|
92
|
-
const wav = await sendToPocket(text, voicePath);
|
|
93
|
-
if (!wav || wav.length <= 44) throw new Error('pocket-tts returned empty audio');
|
|
94
|
-
cachePut(key, wav);
|
|
95
|
-
return wav;
|
|
96
|
-
})();
|
|
97
|
-
inflight.set(key, promise);
|
|
98
|
-
try { return await promise; } finally { inflight.delete(key); }
|
|
23
|
+
function synthesize(text, voiceId) {
|
|
24
|
+
return serverTTS.synthesize(text, voiceId, EXTRA_VOICE_DIRS);
|
|
99
25
|
}
|
|
100
26
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
if (!status.ready) throw new Error('pocket-tts not healthy');
|
|
104
|
-
const sentences = splitSentences(text);
|
|
105
|
-
for (const sentence of sentences) {
|
|
106
|
-
const key = (voiceId || 'default') + ':' + sentence;
|
|
107
|
-
const cached = cache.get(key);
|
|
108
|
-
if (cached) { cache.delete(key); cache.set(key, cached); yield cached; continue; }
|
|
109
|
-
const voicePath = resolveVoice(voiceId);
|
|
110
|
-
const wav = await sendToPocket(sentence, voicePath);
|
|
111
|
-
if (wav && wav.length > 44) { cachePut(key, wav); yield wav; }
|
|
112
|
-
}
|
|
27
|
+
function synthesizeStream(text, voiceId) {
|
|
28
|
+
return serverTTS.synthesizeStream(text, voiceId, EXTRA_VOICE_DIRS);
|
|
113
29
|
}
|
|
114
30
|
|
|
115
31
|
function getVoices() {
|
|
@@ -142,19 +58,15 @@ function preloadTTS() {
|
|
|
142
58
|
}
|
|
143
59
|
|
|
144
60
|
function ttsCacheKey(text, voiceId) {
|
|
145
|
-
return (voiceId
|
|
61
|
+
return serverTTS.ttsCacheKey(text, voiceId);
|
|
146
62
|
}
|
|
147
63
|
|
|
148
64
|
function ttsCacheGet(key) {
|
|
149
|
-
|
|
150
|
-
if (cached) { cache.delete(key); cache.set(key, cached); }
|
|
151
|
-
return cached || null;
|
|
65
|
+
return serverTTS.ttsCacheGet(key);
|
|
152
66
|
}
|
|
153
67
|
|
|
154
68
|
function splitSentences(text) {
|
|
155
|
-
|
|
156
|
-
if (!raw) return [text];
|
|
157
|
-
return raw.map(s => s.trim()).filter(s => s.length > 0);
|
|
69
|
+
return serverTTS.splitSentences(text);
|
|
158
70
|
}
|
|
159
71
|
|
|
160
72
|
export { transcribe, synthesize, synthesizeStream, getSTT, getStatus, getVoices, preloadTTS, ttsCacheKey, ttsCacheGet, splitSentences };
|