agentgui 1.0.176 → 1.0.177
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/speech.js +79 -41
- package/package.json +1 -1
- package/static/js/voice.js +6 -1
package/lib/speech.js
CHANGED
|
@@ -8,7 +8,7 @@ const require = createRequire(import.meta.url);
|
|
|
8
8
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
9
9
|
const ROOT = path.dirname(__dirname);
|
|
10
10
|
const DATA_DIR = path.join(ROOT, 'data');
|
|
11
|
-
const AUDIO_EXTENSIONS = ['.
|
|
11
|
+
const AUDIO_EXTENSIONS = ['.wav', '.mp3', '.ogg', '.flac', '.m4a'];
|
|
12
12
|
|
|
13
13
|
function getVoiceDirs() {
|
|
14
14
|
const dirs = [];
|
|
@@ -106,6 +106,7 @@ const SPEAKER_EMBEDDINGS_URL = 'https://huggingface.co/datasets/Xenova/speaker_e
|
|
|
106
106
|
const SPEAKER_EMBEDDINGS_PATH = path.join(DATA_DIR, 'speaker_embeddings.bin');
|
|
107
107
|
const DATASET_API = 'https://datasets-server.huggingface.co/rows?dataset=Xenova%2Fspeaker_embeddings&config=default&split=train';
|
|
108
108
|
const SAMPLES_TO_AVERAGE = 30;
|
|
109
|
+
const DEFAULT_EMBEDDING_B64 = 'xhibvao34LylqXQ8cNg7Pd1cCTw0keG8awRRvRqje7070G48AtOgPMFbnr1oeKC9I4ZuPZzqGT1DjWs8y3iMPB/SZLzdl7E6b9QaPKSpHTwYuh49FrMlO9YnebwmTzu9/3CPvQuvCbxsSWC9Sb2bO+tvXj0Cjpo8mTMxu/FDrjzQ4x09gyxCvUn6STxjAo+9vtXdPJtsYT3iMna9dQ+EvfQ72zuvxk69GAonPU8KdjsNPAU96e/8veN7lrwgyzk8HA5vvYE1Rz3gpZ484MsLPUKkxTzM54U81ECwvcbFHzv8gT08T6/7POCqBT2fv5E8fvsXPfZiJrzEhme8dg8kPR+mKTutQOU822maPMlMDb1x/IS93+6KvdyThzwhry880JBqvRVOhjzZods8SD08PLpObTn/0wk9BnAwvWiiz72EWgS9RpcjvV4VR73ZqJW9PoUFvfZYYb1h26S98levPHZbTjxH6qU9RPfoPHmJu70mSNo8ztJmvWgMBj0IX8i7TE3lPINY2DzoEma9wMObvTwKCT3pObe8t9KEvaWixjzc5fI8hj6MvaKv4Txl4h09d2a+PHCvTDxorJ69ekRrPeoPjz1JPfI7rUH7PIaJgz0O1YW9JLumvCxDnr1bmMm8GbIFPBX1oL3bRN08oYcXPEaFfL13Vxo9EKfbvTFcOTxdogA9XS3kPEWJoLvChc887BEgPMOvUT2Ba3s8tUDBvYPMZ72dNRG80AuTvQt7d72foTU9qO20O4INEb1u1iE9ibqJvZYaOj2nbYc8lsodvS5HPD1lCqK9EkBYPR0I/rySMIK9plcpPdpJEz2E/DY88d2DPIRTf71ZQZS9b1v5PPseFT2YiJu8OiOwPC8Wnr2QW4Q8n+o7PPQ8PD0QqAg9Vk7APDT6+jzreP88KH6GvTvAKD0AYiO9qOavvORySjvQ6y+9epb5PFvZijxYzlK9BwjUPK0HXL3acWc7dmwmPc/kXb2VBg68MGYRPR5q9zzmFiS9al2IvdVTfDwJOa88SzVkvVlrPD0WvJQ8Vm76PMUAQDzNgyK8QQZVPdMoibxrCBc9BgKTPDLoV70Iu6g7k+kBPZ3lhTy6sOU8OGkVvFaLRD14oqa9a4UVO4z4Gr1eYlO9u5BgPWS1ZL3kFPE8JGEwPQFTl71tHso8g+ElPd9Rgr2XCtc8axudvWC2IL09wSg9E7ZzPT6uBz2XmK09A1HcPJK8rTxK8Zu8GuMTPTuINTyRAhS9OSqDPDralLza3q48EgtePPf797rIWKo9NtkrvbO34zxKZ6m97l0GPQYVlL2igDA9UyfEPJhZyjx4/2Q8ggBpPYcAkzzIVu08ykYNPESdZr3uqmq8fS/zPKUYvzv67x49cUkqvXDlJj1us/88gASuvcs6G7sUshY9SgWiOqu4OD1WQ7k7/sLoPKuLJjwZYFm9an+zPOnfNry9Jh49/XX3vN1sc731fBM9TnBDPHzOAD26/dS9mg57vY+TA7wVJCw9pPb1PE30l7019la9UyRTPXFqljyRDnw9eZ6nvU03kTtS9907L+wavIBtab3k6cs8KVr6vPZ5zTxy+Zs8VuopPQTTUj0tNxg96qZyPY69lTzQEp48BXGJvVopBDvskUg9G2dOPaJMXDylJZU8FxcMvBQkNzzjPKs8FYUpvepYYj1AQsK9upQsvS4037xDcO48GhmIvWb1iT1gJhy9TG7iPHKAG70cuCQ8F1ZwPYqtj7300T89rTujPbXy2r3/cK69FtBNvY3iMT0DoqI4KK0QPYKEqr2Z6RU9ni0UPUNDLb3BsCi8+GttvZYp9zwUaHe9TqrFPOnlH7yCXJC9U8vDu8u2MjxA8xs9SAGxvPpphr29y2e9y2AYvTv+Eb1Elus9DdpGPSfmNL39Ggu85RVXPZbLh70Jvna7XkLGvR230DtGjpu7Ih8HPJKnIz1o35i8x5NVvXwFNDzs/ZM8+kw8PfFJSTwdlJA9ZJ+tvaoVZ7zTvVi8p6wluwh/IT0Kmg088o1rPRhiwjxpWIe9a+LuvYuYtjwAxE09WkPJPBuFh73UotY820JjvXpnQD3fJ/w8TM3JPOz0pTnbTim9tpe6PBHzJT1HEb66SkAKPasLgr1l/Mm8IOGgvM2pZbzwd4a9znOIO4d4Bb1DW5I8EZXzOxvBKDqKpHG9UwCHvd/Epb2cDRi9V1ztPNPBNTrLXHa8FdGHPPo+hb3DnJ08G+SvvVPQBL6zzrC8Omksvc+eIjyvGfU8eG9nvaVkdL1HBvs8eaeGPfcbVD1/Pfw8+TUFvU6aTL2JN5W8HXDNvGKFEj1i+T09UiCIOySbDD2x2/y7VTmnvTe3gb0ZhJw8WrKIuU5RGT09mKU7eFGtPFpr6DzaoyI9hsItPKU+YzuQlXK8f9IePSmUxTwXdoo9W6FJPV2kLzwkU1o8fGnfPInxg70rEVe9H7sNPWJDbbxSqLY8cQAOPUdpAD2YknK9ykFXPeVALz1mq3W96kO/PLERzjyXIRC7jxsXPRnLzjyUEoU7gTKvu+stlb1D1g45IH+2u5sOIj0wXPA8yTqDvT6mV72NsFq8ExeuPJlGyDxvjgk9lJeJvWSF8DwFvaW7oZ9GvHq1Rr1FJsk83zxVvfyGqTz7thG9fslpPF5RPb1Q6BQ9iXGovTeDeb2cmic8oBsRPYeni72TPcI8EKcPvfCJUbyQJqW9fCAYPRk8qT2q6rk8mEw2PfDeXL0=';
|
|
109
110
|
|
|
110
111
|
let transformersModule = null;
|
|
111
112
|
let sttPipeline = null;
|
|
@@ -143,16 +144,36 @@ function whisperModelPath() {
|
|
|
143
144
|
return 'onnx-community/whisper-base';
|
|
144
145
|
}
|
|
145
146
|
|
|
147
|
+
function defaultEmbedding() {
|
|
148
|
+
const buf = Buffer.from(DEFAULT_EMBEDDING_B64, 'base64');
|
|
149
|
+
return new Float32Array(new Uint8Array(buf).buffer);
|
|
150
|
+
}
|
|
151
|
+
|
|
146
152
|
async function ensureSpeakerEmbeddings() {
|
|
147
153
|
if (speakerEmbeddings) return speakerEmbeddings;
|
|
148
154
|
if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
|
|
149
|
-
if (
|
|
150
|
-
const
|
|
151
|
-
if (
|
|
152
|
-
|
|
155
|
+
if (fs.existsSync(SPEAKER_EMBEDDINGS_PATH)) {
|
|
156
|
+
const buf = fs.readFileSync(SPEAKER_EMBEDDINGS_PATH);
|
|
157
|
+
if (buf.length === 2048) {
|
|
158
|
+
speakerEmbeddings = new Float32Array(new Uint8Array(buf).buffer);
|
|
159
|
+
return speakerEmbeddings;
|
|
160
|
+
}
|
|
153
161
|
}
|
|
154
|
-
|
|
155
|
-
|
|
162
|
+
try {
|
|
163
|
+
const resp = await fetch(SPEAKER_EMBEDDINGS_URL);
|
|
164
|
+
if (resp.ok) {
|
|
165
|
+
const data = Buffer.from(await resp.arrayBuffer());
|
|
166
|
+
if (data.length >= 2048) {
|
|
167
|
+
fs.writeFileSync(SPEAKER_EMBEDDINGS_PATH, data);
|
|
168
|
+
speakerEmbeddings = new Float32Array(new Uint8Array(data).buffer);
|
|
169
|
+
return speakerEmbeddings;
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
} catch (_) {}
|
|
173
|
+
console.log('[TTS] Using bundled default speaker embedding');
|
|
174
|
+
speakerEmbeddings = defaultEmbedding();
|
|
175
|
+
const buf = Buffer.from(speakerEmbeddings.buffer);
|
|
176
|
+
fs.writeFileSync(SPEAKER_EMBEDDINGS_PATH, buf);
|
|
156
177
|
return speakerEmbeddings;
|
|
157
178
|
}
|
|
158
179
|
|
|
@@ -171,25 +192,30 @@ async function loadVoiceEmbedding(voiceId) {
|
|
|
171
192
|
}
|
|
172
193
|
const offset = SPEAKER_OFFSETS[voiceId];
|
|
173
194
|
if (offset === undefined) return ensureSpeakerEmbeddings();
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
const
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
195
|
+
try {
|
|
196
|
+
const url = `${DATASET_API}&offset=${offset}&length=${SAMPLES_TO_AVERAGE}`;
|
|
197
|
+
const resp = await fetch(url);
|
|
198
|
+
if (!resp.ok) throw new Error('HTTP ' + resp.status);
|
|
199
|
+
const data = await resp.json();
|
|
200
|
+
const avg = new Float32Array(512);
|
|
201
|
+
let count = 0;
|
|
202
|
+
for (const item of data.rows) {
|
|
203
|
+
const match = item.row.filename.match(/cmu_us_(\w+)_arctic/);
|
|
204
|
+
if (match && match[1] === voiceId) {
|
|
205
|
+
for (let i = 0; i < 512; i++) avg[i] += item.row.xvector[i];
|
|
206
|
+
count++;
|
|
207
|
+
}
|
|
185
208
|
}
|
|
209
|
+
if (count === 0) return ensureSpeakerEmbeddings();
|
|
210
|
+
for (let i = 0; i < 512; i++) avg[i] /= count;
|
|
211
|
+
if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
|
|
212
|
+
fs.writeFileSync(binPath, Buffer.from(avg.buffer));
|
|
213
|
+
voiceEmbeddingsCache.set(voiceId, avg);
|
|
214
|
+
return avg;
|
|
215
|
+
} catch (err) {
|
|
216
|
+
console.error('[TTS] Failed to fetch voice embedding for ' + voiceId + ':', err.message);
|
|
217
|
+
return ensureSpeakerEmbeddings();
|
|
186
218
|
}
|
|
187
|
-
if (count === 0) return ensureSpeakerEmbeddings();
|
|
188
|
-
for (let i = 0; i < 512; i++) avg[i] /= count;
|
|
189
|
-
if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
|
|
190
|
-
fs.writeFileSync(binPath, Buffer.from(avg.buffer));
|
|
191
|
-
voiceEmbeddingsCache.set(voiceId, avg);
|
|
192
|
-
return avg;
|
|
193
219
|
}
|
|
194
220
|
|
|
195
221
|
async function getSpeakerEmbeddingPipeline() {
|
|
@@ -234,6 +260,12 @@ async function decodeAudioFile(filePath) {
|
|
|
234
260
|
const decoded = decodeWavToFloat32(buf);
|
|
235
261
|
return resampleTo16k(decoded.audio, decoded.sampleRate);
|
|
236
262
|
}
|
|
263
|
+
const wavPath = filePath.replace(/\.[^.]+$/, '.wav');
|
|
264
|
+
if (fs.existsSync(wavPath)) {
|
|
265
|
+
const wavBuf = fs.readFileSync(wavPath);
|
|
266
|
+
const decoded = decodeWavToFloat32(wavBuf);
|
|
267
|
+
return resampleTo16k(decoded.audio, decoded.sampleRate);
|
|
268
|
+
}
|
|
237
269
|
const decode = (await import('audio-decode')).default;
|
|
238
270
|
const audioBuffer = await decode(buf);
|
|
239
271
|
const mono = audioBuffer.getChannelData(0);
|
|
@@ -246,23 +278,28 @@ async function generateEmbeddingFromCustomVoice(voiceId) {
|
|
|
246
278
|
console.error('[VOICES] Custom voice file not found for:', voiceId);
|
|
247
279
|
return ensureSpeakerEmbeddings();
|
|
248
280
|
}
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
281
|
+
try {
|
|
282
|
+
console.log('[VOICES] Generating embedding from:', audioFile);
|
|
283
|
+
const audio = await decodeAudioFile(audioFile);
|
|
284
|
+
if (audio.length < SAMPLE_RATE_STT * 0.5) {
|
|
285
|
+
throw new Error('Audio too short for embedding extraction');
|
|
286
|
+
}
|
|
287
|
+
const pipe = await getSpeakerEmbeddingPipeline();
|
|
288
|
+
const output = await pipe(audio, { pooling: 'mean', normalize: true });
|
|
289
|
+
const embedding = new Float32Array(512);
|
|
290
|
+
for (let i = 0; i < Math.min(512, output.data.length); i++) {
|
|
291
|
+
embedding[i] = output.data[i];
|
|
292
|
+
}
|
|
293
|
+
if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
|
|
294
|
+
const binPath = path.join(DATA_DIR, `speaker_${voiceId}.bin`);
|
|
295
|
+
fs.writeFileSync(binPath, Buffer.from(embedding.buffer));
|
|
296
|
+
voiceEmbeddingsCache.set(voiceId, embedding);
|
|
297
|
+
console.log('[VOICES] Generated embedding for custom voice:', voiceId);
|
|
298
|
+
return embedding;
|
|
299
|
+
} catch (err) {
|
|
300
|
+
console.error('[VOICES] Failed to generate embedding for', voiceId + ':', err.message);
|
|
301
|
+
return ensureSpeakerEmbeddings();
|
|
259
302
|
}
|
|
260
|
-
if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
|
|
261
|
-
const binPath = path.join(DATA_DIR, `speaker_${voiceId}.bin`);
|
|
262
|
-
fs.writeFileSync(binPath, Buffer.from(embedding.buffer));
|
|
263
|
-
voiceEmbeddingsCache.set(voiceId, embedding);
|
|
264
|
-
console.log('[VOICES] Generated embedding for custom voice:', voiceId);
|
|
265
|
-
return embedding;
|
|
266
303
|
}
|
|
267
304
|
|
|
268
305
|
async function getSTT() {
|
|
@@ -509,13 +546,14 @@ async function* synthesizeStream(text, voiceId) {
|
|
|
509
546
|
}
|
|
510
547
|
|
|
511
548
|
function getStatus() {
|
|
549
|
+
const ttsRetryExpired = ttsLoadError && (Date.now() - ttsLoadErrorTime >= TTS_ERROR_RETRY_MS);
|
|
512
550
|
return {
|
|
513
551
|
sttReady: !!sttPipeline,
|
|
514
552
|
ttsReady: !!ttsPipeline,
|
|
515
553
|
sttLoading,
|
|
516
554
|
ttsLoading,
|
|
517
555
|
sttError: sttLoadError ? sttLoadError.message : null,
|
|
518
|
-
ttsError: ttsLoadError ? ttsLoadError.message : null,
|
|
556
|
+
ttsError: (ttsLoadError && !ttsRetryExpired) ? ttsLoadError.message : null,
|
|
519
557
|
};
|
|
520
558
|
}
|
|
521
559
|
|
package/package.json
CHANGED
package/static/js/voice.js
CHANGED
|
@@ -320,6 +320,7 @@
|
|
|
320
320
|
var ttsConsecutiveFailures = 0;
|
|
321
321
|
var TTS_MAX_FAILURES = 3;
|
|
322
322
|
var ttsDisabledUntilReset = false;
|
|
323
|
+
var streamingSupported = true;
|
|
323
324
|
|
|
324
325
|
function playNextChunk() {
|
|
325
326
|
if (audioChunkQueue.length === 0) {
|
|
@@ -391,12 +392,16 @@
|
|
|
391
392
|
}
|
|
392
393
|
|
|
393
394
|
function tryStreaming() {
|
|
395
|
+
if (!streamingSupported) { tryNonStreaming(text); return; }
|
|
394
396
|
fetch(BASE + '/api/tts-stream', {
|
|
395
397
|
method: 'POST',
|
|
396
398
|
headers: { 'Content-Type': 'application/json' },
|
|
397
399
|
body: JSON.stringify({ text: text, voiceId: selectedVoiceId })
|
|
398
400
|
}).then(function(resp) {
|
|
399
|
-
if (!resp.ok)
|
|
401
|
+
if (!resp.ok) {
|
|
402
|
+
streamingSupported = false;
|
|
403
|
+
throw new Error('TTS stream failed: ' + resp.status);
|
|
404
|
+
}
|
|
400
405
|
var reader = resp.body.getReader();
|
|
401
406
|
var buffer = new Uint8Array(0);
|
|
402
407
|
|