agentgui 1.0.176 → 1.0.177

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
package/lib/speech.js CHANGED
@@ -8,7 +8,7 @@ const require = createRequire(import.meta.url);
8
8
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
9
9
  const ROOT = path.dirname(__dirname);
10
10
  const DATA_DIR = path.join(ROOT, 'data');
11
- const AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.flac', '.m4a'];
11
+ const AUDIO_EXTENSIONS = ['.wav', '.mp3', '.ogg', '.flac', '.m4a'];
12
12
 
13
13
  function getVoiceDirs() {
14
14
  const dirs = [];
@@ -106,6 +106,7 @@ const SPEAKER_EMBEDDINGS_URL = 'https://huggingface.co/datasets/Xenova/speaker_e
106
106
  const SPEAKER_EMBEDDINGS_PATH = path.join(DATA_DIR, 'speaker_embeddings.bin');
107
107
  const DATASET_API = 'https://datasets-server.huggingface.co/rows?dataset=Xenova%2Fspeaker_embeddings&config=default&split=train';
108
108
  const SAMPLES_TO_AVERAGE = 30;
109
+ const DEFAULT_EMBEDDING_B64 = 'xhibvao34LylqXQ8cNg7Pd1cCTw0keG8awRRvRqje7070G48AtOgPMFbnr1oeKC9I4ZuPZzqGT1DjWs8y3iMPB/SZLzdl7E6b9QaPKSpHTwYuh49FrMlO9YnebwmTzu9/3CPvQuvCbxsSWC9Sb2bO+tvXj0Cjpo8mTMxu/FDrjzQ4x09gyxCvUn6STxjAo+9vtXdPJtsYT3iMna9dQ+EvfQ72zuvxk69GAonPU8KdjsNPAU96e/8veN7lrwgyzk8HA5vvYE1Rz3gpZ484MsLPUKkxTzM54U81ECwvcbFHzv8gT08T6/7POCqBT2fv5E8fvsXPfZiJrzEhme8dg8kPR+mKTutQOU822maPMlMDb1x/IS93+6KvdyThzwhry880JBqvRVOhjzZods8SD08PLpObTn/0wk9BnAwvWiiz72EWgS9RpcjvV4VR73ZqJW9PoUFvfZYYb1h26S98levPHZbTjxH6qU9RPfoPHmJu70mSNo8ztJmvWgMBj0IX8i7TE3lPINY2DzoEma9wMObvTwKCT3pObe8t9KEvaWixjzc5fI8hj6MvaKv4Txl4h09d2a+PHCvTDxorJ69ekRrPeoPjz1JPfI7rUH7PIaJgz0O1YW9JLumvCxDnr1bmMm8GbIFPBX1oL3bRN08oYcXPEaFfL13Vxo9EKfbvTFcOTxdogA9XS3kPEWJoLvChc887BEgPMOvUT2Ba3s8tUDBvYPMZ72dNRG80AuTvQt7d72foTU9qO20O4INEb1u1iE9ibqJvZYaOj2nbYc8lsodvS5HPD1lCqK9EkBYPR0I/rySMIK9plcpPdpJEz2E/DY88d2DPIRTf71ZQZS9b1v5PPseFT2YiJu8OiOwPC8Wnr2QW4Q8n+o7PPQ8PD0QqAg9Vk7APDT6+jzreP88KH6GvTvAKD0AYiO9qOavvORySjvQ6y+9epb5PFvZijxYzlK9BwjUPK0HXL3acWc7dmwmPc/kXb2VBg68MGYRPR5q9zzmFiS9al2IvdVTfDwJOa88SzVkvVlrPD0WvJQ8Vm76PMUAQDzNgyK8QQZVPdMoibxrCBc9BgKTPDLoV70Iu6g7k+kBPZ3lhTy6sOU8OGkVvFaLRD14oqa9a4UVO4z4Gr1eYlO9u5BgPWS1ZL3kFPE8JGEwPQFTl71tHso8g+ElPd9Rgr2XCtc8axudvWC2IL09wSg9E7ZzPT6uBz2XmK09A1HcPJK8rTxK8Zu8GuMTPTuINTyRAhS9OSqDPDralLza3q48EgtePPf797rIWKo9NtkrvbO34zxKZ6m97l0GPQYVlL2igDA9UyfEPJhZyjx4/2Q8ggBpPYcAkzzIVu08ykYNPESdZr3uqmq8fS/zPKUYvzv67x49cUkqvXDlJj1us/88gASuvcs6G7sUshY9SgWiOqu4OD1WQ7k7/sLoPKuLJjwZYFm9an+zPOnfNry9Jh49/XX3vN1sc731fBM9TnBDPHzOAD26/dS9mg57vY+TA7wVJCw9pPb1PE30l7019la9UyRTPXFqljyRDnw9eZ6nvU03kTtS9907L+wavIBtab3k6cs8KVr6vPZ5zTxy+Zs8VuopPQTTUj0tNxg96qZyPY69lTzQEp48BXGJvVopBDvskUg9G2dOPaJMXDylJZU8FxcMvBQkNzzjPKs8FYUpvepYYj1AQsK9upQsvS4037xDcO48GhmIvWb1iT1gJhy9TG7iPHKAG70cuCQ8F1ZwPYqtj7300T89rTujPbXy2r3/cK69FtBNvY3iMT0DoqI4KK0QPYKEqr2Z6RU9ni0UPUNDLb3BsCi8+GttvZYp9zwUaHe9TqrFPOnlH7yCXJC9U8vDu8u2MjxA8xs9SAGxvPpphr29y2e9y2AYvTv+Eb1Elus9DdpGPSfmNL39Ggu85RVXPZbLh70Jvna7XkLGvR230DtGjpu7Ih8HPJKnIz1o35i8x5NVvXwFNDzs/ZM
8+kw8PfFJSTwdlJA9ZJ+tvaoVZ7zTvVi8p6wluwh/IT0Kmg088o1rPRhiwjxpWIe9a+LuvYuYtjwAxE09WkPJPBuFh73UotY820JjvXpnQD3fJ/w8TM3JPOz0pTnbTim9tpe6PBHzJT1HEb66SkAKPasLgr1l/Mm8IOGgvM2pZbzwd4a9znOIO4d4Bb1DW5I8EZXzOxvBKDqKpHG9UwCHvd/Epb2cDRi9V1ztPNPBNTrLXHa8FdGHPPo+hb3DnJ08G+SvvVPQBL6zzrC8Omksvc+eIjyvGfU8eG9nvaVkdL1HBvs8eaeGPfcbVD1/Pfw8+TUFvU6aTL2JN5W8HXDNvGKFEj1i+T09UiCIOySbDD2x2/y7VTmnvTe3gb0ZhJw8WrKIuU5RGT09mKU7eFGtPFpr6DzaoyI9hsItPKU+YzuQlXK8f9IePSmUxTwXdoo9W6FJPV2kLzwkU1o8fGnfPInxg70rEVe9H7sNPWJDbbxSqLY8cQAOPUdpAD2YknK9ykFXPeVALz1mq3W96kO/PLERzjyXIRC7jxsXPRnLzjyUEoU7gTKvu+stlb1D1g45IH+2u5sOIj0wXPA8yTqDvT6mV72NsFq8ExeuPJlGyDxvjgk9lJeJvWSF8DwFvaW7oZ9GvHq1Rr1FJsk83zxVvfyGqTz7thG9fslpPF5RPb1Q6BQ9iXGovTeDeb2cmic8oBsRPYeni72TPcI8EKcPvfCJUbyQJqW9fCAYPRk8qT2q6rk8mEw2PfDeXL0=';
109
110
 
110
111
  let transformersModule = null;
111
112
  let sttPipeline = null;
@@ -143,16 +144,36 @@ function whisperModelPath() {
143
144
  return 'onnx-community/whisper-base';
144
145
  }
145
146
 
147
+ function defaultEmbedding() {
148
+ const buf = Buffer.from(DEFAULT_EMBEDDING_B64, 'base64');
149
+ return new Float32Array(new Uint8Array(buf).buffer);
150
+ }
151
+
146
152
  async function ensureSpeakerEmbeddings() {
147
153
  if (speakerEmbeddings) return speakerEmbeddings;
148
154
  if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
149
- if (!fs.existsSync(SPEAKER_EMBEDDINGS_PATH)) {
150
- const resp = await fetch(SPEAKER_EMBEDDINGS_URL);
151
- if (!resp.ok) throw new Error('Failed to download speaker embeddings');
152
- fs.writeFileSync(SPEAKER_EMBEDDINGS_PATH, Buffer.from(await resp.arrayBuffer()));
155
+ if (fs.existsSync(SPEAKER_EMBEDDINGS_PATH)) {
156
+ const buf = fs.readFileSync(SPEAKER_EMBEDDINGS_PATH);
157
+ if (buf.length === 2048) {
158
+ speakerEmbeddings = new Float32Array(new Uint8Array(buf).buffer);
159
+ return speakerEmbeddings;
160
+ }
153
161
  }
154
- const buf = fs.readFileSync(SPEAKER_EMBEDDINGS_PATH);
155
- speakerEmbeddings = new Float32Array(new Uint8Array(buf).buffer);
162
+ try {
163
+ const resp = await fetch(SPEAKER_EMBEDDINGS_URL);
164
+ if (resp.ok) {
165
+ const data = Buffer.from(await resp.arrayBuffer());
166
+ if (data.length >= 2048) {
167
+ fs.writeFileSync(SPEAKER_EMBEDDINGS_PATH, data);
168
+ speakerEmbeddings = new Float32Array(new Uint8Array(data).buffer);
169
+ return speakerEmbeddings;
170
+ }
171
+ }
172
+ } catch (_) {}
173
+ console.log('[TTS] Using bundled default speaker embedding');
174
+ speakerEmbeddings = defaultEmbedding();
175
+ const buf = Buffer.from(speakerEmbeddings.buffer);
176
+ fs.writeFileSync(SPEAKER_EMBEDDINGS_PATH, buf);
156
177
  return speakerEmbeddings;
157
178
  }
158
179
 
@@ -171,25 +192,30 @@ async function loadVoiceEmbedding(voiceId) {
171
192
  }
172
193
  const offset = SPEAKER_OFFSETS[voiceId];
173
194
  if (offset === undefined) return ensureSpeakerEmbeddings();
174
- const url = `${DATASET_API}&offset=${offset}&length=${SAMPLES_TO_AVERAGE}`;
175
- const resp = await fetch(url);
176
- if (!resp.ok) throw new Error('Failed to fetch voice embeddings for ' + voiceId);
177
- const data = await resp.json();
178
- const avg = new Float32Array(512);
179
- let count = 0;
180
- for (const item of data.rows) {
181
- const match = item.row.filename.match(/cmu_us_(\w+)_arctic/);
182
- if (match && match[1] === voiceId) {
183
- for (let i = 0; i < 512; i++) avg[i] += item.row.xvector[i];
184
- count++;
195
+ try {
196
+ const url = `${DATASET_API}&offset=${offset}&length=${SAMPLES_TO_AVERAGE}`;
197
+ const resp = await fetch(url);
198
+ if (!resp.ok) throw new Error('HTTP ' + resp.status);
199
+ const data = await resp.json();
200
+ const avg = new Float32Array(512);
201
+ let count = 0;
202
+ for (const item of data.rows) {
203
+ const match = item.row.filename.match(/cmu_us_(\w+)_arctic/);
204
+ if (match && match[1] === voiceId) {
205
+ for (let i = 0; i < 512; i++) avg[i] += item.row.xvector[i];
206
+ count++;
207
+ }
185
208
  }
209
+ if (count === 0) return ensureSpeakerEmbeddings();
210
+ for (let i = 0; i < 512; i++) avg[i] /= count;
211
+ if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
212
+ fs.writeFileSync(binPath, Buffer.from(avg.buffer));
213
+ voiceEmbeddingsCache.set(voiceId, avg);
214
+ return avg;
215
+ } catch (err) {
216
+ console.error('[TTS] Failed to fetch voice embedding for ' + voiceId + ':', err.message);
217
+ return ensureSpeakerEmbeddings();
186
218
  }
187
- if (count === 0) return ensureSpeakerEmbeddings();
188
- for (let i = 0; i < 512; i++) avg[i] /= count;
189
- if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
190
- fs.writeFileSync(binPath, Buffer.from(avg.buffer));
191
- voiceEmbeddingsCache.set(voiceId, avg);
192
- return avg;
193
219
  }
194
220
 
195
221
  async function getSpeakerEmbeddingPipeline() {
@@ -234,6 +260,12 @@ async function decodeAudioFile(filePath) {
234
260
  const decoded = decodeWavToFloat32(buf);
235
261
  return resampleTo16k(decoded.audio, decoded.sampleRate);
236
262
  }
263
+ const wavPath = filePath.replace(/\.[^.]+$/, '.wav');
264
+ if (fs.existsSync(wavPath)) {
265
+ const wavBuf = fs.readFileSync(wavPath);
266
+ const decoded = decodeWavToFloat32(wavBuf);
267
+ return resampleTo16k(decoded.audio, decoded.sampleRate);
268
+ }
237
269
  const decode = (await import('audio-decode')).default;
238
270
  const audioBuffer = await decode(buf);
239
271
  const mono = audioBuffer.getChannelData(0);
@@ -246,23 +278,28 @@ async function generateEmbeddingFromCustomVoice(voiceId) {
246
278
  console.error('[VOICES] Custom voice file not found for:', voiceId);
247
279
  return ensureSpeakerEmbeddings();
248
280
  }
249
- console.log('[VOICES] Generating embedding from:', audioFile);
250
- const audio = await decodeAudioFile(audioFile);
251
- if (audio.length < SAMPLE_RATE_STT * 0.5) {
252
- throw new Error('Audio too short for embedding extraction (need at least 0.5 seconds)');
253
- }
254
- const pipe = await getSpeakerEmbeddingPipeline();
255
- const output = await pipe(audio, { pooling: 'mean', normalize: true });
256
- const embedding = new Float32Array(512);
257
- for (let i = 0; i < Math.min(512, output.data.length); i++) {
258
- embedding[i] = output.data[i];
281
+ try {
282
+ console.log('[VOICES] Generating embedding from:', audioFile);
283
+ const audio = await decodeAudioFile(audioFile);
284
+ if (audio.length < SAMPLE_RATE_STT * 0.5) {
285
+ throw new Error('Audio too short for embedding extraction');
286
+ }
287
+ const pipe = await getSpeakerEmbeddingPipeline();
288
+ const output = await pipe(audio, { pooling: 'mean', normalize: true });
289
+ const embedding = new Float32Array(512);
290
+ for (let i = 0; i < Math.min(512, output.data.length); i++) {
291
+ embedding[i] = output.data[i];
292
+ }
293
+ if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
294
+ const binPath = path.join(DATA_DIR, `speaker_${voiceId}.bin`);
295
+ fs.writeFileSync(binPath, Buffer.from(embedding.buffer));
296
+ voiceEmbeddingsCache.set(voiceId, embedding);
297
+ console.log('[VOICES] Generated embedding for custom voice:', voiceId);
298
+ return embedding;
299
+ } catch (err) {
300
+ console.error('[VOICES] Failed to generate embedding for', voiceId + ':', err.message);
301
+ return ensureSpeakerEmbeddings();
259
302
  }
260
- if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
261
- const binPath = path.join(DATA_DIR, `speaker_${voiceId}.bin`);
262
- fs.writeFileSync(binPath, Buffer.from(embedding.buffer));
263
- voiceEmbeddingsCache.set(voiceId, embedding);
264
- console.log('[VOICES] Generated embedding for custom voice:', voiceId);
265
- return embedding;
266
303
  }
267
304
 
268
305
  async function getSTT() {
@@ -509,13 +546,14 @@ async function* synthesizeStream(text, voiceId) {
509
546
  }
510
547
 
511
548
  function getStatus() {
549
+ const ttsRetryExpired = ttsLoadError && (Date.now() - ttsLoadErrorTime >= TTS_ERROR_RETRY_MS);
512
550
  return {
513
551
  sttReady: !!sttPipeline,
514
552
  ttsReady: !!ttsPipeline,
515
553
  sttLoading,
516
554
  ttsLoading,
517
555
  sttError: sttLoadError ? sttLoadError.message : null,
518
- ttsError: ttsLoadError ? ttsLoadError.message : null,
556
+ ttsError: (ttsLoadError && !ttsRetryExpired) ? ttsLoadError.message : null,
519
557
  };
520
558
  }
521
559
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.176",
3
+ "version": "1.0.177",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",
@@ -320,6 +320,7 @@
320
320
  var ttsConsecutiveFailures = 0;
321
321
  var TTS_MAX_FAILURES = 3;
322
322
  var ttsDisabledUntilReset = false;
323
+ var streamingSupported = true;
323
324
 
324
325
  function playNextChunk() {
325
326
  if (audioChunkQueue.length === 0) {
@@ -391,12 +392,16 @@
391
392
  }
392
393
 
393
394
  function tryStreaming() {
395
+ if (!streamingSupported) { tryNonStreaming(text); return; }
394
396
  fetch(BASE + '/api/tts-stream', {
395
397
  method: 'POST',
396
398
  headers: { 'Content-Type': 'application/json' },
397
399
  body: JSON.stringify({ text: text, voiceId: selectedVoiceId })
398
400
  }).then(function(resp) {
399
- if (!resp.ok) throw new Error('TTS stream failed: ' + resp.status);
401
+ if (!resp.ok) {
402
+ streamingSupported = false;
403
+ throw new Error('TTS stream failed: ' + resp.status);
404
+ }
400
405
  var reader = resp.body.getReader();
401
406
  var buffer = new Uint8Array(0);
402
407