agentgui 1.0.177 → 1.0.178

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/lib/speech.js +14 -7
  2. package/package.json +1 -1
package/lib/speech.js CHANGED
@@ -218,24 +218,29 @@ async function loadVoiceEmbedding(voiceId) {
218
218
  }
219
219
  }
220
220
 
221
+ let speakerFeatureExtractor = null;
222
+
221
223
  async function getSpeakerEmbeddingPipeline() {
222
224
  if (speakerEmbeddingPipeline) return speakerEmbeddingPipeline;
223
225
  if (speakerEmbeddingLoading) {
224
226
  while (speakerEmbeddingLoading) await new Promise(r => setTimeout(r, 100));
225
- if (!speakerEmbeddingPipeline) throw new Error('Speaker embedding pipeline failed to load');
227
+ if (!speakerEmbeddingPipeline) throw new Error('Speaker embedding model failed to load');
226
228
  return speakerEmbeddingPipeline;
227
229
  }
228
230
  speakerEmbeddingLoading = true;
229
231
  try {
230
- const { pipeline, env } = await loadTransformers();
232
+ const { AutoModelForXVector, AutoFeatureExtractor, env } = await loadTransformers();
231
233
  env.allowRemoteModels = true;
232
- speakerEmbeddingPipeline = await pipeline('feature-extraction', 'speechbrain/spkrec-xvectors-voxceleb', {
234
+ const modelId = 'Xenova/wavlm-base-plus-sv';
235
+ speakerEmbeddingPipeline = await AutoModelForXVector.from_pretrained(modelId, {
233
236
  device: 'cpu',
234
237
  dtype: 'fp32',
235
238
  });
239
+ speakerFeatureExtractor = await AutoFeatureExtractor.from_pretrained(modelId);
236
240
  return speakerEmbeddingPipeline;
237
241
  } catch (err) {
238
242
  speakerEmbeddingPipeline = null;
243
+ speakerFeatureExtractor = null;
239
244
  throw new Error('Speaker embedding model load failed: ' + err.message);
240
245
  } finally {
241
246
  speakerEmbeddingLoading = false;
@@ -284,11 +289,13 @@ async function generateEmbeddingFromCustomVoice(voiceId) {
284
289
  if (audio.length < SAMPLE_RATE_STT * 0.5) {
285
290
  throw new Error('Audio too short for embedding extraction');
286
291
  }
287
- const pipe = await getSpeakerEmbeddingPipeline();
288
- const output = await pipe(audio, { pooling: 'mean', normalize: true });
292
+ const model = await getSpeakerEmbeddingPipeline();
293
+ const inputs = await speakerFeatureExtractor(audio, { sampling_rate: SAMPLE_RATE_STT });
294
+ const output = await model(inputs);
295
+ const embData = output.embeddings.data;
289
296
  const embedding = new Float32Array(512);
290
- for (let i = 0; i < Math.min(512, output.data.length); i++) {
291
- embedding[i] = output.data[i];
297
+ for (let i = 0; i < Math.min(512, embData.length); i++) {
298
+ embedding[i] = embData[i];
292
299
  }
293
300
  if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
294
301
  const binPath = path.join(DATA_DIR, `speaker_${voiceId}.bin`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.177",
3
+ "version": "1.0.178",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",