npm - @ducci/jarvis - Versions diffs - 1.0.86 → 1.0.87 - Mend

@ducci/jarvis 1.0.86 → 1.0.87

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/package.json +1 -1
package/src/server/fish-audio.js +23 -3

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ducci/jarvis",
-  "version": "1.0.86",
+  "version": "1.0.87",
   "description": "A fully automated agent system that lives on a server.",
   "main": "./src/index.js",
   "type": "module",

package/src/server/fish-audio.js CHANGED Viewed

@@ -3,6 +3,12 @@
  */
 import { createClient } from './provider.js';
+import { exec } from 'child_process';
+import { promisify } from 'util';
+import { writeFile, readFile, unlink } from 'fs/promises';
+import { tmpdir } from 'os';
+import { join } from 'path';
+const execAsync = promisify(exec);
 // System prompt for TTS summary generation.
 // fish.audio s1 emotion tags: (emotion) at the START of a sentence only — applies to the whole sentence.
@@ -97,10 +103,24 @@ export async function textToSpeech(text, config) {
 export async function speechToText(audioBuffer, config) {
   const { fishAudioApiKey } = config;
+  // Telegram voice messages are OGG/Opus — fish.audio ASR doesn't support Opus.
+  // Convert to WAV first via ffmpeg.
+  const id = `jarvis-stt-${Date.now()}`;
+  const inPath = join(tmpdir(), `${id}.ogg`);
+  const outPath = join(tmpdir(), `${id}.wav`);
+  let wavBuffer;
+  try {
+    await writeFile(inPath, audioBuffer);
+    await execAsync(`ffmpeg -y -i "${inPath}" -ar 16000 -ac 1 "${outPath}"`);
+    wavBuffer = await readFile(outPath);
+  } finally {
+    unlink(inPath).catch(() => {});
+    unlink(outPath).catch(() => {});
+  }
   const formData = new FormData();
-  const blob = new Blob([audioBuffer], { type: 'audio/ogg' });
-  formData.append('audio', blob, 'voice.ogg');
-  formData.append('ignore_timestamps', 'true');
+  const blob = new Blob([wavBuffer], { type: 'audio/wav' });
+  formData.append('audio', blob, 'voice.wav');
   const response = await fetch('https://api.fish.audio/v1/asr', {
     method: 'POST',