@ducci/jarvis 1.0.85 → 1.0.87

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ducci/jarvis",
3
- "version": "1.0.85",
3
+ "version": "1.0.87",
4
4
  "description": "A fully automated agent system that lives on a server.",
5
5
  "main": "./src/index.js",
6
6
  "type": "module",
@@ -804,7 +804,7 @@ export async function startTelegramChannel(config) {
804
804
  transcription = await speechToText(audioBuffer, config);
805
805
  } catch (e) {
806
806
  console.error(`[telegram] STT error chat_id=${chatId}: ${e.message}`);
807
- await ctx.reply('Sorry, could not transcribe the voice message.').catch(() => {});
807
+ await ctx.reply(`Voice transcription failed: ${e.message}`).catch(() => {});
808
808
  return;
809
809
  }
810
810
 
@@ -3,6 +3,12 @@
3
3
  */
4
4
 
5
5
  import { createClient } from './provider.js';
6
+ import { exec } from 'child_process';
7
+ import { promisify } from 'util';
8
+ import { writeFile, readFile, unlink } from 'fs/promises';
9
+ import { tmpdir } from 'os';
10
+ import { join } from 'path';
11
+ const execAsync = promisify(exec);
6
12
 
7
13
  // System prompt for TTS summary generation.
8
14
  // fish.audio s1 emotion tags: (emotion) at the START of a sentence only — applies to the whole sentence.
@@ -97,10 +103,24 @@ export async function textToSpeech(text, config) {
97
103
  export async function speechToText(audioBuffer, config) {
98
104
  const { fishAudioApiKey } = config;
99
105
 
106
+ // Telegram voice messages are OGG/Opus — fish.audio ASR doesn't support Opus.
107
+ // Convert to WAV first via ffmpeg.
108
+ const id = `jarvis-stt-${Date.now()}`;
109
+ const inPath = join(tmpdir(), `${id}.ogg`);
110
+ const outPath = join(tmpdir(), `${id}.wav`);
111
+ let wavBuffer;
112
+ try {
113
+ await writeFile(inPath, audioBuffer);
114
+ await execAsync(`ffmpeg -y -i "${inPath}" -ar 16000 -ac 1 "${outPath}"`);
115
+ wavBuffer = await readFile(outPath);
116
+ } finally {
117
+ unlink(inPath).catch(() => {});
118
+ unlink(outPath).catch(() => {});
119
+ }
120
+
100
121
  const formData = new FormData();
101
- const blob = new Blob([audioBuffer], { type: 'audio/ogg' });
102
- formData.append('audio', blob, 'voice.ogg');
103
- formData.append('ignore_timestamps', 'true');
122
+ const blob = new Blob([wavBuffer], { type: 'audio/wav' });
123
+ formData.append('audio', blob, 'voice.wav');
104
124
 
105
125
  const response = await fetch('https://api.fish.audio/v1/asr', {
106
126
  method: 'POST',