npm - ai-terminal-remote-bot - Versions diffs - 1.0.2 → 1.0.3 - Mend

ai-terminal-remote-bot 1.0.2 → 1.0.3

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (4)

package/package.json +2 -1
package/src/i18n/locales/en.json +2 -1
package/src/i18n/locales/es.json +2 -1
package/src/modules/transcriber.mjs +62 -51

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "ai-terminal-remote-bot",
-  "version": "1.0.2",
+  "version": "1.0.3",
   "description": "Secure remote terminal bot with Claude Code integration",
   "type": "module",
   "bin": {
@@ -25,6 +25,7 @@
   },
   "dependencies": {
     "@huggingface/transformers": "^3.4.1",
+    "ogg-opus-decoder": "^1.7.3",
     "telegraf": "^4.16.3",
     "wavefile": "^11.0.0"
   }

package/src/i18n/locales/en.json CHANGED

@@ -109,6 +109,7 @@
     "truncated": "... (truncated)"
   },
   "transcriber": {
-    "ffmpegMissing": "ffmpeg is not installed. Install it with: sudo apt install ffmpeg"
+    "ffmpegMissing": "ffmpeg is not installed. Install it with: sudo apt install ffmpeg",
+    "unsupportedFormat": "Unsupported audio format: .{{ext}} (supported: ogg, opus, wav)"
   }
 }

package/src/i18n/locales/es.json CHANGED

@@ -109,6 +109,7 @@
     "truncated": "... (truncado)"
   },
   "transcriber": {
-    "ffmpegMissing": "ffmpeg no está instalado. Instalalo con: sudo apt install ffmpeg"
+    "ffmpegMissing": "ffmpeg no está instalado. Instalalo con: sudo apt install ffmpeg",
+    "unsupportedFormat": "Formato de audio no soportado: .{{ext}} (soportados: ogg, opus, wav)"
   }
 }

package/src/modules/transcriber.mjs CHANGED

@@ -1,17 +1,12 @@
 /**
  * Audio transcription module using @huggingface/transformers (Whisper ONNX).
  * Lazy-loads the pipeline on first use. Model cached in ~/.cache/huggingface/.
+ * Uses ogg-opus-decoder (WASM) instead of ffmpeg for OGG→PCM conversion.
  */
-import { execFile } from 'child_process';
-import { promisify } from 'util';
 import fs from 'fs';
-import path from 'path';
-import os from 'os';
 import { t, getLang } from '../i18n/index.mjs';
-const execFileAsync = promisify(execFile);
 export class TranscriberModule {
   constructor() {
     this._pipeline = null;
@@ -31,80 +26,96 @@ export class TranscriberModule {
   }
   /**
-   * Convert audio file to 16kHz mono WAV using ffmpeg.
-   * @param {string} inputPath - Path to input audio file
-   * @returns {Promise<string>} Path to converted WAV file
+   * Decode OGG Opus file to Float32Array (16kHz mono) using WASM decoder.
+   * @param {string} filePath - Path to OGG Opus file
+   * @returns {Promise<Float32Array>}
    */
-  async _convertToWav(inputPath) {
-    const wavPath = path.join(os.tmpdir(), `transcribe_${Date.now()}.wav`);
-    await execFileAsync('ffmpeg', [
-      '-i', inputPath,
-      '-ar', '16000',
-      '-ac', '1',
-      '-f', 'wav',
-      '-y',
-      wavPath,
-    ], { timeout: 30000 });
-    return wavPath;
+  async _decodeOgg(filePath) {
+    const { OggOpusDecoder } = await import('ogg-opus-decoder');
+    const decoder = new OggOpusDecoder();
+    await decoder.ready;
+    const fileBuffer = fs.readFileSync(filePath);
+    const { channelData, sampleRate } = await decoder.decode(new Uint8Array(fileBuffer));
+    decoder.free();
+    // Take first channel (mono)
+    let samples = channelData[0];
+    // Resample to 16kHz if needed
+    if (sampleRate !== 16000) {
+      samples = this._resample(samples, sampleRate, 16000);
+    }
+    return samples;
   }
   /**
-   * Parse WAV file into Float32Array of audio samples.
-   * @param {string} wavPath - Path to 16kHz mono WAV file
+   * Decode WAV file to Float32Array (16kHz mono).
+   * @param {string} filePath - Path to WAV file
    * @returns {Promise<Float32Array>}
    */
-  async _parseWav(wavPath) {
+  async _decodeWav(filePath) {
     const { default: WaveFile } = await import('wavefile');
-    const wavBuffer = fs.readFileSync(wavPath);
+    const wavBuffer = fs.readFileSync(filePath);
     const wav = new WaveFile(wavBuffer);
     wav.toBitDepth('32f');
     wav.toSampleRate(16000);
     const samples = wav.getSamples();
-    // getSamples returns Float64Array for 32f, convert to Float32
     if (samples instanceof Float64Array || samples instanceof Float32Array) {
       return new Float32Array(samples);
     }
-    // Interleaved multi-channel: take first channel
     if (Array.isArray(samples)) {
       return new Float32Array(samples[0]);
     }
     return new Float32Array(samples);
   }
+  /**
+   * Simple linear resampling.
+   * @param {Float32Array} samples
+   * @param {number} fromRate
+   * @param {number} toRate
+   * @returns {Float32Array}
+   */
+  _resample(samples, fromRate, toRate) {
+    const ratio = fromRate / toRate;
+    const newLength = Math.round(samples.length / ratio);
+    const result = new Float32Array(newLength);
+    for (let i = 0; i < newLength; i++) {
+      const srcIdx = i * ratio;
+      const low = Math.floor(srcIdx);
+      const high = Math.min(low + 1, samples.length - 1);
+      const frac = srcIdx - low;
+      result[i] = samples[low] * (1 - frac) + samples[high] * frac;
+    }
+    return result;
+  }
   /**
    * Transcribe an audio file to text.
    * @param {string} filePath - Path to audio file (OGG, MP3, WAV, etc.)
    * @returns {Promise<string>} Transcribed text
    */
   async transcribe(filePath) {
-    // Check ffmpeg availability
-    try {
-      await execFileAsync('which', ['ffmpeg']);
-    } catch {
-      throw new Error(t('transcriber.ffmpegMissing'));
-    }
+    const ext = filePath.split('.').pop().toLowerCase();
+    let audioData;
-    // Convert to 16kHz WAV
-    const wavPath = await this._convertToWav(filePath);
-    try {
-      // Parse WAV to Float32Array
-      const audioData = await this._parseWav(wavPath);
+    if (ext === 'ogg' || ext === 'opus' || ext === 'oga') {
+      audioData = await this._decodeOgg(filePath);
+    } else if (ext === 'wav') {
+      audioData = await this._decodeWav(filePath);
+    } else {
+      throw new Error(t('transcriber.unsupportedFormat', { ext }));
+    }
-      // Run whisper pipeline
-      const pipe = await this._getPipeline();
-      const result = await pipe(audioData, {
-        language: getLang(),
-        task: 'transcribe',
-      });
+    const pipe = await this._getPipeline();
+    const result = await pipe(audioData, {
+      language: getLang(),
+      task: 'transcribe',
+    });
-      return result.text.trim();
-    } finally {
-      // Clean up temp WAV
-      try { fs.unlinkSync(wavPath); } catch {}
-    }
+    return result.text.trim();
   }
 }