ai-terminal-remote-bot 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-terminal-remote-bot",
3
- "version": "1.0.2",
3
+ "version": "1.0.3",
4
4
  "description": "Secure remote terminal bot with Claude Code integration",
5
5
  "type": "module",
6
6
  "bin": {
@@ -25,6 +25,7 @@
25
25
  },
26
26
  "dependencies": {
27
27
  "@huggingface/transformers": "^3.4.1",
28
+ "ogg-opus-decoder": "^1.7.3",
28
29
  "telegraf": "^4.16.3",
29
30
  "wavefile": "^11.0.0"
30
31
  }
@@ -109,6 +109,7 @@
109
109
  "truncated": "... (truncated)"
110
110
  },
111
111
  "transcriber": {
112
- "ffmpegMissing": "ffmpeg is not installed. Install it with: sudo apt install ffmpeg"
112
+ "ffmpegMissing": "ffmpeg is not installed. Install it with: sudo apt install ffmpeg",
113
+ "unsupportedFormat": "Unsupported audio format: .{{ext}} (supported: ogg, opus, wav)"
113
114
  }
114
115
  }
@@ -109,6 +109,7 @@
109
109
  "truncated": "... (truncado)"
110
110
  },
111
111
  "transcriber": {
112
- "ffmpegMissing": "ffmpeg no está instalado. Instalalo con: sudo apt install ffmpeg"
112
+ "ffmpegMissing": "ffmpeg no está instalado. Instalalo con: sudo apt install ffmpeg",
113
+ "unsupportedFormat": "Formato de audio no soportado: .{{ext}} (soportados: ogg, opus, wav)"
113
114
  }
114
115
  }
@@ -1,17 +1,12 @@
1
1
  /**
2
2
  * Audio transcription module using @huggingface/transformers (Whisper ONNX).
3
3
  * Lazy-loads the pipeline on first use. Model cached in ~/.cache/huggingface/.
4
+ * Uses ogg-opus-decoder (WASM) instead of ffmpeg for OGG→PCM conversion.
4
5
  */
5
6
 
6
- import { execFile } from 'child_process';
7
- import { promisify } from 'util';
8
7
  import fs from 'fs';
9
- import path from 'path';
10
- import os from 'os';
11
8
  import { t, getLang } from '../i18n/index.mjs';
12
9
 
13
- const execFileAsync = promisify(execFile);
14
-
15
10
  export class TranscriberModule {
16
11
  constructor() {
17
12
  this._pipeline = null;
@@ -31,80 +26,96 @@ export class TranscriberModule {
31
26
  }
32
27
 
33
28
  /**
34
- * Convert audio file to 16kHz mono WAV using ffmpeg.
35
- * @param {string} inputPath - Path to input audio file
36
- * @returns {Promise<string>} Path to converted WAV file
29
+ * Decode an OGG Opus file with the WASM decoder into a 16 kHz Float32Array; only the first channel is kept.
30
+ * @param {string} filePath - Path to OGG Opus file
31
+ * @returns {Promise<Float32Array>}
37
32
  */
38
- async _convertToWav(inputPath) {
39
- const wavPath = path.join(os.tmpdir(), `transcribe_${Date.now()}.wav`);
40
-
41
- await execFileAsync('ffmpeg', [
42
- '-i', inputPath,
43
- '-ar', '16000',
44
- '-ac', '1',
45
- '-f', 'wav',
46
- '-y',
47
- wavPath,
48
- ], { timeout: 30000 });
49
-
50
- return wavPath;
33
+ async _decodeOgg(filePath) {
34
+ const { OggOpusDecoder } = await import('ogg-opus-decoder');
35
+ const decoder = new OggOpusDecoder();
36
+ await decoder.ready;
37
+
38
+ const fileBuffer = fs.readFileSync(filePath);
39
+ const { channelData, sampleRate } = await decoder.decode(new Uint8Array(fileBuffer));
40
+ decoder.free();
41
+
42
+ // Take first channel (mono)
43
+ let samples = channelData[0];
44
+
45
+ // Resample to 16kHz if needed
46
+ if (sampleRate !== 16000) {
47
+ samples = this._resample(samples, sampleRate, 16000);
48
+ }
49
+
50
+ return samples;
51
51
  }
52
52
 
53
53
  /**
54
- * Parse WAV file into Float32Array of audio samples.
55
- * @param {string} wavPath - Path to 16kHz mono WAV file
54
+ * Decode WAV file to Float32Array (16kHz mono).
55
+ * @param {string} filePath - Path to WAV file
56
56
  * @returns {Promise<Float32Array>}
57
57
  */
58
- async _parseWav(wavPath) {
58
+ async _decodeWav(filePath) {
59
59
  const { default: WaveFile } = await import('wavefile');
60
- const wavBuffer = fs.readFileSync(wavPath);
60
+ const wavBuffer = fs.readFileSync(filePath);
61
61
  const wav = new WaveFile(wavBuffer);
62
62
  wav.toBitDepth('32f');
63
63
  wav.toSampleRate(16000);
64
64
 
65
65
  const samples = wav.getSamples();
66
- // getSamples returns Float64Array for 32f, convert to Float32
67
66
  if (samples instanceof Float64Array || samples instanceof Float32Array) {
68
67
  return new Float32Array(samples);
69
68
  }
70
- // Interleaved multi-channel: take first channel
71
69
  if (Array.isArray(samples)) {
72
70
  return new Float32Array(samples[0]);
73
71
  }
74
72
  return new Float32Array(samples);
75
73
  }
76
74
 
75
+ /**
76
+ * Resample by linear interpolation between neighbouring samples (no anti-aliasing filter is applied).
77
+ * @param {Float32Array} samples
78
+ * @param {number} fromRate
79
+ * @param {number} toRate
80
+ * @returns {Float32Array}
81
+ */
82
+ _resample(samples, fromRate, toRate) {
83
+ const ratio = fromRate / toRate;
84
+ const newLength = Math.round(samples.length / ratio);
85
+ const result = new Float32Array(newLength);
86
+ for (let i = 0; i < newLength; i++) {
87
+ const srcIdx = i * ratio;
88
+ const low = Math.floor(srcIdx);
89
+ const high = Math.min(low + 1, samples.length - 1);
90
+ const frac = srcIdx - low;
91
+ result[i] = samples[low] * (1 - frac) + samples[high] * frac;
92
+ }
93
+ return result;
94
+ }
95
+
77
96
  /**
78
97
  * Transcribe an audio file to text.
79
98
  * @param {string} filePath - Path to audio file (.ogg, .oga, .opus or .wav; any other extension is rejected)
80
99
  * @returns {Promise<string>} Transcribed text
81
100
  */
82
101
  async transcribe(filePath) {
83
- // Check ffmpeg availability
84
- try {
85
- await execFileAsync('which', ['ffmpeg']);
86
- } catch {
87
- throw new Error(t('transcriber.ffmpegMissing'));
88
- }
102
+ const ext = filePath.split('.').pop().toLowerCase();
103
+ let audioData;
89
104
 
90
- // Convert to 16kHz WAV
91
- const wavPath = await this._convertToWav(filePath);
92
-
93
- try {
94
- // Parse WAV to Float32Array
95
- const audioData = await this._parseWav(wavPath);
105
+ if (ext === 'ogg' || ext === 'opus' || ext === 'oga') {
106
+ audioData = await this._decodeOgg(filePath);
107
+ } else if (ext === 'wav') {
108
+ audioData = await this._decodeWav(filePath);
109
+ } else {
110
+ throw new Error(t('transcriber.unsupportedFormat', { ext }));
111
+ }
96
112
 
97
- // Run whisper pipeline
98
- const pipe = await this._getPipeline();
99
- const result = await pipe(audioData, {
100
- language: getLang(),
101
- task: 'transcribe',
102
- });
113
+ const pipe = await this._getPipeline();
114
+ const result = await pipe(audioData, {
115
+ language: getLang(),
116
+ task: 'transcribe',
117
+ });
103
118
 
104
- return result.text.trim();
105
- } finally {
106
- // Clean up temp WAV
107
- try { fs.unlinkSync(wavPath); } catch {}
108
- }
119
+ return result.text.trim();
109
120
  }
110
121
  }