ai-terminal-remote-bot 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ai-terminal-remote-bot",
|
|
3
|
-
"version": "1.0.2",
|
|
3
|
+
"version": "1.0.3",
|
|
4
4
|
"description": "Secure remote terminal bot with Claude Code integration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"@huggingface/transformers": "^3.4.1",
|
|
28
|
+
"ogg-opus-decoder": "^1.7.3",
|
|
28
29
|
"telegraf": "^4.16.3",
|
|
29
30
|
"wavefile": "^11.0.0"
|
|
30
31
|
}
|
package/src/i18n/locales/en.json
CHANGED
|
@@ -109,6 +109,7 @@
|
|
|
109
109
|
"truncated": "... (truncated)"
|
|
110
110
|
},
|
|
111
111
|
"transcriber": {
|
|
112
|
-
"ffmpegMissing": "ffmpeg is not installed. Install it with: sudo apt install ffmpeg"
|
|
112
|
+
"ffmpegMissing": "ffmpeg is not installed. Install it with: sudo apt install ffmpeg",
|
|
113
|
+
"unsupportedFormat": "Unsupported audio format: .{{ext}} (supported: ogg, oga, opus, wav)"
|
|
113
114
|
}
|
|
114
115
|
}
|
package/src/i18n/locales/es.json
CHANGED
|
@@ -109,6 +109,7 @@
|
|
|
109
109
|
"truncated": "... (truncado)"
|
|
110
110
|
},
|
|
111
111
|
"transcriber": {
|
|
112
|
-
"ffmpegMissing": "ffmpeg no está instalado. Instalalo con: sudo apt install ffmpeg"
|
|
112
|
+
"ffmpegMissing": "ffmpeg no está instalado. Instalalo con: sudo apt install ffmpeg",
|
|
113
|
+
"unsupportedFormat": "Formato de audio no soportado: .{{ext}} (soportados: ogg, oga, opus, wav)"
|
|
113
114
|
}
|
|
114
115
|
}
|
|
@@ -1,17 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Audio transcription module using @huggingface/transformers (Whisper ONNX).
|
|
3
3
|
* Lazy-loads the pipeline on first use. Model cached in ~/.cache/huggingface/.
|
|
4
|
+
* Uses ogg-opus-decoder (WASM) instead of ffmpeg for OGG→PCM conversion.
|
|
4
5
|
*/
|
|
5
6
|
|
|
6
|
-
import { execFile } from 'child_process';
|
|
7
|
-
import { promisify } from 'util';
|
|
8
7
|
import fs from 'fs';
|
|
9
|
-
import path from 'path';
|
|
10
|
-
import os from 'os';
|
|
11
8
|
import { t, getLang } from '../i18n/index.mjs';
|
|
12
9
|
|
|
13
|
-
const execFileAsync = promisify(execFile);
|
|
14
|
-
|
|
15
10
|
export class TranscriberModule {
|
|
16
11
|
constructor() {
|
|
17
12
|
this._pipeline = null;
|
|
@@ -31,80 +26,96 @@ export class TranscriberModule {
|
|
|
31
26
|
}
|
|
32
27
|
|
|
33
28
|
/**
|
|
34
|
-
*
|
|
35
|
-
* @param {string}
|
|
36
|
-
* @returns {Promise<
|
|
29
|
+
* Decode OGG Opus file to Float32Array (16kHz mono) using WASM decoder.
|
|
30
|
+
* @param {string} filePath - Path to OGG Opus file
|
|
31
|
+
* @returns {Promise<Float32Array>}
|
|
37
32
|
*/
|
|
38
|
-
async
|
|
39
|
-
const
|
|
40
|
-
|
|
41
|
-
await
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
33
|
+
async _decodeOgg(filePath) {
|
|
34
|
+
const { OggOpusDecoder } = await import('ogg-opus-decoder');
|
|
35
|
+
const decoder = new OggOpusDecoder();
|
|
36
|
+
await decoder.ready;
|
|
37
|
+
|
|
38
|
+
const fileBuffer = fs.readFileSync(filePath);
|
|
39
|
+
const { channelData, sampleRate } = await decoder.decode(new Uint8Array(fileBuffer));
|
|
40
|
+
decoder.free();
|
|
41
|
+
|
|
42
|
+
// Take first channel (mono)
|
|
43
|
+
let samples = channelData[0];
|
|
44
|
+
|
|
45
|
+
// Resample to 16kHz if needed
|
|
46
|
+
if (sampleRate !== 16000) {
|
|
47
|
+
samples = this._resample(samples, sampleRate, 16000);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
return samples;
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
/**
|
|
54
|
-
*
|
|
55
|
-
* @param {string}
|
|
54
|
+
* Decode WAV file to Float32Array (16kHz mono).
|
|
55
|
+
* @param {string} filePath - Path to WAV file
|
|
56
56
|
* @returns {Promise<Float32Array>}
|
|
57
57
|
*/
|
|
58
|
-
async
|
|
58
|
+
async _decodeWav(filePath) {
|
|
59
59
|
const { default: WaveFile } = await import('wavefile');
|
|
60
|
-
const wavBuffer = fs.readFileSync(
|
|
60
|
+
const wavBuffer = fs.readFileSync(filePath);
|
|
61
61
|
const wav = new WaveFile(wavBuffer);
|
|
62
62
|
wav.toBitDepth('32f');
|
|
63
63
|
wav.toSampleRate(16000);
|
|
64
64
|
|
|
65
65
|
const samples = wav.getSamples();
|
|
66
|
-
// getSamples returns Float64Array for 32f, convert to Float32
|
|
67
66
|
if (samples instanceof Float64Array || samples instanceof Float32Array) {
|
|
68
67
|
return new Float32Array(samples);
|
|
69
68
|
}
|
|
70
|
-
// Interleaved multi-channel: take first channel
|
|
71
69
|
if (Array.isArray(samples)) {
|
|
72
70
|
return new Float32Array(samples[0]);
|
|
73
71
|
}
|
|
74
72
|
return new Float32Array(samples);
|
|
75
73
|
}
|
|
76
74
|
|
|
75
|
+
/**
|
|
76
|
+
* Simple linear resampling.
|
|
77
|
+
* @param {Float32Array} samples
|
|
78
|
+
* @param {number} fromRate
|
|
79
|
+
* @param {number} toRate
|
|
80
|
+
* @returns {Float32Array}
|
|
81
|
+
*/
|
|
82
|
+
_resample(samples, fromRate, toRate) {
|
|
83
|
+
const ratio = fromRate / toRate;
|
|
84
|
+
const newLength = Math.round(samples.length / ratio);
|
|
85
|
+
const result = new Float32Array(newLength);
|
|
86
|
+
for (let i = 0; i < newLength; i++) {
|
|
87
|
+
const srcIdx = i * ratio;
|
|
88
|
+
const low = Math.floor(srcIdx);
|
|
89
|
+
const high = Math.min(low + 1, samples.length - 1);
|
|
90
|
+
const frac = srcIdx - low;
|
|
91
|
+
result[i] = samples[low] * (1 - frac) + samples[high] * frac;
|
|
92
|
+
}
|
|
93
|
+
return result;
|
|
94
|
+
}
|
|
95
|
+
|
|
77
96
|
/**
|
|
78
97
|
* Transcribe an audio file to text.
|
|
79
98
|
* @param {string} filePath - Path to audio file (OGG/Opus or WAV; other formats are rejected)
|
|
80
99
|
* @returns {Promise<string>} Transcribed text
|
|
81
100
|
*/
|
|
82
101
|
async transcribe(filePath) {
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
await execFileAsync('which', ['ffmpeg']);
|
|
86
|
-
} catch {
|
|
87
|
-
throw new Error(t('transcriber.ffmpegMissing'));
|
|
88
|
-
}
|
|
102
|
+
const ext = filePath.split('.').pop().toLowerCase();
|
|
103
|
+
let audioData;
|
|
89
104
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
105
|
+
if (ext === 'ogg' || ext === 'opus' || ext === 'oga') {
|
|
106
|
+
audioData = await this._decodeOgg(filePath);
|
|
107
|
+
} else if (ext === 'wav') {
|
|
108
|
+
audioData = await this._decodeWav(filePath);
|
|
109
|
+
} else {
|
|
110
|
+
throw new Error(t('transcriber.unsupportedFormat', { ext }));
|
|
111
|
+
}
|
|
96
112
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
});
|
|
113
|
+
const pipe = await this._getPipeline();
|
|
114
|
+
const result = await pipe(audioData, {
|
|
115
|
+
language: getLang(),
|
|
116
|
+
task: 'transcribe',
|
|
117
|
+
});
|
|
103
118
|
|
|
104
|
-
|
|
105
|
-
} finally {
|
|
106
|
-
// Clean up temp WAV
|
|
107
|
-
try { fs.unlinkSync(wavPath); } catch {}
|
|
108
|
-
}
|
|
119
|
+
return result.text.trim();
|
|
109
120
|
}
|
|
110
121
|
}
|