ai-terminal-remote-bot 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -4
- package/src/i18n/locales/en.json +4 -1
- package/src/i18n/locales/es.json +4 -1
- package/src/server.mjs +6 -12
- package/src/modules/transcriber.mjs +0 -110
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ai-terminal-remote-bot",
|
|
3
|
-
"version": "1.0.2",
|
|
3
|
+
"version": "1.0.4",
|
|
4
4
|
"description": "Secure remote terminal bot with Claude Code integration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -24,8 +24,6 @@
|
|
|
24
24
|
"node": ">=18.0.0"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"
|
|
28
|
-
"telegraf": "^4.16.3",
|
|
29
|
-
"wavefile": "^11.0.0"
|
|
27
|
+
"telegraf": "^4.16.3"
|
|
30
28
|
}
|
|
31
29
|
}
|
package/src/i18n/locales/en.json
CHANGED
|
@@ -52,6 +52,8 @@
|
|
|
52
52
|
"imageSaved": "🖼️ Image saved: `{{dest}}`",
|
|
53
53
|
"claudeImageWithCaption": "Look at this image I saved at {{dest}} and: {{caption}}",
|
|
54
54
|
"claudeImageAnalyze": "Look at the image I saved at {{dest}} and describe it. If it's a code or error screenshot, analyze it.",
|
|
55
|
+
"claudeAudioTranscribe": "Transcribe the audio file at {{dest}}. Return only the transcription text.",
|
|
56
|
+
"claudeAudioWithCaption": "Transcribe the audio file at {{dest}} and then respond to: {{caption}}",
|
|
55
57
|
"audioSaved": "🎤 Audio saved: `{{dest}}`\n\nTranscribing...",
|
|
56
58
|
"transcription": "🎤 *Transcription:*\n\n{{text}}",
|
|
57
59
|
"transcriptionFailed": "⚠️ Could not transcribe: {{error}}\nAudio saved as file.",
|
|
@@ -109,6 +111,7 @@
|
|
|
109
111
|
"truncated": "... (truncated)"
|
|
110
112
|
},
|
|
111
113
|
"transcriber": {
|
|
112
|
-
"ffmpegMissing": "ffmpeg is not installed. Install it with: sudo apt install ffmpeg"
|
|
114
|
+
"ffmpegMissing": "ffmpeg is not installed. Install it with: sudo apt install ffmpeg",
|
|
115
|
+
"unsupportedFormat": "Unsupported audio format: .{{ext}} (supported: ogg, opus, wav)"
|
|
113
116
|
}
|
|
114
117
|
}
|
package/src/i18n/locales/es.json
CHANGED
|
@@ -52,6 +52,8 @@
|
|
|
52
52
|
"imageSaved": "🖼️ Imagen guardada: `{{dest}}`",
|
|
53
53
|
"claudeImageWithCaption": "Mirá esta imagen que guardé en {{dest}} y: {{caption}}",
|
|
54
54
|
"claudeImageAnalyze": "Mirá la imagen que guardé en {{dest}} y describila. Si es un screenshot de código o error, analizalo.",
|
|
55
|
+
"claudeAudioTranscribe": "Transcribí el archivo de audio en {{dest}}. Devolvé solo el texto de la transcripción.",
|
|
56
|
+
"claudeAudioWithCaption": "Transcribí el archivo de audio en {{dest}} y luego respondé a: {{caption}}",
|
|
55
57
|
"audioSaved": "🎤 Audio guardado: `{{dest}}`\n\nTranscribiendo...",
|
|
56
58
|
"transcription": "🎤 *Transcripción:*\n\n{{text}}",
|
|
57
59
|
"transcriptionFailed": "⚠️ No se pudo transcribir: {{error}}\nAudio guardado como archivo.",
|
|
@@ -109,6 +111,7 @@
|
|
|
109
111
|
"truncated": "... (truncado)"
|
|
110
112
|
},
|
|
111
113
|
"transcriber": {
|
|
112
|
-
"ffmpegMissing": "ffmpeg no está instalado. Instalalo con: sudo apt install ffmpeg"
|
|
114
|
+
"ffmpegMissing": "ffmpeg no está instalado. Instalalo con: sudo apt install ffmpeg",
|
|
115
|
+
"unsupportedFormat": "Formato de audio no soportado: .{{ext}} (soportados: ogg, opus, wav)"
|
|
113
116
|
}
|
|
114
117
|
}
|
package/src/server.mjs
CHANGED
|
@@ -10,7 +10,7 @@ import { TelegramPlatform } from './platforms/telegram.mjs';
|
|
|
10
10
|
import { TerminalModule } from './modules/terminal.mjs';
|
|
11
11
|
import { ClaudeModule } from './modules/claude.mjs';
|
|
12
12
|
import { MediaModule } from './modules/media.mjs';
|
|
13
|
-
|
|
13
|
+
|
|
14
14
|
import { validateCommand, validatePath, validatePid, createRateLimiter } from './security.mjs';
|
|
15
15
|
import { chunkText } from './utils.mjs';
|
|
16
16
|
import { t } from './i18n/index.mjs';
|
|
@@ -45,7 +45,7 @@ export async function startServer(config) {
|
|
|
45
45
|
});
|
|
46
46
|
const claude = new ClaudeModule({ timeout: config.claudeTimeout });
|
|
47
47
|
const media = new MediaModule({ allowedPaths: config.allowedPaths });
|
|
48
|
-
|
|
48
|
+
|
|
49
49
|
const rateLimiter = createRateLimiter();
|
|
50
50
|
|
|
51
51
|
// === Session Management ===
|
|
@@ -444,16 +444,10 @@ export async function startServer(config) {
|
|
|
444
444
|
|
|
445
445
|
await mctx.reply(t('server.audioSaved', { dest }));
|
|
446
446
|
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
if (session.mode === 'claude') {
|
|
452
|
-
await handleClaude(mctx, transcript, session);
|
|
453
|
-
}
|
|
454
|
-
} catch (txErr) {
|
|
455
|
-
await mctx.reply(t('server.transcriptionFailed', { error: txErr.message }));
|
|
456
|
-
}
|
|
447
|
+
const prompt = mctx.caption
|
|
448
|
+
? t('server.claudeAudioWithCaption', { dest, caption: mctx.caption })
|
|
449
|
+
: t('server.claudeAudioTranscribe', { dest });
|
|
450
|
+
await handleClaude(mctx, prompt, session);
|
|
457
451
|
} catch (e) {
|
|
458
452
|
mctx.reply(`❌ ${e.message}`);
|
|
459
453
|
}
|
package/src/modules/transcriber.mjs
DELETED
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Audio transcription module using @huggingface/transformers (Whisper ONNX).
|
|
3
|
-
* Lazy-loads the pipeline on first use. Model cached in ~/.cache/huggingface/.
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { execFile } from 'child_process';
|
|
7
|
-
import { promisify } from 'util';
|
|
8
|
-
import fs from 'fs';
|
|
9
|
-
import path from 'path';
|
|
10
|
-
import os from 'os';
|
|
11
|
-
import { t, getLang } from '../i18n/index.mjs';
|
|
12
|
-
|
|
13
|
-
const execFileAsync = promisify(execFile);
|
|
14
|
-
|
|
15
|
-
export class TranscriberModule {
|
|
16
|
-
constructor() {
|
|
17
|
-
this._pipeline = null;
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
/**
|
|
21
|
-
* Lazy-load the whisper pipeline (downloads ~40MB model on first call).
|
|
22
|
-
*/
|
|
23
|
-
async _getPipeline() {
|
|
24
|
-
if (this._pipeline) return this._pipeline;
|
|
25
|
-
|
|
26
|
-
const { pipeline } = await import('@huggingface/transformers');
|
|
27
|
-
this._pipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny', {
|
|
28
|
-
dtype: 'fp32',
|
|
29
|
-
});
|
|
30
|
-
return this._pipeline;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
/**
|
|
34
|
-
* Convert audio file to 16kHz mono WAV using ffmpeg.
|
|
35
|
-
* @param {string} inputPath - Path to input audio file
|
|
36
|
-
* @returns {Promise<string>} Path to converted WAV file
|
|
37
|
-
*/
|
|
38
|
-
async _convertToWav(inputPath) {
|
|
39
|
-
const wavPath = path.join(os.tmpdir(), `transcribe_${Date.now()}.wav`);
|
|
40
|
-
|
|
41
|
-
await execFileAsync('ffmpeg', [
|
|
42
|
-
'-i', inputPath,
|
|
43
|
-
'-ar', '16000',
|
|
44
|
-
'-ac', '1',
|
|
45
|
-
'-f', 'wav',
|
|
46
|
-
'-y',
|
|
47
|
-
wavPath,
|
|
48
|
-
], { timeout: 30000 });
|
|
49
|
-
|
|
50
|
-
return wavPath;
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
/**
|
|
54
|
-
* Parse WAV file into Float32Array of audio samples.
|
|
55
|
-
* @param {string} wavPath - Path to 16kHz mono WAV file
|
|
56
|
-
* @returns {Promise<Float32Array>}
|
|
57
|
-
*/
|
|
58
|
-
async _parseWav(wavPath) {
|
|
59
|
-
const { default: WaveFile } = await import('wavefile');
|
|
60
|
-
const wavBuffer = fs.readFileSync(wavPath);
|
|
61
|
-
const wav = new WaveFile(wavBuffer);
|
|
62
|
-
wav.toBitDepth('32f');
|
|
63
|
-
wav.toSampleRate(16000);
|
|
64
|
-
|
|
65
|
-
const samples = wav.getSamples();
|
|
66
|
-
// getSamples returns Float64Array for 32f, convert to Float32
|
|
67
|
-
if (samples instanceof Float64Array || samples instanceof Float32Array) {
|
|
68
|
-
return new Float32Array(samples);
|
|
69
|
-
}
|
|
70
|
-
// Interleaved multi-channel: take first channel
|
|
71
|
-
if (Array.isArray(samples)) {
|
|
72
|
-
return new Float32Array(samples[0]);
|
|
73
|
-
}
|
|
74
|
-
return new Float32Array(samples);
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
/**
|
|
78
|
-
* Transcribe an audio file to text.
|
|
79
|
-
* @param {string} filePath - Path to audio file (OGG, MP3, WAV, etc.)
|
|
80
|
-
* @returns {Promise<string>} Transcribed text
|
|
81
|
-
*/
|
|
82
|
-
async transcribe(filePath) {
|
|
83
|
-
// Check ffmpeg availability
|
|
84
|
-
try {
|
|
85
|
-
await execFileAsync('which', ['ffmpeg']);
|
|
86
|
-
} catch {
|
|
87
|
-
throw new Error(t('transcriber.ffmpegMissing'));
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
// Convert to 16kHz WAV
|
|
91
|
-
const wavPath = await this._convertToWav(filePath);
|
|
92
|
-
|
|
93
|
-
try {
|
|
94
|
-
// Parse WAV to Float32Array
|
|
95
|
-
const audioData = await this._parseWav(wavPath);
|
|
96
|
-
|
|
97
|
-
// Run whisper pipeline
|
|
98
|
-
const pipe = await this._getPipeline();
|
|
99
|
-
const result = await pipe(audioData, {
|
|
100
|
-
language: getLang(),
|
|
101
|
-
task: 'transcribe',
|
|
102
|
-
});
|
|
103
|
-
|
|
104
|
-
return result.text.trim();
|
|
105
|
-
} finally {
|
|
106
|
-
// Clean up temp WAV
|
|
107
|
-
try { fs.unlinkSync(wavPath); } catch {}
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
}
|