ai-terminal-remote-bot 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-terminal-remote-bot",
3
- "version": "1.0.2",
3
+ "version": "1.0.4",
4
4
  "description": "Secure remote terminal bot with Claude Code integration",
5
5
  "type": "module",
6
6
  "bin": {
@@ -24,8 +24,6 @@
24
24
  "node": ">=18.0.0"
25
25
  },
26
26
  "dependencies": {
27
- "@huggingface/transformers": "^3.4.1",
28
- "telegraf": "^4.16.3",
29
- "wavefile": "^11.0.0"
27
+ "telegraf": "^4.16.3"
30
28
  }
31
29
  }
@@ -52,6 +52,8 @@
52
52
  "imageSaved": "🖼️ Image saved: `{{dest}}`",
53
53
  "claudeImageWithCaption": "Look at this image I saved at {{dest}} and: {{caption}}",
54
54
  "claudeImageAnalyze": "Look at the image I saved at {{dest}} and describe it. If it's a code or error screenshot, analyze it.",
55
+ "claudeAudioTranscribe": "Transcribe the audio file at {{dest}}. Return only the transcription text.",
56
+ "claudeAudioWithCaption": "Transcribe the audio file at {{dest}} and then respond to: {{caption}}",
55
57
  "audioSaved": "🎤 Audio saved: `{{dest}}`\n\nTranscribing...",
56
58
  "transcription": "🎤 *Transcription:*\n\n{{text}}",
57
59
  "transcriptionFailed": "⚠️ Could not transcribe: {{error}}\nAudio saved as file.",
@@ -109,6 +111,7 @@
109
111
  "truncated": "... (truncated)"
110
112
  },
111
113
  "transcriber": {
112
- "ffmpegMissing": "ffmpeg is not installed. Install it with: sudo apt install ffmpeg"
114
+ "ffmpegMissing": "ffmpeg is not installed. Install it with: sudo apt install ffmpeg",
115
+ "unsupportedFormat": "Unsupported audio format: .{{ext}} (supported: ogg, opus, wav)"
113
116
  }
114
117
  }
@@ -52,6 +52,8 @@
52
52
  "imageSaved": "🖼️ Imagen guardada: `{{dest}}`",
53
53
  "claudeImageWithCaption": "Mirá esta imagen que guardé en {{dest}} y: {{caption}}",
54
54
  "claudeImageAnalyze": "Mirá la imagen que guardé en {{dest}} y describila. Si es un screenshot de código o error, analizalo.",
55
+ "claudeAudioTranscribe": "Transcribí el archivo de audio en {{dest}}. Devolvé solo el texto de la transcripción.",
56
+ "claudeAudioWithCaption": "Transcribí el archivo de audio en {{dest}} y luego respondé a: {{caption}}",
55
57
  "audioSaved": "🎤 Audio guardado: `{{dest}}`\n\nTranscribiendo...",
56
58
  "transcription": "🎤 *Transcripción:*\n\n{{text}}",
57
59
  "transcriptionFailed": "⚠️ No se pudo transcribir: {{error}}\nAudio guardado como archivo.",
@@ -109,6 +111,7 @@
109
111
  "truncated": "... (truncado)"
110
112
  },
111
113
  "transcriber": {
112
- "ffmpegMissing": "ffmpeg no está instalado. Instalalo con: sudo apt install ffmpeg"
114
+ "ffmpegMissing": "ffmpeg no está instalado. Instalalo con: sudo apt install ffmpeg",
115
+ "unsupportedFormat": "Formato de audio no soportado: .{{ext}} (soportados: ogg, opus, wav)"
113
116
  }
114
117
  }
package/src/server.mjs CHANGED
@@ -10,7 +10,7 @@ import { TelegramPlatform } from './platforms/telegram.mjs';
10
10
  import { TerminalModule } from './modules/terminal.mjs';
11
11
  import { ClaudeModule } from './modules/claude.mjs';
12
12
  import { MediaModule } from './modules/media.mjs';
13
- import { TranscriberModule } from './modules/transcriber.mjs';
13
+
14
14
  import { validateCommand, validatePath, validatePid, createRateLimiter } from './security.mjs';
15
15
  import { chunkText } from './utils.mjs';
16
16
  import { t } from './i18n/index.mjs';
@@ -45,7 +45,7 @@ export async function startServer(config) {
45
45
  });
46
46
  const claude = new ClaudeModule({ timeout: config.claudeTimeout });
47
47
  const media = new MediaModule({ allowedPaths: config.allowedPaths });
48
- const transcriber = new TranscriberModule();
48
+
49
49
  const rateLimiter = createRateLimiter();
50
50
 
51
51
  // === Session Management ===
@@ -444,16 +444,10 @@ export async function startServer(config) {
444
444
 
445
445
  await mctx.reply(t('server.audioSaved', { dest }));
446
446
 
447
- try {
448
- const transcript = await transcriber.transcribe(dest);
449
- await platform.sendMessage(mctx.chatId, t('server.transcription', { text: transcript }));
450
-
451
- if (session.mode === 'claude') {
452
- await handleClaude(mctx, transcript, session);
453
- }
454
- } catch (txErr) {
455
- await mctx.reply(t('server.transcriptionFailed', { error: txErr.message }));
456
- }
447
+ const prompt = mctx.caption
448
+ ? t('server.claudeAudioWithCaption', { dest, caption: mctx.caption })
449
+ : t('server.claudeAudioTranscribe', { dest });
450
+ await handleClaude(mctx, prompt, session);
457
451
  } catch (e) {
458
452
  mctx.reply(`❌ ${e.message}`);
459
453
  }
package/src/modules/transcriber.mjs DELETED
@@ -1,110 +0,0 @@
1
- /**
2
- * Audio transcription module using @huggingface/transformers (Whisper ONNX).
3
- * Lazy-loads the pipeline on first use. Model cached in ~/.cache/huggingface/.
4
- */
5
-
6
- import { execFile } from 'child_process';
7
- import { promisify } from 'util';
8
- import fs from 'fs';
9
- import path from 'path';
10
- import os from 'os';
11
- import { t, getLang } from '../i18n/index.mjs';
12
-
13
- const execFileAsync = promisify(execFile);
14
-
15
- export class TranscriberModule {
16
- constructor() {
17
- this._pipeline = null;
18
- }
19
-
20
- /**
21
- * Lazy-load the whisper pipeline (downloads ~40MB model on first call).
22
- */
23
- async _getPipeline() {
24
- if (this._pipeline) return this._pipeline;
25
-
26
- const { pipeline } = await import('@huggingface/transformers');
27
- this._pipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny', {
28
- dtype: 'fp32',
29
- });
30
- return this._pipeline;
31
- }
32
-
33
- /**
34
- * Convert audio file to 16kHz mono WAV using ffmpeg.
35
- * @param {string} inputPath - Path to input audio file
36
- * @returns {Promise<string>} Path to converted WAV file
37
- */
38
- async _convertToWav(inputPath) {
39
- const wavPath = path.join(os.tmpdir(), `transcribe_${Date.now()}.wav`);
40
-
41
- await execFileAsync('ffmpeg', [
42
- '-i', inputPath,
43
- '-ar', '16000',
44
- '-ac', '1',
45
- '-f', 'wav',
46
- '-y',
47
- wavPath,
48
- ], { timeout: 30000 });
49
-
50
- return wavPath;
51
- }
52
-
53
- /**
54
- * Parse WAV file into Float32Array of audio samples.
55
- * @param {string} wavPath - Path to 16kHz mono WAV file
56
- * @returns {Promise<Float32Array>}
57
- */
58
- async _parseWav(wavPath) {
59
- const { default: WaveFile } = await import('wavefile');
60
- const wavBuffer = fs.readFileSync(wavPath);
61
- const wav = new WaveFile(wavBuffer);
62
- wav.toBitDepth('32f');
63
- wav.toSampleRate(16000);
64
-
65
- const samples = wav.getSamples();
66
- // getSamples returns Float64Array for 32f, convert to Float32
67
- if (samples instanceof Float64Array || samples instanceof Float32Array) {
68
- return new Float32Array(samples);
69
- }
70
- // Interleaved multi-channel: take first channel
71
- if (Array.isArray(samples)) {
72
- return new Float32Array(samples[0]);
73
- }
74
- return new Float32Array(samples);
75
- }
76
-
77
- /**
78
- * Transcribe an audio file to text.
79
- * @param {string} filePath - Path to audio file (OGG, MP3, WAV, etc.)
80
- * @returns {Promise<string>} Transcribed text
81
- */
82
- async transcribe(filePath) {
83
- // Check ffmpeg availability
84
- try {
85
- await execFileAsync('which', ['ffmpeg']);
86
- } catch {
87
- throw new Error(t('transcriber.ffmpegMissing'));
88
- }
89
-
90
- // Convert to 16kHz WAV
91
- const wavPath = await this._convertToWav(filePath);
92
-
93
- try {
94
- // Parse WAV to Float32Array
95
- const audioData = await this._parseWav(wavPath);
96
-
97
- // Run whisper pipeline
98
- const pipe = await this._getPipeline();
99
- const result = await pipe(audioData, {
100
- language: getLang(),
101
- task: 'transcribe',
102
- });
103
-
104
- return result.text.trim();
105
- } finally {
106
- // Clean up temp WAV
107
- try { fs.unlinkSync(wavPath); } catch {}
108
- }
109
- }
110
- }