npm - ai-terminal-remote-bot - Versions diffs - 1.0.2 → 1.0.4 - Mend

ai-terminal-remote-bot 1.0.2 → 1.0.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (5)

package/package.json +2 -4
package/src/i18n/locales/en.json +4 -1
package/src/i18n/locales/es.json +4 -1
package/src/server.mjs +6 -12
package/src/modules/transcriber.mjs +0 -110

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ai-terminal-remote-bot",
-  "version": "1.0.2",
+  "version": "1.0.4",
   "description": "Secure remote terminal bot with Claude Code integration",
   "type": "module",
   "bin": {
@@ -24,8 +24,6 @@
     "node": ">=18.0.0"
   },
   "dependencies": {
-    "@huggingface/transformers": "^3.4.1",
-    "telegraf": "^4.16.3",
-    "wavefile": "^11.0.0"
+    "telegraf": "^4.16.3"
   }
 }

package/src/i18n/locales/en.json CHANGED Viewed

@@ -52,6 +52,8 @@
     "imageSaved": "🖼️ Image saved: `{{dest}}`",
     "claudeImageWithCaption": "Look at this image I saved at {{dest}} and: {{caption}}",
     "claudeImageAnalyze": "Look at the image I saved at {{dest}} and describe it. If it's a code or error screenshot, analyze it.",
+    "claudeAudioTranscribe": "Transcribe the audio file at {{dest}}. Return only the transcription text.",
+    "claudeAudioWithCaption": "Transcribe the audio file at {{dest}} and then respond to: {{caption}}",
     "audioSaved": "🎤 Audio saved: `{{dest}}`\n\nTranscribing...",
     "transcription": "🎤 *Transcription:*\n\n{{text}}",
     "transcriptionFailed": "⚠️ Could not transcribe: {{error}}\nAudio saved as file.",
@@ -109,6 +111,7 @@
     "truncated": "... (truncated)"
   },
   "transcriber": {
-    "ffmpegMissing": "ffmpeg is not installed. Install it with: sudo apt install ffmpeg"
+    "ffmpegMissing": "ffmpeg is not installed. Install it with: sudo apt install ffmpeg",
+    "unsupportedFormat": "Unsupported audio format: .{{ext}} (supported: ogg, opus, wav)"
   }
 }

package/src/i18n/locales/es.json CHANGED Viewed

@@ -52,6 +52,8 @@
     "imageSaved": "🖼️ Imagen guardada: `{{dest}}`",
     "claudeImageWithCaption": "Mirá esta imagen que guardé en {{dest}} y: {{caption}}",
     "claudeImageAnalyze": "Mirá la imagen que guardé en {{dest}} y describila. Si es un screenshot de código o error, analizalo.",
+    "claudeAudioTranscribe": "Transcribí el archivo de audio en {{dest}}. Devolvé solo el texto de la transcripción.",
+    "claudeAudioWithCaption": "Transcribí el archivo de audio en {{dest}} y luego respondé a: {{caption}}",
     "audioSaved": "🎤 Audio guardado: `{{dest}}`\n\nTranscribiendo...",
     "transcription": "🎤 *Transcripción:*\n\n{{text}}",
     "transcriptionFailed": "⚠️ No se pudo transcribir: {{error}}\nAudio guardado como archivo.",
@@ -109,6 +111,7 @@
     "truncated": "... (truncado)"
   },
   "transcriber": {
-    "ffmpegMissing": "ffmpeg no está instalado. Instalalo con: sudo apt install ffmpeg"
+    "ffmpegMissing": "ffmpeg no está instalado. Instalalo con: sudo apt install ffmpeg",
+    "unsupportedFormat": "Formato de audio no soportado: .{{ext}} (soportados: ogg, opus, wav)"
   }
 }

package/src/server.mjs CHANGED Viewed

@@ -10,7 +10,7 @@ import { TelegramPlatform } from './platforms/telegram.mjs';
 import { TerminalModule } from './modules/terminal.mjs';
 import { ClaudeModule } from './modules/claude.mjs';
 import { MediaModule } from './modules/media.mjs';
-import { TranscriberModule } from './modules/transcriber.mjs';
 import { validateCommand, validatePath, validatePid, createRateLimiter } from './security.mjs';
 import { chunkText } from './utils.mjs';
 import { t } from './i18n/index.mjs';
@@ -45,7 +45,7 @@ export async function startServer(config) {
   });
   const claude = new ClaudeModule({ timeout: config.claudeTimeout });
   const media = new MediaModule({ allowedPaths: config.allowedPaths });
-  const transcriber = new TranscriberModule();
   const rateLimiter = createRateLimiter();
   // === Session Management ===
@@ -444,16 +444,10 @@ export async function startServer(config) {
       await mctx.reply(t('server.audioSaved', { dest }));
-      try {
-        const transcript = await transcriber.transcribe(dest);
-        await platform.sendMessage(mctx.chatId, t('server.transcription', { text: transcript }));
-        if (session.mode === 'claude') {
-          await handleClaude(mctx, transcript, session);
-        }
-      } catch (txErr) {
-        await mctx.reply(t('server.transcriptionFailed', { error: txErr.message }));
-      }
+      const prompt = mctx.caption
+        ? t('server.claudeAudioWithCaption', { dest, caption: mctx.caption })
+        : t('server.claudeAudioTranscribe', { dest });
+      await handleClaude(mctx, prompt, session);
     } catch (e) {
       mctx.reply(`❌ ${e.message}`);
     }

package/src/modules/transcriber.mjs DELETED Viewed

@@ -1,110 +0,0 @@
-/**
- * Audio transcription module using @huggingface/transformers (Whisper ONNX).
- * Lazy-loads the pipeline on first use. Model cached in ~/.cache/huggingface/.
- */
-import { execFile } from 'child_process';
-import { promisify } from 'util';
-import fs from 'fs';
-import path from 'path';
-import os from 'os';
-import { t, getLang } from '../i18n/index.mjs';
-const execFileAsync = promisify(execFile);
-export class TranscriberModule {
-  constructor() {
-    this._pipeline = null;
-  }
-  /**
-   * Lazy-load the whisper pipeline (downloads ~40MB model on first call).
-   */
-  async _getPipeline() {
-    if (this._pipeline) return this._pipeline;
-    const { pipeline } = await import('@huggingface/transformers');
-    this._pipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny', {
-      dtype: 'fp32',
-    });
-    return this._pipeline;
-  }
-  /**
-   * Convert audio file to 16kHz mono WAV using ffmpeg.
-   * @param {string} inputPath - Path to input audio file
-   * @returns {Promise<string>} Path to converted WAV file
-   */
-  async _convertToWav(inputPath) {
-    const wavPath = path.join(os.tmpdir(), `transcribe_${Date.now()}.wav`);
-    await execFileAsync('ffmpeg', [
-      '-i', inputPath,
-      '-ar', '16000',
-      '-ac', '1',
-      '-f', 'wav',
-      '-y',
-      wavPath,
-    ], { timeout: 30000 });
-    return wavPath;
-  }
-  /**
-   * Parse WAV file into Float32Array of audio samples.
-   * @param {string} wavPath - Path to 16kHz mono WAV file
-   * @returns {Promise<Float32Array>}
-   */
-  async _parseWav(wavPath) {
-    const { default: WaveFile } = await import('wavefile');
-    const wavBuffer = fs.readFileSync(wavPath);
-    const wav = new WaveFile(wavBuffer);
-    wav.toBitDepth('32f');
-    wav.toSampleRate(16000);
-    const samples = wav.getSamples();
-    // getSamples returns Float64Array for 32f, convert to Float32
-    if (samples instanceof Float64Array || samples instanceof Float32Array) {
-      return new Float32Array(samples);
-    }
-    // Interleaved multi-channel: take first channel
-    if (Array.isArray(samples)) {
-      return new Float32Array(samples[0]);
-    }
-    return new Float32Array(samples);
-  }
-  /**
-   * Transcribe an audio file to text.
-   * @param {string} filePath - Path to audio file (OGG, MP3, WAV, etc.)
-   * @returns {Promise<string>} Transcribed text
-   */
-  async transcribe(filePath) {
-    // Check ffmpeg availability
-    try {
-      await execFileAsync('which', ['ffmpeg']);
-    } catch {
-      throw new Error(t('transcriber.ffmpegMissing'));
-    }
-    // Convert to 16kHz WAV
-    const wavPath = await this._convertToWav(filePath);
-    try {
-      // Parse WAV to Float32Array
-      const audioData = await this._parseWav(wavPath);
-      // Run whisper pipeline
-      const pipe = await this._getPipeline();
-      const result = await pipe(audioData, {
-        language: getLang(),
-        task: 'transcribe',
-      });
-      return result.text.trim();
-    } finally {
-      // Clean up temp WAV
-      try { fs.unlinkSync(wavPath); } catch {}
-    }
-  }
-}