npm - @krishivpb60/aether-ai-cli - Versions diffs - 1.3.8 → 1.3.10 - Mend

@krishivpb60/aether-ai-cli 1.3.8 → 1.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/HIGHLIGHTS.md CHANGED Viewed

@@ -1,3 +1,18 @@
+# Aether CLI v1.3.10 Highlights
+- **Microphone Audio Input Fixes & Transcription (`/mic`)**:
+  - Adds `/mic` voice command to record audio directly from your microphone inside the terminal session.
+  - Implements native zero-dependency audio recording on Windows using the WinMM Multimedia Control Interface (MCI) via PowerShell.
+  - Automatically transcribes speech using Google Gemini (base64 inlineData), Groq Whisper, or OpenAI Whisper.
+  - Fixes a readline raw-mode/pause conflict that blocked input, so pressing Enter now reliably stops the recording and starts transcription.
+  - Populates the active readline prompt buffer directly with the transcribed text so you can review, edit, and send it.
+# Aether CLI v1.3.9 Highlights
+- **Microphone Audio Input & Transcription (`/mic`)**:
+  - Adds `/mic` voice command to record audio directly from your microphone inside the terminal session.
+  - Implements native zero-dependency audio recording on Windows using the WinMM Multimedia Control Interface (MCI) via PowerShell.
+  - Automatically transcribes speech using Google Gemini (base64 inlineData), Groq Whisper, or OpenAI Whisper.
+  - Populates the active readline prompt buffer directly with the transcribed text so you can review, edit, and send it.
 # Aether CLI v1.3.8 Highlights
 - **OpenCode TUI Welcome & Navigation**:
   - Implements a stunning, responsive OpenCode-style TUI System State dashboard.

package/README.md CHANGED Viewed

@@ -32,6 +32,7 @@
 - 🤖 **Autopilot Debug Loop** — Automatically correct build/test failures using AI self-correcting feedback loop
 - 🌿 **Interactive Git TUI** — Beautiful cyberpunk ASCII branch tree commit history & interactive file staging checkbox menu
 - 📊 **Web HUD Dashboard** — Companion local zero-dependency telemetry dashboard displaying real-time latencies & provider status
+- 🎤 **Voice Microphone Input** — Record voice input directly from your terminal and transcribe it to text using Google Gemini or Whisper
 - 🔄 **Failover Mesh** — Automatic failback across all configured providers
 - 🔢 **Local Math Solver** — Evaluates mathematical expressions without an API call
 - 🤖 **Krylo Companion** — Offline cyberpunk companion bot when no API keys are configured
@@ -197,6 +198,7 @@ Inside interactive chat mode, use these slash commands:
 | `/autopilot <mode\|debug [cmd]>` | View/switch autopilot safety level or run autonomous debug loop |
 | `/git` | Launch interactive cyberpunk Git TUI and file stager checkbox menu |
 | `/dashboard` | Spawn zero-dependency local web server and launch telemetry dashboard HUD |
+| `/mic` | Record audio voice input from microphone and transcribe to text |
 | `/tokens` | View detailed session token usage and exchanges telemetry |
 | `/update` | Force check for updates and update Aether CLI manually |
 | `/review` | Run git diff and stream an AI code review |

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@krishivpb60/aether-ai-cli",
-  "version": "1.3.8",
+  "version": "1.3.10",
   "description": "Aether Core AI — A cyberpunk command-line AI assistant with multi-mode reasoning, 12-node failover mesh, file context injection, and offline fallbacks.",
   "main": "src/cli.js",
   "bin": {

package/src/chat.js CHANGED Viewed

@@ -139,7 +139,7 @@ export async function startChat(options = {}) {
       "/theme", "/themes", "/history-clear", "/game", "/abort", "/cmd", "/write",
       "/commit", "/run", "/history", "/autopilot", "/tokens", "/update",
       "/review", "/diagnose", "/explain", "/refactor", "/bug", "/doc", "/translate",
-      "/search", "/git", "/dashboard", "/cd"
+      "/search", "/git", "/dashboard", "/cd", "/mic"
     ];
     const customCmds = aiConfig.CUSTOM_COMMANDS || {};
     const commands = [...builtIn, ...Object.keys(customCmds)];
@@ -432,7 +432,7 @@ export async function startChat(options = {}) {
         "/theme", "/themes", "/history-clear", "/game", "/abort", "/cmd",
         "/guess", "/write", "/commit", "/run", "/history", "/autopilot", "/tokens",
         "/update", "/review", "/diagnose", "/explain", "/refactor", "/bug", "/doc",
-        "/translate", "/search", "/git", "/dashboard", "/cd"
+        "/translate", "/search", "/git", "/dashboard", "/cd", "/mic"
       ];
       const customCmds = aiConfig.CUSTOM_COMMANDS || {};
@@ -617,6 +617,10 @@ async function handleCommand(input, ctx) {
       await handleDashboardCommand(ctx);
       break;
+    case "/mic":
+      await handleMicInput(ctx);
+      break;
     case "/tokens":
       await handleTokensDisplay(ctx);
       break;
@@ -655,6 +659,7 @@ function showHelp(aiConfig) {
   console.log(keyValue("/autopilot <mode|debug [cmd]>", "View/switch autopilot level (off, safe, workspace, machine) or run autonomous debug loop"));
   console.log(keyValue("/git", "Launch interactive Git branch tree, history, and file staging TUI"));
   console.log(keyValue("/dashboard", "Spawn web-based local cyberpunk telemetry dashboard companion"));
+  console.log(keyValue("/mic", "Record audio voice input from microphone and transcribe to text"));
   console.log(keyValue("/tokens", "View detailed session token usage and exchanges telemetry"));
   console.log(keyValue("/update", "Force check for updates and update Aether CLI manually"));
   console.log(keyValue("/game", "Start the local mainframe hacking mini-game"));
@@ -2226,3 +2231,104 @@ export async function handleDashboardCommand(ctx) {
   }
 }
+/**
+ * Records a voice message from the microphone, transcribes it, and places the
+ * text into the active readline prompt so the user can review it before sending.
+ *
+ * Flow: check that a speech-to-text API key is configured, start recording into
+ * a temporary WAV file, wait on stdin for Enter (stop) or Ctrl+C (abort), stop
+ * the recorder, transcribe the file, and delete the temporary file afterwards.
+ *
+ * @param {object} ctx - Chat context. This function reads `ctx.aiConfig`
+ *   (GOOGLE_API_KEY / GROQ_API_KEY / OPENAI_API_KEY) and `ctx.rl` (readline interface).
+ * @returns {Promise<void>} Recording and transcription failures are reported on the console.
+ */
+export async function handleMicInput(ctx) {
+  const { startRecording, transcribeAudioFile } = await import("./mic.js");
+  const { join } = await import("node:path");
+  const { tmpdir } = await import("node:os");
+  const fs = await import("node:fs");
+  const apiKeyExists = ctx.aiConfig.GOOGLE_API_KEY || ctx.aiConfig.GROQ_API_KEY || ctx.aiConfig.OPENAI_API_KEY;
+  if (!apiKeyExists) {
+    console.log("\n" + label.error + " " + colors.danger("No API keys found for speech-to-text. Please configure GOOGLE_API_KEY, GROQ_API_KEY, or OPENAI_API_KEY.\n"));
+    return;
+  }
+  const wavPath = join(tmpdir(), `aether_mic_${Date.now()}.wav`);
+  // Single cleanup routine used on every exit path once recording has started.
+  const removeRecording = () => {
+    try {
+      fs.rmSync(wavPath, { force: true });
+    } catch (e) {
+      // Best effort: a leftover temp file must not mask the real outcome.
+    }
+  };
+  let handle;
+  try {
+    handle = await startRecording(wavPath);
+  } catch (err) {
+    console.log("\n" + label.error + " " + colors.danger(`Failed to start recording: ${err.message}\n`));
+    return;
+  }
+  console.log("\n" + label.system + " " + colors.brand("🎤 AUDIO VOICE INPUT"));
+  console.log(separator("─"));
+  console.log(colors.accent("  Recording started..."));
+  console.log("  " + colors.muted("Speak into your microphone."));
+  console.log("  " + colors.brand("Press [Enter] to STOP and transcribe..."));
+  console.log(separator("─"));
+  const stdin = process.stdin;
+  // setRawMode only exists when stdin is a TTY; piped stdin still delivers "\n" in line mode.
+  const canToggleRaw = typeof stdin.setRawMode === "function";
+  const wasRaw = Boolean(stdin.isRaw);
+  let aborted = false;
+  ctx.rl.pause();
+  try {
+    if (canToggleRaw) {
+      stdin.setRawMode(true);
+    }
+    stdin.resume();
+    await new Promise((resolve) => {
+      function finish(wasAborted) {
+        aborted = wasAborted;
+        stdin.removeListener("data", onData);
+        stdin.removeListener("end", onEnd);
+        resolve();
+      }
+      function onData(chunk) {
+        // Decode per chunk instead of calling stdin.setEncoding(), which would
+        // permanently change the shared stream for every other consumer.
+        const keys = String(chunk);
+        // A chunk may carry several keystrokes, so search it rather than compare it.
+        if (keys.includes("\u0003")) {
+          finish(true);
+        } else if (/[\r\n]/.test(keys)) {
+          finish(false);
+        }
+      }
+      function onEnd() {
+        // stdin closed (e.g. piped input ran out): treat it as "stop" instead of waiting forever.
+        finish(false);
+      }
+      stdin.on("data", onData);
+      stdin.once("end", onEnd);
+    });
+  } finally {
+    // Always hand the terminal back to readline in the state we found it.
+    if (canToggleRaw) {
+      stdin.setRawMode(wasRaw);
+    }
+    ctx.rl.resume();
+  }
+  if (aborted) {
+    console.log("\n" + label.system + " " + colors.warning("Recording aborted by user.\n"));
+    try {
+      await handle.stop();
+    } catch (e) {
+      // The recording is being discarded; a failed stop has nothing useful to report.
+    }
+    removeRecording();
+    return;
+  }
+  console.log("");
+  const spinner = createSpinner("transcribe");
+  spinner.start("Stopping recording and transcribing...");
+  let text;
+  try {
+    await handle.stop();
+    text = await transcribeAudioFile(wavPath, ctx.aiConfig);
+  } catch (err) {
+    spinner.stop();
+    console.log("\n" + label.error + " " + colors.danger(`Transcription failed: ${err.message}\n`));
+    return;
+  } finally {
+    removeRecording();
+  }
+  spinner.stop();
+  // rl.write() treats line breaks like the user pressing Enter, which would submit
+  // the prompt before it can be reviewed, so collapse them into single spaces.
+  const promptText = String(text).replace(/\s*[\r\n]+\s*/g, " ").trim();
+  if (!promptText) {
+    console.log("\n" + label.system + " " + colors.warning("No speech detected or transcription was empty.\n"));
+    return;
+  }
+  console.log("\n" + label.system + " " + colors.success("✓ Transcribed text:"));
+  console.log("  " + colors.text(`"${promptText}"`));
+  console.log("");
+  ctx.rl.write(promptText);
+}

package/src/mic.js ADDED Viewed

@@ -0,0 +1,220 @@
+// ═══════════════════════════════════════════════════════════
+// AETHER AI CLI — Voice Input / Microphone Engine
+// ═══════════════════════════════════════════════════════════
+import { spawn } from "node:child_process";
+import { platform } from "node:os";
+import fs from "node:fs";
+/**
+ * Starts audio recording from the microphone and returns a handle to stop it.
+ *
+ * On Windows a background PowerShell process drives the WinMM MCI API; the WAV
+ * file is only written when `stop()` sends the MCI "save" command. On other
+ * platforms the first available tool among `rec`, `arecord` and `ffmpeg` is
+ * spawned writing directly to `wavPath`, and `stop()` terminates it with SIGTERM.
+ *
+ * @param {string} wavPath - Path where the .wav file will be saved
+ * @returns {Promise<{ stop: () => Promise<void> }>} `stop()` resolves once the
+ *   recorder process has exited; it resolves immediately if it already exited.
+ * @throws {Error} On macOS/Linux when no supported recording utility is found.
+ */
+export async function startRecording(wavPath) {
+  // Remove any stale file so an old recording is never mistaken for a new one.
+  try {
+    fs.rmSync(wavPath, { force: true });
+  } catch (e) {
+    // Ignore: best effort only.
+  }
+  // Subscribe to exit right after spawning. Attaching the listener later, inside
+  // stop(), would wait forever if the recorder had already died. "error" covers
+  // spawn failures, which would otherwise surface as an uncaught exception.
+  const whenExited = (child) =>
+    new Promise((resolve) => {
+      child.once("close", () => resolve());
+      child.once("error", () => resolve());
+    });
+  if (platform() === "win32") {
+    // Windows: Use native WinMM MCI API via a background PowerShell process
+    const ps = spawn("powershell", ["-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", "-"], {
+      stdio: ["pipe", "pipe", "ignore"]
+    });
+    const exited = whenExited(ps);
+    // Writing to a PowerShell that already exited raises EPIPE on stdin; swallow it
+    // here because the exit itself is observed through `exited`.
+    ps.stdin.on("error", () => {});
+    // Drain stdout (MCI return codes are echoed there) so the pipe can never fill up.
+    ps.stdout.resume();
+    const send = (line) => ps.stdin.write(`${line}\r\n`);
+    send(`Add-Type -MemberDefinition '[DllImport("winmm.dll", CharSet = CharSet.Ansi)] public static extern int mciSendString(string cmd, System.Text.StringBuilder ret, int len, IntPtr cb);' -Name WinMM -Namespace Win32`);
+    send(`[Win32.WinMM]::mciSendString("open new Type waveaudio Alias myRecorder", $null, 0, [IntPtr]::Zero)`);
+    send(`[Win32.WinMM]::mciSendString("record myRecorder", $null, 0, [IntPtr]::Zero)`);
+    let stopping = null;
+    return {
+      stop: () => {
+        // Idempotent: a second call must not write to the already-ended stdin.
+        if (stopping === null) {
+          // The path sits inside a single-quoted PowerShell literal, where backslashes
+          // are literal and the only character needing escaping is ' (doubled).
+          const psPath = wavPath.replace(/'/g, "''");
+          send(`[Win32.WinMM]::mciSendString('save myRecorder "${psPath}"', $null, 0, [IntPtr]::Zero)`);
+          send(`[Win32.WinMM]::mciSendString("close myRecorder", $null, 0, [IntPtr]::Zero)`);
+          send("exit");
+          ps.stdin.end();
+          stopping = exited;
+        }
+        return stopping;
+      }
+    };
+  }
+  // macOS / Linux: Try spawning standard command-line recording tools
+  let cmd;
+  let args;
+  // Check if sox/rec is available (highest quality/reliability)
+  if (await commandExists("rec")) {
+    cmd = "rec";
+    args = ["-q", wavPath];
+  } else if (await commandExists("arecord")) {
+    cmd = "arecord";
+    args = ["-f", "cd", "-t", "wav", wavPath];
+  } else if (await commandExists("ffmpeg")) {
+    cmd = "ffmpeg";
+    args = platform() === "darwin"
+      ? ["-y", "-f", "avfoundation", "-i", ":0", wavPath]
+      : ["-y", "-f", "alsa", "-i", "default", wavPath];
+  } else {
+    throw new Error("No recording utility found. On Windows, recording is native. On macOS/Linux, please install 'sox', 'arecord', or 'ffmpeg'.");
+  }
+  const proc = spawn(cmd, args, { stdio: "ignore" });
+  const exited = whenExited(proc);
+  return {
+    stop: () => {
+      // kill() simply returns false when the process is already gone.
+      proc.kill("SIGTERM");
+      return exited;
+    }
+  };
+}
+/**
+ * Helper to check if a command exists in the environment PATH.
+ *
+ * Runs the platform lookup tool ("where" on Windows, "which" elsewhere) and
+ * reports whether it exited with code 0. The returned promise never rejects.
+ *
+ * @param {string} name - Command name to look up
+ * @returns {Promise<boolean>} true when the lookup tool exits with code 0;
+ *   false otherwise, including when the lookup tool itself cannot be started.
+ */
+function commandExists(name) {
+  return new Promise((resolve) => {
+    const lookupTool = platform() === "win32" ? "where" : "which";
+    const check = spawn(lookupTool, [name], { stdio: "ignore" });
+    // A ChildProcess "error" event with no listener is thrown as an uncaught
+    // exception, so a missing lookup tool must be mapped to "not found" here.
+    check.on("error", () => resolve(false));
+    check.on("close", (code) => resolve(code === 0));
+  });
+}
+/**
+ * Transcribes a local audio WAV file using the configured AI providers.
+ * Priority: Google Gemini -> Groq Whisper -> OpenAI Whisper. Only the first
+ * provider that has an API key configured is called.
+ * @param {string} wavPath - Path to the WAV file
+ * @param {object} aiConfig - Active AI configuration (reads GOOGLE_API_KEY,
+ *   GROQ_API_KEY and OPENAI_API_KEY)
+ * @returns {Promise<string>} Transcribed text (trimmed; may be empty)
+ * @throws {Error} When the file is missing, no API key is configured, or the
+ *   provider answers with a non-2xx status.
+ */
+export async function transcribeAudioFile(wavPath, aiConfig) {
+  let fileBuffer;
+  try {
+    // Read directly instead of existsSync + read, which can race with deletion.
+    fileBuffer = fs.readFileSync(wavPath);
+  } catch (err) {
+    if (err.code === "ENOENT") {
+      throw new Error(`Audio file not found: ${wavPath}`, { cause: err });
+    }
+    throw err;
+  }
+  // 1. Google Gemini Transcription
+  if (aiConfig.GOOGLE_API_KEY) {
+    return transcribeWithGemini(fileBuffer, aiConfig.GOOGLE_API_KEY);
+  }
+  // 2. Groq Whisper / OpenAI Whisper
+  if (aiConfig.GROQ_API_KEY) {
+    return transcribeWithWhisper(fileBuffer, {
+      apiKey: aiConfig.GROQ_API_KEY,
+      url: "https://api.groq.com/openai/v1/audio/transcriptions",
+      modelName: "whisper-large-v3"
+    });
+  }
+  if (aiConfig.OPENAI_API_KEY) {
+    return transcribeWithWhisper(fileBuffer, {
+      apiKey: aiConfig.OPENAI_API_KEY,
+      url: "https://api.openai.com/v1/audio/transcriptions",
+      modelName: "whisper-1"
+    });
+  }
+  throw new Error("No API key configured for speech-to-text. Please configure GOOGLE_API_KEY, GROQ_API_KEY, or OPENAI_API_KEY.");
+}
+/**
+ * Sends the audio inline (base64) to Gemini `generateContent` and returns the text reply.
+ * @param {Buffer} fileBuffer - Raw WAV bytes
+ * @param {string} apiKey - Google API key
+ * @returns {Promise<string>} Trimmed transcription, or "" when nothing usable came back
+ */
+async function transcribeWithGemini(fileBuffer, apiKey) {
+  const model = "gemini-2.5-flash";
+  const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;
+  const body = {
+    contents: [
+      {
+        role: "user",
+        parts: [
+          {
+            inlineData: {
+              mimeType: "audio/wav",
+              data: fileBuffer.toString("base64")
+            }
+          },
+          {
+            text: "Transcribe this audio file exactly as spoken. Output ONLY the plain transcription text, with no extra formatting, conversational filler, timestamps, or commentary. If there is no speech, output an empty string."
+          }
+        ]
+      }
+    ]
+  };
+  const response = await fetch(url, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      // Send the key as a header rather than in the query string so it cannot
+      // leak through URLs captured in logs, proxies or error messages.
+      "x-goog-api-key": apiKey
+    },
+    body: JSON.stringify(body)
+  });
+  if (!response.ok) {
+    const errorText = await response.text();
+    // statusText can be empty, so always include the numeric status.
+    throw new Error(`Gemini transcription error: ${response.status} ${response.statusText}. ${errorText}`);
+  }
+  const data = await response.json();
+  const parts = data.candidates?.[0]?.content?.parts ?? [];
+  const cleaned = parts
+    .map((part) => part.text)
+    .filter(Boolean)
+    .join("")
+    .trim();
+  // A reply that is nothing but a timestamp (e.g. 00:00:23) is treated as "no speech".
+  if (/^\d{2}:\d{2}:\d{2}$/.test(cleaned)) {
+    return "";
+  }
+  return cleaned;
+}
+/**
+ * Uploads the audio to an OpenAI-compatible `/audio/transcriptions` endpoint.
+ * @param {Buffer} fileBuffer - Raw WAV bytes
+ * @param {{ apiKey: string, url: string, modelName: string }} provider - Endpoint settings
+ * @returns {Promise<string>} Trimmed transcription text
+ */
+async function transcribeWithWhisper(fileBuffer, { apiKey, url, modelName }) {
+  // Let fetch build the multipart body and boundary instead of assembling it by hand.
+  const form = new FormData();
+  form.append("file", new Blob([fileBuffer], { type: "audio/wav" }), "audio.wav");
+  form.append("model", modelName);
+  const response = await fetch(url, {
+    method: "POST",
+    headers: { "Authorization": `Bearer ${apiKey}` },
+    body: form
+  });
+  if (!response.ok) {
+    const errorText = await response.text();
+    throw new Error(`Whisper transcription error: ${response.status} ${response.statusText}. ${errorText}`);
+  }
+  const data = await response.json();
+  return (data.text || "").trim();
+}

package/src/ui/dashboard.html CHANGED Viewed

@@ -76,7 +76,7 @@
     }
     .hud-frame::after {
-      content: "AETHER CLI V1.3.8";
+      content: "AETHER CLI V1.3.10";
       position: absolute;
       bottom: -12px;
       right: 20px;