@krishivpb60/aether-ai-cli 1.3.8 → 1.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/HIGHLIGHTS.md CHANGED
@@ -1,3 +1,10 @@
1
+ # Aether CLI v1.3.9 Highlights
2
+ - **Microphone Audio Input & Transcription (`/mic`)**:
3
+ - Adds `/mic` voice command to record audio directly from your microphone inside the terminal session.
4
+ - Implements native zero-dependency audio recording on Windows using the WinMM Media Control Interface (MCI) via PowerShell.
5
+ - Automatically transcribes speech using Google Gemini (base64 inlineData), Groq Whisper, or OpenAI Whisper.
6
+ - Populates the active readline prompt buffer directly with the transcribed text so you can review, edit, and send it.
7
+
1
8
  # Aether CLI v1.3.8 Highlights
2
9
  - **OpenCode TUI Welcome & Navigation**:
3
10
  - Implements a stunning, responsive OpenCode-style TUI System State dashboard.
package/README.md CHANGED
@@ -32,6 +32,7 @@
32
32
  - 🤖 **Autopilot Debug Loop** — Automatically correct build/test failures using AI self-correcting feedback loop
33
33
  - 🌿 **Interactive Git TUI** — Beautiful cyberpunk ASCII branch tree commit history & interactive file staging checkbox menu
34
34
  - 📊 **Web HUD Dashboard** — Companion local zero-dependency telemetry dashboard displaying real-time latencies & provider status
35
+ - 🎤 **Voice Microphone Input** — Record voice input directly from your terminal and transcribe it to text using Google Gemini or Whisper
35
36
  - 🔄 **Failover Mesh** — Automatic failback across all configured providers
36
37
  - 🔢 **Local Math Solver** — Evaluates mathematical expressions without an API call
37
38
  - 🤖 **Krylo Companion** — Offline cyberpunk companion bot when no API keys are configured
@@ -197,6 +198,7 @@ Inside interactive chat mode, use these slash commands:
197
198
  | `/autopilot <mode\|debug [cmd]>` | View/switch autopilot safety level or run autonomous debug loop |
198
199
  | `/git` | Launch interactive cyberpunk Git TUI and file stager checkbox menu |
199
200
  | `/dashboard` | Spawn zero-dependency local web server and launch telemetry dashboard HUD |
201
+ | `/mic` | Record audio voice input from microphone and transcribe to text |
200
202
  | `/tokens` | View detailed session token usage and exchanges telemetry |
201
203
  | `/update` | Force check for updates and update Aether CLI manually |
202
204
  | `/review` | Run git diff and stream an AI code review |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@krishivpb60/aether-ai-cli",
3
- "version": "1.3.8",
3
+ "version": "1.3.9",
4
4
  "description": "Aether Core AI — A cyberpunk command-line AI assistant with multi-mode reasoning, 12-node failover mesh, file context injection, and offline fallbacks.",
5
5
  "main": "src/cli.js",
6
6
  "bin": {
package/src/chat.js CHANGED
@@ -139,7 +139,7 @@ export async function startChat(options = {}) {
139
139
  "/theme", "/themes", "/history-clear", "/game", "/abort", "/cmd", "/write",
140
140
  "/commit", "/run", "/history", "/autopilot", "/tokens", "/update",
141
141
  "/review", "/diagnose", "/explain", "/refactor", "/bug", "/doc", "/translate",
142
- "/search", "/git", "/dashboard", "/cd"
142
+ "/search", "/git", "/dashboard", "/cd", "/mic"
143
143
  ];
144
144
  const customCmds = aiConfig.CUSTOM_COMMANDS || {};
145
145
  const commands = [...builtIn, ...Object.keys(customCmds)];
@@ -432,7 +432,7 @@ export async function startChat(options = {}) {
432
432
  "/theme", "/themes", "/history-clear", "/game", "/abort", "/cmd",
433
433
  "/guess", "/write", "/commit", "/run", "/history", "/autopilot", "/tokens",
434
434
  "/update", "/review", "/diagnose", "/explain", "/refactor", "/bug", "/doc",
435
- "/translate", "/search", "/git", "/dashboard", "/cd"
435
+ "/translate", "/search", "/git", "/dashboard", "/cd", "/mic"
436
436
  ];
437
437
 
438
438
  const customCmds = aiConfig.CUSTOM_COMMANDS || {};
@@ -617,6 +617,10 @@ async function handleCommand(input, ctx) {
617
617
  await handleDashboardCommand(ctx);
618
618
  break;
619
619
 
620
+ case "/mic":
621
+ await handleMicInput(ctx);
622
+ break;
623
+
620
624
  case "/tokens":
621
625
  await handleTokensDisplay(ctx);
622
626
  break;
@@ -655,6 +659,7 @@ function showHelp(aiConfig) {
655
659
  console.log(keyValue("/autopilot <mode|debug [cmd]>", "View/switch autopilot level (off, safe, workspace, machine) or run autonomous debug loop"));
656
660
  console.log(keyValue("/git", "Launch interactive Git branch tree, history, and file staging TUI"));
657
661
  console.log(keyValue("/dashboard", "Spawn web-based local cyberpunk telemetry dashboard companion"));
662
+ console.log(keyValue("/mic", "Record audio voice input from microphone and transcribe to text"));
658
663
  console.log(keyValue("/tokens", "View detailed session token usage and exchanges telemetry"));
659
664
  console.log(keyValue("/update", "Force check for updates and update Aether CLI manually"));
660
665
  console.log(keyValue("/game", "Start the local mainframe hacking mini-game"));
@@ -2226,3 +2231,81 @@ export async function handleDashboardCommand(ctx) {
2226
2231
  }
2227
2232
  }
2228
2233
 
/**
 * Resolves once a line terminator (CR and/or LF) arrives on process.stdin.
 *
 * Chunks are Buffers unless an encoding was set on the stream, so each chunk
 * is decoded before it is inspected. stdin is resumed explicitly because a
 * stream that was paused does not start flowing again merely because a
 * "data" listener is attached.
 *
 * @returns {Promise<void>}
 */
function waitForMicStopKey() {
  return new Promise((resolve) => {
    const onData = (chunk) => {
      const received = typeof chunk === "string" ? chunk : chunk.toString("utf8");
      if (!/[\r\n]/.test(received)) {
        return;
      }
      process.stdin.removeListener("data", onData);
      resolve();
    };
    process.stdin.on("data", onData);
    process.stdin.resume();
  });
}

/**
 * Handles recording audio voice from microphone and transcribing to text input.
 *
 * Flow: verify that a speech-to-text API key is configured, start recording
 * into a temporary WAV file, wait for the user to press Enter, stop the
 * recorder, transcribe the file, and write the transcription into the
 * readline buffer so it can be reviewed and edited before sending.
 * The temporary WAV file is removed on every exit path.
 *
 * @param {object} ctx - Chat context; this function reads `ctx.aiConfig`
 *   (API keys) and uses `ctx.rl` (pause/resume/write).
 * @returns {Promise<void>} Failures are reported on the console, not thrown.
 */
export async function handleMicInput(ctx) {
  const { startRecording, transcribeAudioFile } = await import("./mic.js");
  const { join } = await import("node:path");
  const { tmpdir } = await import("node:os");
  const fs = await import("node:fs");

  const apiKeyExists = ctx.aiConfig.GOOGLE_API_KEY || ctx.aiConfig.GROQ_API_KEY || ctx.aiConfig.OPENAI_API_KEY;
  if (!apiKeyExists) {
    console.log("\n" + label.error + " " + colors.danger("No API keys found for speech-to-text. Please configure GOOGLE_API_KEY, GROQ_API_KEY, or OPENAI_API_KEY.\n"));
    return;
  }

  const wavPath = join(tmpdir(), `aether_mic_${Date.now()}.wav`);

  // Best-effort cleanup: a leftover temp file must never mask the real outcome.
  const removeRecording = () => {
    try {
      fs.rmSync(wavPath, { force: true });
    } catch {
      // Ignored on purpose: the file lives in the OS temp directory.
    }
  };

  let handle;
  try {
    handle = await startRecording(wavPath);
  } catch (err) {
    removeRecording();
    console.log("\n" + label.error + " " + colors.danger(`Failed to start recording: ${err.message}\n`));
    return;
  }

  console.log("\n" + label.system + " " + colors.brand("🎤 AUDIO VOICE INPUT"));
  console.log(separator("─"));
  console.log(colors.accent("  Recording started..."));
  console.log("  " + colors.muted("Speak into your microphone."));
  console.log("  " + colors.brand("Press [Enter] to STOP and transcribe..."));
  console.log(separator("─"));

  // NOTE(review): assumes ctx.rl reads from process.stdin — confirm against the caller.
  ctx.rl.pause();
  try {
    await waitForMicStopKey();
  } finally {
    // Always hand the terminal back to readline, even if waiting failed.
    ctx.rl.resume();
  }

  console.log("");
  const spinner = createSpinner("transcribe");
  spinner.start("Stopping recording and transcribing...");

  try {
    await handle.stop();
    const text = await transcribeAudioFile(wavPath, ctx.aiConfig);
    spinner.stop();

    if (!text.trim()) {
      console.log("\n" + label.system + " " + colors.warning("No speech detected or transcription was empty.\n"));
      return;
    }

    console.log("\n" + label.system + " " + colors.success("✓ Transcribed text:"));
    console.log("  " + colors.text(`"${text}"`));
    console.log("");

    // Pre-fill the prompt instead of sending, so the user can edit first.
    ctx.rl.write(text);
  } catch (err) {
    spinner.stop();
    console.log("\n" + label.error + " " + colors.danger(`Transcription failed: ${err.message}\n`));
  } finally {
    removeRecording();
  }
}
2311
+
package/src/mic.js ADDED
@@ -0,0 +1,220 @@
1
+ // ═══════════════════════════════════════════════════════════
2
+ // AETHER AI CLI — Voice Input / Microphone Engine
3
+ // ═══════════════════════════════════════════════════════════
4
+
5
+ import { spawn } from "node:child_process";
6
+ import { platform } from "node:os";
7
+ import fs from "node:fs";
8
+
/**
 * Resolves once the child process has been spawned, or rejects with the
 * spawn error (for example ENOENT when the executable is missing).
 *
 * @param {import("node:child_process").ChildProcess} child
 * @returns {Promise<void>}
 */
function waitForSpawn(child) {
  return new Promise((resolve, reject) => {
    child.once("spawn", () => resolve());
    child.once("error", reject);
  });
}

/**
 * Returns a promise that settles when the child has terminated or failed.
 * The listeners are attached immediately after spawning, so a process that
 * exits before `stop()` is called is still observed. Never rejects.
 *
 * @param {import("node:child_process").ChildProcess} child
 * @returns {Promise<void>}
 */
function trackExit(child) {
  return new Promise((resolve) => {
    child.once("close", () => resolve());
    // A persistent listener keeps later "error" events from crashing the CLI.
    child.on("error", () => resolve());
  });
}

/**
 * Starts audio recording from the microphone and returns a handle to stop it.
 *
 * On Windows a background PowerShell process drives the WinMM MCI API; on
 * other platforms the first available tool among `rec`, `arecord` and
 * `ffmpeg` is spawned.
 *
 * @param {string} wavPath - Path where the .wav file will be saved
 * @returns {Promise<{ stop: () => Promise<void> }>} `stop()` ends the
 *   recording and resolves once the recorder process has terminated; it is
 *   safe to call after the recorder has already exited.
 * @throws {Error} When no recording utility is available or the recorder
 *   process cannot be spawned.
 */
export async function startRecording(wavPath) {
  // Remove any stale recording; failure here is not fatal.
  try {
    fs.rmSync(wavPath, { force: true });
  } catch {
    // Ignored on purpose: the recorder overwrites or reports its own error.
  }

  if (platform() === "win32") {
    // Windows: Use native WinMM MCI API via a background PowerShell process
    const ps = spawn("powershell", ["-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", "-"], {
      stdio: ["pipe", "pipe", "ignore"]
    });
    const exited = trackExit(ps);
    // Writes to a dead PowerShell raise EPIPE on stdin; swallow instead of crashing.
    ps.stdin?.on("error", () => {});
    // Drain stdout so the child can never block on a full pipe.
    ps.stdout?.resume();

    await waitForSpawn(ps);

    ps.stdin.write(`Add-Type -MemberDefinition '[DllImport("winmm.dll", CharSet = CharSet.Ansi)] public static extern int mciSendString(string cmd, System.Text.StringBuilder ret, int len, IntPtr cb);' -Name WinMM -Namespace Win32\r\n`);
    ps.stdin.write(`[Win32.WinMM]::mciSendString("open new Type waveaudio Alias myRecorder", $null, 0, [IntPtr]::Zero)\r\n`);
    ps.stdin.write(`[Win32.WinMM]::mciSendString("record myRecorder", $null, 0, [IntPtr]::Zero)\r\n`);

    let stopRequested = false;
    return {
      stop: () => {
        const stillRunning = ps.exitCode === null && ps.signalCode === null;
        if (!stopRequested && stillRunning && !ps.stdin.destroyed) {
          // The path sits inside a single-quoted PowerShell string, where a
          // literal single quote must be doubled.
          const escapedPath = wavPath.replace(/\\/g, "\\\\").replace(/'/g, "''");
          ps.stdin.write(`[Win32.WinMM]::mciSendString('save myRecorder "${escapedPath}"', $null, 0, [IntPtr]::Zero)\r\n`);
          ps.stdin.write(`[Win32.WinMM]::mciSendString("close myRecorder", $null, 0, [IntPtr]::Zero)\r\n`);
          ps.stdin.write("exit\r\n");
          ps.stdin.end();
        }
        stopRequested = true;
        return exited;
      }
    };
  }

  // macOS / Linux: Try spawning standard command-line recording tools
  let cmd;
  let args;

  // Check if sox/rec is available (highest quality/reliability)
  if (await commandExists("rec")) {
    cmd = "rec";
    args = ["-q", wavPath];
  } else if (await commandExists("arecord")) {
    cmd = "arecord";
    args = ["-f", "cd", "-t", "wav", wavPath];
  } else if (await commandExists("ffmpeg")) {
    cmd = "ffmpeg";
    args = platform() === "darwin"
      ? ["-y", "-f", "avfoundation", "-i", ":0", wavPath]
      : ["-y", "-f", "alsa", "-i", "default", wavPath];
  } else {
    throw new Error("No recording utility found. On Windows, recording is native. On macOS/Linux, please install 'sox', 'arecord', or 'ffmpeg'.");
  }

  const proc = spawn(cmd, args, { stdio: "ignore" });
  const exited = trackExit(proc);
  await waitForSpawn(proc);

  return {
    stop: () => {
      // Only signal a live process; if it already exited, `exited` is settled.
      if (proc.exitCode === null && proc.signalCode === null) {
        proc.kill("SIGTERM");
      }
      return exited;
    }
  };
}
85
+
/**
 * Helper to check if a command exists in the environment PATH.
 *
 * Delegates to the platform lookup tool (`where` on Windows, `which`
 * elsewhere) and treats exit code 0 as "found". If the lookup tool itself
 * cannot be started, the command is reported as not found instead of
 * leaving the promise pending or crashing on an unhandled "error" event.
 *
 * @param {string} name - Command name to look up
 * @returns {Promise<boolean>} true only when the lookup exits with code 0; never rejects
 */
function commandExists(name) {
  return new Promise((resolve) => {
    const lookupTool = platform() === "win32" ? "where" : "which";

    let check;
    try {
      check = spawn(lookupTool, [name], { stdio: "ignore" });
    } catch {
      // spawn() throws synchronously on invalid arguments.
      resolve(false);
      return;
    }

    // Emitted when the lookup tool is missing or cannot be executed.
    check.once("error", () => resolve(false));
    check.once("close", (code) => resolve(code === 0));
  });
}
99
+
/**
 * Transcribes a local audio WAV file using the configured AI providers.
 * Priority: Google Gemini -> Groq Whisper -> OpenAI Whisper.
 *
 * Exactly one provider is called, chosen by which API key is configured;
 * there is no fallback to the next provider if the chosen request fails.
 *
 * @param {string} wavPath - Path to the WAV file
 * @param {object} aiConfig - Active AI configuration; reads GOOGLE_API_KEY,
 *   GROQ_API_KEY and OPENAI_API_KEY
 * @returns {Promise<string>} Transcribed text, trimmed; empty string when the
 *   provider returned no usable text
 * @throws {Error} When the file is missing, no API key is configured, or the
 *   provider responds with a non-2xx status
 */
export async function transcribeAudioFile(wavPath, aiConfig) {
  let fileBuffer;
  try {
    // Async read: keeps the event loop (and the caller's spinner) responsive.
    fileBuffer = await fs.promises.readFile(wavPath);
  } catch (err) {
    if (err.code === "ENOENT") {
      throw new Error(`Audio file not found: ${wavPath}`, { cause: err });
    }
    throw err;
  }

  // 1. Google Gemini Transcription
  if (aiConfig.GOOGLE_API_KEY) {
    const model = "gemini-2.5-flash";
    const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;

    const body = {
      contents: [
        {
          role: "user",
          parts: [
            {
              inlineData: {
                mimeType: "audio/wav",
                data: fileBuffer.toString("base64")
              }
            },
            {
              text: "Transcribe this audio file exactly as spoken. Output ONLY the plain transcription text, with no extra formatting, conversational filler, timestamps, or commentary. If there is no speech, output an empty string."
            }
          ]
        }
      ]
    };

    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        // The key travels in a header so it does not end up in URLs that
        // proxies, logs or error traces may record.
        "x-goog-api-key": aiConfig.GOOGLE_API_KEY
      },
      body: JSON.stringify(body)
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Gemini transcription error: ${response.statusText}. ${errorText}`);
    }

    const data = await response.json();
    const parts = data.candidates?.[0]?.content?.parts ?? [];
    const cleaned = parts
      .map((part) => part.text)
      .filter(Boolean)
      .join("")
      .trim();

    // A reply that is nothing but a timestamp (e.g. 00:00:23) is treated as "no speech".
    return /^\d{2}:\d{2}:\d{2}$/.test(cleaned) ? "" : cleaned;
  }

  // 2. Groq Whisper / OpenAI Whisper
  let apiKey = aiConfig.GROQ_API_KEY;
  let url = "https://api.groq.com/openai/v1/audio/transcriptions";
  let modelName = "whisper-large-v3";

  if (!apiKey) {
    apiKey = aiConfig.OPENAI_API_KEY;
    url = "https://api.openai.com/v1/audio/transcriptions";
    modelName = "whisper-1";
  }

  if (!apiKey) {
    throw new Error("No API key configured for speech-to-text. Please configure GOOGLE_API_KEY, GROQ_API_KEY, or OPENAI_API_KEY.");
  }

  // Built-in FormData/Blob produce the multipart body; fetch sets the
  // Content-Type header and boundary itself.
  const form = new FormData();
  form.append("file", new Blob([fileBuffer], { type: "audio/wav" }), "audio.wav");
  form.append("model", modelName);

  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${apiKey}`
    },
    body: form
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Whisper transcription error: ${response.statusText}. ${errorText}`);
  }

  const data = await response.json();
  return (data.text || "").trim();
}
@@ -76,7 +76,7 @@
76
76
  }
77
77
 
78
78
  .hud-frame::after {
79
- content: "AETHER CLI V1.3.8";
79
+ content: "AETHER CLI V1.3.9";
80
80
  position: absolute;
81
81
  bottom: -12px;
82
82
  right: 20px;