@krishivpb60/aether-ai-cli 1.3.8 → 1.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HIGHLIGHTS.md +15 -0
- package/README.md +2 -0
- package/package.json +1 -1
- package/src/chat.js +108 -2
- package/src/mic.js +220 -0
- package/src/ui/dashboard.html +1 -1
package/HIGHLIGHTS.md
CHANGED
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
# Aether CLI v1.3.10 Highlights
|
|
2
|
+
- **Microphone Audio Input Fixes & Transcription (`/mic`)**:
|
|
3
|
+
- Adds `/mic` voice command to record audio directly from your microphone inside the terminal session.
|
|
4
|
+
- Implements native zero-dependency audio recording on Windows using the WinMM Multimedia Control Interface (MCI) via PowerShell.
|
|
5
|
+
- Automatically transcribes speech using Google Gemini (base64 inlineData), Groq Whisper, or OpenAI Whisper.
|
|
6
|
+
- Fixes a readline raw-mode issue where the paused interface blocked input, so pressing Enter now reliably stops the recording and starts transcription.
|
|
7
|
+
- Populates the active readline prompt buffer directly with the transcribed text so you can review, edit, and send it.
|
|
8
|
+
|
|
9
|
+
# Aether CLI v1.3.9 Highlights
|
|
10
|
+
- **Microphone Audio Input & Transcription (`/mic`)**:
|
|
11
|
+
- Adds `/mic` voice command to record audio directly from your microphone inside the terminal session.
|
|
12
|
+
- Implements native zero-dependency audio recording on Windows using the WinMM Multimedia Control Interface (MCI) via PowerShell.
|
|
13
|
+
- Automatically transcribes speech using Google Gemini (base64 inlineData), Groq Whisper, or OpenAI Whisper.
|
|
14
|
+
- Populates the active readline prompt buffer directly with the transcribed text so you can review, edit, and send it.
|
|
15
|
+
|
|
1
16
|
# Aether CLI v1.3.8 Highlights
|
|
2
17
|
- **OpenCode TUI Welcome & Navigation**:
|
|
3
18
|
- Implements a stunning, responsive OpenCode-style TUI System State dashboard.
|
package/README.md
CHANGED
|
@@ -32,6 +32,7 @@
|
|
|
32
32
|
- 🤖 **Autopilot Debug Loop** — Automatically correct build/test failures using AI self-correcting feedback loop
|
|
33
33
|
- 🌿 **Interactive Git TUI** — Beautiful cyberpunk ASCII branch tree commit history & interactive file staging checkbox menu
|
|
34
34
|
- 📊 **Web HUD Dashboard** — Companion local zero-dependency telemetry dashboard displaying real-time latencies & provider status
|
|
35
|
+
- 🎤 **Voice Microphone Input** — Record voice input directly from your terminal and transcribe it to text using Google Gemini or Whisper
|
|
35
36
|
- 🔄 **Failover Mesh** — Automatic failback across all configured providers
|
|
36
37
|
- 🔢 **Local Math Solver** — Evaluates mathematical expressions without an API call
|
|
37
38
|
- 🤖 **Krylo Companion** — Offline cyberpunk companion bot when no API keys are configured
|
|
@@ -197,6 +198,7 @@ Inside interactive chat mode, use these slash commands:
|
|
|
197
198
|
| `/autopilot <mode\|debug [cmd]>` | View/switch autopilot safety level or run autonomous debug loop |
|
|
198
199
|
| `/git` | Launch interactive cyberpunk Git TUI and file stager checkbox menu |
|
|
199
200
|
| `/dashboard` | Spawn zero-dependency local web server and launch telemetry dashboard HUD |
|
|
201
|
+
| `/mic` | Record audio voice input from microphone and transcribe to text |
|
|
200
202
|
| `/tokens` | View detailed session token usage and exchanges telemetry |
|
|
201
203
|
| `/update` | Force check for updates and update Aether CLI manually |
|
|
202
204
|
| `/review` | Run git diff and stream an AI code review |
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@krishivpb60/aether-ai-cli",
|
|
3
|
-
"version": "1.3.8",
|
|
3
|
+
"version": "1.3.10",
|
|
4
4
|
"description": "Aether Core AI — A cyberpunk command-line AI assistant with multi-mode reasoning, 12-node failover mesh, file context injection, and offline fallbacks.",
|
|
5
5
|
"main": "src/cli.js",
|
|
6
6
|
"bin": {
|
package/src/chat.js
CHANGED
|
@@ -139,7 +139,7 @@ export async function startChat(options = {}) {
|
|
|
139
139
|
"/theme", "/themes", "/history-clear", "/game", "/abort", "/cmd", "/write",
|
|
140
140
|
"/commit", "/run", "/history", "/autopilot", "/tokens", "/update",
|
|
141
141
|
"/review", "/diagnose", "/explain", "/refactor", "/bug", "/doc", "/translate",
|
|
142
|
-
"/search", "/git", "/dashboard", "/cd"
|
|
142
|
+
"/search", "/git", "/dashboard", "/cd", "/mic"
|
|
143
143
|
];
|
|
144
144
|
const customCmds = aiConfig.CUSTOM_COMMANDS || {};
|
|
145
145
|
const commands = [...builtIn, ...Object.keys(customCmds)];
|
|
@@ -432,7 +432,7 @@ export async function startChat(options = {}) {
|
|
|
432
432
|
"/theme", "/themes", "/history-clear", "/game", "/abort", "/cmd",
|
|
433
433
|
"/guess", "/write", "/commit", "/run", "/history", "/autopilot", "/tokens",
|
|
434
434
|
"/update", "/review", "/diagnose", "/explain", "/refactor", "/bug", "/doc",
|
|
435
|
-
"/translate", "/search", "/git", "/dashboard", "/cd"
|
|
435
|
+
"/translate", "/search", "/git", "/dashboard", "/cd", "/mic"
|
|
436
436
|
];
|
|
437
437
|
|
|
438
438
|
const customCmds = aiConfig.CUSTOM_COMMANDS || {};
|
|
@@ -617,6 +617,10 @@ async function handleCommand(input, ctx) {
|
|
|
617
617
|
await handleDashboardCommand(ctx);
|
|
618
618
|
break;
|
|
619
619
|
|
|
620
|
+
case "/mic":
|
|
621
|
+
await handleMicInput(ctx);
|
|
622
|
+
break;
|
|
623
|
+
|
|
620
624
|
case "/tokens":
|
|
621
625
|
await handleTokensDisplay(ctx);
|
|
622
626
|
break;
|
|
@@ -655,6 +659,7 @@ function showHelp(aiConfig) {
|
|
|
655
659
|
console.log(keyValue("/autopilot <mode|debug [cmd]>", "View/switch autopilot level (off, safe, workspace, machine) or run autonomous debug loop"));
|
|
656
660
|
console.log(keyValue("/git", "Launch interactive Git branch tree, history, and file staging TUI"));
|
|
657
661
|
console.log(keyValue("/dashboard", "Spawn web-based local cyberpunk telemetry dashboard companion"));
|
|
662
|
+
console.log(keyValue("/mic", "Record audio voice input from microphone and transcribe to text"));
|
|
658
663
|
console.log(keyValue("/tokens", "View detailed session token usage and exchanges telemetry"));
|
|
659
664
|
console.log(keyValue("/update", "Force check for updates and update Aether CLI manually"));
|
|
660
665
|
console.log(keyValue("/game", "Start the local mainframe hacking mini-game"));
|
|
@@ -2226,3 +2231,104 @@ export async function handleDashboardCommand(ctx) {
|
|
|
2226
2231
|
}
|
|
2227
2232
|
}
|
|
2228
2233
|
|
|
2234
|
+
/**
 * Handles the `/mic` command: records voice input from the microphone,
 * transcribes it, and writes the text into the active readline prompt.
 *
 * Flow:
 *   1. Require at least one speech-to-text API key in `ctx.aiConfig`.
 *   2. Require an interactive terminal (raw mode is needed to catch keys).
 *   3. Start recording into a temporary WAV file.
 *   4. Wait for Enter (stop and transcribe) or Ctrl+C (abort).
 *   5. Stop the recorder, transcribe, and push the text into `ctx.rl`.
 * The temporary WAV file is removed on every path once recording has started.
 *
 * Errors are reported to the console; this function does not throw for
 * recording or transcription failures.
 *
 * @param {object} ctx - Chat context; `ctx.aiConfig` and `ctx.rl` are used here.
 * @returns {Promise<void>}
 */
export async function handleMicInput(ctx) {
  const { startRecording, transcribeAudioFile } = await import("./mic.js");
  const { join } = await import("node:path");
  const { tmpdir } = await import("node:os");
  const fs = await import("node:fs");

  const apiKeyExists = ctx.aiConfig.GOOGLE_API_KEY || ctx.aiConfig.GROQ_API_KEY || ctx.aiConfig.OPENAI_API_KEY;
  if (!apiKeyExists) {
    console.log("\n" + label.error + " " + colors.danger("No API keys found for speech-to-text. Please configure GOOGLE_API_KEY, GROQ_API_KEY, or OPENAI_API_KEY.\n"));
    return;
  }

  const stdin = process.stdin;
  // setRawMode only exists on TTY streams. Check before spawning the recorder
  // so a piped/non-interactive stdin cannot leave a recorder process running.
  if (!stdin.isTTY || typeof stdin.setRawMode !== "function") {
    console.log("\n" + label.error + " " + colors.danger("Microphone input requires an interactive terminal (TTY).\n"));
    return;
  }

  const wavPath = join(tmpdir(), `aether_mic_${Date.now()}.wav`);

  // Best-effort removal of the temporary recording.
  const removeWav = () => {
    try {
      if (fs.existsSync(wavPath)) {
        fs.unlinkSync(wavPath);
      }
    } catch {
      // A leftover temp file is harmless; nothing useful to report.
    }
  };

  let handle;
  try {
    handle = await startRecording(wavPath);
  } catch (err) {
    console.log("\n" + label.error + " " + colors.danger(`Failed to start recording: ${err.message}\n`));
    return;
  }

  console.log("\n" + label.system + " " + colors.brand("🎤 AUDIO VOICE INPUT"));
  console.log(separator("─"));
  console.log(colors.accent(" Recording started..."));
  console.log(" " + colors.muted("Speak into your microphone."));
  console.log(" " + colors.brand("Press [Enter] to STOP and transcribe..."));
  console.log(separator("─"));

  const wasRaw = Boolean(stdin.isRaw);
  let aborted = false;

  ctx.rl.pause();
  try {
    stdin.setRawMode(true);
    stdin.resume();
    stdin.setEncoding("utf8");

    aborted = await new Promise((resolve) => {
      const onData = (chunk) => {
        const keys = String(chunk);
        // Several key bytes can arrive in a single chunk, so search inside
        // the chunk instead of comparing it as a whole. Ctrl+C wins.
        const isAbort = keys.includes("\u0003");
        if (!isAbort && !/[\r\n]/.test(keys)) {
          return;
        }
        stdin.removeListener("data", onData);
        resolve(isAbort);
      };
      stdin.on("data", onData);
    });
  } finally {
    // Always hand the terminal back to readline in its previous mode.
    stdin.setRawMode(wasRaw);
    ctx.rl.resume();
  }

  if (aborted) {
    console.log("\n" + label.system + " " + colors.warning("Recording aborted by user.\n"));
    try {
      await handle.stop();
    } catch {
      // The recording is being discarded; a stop failure changes nothing.
    }
    removeWav();
    return;
  }

  console.log("");
  const spinner = createSpinner("transcribe");
  spinner.start("Stopping recording and transcribing...");

  try {
    await handle.stop();
    const text = await transcribeAudioFile(wavPath, ctx.aiConfig);
    spinner.stop();

    if (!text.trim()) {
      console.log("\n" + label.system + " " + colors.warning("No speech detected or transcription was empty.\n"));
      return;
    }

    console.log("\n" + label.system + " " + colors.success("✓ Transcribed text:"));
    console.log(" " + colors.text(`"${text}"`));
    console.log("");

    // Put the text into the prompt buffer so the user can edit before sending.
    ctx.rl.write(text);
  } catch (err) {
    spinner.stop();
    console.log("\n" + label.error + " " + colors.danger(`Transcription failed: ${err.message}\n`));
  } finally {
    removeWav();
  }
}
|
|
2334
|
+
|
package/src/mic.js
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
// ═══════════════════════════════════════════════════════════
|
|
2
|
+
// AETHER AI CLI — Voice Input / Microphone Engine
|
|
3
|
+
// ═══════════════════════════════════════════════════════════
|
|
4
|
+
|
|
5
|
+
import { spawn } from "node:child_process";
|
|
6
|
+
import { platform } from "node:os";
|
|
7
|
+
import fs from "node:fs";
|
|
8
|
+
|
|
9
|
+
/**
 * Starts audio recording from the microphone and returns a handle to stop it.
 *
 * On Windows a background PowerShell process drives the WinMM MCI API; the
 * WAV file is written when `stop()` is called. On other platforms the first
 * available tool among `rec`, `arecord`, and `ffmpeg` is spawned and records
 * directly into `wavPath` until it is terminated by `stop()`.
 *
 * `stop()` is idempotent and resolves once the recorder process has closed,
 * including when the process had already exited before `stop()` was called.
 *
 * @param {string} wavPath - Path where the .wav file will be saved
 * @returns {Promise<{ stop: () => Promise<void> }>}
 * @throws {Error} If no recording utility is found (non-Windows) or the
 *   recorder process cannot be spawned.
 */
export async function startRecording(wavPath) {
  if (fs.existsSync(wavPath)) {
    try {
      fs.unlinkSync(wavPath);
    } catch {
      // Best-effort: the recorder overwrites or recreates the file anyway.
    }
  }

  if (platform() === "win32") {
    // Windows: use the native WinMM MCI API via a background PowerShell process.
    // stdout is ignored because nothing reads the mciSendString return codes.
    const { child: ps, exited } = await spawnRecorder(
      "powershell",
      ["-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", "-"],
      ["pipe", "ignore", "ignore"]
    );

    // Writing to a process that already died raises EPIPE on the stream;
    // without a listener that would crash the CLI.
    ps.stdin.on("error", () => {});

    ps.stdin.write(`Add-Type -MemberDefinition '[DllImport("winmm.dll", CharSet = CharSet.Ansi)] public static extern int mciSendString(string cmd, System.Text.StringBuilder ret, int len, IntPtr cb);' -Name WinMM -Namespace Win32\r\n`);
    ps.stdin.write(`[Win32.WinMM]::mciSendString("open new Type waveaudio Alias myRecorder", $null, 0, [IntPtr]::Zero)\r\n`);
    ps.stdin.write(`[Win32.WinMM]::mciSendString("record myRecorder", $null, 0, [IntPtr]::Zero)\r\n`);

    let stopRequested = false;
    return {
      stop: () => {
        if (!stopRequested) {
          stopRequested = true;
          if (ps.exitCode === null && ps.stdin.writable) {
            // Backslash doubling is kept from the original implementation.
            // Single quotes are doubled so the path cannot terminate the
            // PowerShell single-quoted literal it is embedded in.
            const escapedPath = wavPath.replace(/\\/g, "\\\\").replace(/'/g, "''");
            ps.stdin.write(`[Win32.WinMM]::mciSendString('save myRecorder "${escapedPath}"', $null, 0, [IntPtr]::Zero)\r\n`);
            ps.stdin.write(`[Win32.WinMM]::mciSendString("close myRecorder", $null, 0, [IntPtr]::Zero)\r\n`);
            ps.stdin.write("exit\r\n");
            ps.stdin.end();
          }
        }
        return exited;
      }
    };
  }

  // macOS / Linux: try standard command-line recording tools in priority order.
  const ffmpegArgs = platform() === "darwin"
    ? ["-y", "-f", "avfoundation", "-i", ":0", wavPath]
    : ["-y", "-f", "alsa", "-i", "default", wavPath];
  const candidates = [
    ["rec", ["-q", wavPath]],
    ["arecord", ["-f", "cd", "-t", "wav", wavPath]],
    ["ffmpeg", ffmpegArgs]
  ];

  let chosen = null;
  for (const [command, args] of candidates) {
    // Sequential on purpose: the first tool found wins.
    if (await commandExists(command)) {
      chosen = { command, args };
      break;
    }
  }

  if (!chosen) {
    throw new Error("No recording utility found. On Windows, recording is native. On macOS/Linux, please install 'sox', 'arecord', or 'ffmpeg'.");
  }

  const { child: proc, exited } = await spawnRecorder(chosen.command, chosen.args, "ignore");

  return {
    stop: () => {
      // Only signal a process that is still running; `exited` is already
      // resolved if the recorder terminated on its own.
      if (proc.exitCode === null && proc.signalCode === null) {
        proc.kill("SIGTERM");
      }
      return exited;
    }
  };
}

/**
 * Spawns a recorder process and waits until it has actually started.
 *
 * The exit promise is created immediately after spawning so that a process
 * which terminates early is still observed by a later `stop()` call.
 *
 * @param {string} command - Executable to run
 * @param {string[]} args - Command-line arguments
 * @param {string | string[]} stdio - stdio configuration passed to `spawn`
 * @returns {Promise<{ child: import("node:child_process").ChildProcess, exited: Promise<void> }>}
 * @throws {Error} The spawn error (for example ENOENT) if the process cannot start.
 */
async function spawnRecorder(command, args, stdio) {
  const child = spawn(command, args, { stdio });

  const exited = new Promise((resolve) => {
    child.once("close", () => resolve());
    // Permanent listener: keeps later process errors from being uncaught.
    child.on("error", () => resolve());
  });

  await new Promise((resolve, reject) => {
    child.once("spawn", resolve);
    child.once("error", reject);
  });

  return { child, exited };
}
|
|
85
|
+
|
|
86
|
+
/**
 * Helper to check if a command exists in the environment PATH.
 *
 * Runs `where` (Windows) or `which` (other platforms) and treats exit code 0
 * as "found". If the lookup tool itself cannot be spawned, the command is
 * reported as not found instead of crashing or leaving the promise pending.
 *
 * @param {string} name - Command name to look up
 * @returns {Promise<boolean>} true when the lookup tool exits with code 0
 */
function commandExists(name) {
  return new Promise((resolve) => {
    const lookup = platform() === "win32" ? "where" : "which";
    const check = spawn(lookup, [name], { stdio: "ignore" });
    // Without this listener a missing `which`/`where` raises an uncaught
    // 'error' event. A second resolve() from a following 'close' is a no-op.
    check.once("error", () => resolve(false));
    check.once("close", (code) => resolve(code === 0));
  });
}
|
|
99
|
+
|
|
100
|
+
/**
 * Transcribes a local audio WAV file using the configured AI providers.
 *
 * Provider selection (first configured key wins, no failover between them):
 *   1. `GOOGLE_API_KEY`  -> Google Gemini `generateContent` with inline base64 audio
 *   2. `GROQ_API_KEY`    -> Groq Whisper (`whisper-large-v3`)
 *   3. `OPENAI_API_KEY`  -> OpenAI Whisper (`whisper-1`)
 *
 * @param {string} wavPath - Path to the WAV file
 * @param {object} aiConfig - Active AI configuration
 * @returns {Promise<string>} Trimmed transcribed text; empty string when the
 *   provider returned no candidate/text or only a bare `HH:MM:SS` timestamp.
 * @throws {Error} If the file does not exist, no API key is configured, or
 *   the provider responds with a non-OK HTTP status.
 */
export async function transcribeAudioFile(wavPath, aiConfig) {
  if (!fs.existsSync(wavPath)) {
    throw new Error(`Audio file not found: ${wavPath}`);
  }

  const fileBuffer = fs.readFileSync(wavPath);

  // 1. Google Gemini Transcription
  if (aiConfig.GOOGLE_API_KEY) {
    const model = "gemini-2.5-flash";
    const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;

    const body = {
      contents: [
        {
          role: "user",
          parts: [
            {
              inlineData: {
                mimeType: "audio/wav",
                data: fileBuffer.toString("base64")
              }
            },
            {
              text: "Transcribe this audio file exactly as spoken. Output ONLY the plain transcription text, with no extra formatting, conversational filler, timestamps, or commentary. If there is no speech, output an empty string."
            }
          ]
        }
      ]
    };

    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        // Sent as a header rather than a `?key=` query parameter so the
        // secret does not end up in URLs recorded by proxies or logs.
        "x-goog-api-key": aiConfig.GOOGLE_API_KEY
      },
      body: JSON.stringify(body)
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Gemini transcription error: ${response.status} ${response.statusText}. ${errorText}`);
    }

    const data = await response.json();
    const candidate = data.candidates?.[0];
    if (!candidate) {
      return "";
    }

    const parts = candidate.content?.parts ?? [];
    const cleaned = parts
      .map((part) => part?.text)
      .filter(Boolean)
      .join("")
      .trim();

    // A response that is nothing but a timestamp (e.g. 00:00:23) is treated
    // as "no speech".
    if (/^\d{2}:\d{2}:\d{2}$/.test(cleaned)) {
      return "";
    }
    return cleaned;
  }

  // 2. Groq Whisper / OpenAI Whisper
  let apiKey = aiConfig.GROQ_API_KEY;
  let url = "https://api.groq.com/openai/v1/audio/transcriptions";
  let modelName = "whisper-large-v3";

  if (!apiKey) {
    apiKey = aiConfig.OPENAI_API_KEY;
    url = "https://api.openai.com/v1/audio/transcriptions";
    modelName = "whisper-1";
  }

  if (!apiKey) {
    throw new Error("No API key configured for speech-to-text. Please configure GOOGLE_API_KEY, GROQ_API_KEY, or OPENAI_API_KEY.");
  }

  // FormData lets fetch generate the multipart boundary and Content-Type
  // header itself, so no hand-built boundary is needed.
  const form = new FormData();
  form.append("file", new Blob([fileBuffer], { type: "audio/wav" }), "audio.wav");
  form.append("model", modelName);

  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${apiKey}`
    },
    body: form
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Whisper transcription error: ${response.status} ${response.statusText}. ${errorText}`);
  }

  const data = await response.json();
  return (data.text || "").trim();
}
|