@tritard/waterbrother 0.9.2 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent.js +2 -1
- package/src/cli.js +100 -4
- package/src/voice.js +511 -0
package/package.json
CHANGED
package/src/agent.js
CHANGED
|
@@ -74,7 +74,8 @@ When you use tools:
|
|
|
74
74
|
- avoid hype such as "premium", "luxurious", "studio-grade", or "improved!"
|
|
75
75
|
- Explain what you changed and why.
|
|
76
76
|
- Never claim you ran commands you did not run.
|
|
77
|
-
- If a tool fails, show the failure and recover
|
|
77
|
+
- If a tool fails, show the failure and recover.
|
|
78
|
+
- You are a coding tool for real software engineering work. If a request is clearly a joke, hypothetical, non-technical, or not related to actual software development, respond conversationally WITHOUT using any tools. Do not create files, write scripts, or make edits for non-engineering requests. Examples of things you should NOT build: personality generators, dating advice scripts, joke apps, horoscope generators, or any request that is clearly not serious engineering work.`;
|
|
78
79
|
|
|
79
80
|
const COMPACTION_SYSTEM_PROMPT = `You summarize coding assistant transcripts for context compaction.
|
|
80
81
|
Output concise markdown with these sections:
|
package/src/cli.js
CHANGED
|
@@ -167,7 +167,8 @@ const INTERACTIVE_COMMANDS = [
|
|
|
167
167
|
{ name: "/models", description: "Select model from list" },
|
|
168
168
|
{ name: "/feedback", description: "Report a bug or share feedback" },
|
|
169
169
|
{ name: "/cost", description: "Show session token usage and cost breakdown" },
|
|
170
|
-
{ name: "/diff", description: "Show git changes in the current repo" }
|
|
170
|
+
{ name: "/diff", description: "Show git changes in the current repo" },
|
|
171
|
+
{ name: "/voice", description: "Toggle voice dictation (press space to record)" }
|
|
171
172
|
];
|
|
172
173
|
|
|
173
174
|
const AGENT_PROFILES = ["coder", "designer", "reviewer", "planner"];
|
|
@@ -4459,6 +4460,8 @@ async function readInteractiveLine(options = {}) {
|
|
|
4459
4460
|
const output = process.stdout;
|
|
4460
4461
|
const initialRaw = Boolean(input.isRaw);
|
|
4461
4462
|
const getFooterText = typeof options.getFooterText === "function" ? options.getFooterText : null;
|
|
4463
|
+
const voiceSession = options.voiceSession || null;
|
|
4464
|
+
const grokConfig = options.grokConfig || null;
|
|
4462
4465
|
|
|
4463
4466
|
return new Promise((resolve, reject) => {
|
|
4464
4467
|
let buffer = "";
|
|
@@ -4468,6 +4471,10 @@ async function readInteractiveLine(options = {}) {
|
|
|
4468
4471
|
let ignoredPasteEnters = 0;
|
|
4469
4472
|
let pasteSuppressUntil = 0;
|
|
4470
4473
|
|
|
4474
|
+
// Voice recording state
|
|
4475
|
+
let voiceRecording = false;
|
|
4476
|
+
let voiceIndicator = "";
|
|
4477
|
+
|
|
4471
4478
|
function finish(nextValue) {
|
|
4472
4479
|
if (settled) return;
|
|
4473
4480
|
settled = true;
|
|
@@ -4490,8 +4497,9 @@ async function readInteractiveLine(options = {}) {
|
|
|
4490
4497
|
selectedIndex = 0;
|
|
4491
4498
|
}
|
|
4492
4499
|
|
|
4500
|
+
const displayBuffer = voiceIndicator ? `${buffer} ${voiceIndicator}` : buffer;
|
|
4493
4501
|
const writePrompt = () => {
|
|
4494
|
-
output.write(formatPromptRow(
|
|
4502
|
+
output.write(formatPromptRow(displayBuffer, columns));
|
|
4495
4503
|
};
|
|
4496
4504
|
|
|
4497
4505
|
output.write("\r\x1b[2K");
|
|
@@ -4625,6 +4633,60 @@ async function readInteractiveLine(options = {}) {
|
|
|
4625
4633
|
return;
|
|
4626
4634
|
}
|
|
4627
4635
|
|
|
4636
|
+
// Voice: spacebar on empty/trailing-space triggers a 5-second recording.
|
|
4637
|
+
// Uses fixed duration with clean sox exit — same code path as test-capture.mjs.
|
|
4638
|
+
if (voiceSession && !voiceRecording && str === " " && (buffer.length === 0 || buffer.endsWith(" "))) {
|
|
4639
|
+
voiceRecording = true;
|
|
4640
|
+
voiceIndicator = "\x1b[31m[recording 5s — speak now]\x1b[0m";
|
|
4641
|
+
render();
|
|
4642
|
+
|
|
4643
|
+
(async () => {
|
|
4644
|
+
try {
|
|
4645
|
+
const result = await voiceSession.recordAndTranscribe(5);
|
|
4646
|
+
voiceRecording = false;
|
|
4647
|
+
|
|
4648
|
+
if (result && typeof result === "object" && result.error) {
|
|
4649
|
+
voiceIndicator = `\x1b[31m[${result.error}]\x1b[0m`;
|
|
4650
|
+
render();
|
|
4651
|
+
setTimeout(() => { voiceIndicator = ""; render(); }, 6000);
|
|
4652
|
+
return;
|
|
4653
|
+
}
|
|
4654
|
+
|
|
4655
|
+
const rawText = typeof result === "string" ? result : "";
|
|
4656
|
+
voiceIndicator = "";
|
|
4657
|
+
if (!rawText) {
|
|
4658
|
+
render();
|
|
4659
|
+
return;
|
|
4660
|
+
}
|
|
4661
|
+
|
|
4662
|
+
const insertPoint = buffer.length;
|
|
4663
|
+
buffer += rawText;
|
|
4664
|
+
render();
|
|
4665
|
+
|
|
4666
|
+
if (grokConfig && grokConfig.apiKey) {
|
|
4667
|
+
voiceIndicator = "\x1b[36m[correcting...]\x1b[0m";
|
|
4668
|
+
render();
|
|
4669
|
+
voiceSession.correctTranscript(rawText, grokConfig).then((corrected) => {
|
|
4670
|
+
voiceIndicator = "";
|
|
4671
|
+
if (settled) return;
|
|
4672
|
+
if (corrected && corrected !== rawText) {
|
|
4673
|
+
const before = buffer.slice(0, insertPoint);
|
|
4674
|
+
const after = buffer.slice(insertPoint + rawText.length);
|
|
4675
|
+
buffer = before + corrected + after;
|
|
4676
|
+
}
|
|
4677
|
+
render();
|
|
4678
|
+
});
|
|
4679
|
+
}
|
|
4680
|
+
} catch (err) {
|
|
4681
|
+
voiceIndicator = `\x1b[31m[voice error: ${err.message || err}]\x1b[0m`;
|
|
4682
|
+
voiceRecording = false;
|
|
4683
|
+
render();
|
|
4684
|
+
setTimeout(() => { voiceIndicator = ""; render(); }, 6000);
|
|
4685
|
+
}
|
|
4686
|
+
})();
|
|
4687
|
+
return;
|
|
4688
|
+
}
|
|
4689
|
+
|
|
4628
4690
|
if (isPrintableKey(str, key)) {
|
|
4629
4691
|
buffer += str;
|
|
4630
4692
|
selectedIndex = 0;
|
|
@@ -5015,7 +5077,7 @@ async function promptLoop(agent, session, context) {
|
|
|
5015
5077
|
let line = normalizeInteractiveInput(
|
|
5016
5078
|
await readInteractiveLine({
|
|
5017
5079
|
getFooterText(inputBuffer) {
|
|
5018
|
-
|
|
5080
|
+
const footer = buildInteractiveFooter({
|
|
5019
5081
|
agent,
|
|
5020
5082
|
cwd: context.cwd,
|
|
5021
5083
|
sessionId: currentSession.id,
|
|
@@ -5023,7 +5085,17 @@ async function promptLoop(agent, session, context) {
|
|
|
5023
5085
|
lastUsage: context.lastUsage,
|
|
5024
5086
|
costTracker: context.costTracker
|
|
5025
5087
|
});
|
|
5026
|
-
|
|
5088
|
+
if (context.voiceModeEnabled) {
|
|
5089
|
+
return "Voice ON — space to record, space to stop | " + footer;
|
|
5090
|
+
}
|
|
5091
|
+
return footer;
|
|
5092
|
+
},
|
|
5093
|
+
voiceSession: context.voiceModeEnabled ? context.voiceSession : null,
|
|
5094
|
+
grokConfig: context.voiceModeEnabled ? {
|
|
5095
|
+
apiKey: context.runtime.apiKey,
|
|
5096
|
+
baseUrl: context.runtime.baseUrl,
|
|
5097
|
+
model: context.runtime.model
|
|
5098
|
+
} : null
|
|
5027
5099
|
})
|
|
5028
5100
|
);
|
|
5029
5101
|
if (!line) continue;
|
|
@@ -6680,6 +6752,30 @@ async function promptLoop(agent, session, context) {
|
|
|
6680
6752
|
continue;
|
|
6681
6753
|
}
|
|
6682
6754
|
|
|
6755
|
+
if (line === "/voice") {
|
|
6756
|
+
if (!context.voiceModeEnabled) {
|
|
6757
|
+
try {
|
|
6758
|
+
if (!context.voiceSession) {
|
|
6759
|
+
const { setupVoice } = await import("./voice.js");
|
|
6760
|
+
context.voiceSession = await setupVoice((msg) => console.log(msg));
|
|
6761
|
+
}
|
|
6762
|
+
context.voiceModeEnabled = true;
|
|
6763
|
+
console.log("Voice mode ON. Press spacebar to record (5 seconds).");
|
|
6764
|
+
console.log(dim("Tip: Grok will auto-correct technical terms after transcription."));
|
|
6765
|
+
} catch (error) {
|
|
6766
|
+
console.log(`Voice mode failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
6767
|
+
}
|
|
6768
|
+
} else {
|
|
6769
|
+
context.voiceModeEnabled = false;
|
|
6770
|
+
if (context.voiceSession) {
|
|
6771
|
+
context.voiceSession.destroy();
|
|
6772
|
+
context.voiceSession = null;
|
|
6773
|
+
}
|
|
6774
|
+
console.log("Voice mode OFF.");
|
|
6775
|
+
}
|
|
6776
|
+
continue;
|
|
6777
|
+
}
|
|
6778
|
+
|
|
6683
6779
|
if (line.startsWith("/")) {
|
|
6684
6780
|
console.log("Unknown slash command. Use /help.");
|
|
6685
6781
|
continue;
|
package/src/voice.js
ADDED
|
@@ -0,0 +1,511 @@
|
|
|
1
|
+
import { execFile, spawn } from "node:child_process";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import { createRequire } from "node:module";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
import process from "node:process";
|
|
6
|
+
import { pathToFileURL } from "node:url";
|
|
7
|
+
import { promisify } from "node:util";
|
|
8
|
+
import { createChatCompletion } from "./grok-client.js";
|
|
9
|
+
|
|
10
|
+
// Promise-returning execFile, used for all external commands (which/where, sox, tar, npm).
const execFileAsync = promisify(execFile);
|
|
11
|
+
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Paths
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
// Moonshine Base English (int8-quantized) offline ASR model, as packaged in
// sherpa-onnx's asr-models GitHub release.
const MODEL_DIR_NAME = "sherpa-onnx-moonshine-base-en-int8";
// Every one of these files must exist for the model to count as installed
// (see checkModel / createRecognizer).
const MODEL_FILES = [
  "preprocess.onnx",
  "encode.int8.onnx",
  "uncached_decode.int8.onnx",
  "cached_decode.int8.onnx",
  "tokens.txt"
];
|
|
24
|
+
|
|
25
|
+
function getWaterbrotherHome() {
  // Per-user data root: ~/.waterbrother. Falls back to USERPROFILE on
  // Windows, and to the empty string (CWD-relative) if neither is set.
  const userHome = process.env.HOME || process.env.USERPROFILE || "";
  return path.join(userHome, ".waterbrother");
}
|
|
29
|
+
|
|
30
|
+
function getModelsDir() {
  // Directory holding the Moonshine model files, under the user data root.
  return path.join(getWaterbrotherHome(), "models", MODEL_DIR_NAME);
}
|
|
33
|
+
|
|
34
|
+
function getVoiceRuntimeDir() {
  // Private npm project dir where sherpa-onnx-node and its native addon are
  // installed (see installSherpaOnnx), kept out of the CLI's own node_modules.
  return path.join(getWaterbrotherHome(), "voice-runtime");
}
|
|
37
|
+
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// System checks
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
async function hasBin(name) {
  // True when `name` resolves on PATH via the platform lookup command.
  const lookup = process.platform === "win32" ? "where" : "which";
  try {
    await execFileAsync(lookup, [name]);
    return true;
  } catch {
    // Non-zero exit (not found) or missing lookup tool both mean "no".
    return false;
  }
}
|
|
51
|
+
|
|
52
|
+
async function checkSox() {
  // Locate the sox binary. Returns { ok, path } where path is the resolved
  // executable (or null when not found).
  const lookup = process.platform === "win32" ? "where" : "which";
  try {
    const { stdout } = await execFileAsync(lookup, ["sox"]);
    const firstHit = String(stdout || "").trim().split("\n")[0] || null;
    return { ok: true, path: firstHit };
  } catch {}

  // On Windows, winget portable installs don't add to PATH — search known locations
  if (process.platform === "win32") {
    const localAppData = process.env.LOCALAPPDATA;
    if (localAppData) {
      const packagesDir = path.join(localAppData, "Microsoft", "WinGet", "Packages");
      try {
        for (const entry of await fs.readdir(packagesDir)) {
          if (!entry.toLowerCase().includes("sox")) continue;
          const entryPath = path.join(packagesDir, entry);
          // Recursive listing; requires Node 20+ for { recursive: true }.
          for (const sub of await fs.readdir(entryPath, { recursive: true })) {
            if (path.basename(sub).toLowerCase() !== "sox.exe") continue;
            const fullPath = path.join(entryPath, sub);
            // Prepend so later spawns in this process can also find sox by name.
            process.env.PATH = `${path.dirname(fullPath)};${process.env.PATH}`;
            return { ok: true, path: fullPath };
          }
        }
      } catch {
        // Packages dir missing/unreadable — fall through to "not found".
      }
    }
  }

  return { ok: false, path: null };
}
|
|
85
|
+
|
|
86
|
+
async function checkSherpaOnnx() {
  // The runtime counts as installed when the installed package manifest exists.
  const marker = path.join(
    getVoiceRuntimeDir(), "node_modules", "sherpa-onnx-node", "package.json"
  );
  try {
    await fs.access(marker);
    return { ok: true };
  } catch {
    return { ok: false };
  }
}
|
|
96
|
+
|
|
97
|
+
async function checkModel() {
  // Verify all model files are present. Returns { ok, dir, missing }.
  const dir = getModelsDir();
  try {
    const present = new Set(await fs.readdir(dir));
    const missing = MODEL_FILES.filter((file) => !present.has(file));
    return { ok: missing.length === 0, dir, missing };
  } catch {
    // Directory absent or unreadable: treat every file as missing.
    return { ok: false, dir, missing: MODEL_FILES };
  }
}
|
|
107
|
+
|
|
108
|
+
// ---------------------------------------------------------------------------
|
|
109
|
+
// Model download
|
|
110
|
+
// ---------------------------------------------------------------------------
|
|
111
|
+
|
|
112
|
+
// Upstream model archive (~250 MB) from sherpa-onnx's asr-models release.
const MODEL_ARCHIVE_URL =
  `https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${MODEL_DIR_NAME}.tar.bz2`;
|
|
114
|
+
|
|
115
|
+
async function downloadModel(onProgress) {
  // Download and unpack the Moonshine model archive into ~/.waterbrother/models/.
  // onProgress (optional) receives { status: "downloading" | "progress" |
  // "extracting" | "done", downloaded?, total?, size? }.
  // Throws on HTTP failure or if `tar` extraction fails.
  const modelsRoot = path.dirname(getModelsDir());
  await fs.mkdir(modelsRoot, { recursive: true });

  if (onProgress) onProgress({ status: "downloading" });

  const response = await fetch(MODEL_ARCHIVE_URL, { redirect: "follow" });
  if (!response.ok) {
    throw new Error(`Failed to download model archive: HTTP ${response.status}`);
  }

  const contentLength = Number(response.headers.get("content-length")) || 0;
  const reader = response.body.getReader();
  const archivePath = path.join(modelsRoot, `${MODEL_DIR_NAME}.tar.bz2`);

  // Stream chunks straight to disk instead of accumulating the whole
  // (~250 MB) archive in memory before a single writeFile.
  const handle = await fs.open(archivePath, "w");
  let downloaded = 0;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      await handle.write(value);
      downloaded += value.length;
      if (onProgress && contentLength > 0) {
        onProgress({ status: "progress", downloaded, total: contentLength });
      }
    }
  } finally {
    await handle.close();
  }

  if (onProgress) onProgress({ status: "extracting" });

  // "xjf" needs a tar with bzip2 support (standard on macOS/Linux/Win10+).
  await execFileAsync("tar", ["xjf", archivePath, "-C", modelsRoot], {
    timeout: 120_000
  });

  // Clean up archive (best-effort — a leftover archive is harmless)
  await fs.unlink(archivePath).catch(() => {});
  if (onProgress) onProgress({ status: "done", size: downloaded });
}
|
|
155
|
+
|
|
156
|
+
// ---------------------------------------------------------------------------
|
|
157
|
+
// Recognizer lifecycle
|
|
158
|
+
// ---------------------------------------------------------------------------
|
|
159
|
+
|
|
160
|
+
// Process-wide cache of the sherpa-onnx-node module (native addon; load once).
let _sherpaOnnx = null;

async function loadSherpaOnnx() {
  // Resolve sherpa-onnx-node from the private voice-runtime install by
  // anchoring a CommonJS require at a (non-existent) file inside that dir —
  // createRequire only uses the path for module resolution.
  if (!_sherpaOnnx) {
    const anchor = path.join(getVoiceRuntimeDir(), "loader.cjs");
    _sherpaOnnx = createRequire(anchor)("sherpa-onnx-node");
  }
  return _sherpaOnnx;
}
|
|
172
|
+
|
|
173
|
+
function createRecognizer() {
  // Build an offline Moonshine recognizer from the downloaded model files.
  // loadSherpaOnnx() must have completed first.
  if (!_sherpaOnnx) throw new Error("sherpa-onnx-node not loaded");

  const modelDir = getModelsDir();
  const modelFile = (name) => path.join(modelDir, name);

  return new _sherpaOnnx.OfflineRecognizer({
    modelConfig: {
      moonshine: {
        preprocessor: modelFile("preprocess.onnx"),
        encoder: modelFile("encode.int8.onnx"),
        uncachedDecoder: modelFile("uncached_decode.int8.onnx"),
        cachedDecoder: modelFile("cached_decode.int8.onnx")
      },
      tokens: modelFile("tokens.txt"),
      provider: "cpu",
      numThreads: 2,
      debug: 0
    }
  });
}
|
|
195
|
+
|
|
196
|
+
// ---------------------------------------------------------------------------
|
|
197
|
+
// Audio device detection (Windows)
|
|
198
|
+
// ---------------------------------------------------------------------------
|
|
199
|
+
|
|
200
|
+
const WAVEIN_ENUM_SCRIPT = `
|
|
201
|
+
Add-Type -TypeDefinition @"
|
|
202
|
+
using System;
|
|
203
|
+
using System.Runtime.InteropServices;
|
|
204
|
+
public class WaveInHelper {
|
|
205
|
+
[DllImport("winmm.dll")]
|
|
206
|
+
public static extern uint waveInGetNumDevs();
|
|
207
|
+
[DllImport("winmm.dll", CharSet = CharSet.Auto)]
|
|
208
|
+
public static extern uint waveInGetDevCapsW(uint id, ref WAVEINCAPS caps, uint size);
|
|
209
|
+
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Auto)]
|
|
210
|
+
public struct WAVEINCAPS {
|
|
211
|
+
public ushort wMid;
|
|
212
|
+
public ushort wPid;
|
|
213
|
+
public uint vDriverVersion;
|
|
214
|
+
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 32)]
|
|
215
|
+
public string szPname;
|
|
216
|
+
public uint dwFormats;
|
|
217
|
+
public ushort wChannels;
|
|
218
|
+
public ushort wReserved1;
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
"@
|
|
222
|
+
$$n = [WaveInHelper]::waveInGetNumDevs()
|
|
223
|
+
for ($$i = 0; $$i -lt $$n; $$i++) {
|
|
224
|
+
$$c = New-Object WaveInHelper+WAVEINCAPS
|
|
225
|
+
[WaveInHelper]::waveInGetDevCapsW($$i, [ref]$$c, [Runtime.InteropServices.Marshal]::SizeOf($$c)) | Out-Null
|
|
226
|
+
Write-Output "$$i|$$($$c.szPname)"
|
|
227
|
+
}
|
|
228
|
+
`.replace(/\$\$/g, "$");
|
|
229
|
+
|
|
230
|
+
async function detectAudioDevice(soxPath, log) {
  // Pick the best waveIn capture device on Windows. Returns the device index
  // (as a string, for sox's `-t waveaudio` input), "default" when
  // enumeration fails or finds nothing, or null on non-Windows platforms
  // (sox `-d` is used there instead).
  if (process.platform !== "win32") return null;

  try {
    const { stdout } = await execFileAsync("powershell.exe", [
      "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", WAVEIN_ENUM_SCRIPT
    ], { timeout: 10000 });

    const devices = String(stdout).trim().split("\n")
      .map((line) => line.trim())
      // Skip blank or malformed rows: without this, a line lacking the "|"
      // separator produced a bogus { index: line.slice(0, -1), name: "" }.
      .filter((line) => line.includes("|"))
      .map((line) => {
        const sep = line.indexOf("|");
        return { index: line.slice(0, sep), name: line.slice(sep + 1) };
      });

    if (devices.length === 0) return "default";

    // Score each device: prefer hardware mics, deprioritize virtual/mixer devices
    const scored = devices.map((d) => {
      const lower = d.name.toLowerCase();
      let score = 0;
      if (lower.startsWith("headset microphone")) score += 100;
      else if (lower.startsWith("microphone (")) score += 50;
      else if (lower.startsWith("microphone")) score += 30;
      else if (lower.startsWith("line in")) score += 20;
      if (/sonar|virtual|nahimic|nvidia/i.test(d.name)) score -= 200;
      return { ...d, score };
    });

    scored.sort((a, b) => b.score - a.score);
    const best = scored[0];
    log(`  audio device: ${best.name} (device ${best.index})`);
    return best.index;
  } catch {
    // Enumeration is best-effort; let sox try the default device.
    return "default";
  }
}
|
|
268
|
+
|
|
269
|
+
// ---------------------------------------------------------------------------
|
|
270
|
+
// Audio capture via sox
|
|
271
|
+
// ---------------------------------------------------------------------------
|
|
272
|
+
|
|
273
|
+
// Record for a fixed duration, sox exits cleanly. This is the exact same
// approach as test-capture.mjs which is the only code path proven to work.
async function captureAudio(soxPath, audioDevice, durationSec) {
  // Capture durationSec seconds of 16 kHz mono 16-bit signed PCM from the
  // microphone via sox stdout; returns normalized Float32Array samples.
  const isWin = process.platform === "win32";
  const inputArgs = isWin
    ? ["-t", "waveaudio", audioDevice || "default"]
    : ["-d"];

  // `trim 0 N` caps the recording so sox terminates on its own (no kill).
  const args = [
    ...inputArgs,
    "-t", "raw", "-r", "16000", "-c", "1", "-b", "16", "-e", "signed-integer",
    "-", "trim", "0", String(durationSec)
  ];

  // Expected payload is 32 000 B/s (16 kHz * 2 bytes). Allow generous slack:
  // the previous +1024 bytes (32 ms) could be exceeded by a final partial
  // device buffer, making execFile reject the whole recording with a
  // maxBuffer error.
  const { stdout } = await execFileAsync(soxPath, args, {
    timeout: (durationSec + 5) * 1000,
    maxBuffer: durationSec * 32000 + 64 * 1024,
    encoding: "buffer"
  });

  // Decode little-endian int16 PCM into [-1, 1) floats; a trailing odd byte
  // (if any) is dropped by the floor.
  const samples = new Float32Array(Math.floor(stdout.length / 2));
  for (let i = 0; i < samples.length; i++) {
    samples[i] = stdout.readInt16LE(i * 2) / 32768.0;
  }
  return samples;
}
|
|
299
|
+
|
|
300
|
+
// ---------------------------------------------------------------------------
|
|
301
|
+
// Transcription
|
|
302
|
+
// ---------------------------------------------------------------------------
|
|
303
|
+
|
|
304
|
+
function transcribe(recognizer, audioSamples) {
  // Decode a buffer of 16 kHz float samples with the offline recognizer.
  // Anything shorter than 0.1 s (1600 samples) cannot contain speech —
  // return the empty string without touching the recognizer.
  if (!audioSamples || audioSamples.length < 1600) return "";

  const stream = recognizer.createStream();
  stream.acceptWaveform({ sampleRate: 16000, samples: audioSamples });
  recognizer.decode(stream);
  const { text } = recognizer.getResult(stream);
  return text.trim();
}
|
|
314
|
+
|
|
315
|
+
// ---------------------------------------------------------------------------
|
|
316
|
+
// Grok correction pass
|
|
317
|
+
// ---------------------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
// System prompt for the post-transcription cleanup pass: the model only
// repairs STT mistakes and technical-term formatting, never adds content.
const CORRECTION_SYSTEM_PROMPT =
  "You are a transcription corrector for a coding CLI. Fix speech-to-text errors. " +
  "Properly format technical terms (camelCase, snake_case, file paths, CLI flags, function names). " +
  "Return ONLY the corrected text, nothing else. If the text is already correct, return it unchanged.";
|
|
323
|
+
|
|
324
|
+
async function correctTranscript(rawText, { apiKey, baseUrl, model }) {
  // Best-effort cleanup of a raw transcript via the chat API.
  // Any failure mode — missing key, network error, empty completion —
  // falls back to returning rawText unchanged.
  if (!rawText || !apiKey) return rawText;

  try {
    const completion = await createChatCompletion({
      apiKey,
      baseUrl,
      model: model || "grok-3-mini",
      messages: [
        { role: "system", content: CORRECTION_SYSTEM_PROMPT },
        { role: "user", content: rawText }
      ],
      temperature: 0
    });
    const cleaned = (completion?.message?.content || "").trim();
    return cleaned || rawText;
  } catch {
    // Deliberate best-effort: correction must never break dictation.
    return rawText;
  }
}
|
|
345
|
+
|
|
346
|
+
// ---------------------------------------------------------------------------
|
|
347
|
+
// Auto-install helpers
|
|
348
|
+
// ---------------------------------------------------------------------------
|
|
349
|
+
|
|
350
|
+
async function runShell(command, args, { label, log, timeout = 300_000 } = {}) {
  // Thin wrapper over execFileAsync with optional "Installing X..." logging.
  // NOTE(review): shell:true on Windows routes through a shell; all call
  // sites pass fixed argument strings, so no untrusted input reaches it.
  if (log && label) log(`  Installing ${label}...`);
  const { stdout, stderr } = await execFileAsync(command, args, {
    timeout,
    env: process.env,
    shell: process.platform === "win32"
  });
  return { stdout, stderr };
}
|
|
359
|
+
|
|
360
|
+
async function installSox(log) {
  // Install sox via the platform's package manager; throws a readable error
  // when no supported manager is available.
  switch (process.platform) {
    case "darwin":
      if (!await hasBin("brew")) throw new Error("Cannot auto-install sox: Homebrew not found.");
      await runShell("brew", ["install", "sox"], { label: "sox via Homebrew", log });
      return;
    case "win32":
      if (await hasBin("winget")) {
        await runShell("winget", ["install", "--id", "ChrisBagwell.SoX", "-e", "--accept-source-agreements", "--accept-package-agreements"], { label: "sox via winget", log });
      } else if (await hasBin("choco")) {
        await runShell("choco", ["install", "sox", "-y"], { label: "sox via Chocolatey", log });
      } else {
        throw new Error("Cannot auto-install sox: neither winget nor choco found.");
      }
      return;
    default:
      // Linux and other unixes: try apt, then dnf.
      if (await hasBin("apt-get")) {
        await runShell("sudo", ["apt-get", "install", "-y", "sox"], { label: "sox via apt", log });
      } else if (await hasBin("dnf")) {
        await runShell("sudo", ["dnf", "install", "-y", "sox"], { label: "sox via dnf", log });
      } else {
        throw new Error("Cannot auto-install sox: neither apt-get nor dnf found.");
      }
  }
}
|
|
383
|
+
|
|
384
|
+
function getNativeAddonPackage() {
  // Map the current platform-arch pair to the prebuilt sherpa-onnx native
  // addon package name; null when no prebuilt addon exists.
  const table = {
    "win32-x64": "sherpa-onnx-win-x64",
    "win32-ia32": "sherpa-onnx-win-ia32",
    "darwin-arm64": "sherpa-onnx-darwin-arm64",
    "darwin-x64": "sherpa-onnx-darwin-x64",
    "linux-x64": "sherpa-onnx-linux-x64",
    "linux-arm64": "sherpa-onnx-linux-arm64"
  };
  return table[`${process.platform}-${process.arch}`] ?? null;
}
|
|
394
|
+
|
|
395
|
+
async function installSherpaOnnx(log) {
  // Create a private npm project under ~/.waterbrother/voice-runtime and
  // install sherpa-onnx-node there, keeping the heavy native dependency out
  // of the CLI's own node_modules.
  const runtimeDir = getVoiceRuntimeDir();
  await fs.mkdir(runtimeDir, { recursive: true });

  const nativePkg = getNativeAddonPackage();
  if (!nativePkg) {
    throw new Error(`Unsupported platform: ${process.platform}-${process.arch}`);
  }

  // Include the platform-specific native addon as a direct dependency
  const manifest = {
    name: "waterbrother-voice-runtime",
    version: "1.0.0",
    private: true,
    dependencies: {
      "sherpa-onnx-node": "^1.12.0",
      [nativePkg]: "^1.12.0"
    }
  };
  await fs.writeFile(path.join(runtimeDir, "package.json"), JSON.stringify(manifest, null, 2));

  log("  Installing sherpa-onnx-node (this may take a minute)...");
  const npmCmd = process.platform === "win32" ? "npm.cmd" : "npm";
  await execFileAsync(npmCmd, ["install", "--no-audit", "--no-fund"], {
    cwd: runtimeDir,
    timeout: 300_000,
    env: process.env,
    shell: process.platform === "win32"
  });
}
|
|
425
|
+
|
|
426
|
+
// ---------------------------------------------------------------------------
|
|
427
|
+
// Setup orchestrator
|
|
428
|
+
// ---------------------------------------------------------------------------
|
|
429
|
+
|
|
430
|
+
// One-shot setup + session factory for voice dictation.
// Ensures sox, the sherpa-onnx runtime, and the Moonshine model are present
// (installing / downloading each as needed), then returns a session object:
//   { recordAndTranscribe(durationSec), correctTranscript(text, cfg), destroy() }.
// onStatus (optional) receives human-readable progress lines.
// Throws when any prerequisite cannot be installed.
export async function setupVoice(onStatus) {
  const log = onStatus || (() => {});

  log("Setting up voice mode...");

  // 1. sox — check, auto-install if missing
  let sox = await checkSox();
  if (!sox.ok) {
    await installSox(log);
    // Re-check: the installer may have put sox somewhere PATH can't see yet.
    sox = await checkSox();
    if (!sox.ok) throw new Error("sox installed but not found. Restart your terminal and try again.");
  }
  log(`  sox: ${sox.path}`);

  // 2. sherpa-onnx — install into ~/.waterbrother/voice-runtime/ if missing
  let sherpa = await checkSherpaOnnx();
  if (!sherpa.ok) {
    await installSherpaOnnx(log);
    sherpa = await checkSherpaOnnx();
    if (!sherpa.ok) throw new Error("sherpa-onnx-node install failed. Check ~/.waterbrother/voice-runtime/ for errors.");
  }
  log("  sherpa-onnx: ready");

  // 3. Model — auto-download if missing
  const model = await checkModel();
  if (!model.ok) {
    log("  Downloading Moonshine Base model (~250 MB)...");
    await downloadModel(({ status, downloaded, total, size }) => {
      if (status === "progress" && total > 0) {
        // \r overwrites the same terminal line for a live percentage.
        const pct = Math.round((downloaded / total) * 100);
        process.stdout.write(`\r  ${pct}% (${formatBytes(downloaded)}/${formatBytes(total)})`);
      } else if (status === "extracting") {
        process.stdout.write(`\r  Extracting... \n`);
      } else if (status === "done") {
        log(`  Done (${formatBytes(size)})`);
      }
    });
    log("  Model ready.");
  } else {
    log("  Moonshine Base: ready");
  }

  // 4. Detect audio device (Windows)
  const soxPath = sox.path;
  const audioDevice = await detectAudioDevice(soxPath, log);

  // 5. Initialize recognizer
  await loadSherpaOnnx();
  const recognizer = createRecognizer();

  return {
    // Record for a fixed duration (sox exits cleanly, no kill).
    // Returns transcribed text or { error: "..." }.
    async recordAndTranscribe(durationSec = 5) {
      const samples = await captureAudio(soxPath, audioDevice, durationSec);
      const durationMs = Math.round((samples.length / 16000) * 1000);
      // Peak amplitude distinguishes "mic dead" from "no speech detected".
      let maxAmp = 0;
      for (const v of samples) { const a = Math.abs(v); if (a > maxAmp) maxAmp = a; }
      const text = transcribe(recognizer, samples);
      if (text) return text;
      if (samples.length < 1600) return { error: `Recording too short (${durationMs}ms)` };
      if (maxAmp < 0.01) return { error: `Silence (${durationMs}ms, amp=${maxAmp.toFixed(4)}) — mic not active` };
      return { error: `No speech detected (${durationMs}ms, amp=${maxAmp.toFixed(4)})` };
    },

    // Optional LLM cleanup pass; delegates to the module-level helper.
    async correctTranscript(rawText, grokConfig) {
      return correctTranscript(rawText, grokConfig);
    },

    // No persistent resources held; recognizer memory is reclaimed with the
    // session object.
    destroy() {}
  };
}
|
|
502
|
+
|
|
503
|
+
// ---------------------------------------------------------------------------
|
|
504
|
+
// Helpers
|
|
505
|
+
// ---------------------------------------------------------------------------
|
|
506
|
+
|
|
507
|
+
function formatBytes(bytes) {
  // Human-readable size: plain bytes below 1 KB, one-decimal KB below 1 MB,
  // otherwise one-decimal MB (sufficient for the ~250 MB model archive).
  const KB = 1024;
  const MB = KB * 1024;
  if (bytes < KB) return `${bytes} B`;
  if (bytes < MB) return `${(bytes / KB).toFixed(1)} KB`;
  return `${(bytes / MB).toFixed(1)} MB`;
}
|