@oh-my-pi/pi-coding-agent 12.3.0 → 12.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/CHANGELOG.md +66 -0
  2. package/docs/custom-tools.md +21 -6
  3. package/docs/extensions.md +20 -0
  4. package/package.json +12 -12
  5. package/src/cli/setup-cli.ts +62 -2
  6. package/src/commands/setup.ts +1 -1
  7. package/src/config/keybindings.ts +6 -2
  8. package/src/config/settings-schema.ts +58 -4
  9. package/src/config/settings.ts +23 -9
  10. package/src/debug/index.ts +26 -19
  11. package/src/debug/log-formatting.ts +60 -0
  12. package/src/debug/log-viewer.ts +903 -0
  13. package/src/debug/report-bundle.ts +87 -8
  14. package/src/discovery/helpers.ts +131 -137
  15. package/src/extensibility/custom-tools/types.ts +44 -6
  16. package/src/extensibility/extensions/types.ts +60 -0
  17. package/src/extensibility/hooks/types.ts +60 -0
  18. package/src/extensibility/skills.ts +4 -2
  19. package/src/lsp/render.ts +1 -1
  20. package/src/main.ts +7 -1
  21. package/src/memories/index.ts +11 -7
  22. package/src/modes/components/bash-execution.ts +16 -9
  23. package/src/modes/components/custom-editor.ts +8 -0
  24. package/src/modes/components/python-execution.ts +16 -7
  25. package/src/modes/components/settings-selector.ts +29 -14
  26. package/src/modes/components/tool-execution.ts +2 -1
  27. package/src/modes/controllers/command-controller.ts +3 -1
  28. package/src/modes/controllers/event-controller.ts +7 -0
  29. package/src/modes/controllers/input-controller.ts +23 -2
  30. package/src/modes/controllers/selector-controller.ts +9 -7
  31. package/src/modes/interactive-mode.ts +84 -1
  32. package/src/modes/rpc/rpc-client.ts +7 -0
  33. package/src/modes/rpc/rpc-mode.ts +8 -0
  34. package/src/modes/rpc/rpc-types.ts +2 -0
  35. package/src/modes/theme/theme.ts +163 -7
  36. package/src/modes/types.ts +1 -0
  37. package/src/patch/hashline.ts +2 -1
  38. package/src/patch/shared.ts +44 -13
  39. package/src/prompts/system/plan-mode-approved.md +5 -0
  40. package/src/prompts/system/subagent-system-prompt.md +1 -0
  41. package/src/prompts/system/system-prompt.md +10 -0
  42. package/src/prompts/tools/todo-write.md +3 -1
  43. package/src/sdk.ts +82 -9
  44. package/src/session/agent-session.ts +137 -29
  45. package/src/session/streaming-output.ts +1 -1
  46. package/src/stt/downloader.ts +71 -0
  47. package/src/stt/index.ts +3 -0
  48. package/src/stt/recorder.ts +351 -0
  49. package/src/stt/setup.ts +52 -0
  50. package/src/stt/stt-controller.ts +160 -0
  51. package/src/stt/transcribe.py +70 -0
  52. package/src/stt/transcriber.ts +91 -0
  53. package/src/task/executor.ts +10 -2
  54. package/src/tools/bash-interactive.ts +10 -6
  55. package/src/tools/fetch.ts +1 -1
  56. package/src/tools/output-meta.ts +6 -2
  57. package/src/web/scrapers/types.ts +1 -0
@@ -0,0 +1,351 @@
1
+ import * as fs from "node:fs/promises";
2
+ import * as os from "node:os";
3
+ import * as path from "node:path";
4
+ import { logger, Snowflake } from "@oh-my-pi/pi-utils";
5
+ import { $ } from "bun";
6
+
7
+ export interface RecordingHandle {
8
+ stop(): Promise<void>;
9
+ }
10
+
11
+ const isWindows = process.platform === "win32";
12
+
13
+ /**
14
+ * Returns available recording tools in priority order.
15
+ */
16
+ export function detectRecordingTools(): string[] {
17
+ const tools: string[] = [];
18
+ if (Bun.which("sox")) tools.push("sox");
19
+ if (Bun.which("ffmpeg")) tools.push("ffmpeg");
20
+ if (!isWindows && Bun.which("arecord")) tools.push("arecord");
21
+ if (isWindows) tools.push("powershell");
22
+ return tools;
23
+ }
24
+
25
+ // ── ffmpeg dshow device detection ──────────────────────────────────
26
+
27
+ async function detectWindowsAudioDevice(): Promise<string> {
28
+ const result = await $`ffmpeg -f dshow -list_devices true -i dummy`.quiet().nothrow();
29
+ const output = result.stderr.toString();
30
+ const audioDevices: string[] = [];
31
+ const re = /"([^"]+)"\s*\(audio\)/gi;
32
+ for (const match of output.matchAll(re)) {
33
+ audioDevices.push(match[1]);
34
+ }
35
+ if (audioDevices.length === 0) {
36
+ throw new Error("No audio input device found via ffmpeg dshow. Ensure a microphone is connected.");
37
+ }
38
+ logger.debug("Detected dshow audio devices", { devices: audioDevices });
39
+ return audioDevices[0];
40
+ }
41
+
42
+ // ── Recording implementations ──────────────────────────────────────
43
+
44
+ async function startSoxRecording(outputPath: string): Promise<RecordingHandle> {
45
+ // On Windows, "-d" (default device) often fails. Use "-t waveaudio 0" for the first input.
46
+ const inputArgs = isWindows ? ["-t", "waveaudio", "0"] : ["-d"];
47
+
48
+ const proc = Bun.spawn(["sox", ...inputArgs, "-r", "16000", "-c", "1", "-b", "16", "-t", "wav", outputPath], {
49
+ stdout: "pipe",
50
+ stderr: "ignore",
51
+ });
52
+ await verifyProcessAlive(proc, "sox");
53
+ return {
54
+ async stop() {
55
+ proc.kill("SIGTERM");
56
+ await proc.exited;
57
+ },
58
+ };
59
+ }
60
+
61
+ async function startFFmpegRecording(outputPath: string): Promise<RecordingHandle> {
62
+ let args: string[];
63
+ if (isWindows) {
64
+ const device = await detectWindowsAudioDevice();
65
+ args = [
66
+ "ffmpeg",
67
+ "-f",
68
+ "dshow",
69
+ "-i",
70
+ `audio=${device}`,
71
+ "-ar",
72
+ "16000",
73
+ "-ac",
74
+ "1",
75
+ "-sample_fmt",
76
+ "s16",
77
+ "-y",
78
+ outputPath,
79
+ ];
80
+ } else if (process.platform === "darwin") {
81
+ args = [
82
+ "ffmpeg",
83
+ "-f",
84
+ "avfoundation",
85
+ "-i",
86
+ ":0",
87
+ "-ar",
88
+ "16000",
89
+ "-ac",
90
+ "1",
91
+ "-sample_fmt",
92
+ "s16",
93
+ "-y",
94
+ outputPath,
95
+ ];
96
+ } else {
97
+ args = [
98
+ "ffmpeg",
99
+ "-f",
100
+ "pulse",
101
+ "-i",
102
+ "default",
103
+ "-ar",
104
+ "16000",
105
+ "-ac",
106
+ "1",
107
+ "-sample_fmt",
108
+ "s16",
109
+ "-y",
110
+ outputPath,
111
+ ];
112
+ }
113
+
114
+ const proc = Bun.spawn(args, {
115
+ stdin: "pipe",
116
+ stdout: "pipe",
117
+ stderr: "ignore",
118
+ });
119
+ await verifyProcessAlive(proc, "ffmpeg");
120
+
121
+ return {
122
+ async stop() {
123
+ try {
124
+ proc.stdin.write("q");
125
+ proc.stdin.end();
126
+ } catch {
127
+ // stdin may already be closed
128
+ }
129
+ const killTimer = setTimeout(() => proc.kill(), 3000);
130
+ await proc.exited;
131
+ clearTimeout(killTimer);
132
+ },
133
+ };
134
+ }
135
+
136
+ async function startArecordRecording(outputPath: string): Promise<RecordingHandle> {
137
+ const proc = Bun.spawn(["arecord", "-f", "S16_LE", "-r", "16000", "-c", "1", outputPath], {
138
+ stdout: "pipe",
139
+ stderr: "ignore",
140
+ });
141
+ await verifyProcessAlive(proc, "arecord");
142
+ return {
143
+ async stop() {
144
+ proc.kill("SIGTERM");
145
+ await proc.exited;
146
+ },
147
+ };
148
+ }
149
+
150
+ // ── PowerShell mci recorder (Windows zero-dep fallback) ────────────
151
+
152
+ const PS_RECORD_SCRIPT = `
153
+ param([string]$outPath)
154
+
155
+ if ($outPath -match '["\r\n]') {
156
+ [Console]::Error.WriteLine("Invalid output path: $outPath")
157
+ exit 1
158
+ }
159
+
160
+
161
+ Add-Type @"
162
+ using System;
163
+ using System.Runtime.InteropServices;
164
+ using System.Text;
165
+ public class MciAudio {
166
+ [DllImport("winmm.dll", CharSet=CharSet.Auto)]
167
+ public static extern int mciSendString(
168
+ string command, StringBuilder buffer, int bufferSize, IntPtr callback);
169
+ }
170
+ "@
171
+
172
+ function Mci([string]$cmd) {
173
+ $buf = New-Object System.Text.StringBuilder 256
174
+ $r = [MciAudio]::mciSendString($cmd, $buf, 256, [IntPtr]::Zero)
175
+ if ($r -ne 0) {
176
+ [Console]::Error.WriteLine("MCI error $r for: $cmd")
177
+ }
178
+ return $r
179
+ }
180
+
181
+ $r = Mci "open new type waveaudio alias omp_rec"
182
+ if ($r -ne 0) { exit 1 }
183
+
184
+ Mci "set omp_rec channels 1 samplespersec 16000 bitspersample 16"
185
+
186
+ $r = Mci "record omp_rec"
187
+ if ($r -ne 0) {
188
+ Mci "close omp_rec"
189
+ exit 1
190
+ }
191
+
192
+ Write-Output "RECORDING"
193
+ [Console]::Out.Flush()
194
+
195
+ # Block until parent closes stdin or writes a line
196
+ try { [Console]::In.ReadLine() | Out-Null } catch {}
197
+
198
+ # Stop and save
199
+ Mci "stop omp_rec"
200
+ $saveCmd = 'save omp_rec "' + $outPath + '"'
201
+ $r = Mci $saveCmd
202
+ if ($r -ne 0) {
203
+ [Console]::Error.WriteLine("Save failed for: $saveCmd")
204
+ }
205
+ Mci "close omp_rec"
206
+
207
+ if (Test-Path $outPath) {
208
+ Write-Output "SAVED"
209
+ } else {
210
+ Write-Error "Output file was not created: $outPath"
211
+ exit 1
212
+ }
213
+ `;
214
+
215
+ async function startPowerShellRecording(outputPath: string): Promise<RecordingHandle> {
216
+ // Write script to temp file — avoids quoting/escaping issues with -Command
217
+ const scriptPath = path.join(os.tmpdir(), `omp-stt-record-${Snowflake.next()}.ps1`);
218
+ await Bun.write(scriptPath, PS_RECORD_SCRIPT);
219
+
220
+ const proc = Bun.spawn(["powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", "-File", scriptPath, outputPath], {
221
+ stdin: "pipe",
222
+ stdout: "pipe",
223
+ stderr: "ignore",
224
+ });
225
+
226
+ proc.exited.then(() => {
227
+ fs.unlink(scriptPath).catch(() => {});
228
+ });
229
+
230
+ // Wait for "RECORDING" on stdout to confirm it started
231
+ const reader = (proc.stdout as ReadableStream<Uint8Array>).getReader();
232
+ const decoder = new TextDecoder();
233
+ let output = "";
234
+ const deadline = Date.now() + 8000; // PowerShell + Add-Type is slow
235
+
236
+ while (Date.now() < deadline) {
237
+ const readPromise = reader.read();
238
+ const timeoutPromise = Bun.sleep(deadline - Date.now()).then(() => ({ done: true, value: undefined }));
239
+ const { done, value } = await Promise.race([readPromise, timeoutPromise]);
240
+ if (done || !value) break;
241
+ output += decoder.decode(value, { stream: true });
242
+ if (output.includes("RECORDING")) break;
243
+ }
244
+ reader.releaseLock();
245
+
246
+ if (!output.includes("RECORDING")) {
247
+ proc.kill();
248
+ await proc.exited;
249
+ let stderrText = "";
250
+ if (proc.stderr && typeof proc.stderr !== "number") {
251
+ stderrText = await new Response(proc.stderr as ReadableStream).text();
252
+ }
253
+ // Clean up temp script
254
+ fs.unlink(scriptPath).catch(() => {});
255
+ throw new Error(
256
+ `PowerShell audio recording failed to start: ${stderrText.trim() || output.trim() || "(no output)"}`,
257
+ );
258
+ }
259
+
260
+ return {
261
+ async stop() {
262
+ try {
263
+ proc.stdin.write("stop\n");
264
+ proc.stdin.end();
265
+ } catch {
266
+ // stdin may already be closed
267
+ }
268
+ // Give PowerShell time to save the file
269
+ const killTimer = setTimeout(() => proc.kill(), 8000);
270
+ await proc.exited;
271
+ clearTimeout(killTimer);
272
+ // Clean up temp script
273
+ fs.unlink(scriptPath).catch(() => {});
274
+ },
275
+ };
276
+ }
277
+
278
+ // ── Health check ───────────────────────────────────────────────────
279
+
280
+ async function verifyProcessAlive(proc: ReturnType<typeof Bun.spawn>, tool: string): Promise<void> {
281
+ await Bun.sleep(300);
282
+
283
+ const exited = await Promise.race([proc.exited.then(code => code), Bun.sleep(0).then(() => "running" as const)]);
284
+
285
+ if (exited !== "running") {
286
+ let stderr = "";
287
+ if (proc.stderr && typeof proc.stderr !== "number") {
288
+ stderr = await new Response(proc.stderr as ReadableStream).text();
289
+ }
290
+ throw new Error(`${tool} exited immediately (code ${exited}): ${stderr.trim() || "(no output)"}`);
291
+ }
292
+ }
293
+
294
+ // ── Public API ─────────────────────────────────────────────────────
295
+
296
+ export async function startRecording(outputPath: string): Promise<RecordingHandle> {
297
+ const tools = detectRecordingTools();
298
+ if (tools.length === 0) {
299
+ throw new Error(
300
+ isWindows
301
+ ? "No audio recording tool found. Install FFmpeg or SoX and add to PATH."
302
+ : "No audio recording tool found. Install SoX: sudo apt install sox, or FFmpeg: sudo apt install ffmpeg",
303
+ );
304
+ }
305
+
306
+ const errors: string[] = [];
307
+ for (const tool of tools) {
308
+ logger.debug("Trying audio recording", { tool, outputPath });
309
+ try {
310
+ switch (tool) {
311
+ case "sox":
312
+ return await startSoxRecording(outputPath);
313
+ case "ffmpeg":
314
+ return await startFFmpegRecording(outputPath);
315
+ case "arecord":
316
+ return await startArecordRecording(outputPath);
317
+ case "powershell":
318
+ return await startPowerShellRecording(outputPath);
319
+ }
320
+ } catch (err) {
321
+ const msg = err instanceof Error ? err.message : String(err);
322
+ logger.debug(`Recording tool ${tool} failed, trying next`, { error: msg });
323
+ errors.push(`${tool}: ${msg}`);
324
+ }
325
+ }
326
+
327
+ throw new Error(`All recording tools failed:\n${errors.join("\n")}`);
328
+ }
329
+
330
+ /**
331
+ * Verify a recorded audio file is usable.
332
+ * Returns the file size in bytes, or throws.
333
+ */
334
+ export async function verifyRecordingFile(filePath: string): Promise<number> {
335
+ try {
336
+ const stat = await fs.stat(filePath);
337
+ if (stat.size < 100) {
338
+ throw new Error(
339
+ `Recording file is too small (${stat.size} bytes) — audio may not have been captured. ` +
340
+ "Check that a microphone is connected and permissions are granted.",
341
+ );
342
+ }
343
+ return stat.size;
344
+ } catch (err) {
345
+ if (err instanceof Error && err.message.includes("too small")) throw err;
346
+ throw new Error(
347
+ "Recording file was not created. The recording process may have failed silently. " +
348
+ "Check that a microphone is connected.",
349
+ );
350
+ }
351
+ }
@@ -0,0 +1,52 @@
1
+ import { detectRecordingTools } from "./recorder";
2
+ import { resolvePython } from "./transcriber";
3
+
4
+ const isWindows = process.platform === "win32";
5
+
6
+ export interface STTDependencyStatus {
7
+ recorder: { available: boolean; tool: string | null; installHint: string };
8
+ python: { available: boolean; path: string | null; installHint: string };
9
+ whisper: { available: boolean; installHint: string };
10
+ }
11
+
12
+ export async function checkDependencies(): Promise<STTDependencyStatus> {
13
+ const recorderTools = detectRecordingTools();
14
+ const recorderHint = isWindows
15
+ ? "PowerShell fallback available. For better quality: install SoX or FFmpeg."
16
+ : "Install SoX: sudo apt install sox, or FFmpeg: sudo apt install ffmpeg";
17
+
18
+ const pythonCmd = resolvePython();
19
+ const pythonHint = "Install Python 3.8+ from https://python.org";
20
+
21
+ let whisperAvailable = false;
22
+ if (pythonCmd) {
23
+ const check = Bun.spawnSync([pythonCmd, "-c", "import whisper"], {
24
+ stdout: "pipe",
25
+ stderr: "pipe",
26
+ });
27
+ whisperAvailable = check.exitCode === 0;
28
+ }
29
+ const whisperHint = "Run 'omp setup stt' to auto-install, or: pip install openai-whisper";
30
+
31
+ return {
32
+ recorder: { available: recorderTools.length > 0, tool: recorderTools[0] ?? null, installHint: recorderHint },
33
+ python: { available: pythonCmd !== null, path: pythonCmd, installHint: pythonHint },
34
+ whisper: { available: whisperAvailable, installHint: whisperHint },
35
+ };
36
+ }
37
+
38
+ export function formatDependencyStatus(status: STTDependencyStatus): string {
39
+ const lines: string[] = ["STT Dependencies:"];
40
+ const check = (ok: boolean) => (ok ? "[ok]" : "[missing]");
41
+
42
+ lines.push(` Recorder: ${check(status.recorder.available)} ${status.recorder.tool ?? "none"}`);
43
+ if (!status.recorder.available) lines.push(` -> ${status.recorder.installHint}`);
44
+
45
+ lines.push(` Python: ${check(status.python.available)} ${status.python.path ?? "none"}`);
46
+ if (!status.python.available) lines.push(` -> ${status.python.installHint}`);
47
+
48
+ lines.push(` Whisper: ${check(status.whisper.available)}`);
49
+ if (!status.whisper.available) lines.push(` -> ${status.whisper.installHint}`);
50
+
51
+ return lines.join("\n");
52
+ }
@@ -0,0 +1,160 @@
1
+ import * as fs from "node:fs/promises";
2
+ import * as os from "node:os";
3
+ import * as path from "node:path";
4
+ import { logger, Snowflake } from "@oh-my-pi/pi-utils";
5
+ import { settings } from "../config/settings";
6
+ import { ensureSTTDependencies } from "./downloader";
7
+ import { type RecordingHandle, startRecording, verifyRecordingFile } from "./recorder";
8
+ import { transcribe } from "./transcriber";
9
+
10
+ export type SttState = "idle" | "recording" | "transcribing";
11
+
12
+ interface ToggleOptions {
13
+ showWarning(msg: string): void;
14
+ showStatus(msg: string): void;
15
+ onStateChange(state: SttState): void;
16
+ }
17
+
18
+ interface Editor {
19
+ insertText(text: string): void;
20
+ }
21
+
22
+ export class STTController {
23
+ #state: SttState = "idle";
24
+ #recordingHandle: RecordingHandle | null = null;
25
+ #tempFile: string | null = null;
26
+ #depsResolved = false;
27
+ #toggling = false;
28
+ #disposed = false;
29
+ #transcriptionAbort: AbortController | null = null;
30
+
31
+ get state(): SttState {
32
+ return this.#state;
33
+ }
34
+
35
+ #setState(state: SttState, options: ToggleOptions): void {
36
+ this.#state = state;
37
+ options.onStateChange(state);
38
+ }
39
+
40
+ async toggle(editor: Editor, options: ToggleOptions): Promise<void> {
41
+ if (this.#toggling) return;
42
+ this.#toggling = true;
43
+ try {
44
+ switch (this.#state) {
45
+ case "idle":
46
+ await this.#startRecording(options);
47
+ break;
48
+ case "recording":
49
+ await this.#stopAndTranscribe(editor, options);
50
+ break;
51
+ case "transcribing":
52
+ options.showStatus("Transcription in progress...");
53
+ break;
54
+ }
55
+ } finally {
56
+ this.#toggling = false;
57
+ }
58
+ }
59
+
60
+ async #startRecording(options: ToggleOptions): Promise<void> {
61
+ if (!this.#depsResolved) {
62
+ try {
63
+ options.showStatus("Checking STT dependencies...");
64
+ await ensureSTTDependencies({
65
+ modelName: settings.get("stt.modelName") as string | undefined,
66
+ onProgress: p => options.showStatus(p.stage + (p.percent != null ? ` (${p.percent}%)` : "")),
67
+ });
68
+ options.showStatus("");
69
+ this.#depsResolved = true;
70
+ } catch (err) {
71
+ const msg = err instanceof Error ? err.message : "Failed to setup STT dependencies";
72
+ options.showWarning(msg);
73
+ logger.error("STT dependency setup failed", { error: msg });
74
+ return;
75
+ }
76
+ }
77
+ const id = Snowflake.next();
78
+ this.#tempFile = path.join(os.tmpdir(), `omp-stt-${id}.wav`);
79
+
80
+ try {
81
+ this.#recordingHandle = await startRecording(this.#tempFile);
82
+ this.#setState("recording", options);
83
+ logger.debug("STT recording started", { tempFile: this.#tempFile });
84
+ } catch (err) {
85
+ this.#tempFile = null;
86
+ const msg = err instanceof Error ? err.message : "Failed to start recording";
87
+ options.showWarning(msg);
88
+ logger.error("STT recording failed to start", { error: msg });
89
+ }
90
+ }
91
+
92
+ async #stopAndTranscribe(editor: Editor, options: ToggleOptions): Promise<void> {
93
+ const handle = this.#recordingHandle;
94
+ const tempFile = this.#tempFile;
95
+ this.#recordingHandle = null;
96
+
97
+ if (!handle || !tempFile) {
98
+ this.#setState("idle", options);
99
+ return;
100
+ }
101
+
102
+ try {
103
+ await handle.stop();
104
+ // Validate the recording produced a usable file
105
+ await verifyRecordingFile(tempFile);
106
+ this.#setState("transcribing", options);
107
+
108
+ const sttSettings = {
109
+ modelName: settings.get("stt.modelName") as string | undefined,
110
+ language: settings.get("stt.language") as string | undefined,
111
+ };
112
+ this.#transcriptionAbort = new AbortController();
113
+ const text = await transcribe(tempFile, { ...sttSettings, signal: this.#transcriptionAbort.signal });
114
+ this.#transcriptionAbort = null;
115
+ if (this.#disposed) return;
116
+ if (text.length > 0) {
117
+ editor.insertText(text);
118
+ options.showStatus("");
119
+ } else {
120
+ options.showStatus("No speech detected.");
121
+ }
122
+ if (!this.#disposed) this.#setState("idle", options);
123
+ } catch (err) {
124
+ if (this.#disposed) return;
125
+ if (err instanceof DOMException && err.name === "AbortError") {
126
+ this.#setState("idle", options);
127
+ return;
128
+ }
129
+ const msg = err instanceof Error ? err.message : "Transcription failed";
130
+ options.showWarning(msg);
131
+ logger.error("STT transcription failed", { error: msg });
132
+ this.#setState("idle", options);
133
+ } finally {
134
+ try {
135
+ await fs.rm(tempFile, { force: true });
136
+ } catch {
137
+ // best effort cleanup
138
+ }
139
+ this.#tempFile = null;
140
+ }
141
+ }
142
+
143
+ dispose(): void {
144
+ this.#disposed = true;
145
+ if (this.#transcriptionAbort) {
146
+ this.#transcriptionAbort.abort();
147
+ this.#transcriptionAbort = null;
148
+ }
149
+ if (this.#recordingHandle) {
150
+ this.#recordingHandle.stop().catch(() => {});
151
+ this.#recordingHandle = null;
152
+ }
153
+ if (this.#tempFile) {
154
+ fs.rm(this.#tempFile, { force: true }).catch(() => {});
155
+ this.#tempFile = null;
156
+ }
157
+ this.#state = "idle";
158
+ this.#depsResolved = false;
159
+ }
160
+ }
@@ -0,0 +1,70 @@
1
+ """Transcribe a WAV file using openai-whisper.
2
+
3
+ Reads WAV directly via Python's wave module (no ffmpeg needed).
4
+ Resamples to 16kHz mono float32 and passes to whisper as a numpy array.
5
+
6
+ Usage: python transcribe.py <audio.wav> <model_name> <language>
7
+ Prints transcribed text to stdout.
8
+ """
9
+
10
+ import sys
11
+ import wave
12
+ import re
13
+
14
+
15
+ import numpy as np
16
+ import whisper
17
+
18
+
19
def load_wav(path: str) -> np.ndarray:
    """Load a PCM WAV file as mono float32 samples at 16 kHz.

    Decodes 8/16/32-bit PCM, averages channels down to mono, and
    linearly resamples via ``np.interp`` when the source rate isn't
    16 kHz.

    Raises:
        ValueError: for unsupported sample widths (e.g. 24-bit).
    """
    with wave.open(path, "rb") as wf:
        rate = wf.getframerate()
        channels = wf.getnchannels()
        width = wf.getsampwidth()
        raw = wf.readframes(wf.getnframes())

    if width == 2:
        samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0
    elif width == 1:
        # 8-bit WAV is unsigned, centered at 128.
        samples = (np.frombuffer(raw, dtype=np.uint8).astype(np.float32) - 128.0) / 128.0
    elif width == 4:
        # assumes 32-bit integer PCM — the wave module only reads uncompressed PCM
        samples = np.frombuffer(raw, dtype=np.int32).astype(np.float32) / 2147483648.0
    else:
        raise ValueError(f"Unsupported sample width: {width}")

    # Mix down to mono by averaging interleaved channels.
    if channels > 1:
        samples = samples.reshape(-1, channels).mean(axis=1)

    # Linear resample to whisper's expected 16 kHz.
    if rate != 16000:
        target_len = int(len(samples) * 16000 / rate)
        positions = np.linspace(0, len(samples) - 1, target_len)
        samples = np.interp(positions, np.arange(len(samples)), samples).astype(np.float32)

    return samples
50
+
51
+
52
def main() -> None:
    """CLI entry point: ``transcribe.py <audio.wav> [model_name] [language]``.

    Validates the language code, loads the WAV without ffmpeg, runs the
    whisper model, and prints the transcript to stdout.
    """
    if len(sys.argv) < 2:
        print("Usage: python transcribe.py <audio.wav> <model_name> <language>", file=sys.stderr)
        sys.exit(1)

    audio_path = sys.argv[1]
    model_name = sys.argv[2] if len(sys.argv) > 2 else "base.en"
    language = sys.argv[3] if len(sys.argv) > 3 else "en"

    # Reject anything that isn't an ISO-639-style code before it reaches whisper.
    if not re.fullmatch(r"[A-Za-z]{2,3}(-[A-Za-z]{2})?", language):
        print(f"Invalid language code: {language}", file=sys.stderr)
        sys.exit(1)

    samples = load_wav(audio_path)
    result = whisper.load_model(model_name).transcribe(samples, language=language)
    print(result["text"].strip())


if __name__ == "__main__":
    main()