@oh-my-pi/pi-coding-agent 12.3.0 → 12.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/CHANGELOG.md +66 -0
  2. package/docs/custom-tools.md +21 -6
  3. package/docs/extensions.md +20 -0
  4. package/package.json +12 -12
  5. package/src/cli/setup-cli.ts +62 -2
  6. package/src/commands/setup.ts +1 -1
  7. package/src/config/keybindings.ts +6 -2
  8. package/src/config/settings-schema.ts +58 -4
  9. package/src/config/settings.ts +23 -9
  10. package/src/debug/index.ts +26 -19
  11. package/src/debug/log-formatting.ts +60 -0
  12. package/src/debug/log-viewer.ts +903 -0
  13. package/src/debug/report-bundle.ts +87 -8
  14. package/src/discovery/helpers.ts +131 -137
  15. package/src/extensibility/custom-tools/types.ts +44 -6
  16. package/src/extensibility/extensions/types.ts +60 -0
  17. package/src/extensibility/hooks/types.ts +60 -0
  18. package/src/extensibility/skills.ts +4 -2
  19. package/src/lsp/render.ts +1 -1
  20. package/src/main.ts +7 -1
  21. package/src/memories/index.ts +11 -7
  22. package/src/modes/components/bash-execution.ts +16 -9
  23. package/src/modes/components/custom-editor.ts +8 -0
  24. package/src/modes/components/python-execution.ts +16 -7
  25. package/src/modes/components/settings-selector.ts +29 -14
  26. package/src/modes/components/tool-execution.ts +2 -1
  27. package/src/modes/controllers/command-controller.ts +3 -1
  28. package/src/modes/controllers/event-controller.ts +7 -0
  29. package/src/modes/controllers/input-controller.ts +23 -2
  30. package/src/modes/controllers/selector-controller.ts +9 -7
  31. package/src/modes/interactive-mode.ts +84 -1
  32. package/src/modes/rpc/rpc-client.ts +7 -0
  33. package/src/modes/rpc/rpc-mode.ts +8 -0
  34. package/src/modes/rpc/rpc-types.ts +2 -0
  35. package/src/modes/theme/theme.ts +163 -7
  36. package/src/modes/types.ts +1 -0
  37. package/src/patch/hashline.ts +2 -1
  38. package/src/patch/shared.ts +44 -13
  39. package/src/prompts/system/plan-mode-approved.md +5 -0
  40. package/src/prompts/system/subagent-system-prompt.md +1 -0
  41. package/src/prompts/system/system-prompt.md +10 -0
  42. package/src/prompts/tools/todo-write.md +3 -1
  43. package/src/sdk.ts +82 -9
  44. package/src/session/agent-session.ts +137 -29
  45. package/src/session/streaming-output.ts +1 -1
  46. package/src/stt/downloader.ts +71 -0
  47. package/src/stt/index.ts +3 -0
  48. package/src/stt/recorder.ts +351 -0
  49. package/src/stt/setup.ts +52 -0
  50. package/src/stt/stt-controller.ts +160 -0
  51. package/src/stt/transcribe.py +70 -0
  52. package/src/stt/transcriber.ts +91 -0
  53. package/src/task/executor.ts +10 -2
  54. package/src/tools/bash-interactive.ts +10 -6
  55. package/src/tools/fetch.ts +1 -1
  56. package/src/tools/output-meta.ts +6 -2
  57. package/src/web/scrapers/types.ts +1 -0
@@ -0,0 +1,351 @@
1
+ import * as fs from "node:fs/promises";
2
+ import * as os from "node:os";
3
+ import * as path from "node:path";
4
+ import { logger, Snowflake } from "@oh-my-pi/pi-utils";
5
+ import { $ } from "bun";
6
+
7
+ export interface RecordingHandle {
8
+ stop(): Promise<void>;
9
+ }
10
+
11
+ const isWindows = process.platform === "win32";
12
+
13
+ /**
14
+ * Returns available recording tools in priority order.
15
+ */
16
+ export function detectRecordingTools(): string[] {
17
+ const tools: string[] = [];
18
+ if (Bun.which("sox")) tools.push("sox");
19
+ if (Bun.which("ffmpeg")) tools.push("ffmpeg");
20
+ if (!isWindows && Bun.which("arecord")) tools.push("arecord");
21
+ if (isWindows) tools.push("powershell");
22
+ return tools;
23
+ }
24
+
25
+ // ── ffmpeg dshow device detection ──────────────────────────────────
26
+
27
+ async function detectWindowsAudioDevice(): Promise<string> {
28
+ const result = await $`ffmpeg -f dshow -list_devices true -i dummy`.quiet().nothrow();
29
+ const output = result.stderr.toString();
30
+ const audioDevices: string[] = [];
31
+ const re = /"([^"]+)"\s*\(audio\)/gi;
32
+ for (const match of output.matchAll(re)) {
33
+ audioDevices.push(match[1]);
34
+ }
35
+ if (audioDevices.length === 0) {
36
+ throw new Error("No audio input device found via ffmpeg dshow. Ensure a microphone is connected.");
37
+ }
38
+ logger.debug("Detected dshow audio devices", { devices: audioDevices });
39
+ return audioDevices[0];
40
+ }
41
+
42
+ // ── Recording implementations ──────────────────────────────────────
43
+
44
+ async function startSoxRecording(outputPath: string): Promise<RecordingHandle> {
45
+ // On Windows, "-d" (default device) often fails. Use "-t waveaudio 0" for the first input.
46
+ const inputArgs = isWindows ? ["-t", "waveaudio", "0"] : ["-d"];
47
+
48
+ const proc = Bun.spawn(["sox", ...inputArgs, "-r", "16000", "-c", "1", "-b", "16", "-t", "wav", outputPath], {
49
+ stdout: "pipe",
50
+ stderr: "ignore",
51
+ });
52
+ await verifyProcessAlive(proc, "sox");
53
+ return {
54
+ async stop() {
55
+ proc.kill("SIGTERM");
56
+ await proc.exited;
57
+ },
58
+ };
59
+ }
60
+
61
+ async function startFFmpegRecording(outputPath: string): Promise<RecordingHandle> {
62
+ let args: string[];
63
+ if (isWindows) {
64
+ const device = await detectWindowsAudioDevice();
65
+ args = [
66
+ "ffmpeg",
67
+ "-f",
68
+ "dshow",
69
+ "-i",
70
+ `audio=${device}`,
71
+ "-ar",
72
+ "16000",
73
+ "-ac",
74
+ "1",
75
+ "-sample_fmt",
76
+ "s16",
77
+ "-y",
78
+ outputPath,
79
+ ];
80
+ } else if (process.platform === "darwin") {
81
+ args = [
82
+ "ffmpeg",
83
+ "-f",
84
+ "avfoundation",
85
+ "-i",
86
+ ":0",
87
+ "-ar",
88
+ "16000",
89
+ "-ac",
90
+ "1",
91
+ "-sample_fmt",
92
+ "s16",
93
+ "-y",
94
+ outputPath,
95
+ ];
96
+ } else {
97
+ args = [
98
+ "ffmpeg",
99
+ "-f",
100
+ "pulse",
101
+ "-i",
102
+ "default",
103
+ "-ar",
104
+ "16000",
105
+ "-ac",
106
+ "1",
107
+ "-sample_fmt",
108
+ "s16",
109
+ "-y",
110
+ outputPath,
111
+ ];
112
+ }
113
+
114
+ const proc = Bun.spawn(args, {
115
+ stdin: "pipe",
116
+ stdout: "pipe",
117
+ stderr: "ignore",
118
+ });
119
+ await verifyProcessAlive(proc, "ffmpeg");
120
+
121
+ return {
122
+ async stop() {
123
+ try {
124
+ proc.stdin.write("q");
125
+ proc.stdin.end();
126
+ } catch {
127
+ // stdin may already be closed
128
+ }
129
+ const killTimer = setTimeout(() => proc.kill(), 3000);
130
+ await proc.exited;
131
+ clearTimeout(killTimer);
132
+ },
133
+ };
134
+ }
135
+
136
+ async function startArecordRecording(outputPath: string): Promise<RecordingHandle> {
137
+ const proc = Bun.spawn(["arecord", "-f", "S16_LE", "-r", "16000", "-c", "1", outputPath], {
138
+ stdout: "pipe",
139
+ stderr: "ignore",
140
+ });
141
+ await verifyProcessAlive(proc, "arecord");
142
+ return {
143
+ async stop() {
144
+ proc.kill("SIGTERM");
145
+ await proc.exited;
146
+ },
147
+ };
148
+ }
149
+
150
+ // ── PowerShell mci recorder (Windows zero-dep fallback) ────────────
151
+
152
+ const PS_RECORD_SCRIPT = `
153
+ param([string]$outPath)
154
+
155
+ if ($outPath -match '["\r\n]') {
156
+ [Console]::Error.WriteLine("Invalid output path: $outPath")
157
+ exit 1
158
+ }
159
+
160
+
161
+ Add-Type @"
162
+ using System;
163
+ using System.Runtime.InteropServices;
164
+ using System.Text;
165
+ public class MciAudio {
166
+ [DllImport("winmm.dll", CharSet=CharSet.Auto)]
167
+ public static extern int mciSendString(
168
+ string command, StringBuilder buffer, int bufferSize, IntPtr callback);
169
+ }
170
+ "@
171
+
172
+ function Mci([string]$cmd) {
173
+ $buf = New-Object System.Text.StringBuilder 256
174
+ $r = [MciAudio]::mciSendString($cmd, $buf, 256, [IntPtr]::Zero)
175
+ if ($r -ne 0) {
176
+ [Console]::Error.WriteLine("MCI error $r for: $cmd")
177
+ }
178
+ return $r
179
+ }
180
+
181
+ $r = Mci "open new type waveaudio alias omp_rec"
182
+ if ($r -ne 0) { exit 1 }
183
+
184
+ Mci "set omp_rec channels 1 samplespersec 16000 bitspersample 16"
185
+
186
+ $r = Mci "record omp_rec"
187
+ if ($r -ne 0) {
188
+ Mci "close omp_rec"
189
+ exit 1
190
+ }
191
+
192
+ Write-Output "RECORDING"
193
+ [Console]::Out.Flush()
194
+
195
+ # Block until parent closes stdin or writes a line
196
+ try { [Console]::In.ReadLine() | Out-Null } catch {}
197
+
198
+ # Stop and save
199
+ Mci "stop omp_rec"
200
+ $saveCmd = 'save omp_rec "' + $outPath + '"'
201
+ $r = Mci $saveCmd
202
+ if ($r -ne 0) {
203
+ [Console]::Error.WriteLine("Save failed for: $saveCmd")
204
+ }
205
+ Mci "close omp_rec"
206
+
207
+ if (Test-Path $outPath) {
208
+ Write-Output "SAVED"
209
+ } else {
210
+ Write-Error "Output file was not created: $outPath"
211
+ exit 1
212
+ }
213
+ `;
214
+
215
+ async function startPowerShellRecording(outputPath: string): Promise<RecordingHandle> {
216
+ // Write script to temp file — avoids quoting/escaping issues with -Command
217
+ const scriptPath = path.join(os.tmpdir(), `omp-stt-record-${Snowflake.next()}.ps1`);
218
+ await Bun.write(scriptPath, PS_RECORD_SCRIPT);
219
+
220
+ const proc = Bun.spawn(["powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", "-File", scriptPath, outputPath], {
221
+ stdin: "pipe",
222
+ stdout: "pipe",
223
+ stderr: "ignore",
224
+ });
225
+
226
+ proc.exited.then(() => {
227
+ fs.unlink(scriptPath).catch(() => {});
228
+ });
229
+
230
+ // Wait for "RECORDING" on stdout to confirm it started
231
+ const reader = (proc.stdout as ReadableStream<Uint8Array>).getReader();
232
+ const decoder = new TextDecoder();
233
+ let output = "";
234
+ const deadline = Date.now() + 8000; // PowerShell + Add-Type is slow
235
+
236
+ while (Date.now() < deadline) {
237
+ const readPromise = reader.read();
238
+ const timeoutPromise = Bun.sleep(deadline - Date.now()).then(() => ({ done: true, value: undefined }));
239
+ const { done, value } = await Promise.race([readPromise, timeoutPromise]);
240
+ if (done || !value) break;
241
+ output += decoder.decode(value, { stream: true });
242
+ if (output.includes("RECORDING")) break;
243
+ }
244
+ reader.releaseLock();
245
+
246
+ if (!output.includes("RECORDING")) {
247
+ proc.kill();
248
+ await proc.exited;
249
+ let stderrText = "";
250
+ if (proc.stderr && typeof proc.stderr !== "number") {
251
+ stderrText = await new Response(proc.stderr as ReadableStream).text();
252
+ }
253
+ // Clean up temp script
254
+ fs.unlink(scriptPath).catch(() => {});
255
+ throw new Error(
256
+ `PowerShell audio recording failed to start: ${stderrText.trim() || output.trim() || "(no output)"}`,
257
+ );
258
+ }
259
+
260
+ return {
261
+ async stop() {
262
+ try {
263
+ proc.stdin.write("stop\n");
264
+ proc.stdin.end();
265
+ } catch {
266
+ // stdin may already be closed
267
+ }
268
+ // Give PowerShell time to save the file
269
+ const killTimer = setTimeout(() => proc.kill(), 8000);
270
+ await proc.exited;
271
+ clearTimeout(killTimer);
272
+ // Clean up temp script
273
+ fs.unlink(scriptPath).catch(() => {});
274
+ },
275
+ };
276
+ }
277
+
278
+ // ── Health check ───────────────────────────────────────────────────
279
+
280
+ async function verifyProcessAlive(proc: ReturnType<typeof Bun.spawn>, tool: string): Promise<void> {
281
+ await Bun.sleep(300);
282
+
283
+ const exited = await Promise.race([proc.exited.then(code => code), Bun.sleep(0).then(() => "running" as const)]);
284
+
285
+ if (exited !== "running") {
286
+ let stderr = "";
287
+ if (proc.stderr && typeof proc.stderr !== "number") {
288
+ stderr = await new Response(proc.stderr as ReadableStream).text();
289
+ }
290
+ throw new Error(`${tool} exited immediately (code ${exited}): ${stderr.trim() || "(no output)"}`);
291
+ }
292
+ }
293
+
294
+ // ── Public API ─────────────────────────────────────────────────────
295
+
296
+ export async function startRecording(outputPath: string): Promise<RecordingHandle> {
297
+ const tools = detectRecordingTools();
298
+ if (tools.length === 0) {
299
+ throw new Error(
300
+ isWindows
301
+ ? "No audio recording tool found. Install FFmpeg or SoX and add to PATH."
302
+ : "No audio recording tool found. Install SoX: sudo apt install sox, or FFmpeg: sudo apt install ffmpeg",
303
+ );
304
+ }
305
+
306
+ const errors: string[] = [];
307
+ for (const tool of tools) {
308
+ logger.debug("Trying audio recording", { tool, outputPath });
309
+ try {
310
+ switch (tool) {
311
+ case "sox":
312
+ return await startSoxRecording(outputPath);
313
+ case "ffmpeg":
314
+ return await startFFmpegRecording(outputPath);
315
+ case "arecord":
316
+ return await startArecordRecording(outputPath);
317
+ case "powershell":
318
+ return await startPowerShellRecording(outputPath);
319
+ }
320
+ } catch (err) {
321
+ const msg = err instanceof Error ? err.message : String(err);
322
+ logger.debug(`Recording tool ${tool} failed, trying next`, { error: msg });
323
+ errors.push(`${tool}: ${msg}`);
324
+ }
325
+ }
326
+
327
+ throw new Error(`All recording tools failed:\n${errors.join("\n")}`);
328
+ }
329
+
330
+ /**
331
+ * Verify a recorded audio file is usable.
332
+ * Returns the file size in bytes, or throws.
333
+ */
334
+ export async function verifyRecordingFile(filePath: string): Promise<number> {
335
+ try {
336
+ const stat = await fs.stat(filePath);
337
+ if (stat.size < 100) {
338
+ throw new Error(
339
+ `Recording file is too small (${stat.size} bytes) — audio may not have been captured. ` +
340
+ "Check that a microphone is connected and permissions are granted.",
341
+ );
342
+ }
343
+ return stat.size;
344
+ } catch (err) {
345
+ if (err instanceof Error && err.message.includes("too small")) throw err;
346
+ throw new Error(
347
+ "Recording file was not created. The recording process may have failed silently. " +
348
+ "Check that a microphone is connected.",
349
+ );
350
+ }
351
+ }
@@ -0,0 +1,52 @@
1
+ import { detectRecordingTools } from "./recorder";
2
+ import { resolvePython } from "./transcriber";
3
+
4
+ const isWindows = process.platform === "win32";
5
+
6
+ export interface STTDependencyStatus {
7
+ recorder: { available: boolean; tool: string | null; installHint: string };
8
+ python: { available: boolean; path: string | null; installHint: string };
9
+ whisper: { available: boolean; installHint: string };
10
+ }
11
+
12
+ export async function checkDependencies(): Promise<STTDependencyStatus> {
13
+ const recorderTools = detectRecordingTools();
14
+ const recorderHint = isWindows
15
+ ? "PowerShell fallback available. For better quality: install SoX or FFmpeg."
16
+ : "Install SoX: sudo apt install sox, or FFmpeg: sudo apt install ffmpeg";
17
+
18
+ const pythonCmd = resolvePython();
19
+ const pythonHint = "Install Python 3.8+ from https://python.org";
20
+
21
+ let whisperAvailable = false;
22
+ if (pythonCmd) {
23
+ const check = Bun.spawnSync([pythonCmd, "-c", "import whisper"], {
24
+ stdout: "pipe",
25
+ stderr: "pipe",
26
+ });
27
+ whisperAvailable = check.exitCode === 0;
28
+ }
29
+ const whisperHint = "Run 'omp setup stt' to auto-install, or: pip install openai-whisper";
30
+
31
+ return {
32
+ recorder: { available: recorderTools.length > 0, tool: recorderTools[0] ?? null, installHint: recorderHint },
33
+ python: { available: pythonCmd !== null, path: pythonCmd, installHint: pythonHint },
34
+ whisper: { available: whisperAvailable, installHint: whisperHint },
35
+ };
36
+ }
37
+
38
+ export function formatDependencyStatus(status: STTDependencyStatus): string {
39
+ const lines: string[] = ["STT Dependencies:"];
40
+ const check = (ok: boolean) => (ok ? "[ok]" : "[missing]");
41
+
42
+ lines.push(` Recorder: ${check(status.recorder.available)} ${status.recorder.tool ?? "none"}`);
43
+ if (!status.recorder.available) lines.push(` -> ${status.recorder.installHint}`);
44
+
45
+ lines.push(` Python: ${check(status.python.available)} ${status.python.path ?? "none"}`);
46
+ if (!status.python.available) lines.push(` -> ${status.python.installHint}`);
47
+
48
+ lines.push(` Whisper: ${check(status.whisper.available)}`);
49
+ if (!status.whisper.available) lines.push(` -> ${status.whisper.installHint}`);
50
+
51
+ return lines.join("\n");
52
+ }
@@ -0,0 +1,160 @@
1
+ import * as fs from "node:fs/promises";
2
+ import * as os from "node:os";
3
+ import * as path from "node:path";
4
+ import { logger, Snowflake } from "@oh-my-pi/pi-utils";
5
+ import { settings } from "../config/settings";
6
+ import { ensureSTTDependencies } from "./downloader";
7
+ import { type RecordingHandle, startRecording, verifyRecordingFile } from "./recorder";
8
+ import { transcribe } from "./transcriber";
9
+
10
+ export type SttState = "idle" | "recording" | "transcribing";
11
+
12
+ interface ToggleOptions {
13
+ showWarning(msg: string): void;
14
+ showStatus(msg: string): void;
15
+ onStateChange(state: SttState): void;
16
+ }
17
+
18
+ interface Editor {
19
+ insertText(text: string): void;
20
+ }
21
+
22
+ export class STTController {
23
+ #state: SttState = "idle";
24
+ #recordingHandle: RecordingHandle | null = null;
25
+ #tempFile: string | null = null;
26
+ #depsResolved = false;
27
+ #toggling = false;
28
+ #disposed = false;
29
+ #transcriptionAbort: AbortController | null = null;
30
+
31
+ get state(): SttState {
32
+ return this.#state;
33
+ }
34
+
35
+ #setState(state: SttState, options: ToggleOptions): void {
36
+ this.#state = state;
37
+ options.onStateChange(state);
38
+ }
39
+
40
+ async toggle(editor: Editor, options: ToggleOptions): Promise<void> {
41
+ if (this.#toggling) return;
42
+ this.#toggling = true;
43
+ try {
44
+ switch (this.#state) {
45
+ case "idle":
46
+ await this.#startRecording(options);
47
+ break;
48
+ case "recording":
49
+ await this.#stopAndTranscribe(editor, options);
50
+ break;
51
+ case "transcribing":
52
+ options.showStatus("Transcription in progress...");
53
+ break;
54
+ }
55
+ } finally {
56
+ this.#toggling = false;
57
+ }
58
+ }
59
+
60
+ async #startRecording(options: ToggleOptions): Promise<void> {
61
+ if (!this.#depsResolved) {
62
+ try {
63
+ options.showStatus("Checking STT dependencies...");
64
+ await ensureSTTDependencies({
65
+ modelName: settings.get("stt.modelName") as string | undefined,
66
+ onProgress: p => options.showStatus(p.stage + (p.percent != null ? ` (${p.percent}%)` : "")),
67
+ });
68
+ options.showStatus("");
69
+ this.#depsResolved = true;
70
+ } catch (err) {
71
+ const msg = err instanceof Error ? err.message : "Failed to setup STT dependencies";
72
+ options.showWarning(msg);
73
+ logger.error("STT dependency setup failed", { error: msg });
74
+ return;
75
+ }
76
+ }
77
+ const id = Snowflake.next();
78
+ this.#tempFile = path.join(os.tmpdir(), `omp-stt-${id}.wav`);
79
+
80
+ try {
81
+ this.#recordingHandle = await startRecording(this.#tempFile);
82
+ this.#setState("recording", options);
83
+ logger.debug("STT recording started", { tempFile: this.#tempFile });
84
+ } catch (err) {
85
+ this.#tempFile = null;
86
+ const msg = err instanceof Error ? err.message : "Failed to start recording";
87
+ options.showWarning(msg);
88
+ logger.error("STT recording failed to start", { error: msg });
89
+ }
90
+ }
91
+
92
+ async #stopAndTranscribe(editor: Editor, options: ToggleOptions): Promise<void> {
93
+ const handle = this.#recordingHandle;
94
+ const tempFile = this.#tempFile;
95
+ this.#recordingHandle = null;
96
+
97
+ if (!handle || !tempFile) {
98
+ this.#setState("idle", options);
99
+ return;
100
+ }
101
+
102
+ try {
103
+ await handle.stop();
104
+ // Validate the recording produced a usable file
105
+ await verifyRecordingFile(tempFile);
106
+ this.#setState("transcribing", options);
107
+
108
+ const sttSettings = {
109
+ modelName: settings.get("stt.modelName") as string | undefined,
110
+ language: settings.get("stt.language") as string | undefined,
111
+ };
112
+ this.#transcriptionAbort = new AbortController();
113
+ const text = await transcribe(tempFile, { ...sttSettings, signal: this.#transcriptionAbort.signal });
114
+ this.#transcriptionAbort = null;
115
+ if (this.#disposed) return;
116
+ if (text.length > 0) {
117
+ editor.insertText(text);
118
+ options.showStatus("");
119
+ } else {
120
+ options.showStatus("No speech detected.");
121
+ }
122
+ if (!this.#disposed) this.#setState("idle", options);
123
+ } catch (err) {
124
+ if (this.#disposed) return;
125
+ if (err instanceof DOMException && err.name === "AbortError") {
126
+ this.#setState("idle", options);
127
+ return;
128
+ }
129
+ const msg = err instanceof Error ? err.message : "Transcription failed";
130
+ options.showWarning(msg);
131
+ logger.error("STT transcription failed", { error: msg });
132
+ this.#setState("idle", options);
133
+ } finally {
134
+ try {
135
+ await fs.rm(tempFile, { force: true });
136
+ } catch {
137
+ // best effort cleanup
138
+ }
139
+ this.#tempFile = null;
140
+ }
141
+ }
142
+
143
+ dispose(): void {
144
+ this.#disposed = true;
145
+ if (this.#transcriptionAbort) {
146
+ this.#transcriptionAbort.abort();
147
+ this.#transcriptionAbort = null;
148
+ }
149
+ if (this.#recordingHandle) {
150
+ this.#recordingHandle.stop().catch(() => {});
151
+ this.#recordingHandle = null;
152
+ }
153
+ if (this.#tempFile) {
154
+ fs.rm(this.#tempFile, { force: true }).catch(() => {});
155
+ this.#tempFile = null;
156
+ }
157
+ this.#state = "idle";
158
+ this.#depsResolved = false;
159
+ }
160
+ }
@@ -0,0 +1,70 @@
1
+ """Transcribe a WAV file using openai-whisper.
2
+
3
+ Reads WAV directly via Python's wave module (no ffmpeg needed).
4
+ Resamples to 16kHz mono float32 and passes to whisper as a numpy array.
5
+
6
+ Usage: python transcribe.py <audio.wav> <model_name> <language>
7
+ Prints transcribed text to stdout.
8
+ """
9
+
10
+ import sys
11
+ import wave
12
+ import re
13
+
14
+
15
+ import numpy as np
16
+ import whisper
17
+
18
+
19
def load_wav(path: str) -> np.ndarray:
    """Load a PCM WAV file as mono float32 samples at 16 kHz.

    Decodes 8/16/32-bit PCM, averages channels down to mono, and
    linearly resamples via ``np.interp`` when the source rate isn't
    16 kHz.

    Raises:
        ValueError: for unsupported sample widths (e.g. 24-bit).
    """
    with wave.open(path, "rb") as wf:
        rate = wf.getframerate()
        channels = wf.getnchannels()
        width = wf.getsampwidth()
        raw = wf.readframes(wf.getnframes())

    if width == 2:
        samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0
    elif width == 1:
        # 8-bit WAV is unsigned, centered at 128.
        samples = (np.frombuffer(raw, dtype=np.uint8).astype(np.float32) - 128.0) / 128.0
    elif width == 4:
        # assumes 32-bit integer PCM — the wave module only reads uncompressed PCM
        samples = np.frombuffer(raw, dtype=np.int32).astype(np.float32) / 2147483648.0
    else:
        raise ValueError(f"Unsupported sample width: {width}")

    # Mix down to mono by averaging interleaved channels.
    if channels > 1:
        samples = samples.reshape(-1, channels).mean(axis=1)

    # Linear resample to whisper's expected 16 kHz.
    if rate != 16000:
        target_len = int(len(samples) * 16000 / rate)
        positions = np.linspace(0, len(samples) - 1, target_len)
        samples = np.interp(positions, np.arange(len(samples)), samples).astype(np.float32)

    return samples
50
+
51
+
52
def main() -> None:
    """CLI entry point: ``transcribe.py <audio.wav> [model_name] [language]``.

    Validates the language code, loads the WAV without ffmpeg, runs the
    whisper model, and prints the transcript to stdout.
    """
    if len(sys.argv) < 2:
        print("Usage: python transcribe.py <audio.wav> <model_name> <language>", file=sys.stderr)
        sys.exit(1)

    audio_path = sys.argv[1]
    model_name = sys.argv[2] if len(sys.argv) > 2 else "base.en"
    language = sys.argv[3] if len(sys.argv) > 3 else "en"

    # Reject anything that isn't an ISO-639-style code before it reaches whisper.
    if not re.fullmatch(r"[A-Za-z]{2,3}(-[A-Za-z]{2})?", language):
        print(f"Invalid language code: {language}", file=sys.stderr)
        sys.exit(1)

    samples = load_wav(audio_path)
    result = whisper.load_model(model_name).transcribe(samples, language=language)
    print(result["text"].strip())


if __name__ == "__main__":
    main()