npm - pi-voice-input - Versions diffs - 0.2.4 → 0.2.6 - Mend

pi-voice-input 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/README.md +13 -8
package/ROADMAP.md +8 -9
package/extensions/voice-input.ts +19 -8
package/package.json +3 -2

package/README.md CHANGED

@@ -1,6 +1,6 @@
 # pi Voice Input
-A publishable, pure TypeScript [pi](https://pi.dev/) extension for Linux voice dictation into pi's editor.
+A publishable, pure TypeScript [pi](https://pi.dev/) extension for Linux and macOS voice dictation into pi's editor.
 - Press `Ctrl+Shift+R` once to start recording.
 - Press `Ctrl+Shift+R` again to stop.
@@ -9,7 +9,8 @@ A publishable, pure TypeScript [pi](https://pi.dev/) extension for Linux voice d
 Current scope:
-- Linux only for now, using `pw-record` from PipeWire tools or `arecord` from alsa-utils.
+- Linux uses `pw-record` from PipeWire tools or `arecord` from alsa-utils.
+- macOS uses the system `afrecord` command. This path is implemented but not yet validated by the maintainer on macOS hardware.
 - A VolcEngine Speech API key is required.
 - This is not a local/offline ASR engine.
@@ -23,8 +24,9 @@ No Python, `uv`, upload service, or `ffmpeg` is required for normal shortcut usa
 pi extension: extensions/voice-input.ts
   ├─ registers Ctrl+Shift+R and /voice commands
   ├─ starts/stops a local recorder process
-  │    ├─ preferred: pw-record
-  │    └─ fallback: arecord
+  │    ├─ Linux preferred: pw-record
+  │    ├─ Linux fallback: arecord
+  │    └─ macOS: afrecord
   ├─ records 16 kHz mono 16-bit WAV
   ├─ parses the WAV container in TypeScript and extracts raw PCM
   ├─ sends PCM frames to the configured ASR provider via ws
@@ -40,8 +42,11 @@ Runtime package dependency:
 System dependency, one of:
-- `pw-record` from PipeWire tools, preferred
-- `arecord` from alsa-utils, fallback
+- Linux: `pw-record` from PipeWire tools, preferred
+- Linux: `arecord` from alsa-utils, fallback
+- macOS: `afrecord`, included with macOS
+On macOS, grant Terminal or your pi host app microphone permission when prompted. If macOS has previously denied microphone access, enable it in System Settings → Privacy & Security → Microphone.
 ## Install / Update
@@ -143,7 +148,7 @@ Slash commands:
 - The default ASR segment size is intentionally larger than realtime packet sizes because this workflow sends already-recorded audio.
 - The transcript is inserted into the editor only; it is not submitted automatically.
 - When `polishModel` is set, polishing uses the unsent editor draft and recent session messages as context, but outputs only the refined voice text. The final text is still pasted at the current cursor position without replacing the draft.
-- While recording, the status line shows `● Mic on: [device name]` in the current theme accent color.
+- While recording, the status line shows `● Mic on: [device name] — press Ctrl+Shift+R again to stop/transcribe` in the current theme accent color; no separate popup is shown when recording starts.
 ## Development
@@ -175,7 +180,7 @@ After changing the extension while pi is open, run:
 ## Roadmap
-See [ROADMAP.md](ROADMAP.md) for planned user-visible work, including macOS support.
+See [ROADMAP.md](ROADMAP.md) for planned user-visible work.
 ## Links

package/ROADMAP.md CHANGED

@@ -5,23 +5,22 @@ This roadmap lists user-visible work planned for pi Voice Input. It is intention
 ## Current support
 - Linux voice input through `pw-record` or `arecord`
+- macOS voice input through the system `afrecord` command
 - VolcEngine WebSocket ASR
 - Optional transcript polish through a configured pi model
 ## Planned
-### macOS support
+### Validate and refine macOS support
-Add first-class macOS recording support so users can dictate into pi without PipeWire or ALSA.
+The macOS recording path is implemented with `afrecord`, but it still needs hands-on validation on macOS hardware.
-Expected direction:
+Expected follow-up:
-- use a macOS-native recording command or a small bundled recorder helper
-- keep the existing user workflow: press `Ctrl+Shift+R` to start, press it again to stop and insert text
-- document required microphone permissions clearly
-- preserve the same config file and ASR provider behavior where possible
-Status: planned, not yet implemented.
+- confirm microphone permission prompts and recovery steps
+- confirm produced WAV files are compatible across supported macOS versions
+- refine device naming and setup diagnostics if needed
+- preserve the same config file and ASR provider behavior
 ## Later candidates

package/extensions/voice-input.ts CHANGED

@@ -14,7 +14,7 @@ import {
   unlinkSync,
   writeFileSync,
 } from "node:fs";
-import { homedir } from "node:os";
+import { homedir, platform } from "node:os";
 import path from "node:path";
 import { gzipSync, gunzipSync } from "node:zlib";
 import WebSocket from "ws";
@@ -212,17 +212,28 @@ function commandOutput(command: string, args: string[], timeoutMs = 1500): strin
   return (result.stdout || "").trim();
 }
+function selectRecorderExecutable(): string {
+  if (platform() === "darwin" && commandExists("afrecord")) return "afrecord";
+  if (commandExists("pw-record")) return "pw-record";
+  if (commandExists("arecord")) return "arecord";
+  return "";
+}
 function recorderCommand(config: VoiceConfig, outputPath: string): string[] {
-  if (commandExists("pw-record")) {
+  const executable = selectRecorderExecutable();
+  if (executable === "pw-record") {
     const cmd = ["pw-record", "--rate", "16000", "--channels", "1", "--format", "s16"];
     if (config.recorderTarget) cmd.push("--target", config.recorderTarget);
     cmd.push(outputPath);
     return cmd;
   }
-  if (commandExists("arecord")) {
+  if (executable === "arecord") {
     return ["arecord", "-q", "-f", "S16_LE", "-r", "16000", "-c", "1", "-t", "wav", outputPath];
   }
-  throw new Error("No recorder found. Install PipeWire tools (pw-record) or alsa-utils (arecord).");
+  if (executable === "afrecord") {
+    return ["afrecord", "-f", "WAVE", "-d", "LEI16@16000", "-c", "1", outputPath];
+  }
+  throw new Error("No recorder found. On Linux, install PipeWire tools (pw-record) or alsa-utils (arecord). On macOS, afrecord should be available with the system.");
 }
 type PipeWireSource = {
@@ -310,11 +321,12 @@ function pipeWireSourceName(target: string): string {
 function recordingDeviceName(config: VoiceConfig, recorderExecutable: string): string {
   if (recorderExecutable === "pw-record") return pipeWireSourceName(config.recorderTarget);
   if (recorderExecutable === "arecord") return "ALSA default microphone";
+  if (recorderExecutable === "afrecord") return "macOS default microphone";
   return config.recorderTarget || "default microphone";
 }
 function recordingStatusText(deviceName: string): string {
-  return `● Mic on: ${deviceName || "default microphone"}`;
+  return `● Mic on: ${deviceName || "default microphone"} — press Ctrl+Shift+R again to stop/transcribe`;
 }
 function readState(config: VoiceConfig): RecordingState | null {
@@ -903,7 +915,7 @@ async function startRecording(ctx: ExtensionContext) {
   const config = getConfig();
   const existing = readState(config);
   if (existing && pidAlive(existing.pid)) {
-    const deviceName = existing.deviceName || recordingDeviceName(config, commandExists("pw-record") ? "pw-record" : "arecord");
+    const deviceName = existing.deviceName || recordingDeviceName(config, selectRecorderExecutable());
     ctx.ui.notify(`Already recording: pid=${existing.pid}. ${recordingStatusText(deviceName)}`, "warning");
     ctx.ui.setStatus("voice-input", ctx.ui.theme.fg("accent", recordingStatusText(deviceName)));
     return;
@@ -937,7 +949,6 @@ async function startRecording(ctx: ExtensionContext) {
   });
   ctx.ui.setStatus("voice-input", ctx.ui.theme.fg("accent", recordingStatusText(deviceName)));
-  ctx.ui.notify(`${recordingStatusText(deviceName)}. Press Ctrl+Shift+R again to stop/transcribe.`, "info");
 }
 async function stopRecording(ctx: ExtensionContext, transcribe = true) {
@@ -1054,7 +1065,7 @@ async function configureApiKey(ctx: ExtensionContext, providedKey = "") {
 }
 function configSummary(config: VoiceConfig): string {
-  const recorderExecutable = commandExists("pw-record") ? "pw-record" : commandExists("arecord") ? "arecord" : "";
+  const recorderExecutable = selectRecorderExecutable();
   const currentDevice = recorderExecutable ? recordingDeviceName(config, recorderExecutable) : "no recorder found";
   return [
     "Voice input config:",

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "pi-voice-input",
-  "version": "0.2.4",
+  "version": "0.2.6",
   "description": "Press Ctrl+Shift+R to dictate prompts into Pi using VolcEngine ASR",
   "type": "module",
   "keywords": [
@@ -12,7 +12,8 @@
     "dictation",
     "asr",
     "volcengine",
-    "linux"
+    "linux",
+    "macos"
   ],
   "license": "MIT",
   "author": "tr-nc",