pi-voice-input 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # pi Voice Input
2
2
 
3
- A publishable, pure TypeScript [pi](https://pi.dev/) extension for Linux voice dictation into pi's editor.
3
+ A publishable, pure TypeScript [pi](https://pi.dev/) extension for Linux and macOS voice dictation into pi's editor.
4
4
 
5
5
  - Press `Ctrl+Shift+R` once to start recording.
6
6
  - Press `Ctrl+Shift+R` again to stop.
@@ -9,7 +9,8 @@ A publishable, pure TypeScript [pi](https://pi.dev/) extension for Linux voice d
9
9
 
10
10
  Current scope:
11
11
 
12
- - Linux only for now, using `pw-record` from PipeWire tools or `arecord` from alsa-utils.
12
+ - Linux uses `pw-record` from PipeWire tools or `arecord` from alsa-utils.
13
+ - macOS uses the system `afrecord` command. This path is implemented but not yet validated by the maintainer on macOS hardware.
13
14
  - A VolcEngine Speech API key is required.
14
15
  - This is not a local/offline ASR engine.
15
16
 
@@ -23,8 +24,9 @@ No Python, `uv`, upload service, or `ffmpeg` is required for normal shortcut usa
23
24
  pi extension: extensions/voice-input.ts
24
25
  ├─ registers Ctrl+Shift+R and /voice commands
25
26
  ├─ starts/stops a local recorder process
26
- │ ├─ preferred: pw-record
27
- └─ fallback: arecord
27
+ │ ├─ Linux preferred: pw-record
28
+ │ ├─ Linux fallback: arecord
29
+ │ └─ macOS: afrecord
28
30
  ├─ records 16 kHz mono 16-bit WAV
29
31
  ├─ parses the WAV container in TypeScript and extracts raw PCM
30
32
  ├─ sends PCM frames to the configured ASR provider via ws
@@ -40,8 +42,11 @@ Runtime package dependency:
40
42
 
41
43
  System dependency, one of:
42
44
 
43
- - `pw-record` from PipeWire tools, preferred
44
- - `arecord` from alsa-utils, fallback
45
+ - Linux: `pw-record` from PipeWire tools, preferred
46
+ - Linux: `arecord` from alsa-utils, fallback
47
+ - macOS: `afrecord`, included with macOS
48
+
49
+ On macOS, grant Terminal or your pi host app microphone permission when prompted. If macOS has previously denied microphone access, enable it in System Settings → Privacy & Security → Microphone.
45
50
 
46
51
  ## Install / Update
47
52
 
@@ -143,7 +148,7 @@ Slash commands:
143
148
  - The default ASR segment size is intentionally larger than realtime packet sizes because this workflow sends already-recorded audio.
144
149
  - The transcript is inserted into the editor only; it is not submitted automatically.
145
150
  - When `polishModel` is set, polishing uses the unsent editor draft and recent session messages as context, but outputs only the refined voice text. The final text is still pasted at the current cursor position without replacing the draft.
146
- - While recording, the status line shows `● Mic on: [device name]` in the current theme accent color.
151
+ - While recording, the status line shows `● Mic on: [device name] — press Ctrl+Shift+R again to stop/transcribe` in the current theme accent color; no separate popup is shown when recording starts.
147
152
 
148
153
  ## Development
149
154
 
@@ -175,7 +180,7 @@ After changing the extension while pi is open, run:
175
180
 
176
181
  ## Roadmap
177
182
 
178
- See [ROADMAP.md](ROADMAP.md) for planned user-visible work, including macOS support.
183
+ See [ROADMAP.md](ROADMAP.md) for planned user-visible work.
179
184
 
180
185
  ## Links
181
186
 
package/ROADMAP.md CHANGED
@@ -5,23 +5,22 @@ This roadmap lists user-visible work planned for pi Voice Input. It is intention
5
5
  ## Current support
6
6
 
7
7
  - Linux voice input through `pw-record` or `arecord`
8
+ - macOS voice input through the system `afrecord` command
8
9
  - VolcEngine WebSocket ASR
9
10
  - Optional transcript polish through a configured pi model
10
11
 
11
12
  ## Planned
12
13
 
13
- ### macOS support
14
+ ### Validate and refine macOS support
14
15
 
15
- Add first-class macOS recording support so users can dictate into pi without PipeWire or ALSA.
16
+ The macOS recording path is implemented with `afrecord`, but it still needs hands-on validation on macOS hardware.
16
17
 
17
- Expected direction:
18
+ Expected follow-up:
18
19
 
19
- - use a macOS-native recording command or a small bundled recorder helper
20
- - keep the existing user workflow: press `Ctrl+Shift+R` to start, press it again to stop and insert text
21
- - document required microphone permissions clearly
22
- - preserve the same config file and ASR provider behavior where possible
23
-
24
- Status: planned, not yet implemented.
20
+ - confirm microphone permission prompts and recovery steps
21
+ - confirm produced WAV files are compatible across supported macOS versions
22
+ - refine device naming and setup diagnostics if needed
23
+ - preserve the same config file and ASR provider behavior
25
24
 
26
25
  ## Later candidates
27
26
 
@@ -14,7 +14,7 @@ import {
14
14
  unlinkSync,
15
15
  writeFileSync,
16
16
  } from "node:fs";
17
- import { homedir } from "node:os";
17
+ import { homedir, platform } from "node:os";
18
18
  import path from "node:path";
19
19
  import { gzipSync, gunzipSync } from "node:zlib";
20
20
  import WebSocket from "ws";
@@ -212,17 +212,28 @@ function commandOutput(command: string, args: string[], timeoutMs = 1500): strin
212
212
  return (result.stdout || "").trim();
213
213
  }
214
214
 
215
+ function selectRecorderExecutable(): string {
216
+ if (platform() === "darwin" && commandExists("afrecord")) return "afrecord";
217
+ if (commandExists("pw-record")) return "pw-record";
218
+ if (commandExists("arecord")) return "arecord";
219
+ return "";
220
+ }
221
+
215
222
  function recorderCommand(config: VoiceConfig, outputPath: string): string[] {
216
- if (commandExists("pw-record")) {
223
+ const executable = selectRecorderExecutable();
224
+ if (executable === "pw-record") {
217
225
  const cmd = ["pw-record", "--rate", "16000", "--channels", "1", "--format", "s16"];
218
226
  if (config.recorderTarget) cmd.push("--target", config.recorderTarget);
219
227
  cmd.push(outputPath);
220
228
  return cmd;
221
229
  }
222
- if (commandExists("arecord")) {
230
+ if (executable === "arecord") {
223
231
  return ["arecord", "-q", "-f", "S16_LE", "-r", "16000", "-c", "1", "-t", "wav", outputPath];
224
232
  }
225
- throw new Error("No recorder found. Install PipeWire tools (pw-record) or alsa-utils (arecord).");
233
+ if (executable === "afrecord") {
234
+ return ["afrecord", "-f", "WAVE", "-d", "LEI16@16000", "-c", "1", outputPath];
235
+ }
236
+ throw new Error("No recorder found. On Linux, install PipeWire tools (pw-record) or alsa-utils (arecord). On macOS, afrecord should be available with the system.");
226
237
  }
227
238
 
228
239
  type PipeWireSource = {
@@ -310,11 +321,12 @@ function pipeWireSourceName(target: string): string {
310
321
  function recordingDeviceName(config: VoiceConfig, recorderExecutable: string): string {
311
322
  if (recorderExecutable === "pw-record") return pipeWireSourceName(config.recorderTarget);
312
323
  if (recorderExecutable === "arecord") return "ALSA default microphone";
324
+ if (recorderExecutable === "afrecord") return "macOS default microphone";
313
325
  return config.recorderTarget || "default microphone";
314
326
  }
315
327
 
316
328
  function recordingStatusText(deviceName: string): string {
317
- return `● Mic on: ${deviceName || "default microphone"}`;
329
+ return `● Mic on: ${deviceName || "default microphone"} — press Ctrl+Shift+R again to stop/transcribe`;
318
330
  }
319
331
 
320
332
  function readState(config: VoiceConfig): RecordingState | null {
@@ -903,7 +915,7 @@ async function startRecording(ctx: ExtensionContext) {
903
915
  const config = getConfig();
904
916
  const existing = readState(config);
905
917
  if (existing && pidAlive(existing.pid)) {
906
- const deviceName = existing.deviceName || recordingDeviceName(config, commandExists("pw-record") ? "pw-record" : "arecord");
918
+ const deviceName = existing.deviceName || recordingDeviceName(config, selectRecorderExecutable());
907
919
  ctx.ui.notify(`Already recording: pid=${existing.pid}. ${recordingStatusText(deviceName)}`, "warning");
908
920
  ctx.ui.setStatus("voice-input", ctx.ui.theme.fg("accent", recordingStatusText(deviceName)));
909
921
  return;
@@ -937,7 +949,6 @@ async function startRecording(ctx: ExtensionContext) {
937
949
  });
938
950
 
939
951
  ctx.ui.setStatus("voice-input", ctx.ui.theme.fg("accent", recordingStatusText(deviceName)));
940
- ctx.ui.notify(`${recordingStatusText(deviceName)}. Press Ctrl+Shift+R again to stop/transcribe.`, "info");
941
952
  }
942
953
 
943
954
  async function stopRecording(ctx: ExtensionContext, transcribe = true) {
@@ -1054,7 +1065,7 @@ async function configureApiKey(ctx: ExtensionContext, providedKey = "") {
1054
1065
  }
1055
1066
 
1056
1067
  function configSummary(config: VoiceConfig): string {
1057
- const recorderExecutable = commandExists("pw-record") ? "pw-record" : commandExists("arecord") ? "arecord" : "";
1068
+ const recorderExecutable = selectRecorderExecutable();
1058
1069
  const currentDevice = recorderExecutable ? recordingDeviceName(config, recorderExecutable) : "no recorder found";
1059
1070
  return [
1060
1071
  "Voice input config:",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-voice-input",
3
- "version": "0.2.4",
3
+ "version": "0.2.6",
4
4
  "description": "Press Ctrl+Shift+R to dictate prompts into Pi using VolcEngine ASR",
5
5
  "type": "module",
6
6
  "keywords": [
@@ -12,7 +12,8 @@
12
12
  "dictation",
13
13
  "asr",
14
14
  "volcengine",
15
- "linux"
15
+ "linux",
16
+ "macos"
16
17
  ],
17
18
  "license": "MIT",
18
19
  "author": "tr-nc",