pi-voice-input 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -9
- package/ROADMAP.md +8 -9
- package/extensions/voice-input.ts +18 -6
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# pi Voice Input
|
|
2
2
|
|
|
3
|
-
A publishable, pure TypeScript [pi](https://pi.dev/) extension for Linux voice dictation into pi's editor.
|
|
3
|
+
A publishable, pure TypeScript [pi](https://pi.dev/) extension for Linux and macOS voice dictation into pi's editor.
|
|
4
4
|
|
|
5
5
|
- Press `Ctrl+Shift+R` once to start recording.
|
|
6
6
|
- Press `Ctrl+Shift+R` again to stop.
|
|
@@ -9,7 +9,8 @@ A publishable, pure TypeScript [pi](https://pi.dev/) extension for Linux voice d
|
|
|
9
9
|
|
|
10
10
|
Current scope:
|
|
11
11
|
|
|
12
|
-
- Linux
|
|
12
|
+
- Linux uses `pw-record` from PipeWire tools or `arecord` from alsa-utils.
|
|
13
|
+
- macOS uses the system `afrecord` command. This path is implemented but not yet validated by the maintainer on macOS hardware.
|
|
13
14
|
- A VolcEngine Speech API key is required.
|
|
14
15
|
- This is not a local/offline ASR engine.
|
|
15
16
|
|
|
@@ -23,8 +24,9 @@ No Python, `uv`, upload service, or `ffmpeg` is required for normal shortcut usa
|
|
|
23
24
|
pi extension: extensions/voice-input.ts
|
|
24
25
|
├─ registers Ctrl+Shift+R and /voice commands
|
|
25
26
|
├─ starts/stops a local recorder process
|
|
26
|
-
│ ├─ preferred: pw-record
|
|
27
|
-
│
|
|
27
|
+
│ ├─ Linux preferred: pw-record
|
|
28
|
+
│ ├─ Linux fallback: arecord
|
|
29
|
+
│ └─ macOS: afrecord
|
|
28
30
|
├─ records 16 kHz mono 16-bit WAV
|
|
29
31
|
├─ parses the WAV container in TypeScript and extracts raw PCM
|
|
30
32
|
├─ sends PCM frames to the configured ASR provider via ws
|
|
@@ -40,10 +42,13 @@ Runtime package dependency:
|
|
|
40
42
|
|
|
41
43
|
System dependency, one of:
|
|
42
44
|
|
|
43
|
-
- `pw-record` from PipeWire tools, preferred
|
|
44
|
-
- `arecord` from alsa-utils, fallback
|
|
45
|
+
- Linux: `pw-record` from PipeWire tools, preferred
|
|
46
|
+
- Linux: `arecord` from alsa-utils, fallback
|
|
47
|
+
- macOS: `afrecord`, included with macOS
|
|
45
48
|
|
|
46
|
-
|
|
49
|
+
On macOS, grant Terminal or your pi host app microphone permission when prompted. If macOS has previously denied microphone access, enable it in System Settings → Privacy & Security → Microphone.
|
|
50
|
+
|
|
51
|
+
## Install / Update
|
|
47
52
|
|
|
48
53
|
Install the published package with pi:
|
|
49
54
|
|
|
@@ -51,6 +56,14 @@ Install the published package with pi:
|
|
|
51
56
|
pi install npm:pi-voice-input
|
|
52
57
|
```
|
|
53
58
|
|
|
59
|
+
Update to the latest published version:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
pi update npm:pi-voice-input
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
If pi is already running, restart pi after installing or updating. `/reload` may not replace code that was already loaded by the current pi process.
|
|
66
|
+
|
|
54
67
|
## Providers
|
|
55
68
|
|
|
56
69
|
The extension is structured around a provider boundary: recording, editor insertion, and command handling are generic; ASR transport/protocol logic is provider-specific.
|
|
@@ -135,7 +148,7 @@ Slash commands:
|
|
|
135
148
|
- The default ASR segment size is intentionally larger than realtime packet sizes because this workflow sends already-recorded audio.
|
|
136
149
|
- The transcript is inserted into the editor only; it is not submitted automatically.
|
|
137
150
|
- When `polishModel` is set, polishing uses the unsent editor draft and recent session messages as context, but outputs only the refined voice text. The final text is still pasted at the current cursor position without replacing the draft.
|
|
138
|
-
- While recording, the status line
|
|
151
|
+
- While recording, the status line shows `● Mic on: [device name]` in the current theme accent color.
|
|
139
152
|
|
|
140
153
|
## Development
|
|
141
154
|
|
|
@@ -167,7 +180,7 @@ After changing the extension while pi is open, run:
|
|
|
167
180
|
|
|
168
181
|
## Roadmap
|
|
169
182
|
|
|
170
|
-
See [ROADMAP.md](ROADMAP.md) for planned user-visible work
|
|
183
|
+
See [ROADMAP.md](ROADMAP.md) for planned user-visible work.
|
|
171
184
|
|
|
172
185
|
## Links
|
|
173
186
|
|
package/ROADMAP.md
CHANGED
|
@@ -5,23 +5,22 @@ This roadmap lists user-visible work planned for pi Voice Input. It is intention
|
|
|
5
5
|
## Current support
|
|
6
6
|
|
|
7
7
|
- Linux voice input through `pw-record` or `arecord`
|
|
8
|
+
- macOS voice input through the system `afrecord` command
|
|
8
9
|
- VolcEngine WebSocket ASR
|
|
9
10
|
- Optional transcript polish through a configured pi model
|
|
10
11
|
|
|
11
12
|
## Planned
|
|
12
13
|
|
|
13
|
-
### macOS support
|
|
14
|
+
### Validate and refine macOS support
|
|
14
15
|
|
|
15
|
-
|
|
16
|
+
The macOS recording path is implemented with `afrecord`, but it still needs hands-on validation on macOS hardware.
|
|
16
17
|
|
|
17
|
-
Expected
|
|
18
|
+
Expected follow-up:
|
|
18
19
|
|
|
19
|
-
-
|
|
20
|
-
-
|
|
21
|
-
-
|
|
22
|
-
- preserve the same config file and ASR provider behavior
|
|
23
|
-
|
|
24
|
-
Status: planned, not yet implemented.
|
|
20
|
+
- confirm microphone permission prompts and recovery steps
|
|
21
|
+
- confirm produced WAV files are compatible across supported macOS versions
|
|
22
|
+
- refine device naming and setup diagnostics if needed
|
|
23
|
+
- preserve the same config file and ASR provider behavior
|
|
25
24
|
|
|
26
25
|
## Later candidates
|
|
27
26
|
|
package/extensions/voice-input.ts
CHANGED
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
unlinkSync,
|
|
15
15
|
writeFileSync,
|
|
16
16
|
} from "node:fs";
|
|
17
|
-
import { homedir } from "node:os";
|
|
17
|
+
import { homedir, platform } from "node:os";
|
|
18
18
|
import path from "node:path";
|
|
19
19
|
import { gzipSync, gunzipSync } from "node:zlib";
|
|
20
20
|
import WebSocket from "ws";
|
|
@@ -212,17 +212,28 @@ function commandOutput(command: string, args: string[], timeoutMs = 1500): strin
|
|
|
212
212
|
return (result.stdout || "").trim();
|
|
213
213
|
}
|
|
214
214
|
|
|
215
|
+
function selectRecorderExecutable(): string {
|
|
216
|
+
if (platform() === "darwin" && commandExists("afrecord")) return "afrecord";
|
|
217
|
+
if (commandExists("pw-record")) return "pw-record";
|
|
218
|
+
if (commandExists("arecord")) return "arecord";
|
|
219
|
+
return "";
|
|
220
|
+
}
|
|
221
|
+
|
|
215
222
|
function recorderCommand(config: VoiceConfig, outputPath: string): string[] {
|
|
216
|
-
|
|
223
|
+
const executable = selectRecorderExecutable();
|
|
224
|
+
if (executable === "pw-record") {
|
|
217
225
|
const cmd = ["pw-record", "--rate", "16000", "--channels", "1", "--format", "s16"];
|
|
218
226
|
if (config.recorderTarget) cmd.push("--target", config.recorderTarget);
|
|
219
227
|
cmd.push(outputPath);
|
|
220
228
|
return cmd;
|
|
221
229
|
}
|
|
222
|
-
if (
|
|
230
|
+
if (executable === "arecord") {
|
|
223
231
|
return ["arecord", "-q", "-f", "S16_LE", "-r", "16000", "-c", "1", "-t", "wav", outputPath];
|
|
224
232
|
}
|
|
225
|
-
|
|
233
|
+
if (executable === "afrecord") {
|
|
234
|
+
return ["afrecord", "-f", "WAVE", "-d", "LEI16@16000", "-c", "1", outputPath];
|
|
235
|
+
}
|
|
236
|
+
throw new Error("No recorder found. On Linux, install PipeWire tools (pw-record) or alsa-utils (arecord). On macOS, afrecord should be available with the system.");
|
|
226
237
|
}
|
|
227
238
|
|
|
228
239
|
type PipeWireSource = {
|
|
@@ -310,6 +321,7 @@ function pipeWireSourceName(target: string): string {
|
|
|
310
321
|
function recordingDeviceName(config: VoiceConfig, recorderExecutable: string): string {
|
|
311
322
|
if (recorderExecutable === "pw-record") return pipeWireSourceName(config.recorderTarget);
|
|
312
323
|
if (recorderExecutable === "arecord") return "ALSA default microphone";
|
|
324
|
+
if (recorderExecutable === "afrecord") return "macOS default microphone";
|
|
313
325
|
return config.recorderTarget || "default microphone";
|
|
314
326
|
}
|
|
315
327
|
|
|
@@ -903,7 +915,7 @@ async function startRecording(ctx: ExtensionContext) {
|
|
|
903
915
|
const config = getConfig();
|
|
904
916
|
const existing = readState(config);
|
|
905
917
|
if (existing && pidAlive(existing.pid)) {
|
|
906
|
-
const deviceName = existing.deviceName || recordingDeviceName(config,
|
|
918
|
+
const deviceName = existing.deviceName || recordingDeviceName(config, selectRecorderExecutable());
|
|
907
919
|
ctx.ui.notify(`Already recording: pid=${existing.pid}. ${recordingStatusText(deviceName)}`, "warning");
|
|
908
920
|
ctx.ui.setStatus("voice-input", ctx.ui.theme.fg("accent", recordingStatusText(deviceName)));
|
|
909
921
|
return;
|
|
@@ -1054,7 +1066,7 @@ async function configureApiKey(ctx: ExtensionContext, providedKey = "") {
|
|
|
1054
1066
|
}
|
|
1055
1067
|
|
|
1056
1068
|
function configSummary(config: VoiceConfig): string {
|
|
1057
|
-
const recorderExecutable =
|
|
1069
|
+
const recorderExecutable = selectRecorderExecutable();
|
|
1058
1070
|
const currentDevice = recorderExecutable ? recordingDeviceName(config, recorderExecutable) : "no recorder found";
|
|
1059
1071
|
return [
|
|
1060
1072
|
"Voice input config:",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-voice-input",
|
|
3
|
-
"version": "0.2.3",
|
|
3
|
+
"version": "0.2.5",
|
|
4
4
|
"description": "Press Ctrl+Shift+R to dictate prompts into Pi using VolcEngine ASR",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"keywords": [
|
|
@@ -12,7 +12,8 @@
|
|
|
12
12
|
"dictation",
|
|
13
13
|
"asr",
|
|
14
14
|
"volcengine",
|
|
15
|
-
"linux"
|
|
15
|
+
"linux",
|
|
16
|
+
"macos"
|
|
16
17
|
],
|
|
17
18
|
"license": "MIT",
|
|
18
19
|
"author": "tr-nc",
|