pi-voice-input 0.2.11 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/extensions/voice-input.ts +159 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -27,6 +27,7 @@ pi extension: extensions/index.ts → extensions/voice-input.ts
|
|
|
27
27
|
│ ├─ Linux preferred: pw-record
|
|
28
28
|
│ ├─ Linux fallback: arecord
|
|
29
29
|
│ └─ macOS: afrecord, or ffmpeg/AVFoundation fallback
|
|
30
|
+
├─ ducks system output volume while the microphone is listening
|
|
30
31
|
├─ records a temporary 16 kHz mono 16-bit WAV
|
|
31
32
|
├─ parses the WAV container in TypeScript and extracts raw PCM
|
|
32
33
|
├─ sends PCM frames to the configured ASR provider via ws
|
|
@@ -108,12 +109,17 @@ The config file is plain JSON and can be edited directly:
|
|
|
108
109
|
```json
|
|
109
110
|
{
|
|
110
111
|
"volcApiKey": "",
|
|
111
|
-
"polishModel": ""
|
|
112
|
+
"polishModel": "",
|
|
113
|
+
"duckSystemVolume": true,
|
|
114
|
+
"duckSystemVolumeFactor": 0.5,
|
|
115
|
+
"duckSystemVolumeFadeMs": 300
|
|
112
116
|
}
|
|
113
117
|
```
|
|
114
118
|
|
|
115
119
|
`polishModel` is disabled by default. Set it to any model shown by `pi --list-models` to enable transcript polish. If polishing fails, the raw ASR transcript is inserted instead.
|
|
116
120
|
|
|
121
|
+
`duckSystemVolume` is enabled by default. While recording, the extension lowers system output volume to `duckSystemVolumeFactor` of the original volume using a short ease-in/ease-out fade (`duckSystemVolumeFadeMs`), then restores the saved volume when recording stops or is cancelled. Linux uses `wpctl` or `pactl`; macOS uses `osascript`.
|
|
122
|
+
|
|
117
123
|
Verify the effective non-secret config:
|
|
118
124
|
|
|
119
125
|
```text
|
|
@@ -151,6 +157,7 @@ Slash commands:
|
|
|
151
157
|
- On startup, legacy `~/.pi/agent/voice-input/recordings` and `~/.pi/agent/voice-input/logs` artifacts are cleaned up when they are not part of an active recording.
|
|
152
158
|
- When `polishModel` is set, polishing uses the unsent editor draft and recent session messages as context, but outputs only the refined voice text to insert at the current cursor. It must not reconstruct the full draft; the final text is pasted without replacing existing editor content.
|
|
153
159
|
- While recording, the status line shows `● Mic on: [device name] — press Ctrl+Shift+R again to stop/transcribe` in the current theme accent color; no separate popup is shown when recording starts.
|
|
160
|
+
- By default, system output volume is ducked to 50% of its previous level with a 300 ms ease-in/ease-out fade while the microphone is listening, then restored after recording stops.
|
|
154
161
|
|
|
155
162
|
## Development
|
|
156
163
|
|
|
@@ -61,6 +61,9 @@ type JsonObject = Record<string, unknown>;
|
|
|
61
61
|
/** Shape of the user-editable JSON config file after normalization. */
type VoiceInputConfigFile = {
  /** VolcEngine Speech API key; an empty string means no key is configured. */
  volcApiKey: string;
  /** Model used for transcript polish; leave empty to disable polish. */
  polishModel: string;
  /** Whether system output volume is lowered while the microphone is recording. */
  duckSystemVolume: boolean;
  /** Fraction of the original volume kept while ducked; normalized into the 0..1 range. */
  duckSystemVolumeFactor: number;
  /** Fade duration in milliseconds; normalized to a whole number in the 0..3000 range. */
  duckSystemVolumeFadeMs: number;
};
|
|
65
68
|
|
|
66
69
|
type VoiceConfig = {
|
|
@@ -86,6 +89,17 @@ type VoiceConfig = {
|
|
|
86
89
|
postprocessTimeoutMs: number;
|
|
87
90
|
postprocessMaxTokens: number;
|
|
88
91
|
postprocessContextChars: number;
|
|
92
|
+
duckSystemVolume: boolean;
|
|
93
|
+
duckSystemVolumeFactor: number;
|
|
94
|
+
duckSystemVolumeFadeMs: number;
|
|
95
|
+
};
|
|
96
|
+
|
|
97
|
+
/**
 * Snapshot of the system output volume taken before ducking, stored with the
 * recording state so the volume can be restored later.
 */
type SystemVolumeDuckingState = {
  /** Volume-control backend: `osascript` on macOS, or `wpctl` / `pactl` on Linux. */
  provider: "macos" | "wpctl" | "pactl";
  /** Output volume (percent) read before ducking; this is the value restored afterwards. */
  originalVolumePercent: number;
  /** Target volume (percent) while recording: the original volume multiplied by `factor`. */
  duckedVolumePercent: number;
  /** Ducking factor copied from the config when the snapshot was taken. */
  factor: number;
  /** Fade duration in milliseconds copied from the config when the snapshot was taken. */
  fadeMs: number;
};
|
|
90
104
|
|
|
91
105
|
type RecordingState = {
|
|
@@ -95,6 +109,7 @@ type RecordingState = {
|
|
|
95
109
|
startedAt: string;
|
|
96
110
|
recorderTarget?: string;
|
|
97
111
|
deviceName?: string;
|
|
112
|
+
systemVolume?: SystemVolumeDuckingState;
|
|
98
113
|
};
|
|
99
114
|
|
|
100
115
|
type DecodedFrame = {
|
|
@@ -124,6 +139,9 @@ function defaultConfigFile(): VoiceInputConfigFile {
|
|
|
124
139
|
return {
|
|
125
140
|
volcApiKey: "",
|
|
126
141
|
polishModel: DEFAULT_POSTPROCESS_MODEL,
|
|
142
|
+
duckSystemVolume: true,
|
|
143
|
+
duckSystemVolumeFactor: 0.5,
|
|
144
|
+
duckSystemVolumeFadeMs: 300,
|
|
127
145
|
};
|
|
128
146
|
}
|
|
129
147
|
|
|
@@ -136,12 +154,29 @@ function stringField(source: JsonObject, name: string, fallback: string): string
|
|
|
136
154
|
return typeof value === "string" ? value : fallback;
|
|
137
155
|
}
|
|
138
156
|
|
|
157
|
+
/**
 * Reads a boolean setting from a parsed JSON object.
 *
 * @param source - Object to read from.
 * @param name - Key to look up.
 * @param fallback - Value returned when the key is absent or is not a boolean.
 * @returns The stored boolean, or `fallback`.
 */
function booleanField(source: JsonObject, name: string, fallback: boolean): boolean {
  const candidate = source[name];
  if (typeof candidate !== "boolean") return fallback;
  return candidate;
}
|
|
161
|
+
|
|
162
|
+
/**
 * Reads a finite numeric setting from a parsed JSON object.
 *
 * @param source - Object to read from.
 * @param name - Key to look up.
 * @param fallback - Value returned when the key is absent, not a number, or not finite.
 * @returns The stored number, or `fallback`.
 */
function numberField(source: JsonObject, name: string, fallback: number): number {
  const candidate = source[name];
  if (typeof candidate !== "number") return fallback;
  // NaN and +/-Infinity are numbers by type but are not usable settings.
  return Number.isFinite(candidate) ? candidate : fallback;
}
|
|
166
|
+
|
|
167
|
+
/**
 * Limits `value` to the inclusive range `[min, max]`.
 *
 * @param value - Number to limit.
 * @param min - Lower bound.
 * @param max - Upper bound.
 * @returns `value` raised to at least `min`, then lowered to at most `max`.
 */
function clamp(value: number, min: number, max: number): number {
  const raisedToFloor = Math.max(min, value);
  return Math.min(max, raisedToFloor);
}
|
|
170
|
+
|
|
139
171
|
/**
 * Builds a complete config-file object from arbitrary parsed JSON.
 *
 * Missing or wrongly typed keys fall back to the defaults. String values are
 * trimmed, the ducking factor is limited to 0..1, and the fade duration is
 * limited to 0..3000 ms and rounded to a whole number.
 *
 * @param input - Parsed JSON of unknown shape.
 * @returns A fully populated, normalized config-file object.
 */
function normalizeConfigFile(input: unknown): VoiceInputConfigFile {
  const defaults = defaultConfigFile();
  const root = isObject(input) ? input : {};

  const volcApiKey = stringField(root, "volcApiKey", defaults.volcApiKey).trim();
  const polishModel = stringField(root, "polishModel", defaults.polishModel).trim();
  const duckSystemVolume = booleanField(root, "duckSystemVolume", defaults.duckSystemVolume);
  const rawFactor = numberField(root, "duckSystemVolumeFactor", defaults.duckSystemVolumeFactor);
  const rawFadeMs = numberField(root, "duckSystemVolumeFadeMs", defaults.duckSystemVolumeFadeMs);

  return {
    volcApiKey,
    polishModel,
    duckSystemVolume,
    duckSystemVolumeFactor: clamp(rawFactor, 0, 1),
    duckSystemVolumeFadeMs: Math.round(clamp(rawFadeMs, 0, 3000)),
  };
}
|
|
147
182
|
|
|
@@ -188,6 +223,9 @@ function getConfig(): VoiceConfig {
|
|
|
188
223
|
postprocessTimeoutMs: 30000,
|
|
189
224
|
postprocessMaxTokens: 2048,
|
|
190
225
|
postprocessContextChars: 6000,
|
|
226
|
+
duckSystemVolume: fileConfig.duckSystemVolume,
|
|
227
|
+
duckSystemVolumeFactor: fileConfig.duckSystemVolumeFactor,
|
|
228
|
+
duckSystemVolumeFadeMs: fileConfig.duckSystemVolumeFadeMs,
|
|
191
229
|
};
|
|
192
230
|
}
|
|
193
231
|
|
|
@@ -218,6 +256,111 @@ function commandOutput(command: string, args: string[], timeoutMs = 1500): strin
|
|
|
218
256
|
return (result.stdout || "").trim();
|
|
219
257
|
}
|
|
220
258
|
|
|
259
|
+
/**
 * Runs a command synchronously with all stdio discarded.
 *
 * @param command - Executable to run.
 * @param args - Arguments passed to the executable.
 * @param timeoutMs - Timeout handed to `spawnSync`, in milliseconds.
 * @returns `true` only when the process exited with status 0.
 */
function runCommand(command: string, args: string[], timeoutMs = 1500): boolean {
  const result = spawnSync(command, args, { stdio: "ignore", timeout: timeoutMs });
  return result.status === 0;
}
|
|
262
|
+
|
|
263
|
+
/**
 * Formats a percentage with at most two decimal places and no trailing zeros.
 *
 * @param value - Percentage to format.
 * @returns For example `"50"` for 50 and `"33.33"` for 33.3333.
 */
function formatPercent(value: number): string {
  const fixedText = value.toFixed(2);
  // Converting back to a number drops trailing zeros such as "50.00" -> 50.
  return String(Number(fixedText));
}
|
|
266
|
+
|
|
267
|
+
/**
 * Reads the current system output volume using the first available backend.
 *
 * macOS uses `osascript`. Linux tries `wpctl` first and falls back to `pactl`
 * when `wpctl` is missing or its output cannot be parsed. Other platforms are
 * unsupported.
 *
 * @returns The backend that answered and the volume in percent, or `null`
 *   when no supported backend produced a usable reading.
 */
function readSystemOutputVolume(): Pick<SystemVolumeDuckingState, "provider" | "originalVolumePercent"> | null {
  if (platform() === "darwin") {
    if (!commandExists("osascript")) return null;
    const output = commandOutput("osascript", ["-e", "output volume of (get volume settings)"]).trim();
    // An empty reply (command failed or timed out) must not be parsed:
    // Number("") is 0, which would be recorded as "original volume 0%" and
    // later mute the system when the volume is ducked or restored.
    if (output === "") return null;
    const volume = Number(output);
    return Number.isFinite(volume) ? { provider: "macos", originalVolumePercent: clamp(volume, 0, 100) } : null;
  }

  if (platform() !== "linux") return null;

  if (commandExists("wpctl")) {
    const output = commandOutput("wpctl", ["get-volume", "@DEFAULT_AUDIO_SINK@"]);
    const match = output.match(/Volume:\s*([0-9.]+)/);
    // wpctl reports a fraction (1.0 = 100%), so scale it to percent.
    const volume = match ? Number(match[1]) * 100 : NaN;
    if (Number.isFinite(volume)) return { provider: "wpctl", originalVolumePercent: Math.max(0, volume) };
  }

  if (commandExists("pactl")) {
    const output = commandOutput("pactl", ["get-sink-volume", "@DEFAULT_SINK@"]);
    // Use the first percentage that appears in the output.
    const match = output.match(/([0-9]+(?:\.[0-9]+)?)%/);
    const volume = match ? Number(match[1]) : NaN;
    if (Number.isFinite(volume)) return { provider: "pactl", originalVolumePercent: Math.max(0, volume) };
  }

  return null;
}
|
|
293
|
+
|
|
294
|
+
/**
 * Sets the system output volume through the given backend.
 *
 * @param state - Object naming the backend to use.
 * @param volumePercent - Target volume in percent. macOS values are rounded
 *   and limited to 0..100; Linux values are only floored at 0.
 * @returns `true` when the volume command exited successfully.
 */
function setSystemOutputVolume(state: Pick<SystemVolumeDuckingState, "provider">, volumePercent: number): boolean {
  switch (state.provider) {
    case "macos": {
      const level = Math.round(clamp(volumePercent, 0, 100));
      return runCommand("osascript", ["-e", `set volume output volume ${level}`]);
    }
    case "wpctl": {
      const percentText = formatPercent(Math.max(0, volumePercent));
      return runCommand("wpctl", ["set-volume", "@DEFAULT_AUDIO_SINK@", `${percentText}%`]);
    }
    default: {
      const percentText = formatPercent(Math.max(0, volumePercent));
      return runCommand("pactl", ["set-sink-volume", "@DEFAULT_SINK@", `${percentText}%`]);
    }
  }
}
|
|
306
|
+
|
|
307
|
+
/**
 * Cosine ease-in/ease-out curve.
 *
 * @param t - Progress; limited to 0..1 before use.
 * @returns Eased progress: 0 at `t = 0`, 0.5 at `t = 0.5`, 1 at `t = 1`.
 */
function easeInOut(t: number): number {
  const progress = clamp(t, 0, 1);
  const halfCosine = Math.cos(Math.PI * progress) / 2;
  return 0.5 - halfCosine;
}
|
|
310
|
+
|
|
311
|
+
/**
 * Moves the system output volume from one level to another along an
 * ease-in/ease-out curve.
 *
 * Steps are paced against elapsed time rather than a fixed sleep. Each volume
 * change runs a blocking external command, so when those commands are slow the
 * overdue intermediate steps are skipped and the fade still finishes close to
 * `fadeMs`. The final step is always applied, so the volume ends exactly at
 * `toPercent`.
 *
 * @param state - Object naming the volume backend to use.
 * @param fromPercent - Starting volume in percent.
 * @param toPercent - Target volume in percent.
 * @param fadeMs - Fade duration in milliseconds. A value that is zero,
 *   negative, or not finite sets the target volume immediately.
 * @returns `null` on success, or an error message when a volume command fails.
 */
async function fadeSystemOutputVolume(
  state: Pick<SystemVolumeDuckingState, "provider">,
  fromPercent: number,
  toPercent: number,
  fadeMs: number,
): Promise<string | null> {
  const failure = "failed to set system output volume";

  // No fade requested, an unusable duration, or a change too small to notice.
  if (!Number.isFinite(fadeMs) || fadeMs <= 0 || Math.abs(fromPercent - toPercent) < 0.1) {
    return setSystemOutputVolume(state, toPercent) ? null : failure;
  }

  // Roughly one step per 30 ms, limited to between 2 and 20 steps.
  const steps = Math.max(2, Math.min(20, Math.round(fadeMs / 30)));
  const intervalMs = fadeMs / steps;
  const startedAt = Date.now();

  let step = 1;
  for (;;) {
    const eased = easeInOut(step / steps);
    const volume = fromPercent + (toPercent - fromPercent) * eased;
    if (!setSystemOutputVolume(state, volume)) return failure;
    if (step >= steps) return null;

    // Step k is due (k - 1) intervals after the start. Jump ahead to whichever
    // step is due now, so slow volume commands do not stretch the fade.
    const elapsedMs = Date.now() - startedAt;
    const dueStep = Math.floor(elapsedMs / intervalMs) + 1;
    step = Math.min(steps, Math.max(step + 1, dueStep));

    // Never wait longer than one interval, even if the wall clock jumps backwards.
    const waitMs = Math.min(intervalMs, (step - 1) * intervalMs - elapsedMs);
    if (waitMs > 0) await sleep(waitMs);
  }
}
|
|
331
|
+
|
|
332
|
+
/**
 * Captures the current output volume and derives the ducking plan for a recording.
 *
 * @param config - Effective voice-input configuration.
 * @returns An empty object when ducking is disabled or the factor is 1 or
 *   more; `{ warning }` when the volume could not be read; otherwise
 *   `{ state }` holding the snapshot and the ducked target volume.
 */
function createSystemVolumeDuckingState(config: VoiceConfig): { state?: SystemVolumeDuckingState; warning?: string } {
  const { duckSystemVolume, duckSystemVolumeFactor: factor, duckSystemVolumeFadeMs: fadeMs } = config;
  if (!duckSystemVolume) return {};
  // A factor of 1 or more would not lower the volume, so there is nothing to do.
  if (factor >= 1) return {};

  const snapshot = readSystemOutputVolume();
  if (!snapshot) {
    return { warning: "system output volume ducking is enabled, but no supported volume control was found" };
  }

  const state: SystemVolumeDuckingState = {
    ...snapshot,
    duckedVolumePercent: snapshot.originalVolumePercent * factor,
    factor,
    fadeMs,
  };
  return { state };
}
|
|
346
|
+
|
|
347
|
+
/**
 * Fades the system output volume from its original level down to the ducked level.
 *
 * @param state - Ducking plan; when absent nothing happens.
 * @returns `null` on success or when there is no plan, otherwise a warning message.
 */
async function applySystemVolumeDucking(state?: SystemVolumeDuckingState): Promise<string | null> {
  if (!state) return null;

  const { originalVolumePercent, duckedVolumePercent, fadeMs } = state;
  const failure = await fadeSystemOutputVolume(state, originalVolumePercent, duckedVolumePercent, fadeMs);
  if (!failure) return null;
  return `system output volume ducking failed: ${failure}`;
}
|
|
352
|
+
|
|
353
|
+
/**
 * Fades the system output volume from the ducked level back to its original level.
 *
 * @param state - Ducking plan saved with the recording; when absent nothing happens.
 * @returns `null` on success or when there is no plan, otherwise a warning message.
 */
async function restoreSystemOutputVolume(state?: SystemVolumeDuckingState): Promise<string | null> {
  if (!state) return null;

  const { duckedVolumePercent, originalVolumePercent, fadeMs } = state;
  const failure = await fadeSystemOutputVolume(state, duckedVolumePercent, originalVolumePercent, fadeMs);
  if (!failure) return null;
  return `system output volume restore failed: ${failure}`;
}
|
|
358
|
+
|
|
359
|
+
/**
 * Restores the original system output volume in one synchronous step, without a fade.
 *
 * @param state - Ducking plan saved with the recording; when absent nothing happens.
 * @returns `null` on success or when there is no plan, otherwise a warning message.
 */
function restoreSystemOutputVolumeNow(state?: SystemVolumeDuckingState): string | null {
  if (!state) return null;

  const restored = setSystemOutputVolume(state, state.originalVolumePercent);
  if (restored) return null;
  return "system output volume restore failed";
}
|
|
363
|
+
|
|
221
364
|
function selectRecorderExecutable(): string {
|
|
222
365
|
if (platform() === "darwin") {
|
|
223
366
|
if (commandExists("afrecord")) return "afrecord";
|
|
@@ -1092,9 +1235,10 @@ async function isRecording(config: VoiceConfig): Promise<boolean> {
|
|
|
1092
1235
|
/**
 * Cleans up after a recording whose recorder process is no longer alive.
 *
 * Restores the saved system volume, removes the recording artifacts, and
 * clears the persisted state.
 *
 * @param config - Effective voice-input configuration.
 * @returns Warnings collected during cleanup; empty when there is no state or
 *   the recorder process is still alive.
 */
function cleanupStaleRecordingState(config: VoiceConfig): string[] {
  const staleState = readState(config);
  if (!staleState) return [];
  if (pidAlive(staleState.pid)) return [];

  // Restore the volume first, then remove artifacts, then drop the state.
  const warnings: Array<string | null> = [restoreSystemOutputVolumeNow(staleState.systemVolume)];
  warnings.push(...cleanupRecordingArtifacts(staleState));
  clearState(config);

  return warnings.filter((message): message is string => Boolean(message));
}
|
|
1099
1243
|
|
|
1100
1244
|
function requireInteractiveUi(ctx: ExtensionContext, action: string): boolean {
|
|
@@ -1128,6 +1272,7 @@ async function startRecording(ctx: ExtensionContext) {
|
|
|
1128
1272
|
throw error;
|
|
1129
1273
|
}
|
|
1130
1274
|
const deviceName = recordingDeviceName(config, cmd[0]);
|
|
1275
|
+
const volumeDucking = createSystemVolumeDuckingState(config);
|
|
1131
1276
|
|
|
1132
1277
|
ctx.ui.setStatus("voice-input", ctx.ui.theme.fg("warning", "● starting mic"));
|
|
1133
1278
|
let child: ReturnType<typeof spawn>;
|
|
@@ -1152,7 +1297,11 @@ async function startRecording(ctx: ExtensionContext) {
|
|
|
1152
1297
|
startedAt: new Date().toISOString(),
|
|
1153
1298
|
recorderTarget: config.recorderTarget || undefined,
|
|
1154
1299
|
deviceName,
|
|
1300
|
+
systemVolume: volumeDucking.state,
|
|
1155
1301
|
});
|
|
1302
|
+
if (volumeDucking.warning) ctx.ui.notify(`Voice input warning: ${volumeDucking.warning}`, "warning");
|
|
1303
|
+
const duckingWarning = await applySystemVolumeDucking(volumeDucking.state);
|
|
1304
|
+
if (duckingWarning) ctx.ui.notify(`Voice input warning: ${duckingWarning}`, "warning");
|
|
1156
1305
|
|
|
1157
1306
|
ctx.ui.setStatus("voice-input", ctx.ui.theme.fg("accent", recordingStatusText(deviceName)));
|
|
1158
1307
|
}
|
|
@@ -1169,7 +1318,9 @@ async function stopRecording(ctx: ExtensionContext, transcribe = true) {
|
|
|
1169
1318
|
|
|
1170
1319
|
ctx.ui.setStatus("voice-input", ctx.ui.theme.fg("warning", transcribe ? "● transcribing" : "● stopping"));
|
|
1171
1320
|
if (pidAlive(state.pid)) await stopProcessGroup(state.pid);
|
|
1321
|
+
const volumeRestoreWarning = await restoreSystemOutputVolume(state.systemVolume);
|
|
1172
1322
|
clearState(config);
|
|
1323
|
+
if (volumeRestoreWarning) ctx.ui.notify(`Voice input warning: ${volumeRestoreWarning}`, "warning");
|
|
1173
1324
|
if (config.finalizeDelayMs > 0) await sleep(config.finalizeDelayMs);
|
|
1174
1325
|
|
|
1175
1326
|
if (!transcribe) {
|
|
@@ -1211,8 +1362,8 @@ async function stopRecording(ctx: ExtensionContext, transcribe = true) {
|
|
|
1211
1362
|
if (!result.text.trim()) {
|
|
1212
1363
|
ctx.ui.setStatus("voice-input", undefined);
|
|
1213
1364
|
ctx.ui.notify(
|
|
1214
|
-
`
|
|
1215
|
-
"
|
|
1365
|
+
`No speech detected. audio=${(durationMs / 1000).toFixed(2)}s total=${result.timings.totalMs}ms`,
|
|
1366
|
+
"info",
|
|
1216
1367
|
);
|
|
1217
1368
|
return;
|
|
1218
1369
|
}
|
|
@@ -1262,6 +1413,7 @@ function setupHelp(config = getConfig()): string {
|
|
|
1262
1413
|
"- To create/update the JSON config file, run: /voice init",
|
|
1263
1414
|
"- To save/update the key, run: /voice key",
|
|
1264
1415
|
`- Polish: ${config.postprocessEnabled ? config.postprocessModel : "disabled"}`,
|
|
1416
|
+
`- System volume ducking: ${config.duckSystemVolume ? `${Math.round(config.duckSystemVolumeFactor * 100)}% over ${config.duckSystemVolumeFadeMs}ms` : "disabled"}`,
|
|
1265
1417
|
`- Get/create a VolcEngine Speech API key here: ${VOLC_API_KEY_URL}`,
|
|
1266
1418
|
"- After saving the key, run: /voice config",
|
|
1267
1419
|
].join("\n");
|
|
@@ -1298,8 +1450,11 @@ function configSummary(config: VoiceConfig): string {
|
|
|
1298
1450
|
`- config file: ${config.configPath}${existsSync(config.configPath) ? "" : " (missing; run /voice init to create it)"}`,
|
|
1299
1451
|
`- volcApiKey: ${config.apiKey ? "set" : "missing"} (update with /voice key)`,
|
|
1300
1452
|
`- polishModel: ${config.postprocessEnabled ? config.postprocessModel : "disabled"}`,
|
|
1453
|
+
`- duckSystemVolume: ${config.duckSystemVolume ? "enabled" : "disabled"}`,
|
|
1454
|
+
`- duckSystemVolumeFactor: ${config.duckSystemVolumeFactor}`,
|
|
1455
|
+
`- duckSystemVolumeFadeMs: ${config.duckSystemVolumeFadeMs}`,
|
|
1301
1456
|
`- current recording device: ${currentDevice}`,
|
|
1302
|
-
"Config keys: volcApiKey, polishModel. Leave polishModel empty to disable polish.",
|
|
1457
|
+
"Config keys: volcApiKey, polishModel, duckSystemVolume, duckSystemVolumeFactor, duckSystemVolumeFadeMs. Leave polishModel empty to disable polish.",
|
|
1303
1458
|
`VolcEngine API key URL: ${VOLC_API_KEY_URL}`,
|
|
1304
1459
|
].join("\n");
|
|
1305
1460
|
}
|