@p8n.ai/pi-listens 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/audio.ts ADDED
@@ -0,0 +1,361 @@
1
+ import { mkdir, rm } from "node:fs/promises";
2
+ import { randomUUID } from "node:crypto";
3
+ import { join } from "node:path";
4
+ import { spawn } from "node:child_process";
5
+ import { accessSync, constants } from "node:fs";
6
+ import { once } from "node:events";
7
+ import type { PiListensConfig } from "./config.js";
8
+
9
/** Host audio capabilities used by the voice features (record/stream/play). */
export interface AudioRuntime {
  /** Record a clip (default length from config) and resolve with the WAV file path. */
  record(seconds?: number, signal?: AbortSignal): Promise<string>;
  /** Stream raw 16-bit mono PCM chunks from the microphone until aborted. */
  streamPcm(signal?: AbortSignal): AsyncIterable<Buffer>;
  /** Play an audio file to completion (or until `signal` aborts). */
  play(path: string, signal?: AbortSignal): Promise<void>;
  /** Remove a temp audio file if the config opted into deletion; never throws. */
  cleanup(path: string): Promise<void>;
  /** Terminate all audio child processes spawned by this module. */
  stopAll(): void;
  /** Names of the active recorder/player backends ("missing" when absent). */
  describe(): { recorder: string; player: string };
}
17
+
18
+ export function createAudioRuntime(config: PiListensConfig): AudioRuntime {
19
+ const recorder = config.recordCommand ? "custom" : detectRecorder();
20
+ const player = config.playCommand ? "custom" : detectPlayer();
21
+
22
+ return {
23
+ async record(seconds = config.recordSeconds, signal?: AbortSignal): Promise<string> {
24
+ if (!recorder) {
25
+ throw new Error(
26
+ "No microphone recorder found. Install sox (`rec`) or ffmpeg, or set PI_LISTENS_RECORD_COMMAND. See README for command templates.",
27
+ );
28
+ }
29
+ await mkdir(config.audioDir, { recursive: true });
30
+ const path = join(config.audioDir, `pi-listens-input-${Date.now()}-${randomUUID()}.wav`);
31
+ const useUtteranceMode = config.recordMode === "utterance" && recorder === "rec";
32
+ const command = config.recordCommand
33
+ ? customCommand(config.recordCommand, {
34
+ path,
35
+ seconds,
36
+ sampleRate: config.recordSampleRate,
37
+ silenceStartSeconds: config.silenceStartSeconds,
38
+ silenceStopSeconds: config.silenceStopSeconds,
39
+ silenceThreshold: config.silenceThreshold,
40
+ })
41
+ : useUtteranceMode
42
+ ? utteranceRecorderCommand(recorder, path, config.recordSampleRate, config.silenceStartSeconds, config.silenceStopSeconds, config.silenceThreshold)
43
+ : recorderCommand(recorder, path, seconds, config.recordSampleRate);
44
+ await run(command.command, command.args, signal, useUtteranceMode ? { timeoutMs: seconds * 1000, resolveOnTimeout: true } : undefined);
45
+ return path;
46
+ },
47
+
48
+ streamPcm(signal?: AbortSignal): AsyncIterable<Buffer> {
49
+ if (!recorder) {
50
+ throw new Error(
51
+ "No microphone recorder found. Install sox (`rec`) or ffmpeg, or set PI_LISTENS_STREAM_COMMAND. See README for command templates.",
52
+ );
53
+ }
54
+ const command = config.streamCommand
55
+ ? customCommand(config.streamCommand, { sampleRate: config.recordSampleRate })
56
+ : pcmStreamCommand(recorder, config.recordSampleRate);
57
+ return streamCommandOutput(command.command, command.args, signal);
58
+ },
59
+
60
+ async play(path: string, signal?: AbortSignal): Promise<void> {
61
+ if (!player) {
62
+ throw new Error(
63
+ "No audio player found. Install afplay, sox (`play`), ffplay, or aplay, or set PI_LISTENS_PLAY_COMMAND. See README for command templates.",
64
+ );
65
+ }
66
+ const command = config.playCommand ? customCommand(config.playCommand, { path }) : playerCommand(player, path);
67
+ await run(command.command, command.args, signal);
68
+ },
69
+
70
+ async cleanup(path: string): Promise<void> {
71
+ if (!config.deleteAudio) return;
72
+ await rm(path, { force: true }).catch(() => undefined);
73
+ },
74
+
75
+ stopAll(): void {
76
+ stopActiveAudioProcesses();
77
+ },
78
+
79
+ describe() {
80
+ return { recorder: recorder ?? "missing", player: player ?? "missing" };
81
+ },
82
+ };
83
+ }
84
+
85
// An executable plus argv, ready to hand to child_process.spawn.
type CommandSpec = { command: string; args: string[] };
86
+
87
+ function recorderCommand(recorder: string, path: string, seconds: number, sampleRate: number): CommandSpec {
88
+ if (recorder === "rec") {
89
+ return { command: "rec", args: ["-q", "-r", String(sampleRate), "-c", "1", "-b", "16", path, "trim", "0", String(seconds)] };
90
+ }
91
+ if (recorder === "ffmpeg-avfoundation") {
92
+ return {
93
+ command: "ffmpeg",
94
+ args: ["-hide_banner", "-loglevel", "error", "-y", "-f", "avfoundation", "-i", ":0", "-t", String(seconds), "-ar", String(sampleRate), "-ac", "1", path],
95
+ };
96
+ }
97
+ if (recorder === "ffmpeg-alsa") {
98
+ return {
99
+ command: "ffmpeg",
100
+ args: ["-hide_banner", "-loglevel", "error", "-y", "-f", "alsa", "-i", "default", "-t", String(seconds), "-ar", String(sampleRate), "-ac", "1", path],
101
+ };
102
+ }
103
+ if (recorder === "ffmpeg-pulse") {
104
+ return {
105
+ command: "ffmpeg",
106
+ args: ["-hide_banner", "-loglevel", "error", "-y", "-f", "pulse", "-i", "default", "-t", String(seconds), "-ar", String(sampleRate), "-ac", "1", path],
107
+ };
108
+ }
109
+ throw new Error(`Unsupported recorder: ${recorder}`);
110
+ }
111
+
112
+ function pcmStreamCommand(recorder: string, sampleRate: number): CommandSpec {
113
+ if (recorder === "rec") {
114
+ return { command: "rec", args: ["-q", "-r", String(sampleRate), "-c", "1", "-b", "16", "-e", "signed-integer", "-t", "raw", "-"] };
115
+ }
116
+ if (recorder === "ffmpeg-avfoundation") {
117
+ return {
118
+ command: "ffmpeg",
119
+ args: ["-hide_banner", "-loglevel", "error", "-f", "avfoundation", "-i", ":0", "-ar", String(sampleRate), "-ac", "1", "-f", "s16le", "pipe:1"],
120
+ };
121
+ }
122
+ if (recorder === "ffmpeg-alsa") {
123
+ return {
124
+ command: "ffmpeg",
125
+ args: ["-hide_banner", "-loglevel", "error", "-f", "alsa", "-i", "default", "-ar", String(sampleRate), "-ac", "1", "-f", "s16le", "pipe:1"],
126
+ };
127
+ }
128
+ if (recorder === "ffmpeg-pulse") {
129
+ return {
130
+ command: "ffmpeg",
131
+ args: ["-hide_banner", "-loglevel", "error", "-f", "pulse", "-i", "default", "-ar", String(sampleRate), "-ac", "1", "-f", "s16le", "pipe:1"],
132
+ };
133
+ }
134
+ throw new Error(`Unsupported streaming recorder: ${recorder}`);
135
+ }
136
+
137
+ function utteranceRecorderCommand(
138
+ recorder: string,
139
+ path: string,
140
+ sampleRate: number,
141
+ silenceStartSeconds: number,
142
+ silenceStopSeconds: number,
143
+ silenceThreshold: string,
144
+ ): CommandSpec {
145
+ if (recorder === "rec") {
146
+ return {
147
+ command: "rec",
148
+ args: [
149
+ "-q",
150
+ "-r",
151
+ String(sampleRate),
152
+ "-c",
153
+ "1",
154
+ "-b",
155
+ "16",
156
+ path,
157
+ "silence",
158
+ "1",
159
+ String(silenceStartSeconds),
160
+ silenceThreshold,
161
+ "1",
162
+ String(silenceStopSeconds),
163
+ silenceThreshold,
164
+ ],
165
+ };
166
+ }
167
+ throw new Error(`Unsupported utterance recorder: ${recorder}`);
168
+ }
169
+
170
+ function playerCommand(player: string, path: string): CommandSpec {
171
+ if (player === "afplay") return { command: "afplay", args: [path] };
172
+ if (player === "play") return { command: "play", args: ["-q", path] };
173
+ if (player === "ffplay") return { command: "ffplay", args: ["-nodisp", "-autoexit", "-loglevel", "error", path] };
174
+ if (player === "aplay") return { command: "aplay", args: [path] };
175
+ throw new Error(`Unsupported player: ${player}`);
176
+ }
177
+
178
+ function customCommand(template: string, values: Record<string, string | number>): CommandSpec {
179
+ let command = template;
180
+ for (const [key, value] of Object.entries(values)) {
181
+ command = command.replaceAll(`{${key}}`, shellQuote(String(value)));
182
+ }
183
+ return { command: "sh", args: ["-lc", command] };
184
+ }
185
+
186
+ function detectRecorder(): string | null {
187
+ if (isCommandAvailable("rec")) return "rec";
188
+ if (isCommandAvailable("ffmpeg")) {
189
+ if (process.platform === "darwin") return "ffmpeg-avfoundation";
190
+ if (process.platform === "linux") return "ffmpeg-alsa";
191
+ }
192
+ return null;
193
+ }
194
+
195
+ function detectPlayer(): string | null {
196
+ if (process.platform === "darwin" && isCommandAvailable("afplay")) return "afplay";
197
+ if (isCommandAvailable("play")) return "play";
198
+ if (isCommandAvailable("ffplay")) return "ffplay";
199
+ if (isCommandAvailable("aplay")) return "aplay";
200
+ return null;
201
+ }
202
+
203
+ function isCommandAvailable(command: string): boolean {
204
+ const paths = (process.env.PATH ?? "").split(":").filter(Boolean);
205
+ for (const dir of paths) {
206
+ try {
207
+ accessSync(join(dir, command), constants.X_OK);
208
+ return true;
209
+ } catch {
210
+ // keep looking
211
+ }
212
+ }
213
+ return false;
214
+ }
215
+
216
/**
 * Spawn `command` and resolve when it exits successfully.
 *
 * - Rejects immediately if `signal` is already aborted, and with
 *   "Cancelled" if it aborts while the child runs (abort wins over the
 *   child's exit status).
 * - With `options.timeoutMs`, the child is terminated after that many ms;
 *   if `options.resolveOnTimeout` is also set, hitting the timeout counts
 *   as success (used for utterance recording, where the cap is a hard stop
 *   rather than a failure).
 * - On failure, rejects with an Error carrying the exit code/signal plus
 *   captured stderr/stdout for diagnosis.
 */
function run(command: string, args: string[], signal?: AbortSignal, options: { timeoutMs?: number; resolveOnTimeout?: boolean } = {}): Promise<void> {
  return new Promise((resolve, reject) => {
    if (signal?.aborted) {
      reject(new Error("Cancelled"));
      return;
    }

    const child = spawnManaged(command, args);
    let stderr = "";
    let stdout = "";
    let timedOut = false;
    const timeout = options.timeoutMs
      ? setTimeout(() => {
          timedOut = true;
          terminateChild(child);
        }, options.timeoutMs)
      : undefined;
    const onAbort = () => terminateChild(child);
    signal?.addEventListener("abort", onAbort, { once: true });

    // Capture output so failures can surface the tool's own error text.
    child.stdout?.on("data", (chunk) => {
      stdout += chunk.toString();
    });
    child.stderr?.on("data", (chunk) => {
      stderr += chunk.toString();
    });

    const cleanup = () => {
      signal?.removeEventListener("abort", onAbort);
      if (timeout) clearTimeout(timeout);
    };

    child.on("error", (err) => {
      cleanup();
      reject(err);
    });
    child.on("close", (code, termSignal) => {
      cleanup();
      // Report cancellation, not the SIGTERM-induced non-zero exit.
      if (signal?.aborted) {
        reject(new Error("Cancelled"));
        return;
      }
      if (code === 0 || (timedOut && options.resolveOnTimeout)) {
        resolve();
        return;
      }
      const output = [stderr.trim(), stdout.trim()].filter(Boolean).join("\n");
      reject(new Error(`${command} failed${termSignal ? ` (${termSignal})` : ""}${code === null ? "" : ` with exit code ${code}`}${output ? `: ${output}` : ""}`));
    });
  });
}
267
+
268
/**
 * Async generator yielding the child's stdout chunks as Buffers.
 *
 * Cancellation via `signal` terminates the child and surfaces as a
 * "Cancelled" error. A non-zero exit after the stream ends becomes an
 * Error carrying the exit code/signal and captured stderr. The child is
 * always reaped: if the consumer abandons the iterator early, the
 * `finally` block terminates any still-running process.
 */
async function* streamCommandOutput(command: string, args: string[], signal?: AbortSignal): AsyncIterable<Buffer> {
  if (signal?.aborted) throw new Error("Cancelled");
  const child = spawnManaged(command, args);
  let stderr = "";
  let exitCode: number | null = null;
  let exitSignal: NodeJS.Signals | null = null;
  let spawnError: Error | undefined;

  const stop = () => terminateChild(child);
  signal?.addEventListener("abort", stop, { once: true });
  child.stderr?.on("data", (chunk) => { stderr += chunk.toString(); });
  child.on("error", (err) => { spawnError = err; });
  child.on("close", (code, termSignal) => { exitCode = code; exitSignal = termSignal; });

  try {
    if (!child.stdout) throw new Error(`${command} did not provide stdout for audio streaming`);
    for await (const chunk of child.stdout) {
      if (signal?.aborted) throw new Error("Cancelled");
      yield Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
    }
    // stdout can end before "close" fires; wait so the exit status is final.
    if (exitCode === null && !spawnError) await once(child, "close");
    if (signal?.aborted) throw new Error("Cancelled");
    if (spawnError) throw spawnError;
    if (exitCode !== 0) {
      const output = stderr.trim();
      throw new Error(`${command} failed${exitSignal ? ` (${exitSignal})` : ""}${exitCode === null ? "" : ` with exit code ${exitCode}`}${output ? `: ${output}` : ""}`);
    }
  } finally {
    signal?.removeEventListener("abort", stop);
    // Consumer may have bailed mid-stream: make sure the child dies.
    if (!child.killed && exitCode === null) stop();
  }
}
300
+
301
type ManagedChild = ReturnType<typeof spawn>;

// Live children spawned via spawnManaged; used for bulk termination.
const activeChildren = new Set<ManagedChild>();
// Children already sent SIGTERM, so terminateChild is not re-entered.
const terminatingChildren = new WeakSet<ManagedChild>();
// Guards one-time installation of the process "exit" cleanup hook.
let processExitCleanupInstalled = false;
306
+
307
/** Terminate every tracked audio child (SIGKILL immediately when `force`). */
export function stopActiveAudioProcesses(force = false): void {
  // Iterate a copy: "close" handlers mutate activeChildren during the loop.
  for (const child of [...activeChildren]) terminateChild(child, force);
}
310
+
311
/**
 * Spawn a child whose lifetime this module tracks for bulk cleanup.
 * Detached on POSIX so the whole process group can be signalled at once
 * (audio tools may fork); stdin is ignored, stdout/stderr are piped.
 */
function spawnManaged(command: string, args: string[]): ManagedChild {
  installProcessExitCleanup();
  const child = spawn(command, args, {
    stdio: ["ignore", "pipe", "pipe"],
    detached: process.platform !== "win32",
  });
  activeChildren.add(child);
  // Drop from tracking on either exit path so the Set stays bounded.
  const untrack = () => activeChildren.delete(child);
  child.once("close", untrack);
  child.once("error", untrack);
  return child;
}
323
+
324
/** Install (once) a process-exit hook that force-kills tracked children. */
function installProcessExitCleanup(): void {
  if (processExitCleanupInstalled) return;
  processExitCleanupInstalled = true;
  process.once("exit", () => stopActiveAudioProcesses(true));
}
329
+
330
/**
 * Politely stop a child: SIGTERM now, SIGKILL after 1.5s if it lingers
 * (or immediately when `force`). No-ops for children that have already
 * exited or are already being terminated.
 */
function terminateChild(child: ManagedChild, force = false): void {
  if (child.exitCode !== null || child.signalCode !== null || terminatingChildren.has(child)) return;
  terminatingChildren.add(child);
  sendSignal(child, "SIGTERM");
  if (force) {
    sendSignal(child, "SIGKILL");
    return;
  }
  const killTimer = setTimeout(() => sendSignal(child, "SIGKILL"), 1500);
  // Don't keep the event loop alive just for the kill escalation.
  killTimer.unref();
  child.once("close", () => clearTimeout(killTimer));
}
342
+
343
/**
 * Best-effort signal delivery. On POSIX, signal the process *group*
 * (negative pid, valid because spawnManaged uses `detached`) so
 * grandchildren die too; fall back to signalling the child directly.
 * All failures are swallowed — the process may already be gone.
 */
function sendSignal(child: ManagedChild, signal: NodeJS.Signals): void {
  try {
    if (process.platform !== "win32" && child.pid) {
      process.kill(-child.pid, signal);
      return;
    }
  } catch {
    // Fall through to direct child signaling; the process may already be gone.
  }
  try {
    child.kill(signal);
  } catch {
    // Best-effort cleanup only.
  }
}
358
+
359
+ function shellQuote(value: string): string {
360
+ return `'${value.replaceAll("'", `'"'"'`)}'`;
361
+ }
@@ -0,0 +1,286 @@
1
+ import { mkdir } from "node:fs/promises";
2
+ import { join } from "node:path";
3
+ import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@earendil-works/pi-coding-agent";
4
+ import type { VoiceToolServices } from "./tools.js";
5
+ import { conciseTranscript, prepareSpokenText } from "./text.js";
6
+ import { audioExtensionForCodec } from "./config.js";
7
+ import { applyVoiceChrome, installVoiceUi, uninstallVoiceUi } from "./voice-ui.js";
8
+
9
/** Lifecycle phase of the hands-free voice loop, used to drive UI chrome. */
export type VoiceLoopStatus = "idle" | "listening" | "transcribing" | "agent" | "speaking" | "error";

/** Mutable state shared between the voice commands, the UI, and the loop. */
export interface VoiceModeState {
  enabled: boolean;                        // voice mode on/off
  autoListen: boolean;                     // re-listen after each agent turn
  autoSpeakAssistant: boolean;             // read short assistant replies aloud
  isListening: boolean;                    // a recording is currently in flight
  status: VoiceLoopStatus;
  uiInstalled?: boolean;                   // voice panel currently mounted
  previousEditorFactory?: unknown;         // editor factory restored on uninstall
  lastAssistantText?: string;              // latest assistant reply (TTS source)
  lastTranscript?: string;                 // latest recognized user speech
  lastError?: string;                      // last failure shown in the chrome
  recordSeconds?: number;                  // effective record length for this listen
  silenceStopSeconds?: number;             // trailing-silence cutoff for this listen
  listenAbortController?: AbortController; // cancels the in-flight recording
  speakAbortController?: AbortController;  // cancels the in-flight TTS playback
}
27
+
28
/** Register the /listen, /speak, /voice-on, and /voice-status slash commands. */
export function registerVoiceCommands(pi: ExtensionAPI, services: VoiceToolServices, state: VoiceModeState) {
  pi.registerCommand("listen", {
    description: "Record speech, transcribe with Sarvam AI, and send it to pi as a user message",
    handler: async (args, ctx) => {
      // Optional numeric arg overrides the configured record duration.
      await listenAndSend(pi, services, ctx, parseSeconds(args));
    },
  });

  pi.registerCommand("speak", {
    description: "Speak text with Sarvam AI TTS",
    handler: async (args, ctx) => {
      const text = args.trim();
      if (!text) {
        ctx.ui.notify("Usage: /speak <text>", "warning");
        return;
      }
      await speakText(services, text, ctx.signal);
    },
  });

  pi.registerCommand("voice-on", {
    description: "Enable hands-free voice loop. Use --speak to read short assistant replies aloud.",
    handler: async (args, ctx) => {
      state.enabled = true;
      // --speak / --no-speak override the sticky auto-speak preference.
      if (args.includes("--speak")) state.autoSpeakAssistant = true;
      if (args.includes("--no-speak")) state.autoSpeakAssistant = false;
      // --manual disables automatic re-listening after each agent turn.
      state.autoListen = !args.includes("--manual");
      installVoiceUi(ctx, state, createVoiceUiCallbacks(pi, services, state, ctx));
      applyVoiceChrome(ctx, state);
      ctx.ui.notify("Voice mode enabled. Press Q in the voice panel to close it.", "info");
      // Unless suppressed, start listening right away.
      if (!args.includes("--no-listen")) await listenAndSend(pi, services, ctx, parseSeconds(args));
    },
  });


  pi.registerCommand("voice-status", {
    description: "Show pi-listens Sarvam AI, recorder, player, and voice-mode status",
    handler: async (_args, ctx) => {
      const config = services.getConfig();
      const audio = services.getAudio().describe();
      ctx.ui.notify(
        [
          `Voice mode: ${state.enabled ? "on" : "off"}`,
          `Auto-speak assistant: ${state.autoSpeakAssistant ? "on" : "off"}`,
          `Auto-listen: ${state.autoListen ? "on" : "off"}`,
          `Status: ${state.status}`,
          `Sarvam API key: ${config.apiKey ? "set" : "missing"}`,
          `Recorder: ${audio.recorder}`,
          `Player: ${audio.player}`,
          `STT: ${config.sttModel} (${config.translateInputToEnglish ? "translate→English" : config.sttMode}, ${config.sttLanguageCode})`,
          `TTS: ${config.ttsModel} (${config.ttsLanguageCode}, speaker ${config.ttsSpeaker})`,
        ].join("\n"),
        // Warn when any prerequisite (API key, recorder, player) is missing.
        config.apiKey && audio.recorder !== "missing" && audio.player !== "missing" ? "info" : "warning",
      );
    },
  });
}
85
+
86
/**
 * Continue the hands-free loop after an agent turn: optionally speak the
 * assistant's reply, then (if auto-listen is on) start the next recording.
 * Cancellation is treated as a clean return to idle, not an error.
 */
export async function maybeContinueVoiceLoop(pi: ExtensionAPI, services: VoiceToolServices, state: VoiceModeState, ctx: ExtensionContext) {
  if (!state.enabled || state.isListening) return;
  if (state.autoSpeakAssistant && state.lastAssistantText) {
    const spoken = prepareSpokenText(state.lastAssistantText, services.getConfig().maxAutoSpeakChars);
    if (spoken) {
      try {
        await speakText(services, spoken, ctx.signal, state, ctx);
      } catch (err) {
        if (isCancelled(err)) {
          state.status = "idle";
          state.lastError = undefined;
          applyVoiceChrome(ctx, state);
          return;
        }
        // A TTS failure should not break the loop: surface it and carry on.
        state.status = "error";
        state.lastError = errorMessage(err);
        applyVoiceChrome(ctx, state);
        ctx.ui.notify(`pi-listens could not speak assistant response: ${errorMessage(err)}`, "warning");
      }
    }
  }
  // Re-check enabled: voice mode may have been disabled while speaking.
  if (!state.enabled || !state.autoListen) { state.status = "idle"; applyVoiceChrome(ctx, state); return; }
  ctx.ui.notify("Listening for your next instruction…", "info");
  await listenAndSend(pi, services, ctx, undefined, { followUpWhenBusy: true });
}
111
+
112
/**
 * One listen → transcribe → dispatch turn.
 *
 * Invoked while a recording is already in flight, it aborts that recording
 * instead of starting another (toggle behaviour). A recognized transcript
 * is sent to pi as a user message — delivered directly when the agent is
 * idle, otherwise steered into the current run or queued as a follow-up
 * when `options.followUpWhenBusy` is set. Cancellation resets to idle;
 * other failures set error status and notify the user.
 */
async function listenAndSend(
  pi: ExtensionAPI,
  services: VoiceToolServices,
  ctx: ExtensionContext | ExtensionCommandContext,
  seconds: number | undefined,
  options: { followUpWhenBusy?: boolean } = {},
) {
  const state = getStateFromServices(services);
  if (state.isListening) {
    state.listenAbortController?.abort();
    return;
  }
  // Expose the effective timings so the voice panel can render them.
  state.recordSeconds = seconds ?? services.getConfig().recordSeconds;
  state.silenceStopSeconds = services.getConfig().silenceStopSeconds;
  state.isListening = true;
  state.status = "listening";
  state.lastError = undefined;
  const listenAbortController = new AbortController();
  state.listenAbortController = listenAbortController;
  // Abort on either the command's signal or an explicit listen toggle.
  const listenSignal = combineSignals(ctx.signal, listenAbortController.signal);
  applyVoiceChrome(ctx, state);
  if (ctx.hasUI) ctx.ui.setStatus("pi-listens", "listening…");
  let transcript = "";
  try {
    const result = await services.getSpeech().transcribeMicrophone(services.getAudio(), listenSignal.signal, {
      seconds: seconds ?? services.getConfig().recordSeconds,
      mode: services.getConfig().translateInputToEnglish ? "translate" : services.getConfig().sttMode,
    });
    transcript = result.transcript.trim();

    // Optional typed fallback when nothing was recognized.
    if (!transcript && services.getConfig().textFallback && ctx.hasUI) {
      const typed = await ctx.ui.input("I did not catch that. Type your message:", "Type a message for pi");
      transcript = typed?.trim() ?? "";
    }

    if (!transcript) {
      ctx.ui.notify("No speech recognized.", "warning");
      return;
    }

    ctx.ui.notify(`Heard: ${conciseTranscript(transcript)}`, "info");
    state.lastTranscript = transcript;
    state.status = "agent";
    applyVoiceChrome(ctx, state);
    if (ctx.isIdle()) {
      pi.sendUserMessage(transcript);
    } else {
      pi.sendUserMessage(transcript, { deliverAs: options.followUpWhenBusy ? "followUp" : "steer" });
    }
  } catch (err) {
    if (isCancelled(err)) {
      state.status = "idle";
      state.lastError = undefined;
      if (state.enabled) ctx.ui.notify("Listening cancelled.", "info");
    } else {
      state.status = "error"; state.lastError = errorMessage(err); ctx.ui.notify(`pi-listens failed: ${errorMessage(err)}`, "error");
    }
  } finally {
    listenSignal.cleanup();
    state.isListening = false;
    // Only clear the controller if a newer listen hasn't replaced it.
    if (state.listenAbortController === listenAbortController) state.listenAbortController = undefined;
    if (state.status !== "agent" && state.status !== "error") state.status = "idle";
    applyVoiceChrome(ctx, state);
  }
}
177
+
178
/**
 * Synthesize `text` to a temp audio file and play it.
 *
 * When `state` is provided (voice-loop auto-speak), any in-flight speech
 * is aborted first and the loop status is mirrored into the UI chrome.
 * The temp file is always cleaned up (subject to the deleteAudio config).
 */
async function speakText(services: VoiceToolServices, text: string, signal?: AbortSignal, state?: VoiceModeState, ctx?: ExtensionContext) {
  const config = services.getConfig();
  const speakAbortController = state ? new AbortController() : undefined;
  const speakSignal = combineSignals(signal, speakAbortController?.signal);
  let path: string | undefined;

  if (state) {
    state.speakAbortController?.abort(); // stop any previous utterance
    state.speakAbortController = speakAbortController;
    state.status = "speaking";
    if (ctx) applyVoiceChrome(ctx, state);
  }

  try {
    await mkdir(config.audioDir, { recursive: true });
    path = join(config.audioDir, `pi-listens-command-${Date.now()}.${audioExtensionForCodec(config.ttsOutputCodec)}`);
    const result = await services.getSpeech().synthesizeToFile(text, path, speakSignal.signal);
    // Track the path the synthesizer actually wrote, for cleanup below.
    path = result.path;
    await services.getAudio().play(result.path, speakSignal.signal);
  } finally {
    speakSignal.cleanup();
    if (path) await services.getAudio().cleanup(path);
    if (state && state.speakAbortController === speakAbortController) state.speakAbortController = undefined;
    // A newer speak/listen may have changed status; only reset our own.
    if (state && state.status === "speaking") {
      state.status = "idle";
      if (ctx) applyVoiceChrome(ctx, state);
    }
  }
}
207
+
208
+ function parseSeconds(args: string): number | undefined {
209
+ const match = args.match(/(?:^|\s)(\d{1,4})(?:\s|$)/);
210
+ if (!match) return undefined;
211
+ const parsed = Number.parseInt(match[1]!, 10);
212
+ return Number.isFinite(parsed) ? Math.max(1, Math.min(3600, parsed)) : undefined;
213
+ }
214
+
215
+ function errorMessage(err: unknown): string {
216
+ return err instanceof Error ? err.message : String(err);
217
+ }
218
+
219
/** Callbacks wired into the voice panel's keybindings. */
function createVoiceUiCallbacks(pi: ExtensionAPI, services: VoiceToolServices, state: VoiceModeState, ctx: ExtensionContext | ExtensionCommandContext) {
  return {
    // Fire-and-forget: listenAndSend reports its own errors via ctx.ui.
    startListening: () => { void listenAndSend(pi, services, ctx, undefined); },
    disable: () => {
      stopVoiceMode(services, state, ctx);
    },
    toggleSpeak: () => { state.autoSpeakAssistant = !state.autoSpeakAssistant; applyVoiceChrome(ctx, state); },
    toggleAutoListen: () => { state.autoListen = !state.autoListen; applyVoiceChrome(ctx, state); },
  };
}
229
+
230
+ type CombinedSignal = { signal?: AbortSignal; cleanup: () => void };
231
+
232
+ function combineSignals(...signals: Array<AbortSignal | undefined>): CombinedSignal {
233
+ const active = signals.filter((signal): signal is AbortSignal => Boolean(signal));
234
+ if (active.length === 0) return { signal: undefined, cleanup: () => undefined };
235
+ if (active.length === 1) return { signal: active[0], cleanup: () => undefined };
236
+ const controller = new AbortController();
237
+ const attached: AbortSignal[] = [];
238
+ const abort = () => controller.abort();
239
+ for (const signal of active) {
240
+ if (signal.aborted) { controller.abort(); break; }
241
+ signal.addEventListener("abort", abort, { once: true });
242
+ attached.push(signal);
243
+ }
244
+ return {
245
+ signal: controller.signal,
246
+ cleanup: () => {
247
+ for (const signal of attached) signal.removeEventListener("abort", abort);
248
+ },
249
+ };
250
+ }
251
+
252
+ function isCancelled(err: unknown): boolean {
253
+ return err instanceof Error && /cancelled|aborted/i.test(err.message);
254
+ }
255
+
256
/**
 * Disable voice mode: reset state, abort in-flight listen/speak work, kill
 * audio child processes, and tear down the voice UI when a ctx is given.
 */
export function stopVoiceMode(services: VoiceToolServices, state: VoiceModeState, ctx?: ExtensionContext | ExtensionCommandContext) {
  state.enabled = false;
  state.autoListen = false;
  state.isListening = false;
  state.status = "idle";
  state.lastError = undefined;

  // Detach each controller before aborting so abort handlers observe the
  // already-reset state and don't race a re-assignment.
  const listenAbortController = state.listenAbortController;
  state.listenAbortController = undefined;
  listenAbortController?.abort();

  const speakAbortController = state.speakAbortController;
  state.speakAbortController = undefined;
  speakAbortController?.abort();

  services.getAudio().stopAll();

  if (ctx) uninstallVoiceUi(ctx, state);
}
275
+
276
// Per-services voice state; WeakMap so state dies with the services object.
const serviceState = new WeakMap<VoiceToolServices, VoiceModeState>();

/** Associate a voice-mode state with a services instance for later lookup. */
export function attachStateToServices(services: VoiceToolServices, state: VoiceModeState) {
  serviceState.set(services, state);
}
281
+
282
/** Look up the state registered via attachStateToServices; throws if absent. */
function getStateFromServices(services: VoiceToolServices): VoiceModeState {
  const state = serviceState.get(services);
  if (!state) throw new Error("voice mode state not attached");
  return state;
}