@p8n.ai/pi-listens 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -1
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/audio.ts +94 -5
- package/src/commands.ts +21 -8
- package/src/sarvam.ts +26 -0
- package/src/tools.ts +27 -15
- package/src/voice-ui.ts +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,14 @@ This project follows [Semantic Versioning](https://semver.org/).
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
## [0.1.2] - 2026-05-09
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
|
|
13
|
+
- Stream TTS audio directly to the local player so speech starts sooner.
|
|
14
|
+
- Make `voice_output` non-blocking by default; pass `wait_for_playback: true` to wait.
|
|
15
|
+
- Replace the `R` voice-panel shortcut with Space for easier listen/stop control.
|
|
16
|
+
|
|
9
17
|
## [0.1.1] - 2026-05-09
|
|
10
18
|
|
|
11
19
|
### Fixed
|
|
@@ -31,6 +39,7 @@ This project follows [Semantic Versioning](https://semver.org/).
|
|
|
31
39
|
- Stop active audio capture/playback subprocesses when voice mode is closed or the Pi session shuts down.
|
|
32
40
|
- Clean up generated audio files when spoken playback is interrupted.
|
|
33
41
|
|
|
34
|
-
[Unreleased]: https://github.com/p8n-ai/pi-listens/compare/v0.1.1...HEAD
|
|
42
|
+
[Unreleased]: https://github.com/p8n-ai/pi-listens/compare/v0.1.2...HEAD
|
|
35
43
|
[0.1.0]: https://github.com/p8n-ai/pi-listens/releases/tag/v0.1.0
|
|
36
44
|
[0.1.1]: https://github.com/p8n-ai/pi-listens/releases/tag/v0.1.1
|
|
45
|
+
[0.1.2]: https://github.com/p8n-ai/pi-listens/releases/tag/v0.1.2
|
package/README.md
CHANGED
|
@@ -95,7 +95,7 @@ The extension also injects voice guidance into the system prompt:
|
|
|
95
95
|
| `/voice-status` | Show setup and voice-mode status. |
|
|
96
96
|
|
|
97
97
|
Voice panel controls in interactive mode:
|
|
98
|
-
-
|
|
98
|
+
- Space: listen now; press again while listening to stop listening; if Pi is speaking, Space stops playback before listening
|
|
99
99
|
- A: auto-listen on/off (listen again after each assistant reply)
|
|
100
100
|
- S: read aloud on/off (speak assistant replies)
|
|
101
101
|
- Q: close the panel and stop any active listening or speaking
|
package/package.json
CHANGED
package/src/audio.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { mkdir, rm } from "node:fs/promises";
|
|
2
2
|
import { randomUUID } from "node:crypto";
|
|
3
3
|
import { join } from "node:path";
|
|
4
|
-
import { spawn } from "node:child_process";
|
|
4
|
+
import { spawn, type StdioOptions } from "node:child_process";
|
|
5
5
|
import { accessSync, constants } from "node:fs";
|
|
6
6
|
import { once } from "node:events";
|
|
7
7
|
import type { PiListensConfig } from "./config.js";
|
|
@@ -10,15 +10,17 @@ export interface AudioRuntime {
|
|
|
10
10
|
record(seconds?: number, signal?: AbortSignal): Promise<string>;
|
|
11
11
|
streamPcm(signal?: AbortSignal): AsyncIterable<Buffer>;
|
|
12
12
|
play(path: string, signal?: AbortSignal): Promise<void>;
|
|
13
|
+
playStream(stream: ReadableStream<Uint8Array>, signal?: AbortSignal): Promise<void>;
|
|
13
14
|
cleanup(path: string): Promise<void>;
|
|
14
15
|
stopPlayback(): void;
|
|
15
16
|
stopAll(): void;
|
|
16
|
-
describe(): { recorder: string; player: string };
|
|
17
|
+
describe(): { recorder: string; player: string; streamingPlayer: string };
|
|
17
18
|
}
|
|
18
19
|
|
|
19
20
|
export function createAudioRuntime(config: PiListensConfig): AudioRuntime {
|
|
20
21
|
const recorder = config.recordCommand ? "custom" : detectRecorder();
|
|
21
22
|
const player = config.playCommand ? "custom" : detectPlayer();
|
|
23
|
+
const streamingPlayer = detectStreamingPlayer();
|
|
22
24
|
|
|
23
25
|
return {
|
|
24
26
|
async record(seconds = config.recordSeconds, signal?: AbortSignal): Promise<string> {
|
|
@@ -68,6 +70,16 @@ export function createAudioRuntime(config: PiListensConfig): AudioRuntime {
|
|
|
68
70
|
await run(command.command, command.args, signal, { kind: "play" });
|
|
69
71
|
},
|
|
70
72
|
|
|
73
|
+
/**
 * Plays an audio byte stream through the detected streaming player.
 *
 * The player command line is built from the configured TTS codec and sample
 * rate, and the stream is piped into that command.
 *
 * @param stream audio bytes to play.
 * @param signal optional abort signal forwarded to the piping helper.
 * @throws Error when no streaming-capable player was detected.
 */
async playStream(stream: ReadableStream<Uint8Array>, signal?: AbortSignal): Promise<void> {
  if (streamingPlayer) {
    const { command: playerCommand, args: playerArgs } = streamingPlayerCommand(
      streamingPlayer,
      config.ttsOutputCodec,
      config.ttsSampleRate,
    );
    await pipeStreamToCommand(stream, playerCommand, playerArgs, signal);
    return;
  }
  throw new Error(
    "No streaming audio player found. Install ffplay or sox (`play`) for low-latency TTS playback, or use file playback fallback.",
  );
},
|
|
82
|
+
|
|
71
83
|
async cleanup(path: string): Promise<void> {
|
|
72
84
|
if (!config.deleteAudio) return;
|
|
73
85
|
await rm(path, { force: true }).catch(() => undefined);
|
|
@@ -82,7 +94,7 @@ export function createAudioRuntime(config: PiListensConfig): AudioRuntime {
|
|
|
82
94
|
},
|
|
83
95
|
|
|
84
96
|
describe() {
|
|
85
|
-
return { recorder: recorder ?? "missing", player: player ?? "missing" };
|
|
97
|
+
return { recorder: recorder ?? "missing", player: player ?? "missing", streamingPlayer: streamingPlayer ?? "missing" };
|
|
86
98
|
},
|
|
87
99
|
};
|
|
88
100
|
}
|
|
@@ -205,6 +217,13 @@ function detectPlayer(): string | null {
|
|
|
205
217
|
return null;
|
|
206
218
|
}
|
|
207
219
|
|
|
220
|
+
/**
 * Finds the first available player that can be fed audio over stdin.
 *
 * Candidates are probed in preference order: `ffplay`, then `play`, then `aplay`.
 *
 * @returns the command name of the first available player, or `null` when
 *          none of them is available.
 */
function detectStreamingPlayer(): string | null {
  const candidates = ["ffplay", "play", "aplay"];
  for (const candidate of candidates) {
    if (isCommandAvailable(candidate)) return candidate;
  }
  return null;
}
|
|
226
|
+
|
|
208
227
|
function isCommandAvailable(command: string): boolean {
|
|
209
228
|
const paths = (process.env.PATH ?? "").split(":").filter(Boolean);
|
|
210
229
|
for (const dir of paths) {
|
|
@@ -303,6 +322,76 @@ async function* streamCommandOutput(command: string, args: string[], signal?: Ab
|
|
|
303
322
|
}
|
|
304
323
|
}
|
|
305
324
|
|
|
325
|
+
/**
 * Spawns `command` as a managed "play" process and pipes `stream` into its stdin.
 *
 * Resolves once the player has exited with code 0. Rejects with:
 *  - `Error("Cancelled")` when `signal` is (or becomes) aborted,
 *  - the spawn error when the process could not be started,
 *  - a descriptive error (including captured stderr/stdout) on a non-zero or
 *    signal-terminated exit.
 *
 * @param stream  audio bytes to feed to the player.
 * @param command player executable.
 * @param args    player arguments.
 * @param signal  optional abort signal; aborting terminates the player.
 */
async function pipeStreamToCommand(stream: ReadableStream<Uint8Array>, command: string, args: string[], signal?: AbortSignal): Promise<void> {
  if (signal?.aborted) throw new Error("Cancelled");
  const child = spawnManaged(command, args, "play", ["pipe", "pipe", "pipe"]);
  let stderr = "";
  let stdout = "";
  // Track "close" explicitly: a child killed by a signal closes with code === null,
  // so `exitCode === null` cannot be used to mean "still running".
  let closed = false;
  let exitCode: number | null = null;
  let exitSignal: NodeJS.Signals | null = null;
  let spawnError: Error | undefined;
  let stdinFailed = false;

  const stop = () => terminateChild(child);
  signal?.addEventListener("abort", stop, { once: true });
  child.stdout?.on("data", (chunk) => { stdout += chunk.toString(); });
  child.stderr?.on("data", (chunk) => { stderr += chunk.toString(); });
  // Resolves as soon as the child has either closed or failed to spawn, so later
  // waits can never block on an event that already fired.
  const settled = new Promise<void>((resolve) => {
    child.on("error", (err) => { spawnError = err; resolve(); });
    child.on("close", (code, termSignal) => { closed = true; exitCode = code; exitSignal = termSignal; resolve(); });
  });

  try {
    if (!child.stdin) throw new Error(`${command} did not provide stdin for streaming audio playback`);
    const stdin = child.stdin;
    // A player that exits early produces EPIPE on stdin; without a listener that
    // would surface as an unhandled stream error. The exit status is reported below.
    stdin.on("error", () => { stdinFailed = true; });
    const playerGone = (): boolean => closed || stdinFailed || stdin.destroyed;
    // Wait for back-pressure to clear, but also wake up if stdin errors or closes.
    const waitForDrain = (): Promise<void> => new Promise<void>((resolve) => {
      const finish = () => {
        stdin.off("drain", finish);
        stdin.off("error", finish);
        stdin.off("close", finish);
        resolve();
      };
      stdin.on("drain", finish);
      stdin.on("error", finish);
      stdin.on("close", finish);
    });

    const reader = stream.getReader();
    let streamDone = false;
    try {
      while (true) {
        if (signal?.aborted) throw new Error("Cancelled");
        if (spawnError) throw spawnError;
        if (playerGone()) break;
        const { done, value } = await reader.read();
        if (done) {
          streamDone = true;
          break;
        }
        if (!value?.byteLength) continue;
        // The player may have gone away while the read was pending.
        if (playerGone()) break;
        if (!stdin.write(Buffer.from(value))) await Promise.race([waitForDrain(), settled]);
      }
    } finally {
      // Stop the upstream producer when playback ended before the stream did.
      if (!streamDone) await reader.cancel().catch(() => undefined);
      reader.releaseLock();
    }
    if (!stdin.destroyed) stdin.end();
    if (!closed && !spawnError) await settled;
    if (signal?.aborted) throw new Error("Cancelled");
    if (spawnError) throw spawnError;
    if (exitCode !== 0) {
      const output = [stderr.trim(), stdout.trim()].filter(Boolean).join("\n");
      throw new Error(`${command} failed${exitSignal ? ` (${exitSignal})` : ""}${exitCode === null ? "" : ` with exit code ${exitCode}`}${output ? `: ${output}` : ""}`);
    }
  } finally {
    signal?.removeEventListener("abort", stop);
    // Never leave a player running after an error path.
    if (!closed && !child.killed) stop();
  }
}
|
|
370
|
+
|
|
371
|
+
/**
 * Builds the command line that makes `player` read audio from stdin.
 *
 * Headerless codecs (`linear16`, `mulaw`, `alaw`) carry no format information,
 * so the sample rate, channel count (always mono here) and encoding are passed
 * explicitly. Other codecs are left to the player's own demuxing.
 *
 * @param player     one of "ffplay", "play" (SoX) or "aplay".
 * @param codec      configured TTS output codec.
 * @param sampleRate TTS sample rate in Hz, used for headerless codecs.
 * @returns the executable and its arguments.
 * @throws Error when the player cannot stream the given codec.
 */
function streamingPlayerCommand(player: string, codec: PiListensConfig["ttsOutputCodec"], sampleRate: number): CommandSpec {
  const rate = String(sampleRate);

  if (player === "ffplay") {
    const args = ["-nodisp", "-autoexit", "-loglevel", "error"];
    const rawDemuxer = codec === "linear16" ? "s16le" : codec === "mulaw" || codec === "alaw" ? codec : undefined;
    if (rawDemuxer) args.push("-f", rawDemuxer, "-ar", rate, "-ac", "1");
    args.push("-i", "pipe:0");
    return { command: "ffplay", args };
  }

  if (player === "play") {
    if (codec === "linear16") return { command: "play", args: ["-q", "-r", rate, "-c", "1", "-b", "16", "-e", "signed-integer", "-t", "raw", "-"] };
    // "mulaw"/"alaw" are passed as raw input with an explicit 8-bit companded
    // encoding rather than as `-t` type names — NOTE(review): confirm against the SoX format list.
    if (codec === "mulaw") return { command: "play", args: ["-q", "-r", rate, "-c", "1", "-b", "8", "-e", "mu-law", "-t", "raw", "-"] };
    if (codec === "alaw") return { command: "play", args: ["-q", "-r", rate, "-c", "1", "-b", "8", "-e", "a-law", "-t", "raw", "-"] };
    return { command: "play", args: ["-q", "-t", soxTypeForCodec(codec), "-"] };
  }

  if (player === "aplay") {
    if (codec === "wav") return { command: "aplay", args: ["-q", "-"] };
    // aplay can play these headerless codecs when the sample format is named explicitly.
    const rawFormat = codec === "linear16" ? "S16_LE" : codec === "mulaw" ? "MU_LAW" : codec === "alaw" ? "A_LAW" : undefined;
    if (rawFormat) return { command: "aplay", args: ["-q", "-t", "raw", "-f", rawFormat, "-r", rate, "-c", "1", "-"] };
  }

  throw new Error(`Unsupported streaming player ${player} for codec ${codec}`);
}

/**
 * Maps a TTS codec name to the SoX `-t` file type used for stdin playback.
 *
 * @param codec configured TTS output codec.
 * @returns "adts" for `aac`, "raw" for `linear16`, otherwise the codec name itself.
 */
function soxTypeForCodec(codec: PiListensConfig["ttsOutputCodec"]): string {
  if (codec === "aac") return "adts";
  if (codec === "linear16") return "raw";
  return codec;
}
|
|
394
|
+
|
|
306
395
|
type AudioProcessKind = "record" | "play" | "other";
|
|
307
396
|
|
|
308
397
|
type ManagedChild = ReturnType<typeof spawn>;
|
|
@@ -318,10 +407,10 @@ export function stopActiveAudioProcesses(options: { kind?: AudioProcessKind; for
|
|
|
318
407
|
}
|
|
319
408
|
}
|
|
320
409
|
|
|
321
|
-
function spawnManaged(command: string, args: string[], kind: AudioProcessKind): ManagedChild {
|
|
410
|
+
function spawnManaged(command: string, args: string[], kind: AudioProcessKind, stdio: StdioOptions = ["ignore", "pipe", "pipe"]): ManagedChild {
|
|
322
411
|
installProcessExitCleanup();
|
|
323
412
|
const child = spawn(command, args, {
|
|
324
|
-
stdio
|
|
413
|
+
stdio,
|
|
325
414
|
detached: process.platform !== "win32",
|
|
326
415
|
});
|
|
327
416
|
activeChildren.add(child);
|
package/src/commands.ts
CHANGED
|
@@ -74,6 +74,7 @@ export function registerVoiceCommands(pi: ExtensionAPI, services: VoiceToolServi
|
|
|
74
74
|
`Sarvam API key: ${config.apiKey ? "set" : "missing"}`,
|
|
75
75
|
`Recorder: ${audio.recorder}`,
|
|
76
76
|
`Player: ${audio.player}`,
|
|
77
|
+
`Streaming player: ${audio.streamingPlayer}`,
|
|
77
78
|
`STT: ${config.sttModel} (${config.translateInputToEnglish ? "translate→English" : config.sttMode}, ${config.sttLanguageCode})`,
|
|
78
79
|
`TTS: ${config.ttsModel} (${config.ttsLanguageCode}, speaker ${config.ttsSpeaker})`,
|
|
79
80
|
].join("\n"),
|
|
@@ -177,10 +178,8 @@ async function listenAndSend(
|
|
|
177
178
|
}
|
|
178
179
|
|
|
179
180
|
async function speakText(services: VoiceToolServices, text: string, signal?: AbortSignal, state?: VoiceModeState, ctx?: ExtensionContext) {
|
|
180
|
-
const config = services.getConfig();
|
|
181
181
|
const speakAbortController = state ? new AbortController() : undefined;
|
|
182
182
|
const speakSignal = combineSignals(signal, speakAbortController?.signal);
|
|
183
|
-
let path: string | undefined;
|
|
184
183
|
|
|
185
184
|
if (state) {
|
|
186
185
|
state.speakAbortController?.abort();
|
|
@@ -190,14 +189,9 @@ async function speakText(services: VoiceToolServices, text: string, signal?: Abo
|
|
|
190
189
|
}
|
|
191
190
|
|
|
192
191
|
try {
|
|
193
|
-
await
|
|
194
|
-
path = join(config.audioDir, `pi-listens-command-${Date.now()}.${audioExtensionForCodec(config.ttsOutputCodec)}`);
|
|
195
|
-
const result = await services.getSpeech().synthesizeToFile(text, path, speakSignal.signal);
|
|
196
|
-
path = result.path;
|
|
197
|
-
await services.getAudio().play(result.path, speakSignal.signal);
|
|
192
|
+
await playSpeechBest(services, text, speakSignal.signal);
|
|
198
193
|
} finally {
|
|
199
194
|
speakSignal.cleanup();
|
|
200
|
-
if (path) await services.getAudio().cleanup(path);
|
|
201
195
|
if (state && state.speakAbortController === speakAbortController) state.speakAbortController = undefined;
|
|
202
196
|
if (state && state.status === "speaking") {
|
|
203
197
|
state.status = "idle";
|
|
@@ -206,6 +200,25 @@ async function speakText(services: VoiceToolServices, text: string, signal?: Abo
|
|
|
206
200
|
}
|
|
207
201
|
}
|
|
208
202
|
|
|
203
|
+
/**
 * Speaks `text` using the lowest-latency path available.
 *
 * When a streaming player was detected, the TTS audio is streamed straight into
 * it. Otherwise the audio is synthesized to a file under `config.audioDir`,
 * played, and then handed to `audio.cleanup`.
 *
 * @param services voice services (audio runtime, speech client, config).
 * @param text     text to speak.
 * @param signal   optional abort signal forwarded to synthesis and playback.
 */
async function playSpeechBest(services: VoiceToolServices, text: string, signal?: AbortSignal) {
  const audio = services.getAudio();
  if (audio.describe().streamingPlayer !== "missing") {
    const result = await services.getSpeech().synthesizeStream(text, signal);
    await audio.playStream(result.stream, signal);
    return;
  }

  const config = services.getConfig();
  await mkdir(config.audioDir, { recursive: true });
  let audioPath = join(config.audioDir, `pi-listens-command-${Date.now()}.${audioExtensionForCodec(config.ttsOutputCodec)}`);
  try {
    const result = await services.getSpeech().synthesizeToFile(text, audioPath, signal);
    // Clean up the file the synthesizer reports having written, which is also the
    // one that gets played; until synthesis succeeds, fall back to the requested path.
    audioPath = result.path;
    await audio.play(audioPath, signal);
  } finally {
    await audio.cleanup(audioPath);
  }
}
|
|
221
|
+
|
|
209
222
|
function parseSeconds(args: string): number | undefined {
|
|
210
223
|
const match = args.match(/(?:^|\s)(\d{1,4})(?:\s|$)/);
|
|
211
224
|
if (!match) return undefined;
|
package/src/sarvam.ts
CHANGED
|
@@ -15,6 +15,10 @@ export interface SynthesisResult {
|
|
|
15
15
|
bytes: number;
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
+
export interface SynthesisStreamResult {
|
|
19
|
+
stream: ReadableStream<Uint8Array>;
|
|
20
|
+
}
|
|
21
|
+
|
|
18
22
|
type StreamingData = {
|
|
19
23
|
transcript?: string;
|
|
20
24
|
request_id?: string;
|
|
@@ -127,6 +131,28 @@ export class SarvamSpeechClient {
|
|
|
127
131
|
return { path, bytes: buffer.byteLength };
|
|
128
132
|
}
|
|
129
133
|
|
|
134
|
+
/**
 * Requests streamed text-to-speech audio for `text`.
 *
 * The request is built from the current config (language, speaker, model, pace,
 * temperature, sample rate, output codec) with preprocessing enabled.
 *
 * @param text   text to synthesize.
 * @param signal optional abort signal passed to the client request.
 * @returns an object holding the readable audio stream.
 * @throws Error when the response exposes no readable stream.
 */
async synthesizeStream(text: string, signal?: AbortSignal): Promise<SynthesisStreamResult> {
  const settings = this.getConfig();
  const sarvam = this.getClient(settings);
  const request = {
    text,
    target_language_code: settings.ttsLanguageCode as never,
    speaker: settings.ttsSpeaker as never,
    model: settings.ttsModel as never,
    pace: settings.ttsPace,
    temperature: settings.ttsTemperature,
    speech_sample_rate: settings.ttsSampleRate as never,
    enable_preprocessing: true,
    output_audio_codec: settings.ttsOutputCodec as never,
  };
  const ttsResponse = await sarvam.textToSpeech.convertStream(request, { abortSignal: signal });
  const audioStream = ttsResponse.stream();
  if (audioStream) return { stream: audioStream };
  throw new Error("Sarvam TTS response did not include a readable audio stream");
}
|
|
155
|
+
|
|
130
156
|
private async withStreamingSocket(
|
|
131
157
|
signal: AbortSignal | undefined,
|
|
132
158
|
mode: SttMode | undefined,
|
package/src/tools.ts
CHANGED
|
@@ -17,7 +17,7 @@ export interface VoiceToolServices {
|
|
|
17
17
|
|
|
18
18
|
const VoiceOutputParams = Type.Object({
|
|
19
19
|
text: Type.String({ description: "Short text to speak to the user. Keep it concise; do not speak code blocks or long logs." }),
|
|
20
|
-
wait_for_playback: Type.Optional(Type.Boolean({ description: "Wait until audio playback completes before returning. Default
|
|
20
|
+
wait_for_playback: Type.Optional(Type.Boolean({ description: "Wait until audio playback completes before returning. Default false." })),
|
|
21
21
|
});
|
|
22
22
|
|
|
23
23
|
const VoiceInputParams = Type.Object({
|
|
@@ -54,21 +54,20 @@ export function registerVoiceTools(pi: ExtensionAPI, services: VoiceToolServices
|
|
|
54
54
|
],
|
|
55
55
|
parameters: VoiceOutputParams,
|
|
56
56
|
async execute(_toolCallId, params: VoiceOutputInput, signal, onUpdate) {
|
|
57
|
-
onUpdate?.({ content: [{ type: "text", text: "
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
if (params.wait_for_playback === false) {
|
|
57
|
+
onUpdate?.({ content: [{ type: "text", text: "Starting streamed speech with Sarvam AI…" }], details: {} });
|
|
58
|
+
const playback = playSpeechBest(params.text, services, signal);
|
|
59
|
+
if (params.wait_for_playback !== true) {
|
|
61
60
|
void playback.catch(() => undefined);
|
|
62
61
|
return {
|
|
63
62
|
content: [{ type: "text", text: `Started speaking to user: ${params.text}` }],
|
|
64
|
-
details: {
|
|
63
|
+
details: { played: "started", text: params.text },
|
|
65
64
|
};
|
|
66
65
|
}
|
|
67
66
|
onUpdate?.({ content: [{ type: "text", text: "Playing audio…" }], details: {} });
|
|
68
|
-
await playback;
|
|
67
|
+
const details = await playback;
|
|
69
68
|
return {
|
|
70
69
|
content: [{ type: "text", text: `Spoke to user: ${params.text}` }],
|
|
71
|
-
details: { ...
|
|
70
|
+
details: { ...details, played: true, text: params.text },
|
|
72
71
|
};
|
|
73
72
|
},
|
|
74
73
|
renderCall(args: VoiceOutputInput, theme) {
|
|
@@ -117,12 +116,7 @@ export function registerVoiceTools(pi: ExtensionAPI, services: VoiceToolServices
|
|
|
117
116
|
parameters: VoiceAskParams,
|
|
118
117
|
async execute(_toolCallId, params: VoiceAskInput, signal, onUpdate, ctx) {
|
|
119
118
|
onUpdate?.({ content: [{ type: "text", text: "Speaking question…" }], details: {} });
|
|
120
|
-
|
|
121
|
-
try {
|
|
122
|
-
await services.getAudio().play(spoken.path, signal);
|
|
123
|
-
} finally {
|
|
124
|
-
await services.getAudio().cleanup(spoken.path);
|
|
125
|
-
}
|
|
119
|
+
await playSpeechBest(params.question, services, signal);
|
|
126
120
|
const answer = await listenAndMaybeFallback(
|
|
127
121
|
params,
|
|
128
122
|
services,
|
|
@@ -173,6 +167,7 @@ export function registerVoiceTools(pi: ExtensionAPI, services: VoiceToolServices
|
|
|
173
167
|
`Sarvam API key: ${config.apiKey ? "set" : "missing"}`,
|
|
174
168
|
`Recorder: ${audio.recorder}`,
|
|
175
169
|
`Player: ${audio.player}`,
|
|
170
|
+
`Streaming player: ${audio.streamingPlayer}`,
|
|
176
171
|
`STT: ${config.sttModel} (${config.translateInputToEnglish ? "translate→English" : config.sttMode}, ${config.sttLanguageCode})`,
|
|
177
172
|
`TTS: ${config.ttsModel} (${config.ttsLanguageCode}, speaker ${config.ttsSpeaker})`,
|
|
178
173
|
].join("\n"),
|
|
@@ -184,7 +179,24 @@ export function registerVoiceTools(pi: ExtensionAPI, services: VoiceToolServices
|
|
|
184
179
|
});
|
|
185
180
|
}
|
|
186
181
|
|
|
187
|
-
async function
|
|
182
|
+
/**
 * Speaks `text` and reports which playback path was used.
 *
 * With a streaming player available the TTS audio is streamed into it and the
 * result is `{ playback: "stream" }`. Otherwise the audio is synthesized to a
 * file, played, cleaned up, and the synthesis result is returned with
 * `playback: "file"` added.
 *
 * @param text     text to speak.
 * @param services voice services (audio runtime, speech client, config).
 * @param signal   optional abort signal forwarded to synthesis and playback.
 * @returns details describing the playback that took place.
 */
async function playSpeechBest(text: string, services: VoiceToolServices, signal?: AbortSignal): Promise<Record<string, unknown>> {
  const player = services.getAudio();
  const canStream = player.describe().streamingPlayer !== "missing";

  if (!canStream) {
    const spoken = await speakToFile(text, services, signal);
    try {
      await player.play(spoken.path, signal);
      return { ...spoken, playback: "file" };
    } finally {
      await player.cleanup(spoken.path);
    }
  }

  const { stream: audioStream } = await services.getSpeech().synthesizeStream(text, signal);
  await player.playStream(audioStream, signal);
  return { playback: "stream" };
}
|
|
198
|
+
|
|
199
|
+
async function speakToFile(text: string, services: VoiceToolServices, signal?: AbortSignal) {
|
|
188
200
|
const config = services.getConfig();
|
|
189
201
|
await mkdir(config.audioDir, { recursive: true });
|
|
190
202
|
const path = join(config.audioDir, `pi-listens-output-${Date.now()}-${randomUUID()}.${audioExtensionForCodec(config.ttsOutputCodec)}`);
|
package/src/voice-ui.ts
CHANGED
|
@@ -88,7 +88,7 @@ class VoiceLoopEditor extends CustomEditor {
|
|
|
88
88
|
if (mouse.pressed && mouse.button === 0) this.triggerMouseOrbClick(mouse);
|
|
89
89
|
return;
|
|
90
90
|
}
|
|
91
|
-
if (data
|
|
91
|
+
if (data === " ") {
|
|
92
92
|
this.triggerOrbClick(1);
|
|
93
93
|
this.callbacks.startListening();
|
|
94
94
|
return;
|
|
@@ -309,7 +309,7 @@ function frameIntervalForStatus(status: VoiceModeState["status"]): number {
|
|
|
309
309
|
function controlRail(state: VoiceModeState, palette: OrbPalette, width: number): string[] {
|
|
310
310
|
const listenLabel = state.isListening ? "stop" : "listen";
|
|
311
311
|
const pills = [
|
|
312
|
-
controlPill("
|
|
312
|
+
controlPill("Space", listenLabel, state.isListening ? "active" : "primary", palette),
|
|
313
313
|
controlPill("A", state.autoListen ? "auto-listen on" : "auto-listen off", state.autoListen ? "active" : "muted", palette),
|
|
314
314
|
controlPill("S", state.autoSpeakAssistant ? "read aloud on" : "read aloud off", state.autoSpeakAssistant ? "active" : "muted", palette),
|
|
315
315
|
controlPill("Q", "close", "danger", palette),
|