pi-friday 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/voice.ts ADDED
@@ -0,0 +1,400 @@
1
+ /**
2
+ * Friday Extension - Voice/TTS Module
3
+ * Everything TTS: speakText, voice queue, voice helpers, and daemon interaction
4
+ */
5
+
6
+ import { spawn, type ChildProcess } from "node:child_process";
7
+ import { promisify } from "node:util";
8
+ import { exec as execCb } from "node:child_process";
9
+ const execAsync = promisify(execCb);
10
+ import { join } from "node:path";
11
+ import { writeFileSync, existsSync, rmSync } from "node:fs";
12
+ import type { FridaySettings } from "./settings.js";
13
+
14
/**
 * Builds the filesystem path of the Piper voice model selected in the settings.
 *
 * The model is expected under `.local/share/piper-voices` inside the directory
 * named by the `HOME` environment variable. When `HOME` is unset the literal
 * string "~" is used as the base directory instead.
 *
 * @param settings - Friday settings; only `settings.voice.model` is read.
 * @returns `<HOME>/.local/share/piper-voices/<model>.onnx`.
 */
export function getVoiceModelPath(settings: FridaySettings): string {
  const baseDir = process.env.HOME ?? "~";
  const modelFile = `${settings.voice.model}.onnx`;
  return join(baseDir, ".local/share/piper-voices", modelFile);
}
21
+
22
/** Piper synthesis process of the utterance being spoken, or null when none is tracked. */
export let currentPiper: ChildProcess | null = null;

/** Audio player process of the utterance being spoken, or null when none is tracked. */
export let currentPlayer: ChildProcess | null = null;

/**
 * Raised by killCurrentVoice when it actually kills a process; speakText's
 * cleanup reads it to skip daemon interaction and then lowers it again.
 */
export let voiceForceKilled: boolean = false;

/** Utterances waiting to be spoken, oldest first. */
export const voiceQueue: Array<{ text: string; speed?: number }> = [];

/** True while the queue processor considers an utterance to be in progress. */
export let voicePlaying: boolean = false;

// Resolver of the speakText promise that is currently pending. killCurrentVoice
// calls it so the promise settles at once rather than waiting for speakText's
// own safety timer.
let activeSpeakResolve: (() => void) | null = null;
31
+
32
/**
 * Stops TTS immediately.
 *
 * Empties the voice queue, kills the tracked piper/player processes (when any
 * exist), resets the module-level playback state and settles the promise of the
 * speakText call that is still pending. Never throws: unexpected failures are
 * passed to logError.
 */
export function killCurrentVoice() {
  try {
    // Nothing that was waiting should be spoken any more.
    voiceQueue.splice(0, voiceQueue.length);

    const piperProc = currentPiper;
    const playerProc = currentPlayer;

    // The force-kill flag is raised only when there is something to kill;
    // a stale flag would poison the cleanup of the next speakText call.
    if (playerProc || piperProc) {
      voiceForceKilled = true;

      // Streams are destroyed first so they cannot hold the event loop, then
      // both processes get SIGKILL (SIGTERM can leave them lingering).
      const teardownSteps: Array<() => void> = [
        () => { piperProc?.stdout?.destroy(); },
        () => { piperProc?.stdin?.destroy(); },
        () => { playerProc?.stdin?.destroy(); },
        () => { playerProc?.kill("SIGKILL"); },
        () => { piperProc?.kill("SIGKILL"); },
      ];
      for (const step of teardownSteps) {
        try {
          step();
        } catch {
          // best effort: one failing step must not stop the others
        }
      }
    }

    currentPlayer = null;
    currentPiper = null;
    voicePlaying = false;

    // Settle the pending speakText promise right away so no caller hangs.
    const pendingResolve = activeSpeakResolve;
    activeSpeakResolve = null;
    if (pendingResolve) pendingResolve();
  } catch (e) {
    logError("killCurrentVoice", e);
  }
}
61
+
62
/**
 * Sends SIGTERM to piper/play processes left behind by earlier sessions.
 *
 * Candidates are found by filtering `ps aux` output with the patterns
 * `piper.*jenny` and `play.*raw.*22050`; the PID is taken from the second
 * whitespace-separated column of each matching line. Any failure of the
 * listing command is treated as "no orphans", and a failing kill of a single
 * PID is ignored.
 */
export async function killOrphanTTS() {
  try {
    const listing = await execAsync(
      "ps aux | grep -E 'piper.*jenny|play.*raw.*22050' | grep -v grep",
      { encoding: "utf8", timeout: 5000 },
    );
    const rows = listing.stdout.trim();
    if (!rows) return;

    const orphanPids = rows
      .split("\n")
      .map((row) => parseInt(row.trim().split(/\s+/)[1]!, 10))
      .filter((candidate) => Boolean(candidate) && !isNaN(candidate));

    for (const orphanPid of orphanPids) {
      try {
        process.kill(orphanPid, "SIGTERM");
        log(`Killed orphan TTS process (PID ${orphanPid})`);
      } catch {
        // process already gone or not ours to signal
      }
    }
  } catch { /* no orphans */ }
}
82
+
83
/**
 * Signals that TTS is playing by writing the current epoch-millisecond
 * timestamp into `<commsDir>/tts_playing`. Write failures are ignored.
 *
 * @param commsDir - Directory that holds the marker file.
 */
export function muteDaemon(commsDir: string) {
  const markerPath = join(commsDir, "tts_playing");
  try {
    const stamp = String(Date.now());
    writeFileSync(markerPath, stamp);
  } catch {
    // best effort: a missing or unwritable directory must not break speech
  }
}
87
+
88
/**
 * Removes the `<commsDir>/tts_playing` marker file if it exists.
 * Any filesystem error is ignored.
 *
 * @param commsDir - Directory that holds the marker file.
 */
export function tryUnmuteDaemon(commsDir: string) {
  const markerPath = join(commsDir, "tts_playing");
  try {
    if (!existsSync(markerPath)) return;
    rmSync(markerPath, { force: true });
  } catch {
    // best effort: failing to remove the marker must not throw
  }
}
92
+
93
/**
 * Requests a listen cycle by writing a JSON payload to `<commsDir>/listen_now`.
 *
 * Does nothing when no wake daemon handle is supplied. The payload carries the
 * current timestamp plus the two timing values under the keys `waitForSpeech`
 * and `maxRecord`. Errors are reported through logError and never thrown.
 *
 * @param commsDir - Directory in which the request file is written.
 * @param wakeDaemon - Wake daemon process handle; null disables the request.
 * @param waitForSpeechSec - Value stored as `waitForSpeech` (default 5).
 * @param maxRecordSec - Value stored as `maxRecord` (default 10).
 */
export function triggerDaemonListen(
  commsDir: string,
  wakeDaemon: ChildProcess | null,
  waitForSpeechSec = 5,
  maxRecordSec = 10
) {
  if (!wakeDaemon) return;
  try {
    const requestPath = join(commsDir, "listen_now");
    const request = {
      timestamp: Date.now(),
      waitForSpeech: waitForSpeechSec,
      maxRecord: maxRecordSec,
    };
    writeFileSync(requestPath, JSON.stringify(request));
  } catch (e) {
    logError("triggerDaemonListen", e);
  }
}
112
+
113
/**
 * Estimates how long it takes to read `text`, assuming 3.5 words per second.
 *
 * Words are whitespace-separated runs of characters. Empty or whitespace-only
 * text yields 0 (splitting an empty string produces a single empty token,
 * which must not be counted as a word).
 *
 * @param text - Text to measure.
 * @returns Estimated reading time in seconds; 2 if the input cannot be processed.
 */
export function estimateReadingTimeSec(text: string): number {
  try {
    const words = text.trim().split(/\s+/).filter((word) => word.length > 0);
    return words.length / 3.5;
  } catch {
    return 2;
  }
}
116
+
117
/**
 * Decides whether `text` asks the listener for a reply.
 *
 * True when the trimmed text ends with "?" or contains one of a fixed set of
 * prompting phrases (case-insensitive, whole words). Returns false if the
 * input cannot be processed.
 *
 * @param text - Text that is about to be spoken.
 */
export function isQuestion(text: string): boolean {
  try {
    const candidate = text.trim();
    return (
      candidate.endsWith("?") ||
      /\b(what do you think|which do you prefer|want me to|should I|shall I|sound good|ready to|let me know|your call|up to you)\b/i.test(candidate)
    );
  } catch {
    return false;
  }
}
125
+
126
/**
 * Picks listen timings for the answer to a spoken question.
 *
 * Classification order (first match wins):
 *  1. open-ended  — starts with how/why/explain/describe/tell me about/
 *                   what do you think, and is not a one-word prompt such as
 *                   "how many" / "how old"            -> wait 6 s, record 15 s
 *  2. yes/no      — contains "is it", "do you", "right", ...  -> 4 s / 5 s
 *  3. choice or one-word answer ("or ", "how many", ...)      -> 5 s / 6 s
 *  4. short factual — starts with what/who/where/when/which   -> 5 s / 8 s
 *  5. anything else (or an input that cannot be processed)    -> 5 s / 10 s
 *
 * The open-ended test runs first because the yes/no pattern is not anchored:
 * checked later, "How do you ...", "Why is it ..." and "What do you think ..."
 * would all be caught by the yes/no or short-factual rules and get a recording
 * window far too short for a free-form answer.
 *
 * @param question - The question text as spoken.
 * @returns `waitSec` (how long to wait for speech) and `maxRecordSec`.
 */
export function estimateQuestionTiming(question: string): { waitSec: number; maxRecordSec: number } {
  try {
    const q = question.trim().toLowerCase();
    const yesNo = /\b(is it|are you|do you|did you|will you|can you|have you|was it|were you|is that|does it|right\??|correct\??|ready\??|sure\??)\b/i;
    const choiceAB = /\b(or )\b/i;
    const oneWord = /\b(what color|what colour|what is the capital|what year|how many|how old|what number|which one|what day|what month)\b/i;
    const shortFactual = /^(what|who|where|when|which)\b/i;
    const openEnded = /^(how|why|explain|describe|tell me about|what do you think)\b/i;

    const expectsOneWord = oneWord.test(q);

    // "how many" / "how old" start like open-ended questions but have short answers.
    if (openEnded.test(q) && !expectsOneWord) return { waitSec: 6, maxRecordSec: 15 };
    if (yesNo.test(q)) return { waitSec: 4, maxRecordSec: 5 };
    if (choiceAB.test(q) || expectsOneWord) return { waitSec: 5, maxRecordSec: 6 };
    if (shortFactual.test(q)) return { waitSec: 5, maxRecordSec: 8 };
  } catch {
    // fall through to the default timing
  }
  return { waitSec: 5, maxRecordSec: 10 };
}
144
+
145
/** Number of deriveVoiceText calls made so far. */
let voiceMessageCount = 0;

/**
 * Chooses what should be spoken for a message.
 *
 * - A non-empty `voiceSummary` is returned unchanged.
 * - Otherwise line breaks are collapsed to spaces; a result of at most 200
 *   characters is returned as is.
 * - Longer text is cut at sentence boundaries: sentences are taken from the
 *   start until adding the next one would exceed 200 characters (the first
 *   sentence is always kept, whatever its length).
 * - Only on the very first call, a randomly chosen hint that more is shown on
 *   screen is appended to the shortened text.
 *
 * @param message - Full message text.
 * @param voiceSummary - Optional ready-made spoken version.
 * @returns The text to hand to the speech engine.
 */
export function deriveVoiceText(message: string, voiceSummary?: string): string {
  voiceMessageCount += 1;
  if (voiceSummary) return voiceSummary;

  const flattened = message.replace(/[\n\r]+/g, " ").trim();
  if (flattened.length <= 200) return flattened;

  const sentenceList = flattened.match(/[^.!?]+[.!?]+/g) ?? [flattened];
  const kept: string[] = [];
  let keptLength = 0;
  for (const sentence of sentenceList) {
    const wouldOverflow = keptLength + sentence.length > 200;
    if (keptLength > 0 && wouldOverflow) break;
    kept.push(sentence);
    keptLength += sentence.length;
  }
  const shortened = kept.join("").trim();

  if (voiceMessageCount !== 1) return shortened;

  const panelPhrases = [
    "Full details in the panel.",
    "More in the panel if you need it.",
    "Rest is on screen.",
    "Details on your screen.",
  ];
  const chosen = panelPhrases[Math.floor(Math.random() * panelPhrases.length)]!;
  return `${shortened} ${chosen}`;
}
172
+
173
/**
 * Speaks `text` by piping piper's raw audio output into `play`.
 *
 * Never throws and never rejects: the returned promise resolves when the
 * player closes, when either process fails, when killCurrentVoice settles it,
 * or after a 5 second safety timeout (playback itself may still be running in
 * that last case).
 *
 * Flow:
 *  1. Any utterance still tracked in currentPiper/currentPlayer is killed.
 *  2. The daemon mute marker is written and both processes are spawned.
 *  3. Audio chunks from piper are forwarded to the player; the first chunk
 *     triggers `onPlaybackStart`.
 *  4. On a natural end the mute marker is removed after 500 ms and, when a wake
 *     daemon is present and `text` is a question, a listen request is scheduled.
 *     Both steps are skipped after a force kill, and also whenever a newer
 *     utterance has taken over in the meantime (that utterance owns the mute
 *     marker and performs its own cleanup).
 *
 * @param text - Text to synthesize.
 * @param settings - Friday settings used to locate the voice model.
 * @param commsDir - Directory for the daemon marker/request files.
 * @param wakeDaemon - Wake daemon handle; null disables auto-listen.
 * @param lastFullMessageText - Full text shown to the user; used to estimate extra reading time.
 * @param lastSpokenText - Text actually spoken; used to estimate extra reading time.
 * @param lastMessageWasQuestion - Accepted for interface compatibility only. It is
 *   passed by value, so this function cannot update the caller's copy.
 * @param log - Accepted for interface compatibility; not used here.
 * @param logError - Receives every error encountered along the way.
 * @param onPlaybackStart - Called once, when the first audio chunk arrives.
 * @param speed - Speech rate multiplier; piper's length scale is `1 / speed`.
 *   Missing, non-finite, zero or negative values fall back to 1.0.
 */
export function speakText(
  text: string,
  settings: FridaySettings,
  commsDir: string,
  wakeDaemon: ChildProcess | null,
  lastFullMessageText: string,
  lastSpokenText: string,
  lastMessageWasQuestion: boolean,
  log: (msg: string) => void,
  logError: (context: string, err: unknown) => void,
  onPlaybackStart?: () => void,
  speed?: number
): Promise<void> {
  return new Promise((resolve) => {
    try {
      // Kill any active playback first — never two voices at once
      if (currentPlayer || currentPiper) {
        try { currentPlayer?.kill(); } catch {}
        try { currentPiper?.kill(); } catch {}
        currentPlayer = null;
        currentPiper = null;
      }

      // Register resolve so killCurrentVoice can settle this promise instantly
      activeSpeakResolve = resolve;

      const modelPath = getVoiceModelPath(settings);
      // A speed of 0, a negative value or NaN would turn into an
      // "Infinity"/negative/"NaN" length scale, so fall back to normal speed.
      const effectiveSpeed =
        typeof speed === "number" && Number.isFinite(speed) && speed > 0 ? speed : 1.0;
      const lengthScale = String(1.0 / effectiveSpeed);

      muteDaemon(commsDir);

      const piper = spawn(
        "piper",
        ["--model", modelPath, "--output-raw", "--length-scale", lengthScale],
        { stdio: ["pipe", "pipe", "ignore"] },
      );

      const player = spawn(
        "play",
        ["-q", "-t", "raw", "-r", "22050", "-e", "signed", "-b", "16", "-c", "1", "-"],
        { stdio: ["pipe", "ignore", "ignore"] },
      );

      // Unref so these processes don't keep the event loop alive on shutdown
      piper.unref();
      player.unref();

      currentPiper = piper;
      currentPlayer = player;

      // Stream 'error' handlers prevent an unhandled EPIPE from crashing the
      // host when one of the processes dies mid-stream.
      piper.stdin.on("error", (e) => logError("piper.stdin", e));
      player.stdin.on("error", (e) => logError("player.stdin", e));

      let started = false;
      let resolved = false;
      let safetyTimer: ReturnType<typeof setTimeout> | undefined;

      // Settles the promise at most once and releases everything tied to it.
      const safeResolve = () => {
        if (resolved) return;
        resolved = true;
        if (safetyTimer !== undefined) clearTimeout(safetyTimer);
        if (activeSpeakResolve === resolve) activeSpeakResolve = null;
        resolve();
      };

      piper.stdout.on("data", (chunk: Buffer) => {
        try {
          if (!started) {
            started = true;
            try { onPlaybackStart?.(); } catch (e) { logError("onPlaybackStart", e); }
          }
          player.stdin.write(chunk);
        } catch {}
      });
      piper.stdout.on("end", () => {
        try { player.stdin.end(); } catch {}
      });

      try { piper.stdin.write(text); piper.stdin.end(); } catch (e) { logError("piper.stdin.write", e); }

      // True while some utterance (necessarily a newer one, because this
      // call's handles are cleared before the check) is tracked.
      const newerUtteranceActive = () => currentPiper !== null || currentPlayer !== null;

      let cleanedUp = false;
      const cleanup = () => {
        // 'error' and 'close' can both fire for the same process; running the
        // cleanup twice would schedule duplicate unmute/listen timers.
        if (cleanedUp) return;
        cleanedUp = true;
        try {
          // Only null refs if they still point to OUR processes
          if (currentPiper === piper) currentPiper = null;
          if (currentPlayer === player) currentPlayer = null;

          // If force-killed, skip all daemon interaction
          if (voiceForceKilled) {
            voiceForceKilled = false;
            safeResolve();
            return;
          }

          // Replaced by a newer speakText call: that call has rewritten the
          // mute marker and will remove it itself.
          if (newerUtteranceActive()) {
            safeResolve();
            return;
          }

          // Natural end — unmute daemon and maybe auto-listen.
          // Timers are unref'd so they never keep the process alive.
          setTimeout(() => {
            try {
              // A new utterance may have started during the delay.
              if (newerUtteranceActive()) return;
              tryUnmuteDaemon(commsDir);
              if (wakeDaemon && isQuestion(text)) {
                // Give the user time to read whatever was shown but not spoken.
                const fullReadTime = estimateReadingTimeSec(lastFullMessageText);
                const spokenTime = estimateReadingTimeSec(lastSpokenText);
                const extraReadMs = Math.max(0, (fullReadTime - spokenTime)) * 1000;
                const baseDelayMs = 500;
                const totalDelay = baseDelayMs + extraReadMs;
                const timing = estimateQuestionTiming(text);
                setTimeout(() => {
                  try {
                    // Do not start listening while another utterance is playing.
                    if (newerUtteranceActive()) return;
                    triggerDaemonListen(commsDir, wakeDaemon, timing.waitSec, timing.maxRecordSec);
                  } catch (e) { logError("triggerDaemonListen.timer", e); }
                }, totalDelay).unref();
              }
            } catch (e) { logError("speakText.cleanup.timer", e); }
          }, 500).unref();
        } catch (e) { logError("speakText.cleanup", e); }
        safeResolve();
      };

      player.on("close", cleanup);
      player.on("error", () => { try { cleanup(); } catch {} });
      piper.on("error", () => { try { player.kill(); } catch {} try { cleanup(); } catch {} });

      // Safety net — settle after 5s at the latest so callers never hang
      safetyTimer = setTimeout(() => safeResolve(), 5000);
      safetyTimer.unref();
    } catch (e) {
      logError("speakText", e);
      // Do not leave a resolver registered for a promise that is already settled.
      if (activeSpeakResolve === resolve) activeSpeakResolve = null;
      resolve(); // always resolve — never leave a dangling promise
    }
  });
}
307
+
308
/**
 * Queues `text` for speech, interrupting whatever is currently being spoken.
 *
 * If playback is flagged as active or a piper/player process is tracked,
 * killCurrentVoice is called first (which also empties the queue). The new
 * item is then appended. This function does not start playback itself;
 * draining the queue is left to the caller via processVoiceQueueSynced.
 *
 * @param text - Text to speak.
 * @param log - Accepted for interface compatibility; not used here.
 * @param logError - Receives any error raised while enqueueing.
 * @param speed - Optional speech rate stored with the queue item.
 */
export function enqueueVoiceWithMessage(
  text: string,
  log: (msg: string) => void,
  logError: (context: string, err: unknown) => void,
  speed?: number
) {
  try {
    // Never allow two voices at once.
    const somethingActive = voicePlaying || currentPlayer || currentPiper;
    if (somethingActive) killCurrentVoice();

    voiceQueue.push({ text, speed });
  } catch (e) {
    logError("enqueueVoice", e);
  }
}
327
+
328
/**
 * Speaks the next queued utterance and re-invokes itself until the queue is empty.
 *
 * For each item the panel is opened first; the message is written to the panel
 * when playback actually starts (and only if the panel reported success).
 * When speakText settles, this function is called again with the same
 * arguments. `voicePlaying` is set while an item is in progress and cleared
 * when the queue is empty or when opening the panel fails.
 *
 * @param ensurePanelOpen - Opens the panel; resolves to whether it is usable.
 * @param writeMessage - Writes the spoken text to the panel.
 * @param settings - Forwarded to speakText.
 * @param commsDir - Forwarded to speakText.
 * @param wakeDaemon - Forwarded to speakText.
 * @param lastFullMessageText - Forwarded to speakText.
 * @param lastSpokenText - Forwarded to speakText.
 * @param lastMessageWasQuestion - Forwarded to speakText.
 * @param log - Forwarded to speakText.
 * @param logError - Receives every error raised along the way.
 */
export function processVoiceQueueSynced(
  ensurePanelOpen: () => Promise<boolean>,
  writeMessage: (text: string) => void,
  settings: FridaySettings,
  commsDir: string,
  wakeDaemon: ChildProcess | null,
  lastFullMessageText: string,
  lastSpokenText: string,
  lastMessageWasQuestion: boolean,
  log: (msg: string) => void,
  logError: (context: string, err: unknown) => void,
): void {
  // Handles the following queue entry with exactly the same arguments.
  const continueWithNext = () => {
    try {
      processVoiceQueueSynced(
        ensurePanelOpen,
        writeMessage,
        settings,
        commsDir,
        wakeDaemon,
        lastFullMessageText,
        lastSpokenText,
        lastMessageWasQuestion,
        log,
        logError
      );
    } catch (e) {
      logError("voiceQueue.next", e);
    }
  };

  try {
    const item = voiceQueue.shift();
    if (item === undefined) {
      // Queue drained.
      voicePlaying = false;
      return;
    }
    voicePlaying = true;

    const speakItem = (panelReady: boolean) => {
      // Mirror the text into the panel at the moment audio starts.
      const showInPanel = () => {
        try {
          if (panelReady) writeMessage(item.text);
        } catch (e) {
          logError("voiceQueue.writeMessage", e);
        }
      };

      speakText(
        item.text,
        settings,
        commsDir,
        wakeDaemon,
        lastFullMessageText,
        lastSpokenText,
        lastMessageWasQuestion,
        log,
        logError,
        showInPanel,
        item.speed
      ).finally(continueWithNext);
    };

    ensurePanelOpen()
      .then(speakItem)
      .catch((e) => {
        logError("voiceQueue.ensurePanel", e);
        voicePlaying = false;
      });
  } catch (e) {
    logError("processVoiceQueueSynced", e);
    voicePlaying = false;
  }
}
389
+
390
// Module-level logging sinks used by the functions in this file that do not
// receive loggers as parameters. Both are no-ops until setLogFunctions is called.
let log: (msg: string) => void = () => { /* no-op until configured */ };
let logError: (context: string, err: unknown) => void = () => { /* no-op until configured */ };

/**
 * Installs the logging callbacks used by this module.
 *
 * @param logFn - Receives informational messages.
 * @param logErrorFn - Receives a context label and the error that occurred.
 */
export function setLogFunctions(
  logFn: (msg: string) => void,
  logErrorFn: (context: string, err: unknown) => void
) {
  [log, logError] = [logFn, logErrorFn];
}