klaudio 0.10.1 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -2
  2. package/src/tts.js +104 -19
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "klaudio",
3
- "version": "0.10.1",
3
+ "version": "0.10.3",
4
4
  "description": "Add sound effects to your coding sessions — play sounds when tasks complete, notifications arrive, and more",
5
5
  "type": "module",
6
6
  "bin": {
@@ -32,7 +32,6 @@
32
32
  "ink": "^6.8.0",
33
33
  "ink-select-input": "^6.2.0",
34
34
  "ink-spinner": "^5.0.0",
35
- "kokoro-js": "^1.2.1",
36
35
  "react": "^19.2.4"
37
36
  },
38
37
  "engines": {
package/src/tts.js CHANGED
@@ -40,17 +40,70 @@ const KOKORO_VOICES = [
40
40
  // Singleton: reuse the loaded model across calls
41
41
  let kokoroInstance = null;
42
42
  let kokoroLoadPromise = null;
43
+ const KOKORO_DIR = join(homedir(), ".klaudio", "kokoro");
43
44
 
/**
 * Ensure kokoro-js is installed in ~/.klaudio/kokoro (KOKORO_DIR).
 *
 * Returns immediately when `node_modules/kokoro-js/package.json` already
 * exists under KOKORO_DIR. Otherwise it creates the directory, writes a
 * minimal private package.json and runs `npm install kokoro-js` there with a
 * 3 minute timeout.
 *
 * @returns {Promise<void>} Resolves once the package is present or installed.
 * @throws {Error} If the directory cannot be prepared, or if npm fails; in the
 *   latter case npm's stderr is included in the message and the original
 *   child-process error is attached as `cause`.
 */
async function ensureKokoroInstalled() {
  const kokoroMod = join(KOKORO_DIR, "node_modules", "kokoro-js");
  try {
    await stat(join(kokoroMod, "package.json"));
    return; // already installed
  } catch { /* needs install */ }

  await mkdir(KOKORO_DIR, { recursive: true });
  await fsWriteFile(join(KOKORO_DIR, "package.json"), '{"private":true}', "utf-8");

  const isWindows = platform() === "win32";
  const npmCmd = isWindows ? "npm.cmd" : "npm";
  await new Promise((resolve, reject) => {
    execFile(npmCmd, ["install", "kokoro-js"], {
      cwd: KOKORO_DIR,
      windowsHide: true,
      timeout: 180000,
      // On Windows npm is a .cmd shim, which cannot be spawned directly and
      // must go through a shell. The arguments are fixed literals, so running
      // through the shell adds no injection surface.
      shell: isWindows,
    }, (err, _stdout, stderr) => {
      if (!err) {
        resolve();
        return;
      }
      // Surface npm's own output: the bare child-process error rarely says
      // why the install failed.
      const detail = String(stderr ?? "").trim() || err.message;
      reject(new Error(`npm install kokoro-js failed: ${detail}`, { cause: err }));
    });
  });
}
68
+
/**
 * Try to import kokoro-js from various locations.
 *
 * Attempts, in order:
 *   1. the private install under KOKORO_DIR, resolved through `createRequire`;
 *   2. a regular `import("kokoro-js")` using the normal resolution of this
 *      module (dev environment or globally installed).
 *
 * @returns {Promise<object>} The loaded kokoro-js module.
 * @throws {Error} "kokoro-js not available" when every attempt fails. The
 *   individual failures are attached as an `AggregateError` in `cause` so the
 *   real reason (missing package, broken native dependency, …) is not lost.
 */
async function importKokoro() {
  const failures = [];

  // 1. Try local ~/.klaudio/kokoro install
  try {
    const { createRequire } = await import("node:module");
    const req = createRequire(join(KOKORO_DIR, "node_modules", "kokoro-js", "package.json"));
    return req("kokoro-js");
  } catch (err) {
    failures.push(err); // not there
  }

  // 2. Try global/project import (dev environment or globally installed)
  try {
    return await import("kokoro-js");
  } catch (err) {
    failures.push(err); // not available
  }

  throw new Error("kokoro-js not available", {
    cause: new AggregateError(failures, "all kokoro-js import attempts failed"),
  });
}
87
+
88
+ /**
89
+ * Load the Kokoro TTS model (singleton).
90
+ * Auto-installs kokoro-js on first use, then downloads ~25MB model on first generate.
47
91
  */
48
92
  async function getKokoro() {
49
93
  if (kokoroInstance) return kokoroInstance;
50
94
  if (kokoroLoadPromise) return kokoroLoadPromise;
51
95
 
52
96
  kokoroLoadPromise = (async () => {
53
- const { KokoroTTS } = await import("kokoro-js");
97
+ // Try import first (already installed?), otherwise install then import
98
+ let mod;
99
+ try {
100
+ mod = await importKokoro();
101
+ } catch {
102
+ await ensureKokoroInstalled();
103
+ mod = await importKokoro();
104
+ }
105
+
106
+ const { KokoroTTS } = mod;
54
107
  kokoroInstance = await KokoroTTS.from_pretrained(
55
108
  "onnx-community/Kokoro-82M-v1.0-ONNX",
56
109
  { dtype: "q4", device: "cpu" },
@@ -260,9 +313,33 @@ function speakMacOS(text) {
260
313
 
261
314
  // ── Public API ──────────────────────────────────────────────────
262
315
 
// In-process guard: true while a speak() call is active in this process.
let speaking = false;
// Cross-process guard: a lock file holding the pid of the speaking process.
const TTS_LOCK = join(tmpdir(), ".klaudio-tts-lock");

/**
 * Try to acquire a cross-process TTS lock.
 *
 * The lock file is created with the exclusive `wx` flag, so when several
 * processes race only one creation succeeds. A separate "stat, then write"
 * sequence would let all of them believe they hold the lock.
 * Stale locks (>30s since last modification) are removed and creation is
 * retried once. That takeover is best-effort: two processes that both decide
 * the same lock is stale can still race each other in that narrow window.
 *
 * @returns {Promise<boolean>} true if acquired, false if another process is
 *   speaking or the lock file cannot be created. Never throws.
 */
async function acquireTTSLock() {
  const STALE_MS = 30000;

  for (let attempt = 0; attempt < 2; attempt += 1) {
    try {
      await fsWriteFile(TTS_LOCK, String(process.pid), { encoding: "utf-8", flag: "wx" });
      return true;
    } catch (err) {
      // Anything other than "already exists" (permissions, missing tmp dir, …)
      // means we cannot lock at all.
      if (err?.code !== "EEXIST") return false;
    }

    try {
      const lockStat = await stat(TTS_LOCK);
      if (Date.now() - lockStat.mtimeMs < STALE_MS) return false; // fresh lock, skip
      const { unlink } = await import("node:fs/promises");
      await unlink(TTS_LOCK); // stale: clear it and retry the exclusive create
    } catch (err) {
      // ENOENT: the holder released between our create attempt and the stat;
      // simply retry. Any other failure: give up.
      if (err?.code !== "ENOENT") return false;
    }
  }
  return false;
}
334
+
/**
 * Release the cross-process TTS lock.
 *
 * The lock file is removed only if it still records this process's pid. If
 * our lock went stale and another process took it over, the file now belongs
 * to that process and must be left alone.
 *
 * @returns {Promise<void>} Always resolves; a missing or unreadable lock file
 *   is ignored because release is best-effort cleanup.
 */
async function releaseTTSLock() {
  try {
    const { readFile, unlink } = await import("node:fs/promises");
    const owner = (await readFile(TTS_LOCK, "utf-8")).trim();
    if (owner !== String(process.pid)) return; // not ours any more
    await unlink(TTS_LOCK);
  } catch { /* ignore */ }
}
338
+
263
339
  /**
264
340
  * Speak text using the best available TTS engine.
265
341
  * Priority: Kokoro (CPU) → macOS say (darwin only) → Piper
342
+ * Only one speak() call runs at a time — concurrent calls are skipped.
266
343
  *
267
344
  * @param {string} text - Text to speak
268
345
  * @param {object} [options]
@@ -271,26 +348,34 @@ function speakMacOS(text) {
271
348
  */
export async function speak(text, options = {}) {
  if (!text) return;
  if (speaking) return; // in-process mutex

  // Claim the in-process guard synchronously, before the first await.
  // Setting it only after the lock is acquired would let several calls issued
  // in the same tick all pass the check above.
  speaking = true;
  let lockAcquired = false;

  try {
    lockAcquired = await acquireTTSLock(); // cross-process mutex
    if (!lockAcquired) return;

    const { voice, onProgress } = typeof options === "function"
      ? { voice: null, onProgress: options } // backwards compat: speak(text, onProgress)
      : (options ?? {}); // tolerate an explicit null

    // Try Kokoro first (works on all platforms, best quality)
    try {
      await speakKokoro(text, voice);
      return;
    } catch {
      // Kokoro unavailable — fall through
    }

    // `return await` (not a bare `return`) in both fallbacks below: the
    // finally block must not release the lock until playback has finished.

    // macOS: use built-in `say`
    if (platform() === "darwin") {
      return await speakMacOS(text);
    }

    // Fallback: Piper
    return await speakPiper(text, onProgress);
  } finally {
    try {
      // Release the file lock only if this call actually took it.
      if (lockAcquired) await releaseTTSLock();
    } finally {
      speaking = false;
    }
  }
}
295
380
 
296
381
  export { KOKORO_PRESET_VOICES, KOKORO_VOICES, KOKORO_DEFAULT_VOICE };