opencode-smart-voice-notify 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,11 +1,15 @@
1
- <!-- Dynamic Header -->
2
- <img width="100%" src="https://capsule-render.vercel.app/api?type=waving&color=0:667eea,100:764ba2&height=120&section=header"/>
3
-
4
- # OpenCode Smart Voice Notify
5
-
6
- ![Coverage](https://img.shields.io/badge/coverage-86.73%25-brightgreen)
7
- ![Version](https://img.shields.io/badge/version-1.2.5-blue)
8
- ![License](https://img.shields.io/badge/license-MIT-green)
1
+ <!-- Dynamic Header -->
2
+ <img width="100%" src="https://capsule-render.vercel.app/api?type=waving&color=0:667eea,100:764ba2&height=120&section=header"/>
3
+
4
+ # OpenCode Smart Voice Notify
5
+
6
+ [![npm version](https://img.shields.io/npm/v/opencode-smart-voice-notify?color=blue&logo=npm)](https://www.npmjs.com/package/opencode-smart-voice-notify)
7
+ [![npm downloads](https://img.shields.io/npm/dm/opencode-smart-voice-notify?color=blue&logo=npm)](https://www.npmjs.com/package/opencode-smart-voice-notify)
8
+ [![GitHub release](https://img.shields.io/github/v/release/MasuRii/opencode-smart-voice-notify?logo=github)](https://github.com/MasuRii/opencode-smart-voice-notify/releases)
9
+ [![CI](https://img.shields.io/github/actions/workflow/status/MasuRii/opencode-smart-voice-notify/test.yml?branch=master&logo=github&label=tests)](https://github.com/MasuRii/opencode-smart-voice-notify/actions/workflows/test.yml)
10
+ [![License](https://img.shields.io/github/license/MasuRii/opencode-smart-voice-notify?color=green)](https://github.com/MasuRii/opencode-smart-voice-notify/blob/master/LICENSE)
11
+ [![Node](https://img.shields.io/node/v/opencode-smart-voice-notify?color=brightgreen&logo=node.js)](https://nodejs.org)
12
+ [![Platform](https://img.shields.io/badge/platform-Windows%20%7C%20macOS%20%7C%20Linux-lightgrey?logo=windows-terminal)](https://github.com/MasuRii/opencode-smart-voice-notify#platform-support-matrix)
9
13
 
10
14
 
11
15
  > **Disclaimer**: This project is not built by the OpenCode team and is not affiliated with [OpenCode](https://opencode.ai) in any way. It is an independent community plugin.
@@ -17,14 +21,15 @@ A smart voice notification plugin for [OpenCode](https://opencode.ai) with **mul
17
21
 
18
22
  ## Features
19
23
 
20
- ### Smart TTS Engine Selection
21
- The plugin automatically tries multiple TTS engines in order, falling back if one fails:
22
-
23
- 1. **OpenAI-Compatible** (Cloud/Self-hosted) - Any OpenAI-compatible `/v1/audio/speech` endpoint (Kokoro, LocalAI, Coqui, AllTalk, OpenAI API, etc.)
24
- 2. **ElevenLabs** (Online) - High-quality, anime-like voices with natural expression
25
- 3. **Edge TTS** (Free) - Microsoft's neural voices, native Node.js implementation (no Python required)
26
- 4. **Windows SAPI** (Offline) - Built-in Windows speech synthesis
27
- 5. **Local Sound Files** (Fallback) - Plays bundled MP3 files if all TTS fails
24
+ ### Smart TTS Engine Selection
25
+ The plugin automatically tries multiple TTS engines in order, falling back if one fails:
26
+
27
+ 1. **OpenAI-Compatible** (Cloud/Self-hosted) - Any OpenAI-compatible `/v1/audio/speech` endpoint (Kokoro, LocalAI, Coqui, AllTalk, OpenAI API, etc.)
28
+ 2. **ElevenLabs** (Online) - High-quality, anime-like voices with natural expression
29
+ 3. **Edge TTS** (Free) - Microsoft's neural voices via Python CLI (recommended) or native npm fallback
30
+ 4. **Windows SAPI** (Offline) - Built-in Windows speech synthesis
31
+ 5. **macOS Say** (Offline) - Built-in macOS speech synthesis
32
+ 6. **Local Sound Files** (Fallback) - Plays bundled MP3 files if all TTS fails
28
33
 
29
34
  ### Smart Notification System
30
35
  - **Sound-first mode**: Play a sound immediately, then speak a TTS reminder if user doesn't respond
@@ -313,16 +318,17 @@ You can replace individual sound files with entire "Sound Themes" (like the clas
313
318
 
314
319
  ### Platform Support Matrix
315
320
 
316
- | Feature | Windows | macOS | Linux |
317
- |---------|:---:|:---:|:---:|
318
- | **Sound Playback** | ✅ | ✅ | ✅ |
319
- | **TTS (Cloud/Edge)** | ✅ | ✅ | ✅ |
320
- | **TTS (Windows SAPI)** | ✅ | ❌ | ❌ |
321
- | **Desktop Notifications** | | ✅ | (req libnotify) |
322
- | **Focus Detection** | | ✅ | |
323
- | **Webhook Integration** | | ✅ | |
324
- | **Wake Monitor** | ✅ | ✅ | ✅ (X11/Gnome) |
325
- | **Volume Control** | ✅ | ✅ | ✅ (Pulse/ALSA) |
321
+ | Feature | Windows | macOS | Linux |
322
+ |---------|:---:|:---:|:---:|
323
+ | **Sound Playback** | ✅ | ✅ | ✅ |
324
+ | **TTS (Cloud/Edge)** | ✅ | ✅ | ✅ |
325
+ | **TTS (Windows SAPI)** | ✅ | ❌ | ❌ |
326
+ | **TTS (macOS Say)** | ❌ | ✅ | ❌ |
327
+ | **Desktop Notifications** | ✅ | ✅ | ✅ (req libnotify) |
328
+ | **Focus Detection** | | ✅ | |
329
+ | **Webhook Integration** | ✅ | ✅ | ✅ |
330
+ | **Wake Monitor** | ✅ | ✅ | ✅ (X11/Gnome) |
331
+ | **Volume Control** | ✅ | ✅ | ✅ (Pulse/ALSA) |
326
332
 
327
333
  ### For OpenAI-Compatible TTS
328
334
  - Any server implementing the `/v1/audio/speech` endpoint
@@ -333,11 +339,18 @@ You can replace individual sound files with entire "Sound Themes" (like the clas
333
339
  - ElevenLabs API key (free tier: 10,000 characters/month)
334
340
  - Internet connection
335
341
 
336
- ### For Edge TTS
337
- - Internet connection (No external dependencies required)
338
-
339
- ### For Windows SAPI
340
- - Windows OS (uses built-in System.Speech)
342
+ ### For Edge TTS
343
+ - Internet connection required (Edge TTS uses Microsoft's online neural voices)
344
+ - **Recommended**: Install Python edge-tts for best reliability: `pip install edge-tts`
345
+ - **Fallback**: Works without Python (uses bundled npm package), but may be less reliable
346
+ - If Edge TTS fails, automatically falls back to SAPI (Windows) or Say (macOS)
347
+
348
+ ### For Windows SAPI
349
+ - Windows OS (uses built-in System.Speech)
350
+
351
+ ### For macOS Say
352
+ - macOS (uses built-in `say` command)
353
+ - Serves as fallback when other TTS engines fail
341
354
 
342
355
  ### For Desktop Notifications
343
356
  - **Windows**: Built-in (uses Toast notifications)
@@ -442,19 +455,131 @@ OpenCode does not automatically update plugins. To update to the latest version:
442
455
  ```bash
443
456
  # Clear the cached plugin
444
457
  rm -rf ~/.cache/opencode/node_modules/opencode-smart-voice-notify
445
-
446
- # Run OpenCode to trigger a fresh install
447
- opencode
448
- ```
449
-
450
- ## License
451
-
452
- MIT
453
-
454
- ## Support
455
-
456
- - Open an issue on [GitHub](https://github.com/MasuRii/opencode-smart-voice-notify/issues)
457
- - Check the [OpenCode docs](https://opencode.ai/docs/plugins)
458
-
459
- <!-- Dynamic Header -->
460
- <img width="100%" src="https://capsule-render.vercel.app/api?type=waving&color=0:667eea,100:764ba2&height=120&section=header"/>
458
+
459
+ # Run OpenCode to trigger a fresh install
460
+ opencode
461
+ ```
462
+
463
+ <!--
464
+ LLM-AGENT-INSTRUCTIONS (Hidden from rendered markdown, visible to AI assistants scanning raw file)
465
+
466
+ ## LLM Instructions
467
+
468
+ > This section provides structured guidance for AI assistants (Claude, GPT, Cursor, Copilot, etc.) helping users set up and configure this plugin.
469
+
470
+ ### Quick Setup (Execute These Steps)
471
+
472
+ When a user asks you to set up this plugin, follow these steps:
473
+
474
+ 1. **Add plugin to OpenCode config** - Edit `~/.config/opencode/opencode.json`:
475
+ ```json
476
+ {
477
+ "$schema": "https://opencode.ai/config.json",
478
+ "plugin": ["opencode-smart-voice-notify@latest"]
479
+ }
480
+ ```
481
+
482
+ 2. **Run OpenCode once** - This auto-generates the configuration file at `~/.config/opencode/smart-voice-notify.jsonc`
483
+
484
+ 3. **Configure based on user's TTS preference**:
485
+ - **Free/No API key**: Use Edge TTS (default, works out of box)
486
+ - **High quality**: Use ElevenLabs (requires API key)
487
+ - **Self-hosted**: Use OpenAI-compatible endpoint (Kokoro, LocalAI, etc.)
488
+ - **Offline Windows**: Use Windows SAPI
489
+ - **Offline macOS**: Use macOS Say (built-in fallback)
490
+
491
+ ### Configuration Tasks
492
+
493
+ #### Task: Enable ElevenLabs TTS
494
+ ```jsonc
495
+ // In ~/.config/opencode/smart-voice-notify.jsonc
496
+ {
497
+ "ttsEngine": "elevenlabs",
498
+ "elevenLabsApiKey": "USER_API_KEY_HERE",
499
+ "elevenLabsVoiceId": "cgSgspJ2msm6clMCkdW9"
500
+ }
501
+ ```
502
+ **Note**: User must get API key from https://elevenlabs.io/app/settings/api-keys
503
+
504
+ #### Task: Enable OpenAI-Compatible TTS (Kokoro, LocalAI, etc.)
505
+ ```jsonc
506
+ {
507
+ "ttsEngine": "openai",
508
+ "openaiTtsEndpoint": "http://localhost:8880",
509
+ "openaiTtsVoice": "af_heart",
510
+ "openaiTtsModel": "kokoro"
511
+ }
512
+ ```
513
+ **Note**: User must have a compatible TTS server running locally or remotely.
514
+
515
+ #### Task: Enable Discord Webhook Notifications
516
+ ```jsonc
517
+ {
518
+ "enableWebhook": true,
519
+ "webhookUrl": "https://discord.com/api/webhooks/...",
520
+ "webhookEvents": ["idle", "permission", "error", "question"],
521
+ "webhookMentionOnPermission": true
522
+ }
523
+ ```
524
+
525
+ #### Task: Enable AI-Generated Messages (requires local AI server)
526
+ ```jsonc
527
+ {
528
+ "enableAIMessages": true,
529
+ "aiEndpoint": "http://localhost:11434/v1",
530
+ "aiModel": "llama3"
531
+ }
532
+ ```
533
+ **Note**: Requires Ollama, LM Studio, or similar running locally.
534
+
535
+ #### Task: Set Up Custom Sound Theme
536
+ ```jsonc
537
+ {
538
+ "soundThemeDir": "themes/warcraft2",
539
+ "randomizeSoundFromTheme": true
540
+ }
541
+ ```
542
+ **Directory structure**: `~/.config/opencode/themes/warcraft2/{idle,permission,error,question}/*.mp3`
543
+
544
+ ### Common Issues & Fixes
545
+
546
+ | Issue | Solution |
547
+ |-------|----------|
548
+ | No sound playing | Check `"enableSound": true` and verify audio files exist in `~/.config/opencode/assets/` |
549
+ | TTS not working | Try fallback: change `"ttsEngine"` to `"edge"` (free, no API key) |
550
+ | ElevenLabs quota exceeded | Plugin auto-falls back to Edge TTS; or set `"ttsEngine": "edge"` |
551
+ | Notifications too frequent | Increase `"ttsReminderDelaySeconds"` (default: 30) |
552
+ | Want to disable completely | Set `"enabled": false` in config |
553
+ | Edge TTS not speaking | Install Python edge-tts: `pip install edge-tts` |
554
+
555
+ ### Platform-Specific Notes
556
+
557
+ - **Windows**: All features supported, SAPI as offline fallback
558
+ - **macOS**: Focus detection available, `say` command as offline fallback
559
+ - **Linux**: Requires `libnotify-bin` for desktop notifications, no offline TTS fallback
560
+
561
+ ### TTS Fallback Chain
562
+
563
+ Primary Engine → Edge TTS → Windows SAPI → macOS Say → Sound File
564
+
565
+ ### Done When
566
+
567
+ The plugin is successfully configured when:
568
+ - [ ] `~/.config/opencode/opencode.json` contains the plugin entry
569
+ - [ ] `~/.config/opencode/smart-voice-notify.jsonc` exists with user's settings
570
+ - [ ] Running `opencode` produces audio notification when agent finishes a task
571
+
572
+ END LLM-AGENT-INSTRUCTIONS
573
+ -->
574
+
575
+ ## License
576
+
577
+ MIT
578
+
579
+ ## Support
580
+
581
+ - Open an issue on [GitHub](https://github.com/MasuRii/opencode-smart-voice-notify/issues)
582
+ - Check the [OpenCode docs](https://opencode.ai/docs/plugins)
583
+
584
+ <!-- Dynamic Header -->
585
+ <img width="100%" src="https://capsule-render.vercel.app/api?type=waving&color=0:667eea,100:764ba2&height=120&section=header"/>
package/index.js CHANGED
@@ -385,14 +385,14 @@ export default async function SmartVoiceNotifyPlugin({ project, client, $, direc
385
385
  pendingReminders.clear();
386
386
  };
387
387
 
388
- /**
389
- * Schedule a TTS reminder if user doesn't respond within configured delay.
390
- * The reminder uses a personalized TTS message.
391
- * @param {string} type - 'idle', 'permission', 'question', or 'error'
392
- * @param {string} message - The TTS message to speak (used directly, supports count-aware messages)
393
- * @param {object} options - Additional options (fallbackSound, permissionCount, questionCount, errorCount, aiContext)
394
- */
395
- const scheduleTTSReminder = (type, message, options = {}) => {
388
+ /**
389
+ * Schedule a TTS reminder if user doesn't respond within configured delay.
390
+ * The reminder generates an AI message WHEN IT FIRES (not immediately), avoiding wasteful early AI calls.
391
+ * @param {string} type - 'idle', 'permission', 'question', or 'error'
392
+ * @param {string} _message - DEPRECATED: No longer used (AI message is generated when reminder fires)
393
+ * @param {object} options - Additional options (fallbackSound, permissionCount, questionCount, errorCount, aiContext)
394
+ */
395
+ const scheduleTTSReminder = (type, _message, options = {}) => {
396
396
  // Check if TTS reminders are enabled
397
397
  if (!config.enableTTSReminder) {
398
398
  debugLog(`scheduleTTSReminder: TTS reminders disabled`);
@@ -825,25 +825,25 @@ export default async function SmartVoiceNotifyPlugin({ project, client, $, direc
825
825
  debugLog('processPermissionBatch: new permissions arrived during sound');
826
826
  }
827
827
 
828
- // Step 3: Check race condition - did user respond during sound?
829
- if (activePermissionId === null) {
830
- debugLog('processPermissionBatch: user responded during sound - aborting');
831
- return;
832
- }
833
-
834
- // Step 4: Generate AI message for reminder AFTER sound played (with context)
835
- const reminderMessage = await getPermissionMessage(batchCount, true, aiContext);
836
-
837
- // Step 5: Schedule TTS reminder if enabled
838
- if (config.enableTTSReminder && reminderMessage) {
839
- scheduleTTSReminder('permission', reminderMessage, {
840
- fallbackSound: config.permissionSound,
841
- permissionCount: batchCount,
842
- aiContext // Pass context for follow-up reminders
843
- });
844
- }
845
-
846
- // Step 6: If TTS-first or both mode, generate and speak immediate message
828
+ // Step 3: Check race condition - did user respond during sound?
829
+ if (activePermissionId === null) {
830
+ debugLog('processPermissionBatch: user responded during sound - aborting');
831
+ return;
832
+ }
833
+
834
+ // Step 4: Schedule TTS reminder if enabled
835
+ // NOTE: The AI message is generated ONLY when the reminder fires (inside scheduleTTSReminder)
836
+ // This avoids wasteful immediate AI generation in sound-first mode - the user might respond before the reminder fires
837
+ // IMPORTANT: Skip TTS reminder entirely in 'sound-only' mode
838
+ if (config.enableTTSReminder && config.notificationMode !== 'sound-only') {
839
+ scheduleTTSReminder('permission', null, {
840
+ fallbackSound: config.permissionSound,
841
+ permissionCount: batchCount,
842
+ aiContext // Pass context for reminder message generation
843
+ });
844
+ }
845
+
846
+ // Step 5: If TTS-first or both mode, generate and speak immediate message
847
847
  if (config.notificationMode === 'tts-first' || config.notificationMode === 'both') {
848
848
  const ttsMessage = await getPermissionMessage(batchCount, false, aiContext);
849
849
  await tts.wakeMonitor();
@@ -931,25 +931,25 @@ export default async function SmartVoiceNotifyPlugin({ project, client, $, direc
931
931
  debugLog('processQuestionBatch: new questions arrived during sound');
932
932
  }
933
933
 
934
- // Step 3: Check race condition - did user respond during sound?
935
- if (activeQuestionId === null) {
936
- debugLog('processQuestionBatch: user responded during sound - aborting');
937
- return;
938
- }
939
-
940
- // Step 4: Generate AI message for reminder AFTER sound played (with context)
941
- const reminderMessage = await getQuestionMessage(totalQuestionCount, true, aiContext);
942
-
943
- // Step 5: Schedule TTS reminder if enabled
944
- if (config.enableTTSReminder && reminderMessage) {
945
- scheduleTTSReminder('question', reminderMessage, {
946
- fallbackSound: config.questionSound,
947
- questionCount: totalQuestionCount,
948
- aiContext // Pass context for follow-up reminders
949
- });
950
- }
951
-
952
- // Step 6: If TTS-first or both mode, generate and speak immediate message
934
+ // Step 3: Check race condition - did user respond during sound?
935
+ if (activeQuestionId === null) {
936
+ debugLog('processQuestionBatch: user responded during sound - aborting');
937
+ return;
938
+ }
939
+
940
+ // Step 4: Schedule TTS reminder if enabled
941
+ // NOTE: The AI message is generated ONLY when the reminder fires (inside scheduleTTSReminder)
942
+ // This avoids wasteful immediate AI generation in sound-first mode - the user might respond before the reminder fires
943
+ // IMPORTANT: Skip TTS reminder entirely in 'sound-only' mode
944
+ if (config.enableTTSReminder && config.notificationMode !== 'sound-only') {
945
+ scheduleTTSReminder('question', null, {
946
+ fallbackSound: config.questionSound,
947
+ questionCount: totalQuestionCount,
948
+ aiContext // Pass context for reminder message generation
949
+ });
950
+ }
951
+
952
+ // Step 5: If TTS-first or both mode, generate and speak immediate message
953
953
  if (config.notificationMode === 'tts-first' || config.notificationMode === 'both') {
954
954
  const ttsMessage = await getQuestionMessage(totalQuestionCount, false, aiContext);
955
955
  await tts.wakeMonitor();
@@ -1188,24 +1188,24 @@ export default async function SmartVoiceNotifyPlugin({ project, client, $, direc
1188
1188
  }
1189
1189
  }
1190
1190
 
1191
- // Step 3: Check race condition - did user respond during sound?
1192
- if (lastUserActivityTime > lastSessionIdleTime) {
1193
- debugLog(`session.idle: user active during sound - aborting`);
1194
- return;
1195
- }
1196
-
1197
- // Step 4: Generate AI message for reminder AFTER sound played
1198
- const reminderMessage = await getSmartMessage('idle', true, config.idleReminderTTSMessages, aiContext);
1199
-
1200
- // Step 5: Schedule TTS reminder if enabled
1201
- if (config.enableTTSReminder && reminderMessage) {
1202
- scheduleTTSReminder('idle', reminderMessage, {
1203
- fallbackSound: config.idleSound,
1204
- aiContext // Pass context for follow-up reminders
1205
- });
1206
- }
1207
-
1208
- // Step 6: If TTS-first or both mode, generate and speak immediate message
1191
+ // Step 3: Check race condition - did user respond during sound?
1192
+ if (lastUserActivityTime > lastSessionIdleTime) {
1193
+ debugLog(`session.idle: user active during sound - aborting`);
1194
+ return;
1195
+ }
1196
+
1197
+ // Step 4: Schedule TTS reminder if enabled
1198
+ // NOTE: The AI message is generated ONLY when the reminder fires (inside scheduleTTSReminder)
1199
+ // This avoids wasteful immediate AI generation in sound-first mode - the user might respond before the reminder fires
1200
+ // IMPORTANT: Skip TTS reminder entirely in 'sound-only' mode
1201
+ if (config.enableTTSReminder && config.notificationMode !== 'sound-only') {
1202
+ scheduleTTSReminder('idle', null, {
1203
+ fallbackSound: config.idleSound,
1204
+ aiContext // Pass context for reminder message generation
1205
+ });
1206
+ }
1207
+
1208
+ // Step 5: If TTS-first or both mode, generate and speak immediate message
1209
1209
  if (config.notificationMode === 'tts-first' || config.notificationMode === 'both') {
1210
1210
  const ttsMessage = await getSmartMessage('idle', false, config.idleTTSMessages, aiContext);
1211
1211
  await tts.wakeMonitor();
@@ -1269,23 +1269,23 @@ export default async function SmartVoiceNotifyPlugin({ project, client, $, direc
1269
1269
  if (config.notificationMode !== 'tts-first') {
1270
1270
  if (!suppressError) {
1271
1271
  await playSound(config.errorSound, 2, 'error'); // Play twice for urgency
1272
- } else {
1273
- debugLog('session.error: sound suppressed (terminal focused)');
1274
- }
1275
- }
1276
-
1277
- // Step 3: Generate AI message for reminder AFTER sound played
1278
- const reminderMessage = await getErrorMessage(1, true);
1279
-
1280
- // Step 4: Schedule TTS reminder if enabled
1281
- if (config.enableTTSReminder && reminderMessage) {
1282
- scheduleTTSReminder('error', reminderMessage, {
1283
- fallbackSound: config.errorSound,
1284
- errorCount: 1
1285
- });
1286
- }
1287
-
1288
- // Step 5: If TTS-first or both mode, generate and speak immediate message
1272
+ } else {
1273
+ debugLog('session.error: sound suppressed (terminal focused)');
1274
+ }
1275
+ }
1276
+
1277
+ // Step 3: Schedule TTS reminder if enabled
1278
+ // NOTE: The AI message is generated ONLY when the reminder fires (inside scheduleTTSReminder)
1279
+ // This avoids wasteful immediate AI generation in sound-first mode - the user might respond before the reminder fires
1280
+ // IMPORTANT: Skip TTS reminder entirely in 'sound-only' mode
1281
+ if (config.enableTTSReminder && config.notificationMode !== 'sound-only') {
1282
+ scheduleTTSReminder('error', null, {
1283
+ fallbackSound: config.errorSound,
1284
+ errorCount: 1
1285
+ });
1286
+ }
1287
+
1288
+ // Step 4: If TTS-first or both mode, generate and speak immediate message
1289
1289
  if (config.notificationMode === 'tts-first' || config.notificationMode === 'both') {
1290
1290
  const ttsMessage = await getErrorMessage(1, false);
1291
1291
  await tts.wakeMonitor();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-smart-voice-notify",
3
- "version": "1.3.0",
3
+ "version": "1.3.1",
4
4
  "description": "Smart voice notification plugin for OpenCode with multiple TTS engines (ElevenLabs, Edge TTS, Windows SAPI), AI-generated dynamic messages, and intelligent reminder system",
5
5
  "main": "index.js",
6
6
  "type": "module",
@@ -50,7 +50,7 @@
50
50
  "dependencies": {
51
51
  "@elevenlabs/elevenlabs-js": "^2.32.0",
52
52
  "detect-terminal": "^2.0.0",
53
- "msedge-tts": "^2.0.3",
53
+ "msedge-tts": "^2.0.4",
54
54
  "node-notifier": "^10.0.1"
55
55
  },
56
56
  "peerDependencies": {
@@ -48,15 +48,15 @@ export const listSoundsInTheme = (themeDir, eventType) => {
48
48
  return [];
49
49
  }
50
50
 
51
- try {
52
- return fs.readdirSync(subDir)
53
- .filter(file => AUDIO_EXTENSIONS.includes(path.extname(file).toLowerCase()))
54
- .sort() // Sort alphabetically for consistent cross-platform behavior
55
- .map(file => path.join(subDir, file))
56
- .filter(filePath => fs.statSync(filePath).isFile());
57
- } catch (error) {
58
- return [];
59
- }
51
+ try {
52
+ return fs.readdirSync(subDir)
53
+ .filter(file => AUDIO_EXTENSIONS.includes(path.extname(file).toLowerCase()))
54
+ .sort() // Sort alphabetically for consistent cross-platform behavior
55
+ .map(file => path.join(subDir, file))
56
+ .filter(filePath => fs.statSync(filePath).isFile());
57
+ } catch (error) {
58
+ return [];
59
+ }
60
60
  };
61
61
 
62
62
  /**
package/util/tts.js CHANGED
@@ -337,30 +337,57 @@ export const createTTS = ({ $, client }) => {
337
337
  }
338
338
  };
339
339
 
340
- /**
341
- * Edge TTS Engine (Free, Neural voices)
342
- */
343
- const speakWithEdgeTTS = async (text) => {
344
- try {
345
- const { MsEdgeTTS, OUTPUT_FORMAT } = await import('msedge-tts');
346
- const tts = new MsEdgeTTS();
347
- const voice = config.edgeVoice || 'en-US-JennyNeural';
348
- const pitch = config.edgePitch || '+0Hz';
349
- const rate = config.edgeRate || '+10%';
350
- const volume = config.edgeVolume || '+0%';
351
-
352
- await tts.setMetadata(voice, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3);
353
-
354
- const { audioFilePath } = await tts.toFile(os.tmpdir(), text, { pitch, rate, volume });
355
-
356
- await playAudioFile(audioFilePath);
357
- try { fs.unlinkSync(audioFilePath); } catch (e) {}
358
- return true;
359
- } catch (e) {
360
- debugLog(`speakWithEdgeTTS error: ${e?.message || String(e) || 'Unknown error'}`);
361
- return false;
362
- }
363
- };
340
+ /**
341
+ * Edge TTS Engine via Python CLI (Free, Neural voices)
342
+ * Uses Python edge-tts package via command line as it's more reliable than Node.js WebSocket libraries.
343
+ * Fallback: tries msedge-tts npm package if Python edge-tts is not available.
344
+ */
345
+ const speakWithEdgeTTS = async (text) => {
346
+ const voice = config.edgeVoice || 'en-US-JennyNeural';
347
+ const pitch = config.edgePitch || '+0Hz';
348
+ const rate = config.edgeRate || '+10%';
349
+ const volume = config.edgeVolume || '+0%';
350
+ const tempFile = path.join(os.tmpdir(), `opencode-edge-tts-${Date.now()}.mp3`);
351
+
352
+ // Escape text for shell (replace quotes with escaped quotes)
353
+ const escapedText = text.replace(/"/g, '\\"');
354
+
355
+ // Try Python edge-tts first (more reliable due to aiohttp WebSocket handling)
356
+ if ($) {
357
+ try {
358
+ // Use proper template literal syntax with individual arguments
359
+ await $`edge-tts --voice ${voice} --rate ${rate} --volume ${volume} --pitch ${pitch} --text ${escapedText} --write-media ${tempFile}`.quiet().nothrow();
360
+
361
+ if (fs.existsSync(tempFile)) {
362
+ await playAudioFile(tempFile);
363
+ try { fs.unlinkSync(tempFile); } catch (e) {}
364
+ debugLog('speakWithEdgeTTS: success via Python edge-tts CLI');
365
+ return true;
366
+ }
367
+ } catch (e) {
368
+ debugLog(`speakWithEdgeTTS: Python CLI failed: ${e?.message || 'unknown'}, trying npm package...`);
369
+ // Fall through to try npm package
370
+ }
371
+ }
372
+
373
+ // Fallback to msedge-tts npm package
374
+ try {
375
+ const { MsEdgeTTS, OUTPUT_FORMAT } = await import('msedge-tts');
376
+ const tts = new MsEdgeTTS();
377
+
378
+ await tts.setMetadata(voice, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3);
379
+
380
+ const { audioFilePath } = await tts.toFile(os.tmpdir(), text, { pitch, rate, volume });
381
+
382
+ await playAudioFile(audioFilePath);
383
+ try { fs.unlinkSync(audioFilePath); } catch (e) {}
384
+ debugLog('speakWithEdgeTTS: success via msedge-tts npm package');
385
+ return true;
386
+ } catch (e) {
387
+ debugLog(`speakWithEdgeTTS error: ${e?.message || String(e) || 'Unknown error'}`);
388
+ return false;
389
+ }
390
+ };
364
391
 
365
392
  /**
366
393
  * Windows SAPI Engine (Offline, Built-in)
@@ -628,46 +655,55 @@ public static extern int waveOutGetVolume(IntPtr hwo, out uint dwVolume);
628
655
  }
629
656
  };
630
657
 
631
- /**
632
- * Main Speak function with fallback chain
633
- * Cascade: ElevenLabs -> Edge TTS -> Windows SAPI -> macOS Say -> Sound File
634
- */
635
- const speak = async (message, options = {}) => {
636
- const activeConfig = { ...config, ...options };
637
- if (!activeConfig.enableSound) return false;
638
-
639
- if (activeConfig.enableTTS) {
640
- let success = false;
641
- const engine = activeConfig.ttsEngine || 'elevenlabs';
642
-
643
- if (engine === 'openai') {
644
- success = await speakWithOpenAI(message);
645
- if (!success) success = await speakWithEdgeTTS(message);
646
- if (!success) success = await speakWithSAPI(message);
647
- } else if (engine === 'elevenlabs') {
648
- success = await speakWithElevenLabs(message);
649
- if (!success) success = await speakWithEdgeTTS(message);
650
- if (!success) success = await speakWithSAPI(message);
651
- } else if (engine === 'edge') {
652
- success = await speakWithEdgeTTS(message);
653
- if (!success) success = await speakWithSAPI(message);
654
- } else if (engine === 'sapi') {
655
- success = await speakWithSAPI(message);
656
- if (!success) success = await speakWithSay(message);
657
- }
658
-
659
- if (success) return true;
660
- }
661
-
662
- if (activeConfig.fallbackSound) {
663
- const soundPath = path.isAbsolute(activeConfig.fallbackSound)
664
- ? activeConfig.fallbackSound
665
- : path.join(getConfigDir(), activeConfig.fallbackSound);
666
-
667
- await playAudioFile(soundPath, activeConfig.loops || 1);
668
- }
669
- return false;
670
- };
658
+ /**
659
+ * Main Speak function with fallback chain
660
+ * Cascade: Primary Engine -> Edge TTS -> Windows SAPI -> macOS Say -> Sound File
661
+ *
662
+ * Fallback ensures TTS works even if:
663
+ * - Python edge-tts not installed (falls to npm package, then SAPI/Say)
664
+ * - msedge-tts npm fails (403 errors - falls to SAPI/Say)
665
+ * - User is on macOS without edge-tts (falls to built-in 'say' command)
666
+ * - User is on Linux without edge-tts (falls to sound file only)
667
+ */
668
+ const speak = async (message, options = {}) => {
669
+ const activeConfig = { ...config, ...options };
670
+ if (!activeConfig.enableSound) return false;
671
+
672
+ if (activeConfig.enableTTS) {
673
+ let success = false;
674
+ const engine = activeConfig.ttsEngine || 'elevenlabs';
675
+
676
+ if (engine === 'openai') {
677
+ success = await speakWithOpenAI(message);
678
+ if (!success) success = await speakWithEdgeTTS(message);
679
+ if (!success) success = await speakWithSAPI(message);
680
+ if (!success) success = await speakWithSay(message); // macOS fallback
681
+ } else if (engine === 'elevenlabs') {
682
+ success = await speakWithElevenLabs(message);
683
+ if (!success) success = await speakWithEdgeTTS(message);
684
+ if (!success) success = await speakWithSAPI(message);
685
+ if (!success) success = await speakWithSay(message); // macOS fallback
686
+ } else if (engine === 'edge') {
687
+ success = await speakWithEdgeTTS(message);
688
+ if (!success) success = await speakWithSAPI(message);
689
+ if (!success) success = await speakWithSay(message); // macOS fallback
690
+ } else if (engine === 'sapi') {
691
+ success = await speakWithSAPI(message);
692
+ if (!success) success = await speakWithSay(message);
693
+ }
694
+
695
+ if (success) return true;
696
+ }
697
+
698
+ if (activeConfig.fallbackSound) {
699
+ const soundPath = path.isAbsolute(activeConfig.fallbackSound)
700
+ ? activeConfig.fallbackSound
701
+ : path.join(getConfigDir(), activeConfig.fallbackSound);
702
+
703
+ await playAudioFile(soundPath, activeConfig.loops || 1);
704
+ }
705
+ return false;
706
+ };
671
707
 
672
708
  return {
673
709
  speak,