openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/.env.example +104 -0
  2. package/Dockerfile +30 -0
  3. package/LICENSE +21 -0
  4. package/README.md +638 -0
  5. package/SETUP.md +360 -0
  6. package/app.py +232 -0
  7. package/auto-approve-devices.js +111 -0
  8. package/cli/index.js +372 -0
  9. package/config/__init__.py +4 -0
  10. package/config/default.yaml +43 -0
  11. package/config/flags.yaml +67 -0
  12. package/config/loader.py +203 -0
  13. package/config/providers.yaml +71 -0
  14. package/config/speech_normalization.yaml +182 -0
  15. package/config/theme.json +4 -0
  16. package/data/greetings.json +25 -0
  17. package/default-pages/ai-image-creator.html +915 -0
  18. package/default-pages/bulk-image-uploader.html +492 -0
  19. package/default-pages/desktop.html +2865 -0
  20. package/default-pages/file-explorer.html +854 -0
  21. package/default-pages/interactive-map.html +655 -0
  22. package/default-pages/style-guide.html +1005 -0
  23. package/default-pages/website-setup.html +1623 -0
  24. package/deploy/openclaw/Dockerfile +46 -0
  25. package/deploy/openvoiceui.service +30 -0
  26. package/deploy/setup-nginx.sh +50 -0
  27. package/deploy/setup-sudo.sh +306 -0
  28. package/deploy/skill-runner/Dockerfile +19 -0
  29. package/deploy/skill-runner/requirements.txt +14 -0
  30. package/deploy/skill-runner/server.py +269 -0
  31. package/deploy/supertonic/Dockerfile +22 -0
  32. package/deploy/supertonic/server.py +79 -0
  33. package/docker-compose.pinokio.yml +11 -0
  34. package/docker-compose.yml +59 -0
  35. package/greetings.json +25 -0
  36. package/index.html +65 -0
  37. package/inject-device-identity.js +142 -0
  38. package/package.json +82 -0
  39. package/profiles/default.json +114 -0
  40. package/profiles/manager.py +354 -0
  41. package/profiles/schema.json +337 -0
  42. package/prompts/voice-system-prompt.md +149 -0
  43. package/providers/__init__.py +39 -0
  44. package/providers/base.py +63 -0
  45. package/providers/llm/__init__.py +12 -0
  46. package/providers/llm/base.py +71 -0
  47. package/providers/llm/clawdbot_provider.py +112 -0
  48. package/providers/llm/zai_provider.py +115 -0
  49. package/providers/registry.py +320 -0
  50. package/providers/stt/__init__.py +12 -0
  51. package/providers/stt/base.py +58 -0
  52. package/providers/stt/webspeech_provider.py +49 -0
  53. package/providers/stt/whisper_provider.py +100 -0
  54. package/providers/tts/__init__.py +20 -0
  55. package/providers/tts/base.py +91 -0
  56. package/providers/tts/groq_provider.py +74 -0
  57. package/providers/tts/supertonic_provider.py +72 -0
  58. package/requirements.txt +38 -0
  59. package/routes/__init__.py +10 -0
  60. package/routes/admin.py +515 -0
  61. package/routes/canvas.py +1315 -0
  62. package/routes/chat.py +51 -0
  63. package/routes/conversation.py +2158 -0
  64. package/routes/elevenlabs_hybrid.py +306 -0
  65. package/routes/greetings.py +98 -0
  66. package/routes/icons.py +279 -0
  67. package/routes/image_gen.py +364 -0
  68. package/routes/instructions.py +190 -0
  69. package/routes/music.py +838 -0
  70. package/routes/onboarding.py +43 -0
  71. package/routes/pi.py +62 -0
  72. package/routes/profiles.py +215 -0
  73. package/routes/report_issue.py +68 -0
  74. package/routes/static_files.py +533 -0
  75. package/routes/suno.py +664 -0
  76. package/routes/theme.py +81 -0
  77. package/routes/transcripts.py +199 -0
  78. package/routes/vision.py +348 -0
  79. package/routes/workspace.py +288 -0
  80. package/server.py +1510 -0
  81. package/services/__init__.py +1 -0
  82. package/services/auth.py +143 -0
  83. package/services/canvas_versioning.py +239 -0
  84. package/services/db_pool.py +107 -0
  85. package/services/gateway.py +16 -0
  86. package/services/gateway_manager.py +333 -0
  87. package/services/gateways/__init__.py +12 -0
  88. package/services/gateways/base.py +110 -0
  89. package/services/gateways/compat.py +264 -0
  90. package/services/gateways/openclaw.py +1134 -0
  91. package/services/health.py +100 -0
  92. package/services/memory_client.py +455 -0
  93. package/services/paths.py +26 -0
  94. package/services/speech_normalizer.py +285 -0
  95. package/services/tts.py +270 -0
  96. package/setup-config.js +262 -0
  97. package/sounds/air_horn.mp3 +0 -0
  98. package/sounds/bruh.mp3 +0 -0
  99. package/sounds/crowd_cheer.mp3 +0 -0
  100. package/sounds/gunshot.mp3 +0 -0
  101. package/sounds/impact.mp3 +0 -0
  102. package/sounds/lets_go.mp3 +0 -0
  103. package/sounds/record_stop.mp3 +0 -0
  104. package/sounds/rewind.mp3 +0 -0
  105. package/sounds/sad_trombone.mp3 +0 -0
  106. package/sounds/scratch_long.mp3 +0 -0
  107. package/sounds/yeah.mp3 +0 -0
  108. package/src/adapters/ClawdBotAdapter.js +264 -0
  109. package/src/adapters/_template.js +133 -0
  110. package/src/adapters/elevenlabs-classic.js +841 -0
  111. package/src/adapters/elevenlabs-hybrid.js +812 -0
  112. package/src/adapters/hume-evi.js +676 -0
  113. package/src/admin.html +1339 -0
  114. package/src/app.js +8802 -0
  115. package/src/core/Config.js +173 -0
  116. package/src/core/EmotionEngine.js +307 -0
  117. package/src/core/EventBridge.js +180 -0
  118. package/src/core/EventBus.js +117 -0
  119. package/src/core/VoiceSession.js +607 -0
  120. package/src/face/BaseFace.js +259 -0
  121. package/src/face/EyeFace.js +208 -0
  122. package/src/face/HaloSmokeFace.js +509 -0
  123. package/src/face/manifest.json +27 -0
  124. package/src/face/previews/eyes.svg +16 -0
  125. package/src/face/previews/orb.svg +29 -0
  126. package/src/features/MusicPlayer.js +620 -0
  127. package/src/features/Soundboard.js +128 -0
  128. package/src/providers/DeepgramSTT.js +472 -0
  129. package/src/providers/DeepgramStreamingSTT.js +766 -0
  130. package/src/providers/GroqSTT.js +559 -0
  131. package/src/providers/TTSPlayer.js +323 -0
  132. package/src/providers/WebSpeechSTT.js +479 -0
  133. package/src/providers/tts/BaseTTSProvider.js +81 -0
  134. package/src/providers/tts/HumeProvider.js +77 -0
  135. package/src/providers/tts/SupertonicProvider.js +174 -0
  136. package/src/providers/tts/index.js +140 -0
  137. package/src/shell/adapter-registry.js +154 -0
  138. package/src/shell/caller-bridge.js +35 -0
  139. package/src/shell/camera-bridge.js +28 -0
  140. package/src/shell/canvas-bridge.js +32 -0
  141. package/src/shell/commercial-bridge.js +44 -0
  142. package/src/shell/face-bridge.js +44 -0
  143. package/src/shell/music-bridge.js +60 -0
  144. package/src/shell/orchestrator.js +233 -0
  145. package/src/shell/profile-discovery.js +303 -0
  146. package/src/shell/sounds-bridge.js +28 -0
  147. package/src/shell/transcript-bridge.js +61 -0
  148. package/src/shell/waveform-bridge.js +33 -0
  149. package/src/styles/base.css +2862 -0
  150. package/src/styles/face.css +417 -0
  151. package/src/styles/pi-overrides.css +89 -0
  152. package/src/styles/theme-dark.css +67 -0
  153. package/src/test-tts.html +175 -0
  154. package/src/ui/AppShell.js +544 -0
  155. package/src/ui/ProfileSwitcher.js +228 -0
  156. package/src/ui/SessionControl.js +240 -0
  157. package/src/ui/face/FacePicker.js +195 -0
  158. package/src/ui/face/FaceRenderer.js +309 -0
  159. package/src/ui/settings/PlaylistEditor.js +366 -0
  160. package/src/ui/settings/SettingsPanel.css +684 -0
  161. package/src/ui/settings/SettingsPanel.js +419 -0
  162. package/src/ui/settings/TTSVoicePreview.js +210 -0
  163. package/src/ui/themes/ThemeManager.js +213 -0
  164. package/src/ui/visualizers/BaseVisualizer.js +29 -0
  165. package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
  166. package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
  167. package/static/emulators/jsdos/js-dos.css +1 -0
  168. package/static/emulators/jsdos/js-dos.js +22 -0
  169. package/static/favicon.svg +55 -0
  170. package/static/icons/apple-touch-icon.png +0 -0
  171. package/static/icons/favicon-32.png +0 -0
  172. package/static/icons/icon-192.png +0 -0
  173. package/static/icons/icon-512.png +0 -0
  174. package/static/install.html +449 -0
  175. package/static/manifest.json +26 -0
  176. package/static/sw.js +21 -0
  177. package/tts_providers/__init__.py +136 -0
  178. package/tts_providers/base_provider.py +319 -0
  179. package/tts_providers/groq_provider.py +155 -0
  180. package/tts_providers/hume_provider.py +226 -0
  181. package/tts_providers/providers_config.json +119 -0
  182. package/tts_providers/qwen3_provider.py +371 -0
  183. package/tts_providers/resemble_provider.py +315 -0
  184. package/tts_providers/supertonic_provider.py +557 -0
  185. package/tts_providers/supertonic_tts.py +399 -0
@@ -0,0 +1,479 @@
1
+ /**
2
+ * WebSpeechSTT — Browser-native speech recognition provider (Web Speech API)
3
+ * Free, no API keys needed.
4
+ *
5
+ * Usage:
6
+ * import { WebSpeechSTT, WakeWordDetector } from './WebSpeechSTT.js';
7
+ *
8
+ * const stt = new WebSpeechSTT();
9
+ * stt.onResult = (text) => console.log('Heard:', text);
10
+ * await stt.start();
11
+ */
12
+
13
// Detect iOS — affects mic stream lifetime and recognition restart timing.
// MSStream check excludes IE11 on Windows Phone, which spoofs an iOS UA.
const _isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent) && !window.MSStream;
15
+
16
// Report genuine STT failures to the backend so session monitoring can track
// them. Callers filter out no-speech/aborted, which are routine in Chrome.
// Best-effort by design: any failure here is swallowed so reporting can
// never interfere with speech recognition itself.
function _reportSTTError(error, message, source = 'stt') {
  try {
    const payload = JSON.stringify({ error, message, provider: 'webspeech', source });
    fetch('/api/stt-events', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: payload,
    }).catch(() => {}); // fire-and-forget, never block STT
  } catch (_) {}
}
27
+
28
+ // ===== WEB SPEECH STT =====
29
+ // Browser-native speech recognition (free, no API keys needed)
30
// ===== WEB SPEECH STT =====
// Browser-native speech recognition (free, no API keys needed).
//
// Lifecycle: start() grabs the microphone and runs continuous recognition.
// Final transcripts are accumulated until silenceDelayMs of quiet, then the
// whole utterance is delivered through onResult. mute()/resume() bracket TTS
// playback so the engine does not transcribe its own speech, and the ptt*
// methods implement push-to-talk on top of the same recognition instance.
class WebSpeechSTT {
  constructor() {
    this.recognition = null;   // SpeechRecognition instance, lazily created by _ensureRecognition()
    this.isListening = false;  // true between start() and stop(); gates the onend auto-restart
    this.onResult = null;      // callback(text): complete utterance ready to send to the AI
    this.onError = null;       // callback(code): non-routine recognition/mic failures
    this.onListenFinal = null; // Listen panel hook — called with each final transcript
    this.onInterim = null; // Listen panel hook — interim text
    // NOTE(review): onInterim is never invoked inside this class — presumably
    // driven by the app.js monkey-patches mentioned on _ensureRecognition(); confirm.

    // Silence detection for continuous listening
    this.silenceTimer = null;   // pending flush timer; null when no flush is scheduled
    this.silenceDelayMs = 3500; // 3.5s — 3s was cutting people off mid-sentence
    this.accumulatedText = '';  // final segments joined with spaces, flushed on silence
    this.isProcessing = false;  // true while the AI handles a result; onresult drops audio

    // PTT support
    this._micMuted = false;   // PTT toggled on, button not held — discard all audio
    this._pttHolding = false; // PTT button currently held down

    // Keep mic stream alive during active listening (critical on iOS —
    // releasing and re-acquiring the stream can re-trigger permission prompts)
    this._micStream = null;

    // Store constructor ref — recognition instance is created on first start(),
    // NOT in constructor. Having two SpeechRecognition instances (even if only
    // one is started) causes Chrome to route audio incorrectly, breaking wake word.
    this._SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!this._SpeechRecognition) {
      console.warn('Web Speech API not supported in this browser');
    }
  }

  // Create the recognition instance on first use and wire up all handlers.
  // Called once from start(), then the instance persists forever.
  // Monkey-patches in app.js poll for stt.recognition and apply within 200ms.
  // Returns false when the Web Speech API is unavailable.
  _ensureRecognition() {
    if (this.recognition) return true;
    if (!this._SpeechRecognition) return false;

    this.recognition = new this._SpeechRecognition();
    this.recognition.continuous = true;     // keep recognizing across pauses
    this.recognition.interimResults = true; // interim events also reset the silence timer
    this.recognition.lang = 'en-US';
    this.recognition.maxAlternatives = 1;

    this.recognition.onresult = (event) => {
      if (this.isProcessing) return; // AI busy (or muted via mute()) — drop audio
      if (this._micMuted) return; // PTT mode — mic should be silent

      // ANY result (interim or final) means the user is still speaking.
      // Reset the silence timer on every event so we never cut off mid-speech.
      if (this.silenceTimer) {
        clearTimeout(this.silenceTimer);
        this.silenceTimer = null;
      }

      // Collect only the finalized segments from this event.
      let finalTranscript = '';
      for (let i = event.resultIndex; i < event.results.length; i++) {
        if (event.results[i].isFinal) {
          finalTranscript += event.results[i][0].transcript;
        }
      }

      if (finalTranscript.trim()) {
        // APPEND — user can speak across multiple Chrome final results
        this.accumulatedText = this.accumulatedText
          ? this.accumulatedText + ' ' + finalTranscript.trim()
          : finalTranscript.trim();
        console.log('STT Final:', finalTranscript, '| Accumulated:', this.accumulatedText);
        // Listen panel hook
        if (this.onListenFinal) this.onListenFinal(finalTranscript.trim());
      }

      // Start/restart silence timer — only fires when Chrome stops sending ANY results
      if (this.accumulatedText) {
        this.silenceTimer = setTimeout(() => {
          const text = this.accumulatedText.trim();
          // Filter out garbage: require at least 2 alphanumeric characters
          // (drops punctuation-only and single-letter blips).
          const meaningful = text.replace(/[^a-zA-Z0-9]/g, '');
          if (text && meaningful.length >= 2 && !this.isProcessing) {
            console.log('Sending to AI:', text);
            this.isProcessing = true; // block further audio until resetProcessing()/resume()
            if (this.onResult) this.onResult(text);
            this.accumulatedText = '';
          } else if (text) {
            console.log('STT filtered garbage:', text);
            this.accumulatedText = '';
          }
        }, this.silenceDelayMs);
      }
    };

    this.recognition.onerror = (event) => {
      // no-speech / aborted happen constantly in continuous mode — not real errors.
      if (event.error === 'no-speech' || event.error === 'aborted') {
        console.log('STT:', event.error, '(normal, will auto-restart)');
        return;
      }
      if (event.error === 'audio-capture') {
        console.error('STT: audio-capture — microphone hardware unavailable');
        _reportSTTError('audio-capture', 'Microphone hardware unavailable', 'stt');
        if (this.onError) this.onError('audio-capture');
        return;
      }
      console.error('STT Error:', event.error);
      _reportSTTError(event.error, `STT recognition error: ${event.error}`, 'stt');
      if (this.onError) this.onError(event.error);
    };

    this.recognition.onend = () => {
      // Chrome ends recognition periodically; restart unless we are stopped,
      // busy with the AI, or muted. The delay lets Chrome release its speech
      // service connection first (iOS needs a longer settle).
      if (this.isListening && !this.isProcessing && !this._micMuted) {
        const restartDelay = _isIOS ? 500 : 300;
        setTimeout(() => {
          if (this.isListening && !this.isProcessing && !this._micMuted) {
            try {
              this.recognition.start();
            } catch (e) {
              // Already started
            }
          }
        }, restartDelay);
      }
    };

    console.log('STT: SpeechRecognition instance created');
    return true;
  }

  /** @returns {boolean} true when the browser exposes the Web Speech API. */
  isSupported() {
    return !!this._SpeechRecognition;
  }

  /**
   * Begin continuous listening: acquire (and hold) the mic stream, then
   * start recognition.
   * @returns {Promise<boolean>} false when muted, unsupported, or mic access
   *   fails (onError receives 'no-device' or 'not-allowed' in the failure cases).
   */
  async start() {
    if (this._micMuted) return false;
    if (!this._ensureRecognition()) {
      console.error('Speech recognition not supported');
      return false;
    }

    // Request mic permission and keep the stream alive.
    try {
      if (!this._micStream) {
        this._micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
      }
    } catch (e) {
      console.error('Mic access failed:', e.name, e.message);
      if (e.name === 'NotFoundError' || e.name === 'DevicesNotFoundError') {
        if (this.onError) this.onError('no-device');
      } else {
        if (this.onError) this.onError('not-allowed');
      }
      return false;
    }

    try {
      this.isListening = true;
      this.recognition.start();
      console.log('STT started');
      return true;
    } catch (e) {
      console.error('Failed to start STT:', e);
      this.isListening = false;
      return false;
    }
  }

  /**
   * Fully stop listening: cancel any pending silence flush, reset all mode
   * flags, stop recognition, and release the held mic stream.
   */
  stop() {
    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }
    if (this.recognition) {
      this.isListening = false;
      this.isProcessing = false;
      this._micMuted = false;
      this._pttHolding = false;
      this.recognition.stop();
      console.log('STT stopped');
    }
    // Release the mic stream when fully stopped
    if (this._micStream) {
      this._micStream.getTracks().forEach(t => t.stop());
      this._micStream = null;
    }
  }

  /** Clear the busy flag and buffered text so listening can resume cleanly. */
  resetProcessing() {
    this.isProcessing = false;
    this.accumulatedText = '';
  }

  /** Alias for mute() — VoiceConversation calls pause() during greeting. */
  pause() {
    this.mute();
  }

  /**
   * Mute STT immediately — called when TTS starts speaking.
   * Sets isProcessing=true so onresult ignores all incoming audio,
   * and clears any pending silence timer so queued echo text is discarded.
   * onend will not restart the engine while muted, stopping the abort loop.
   */
  mute() {
    this.isProcessing = true;
    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }
    this.accumulatedText = '';
  }

  /**
   * Resume STT after TTS finishes — clears mute flag and explicitly
   * restarts the recognition engine (which may have stopped during mute).
   * Called by VoiceSession._resumeListening() after the settling delay.
   */
  resume() {
    this.isProcessing = false;
    this.accumulatedText = '';
    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }
    if (this.isListening && !this._micMuted) {
      try {
        this.recognition.start();
      } catch (e) {
        // Already running — fine
      }
    }
  }

  // --- PTT helpers (called from PTT code in app.js) ---

  /**
   * PTT activate — start listening for push-to-talk.
   * Called when user presses the PTT button. Unmutes, discards any buffered
   * text, and (re)starts recognition.
   */
  pttActivate() {
    this._pttHolding = true;
    this._micMuted = false;
    this.isProcessing = false;
    this.accumulatedText = '';
    if (this.silenceTimer) { clearTimeout(this.silenceTimer); this.silenceTimer = null; }

    // Start recognition fresh
    if (!this._ensureRecognition()) return;
    try {
      this.recognition.start();
    } catch (e) {
      // Already running — fine
    }
  }

  /**
   * PTT release — stop listening and force-send transcript.
   * Called when user releases the PTT button. Sends immediately if Chrome
   * already finalized text during the hold, otherwise waits 400ms for the
   * post-stop finals before sending.
   */
  pttRelease() {
    this._pttHolding = false;
    this._micMuted = true; // also keeps the onend handler from restarting
    if (this.silenceTimer) { clearTimeout(this.silenceTimer); this.silenceTimer = null; }

    // Check if Chrome already finalized text during the hold
    const immediate = this.accumulatedText.trim();
    if (immediate && this.onResult) {
      console.log('PTT release — sending:', immediate);
      this.isProcessing = true;
      this.onResult(immediate);
      this.accumulatedText = '';
    }

    // Stop recognition — Chrome finalizes any pending speech as isFinal
    // (muted state prevents onend restart)
    if (this.recognition) {
      try { this.recognition.stop(); } catch (e) {}
    }

    // If nothing was finalized during hold, wait for Chrome's post-stop results.
    // Chrome fires onresult with isFinal=true when recognition.stop() is called,
    // but the event is async. Give it time to arrive, then send.
    // NOTE(review): onresult returns early while _micMuted is true, so this
    // delayed path can only see text accumulated before release — confirm the
    // post-stop finals actually land in accumulatedText.
    if (!immediate) {
      setTimeout(() => {
        if (this.silenceTimer) { clearTimeout(this.silenceTimer); this.silenceTimer = null; }
        const text = this.accumulatedText.trim();
        if (text && this.onResult) {
          console.log('PTT release (delayed) — sending:', text);
          this.isProcessing = true;
          this.onResult(text);
        }
        this.accumulatedText = '';
      }, 400);
    }
  }

  /**
   * PTT mute — stop recognition and discard.
   * Called when PTT mode is toggled ON (mic off by default).
   */
  pttMute() {
    this._pttHolding = false;
    this._micMuted = true;
    this.isProcessing = true;
    this.accumulatedText = '';
    if (this.silenceTimer) { clearTimeout(this.silenceTimer); this.silenceTimer = null; }

    if (this.recognition) {
      try { this.recognition.stop(); } catch (e) {}
    }
  }

  /**
   * PTT unmute — resume continuous listening.
   * Called when PTT mode is toggled OFF.
   */
  pttUnmute() {
    this._micMuted = false;
    this._pttHolding = false;
    this.isProcessing = false;
    this.accumulatedText = '';

    if (this.isListening && this.recognition) {
      try { this.recognition.start(); } catch (e) {}
    }
  }
}
355
+
356
// ===== WAKE WORD DETECTOR =====
// Passive listener that scans the transcript stream for configured wake
// phrases. A one-time getUserMedia() grant must precede recognition.start();
// without an active permission, Chrome's SpeechRecognition aborts every
// cycle and never captures speech.
class WakeWordDetector {
  constructor() {
    this.recognition = null;
    this.isListening = false;
    this.onWakeWordDetected = null;
    this._micPermissionGranted = false;

    // Phrases that trigger activation (profiles may override via applyProfile)
    this.wakeWords = ['wake up'];

    // Bail out (recognition stays null) when the browser lacks the API.
    const Ctor = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!Ctor) {
      console.warn('Web Speech API not supported in this browser - wake word detection unavailable');
      return;
    }

    const rec = new Ctor();
    rec.continuous = true;
    rec.interimResults = true; // Must be true — Chrome produces nothing without it
    rec.lang = 'en-US';

    rec.onresult = (event) => {
      // Scan every new result — interim and final alike — for a wake phrase.
      let i = event.resultIndex;
      while (i < event.results.length) {
        const transcript = event.results[i][0].transcript.toLowerCase();
        console.log(`Wake word detector heard (${event.results[i].isFinal ? 'final' : 'interim'}):`, transcript);

        const matched = this.wakeWords.some((wakeWord) => transcript.includes(wakeWord));
        if (matched) {
          console.log('Wake word detected!');
          if (this.onWakeWordDetected) {
            this.onWakeWordDetected();
          }
          return; // first match wins — stop scanning
        }
        i += 1;
      }
    };

    rec.onerror = (event) => {
      // no-speech / aborted are routine during passive listening — ignore.
      if (event.error === 'no-speech' || event.error === 'aborted') return;
      console.warn('Wake word detector error:', event.error);
      _reportSTTError(event.error, `Wake word error: ${event.error}`, 'wake_word');
    };

    rec.onend = () => {
      // Keep listening: restart after a short pause so Chrome can release
      // its speech-service connection first.
      if (!this.isListening) return;
      setTimeout(() => {
        if (!this.isListening) return;
        try {
          rec.start();
        } catch (err) {
          // Already started
        }
      }, 300);
    };

    this.recognition = rec;
  }

  /** @returns {boolean} true when a recognition instance was created. */
  isSupported() {
    return !!this.recognition;
  }

  /**
   * Begin passive wake-word listening.
   * @returns {Promise<boolean>} false when unsupported, mic access is
   *   denied, or recognition fails to start.
   */
  async start() {
    if (!this.recognition) {
      console.error('Speech recognition not supported');
      return false;
    }

    // One-time permission grab. The tracks are stopped right away: we only
    // need the grant itself, and holding the stream open can starve
    // SpeechRecognition of microphone access.
    if (!this._micPermissionGranted) {
      try {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        for (const track of stream.getTracks()) track.stop(); // Release immediately
        this._micPermissionGranted = true;
        console.log('Wake word: mic permission granted');
      } catch (err) {
        console.error('Wake word: mic access failed:', err.name, err.message);
        return false;
      }
    }

    this.isListening = true;
    try {
      this.recognition.start();
    } catch (err) {
      console.error('Failed to start wake word detector:', err);
      this.isListening = false;
      return false;
    }
    console.log('Wake word detector started');
    return true;
  }

  /** Stop passive listening (no-op when the API is unsupported). */
  stop() {
    if (!this.recognition) return;
    this.isListening = false;
    this.recognition.stop();
    console.log('Wake word detector stopped');
  }

  /**
   * Flip between listening and stopped.
   * @returns {Promise<boolean>} the new listening state.
   */
  async toggle() {
    if (!this.isListening) {
      return this.start();
    }
    this.stop();
    return false;
  }
}
478
+
479
+ export { WebSpeechSTT, WakeWordDetector };
@@ -0,0 +1,81 @@
1
/**
 * Abstract base class for TTS providers.
 *
 * Concrete providers must override init() and speak(); the remaining
 * methods supply conservative defaults (no voices, not ready, free).
 */
export class BaseTTSProvider {
  constructor(config = {}) {
    this.config = config;
    this.name = 'base';
    this.voices = [];
    this.currentVoice = null;
    this.isPlaying = false;
  }

  /**
   * Initialize the provider. Must be overridden.
   * @returns {Promise<boolean>} Success
   */
  async init() {
    throw new Error('init() must be implemented');
  }

  /**
   * List the available voice names.
   * @returns {string[]}
   */
  getVoices() {
    return this.voices;
  }

  /**
   * Select the active voice. Names not present in this.voices are ignored.
   * @param {string} voiceName
   */
  setVoice(voiceName) {
    const known = this.voices.includes(voiceName);
    if (known) {
      this.currentVoice = voiceName;
    }
  }

  /**
   * Synthesize speech from text. Must be overridden.
   * @param {string} text - Text to speak
   * @param {object} options - Optional parameters
   * @returns {Promise<AudioBuffer|HTMLAudioElement|null>}
   */
  async speak(text, options = {}) {
    throw new Error('speak() must be implemented');
  }

  /** Stop current playback. */
  stop() {
    this.isPlaying = false;
  }

  /**
   * Whether the provider is ready to accept speak() calls.
   * @returns {boolean}
   */
  isReady() {
    return false;
  }

  /**
   * Synthesis cost per minute of audio (0 = free).
   * @returns {number}
   */
  getCostPerMinute() {
    return 0;
  }

  /** Release resources and halt any playback. */
  destroy() {
    this.stop();
  }
}
80
+
81
+ export default BaseTTSProvider;
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Hume TTS Provider
3
+ * Hume EVI handles TTS internally - this is a passthrough provider
4
+ * that works with HumeAdapter
5
+ */
6
+ import { BaseTTSProvider } from './BaseTTSProvider.js';
7
+
8
export class HumeProvider extends BaseTTSProvider {
  constructor(config = {}) {
    super(config);
    this.name = 'hume';

    const hume = config.hume ?? {};
    this.serverUrl = config.serverUrl || '';
    this.configId = hume.configId || '';
    this.voiceId = hume.voiceId || '';
    this.voiceName = hume.voiceName || 'Default';

    // Hume exposes exactly one voice, configured on the backend.
    this.voices = [this.voiceName];
    this.currentVoice = this.voiceName;

    // EVI drives its own audio pipeline; these hooks let external code observe it.
    this.onSpeaking = null;
    this.onListening = null;
  }

  /**
   * Fetch the EVI config id from the backend when available. Failures are
   * non-fatal: the provider falls back to its constructor defaults.
   * @returns {Promise<boolean>} always true
   */
  async init() {
    console.log('[Hume] Initializing...');
    try {
      const response = await fetch(`${this.serverUrl}/api/hume/token`);
      if (response.ok) {
        const data = await response.json();
        if (data.config_id) {
          this.configId = data.config_id;
        }
      }
    } catch (error) {
      console.warn('[Hume] Could not fetch config:', error);
    }
    console.log('[Hume] Ready');
    return true;
  }

  /**
   * Standalone TTS entry point — normally unused, since Hume speaks over
   * the EVI WebSocket connection. Present for interface compatibility only.
   */
  async speak(text, options = {}) {
    console.warn('[Hume] speak() called - Hume normally handles TTS internally via EVI');
    return false;
  }

  /** @returns {string[]} the single backend-configured voice name. */
  getVoices() {
    return this.voices;
  }

  /** Voice selection happens server-side; just record the name locally. */
  setVoice(voiceName) {
    console.log('[Hume] Voice configured on backend:', voiceName);
    this.currentVoice = voiceName;
  }

  /** Always ready — the backend owns the real connection state. */
  isReady() {
    return true;
  }

  /** @returns {number} cost in USD per minute of synthesized audio. */
  getCostPerMinute() {
    return 0.032; // $0.032/minute
  }

  /** No local resources to release. */
  destroy() {}
}
76
+
77
+ export default HumeProvider;