openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/.env.example +104 -0
  2. package/Dockerfile +30 -0
  3. package/LICENSE +21 -0
  4. package/README.md +638 -0
  5. package/SETUP.md +360 -0
  6. package/app.py +232 -0
  7. package/auto-approve-devices.js +111 -0
  8. package/cli/index.js +372 -0
  9. package/config/__init__.py +4 -0
  10. package/config/default.yaml +43 -0
  11. package/config/flags.yaml +67 -0
  12. package/config/loader.py +203 -0
  13. package/config/providers.yaml +71 -0
  14. package/config/speech_normalization.yaml +182 -0
  15. package/config/theme.json +4 -0
  16. package/data/greetings.json +25 -0
  17. package/default-pages/ai-image-creator.html +915 -0
  18. package/default-pages/bulk-image-uploader.html +492 -0
  19. package/default-pages/desktop.html +2865 -0
  20. package/default-pages/file-explorer.html +854 -0
  21. package/default-pages/interactive-map.html +655 -0
  22. package/default-pages/style-guide.html +1005 -0
  23. package/default-pages/website-setup.html +1623 -0
  24. package/deploy/openclaw/Dockerfile +46 -0
  25. package/deploy/openvoiceui.service +30 -0
  26. package/deploy/setup-nginx.sh +50 -0
  27. package/deploy/setup-sudo.sh +306 -0
  28. package/deploy/skill-runner/Dockerfile +19 -0
  29. package/deploy/skill-runner/requirements.txt +14 -0
  30. package/deploy/skill-runner/server.py +269 -0
  31. package/deploy/supertonic/Dockerfile +22 -0
  32. package/deploy/supertonic/server.py +79 -0
  33. package/docker-compose.pinokio.yml +11 -0
  34. package/docker-compose.yml +59 -0
  35. package/greetings.json +25 -0
  36. package/index.html +65 -0
  37. package/inject-device-identity.js +142 -0
  38. package/package.json +82 -0
  39. package/profiles/default.json +114 -0
  40. package/profiles/manager.py +354 -0
  41. package/profiles/schema.json +337 -0
  42. package/prompts/voice-system-prompt.md +149 -0
  43. package/providers/__init__.py +39 -0
  44. package/providers/base.py +63 -0
  45. package/providers/llm/__init__.py +12 -0
  46. package/providers/llm/base.py +71 -0
  47. package/providers/llm/clawdbot_provider.py +112 -0
  48. package/providers/llm/zai_provider.py +115 -0
  49. package/providers/registry.py +320 -0
  50. package/providers/stt/__init__.py +12 -0
  51. package/providers/stt/base.py +58 -0
  52. package/providers/stt/webspeech_provider.py +49 -0
  53. package/providers/stt/whisper_provider.py +100 -0
  54. package/providers/tts/__init__.py +20 -0
  55. package/providers/tts/base.py +91 -0
  56. package/providers/tts/groq_provider.py +74 -0
  57. package/providers/tts/supertonic_provider.py +72 -0
  58. package/requirements.txt +38 -0
  59. package/routes/__init__.py +10 -0
  60. package/routes/admin.py +515 -0
  61. package/routes/canvas.py +1315 -0
  62. package/routes/chat.py +51 -0
  63. package/routes/conversation.py +2158 -0
  64. package/routes/elevenlabs_hybrid.py +306 -0
  65. package/routes/greetings.py +98 -0
  66. package/routes/icons.py +279 -0
  67. package/routes/image_gen.py +364 -0
  68. package/routes/instructions.py +190 -0
  69. package/routes/music.py +838 -0
  70. package/routes/onboarding.py +43 -0
  71. package/routes/pi.py +62 -0
  72. package/routes/profiles.py +215 -0
  73. package/routes/report_issue.py +68 -0
  74. package/routes/static_files.py +533 -0
  75. package/routes/suno.py +664 -0
  76. package/routes/theme.py +81 -0
  77. package/routes/transcripts.py +199 -0
  78. package/routes/vision.py +348 -0
  79. package/routes/workspace.py +288 -0
  80. package/server.py +1510 -0
  81. package/services/__init__.py +1 -0
  82. package/services/auth.py +143 -0
  83. package/services/canvas_versioning.py +239 -0
  84. package/services/db_pool.py +107 -0
  85. package/services/gateway.py +16 -0
  86. package/services/gateway_manager.py +333 -0
  87. package/services/gateways/__init__.py +12 -0
  88. package/services/gateways/base.py +110 -0
  89. package/services/gateways/compat.py +264 -0
  90. package/services/gateways/openclaw.py +1134 -0
  91. package/services/health.py +100 -0
  92. package/services/memory_client.py +455 -0
  93. package/services/paths.py +26 -0
  94. package/services/speech_normalizer.py +285 -0
  95. package/services/tts.py +270 -0
  96. package/setup-config.js +262 -0
  97. package/sounds/air_horn.mp3 +0 -0
  98. package/sounds/bruh.mp3 +0 -0
  99. package/sounds/crowd_cheer.mp3 +0 -0
  100. package/sounds/gunshot.mp3 +0 -0
  101. package/sounds/impact.mp3 +0 -0
  102. package/sounds/lets_go.mp3 +0 -0
  103. package/sounds/record_stop.mp3 +0 -0
  104. package/sounds/rewind.mp3 +0 -0
  105. package/sounds/sad_trombone.mp3 +0 -0
  106. package/sounds/scratch_long.mp3 +0 -0
  107. package/sounds/yeah.mp3 +0 -0
  108. package/src/adapters/ClawdBotAdapter.js +264 -0
  109. package/src/adapters/_template.js +133 -0
  110. package/src/adapters/elevenlabs-classic.js +841 -0
  111. package/src/adapters/elevenlabs-hybrid.js +812 -0
  112. package/src/adapters/hume-evi.js +676 -0
  113. package/src/admin.html +1339 -0
  114. package/src/app.js +8802 -0
  115. package/src/core/Config.js +173 -0
  116. package/src/core/EmotionEngine.js +307 -0
  117. package/src/core/EventBridge.js +180 -0
  118. package/src/core/EventBus.js +117 -0
  119. package/src/core/VoiceSession.js +607 -0
  120. package/src/face/BaseFace.js +259 -0
  121. package/src/face/EyeFace.js +208 -0
  122. package/src/face/HaloSmokeFace.js +509 -0
  123. package/src/face/manifest.json +27 -0
  124. package/src/face/previews/eyes.svg +16 -0
  125. package/src/face/previews/orb.svg +29 -0
  126. package/src/features/MusicPlayer.js +620 -0
  127. package/src/features/Soundboard.js +128 -0
  128. package/src/providers/DeepgramSTT.js +472 -0
  129. package/src/providers/DeepgramStreamingSTT.js +766 -0
  130. package/src/providers/GroqSTT.js +559 -0
  131. package/src/providers/TTSPlayer.js +323 -0
  132. package/src/providers/WebSpeechSTT.js +479 -0
  133. package/src/providers/tts/BaseTTSProvider.js +81 -0
  134. package/src/providers/tts/HumeProvider.js +77 -0
  135. package/src/providers/tts/SupertonicProvider.js +174 -0
  136. package/src/providers/tts/index.js +140 -0
  137. package/src/shell/adapter-registry.js +154 -0
  138. package/src/shell/caller-bridge.js +35 -0
  139. package/src/shell/camera-bridge.js +28 -0
  140. package/src/shell/canvas-bridge.js +32 -0
  141. package/src/shell/commercial-bridge.js +44 -0
  142. package/src/shell/face-bridge.js +44 -0
  143. package/src/shell/music-bridge.js +60 -0
  144. package/src/shell/orchestrator.js +233 -0
  145. package/src/shell/profile-discovery.js +303 -0
  146. package/src/shell/sounds-bridge.js +28 -0
  147. package/src/shell/transcript-bridge.js +61 -0
  148. package/src/shell/waveform-bridge.js +33 -0
  149. package/src/styles/base.css +2862 -0
  150. package/src/styles/face.css +417 -0
  151. package/src/styles/pi-overrides.css +89 -0
  152. package/src/styles/theme-dark.css +67 -0
  153. package/src/test-tts.html +175 -0
  154. package/src/ui/AppShell.js +544 -0
  155. package/src/ui/ProfileSwitcher.js +228 -0
  156. package/src/ui/SessionControl.js +240 -0
  157. package/src/ui/face/FacePicker.js +195 -0
  158. package/src/ui/face/FaceRenderer.js +309 -0
  159. package/src/ui/settings/PlaylistEditor.js +366 -0
  160. package/src/ui/settings/SettingsPanel.css +684 -0
  161. package/src/ui/settings/SettingsPanel.js +419 -0
  162. package/src/ui/settings/TTSVoicePreview.js +210 -0
  163. package/src/ui/themes/ThemeManager.js +213 -0
  164. package/src/ui/visualizers/BaseVisualizer.js +29 -0
  165. package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
  166. package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
  167. package/static/emulators/jsdos/js-dos.css +1 -0
  168. package/static/emulators/jsdos/js-dos.js +22 -0
  169. package/static/favicon.svg +55 -0
  170. package/static/icons/apple-touch-icon.png +0 -0
  171. package/static/icons/favicon-32.png +0 -0
  172. package/static/icons/icon-192.png +0 -0
  173. package/static/icons/icon-512.png +0 -0
  174. package/static/install.html +449 -0
  175. package/static/manifest.json +26 -0
  176. package/static/sw.js +21 -0
  177. package/tts_providers/__init__.py +136 -0
  178. package/tts_providers/base_provider.py +319 -0
  179. package/tts_providers/groq_provider.py +155 -0
  180. package/tts_providers/hume_provider.py +226 -0
  181. package/tts_providers/providers_config.json +119 -0
  182. package/tts_providers/qwen3_provider.py +371 -0
  183. package/tts_providers/resemble_provider.py +315 -0
  184. package/tts_providers/supertonic_provider.py +557 -0
  185. package/tts_providers/supertonic_tts.py +399 -0
@@ -0,0 +1,559 @@
1
+ /**
2
+ * GroqSTT — Server-side speech recognition via Groq Whisper API.
3
+ * Captures audio with MediaRecorder, uses VAD to detect speech/silence,
4
+ * sends audio chunks to /api/stt/groq for transcription.
5
+ *
6
+ * Drop-in replacement for WebSpeechSTT with built-in PTT support.
7
+ *
8
+ * Usage:
9
+ * import { GroqSTT, GroqWakeWordDetector } from './GroqSTT.js';
10
+ *
11
+ * const stt = new GroqSTT();
12
+ * stt.onResult = (text) => console.log('Heard:', text);
13
+ * await stt.start();
14
+ */
15
+
16
+ // ===== GROQ STT =====
17
+ // Server-side speech recognition via Groq Whisper API
18
/**
 * GroqSTT — speech recognition that records microphone audio in the browser
 * and posts it to the server route `/api/stt/groq` for transcription.
 *
 * Audio is captured with MediaRecorder. An AnalyserNode-based voice activity
 * detector (VAD) decides when an utterance has ended; the recorder is then
 * stopped, the recorded chunk is uploaded, and the transcript is delivered
 * through the `onResult` callback.
 *
 * Callbacks (assign after construction):
 *   - onResult(text)       final text to act on
 *   - onListenFinal(text)  every raw transcript returned by the server
 *   - onError(err)         'no-device', 'not-allowed', or an Error object
 *   - onInterim            never invoked by this class
 */
class GroqSTT {
  /**
   * @param {Object} [config]
   * @param {string} [config.serverUrl] Base URL of the backend. Falls back to
   *   `window.AGENT_CONFIG.serverUrl`, then `window.location.origin`. A single
   *   trailing slash is removed.
   * @param {number} [config.accumulationDelayMs=0] How long to wait for more
   *   transcripts before calling `onResult` in continuous-listening mode.
   */
  constructor(config = {}) {
    this.serverUrl = (config.serverUrl || window.AGENT_CONFIG?.serverUrl || window.location.origin).replace(/\/$/, '');
    this.isListening = false;
    this.onResult = null;
    this.onError = null;
    this.onListenFinal = null; // Called with each raw transcript
    this.onInterim = null; // Never called: no interim results are produced
    this.mediaRecorder = null;
    this.audioChunks = [];
    this.stream = null;
    this.isProcessing = false;
    this.accumulatedText = ''; // Text collected so far for the next onResult

    // Push-to-talk (PTT) state
    this._micMuted = false;
    this._pttHolding = false;
    // Set by mute(), cleared by resume(). Kept separate from isProcessing
    // because the upload's `finally` block resets isProcessing.
    this._muteActive = false;

    // VAD (voice activity detection) settings — plain properties so callers
    // can override them after construction.
    this.silenceTimer = null;
    this.silenceDelayMs = 800; // Silence this long ends the utterance
    this.accumulationDelayMs = config.accumulationDelayMs || 0;
    this.vadThreshold = 25; // Average FFT byte amplitude counted as speech
    this.minSpeechMs = 300; // Level must stay above threshold this long
    this.maxRecordingMs = 45000; // Force a chunk boundary after this long
    this.maxRecordingTimer = null;
    this.isSpeaking = false;
    this.stoppingRecorder = false;
    this.hadSpeechInChunk = false;
    this._speechStartTime = 0; // When the current above-threshold run began
    this._resumedSpeechStart = 0; // Same, for speech resuming after a dip

    // Audio analysis for VAD
    this._audioCtx = null;
    this._analyser = null;
    this._vadAnimFrame = null;
    this._accumulationTimer = null;
  }

  /**
   * @returns {boolean} True when `navigator.mediaDevices.getUserMedia` exists.
   */
  isSupported() {
    return !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);
  }

  /**
   * Acquire the microphone (reusing an active stream if there is one) and
   * begin recording and voice activity detection.
   *
   * @returns {Promise<boolean>} True if listening (or already listening);
   *   false if the mic is PTT-muted or start-up failed. Failures are reported
   *   through `onError` rather than thrown.
   */
  async start() {
    if (this.isListening) return true;
    if (this._micMuted) return false;

    try {
      if (!this.stream || !this.stream.active) {
        this.stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      }

      // stop() does not clear isProcessing/isSpeaking, so a session that was
      // stopped while muted would otherwise restart unable to transcribe.
      this._resetSessionState();

      this._setupRecorder();
      this._startVAD();

      this.mediaRecorder.start();
      this.isListening = true;
      console.log('Groq STT started');
      return true;
    } catch (error) {
      console.error('Failed to start Groq STT:', error);
      if (error.name === 'NotFoundError' || error.name === 'DevicesNotFoundError') {
        if (this.onError) this.onError('no-device');
      } else if (error.name === 'NotAllowedError') {
        if (this.onError) this.onError('not-allowed');
      } else {
        if (this.onError) this.onError(error);
      }
      return false;
    }
  }

  /**
   * Clear per-session VAD and processing flags before a fresh start().
   * isProcessing is left alone while a mute() is in force, because mute()
   * relies on it until resume() is called.
   */
  _resetSessionState() {
    if (!this._muteActive) this.isProcessing = false;
    this.isSpeaking = false;
    this.stoppingRecorder = false;
    this.hadSpeechInChunk = false;
    this._speechStartTime = 0;
    this._resumedSpeechStart = 0;
  }

  /** Cancel the end-of-speech and max-length timers, if pending. */
  _clearRecordingTimers() {
    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }
    if (this.maxRecordingTimer) {
      clearTimeout(this.maxRecordingTimer);
      this.maxRecordingTimer = null;
    }
  }

  /** Cancel the pending transcript-accumulation timer, if any. */
  _clearAccumulationTimer() {
    if (this._accumulationTimer) {
      clearTimeout(this._accumulationTimer);
      this._accumulationTimer = null;
    }
  }

  /** Stop the recorder when (and only when) it is currently recording. */
  _stopIfRecording() {
    if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
      this.mediaRecorder.stop();
    }
  }

  /**
   * Create a MediaRecorder for the current stream and attach its handlers.
   * WebM/Opus is used when the browser can record it; otherwise the
   * browser's default container is used instead of letting the constructor
   * throw.
   */
  _setupRecorder() {
    const preferredMime = 'audio/webm;codecs=opus';
    const canUsePreferred =
      typeof MediaRecorder.isTypeSupported !== 'function' ||
      MediaRecorder.isTypeSupported(preferredMime);

    const recorder = canUsePreferred
      ? new MediaRecorder(this.stream, { mimeType: preferredMime })
      : new MediaRecorder(this.stream);

    this.mediaRecorder = recorder;
    this.audioChunks = [];

    recorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        this.audioChunks.push(event.data);
      }
    };

    recorder.onstop = () => this._handleRecorderStop(recorder);
  }

  /**
   * Work out the MIME type and file name to use for an upload, based on the
   * container the recorder reports. Unknown or missing types are labelled as
   * WebM.
   *
   * @param {MediaRecorder} recorder
   * @returns {{type: string, filename: string}}
   */
  _uploadFormat(recorder) {
    const extensions = new Map([
      ['audio/webm', 'webm'],
      ['audio/mp4', 'mp4'],
      ['audio/ogg', 'ogg'],
      ['audio/wav', 'wav'],
    ]);
    const reported = (recorder && recorder.mimeType) || '';
    const baseType = reported.split(';')[0].trim().toLowerCase();
    const extension = extensions.get(baseType);
    if (!extension) {
      return { type: 'audio/webm', filename: 'audio.webm' };
    }
    return { type: baseType, filename: `audio.${extension}` };
  }

  /**
   * Recorder `stop` handler: restart recording, then upload the finished
   * chunk and deliver the transcript.
   *
   * @param {MediaRecorder} recorder The recorder that fired the event.
   * @returns {Promise<void>} Never rejects on upload errors; they are
   *   reported through `onError`.
   */
  async _handleRecorderStop(recorder) {
    // Snapshot and clear chunk state immediately so the next recording
    // starts from a clean slate.
    const chunks = this.audioChunks;
    const hadSpeech = this.hadSpeechInChunk;
    this.audioChunks = [];
    this.hadSpeechInChunk = false;
    this.stoppingRecorder = false;

    // Restart right away so as little audio as possible is lost while the
    // upload below is in flight.
    if (this.isListening && !this._micMuted && !this._muteActive && !this._pttHolding) {
      this.isSpeaking = false;
      if (this.mediaRecorder.state !== 'inactive') {
        // resume()/pttUnmute()/start() already restarted recording before
        // this event fired. Calling start() again would throw
        // InvalidStateError; drop this stale chunk instead.
        return;
      }
      this.mediaRecorder.start();
    }

    if (chunks.length === 0) return;

    // Discard audio while a previous upload is running or while muted,
    // unless the user is holding the PTT button.
    if ((this.isProcessing || this._muteActive) && !this._pttHolding) {
      return;
    }

    this.isProcessing = true;
    this._clearRecordingTimers();

    const { type, filename } = this._uploadFormat(recorder);
    const audioBlob = new Blob(chunks, { type });

    // Skip small chunks in which the VAD saw no speech, to avoid
    // transcribing silence. Larger chunks are still sent in case the VAD
    // missed quiet speech.
    if (!hadSpeech && audioBlob.size < 50000) {
      console.log('Groq STT: skipping - no speech detected (' + audioBlob.size + ' bytes)');
      this.isProcessing = false;
      return;
    }

    try {
      console.log('Groq STT: sending audio (' + audioBlob.size + ' bytes)');
      const formData = new FormData();
      formData.append('audio', audioBlob, filename);

      const response = await fetch(`${this.serverUrl}/api/stt/groq`, {
        method: 'POST',
        body: formData
      });

      // Surface server failures instead of treating them as "no transcript".
      if (!response.ok) {
        throw new Error(`Groq STT request failed: HTTP ${response.status}`);
      }

      const data = await response.json();

      if (data.transcript && data.transcript.trim()) {
        console.log('Groq STT transcript:', data.transcript);
        if (this.onListenFinal) this.onListenFinal(data.transcript);
        this._deliverTranscript(data.transcript);
      }
    } catch (error) {
      console.error('Groq STT error:', error);
      if (this.onError) this.onError(error);
    } finally {
      this.isProcessing = false;
    }
  }

  /**
   * Hand a non-empty transcript to `onResult`.
   * With the mic PTT-muted (button just released) it is sent immediately;
   * otherwise it is appended to `accumulatedText` and sent once
   * `accumulationDelayMs` passes without another transcript.
   *
   * @param {string} rawTranscript
   */
  _deliverTranscript(rawTranscript) {
    const text = rawTranscript.trim();

    if (this._micMuted) {
      this.accumulatedText = text;
      if (this.onResult) this.onResult(this.accumulatedText);
      this.accumulatedText = '';
      return;
    }

    this.accumulatedText = this.accumulatedText
      ? this.accumulatedText + ' ' + text
      : text;

    this._clearAccumulationTimer();
    this._accumulationTimer = setTimeout(() => {
      this._accumulationTimer = null;
      const fullText = this.accumulatedText.trim();
      if (fullText && this.onResult) {
        console.log('Groq STT accumulated result:', fullText);
        this.onResult(fullText);
      }
      this.accumulatedText = '';
    }, this.accumulationDelayMs);
  }

  /**
   * Resume the AudioContext if the browser created it suspended; a
   * suspended context feeds the analyser silence, so speech would never be
   * detected.
   */
  _resumeAudioContextIfSuspended() {
    const ctx = this._audioCtx;
    if (ctx && ctx.state === 'suspended' && typeof ctx.resume === 'function') {
      ctx.resume().catch((err) => {
        console.warn('Groq STT: could not resume AudioContext', err);
      });
    }
  }

  /**
   * Ensure the analyser graph exists for the current stream and that the
   * VAD polling loop is running. An open AudioContext is reused.
   */
  _startVAD() {
    if (this._audioCtx && this._audioCtx.state !== 'closed') {
      this._resumeAudioContextIfSuspended();
      // Graph already built; only restart the polling loop if it stopped.
      if (!this._vadAnimFrame) this._runVADLoop();
      return;
    }

    this._audioCtx = new AudioContext();
    const source = this._audioCtx.createMediaStreamSource(this.stream);
    this._analyser = this._audioCtx.createAnalyser();
    this._analyser.fftSize = 512;
    source.connect(this._analyser);
    this._resumeAudioContextIfSuspended();

    this._runVADLoop();
  }

  /**
   * Poll the analyser once per animation frame and drive the speech/silence
   * state machine. The loop ends by itself once `isListening` is false.
   */
  _runVADLoop() {
    const bufferLength = this._analyser.frequencyBinCount;
    const dataArray = new Uint8Array(bufferLength);

    const checkLevel = () => {
      if (!this.isListening) {
        this._vadAnimFrame = null;
        return;
      }

      this._analyser.getByteFrequencyData(dataArray);
      const average = dataArray.reduce((sum, value) => sum + value, 0) / bufferLength;
      const isSpeakingNow = average > this.vadThreshold;

      // While muted, keep polling but ignore the level so audio picked up
      // during the mute is never treated as speech.
      if (this._muteActive) {
        this._vadAnimFrame = requestAnimationFrame(checkLevel);
        return;
      }

      if (isSpeakingNow && !this.isSpeaking) {
        // Possible speech: require minSpeechMs above threshold to confirm.
        const now = Date.now();
        if (!this._speechStartTime) {
          this._speechStartTime = now;
        }
        if (now - this._speechStartTime < this.minSpeechMs) {
          this._vadAnimFrame = requestAnimationFrame(checkLevel);
          return;
        }

        // Speech confirmed.
        this.isSpeaking = true;
        this.hadSpeechInChunk = true;
        this._speechStartTime = 0;

        if (this.silenceTimer) {
          clearTimeout(this.silenceTimer);
          this.silenceTimer = null;
        }

        // Safety timer: force a chunk boundary after maxRecordingMs.
        if (!this.maxRecordingTimer && !this.isProcessing && !this.stoppingRecorder) {
          this.maxRecordingTimer = setTimeout(() => {
            this.maxRecordingTimer = null;
            this.isSpeaking = false;
            this.stoppingRecorder = true;
            if (this.silenceTimer) {
              clearTimeout(this.silenceTimer);
              this.silenceTimer = null;
            }
            this._stopIfRecording();
          }, this.maxRecordingMs);
        }
      } else if (isSpeakingNow && this.isSpeaking) {
        // Speech continuing after a dip. Cancel the silence timer only once
        // the level has stayed up for minSpeechMs, so short noise blips
        // cannot keep the recording open indefinitely.
        const now = Date.now();
        if (!this._resumedSpeechStart) {
          this._resumedSpeechStart = now;
        }
        if (now - this._resumedSpeechStart >= this.minSpeechMs && this.silenceTimer) {
          clearTimeout(this.silenceTimer);
          this.silenceTimer = null;
          this._resumedSpeechStart = 0;
        }
      } else if (!isSpeakingNow && !this.isSpeaking) {
        // Quiet and no confirmed speech: reset the run-length trackers.
        this._speechStartTime = 0;
        this._resumedSpeechStart = 0;
      } else if (!isSpeakingNow && this.isSpeaking && !this.isProcessing && !this.stoppingRecorder) {
        // Silence after confirmed speech: arm the end-of-speech timer.
        this._resumedSpeechStart = 0;
        if (!this.silenceTimer) {
          this.silenceTimer = setTimeout(() => {
            this.isSpeaking = false;
            this.stoppingRecorder = true;
            this._stopIfRecording();
          }, this.silenceDelayMs);
        }
      }

      this._vadAnimFrame = requestAnimationFrame(checkLevel);
    };

    this._vadAnimFrame = requestAnimationFrame(checkLevel);
  }

  /**
   * Stop listening completely: cancel timers and the VAD loop, stop the
   * recorder, release the microphone tracks and close the AudioContext.
   * Also clears both mute flags.
   */
  stop() {
    this.isListening = false;
    this.stoppingRecorder = false;
    this._micMuted = false;
    this._muteActive = false;

    this._clearRecordingTimers();
    this._clearAccumulationTimer();
    if (this._vadAnimFrame) {
      cancelAnimationFrame(this._vadAnimFrame);
      this._vadAnimFrame = null;
    }

    if (this.mediaRecorder && this.mediaRecorder.state !== 'inactive') {
      this.mediaRecorder.stop();
    }

    // Release mic stream
    if (this.stream) {
      this.stream.getTracks().forEach((track) => track.stop());
      this.stream = null;
    }

    // Close audio context; a failure to close is deliberately ignored.
    if (this._audioCtx) {
      this._audioCtx.close().catch(() => {});
      this._audioCtx = null;
      this._analyser = null;
    }

    console.log('Groq STT stopped');
  }

  /** Clear the processing flag and any accumulated text. */
  resetProcessing() {
    this.isProcessing = false;
    this.accumulatedText = '';
  }

  /** Alias for mute(). */
  pause() {
    this.mute();
  }

  /**
   * Mute recognition (intended for while TTS is speaking).
   * Stops the recorder and discards pending audio and text, but keeps the
   * mic stream open and leaves `isListening` unchanged.
   */
  mute() {
    this._muteActive = true;
    this.isProcessing = true;
    this.hadSpeechInChunk = false;
    this.accumulatedText = '';
    this._clearRecordingTimers();
    this._clearAccumulationTimer();
    // Stop recording but keep stream alive
    this._stopIfRecording();
  }

  /**
   * Undo mute(): reset VAD/processing state and restart recording if the
   * session is still active and the mic is not PTT-muted.
   */
  resume() {
    this._muteActive = false;
    this.isProcessing = false;
    this.stoppingRecorder = false;
    this.hadSpeechInChunk = false;
    this.isSpeaking = false;
    this.audioChunks = [];

    if (this.isListening && !this._micMuted) {
      if (this.stream && this.stream.active) {
        // Recreate the recorder if there is none or the stream changed.
        if (!this.mediaRecorder || this.mediaRecorder.stream !== this.stream) {
          this._setupRecorder();
        }
        if (this.mediaRecorder.state === 'inactive') {
          this.mediaRecorder.start();
        }
        // Restart VAD loop if it stopped
        if (!this._vadAnimFrame) {
          this._startVAD();
        }
      }
    }
  }

  // --- PTT helpers ---

  /**
   * PTT press: clear mute/processing state and start recording.
   * Overrides an active mute().
   */
  pttActivate() {
    this._pttHolding = true;
    this._micMuted = false;
    this._muteActive = false; // PTT overrides a stale mute()
    this.isProcessing = false;
    this.accumulatedText = '';
    this.hadSpeechInChunk = false;
    this.audioChunks = [];
    this._clearRecordingTimers();

    if (this.mediaRecorder && this.mediaRecorder.state === 'inactive') {
      this.mediaRecorder.start();
    }
  }

  /**
   * PTT release: stop recording and transcribe what was captured.
   * `hadSpeechInChunk` is forced on so the chunk is uploaded even if the VAD
   * saw no speech.
   */
  pttRelease() {
    this._pttHolding = false;
    this._micMuted = true;
    this.hadSpeechInChunk = true; // Force transcription regardless of VAD
    this.stoppingRecorder = true;

    this._clearRecordingTimers();

    // The recorder's stop handler uploads the audio and calls onResult.
    this._stopIfRecording();
  }

  /**
   * Enter PTT mode with the mic off: stop recording and discard the audio.
   */
  pttMute() {
    this._pttHolding = false;
    this._micMuted = true;
    this.hadSpeechInChunk = false;
    this._clearRecordingTimers();
    this.isProcessing = true; // Makes the stop handler discard the chunk
    this._stopIfRecording();
  }

  /**
   * Leave PTT mode: reset state and resume continuous recording if the
   * session is active.
   */
  pttUnmute() {
    this._micMuted = false;
    this._pttHolding = false;
    this.isProcessing = false;
    this.stoppingRecorder = false;
    this.hadSpeechInChunk = false;
    this.audioChunks = [];

    if (this.isListening && this.mediaRecorder && this.mediaRecorder.state === 'inactive') {
      this.mediaRecorder.start();
    }
  }
}
488
+
489
+
490
+ // ===== GROQ WAKE WORD DETECTOR =====
491
+ // Listens for wake words using Groq Whisper API.
492
+ // Continuously records, transcribes, and checks for wake phrases.
493
/**
 * GroqWakeWordDetector — listens for wake phrases by running a GroqSTT
 * instance and checking every transcript it reports.
 *
 * Assign `onWakeWordDetected` to be notified; it is called with no
 * arguments. Replace `wakeWords` to change the phrases (default
 * `['wake up']`).
 */
class GroqWakeWordDetector {
  constructor() {
    this.isListening = false;
    this.onWakeWordDetected = null;
    this.wakeWords = ['wake up'];
    this._stt = null;
  }

  /**
   * @returns {boolean} True when `navigator.mediaDevices.getUserMedia` exists.
   */
  isSupported() {
    return !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);
  }

  /**
   * Reduce text to lower-case words separated by single spaces, so matching
   * ignores letter case and punctuation (e.g. "Hey, Jarvis." -> "hey jarvis").
   *
   * @param {string} text
   * @returns {string}
   */
  _normalizeForMatch(text) {
    return String(text)
      .toLowerCase()
      .replace(/[^\p{L}\p{N}]+/gu, ' ')
      .trim();
  }

  /**
   * @param {string} transcript
   * @returns {boolean} True if any configured wake phrase occurs in the
   *   transcript after normalisation. Phrases that normalise to an empty
   *   string are ignored so they cannot match everything.
   */
  _containsWakeWord(transcript) {
    const heard = this._normalizeForMatch(transcript);
    return this.wakeWords.some((wakeWord) => {
      const phrase = this._normalizeForMatch(wakeWord);
      return phrase !== '' && heard.includes(phrase);
    });
  }

  /**
   * Create a fresh GroqSTT, tune it for wake-word use and start it.
   *
   * @returns {Promise<boolean>} True if listening (or already listening);
   *   false if the STT failed to start or stop() was called meanwhile.
   */
  async start() {
    if (this.isListening) return true;

    const stt = new GroqSTT();
    // Wake-word tuning of the STT's VAD properties.
    stt.silenceDelayMs = 1500; // 1.5s of silence ends a chunk
    stt.maxRecordingMs = 10000; // Chunks are cut after 10s at most
    stt.vadThreshold = 40; // Amplitude threshold counted as speech

    stt.onResult = (transcript) => {
      console.log(`Wake word detector heard: "${transcript}"`);
      if (this._containsWakeWord(transcript)) {
        console.log('Wake word detected!');
        if (this.onWakeWordDetected) {
          this.onWakeWordDetected();
        }
      }
    };

    stt.onError = (error) => {
      console.warn('Wake word detector error:', error);
    };

    this._stt = stt;
    this.isListening = true;
    const ok = await stt.start();

    if (this._stt !== stt) {
      // stop() ran while the STT was still starting. Stop this instance
      // again now that its start has finished, so it is not left running.
      stt.stop();
      return false;
    }

    if (!ok) {
      this.isListening = false;
      return false;
    }

    console.log('Groq wake word detector started');
    return true;
  }

  /** Stop listening and dispose of the underlying STT instance. */
  stop() {
    this.isListening = false;
    if (this._stt) {
      this._stt.stop();
      this._stt = null;
    }
    console.log('Groq wake word detector stopped');
  }

  /**
   * Start if stopped, stop if started.
   *
   * @returns {Promise<boolean>} The listening state after toggling.
   */
  async toggle() {
    if (this.isListening) {
      this.stop();
      return false;
    } else {
      return await this.start();
    }
  }
}
558
+
559
+ export { GroqSTT, GroqWakeWordDetector };