openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/.env.example +104 -0
  2. package/Dockerfile +30 -0
  3. package/LICENSE +21 -0
  4. package/README.md +638 -0
  5. package/SETUP.md +360 -0
  6. package/app.py +232 -0
  7. package/auto-approve-devices.js +111 -0
  8. package/cli/index.js +372 -0
  9. package/config/__init__.py +4 -0
  10. package/config/default.yaml +43 -0
  11. package/config/flags.yaml +67 -0
  12. package/config/loader.py +203 -0
  13. package/config/providers.yaml +71 -0
  14. package/config/speech_normalization.yaml +182 -0
  15. package/config/theme.json +4 -0
  16. package/data/greetings.json +25 -0
  17. package/default-pages/ai-image-creator.html +915 -0
  18. package/default-pages/bulk-image-uploader.html +492 -0
  19. package/default-pages/desktop.html +2865 -0
  20. package/default-pages/file-explorer.html +854 -0
  21. package/default-pages/interactive-map.html +655 -0
  22. package/default-pages/style-guide.html +1005 -0
  23. package/default-pages/website-setup.html +1623 -0
  24. package/deploy/openclaw/Dockerfile +46 -0
  25. package/deploy/openvoiceui.service +30 -0
  26. package/deploy/setup-nginx.sh +50 -0
  27. package/deploy/setup-sudo.sh +306 -0
  28. package/deploy/skill-runner/Dockerfile +19 -0
  29. package/deploy/skill-runner/requirements.txt +14 -0
  30. package/deploy/skill-runner/server.py +269 -0
  31. package/deploy/supertonic/Dockerfile +22 -0
  32. package/deploy/supertonic/server.py +79 -0
  33. package/docker-compose.pinokio.yml +11 -0
  34. package/docker-compose.yml +59 -0
  35. package/greetings.json +25 -0
  36. package/index.html +65 -0
  37. package/inject-device-identity.js +142 -0
  38. package/package.json +82 -0
  39. package/profiles/default.json +114 -0
  40. package/profiles/manager.py +354 -0
  41. package/profiles/schema.json +337 -0
  42. package/prompts/voice-system-prompt.md +149 -0
  43. package/providers/__init__.py +39 -0
  44. package/providers/base.py +63 -0
  45. package/providers/llm/__init__.py +12 -0
  46. package/providers/llm/base.py +71 -0
  47. package/providers/llm/clawdbot_provider.py +112 -0
  48. package/providers/llm/zai_provider.py +115 -0
  49. package/providers/registry.py +320 -0
  50. package/providers/stt/__init__.py +12 -0
  51. package/providers/stt/base.py +58 -0
  52. package/providers/stt/webspeech_provider.py +49 -0
  53. package/providers/stt/whisper_provider.py +100 -0
  54. package/providers/tts/__init__.py +20 -0
  55. package/providers/tts/base.py +91 -0
  56. package/providers/tts/groq_provider.py +74 -0
  57. package/providers/tts/supertonic_provider.py +72 -0
  58. package/requirements.txt +38 -0
  59. package/routes/__init__.py +10 -0
  60. package/routes/admin.py +515 -0
  61. package/routes/canvas.py +1315 -0
  62. package/routes/chat.py +51 -0
  63. package/routes/conversation.py +2158 -0
  64. package/routes/elevenlabs_hybrid.py +306 -0
  65. package/routes/greetings.py +98 -0
  66. package/routes/icons.py +279 -0
  67. package/routes/image_gen.py +364 -0
  68. package/routes/instructions.py +190 -0
  69. package/routes/music.py +838 -0
  70. package/routes/onboarding.py +43 -0
  71. package/routes/pi.py +62 -0
  72. package/routes/profiles.py +215 -0
  73. package/routes/report_issue.py +68 -0
  74. package/routes/static_files.py +533 -0
  75. package/routes/suno.py +664 -0
  76. package/routes/theme.py +81 -0
  77. package/routes/transcripts.py +199 -0
  78. package/routes/vision.py +348 -0
  79. package/routes/workspace.py +288 -0
  80. package/server.py +1510 -0
  81. package/services/__init__.py +1 -0
  82. package/services/auth.py +143 -0
  83. package/services/canvas_versioning.py +239 -0
  84. package/services/db_pool.py +107 -0
  85. package/services/gateway.py +16 -0
  86. package/services/gateway_manager.py +333 -0
  87. package/services/gateways/__init__.py +12 -0
  88. package/services/gateways/base.py +110 -0
  89. package/services/gateways/compat.py +264 -0
  90. package/services/gateways/openclaw.py +1134 -0
  91. package/services/health.py +100 -0
  92. package/services/memory_client.py +455 -0
  93. package/services/paths.py +26 -0
  94. package/services/speech_normalizer.py +285 -0
  95. package/services/tts.py +270 -0
  96. package/setup-config.js +262 -0
  97. package/sounds/air_horn.mp3 +0 -0
  98. package/sounds/bruh.mp3 +0 -0
  99. package/sounds/crowd_cheer.mp3 +0 -0
  100. package/sounds/gunshot.mp3 +0 -0
  101. package/sounds/impact.mp3 +0 -0
  102. package/sounds/lets_go.mp3 +0 -0
  103. package/sounds/record_stop.mp3 +0 -0
  104. package/sounds/rewind.mp3 +0 -0
  105. package/sounds/sad_trombone.mp3 +0 -0
  106. package/sounds/scratch_long.mp3 +0 -0
  107. package/sounds/yeah.mp3 +0 -0
  108. package/src/adapters/ClawdBotAdapter.js +264 -0
  109. package/src/adapters/_template.js +133 -0
  110. package/src/adapters/elevenlabs-classic.js +841 -0
  111. package/src/adapters/elevenlabs-hybrid.js +812 -0
  112. package/src/adapters/hume-evi.js +676 -0
  113. package/src/admin.html +1339 -0
  114. package/src/app.js +8802 -0
  115. package/src/core/Config.js +173 -0
  116. package/src/core/EmotionEngine.js +307 -0
  117. package/src/core/EventBridge.js +180 -0
  118. package/src/core/EventBus.js +117 -0
  119. package/src/core/VoiceSession.js +607 -0
  120. package/src/face/BaseFace.js +259 -0
  121. package/src/face/EyeFace.js +208 -0
  122. package/src/face/HaloSmokeFace.js +509 -0
  123. package/src/face/manifest.json +27 -0
  124. package/src/face/previews/eyes.svg +16 -0
  125. package/src/face/previews/orb.svg +29 -0
  126. package/src/features/MusicPlayer.js +620 -0
  127. package/src/features/Soundboard.js +128 -0
  128. package/src/providers/DeepgramSTT.js +472 -0
  129. package/src/providers/DeepgramStreamingSTT.js +766 -0
  130. package/src/providers/GroqSTT.js +559 -0
  131. package/src/providers/TTSPlayer.js +323 -0
  132. package/src/providers/WebSpeechSTT.js +479 -0
  133. package/src/providers/tts/BaseTTSProvider.js +81 -0
  134. package/src/providers/tts/HumeProvider.js +77 -0
  135. package/src/providers/tts/SupertonicProvider.js +174 -0
  136. package/src/providers/tts/index.js +140 -0
  137. package/src/shell/adapter-registry.js +154 -0
  138. package/src/shell/caller-bridge.js +35 -0
  139. package/src/shell/camera-bridge.js +28 -0
  140. package/src/shell/canvas-bridge.js +32 -0
  141. package/src/shell/commercial-bridge.js +44 -0
  142. package/src/shell/face-bridge.js +44 -0
  143. package/src/shell/music-bridge.js +60 -0
  144. package/src/shell/orchestrator.js +233 -0
  145. package/src/shell/profile-discovery.js +303 -0
  146. package/src/shell/sounds-bridge.js +28 -0
  147. package/src/shell/transcript-bridge.js +61 -0
  148. package/src/shell/waveform-bridge.js +33 -0
  149. package/src/styles/base.css +2862 -0
  150. package/src/styles/face.css +417 -0
  151. package/src/styles/pi-overrides.css +89 -0
  152. package/src/styles/theme-dark.css +67 -0
  153. package/src/test-tts.html +175 -0
  154. package/src/ui/AppShell.js +544 -0
  155. package/src/ui/ProfileSwitcher.js +228 -0
  156. package/src/ui/SessionControl.js +240 -0
  157. package/src/ui/face/FacePicker.js +195 -0
  158. package/src/ui/face/FaceRenderer.js +309 -0
  159. package/src/ui/settings/PlaylistEditor.js +366 -0
  160. package/src/ui/settings/SettingsPanel.css +684 -0
  161. package/src/ui/settings/SettingsPanel.js +419 -0
  162. package/src/ui/settings/TTSVoicePreview.js +210 -0
  163. package/src/ui/themes/ThemeManager.js +213 -0
  164. package/src/ui/visualizers/BaseVisualizer.js +29 -0
  165. package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
  166. package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
  167. package/static/emulators/jsdos/js-dos.css +1 -0
  168. package/static/emulators/jsdos/js-dos.js +22 -0
  169. package/static/favicon.svg +55 -0
  170. package/static/icons/apple-touch-icon.png +0 -0
  171. package/static/icons/favicon-32.png +0 -0
  172. package/static/icons/icon-192.png +0 -0
  173. package/static/icons/icon-512.png +0 -0
  174. package/static/install.html +449 -0
  175. package/static/manifest.json +26 -0
  176. package/static/sw.js +21 -0
  177. package/tts_providers/__init__.py +136 -0
  178. package/tts_providers/base_provider.py +319 -0
  179. package/tts_providers/groq_provider.py +155 -0
  180. package/tts_providers/hume_provider.py +226 -0
  181. package/tts_providers/providers_config.json +119 -0
  182. package/tts_providers/qwen3_provider.py +371 -0
  183. package/tts_providers/resemble_provider.py +315 -0
  184. package/tts_providers/supertonic_provider.py +557 -0
  185. package/tts_providers/supertonic_tts.py +399 -0
@@ -0,0 +1,128 @@
1
+ /**
2
+ * Soundboard — extracted from index.html DJSoundboard (P3-T6)
3
+ *
4
+ * Manages a library of DJ sound effects with preloading, debounce,
5
+ * and AI text-trigger detection.
6
+ *
7
+ * Usage:
8
+ * import { Soundboard } from './Soundboard.js';
9
+ * const board = new Soundboard({ serverUrl: 'http://localhost:5000' });
10
+ * board.init();
11
+ * window.djSoundboard = board;
12
+ *
13
+ * EventBus events emitted (optional):
14
+ * 'soundboard:play' { soundName }
15
+ */
16
+
17
/**
 * Library of short DJ sound effects that can be played by name or fired by
 * trigger phrases found in free text.
 *
 * Sounds are loaded from `${serverUrl}/sounds/<file>` through `Audio`
 * elements. A per-sound 500ms debounce suppresses duplicate fires.
 */
export class Soundboard {
  /**
   * @param {Object} [options]
   * @param {string} [options.serverUrl=''] Prefix placed before `/sounds/<file>`.
   * @param {?Object} [options.eventBus=null] Optional bus; when set,
   *   `emit('soundboard:play', { soundName })` is called on every play.
   */
  constructor({ serverUrl = '', eventBus = null } = {}) {
    this.serverUrl = serverUrl;
    this._eventBus = eventBus;

    // name -> { file, triggers }. Triggers are matched as case-insensitive
    // substrings of the scanned text, in insertion order.
    this.sounds = {
      'air_horn': { file: 'air_horn.mp3', triggers: ['air horn', 'airhorn', 'horn', 'bwaaah', 'bwaaa', 'bwah'] },
      'scratch_long': { file: 'scratch_long.mp3', triggers: ['scratch', 'scratching', 'wicka', 'wikka'] },
      'rewind': { file: 'rewind.mp3', triggers: ['rewind', 'pull up', 'pull it back', 'hold up', 'bring it back'] },
      'record_stop': { file: 'record_stop.mp3', triggers: ['record stop', 'stop the record'] },
      'crowd_cheer': { file: 'crowd_cheer.mp3', triggers: ['crowd cheer', 'applause', 'crowd goes wild', 'give it up', 'make some noise'] },
      // NOTE(review): no crowd_hype.mp3 appears among the packaged sounds —
      // confirm the asset exists on the server, otherwise this entry 404s.
      'crowd_hype': { file: 'crowd_hype.mp3', triggers: ['crowd hype', 'hype them up', 'get hype'] },
      'yeah': { file: 'yeah.mp3', triggers: ['yeah!', 'yeahhh', 'oh yeah', 'yeeah'] },
      'lets_go': { file: 'lets_go.mp3', triggers: ["let's go!", 'lets go!', "let's goooo", 'here we go'] },
      'gunshot': { file: 'gunshot.mp3', triggers: ['gunshot', 'gun shot', 'bang bang', 'shots fired', 'pow pow', 'blat blat'] },
      'bruh': { file: 'bruh.mp3', triggers: ['bruh', 'bruhhh'] },
      'sad_trombone': { file: 'sad_trombone.mp3', triggers: ['sad trombone', 'womp womp', 'fail', 'wah wah'] }
    };

    /** @type {Object.<string, HTMLAudioElement>} */
    this.audioCache = {};

    /** @type {Object.<string, number>} */
    this.lastPlayTime = {};
  }

  /**
   * Preload common sounds so they play instantly on first trigger.
   */
  init() {
    ['air_horn', 'scratch_long', 'crowd_cheer', 'rewind', 'yeah', 'lets_go'].forEach(name => {
      this.preload(name);
    });
    console.log('Soundboard initialized with', Object.keys(this.sounds).length, 'sounds');
  }

  /**
   * True only for sounds registered on this board. An own-property check is
   * used so inherited keys such as 'constructor' are not mistaken for sounds.
   * @param {string} soundName
   * @returns {boolean}
   */
  _hasSound(soundName) {
    return Object.prototype.hasOwnProperty.call(this.sounds, soundName);
  }

  /**
   * Build the URL a sound is loaded from.
   * @param {string} soundName  a registered sound name
   * @returns {string}
   */
  _urlFor(soundName) {
    return `${this.serverUrl}/sounds/${this.sounds[soundName].file}`;
  }

  /**
   * Preload a sound into the audio cache. Unknown names are ignored.
   * @param {string} soundName
   */
  preload(soundName) {
    if (!this._hasSound(soundName)) return;
    const audio = new Audio(this._urlFor(soundName));
    audio.preload = 'auto';
    this.audioCache[soundName] = audio;
  }

  /**
   * Play a sound by name. Debounced (500ms) to avoid duplicate fires.
   * Unknown names log a warning and do nothing.
   * @param {string} soundName
   */
  play(soundName) {
    if (!this._hasSound(soundName)) {
      console.warn('Unknown sound:', soundName);
      return;
    }

    // Debounce — don't play same sound within 500ms
    const now = Date.now();
    const last = this.lastPlayTime[soundName];
    if (last && now - last < 500) {
      return;
    }
    this.lastPlayTime[soundName] = now;

    // Use cached audio or create a fresh element if cached is still playing,
    // so overlapping fires of the same sound do not cut each other off.
    let audio = this.audioCache[soundName];
    if (!audio || !audio.paused) {
      audio = new Audio(this._urlFor(soundName));
    }

    audio.currentTime = 0;
    audio.volume = 0.4; // Lower than music so voice stays audible
    audio.play().catch(e => console.error('Sound play error:', e));
    console.log('🎧 DJ Sound:', soundName);

    if (this._eventBus) {
      this._eventBus.emit('soundboard:play', { soundName });
    }
  }

  /**
   * Scan text for trigger words and play the first matching sound.
   * Only one sound fires per call (first match wins). Matching is a
   * case-insensitive substring test; empty or non-string triggers are skipped.
   *
   * The matched name is returned even when play() suppresses the sound
   * because of its debounce window.
   *
   * @param {string} text
   * @returns {string|null} name of the matched sound, or null when nothing
   *   matched or `text` is empty / not a string
   */
  checkTriggers(text) {
    if (typeof text !== 'string' || text === '') return null;
    const lowerText = text.toLowerCase();

    for (const [soundName, config] of Object.entries(this.sounds)) {
      const triggers = Array.isArray(config.triggers) ? config.triggers : [];
      for (const trigger of triggers) {
        // An empty trigger would match every text; skip it.
        if (typeof trigger !== 'string' || trigger === '') continue;
        // Lower-case the trigger too, so runtime-added phrases with capitals match.
        if (lowerText.includes(trigger.toLowerCase())) {
          this.play(soundName);
          return soundName;
        }
      }
    }

    return null;
  }

  /**
   * Add or update a sound definition at runtime.
   * @param {string} name
   * @param {string} file filename (relative to /sounds/)
   * @param {string[]} triggers trigger phrases
   */
  addSound(name, file, triggers = []) {
    const previous = this._hasSound(name) ? this.sounds[name] : null;
    this.sounds[name] = { file, triggers: Array.isArray(triggers) ? triggers : [] };

    // A preloaded element still points at the old file; drop it so the next
    // play() loads the new one.
    if (previous && previous.file !== file) {
      delete this.audioCache[name];
    }
  }
}
@@ -0,0 +1,472 @@
1
+ /**
2
+ * DeepgramSTT — Server-side speech recognition via Deepgram Nova-2 API.
3
+ * Captures audio with MediaRecorder, uses VAD to detect speech/silence,
4
+ * sends audio chunks to /api/stt/deepgram for transcription.
5
+ *
6
+ * Drop-in replacement for WebSpeechSTT / GroqSTT with built-in PTT support.
7
+ *
8
+ * Usage:
9
+ * import { DeepgramSTT, DeepgramWakeWordDetector } from './DeepgramSTT.js';
10
+ *
11
+ * const stt = new DeepgramSTT();
12
+ * stt.onResult = (text) => console.log('Heard:', text);
13
+ * await stt.start();
14
+ */
15
+
16
+ // ===== DEEPGRAM STT =====
17
+ // Server-side speech recognition via Deepgram Nova-2 API
18
/**
 * Microphone speech-to-text that records with MediaRecorder, uses an
 * FFT-level voice-activity detector (VAD) to cut the recording into chunks,
 * and POSTs each chunk to `${serverUrl}/api/stt/deepgram`.
 *
 * Callbacks (assign after construction):
 *   onResult(text)       — transcript ready to act on
 *   onListenFinal(text)  — every raw transcript as it arrives
 *   onError(err)         — 'no-device', 'not-allowed', or an Error object
 */
class DeepgramSTT {
  /**
   * @param {Object} [config]
   * @param {string} [config.serverUrl] Base URL; falls back to
   *   `window.AGENT_CONFIG.serverUrl`, then `window.location.origin`.
   *   One trailing slash is stripped.
   * @param {number} [config.accumulationDelayMs=0] Listen mode: wait this long
   *   after the last transcript before delivering the joined text.
   * @param {number} [config.silenceDelayMs=800] Silence that ends an utterance.
   * @param {number} [config.vadThreshold=25] FFT average amplitude threshold.
   * @param {number} [config.minSpeechMs=300] Sustained level needed to count as speech.
   * @param {number} [config.maxRecordingMs=45000] Longest chunk before a forced cut.
   */
  constructor(config = {}) {
    this.serverUrl = (config.serverUrl || window.AGENT_CONFIG?.serverUrl || window.location.origin).replace(/\/$/, '');
    this.isListening = false;
    this.onResult = null;
    this.onError = null;
    this.onListenFinal = null; // Listen panel hook — called with each transcript
    this.onInterim = null; // Not used (pre-recorded mode has no interim results)
    this.mediaRecorder = null;
    this.audioChunks = [];
    this.stream = null;
    this.isProcessing = false;
    this.accumulatedText = ''; // PTT compatibility — last transcript

    // PTT support (built-in, no monkey-patching needed)
    this._micMuted = false;
    this._pttHolding = false;
    this._muteActive = false; // Set by mute(), cleared by resume()

    // VAD (Voice Activity Detection) settings — overridable through config,
    // and still plain properties so callers may tune them after construction.
    this.silenceTimer = null;
    this.silenceDelayMs = config.silenceDelayMs ?? 800; // 0.8s silence = end of speech
    this.accumulationDelayMs = config.accumulationDelayMs || 0;
    this.vadThreshold = config.vadThreshold ?? 25; // FFT average amplitude threshold
    this.minSpeechMs = config.minSpeechMs ?? 300; // Must sustain above threshold before counting
    this.maxRecordingMs = config.maxRecordingMs ?? 45000; // 45s max before auto-chunk
    this.maxRecordingTimer = null;
    this.isSpeaking = false;
    this.stoppingRecorder = false;
    this.hadSpeechInChunk = false;
    this._speechStartTime = 0;
    this._resumedSpeechStart = 0;

    // Audio analysis for VAD
    this._audioCtx = null;
    this._analyser = null;
    this._vadAnimFrame = null;
    this._accumulationTimer = null;
  }

  /**
   * @returns {boolean} whether `navigator.mediaDevices.getUserMedia` exists
   */
  isSupported() {
    return !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);
  }

  /**
   * Acquire the microphone, then start recording and the VAD loop.
   *
   * On failure `onError` receives 'no-device', 'not-allowed', or the raw
   * error, and anything acquired so far is released so the microphone is not
   * left open.
   *
   * @returns {Promise<boolean>} true when listening (or already listening);
   *   false when the mic is PTT-muted or startup failed
   */
  async start() {
    if (this.isListening) return true;
    if (this._micMuted) return false;

    try {
      if (!this.stream || !this.stream.active) {
        this.stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      }

      this._setupRecorder();
      this._startVAD();

      this.mediaRecorder.start();
      this.isListening = true;
      console.log('Deepgram STT started');
      return true;
    } catch (error) {
      console.error('Failed to start Deepgram STT:', error);
      // getUserMedia may have succeeded before a later step threw; release the
      // stream and VAD context instead of leaving the microphone live.
      this._releaseMedia();
      if (error.name === 'NotFoundError' || error.name === 'DevicesNotFoundError') {
        if (this.onError) this.onError('no-device');
      } else if (error.name === 'NotAllowedError') {
        if (this.onError) this.onError('not-allowed');
      } else {
        if (this.onError) this.onError(error);
      }
      return false;
    }
  }

  /** Cancel the silence and max-recording timers, if armed. */
  _clearVadTimers() {
    if (this.silenceTimer) { clearTimeout(this.silenceTimer); this.silenceTimer = null; }
    if (this.maxRecordingTimer) { clearTimeout(this.maxRecordingTimer); this.maxRecordingTimer = null; }
  }

  /** Cancel the pending accumulated-result delivery, if any. */
  _clearAccumulationTimer() {
    if (this._accumulationTimer) { clearTimeout(this._accumulationTimer); this._accumulationTimer = null; }
  }

  /** Stop the VAD loop, end all microphone tracks and close the AudioContext. */
  _releaseMedia() {
    if (this._vadAnimFrame) { cancelAnimationFrame(this._vadAnimFrame); this._vadAnimFrame = null; }

    if (this.stream) {
      this.stream.getTracks().forEach(track => track.stop());
      this.stream = null;
    }

    if (this._audioCtx) {
      // Best effort: a failed close() leaves nothing to recover.
      this._audioCtx.close().catch(() => {});
      this._audioCtx = null;
      this._analyser = null;
    }
  }

  /**
   * Create a MediaRecorder on the current stream and wire its handlers.
   * The `onstop` handler restarts recording when appropriate, then uploads
   * the finished chunk and dispatches the transcript.
   */
  _setupRecorder() {
    // NOTE(review): the MediaRecorder constructor throws if the browser cannot
    // record this MIME type; start() then reports the error through onError.
    const options = { mimeType: 'audio/webm;codecs=opus' };
    this.mediaRecorder = new MediaRecorder(this.stream, options);
    this.audioChunks = [];

    this.mediaRecorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        this.audioChunks.push(event.data);
      }
    };

    this.mediaRecorder.onstop = async () => {
      // Snapshot and reset per-chunk state before anything asynchronous.
      const chunks = this.audioChunks;
      const hadSpeech = this.hadSpeechInChunk;
      this.audioChunks = [];
      this.hadSpeechInChunk = false;
      this.stoppingRecorder = false;

      // Restart recording immediately to minimize audio gap
      if (this.isListening && !this._micMuted && !this._muteActive && !this._pttHolding) {
        this.isSpeaking = false;
        this.mediaRecorder.start();
      }

      if (chunks.length === 0) return;

      // Discard audio if muted (TTS playing) or a previous chunk is still in flight
      if ((this.isProcessing || this._muteActive) && !this._pttHolding) {
        return;
      }

      this.isProcessing = true;
      this._clearVadTimers();

      const audioBlob = new Blob(chunks, { type: 'audio/webm' });

      // Skip if no speech and small audio — prevents hallucinations
      if (!hadSpeech && audioBlob.size < 50000) {
        console.log('Deepgram STT: skipping - no speech detected (' + audioBlob.size + ' bytes)');
        this.isProcessing = false;
        return;
      }

      try {
        console.log('Deepgram STT: sending audio (' + audioBlob.size + ' bytes)');
        const formData = new FormData();
        formData.append('audio', audioBlob, 'audio.webm');

        const response = await fetch(`${this.serverUrl}/api/stt/deepgram`, {
          method: 'POST',
          body: formData
        });

        // An error status with a JSON body would otherwise look like "no
        // transcript" and vanish silently; report it through onError instead.
        if (!response.ok) {
          throw new Error(`Deepgram STT request failed: HTTP ${response.status}`);
        }

        const data = await response.json();

        if (data.transcript && data.transcript.trim()) {
          console.log('Deepgram STT transcript:', data.transcript);
          if (this.onListenFinal) this.onListenFinal(data.transcript);
          this._dispatchTranscript(data.transcript.trim());
        }
      } catch (error) {
        console.error('Deepgram STT error:', error);
        if (this.onError) this.onError(error);
      } finally {
        this.isProcessing = false;
      }
    };
  }

  /**
   * Route a trimmed transcript to onResult.
   * PTT mode (`_micMuted` set by pttRelease) delivers immediately; listen mode
   * joins chunks and delivers after `accumulationDelayMs` without a new one.
   * @param {string} transcript  trimmed, non-empty transcript
   */
  _dispatchTranscript(transcript) {
    // PTT mode: send immediately
    if (this._micMuted) {
      this.accumulatedText = transcript;
      if (this.onResult) this.onResult(this.accumulatedText);
      this.accumulatedText = '';
      return;
    }

    // Listen mode: accumulate across chunks, send after silence
    this.accumulatedText = this.accumulatedText
      ? this.accumulatedText + ' ' + transcript
      : transcript;

    this._clearAccumulationTimer();
    this._accumulationTimer = setTimeout(() => {
      this._accumulationTimer = null;
      const fullText = this.accumulatedText.trim();
      if (fullText && this.onResult) {
        console.log('Deepgram STT accumulated result:', fullText);
        this.onResult(fullText);
      }
      this.accumulatedText = '';
    }, this.accumulationDelayMs);
  }

  /**
   * Ensure an analyser is attached to the stream and the VAD loop is running.
   * Reuses an AudioContext that is not closed.
   */
  _startVAD() {
    if (this._audioCtx && this._audioCtx.state !== 'closed') {
      if (!this._vadAnimFrame) this._runVADLoop();
      return;
    }

    this._audioCtx = new AudioContext();
    const source = this._audioCtx.createMediaStreamSource(this.stream);
    this._analyser = this._audioCtx.createAnalyser();
    this._analyser.fftSize = 512;
    source.connect(this._analyser);

    this._runVADLoop();
  }

  /**
   * Per-animation-frame VAD state machine. Compares the average FFT bin level
   * with `vadThreshold` and arms/clears the silence and max-recording timers
   * that stop the recorder. The loop ends itself once `isListening` is false.
   */
  _runVADLoop() {
    const bufferLength = this._analyser.frequencyBinCount;
    const dataArray = new Uint8Array(bufferLength);

    const checkLevel = () => {
      if (!this.isListening) {
        this._vadAnimFrame = null;
        return;
      }

      this._analyser.getByteFrequencyData(dataArray);
      const average = dataArray.reduce((a, b) => a + b, 0) / bufferLength;
      const isSpeakingNow = average > this.vadThreshold;

      // Skip VAD while muted (TTS playing)
      if (this._muteActive) {
        this._vadAnimFrame = requestAnimationFrame(checkLevel);
        return;
      }

      if (isSpeakingNow && !this.isSpeaking) {
        // Possible speech onset: require minSpeechMs of sustained level.
        const now = Date.now();
        if (!this._speechStartTime) this._speechStartTime = now;
        if (now - this._speechStartTime < this.minSpeechMs) {
          this._vadAnimFrame = requestAnimationFrame(checkLevel);
          return;
        }

        this.isSpeaking = true;
        this.hadSpeechInChunk = true;
        this._speechStartTime = 0;

        if (this.silenceTimer) { clearTimeout(this.silenceTimer); this.silenceTimer = null; }

        if (!this.maxRecordingTimer && !this.isProcessing && !this.stoppingRecorder) {
          // Force a chunk boundary if the speaker never pauses.
          this.maxRecordingTimer = setTimeout(() => {
            this.maxRecordingTimer = null;
            this.isSpeaking = false;
            this.stoppingRecorder = true;
            if (this.silenceTimer) { clearTimeout(this.silenceTimer); this.silenceTimer = null; }
            if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
              this.mediaRecorder.stop();
            }
          }, this.maxRecordingMs);
        }
      } else if (isSpeakingNow && this.isSpeaking) {
        // Speech resumed during the silence countdown: cancel the countdown
        // only once the level has held for minSpeechMs.
        const now = Date.now();
        if (!this._resumedSpeechStart) this._resumedSpeechStart = now;
        if (now - this._resumedSpeechStart >= this.minSpeechMs && this.silenceTimer) {
          clearTimeout(this.silenceTimer);
          this.silenceTimer = null;
          this._resumedSpeechStart = 0;
        }
      } else if (!isSpeakingNow && !this.isSpeaking) {
        this._speechStartTime = 0;
        this._resumedSpeechStart = 0;
      } else if (!isSpeakingNow && this.isSpeaking && !this.isProcessing && !this.stoppingRecorder) {
        // Speech ended: stop the recorder after silenceDelayMs of quiet.
        this._resumedSpeechStart = 0;
        if (!this.silenceTimer) {
          this.silenceTimer = setTimeout(() => {
            // Clear the spent handle, as the max-recording timer does.
            this.silenceTimer = null;
            this.isSpeaking = false;
            this.stoppingRecorder = true;
            if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
              this.mediaRecorder.stop();
            }
          }, this.silenceDelayMs);
        }
      }

      this._vadAnimFrame = requestAnimationFrame(checkLevel);
    };

    this._vadAnimFrame = requestAnimationFrame(checkLevel);
  }

  /**
   * Stop listening and release the recorder, microphone tracks and
   * AudioContext. Also clears the PTT-mute and TTS-mute flags.
   */
  stop() {
    this.isListening = false;
    this.stoppingRecorder = false;
    this._micMuted = false;
    this._muteActive = false;

    this._clearVadTimers();
    this._clearAccumulationTimer();

    if (this.mediaRecorder && this.mediaRecorder.state !== 'inactive') {
      this.mediaRecorder.stop();
    }

    this._releaseMedia();

    console.log('Deepgram STT stopped');
  }

  /** Clear the in-flight flag and any accumulated transcript text. */
  resetProcessing() {
    this.isProcessing = false;
    this.accumulatedText = '';
  }

  /** Alias for mute(). */
  pause() { this.mute(); }

  /**
   * Mute while TTS plays: stop the recorder, drop pending text and timers.
   * `isProcessing` is set so the chunk produced by this stop is discarded.
   */
  mute() {
    this._muteActive = true;
    this.isProcessing = true;
    this.hadSpeechInChunk = false;
    this.accumulatedText = '';
    this._clearVadTimers();
    this._clearAccumulationTimer();
    if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
      this.mediaRecorder.stop();
    }
  }

  /**
   * Undo mute(): reset per-chunk state and, if still listening and not
   * PTT-muted, restart the recorder and VAD loop on the active stream.
   */
  resume() {
    this._muteActive = false;
    this.isProcessing = false;
    this.stoppingRecorder = false;
    this.hadSpeechInChunk = false;
    this.isSpeaking = false;
    this.audioChunks = [];

    if (this.isListening && !this._micMuted) {
      if (this.stream && this.stream.active) {
        // Rebuild the recorder if it is missing or bound to another stream.
        if (!this.mediaRecorder || this.mediaRecorder.stream !== this.stream) {
          this._setupRecorder();
        }
        if (this.mediaRecorder.state === 'inactive') {
          this.mediaRecorder.start();
        }
        if (!this._vadAnimFrame) {
          this._startVAD();
        }
      }
    }
  }

  // --- PTT helpers ---

  /** Push-to-talk pressed: clear mute state and stale audio, ensure recording. */
  pttActivate() {
    this._pttHolding = true;
    this._micMuted = false;
    this._muteActive = false; // Clear stale TTS mute — PTT overrides
    this.isProcessing = false;
    this.accumulatedText = '';
    this.hadSpeechInChunk = false;
    this.audioChunks = [];
    this._clearVadTimers();

    if (this.mediaRecorder && this.mediaRecorder.state === 'inactive') {
      this.mediaRecorder.start();
    }
  }

  /**
   * Push-to-talk released: stop the recorder so the held audio is uploaded.
   * `hadSpeechInChunk` is forced true so the chunk is not skipped as silence,
   * and `_micMuted` makes the transcript deliver immediately.
   */
  pttRelease() {
    this._pttHolding = false;
    this._micMuted = true;
    this.hadSpeechInChunk = true;
    this.stoppingRecorder = true;

    this._clearVadTimers();

    if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
      this.mediaRecorder.stop();
    }
  }

  /**
   * Mute the mic in PTT mode without sending audio: `isProcessing` is set so
   * the chunk produced by this stop is discarded.
   */
  pttMute() {
    this._pttHolding = false;
    this._micMuted = true;
    this.hadSpeechInChunk = false;
    this._clearVadTimers();
    this.isProcessing = true;
    if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
      this.mediaRecorder.stop();
    }
  }

  /** Leave PTT mute: reset per-chunk state and resume recording if listening. */
  pttUnmute() {
    this._micMuted = false;
    this._pttHolding = false;
    this.isProcessing = false;
    this.stoppingRecorder = false;
    this.hadSpeechInChunk = false;
    this.audioChunks = [];

    if (this.isListening && this.mediaRecorder && this.mediaRecorder.state === 'inactive') {
      this.mediaRecorder.start();
    }
  }
}
406
+
407
+
408
+ // ===== DEEPGRAM WAKE WORD DETECTOR =====
409
/**
 * Listens through a dedicated DeepgramSTT instance and fires
 * `onWakeWordDetected()` whenever a transcript contains one of `wakeWords`.
 */
class DeepgramWakeWordDetector {
  /**
   * @param {Object} [options]
   * @param {string[]} [options.wakeWords=['wake up']] Phrases to look for;
   *   matched as case-insensitive substrings of each transcript.
   * @param {Object} [options.sttConfig={}] Passed to the DeepgramSTT constructor.
   */
  constructor({ wakeWords = ['wake up'], sttConfig = {} } = {}) {
    this.isListening = false;
    this.onWakeWordDetected = null;
    this.wakeWords = wakeWords;
    this._sttConfig = sttConfig;
    this._stt = null;
  }

  /**
   * @returns {boolean} whether `navigator.mediaDevices.getUserMedia` exists
   */
  isSupported() {
    return !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);
  }

  /**
   * Create the recogniser and start listening.
   * @returns {Promise<boolean>} true when listening (or already listening);
   *   false when the recogniser failed to start or stop() was called while
   *   startup was still pending
   */
  async start() {
    if (this.isListening) return true;

    const stt = new DeepgramSTT(this._sttConfig);
    // Detector-specific VAD tuning, applied over the recogniser's defaults.
    stt.silenceDelayMs = 1500;
    stt.maxRecordingMs = 10000;
    stt.vadThreshold = 40;

    stt.onResult = (transcript) => {
      const lower = transcript.toLowerCase();
      console.log(`Wake word detector heard: "${transcript}"`);
      // Lower-case the wake words too, so phrases set with capitals still match.
      if (this.wakeWords.some(ww => lower.includes(String(ww).toLowerCase()))) {
        console.log('Wake word detected!');
        if (this.onWakeWordDetected) this.onWakeWordDetected();
      }
    };

    stt.onError = (error) => {
      console.warn('Wake word detector error:', error);
    };

    this._stt = stt;
    // Set before awaiting so a concurrent start() call returns early.
    this.isListening = true;
    const ok = await stt.start();

    if (this._stt !== stt) {
      // stop() ran while startup was pending. The recogniser may have finished
      // starting after that, so stop it again rather than leave the mic open.
      stt.stop();
      return false;
    }

    if (!ok) {
      // Release whatever the failed recogniser acquired and drop it.
      this.isListening = false;
      stt.stop();
      this._stt = null;
      return false;
    }

    console.log('Deepgram wake word detector started');
    return true;
  }

  /** Stop listening and discard the recogniser. */
  stop() {
    this.isListening = false;
    if (this._stt) {
      this._stt.stop();
      this._stt = null;
    }
    console.log('Deepgram wake word detector stopped');
  }

  /**
   * Stop if listening, otherwise start.
   * @returns {Promise<boolean>} the resulting listening state
   */
  async toggle() {
    if (this.isListening) {
      this.stop();
      return false;
    }
    return await this.start();
  }
}
471
+
472
+ export { DeepgramSTT, DeepgramWakeWordDetector };