openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/.env.example +104 -0
  2. package/Dockerfile +30 -0
  3. package/LICENSE +21 -0
  4. package/README.md +638 -0
  5. package/SETUP.md +360 -0
  6. package/app.py +232 -0
  7. package/auto-approve-devices.js +111 -0
  8. package/cli/index.js +372 -0
  9. package/config/__init__.py +4 -0
  10. package/config/default.yaml +43 -0
  11. package/config/flags.yaml +67 -0
  12. package/config/loader.py +203 -0
  13. package/config/providers.yaml +71 -0
  14. package/config/speech_normalization.yaml +182 -0
  15. package/config/theme.json +4 -0
  16. package/data/greetings.json +25 -0
  17. package/default-pages/ai-image-creator.html +915 -0
  18. package/default-pages/bulk-image-uploader.html +492 -0
  19. package/default-pages/desktop.html +2865 -0
  20. package/default-pages/file-explorer.html +854 -0
  21. package/default-pages/interactive-map.html +655 -0
  22. package/default-pages/style-guide.html +1005 -0
  23. package/default-pages/website-setup.html +1623 -0
  24. package/deploy/openclaw/Dockerfile +46 -0
  25. package/deploy/openvoiceui.service +30 -0
  26. package/deploy/setup-nginx.sh +50 -0
  27. package/deploy/setup-sudo.sh +306 -0
  28. package/deploy/skill-runner/Dockerfile +19 -0
  29. package/deploy/skill-runner/requirements.txt +14 -0
  30. package/deploy/skill-runner/server.py +269 -0
  31. package/deploy/supertonic/Dockerfile +22 -0
  32. package/deploy/supertonic/server.py +79 -0
  33. package/docker-compose.pinokio.yml +11 -0
  34. package/docker-compose.yml +59 -0
  35. package/greetings.json +25 -0
  36. package/index.html +65 -0
  37. package/inject-device-identity.js +142 -0
  38. package/package.json +82 -0
  39. package/profiles/default.json +114 -0
  40. package/profiles/manager.py +354 -0
  41. package/profiles/schema.json +337 -0
  42. package/prompts/voice-system-prompt.md +149 -0
  43. package/providers/__init__.py +39 -0
  44. package/providers/base.py +63 -0
  45. package/providers/llm/__init__.py +12 -0
  46. package/providers/llm/base.py +71 -0
  47. package/providers/llm/clawdbot_provider.py +112 -0
  48. package/providers/llm/zai_provider.py +115 -0
  49. package/providers/registry.py +320 -0
  50. package/providers/stt/__init__.py +12 -0
  51. package/providers/stt/base.py +58 -0
  52. package/providers/stt/webspeech_provider.py +49 -0
  53. package/providers/stt/whisper_provider.py +100 -0
  54. package/providers/tts/__init__.py +20 -0
  55. package/providers/tts/base.py +91 -0
  56. package/providers/tts/groq_provider.py +74 -0
  57. package/providers/tts/supertonic_provider.py +72 -0
  58. package/requirements.txt +38 -0
  59. package/routes/__init__.py +10 -0
  60. package/routes/admin.py +515 -0
  61. package/routes/canvas.py +1315 -0
  62. package/routes/chat.py +51 -0
  63. package/routes/conversation.py +2158 -0
  64. package/routes/elevenlabs_hybrid.py +306 -0
  65. package/routes/greetings.py +98 -0
  66. package/routes/icons.py +279 -0
  67. package/routes/image_gen.py +364 -0
  68. package/routes/instructions.py +190 -0
  69. package/routes/music.py +838 -0
  70. package/routes/onboarding.py +43 -0
  71. package/routes/pi.py +62 -0
  72. package/routes/profiles.py +215 -0
  73. package/routes/report_issue.py +68 -0
  74. package/routes/static_files.py +533 -0
  75. package/routes/suno.py +664 -0
  76. package/routes/theme.py +81 -0
  77. package/routes/transcripts.py +199 -0
  78. package/routes/vision.py +348 -0
  79. package/routes/workspace.py +288 -0
  80. package/server.py +1510 -0
  81. package/services/__init__.py +1 -0
  82. package/services/auth.py +143 -0
  83. package/services/canvas_versioning.py +239 -0
  84. package/services/db_pool.py +107 -0
  85. package/services/gateway.py +16 -0
  86. package/services/gateway_manager.py +333 -0
  87. package/services/gateways/__init__.py +12 -0
  88. package/services/gateways/base.py +110 -0
  89. package/services/gateways/compat.py +264 -0
  90. package/services/gateways/openclaw.py +1134 -0
  91. package/services/health.py +100 -0
  92. package/services/memory_client.py +455 -0
  93. package/services/paths.py +26 -0
  94. package/services/speech_normalizer.py +285 -0
  95. package/services/tts.py +270 -0
  96. package/setup-config.js +262 -0
  97. package/sounds/air_horn.mp3 +0 -0
  98. package/sounds/bruh.mp3 +0 -0
  99. package/sounds/crowd_cheer.mp3 +0 -0
  100. package/sounds/gunshot.mp3 +0 -0
  101. package/sounds/impact.mp3 +0 -0
  102. package/sounds/lets_go.mp3 +0 -0
  103. package/sounds/record_stop.mp3 +0 -0
  104. package/sounds/rewind.mp3 +0 -0
  105. package/sounds/sad_trombone.mp3 +0 -0
  106. package/sounds/scratch_long.mp3 +0 -0
  107. package/sounds/yeah.mp3 +0 -0
  108. package/src/adapters/ClawdBotAdapter.js +264 -0
  109. package/src/adapters/_template.js +133 -0
  110. package/src/adapters/elevenlabs-classic.js +841 -0
  111. package/src/adapters/elevenlabs-hybrid.js +812 -0
  112. package/src/adapters/hume-evi.js +676 -0
  113. package/src/admin.html +1339 -0
  114. package/src/app.js +8802 -0
  115. package/src/core/Config.js +173 -0
  116. package/src/core/EmotionEngine.js +307 -0
  117. package/src/core/EventBridge.js +180 -0
  118. package/src/core/EventBus.js +117 -0
  119. package/src/core/VoiceSession.js +607 -0
  120. package/src/face/BaseFace.js +259 -0
  121. package/src/face/EyeFace.js +208 -0
  122. package/src/face/HaloSmokeFace.js +509 -0
  123. package/src/face/manifest.json +27 -0
  124. package/src/face/previews/eyes.svg +16 -0
  125. package/src/face/previews/orb.svg +29 -0
  126. package/src/features/MusicPlayer.js +620 -0
  127. package/src/features/Soundboard.js +128 -0
  128. package/src/providers/DeepgramSTT.js +472 -0
  129. package/src/providers/DeepgramStreamingSTT.js +766 -0
  130. package/src/providers/GroqSTT.js +559 -0
  131. package/src/providers/TTSPlayer.js +323 -0
  132. package/src/providers/WebSpeechSTT.js +479 -0
  133. package/src/providers/tts/BaseTTSProvider.js +81 -0
  134. package/src/providers/tts/HumeProvider.js +77 -0
  135. package/src/providers/tts/SupertonicProvider.js +174 -0
  136. package/src/providers/tts/index.js +140 -0
  137. package/src/shell/adapter-registry.js +154 -0
  138. package/src/shell/caller-bridge.js +35 -0
  139. package/src/shell/camera-bridge.js +28 -0
  140. package/src/shell/canvas-bridge.js +32 -0
  141. package/src/shell/commercial-bridge.js +44 -0
  142. package/src/shell/face-bridge.js +44 -0
  143. package/src/shell/music-bridge.js +60 -0
  144. package/src/shell/orchestrator.js +233 -0
  145. package/src/shell/profile-discovery.js +303 -0
  146. package/src/shell/sounds-bridge.js +28 -0
  147. package/src/shell/transcript-bridge.js +61 -0
  148. package/src/shell/waveform-bridge.js +33 -0
  149. package/src/styles/base.css +2862 -0
  150. package/src/styles/face.css +417 -0
  151. package/src/styles/pi-overrides.css +89 -0
  152. package/src/styles/theme-dark.css +67 -0
  153. package/src/test-tts.html +175 -0
  154. package/src/ui/AppShell.js +544 -0
  155. package/src/ui/ProfileSwitcher.js +228 -0
  156. package/src/ui/SessionControl.js +240 -0
  157. package/src/ui/face/FacePicker.js +195 -0
  158. package/src/ui/face/FaceRenderer.js +309 -0
  159. package/src/ui/settings/PlaylistEditor.js +366 -0
  160. package/src/ui/settings/SettingsPanel.css +684 -0
  161. package/src/ui/settings/SettingsPanel.js +419 -0
  162. package/src/ui/settings/TTSVoicePreview.js +210 -0
  163. package/src/ui/themes/ThemeManager.js +213 -0
  164. package/src/ui/visualizers/BaseVisualizer.js +29 -0
  165. package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
  166. package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
  167. package/static/emulators/jsdos/js-dos.css +1 -0
  168. package/static/emulators/jsdos/js-dos.js +22 -0
  169. package/static/favicon.svg +55 -0
  170. package/static/icons/apple-touch-icon.png +0 -0
  171. package/static/icons/favicon-32.png +0 -0
  172. package/static/icons/icon-192.png +0 -0
  173. package/static/icons/icon-512.png +0 -0
  174. package/static/install.html +449 -0
  175. package/static/manifest.json +26 -0
  176. package/static/sw.js +21 -0
  177. package/tts_providers/__init__.py +136 -0
  178. package/tts_providers/base_provider.py +319 -0
  179. package/tts_providers/groq_provider.py +155 -0
  180. package/tts_providers/hume_provider.py +226 -0
  181. package/tts_providers/providers_config.json +119 -0
  182. package/tts_providers/qwen3_provider.py +371 -0
  183. package/tts_providers/resemble_provider.py +315 -0
  184. package/tts_providers/supertonic_provider.py +557 -0
  185. package/tts_providers/supertonic_tts.py +399 -0
@@ -0,0 +1,676 @@
1
+ /**
2
+ * Hume EVI Adapter (P6-T3)
3
+ *
4
+ * Agent adapter for Hume Empathic Voice Interface (EVI).
5
+ * Wraps the Hume EVI WebSocket API in the EventBridge adapter contract.
6
+ *
7
+ * Hume EVI features:
8
+ * - SDK-managed audio pipeline (mic input + TTS output)
9
+ * - Emotional expression data per utterance
10
+ * - Configurable voice via Hume config_id
11
+ * - Tool calling (server-side tools via config)
12
+ * - WebSocket transport (wss://api.hume.ai/v0/evi/chat)
13
+ *
14
+ * Ref: future-dev-plans/17-MULTI-AGENT-FRAMEWORK.md
15
+ * Ref: future-dev-plans/15-ELEVENLABS-CLASSIC-AGENT.md (similar pattern)
16
+ *
17
+ * Usage:
18
+ * const { HumeEVIAdapter } = await import('./adapters/hume-evi.js');
19
+ * orchestrator.register('hume-evi', HumeEVIAdapter, {
20
+ * serverUrl: 'http://localhost:5001',
21
+ * configId: '<hume-config-id>', // Optional override
22
+ * voiceName: 'MyVoice'
23
+ * });
24
+ */
25
+
26
+ import { AgentEvents, AgentActions } from '../core/EventBridge.js';
27
+
28
+ // ─────────────────────────────────────────────────────────────────
29
+ // Emotion → mood mapping (Hume sends emotion scores per utterance)
30
+ // ─────────────────────────────────────────────────────────────────
31
+
32
+ const EMOTION_TO_MOOD = {
33
+ Joy: 'happy',
34
+ Excitement: 'happy',
35
+ Amusement: 'happy',
36
+ Contentment: 'happy',
37
+ Satisfaction: 'happy',
38
+ Sadness: 'sad',
39
+ Disappointment: 'sad',
40
+ Distress: 'sad',
41
+ Anger: 'thinking',
42
+ Disgust: 'thinking',
43
+ Confusion: 'thinking',
44
+ Contemplation: 'thinking',
45
+ Concentration: 'thinking',
46
+ Interest: 'listening',
47
+ Curiosity: 'listening',
48
+ Surprise: 'listening',
49
+ Realization: 'listening',
50
+ // Default: neutral
51
+ };
52
+
53
+ /**
54
+ * Derive the dominant emotion mood from Hume's emotion scores array.
55
+ * @param {Array<{name: string, score: number}>} emotions
56
+ * @returns {string} mood name
57
+ */
58
+ function dominantMood(emotions) {
59
+ if (!emotions || emotions.length === 0) return 'neutral';
60
+ const top = emotions.reduce((a, b) => (a.score > b.score ? a : b));
61
+ return EMOTION_TO_MOOD[top.name] || 'neutral';
62
+ }
63
+
64
+ // ─────────────────────────────────────────────────────────────────
65
+ // HumeEVIAdapter
66
+ // ─────────────────────────────────────────────────────────────────
67
+
68
export const HumeEVIAdapter = {
  name: 'Hume EVI',

  /**
   * What this adapter supports.
   * UI shows/hides features based on this list.
   */
  capabilities: [
    'emotion_detection', // Hume sends per-utterance emotion scores
    'multi_voice',       // Voice configured via Hume config_id
    'canvas',            // Agent can issue canvas commands via tool
    'dj_soundboard',     // Agent can trigger sound effects via tool
    'music_sync',        // Agent can control music via tool
  ],

  // ── Private state ─────────────────────────────────────────────
  _bridge: null,            // EventBridge instance (set in init)
  _config: null,            // { serverUrl?, configId?, voiceName? }
  _socket: null,            // Active EVI WebSocket, or null
  _accessToken: null,       // Short-lived token fetched from our server
  _audioContext: null,      // Output AudioContext for TTS playback
  _mediaStream: null,       // Microphone MediaStream
  _mediaRecorder: null,     // MediaRecorder chunking mic audio to the socket
  _audioQueue: [],          // Queued audio chunks from Hume TTS
  _isPlaying: false,        // True while draining _audioQueue
  _reconnectTimer: null,
  _reconnectDelay: 1000,    // Current backoff delay in ms (doubles per retry)
  _maxReconnectDelay: 30000,
  _destroyed: false,
  _unsubscribers: [],       // Bridge subscription cleanup functions
  _lastMood: 'neutral',     // Last mood emitted (suppresses duplicate MOOD events)
  _currentSourceNode: null, // AudioBufferSourceNode currently playing, or null

  // ─────────────────────────────────────────────────────────────
  // INIT — called when mode is selected
  // ─────────────────────────────────────────────────────────────

  /**
   * Wire the adapter to the bridge and reset per-session state.
   * Does NOT connect — call start() (from a user gesture) to begin.
   * @param {object} bridge - EventBridge to emit/subscribe on
   * @param {object} [config] - optional { serverUrl, configId, voiceName }
   */
  async init(bridge, config) {
    this._bridge = bridge;
    this._config = config || {};
    this._destroyed = false;
    this._audioQueue = [];
    this._isPlaying = false;
    this._reconnectDelay = 1000;
    this._lastMood = 'neutral';

    console.log('[HumeEVI] Initializing adapter');

    // Fix: drop any subscriptions left over from a previous init() without
    // destroy(), so bridge actions are not handled twice per event.
    this._unsubscribers.forEach(unsub => unsub());
    this._unsubscribers = [];

    // Subscribe to UI → Agent actions
    this._unsubscribers.push(
      bridge.on(AgentActions.END_SESSION, () => this.stop()),
      bridge.on(AgentActions.CONTEXT_UPDATE, (d) => this._sendContextUpdate(d.text)),
      bridge.on(AgentActions.FORCE_MESSAGE, (d) => this._sendAssistantInput(d.text)),
    );

    console.log('[HumeEVI] Adapter initialized, call start() to connect');
  },

  // ─────────────────────────────────────────────────────────────
  // START — connect and begin conversation
  // ─────────────────────────────────────────────────────────────

  /**
   * Fetch a token, set up audio output, and open the EVI WebSocket.
   * Must be called from a user gesture so AudioContext may start.
   * Emits ERROR + a 'sad' mood on failure instead of throwing.
   */
  async start() {
    if (this._destroyed) return;

    try {
      // 1. Fetch access token from our server (keeps API key server-side)
      await this._fetchAccessToken();

      // 2. Initialize AudioContext (requires user gesture — call start() from click)
      this._audioContext = new (window.AudioContext || window.webkitAudioContext)();
      if (this._audioContext.state === 'suspended') {
        await this._audioContext.resume();
      }

      // 3. Connect WebSocket
      await this._connect();

    } catch (err) {
      console.error('[HumeEVI] Start failed:', err);
      this._bridge.emit(AgentEvents.ERROR, { message: err.message });
      this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
    }
  },

  // ─────────────────────────────────────────────────────────────
  // STOP — end current session gracefully
  // ─────────────────────────────────────────────────────────────

  /**
   * Stop mic capture and playback, close the socket with a normal
   * (1000) close code so _onClose does not schedule a reconnect,
   * and reset UI state to idle/neutral.
   */
  async stop() {
    clearTimeout(this._reconnectTimer);
    this._stopMicrophone();
    this._stopAudioPlayback();

    if (this._socket) {
      // Close with code 1000 (normal closure) so no reconnect is attempted
      try {
        if (this._socket.readyState === WebSocket.OPEN) {
          this._socket.close(1000, 'User ended session');
        }
      } catch (_) {}
      this._socket = null;
    }

    // Bridge may be absent if stop() runs before init() (e.g. via destroy())
    this._bridge?.emit(AgentEvents.STATE_CHANGED, { state: 'idle' });
    this._bridge?.emit(AgentEvents.DISCONNECTED);
    this._bridge?.emit(AgentEvents.MOOD, { mood: 'neutral' });
    console.log('[HumeEVI] Session stopped');
  },

  // ─────────────────────────────────────────────────────────────
  // DESTROY — full teardown on mode switch
  // ─────────────────────────────────────────────────────────────

  /**
   * Full teardown: stop the session, release all bridge subscriptions,
   * close the AudioContext, and forget the access token.
   */
  async destroy() {
    this._destroyed = true;
    await this.stop();

    // Unsubscribe all bridge listeners
    this._unsubscribers.forEach(unsub => unsub());
    this._unsubscribers = [];

    // Close AudioContext
    if (this._audioContext) {
      try { await this._audioContext.close(); } catch (_) {}
      this._audioContext = null;
    }

    this._accessToken = null;
    console.log('[HumeEVI] Adapter destroyed');
  },

  // ─────────────────────────────────────────────────────────────
  // PRIVATE — Token fetch
  // ─────────────────────────────────────────────────────────────

  /**
   * Fetch a short-lived Hume access token from our server endpoint.
   * Also adopts the server's config_id unless one was configured locally.
   * @throws {Error} on a non-OK response or a token-less payload
   */
  async _fetchAccessToken() {
    const serverUrl = this._config.serverUrl || '';
    const response = await fetch(`${serverUrl}/api/hume/token`);
    if (!response.ok) {
      throw new Error(`Failed to fetch Hume token: ${response.status}`);
    }
    const data = await response.json();
    this._accessToken = data.access_token || data.token;
    // Fix: fail fast with a clear error instead of attempting a WebSocket
    // connect with access_token=undefined in the URL.
    if (!this._accessToken) {
      throw new Error('Hume token response contained no access token');
    }
    if (data.config_id && !this._config.configId) {
      this._config.configId = data.config_id;
    }
    console.log('[HumeEVI] Access token fetched');
  },

  // ─────────────────────────────────────────────────────────────
  // PRIVATE — WebSocket connection
  // ─────────────────────────────────────────────────────────────

  /**
   * Open the EVI chat WebSocket with the current access token and
   * optional config_id, and attach the socket event handlers.
   */
  async _connect() {
    if (this._destroyed) return;

    const params = new URLSearchParams({ access_token: this._accessToken });
    if (this._config.configId) {
      params.set('config_id', this._config.configId);
    }

    const wsUrl = `wss://api.hume.ai/v0/evi/chat?${params}`;
    console.log('[HumeEVI] Connecting to EVI WebSocket...');

    this._socket = new WebSocket(wsUrl);
    this._socket.binaryType = 'arraybuffer';

    this._socket.onopen = () => this._onOpen();
    this._socket.onmessage = (evt) => this._onMessage(evt);
    this._socket.onclose = (evt) => this._onClose(evt);
    this._socket.onerror = (evt) => this._onError(evt);
  },

  /** Socket opened: reset backoff, flag connected, start the mic. */
  _onOpen() {
    console.log('[HumeEVI] WebSocket connected');
    this._reconnectDelay = 1000; // Reset backoff on successful connect

    this._bridge.emit(AgentEvents.CONNECTED);
    this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
    this._bridge.emit(AgentEvents.MOOD, { mood: 'happy' });

    // Start capturing microphone audio
    this._startMicrophone();
  },

  /**
   * Socket closed. Code 1000 (or adapter destroyed) means a deliberate
   * stop; anything else schedules a reconnect with exponential backoff
   * (doubling up to _maxReconnectDelay), refreshing the token first.
   */
  _onClose(evt) {
    console.log(`[HumeEVI] WebSocket closed: ${evt.code} ${evt.reason}`);
    this._stopMicrophone();

    if (!this._destroyed && evt.code !== 1000) {
      // Abnormal close — schedule reconnect with exponential backoff
      console.log(`[HumeEVI] Reconnecting in ${this._reconnectDelay}ms...`);
      this._reconnectTimer = setTimeout(async () => {
        if (!this._destroyed) {
          try {
            await this._fetchAccessToken();
            await this._connect();
          } catch (err) {
            console.error('[HumeEVI] Reconnect failed:', err);
            this._bridge.emit(AgentEvents.ERROR, { message: 'Reconnect failed' });
          }
        }
      }, this._reconnectDelay);

      // Exponential backoff capped at 30s
      this._reconnectDelay = Math.min(this._reconnectDelay * 2, this._maxReconnectDelay);

      this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'idle' });
      this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
    } else if (!this._destroyed) {
      // Normal close (user stopped)
      this._bridge.emit(AgentEvents.DISCONNECTED);
      this._bridge.emit(AgentEvents.MOOD, { mood: 'neutral' });
    }
  },

  /** Socket-level error: surface it on the bridge; close handling follows in _onClose. */
  _onError(evt) {
    console.error('[HumeEVI] WebSocket error:', evt);
    this._bridge.emit(AgentEvents.ERROR, { message: 'WebSocket error' });
    this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
  },

  // ─────────────────────────────────────────────────────────────
  // PRIVATE — Message routing
  // ─────────────────────────────────────────────────────────────

  /**
   * Route an incoming socket frame: binary frames are TTS audio,
   * text frames are JSON EVI messages dispatched by their `type`.
   */
  _onMessage(evt) {
    // Binary frames = audio data from Hume TTS
    if (evt.data instanceof ArrayBuffer) {
      this._queueAudio(evt.data);
      return;
    }

    let msg;
    try {
      msg = JSON.parse(evt.data);
    } catch (e) {
      console.warn('[HumeEVI] Unparseable message:', evt.data);
      return;
    }

    const type = msg.type;

    switch (type) {
      case 'session_settings':
        // Server acknowledges session settings
        break;

      case 'user_interruption':
        // User interrupted the agent — drop queued audio and go back to listening
        this._stopAudioPlayback();
        this._bridge.emit(AgentEvents.TTS_STOPPED);
        this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
        break;

      case 'user_message':
        // User's speech transcription
        this._handleUserMessage(msg);
        break;

      case 'assistant_message':
        // Agent text + emotion scores
        this._handleAssistantMessage(msg);
        break;

      case 'audio_output':
        // Agent TTS audio chunk (base64)
        this._handleAudioOutput(msg);
        break;

      case 'assistant_end':
        // Agent finished speaking turn
        this._onAssistantEnd();
        break;

      case 'tool_call':
        // Agent called a tool
        this._handleToolCall(msg);
        break;

      case 'tool_response':
        // Server confirms tool response received
        break;

      case 'error':
        console.error('[HumeEVI] Server error:', msg);
        this._bridge.emit(AgentEvents.ERROR, {
          message: msg.message || 'Unknown Hume error',
          code: msg.code
        });
        break;

      default:
        // Unhandled message types are ignored
        break;
    }
  },

  /**
   * User transcription: always emit TRANSCRIPT; on a final non-empty
   * utterance also emit a user MESSAGE and move to 'thinking'.
   */
  _handleUserMessage(msg) {
    const text = msg.message?.content || '';
    const isFinal = !msg.interim;

    this._bridge.emit(AgentEvents.TRANSCRIPT, { text, partial: !isFinal });

    if (isFinal && text) {
      this._bridge.emit(AgentEvents.MESSAGE, {
        role: 'user',
        text,
        final: true
      });
      this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'thinking' });
      this._bridge.emit(AgentEvents.MOOD, { mood: 'thinking' });
    }
  },

  /**
   * Agent utterance: derive a mood from the prosody emotion scores
   * (emitting MOOD only when it changes), emit the assistant MESSAGE,
   * and transition to 'speaking'.
   */
  _handleAssistantMessage(msg) {
    const text = msg.message?.content || '';

    // Extract emotion scores and derive mood
    const emotions = msg.models?.prosody?.scores;
    if (emotions) {
      const emotionList = Object.entries(emotions).map(([name, score]) => ({
        name,
        score
      }));
      const mood = dominantMood(emotionList);
      if (mood !== this._lastMood) {
        this._lastMood = mood;
        this._bridge.emit(AgentEvents.MOOD, { mood });
      }
    }

    if (text) {
      this._bridge.emit(AgentEvents.MESSAGE, {
        role: 'assistant',
        text,
        final: true
      });
    }

    // State transitions
    this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'speaking' });
  },

  /** Decode a base64 audio_output payload into bytes and queue it for playback. */
  _handleAudioOutput(msg) {
    // Hume sends audio as base64-encoded PCM or MP3
    if (!msg.data) return;

    const binary = atob(msg.data);
    const bytes = new Uint8Array(binary.length);
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i);
    }

    this._queueAudio(bytes.buffer);
  },

  /** Agent's speaking turn ended (audio may still drain); return to listening. */
  _onAssistantEnd() {
    this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
    this._lastMood = 'listening';
    this._bridge.emit(AgentEvents.MOOD, { mood: 'listening' });
  },

  /**
   * Dispatch a tool_call to the matching bridge event and acknowledge
   * it back to Hume. Unknown tools are acknowledged generically so the
   * agent is never left waiting on a response.
   */
  _handleToolCall(msg) {
    const toolName = msg.tool_call?.name || msg.name || '';
    const params = msg.tool_call?.parameters || msg.parameters || {};

    console.log(`[HumeEVI] Tool call: ${toolName}`, params);

    this._bridge.emit(AgentEvents.TOOL_CALLED, {
      name: toolName,
      params,
      result: null // Will be filled when we get tool_response
    });

    // Route tool calls to bridge events
    switch (toolName) {
      case 'dj_soundboard':
      case 'play_dj_sound':
        if (params.action === 'play' && params.sound) {
          this._bridge.emit(AgentEvents.PLAY_SOUND, {
            sound: params.sound,
            type: 'dj'
          });
        }
        this._sendToolResponse(msg, `*${params.sound || 'sound'} played* 🎵`);
        break;

      case 'caller_sounds':
      case 'play_caller_sound':
        if (params.action === 'play') {
          this._bridge.emit(AgentEvents.PLAY_SOUND, {
            sound: params.sound || 'dial_tone',
            type: 'caller'
          });
        }
        this._sendToolResponse(msg, `*Phone sound played* 📞`);
        break;

      case 'play_music':
        this._handleMusicTool(msg, params);
        break;

      case 'canvas_show':
      case 'show_canvas': {
        const action = params.action || 'present';
        this._bridge.emit(AgentEvents.CANVAS_CMD, {
          action,
          url: params.url || params.path || ''
        });
        this._sendToolResponse(msg, `Canvas updated`);
        break;
      }

      default:
        // Unknown tool — just acknowledge
        this._sendToolResponse(msg, `Tool ${toolName} executed`);
        break;
    }
  },

  /**
   * Map a play_music tool call onto MUSIC_PLAY / MUSIC_SYNC bridge events
   * and acknowledge the tool call.
   */
  _handleMusicTool(msg, params) {
    const action = params.action || 'play';

    if (action === 'stop') {
      this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'stop' });
    } else if (action === 'pause') {
      this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'pause' });
    } else if (action === 'play') {
      if (params.track) {
        this._bridge.emit(AgentEvents.MUSIC_PLAY, {
          action: 'play',
          track: params.track
        });
      } else {
        // No track specified — let the music bridge pick/sync
        this._bridge.emit(AgentEvents.MUSIC_SYNC);
      }
    } else if (action === 'skip') {
      this._bridge.emit(AgentEvents.MUSIC_SYNC);
    }

    this._sendToolResponse(msg, `Music: ${action}`);
  },

  // ─────────────────────────────────────────────────────────────
  // PRIVATE — Send messages to Hume
  // ─────────────────────────────────────────────────────────────

  /** Send a JSON payload if the socket is open; silently drop otherwise. */
  _sendJSON(payload) {
    if (this._socket && this._socket.readyState === WebSocket.OPEN) {
      this._socket.send(JSON.stringify(payload));
    }
  },

  /**
   * Acknowledge a tool_call with a textual result. No-op when the
   * incoming message carries no tool_call_id to correlate against.
   */
  _sendToolResponse(msg, content) {
    const toolCallId = msg.tool_call?.tool_call_id || msg.tool_call_id || '';
    if (!toolCallId) return;

    this._sendJSON({
      type: 'tool_response',
      tool_call_id: toolCallId,
      content: String(content)
    });
  },

  /**
   * Send a contextual update (injected silently, not spoken).
   */
  _sendContextUpdate(text) {
    this._sendJSON({
      type: 'session_settings',
      context: {
        text,
        type: 'temporary'
      }
    });
  },

  /**
   * Send a user-turn message that the agent must respond to.
   * (Sent as Hume's `user_input` message type.)
   */
  _sendAssistantInput(text) {
    this._sendJSON({
      type: 'user_input',
      text
    });
  },

  // ─────────────────────────────────────────────────────────────
  // PRIVATE — Microphone capture
  // ─────────────────────────────────────────────────────────────

  /**
   * Request mic access and stream 100ms MediaRecorder chunks straight
   * onto the socket as binary frames. Emits ERROR if access is denied.
   */
  async _startMicrophone() {
    try {
      this._mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: 1,
          sampleRate: 16000,
          echoCancellation: true,
          noiseSuppression: true,
        }
      });

      this._mediaRecorder = new MediaRecorder(this._mediaStream, {
        mimeType: this._getSupportedMimeType(),
      });

      this._mediaRecorder.ondataavailable = (evt) => {
        if (evt.data.size > 0 && this._socket?.readyState === WebSocket.OPEN) {
          // Send raw audio binary to Hume
          this._socket.send(evt.data);
        }
      };

      // Collect audio in small chunks (100ms)
      this._mediaRecorder.start(100);
      console.log('[HumeEVI] Microphone started');

    } catch (err) {
      console.error('[HumeEVI] Microphone access denied:', err);
      this._bridge.emit(AgentEvents.ERROR, {
        message: 'Microphone access denied. Please allow microphone access.'
      });
    }
  },

  /** Stop the recorder and release all mic tracks; safe to call repeatedly. */
  _stopMicrophone() {
    if (this._mediaRecorder && this._mediaRecorder.state !== 'inactive') {
      try { this._mediaRecorder.stop(); } catch (_) {}
    }
    if (this._mediaStream) {
      this._mediaStream.getTracks().forEach(t => t.stop());
    }
    this._mediaRecorder = null;
    this._mediaStream = null;
  },

  /**
   * First MediaRecorder mime type this browser supports, preferring
   * webm/opus; '' lets the browser pick its default.
   * @returns {string}
   */
  _getSupportedMimeType() {
    const types = [
      'audio/webm;codecs=opus',
      'audio/webm',
      'audio/ogg;codecs=opus',
    ];
    return types.find(t => MediaRecorder.isTypeSupported(t)) || '';
  },

  // ─────────────────────────────────────────────────────────────
  // PRIVATE — Audio playback (TTS from Hume)
  // ─────────────────────────────────────────────────────────────

  /** Append a TTS chunk to the queue and kick off playback if idle. */
  _queueAudio(arrayBuffer) {
    this._audioQueue.push(arrayBuffer);
    if (!this._isPlaying) {
      this._playNextChunk();
    }
  },

  /**
   * Decode and play the next queued TTS chunk, chaining via onended.
   * Emits TTS_PLAYING once per speaking turn (on the idle→playing
   * transition) and TTS_STOPPED when the queue drains. Undecodable
   * chunks are skipped.
   */
  async _playNextChunk() {
    if (this._audioQueue.length === 0) {
      this._isPlaying = false;
      this._bridge.emit(AgentEvents.TTS_STOPPED);
      return;
    }

    // Fix: remember the prior state BEFORE flagging playback, so the
    // first-chunk check below is not dead. Previously TTS_PLAYING was
    // emitted unconditionally for every chunk.
    const wasPlaying = this._isPlaying;
    this._isPlaying = true;
    const chunk = this._audioQueue.shift();

    if (!this._audioContext) {
      // No output context (e.g. torn down mid-queue) — drain remaining chunks
      this._playNextChunk();
      return;
    }

    // Signal TTS start only on the first chunk of a speaking turn
    if (!wasPlaying) {
      this._bridge.emit(AgentEvents.TTS_PLAYING);
    }

    try {
      // slice(0) copies the buffer: decodeAudioData detaches its input
      const audioBuffer = await this._audioContext.decodeAudioData(chunk.slice(0));
      const source = this._audioContext.createBufferSource();
      this._currentSourceNode = source;
      source.buffer = audioBuffer;
      source.connect(this._audioContext.destination);
      source.onended = () => {
        this._currentSourceNode = null;
        this._playNextChunk();
      };
      source.start();
    } catch (err) {
      console.warn('[HumeEVI] Audio decode error, skipping chunk:', err);
      this._playNextChunk();
    }
  },

  /** Drop all queued audio and stop the currently playing source, if any. */
  _stopAudioPlayback() {
    this._audioQueue = [];
    if (this._currentSourceNode) {
      try { this._currentSourceNode.stop(); } catch (_) {}
      this._currentSourceNode = null;
    }
    this._isPlaying = false;
  },
};

export default HumeEVIAdapter;