openvoiceui 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +104 -0
- package/Dockerfile +30 -0
- package/LICENSE +21 -0
- package/README.md +638 -0
- package/SETUP.md +360 -0
- package/app.py +232 -0
- package/auto-approve-devices.js +111 -0
- package/cli/index.js +372 -0
- package/config/__init__.py +4 -0
- package/config/default.yaml +43 -0
- package/config/flags.yaml +67 -0
- package/config/loader.py +203 -0
- package/config/providers.yaml +71 -0
- package/config/speech_normalization.yaml +182 -0
- package/config/theme.json +4 -0
- package/data/greetings.json +25 -0
- package/default-pages/ai-image-creator.html +915 -0
- package/default-pages/bulk-image-uploader.html +492 -0
- package/default-pages/desktop.html +2865 -0
- package/default-pages/file-explorer.html +854 -0
- package/default-pages/interactive-map.html +655 -0
- package/default-pages/style-guide.html +1005 -0
- package/default-pages/website-setup.html +1623 -0
- package/deploy/openclaw/Dockerfile +46 -0
- package/deploy/openvoiceui.service +30 -0
- package/deploy/setup-nginx.sh +50 -0
- package/deploy/setup-sudo.sh +306 -0
- package/deploy/skill-runner/Dockerfile +19 -0
- package/deploy/skill-runner/requirements.txt +14 -0
- package/deploy/skill-runner/server.py +269 -0
- package/deploy/supertonic/Dockerfile +22 -0
- package/deploy/supertonic/server.py +79 -0
- package/docker-compose.pinokio.yml +11 -0
- package/docker-compose.yml +59 -0
- package/greetings.json +25 -0
- package/index.html +65 -0
- package/inject-device-identity.js +142 -0
- package/package.json +82 -0
- package/profiles/default.json +114 -0
- package/profiles/manager.py +354 -0
- package/profiles/schema.json +337 -0
- package/prompts/voice-system-prompt.md +149 -0
- package/providers/__init__.py +39 -0
- package/providers/base.py +63 -0
- package/providers/llm/__init__.py +12 -0
- package/providers/llm/base.py +71 -0
- package/providers/llm/clawdbot_provider.py +112 -0
- package/providers/llm/zai_provider.py +115 -0
- package/providers/registry.py +320 -0
- package/providers/stt/__init__.py +12 -0
- package/providers/stt/base.py +58 -0
- package/providers/stt/webspeech_provider.py +49 -0
- package/providers/stt/whisper_provider.py +100 -0
- package/providers/tts/__init__.py +20 -0
- package/providers/tts/base.py +91 -0
- package/providers/tts/groq_provider.py +74 -0
- package/providers/tts/supertonic_provider.py +72 -0
- package/requirements.txt +38 -0
- package/routes/__init__.py +10 -0
- package/routes/admin.py +515 -0
- package/routes/canvas.py +1315 -0
- package/routes/chat.py +51 -0
- package/routes/conversation.py +2158 -0
- package/routes/elevenlabs_hybrid.py +306 -0
- package/routes/greetings.py +98 -0
- package/routes/icons.py +279 -0
- package/routes/image_gen.py +364 -0
- package/routes/instructions.py +190 -0
- package/routes/music.py +838 -0
- package/routes/onboarding.py +43 -0
- package/routes/pi.py +62 -0
- package/routes/profiles.py +215 -0
- package/routes/report_issue.py +68 -0
- package/routes/static_files.py +533 -0
- package/routes/suno.py +664 -0
- package/routes/theme.py +81 -0
- package/routes/transcripts.py +199 -0
- package/routes/vision.py +348 -0
- package/routes/workspace.py +288 -0
- package/server.py +1510 -0
- package/services/__init__.py +1 -0
- package/services/auth.py +143 -0
- package/services/canvas_versioning.py +239 -0
- package/services/db_pool.py +107 -0
- package/services/gateway.py +16 -0
- package/services/gateway_manager.py +333 -0
- package/services/gateways/__init__.py +12 -0
- package/services/gateways/base.py +110 -0
- package/services/gateways/compat.py +264 -0
- package/services/gateways/openclaw.py +1134 -0
- package/services/health.py +100 -0
- package/services/memory_client.py +455 -0
- package/services/paths.py +26 -0
- package/services/speech_normalizer.py +285 -0
- package/services/tts.py +270 -0
- package/setup-config.js +262 -0
- package/sounds/air_horn.mp3 +0 -0
- package/sounds/bruh.mp3 +0 -0
- package/sounds/crowd_cheer.mp3 +0 -0
- package/sounds/gunshot.mp3 +0 -0
- package/sounds/impact.mp3 +0 -0
- package/sounds/lets_go.mp3 +0 -0
- package/sounds/record_stop.mp3 +0 -0
- package/sounds/rewind.mp3 +0 -0
- package/sounds/sad_trombone.mp3 +0 -0
- package/sounds/scratch_long.mp3 +0 -0
- package/sounds/yeah.mp3 +0 -0
- package/src/adapters/ClawdBotAdapter.js +264 -0
- package/src/adapters/_template.js +133 -0
- package/src/adapters/elevenlabs-classic.js +841 -0
- package/src/adapters/elevenlabs-hybrid.js +812 -0
- package/src/adapters/hume-evi.js +676 -0
- package/src/admin.html +1339 -0
- package/src/app.js +8802 -0
- package/src/core/Config.js +173 -0
- package/src/core/EmotionEngine.js +307 -0
- package/src/core/EventBridge.js +180 -0
- package/src/core/EventBus.js +117 -0
- package/src/core/VoiceSession.js +607 -0
- package/src/face/BaseFace.js +259 -0
- package/src/face/EyeFace.js +208 -0
- package/src/face/HaloSmokeFace.js +509 -0
- package/src/face/manifest.json +27 -0
- package/src/face/previews/eyes.svg +16 -0
- package/src/face/previews/orb.svg +29 -0
- package/src/features/MusicPlayer.js +620 -0
- package/src/features/Soundboard.js +128 -0
- package/src/providers/DeepgramSTT.js +472 -0
- package/src/providers/DeepgramStreamingSTT.js +766 -0
- package/src/providers/GroqSTT.js +559 -0
- package/src/providers/TTSPlayer.js +323 -0
- package/src/providers/WebSpeechSTT.js +479 -0
- package/src/providers/tts/BaseTTSProvider.js +81 -0
- package/src/providers/tts/HumeProvider.js +77 -0
- package/src/providers/tts/SupertonicProvider.js +174 -0
- package/src/providers/tts/index.js +140 -0
- package/src/shell/adapter-registry.js +154 -0
- package/src/shell/caller-bridge.js +35 -0
- package/src/shell/camera-bridge.js +28 -0
- package/src/shell/canvas-bridge.js +32 -0
- package/src/shell/commercial-bridge.js +44 -0
- package/src/shell/face-bridge.js +44 -0
- package/src/shell/music-bridge.js +60 -0
- package/src/shell/orchestrator.js +233 -0
- package/src/shell/profile-discovery.js +303 -0
- package/src/shell/sounds-bridge.js +28 -0
- package/src/shell/transcript-bridge.js +61 -0
- package/src/shell/waveform-bridge.js +33 -0
- package/src/styles/base.css +2862 -0
- package/src/styles/face.css +417 -0
- package/src/styles/pi-overrides.css +89 -0
- package/src/styles/theme-dark.css +67 -0
- package/src/test-tts.html +175 -0
- package/src/ui/AppShell.js +544 -0
- package/src/ui/ProfileSwitcher.js +228 -0
- package/src/ui/SessionControl.js +240 -0
- package/src/ui/face/FacePicker.js +195 -0
- package/src/ui/face/FaceRenderer.js +309 -0
- package/src/ui/settings/PlaylistEditor.js +366 -0
- package/src/ui/settings/SettingsPanel.css +684 -0
- package/src/ui/settings/SettingsPanel.js +419 -0
- package/src/ui/settings/TTSVoicePreview.js +210 -0
- package/src/ui/themes/ThemeManager.js +213 -0
- package/src/ui/visualizers/BaseVisualizer.js +29 -0
- package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
- package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
- package/static/emulators/jsdos/js-dos.css +1 -0
- package/static/emulators/jsdos/js-dos.js +22 -0
- package/static/favicon.svg +55 -0
- package/static/icons/apple-touch-icon.png +0 -0
- package/static/icons/favicon-32.png +0 -0
- package/static/icons/icon-192.png +0 -0
- package/static/icons/icon-512.png +0 -0
- package/static/install.html +449 -0
- package/static/manifest.json +26 -0
- package/static/sw.js +21 -0
- package/tts_providers/__init__.py +136 -0
- package/tts_providers/base_provider.py +319 -0
- package/tts_providers/groq_provider.py +155 -0
- package/tts_providers/hume_provider.py +226 -0
- package/tts_providers/providers_config.json +119 -0
- package/tts_providers/qwen3_provider.py +371 -0
- package/tts_providers/resemble_provider.py +315 -0
- package/tts_providers/supertonic_provider.py +557 -0
- package/tts_providers/supertonic_tts.py +399 -0
|
@@ -0,0 +1,676 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hume EVI Adapter (P6-T3)
|
|
3
|
+
*
|
|
4
|
+
* Agent adapter for Hume Empathic Voice Interface (EVI).
|
|
5
|
+
* Wraps the Hume EVI WebSocket API in the EventBridge adapter contract.
|
|
6
|
+
*
|
|
7
|
+
* Hume EVI features:
|
|
8
|
+
* - SDK-managed audio pipeline (mic input + TTS output)
|
|
9
|
+
* - Emotional expression data per utterance
|
|
10
|
+
* - Configurable voice via Hume config_id
|
|
11
|
+
* - Tool calling (server-side tools via config)
|
|
12
|
+
* - WebSocket transport (wss://api.hume.ai/v0/evi/chat)
|
|
13
|
+
*
|
|
14
|
+
* Ref: future-dev-plans/17-MULTI-AGENT-FRAMEWORK.md
|
|
15
|
+
* Ref: future-dev-plans/15-ELEVENLABS-CLASSIC-AGENT.md (similar pattern)
|
|
16
|
+
*
|
|
17
|
+
* Usage:
|
|
18
|
+
* const { HumeEVIAdapter } = await import('./adapters/hume-evi.js');
|
|
19
|
+
* orchestrator.register('hume-evi', HumeEVIAdapter, {
|
|
20
|
+
* serverUrl: 'http://localhost:5001',
|
|
21
|
+
* configId: '<hume-config-id>', // Optional override
|
|
22
|
+
* voiceName: 'MyVoice'
|
|
23
|
+
* });
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import { AgentEvents, AgentActions } from '../core/EventBridge.js';
|
|
27
|
+
|
|
28
|
+
// ─────────────────────────────────────────────────────────────────
|
|
29
|
+
// Emotion → mood mapping (Hume sends emotion scores per utterance)
|
|
30
|
+
// ─────────────────────────────────────────────────────────────────
|
|
31
|
+
|
|
32
|
+
/**
 * Lookup table mapping Hume emotion names to the UI's face moods.
 * Any emotion not listed here falls back to 'neutral'.
 */
const EMOTION_TO_MOOD = {
  // → happy
  Joy: 'happy',
  Excitement: 'happy',
  Amusement: 'happy',
  Contentment: 'happy',
  Satisfaction: 'happy',
  // → sad
  Sadness: 'sad',
  Disappointment: 'sad',
  Distress: 'sad',
  // → thinking
  Anger: 'thinking',
  Disgust: 'thinking',
  Confusion: 'thinking',
  Contemplation: 'thinking',
  Concentration: 'thinking',
  // → listening
  Interest: 'listening',
  Curiosity: 'listening',
  Surprise: 'listening',
  Realization: 'listening',
  // Default: neutral
};

/**
 * Derive the dominant emotion mood from Hume's emotion scores array.
 * Picks the highest-scoring emotion (first one wins on ties) and maps
 * it through EMOTION_TO_MOOD.
 * @param {Array<{name: string, score: number}>} emotions
 * @returns {string} mood name ('neutral' when list is empty/missing)
 */
function dominantMood(emotions) {
  if (!emotions || emotions.length === 0) return 'neutral';
  let strongest = emotions[0];
  for (const candidate of emotions) {
    if (candidate.score > strongest.score) {
      strongest = candidate;
    }
  }
  return EMOTION_TO_MOOD[strongest.name] || 'neutral';
}
|
|
63
|
+
|
|
64
|
+
// ─────────────────────────────────────────────────────────────────
|
|
65
|
+
// HumeEVIAdapter
|
|
66
|
+
// ─────────────────────────────────────────────────────────────────
|
|
67
|
+
|
|
68
|
+
export const HumeEVIAdapter = {
  // Human-readable adapter name shown in the mode picker.
  name: 'Hume EVI',

  /**
   * What this adapter supports.
   * UI shows/hides features based on this list.
   */
  capabilities: [
    'emotion_detection', // Hume sends per-utterance emotion scores
    'multi_voice',       // Voice configured via Hume config_id
    'canvas',            // Agent can issue canvas commands via tool
    'dj_soundboard',     // Agent can trigger sound effects via tool
    'music_sync',        // Agent can control music via tool
  ],

  // ── Private state ─────────────────────────────────────────────
  _bridge: null,             // EventBridge instance (set in init)
  _config: null,             // Adapter config: serverUrl, configId, voiceName
  _socket: null,             // Active EVI WebSocket (null when disconnected)
  _accessToken: null,        // Short-lived Hume access token from our server
  _audioContext: null,       // WebAudio context used for TTS playback
  _mediaStream: null,        // Microphone MediaStream
  _mediaRecorder: null,      // MediaRecorder streaming mic audio to Hume
  _audioQueue: [],           // Queued audio chunks from Hume TTS
  _isPlaying: false,         // Whether a TTS chunk is currently playing
  _reconnectTimer: null,     // Pending reconnect setTimeout handle
  _reconnectDelay: 1000,     // Current reconnect backoff delay (ms)
  _maxReconnectDelay: 30000, // Backoff cap (ms)
  _destroyed: false,         // Set by destroy(); blocks reconnect attempts
  _unsubscribers: [],        // Bridge subscription cleanup functions
  _lastMood: 'neutral',      // Last mood emitted (used to dedupe MOOD events)
  _currentSourceNode: null,  // Active AudioBufferSourceNode (stopped on interrupt)
|
|
100
|
+
|
|
101
|
+
// ─────────────────────────────────────────────────────────────
|
|
102
|
+
// INIT — called when mode is selected
|
|
103
|
+
// ─────────────────────────────────────────────────────────────
|
|
104
|
+
|
|
105
|
+
async init(bridge, config) {
|
|
106
|
+
this._bridge = bridge;
|
|
107
|
+
this._config = config || {};
|
|
108
|
+
this._destroyed = false;
|
|
109
|
+
this._audioQueue = [];
|
|
110
|
+
this._isPlaying = false;
|
|
111
|
+
this._reconnectDelay = 1000;
|
|
112
|
+
|
|
113
|
+
console.log('[HumeEVI] Initializing adapter');
|
|
114
|
+
|
|
115
|
+
// Subscribe to UI → Agent actions
|
|
116
|
+
this._unsubscribers.push(
|
|
117
|
+
bridge.on(AgentActions.END_SESSION, () => this.stop()),
|
|
118
|
+
bridge.on(AgentActions.CONTEXT_UPDATE, (d) => this._sendContextUpdate(d.text)),
|
|
119
|
+
bridge.on(AgentActions.FORCE_MESSAGE, (d) => this._sendAssistantInput(d.text)),
|
|
120
|
+
);
|
|
121
|
+
|
|
122
|
+
console.log('[HumeEVI] Adapter initialized, call start() to connect');
|
|
123
|
+
},
|
|
124
|
+
|
|
125
|
+
// ─────────────────────────────────────────────────────────────
|
|
126
|
+
// START — connect and begin conversation
|
|
127
|
+
// ─────────────────────────────────────────────────────────────
|
|
128
|
+
|
|
129
|
+
async start() {
|
|
130
|
+
if (this._destroyed) return;
|
|
131
|
+
|
|
132
|
+
try {
|
|
133
|
+
// 1. Fetch access token from our server (keeps API key server-side)
|
|
134
|
+
await this._fetchAccessToken();
|
|
135
|
+
|
|
136
|
+
// 2. Initialize AudioContext (requires user gesture — call start() from click)
|
|
137
|
+
this._audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
|
138
|
+
if (this._audioContext.state === 'suspended') {
|
|
139
|
+
await this._audioContext.resume();
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// 3. Connect WebSocket
|
|
143
|
+
await this._connect();
|
|
144
|
+
|
|
145
|
+
} catch (err) {
|
|
146
|
+
console.error('[HumeEVI] Start failed:', err);
|
|
147
|
+
this._bridge.emit(AgentEvents.ERROR, { message: err.message });
|
|
148
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
|
|
149
|
+
}
|
|
150
|
+
},
|
|
151
|
+
|
|
152
|
+
// ─────────────────────────────────────────────────────────────
|
|
153
|
+
// STOP — end current session gracefully
|
|
154
|
+
// ─────────────────────────────────────────────────────────────
|
|
155
|
+
|
|
156
|
+
async stop() {
|
|
157
|
+
clearTimeout(this._reconnectTimer);
|
|
158
|
+
this._stopMicrophone();
|
|
159
|
+
this._stopAudioPlayback();
|
|
160
|
+
|
|
161
|
+
if (this._socket) {
|
|
162
|
+
// Send session_settings with no audio to close cleanly
|
|
163
|
+
try {
|
|
164
|
+
if (this._socket.readyState === WebSocket.OPEN) {
|
|
165
|
+
this._socket.close(1000, 'User ended session');
|
|
166
|
+
}
|
|
167
|
+
} catch (_) {}
|
|
168
|
+
this._socket = null;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'idle' });
|
|
172
|
+
this._bridge.emit(AgentEvents.DISCONNECTED);
|
|
173
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'neutral' });
|
|
174
|
+
console.log('[HumeEVI] Session stopped');
|
|
175
|
+
},
|
|
176
|
+
|
|
177
|
+
// ─────────────────────────────────────────────────────────────
|
|
178
|
+
// DESTROY — full teardown on mode switch
|
|
179
|
+
// ─────────────────────────────────────────────────────────────
|
|
180
|
+
|
|
181
|
+
async destroy() {
|
|
182
|
+
this._destroyed = true;
|
|
183
|
+
await this.stop();
|
|
184
|
+
|
|
185
|
+
// Unsubscribe all bridge listeners
|
|
186
|
+
this._unsubscribers.forEach(unsub => unsub());
|
|
187
|
+
this._unsubscribers = [];
|
|
188
|
+
|
|
189
|
+
// Close AudioContext
|
|
190
|
+
if (this._audioContext) {
|
|
191
|
+
try { await this._audioContext.close(); } catch (_) {}
|
|
192
|
+
this._audioContext = null;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
this._accessToken = null;
|
|
196
|
+
console.log('[HumeEVI] Adapter destroyed');
|
|
197
|
+
},
|
|
198
|
+
|
|
199
|
+
// ─────────────────────────────────────────────────────────────
|
|
200
|
+
// PRIVATE — Token fetch
|
|
201
|
+
// ─────────────────────────────────────────────────────────────
|
|
202
|
+
|
|
203
|
+
async _fetchAccessToken() {
|
|
204
|
+
const serverUrl = this._config.serverUrl || '';
|
|
205
|
+
const response = await fetch(`${serverUrl}/api/hume/token`);
|
|
206
|
+
if (!response.ok) {
|
|
207
|
+
throw new Error(`Failed to fetch Hume token: ${response.status}`);
|
|
208
|
+
}
|
|
209
|
+
const data = await response.json();
|
|
210
|
+
this._accessToken = data.access_token || data.token;
|
|
211
|
+
if (data.config_id && !this._config.configId) {
|
|
212
|
+
this._config.configId = data.config_id;
|
|
213
|
+
}
|
|
214
|
+
console.log('[HumeEVI] Access token fetched');
|
|
215
|
+
},
|
|
216
|
+
|
|
217
|
+
// ─────────────────────────────────────────────────────────────
|
|
218
|
+
// PRIVATE — WebSocket connection
|
|
219
|
+
// ─────────────────────────────────────────────────────────────
|
|
220
|
+
|
|
221
|
+
async _connect() {
|
|
222
|
+
if (this._destroyed) return;
|
|
223
|
+
|
|
224
|
+
const params = new URLSearchParams({ access_token: this._accessToken });
|
|
225
|
+
if (this._config.configId) {
|
|
226
|
+
params.set('config_id', this._config.configId);
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
const wsUrl = `wss://api.hume.ai/v0/evi/chat?${params}`;
|
|
230
|
+
console.log('[HumeEVI] Connecting to EVI WebSocket...');
|
|
231
|
+
|
|
232
|
+
this._socket = new WebSocket(wsUrl);
|
|
233
|
+
this._socket.binaryType = 'arraybuffer';
|
|
234
|
+
|
|
235
|
+
this._socket.onopen = () => this._onOpen();
|
|
236
|
+
this._socket.onmessage = (evt) => this._onMessage(evt);
|
|
237
|
+
this._socket.onclose = (evt) => this._onClose(evt);
|
|
238
|
+
this._socket.onerror = (evt) => this._onError(evt);
|
|
239
|
+
},
|
|
240
|
+
|
|
241
|
+
_onOpen() {
|
|
242
|
+
console.log('[HumeEVI] WebSocket connected');
|
|
243
|
+
this._reconnectDelay = 1000; // Reset backoff on successful connect
|
|
244
|
+
|
|
245
|
+
this._bridge.emit(AgentEvents.CONNECTED);
|
|
246
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
|
|
247
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'happy' });
|
|
248
|
+
|
|
249
|
+
// Start capturing microphone audio
|
|
250
|
+
this._startMicrophone();
|
|
251
|
+
},
|
|
252
|
+
|
|
253
|
+
_onClose(evt) {
|
|
254
|
+
console.log(`[HumeEVI] WebSocket closed: ${evt.code} ${evt.reason}`);
|
|
255
|
+
this._stopMicrophone();
|
|
256
|
+
|
|
257
|
+
if (!this._destroyed && evt.code !== 1000) {
|
|
258
|
+
// Abnormal close — schedule reconnect with exponential backoff
|
|
259
|
+
console.log(`[HumeEVI] Reconnecting in ${this._reconnectDelay}ms...`);
|
|
260
|
+
this._reconnectTimer = setTimeout(async () => {
|
|
261
|
+
if (!this._destroyed) {
|
|
262
|
+
try {
|
|
263
|
+
await this._fetchAccessToken();
|
|
264
|
+
await this._connect();
|
|
265
|
+
} catch (err) {
|
|
266
|
+
console.error('[HumeEVI] Reconnect failed:', err);
|
|
267
|
+
this._bridge.emit(AgentEvents.ERROR, { message: 'Reconnect failed' });
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
}, this._reconnectDelay);
|
|
271
|
+
|
|
272
|
+
// Exponential backoff capped at 30s
|
|
273
|
+
this._reconnectDelay = Math.min(this._reconnectDelay * 2, this._maxReconnectDelay);
|
|
274
|
+
|
|
275
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'idle' });
|
|
276
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
|
|
277
|
+
} else if (!this._destroyed) {
|
|
278
|
+
// Normal close (user stopped)
|
|
279
|
+
this._bridge.emit(AgentEvents.DISCONNECTED);
|
|
280
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'neutral' });
|
|
281
|
+
}
|
|
282
|
+
},
|
|
283
|
+
|
|
284
|
+
_onError(evt) {
|
|
285
|
+
console.error('[HumeEVI] WebSocket error:', evt);
|
|
286
|
+
this._bridge.emit(AgentEvents.ERROR, { message: 'WebSocket error' });
|
|
287
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
|
|
288
|
+
},
|
|
289
|
+
|
|
290
|
+
// ─────────────────────────────────────────────────────────────
|
|
291
|
+
// PRIVATE — Message routing
|
|
292
|
+
// ─────────────────────────────────────────────────────────────
|
|
293
|
+
|
|
294
|
+
_onMessage(evt) {
|
|
295
|
+
// Binary frames = audio data from Hume TTS
|
|
296
|
+
if (evt.data instanceof ArrayBuffer) {
|
|
297
|
+
this._queueAudio(evt.data);
|
|
298
|
+
return;
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
let msg;
|
|
302
|
+
try {
|
|
303
|
+
msg = JSON.parse(evt.data);
|
|
304
|
+
} catch (e) {
|
|
305
|
+
console.warn('[HumeEVI] Unparseable message:', evt.data);
|
|
306
|
+
return;
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
const type = msg.type;
|
|
310
|
+
// console.debug('[HumeEVI] Message:', type);
|
|
311
|
+
|
|
312
|
+
switch (type) {
|
|
313
|
+
case 'session_settings':
|
|
314
|
+
// Server acknowledges session settings
|
|
315
|
+
break;
|
|
316
|
+
|
|
317
|
+
case 'user_interruption':
|
|
318
|
+
// User interrupted the agent
|
|
319
|
+
this._stopAudioPlayback();
|
|
320
|
+
this._bridge.emit(AgentEvents.TTS_STOPPED);
|
|
321
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
|
|
322
|
+
break;
|
|
323
|
+
|
|
324
|
+
case 'user_message':
|
|
325
|
+
// User's speech transcription
|
|
326
|
+
this._handleUserMessage(msg);
|
|
327
|
+
break;
|
|
328
|
+
|
|
329
|
+
case 'assistant_message':
|
|
330
|
+
// Agent text + emotion scores
|
|
331
|
+
this._handleAssistantMessage(msg);
|
|
332
|
+
break;
|
|
333
|
+
|
|
334
|
+
case 'audio_output':
|
|
335
|
+
// Agent TTS audio chunk (base64)
|
|
336
|
+
this._handleAudioOutput(msg);
|
|
337
|
+
break;
|
|
338
|
+
|
|
339
|
+
case 'assistant_end':
|
|
340
|
+
// Agent finished speaking turn
|
|
341
|
+
this._onAssistantEnd();
|
|
342
|
+
break;
|
|
343
|
+
|
|
344
|
+
case 'tool_call':
|
|
345
|
+
// Agent called a tool
|
|
346
|
+
this._handleToolCall(msg);
|
|
347
|
+
break;
|
|
348
|
+
|
|
349
|
+
case 'tool_response':
|
|
350
|
+
// Server confirms tool response received
|
|
351
|
+
break;
|
|
352
|
+
|
|
353
|
+
case 'error':
|
|
354
|
+
console.error('[HumeEVI] Server error:', msg);
|
|
355
|
+
this._bridge.emit(AgentEvents.ERROR, {
|
|
356
|
+
message: msg.message || 'Unknown Hume error',
|
|
357
|
+
code: msg.code
|
|
358
|
+
});
|
|
359
|
+
break;
|
|
360
|
+
|
|
361
|
+
default:
|
|
362
|
+
// console.debug('[HumeEVI] Unhandled message type:', type);
|
|
363
|
+
break;
|
|
364
|
+
}
|
|
365
|
+
},
|
|
366
|
+
|
|
367
|
+
_handleUserMessage(msg) {
|
|
368
|
+
const text = msg.message?.content || '';
|
|
369
|
+
const isFinal = !msg.interim;
|
|
370
|
+
|
|
371
|
+
this._bridge.emit(AgentEvents.TRANSCRIPT, { text, partial: !isFinal });
|
|
372
|
+
|
|
373
|
+
if (isFinal && text) {
|
|
374
|
+
this._bridge.emit(AgentEvents.MESSAGE, {
|
|
375
|
+
role: 'user',
|
|
376
|
+
text,
|
|
377
|
+
final: true
|
|
378
|
+
});
|
|
379
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'thinking' });
|
|
380
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'thinking' });
|
|
381
|
+
}
|
|
382
|
+
},
|
|
383
|
+
|
|
384
|
+
_handleAssistantMessage(msg) {
|
|
385
|
+
const text = msg.message?.content || '';
|
|
386
|
+
|
|
387
|
+
// Extract emotion scores and derive mood
|
|
388
|
+
const emotions = msg.models?.prosody?.scores;
|
|
389
|
+
if (emotions) {
|
|
390
|
+
const emotionList = Object.entries(emotions).map(([name, score]) => ({
|
|
391
|
+
name,
|
|
392
|
+
score
|
|
393
|
+
}));
|
|
394
|
+
const mood = dominantMood(emotionList);
|
|
395
|
+
if (mood !== this._lastMood) {
|
|
396
|
+
this._lastMood = mood;
|
|
397
|
+
this._bridge.emit(AgentEvents.MOOD, { mood });
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
if (text) {
|
|
402
|
+
this._bridge.emit(AgentEvents.MESSAGE, {
|
|
403
|
+
role: 'assistant',
|
|
404
|
+
text,
|
|
405
|
+
final: true
|
|
406
|
+
});
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
// State transitions
|
|
410
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'speaking' });
|
|
411
|
+
},
|
|
412
|
+
|
|
413
|
+
_handleAudioOutput(msg) {
|
|
414
|
+
// Hume sends audio as base64-encoded PCM or MP3
|
|
415
|
+
if (!msg.data) return;
|
|
416
|
+
|
|
417
|
+
const binary = atob(msg.data);
|
|
418
|
+
const bytes = new Uint8Array(binary.length);
|
|
419
|
+
for (let i = 0; i < binary.length; i++) {
|
|
420
|
+
bytes[i] = binary.charCodeAt(i);
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
this._queueAudio(bytes.buffer);
|
|
424
|
+
},
|
|
425
|
+
|
|
426
|
+
_onAssistantEnd() {
|
|
427
|
+
// Hume signals end of agent's speaking turn
|
|
428
|
+
// (Audio may still be draining from queue)
|
|
429
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
|
|
430
|
+
this._lastMood = 'listening';
|
|
431
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'listening' });
|
|
432
|
+
},
|
|
433
|
+
|
|
434
|
+
_handleToolCall(msg) {
|
|
435
|
+
const toolName = msg.tool_call?.name || msg.name || '';
|
|
436
|
+
const params = msg.tool_call?.parameters || msg.parameters || {};
|
|
437
|
+
|
|
438
|
+
console.log(`[HumeEVI] Tool call: ${toolName}`, params);
|
|
439
|
+
|
|
440
|
+
this._bridge.emit(AgentEvents.TOOL_CALLED, {
|
|
441
|
+
name: toolName,
|
|
442
|
+
params,
|
|
443
|
+
result: null // Will be filled when we get tool_response
|
|
444
|
+
});
|
|
445
|
+
|
|
446
|
+
// Route tool calls to bridge events
|
|
447
|
+
switch (toolName) {
|
|
448
|
+
case 'dj_soundboard':
|
|
449
|
+
case 'play_dj_sound':
|
|
450
|
+
if (params.action === 'play' && params.sound) {
|
|
451
|
+
this._bridge.emit(AgentEvents.PLAY_SOUND, {
|
|
452
|
+
sound: params.sound,
|
|
453
|
+
type: 'dj'
|
|
454
|
+
});
|
|
455
|
+
}
|
|
456
|
+
this._sendToolResponse(msg, `*${params.sound || 'sound'} played* 🎵`);
|
|
457
|
+
break;
|
|
458
|
+
|
|
459
|
+
case 'caller_sounds':
|
|
460
|
+
case 'play_caller_sound':
|
|
461
|
+
if (params.action === 'play') {
|
|
462
|
+
this._bridge.emit(AgentEvents.PLAY_SOUND, {
|
|
463
|
+
sound: params.sound || 'dial_tone',
|
|
464
|
+
type: 'caller'
|
|
465
|
+
});
|
|
466
|
+
}
|
|
467
|
+
this._sendToolResponse(msg, `*Phone sound played* 📞`);
|
|
468
|
+
break;
|
|
469
|
+
|
|
470
|
+
case 'play_music':
|
|
471
|
+
this._handleMusicTool(msg, params);
|
|
472
|
+
break;
|
|
473
|
+
|
|
474
|
+
case 'canvas_show':
|
|
475
|
+
case 'show_canvas': {
|
|
476
|
+
const action = params.action || 'present';
|
|
477
|
+
this._bridge.emit(AgentEvents.CANVAS_CMD, {
|
|
478
|
+
action,
|
|
479
|
+
url: params.url || params.path || ''
|
|
480
|
+
});
|
|
481
|
+
this._sendToolResponse(msg, `Canvas updated`);
|
|
482
|
+
break;
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
default:
|
|
486
|
+
// Unknown tool — just acknowledge
|
|
487
|
+
this._sendToolResponse(msg, `Tool ${toolName} executed`);
|
|
488
|
+
break;
|
|
489
|
+
}
|
|
490
|
+
},
|
|
491
|
+
|
|
492
|
+
_handleMusicTool(msg, params) {
|
|
493
|
+
const action = params.action || 'play';
|
|
494
|
+
|
|
495
|
+
if (action === 'stop') {
|
|
496
|
+
this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'stop' });
|
|
497
|
+
} else if (action === 'pause') {
|
|
498
|
+
this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'pause' });
|
|
499
|
+
} else if (action === 'play') {
|
|
500
|
+
if (params.track) {
|
|
501
|
+
this._bridge.emit(AgentEvents.MUSIC_PLAY, {
|
|
502
|
+
action: 'play',
|
|
503
|
+
track: params.track
|
|
504
|
+
});
|
|
505
|
+
} else {
|
|
506
|
+
this._bridge.emit(AgentEvents.MUSIC_SYNC);
|
|
507
|
+
}
|
|
508
|
+
} else if (action === 'skip') {
|
|
509
|
+
this._bridge.emit(AgentEvents.MUSIC_SYNC);
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
this._sendToolResponse(msg, `Music: ${action}`);
|
|
513
|
+
},
|
|
514
|
+
|
|
515
|
+
// ─────────────────────────────────────────────────────────────
|
|
516
|
+
// PRIVATE — Send messages to Hume
|
|
517
|
+
// ─────────────────────────────────────────────────────────────
|
|
518
|
+
|
|
519
|
+
_sendJSON(payload) {
|
|
520
|
+
if (this._socket && this._socket.readyState === WebSocket.OPEN) {
|
|
521
|
+
this._socket.send(JSON.stringify(payload));
|
|
522
|
+
}
|
|
523
|
+
},
|
|
524
|
+
|
|
525
|
+
_sendToolResponse(msg, content) {
|
|
526
|
+
const toolCallId = msg.tool_call?.tool_call_id || msg.tool_call_id || '';
|
|
527
|
+
if (!toolCallId) return;
|
|
528
|
+
|
|
529
|
+
this._sendJSON({
|
|
530
|
+
type: 'tool_response',
|
|
531
|
+
tool_call_id: toolCallId,
|
|
532
|
+
content: String(content)
|
|
533
|
+
});
|
|
534
|
+
},
|
|
535
|
+
|
|
536
|
+
/**
|
|
537
|
+
* Send a contextual update (injected silently, not spoken).
|
|
538
|
+
*/
|
|
539
|
+
_sendContextUpdate(text) {
|
|
540
|
+
this._sendJSON({
|
|
541
|
+
type: 'session_settings',
|
|
542
|
+
context: {
|
|
543
|
+
text,
|
|
544
|
+
type: 'temporary'
|
|
545
|
+
}
|
|
546
|
+
});
|
|
547
|
+
},
|
|
548
|
+
|
|
549
|
+
/**
|
|
550
|
+
* Send a user-turn message that the agent must respond to.
|
|
551
|
+
*/
|
|
552
|
+
_sendAssistantInput(text) {
|
|
553
|
+
this._sendJSON({
|
|
554
|
+
type: 'user_input',
|
|
555
|
+
text
|
|
556
|
+
});
|
|
557
|
+
},
|
|
558
|
+
|
|
559
|
+
// ─────────────────────────────────────────────────────────────
|
|
560
|
+
// PRIVATE — Microphone capture
|
|
561
|
+
// ─────────────────────────────────────────────────────────────
|
|
562
|
+
|
|
563
|
+
async _startMicrophone() {
|
|
564
|
+
try {
|
|
565
|
+
this._mediaStream = await navigator.mediaDevices.getUserMedia({
|
|
566
|
+
audio: {
|
|
567
|
+
channelCount: 1,
|
|
568
|
+
sampleRate: 16000,
|
|
569
|
+
echoCancellation: true,
|
|
570
|
+
noiseSuppression: true,
|
|
571
|
+
}
|
|
572
|
+
});
|
|
573
|
+
|
|
574
|
+
this._mediaRecorder = new MediaRecorder(this._mediaStream, {
|
|
575
|
+
mimeType: this._getSupportedMimeType(),
|
|
576
|
+
});
|
|
577
|
+
|
|
578
|
+
this._mediaRecorder.ondataavailable = (evt) => {
|
|
579
|
+
if (evt.data.size > 0 && this._socket?.readyState === WebSocket.OPEN) {
|
|
580
|
+
// Send raw audio binary to Hume
|
|
581
|
+
this._socket.send(evt.data);
|
|
582
|
+
}
|
|
583
|
+
};
|
|
584
|
+
|
|
585
|
+
// Collect audio in small chunks (100ms)
|
|
586
|
+
this._mediaRecorder.start(100);
|
|
587
|
+
console.log('[HumeEVI] Microphone started');
|
|
588
|
+
|
|
589
|
+
} catch (err) {
|
|
590
|
+
console.error('[HumeEVI] Microphone access denied:', err);
|
|
591
|
+
this._bridge.emit(AgentEvents.ERROR, {
|
|
592
|
+
message: 'Microphone access denied. Please allow microphone access.'
|
|
593
|
+
});
|
|
594
|
+
}
|
|
595
|
+
},
|
|
596
|
+
|
|
597
|
+
_stopMicrophone() {
|
|
598
|
+
if (this._mediaRecorder && this._mediaRecorder.state !== 'inactive') {
|
|
599
|
+
try { this._mediaRecorder.stop(); } catch (_) {}
|
|
600
|
+
}
|
|
601
|
+
if (this._mediaStream) {
|
|
602
|
+
this._mediaStream.getTracks().forEach(t => t.stop());
|
|
603
|
+
}
|
|
604
|
+
this._mediaRecorder = null;
|
|
605
|
+
this._mediaStream = null;
|
|
606
|
+
},
|
|
607
|
+
|
|
608
|
+
_getSupportedMimeType() {
|
|
609
|
+
const types = [
|
|
610
|
+
'audio/webm;codecs=opus',
|
|
611
|
+
'audio/webm',
|
|
612
|
+
'audio/ogg;codecs=opus',
|
|
613
|
+
];
|
|
614
|
+
return types.find(t => MediaRecorder.isTypeSupported(t)) || '';
|
|
615
|
+
},
|
|
616
|
+
|
|
617
|
+
// ─────────────────────────────────────────────────────────────
|
|
618
|
+
// PRIVATE — Audio playback (TTS from Hume)
|
|
619
|
+
// ─────────────────────────────────────────────────────────────
|
|
620
|
+
|
|
621
|
+
_queueAudio(arrayBuffer) {
|
|
622
|
+
this._audioQueue.push(arrayBuffer);
|
|
623
|
+
if (!this._isPlaying) {
|
|
624
|
+
this._playNextChunk();
|
|
625
|
+
}
|
|
626
|
+
},
|
|
627
|
+
|
|
628
|
+
async _playNextChunk() {
|
|
629
|
+
if (this._audioQueue.length === 0) {
|
|
630
|
+
this._isPlaying = false;
|
|
631
|
+
this._bridge.emit(AgentEvents.TTS_STOPPED);
|
|
632
|
+
return;
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
this._isPlaying = true;
|
|
636
|
+
const chunk = this._audioQueue.shift();
|
|
637
|
+
|
|
638
|
+
if (!this._audioContext) {
|
|
639
|
+
this._playNextChunk();
|
|
640
|
+
return;
|
|
641
|
+
}
|
|
642
|
+
|
|
643
|
+
// Signal TTS start on first chunk
|
|
644
|
+
if (!this._isPlaying) {
|
|
645
|
+
this._bridge.emit(AgentEvents.TTS_PLAYING);
|
|
646
|
+
}
|
|
647
|
+
this._bridge.emit(AgentEvents.TTS_PLAYING);
|
|
648
|
+
|
|
649
|
+
try {
|
|
650
|
+
const audioBuffer = await this._audioContext.decodeAudioData(chunk.slice(0));
|
|
651
|
+
const source = this._audioContext.createBufferSource();
|
|
652
|
+
this._currentSourceNode = source;
|
|
653
|
+
source.buffer = audioBuffer;
|
|
654
|
+
source.connect(this._audioContext.destination);
|
|
655
|
+
source.onended = () => {
|
|
656
|
+
this._currentSourceNode = null;
|
|
657
|
+
this._playNextChunk();
|
|
658
|
+
};
|
|
659
|
+
source.start();
|
|
660
|
+
} catch (err) {
|
|
661
|
+
console.warn('[HumeEVI] Audio decode error, skipping chunk:', err);
|
|
662
|
+
this._playNextChunk();
|
|
663
|
+
}
|
|
664
|
+
},
|
|
665
|
+
|
|
666
|
+
_stopAudioPlayback() {
|
|
667
|
+
this._audioQueue = [];
|
|
668
|
+
if (this._currentSourceNode) {
|
|
669
|
+
try { this._currentSourceNode.stop(); } catch (_) {}
|
|
670
|
+
this._currentSourceNode = null;
|
|
671
|
+
}
|
|
672
|
+
this._isPlaying = false;
|
|
673
|
+
},
|
|
674
|
+
};
|
|
675
|
+
|
|
676
|
+
export default HumeEVIAdapter;
|