openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/.env.example +104 -0
  2. package/Dockerfile +30 -0
  3. package/LICENSE +21 -0
  4. package/README.md +638 -0
  5. package/SETUP.md +360 -0
  6. package/app.py +232 -0
  7. package/auto-approve-devices.js +111 -0
  8. package/cli/index.js +372 -0
  9. package/config/__init__.py +4 -0
  10. package/config/default.yaml +43 -0
  11. package/config/flags.yaml +67 -0
  12. package/config/loader.py +203 -0
  13. package/config/providers.yaml +71 -0
  14. package/config/speech_normalization.yaml +182 -0
  15. package/config/theme.json +4 -0
  16. package/data/greetings.json +25 -0
  17. package/default-pages/ai-image-creator.html +915 -0
  18. package/default-pages/bulk-image-uploader.html +492 -0
  19. package/default-pages/desktop.html +2865 -0
  20. package/default-pages/file-explorer.html +854 -0
  21. package/default-pages/interactive-map.html +655 -0
  22. package/default-pages/style-guide.html +1005 -0
  23. package/default-pages/website-setup.html +1623 -0
  24. package/deploy/openclaw/Dockerfile +46 -0
  25. package/deploy/openvoiceui.service +30 -0
  26. package/deploy/setup-nginx.sh +50 -0
  27. package/deploy/setup-sudo.sh +306 -0
  28. package/deploy/skill-runner/Dockerfile +19 -0
  29. package/deploy/skill-runner/requirements.txt +14 -0
  30. package/deploy/skill-runner/server.py +269 -0
  31. package/deploy/supertonic/Dockerfile +22 -0
  32. package/deploy/supertonic/server.py +79 -0
  33. package/docker-compose.pinokio.yml +11 -0
  34. package/docker-compose.yml +59 -0
  35. package/greetings.json +25 -0
  36. package/index.html +65 -0
  37. package/inject-device-identity.js +142 -0
  38. package/package.json +82 -0
  39. package/profiles/default.json +114 -0
  40. package/profiles/manager.py +354 -0
  41. package/profiles/schema.json +337 -0
  42. package/prompts/voice-system-prompt.md +149 -0
  43. package/providers/__init__.py +39 -0
  44. package/providers/base.py +63 -0
  45. package/providers/llm/__init__.py +12 -0
  46. package/providers/llm/base.py +71 -0
  47. package/providers/llm/clawdbot_provider.py +112 -0
  48. package/providers/llm/zai_provider.py +115 -0
  49. package/providers/registry.py +320 -0
  50. package/providers/stt/__init__.py +12 -0
  51. package/providers/stt/base.py +58 -0
  52. package/providers/stt/webspeech_provider.py +49 -0
  53. package/providers/stt/whisper_provider.py +100 -0
  54. package/providers/tts/__init__.py +20 -0
  55. package/providers/tts/base.py +91 -0
  56. package/providers/tts/groq_provider.py +74 -0
  57. package/providers/tts/supertonic_provider.py +72 -0
  58. package/requirements.txt +38 -0
  59. package/routes/__init__.py +10 -0
  60. package/routes/admin.py +515 -0
  61. package/routes/canvas.py +1315 -0
  62. package/routes/chat.py +51 -0
  63. package/routes/conversation.py +2158 -0
  64. package/routes/elevenlabs_hybrid.py +306 -0
  65. package/routes/greetings.py +98 -0
  66. package/routes/icons.py +279 -0
  67. package/routes/image_gen.py +364 -0
  68. package/routes/instructions.py +190 -0
  69. package/routes/music.py +838 -0
  70. package/routes/onboarding.py +43 -0
  71. package/routes/pi.py +62 -0
  72. package/routes/profiles.py +215 -0
  73. package/routes/report_issue.py +68 -0
  74. package/routes/static_files.py +533 -0
  75. package/routes/suno.py +664 -0
  76. package/routes/theme.py +81 -0
  77. package/routes/transcripts.py +199 -0
  78. package/routes/vision.py +348 -0
  79. package/routes/workspace.py +288 -0
  80. package/server.py +1510 -0
  81. package/services/__init__.py +1 -0
  82. package/services/auth.py +143 -0
  83. package/services/canvas_versioning.py +239 -0
  84. package/services/db_pool.py +107 -0
  85. package/services/gateway.py +16 -0
  86. package/services/gateway_manager.py +333 -0
  87. package/services/gateways/__init__.py +12 -0
  88. package/services/gateways/base.py +110 -0
  89. package/services/gateways/compat.py +264 -0
  90. package/services/gateways/openclaw.py +1134 -0
  91. package/services/health.py +100 -0
  92. package/services/memory_client.py +455 -0
  93. package/services/paths.py +26 -0
  94. package/services/speech_normalizer.py +285 -0
  95. package/services/tts.py +270 -0
  96. package/setup-config.js +262 -0
  97. package/sounds/air_horn.mp3 +0 -0
  98. package/sounds/bruh.mp3 +0 -0
  99. package/sounds/crowd_cheer.mp3 +0 -0
  100. package/sounds/gunshot.mp3 +0 -0
  101. package/sounds/impact.mp3 +0 -0
  102. package/sounds/lets_go.mp3 +0 -0
  103. package/sounds/record_stop.mp3 +0 -0
  104. package/sounds/rewind.mp3 +0 -0
  105. package/sounds/sad_trombone.mp3 +0 -0
  106. package/sounds/scratch_long.mp3 +0 -0
  107. package/sounds/yeah.mp3 +0 -0
  108. package/src/adapters/ClawdBotAdapter.js +264 -0
  109. package/src/adapters/_template.js +133 -0
  110. package/src/adapters/elevenlabs-classic.js +841 -0
  111. package/src/adapters/elevenlabs-hybrid.js +812 -0
  112. package/src/adapters/hume-evi.js +676 -0
  113. package/src/admin.html +1339 -0
  114. package/src/app.js +8802 -0
  115. package/src/core/Config.js +173 -0
  116. package/src/core/EmotionEngine.js +307 -0
  117. package/src/core/EventBridge.js +180 -0
  118. package/src/core/EventBus.js +117 -0
  119. package/src/core/VoiceSession.js +607 -0
  120. package/src/face/BaseFace.js +259 -0
  121. package/src/face/EyeFace.js +208 -0
  122. package/src/face/HaloSmokeFace.js +509 -0
  123. package/src/face/manifest.json +27 -0
  124. package/src/face/previews/eyes.svg +16 -0
  125. package/src/face/previews/orb.svg +29 -0
  126. package/src/features/MusicPlayer.js +620 -0
  127. package/src/features/Soundboard.js +128 -0
  128. package/src/providers/DeepgramSTT.js +472 -0
  129. package/src/providers/DeepgramStreamingSTT.js +766 -0
  130. package/src/providers/GroqSTT.js +559 -0
  131. package/src/providers/TTSPlayer.js +323 -0
  132. package/src/providers/WebSpeechSTT.js +479 -0
  133. package/src/providers/tts/BaseTTSProvider.js +81 -0
  134. package/src/providers/tts/HumeProvider.js +77 -0
  135. package/src/providers/tts/SupertonicProvider.js +174 -0
  136. package/src/providers/tts/index.js +140 -0
  137. package/src/shell/adapter-registry.js +154 -0
  138. package/src/shell/caller-bridge.js +35 -0
  139. package/src/shell/camera-bridge.js +28 -0
  140. package/src/shell/canvas-bridge.js +32 -0
  141. package/src/shell/commercial-bridge.js +44 -0
  142. package/src/shell/face-bridge.js +44 -0
  143. package/src/shell/music-bridge.js +60 -0
  144. package/src/shell/orchestrator.js +233 -0
  145. package/src/shell/profile-discovery.js +303 -0
  146. package/src/shell/sounds-bridge.js +28 -0
  147. package/src/shell/transcript-bridge.js +61 -0
  148. package/src/shell/waveform-bridge.js +33 -0
  149. package/src/styles/base.css +2862 -0
  150. package/src/styles/face.css +417 -0
  151. package/src/styles/pi-overrides.css +89 -0
  152. package/src/styles/theme-dark.css +67 -0
  153. package/src/test-tts.html +175 -0
  154. package/src/ui/AppShell.js +544 -0
  155. package/src/ui/ProfileSwitcher.js +228 -0
  156. package/src/ui/SessionControl.js +240 -0
  157. package/src/ui/face/FacePicker.js +195 -0
  158. package/src/ui/face/FaceRenderer.js +309 -0
  159. package/src/ui/settings/PlaylistEditor.js +366 -0
  160. package/src/ui/settings/SettingsPanel.css +684 -0
  161. package/src/ui/settings/SettingsPanel.js +419 -0
  162. package/src/ui/settings/TTSVoicePreview.js +210 -0
  163. package/src/ui/themes/ThemeManager.js +213 -0
  164. package/src/ui/visualizers/BaseVisualizer.js +29 -0
  165. package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
  166. package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
  167. package/static/emulators/jsdos/js-dos.css +1 -0
  168. package/static/emulators/jsdos/js-dos.js +22 -0
  169. package/static/favicon.svg +55 -0
  170. package/static/icons/apple-touch-icon.png +0 -0
  171. package/static/icons/favicon-32.png +0 -0
  172. package/static/icons/icon-192.png +0 -0
  173. package/static/icons/icon-512.png +0 -0
  174. package/static/install.html +449 -0
  175. package/static/manifest.json +26 -0
  176. package/static/sw.js +21 -0
  177. package/tts_providers/__init__.py +136 -0
  178. package/tts_providers/base_provider.py +319 -0
  179. package/tts_providers/groq_provider.py +155 -0
  180. package/tts_providers/hume_provider.py +226 -0
  181. package/tts_providers/providers_config.json +119 -0
  182. package/tts_providers/qwen3_provider.py +371 -0
  183. package/tts_providers/resemble_provider.py +315 -0
  184. package/tts_providers/supertonic_provider.py +557 -0
  185. package/tts_providers/supertonic_tts.py +399 -0
@@ -0,0 +1,812 @@
1
+ /**
2
+ * ElevenLabsHybridAdapter — Multi-Agent Framework adapter (P7-T5)
3
+ *
4
+ * Combines ElevenLabs voice quality with OpenClaw's full VPS control:
5
+ *
6
+ * ElevenLabs handles: STT (cloud), TTS (multi-voice), dj_soundboard,
7
+ * caller_sounds, caller phone effects, music sync,
8
+ * turn management.
9
+ *
10
+ * OpenClaw handles: LLM reasoning (GLM-4.7 via Clawdbot Gateway),
11
+ * canvas creation, file ops, code execution,
12
+ * memory, web search, VPS control — everything.
13
+ *
14
+ * The bridge: ElevenLabs is configured with a custom LLM endpoint
15
+ * (POST /api/elevenlabs-llm on our Flask server) instead of its built-in
16
+ * model. Our server receives the conversation context, forwards the last
17
+ * user message to OpenClaw via the persistent Gateway WebSocket, streams
18
+ * the response back to ElevenLabs in OpenAI-compatible SSE format.
19
+ *
20
+ * Canvas side-channel: OpenClaw embeds {canvas:present,url:...} markers in
21
+ * its responses. The server strips them from the spoken text before
22
+ * returning to ElevenLabs (so the agent doesn't read HTML aloud) and stores
23
+ * them in a queue. This adapter polls /api/canvas-pending every second and
24
+ * emits CANVAS_CMD events so the shell loads the iframe.
25
+ *
26
+ * Ref: future-dev-plans/16-ELEVENLABS-OPENCLAW-HYBRID.md
27
+ * Ref: future-dev-plans/15-ELEVENLABS-CLASSIC-AGENT.md (classic base)
28
+ * Ref: future-dev-plans/17-MULTI-AGENT-FRAMEWORK.md
29
+ *
30
+ * Adapter contract (same as all adapters):
31
+ * init(bridge, config) — called when mode is selected
32
+ * start() — called when user clicks call button
33
+ * stop() — graceful disconnect
34
+ * destroy() — full teardown on adapter switch
35
+ *
36
+ * Config shape:
37
+ * {
38
+ * agentId: string, // Hybrid ElevenLabs agent ID (custom LLM configured)
39
+ * serverUrl: string, // Flask server base URL
40
+ * musicPlayer: object, // optional — shared MusicPlayer instance
41
+ * pollInterval: number, // ms between canvas polls (default 1000)
42
+ * }
43
+ */
44
+
45
+ import { AgentEvents, AgentActions } from '../core/EventBridge.js';
46
+
47
// ─────────────────────────────────────────────────────────────────────────────
// Constants
// ─────────────────────────────────────────────────────────────────────────────

/** ESM build of the ElevenLabs browser SDK, loaded lazily from the CDN. */
const SDK_URL = 'https://cdn.jsdelivr.net/npm/@elevenlabs/client@latest/+esm';

/** Catalogue of DJ sounds the agent may trigger via the dj_soundboard tool. */
const DJ_SOUND_LIST = [
  'air_horn', 'air_horn_long', 'siren', 'siren_woop', 'scratch', 'scratch_long',
  'rewind', 'record_stop', 'whoosh', 'riser', 'bass_drop', 'impact',
  'crowd_cheer', 'crowd_hype', 'applause', 'yeah', 'lets_go', 'laser',
  'gunshot', 'explosion', 'vinyl_crackle',
];

/** Phrases in agent speech that hint at a track change — triggers a server music sync. */
const MUSIC_KEYWORDS_RE = /\b(spinning|playing|next up|coming up|dropping|fire up|switching|change it up)\b/i;

/** Phrases that cue the ad-break system. */
const COMMERCIAL_KEYWORDS_RE = /\b(commercial|sponsor|ad break|word from our|brought to you)\b/i;

/** Persona tags that switch the caller phone filter ON. */
const CALLER_PATTERNS = [
  /<caller\s*1>/i,
  /<caller\s*2>/i,
  /<mike[\-\s]*voice>/i,
  /<caller\s*voice>/i,
  /<phone\s*voice>/i,
];

/** Persona tags that switch the caller phone filter OFF. */
const NON_CALLER_PATTERNS = [
  /<radio\s*voice>/i,
  /<kitt[\-\s]*voice>/i,
  /<dj[\-\s]*soul>/i,
];

/** Default cadence for polling the canvas side-channel queue. */
const DEFAULT_POLL_INTERVAL_MS = 1000;
84
+
85
+ // ─────────────────────────────────────────────────────────────────────────────
86
+ // Adapter
87
+ // ─────────────────────────────────────────────────────────────────────────────
88
+
89
+ const ElevenLabsHybridAdapter = {
90
+
91
+ // ── Identity & capabilities ───────────────────────────────────────────────
92
+
93
+ name: 'ElevenLabs + OpenClaw Hybrid',
94
+
95
+ /**
96
+ * Feature flags: the shell shows/hides UI elements based on this array.
97
+ *
98
+ * Union of ElevenLabs Classic capabilities (voice/audio) + ClawdBot
99
+ * capabilities (VPS/canvas/files) — the best of both worlds.
100
+ */
101
+ capabilities: [
102
+ // Voice (from ElevenLabs)
103
+ 'multi_voice',
104
+ 'dj_soundboard',
105
+ 'caller_effects',
106
+ 'caller_sounds',
107
+ 'music_sync',
108
+ 'commercials',
109
+ // Brain (from OpenClaw via custom LLM)
110
+ 'canvas',
111
+ 'vps_control',
112
+ 'file_ops',
113
+ 'code_execution',
114
+ ],
115
+
116
+ // ── Private state ─────────────────────────────────────────────────────────
117
+
118
+ _bridge: null,
119
+ _config: null,
120
+ _conversation: null, // ElevenLabs Conversation session
121
+ _sdk: null, // { Conversation } from @elevenlabs/client
122
+
123
+ // Web Audio API caller phone effect chain
124
+ _audioContext: null,
125
+ _elevenLabsSource: null,
126
+ _callerNodes: null,
127
+ _callerEffectActive: false,
128
+ _audioObserver: null,
129
+
130
+ // Music sync debounce
131
+ _lastSyncTime: 0,
132
+ _syncClearTimer: null,
133
+
134
+ // DJ transition tracking
135
+ _djTransitionTriggered: false,
136
+
137
+ // Caller sounds cooldown
138
+ _callerSoundCooldown: false,
139
+
140
+ // Preloaded DJ sound blob URLs
141
+ _djSoundCache: {},
142
+
143
+ // Commercial state
144
+ _commercialPlaying: false,
145
+ _commercialPlayer: null,
146
+
147
+ // Canvas command polling (the hybrid side-channel)
148
+ _canvasPoller: null,
149
+
150
+ // Bridge / bus unsubscribe cleanup
151
+ _unsubscribers: [],
152
+
153
+ // ── Lifecycle ─────────────────────────────────────────────────────────────
154
+
155
+ /**
156
+ * Initialize the adapter.
157
+ *
158
+ * Loads the ElevenLabs SDK, sets up the Web Audio API caller effect
159
+ * chain, starts canvas command polling, and subscribes to bridge actions.
160
+ *
161
+ * @param {import('../core/EventBridge.js').EventBridge} bridge
162
+ * @param {object} config
163
+ */
164
+ async init(bridge, config) {
165
+ this._bridge = bridge;
166
+ this._config = config || {};
167
+
168
+ // Load ElevenLabs SDK from CDN
169
+ if (!this._sdk) {
170
+ try {
171
+ this._sdk = await import(SDK_URL);
172
+ } catch (err) {
173
+ console.error('[ElevenLabsHybrid] Failed to load SDK:', err);
174
+ bridge.emit(AgentEvents.ERROR, { message: 'Failed to load ElevenLabs SDK' });
175
+ return;
176
+ }
177
+ }
178
+
179
+ // Build caller phone effect audio chain (identical to Classic adapter)
180
+ this._initAudioPipeline();
181
+
182
+ // Watch for unnamed <audio> elements that ElevenLabs creates for TTS
183
+ this._initAudioObserver();
184
+
185
+ // Preload frequently used DJ sounds as blob URLs (non-fatal if it fails)
186
+ this._preloadDJSounds();
187
+
188
+ // Start polling /api/canvas-pending for OpenClaw canvas side-channel
189
+ this._startCanvasPolling();
190
+
191
+ // Subscribe to UI→Agent bridge actions
192
+ this._unsubscribers.push(
193
+ bridge.on(AgentActions.END_SESSION, () => this.stop()),
194
+ bridge.on(AgentActions.CONTEXT_UPDATE, (d) => this._sendContextUpdate(d.text)),
195
+ bridge.on(AgentActions.FORCE_MESSAGE, (d) => this._sendForceMessage(d.text)),
196
+ );
197
+ },
198
+
199
+ /**
200
+ * Start the conversation.
201
+ * Unlocks iOS AudioContext → requests+releases mic → starts ElevenLabs session.
202
+ */
203
+ async start() {
204
+ if (!this._sdk) {
205
+ console.error('[ElevenLabsHybrid] SDK not loaded — cannot start');
206
+ return;
207
+ }
208
+ if (this._conversation) {
209
+ console.warn('[ElevenLabsHybrid] Already connected');
210
+ return;
211
+ }
212
+
213
+ // iOS: request mic then immediately release before startSession()
214
+ try {
215
+ const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
216
+ stream.getTracks().forEach(t => t.stop());
217
+ } catch (err) {
218
+ console.warn('[ElevenLabsHybrid] Mic pre-unlock failed:', err);
219
+ }
220
+
221
+ // Resume AudioContext (must be triggered by a user gesture)
222
+ if (this._audioContext && this._audioContext.state === 'suspended') {
223
+ await this._audioContext.resume();
224
+ }
225
+
226
+ const agentId = this._config.agentId;
227
+ if (!agentId) {
228
+ this._bridge.emit(AgentEvents.ERROR, {
229
+ message: 'ElevenLabsHybrid: agentId not configured — set ELEVENLABS_HYBRID_AGENT_ID in .env',
230
+ });
231
+ return;
232
+ }
233
+
234
+ try {
235
+ this._conversation = await this._sdk.Conversation.startSession({
236
+ agentId,
237
+ overrides: {},
238
+ // Only two client tools — OpenClaw handles everything else via custom LLM
239
+ clientTools: {
240
+ dj_soundboard: (params) => this._clientToolDJSoundboard(params),
241
+ caller_sounds: (params) => this._clientToolCallerSounds(params),
242
+ },
243
+ onConnect: () => this._onConnect(),
244
+ onDisconnect: () => this._onDisconnect(),
245
+ onError: (err) => this._onError(err),
246
+ onModeChange: (mode) => this._onModeChange(mode),
247
+ onMessage: (msg) => this._onMessage(msg),
248
+ });
249
+ } catch (err) {
250
+ console.error('[ElevenLabsHybrid] startSession failed:', err);
251
+ this._bridge.emit(AgentEvents.ERROR, { message: String(err) });
252
+ this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
253
+ }
254
+ },
255
+
256
+ /** Stop conversation gracefully. */
257
+ async stop() {
258
+ if (this._conversation) {
259
+ try { await this._conversation.endSession(); } catch (_) { /* ignore */ }
260
+ this._conversation = null;
261
+ }
262
+ this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'idle' });
263
+ this._bridge.emit(AgentEvents.DISCONNECTED);
264
+ this._bridge.emit(AgentEvents.MOOD, { mood: 'neutral' });
265
+ },
266
+
267
+ /** Full teardown on adapter switch — MUST release all resources. */
268
+ async destroy() {
269
+ // Stop ElevenLabs session
270
+ if (this._conversation) {
271
+ try { await this._conversation.endSession(); } catch (_) { /* ignore */ }
272
+ this._conversation = null;
273
+ }
274
+
275
+ // Stop canvas polling
276
+ this._stopCanvasPolling();
277
+
278
+ // Stop MutationObserver
279
+ if (this._audioObserver) {
280
+ this._audioObserver.disconnect();
281
+ this._audioObserver = null;
282
+ }
283
+
284
+ // Close AudioContext
285
+ if (this._audioContext && this._audioContext.state !== 'closed') {
286
+ try { await this._audioContext.close(); } catch (_) { /* ignore */ }
287
+ }
288
+ this._audioContext = null;
289
+ this._elevenLabsSource = null;
290
+ this._callerNodes = null;
291
+ this._callerEffectActive = false;
292
+
293
+ // Revoke preloaded blob URLs
294
+ Object.values(this._djSoundCache).forEach(url => {
295
+ try { URL.revokeObjectURL(url); } catch (_) { /* ignore */ }
296
+ });
297
+ this._djSoundCache = {};
298
+
299
+ // Stop commercial player
300
+ if (this._commercialPlayer) {
301
+ this._commercialPlayer.pause();
302
+ this._commercialPlayer.src = '';
303
+ this._commercialPlayer = null;
304
+ }
305
+ this._commercialPlaying = false;
306
+
307
+ // Clear timers
308
+ clearTimeout(this._syncClearTimer);
309
+
310
+ // Unsubscribe bridge listeners
311
+ this._unsubscribers.forEach(fn => fn());
312
+ this._unsubscribers = [];
313
+
314
+ this._bridge = null;
315
+ this._config = null;
316
+ },
317
+
318
+ // ── ElevenLabs SDK callbacks ──────────────────────────────────────────────
319
+
320
+ _onConnect() {
321
+ console.log('[ElevenLabsHybrid] Connected (ElevenLabs SDK → custom LLM → OpenClaw)');
322
+ this._bridge.emit(AgentEvents.CONNECTED);
323
+ this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
324
+ this._bridge.emit(AgentEvents.MOOD, { mood: 'happy' });
325
+ },
326
+
327
+ _onDisconnect() {
328
+ console.log('[ElevenLabsHybrid] Disconnected');
329
+ this._conversation = null;
330
+ this._bridge.emit(AgentEvents.DISCONNECTED);
331
+ this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'idle' });
332
+ this._bridge.emit(AgentEvents.MOOD, { mood: 'neutral' });
333
+ if (this._config.musicPlayer) {
334
+ this._config.musicPlayer.duck(false);
335
+ }
336
+ },
337
+
338
+ _onError(error) {
339
+ console.error('[ElevenLabsHybrid] Error:', error);
340
+ this._bridge.emit(AgentEvents.ERROR, { message: String(error) });
341
+ this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
342
+ },
343
+
344
+ /** onModeChange fires when ElevenLabs switches speaking ↔ listening. */
345
+ _onModeChange({ mode }) {
346
+ if (mode === 'speaking') {
347
+ this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'speaking' });
348
+ this._bridge.emit(AgentEvents.TTS_PLAYING);
349
+ if (this._config.musicPlayer) {
350
+ this._config.musicPlayer.duck(true);
351
+ }
352
+ } else {
353
+ // listening
354
+ this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
355
+ this._bridge.emit(AgentEvents.TTS_STOPPED);
356
+ this._bridge.emit(AgentEvents.MOOD, { mood: 'listening' });
357
+ if (this._config.musicPlayer) {
358
+ this._config.musicPlayer.duck(false);
359
+ }
360
+ }
361
+ },
362
+
363
+ /**
364
+ * onMessage handles ALL ElevenLabs agent messages and tool responses.
365
+ * Routing order mirrors ElevenLabsClassicAdapter._onMessage (APPENDIX A.2).
366
+ *
367
+ * Note: canvas commands are handled by the polling side-channel
368
+ * (_startCanvasPolling), NOT by text detection here — the server strips
369
+ * {canvas:...} markers from the spoken text before ElevenLabs sees it.
370
+ */
371
+ _onMessage(message) {
372
+ // ── 1. Tool response detection ────────────────────────────────────────
373
+ let toolName = null;
374
+ let toolResult = null;
375
+
376
+ if (message.source === 'ai' && message.message?.toolResult) {
377
+ toolName = message.message.toolResult.toolName;
378
+ toolResult = message.message.toolResult.result;
379
+ }
380
+
381
+ if (toolName) {
382
+ // dj_soundboard
383
+ if (toolName === 'dj_soundboard') {
384
+ try {
385
+ const parsed = JSON.parse(toolResult);
386
+ if (parsed.sound) this._playDJSound(parsed.sound);
387
+ } catch (_) { /* not JSON */ }
388
+ this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
389
+ return;
390
+ }
391
+
392
+ // play_music
393
+ if (toolName === 'play_music') {
394
+ try {
395
+ const parsed = JSON.parse(toolResult);
396
+ const action = parsed.action || 'play';
397
+ if (action === 'stop') {
398
+ this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'stop' });
399
+ } else if (action === 'pause') {
400
+ this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'pause' });
401
+ } else {
402
+ this._syncMusicWithServer();
403
+ }
404
+ } catch (_) {
405
+ this._syncMusicWithServer();
406
+ }
407
+ this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
408
+ return;
409
+ }
410
+
411
+ // play_commercial
412
+ if (toolName === 'play_commercial') {
413
+ this._playCommercial();
414
+ this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
415
+ return;
416
+ }
417
+
418
+ // generate_song
419
+ if (toolName === 'generate_song') {
420
+ try {
421
+ const parsed = JSON.parse(toolResult);
422
+ if (parsed.song_id) {
423
+ this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'generated', songId: parsed.song_id });
424
+ }
425
+ } catch (_) { /* ignore */ }
426
+ this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
427
+ return;
428
+ }
429
+
430
+ // Generic tool — show in ActionConsole
431
+ this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
432
+ return;
433
+ }
434
+
435
+ // ── 2. Display in transcript ──────────────────────────────────────────
436
+ const text = message.message?.text || '';
437
+ if (text) {
438
+ const role = message.source === 'user' ? 'user' : 'assistant';
439
+ this._bridge.emit(AgentEvents.MESSAGE, { role, text, final: true });
440
+ }
441
+
442
+ if (message.source !== 'ai' || !text) return;
443
+
444
+ // ── 3. Caller voice detection ─────────────────────────────────────────
445
+ const isCallerVoice = CALLER_PATTERNS.some(re => re.test(text));
446
+ const isNormalVoice = NON_CALLER_PATTERNS.some(re => re.test(text));
447
+
448
+ if (isCallerVoice) {
449
+ this._setCallerEffect(true);
450
+ } else if (isNormalVoice) {
451
+ this._setCallerEffect(false);
452
+ }
453
+
454
+ // ── 4. Music keyword detection ────────────────────────────────────────
455
+ if (MUSIC_KEYWORDS_RE.test(text) && !this._callerEffectActive) {
456
+ this._syncMusicWithServer();
457
+ }
458
+
459
+ // ── 5. Commercial keyword detection ──────────────────────────────────
460
+ if (COMMERCIAL_KEYWORDS_RE.test(text) && !this._commercialPlaying) {
461
+ this._playCommercial();
462
+ }
463
+ },
464
+
465
+ // ── Canvas Side-Channel Polling ───────────────────────────────────────────
466
+
467
+ /**
468
+ * Poll /api/canvas-pending every second for canvas commands from OpenClaw.
469
+ *
470
+ * OpenClaw embeds {canvas:present,url:...} markers in responses. The
471
+ * custom LLM endpoint strips them from the spoken text and queues them
472
+ * server-side. We fetch and consume the queue here, emitting CANVAS_CMD
473
+ * events for the shell.
474
+ *
475
+ * Ref: doc 16 — "Canvas Integration in Hybrid Mode"
476
+ */
477
+ _startCanvasPolling() {
478
+ if (this._canvasPoller) return;
479
+
480
+ const intervalMs = this._config.pollInterval || DEFAULT_POLL_INTERVAL_MS;
481
+ const serverUrl = this._config.serverUrl || '';
482
+
483
+ this._canvasPoller = setInterval(async () => {
484
+ try {
485
+ const resp = await fetch(`${serverUrl}/api/canvas-pending`);
486
+ if (!resp.ok) return;
487
+ const data = await resp.json();
488
+ const commands = data.commands || [];
489
+ for (const cmd of commands) {
490
+ if (cmd.action === 'present' && cmd.url) {
491
+ this._bridge.emit(AgentEvents.CANVAS_CMD, {
492
+ action: 'present',
493
+ url: cmd.url,
494
+ });
495
+ } else if (cmd.action === 'close') {
496
+ this._bridge.emit(AgentEvents.CANVAS_CMD, { action: 'close' });
497
+ }
498
+ }
499
+ } catch (_) {
500
+ // Non-fatal — server may not be running /api/canvas-pending yet
501
+ }
502
+ }, intervalMs);
503
+ },
504
+
505
+ _stopCanvasPolling() {
506
+ if (this._canvasPoller) {
507
+ clearInterval(this._canvasPoller);
508
+ this._canvasPoller = null;
509
+ }
510
+ },
511
+
512
+ // ── Client Tools ──────────────────────────────────────────────────────────
513
+
514
+ /** Client tool: dj_soundboard — plays sounds silently in browser. */
515
+ async _clientToolDJSoundboard(parameters) {
516
+ const action = parameters.action || 'list';
517
+ const sound = parameters.sound || '';
518
+
519
+ if (action === 'play' && sound) {
520
+ await this._playDJSound(sound);
521
+ return `*${sound.replace(/_/g, ' ')}* 🎵`;
522
+ }
523
+ if (action === 'list') {
524
+ return JSON.stringify(DJ_SOUND_LIST);
525
+ }
526
+ return 'Unknown action';
527
+ },
528
+
529
+ /**
530
+ * Client tool: caller_sounds — plays dial tone before caller voice switch.
531
+ * dial_tone = double beep with 400ms gap. 5s cooldown prevents spam.
532
+ */
533
+ async _clientToolCallerSounds(parameters) {
534
+ const action = parameters.action || 'play';
535
+ const sound = parameters.sound || 'dial_tone';
536
+
537
+ if (action === 'list') {
538
+ return JSON.stringify(['dial_tone', 'ring', 'pickup', 'hangup']);
539
+ }
540
+ if (action === 'play') {
541
+ await this._playCallerSound(sound);
542
+ return `*Phone sound: ${sound}* 📞`;
543
+ }
544
+ return 'Unknown action';
545
+ },
546
+
547
+ // ── Audio Pipeline ────────────────────────────────────────────────────────
548
+
549
+ /**
550
+ * Build the Web Audio API context and caller phone effect filter chain.
551
+ *
552
+ * HighPass(500Hz) → LowPass(2200Hz) → PeakingEQ(1200Hz, +6dB)
553
+ * → Compressor(-30dB, 16:1) → WaveShaper(25) → Gain(0.7) → Destination
554
+ *
555
+ * Identical to ElevenLabsClassicAdapter — shared audio system design.
556
+ */
557
+ _initAudioPipeline() {
558
+ if (this._audioContext) return;
559
+
560
+ try {
561
+ this._audioContext = new (window.AudioContext || window.webkitAudioContext)();
562
+ const ctx = this._audioContext;
563
+
564
+ const highPass = ctx.createBiquadFilter();
565
+ highPass.type = 'highpass';
566
+ highPass.frequency.value = 500;
567
+ highPass.Q.value = 1.5;
568
+
569
+ const lowPass = ctx.createBiquadFilter();
570
+ lowPass.type = 'lowpass';
571
+ lowPass.frequency.value = 2200;
572
+ lowPass.Q.value = 1.5;
573
+
574
+ const midBoost = ctx.createBiquadFilter();
575
+ midBoost.type = 'peaking';
576
+ midBoost.frequency.value = 1200;
577
+ midBoost.gain.value = 6;
578
+
579
+ const compressor = ctx.createDynamicsCompressor();
580
+ compressor.threshold.value = -30;
581
+ compressor.ratio.value = 16;
582
+ compressor.attack.value = 0.002;
583
+ compressor.release.value = 0.2;
584
+
585
+ const distortion = ctx.createWaveShaper();
586
+ distortion.curve = this._makeDistortionCurve(25);
587
+
588
+ const outputGain = ctx.createGain();
589
+ outputGain.gain.value = 0.7;
590
+
591
+ // Chain: highPass → lowPass → midBoost → compressor → distortion → outputGain → dest
592
+ highPass.connect(lowPass);
593
+ lowPass.connect(midBoost);
594
+ midBoost.connect(compressor);
595
+ compressor.connect(distortion);
596
+ distortion.connect(outputGain);
597
+ outputGain.connect(ctx.destination);
598
+
599
+ const bypassGain = ctx.createGain();
600
+ bypassGain.gain.value = 1;
601
+ bypassGain.connect(ctx.destination);
602
+
603
+ this._callerNodes = { input: highPass, output: outputGain, bypassGain, effectOutput: outputGain };
604
+ } catch (err) {
605
+ console.warn('[ElevenLabsHybrid] AudioContext init failed:', err);
606
+ }
607
+ },
608
+
609
+ /** MutationObserver — hooks unnamed <audio> elements ElevenLabs creates for TTS. */
610
+ _initAudioObserver() {
611
+ if (this._audioObserver) return;
612
+
613
+ this._audioObserver = new MutationObserver((mutations) => {
614
+ mutations.forEach((mutation) => {
615
+ mutation.addedNodes.forEach((node) => {
616
+ if (node.tagName === 'AUDIO' && !node.id && !node.dataset.hybridHooked) {
617
+ this._hookElevenLabsAudio(node);
618
+ }
619
+ });
620
+ });
621
+ });
622
+
623
+ this._audioObserver.observe(document.body, { childList: true, subtree: true });
624
+ },
625
+
626
+ /** Hook a single ElevenLabs TTS <audio> element into the Web Audio API chain. */
627
+ _hookElevenLabsAudio(audioElement) {
628
+ if (!this._audioContext || !this._callerNodes) return;
629
+ try {
630
+ audioElement.dataset.hybridHooked = 'true';
631
+ this._elevenLabsSource = this._audioContext.createMediaElementSource(audioElement);
632
+ this._elevenLabsSource.connect(this._callerNodes.bypassGain);
633
+
634
+ if (this._callerEffectActive) {
635
+ this._elevenLabsSource.disconnect();
636
+ this._elevenLabsSource.connect(this._callerNodes.input);
637
+ }
638
+ } catch (err) {
639
+ console.warn('[ElevenLabsHybrid] hookElevenLabsAudio failed:', err);
640
+ }
641
+ },
642
+
643
+ /** Enable or disable the caller phone filter effect. */
644
+ _setCallerEffect(enabled) {
645
+ this._callerEffectActive = enabled;
646
+ this._bridge.emit(AgentEvents.CALLER_EFFECT, { enabled });
647
+
648
+ if (!this._elevenLabsSource || !this._callerNodes) return;
649
+ try {
650
+ this._elevenLabsSource.disconnect();
651
+ if (enabled) {
652
+ this._elevenLabsSource.connect(this._callerNodes.input);
653
+ } else {
654
+ this._elevenLabsSource.connect(this._callerNodes.bypassGain);
655
+ }
656
+ } catch (err) {
657
+ console.warn('[ElevenLabsHybrid] setCallerEffect failed:', err);
658
+ }
659
+ },
660
+
661
+ _makeDistortionCurve(amount) {
662
+ const n_samples = 256;
663
+ const curve = new Float32Array(n_samples);
664
+ const deg = Math.PI / 180;
665
+ for (let i = 0; i < n_samples; ++i) {
666
+ const x = (i * 2) / n_samples - 1;
667
+ curve[i] = ((3 + amount) * x * 20 * deg) / (Math.PI + amount * Math.abs(x));
668
+ }
669
+ return curve;
670
+ },
671
+
672
+ // ── DJ Sounds ─────────────────────────────────────────────────────────────
673
+
674
+ async _preloadDJSounds() {
675
+ const serverUrl = this._config.serverUrl || '';
676
+ const toPreload = [
677
+ 'air_horn', 'scratch_long', 'crowd_cheer', 'crowd_hype',
678
+ 'rewind', 'yeah', 'laser', 'lets_go', 'impact', 'record_stop',
679
+ ];
680
+ await Promise.allSettled(
681
+ toPreload.map(async (name) => {
682
+ try {
683
+ const res = await fetch(`${serverUrl}/sounds/dj/${name}.mp3`);
684
+ if (res.ok) {
685
+ const blob = await res.blob();
686
+ this._djSoundCache[name] = URL.createObjectURL(blob);
687
+ }
688
+ } catch (_) { /* non-fatal */ }
689
+ })
690
+ );
691
+ },
692
+
693
+ async _playDJSound(soundName) {
694
+ const serverUrl = this._config.serverUrl || '';
695
+ const src = this._djSoundCache[soundName] || `${serverUrl}/sounds/dj/${soundName}.mp3`;
696
+ const audio = new Audio(src);
697
+ audio.volume = 1.0;
698
+ try { await audio.play(); } catch (err) {
699
+ console.warn(`[ElevenLabsHybrid] playDJSound(${soundName}) failed:`, err);
700
+ }
701
+ this._bridge.emit(AgentEvents.PLAY_SOUND, { sound: soundName, type: 'dj' });
702
+ },
703
+
704
+ async _playCallerSound(sound) {
705
+ if (this._callerSoundCooldown) return;
706
+ this._callerSoundCooldown = true;
707
+ setTimeout(() => { this._callerSoundCooldown = false; }, 5000);
708
+
709
+ const serverUrl = this._config.serverUrl || '';
710
+ const src = `${serverUrl}/sounds/caller/${sound}.mp3`;
711
+ this._bridge.emit(AgentEvents.PLAY_SOUND, { sound, type: 'caller' });
712
+
713
+ if (sound === 'dial_tone') {
714
+ for (let i = 0; i < 2; i++) {
715
+ if (i > 0) await this._sleep(400);
716
+ const audio = new Audio(src);
717
+ try { await audio.play(); } catch (_) { /* ignore */ }
718
+ await this._sleep(800);
719
+ }
720
+ } else {
721
+ const audio = new Audio(src);
722
+ try { await audio.play(); } catch (_) { /* ignore */ }
723
+ }
724
+ },
725
+
726
+ // ── Music Sync ────────────────────────────────────────────────────────────
727
+
728
+ /** Sync music with server state (2s debounce, blocked during caller/commercial). */
729
+ _syncMusicWithServer() {
730
+ const now = Date.now();
731
+ if (now - this._lastSyncTime < 2000) return;
732
+ if (this._callerEffectActive) return;
733
+ if (this._commercialPlaying) return;
734
+
735
+ this._lastSyncTime = now;
736
+ this._bridge.emit(AgentEvents.MUSIC_SYNC);
737
+
738
+ clearTimeout(this._syncClearTimer);
739
+ this._syncClearTimer = setTimeout(() => {}, 30000);
740
+ },
741
+
742
+ // ── DJ Transition Alert ───────────────────────────────────────────────────
743
+
744
+ /** Called by shell's MusicPlayer when a track has ≤12 seconds remaining. */
745
+ onTrackEndingSoon() {
746
+ if (this._djTransitionTriggered || !this._conversation) return;
747
+ this._djTransitionTriggered = true;
748
+ this._sendContextUpdate('[DJ INFO: track ending in 10s]');
749
+ this._sendForceMessage('[SYSTEM: Song ending! Announce next and call play_music action=skip!]');
750
+ },
751
+
752
+ onTrackEnded() {
753
+ this._djTransitionTriggered = false;
754
+ },
755
+
756
+ // ── Commercial System ─────────────────────────────────────────────────────
757
+
758
+ async _playCommercial() {
759
+ if (this._commercialPlaying) return;
760
+ this._commercialPlaying = true;
761
+
762
+ const serverUrl = this._config.serverUrl || '';
763
+ this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'stop' });
764
+
765
+ try {
766
+ const res = await fetch(`${serverUrl}/api/commercials?action=play`);
767
+ const data = await res.json();
768
+
769
+ if (data.url) {
770
+ this._commercialPlayer = new Audio(data.url);
771
+ this._sendContextUpdate('[DJ INFO: Commercial playing, stay quiet]');
772
+ await fetch(`${serverUrl}/api/commercials?action=confirm_started`);
773
+
774
+ this._commercialPlayer.addEventListener('ended', async () => {
775
+ this._commercialPlaying = false;
776
+ await fetch(`${serverUrl}/api/commercials?action=ended`);
777
+ this._sendForceMessage("[SYSTEM: Commercial over! Say we're back and play next!");
778
+ });
779
+
780
+ await this._commercialPlayer.play();
781
+ } else {
782
+ this._commercialPlaying = false;
783
+ }
784
+ } catch (err) {
785
+ console.warn('[ElevenLabsHybrid] playCommercial failed:', err);
786
+ this._commercialPlaying = false;
787
+ }
788
+ },
789
+
790
+ // ── ElevenLabs context injection ──────────────────────────────────────────
791
+
792
+ _sendContextUpdate(text) {
793
+ if (!this._conversation) return;
794
+ try { this._conversation.sendContextualUpdate(text); }
795
+ catch (err) { console.warn('[ElevenLabsHybrid] sendContextualUpdate failed:', err); }
796
+ },
797
+
798
+ _sendForceMessage(text) {
799
+ if (!this._conversation) return;
800
+ try { this._conversation.sendUserMessage(text); }
801
+ catch (err) { console.warn('[ElevenLabsHybrid] sendForceMessage failed:', err); }
802
+ },
803
+
804
+ // ── Utilities ─────────────────────────────────────────────────────────────
805
+
806
+ _sleep(ms) {
807
+ return new Promise(resolve => setTimeout(resolve, ms));
808
+ },
809
+ };
810
+
811
+ export default ElevenLabsHybridAdapter;
812
+ export { ElevenLabsHybridAdapter };