openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/.env.example +104 -0
  2. package/Dockerfile +30 -0
  3. package/LICENSE +21 -0
  4. package/README.md +638 -0
  5. package/SETUP.md +360 -0
  6. package/app.py +232 -0
  7. package/auto-approve-devices.js +111 -0
  8. package/cli/index.js +372 -0
  9. package/config/__init__.py +4 -0
  10. package/config/default.yaml +43 -0
  11. package/config/flags.yaml +67 -0
  12. package/config/loader.py +203 -0
  13. package/config/providers.yaml +71 -0
  14. package/config/speech_normalization.yaml +182 -0
  15. package/config/theme.json +4 -0
  16. package/data/greetings.json +25 -0
  17. package/default-pages/ai-image-creator.html +915 -0
  18. package/default-pages/bulk-image-uploader.html +492 -0
  19. package/default-pages/desktop.html +2865 -0
  20. package/default-pages/file-explorer.html +854 -0
  21. package/default-pages/interactive-map.html +655 -0
  22. package/default-pages/style-guide.html +1005 -0
  23. package/default-pages/website-setup.html +1623 -0
  24. package/deploy/openclaw/Dockerfile +46 -0
  25. package/deploy/openvoiceui.service +30 -0
  26. package/deploy/setup-nginx.sh +50 -0
  27. package/deploy/setup-sudo.sh +306 -0
  28. package/deploy/skill-runner/Dockerfile +19 -0
  29. package/deploy/skill-runner/requirements.txt +14 -0
  30. package/deploy/skill-runner/server.py +269 -0
  31. package/deploy/supertonic/Dockerfile +22 -0
  32. package/deploy/supertonic/server.py +79 -0
  33. package/docker-compose.pinokio.yml +11 -0
  34. package/docker-compose.yml +59 -0
  35. package/greetings.json +25 -0
  36. package/index.html +65 -0
  37. package/inject-device-identity.js +142 -0
  38. package/package.json +82 -0
  39. package/profiles/default.json +114 -0
  40. package/profiles/manager.py +354 -0
  41. package/profiles/schema.json +337 -0
  42. package/prompts/voice-system-prompt.md +149 -0
  43. package/providers/__init__.py +39 -0
  44. package/providers/base.py +63 -0
  45. package/providers/llm/__init__.py +12 -0
  46. package/providers/llm/base.py +71 -0
  47. package/providers/llm/clawdbot_provider.py +112 -0
  48. package/providers/llm/zai_provider.py +115 -0
  49. package/providers/registry.py +320 -0
  50. package/providers/stt/__init__.py +12 -0
  51. package/providers/stt/base.py +58 -0
  52. package/providers/stt/webspeech_provider.py +49 -0
  53. package/providers/stt/whisper_provider.py +100 -0
  54. package/providers/tts/__init__.py +20 -0
  55. package/providers/tts/base.py +91 -0
  56. package/providers/tts/groq_provider.py +74 -0
  57. package/providers/tts/supertonic_provider.py +72 -0
  58. package/requirements.txt +38 -0
  59. package/routes/__init__.py +10 -0
  60. package/routes/admin.py +515 -0
  61. package/routes/canvas.py +1315 -0
  62. package/routes/chat.py +51 -0
  63. package/routes/conversation.py +2158 -0
  64. package/routes/elevenlabs_hybrid.py +306 -0
  65. package/routes/greetings.py +98 -0
  66. package/routes/icons.py +279 -0
  67. package/routes/image_gen.py +364 -0
  68. package/routes/instructions.py +190 -0
  69. package/routes/music.py +838 -0
  70. package/routes/onboarding.py +43 -0
  71. package/routes/pi.py +62 -0
  72. package/routes/profiles.py +215 -0
  73. package/routes/report_issue.py +68 -0
  74. package/routes/static_files.py +533 -0
  75. package/routes/suno.py +664 -0
  76. package/routes/theme.py +81 -0
  77. package/routes/transcripts.py +199 -0
  78. package/routes/vision.py +348 -0
  79. package/routes/workspace.py +288 -0
  80. package/server.py +1510 -0
  81. package/services/__init__.py +1 -0
  82. package/services/auth.py +143 -0
  83. package/services/canvas_versioning.py +239 -0
  84. package/services/db_pool.py +107 -0
  85. package/services/gateway.py +16 -0
  86. package/services/gateway_manager.py +333 -0
  87. package/services/gateways/__init__.py +12 -0
  88. package/services/gateways/base.py +110 -0
  89. package/services/gateways/compat.py +264 -0
  90. package/services/gateways/openclaw.py +1134 -0
  91. package/services/health.py +100 -0
  92. package/services/memory_client.py +455 -0
  93. package/services/paths.py +26 -0
  94. package/services/speech_normalizer.py +285 -0
  95. package/services/tts.py +270 -0
  96. package/setup-config.js +262 -0
  97. package/sounds/air_horn.mp3 +0 -0
  98. package/sounds/bruh.mp3 +0 -0
  99. package/sounds/crowd_cheer.mp3 +0 -0
  100. package/sounds/gunshot.mp3 +0 -0
  101. package/sounds/impact.mp3 +0 -0
  102. package/sounds/lets_go.mp3 +0 -0
  103. package/sounds/record_stop.mp3 +0 -0
  104. package/sounds/rewind.mp3 +0 -0
  105. package/sounds/sad_trombone.mp3 +0 -0
  106. package/sounds/scratch_long.mp3 +0 -0
  107. package/sounds/yeah.mp3 +0 -0
  108. package/src/adapters/ClawdBotAdapter.js +264 -0
  109. package/src/adapters/_template.js +133 -0
  110. package/src/adapters/elevenlabs-classic.js +841 -0
  111. package/src/adapters/elevenlabs-hybrid.js +812 -0
  112. package/src/adapters/hume-evi.js +676 -0
  113. package/src/admin.html +1339 -0
  114. package/src/app.js +8802 -0
  115. package/src/core/Config.js +173 -0
  116. package/src/core/EmotionEngine.js +307 -0
  117. package/src/core/EventBridge.js +180 -0
  118. package/src/core/EventBus.js +117 -0
  119. package/src/core/VoiceSession.js +607 -0
  120. package/src/face/BaseFace.js +259 -0
  121. package/src/face/EyeFace.js +208 -0
  122. package/src/face/HaloSmokeFace.js +509 -0
  123. package/src/face/manifest.json +27 -0
  124. package/src/face/previews/eyes.svg +16 -0
  125. package/src/face/previews/orb.svg +29 -0
  126. package/src/features/MusicPlayer.js +620 -0
  127. package/src/features/Soundboard.js +128 -0
  128. package/src/providers/DeepgramSTT.js +472 -0
  129. package/src/providers/DeepgramStreamingSTT.js +766 -0
  130. package/src/providers/GroqSTT.js +559 -0
  131. package/src/providers/TTSPlayer.js +323 -0
  132. package/src/providers/WebSpeechSTT.js +479 -0
  133. package/src/providers/tts/BaseTTSProvider.js +81 -0
  134. package/src/providers/tts/HumeProvider.js +77 -0
  135. package/src/providers/tts/SupertonicProvider.js +174 -0
  136. package/src/providers/tts/index.js +140 -0
  137. package/src/shell/adapter-registry.js +154 -0
  138. package/src/shell/caller-bridge.js +35 -0
  139. package/src/shell/camera-bridge.js +28 -0
  140. package/src/shell/canvas-bridge.js +32 -0
  141. package/src/shell/commercial-bridge.js +44 -0
  142. package/src/shell/face-bridge.js +44 -0
  143. package/src/shell/music-bridge.js +60 -0
  144. package/src/shell/orchestrator.js +233 -0
  145. package/src/shell/profile-discovery.js +303 -0
  146. package/src/shell/sounds-bridge.js +28 -0
  147. package/src/shell/transcript-bridge.js +61 -0
  148. package/src/shell/waveform-bridge.js +33 -0
  149. package/src/styles/base.css +2862 -0
  150. package/src/styles/face.css +417 -0
  151. package/src/styles/pi-overrides.css +89 -0
  152. package/src/styles/theme-dark.css +67 -0
  153. package/src/test-tts.html +175 -0
  154. package/src/ui/AppShell.js +544 -0
  155. package/src/ui/ProfileSwitcher.js +228 -0
  156. package/src/ui/SessionControl.js +240 -0
  157. package/src/ui/face/FacePicker.js +195 -0
  158. package/src/ui/face/FaceRenderer.js +309 -0
  159. package/src/ui/settings/PlaylistEditor.js +366 -0
  160. package/src/ui/settings/SettingsPanel.css +684 -0
  161. package/src/ui/settings/SettingsPanel.js +419 -0
  162. package/src/ui/settings/TTSVoicePreview.js +210 -0
  163. package/src/ui/themes/ThemeManager.js +213 -0
  164. package/src/ui/visualizers/BaseVisualizer.js +29 -0
  165. package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
  166. package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
  167. package/static/emulators/jsdos/js-dos.css +1 -0
  168. package/static/emulators/jsdos/js-dos.js +22 -0
  169. package/static/favicon.svg +55 -0
  170. package/static/icons/apple-touch-icon.png +0 -0
  171. package/static/icons/favicon-32.png +0 -0
  172. package/static/icons/icon-192.png +0 -0
  173. package/static/icons/icon-512.png +0 -0
  174. package/static/install.html +449 -0
  175. package/static/manifest.json +26 -0
  176. package/static/sw.js +21 -0
  177. package/tts_providers/__init__.py +136 -0
  178. package/tts_providers/base_provider.py +319 -0
  179. package/tts_providers/groq_provider.py +155 -0
  180. package/tts_providers/hume_provider.py +226 -0
  181. package/tts_providers/providers_config.json +119 -0
  182. package/tts_providers/qwen3_provider.py +371 -0
  183. package/tts_providers/resemble_provider.py +315 -0
  184. package/tts_providers/supertonic_provider.py +557 -0
  185. package/tts_providers/supertonic_tts.py +399 -0
@@ -0,0 +1,841 @@
1
+ /**
2
+ * ElevenLabsClassicAdapter — Multi-Agent Framework adapter for ElevenLabs Conversational AI (P7-T4)
3
+ *
4
+ * Ports the battle-tested ElevenLabs setup from ai-eyes v1 into ai-eyes2's
5
+ * adapter framework. When selected, this adapter:
6
+ *
7
+ * - Connects to an ElevenLabs agent via the official @elevenlabs/client SDK
8
+ * - Registers two client tools: dj_soundboard and caller_sounds
9
+ * - Hooks ElevenLabs TTS audio elements via MutationObserver for the Web Audio API
10
+ * caller phone filter chain
11
+ * - Detects caller voice XML tags (<Caller 1>, <Caller 2>, <MIke-Voice>) and enables
12
+ * the phone filter effect automatically
13
+ * - Detects music keywords in agent speech and triggers server-side music sync
14
+ * - Handles the track-end DJ transition alert (send context update when ≤12s remaining)
15
+ * - Handles the commercial system (stop music, play ad, resume)
16
+ * - Exposes sendContextualUpdate / sendUserMessage via the EventBridge FORCE_MESSAGE /
17
+ * CONTEXT_UPDATE actions
18
+ *
19
+ * Ref: future-dev-plans/15-ELEVENLABS-CLASSIC-AGENT.md
20
+ * Ref: future-dev-plans/17-MULTI-AGENT-FRAMEWORK.md
21
+ *
22
+ * Adapter contract:
23
+ * init(bridge, config) — called when mode is selected
24
+ * start() — called when user clicks call button (unlocks iOS mic first)
25
+ * stop() — graceful disconnect
26
+ * destroy() — full teardown on adapter switch
27
+ *
28
+ * Config shape:
29
+ * {
30
+ * agentId: string, // ElevenLabs agent ID — default: your-elevenlabs-agent-id
31
+ * serverUrl: string, // Flask server base URL for webhook endpoints
32
+ * musicPlayer: object, // optional — shared MusicPlayer instance from shell
33
+ * }
34
+ */
35
+
36
+ import { AgentEvents, AgentActions } from '../core/EventBridge.js';
37
+
38
+ // ─────────────────────────────────────────────
39
+ // Constants
40
+ // ─────────────────────────────────────────────
41
+
42
// ElevenLabs browser SDK, loaded lazily via dynamic import.
const SDK_URL = 'https://cdn.jsdelivr.net/npm/@elevenlabs/client@latest/+esm';

// Placeholder agent ID used when config.agentId is not supplied.
const DEFAULT_AGENT_ID = 'your-elevenlabs-agent-id';

/** DJ sounds the agent can play silently in the browser (see dj_soundboard tool). */
const DJ_SOUND_LIST = [
  'air_horn',
  'air_horn_long',
  'siren',
  'siren_woop',
  'scratch',
  'scratch_long',
  'rewind',
  'record_stop',
  'whoosh',
  'riser',
  'bass_drop',
  'impact',
  'crowd_cheer',
  'crowd_hype',
  'applause',
  'yeah',
  'lets_go',
  'laser',
  'gunshot',
  'explosion',
  'vinyl_crackle',
];

/** Music-keyword regex — agent speech matching this triggers syncMusicWithServer(). */
const MUSIC_KEYWORDS_RE = /\b(spinning|playing|next up|coming up|dropping|fire up|switching|change it up)\b/i;

/** Commercial-keyword regex — agent speech matching this triggers playCommercial(). */
const COMMERCIAL_KEYWORDS_RE = /\b(commercial|sponsor|ad break|word from our|brought to you)\b/i;

/** Caller voice XML tag patterns → enable the phone filter. */
const CALLER_PATTERNS = [
  /<caller\s*1>/i,
  /<caller\s*2>/i,
  /<mike[\-\s]*voice>/i,
  /<caller\s*voice>/i,
  /<phone\s*voice>/i,
];

/** Voice tags that revert to normal (non-caller) audio. */
const NON_CALLER_PATTERNS = [
  /<radio\s*voice>/i,
  /<kitt[\-\s]*voice>/i,
  /<dj[\-\s]*soul>/i,
];
75
+
76
+ // ─────────────────────────────────────────────
77
+ // Adapter
78
+ // ─────────────────────────────────────────────
79
+
80
+ const ElevenLabsClassicAdapter = {
81
+
82
  // ── Identity & capabilities ───────────────────────────────────────────────

  name: 'ElevenLabs Classic',

  /** Feature flags: shell shows/hides UI elements based on this array */
  capabilities: [
    'multi_voice',      // 7 ElevenLabs voices / personas
    'dj_soundboard',    // client tool: dj_soundboard
    'caller_effects',   // phone filter audio chain
    'caller_sounds',    // client tool: caller_sounds (dial tone)
    'music_sync',       // music keyword detection + server sync
    'commercials',      // commercial keyword detection
    'wake_word',        // Web Speech wake word + SSE Pi trigger
  ],

  // ── Private state ─────────────────────────────────────────────────────────

  _bridge: null,            // EventBridge singleton (set in init, nulled in destroy)
  _config: null,            // config object passed to init()
  _conversation: null,      // ElevenLabs Conversation session object (non-null while connected)
  _sdk: null,               // { Conversation } from @elevenlabs/client, cached after first load

  // Audio chain
  _audioContext: null,      // shared AudioContext for the caller phone-filter chain
  _elevenLabsSource: null,  // MediaElementSource for current TTS audio element
  _callerNodes: null,       // { input, output, bypassGain, effectOutput }
  _callerEffectActive: false, // true while agent speech is routed through the phone filter
  _audioObserver: null,     // MutationObserver for unnamed <audio> elements

  // Music sync debounce
  _lastSyncTime: 0,         // epoch ms of last server music sync
  _lastSyncedTrack: null,   // last track identifier synced, to suppress repeats
  _syncClearTimer: null,    // setTimeout handle; cleared in destroy()

  // DJ transition
  _djTransitionTriggered: false, // set once per track when the ≤12s-remaining alert fires

  // Caller sounds cooldown
  _callerSoundCooldown: false,   // 5s anti-spam flag for _playCallerSound()

  // Preloaded DJ sound blob URLs
  _djSoundCache: {}, // { soundName: blobUrl } — revoked in destroy()

  // Commercial state
  _commercialPlaying: false, // true while an ad spot is playing
  _commercialPlayer: null,   // HTMLAudioElement for the current commercial

  // Bridge unsub functions
  _unsubscribers: [], // functions returned by bridge.on(); called in destroy()
131
+
132
+ // ── Lifecycle ─────────────────────────────────────────────────────────────
133
+
134
+ /**
135
+ * Initialize the adapter.
136
+ * Loads the SDK, builds the audio pipeline, preloads DJ sounds.
137
+ *
138
+ * @param {import('../core/EventBridge.js').EventBridge} bridge
139
+ * @param {object} config
140
+ */
141
+ async init(bridge, config) {
142
+ this._bridge = bridge;
143
+ this._config = config || {};
144
+
145
+ // Load ElevenLabs SDK (dynamic import from CDN)
146
+ if (!this._sdk) {
147
+ try {
148
+ this._sdk = await import(SDK_URL);
149
+ } catch (err) {
150
+ console.error('[ElevenLabsClassic] Failed to load SDK:', err);
151
+ bridge.emit(AgentEvents.ERROR, { message: 'Failed to load ElevenLabs SDK' });
152
+ return;
153
+ }
154
+ }
155
+
156
+ // Build Web Audio API caller phone effect chain
157
+ this._initAudioPipeline();
158
+
159
+ // Set up MutationObserver to hook unnamed <audio> elements ElevenLabs creates
160
+ this._initAudioObserver();
161
+
162
+ // Preload DJ sounds (fire-and-forget — failures are non-fatal)
163
+ this._preloadDJSounds();
164
+
165
+ // Subscribe to UI → Agent actions
166
+ this._unsubscribers.push(
167
+ bridge.on(AgentActions.END_SESSION, () => this.stop()),
168
+ bridge.on(AgentActions.CONTEXT_UPDATE, (d) => this._sendContextUpdate(d.text)),
169
+ bridge.on(AgentActions.FORCE_MESSAGE, (d) => this._sendForceMessage(d.text)),
170
+ );
171
+ },
172
+
173
+ /**
174
+ * Start conversation.
175
+ * Unlocks iOS AudioContext → requests+releases mic → calls Conversation.startSession().
176
+ */
177
+ async start() {
178
+ if (!this._sdk) {
179
+ console.error('[ElevenLabsClassic] SDK not loaded');
180
+ return;
181
+ }
182
+ if (this._conversation) {
183
+ console.warn('[ElevenLabsClassic] Already connected');
184
+ return;
185
+ }
186
+
187
+ // iOS: must request mic then release it BEFORE startSession() (exclusive access)
188
+ try {
189
+ const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
190
+ stream.getTracks().forEach(t => t.stop());
191
+ } catch (err) {
192
+ console.warn('[ElevenLabsClassic] Mic pre-unlock failed (may be fine on desktop):', err);
193
+ }
194
+
195
+ // Resume AudioContext (must be triggered by user gesture)
196
+ if (this._audioContext && this._audioContext.state === 'suspended') {
197
+ await this._audioContext.resume();
198
+ }
199
+
200
+ const agentId = this._config.agentId || DEFAULT_AGENT_ID;
201
+
202
+ try {
203
+ this._conversation = await this._sdk.Conversation.startSession({
204
+ agentId,
205
+ overrides: {},
206
+ clientTools: {
207
+ dj_soundboard: (params) => this._clientToolDJSoundboard(params),
208
+ caller_sounds: (params) => this._clientToolCallerSounds(params),
209
+ },
210
+ onConnect: () => this._onConnect(),
211
+ onDisconnect: () => this._onDisconnect(),
212
+ onError: (err) => this._onError(err),
213
+ onModeChange: (mode) => this._onModeChange(mode),
214
+ onMessage: (msg) => this._onMessage(msg),
215
+ });
216
+ } catch (err) {
217
+ console.error('[ElevenLabsClassic] startSession failed:', err);
218
+ this._bridge.emit(AgentEvents.ERROR, { message: String(err) });
219
+ this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
220
+ }
221
+ },
222
+
223
+ /**
224
+ * Stop conversation gracefully.
225
+ */
226
+ async stop() {
227
+ if (this._conversation) {
228
+ try {
229
+ await this._conversation.endSession();
230
+ } catch (_) { /* ignore */ }
231
+ this._conversation = null;
232
+ }
233
+ // onDisconnect callback fires and emits events, but ensure state even if it doesn't
234
+ this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'idle' });
235
+ this._bridge.emit(AgentEvents.DISCONNECTED);
236
+ this._bridge.emit(AgentEvents.MOOD, { mood: 'neutral' });
237
+ },
238
+
239
+ /**
240
+ * Full teardown on adapter switch. MUST release all resources.
241
+ */
242
+ async destroy() {
243
+ // Stop conversation
244
+ if (this._conversation) {
245
+ try { await this._conversation.endSession(); } catch (_) { /* ignore */ }
246
+ this._conversation = null;
247
+ }
248
+
249
+ // Stop MutationObserver
250
+ if (this._audioObserver) {
251
+ this._audioObserver.disconnect();
252
+ this._audioObserver = null;
253
+ }
254
+
255
+ // Close AudioContext
256
+ if (this._audioContext && this._audioContext.state !== 'closed') {
257
+ try { await this._audioContext.close(); } catch (_) { /* ignore */ }
258
+ }
259
+ this._audioContext = null;
260
+ this._elevenLabsSource = null;
261
+ this._callerNodes = null;
262
+ this._callerEffectActive = false;
263
+
264
+ // Revoke preloaded blob URLs
265
+ Object.values(this._djSoundCache).forEach(url => {
266
+ try { URL.revokeObjectURL(url); } catch (_) { /* ignore */ }
267
+ });
268
+ this._djSoundCache = {};
269
+
270
+ // Stop commercial player
271
+ if (this._commercialPlayer) {
272
+ this._commercialPlayer.pause();
273
+ this._commercialPlayer.src = '';
274
+ this._commercialPlayer = null;
275
+ }
276
+ this._commercialPlaying = false;
277
+
278
+ // Clear timers
279
+ clearTimeout(this._syncClearTimer);
280
+
281
+ // Unsubscribe bridge listeners
282
+ this._unsubscribers.forEach(fn => fn());
283
+ this._unsubscribers = [];
284
+
285
+ this._bridge = null;
286
+ this._config = null;
287
+ },
288
+
289
+ // ── ElevenLabs SDK callbacks ──────────────────────────────────────────────
290
+
291
+ _onConnect() {
292
+ console.log('[ElevenLabsClassic] Connected');
293
+ this._bridge.emit(AgentEvents.CONNECTED);
294
+ this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
295
+ this._bridge.emit(AgentEvents.MOOD, { mood: 'happy' });
296
+ },
297
+
298
+ _onDisconnect() {
299
+ console.log('[ElevenLabsClassic] Disconnected');
300
+ this._conversation = null;
301
+ this._bridge.emit(AgentEvents.DISCONNECTED);
302
+ this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'idle' });
303
+ this._bridge.emit(AgentEvents.MOOD, { mood: 'neutral' });
304
+ // Stop music ducking when agent disconnects
305
+ if (this._config.musicPlayer) {
306
+ this._config.musicPlayer.duck(false);
307
+ }
308
+ },
309
+
310
+ _onError(error) {
311
+ console.error('[ElevenLabsClassic] Error:', error);
312
+ this._bridge.emit(AgentEvents.ERROR, { message: String(error) });
313
+ this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
314
+ },
315
+
316
+ /**
317
+ * onModeChange fires when ElevenLabs switches between speaking and listening.
318
+ * @param {{ mode: 'speaking'|'listening' }} modeObj
319
+ */
320
+ _onModeChange({ mode }) {
321
+ if (mode === 'speaking') {
322
+ this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'speaking' });
323
+ this._bridge.emit(AgentEvents.TTS_PLAYING);
324
+ // Duck music while agent speaks
325
+ if (this._config.musicPlayer) {
326
+ this._config.musicPlayer.duck(true);
327
+ }
328
+ } else {
329
+ // listening
330
+ this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
331
+ this._bridge.emit(AgentEvents.TTS_STOPPED);
332
+ this._bridge.emit(AgentEvents.MOOD, { mood: 'listening' });
333
+ if (this._config.musicPlayer) {
334
+ this._config.musicPlayer.duck(false);
335
+ }
336
+ }
337
+ },
338
+
339
+ /**
340
+ * onMessage handles ALL agent messages, tool responses, and text.
341
+ * Routing order mirrors ai-eyes (APPENDIX A.2).
342
+ */
343
+ _onMessage(message) {
344
+ // ── 1. Tool response detection ────────────────────────────────────────
345
+ let toolName = null;
346
+ let toolResult = null;
347
+
348
+ if (message.source === 'ai' && message.message?.toolResult) {
349
+ toolName = message.message.toolResult.toolName;
350
+ toolResult = message.message.toolResult.result;
351
+ }
352
+
353
+ if (toolName) {
354
+ // ── 2. dj_soundboard tool ─────────────────────────────────────────
355
+ if (toolName === 'dj_soundboard') {
356
+ try {
357
+ const parsed = JSON.parse(toolResult);
358
+ if (parsed.sound) this._playDJSound(parsed.sound);
359
+ } catch (_) { /* not JSON — ignore */ }
360
+ this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
361
+ return;
362
+ }
363
+
364
+ // ── 3. play_music tool ────────────────────────────────────────────
365
+ if (toolName === 'play_music') {
366
+ try {
367
+ const parsed = JSON.parse(toolResult);
368
+ const action = parsed.action || 'play';
369
+ if (action === 'stop') {
370
+ this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'stop' });
371
+ } else if (action === 'pause') {
372
+ this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'pause' });
373
+ } else {
374
+ this._syncMusicWithServer();
375
+ }
376
+ } catch (_) {
377
+ this._syncMusicWithServer();
378
+ }
379
+ this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
380
+ return;
381
+ }
382
+
383
+ // ── 4. play_commercial tool ───────────────────────────────────────
384
+ if (toolName === 'play_commercial') {
385
+ this._playCommercial();
386
+ this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
387
+ return;
388
+ }
389
+
390
+ // ── 5. generate_song tool ─────────────────────────────────────────
391
+ if (toolName === 'generate_song') {
392
+ try {
393
+ const parsed = JSON.parse(toolResult);
394
+ if (parsed.song_id) {
395
+ this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'generated', songId: parsed.song_id });
396
+ }
397
+ } catch (_) { /* ignore */ }
398
+ this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
399
+ return;
400
+ }
401
+
402
+ // Generic tool — emit for ActionConsole
403
+ this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
404
+ return;
405
+ }
406
+
407
+ // ── 6. Display in transcript ──────────────────────────────────────────
408
+ const text = message.message?.text || '';
409
+ if (text) {
410
+ const role = message.source === 'user' ? 'user' : 'assistant';
411
+ this._bridge.emit(AgentEvents.MESSAGE, { role, text, final: true });
412
+ }
413
+
414
+ // Only process outgoing agent speech for the following detection
415
+ if (message.source !== 'ai' || !text) return;
416
+
417
+ // ── 7. Caller voice detection ─────────────────────────────────────────
418
+ const isCallerVoice = CALLER_PATTERNS.some(re => re.test(text));
419
+ const isNormalVoice = NON_CALLER_PATTERNS.some(re => re.test(text));
420
+
421
+ if (isCallerVoice) {
422
+ this._setCallerEffect(true);
423
+ } else if (isNormalVoice) {
424
+ this._setCallerEffect(false);
425
+ }
426
+
427
+ // ── 8. Music keyword detection ────────────────────────────────────────
428
+ if (MUSIC_KEYWORDS_RE.test(text) && !this._callerEffectActive) {
429
+ this._syncMusicWithServer();
430
+ }
431
+
432
+ // ── 9. Commercial keyword detection ───────────────────────────────────
433
+ if (COMMERCIAL_KEYWORDS_RE.test(text) && !this._commercialPlaying) {
434
+ this._playCommercial();
435
+ }
436
+ },
437
+
438
+ // ── Client Tools ──────────────────────────────────────────────────────────
439
+
440
+ /**
441
+ * Client tool: dj_soundboard
442
+ * Plays sound effects in the browser silently (no spoken words).
443
+ */
444
+ async _clientToolDJSoundboard(parameters) {
445
+ const action = parameters.action || 'list';
446
+ const sound = parameters.sound || '';
447
+
448
+ if (action === 'play' && sound) {
449
+ await this._playDJSound(sound);
450
+ const desc = sound.replace(/_/g, ' ');
451
+ return `*${desc}* 🎵`;
452
+ }
453
+ if (action === 'list') {
454
+ return JSON.stringify(DJ_SOUND_LIST);
455
+ }
456
+ return 'Unknown action';
457
+ },
458
+
459
+ /**
460
+ * Client tool: caller_sounds
461
+ * Plays dial tone (double beep) before voice switch to caller persona.
462
+ * Critical timing: must fire BEFORE the caller XML voice tag.
463
+ */
464
+ async _clientToolCallerSounds(parameters) {
465
+ const action = parameters.action || 'play';
466
+ const sound = parameters.sound || 'dial_tone';
467
+
468
+ if (action === 'list') {
469
+ return JSON.stringify(['dial_tone', 'ring', 'pickup', 'hangup']);
470
+ }
471
+
472
+ if (action === 'play') {
473
+ await this._playCallerSound(sound);
474
+ return `*Phone sound: ${sound}* 📞`;
475
+ }
476
+ return 'Unknown action';
477
+ },
478
+
479
+ // ── Audio Pipeline ────────────────────────────────────────────────────────
480
+
481
+ /**
482
+ * Create the Web Audio API context and caller phone effect filter chain.
483
+ *
484
+ * Chain: HighPass(500Hz) → LowPass(2200Hz) → PeakingEQ(1200Hz, +6dB)
485
+ * → Compressor(-30dB, 16:1) → WaveShaper(25) → Gain(0.7) → Destination
486
+ *
487
+ * Source: ai-eyes/index.html lines 5456-5629
488
+ */
489
+ _initAudioPipeline() {
490
+ if (this._audioContext) return;
491
+
492
+ try {
493
+ this._audioContext = new (window.AudioContext || window.webkitAudioContext)();
494
+ const ctx = this._audioContext;
495
+
496
+ // ── Effect chain nodes ──────────────────────────────────────────────
497
+ const highPass = ctx.createBiquadFilter();
498
+ highPass.type = 'highpass';
499
+ highPass.frequency.value = 500;
500
+ highPass.Q.value = 1.5;
501
+
502
+ const lowPass = ctx.createBiquadFilter();
503
+ lowPass.type = 'lowpass';
504
+ lowPass.frequency.value = 2200;
505
+ lowPass.Q.value = 1.5;
506
+
507
+ const midBoost = ctx.createBiquadFilter();
508
+ midBoost.type = 'peaking';
509
+ midBoost.frequency.value = 1200;
510
+ midBoost.gain.value = 6;
511
+
512
+ const compressor = ctx.createDynamicsCompressor();
513
+ compressor.threshold.value = -30;
514
+ compressor.ratio.value = 16;
515
+ compressor.attack.value = 0.002;
516
+ compressor.release.value = 0.2;
517
+
518
+ // WaveShaper distortion
519
+ const distortion = ctx.createWaveShaper();
520
+ distortion.curve = this._makeDistortionCurve(25);
521
+
522
+ const outputGain = ctx.createGain();
523
+ outputGain.gain.value = 0.7;
524
+
525
+ // Chain: highPass → lowPass → midBoost → compressor → distortion → outputGain
526
+ highPass.connect(lowPass);
527
+ lowPass.connect(midBoost);
528
+ midBoost.connect(compressor);
529
+ compressor.connect(distortion);
530
+ distortion.connect(outputGain);
531
+ outputGain.connect(ctx.destination);
532
+
533
+ // Bypass gain (direct path — used when caller effect is off)
534
+ const bypassGain = ctx.createGain();
535
+ bypassGain.gain.value = 1;
536
+ bypassGain.connect(ctx.destination);
537
+
538
+ this._callerNodes = {
539
+ input: highPass,
540
+ output: outputGain,
541
+ bypassGain,
542
+ effectOutput: outputGain,
543
+ };
544
+ } catch (err) {
545
+ console.warn('[ElevenLabsClassic] AudioContext init failed:', err);
546
+ }
547
+ },
548
+
549
+ /**
550
+ * MutationObserver — hooks unnamed <audio> elements ElevenLabs creates for TTS.
551
+ * ElevenLabs creates a new <audio> element per TTS chunk; we intercept each one.
552
+ */
553
+ _initAudioObserver() {
554
+ if (this._audioObserver) return;
555
+
556
+ this._audioObserver = new MutationObserver((mutations) => {
557
+ mutations.forEach((mutation) => {
558
+ mutation.addedNodes.forEach((node) => {
559
+ if (node.tagName === 'AUDIO' && !node.id && !node.dataset.callerHooked) {
560
+ this._hookElevenLabsAudio(node);
561
+ }
562
+ });
563
+ });
564
+ });
565
+
566
+ this._audioObserver.observe(document.body, { childList: true, subtree: true });
567
+ },
568
+
569
+ /**
570
+ * Hook a single ElevenLabs TTS <audio> element into the Web Audio API chain.
571
+ * Source: ai-eyes/index.html lines 5548-5583
572
+ */
573
+ _hookElevenLabsAudio(audioElement) {
574
+ if (!this._audioContext || !this._callerNodes) return;
575
+
576
+ try {
577
+ audioElement.dataset.callerHooked = 'true';
578
+ this._elevenLabsSource = this._audioContext.createMediaElementSource(audioElement);
579
+
580
+ // Default route: through bypass (direct to destination)
581
+ this._elevenLabsSource.connect(this._callerNodes.bypassGain);
582
+
583
+ // If caller effect is already active when a new chunk arrives, re-route immediately
584
+ if (this._callerEffectActive) {
585
+ this._elevenLabsSource.disconnect();
586
+ this._elevenLabsSource.connect(this._callerNodes.input);
587
+ }
588
+ } catch (err) {
589
+ // AudioContext limit: one createMediaElementSource per element
590
+ // ElevenLabs sometimes reuses elements; log and skip
591
+ console.warn('[ElevenLabsClassic] hookElevenLabsAudio failed:', err);
592
+ }
593
+ },
594
+
595
+ /**
596
+ * Enable or disable the caller phone filter effect.
597
+ * Source: ai-eyes/index.html lines 5584-5610
598
+ */
599
+ _setCallerEffect(enabled) {
600
+ this._callerEffectActive = enabled;
601
+ this._bridge.emit(AgentEvents.CALLER_EFFECT, { enabled });
602
+
603
+ if (!this._elevenLabsSource || !this._callerNodes) return;
604
+
605
+ try {
606
+ this._elevenLabsSource.disconnect();
607
+ if (enabled) {
608
+ this._elevenLabsSource.connect(this._callerNodes.input);
609
+ } else {
610
+ this._elevenLabsSource.connect(this._callerNodes.bypassGain);
611
+ }
612
+ } catch (err) {
613
+ console.warn('[ElevenLabsClassic] setCallerEffect failed:', err);
614
+ }
615
+ },
616
+
617
+ /**
618
+ * Build a WaveShaper distortion curve.
619
+ * @param {number} amount — 0 (clean) to 400 (heavy)
620
+ */
621
+ _makeDistortionCurve(amount) {
622
+ const n_samples = 256;
623
+ const curve = new Float32Array(n_samples);
624
+ const deg = Math.PI / 180;
625
+ for (let i = 0; i < n_samples; ++i) {
626
+ const x = (i * 2) / n_samples - 1;
627
+ curve[i] = ((3 + amount) * x * 20 * deg) / (Math.PI + amount * Math.abs(x));
628
+ }
629
+ return curve;
630
+ },
631
+
632
+ // ── DJ Sounds ─────────────────────────────────────────────────────────────
633
+
634
+ /**
635
+ * Preload frequently used DJ sounds as blob URLs so they can play instantly.
636
+ * Source: ai-eyes/index.html (sounds preloaded on page load).
637
+ */
638
+ async _preloadDJSounds() {
639
+ const serverUrl = this._config.serverUrl || '';
640
+ const toPreload = [
641
+ 'air_horn', 'scratch_long', 'crowd_cheer', 'crowd_hype',
642
+ 'rewind', 'yeah', 'laser', 'lets_go', 'impact', 'record_stop',
643
+ ];
644
+
645
+ await Promise.allSettled(
646
+ toPreload.map(async (name) => {
647
+ try {
648
+ const res = await fetch(`${serverUrl}/sounds/dj/${name}.mp3`);
649
+ if (res.ok) {
650
+ const blob = await res.blob();
651
+ this._djSoundCache[name] = URL.createObjectURL(blob);
652
+ }
653
+ } catch (_) { /* non-fatal: will fall back to direct URL */ }
654
+ })
655
+ );
656
+ },
657
+
658
+ /**
659
+ * Play a DJ sound silently (no spoken words from agent).
660
+ * Uses preloaded blob URL if available, otherwise falls back to server URL.
661
+ */
662
+ async _playDJSound(soundName) {
663
+ const serverUrl = this._config.serverUrl || '';
664
+ const src = this._djSoundCache[soundName]
665
+ || `${serverUrl}/sounds/dj/${soundName}.mp3`;
666
+
667
+ const audio = new Audio(src);
668
+ audio.volume = 1.0;
669
+ try {
670
+ await audio.play();
671
+ } catch (err) {
672
+ console.warn(`[ElevenLabsClassic] playDJSound(${soundName}) failed:`, err);
673
+ }
674
+
675
+ this._bridge.emit(AgentEvents.PLAY_SOUND, { sound: soundName, type: 'dj' });
676
+ },
677
+
678
+ /**
679
+ * Play a caller phone sound (dial tone = double beep with 400ms gap).
680
+ * 5-second cooldown prevents spam.
681
+ */
682
+ async _playCallerSound(sound) {
683
+ if (this._callerSoundCooldown) return;
684
+
685
+ this._callerSoundCooldown = true;
686
+ setTimeout(() => { this._callerSoundCooldown = false; }, 5000);
687
+
688
+ const serverUrl = this._config.serverUrl || '';
689
+ const src = `${serverUrl}/sounds/caller/${sound}.mp3`;
690
+
691
+ this._bridge.emit(AgentEvents.PLAY_SOUND, { sound, type: 'caller' });
692
+
693
+ if (sound === 'dial_tone') {
694
+ // Double-beep with 400ms gap
695
+ for (let i = 0; i < 2; i++) {
696
+ if (i > 0) await this._sleep(400);
697
+ const audio = new Audio(src);
698
+ try { await audio.play(); } catch (_) { /* ignore */ }
699
+ await this._sleep(800); // wait for beep to finish
700
+ }
701
+ } else {
702
+ const audio = new Audio(src);
703
+ try { await audio.play(); } catch (_) { /* ignore */ }
704
+ }
705
+ },
706
+
707
+ // ── Music Sync ────────────────────────────────────────────────────────────
708
+
709
+ /**
710
+ * Sync music with server state (2-second debounce).
711
+ * Sends a MUSIC_SYNC event for the shell's MusicPlayer to handle via /api/music?action=sync.
712
+ *
713
+ * Source: ai-eyes/index.html lines 5087-5165
714
+ */
715
+ _syncMusicWithServer() {
716
+ const now = Date.now();
717
+ if (now - this._lastSyncTime < 2000) return; // 2s debounce
718
+ if (this._callerEffectActive) return; // Block sync during caller skits
719
+ if (this._commercialPlaying) return; // Block sync during commercials
720
+
721
+ this._lastSyncTime = now;
722
+ this._bridge.emit(AgentEvents.MUSIC_SYNC);
723
+
724
+ // Auto-clear lastSyncedTrack every 30 seconds
725
+ clearTimeout(this._syncClearTimer);
726
+ this._syncClearTimer = setTimeout(() => {
727
+ this._lastSyncedTrack = null;
728
+ }, 30000);
729
+ },
730
+
731
+ // ── DJ Transition Alert ───────────────────────────────────────────────────
732
+
733
+ /**
734
+ * Called by the shell's MusicPlayer when a track has ≤12 seconds remaining.
735
+ * Sends a context update so the agent can announce the next track.
736
+ *
737
+ * Wire up: shell should call adapter.onTrackEndingSoon() via bridge or direct call.
738
+ * Source: ai-eyes/index.html lines 3918-3941
739
+ */
740
+ onTrackEndingSoon() {
741
+ if (this._djTransitionTriggered || !this._conversation) return;
742
+ this._djTransitionTriggered = true;
743
+
744
+ this._sendContextUpdate('[DJ INFO: track ending in 10s]');
745
+ this._sendForceMessage('[SYSTEM: Song ending! Announce next and call play_music action=skip!]');
746
+ },
747
+
748
+ /**
749
+ * Called by shell when a track ends completely.
750
+ * Resets the DJ transition flag.
751
+ */
752
+ onTrackEnded() {
753
+ this._djTransitionTriggered = false;
754
+ },
755
+
756
+ // ── Commercial System ─────────────────────────────────────────────────────
757
+
758
+ /**
759
+ * Play a commercial break:
760
+ * 1. Stop music
761
+ * 2. Fetch /api/commercials?action=play
762
+ * 3. Play the returned audio
763
+ * 4. On end, notify agent to resume
764
+ *
765
+ * Source: ai-eyes/index.html lines 2318-2400
766
+ */
767
+ async _playCommercial() {
768
+ if (this._commercialPlaying) return;
769
+ this._commercialPlaying = true;
770
+
771
+ const serverUrl = this._config.serverUrl || '';
772
+
773
+ // Stop music first
774
+ this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'stop' });
775
+
776
+ try {
777
+ const res = await fetch(`${serverUrl}/api/commercials?action=play`);
778
+ const data = await res.json();
779
+
780
+ if (data.url) {
781
+ this._commercialPlayer = new Audio(data.url);
782
+
783
+ // Tell agent to stay quiet during ad
784
+ this._sendContextUpdate('[DJ INFO: Commercial playing, stay quiet]');
785
+
786
+ // Confirm started on server
787
+ await fetch(`${serverUrl}/api/commercials?action=confirm_started`);
788
+
789
+ this._commercialPlayer.addEventListener('ended', async () => {
790
+ this._commercialPlaying = false;
791
+ await fetch(`${serverUrl}/api/commercials?action=ended`);
792
+ this._sendForceMessage("[SYSTEM: Commercial over! Say we're back and play next!");
793
+ });
794
+
795
+ await this._commercialPlayer.play();
796
+ } else {
797
+ this._commercialPlaying = false;
798
+ }
799
+ } catch (err) {
800
+ console.warn('[ElevenLabsClassic] playCommercial failed:', err);
801
+ this._commercialPlaying = false;
802
+ }
803
+ },
804
+
805
+ // ── ElevenLabs context injection ──────────────────────────────────────────
806
+
807
+ /**
808
+ * Send a contextual update to the ElevenLabs agent (silent background info).
809
+ * @param {string} text
810
+ */
811
+ _sendContextUpdate(text) {
812
+ if (!this._conversation) return;
813
+ try {
814
+ this._conversation.sendContextualUpdate(text);
815
+ } catch (err) {
816
+ console.warn('[ElevenLabsClassic] sendContextualUpdate failed:', err);
817
+ }
818
+ },
819
+
820
+ /**
821
+ * Send a forced SYSTEM message the agent must act on.
822
+ * @param {string} text
823
+ */
824
+ _sendForceMessage(text) {
825
+ if (!this._conversation) return;
826
+ try {
827
+ this._conversation.sendUserMessage(text);
828
+ } catch (err) {
829
+ console.warn('[ElevenLabsClassic] sendForceMessage failed:', err);
830
+ }
831
+ },
832
+
833
+ // ── Utilities ─────────────────────────────────────────────────────────────
834
+
835
+ _sleep(ms) {
836
+ return new Promise(resolve => setTimeout(resolve, ms));
837
+ },
838
+ };
839
+
840
+ export default ElevenLabsClassicAdapter;
841
+ export { ElevenLabsClassicAdapter };