openvoiceui 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +104 -0
- package/Dockerfile +30 -0
- package/LICENSE +21 -0
- package/README.md +638 -0
- package/SETUP.md +360 -0
- package/app.py +232 -0
- package/auto-approve-devices.js +111 -0
- package/cli/index.js +372 -0
- package/config/__init__.py +4 -0
- package/config/default.yaml +43 -0
- package/config/flags.yaml +67 -0
- package/config/loader.py +203 -0
- package/config/providers.yaml +71 -0
- package/config/speech_normalization.yaml +182 -0
- package/config/theme.json +4 -0
- package/data/greetings.json +25 -0
- package/default-pages/ai-image-creator.html +915 -0
- package/default-pages/bulk-image-uploader.html +492 -0
- package/default-pages/desktop.html +2865 -0
- package/default-pages/file-explorer.html +854 -0
- package/default-pages/interactive-map.html +655 -0
- package/default-pages/style-guide.html +1005 -0
- package/default-pages/website-setup.html +1623 -0
- package/deploy/openclaw/Dockerfile +46 -0
- package/deploy/openvoiceui.service +30 -0
- package/deploy/setup-nginx.sh +50 -0
- package/deploy/setup-sudo.sh +306 -0
- package/deploy/skill-runner/Dockerfile +19 -0
- package/deploy/skill-runner/requirements.txt +14 -0
- package/deploy/skill-runner/server.py +269 -0
- package/deploy/supertonic/Dockerfile +22 -0
- package/deploy/supertonic/server.py +79 -0
- package/docker-compose.pinokio.yml +11 -0
- package/docker-compose.yml +59 -0
- package/greetings.json +25 -0
- package/index.html +65 -0
- package/inject-device-identity.js +142 -0
- package/package.json +82 -0
- package/profiles/default.json +114 -0
- package/profiles/manager.py +354 -0
- package/profiles/schema.json +337 -0
- package/prompts/voice-system-prompt.md +149 -0
- package/providers/__init__.py +39 -0
- package/providers/base.py +63 -0
- package/providers/llm/__init__.py +12 -0
- package/providers/llm/base.py +71 -0
- package/providers/llm/clawdbot_provider.py +112 -0
- package/providers/llm/zai_provider.py +115 -0
- package/providers/registry.py +320 -0
- package/providers/stt/__init__.py +12 -0
- package/providers/stt/base.py +58 -0
- package/providers/stt/webspeech_provider.py +49 -0
- package/providers/stt/whisper_provider.py +100 -0
- package/providers/tts/__init__.py +20 -0
- package/providers/tts/base.py +91 -0
- package/providers/tts/groq_provider.py +74 -0
- package/providers/tts/supertonic_provider.py +72 -0
- package/requirements.txt +38 -0
- package/routes/__init__.py +10 -0
- package/routes/admin.py +515 -0
- package/routes/canvas.py +1315 -0
- package/routes/chat.py +51 -0
- package/routes/conversation.py +2158 -0
- package/routes/elevenlabs_hybrid.py +306 -0
- package/routes/greetings.py +98 -0
- package/routes/icons.py +279 -0
- package/routes/image_gen.py +364 -0
- package/routes/instructions.py +190 -0
- package/routes/music.py +838 -0
- package/routes/onboarding.py +43 -0
- package/routes/pi.py +62 -0
- package/routes/profiles.py +215 -0
- package/routes/report_issue.py +68 -0
- package/routes/static_files.py +533 -0
- package/routes/suno.py +664 -0
- package/routes/theme.py +81 -0
- package/routes/transcripts.py +199 -0
- package/routes/vision.py +348 -0
- package/routes/workspace.py +288 -0
- package/server.py +1510 -0
- package/services/__init__.py +1 -0
- package/services/auth.py +143 -0
- package/services/canvas_versioning.py +239 -0
- package/services/db_pool.py +107 -0
- package/services/gateway.py +16 -0
- package/services/gateway_manager.py +333 -0
- package/services/gateways/__init__.py +12 -0
- package/services/gateways/base.py +110 -0
- package/services/gateways/compat.py +264 -0
- package/services/gateways/openclaw.py +1134 -0
- package/services/health.py +100 -0
- package/services/memory_client.py +455 -0
- package/services/paths.py +26 -0
- package/services/speech_normalizer.py +285 -0
- package/services/tts.py +270 -0
- package/setup-config.js +262 -0
- package/sounds/air_horn.mp3 +0 -0
- package/sounds/bruh.mp3 +0 -0
- package/sounds/crowd_cheer.mp3 +0 -0
- package/sounds/gunshot.mp3 +0 -0
- package/sounds/impact.mp3 +0 -0
- package/sounds/lets_go.mp3 +0 -0
- package/sounds/record_stop.mp3 +0 -0
- package/sounds/rewind.mp3 +0 -0
- package/sounds/sad_trombone.mp3 +0 -0
- package/sounds/scratch_long.mp3 +0 -0
- package/sounds/yeah.mp3 +0 -0
- package/src/adapters/ClawdBotAdapter.js +264 -0
- package/src/adapters/_template.js +133 -0
- package/src/adapters/elevenlabs-classic.js +841 -0
- package/src/adapters/elevenlabs-hybrid.js +812 -0
- package/src/adapters/hume-evi.js +676 -0
- package/src/admin.html +1339 -0
- package/src/app.js +8802 -0
- package/src/core/Config.js +173 -0
- package/src/core/EmotionEngine.js +307 -0
- package/src/core/EventBridge.js +180 -0
- package/src/core/EventBus.js +117 -0
- package/src/core/VoiceSession.js +607 -0
- package/src/face/BaseFace.js +259 -0
- package/src/face/EyeFace.js +208 -0
- package/src/face/HaloSmokeFace.js +509 -0
- package/src/face/manifest.json +27 -0
- package/src/face/previews/eyes.svg +16 -0
- package/src/face/previews/orb.svg +29 -0
- package/src/features/MusicPlayer.js +620 -0
- package/src/features/Soundboard.js +128 -0
- package/src/providers/DeepgramSTT.js +472 -0
- package/src/providers/DeepgramStreamingSTT.js +766 -0
- package/src/providers/GroqSTT.js +559 -0
- package/src/providers/TTSPlayer.js +323 -0
- package/src/providers/WebSpeechSTT.js +479 -0
- package/src/providers/tts/BaseTTSProvider.js +81 -0
- package/src/providers/tts/HumeProvider.js +77 -0
- package/src/providers/tts/SupertonicProvider.js +174 -0
- package/src/providers/tts/index.js +140 -0
- package/src/shell/adapter-registry.js +154 -0
- package/src/shell/caller-bridge.js +35 -0
- package/src/shell/camera-bridge.js +28 -0
- package/src/shell/canvas-bridge.js +32 -0
- package/src/shell/commercial-bridge.js +44 -0
- package/src/shell/face-bridge.js +44 -0
- package/src/shell/music-bridge.js +60 -0
- package/src/shell/orchestrator.js +233 -0
- package/src/shell/profile-discovery.js +303 -0
- package/src/shell/sounds-bridge.js +28 -0
- package/src/shell/transcript-bridge.js +61 -0
- package/src/shell/waveform-bridge.js +33 -0
- package/src/styles/base.css +2862 -0
- package/src/styles/face.css +417 -0
- package/src/styles/pi-overrides.css +89 -0
- package/src/styles/theme-dark.css +67 -0
- package/src/test-tts.html +175 -0
- package/src/ui/AppShell.js +544 -0
- package/src/ui/ProfileSwitcher.js +228 -0
- package/src/ui/SessionControl.js +240 -0
- package/src/ui/face/FacePicker.js +195 -0
- package/src/ui/face/FaceRenderer.js +309 -0
- package/src/ui/settings/PlaylistEditor.js +366 -0
- package/src/ui/settings/SettingsPanel.css +684 -0
- package/src/ui/settings/SettingsPanel.js +419 -0
- package/src/ui/settings/TTSVoicePreview.js +210 -0
- package/src/ui/themes/ThemeManager.js +213 -0
- package/src/ui/visualizers/BaseVisualizer.js +29 -0
- package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
- package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
- package/static/emulators/jsdos/js-dos.css +1 -0
- package/static/emulators/jsdos/js-dos.js +22 -0
- package/static/favicon.svg +55 -0
- package/static/icons/apple-touch-icon.png +0 -0
- package/static/icons/favicon-32.png +0 -0
- package/static/icons/icon-192.png +0 -0
- package/static/icons/icon-512.png +0 -0
- package/static/install.html +449 -0
- package/static/manifest.json +26 -0
- package/static/sw.js +21 -0
- package/tts_providers/__init__.py +136 -0
- package/tts_providers/base_provider.py +319 -0
- package/tts_providers/groq_provider.py +155 -0
- package/tts_providers/hume_provider.py +226 -0
- package/tts_providers/providers_config.json +119 -0
- package/tts_providers/qwen3_provider.py +371 -0
- package/tts_providers/resemble_provider.py +315 -0
- package/tts_providers/supertonic_provider.py +557 -0
- package/tts_providers/supertonic_tts.py +399 -0
|
@@ -0,0 +1,841 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ElevenLabsClassicAdapter — Multi-Agent Framework adapter for ElevenLabs Conversational AI (P7-T4)
|
|
3
|
+
*
|
|
4
|
+
* Ports the battle-tested ElevenLabs setup from ai-eyes v1 into ai-eyes2's
|
|
5
|
+
* adapter framework. When selected, this adapter:
|
|
6
|
+
*
|
|
7
|
+
* - Connects to an ElevenLabs agent via the official @elevenlabs/client SDK
|
|
8
|
+
* - Registers two client tools: dj_soundboard and caller_sounds
|
|
9
|
+
* - Hooks ElevenLabs TTS audio elements via MutationObserver for the Web Audio API
|
|
10
|
+
* caller phone filter chain
|
|
11
|
+
* - Detects caller voice XML tags (<Caller 1>, <Caller 2>, <MIke-Voice>) and enables
|
|
12
|
+
* the phone filter effect automatically
|
|
13
|
+
* - Detects music keywords in agent speech and triggers server-side music sync
|
|
14
|
+
* - Handles the track-end DJ transition alert (send context update when ≤12s remaining)
|
|
15
|
+
* - Handles the commercial system (stop music, play ad, resume)
|
|
16
|
+
* - Exposes sendContextualUpdate / sendUserMessage via the EventBridge FORCE_MESSAGE /
|
|
17
|
+
* CONTEXT_UPDATE actions
|
|
18
|
+
*
|
|
19
|
+
* Ref: future-dev-plans/15-ELEVENLABS-CLASSIC-AGENT.md
|
|
20
|
+
* Ref: future-dev-plans/17-MULTI-AGENT-FRAMEWORK.md
|
|
21
|
+
*
|
|
22
|
+
* Adapter contract:
|
|
23
|
+
* init(bridge, config) — called when mode is selected
|
|
24
|
+
* start() — called when user clicks call button (unlocks iOS mic first)
|
|
25
|
+
* stop() — graceful disconnect
|
|
26
|
+
* destroy() — full teardown on adapter switch
|
|
27
|
+
*
|
|
28
|
+
* Config shape:
|
|
29
|
+
* {
|
|
30
|
+
* agentId: string, // ElevenLabs agent ID — default: your-elevenlabs-agent-id
|
|
31
|
+
* serverUrl: string, // Flask server base URL for webhook endpoints
|
|
32
|
+
* musicPlayer: object, // optional — shared MusicPlayer instance from shell
|
|
33
|
+
* }
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
import { AgentEvents, AgentActions } from '../core/EventBridge.js';
|
|
37
|
+
|
|
38
|
+
// ─────────────────────────────────────────────
// Constants
// ─────────────────────────────────────────────

// ESM build of the official ElevenLabs client SDK, fetched at runtime via
// dynamic import() so the adapter needs no build-time dependency.
const SDK_URL = 'https://cdn.jsdelivr.net/npm/@elevenlabs/client@latest/+esm';

// Fallback agent ID used when config.agentId is not provided to init().
const DEFAULT_AGENT_ID = 'your-elevenlabs-agent-id';

/** DJ sounds the agent can play silently in the browser (dj_soundboard tool) */
const DJ_SOUND_LIST = [
  'air_horn', 'air_horn_long', 'siren', 'siren_woop', 'scratch', 'scratch_long',
  'rewind', 'record_stop', 'whoosh', 'riser', 'bass_drop', 'impact',
  'crowd_cheer', 'crowd_hype', 'applause', 'yeah', 'lets_go', 'laser',
  'gunshot', 'explosion', 'vinyl_crackle',
];

/** Music-keyword regex — triggers _syncMusicWithServer() when agent speaks these */
const MUSIC_KEYWORDS_RE = /\b(spinning|playing|next up|coming up|dropping|fire up|switching|change it up)\b/i;

/** Commercial-keyword regex — triggers _playCommercial() */
const COMMERCIAL_KEYWORDS_RE = /\b(commercial|sponsor|ad break|word from our|brought to you)\b/i;

/**
 * Caller voice XML tag patterns → enable the phone filter.
 * Matched against raw agent text in _onMessage(); whitespace/hyphen variants
 * are tolerated because the agent's tag formatting is not guaranteed.
 */
const CALLER_PATTERNS = [
  /<caller\s*1>/i,
  /<caller\s*2>/i,
  /<mike[\-\s]*voice>/i,
  /<caller\s*voice>/i,
  /<phone\s*voice>/i,
];

/** Voice tags that revert to normal (non-caller) audio routing */
const NON_CALLER_PATTERNS = [
  /<radio\s*voice>/i,
  /<kitt[\-\s]*voice>/i,
  /<dj[\-\s]*soul>/i,
];
|
|
75
|
+
|
|
76
|
+
// ─────────────────────────────────────────────
|
|
77
|
+
// Adapter
|
|
78
|
+
// ─────────────────────────────────────────────
|
|
79
|
+
|
|
80
|
+
const ElevenLabsClassicAdapter = {

  // ── Identity & capabilities ───────────────────────────────────────────────

  name: 'ElevenLabs Classic',

  /** Feature flags: shell shows/hides UI elements based on this array */
  capabilities: [
    'multi_voice',      // 7 ElevenLabs voices / personas
    'dj_soundboard',    // client tool: dj_soundboard
    'caller_effects',   // phone filter audio chain
    'caller_sounds',    // client tool: caller_sounds (dial tone)
    'music_sync',       // music keyword detection + server sync
    'commercials',      // commercial keyword detection
    'wake_word',        // Web Speech wake word + SSE Pi trigger
  ],

  // ── Private state ─────────────────────────────────────────────────────────

  _bridge: null,        // EventBridge singleton, set in init()
  _config: null,        // config object passed to init() ({ agentId, serverUrl, musicPlayer })
  _conversation: null,  // ElevenLabs Conversation session object (null when idle)
  _sdk: null,           // { Conversation } from @elevenlabs/client, cached after first load

  // Audio chain
  _audioContext: null,       // shared AudioContext for the caller-filter graph
  _elevenLabsSource: null,   // MediaElementSource for current TTS audio element
  _callerNodes: null,        // { input, output, bypassGain, effectOutput }
  _callerEffectActive: false, // true while TTS is routed through the phone filter
  _audioObserver: null,      // MutationObserver for unnamed <audio> elements

  // Music sync debounce
  _lastSyncTime: 0,
  _lastSyncedTrack: null,
  _syncClearTimer: null,

  // DJ transition
  _djTransitionTriggered: false,

  // Caller sounds cooldown
  _callerSoundCooldown: false,

  // Preloaded DJ sound blob URLs
  _djSoundCache: {}, // { soundName: blobUrl }

  // Commercial state
  _commercialPlaying: false,
  _commercialPlayer: null,

  // Bridge unsub functions (populated in init(), drained in destroy())
  _unsubscribers: [],
|
|
131
|
+
|
|
132
|
+
// ── Lifecycle ─────────────────────────────────────────────────────────────
|
|
133
|
+
|
|
134
|
+
/**
|
|
135
|
+
* Initialize the adapter.
|
|
136
|
+
* Loads the SDK, builds the audio pipeline, preloads DJ sounds.
|
|
137
|
+
*
|
|
138
|
+
* @param {import('../core/EventBridge.js').EventBridge} bridge
|
|
139
|
+
* @param {object} config
|
|
140
|
+
*/
|
|
141
|
+
async init(bridge, config) {
|
|
142
|
+
this._bridge = bridge;
|
|
143
|
+
this._config = config || {};
|
|
144
|
+
|
|
145
|
+
// Load ElevenLabs SDK (dynamic import from CDN)
|
|
146
|
+
if (!this._sdk) {
|
|
147
|
+
try {
|
|
148
|
+
this._sdk = await import(SDK_URL);
|
|
149
|
+
} catch (err) {
|
|
150
|
+
console.error('[ElevenLabsClassic] Failed to load SDK:', err);
|
|
151
|
+
bridge.emit(AgentEvents.ERROR, { message: 'Failed to load ElevenLabs SDK' });
|
|
152
|
+
return;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// Build Web Audio API caller phone effect chain
|
|
157
|
+
this._initAudioPipeline();
|
|
158
|
+
|
|
159
|
+
// Set up MutationObserver to hook unnamed <audio> elements ElevenLabs creates
|
|
160
|
+
this._initAudioObserver();
|
|
161
|
+
|
|
162
|
+
// Preload DJ sounds (fire-and-forget — failures are non-fatal)
|
|
163
|
+
this._preloadDJSounds();
|
|
164
|
+
|
|
165
|
+
// Subscribe to UI → Agent actions
|
|
166
|
+
this._unsubscribers.push(
|
|
167
|
+
bridge.on(AgentActions.END_SESSION, () => this.stop()),
|
|
168
|
+
bridge.on(AgentActions.CONTEXT_UPDATE, (d) => this._sendContextUpdate(d.text)),
|
|
169
|
+
bridge.on(AgentActions.FORCE_MESSAGE, (d) => this._sendForceMessage(d.text)),
|
|
170
|
+
);
|
|
171
|
+
},
|
|
172
|
+
|
|
173
|
+
/**
|
|
174
|
+
* Start conversation.
|
|
175
|
+
* Unlocks iOS AudioContext → requests+releases mic → calls Conversation.startSession().
|
|
176
|
+
*/
|
|
177
|
+
async start() {
|
|
178
|
+
if (!this._sdk) {
|
|
179
|
+
console.error('[ElevenLabsClassic] SDK not loaded');
|
|
180
|
+
return;
|
|
181
|
+
}
|
|
182
|
+
if (this._conversation) {
|
|
183
|
+
console.warn('[ElevenLabsClassic] Already connected');
|
|
184
|
+
return;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
// iOS: must request mic then release it BEFORE startSession() (exclusive access)
|
|
188
|
+
try {
|
|
189
|
+
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
|
190
|
+
stream.getTracks().forEach(t => t.stop());
|
|
191
|
+
} catch (err) {
|
|
192
|
+
console.warn('[ElevenLabsClassic] Mic pre-unlock failed (may be fine on desktop):', err);
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// Resume AudioContext (must be triggered by user gesture)
|
|
196
|
+
if (this._audioContext && this._audioContext.state === 'suspended') {
|
|
197
|
+
await this._audioContext.resume();
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
const agentId = this._config.agentId || DEFAULT_AGENT_ID;
|
|
201
|
+
|
|
202
|
+
try {
|
|
203
|
+
this._conversation = await this._sdk.Conversation.startSession({
|
|
204
|
+
agentId,
|
|
205
|
+
overrides: {},
|
|
206
|
+
clientTools: {
|
|
207
|
+
dj_soundboard: (params) => this._clientToolDJSoundboard(params),
|
|
208
|
+
caller_sounds: (params) => this._clientToolCallerSounds(params),
|
|
209
|
+
},
|
|
210
|
+
onConnect: () => this._onConnect(),
|
|
211
|
+
onDisconnect: () => this._onDisconnect(),
|
|
212
|
+
onError: (err) => this._onError(err),
|
|
213
|
+
onModeChange: (mode) => this._onModeChange(mode),
|
|
214
|
+
onMessage: (msg) => this._onMessage(msg),
|
|
215
|
+
});
|
|
216
|
+
} catch (err) {
|
|
217
|
+
console.error('[ElevenLabsClassic] startSession failed:', err);
|
|
218
|
+
this._bridge.emit(AgentEvents.ERROR, { message: String(err) });
|
|
219
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
|
|
220
|
+
}
|
|
221
|
+
},
|
|
222
|
+
|
|
223
|
+
/**
|
|
224
|
+
* Stop conversation gracefully.
|
|
225
|
+
*/
|
|
226
|
+
async stop() {
|
|
227
|
+
if (this._conversation) {
|
|
228
|
+
try {
|
|
229
|
+
await this._conversation.endSession();
|
|
230
|
+
} catch (_) { /* ignore */ }
|
|
231
|
+
this._conversation = null;
|
|
232
|
+
}
|
|
233
|
+
// onDisconnect callback fires and emits events, but ensure state even if it doesn't
|
|
234
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'idle' });
|
|
235
|
+
this._bridge.emit(AgentEvents.DISCONNECTED);
|
|
236
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'neutral' });
|
|
237
|
+
},
|
|
238
|
+
|
|
239
|
+
/**
|
|
240
|
+
* Full teardown on adapter switch. MUST release all resources.
|
|
241
|
+
*/
|
|
242
|
+
async destroy() {
|
|
243
|
+
// Stop conversation
|
|
244
|
+
if (this._conversation) {
|
|
245
|
+
try { await this._conversation.endSession(); } catch (_) { /* ignore */ }
|
|
246
|
+
this._conversation = null;
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
// Stop MutationObserver
|
|
250
|
+
if (this._audioObserver) {
|
|
251
|
+
this._audioObserver.disconnect();
|
|
252
|
+
this._audioObserver = null;
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
// Close AudioContext
|
|
256
|
+
if (this._audioContext && this._audioContext.state !== 'closed') {
|
|
257
|
+
try { await this._audioContext.close(); } catch (_) { /* ignore */ }
|
|
258
|
+
}
|
|
259
|
+
this._audioContext = null;
|
|
260
|
+
this._elevenLabsSource = null;
|
|
261
|
+
this._callerNodes = null;
|
|
262
|
+
this._callerEffectActive = false;
|
|
263
|
+
|
|
264
|
+
// Revoke preloaded blob URLs
|
|
265
|
+
Object.values(this._djSoundCache).forEach(url => {
|
|
266
|
+
try { URL.revokeObjectURL(url); } catch (_) { /* ignore */ }
|
|
267
|
+
});
|
|
268
|
+
this._djSoundCache = {};
|
|
269
|
+
|
|
270
|
+
// Stop commercial player
|
|
271
|
+
if (this._commercialPlayer) {
|
|
272
|
+
this._commercialPlayer.pause();
|
|
273
|
+
this._commercialPlayer.src = '';
|
|
274
|
+
this._commercialPlayer = null;
|
|
275
|
+
}
|
|
276
|
+
this._commercialPlaying = false;
|
|
277
|
+
|
|
278
|
+
// Clear timers
|
|
279
|
+
clearTimeout(this._syncClearTimer);
|
|
280
|
+
|
|
281
|
+
// Unsubscribe bridge listeners
|
|
282
|
+
this._unsubscribers.forEach(fn => fn());
|
|
283
|
+
this._unsubscribers = [];
|
|
284
|
+
|
|
285
|
+
this._bridge = null;
|
|
286
|
+
this._config = null;
|
|
287
|
+
},
|
|
288
|
+
|
|
289
|
+
// ── ElevenLabs SDK callbacks ──────────────────────────────────────────────
|
|
290
|
+
|
|
291
|
+
_onConnect() {
|
|
292
|
+
console.log('[ElevenLabsClassic] Connected');
|
|
293
|
+
this._bridge.emit(AgentEvents.CONNECTED);
|
|
294
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
|
|
295
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'happy' });
|
|
296
|
+
},
|
|
297
|
+
|
|
298
|
+
_onDisconnect() {
|
|
299
|
+
console.log('[ElevenLabsClassic] Disconnected');
|
|
300
|
+
this._conversation = null;
|
|
301
|
+
this._bridge.emit(AgentEvents.DISCONNECTED);
|
|
302
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'idle' });
|
|
303
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'neutral' });
|
|
304
|
+
// Stop music ducking when agent disconnects
|
|
305
|
+
if (this._config.musicPlayer) {
|
|
306
|
+
this._config.musicPlayer.duck(false);
|
|
307
|
+
}
|
|
308
|
+
},
|
|
309
|
+
|
|
310
|
+
_onError(error) {
|
|
311
|
+
console.error('[ElevenLabsClassic] Error:', error);
|
|
312
|
+
this._bridge.emit(AgentEvents.ERROR, { message: String(error) });
|
|
313
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
|
|
314
|
+
},
|
|
315
|
+
|
|
316
|
+
/**
|
|
317
|
+
* onModeChange fires when ElevenLabs switches between speaking and listening.
|
|
318
|
+
* @param {{ mode: 'speaking'|'listening' }} modeObj
|
|
319
|
+
*/
|
|
320
|
+
_onModeChange({ mode }) {
|
|
321
|
+
if (mode === 'speaking') {
|
|
322
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'speaking' });
|
|
323
|
+
this._bridge.emit(AgentEvents.TTS_PLAYING);
|
|
324
|
+
// Duck music while agent speaks
|
|
325
|
+
if (this._config.musicPlayer) {
|
|
326
|
+
this._config.musicPlayer.duck(true);
|
|
327
|
+
}
|
|
328
|
+
} else {
|
|
329
|
+
// listening
|
|
330
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
|
|
331
|
+
this._bridge.emit(AgentEvents.TTS_STOPPED);
|
|
332
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'listening' });
|
|
333
|
+
if (this._config.musicPlayer) {
|
|
334
|
+
this._config.musicPlayer.duck(false);
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
},
|
|
338
|
+
|
|
339
|
+
/**
|
|
340
|
+
* onMessage handles ALL agent messages, tool responses, and text.
|
|
341
|
+
* Routing order mirrors ai-eyes (APPENDIX A.2).
|
|
342
|
+
*/
|
|
343
|
+
_onMessage(message) {
|
|
344
|
+
// ── 1. Tool response detection ────────────────────────────────────────
|
|
345
|
+
let toolName = null;
|
|
346
|
+
let toolResult = null;
|
|
347
|
+
|
|
348
|
+
if (message.source === 'ai' && message.message?.toolResult) {
|
|
349
|
+
toolName = message.message.toolResult.toolName;
|
|
350
|
+
toolResult = message.message.toolResult.result;
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
if (toolName) {
|
|
354
|
+
// ── 2. dj_soundboard tool ─────────────────────────────────────────
|
|
355
|
+
if (toolName === 'dj_soundboard') {
|
|
356
|
+
try {
|
|
357
|
+
const parsed = JSON.parse(toolResult);
|
|
358
|
+
if (parsed.sound) this._playDJSound(parsed.sound);
|
|
359
|
+
} catch (_) { /* not JSON — ignore */ }
|
|
360
|
+
this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
|
|
361
|
+
return;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
// ── 3. play_music tool ────────────────────────────────────────────
|
|
365
|
+
if (toolName === 'play_music') {
|
|
366
|
+
try {
|
|
367
|
+
const parsed = JSON.parse(toolResult);
|
|
368
|
+
const action = parsed.action || 'play';
|
|
369
|
+
if (action === 'stop') {
|
|
370
|
+
this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'stop' });
|
|
371
|
+
} else if (action === 'pause') {
|
|
372
|
+
this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'pause' });
|
|
373
|
+
} else {
|
|
374
|
+
this._syncMusicWithServer();
|
|
375
|
+
}
|
|
376
|
+
} catch (_) {
|
|
377
|
+
this._syncMusicWithServer();
|
|
378
|
+
}
|
|
379
|
+
this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
|
|
380
|
+
return;
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
// ── 4. play_commercial tool ───────────────────────────────────────
|
|
384
|
+
if (toolName === 'play_commercial') {
|
|
385
|
+
this._playCommercial();
|
|
386
|
+
this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
|
|
387
|
+
return;
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
// ── 5. generate_song tool ─────────────────────────────────────────
|
|
391
|
+
if (toolName === 'generate_song') {
|
|
392
|
+
try {
|
|
393
|
+
const parsed = JSON.parse(toolResult);
|
|
394
|
+
if (parsed.song_id) {
|
|
395
|
+
this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'generated', songId: parsed.song_id });
|
|
396
|
+
}
|
|
397
|
+
} catch (_) { /* ignore */ }
|
|
398
|
+
this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
|
|
399
|
+
return;
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
// Generic tool — emit for ActionConsole
|
|
403
|
+
this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
|
|
404
|
+
return;
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
// ── 6. Display in transcript ──────────────────────────────────────────
|
|
408
|
+
const text = message.message?.text || '';
|
|
409
|
+
if (text) {
|
|
410
|
+
const role = message.source === 'user' ? 'user' : 'assistant';
|
|
411
|
+
this._bridge.emit(AgentEvents.MESSAGE, { role, text, final: true });
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
// Only process outgoing agent speech for the following detection
|
|
415
|
+
if (message.source !== 'ai' || !text) return;
|
|
416
|
+
|
|
417
|
+
// ── 7. Caller voice detection ─────────────────────────────────────────
|
|
418
|
+
const isCallerVoice = CALLER_PATTERNS.some(re => re.test(text));
|
|
419
|
+
const isNormalVoice = NON_CALLER_PATTERNS.some(re => re.test(text));
|
|
420
|
+
|
|
421
|
+
if (isCallerVoice) {
|
|
422
|
+
this._setCallerEffect(true);
|
|
423
|
+
} else if (isNormalVoice) {
|
|
424
|
+
this._setCallerEffect(false);
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
// ── 8. Music keyword detection ────────────────────────────────────────
|
|
428
|
+
if (MUSIC_KEYWORDS_RE.test(text) && !this._callerEffectActive) {
|
|
429
|
+
this._syncMusicWithServer();
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
// ── 9. Commercial keyword detection ───────────────────────────────────
|
|
433
|
+
if (COMMERCIAL_KEYWORDS_RE.test(text) && !this._commercialPlaying) {
|
|
434
|
+
this._playCommercial();
|
|
435
|
+
}
|
|
436
|
+
},
|
|
437
|
+
|
|
438
|
+
// ── Client Tools ──────────────────────────────────────────────────────────
|
|
439
|
+
|
|
440
|
+
/**
|
|
441
|
+
* Client tool: dj_soundboard
|
|
442
|
+
* Plays sound effects in the browser silently (no spoken words).
|
|
443
|
+
*/
|
|
444
|
+
async _clientToolDJSoundboard(parameters) {
|
|
445
|
+
const action = parameters.action || 'list';
|
|
446
|
+
const sound = parameters.sound || '';
|
|
447
|
+
|
|
448
|
+
if (action === 'play' && sound) {
|
|
449
|
+
await this._playDJSound(sound);
|
|
450
|
+
const desc = sound.replace(/_/g, ' ');
|
|
451
|
+
return `*${desc}* 🎵`;
|
|
452
|
+
}
|
|
453
|
+
if (action === 'list') {
|
|
454
|
+
return JSON.stringify(DJ_SOUND_LIST);
|
|
455
|
+
}
|
|
456
|
+
return 'Unknown action';
|
|
457
|
+
},
|
|
458
|
+
|
|
459
|
+
/**
|
|
460
|
+
* Client tool: caller_sounds
|
|
461
|
+
* Plays dial tone (double beep) before voice switch to caller persona.
|
|
462
|
+
* Critical timing: must fire BEFORE the caller XML voice tag.
|
|
463
|
+
*/
|
|
464
|
+
async _clientToolCallerSounds(parameters) {
|
|
465
|
+
const action = parameters.action || 'play';
|
|
466
|
+
const sound = parameters.sound || 'dial_tone';
|
|
467
|
+
|
|
468
|
+
if (action === 'list') {
|
|
469
|
+
return JSON.stringify(['dial_tone', 'ring', 'pickup', 'hangup']);
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
if (action === 'play') {
|
|
473
|
+
await this._playCallerSound(sound);
|
|
474
|
+
return `*Phone sound: ${sound}* 📞`;
|
|
475
|
+
}
|
|
476
|
+
return 'Unknown action';
|
|
477
|
+
},
|
|
478
|
+
|
|
479
|
+
// ── Audio Pipeline ────────────────────────────────────────────────────────
|
|
480
|
+
|
|
481
|
+
  /**
   * Create the Web Audio API context and caller phone effect filter chain.
   *
   * Chain: HighPass(500Hz) → LowPass(2200Hz) → PeakingEQ(1200Hz, +6dB)
   * → Compressor(-30dB, 16:1) → WaveShaper(25) → Gain(0.7) → Destination
   *
   * A parallel unity-gain "bypass" route to the destination is also built;
   * _setCallerEffect() switches the TTS source between the two paths.
   *
   * Idempotent — the context is created once per adapter lifetime.
   *
   * Source: ai-eyes/index.html lines 5456-5629
   */
  _initAudioPipeline() {
    if (this._audioContext) return;

    try {
      // webkitAudioContext covers older Safari.
      this._audioContext = new (window.AudioContext || window.webkitAudioContext)();
      const ctx = this._audioContext;

      // ── Effect chain nodes ──────────────────────────────────────────────
      // Telephone band-pass: cut lows below 500 Hz…
      const highPass = ctx.createBiquadFilter();
      highPass.type = 'highpass';
      highPass.frequency.value = 500;
      highPass.Q.value = 1.5;

      // …and highs above 2.2 kHz.
      const lowPass = ctx.createBiquadFilter();
      lowPass.type = 'lowpass';
      lowPass.frequency.value = 2200;
      lowPass.Q.value = 1.5;

      // +6 dB presence bump around 1.2 kHz for the "phone speaker" honk.
      const midBoost = ctx.createBiquadFilter();
      midBoost.type = 'peaking';
      midBoost.frequency.value = 1200;
      midBoost.gain.value = 6;

      // Heavy compression flattens dynamics like a phone line would.
      const compressor = ctx.createDynamicsCompressor();
      compressor.threshold.value = -30;
      compressor.ratio.value = 16;
      compressor.attack.value = 0.002;
      compressor.release.value = 0.2;

      // WaveShaper distortion — mild clipping/grit (curve amount 25).
      const distortion = ctx.createWaveShaper();
      distortion.curve = this._makeDistortionCurve(25);

      // Trim the effect path so it isn't louder than the bypass path.
      const outputGain = ctx.createGain();
      outputGain.gain.value = 0.7;

      // Chain: highPass → lowPass → midBoost → compressor → distortion → outputGain
      highPass.connect(lowPass);
      lowPass.connect(midBoost);
      midBoost.connect(compressor);
      compressor.connect(distortion);
      distortion.connect(outputGain);
      outputGain.connect(ctx.destination);

      // Bypass gain (direct path — used when caller effect is off)
      const bypassGain = ctx.createGain();
      bypassGain.gain.value = 1;
      bypassGain.connect(ctx.destination);

      // Expose the two entry points for _hookElevenLabsAudio/_setCallerEffect.
      // NOTE(review): output and effectOutput are the same node here —
      // presumably kept separate for forward compatibility; confirm.
      this._callerNodes = {
        input: highPass,
        output: outputGain,
        bypassGain,
        effectOutput: outputGain,
      };
    } catch (err) {
      // No AudioContext (very old browser / restricted context): adapter
      // still works, just without the caller phone effect.
      console.warn('[ElevenLabsClassic] AudioContext init failed:', err);
    }
  },
|
|
548
|
+
|
|
549
|
+
/**
|
|
550
|
+
* MutationObserver — hooks unnamed <audio> elements ElevenLabs creates for TTS.
|
|
551
|
+
* ElevenLabs creates a new <audio> element per TTS chunk; we intercept each one.
|
|
552
|
+
*/
|
|
553
|
+
_initAudioObserver() {
|
|
554
|
+
if (this._audioObserver) return;
|
|
555
|
+
|
|
556
|
+
this._audioObserver = new MutationObserver((mutations) => {
|
|
557
|
+
mutations.forEach((mutation) => {
|
|
558
|
+
mutation.addedNodes.forEach((node) => {
|
|
559
|
+
if (node.tagName === 'AUDIO' && !node.id && !node.dataset.callerHooked) {
|
|
560
|
+
this._hookElevenLabsAudio(node);
|
|
561
|
+
}
|
|
562
|
+
});
|
|
563
|
+
});
|
|
564
|
+
});
|
|
565
|
+
|
|
566
|
+
this._audioObserver.observe(document.body, { childList: true, subtree: true });
|
|
567
|
+
},
|
|
568
|
+
|
|
569
|
+
/**
|
|
570
|
+
* Hook a single ElevenLabs TTS <audio> element into the Web Audio API chain.
|
|
571
|
+
* Source: ai-eyes/index.html lines 5548-5583
|
|
572
|
+
*/
|
|
573
|
+
_hookElevenLabsAudio(audioElement) {
|
|
574
|
+
if (!this._audioContext || !this._callerNodes) return;
|
|
575
|
+
|
|
576
|
+
try {
|
|
577
|
+
audioElement.dataset.callerHooked = 'true';
|
|
578
|
+
this._elevenLabsSource = this._audioContext.createMediaElementSource(audioElement);
|
|
579
|
+
|
|
580
|
+
// Default route: through bypass (direct to destination)
|
|
581
|
+
this._elevenLabsSource.connect(this._callerNodes.bypassGain);
|
|
582
|
+
|
|
583
|
+
// If caller effect is already active when a new chunk arrives, re-route immediately
|
|
584
|
+
if (this._callerEffectActive) {
|
|
585
|
+
this._elevenLabsSource.disconnect();
|
|
586
|
+
this._elevenLabsSource.connect(this._callerNodes.input);
|
|
587
|
+
}
|
|
588
|
+
} catch (err) {
|
|
589
|
+
// AudioContext limit: one createMediaElementSource per element
|
|
590
|
+
// ElevenLabs sometimes reuses elements; log and skip
|
|
591
|
+
console.warn('[ElevenLabsClassic] hookElevenLabsAudio failed:', err);
|
|
592
|
+
}
|
|
593
|
+
},
|
|
594
|
+
|
|
595
|
+
/**
|
|
596
|
+
* Enable or disable the caller phone filter effect.
|
|
597
|
+
* Source: ai-eyes/index.html lines 5584-5610
|
|
598
|
+
*/
|
|
599
|
+
_setCallerEffect(enabled) {
|
|
600
|
+
this._callerEffectActive = enabled;
|
|
601
|
+
this._bridge.emit(AgentEvents.CALLER_EFFECT, { enabled });
|
|
602
|
+
|
|
603
|
+
if (!this._elevenLabsSource || !this._callerNodes) return;
|
|
604
|
+
|
|
605
|
+
try {
|
|
606
|
+
this._elevenLabsSource.disconnect();
|
|
607
|
+
if (enabled) {
|
|
608
|
+
this._elevenLabsSource.connect(this._callerNodes.input);
|
|
609
|
+
} else {
|
|
610
|
+
this._elevenLabsSource.connect(this._callerNodes.bypassGain);
|
|
611
|
+
}
|
|
612
|
+
} catch (err) {
|
|
613
|
+
console.warn('[ElevenLabsClassic] setCallerEffect failed:', err);
|
|
614
|
+
}
|
|
615
|
+
},
|
|
616
|
+
|
|
617
|
+
/**
|
|
618
|
+
* Build a WaveShaper distortion curve.
|
|
619
|
+
* @param {number} amount — 0 (clean) to 400 (heavy)
|
|
620
|
+
*/
|
|
621
|
+
_makeDistortionCurve(amount) {
|
|
622
|
+
const n_samples = 256;
|
|
623
|
+
const curve = new Float32Array(n_samples);
|
|
624
|
+
const deg = Math.PI / 180;
|
|
625
|
+
for (let i = 0; i < n_samples; ++i) {
|
|
626
|
+
const x = (i * 2) / n_samples - 1;
|
|
627
|
+
curve[i] = ((3 + amount) * x * 20 * deg) / (Math.PI + amount * Math.abs(x));
|
|
628
|
+
}
|
|
629
|
+
return curve;
|
|
630
|
+
},
|
|
631
|
+
|
|
632
|
+
// ── DJ Sounds ─────────────────────────────────────────────────────────────
|
|
633
|
+
|
|
634
|
+
/**
|
|
635
|
+
* Preload frequently used DJ sounds as blob URLs so they can play instantly.
|
|
636
|
+
* Source: ai-eyes/index.html (sounds preloaded on page load).
|
|
637
|
+
*/
|
|
638
|
+
async _preloadDJSounds() {
|
|
639
|
+
const serverUrl = this._config.serverUrl || '';
|
|
640
|
+
const toPreload = [
|
|
641
|
+
'air_horn', 'scratch_long', 'crowd_cheer', 'crowd_hype',
|
|
642
|
+
'rewind', 'yeah', 'laser', 'lets_go', 'impact', 'record_stop',
|
|
643
|
+
];
|
|
644
|
+
|
|
645
|
+
await Promise.allSettled(
|
|
646
|
+
toPreload.map(async (name) => {
|
|
647
|
+
try {
|
|
648
|
+
const res = await fetch(`${serverUrl}/sounds/dj/${name}.mp3`);
|
|
649
|
+
if (res.ok) {
|
|
650
|
+
const blob = await res.blob();
|
|
651
|
+
this._djSoundCache[name] = URL.createObjectURL(blob);
|
|
652
|
+
}
|
|
653
|
+
} catch (_) { /* non-fatal: will fall back to direct URL */ }
|
|
654
|
+
})
|
|
655
|
+
);
|
|
656
|
+
},
|
|
657
|
+
|
|
658
|
+
/**
|
|
659
|
+
* Play a DJ sound silently (no spoken words from agent).
|
|
660
|
+
* Uses preloaded blob URL if available, otherwise falls back to server URL.
|
|
661
|
+
*/
|
|
662
|
+
async _playDJSound(soundName) {
|
|
663
|
+
const serverUrl = this._config.serverUrl || '';
|
|
664
|
+
const src = this._djSoundCache[soundName]
|
|
665
|
+
|| `${serverUrl}/sounds/dj/${soundName}.mp3`;
|
|
666
|
+
|
|
667
|
+
const audio = new Audio(src);
|
|
668
|
+
audio.volume = 1.0;
|
|
669
|
+
try {
|
|
670
|
+
await audio.play();
|
|
671
|
+
} catch (err) {
|
|
672
|
+
console.warn(`[ElevenLabsClassic] playDJSound(${soundName}) failed:`, err);
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
this._bridge.emit(AgentEvents.PLAY_SOUND, { sound: soundName, type: 'dj' });
|
|
676
|
+
},
|
|
677
|
+
|
|
678
|
+
/**
|
|
679
|
+
* Play a caller phone sound (dial tone = double beep with 400ms gap).
|
|
680
|
+
* 5-second cooldown prevents spam.
|
|
681
|
+
*/
|
|
682
|
+
async _playCallerSound(sound) {
|
|
683
|
+
if (this._callerSoundCooldown) return;
|
|
684
|
+
|
|
685
|
+
this._callerSoundCooldown = true;
|
|
686
|
+
setTimeout(() => { this._callerSoundCooldown = false; }, 5000);
|
|
687
|
+
|
|
688
|
+
const serverUrl = this._config.serverUrl || '';
|
|
689
|
+
const src = `${serverUrl}/sounds/caller/${sound}.mp3`;
|
|
690
|
+
|
|
691
|
+
this._bridge.emit(AgentEvents.PLAY_SOUND, { sound, type: 'caller' });
|
|
692
|
+
|
|
693
|
+
if (sound === 'dial_tone') {
|
|
694
|
+
// Double-beep with 400ms gap
|
|
695
|
+
for (let i = 0; i < 2; i++) {
|
|
696
|
+
if (i > 0) await this._sleep(400);
|
|
697
|
+
const audio = new Audio(src);
|
|
698
|
+
try { await audio.play(); } catch (_) { /* ignore */ }
|
|
699
|
+
await this._sleep(800); // wait for beep to finish
|
|
700
|
+
}
|
|
701
|
+
} else {
|
|
702
|
+
const audio = new Audio(src);
|
|
703
|
+
try { await audio.play(); } catch (_) { /* ignore */ }
|
|
704
|
+
}
|
|
705
|
+
},
|
|
706
|
+
|
|
707
|
+
// ── Music Sync ────────────────────────────────────────────────────────────
|
|
708
|
+
|
|
709
|
+
/**
|
|
710
|
+
* Sync music with server state (2-second debounce).
|
|
711
|
+
* Sends a MUSIC_SYNC event for the shell's MusicPlayer to handle via /api/music?action=sync.
|
|
712
|
+
*
|
|
713
|
+
* Source: ai-eyes/index.html lines 5087-5165
|
|
714
|
+
*/
|
|
715
|
+
_syncMusicWithServer() {
|
|
716
|
+
const now = Date.now();
|
|
717
|
+
if (now - this._lastSyncTime < 2000) return; // 2s debounce
|
|
718
|
+
if (this._callerEffectActive) return; // Block sync during caller skits
|
|
719
|
+
if (this._commercialPlaying) return; // Block sync during commercials
|
|
720
|
+
|
|
721
|
+
this._lastSyncTime = now;
|
|
722
|
+
this._bridge.emit(AgentEvents.MUSIC_SYNC);
|
|
723
|
+
|
|
724
|
+
// Auto-clear lastSyncedTrack every 30 seconds
|
|
725
|
+
clearTimeout(this._syncClearTimer);
|
|
726
|
+
this._syncClearTimer = setTimeout(() => {
|
|
727
|
+
this._lastSyncedTrack = null;
|
|
728
|
+
}, 30000);
|
|
729
|
+
},
|
|
730
|
+
|
|
731
|
+
// ── DJ Transition Alert ───────────────────────────────────────────────────
|
|
732
|
+
|
|
733
|
+
/**
|
|
734
|
+
* Called by the shell's MusicPlayer when a track has ≤12 seconds remaining.
|
|
735
|
+
* Sends a context update so the agent can announce the next track.
|
|
736
|
+
*
|
|
737
|
+
* Wire up: shell should call adapter.onTrackEndingSoon() via bridge or direct call.
|
|
738
|
+
* Source: ai-eyes/index.html lines 3918-3941
|
|
739
|
+
*/
|
|
740
|
+
onTrackEndingSoon() {
|
|
741
|
+
if (this._djTransitionTriggered || !this._conversation) return;
|
|
742
|
+
this._djTransitionTriggered = true;
|
|
743
|
+
|
|
744
|
+
this._sendContextUpdate('[DJ INFO: track ending in 10s]');
|
|
745
|
+
this._sendForceMessage('[SYSTEM: Song ending! Announce next and call play_music action=skip!]');
|
|
746
|
+
},
|
|
747
|
+
|
|
748
|
+
/**
|
|
749
|
+
* Called by shell when a track ends completely.
|
|
750
|
+
* Resets the DJ transition flag.
|
|
751
|
+
*/
|
|
752
|
+
onTrackEnded() {
|
|
753
|
+
this._djTransitionTriggered = false;
|
|
754
|
+
},
|
|
755
|
+
|
|
756
|
+
// ── Commercial System ─────────────────────────────────────────────────────
|
|
757
|
+
|
|
758
|
+
/**
|
|
759
|
+
* Play a commercial break:
|
|
760
|
+
* 1. Stop music
|
|
761
|
+
* 2. Fetch /api/commercials?action=play
|
|
762
|
+
* 3. Play the returned audio
|
|
763
|
+
* 4. On end, notify agent to resume
|
|
764
|
+
*
|
|
765
|
+
* Source: ai-eyes/index.html lines 2318-2400
|
|
766
|
+
*/
|
|
767
|
+
async _playCommercial() {
|
|
768
|
+
if (this._commercialPlaying) return;
|
|
769
|
+
this._commercialPlaying = true;
|
|
770
|
+
|
|
771
|
+
const serverUrl = this._config.serverUrl || '';
|
|
772
|
+
|
|
773
|
+
// Stop music first
|
|
774
|
+
this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'stop' });
|
|
775
|
+
|
|
776
|
+
try {
|
|
777
|
+
const res = await fetch(`${serverUrl}/api/commercials?action=play`);
|
|
778
|
+
const data = await res.json();
|
|
779
|
+
|
|
780
|
+
if (data.url) {
|
|
781
|
+
this._commercialPlayer = new Audio(data.url);
|
|
782
|
+
|
|
783
|
+
// Tell agent to stay quiet during ad
|
|
784
|
+
this._sendContextUpdate('[DJ INFO: Commercial playing, stay quiet]');
|
|
785
|
+
|
|
786
|
+
// Confirm started on server
|
|
787
|
+
await fetch(`${serverUrl}/api/commercials?action=confirm_started`);
|
|
788
|
+
|
|
789
|
+
this._commercialPlayer.addEventListener('ended', async () => {
|
|
790
|
+
this._commercialPlaying = false;
|
|
791
|
+
await fetch(`${serverUrl}/api/commercials?action=ended`);
|
|
792
|
+
this._sendForceMessage("[SYSTEM: Commercial over! Say we're back and play next!");
|
|
793
|
+
});
|
|
794
|
+
|
|
795
|
+
await this._commercialPlayer.play();
|
|
796
|
+
} else {
|
|
797
|
+
this._commercialPlaying = false;
|
|
798
|
+
}
|
|
799
|
+
} catch (err) {
|
|
800
|
+
console.warn('[ElevenLabsClassic] playCommercial failed:', err);
|
|
801
|
+
this._commercialPlaying = false;
|
|
802
|
+
}
|
|
803
|
+
},
|
|
804
|
+
|
|
805
|
+
// ── ElevenLabs context injection ──────────────────────────────────────────
|
|
806
|
+
|
|
807
|
+
/**
|
|
808
|
+
* Send a contextual update to the ElevenLabs agent (silent background info).
|
|
809
|
+
* @param {string} text
|
|
810
|
+
*/
|
|
811
|
+
_sendContextUpdate(text) {
|
|
812
|
+
if (!this._conversation) return;
|
|
813
|
+
try {
|
|
814
|
+
this._conversation.sendContextualUpdate(text);
|
|
815
|
+
} catch (err) {
|
|
816
|
+
console.warn('[ElevenLabsClassic] sendContextualUpdate failed:', err);
|
|
817
|
+
}
|
|
818
|
+
},
|
|
819
|
+
|
|
820
|
+
/**
|
|
821
|
+
* Send a forced SYSTEM message the agent must act on.
|
|
822
|
+
* @param {string} text
|
|
823
|
+
*/
|
|
824
|
+
_sendForceMessage(text) {
|
|
825
|
+
if (!this._conversation) return;
|
|
826
|
+
try {
|
|
827
|
+
this._conversation.sendUserMessage(text);
|
|
828
|
+
} catch (err) {
|
|
829
|
+
console.warn('[ElevenLabsClassic] sendForceMessage failed:', err);
|
|
830
|
+
}
|
|
831
|
+
},
|
|
832
|
+
|
|
833
|
+
// ── Utilities ─────────────────────────────────────────────────────────────
|
|
834
|
+
|
|
835
|
+
_sleep(ms) {
|
|
836
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
837
|
+
},
|
|
838
|
+
};
|
|
839
|
+
|
|
840
|
+
export default ElevenLabsClassicAdapter;
|
|
841
|
+
export { ElevenLabsClassicAdapter };
|