openvoiceui 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +104 -0
- package/Dockerfile +30 -0
- package/LICENSE +21 -0
- package/README.md +638 -0
- package/SETUP.md +360 -0
- package/app.py +232 -0
- package/auto-approve-devices.js +111 -0
- package/cli/index.js +372 -0
- package/config/__init__.py +4 -0
- package/config/default.yaml +43 -0
- package/config/flags.yaml +67 -0
- package/config/loader.py +203 -0
- package/config/providers.yaml +71 -0
- package/config/speech_normalization.yaml +182 -0
- package/config/theme.json +4 -0
- package/data/greetings.json +25 -0
- package/default-pages/ai-image-creator.html +915 -0
- package/default-pages/bulk-image-uploader.html +492 -0
- package/default-pages/desktop.html +2865 -0
- package/default-pages/file-explorer.html +854 -0
- package/default-pages/interactive-map.html +655 -0
- package/default-pages/style-guide.html +1005 -0
- package/default-pages/website-setup.html +1623 -0
- package/deploy/openclaw/Dockerfile +46 -0
- package/deploy/openvoiceui.service +30 -0
- package/deploy/setup-nginx.sh +50 -0
- package/deploy/setup-sudo.sh +306 -0
- package/deploy/skill-runner/Dockerfile +19 -0
- package/deploy/skill-runner/requirements.txt +14 -0
- package/deploy/skill-runner/server.py +269 -0
- package/deploy/supertonic/Dockerfile +22 -0
- package/deploy/supertonic/server.py +79 -0
- package/docker-compose.pinokio.yml +11 -0
- package/docker-compose.yml +59 -0
- package/greetings.json +25 -0
- package/index.html +65 -0
- package/inject-device-identity.js +142 -0
- package/package.json +82 -0
- package/profiles/default.json +114 -0
- package/profiles/manager.py +354 -0
- package/profiles/schema.json +337 -0
- package/prompts/voice-system-prompt.md +149 -0
- package/providers/__init__.py +39 -0
- package/providers/base.py +63 -0
- package/providers/llm/__init__.py +12 -0
- package/providers/llm/base.py +71 -0
- package/providers/llm/clawdbot_provider.py +112 -0
- package/providers/llm/zai_provider.py +115 -0
- package/providers/registry.py +320 -0
- package/providers/stt/__init__.py +12 -0
- package/providers/stt/base.py +58 -0
- package/providers/stt/webspeech_provider.py +49 -0
- package/providers/stt/whisper_provider.py +100 -0
- package/providers/tts/__init__.py +20 -0
- package/providers/tts/base.py +91 -0
- package/providers/tts/groq_provider.py +74 -0
- package/providers/tts/supertonic_provider.py +72 -0
- package/requirements.txt +38 -0
- package/routes/__init__.py +10 -0
- package/routes/admin.py +515 -0
- package/routes/canvas.py +1315 -0
- package/routes/chat.py +51 -0
- package/routes/conversation.py +2158 -0
- package/routes/elevenlabs_hybrid.py +306 -0
- package/routes/greetings.py +98 -0
- package/routes/icons.py +279 -0
- package/routes/image_gen.py +364 -0
- package/routes/instructions.py +190 -0
- package/routes/music.py +838 -0
- package/routes/onboarding.py +43 -0
- package/routes/pi.py +62 -0
- package/routes/profiles.py +215 -0
- package/routes/report_issue.py +68 -0
- package/routes/static_files.py +533 -0
- package/routes/suno.py +664 -0
- package/routes/theme.py +81 -0
- package/routes/transcripts.py +199 -0
- package/routes/vision.py +348 -0
- package/routes/workspace.py +288 -0
- package/server.py +1510 -0
- package/services/__init__.py +1 -0
- package/services/auth.py +143 -0
- package/services/canvas_versioning.py +239 -0
- package/services/db_pool.py +107 -0
- package/services/gateway.py +16 -0
- package/services/gateway_manager.py +333 -0
- package/services/gateways/__init__.py +12 -0
- package/services/gateways/base.py +110 -0
- package/services/gateways/compat.py +264 -0
- package/services/gateways/openclaw.py +1134 -0
- package/services/health.py +100 -0
- package/services/memory_client.py +455 -0
- package/services/paths.py +26 -0
- package/services/speech_normalizer.py +285 -0
- package/services/tts.py +270 -0
- package/setup-config.js +262 -0
- package/sounds/air_horn.mp3 +0 -0
- package/sounds/bruh.mp3 +0 -0
- package/sounds/crowd_cheer.mp3 +0 -0
- package/sounds/gunshot.mp3 +0 -0
- package/sounds/impact.mp3 +0 -0
- package/sounds/lets_go.mp3 +0 -0
- package/sounds/record_stop.mp3 +0 -0
- package/sounds/rewind.mp3 +0 -0
- package/sounds/sad_trombone.mp3 +0 -0
- package/sounds/scratch_long.mp3 +0 -0
- package/sounds/yeah.mp3 +0 -0
- package/src/adapters/ClawdBotAdapter.js +264 -0
- package/src/adapters/_template.js +133 -0
- package/src/adapters/elevenlabs-classic.js +841 -0
- package/src/adapters/elevenlabs-hybrid.js +812 -0
- package/src/adapters/hume-evi.js +676 -0
- package/src/admin.html +1339 -0
- package/src/app.js +8802 -0
- package/src/core/Config.js +173 -0
- package/src/core/EmotionEngine.js +307 -0
- package/src/core/EventBridge.js +180 -0
- package/src/core/EventBus.js +117 -0
- package/src/core/VoiceSession.js +607 -0
- package/src/face/BaseFace.js +259 -0
- package/src/face/EyeFace.js +208 -0
- package/src/face/HaloSmokeFace.js +509 -0
- package/src/face/manifest.json +27 -0
- package/src/face/previews/eyes.svg +16 -0
- package/src/face/previews/orb.svg +29 -0
- package/src/features/MusicPlayer.js +620 -0
- package/src/features/Soundboard.js +128 -0
- package/src/providers/DeepgramSTT.js +472 -0
- package/src/providers/DeepgramStreamingSTT.js +766 -0
- package/src/providers/GroqSTT.js +559 -0
- package/src/providers/TTSPlayer.js +323 -0
- package/src/providers/WebSpeechSTT.js +479 -0
- package/src/providers/tts/BaseTTSProvider.js +81 -0
- package/src/providers/tts/HumeProvider.js +77 -0
- package/src/providers/tts/SupertonicProvider.js +174 -0
- package/src/providers/tts/index.js +140 -0
- package/src/shell/adapter-registry.js +154 -0
- package/src/shell/caller-bridge.js +35 -0
- package/src/shell/camera-bridge.js +28 -0
- package/src/shell/canvas-bridge.js +32 -0
- package/src/shell/commercial-bridge.js +44 -0
- package/src/shell/face-bridge.js +44 -0
- package/src/shell/music-bridge.js +60 -0
- package/src/shell/orchestrator.js +233 -0
- package/src/shell/profile-discovery.js +303 -0
- package/src/shell/sounds-bridge.js +28 -0
- package/src/shell/transcript-bridge.js +61 -0
- package/src/shell/waveform-bridge.js +33 -0
- package/src/styles/base.css +2862 -0
- package/src/styles/face.css +417 -0
- package/src/styles/pi-overrides.css +89 -0
- package/src/styles/theme-dark.css +67 -0
- package/src/test-tts.html +175 -0
- package/src/ui/AppShell.js +544 -0
- package/src/ui/ProfileSwitcher.js +228 -0
- package/src/ui/SessionControl.js +240 -0
- package/src/ui/face/FacePicker.js +195 -0
- package/src/ui/face/FaceRenderer.js +309 -0
- package/src/ui/settings/PlaylistEditor.js +366 -0
- package/src/ui/settings/SettingsPanel.css +684 -0
- package/src/ui/settings/SettingsPanel.js +419 -0
- package/src/ui/settings/TTSVoicePreview.js +210 -0
- package/src/ui/themes/ThemeManager.js +213 -0
- package/src/ui/visualizers/BaseVisualizer.js +29 -0
- package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
- package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
- package/static/emulators/jsdos/js-dos.css +1 -0
- package/static/emulators/jsdos/js-dos.js +22 -0
- package/static/favicon.svg +55 -0
- package/static/icons/apple-touch-icon.png +0 -0
- package/static/icons/favicon-32.png +0 -0
- package/static/icons/icon-192.png +0 -0
- package/static/icons/icon-512.png +0 -0
- package/static/install.html +449 -0
- package/static/manifest.json +26 -0
- package/static/sw.js +21 -0
- package/tts_providers/__init__.py +136 -0
- package/tts_providers/base_provider.py +319 -0
- package/tts_providers/groq_provider.py +155 -0
- package/tts_providers/hume_provider.py +226 -0
- package/tts_providers/providers_config.json +119 -0
- package/tts_providers/qwen3_provider.py +371 -0
- package/tts_providers/resemble_provider.py +315 -0
- package/tts_providers/supertonic_provider.py +557 -0
- package/tts_providers/supertonic_tts.py +399 -0
|
@@ -0,0 +1,812 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ElevenLabsHybridAdapter — Multi-Agent Framework adapter (P7-T5)
|
|
3
|
+
*
|
|
4
|
+
* Combines ElevenLabs voice quality with OpenClaw's full VPS control:
|
|
5
|
+
*
|
|
6
|
+
* ElevenLabs handles: STT (cloud), TTS (multi-voice), dj_soundboard,
|
|
7
|
+
* caller_sounds, caller phone effects, music sync,
|
|
8
|
+
* turn management.
|
|
9
|
+
*
|
|
10
|
+
* OpenClaw handles: LLM reasoning (GLM-4.7 via Clawdbot Gateway),
|
|
11
|
+
* canvas creation, file ops, code execution,
|
|
12
|
+
* memory, web search, VPS control — everything.
|
|
13
|
+
*
|
|
14
|
+
* The bridge: ElevenLabs is configured with a custom LLM endpoint
|
|
15
|
+
* (POST /api/elevenlabs-llm on our Flask server) instead of its built-in
|
|
16
|
+
* model. Our server receives the conversation context, forwards the last
|
|
17
|
+
* user message to OpenClaw via the persistent Gateway WebSocket, streams
|
|
18
|
+
* the response back to ElevenLabs in OpenAI-compatible SSE format.
|
|
19
|
+
*
|
|
20
|
+
* Canvas side-channel: OpenClaw embeds {canvas:present,url:...} markers in
|
|
21
|
+
* its responses. The server strips them from the spoken text before
|
|
22
|
+
* returning to ElevenLabs (so the agent doesn't read HTML aloud) and stores
|
|
23
|
+
* them in a queue. This adapter polls /api/canvas-pending every second and
|
|
24
|
+
* emits CANVAS_CMD events so the shell loads the iframe.
|
|
25
|
+
*
|
|
26
|
+
* Ref: future-dev-plans/16-ELEVENLABS-OPENCLAW-HYBRID.md
|
|
27
|
+
* Ref: future-dev-plans/15-ELEVENLABS-CLASSIC-AGENT.md (classic base)
|
|
28
|
+
* Ref: future-dev-plans/17-MULTI-AGENT-FRAMEWORK.md
|
|
29
|
+
*
|
|
30
|
+
* Adapter contract (same as all adapters):
|
|
31
|
+
* init(bridge, config) — called when mode is selected
|
|
32
|
+
* start() — called when user clicks call button
|
|
33
|
+
* stop() — graceful disconnect
|
|
34
|
+
* destroy() — full teardown on adapter switch
|
|
35
|
+
*
|
|
36
|
+
* Config shape:
|
|
37
|
+
* {
|
|
38
|
+
* agentId: string, // Hybrid ElevenLabs agent ID (custom LLM configured)
|
|
39
|
+
* serverUrl: string, // Flask server base URL
|
|
40
|
+
* musicPlayer: object, // optional — shared MusicPlayer instance
|
|
41
|
+
* pollInterval: number, // ms between canvas polls (default 1000)
|
|
42
|
+
* }
|
|
43
|
+
*/
|
|
44
|
+
|
|
45
|
+
import { AgentEvents, AgentActions } from '../core/EventBridge.js';
|
|
46
|
+
|
|
47
|
+
// ─────────────────────────────────────────────────────────────────────────────
// Constants
// ─────────────────────────────────────────────────────────────────────────────

// CDN location of the ElevenLabs browser client SDK; dynamically imported in
// init() so the page loads without it.
const SDK_URL = 'https://cdn.jsdelivr.net/npm/@elevenlabs/client@latest/+esm';

/** DJ sound names the agent can play silently (returned by the `list` action
 *  of the dj_soundboard client tool). */
const DJ_SOUND_LIST = [
  'air_horn', 'air_horn_long', 'siren', 'siren_woop', 'scratch', 'scratch_long',
  'rewind', 'record_stop', 'whoosh', 'riser', 'bass_drop', 'impact',
  'crowd_cheer', 'crowd_hype', 'applause', 'yeah', 'lets_go', 'laser',
  'gunshot', 'explosion', 'vinyl_crackle',
];

/** Music keywords that trigger server-side music sync (case-insensitive,
 *  whole-word match against agent transcript text in _onMessage). */
const MUSIC_KEYWORDS_RE = /\b(spinning|playing|next up|coming up|dropping|fire up|switching|change it up)\b/i;

/** Commercial keywords that trigger the ad break system. */
const COMMERCIAL_KEYWORDS_RE = /\b(commercial|sponsor|ad break|word from our|brought to you)\b/i;

/** XML tags that indicate a caller persona (enable phone filter). */
const CALLER_PATTERNS = [
  /<caller\s*1>/i,
  /<caller\s*2>/i,
  /<mike[\-\s]*voice>/i,
  /<caller\s*voice>/i,
  /<phone\s*voice>/i,
];

/** XML tags that indicate a non-caller persona (disable phone filter). */
const NON_CALLER_PATTERNS = [
  /<radio\s*voice>/i,
  /<kitt[\-\s]*voice>/i,
  /<dj[\-\s]*soul>/i,
];

// Default cadence for the /api/canvas-pending side-channel poll (ms); can be
// overridden via config.pollInterval.
const DEFAULT_POLL_INTERVAL_MS = 1000;
|
|
84
|
+
|
|
85
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
86
|
+
// Adapter
|
|
87
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
88
|
+
|
|
89
|
+
const ElevenLabsHybridAdapter = {

  // ── Identity & capabilities ───────────────────────────────────────────────

  name: 'ElevenLabs + OpenClaw Hybrid',

  /**
   * Feature flags: the shell shows/hides UI elements based on this array.
   *
   * Union of ElevenLabs Classic capabilities (voice/audio) + ClawdBot
   * capabilities (VPS/canvas/files) — the best of both worlds.
   */
  capabilities: [
    // Voice (from ElevenLabs)
    'multi_voice',
    'dj_soundboard',
    'caller_effects',
    'caller_sounds',
    'music_sync',
    'commercials',
    // Brain (from OpenClaw via custom LLM)
    'canvas',
    'vps_control',
    'file_ops',
    'code_execution',
  ],

  // ── Private state ─────────────────────────────────────────────────────────
  // NOTE(review): this adapter is a singleton object literal, so these fields
  // persist across init()/destroy() cycles; destroy() is responsible for
  // resetting whatever must not leak into the next session.

  _bridge: null,           // EventBridge handed to init(); nulled in destroy()
  _config: null,           // config object handed to init()
  _conversation: null,     // ElevenLabs Conversation session
  _sdk: null,              // { Conversation } from @elevenlabs/client (cached)

  // Web Audio API caller phone effect chain (built in _initAudioPipeline)
  _audioContext: null,
  _elevenLabsSource: null,
  _callerNodes: null,      // { input, output, bypassGain, effectOutput }
  _callerEffectActive: false,
  _audioObserver: null,    // MutationObserver hooking ElevenLabs <audio> tags

  // Music sync debounce
  _lastSyncTime: 0,
  _syncClearTimer: null,

  // DJ transition tracking
  _djTransitionTriggered: false,

  // Caller sounds cooldown
  _callerSoundCooldown: false,

  // Preloaded DJ sound blob URLs (name → blob URL; revoked in destroy())
  _djSoundCache: {},

  // Commercial state
  _commercialPlaying: false,
  _commercialPlayer: null, // HTMLAudioElement while an ad spot plays

  // Canvas command polling (the hybrid side-channel) — setInterval handle
  _canvasPoller: null,

  // Bridge / bus unsubscribe cleanup — functions returned by bridge.on()
  _unsubscribers: [],
|
|
152
|
+
|
|
153
|
+
// ── Lifecycle ─────────────────────────────────────────────────────────────
|
|
154
|
+
|
|
155
|
+
/**
|
|
156
|
+
* Initialize the adapter.
|
|
157
|
+
*
|
|
158
|
+
* Loads the ElevenLabs SDK, sets up the Web Audio API caller effect
|
|
159
|
+
* chain, starts canvas command polling, and subscribes to bridge actions.
|
|
160
|
+
*
|
|
161
|
+
* @param {import('../core/EventBridge.js').EventBridge} bridge
|
|
162
|
+
* @param {object} config
|
|
163
|
+
*/
|
|
164
|
+
async init(bridge, config) {
|
|
165
|
+
this._bridge = bridge;
|
|
166
|
+
this._config = config || {};
|
|
167
|
+
|
|
168
|
+
// Load ElevenLabs SDK from CDN
|
|
169
|
+
if (!this._sdk) {
|
|
170
|
+
try {
|
|
171
|
+
this._sdk = await import(SDK_URL);
|
|
172
|
+
} catch (err) {
|
|
173
|
+
console.error('[ElevenLabsHybrid] Failed to load SDK:', err);
|
|
174
|
+
bridge.emit(AgentEvents.ERROR, { message: 'Failed to load ElevenLabs SDK' });
|
|
175
|
+
return;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// Build caller phone effect audio chain (identical to Classic adapter)
|
|
180
|
+
this._initAudioPipeline();
|
|
181
|
+
|
|
182
|
+
// Watch for unnamed <audio> elements that ElevenLabs creates for TTS
|
|
183
|
+
this._initAudioObserver();
|
|
184
|
+
|
|
185
|
+
// Preload frequently used DJ sounds as blob URLs (non-fatal if it fails)
|
|
186
|
+
this._preloadDJSounds();
|
|
187
|
+
|
|
188
|
+
// Start polling /api/canvas-pending for OpenClaw canvas side-channel
|
|
189
|
+
this._startCanvasPolling();
|
|
190
|
+
|
|
191
|
+
// Subscribe to UI→Agent bridge actions
|
|
192
|
+
this._unsubscribers.push(
|
|
193
|
+
bridge.on(AgentActions.END_SESSION, () => this.stop()),
|
|
194
|
+
bridge.on(AgentActions.CONTEXT_UPDATE, (d) => this._sendContextUpdate(d.text)),
|
|
195
|
+
bridge.on(AgentActions.FORCE_MESSAGE, (d) => this._sendForceMessage(d.text)),
|
|
196
|
+
);
|
|
197
|
+
},
|
|
198
|
+
|
|
199
|
+
/**
|
|
200
|
+
* Start the conversation.
|
|
201
|
+
* Unlocks iOS AudioContext → requests+releases mic → starts ElevenLabs session.
|
|
202
|
+
*/
|
|
203
|
+
async start() {
|
|
204
|
+
if (!this._sdk) {
|
|
205
|
+
console.error('[ElevenLabsHybrid] SDK not loaded — cannot start');
|
|
206
|
+
return;
|
|
207
|
+
}
|
|
208
|
+
if (this._conversation) {
|
|
209
|
+
console.warn('[ElevenLabsHybrid] Already connected');
|
|
210
|
+
return;
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
// iOS: request mic then immediately release before startSession()
|
|
214
|
+
try {
|
|
215
|
+
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
|
216
|
+
stream.getTracks().forEach(t => t.stop());
|
|
217
|
+
} catch (err) {
|
|
218
|
+
console.warn('[ElevenLabsHybrid] Mic pre-unlock failed:', err);
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
// Resume AudioContext (must be triggered by a user gesture)
|
|
222
|
+
if (this._audioContext && this._audioContext.state === 'suspended') {
|
|
223
|
+
await this._audioContext.resume();
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
const agentId = this._config.agentId;
|
|
227
|
+
if (!agentId) {
|
|
228
|
+
this._bridge.emit(AgentEvents.ERROR, {
|
|
229
|
+
message: 'ElevenLabsHybrid: agentId not configured — set ELEVENLABS_HYBRID_AGENT_ID in .env',
|
|
230
|
+
});
|
|
231
|
+
return;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
try {
|
|
235
|
+
this._conversation = await this._sdk.Conversation.startSession({
|
|
236
|
+
agentId,
|
|
237
|
+
overrides: {},
|
|
238
|
+
// Only two client tools — OpenClaw handles everything else via custom LLM
|
|
239
|
+
clientTools: {
|
|
240
|
+
dj_soundboard: (params) => this._clientToolDJSoundboard(params),
|
|
241
|
+
caller_sounds: (params) => this._clientToolCallerSounds(params),
|
|
242
|
+
},
|
|
243
|
+
onConnect: () => this._onConnect(),
|
|
244
|
+
onDisconnect: () => this._onDisconnect(),
|
|
245
|
+
onError: (err) => this._onError(err),
|
|
246
|
+
onModeChange: (mode) => this._onModeChange(mode),
|
|
247
|
+
onMessage: (msg) => this._onMessage(msg),
|
|
248
|
+
});
|
|
249
|
+
} catch (err) {
|
|
250
|
+
console.error('[ElevenLabsHybrid] startSession failed:', err);
|
|
251
|
+
this._bridge.emit(AgentEvents.ERROR, { message: String(err) });
|
|
252
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
|
|
253
|
+
}
|
|
254
|
+
},
|
|
255
|
+
|
|
256
|
+
/** Stop conversation gracefully. */
|
|
257
|
+
async stop() {
|
|
258
|
+
if (this._conversation) {
|
|
259
|
+
try { await this._conversation.endSession(); } catch (_) { /* ignore */ }
|
|
260
|
+
this._conversation = null;
|
|
261
|
+
}
|
|
262
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'idle' });
|
|
263
|
+
this._bridge.emit(AgentEvents.DISCONNECTED);
|
|
264
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'neutral' });
|
|
265
|
+
},
|
|
266
|
+
|
|
267
|
+
/** Full teardown on adapter switch — MUST release all resources. */
|
|
268
|
+
async destroy() {
|
|
269
|
+
// Stop ElevenLabs session
|
|
270
|
+
if (this._conversation) {
|
|
271
|
+
try { await this._conversation.endSession(); } catch (_) { /* ignore */ }
|
|
272
|
+
this._conversation = null;
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
// Stop canvas polling
|
|
276
|
+
this._stopCanvasPolling();
|
|
277
|
+
|
|
278
|
+
// Stop MutationObserver
|
|
279
|
+
if (this._audioObserver) {
|
|
280
|
+
this._audioObserver.disconnect();
|
|
281
|
+
this._audioObserver = null;
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
// Close AudioContext
|
|
285
|
+
if (this._audioContext && this._audioContext.state !== 'closed') {
|
|
286
|
+
try { await this._audioContext.close(); } catch (_) { /* ignore */ }
|
|
287
|
+
}
|
|
288
|
+
this._audioContext = null;
|
|
289
|
+
this._elevenLabsSource = null;
|
|
290
|
+
this._callerNodes = null;
|
|
291
|
+
this._callerEffectActive = false;
|
|
292
|
+
|
|
293
|
+
// Revoke preloaded blob URLs
|
|
294
|
+
Object.values(this._djSoundCache).forEach(url => {
|
|
295
|
+
try { URL.revokeObjectURL(url); } catch (_) { /* ignore */ }
|
|
296
|
+
});
|
|
297
|
+
this._djSoundCache = {};
|
|
298
|
+
|
|
299
|
+
// Stop commercial player
|
|
300
|
+
if (this._commercialPlayer) {
|
|
301
|
+
this._commercialPlayer.pause();
|
|
302
|
+
this._commercialPlayer.src = '';
|
|
303
|
+
this._commercialPlayer = null;
|
|
304
|
+
}
|
|
305
|
+
this._commercialPlaying = false;
|
|
306
|
+
|
|
307
|
+
// Clear timers
|
|
308
|
+
clearTimeout(this._syncClearTimer);
|
|
309
|
+
|
|
310
|
+
// Unsubscribe bridge listeners
|
|
311
|
+
this._unsubscribers.forEach(fn => fn());
|
|
312
|
+
this._unsubscribers = [];
|
|
313
|
+
|
|
314
|
+
this._bridge = null;
|
|
315
|
+
this._config = null;
|
|
316
|
+
},
|
|
317
|
+
|
|
318
|
+
// ── ElevenLabs SDK callbacks ──────────────────────────────────────────────
|
|
319
|
+
|
|
320
|
+
_onConnect() {
|
|
321
|
+
console.log('[ElevenLabsHybrid] Connected (ElevenLabs SDK → custom LLM → OpenClaw)');
|
|
322
|
+
this._bridge.emit(AgentEvents.CONNECTED);
|
|
323
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
|
|
324
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'happy' });
|
|
325
|
+
},
|
|
326
|
+
|
|
327
|
+
_onDisconnect() {
|
|
328
|
+
console.log('[ElevenLabsHybrid] Disconnected');
|
|
329
|
+
this._conversation = null;
|
|
330
|
+
this._bridge.emit(AgentEvents.DISCONNECTED);
|
|
331
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'idle' });
|
|
332
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'neutral' });
|
|
333
|
+
if (this._config.musicPlayer) {
|
|
334
|
+
this._config.musicPlayer.duck(false);
|
|
335
|
+
}
|
|
336
|
+
},
|
|
337
|
+
|
|
338
|
+
/** SDK callback: surface the error to the shell and show a sad face. */
_onError(error) {
  console.error('[ElevenLabsHybrid] Error:', error);
  const bridge = this._bridge;
  bridge.emit(AgentEvents.ERROR, { message: String(error) });
  bridge.emit(AgentEvents.MOOD, { mood: 'sad' });
},
|
|
343
|
+
|
|
344
|
+
/** onModeChange fires when ElevenLabs switches speaking ↔ listening. */
|
|
345
|
+
_onModeChange({ mode }) {
|
|
346
|
+
if (mode === 'speaking') {
|
|
347
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'speaking' });
|
|
348
|
+
this._bridge.emit(AgentEvents.TTS_PLAYING);
|
|
349
|
+
if (this._config.musicPlayer) {
|
|
350
|
+
this._config.musicPlayer.duck(true);
|
|
351
|
+
}
|
|
352
|
+
} else {
|
|
353
|
+
// listening
|
|
354
|
+
this._bridge.emit(AgentEvents.STATE_CHANGED, { state: 'listening' });
|
|
355
|
+
this._bridge.emit(AgentEvents.TTS_STOPPED);
|
|
356
|
+
this._bridge.emit(AgentEvents.MOOD, { mood: 'listening' });
|
|
357
|
+
if (this._config.musicPlayer) {
|
|
358
|
+
this._config.musicPlayer.duck(false);
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
},
|
|
362
|
+
|
|
363
|
+
/**
 * onMessage handles ALL ElevenLabs agent messages and tool responses.
 * Routing order mirrors ElevenLabsClassicAdapter._onMessage (APPENDIX A.2).
 *
 * Note: canvas commands are handled by the polling side-channel
 * (_startCanvasPolling), NOT by text detection here — the server strips
 * {canvas:...} markers from the spoken text before ElevenLabs sees it.
 */
_onMessage(message) {
  // ── 1. Tool response detection ────────────────────────────────────────
  let toolName = null;
  let toolResult = null;

  // Tool results only arrive on AI-sourced messages carrying a toolResult
  // payload; everything else falls through to transcript handling.
  if (message.source === 'ai' && message.message?.toolResult) {
    toolName = message.message.toolResult.toolName;
    toolResult = message.message.toolResult.result;
  }

  if (toolName) {
    // dj_soundboard — result is expected to be JSON like {"sound": "..."};
    // play it locally, then mirror the call into the ActionConsole.
    if (toolName === 'dj_soundboard') {
      try {
        const parsed = JSON.parse(toolResult);
        if (parsed.sound) this._playDJSound(parsed.sound);
      } catch (_) { /* not JSON */ }
      this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
      return;
    }

    // play_music — stop/pause are forwarded as MUSIC_PLAY events; any other
    // action (or unparseable result) triggers a server music sync instead.
    if (toolName === 'play_music') {
      try {
        const parsed = JSON.parse(toolResult);
        const action = parsed.action || 'play';
        if (action === 'stop') {
          this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'stop' });
        } else if (action === 'pause') {
          this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'pause' });
        } else {
          this._syncMusicWithServer();
        }
      } catch (_) {
        this._syncMusicWithServer();
      }
      this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
      return;
    }

    // play_commercial — kick off the ad-break audio immediately.
    if (toolName === 'play_commercial') {
      this._playCommercial();
      this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
      return;
    }

    // generate_song — forward the generated song id to the music system.
    if (toolName === 'generate_song') {
      try {
        const parsed = JSON.parse(toolResult);
        if (parsed.song_id) {
          this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'generated', songId: parsed.song_id });
        }
      } catch (_) { /* ignore */ }
      this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
      return;
    }

    // Generic tool — show in ActionConsole
    this._bridge.emit(AgentEvents.TOOL_CALLED, { name: toolName, params: {}, result: toolResult });
    return;
  }

  // ── 2. Display in transcript ──────────────────────────────────────────
  const text = message.message?.text || '';
  if (text) {
    const role = message.source === 'user' ? 'user' : 'assistant';
    this._bridge.emit(AgentEvents.MESSAGE, { role, text, final: true });
  }

  // Keyword/persona detection below only applies to non-empty AI messages.
  if (message.source !== 'ai' || !text) return;

  // ── 3. Caller voice detection ─────────────────────────────────────────
  // Caller tags switch the phone filter on; explicit non-caller tags switch
  // it off; text with neither leaves the current effect state untouched.
  const isCallerVoice = CALLER_PATTERNS.some(re => re.test(text));
  const isNormalVoice = NON_CALLER_PATTERNS.some(re => re.test(text));

  if (isCallerVoice) {
    this._setCallerEffect(true);
  } else if (isNormalVoice) {
    this._setCallerEffect(false);
  }

  // ── 4. Music keyword detection ────────────────────────────────────────
  // Skipped while the caller effect is active so a caller persona's chatter
  // doesn't retrigger music sync.
  if (MUSIC_KEYWORDS_RE.test(text) && !this._callerEffectActive) {
    this._syncMusicWithServer();
  }

  // ── 5. Commercial keyword detection ──────────────────────────────────
  if (COMMERCIAL_KEYWORDS_RE.test(text) && !this._commercialPlaying) {
    this._playCommercial();
  }
},
|
|
464
|
+
|
|
465
|
+
// ── Canvas Side-Channel Polling ───────────────────────────────────────────
|
|
466
|
+
|
|
467
|
+
/**
|
|
468
|
+
* Poll /api/canvas-pending every second for canvas commands from OpenClaw.
|
|
469
|
+
*
|
|
470
|
+
* OpenClaw embeds {canvas:present,url:...} markers in responses. The
|
|
471
|
+
* custom LLM endpoint strips them from the spoken text and queues them
|
|
472
|
+
* server-side. We fetch and consume the queue here, emitting CANVAS_CMD
|
|
473
|
+
* events for the shell.
|
|
474
|
+
*
|
|
475
|
+
* Ref: doc 16 — "Canvas Integration in Hybrid Mode"
|
|
476
|
+
*/
|
|
477
|
+
_startCanvasPolling() {
|
|
478
|
+
if (this._canvasPoller) return;
|
|
479
|
+
|
|
480
|
+
const intervalMs = this._config.pollInterval || DEFAULT_POLL_INTERVAL_MS;
|
|
481
|
+
const serverUrl = this._config.serverUrl || '';
|
|
482
|
+
|
|
483
|
+
this._canvasPoller = setInterval(async () => {
|
|
484
|
+
try {
|
|
485
|
+
const resp = await fetch(`${serverUrl}/api/canvas-pending`);
|
|
486
|
+
if (!resp.ok) return;
|
|
487
|
+
const data = await resp.json();
|
|
488
|
+
const commands = data.commands || [];
|
|
489
|
+
for (const cmd of commands) {
|
|
490
|
+
if (cmd.action === 'present' && cmd.url) {
|
|
491
|
+
this._bridge.emit(AgentEvents.CANVAS_CMD, {
|
|
492
|
+
action: 'present',
|
|
493
|
+
url: cmd.url,
|
|
494
|
+
});
|
|
495
|
+
} else if (cmd.action === 'close') {
|
|
496
|
+
this._bridge.emit(AgentEvents.CANVAS_CMD, { action: 'close' });
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
} catch (_) {
|
|
500
|
+
// Non-fatal — server may not be running /api/canvas-pending yet
|
|
501
|
+
}
|
|
502
|
+
}, intervalMs);
|
|
503
|
+
},
|
|
504
|
+
|
|
505
|
+
_stopCanvasPolling() {
|
|
506
|
+
if (this._canvasPoller) {
|
|
507
|
+
clearInterval(this._canvasPoller);
|
|
508
|
+
this._canvasPoller = null;
|
|
509
|
+
}
|
|
510
|
+
},
|
|
511
|
+
|
|
512
|
+
// ── Client Tools ──────────────────────────────────────────────────────────
|
|
513
|
+
|
|
514
|
+
/** Client tool: dj_soundboard — plays sounds silently in browser. */
|
|
515
|
+
async _clientToolDJSoundboard(parameters) {
|
|
516
|
+
const action = parameters.action || 'list';
|
|
517
|
+
const sound = parameters.sound || '';
|
|
518
|
+
|
|
519
|
+
if (action === 'play' && sound) {
|
|
520
|
+
await this._playDJSound(sound);
|
|
521
|
+
return `*${sound.replace(/_/g, ' ')}* 🎵`;
|
|
522
|
+
}
|
|
523
|
+
if (action === 'list') {
|
|
524
|
+
return JSON.stringify(DJ_SOUND_LIST);
|
|
525
|
+
}
|
|
526
|
+
return 'Unknown action';
|
|
527
|
+
},
|
|
528
|
+
|
|
529
|
+
/**
|
|
530
|
+
* Client tool: caller_sounds — plays dial tone before caller voice switch.
|
|
531
|
+
* dial_tone = double beep with 400ms gap. 5s cooldown prevents spam.
|
|
532
|
+
*/
|
|
533
|
+
async _clientToolCallerSounds(parameters) {
|
|
534
|
+
const action = parameters.action || 'play';
|
|
535
|
+
const sound = parameters.sound || 'dial_tone';
|
|
536
|
+
|
|
537
|
+
if (action === 'list') {
|
|
538
|
+
return JSON.stringify(['dial_tone', 'ring', 'pickup', 'hangup']);
|
|
539
|
+
}
|
|
540
|
+
if (action === 'play') {
|
|
541
|
+
await this._playCallerSound(sound);
|
|
542
|
+
return `*Phone sound: ${sound}* 📞`;
|
|
543
|
+
}
|
|
544
|
+
return 'Unknown action';
|
|
545
|
+
},
|
|
546
|
+
|
|
547
|
+
// ── Audio Pipeline ────────────────────────────────────────────────────────
|
|
548
|
+
|
|
549
|
+
/**
|
|
550
|
+
* Build the Web Audio API context and caller phone effect filter chain.
|
|
551
|
+
*
|
|
552
|
+
* HighPass(500Hz) → LowPass(2200Hz) → PeakingEQ(1200Hz, +6dB)
|
|
553
|
+
* → Compressor(-30dB, 16:1) → WaveShaper(25) → Gain(0.7) → Destination
|
|
554
|
+
*
|
|
555
|
+
* Identical to ElevenLabsClassicAdapter — shared audio system design.
|
|
556
|
+
*/
|
|
557
|
+
_initAudioPipeline() {
|
|
558
|
+
if (this._audioContext) return;
|
|
559
|
+
|
|
560
|
+
try {
|
|
561
|
+
this._audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
|
562
|
+
const ctx = this._audioContext;
|
|
563
|
+
|
|
564
|
+
const highPass = ctx.createBiquadFilter();
|
|
565
|
+
highPass.type = 'highpass';
|
|
566
|
+
highPass.frequency.value = 500;
|
|
567
|
+
highPass.Q.value = 1.5;
|
|
568
|
+
|
|
569
|
+
const lowPass = ctx.createBiquadFilter();
|
|
570
|
+
lowPass.type = 'lowpass';
|
|
571
|
+
lowPass.frequency.value = 2200;
|
|
572
|
+
lowPass.Q.value = 1.5;
|
|
573
|
+
|
|
574
|
+
const midBoost = ctx.createBiquadFilter();
|
|
575
|
+
midBoost.type = 'peaking';
|
|
576
|
+
midBoost.frequency.value = 1200;
|
|
577
|
+
midBoost.gain.value = 6;
|
|
578
|
+
|
|
579
|
+
const compressor = ctx.createDynamicsCompressor();
|
|
580
|
+
compressor.threshold.value = -30;
|
|
581
|
+
compressor.ratio.value = 16;
|
|
582
|
+
compressor.attack.value = 0.002;
|
|
583
|
+
compressor.release.value = 0.2;
|
|
584
|
+
|
|
585
|
+
const distortion = ctx.createWaveShaper();
|
|
586
|
+
distortion.curve = this._makeDistortionCurve(25);
|
|
587
|
+
|
|
588
|
+
const outputGain = ctx.createGain();
|
|
589
|
+
outputGain.gain.value = 0.7;
|
|
590
|
+
|
|
591
|
+
// Chain: highPass → lowPass → midBoost → compressor → distortion → outputGain → dest
|
|
592
|
+
highPass.connect(lowPass);
|
|
593
|
+
lowPass.connect(midBoost);
|
|
594
|
+
midBoost.connect(compressor);
|
|
595
|
+
compressor.connect(distortion);
|
|
596
|
+
distortion.connect(outputGain);
|
|
597
|
+
outputGain.connect(ctx.destination);
|
|
598
|
+
|
|
599
|
+
const bypassGain = ctx.createGain();
|
|
600
|
+
bypassGain.gain.value = 1;
|
|
601
|
+
bypassGain.connect(ctx.destination);
|
|
602
|
+
|
|
603
|
+
this._callerNodes = { input: highPass, output: outputGain, bypassGain, effectOutput: outputGain };
|
|
604
|
+
} catch (err) {
|
|
605
|
+
console.warn('[ElevenLabsHybrid] AudioContext init failed:', err);
|
|
606
|
+
}
|
|
607
|
+
},
|
|
608
|
+
|
|
609
|
+
/** MutationObserver — hooks unnamed <audio> elements ElevenLabs creates for TTS. */
|
|
610
|
+
_initAudioObserver() {
|
|
611
|
+
if (this._audioObserver) return;
|
|
612
|
+
|
|
613
|
+
this._audioObserver = new MutationObserver((mutations) => {
|
|
614
|
+
mutations.forEach((mutation) => {
|
|
615
|
+
mutation.addedNodes.forEach((node) => {
|
|
616
|
+
if (node.tagName === 'AUDIO' && !node.id && !node.dataset.hybridHooked) {
|
|
617
|
+
this._hookElevenLabsAudio(node);
|
|
618
|
+
}
|
|
619
|
+
});
|
|
620
|
+
});
|
|
621
|
+
});
|
|
622
|
+
|
|
623
|
+
this._audioObserver.observe(document.body, { childList: true, subtree: true });
|
|
624
|
+
},
|
|
625
|
+
|
|
626
|
+
/** Hook a single ElevenLabs TTS <audio> element into the Web Audio API chain. */
|
|
627
|
+
_hookElevenLabsAudio(audioElement) {
|
|
628
|
+
if (!this._audioContext || !this._callerNodes) return;
|
|
629
|
+
try {
|
|
630
|
+
audioElement.dataset.hybridHooked = 'true';
|
|
631
|
+
this._elevenLabsSource = this._audioContext.createMediaElementSource(audioElement);
|
|
632
|
+
this._elevenLabsSource.connect(this._callerNodes.bypassGain);
|
|
633
|
+
|
|
634
|
+
if (this._callerEffectActive) {
|
|
635
|
+
this._elevenLabsSource.disconnect();
|
|
636
|
+
this._elevenLabsSource.connect(this._callerNodes.input);
|
|
637
|
+
}
|
|
638
|
+
} catch (err) {
|
|
639
|
+
console.warn('[ElevenLabsHybrid] hookElevenLabsAudio failed:', err);
|
|
640
|
+
}
|
|
641
|
+
},
|
|
642
|
+
|
|
643
|
+
/** Enable or disable the caller phone filter effect. */
|
|
644
|
+
_setCallerEffect(enabled) {
|
|
645
|
+
this._callerEffectActive = enabled;
|
|
646
|
+
this._bridge.emit(AgentEvents.CALLER_EFFECT, { enabled });
|
|
647
|
+
|
|
648
|
+
if (!this._elevenLabsSource || !this._callerNodes) return;
|
|
649
|
+
try {
|
|
650
|
+
this._elevenLabsSource.disconnect();
|
|
651
|
+
if (enabled) {
|
|
652
|
+
this._elevenLabsSource.connect(this._callerNodes.input);
|
|
653
|
+
} else {
|
|
654
|
+
this._elevenLabsSource.connect(this._callerNodes.bypassGain);
|
|
655
|
+
}
|
|
656
|
+
} catch (err) {
|
|
657
|
+
console.warn('[ElevenLabsHybrid] setCallerEffect failed:', err);
|
|
658
|
+
}
|
|
659
|
+
},
|
|
660
|
+
|
|
661
|
+
_makeDistortionCurve(amount) {
|
|
662
|
+
const n_samples = 256;
|
|
663
|
+
const curve = new Float32Array(n_samples);
|
|
664
|
+
const deg = Math.PI / 180;
|
|
665
|
+
for (let i = 0; i < n_samples; ++i) {
|
|
666
|
+
const x = (i * 2) / n_samples - 1;
|
|
667
|
+
curve[i] = ((3 + amount) * x * 20 * deg) / (Math.PI + amount * Math.abs(x));
|
|
668
|
+
}
|
|
669
|
+
return curve;
|
|
670
|
+
},
|
|
671
|
+
|
|
672
|
+
// ── DJ Sounds ─────────────────────────────────────────────────────────────
|
|
673
|
+
|
|
674
|
+
async _preloadDJSounds() {
|
|
675
|
+
const serverUrl = this._config.serverUrl || '';
|
|
676
|
+
const toPreload = [
|
|
677
|
+
'air_horn', 'scratch_long', 'crowd_cheer', 'crowd_hype',
|
|
678
|
+
'rewind', 'yeah', 'laser', 'lets_go', 'impact', 'record_stop',
|
|
679
|
+
];
|
|
680
|
+
await Promise.allSettled(
|
|
681
|
+
toPreload.map(async (name) => {
|
|
682
|
+
try {
|
|
683
|
+
const res = await fetch(`${serverUrl}/sounds/dj/${name}.mp3`);
|
|
684
|
+
if (res.ok) {
|
|
685
|
+
const blob = await res.blob();
|
|
686
|
+
this._djSoundCache[name] = URL.createObjectURL(blob);
|
|
687
|
+
}
|
|
688
|
+
} catch (_) { /* non-fatal */ }
|
|
689
|
+
})
|
|
690
|
+
);
|
|
691
|
+
},
|
|
692
|
+
|
|
693
|
+
async _playDJSound(soundName) {
|
|
694
|
+
const serverUrl = this._config.serverUrl || '';
|
|
695
|
+
const src = this._djSoundCache[soundName] || `${serverUrl}/sounds/dj/${soundName}.mp3`;
|
|
696
|
+
const audio = new Audio(src);
|
|
697
|
+
audio.volume = 1.0;
|
|
698
|
+
try { await audio.play(); } catch (err) {
|
|
699
|
+
console.warn(`[ElevenLabsHybrid] playDJSound(${soundName}) failed:`, err);
|
|
700
|
+
}
|
|
701
|
+
this._bridge.emit(AgentEvents.PLAY_SOUND, { sound: soundName, type: 'dj' });
|
|
702
|
+
},
|
|
703
|
+
|
|
704
|
+
async _playCallerSound(sound) {
|
|
705
|
+
if (this._callerSoundCooldown) return;
|
|
706
|
+
this._callerSoundCooldown = true;
|
|
707
|
+
setTimeout(() => { this._callerSoundCooldown = false; }, 5000);
|
|
708
|
+
|
|
709
|
+
const serverUrl = this._config.serverUrl || '';
|
|
710
|
+
const src = `${serverUrl}/sounds/caller/${sound}.mp3`;
|
|
711
|
+
this._bridge.emit(AgentEvents.PLAY_SOUND, { sound, type: 'caller' });
|
|
712
|
+
|
|
713
|
+
if (sound === 'dial_tone') {
|
|
714
|
+
for (let i = 0; i < 2; i++) {
|
|
715
|
+
if (i > 0) await this._sleep(400);
|
|
716
|
+
const audio = new Audio(src);
|
|
717
|
+
try { await audio.play(); } catch (_) { /* ignore */ }
|
|
718
|
+
await this._sleep(800);
|
|
719
|
+
}
|
|
720
|
+
} else {
|
|
721
|
+
const audio = new Audio(src);
|
|
722
|
+
try { await audio.play(); } catch (_) { /* ignore */ }
|
|
723
|
+
}
|
|
724
|
+
},
|
|
725
|
+
|
|
726
|
+
// ── Music Sync ────────────────────────────────────────────────────────────
|
|
727
|
+
|
|
728
|
+
/** Sync music with server state (2s debounce, blocked during caller/commercial). */
|
|
729
|
+
_syncMusicWithServer() {
|
|
730
|
+
const now = Date.now();
|
|
731
|
+
if (now - this._lastSyncTime < 2000) return;
|
|
732
|
+
if (this._callerEffectActive) return;
|
|
733
|
+
if (this._commercialPlaying) return;
|
|
734
|
+
|
|
735
|
+
this._lastSyncTime = now;
|
|
736
|
+
this._bridge.emit(AgentEvents.MUSIC_SYNC);
|
|
737
|
+
|
|
738
|
+
clearTimeout(this._syncClearTimer);
|
|
739
|
+
this._syncClearTimer = setTimeout(() => {}, 30000);
|
|
740
|
+
},
|
|
741
|
+
|
|
742
|
+
// ── DJ Transition Alert ───────────────────────────────────────────────────
|
|
743
|
+
|
|
744
|
+
/** Called by shell's MusicPlayer when a track has ≤12 seconds remaining. */
|
|
745
|
+
onTrackEndingSoon() {
|
|
746
|
+
if (this._djTransitionTriggered || !this._conversation) return;
|
|
747
|
+
this._djTransitionTriggered = true;
|
|
748
|
+
this._sendContextUpdate('[DJ INFO: track ending in 10s]');
|
|
749
|
+
this._sendForceMessage('[SYSTEM: Song ending! Announce next and call play_music action=skip!]');
|
|
750
|
+
},
|
|
751
|
+
|
|
752
|
+
onTrackEnded() {
|
|
753
|
+
this._djTransitionTriggered = false;
|
|
754
|
+
},
|
|
755
|
+
|
|
756
|
+
// ── Commercial System ─────────────────────────────────────────────────────
|
|
757
|
+
|
|
758
|
+
async _playCommercial() {
|
|
759
|
+
if (this._commercialPlaying) return;
|
|
760
|
+
this._commercialPlaying = true;
|
|
761
|
+
|
|
762
|
+
const serverUrl = this._config.serverUrl || '';
|
|
763
|
+
this._bridge.emit(AgentEvents.MUSIC_PLAY, { action: 'stop' });
|
|
764
|
+
|
|
765
|
+
try {
|
|
766
|
+
const res = await fetch(`${serverUrl}/api/commercials?action=play`);
|
|
767
|
+
const data = await res.json();
|
|
768
|
+
|
|
769
|
+
if (data.url) {
|
|
770
|
+
this._commercialPlayer = new Audio(data.url);
|
|
771
|
+
this._sendContextUpdate('[DJ INFO: Commercial playing, stay quiet]');
|
|
772
|
+
await fetch(`${serverUrl}/api/commercials?action=confirm_started`);
|
|
773
|
+
|
|
774
|
+
this._commercialPlayer.addEventListener('ended', async () => {
|
|
775
|
+
this._commercialPlaying = false;
|
|
776
|
+
await fetch(`${serverUrl}/api/commercials?action=ended`);
|
|
777
|
+
this._sendForceMessage("[SYSTEM: Commercial over! Say we're back and play next!");
|
|
778
|
+
});
|
|
779
|
+
|
|
780
|
+
await this._commercialPlayer.play();
|
|
781
|
+
} else {
|
|
782
|
+
this._commercialPlaying = false;
|
|
783
|
+
}
|
|
784
|
+
} catch (err) {
|
|
785
|
+
console.warn('[ElevenLabsHybrid] playCommercial failed:', err);
|
|
786
|
+
this._commercialPlaying = false;
|
|
787
|
+
}
|
|
788
|
+
},
|
|
789
|
+
|
|
790
|
+
// ── ElevenLabs context injection ──────────────────────────────────────────
|
|
791
|
+
|
|
792
|
+
_sendContextUpdate(text) {
|
|
793
|
+
if (!this._conversation) return;
|
|
794
|
+
try { this._conversation.sendContextualUpdate(text); }
|
|
795
|
+
catch (err) { console.warn('[ElevenLabsHybrid] sendContextualUpdate failed:', err); }
|
|
796
|
+
},
|
|
797
|
+
|
|
798
|
+
_sendForceMessage(text) {
|
|
799
|
+
if (!this._conversation) return;
|
|
800
|
+
try { this._conversation.sendUserMessage(text); }
|
|
801
|
+
catch (err) { console.warn('[ElevenLabsHybrid] sendForceMessage failed:', err); }
|
|
802
|
+
},
|
|
803
|
+
|
|
804
|
+
// ── Utilities ─────────────────────────────────────────────────────────────
|
|
805
|
+
|
|
806
|
+
_sleep(ms) {
|
|
807
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
808
|
+
},
|
|
809
|
+
};
|
|
810
|
+
|
|
811
|
+
export default ElevenLabsHybridAdapter;
|
|
812
|
+
export { ElevenLabsHybridAdapter };
|