aillom-vox-client 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +272 -0
  3. package/dist/AillomVox.d.ts +36 -0
  4. package/dist/AillomVox.js +152 -0
  5. package/dist/index.d.ts +2 -0
  6. package/dist/index.js +18 -0
  7. package/dist/types.d.ts +36 -0
  8. package/dist/types.js +2 -0
  9. package/docs/ASTERISK.md +411 -0
  10. package/docs/PROTOCOL.md +156 -0
  11. package/docs/PROVIDERS.md +40 -0
  12. package/docs/TOOLS.md +314 -0
  13. package/docs/TROUBLESHOOTING.md +86 -0
  14. package/docs/VOICES.md +219 -0
  15. package/docs/providers/AILLOMVOX.md +185 -0
  16. package/docs/providers/AWS.md +32 -0
  17. package/docs/providers/GEMINI.md +33 -0
  18. package/docs/providers/GROK.md +25 -0
  19. package/docs/providers/OPENAI.md +39 -0
  20. package/docs/providers/QWEN.md +27 -0
  21. package/docs/providers/ULTRAVOX.md +29 -0
  22. package/examples/01-basic/app.js +196 -0
  23. package/examples/01-basic/index.html +27 -0
  24. package/examples/02-advanced-dashboard/app.js +465 -0
  25. package/examples/02-advanced-dashboard/index.html +200 -0
  26. package/examples/02-advanced-dashboard/style.css +501 -0
  27. package/examples/03-smart-home/index.html +377 -0
  28. package/examples/04-customer-support/index.html +474 -0
  29. package/examples/sdk-usage.ts +44 -0
  30. package/integrations/n8n-nodes-aillomvox/README.md +56 -0
  31. package/integrations/n8n-nodes-aillomvox/credentials/AillomVoxApi.credentials.ts +29 -0
  32. package/integrations/n8n-nodes-aillomvox/dist/credentials/AillomVoxApi.credentials.js +30 -0
  33. package/integrations/n8n-nodes-aillomvox/dist/nodes/AillomVox/AillomVox.node.js +219 -0
  34. package/integrations/n8n-nodes-aillomvox/dist/nodes/AillomVox/aillomvox.svg +6 -0
  35. package/integrations/n8n-nodes-aillomvox/gulpfile.js +10 -0
  36. package/integrations/n8n-nodes-aillomvox/nodes/AillomVox/AillomVox.node.ts +229 -0
  37. package/integrations/n8n-nodes-aillomvox/nodes/AillomVox/aillomvox.svg +6 -0
  38. package/integrations/n8n-nodes-aillomvox/package-lock.json +11741 -0
  39. package/integrations/n8n-nodes-aillomvox/package.json +56 -0
  40. package/integrations/n8n-nodes-aillomvox/tsconfig.json +32 -0
  41. package/package.json +55 -0
  42. package/src/AillomVox.ts +169 -0
  43. package/src/index.ts +2 -0
  44. package/src/types.ts +50 -0
  45. package/tsconfig.json +23 -0
@@ -0,0 +1,185 @@
1
+ # AillomVox Provider
2
+
3
+ The default, high-performance provider. Uses our proprietary **Hybrid Engine** (Groq LLM + Inworld TTS) to deliver the fastest response times and highest stability at the lowest cost.
4
+
5
+ ## Models
6
+
7
+ | Component | Model |
8
+ | :--- | :--- |
9
+ | **STT** | `stt-rt-v4` (Soniox) |
10
+ | **LLM** | `openai/gpt-oss-120b` (via Groq) |
11
+ | **TTS** | `inworld-tts-1.5-mini` (Inworld) |
12
+
13
+ ## Configuration
14
+
15
+ ```json
16
+ {
17
+ "provider": "aillomvox",
18
+ "voice": "Edward",
19
+ "system_prompt": "You are a helpful assistant.",
20
+ "language": "en-US",
21
+ "sample_rate": 16000
22
+ }
23
+ ```
24
+
25
+ ## Available Voices
26
+
27
+ AillomVox uses **Inworld TTS 1.5** with **65 voices** across 15 languages. All voices support multilingual synthesis.
28
+
29
+ ### English (25 voices)
30
+
31
+ #### Male
32
+ | Voice | Style |
33
+ | :--- | :--- |
34
+ | **Edward** | Fast-talking, emphatic (default EN) |
35
+ | **Dennis** | Smooth, calm, friendly |
36
+ | **Alex** | Energetic, expressive |
37
+ | **Craig** | Older British, refined, articulate |
38
+ | **Mark** | Energetic, rapid delivery |
39
+ | **Ronald** | Confident British, deep, gravelly |
40
+ | **Shaun** | Friendly, dynamic |
41
+ | **Theodore** | Gravelly, time-worn |
42
+ | **Timothy** | Lively, upbeat American |
43
+ | **Carter** | Mature radio announcer |
44
+ | **Blake** | Rich, intimate |
45
+ | **Clive** | British, calm, cordial |
46
+ | **Dominus** | Robotic, deep, menacing |
47
+ | **Hades** | Commanding, gruff narrator |
48
+
49
+ #### Female
50
+ | Voice | Style |
51
+ | :--- | :--- |
52
+ | **Ashley** | Warm, natural |
53
+ | **Deborah** | Gentle, elegant |
54
+ | **Elizabeth** | Professional, perfect for narrations |
55
+ | **Julia** | Quirky, high-pitched, playful |
56
+ | **Olivia** | Young British, upbeat, friendly |
57
+ | **Priya** | Even-toned, Indian accent |
58
+ | **Sarah** | Fast-talking, curious |
59
+ | **Wendy** | Posh British |
60
+ | **Luna** | Calm, relaxing, mindfulness |
61
+ | **Hana** | Bright, expressive, young |
62
+ | **Pixie** | High-pitched, childlike |
63
+
64
+ ### Portuguese (2 voices)
65
+ | Voice | Gender | Style |
66
+ | :--- | :--- | :--- |
67
+ | **Heitor** | Male | Composed, neutral (default PT) |
68
+ | **Maitê** | Female | Middle-aged, professional |
69
+
70
+ ### Spanish (4 voices)
71
+ | Voice | Gender | Style |
72
+ | :--- | :--- | :--- |
73
+ | **Diego** | Male | Soothing, gentle (default ES) |
74
+ | **Miguel** | Male | Calm, storytelling |
75
+ | **Rafael** | Male | Deep, composed, narrations |
76
+ | **Lupita** | Female | Vibrant, energetic |
77
+
78
+ ### French (4 voices)
79
+ | Voice | Gender | Style |
80
+ | :--- | :--- | :--- |
81
+ | **Alain** | Male | Deep, smooth, composed |
82
+ | **Mathieu** | Male | Nasal quality |
83
+ | **Étienne** | Male | Calm, young adult |
84
+ | **Hélène** | Female | Smooth, musical, graceful |
85
+
86
+ ### German (2 voices)
87
+ | Voice | Gender | Style |
88
+ | :--- | :--- | :--- |
89
+ | **Josef** | Male | Articulate, announcer-like |
90
+ | **Johanna** | Female | Calm, low, smoky |
91
+
92
+ ### Italian (2 voices)
93
+ | Voice | Gender | Style |
94
+ | :--- | :--- | :--- |
95
+ | **Gianni** | Male | Deep, smooth, rapid |
96
+ | **Orietta** | Female | Calm, soothing cadence |
97
+
98
+ ### Chinese (4 voices)
99
+ | Voice | Gender | Style |
100
+ | :--- | :--- | :--- |
101
+ | **Yichen** | Male | Calm, flat, young adult |
102
+ | **Xiaoyin** | Female | Youthful, gentle, sweet |
103
+ | **Xinyi** | Female | Neutral, narrations |
104
+ | **Jing** | Female | Energetic, fast-paced |
105
+
106
+ ### Dutch (4 voices)
107
+ | Voice | Gender | Style |
108
+ | :--- | :--- | :--- |
109
+ | **Erik** | Male | Older, weathered edge |
110
+ | **Lennart** | Male | Confident, calm, relaxed |
111
+ | **Katrien** | Female | Expressive |
112
+ | **Lore** | Female | Clear, calm, professional |
113
+
114
+ ### Japanese (2 voices)
115
+ | Voice | Gender | Style |
116
+ | :--- | :--- | :--- |
117
+ | **Satoshi** | Male | Dramatic, expressive |
118
+ | **Asuka** | Female | Friendly, young adult |
119
+
120
+ ### Korean (4 voices)
121
+ | Voice | Gender | Style |
122
+ | :--- | :--- | :--- |
123
+ | **Hyunwoo** | Male | Young adult |
124
+ | **Seojun** | Male | Clear, deep, mature |
125
+ | **Minji** | Female | Energetic, friendly |
126
+ | **Yoona** | Female | Gentle, soothing |
127
+
128
+ ### Polish (2 voices)
129
+ | Voice | Gender | Style |
130
+ | :--- | :--- | :--- |
131
+ | **Szymon** | Male | Warm, friendly |
132
+ | **Wojciech** | Male | Middle-aged |
133
+
134
+ ### Russian (4 voices)
135
+ | Voice | Gender | Style |
136
+ | :--- | :--- | :--- |
137
+ | **Dmitry** | Male | Deep, commanding |
138
+ | **Nikolai** | Male | Deep, theatrical |
139
+ | **Svetlana** | Female | Soft, high-pitched |
140
+ | **Elena** | Female | Clear, mid-range, smooth |
141
+
142
+ ### Hindi (2 voices)
143
+ | Voice | Gender | Style |
144
+ | :--- | :--- | :--- |
145
+ | **Manoj** | Male | Clear, professional |
146
+ | **Riya** | Female | Professional, polished |
147
+
148
+ ### Hebrew (2 voices)
149
+ | Voice | Gender | Style |
150
+ | :--- | :--- | :--- |
151
+ | **Oren** | Male | Steady, podcasts |
152
+ | **Yael** | Female | Mid-range, narrations |
153
+
154
+ ### Arabic (2 voices)
155
+ | Voice | Gender | Style |
156
+ | :--- | :--- | :--- |
157
+ | **Omar** | Male | Bright, confident |
158
+ | **Nour** | Female | Polished, friendly |
159
+
160
+ ## Default Voice by Language
161
+ | Language | Default Voice |
162
+ | :--- | :--- |
163
+ | English (`en`) | Edward |
164
+ | Portuguese (`pt`) | Heitor |
165
+ | Spanish (`es`) | Diego |
166
+ | All others | Edward |
167
+
168
+ ## Features
169
+
170
+ - **Smart Fillers**: Automatically plays filler phrases ("Just a moment...", "Let me check...") during LLM processing.
171
+ - **Dynamic Voice Switching**: Change voice mid-conversation with the `update_voice` tool.
172
+ - **Silence Breakers**: Re-engages the user automatically if they go silent.
173
+ - **Jitter Buffer**: Native handling of network instability.
174
+ - **Native 8kHz**: Perfect for telephony (SIP/Asterisk) with zero resampling overhead.
175
+ - **Adaptive Response Profiles**: Automatically adjusts buffer timing based on response length.
176
+ - **Speed Control**: Server-side speed adjustment (0.5x–1.5x, default 1.2x).
177
+
178
+ ## Languages
179
+
180
+ Supports 15 languages: `en`, `pt`, `es`, `fr`, `de`, `it`, `ja`, `zh`, `ko`, `hi`, `ar`, `ru`, `pl`, `nl`, `he`
181
+
182
+ ## Best For
183
+ - **General Purpose**: Customer support, sales, virtual assistants
184
+ - **Telephony**: Extremely robust 8kHz support for SIP/Asterisk
185
+ - **High Volume**: Lowest cost per minute ($0.03/min)
@@ -0,0 +1,32 @@
1
+ # AWS Bedrock (Nova Sonic)
2
+
3
+ Enterprise-grade Speech-to-Speech using Amazon's latest **Nova Sonic** model (`amazon.nova-2-sonic-v1:0`).
4
+
5
+ ## Configuration
6
+
7
+ ```json
8
+ {
9
+ "provider": "aws",
10
+ "voice": "matthew",
11
+ "system_prompt": "You are a helpful assistant.",
12
+ "sample_rate": 16000
13
+ }
14
+ ```
15
+
16
+ ## Voices
17
+
18
+ | Voice | Gender | Style |
19
+ | :--- | :--- | :--- |
20
+ | **matthew** | Male | Neutral, professional |
21
+ | **ruth** | Female | Professional, clear |
22
+ | **tiffany** | Female | Warm, friendly |
23
+
24
+ ## Features
25
+ - **Low Latency**: Faster than previous Polly+Bedrock chains.
26
+ - **Reliability**: Highest uptime guarantee.
27
+ - **Security**: Data privacy compliance (HIPAA, GDPR options available via AWS config).
28
+ - **Tool Use**: Full support for function calling.
29
+
30
+ ## Best For
31
+ - **Enterprise**: Banking, healthcare, corporate environments.
32
+ - **Stability**: When 99.99% uptime is required.
@@ -0,0 +1,33 @@
1
+ # Google Gemini (Multimodal)
2
+
3
+ Leverages `gemini-2.5-flash-native-audio-preview-12-2025` for massive context and multimodal capabilities.
4
+
5
+ ## Configuration
6
+
7
+ ```json
8
+ {
9
+ "provider": "gemini",
10
+ "voice": "Puck",
11
+ "system_prompt": "You are a helpful assistant.",
12
+ "sample_rate": 24000
13
+ }
14
+ ```
15
+
16
+ ## Voices
17
+
18
+ | Voice | Style |
19
+ | :--- | :--- |
20
+ | **Puck** | Soft, higher pitch |
21
+ | **Kore** | Soft, higher pitch |
22
+ | **Charon** | Deep, confident |
23
+ | **Fenrir** | Deep, confident |
24
+ | **Aoede** | Confident, higher pitch |
25
+
26
+ ## Features
27
+ - **Large Context**: Can process huge system prompts or conversation history.
28
+ - **Multimodal**: Can technically process images if sent (though SDK focuses on Audio).
29
+ - **Tool Use**: Robust function calling.
30
+
31
+ ## Best For
32
+ - **Long Context**: Analyzing documents or long previous conversations.
33
+ - **Complex Instructions**: Following very detailed, multi-step system prompts.
@@ -0,0 +1,25 @@
1
+ # xAI Grok
2
+
3
+ A witty, casual, and capable voice model from xAI (`grok-beta`).
4
+
5
+ ## Configuration
6
+
7
+ ```json
8
+ {
9
+ "provider": "grok",
10
+ "system_prompt": "You are a helpful assistant.",
11
+ "sample_rate": 16000
12
+ }
13
+ ```
14
+
15
+ ## Voices
16
+
17
+ Model-dependent. Voice selection depends on the underlying model version.
18
+
19
+ ## Features
20
+ - **Casual Tone**: Designed to be less robotic and more conversational ("witty").
21
+ - **Native 16kHz**: Matches standard VoIP/WebRTC wideband perfectly.
22
+
23
+ ## Best For
24
+ - **Entertainment/Casual**: Chatbots, companions.
25
+ - **News/Twitter**: Real-time information (via RAG/WebSearch tools).
@@ -0,0 +1,39 @@
1
+ # OpenAI Realtime Provider
2
+
3
+ Direct integration with the `gpt-realtime-mini` model via WebSocket.
4
+
5
+ ## Configuration
6
+
7
+ ```json
8
+ {
9
+ "provider": "openai",
10
+ "voice": "alloy",
11
+ "system_prompt": "You are a helpful assistant.",
12
+ "sample_rate": 24000,
13
+ "max_duration": 300
14
+ }
15
+ ```
16
+
17
+ ## Voices
18
+
19
+ | Voice | Style |
20
+ | :--- | :--- |
21
+ | **alloy** | Neutral, balanced |
22
+ | **ash** | Warm, conversational |
23
+ | **coral** | Clear, professional |
24
+ | **echo** | Smooth, calm |
25
+ | **sage** | Wise, measured |
26
+ | **shimmer** | Bright, energetic |
27
+
28
+ ## Features
29
+ - **Function Calling**: Full support for tool calling.
30
+ - **Native VAD**: Uses OpenAI's server-side voice activity detection.
31
+ - **24kHz High Fidelity**: Best used with `sample_rate: 24000`.
32
+
33
+ ## Audio Notes
34
+ - Native rate is **24kHz**.
35
+ - If you request 8kHz (telephony), the SDK automatically resamples the audio, but **24kHz** gives the best results for web calls.
36
+
37
+ ## Best For
38
+ - **Complex Reasoning**: Logic-heavy tasks, math, coding assistance.
39
+ - **English/Multilingual**: Excellent accent capability.
@@ -0,0 +1,27 @@
1
+ # Qwen (Alibaba Cloud)
2
+
3
+ Open-source based, highly efficient model (`qwen3-omni-flash-realtime`).
4
+
5
+ ## Configuration
6
+
7
+ ```json
8
+ {
9
+ "provider": "qwen",
10
+ "system_prompt": "You are a helpful assistant.",
11
+ "language": "en-US",
12
+ "sample_rate": 16000
13
+ }
14
+ ```
15
+
16
+ ## Voices
17
+
18
+ Model-dependent. Voice selection depends on the underlying model version.
19
+
20
+ ## Features
21
+ - **Cost Effective**: Generally lower cost than OpenAI.
22
+ - **Fast**: "Flash" model is optimized for speed.
23
+ - **No Tool Support**: Function calling / Client Tools are **not supported** in WebSocket Realtime mode. Use AWS, OpenAI, or Gemini for scenarios requiring tools.
24
+
25
+ ## Best For
26
+ - **Cost-Sensitive**: High volume conversational AI.
27
+ - **Asian Markets**: Excellent support for Mandarin/English/Asian languages.
@@ -0,0 +1,29 @@
1
+ # UltraVox
2
+
3
+ Specialized Speech-to-Speech model (`ultravox-v0.7`).
4
+
5
+ ## Configuration
6
+
7
+ ```json
8
+ {
9
+ "provider": "ultravox",
10
+ "voice": "Mark",
11
+ "system_prompt": "You are a helpful assistant.",
12
+ "language": "pt-BR",
13
+ "sample_rate": 16000
14
+ }
15
+ ```
16
+
17
+ ## Voices
18
+
19
+ | Voice | Style |
20
+ | :--- | :--- |
21
+ | **Mark** | Male |
22
+ | **Jessica** | Female |
23
+
24
+ ## Features
25
+ - **Nuance**: High capability in understanding tone and emotion.
26
+ - **Dynamic Voices**: Voice list is fetched dynamically from their API.
27
+
28
+ ## Best For
29
+ - **Emotional Intelligence**: Empathetic support or counseling bots.
@@ -0,0 +1,196 @@
1
// Basic AillomVox Client

// UI elements, resolved once at script load by their element ids.
const [connectBtn, disconnectBtn, statusDiv, apiKeyInput] =
  ['connectBtn', 'disconnectBtn', 'status', 'apiKey'].map((id) => document.getElementById(id));

// Session handles. They start undefined and are assigned by the connect
// handler (socket, audioContext) and by startMicrophone (mediaStream, processor).
let socket;
let audioContext;
let mediaStream;
let processor;

// 🎯 ULTRAVOX PATTERN: Track scheduled audio sources for instant barge-in clearing
// nextPlayTime is the AudioContext time at which the next chunk should start.
let nextPlayTime = 0;
let scheduledSources = [];
15
+
16
/**
 * Connect button handler.
 *
 * Validates the API key, creates the 16 kHz AudioContext (inside the click so
 * it counts as user-initiated), opens the WebSocket, sends the configuration
 * handshake and then starts streaming microphone audio.
 *
 * Server text frames are JSON control messages (hangup, playback_clear_buffer,
 * transcript, error, state); binary frames are PCM audio handed to
 * playAudioChunk().
 */
connectBtn.onclick = async () => {
  const apiKey = apiKeyInput.value.trim();
  if (!apiKey) {
    alert('Please enter an API Key');
    return;
  }

  // 1. Initialize Audio Context (Must be user-initiated)
  audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });

  // 2. Connect to WebSocket
  // Note: Replace 'your-server-url' with actual server if hosted elsewhere
  // For local dev with aillom-vox, use localhost:8080
  // For production, use wss://vox.aillom.com/ws
  const wsUrl = window.location.hostname === 'localhost'
    ? 'ws://localhost:8080/ws'
    : 'wss://vox.aillom.com/ws';

  const ws = new WebSocket(wsUrl);
  ws.binaryType = 'arraybuffer';
  socket = ws;

  // Lock the UI as soon as the socket exists so a second click cannot open a
  // duplicate session while this one is still connecting. disconnect()
  // unlocks it again on any failure or close.
  toggleButtons(true);
  statusDiv.textContent = 'Connecting...';

  // Text shown when this socket closes; replaced with a specific reason when
  // the session is torn down because of a local failure.
  let closeStatus = '🔴 Disconnected';

  const stateLabels = new Map([
    ['listening', '🟢 Listening...'],
    ['thinking', '🟡 Thinking...'],
    ['speaking', '🟠 Speaking...'],
  ]);

  ws.onopen = async () => {
    statusDiv.textContent = 'Connected. Handshaking...';

    // 3. Send Configuration Handshake
    const handshake = {
      type: 'config',
      apikey: apiKey,
      provider: 'aillomvox',
      voice: 'Edward',
      language: 'en-US',
      sample_rate: 16000,
      system_prompt: 'You are a helpful assistant. Be concise and friendly.',
      tools: []
    };
    ws.send(JSON.stringify(handshake));

    // 4. Start Microphone and Audio Processing
    try {
      await startMicrophone();
    } catch (err) {
      // Permission denied / no device: surface it instead of leaving the UI
      // stuck on "Handshaking..." with an unhandled rejection.
      console.error('Microphone unavailable:', err);
      closeStatus = `🔴 Microphone error: ${(err && err.message) || err}`;
      statusDiv.textContent = closeStatus;
      disconnect();
      return;
    }

    // The socket may have closed while the permission prompt was open; in
    // that case release the microphone that was just acquired.
    if (ws.readyState !== WebSocket.OPEN) {
      disconnect();
      return;
    }

    statusDiv.textContent = '🟢 Online - Speak now!';
    toggleButtons(true);
  };

  ws.onmessage = (event) => {
    if (typeof event.data !== 'string') {
      // Audio Data (PCM 16-bit) received from server -> Play it
      playAudioChunk(event.data);
      return;
    }

    let msg;
    try {
      msg = JSON.parse(event.data);
    } catch (err) {
      // One malformed frame must not break the message handler.
      console.error('Ignoring malformed server message:', err);
      return;
    }
    if (msg === null || typeof msg !== 'object') return;

    console.log('Server Message:', msg);

    switch (msg.type) {
      case 'hangup':
        disconnect();
        break;

      case 'playback_clear_buffer':
        // 🎯 ULTRAVOX PATTERN: Instant barge-in — clear all buffered audio
        clearPlaybackBuffer();
        break;

      case 'transcript':
        if (msg.final) {
          console.log(`[${msg.role}] ${msg.text}`);
        }
        break;

      case 'error':
        console.error('Server error:', msg.message);
        break;

      case 'state':
        // 🎯 ULTRAVOX P1: Conversation state machine
        statusDiv.textContent = stateLabels.has(msg.state)
          ? stateLabels.get(msg.state)
          : `🟢 ${msg.state}`;
        break;
    }
  };

  ws.onerror = () => {
    // The close event follows an error; only record the reason here.
    console.error('WebSocket error');
    closeStatus = '🔴 Connection error';
  };

  ws.onclose = () => {
    // A late close event from a previous socket must not tear down a newer session.
    if (socket !== ws) return;
    statusDiv.textContent = closeStatus;
    disconnect();
  };
};

disconnectBtn.onclick = disconnect;
103
+
104
/**
 * Tears down the current session: stops queued playback, detaches the
 * microphone processor, stops the microphone tracks, closes the WebSocket and
 * the AudioContext, and re-enables the connect controls.
 *
 * Safe to call repeatedly. It runs twice on a normal manual disconnect: once
 * from the button and again from the socket's close handler.
 */
function disconnect() {
  clearPlaybackBuffer();

  if (processor) {
    // Stop audio callbacks before the socket goes away.
    processor.onaudioprocess = null;
    processor.disconnect();
    processor = null;
  }

  if (mediaStream) {
    mediaStream.getTracks().forEach((track) => track.stop());
    mediaStream = null;
  }

  if (socket) socket.close();

  // close() rejects with InvalidStateError on an already-closed context, so
  // skip it on repeat calls and never leave the promise unhandled.
  if (audioContext && audioContext.state !== 'closed') {
    const closing = audioContext.close();
    if (closing && typeof closing.catch === 'function') {
      closing.catch((err) => console.warn('AudioContext close failed:', err));
    }
  }

  toggleButtons(false);
}
111
+
112
/**
 * Switches the controls between the connected and disconnected layouts.
 * While connected, the connect button and API key input are disabled and the
 * disconnect button is enabled; otherwise the reverse.
 *
 * @param {boolean} connected - whether a session is (being) established.
 */
function toggleButtons(connected) {
  disconnectBtn.disabled = !connected;
  for (const control of [connectBtn, apiKeyInput]) {
    control.disabled = connected;
  }
}
117
+
118
/**
 * Requests microphone access and streams captured audio to the server.
 *
 * Each 4096-frame block from the ScriptProcessorNode is converted to 16-bit
 * PCM and sent over the socket; blocks are dropped while the socket is not open.
 * Rejects if getUserMedia rejects.
 */
async function startMicrophone() {
  mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const micNode = audioContext.createMediaStreamSource(mediaStream);

  // Simple Processor (Buffer Size 4096), mono in / mono out
  processor = audioContext.createScriptProcessor(4096, 1, 1);
  processor.onaudioprocess = ({ inputBuffer }) => {
    if (socket.readyState === WebSocket.OPEN) {
      // Convert Float32 to Int16 for Server
      socket.send(floatTo16BitPCM(inputBuffer.getChannelData(0)));
    }
  };

  micNode.connect(processor);
  processor.connect(audioContext.destination);
}
137
+
138
/**
 * 🎯 ULTRAVOX PATTERN: Clear all buffered/scheduled audio instantly
 * Called when the server signals barge-in (user speaking while AI is talking)
 * and when the session is torn down.
 * Stops every tracked AudioBufferSourceNode and resets the playback schedule.
 */
function clearPlaybackBuffer() {
  for (const source of scheduledSources) {
    // The list is reset below, so the per-source cleanup callback is no longer needed.
    source.onended = null;
    try {
      source.stop();
    } catch (e) {
      /* already stopped — best effort */
    }
  }
  scheduledSources = [];
  nextPlayTime = 0;
  console.log('[AillomVox] 🔇 Playback buffer cleared (barge-in)');
}
151
+
152
/**
 * 🎯 ULTRAVOX PATTERN: Sequential audio scheduling
 * Instead of calling source.start() immediately (which causes overlap),
 * schedule each chunk to play after the previous one finishes.
 * This allows proper cancellation via clearPlaybackBuffer().
 *
 * @param {ArrayBuffer} arrayBuffer - little-endian signed 16-bit mono PCM.
 * @param {number} [sampleRate=16000] - sample rate of the PCM data in Hz.
 */
function playAudioChunk(arrayBuffer, sampleRate = 16000) {
  if (!audioContext || audioContext.state === 'closed') return;

  // A trailing odd byte is ignored. createBuffer() rejects zero-length
  // buffers, so an empty chunk is dropped instead of throwing.
  const sampleCount = Math.floor(arrayBuffer.byteLength / 2);
  if (sampleCount === 0) return;

  const pcm = new DataView(arrayBuffer);
  const buffer = audioContext.createBuffer(1, sampleCount, sampleRate);
  const channel = buffer.getChannelData(0);

  for (let i = 0; i < sampleCount; i++) {
    const sample = pcm.getInt16(i * 2, true); // Little Endian
    // Asymmetric scaling maps the full Int16 range onto [-1, 1].
    channel[i] = sample < 0 ? sample / 0x8000 : sample / 0x7FFF;
  }

  const source = audioContext.createBufferSource();
  source.buffer = buffer;
  source.connect(audioContext.destination);

  // Schedule sequentially: each chunk plays after the previous one ends
  const startTime = Math.max(audioContext.currentTime, nextPlayTime);
  source.start(startTime);
  nextPlayTime = startTime + buffer.duration;

  // Track for cancellation on barge-in
  scheduledSources.push(source);
  source.onended = () => {
    scheduledSources = scheduledSources.filter((s) => s !== source);
  };
}
188
+
189
/**
 * Converts float samples to signed 16-bit PCM.
 *
 * Each sample is clamped to [-1, 1]; negatives are scaled by 0x8000 and
 * non-negatives by 0x7FFF so both ends of the Int16 range are reachable.
 *
 * @param {Float32Array} input - samples to convert.
 * @returns {ArrayBuffer} buffer backing an Int16Array of the same length.
 */
function floatTo16BitPCM(input) {
  const toInt16 = (sample) => {
    const clamped = Math.max(-1, Math.min(1, sample));
    return clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
  };
  return Int16Array.from(input, toInt16).buffer;
}
@@ -0,0 +1,27 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>AillomVox - Basic Client</title>
7
+ <style>
8
+ body { font-family: sans-serif; display: flex; flex-direction: column; align-items: center; justify-content: center; height: 100vh; background: #f0f0f0; }
9
+ .container { background: white; padding: 2rem; border-radius: 8px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); text-align: center; }
10
+ input { padding: 8px; margin: 10px 0; width: 100%; box-sizing: border-box; }
11
+ button { padding: 10px 20px; cursor: pointer; background: #007bff; color: white; border: none; border-radius: 4px; font-size: 16px; }
12
+ button:disabled { background: #ccc; }
13
+ #status { margin-top: 15px; font-weight: bold; color: #666; }
14
+ </style>
15
+ </head>
16
+ <body>
17
+ <div class="container">
18
+ <h1>🎙️ AillomVox Basic</h1>
19
+ <input type="password" id="apiKey" placeholder="Enter API Key">
20
+ <button id="connectBtn">Connect</button>
21
+ <button id="disconnectBtn" disabled>Disconnect</button>
22
+ <div id="status">Disconnected</div>
23
+ </div>
24
+
25
+ <script src="app.js"></script>
26
+ </body>
27
+ </html>