intellema-vdk 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. {intellema_vdk-0.2.0 → intellema_vdk-0.2.2}/MANIFEST.in +1 -1
  2. intellema_vdk-0.2.2/PKG-INFO +311 -0
  3. intellema_vdk-0.2.2/README.md +251 -0
  4. intellema_vdk-0.2.2/intellema_vdk/__init__.py +92 -0
  5. intellema_vdk-0.2.2/intellema_vdk/config.py +14 -0
  6. intellema_vdk-0.2.2/intellema_vdk/providers/__init__.py +35 -0
  7. intellema_vdk-0.2.2/intellema_vdk/providers/livekit/__init__.py +19 -0
  8. intellema_vdk-0.2.2/intellema_vdk/providers/livekit/client.py +612 -0
  9. intellema_vdk-0.2.2/intellema_vdk/providers/livekit/exceptions.py +23 -0
  10. intellema_vdk-0.2.2/intellema_vdk/providers/protocols.py +33 -0
  11. intellema_vdk-0.2.2/intellema_vdk/providers/retell/__init__.py +17 -0
  12. intellema_vdk-0.2.2/intellema_vdk/providers/retell/client.py +468 -0
  13. intellema_vdk-0.2.2/intellema_vdk/providers/retell/exceptions.py +19 -0
  14. {intellema_vdk-0.2.0/intellema_vdk/retell_lib → intellema_vdk-0.2.2/intellema_vdk/providers/retell}/import_phone_number.py +1 -1
  15. intellema_vdk-0.2.2/intellema_vdk/stt/__init__.py +17 -0
  16. intellema_vdk-0.2.2/intellema_vdk/stt/client.py +482 -0
  17. intellema_vdk-0.2.2/intellema_vdk/stt/exceptions.py +19 -0
  18. intellema_vdk-0.2.2/intellema_vdk/tts/__init__.py +15 -0
  19. intellema_vdk-0.2.2/intellema_vdk/tts/__pycache__/__init__.cpython-312.pyc +0 -0
  20. intellema_vdk-0.2.2/intellema_vdk/tts/__pycache__/client.cpython-312.pyc +0 -0
  21. intellema_vdk-0.2.2/intellema_vdk/tts/__pycache__/exceptions.cpython-312.pyc +0 -0
  22. intellema_vdk-0.2.2/intellema_vdk/tts/__pycache__/providers.cpython-312.pyc +0 -0
  23. intellema_vdk-0.2.2/intellema_vdk/tts/client.py +541 -0
  24. intellema_vdk-0.2.2/intellema_vdk/tts/exceptions.py +15 -0
  25. intellema_vdk-0.2.2/intellema_vdk/tts/providers.py +293 -0
  26. intellema_vdk-0.2.2/intellema_vdk/utils/logger_config.py +41 -0
  27. intellema_vdk-0.2.2/intellema_vdk.egg-info/PKG-INFO +311 -0
  28. intellema_vdk-0.2.2/intellema_vdk.egg-info/SOURCES.txt +34 -0
  29. intellema_vdk-0.2.2/intellema_vdk.egg-info/requires.txt +31 -0
  30. intellema_vdk-0.2.2/pyproject.toml +52 -0
  31. intellema_vdk-0.2.2/requirements.txt +21 -0
  32. intellema_vdk-0.2.0/PKG-INFO +0 -221
  33. intellema_vdk-0.2.0/README.md +0 -174
  34. intellema_vdk-0.2.0/intellema_vdk/__init__.py +0 -35
  35. intellema_vdk-0.2.0/intellema_vdk/livekit_lib/__init__.py +0 -3
  36. intellema_vdk-0.2.0/intellema_vdk/livekit_lib/client.py +0 -280
  37. intellema_vdk-0.2.0/intellema_vdk/retell_lib/retell_client.py +0 -248
  38. intellema_vdk-0.2.0/intellema_vdk/speech_lib/__init__.py +0 -2
  39. intellema_vdk-0.2.0/intellema_vdk/speech_lib/stt_client.py +0 -108
  40. intellema_vdk-0.2.0/intellema_vdk/speech_lib/tts_streamer.py +0 -188
  41. intellema_vdk-0.2.0/intellema_vdk.egg-info/PKG-INFO +0 -221
  42. intellema_vdk-0.2.0/intellema_vdk.egg-info/SOURCES.txt +0 -19
  43. intellema_vdk-0.2.0/intellema_vdk.egg-info/requires.txt +0 -12
  44. intellema_vdk-0.2.0/pyproject.toml +0 -38
  45. intellema_vdk-0.2.0/requirements.txt +0 -12
  46. {intellema_vdk-0.2.0 → intellema_vdk-0.2.2}/LICENSE +0 -0
  47. /intellema_vdk-0.2.0/intellema_vdk/retell_lib/__init__.py → /intellema_vdk-0.2.2/intellema_vdk/stt/providers.py +0 -0
  48. {intellema_vdk-0.2.0 → intellema_vdk-0.2.2}/intellema_vdk.egg-info/dependency_links.txt +0 -0
  49. {intellema_vdk-0.2.0 → intellema_vdk-0.2.2}/intellema_vdk.egg-info/top_level.txt +0 -0
  50. {intellema_vdk-0.2.0 → intellema_vdk-0.2.2}/setup.cfg +0 -0
@@ -2,4 +2,4 @@ include README.md
2
2
  include requirements.txt
3
3
  include LICENSE
4
4
  recursive-include intellema_vdk *
5
- recursive-exclude intellema_vdk/agent_api *
5
+ recursive-exclude agent_api *
@@ -0,0 +1,311 @@
1
+ Metadata-Version: 2.4
2
+ Name: intellema-vdk
3
+ Version: 0.2.2
4
+ Summary: A Voice Development Kit for different Voice Agent Platforms
5
+ Author: Intellema
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Intellema
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Classifier: Programming Language :: Python :: 3
29
+ Classifier: License :: OSI Approved :: MIT License
30
+ Classifier: Operating System :: OS Independent
31
+ Requires-Python: >=3.8
32
+ Description-Content-Type: text/markdown
33
+ License-File: LICENSE
34
+ Requires-Dist: python-dotenv>=1.0.0
35
+ Requires-Dist: requests>=2.31.0
36
+ Requires-Dist: httpx>=0.24.0
37
+ Provides-Extra: livekit
38
+ Requires-Dist: livekit-api>=1.1.0; extra == "livekit"
39
+ Requires-Dist: boto3>=1.28.0; extra == "livekit"
40
+ Provides-Extra: retell
41
+ Requires-Dist: retell-sdk>=2.0.0; extra == "retell"
42
+ Requires-Dist: twilio>=8.0.0; extra == "retell"
43
+ Requires-Dist: boto3>=1.28.0; extra == "retell"
44
+ Provides-Extra: stt
45
+ Requires-Dist: openai>=1.0.0; extra == "stt"
46
+ Provides-Extra: tts
47
+ Requires-Dist: together>=1.0.0; extra == "tts"
48
+ Requires-Dist: openai>=1.0.0; extra == "tts"
49
+ Provides-Extra: audio
50
+ Requires-Dist: pyaudio>=0.2.13; extra == "audio"
51
+ Provides-Extra: all
52
+ Requires-Dist: livekit-api>=1.1.0; extra == "all"
53
+ Requires-Dist: retell-sdk>=2.0.0; extra == "all"
54
+ Requires-Dist: twilio>=8.0.0; extra == "all"
55
+ Requires-Dist: boto3>=1.28.0; extra == "all"
56
+ Requires-Dist: openai>=1.0.0; extra == "all"
57
+ Requires-Dist: together>=1.0.0; extra == "all"
58
+ Requires-Dist: pyaudio>=0.2.13; extra == "all"
59
+ Dynamic: license-file
60
+
61
+ # Intellema VDK
62
+
63
+ Intellema VDK is a unified Voice Development Kit that simplifies integration with voice agent platforms like LiveKit and Retell AI. Build scalable voice applications with a consistent, provider-agnostic API.
64
+
65
+ ## Features
66
+
67
+ - **Voice Providers**: LiveKit and Retell AI support with a unified interface
68
+ - **Outbound Calling**: Initiate phone calls via SIP trunks
69
+ - **Speech-to-Text**: Transcribe audio with OpenAI Whisper
70
+ - **Text-to-Speech**: Streaming TTS via Together AI (low latency) or OpenAI (high quality)
71
+ - **Recording & Streaming**: Save to S3 or stream to RTMP
72
+ - **Participant Management**: Tokens, muting, kick controls
73
+ - **Real-time Messaging**: Send data packets during calls
74
+
75
+ ## Quick Start
76
+
77
+ ### Installation
78
+
79
+ ```bash
80
+ # Minimal installation (core dependencies only)
81
+ pip install intellema-vdk
82
+
83
+ # Install with specific provider support
84
+ pip install intellema-vdk[livekit] # LiveKit voice provider
85
+ pip install intellema-vdk[retell] # Retell voice provider
86
+ pip install intellema-vdk[stt] # Speech-to-Text features
87
+ pip install intellema-vdk[tts] # Text-to-Speech features
88
+ pip install intellema-vdk[audio] # Audio playback (PyAudio)
89
+
90
+ # Install all features
91
+ pip install intellema-vdk[all]
92
+ ```
93
+
94
+ **Requirements:** Python 3.8+
95
+
96
+ **Note on PyAudio:** The `audio` extra requires PortAudio to be installed on your system:
97
+ - **Windows**: Usually works with `pip install pyaudio`, or use `pipwin install pyaudio`
98
+ - **macOS**: `brew install portaudio && pip install pyaudio`
99
+ - **Linux**: `sudo apt-get install portaudio19-dev && pip install pyaudio`
100
+
101
+ The package will automatically install required dependencies when you first use a feature.
102
+
103
+ ### Minimal Example
104
+
105
+ ```python
106
+ import asyncio
107
+ from intellema_vdk import VoiceClient
108
+
109
+ async def main() -> None:
110
+ client = VoiceClient("livekit") # or "retell"
111
+
112
+ call_id: str = await client.start_outbound_call(
113
+ phone_number="+15551234567",
114
+ prompt_content="Hello from VoxChain!"
115
+ )
116
+ print(f"Call started: {call_id}")
117
+
118
+ await client.close()
119
+
120
+ if __name__ == "__main__":
121
+ asyncio.run(main())
122
+ ```
123
+
124
+ ### Configuration
125
+
126
+ Create a `.env` file with your credentials:
127
+
128
+ ```bash
129
+ # LiveKit (if using)
130
+ LIVEKIT_URL=wss://your-livekit-server.com
131
+ LIVEKIT_API_KEY=your_api_key
132
+ LIVEKIT_API_SECRET=your_api_secret
133
+ SIP_OUTBOUND_TRUNK_ID=your_trunk_id
134
+
135
+ # Retell + Twilio (if using)
136
+ TWILIO_ACCOUNT_SID=your_sid
137
+ TWILIO_AUTH_TOKEN=your_token
138
+ TWILIO_PHONE_NUMBER=+15551234567
139
+ RETELL_API_KEY=your_retell_key
140
+ RETELL_AGENT_ID=your_agent_id
141
+
142
+ # STT
143
+ OPENAI_API_KEY=sk-your-key
144
+ AGENT_API_URL=https://your-agent-api.com/process # Optional
145
+
146
+ # TTS (set appropriate API key according to provider)
147
+ TOGETHER_API_KEY=your_together_key
148
+ # OPENAI_API_KEY (already set in the STT section above) is also used by the OpenAI TTS provider
149
+
150
+ # Optional: AWS for recordings
151
+ AWS_ACCESS_KEY_ID=your_key
152
+ AWS_SECRET_ACCESS_KEY=your_secret
153
+ AWS_REGION=us-east-1
154
+ AWS_S3_BUCKET=your-bucket
155
+ ```
156
+
157
+ See [docs/guides/configuration.md](docs/guides/configuration.md) for detailed setup.
158
+
159
+ ## Core Modules
160
+
161
+ ### Voice Providers
162
+
163
+ Choose between LiveKit or Retell for voice calls.
164
+
165
+ ```python
166
+ from intellema_vdk import VoiceClient
167
+
168
+ # LiveKit for advanced features
169
+ livekit = VoiceClient("livekit")
170
+
171
+ # Retell for quick setup
172
+ retell = VoiceClient("retell")
173
+
174
+ # Common interface
175
+ call_id: str = await livekit.start_outbound_call("+15551234567", "Hello!")
176
+ await livekit.start_recording(call_id)
177
+ await livekit.delete_room(call_id)
178
+ ```
179
+
180
+ **Detailed Documentation:**
181
+ - [docs/api/providers.md](docs/api/providers.md) - Full API reference with examples
182
+ - [docs/guides/examples.md](docs/guides/examples.md) - Complete usage patterns
183
+
184
+ **Important for Retell:**
185
+ Before making calls, register your Twilio number by running the `import_phone_number.py` script (shipped in `intellema_vdk/providers/retell/`):
186
+ ```bash
187
+ python import_phone_number.py
188
+ ```
189
+
190
+ ### Speech-to-Text (STT)
191
+
192
+ Transcribe audio files with OpenAI Whisper - supports single files and batch processing:
193
+
194
+ ```python
195
+ from intellema_vdk import STTManager
196
+
197
+ async def transcribe() -> None:
198
+ stt = STTManager()
199
+ try:
200
+ # Single file
201
+ result = await stt.transcribe_audio("recording.wav")
202
+ print(result["text"])
203
+
204
+ # Batch process folder
205
+ results = await stt.transcribe_audio(
206
+ "recordings/",
207
+ batch_process=True,
208
+ output_file="transcripts.json"
209
+ )
210
+ finally:
211
+ await stt.close()
212
+ ```
213
+
214
+ **Detailed Documentation:** [docs/api/stt.md](docs/api/stt.md)
215
+
216
+ ### Text-to-Speech (TTS)
217
+
218
+ Stream text to audio in real-time with support for multiple providers:
219
+
220
+ ```python
221
+ from intellema_vdk import TTSStreamer
222
+
223
+ # Together AI (low latency)
224
+ tts = TTSStreamer(provider="together")
225
+
226
+ # OpenAI (high quality, 6 voices)
227
+ tts = TTSStreamer(
228
+ provider="openai",
229
+ voice="nova", # alloy, echo, fable, onyx, nova, shimmer
230
+ model="tts-1-hd" # tts-1 or tts-1-hd
231
+ )
232
+
233
+ # Feed text as it's generated
234
+ for chunk in llm_stream:
235
+ tts.feed(chunk)
236
+
237
+ tts.flush() # Wait for completion
238
+ tts.close()
239
+ ```
240
+
241
+ **Detailed Documentation:** [docs/api/tts.md](docs/api/tts.md)
242
+
243
+ **Sample Implementation:** Run the included chatbot demo:
244
+ ```bash
245
+ python sample_implementation.py
246
+ ```
247
+
248
+ ## Advanced Usage
249
+
250
+ ### Logging
251
+
252
+ Configure logging to see VDK internals:
253
+
254
+ ```python
255
+ from intellema_vdk import setup_logging
256
+
257
+ setup_logging() # INFO level by default
258
+ ```
259
+
260
+ Custom configuration:
261
+
262
+ ```python
263
+ import logging
264
+ setup_logging(
265
+ log_level=logging.DEBUG,
266
+ log_format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
267
+ )
268
+ ```
269
+
270
+ ### Recording Calls
271
+
272
+ ```python
273
+ # LiveKit or Retell
274
+ recording_id: str = await client.start_recording(
275
+ call_id=call_id,
276
+ upload_to_s3=True,
277
+ wait_for_completion=False
278
+ )
279
+ ```
280
+
281
+ ### Streaming to RTMP
282
+
283
+ ```python
284
+ await client.start_stream(
285
+ call_id=call_id,
286
+ rtmp_urls=["rtmp://your-server.com/live/key"]
287
+ )
288
+ ```
289
+
290
+ ## Documentation
291
+
292
+ - **[Getting Started Guide](docs/guides/getting_started.md)** - Setup and first steps
293
+ - **[Configuration Guide](docs/guides/configuration.md)** - Environment variables
294
+ - **[Examples](docs/guides/examples.md)** - Common usage patterns
295
+ - **API Reference:**
296
+ - [Voice Providers](docs/api/providers.md) - LiveKit & Retell
297
+ - [STT](docs/api/stt.md) - Speech-to-Text
298
+ - [TTS](docs/api/tts.md) - Text-to-Speech
299
+
300
+ ## Important Notes
301
+
302
+ - **Retell `delete_room` Limitation**: `delete_room` only takes effect once the user speaks, since that is what triggers the agent to check the termination variable. For an immediate hangup, use the Twilio API directly.
303
+ - **Retell Recording**: Retell automatically records calls. The `start_recording` method retrieves the recording URL after the call ends (no need to explicitly start recording during the call). Ensure recording is enabled for your Retell agent in the dashboard.
304
+ - **Retell Audio Streaming**: Real-time audio streaming (`start_stream`) is **not supported** for Retell phone calls. Retell deprecated their Audio WebSocket API at the end of 2024. Use `start_recording()` to retrieve recordings after the call ends.
305
+ - **Type Safety**: All examples include type annotations for better IDE support.
306
+ - **Async Required**: All voice and STT operations are async; use `asyncio.run()`.
307
+
308
+ ## License
309
+
310
+ See [LICENSE](LICENSE) file for details.
311
+
@@ -0,0 +1,251 @@
1
+ # Intellema VDK
2
+
3
+ Intellema VDK is a unified Voice Development Kit that simplifies integration with voice agent platforms like LiveKit and Retell AI. Build scalable voice applications with a consistent, provider-agnostic API.
4
+
5
+ ## Features
6
+
7
+ - **Voice Providers**: LiveKit and Retell AI support with a unified interface
8
+ - **Outbound Calling**: Initiate phone calls via SIP trunks
9
+ - **Speech-to-Text**: Transcribe audio with OpenAI Whisper
10
+ - **Text-to-Speech**: Streaming TTS via Together AI (low latency) or OpenAI (high quality)
11
+ - **Recording & Streaming**: Save to S3 or stream to RTMP
12
+ - **Participant Management**: Tokens, muting, kick controls
13
+ - **Real-time Messaging**: Send data packets during calls
14
+
15
+ ## Quick Start
16
+
17
+ ### Installation
18
+
19
+ ```bash
20
+ # Minimal installation (core dependencies only)
21
+ pip install intellema-vdk
22
+
23
+ # Install with specific provider support
24
+ pip install intellema-vdk[livekit] # LiveKit voice provider
25
+ pip install intellema-vdk[retell] # Retell voice provider
26
+ pip install intellema-vdk[stt] # Speech-to-Text features
27
+ pip install intellema-vdk[tts] # Text-to-Speech features
28
+ pip install intellema-vdk[audio] # Audio playback (PyAudio)
29
+
30
+ # Install all features
31
+ pip install intellema-vdk[all]
32
+ ```
33
+
34
+ **Requirements:** Python 3.8+
35
+
36
+ **Note on PyAudio:** The `audio` extra requires PortAudio to be installed on your system:
37
+ - **Windows**: Usually works with `pip install pyaudio`, or use `pipwin install pyaudio`
38
+ - **macOS**: `brew install portaudio && pip install pyaudio`
39
+ - **Linux**: `sudo apt-get install portaudio19-dev && pip install pyaudio`
40
+
41
+ The package will automatically install required dependencies when you first use a feature.
42
+
43
+ ### Minimal Example
44
+
45
+ ```python
46
+ import asyncio
47
+ from intellema_vdk import VoiceClient
48
+
49
+ async def main() -> None:
50
+ client = VoiceClient("livekit") # or "retell"
51
+
52
+ call_id: str = await client.start_outbound_call(
53
+ phone_number="+15551234567",
54
+ prompt_content="Hello from VoxChain!"
55
+ )
56
+ print(f"Call started: {call_id}")
57
+
58
+ await client.close()
59
+
60
+ if __name__ == "__main__":
61
+ asyncio.run(main())
62
+ ```
63
+
64
+ ### Configuration
65
+
66
+ Create a `.env` file with your credentials:
67
+
68
+ ```bash
69
+ # LiveKit (if using)
70
+ LIVEKIT_URL=wss://your-livekit-server.com
71
+ LIVEKIT_API_KEY=your_api_key
72
+ LIVEKIT_API_SECRET=your_api_secret
73
+ SIP_OUTBOUND_TRUNK_ID=your_trunk_id
74
+
75
+ # Retell + Twilio (if using)
76
+ TWILIO_ACCOUNT_SID=your_sid
77
+ TWILIO_AUTH_TOKEN=your_token
78
+ TWILIO_PHONE_NUMBER=+15551234567
79
+ RETELL_API_KEY=your_retell_key
80
+ RETELL_AGENT_ID=your_agent_id
81
+
82
+ # STT
83
+ OPENAI_API_KEY=sk-your-key
84
+ AGENT_API_URL=https://your-agent-api.com/process # Optional
85
+
86
+ # TTS (set appropriate API key according to provider)
87
+ TOGETHER_API_KEY=your_together_key
88
+ # OPENAI_API_KEY (already set in the STT section above) is also used by the OpenAI TTS provider
89
+
90
+ # Optional: AWS for recordings
91
+ AWS_ACCESS_KEY_ID=your_key
92
+ AWS_SECRET_ACCESS_KEY=your_secret
93
+ AWS_REGION=us-east-1
94
+ AWS_S3_BUCKET=your-bucket
95
+ ```
96
+
97
+ See [docs/guides/configuration.md](docs/guides/configuration.md) for detailed setup.
98
+
99
+ ## Core Modules
100
+
101
+ ### Voice Providers
102
+
103
+ Choose between LiveKit or Retell for voice calls.
104
+
105
+ ```python
106
+ from intellema_vdk import VoiceClient
107
+
108
+ # LiveKit for advanced features
109
+ livekit = VoiceClient("livekit")
110
+
111
+ # Retell for quick setup
112
+ retell = VoiceClient("retell")
113
+
114
+ # Common interface
115
+ call_id: str = await livekit.start_outbound_call("+15551234567", "Hello!")
116
+ await livekit.start_recording(call_id)
117
+ await livekit.delete_room(call_id)
118
+ ```
119
+
120
+ **Detailed Documentation:**
121
+ - [docs/api/providers.md](docs/api/providers.md) - Full API reference with examples
122
+ - [docs/guides/examples.md](docs/guides/examples.md) - Complete usage patterns
123
+
124
+ **Important for Retell:**
125
+ Before making calls, register your Twilio number by running the `import_phone_number.py` script (shipped in `intellema_vdk/providers/retell/`):
126
+ ```bash
127
+ python import_phone_number.py
128
+ ```
129
+
130
+ ### Speech-to-Text (STT)
131
+
132
+ Transcribe audio files with OpenAI Whisper - supports single files and batch processing:
133
+
134
+ ```python
135
+ from intellema_vdk import STTManager
136
+
137
+ async def transcribe() -> None:
138
+ stt = STTManager()
139
+ try:
140
+ # Single file
141
+ result = await stt.transcribe_audio("recording.wav")
142
+ print(result["text"])
143
+
144
+ # Batch process folder
145
+ results = await stt.transcribe_audio(
146
+ "recordings/",
147
+ batch_process=True,
148
+ output_file="transcripts.json"
149
+ )
150
+ finally:
151
+ await stt.close()
152
+ ```
153
+
154
+ **Detailed Documentation:** [docs/api/stt.md](docs/api/stt.md)
155
+
156
+ ### Text-to-Speech (TTS)
157
+
158
+ Stream text to audio in real-time with support for multiple providers:
159
+
160
+ ```python
161
+ from intellema_vdk import TTSStreamer
162
+
163
+ # Together AI (low latency)
164
+ tts = TTSStreamer(provider="together")
165
+
166
+ # OpenAI (high quality, 6 voices)
167
+ tts = TTSStreamer(
168
+ provider="openai",
169
+ voice="nova", # alloy, echo, fable, onyx, nova, shimmer
170
+ model="tts-1-hd" # tts-1 or tts-1-hd
171
+ )
172
+
173
+ # Feed text as it's generated
174
+ for chunk in llm_stream:
175
+ tts.feed(chunk)
176
+
177
+ tts.flush() # Wait for completion
178
+ tts.close()
179
+ ```
180
+
181
+ **Detailed Documentation:** [docs/api/tts.md](docs/api/tts.md)
182
+
183
+ **Sample Implementation:** Run the included chatbot demo:
184
+ ```bash
185
+ python sample_implementation.py
186
+ ```
187
+
188
+ ## Advanced Usage
189
+
190
+ ### Logging
191
+
192
+ Configure logging to see VDK internals:
193
+
194
+ ```python
195
+ from intellema_vdk import setup_logging
196
+
197
+ setup_logging() # INFO level by default
198
+ ```
199
+
200
+ Custom configuration:
201
+
202
+ ```python
203
+ import logging
204
+ setup_logging(
205
+ log_level=logging.DEBUG,
206
+ log_format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
207
+ )
208
+ ```
209
+
210
+ ### Recording Calls
211
+
212
+ ```python
213
+ # LiveKit or Retell
214
+ recording_id: str = await client.start_recording(
215
+ call_id=call_id,
216
+ upload_to_s3=True,
217
+ wait_for_completion=False
218
+ )
219
+ ```
220
+
221
+ ### Streaming to RTMP
222
+
223
+ ```python
224
+ await client.start_stream(
225
+ call_id=call_id,
226
+ rtmp_urls=["rtmp://your-server.com/live/key"]
227
+ )
228
+ ```
229
+
230
+ ## Documentation
231
+
232
+ - **[Getting Started Guide](docs/guides/getting_started.md)** - Setup and first steps
233
+ - **[Configuration Guide](docs/guides/configuration.md)** - Environment variables
234
+ - **[Examples](docs/guides/examples.md)** - Common usage patterns
235
+ - **API Reference:**
236
+ - [Voice Providers](docs/api/providers.md) - LiveKit & Retell
237
+ - [STT](docs/api/stt.md) - Speech-to-Text
238
+ - [TTS](docs/api/tts.md) - Text-to-Speech
239
+
240
+ ## Important Notes
241
+
242
+ - **Retell `delete_room` Limitation**: `delete_room` only takes effect once the user speaks, since that is what triggers the agent to check the termination variable. For an immediate hangup, use the Twilio API directly.
243
+ - **Retell Recording**: Retell automatically records calls. The `start_recording` method retrieves the recording URL after the call ends (no need to explicitly start recording during the call). Ensure recording is enabled for your Retell agent in the dashboard.
244
+ - **Retell Audio Streaming**: Real-time audio streaming (`start_stream`) is **not supported** for Retell phone calls. Retell deprecated their Audio WebSocket API at the end of 2024. Use `start_recording()` to retrieve recordings after the call ends.
245
+ - **Type Safety**: All examples include type annotations for better IDE support.
246
+ - **Async Required**: All voice and STT operations are async; use `asyncio.run()`.
247
+
248
+ ## License
249
+
250
+ See [LICENSE](LICENSE) file for details.
251
+
@@ -0,0 +1,92 @@
1
+ from typing import Optional, List, Any
2
+
3
+ from .providers import (
4
+ VoiceProvider,
5
+ LiveKitManager,
6
+ RetellManager,
7
+ # LiveKit Exceptions
8
+ LiveKitError,
9
+ LiveKitConfigurationError,
10
+ LiveKitRoomError,
11
+ LiveKitSIPError,
12
+ LiveKitDispatchError,
13
+ LiveKitEgressError,
14
+ # Retell Exceptions
15
+ RetellError,
16
+ RetellConfigurationError,
17
+ RetellAPIError,
18
+ RetellPhoneNumberError,
19
+ RetellCallError,
20
+ )
21
+ from .stt import (
22
+ STTManager,
23
+ STTAgentError,
24
+ STTConfigurationError,
25
+ STTError,
26
+ STTFileError,
27
+ STTTranscriptionError
28
+ )
29
+ from .tts import (
30
+ TTSStreamer,
31
+ TTSError,
32
+ TTSConfigurationError,
33
+ TTSStreamError,
34
+ TTSAPIError,
35
+ )
36
+ from .utils.logger_config import setup_logging
37
+
38
+ __all__ = [
39
+ "VoiceClient",
40
+ "start_outbound_call",
41
+ "VoiceProvider",
42
+ "LiveKitManager",
43
+ "RetellManager",
44
+ "STTManager",
45
+ "TTSStreamer",
46
+ "setup_logging",
47
+ "LiveKitError",
48
+ "LiveKitConfigurationError",
49
+ "LiveKitRoomError",
50
+ "LiveKitSIPError",
51
+ "LiveKitDispatchError",
52
+ "LiveKitEgressError",
53
+ "RetellError",
54
+ "RetellConfigurationError",
55
+ "RetellAPIError",
56
+ "RetellPhoneNumberError",
57
+ "RetellCallError",
58
+ "STTAgentError",
59
+ "STTConfigurationError",
60
+ "STTError",
61
+ "STTFileError",
62
+ "STTTranscriptionError",
63
+ "TTSError",
64
+ "TTSConfigurationError",
65
+ "TTSStreamError",
66
+ "TTSAPIError",
67
+ ]
68
+
69
def VoiceClient(provider: str, **kwargs) -> VoiceProvider:
    """
    Build the voice-provider manager selected by name.

    Args:
        provider: Provider key, either "livekit" or "retell" (compared as-is,
            so the match is case-sensitive).
        **kwargs: Forwarded unchanged to the selected manager's constructor.

    Returns:
        A new LiveKitManager for "livekit", or a new RetellManager for "retell".

    Raises:
        ValueError: If ``provider`` is not one of the supported keys.
    """
    supported = ("livekit", "retell")
    # Reject unknown keys up front so the selection below only sees valid ones.
    if provider not in supported:
        raise ValueError(f"Unknown provider: {provider}. Supported providers: 'livekit', 'retell'")

    manager_cls = LiveKitManager if provider == "livekit" else RetellManager
    return manager_cls(**kwargs)
86
+
87
async def start_outbound_call(provider: str, *args, **kwargs):
    """
    Convenience wrapper to start an outbound call with a short-lived client.

    A provider client is created just for this call and released again before
    returning, because the caller never receives a handle to it.

    Args:
        provider: Provider key passed to ``VoiceClient`` ("livekit" or "retell").
        *args: Positional arguments forwarded to the client's
            ``start_outbound_call``.
        **kwargs: Keyword arguments forwarded to the client's
            ``start_outbound_call`` (none are passed to the client constructor).

    Returns:
        Whatever the provider client's ``start_outbound_call`` returns.

    Raises:
        ValueError: If ``provider`` is not supported (raised by ``VoiceClient``).
    """
    import inspect

    client = VoiceClient(provider)
    try:
        return await client.start_outbound_call(*args, **kwargs)
    finally:
        # The client is unreachable once this function returns, so close it
        # here instead of leaking it. The lookup is defensive: a provider
        # without ``close`` is simply left alone, and both sync and async
        # ``close`` implementations are handled.
        closer = getattr(client, "close", None)
        if callable(closer):
            pending = closer()
            if inspect.isawaitable(pending):
                await pending
@@ -0,0 +1,14 @@
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ # Load environment variables
5
+ load_dotenv(dotenv_path=".env.local")
6
+ load_dotenv()
7
+
8
def get_env(key: str, default: str = None) -> str:
    """Look up ``key`` in the process environment.

    Returns the variable's value when it is set; otherwise returns ``default``,
    which is ``None`` unless the caller supplies one.
    """
    return os.environ.get(key, default)
11
+
12
+ # TTS Configuration
13
+ TTS_AUDIO_SAMPLE_RATE = 24000
14
+ WAV_HEADER_SIZE = 44
@@ -0,0 +1,35 @@
1
+ from .livekit import (
2
+ LiveKitManager,
3
+ LiveKitError,
4
+ LiveKitConfigurationError,
5
+ LiveKitRoomError,
6
+ LiveKitSIPError,
7
+ LiveKitDispatchError,
8
+ LiveKitEgressError,
9
+ )
10
+ from .retell import (
11
+ RetellManager,
12
+ RetellError,
13
+ RetellConfigurationError,
14
+ RetellAPIError,
15
+ RetellPhoneNumberError,
16
+ RetellCallError,
17
+ )
18
+ from .protocols import VoiceProvider
19
+
20
+ __all__ = [
21
+ "LiveKitManager",
22
+ "LiveKitError",
23
+ "LiveKitConfigurationError",
24
+ "LiveKitRoomError",
25
+ "LiveKitSIPError",
26
+ "LiveKitDispatchError",
27
+ "LiveKitEgressError",
28
+ "RetellManager",
29
+ "RetellError",
30
+ "RetellConfigurationError",
31
+ "RetellAPIError",
32
+ "RetellPhoneNumberError",
33
+ "RetellCallError",
34
+ "VoiceProvider",
35
+ ]