intellema-vdk 0.2.1__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {intellema_vdk-0.2.1 → intellema_vdk-0.2.2}/MANIFEST.in +1 -1
- intellema_vdk-0.2.2/PKG-INFO +311 -0
- intellema_vdk-0.2.2/README.md +251 -0
- intellema_vdk-0.2.2/intellema_vdk/__init__.py +92 -0
- intellema_vdk-0.2.2/intellema_vdk/config.py +14 -0
- intellema_vdk-0.2.2/intellema_vdk/providers/__init__.py +35 -0
- intellema_vdk-0.2.2/intellema_vdk/providers/livekit/__init__.py +19 -0
- intellema_vdk-0.2.2/intellema_vdk/providers/livekit/client.py +612 -0
- intellema_vdk-0.2.2/intellema_vdk/providers/livekit/exceptions.py +23 -0
- intellema_vdk-0.2.2/intellema_vdk/providers/protocols.py +33 -0
- intellema_vdk-0.2.2/intellema_vdk/providers/retell/__init__.py +17 -0
- intellema_vdk-0.2.2/intellema_vdk/providers/retell/client.py +468 -0
- intellema_vdk-0.2.2/intellema_vdk/providers/retell/exceptions.py +19 -0
- {intellema_vdk-0.2.1/intellema_vdk/retell_lib → intellema_vdk-0.2.2/intellema_vdk/providers/retell}/import_phone_number.py +1 -1
- intellema_vdk-0.2.2/intellema_vdk/stt/__init__.py +17 -0
- intellema_vdk-0.2.2/intellema_vdk/stt/client.py +482 -0
- intellema_vdk-0.2.2/intellema_vdk/stt/exceptions.py +19 -0
- intellema_vdk-0.2.2/intellema_vdk/tts/__init__.py +15 -0
- intellema_vdk-0.2.2/intellema_vdk/tts/__pycache__/__init__.cpython-312.pyc +0 -0
- intellema_vdk-0.2.2/intellema_vdk/tts/__pycache__/client.cpython-312.pyc +0 -0
- intellema_vdk-0.2.2/intellema_vdk/tts/__pycache__/exceptions.cpython-312.pyc +0 -0
- intellema_vdk-0.2.2/intellema_vdk/tts/__pycache__/providers.cpython-312.pyc +0 -0
- intellema_vdk-0.2.2/intellema_vdk/tts/client.py +541 -0
- intellema_vdk-0.2.2/intellema_vdk/tts/exceptions.py +15 -0
- intellema_vdk-0.2.2/intellema_vdk/tts/providers.py +293 -0
- intellema_vdk-0.2.2/intellema_vdk/utils/logger_config.py +41 -0
- intellema_vdk-0.2.2/intellema_vdk.egg-info/PKG-INFO +311 -0
- intellema_vdk-0.2.2/intellema_vdk.egg-info/SOURCES.txt +34 -0
- intellema_vdk-0.2.2/intellema_vdk.egg-info/requires.txt +31 -0
- intellema_vdk-0.2.2/pyproject.toml +52 -0
- intellema_vdk-0.2.2/requirements.txt +21 -0
- intellema_vdk-0.2.1/PKG-INFO +0 -221
- intellema_vdk-0.2.1/README.md +0 -174
- intellema_vdk-0.2.1/intellema_vdk/__init__.py +0 -35
- intellema_vdk-0.2.1/intellema_vdk/__pycache__/__init__.cpython-312.pyc +0 -0
- intellema_vdk-0.2.1/intellema_vdk/livekit_lib/__init__.py +0 -3
- intellema_vdk-0.2.1/intellema_vdk/livekit_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- intellema_vdk-0.2.1/intellema_vdk/livekit_lib/__pycache__/client.cpython-312.pyc +0 -0
- intellema_vdk-0.2.1/intellema_vdk/livekit_lib/client.py +0 -280
- intellema_vdk-0.2.1/intellema_vdk/retell_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- intellema_vdk-0.2.1/intellema_vdk/retell_lib/__pycache__/retell_client.cpython-312.pyc +0 -0
- intellema_vdk-0.2.1/intellema_vdk/retell_lib/retell_client.py +0 -248
- intellema_vdk-0.2.1/intellema_vdk/speech_lib/__init__.py +0 -2
- intellema_vdk-0.2.1/intellema_vdk/speech_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- intellema_vdk-0.2.1/intellema_vdk/speech_lib/__pycache__/stt_client.cpython-312.pyc +0 -0
- intellema_vdk-0.2.1/intellema_vdk/speech_lib/__pycache__/tts_streamer.cpython-312.pyc +0 -0
- intellema_vdk-0.2.1/intellema_vdk/speech_lib/stt_client.py +0 -110
- intellema_vdk-0.2.1/intellema_vdk/speech_lib/tts_streamer.py +0 -188
- intellema_vdk-0.2.1/intellema_vdk.egg-info/PKG-INFO +0 -221
- intellema_vdk-0.2.1/intellema_vdk.egg-info/SOURCES.txt +0 -27
- intellema_vdk-0.2.1/intellema_vdk.egg-info/requires.txt +0 -12
- intellema_vdk-0.2.1/pyproject.toml +0 -38
- intellema_vdk-0.2.1/requirements.txt +0 -12
- {intellema_vdk-0.2.1 → intellema_vdk-0.2.2}/LICENSE +0 -0
- /intellema_vdk-0.2.1/intellema_vdk/retell_lib/__init__.py → /intellema_vdk-0.2.2/intellema_vdk/stt/providers.py +0 -0
- {intellema_vdk-0.2.1 → intellema_vdk-0.2.2}/intellema_vdk.egg-info/dependency_links.txt +0 -0
- {intellema_vdk-0.2.1 → intellema_vdk-0.2.2}/intellema_vdk.egg-info/top_level.txt +0 -0
- {intellema_vdk-0.2.1 → intellema_vdk-0.2.2}/setup.cfg +0 -0
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: intellema-vdk
|
|
3
|
+
Version: 0.2.2
|
|
4
|
+
Summary: A Voice Development Kit for different Voice Agent Platforms
|
|
5
|
+
Author: Intellema
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Intellema
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Classifier: Programming Language :: Python :: 3
|
|
29
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
30
|
+
Classifier: Operating System :: OS Independent
|
|
31
|
+
Requires-Python: >=3.8
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
License-File: LICENSE
|
|
34
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
35
|
+
Requires-Dist: requests>=2.31.0
|
|
36
|
+
Requires-Dist: httpx>=0.24.0
|
|
37
|
+
Provides-Extra: livekit
|
|
38
|
+
Requires-Dist: livekit-api>=1.1.0; extra == "livekit"
|
|
39
|
+
Requires-Dist: boto3>=1.28.0; extra == "livekit"
|
|
40
|
+
Provides-Extra: retell
|
|
41
|
+
Requires-Dist: retell-sdk>=2.0.0; extra == "retell"
|
|
42
|
+
Requires-Dist: twilio>=8.0.0; extra == "retell"
|
|
43
|
+
Requires-Dist: boto3>=1.28.0; extra == "retell"
|
|
44
|
+
Provides-Extra: stt
|
|
45
|
+
Requires-Dist: openai>=1.0.0; extra == "stt"
|
|
46
|
+
Provides-Extra: tts
|
|
47
|
+
Requires-Dist: together>=1.0.0; extra == "tts"
|
|
48
|
+
Requires-Dist: openai>=1.0.0; extra == "tts"
|
|
49
|
+
Provides-Extra: audio
|
|
50
|
+
Requires-Dist: pyaudio>=0.2.13; extra == "audio"
|
|
51
|
+
Provides-Extra: all
|
|
52
|
+
Requires-Dist: livekit-api>=1.1.0; extra == "all"
|
|
53
|
+
Requires-Dist: retell-sdk>=2.0.0; extra == "all"
|
|
54
|
+
Requires-Dist: twilio>=8.0.0; extra == "all"
|
|
55
|
+
Requires-Dist: boto3>=1.28.0; extra == "all"
|
|
56
|
+
Requires-Dist: openai>=1.0.0; extra == "all"
|
|
57
|
+
Requires-Dist: together>=1.0.0; extra == "all"
|
|
58
|
+
Requires-Dist: pyaudio>=0.2.13; extra == "all"
|
|
59
|
+
Dynamic: license-file
|
|
60
|
+
|
|
61
|
+
# Intellema VDK
|
|
62
|
+
|
|
63
|
+
Intellema VDK is a unified Voice Development Kit that simplifies integration with voice agent platforms like LiveKit and Retell AI. Build scalable voice applications with a consistent, provider-agnostic API.
|
|
64
|
+
|
|
65
|
+
## Features
|
|
66
|
+
|
|
67
|
+
- **Voice Providers**: LiveKit and Retell AI support with unified interface
|
|
68
|
+
- **Outbound Calling**: Initiate phone calls via SIP trunks
|
|
69
|
+
- **Speech-to-Text**: Transcribe audio with OpenAI Whisper
|
|
70
|
+
- **Text-to-Speech**: Low-latency streaming TTS via Together AI or OpenAI
|
|
71
|
+
- **Recording & Streaming**: Save to S3 or stream to RTMP
|
|
72
|
+
- **Participant Management**: Tokens, muting, kick controls
|
|
73
|
+
- **Real-time Messaging**: Send data packets during calls
|
|
74
|
+
|
|
75
|
+
## Quick Start
|
|
76
|
+
|
|
77
|
+
### Installation
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
# Minimal installation (core dependencies only)
|
|
81
|
+
pip install intellema-vdk
|
|
82
|
+
|
|
83
|
+
# Install with specific provider support
|
|
84
|
+
pip install intellema-vdk[livekit] # LiveKit voice provider
|
|
85
|
+
pip install intellema-vdk[retell] # Retell voice provider
|
|
86
|
+
pip install intellema-vdk[stt] # Speech-to-Text features
|
|
87
|
+
pip install intellema-vdk[tts] # Text-to-Speech features
|
|
88
|
+
pip install intellema-vdk[audio] # Audio playback (PyAudio)
|
|
89
|
+
|
|
90
|
+
# Install all features
|
|
91
|
+
pip install intellema-vdk[all]
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
**Requirements:** Python 3.8+
|
|
95
|
+
|
|
96
|
+
**Note on PyAudio:** The `audio` extra requires PortAudio to be installed on your system:
|
|
97
|
+
- **Windows**: Usually works with `pip install pyaudio`, or use `pipwin install pyaudio`
|
|
98
|
+
- **macOS**: `brew install portaudio && pip install pyaudio`
|
|
99
|
+
- **Linux**: `sudo apt-get install portaudio19-dev && pip install pyaudio`
|
|
100
|
+
|
|
101
|
+
The package will automatically install required dependencies when you first use a feature.
|
|
102
|
+
|
|
103
|
+
### Minimal Example
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
import asyncio
|
|
107
|
+
from intellema_vdk import VoiceClient
|
|
108
|
+
|
|
109
|
+
async def main() -> None:
|
|
110
|
+
client = VoiceClient("livekit") # or "retell"
|
|
111
|
+
|
|
112
|
+
call_id: str = await client.start_outbound_call(
|
|
113
|
+
phone_number="+15551234567",
|
|
114
|
+
prompt_content="Hello from VoxChain!"
|
|
115
|
+
)
|
|
116
|
+
print(f"Call started: {call_id}")
|
|
117
|
+
|
|
118
|
+
await client.close()
|
|
119
|
+
|
|
120
|
+
if __name__ == "__main__":
|
|
121
|
+
asyncio.run(main())
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Configuration
|
|
125
|
+
|
|
126
|
+
Create a `.env` file with your credentials:
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
# LiveKit (if using)
|
|
130
|
+
LIVEKIT_URL=wss://your-livekit-server.com
|
|
131
|
+
LIVEKIT_API_KEY=your_api_key
|
|
132
|
+
LIVEKIT_API_SECRET=your_api_secret
|
|
133
|
+
SIP_OUTBOUND_TRUNK_ID=your_trunk_id
|
|
134
|
+
|
|
135
|
+
# Retell + Twilio (if using)
|
|
136
|
+
TWILIO_ACCOUNT_SID=your_sid
|
|
137
|
+
TWILIO_AUTH_TOKEN=your_token
|
|
138
|
+
TWILIO_PHONE_NUMBER=+15551234567
|
|
139
|
+
RETELL_API_KEY=your_retell_key
|
|
140
|
+
RETELL_AGENT_ID=your_agent_id
|
|
141
|
+
|
|
142
|
+
# STT
|
|
143
|
+
OPENAI_API_KEY=sk-your-key
|
|
144
|
+
AGENT_API_URL=https://your-agent-api.com/process # Optional
|
|
145
|
+
|
|
146
|
+
# TTS (set appropriate API key according to provider)
|
|
147
|
+
TOGETHER_API_KEY=your_together_key
|
|
148
|
+
OPENAI_API_KEY=your_openai_key  # Same variable as in the STT section above; define it only once
|
|
149
|
+
|
|
150
|
+
# Optional: AWS for recordings
|
|
151
|
+
AWS_ACCESS_KEY_ID=your_key
|
|
152
|
+
AWS_SECRET_ACCESS_KEY=your_secret
|
|
153
|
+
AWS_REGION=us-east-1
|
|
154
|
+
AWS_S3_BUCKET=your-bucket
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
See [docs/guides/configuration.md](docs/guides/configuration.md) for detailed setup.
|
|
158
|
+
|
|
159
|
+
## Core Modules
|
|
160
|
+
|
|
161
|
+
### Voice Providers
|
|
162
|
+
|
|
163
|
+
Choose between LiveKit or Retell for voice calls.
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
from intellema_vdk import VoiceClient
|
|
167
|
+
|
|
168
|
+
# LiveKit for advanced features
|
|
169
|
+
livekit = VoiceClient("livekit")
|
|
170
|
+
|
|
171
|
+
# Retell for quick setup
|
|
172
|
+
retell = VoiceClient("retell")
|
|
173
|
+
|
|
174
|
+
# Common interface
|
|
175
|
+
call_id: str = await livekit.start_outbound_call("+15551234567", "Hello!")
|
|
176
|
+
await livekit.start_recording(call_id)
|
|
177
|
+
await livekit.delete_room(call_id)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
**Detailed Documentation:**
|
|
181
|
+
- [docs/api/providers.md](docs/api/providers.md) - Full API reference with examples
|
|
182
|
+
- [docs/guides/examples.md](docs/guides/examples.md) - Complete usage patterns
|
|
183
|
+
|
|
184
|
+
**Important for Retell:**
|
|
185
|
+
Before making calls, register your Twilio number:
|
|
186
|
+
```bash
|
|
187
|
+
python import_phone_number.py
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### Speech-to-Text (STT)
|
|
191
|
+
|
|
192
|
+
Transcribe audio files with OpenAI Whisper - supports single files and batch processing:
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
from intellema_vdk import STTManager
|
|
196
|
+
|
|
197
|
+
async def transcribe() -> None:
|
|
198
|
+
stt = STTManager()
|
|
199
|
+
try:
|
|
200
|
+
# Single file
|
|
201
|
+
result = await stt.transcribe_audio("recording.wav")
|
|
202
|
+
print(result["text"])
|
|
203
|
+
|
|
204
|
+
# Batch process folder
|
|
205
|
+
results = await stt.transcribe_audio(
|
|
206
|
+
"recordings/",
|
|
207
|
+
batch_process=True,
|
|
208
|
+
output_file="transcripts.json"
|
|
209
|
+
)
|
|
210
|
+
finally:
|
|
211
|
+
await stt.close()
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
**Detailed Documentation:** [docs/api/stt.md](docs/api/stt.md)
|
|
215
|
+
|
|
216
|
+
### Text-to-Speech (TTS)
|
|
217
|
+
|
|
218
|
+
Stream text to audio in real-time with support for multiple providers:
|
|
219
|
+
|
|
220
|
+
```python
|
|
221
|
+
from intellema_vdk import TTSStreamer
|
|
222
|
+
|
|
223
|
+
# Together AI (low latency)
|
|
224
|
+
tts = TTSStreamer(provider="together")
|
|
225
|
+
|
|
226
|
+
# OpenAI (high quality, 6 voices)
|
|
227
|
+
tts = TTSStreamer(
|
|
228
|
+
provider="openai",
|
|
229
|
+
voice="nova", # alloy, echo, fable, onyx, nova, shimmer
|
|
230
|
+
model="tts-1-hd" # tts-1 or tts-1-hd
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
# Feed text as it's generated
|
|
234
|
+
for chunk in llm_stream:
|
|
235
|
+
tts.feed(chunk)
|
|
236
|
+
|
|
237
|
+
tts.flush() # Wait for completion
|
|
238
|
+
tts.close()
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
**Detailed Documentation:** [docs/api/tts.md](docs/api/tts.md)
|
|
242
|
+
|
|
243
|
+
**Sample Implementation:** Run the included chatbot demo:
|
|
244
|
+
```bash
|
|
245
|
+
python sample_implementation.py
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## Advanced Usage
|
|
249
|
+
|
|
250
|
+
### Logging
|
|
251
|
+
|
|
252
|
+
Configure logging to see VDK internals:
|
|
253
|
+
|
|
254
|
+
```python
|
|
255
|
+
from intellema_vdk import setup_logging
|
|
256
|
+
|
|
257
|
+
setup_logging() # INFO level by default
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
Custom configuration:
|
|
261
|
+
|
|
262
|
+
```python
|
|
263
|
+
import logging
|
|
264
|
+
setup_logging(
|
|
265
|
+
log_level=logging.DEBUG,
|
|
266
|
+
log_format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
|
267
|
+
)
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Recording Calls
|
|
271
|
+
|
|
272
|
+
```python
|
|
273
|
+
# LiveKit or Retell
|
|
274
|
+
recording_id: str = await client.start_recording(
|
|
275
|
+
call_id=call_id,
|
|
276
|
+
upload_to_s3=True,
|
|
277
|
+
wait_for_completion=False
|
|
278
|
+
)
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
### Streaming to RTMP
|
|
282
|
+
|
|
283
|
+
```python
|
|
284
|
+
await client.start_stream(
|
|
285
|
+
call_id=call_id,
|
|
286
|
+
rtmp_urls=["rtmp://your-server.com/live/key"]
|
|
287
|
+
)
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## Documentation
|
|
291
|
+
|
|
292
|
+
- **[Getting Started Guide](docs/guides/getting_started.md)** - Setup and first steps
|
|
293
|
+
- **[Configuration Guide](docs/guides/configuration.md)** - Environment variables
|
|
294
|
+
- **[Examples](docs/guides/examples.md)** - Common usage patterns
|
|
295
|
+
- **API Reference:**
|
|
296
|
+
- [Voice Providers](docs/api/providers.md) - LiveKit & Retell
|
|
297
|
+
- [STT](docs/api/stt.md) - Speech-to-Text
|
|
298
|
+
- [TTS](docs/api/tts.md) - Text-to-Speech
|
|
299
|
+
|
|
300
|
+
## Important Notes
|
|
301
|
+
|
|
302
|
+
- **Retell `delete_room` Limitation**: Only works if the user speaks, triggering the agent to check the termination variable. For immediate hangup, use the Twilio API directly.
|
|
303
|
+
- **Retell Recording**: Retell automatically records calls. The `start_recording` method retrieves the recording URL after the call ends (no need to explicitly start recording during the call). Ensure recording is enabled for your Retell agent in the dashboard.
|
|
304
|
+
- **Retell Audio Streaming**: Real-time audio streaming (`start_stream`) is **not supported** for Retell phone calls. Retell deprecated their Audio WebSocket API at the end of 2024. Use `start_recording()` to retrieve recordings after the call ends.
|
|
305
|
+
- **Type Safety**: All examples include type annotations for better IDE support.
|
|
306
|
+
- **Async Required**: All voice and STT operations are async; use `asyncio.run()`.
|
|
307
|
+
|
|
308
|
+
## License
|
|
309
|
+
|
|
310
|
+
See [LICENSE](LICENSE) file for details.
|
|
311
|
+
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# Intellema VDK
|
|
2
|
+
|
|
3
|
+
Intellema VDK is a unified Voice Development Kit that simplifies integration with voice agent platforms like LiveKit and Retell AI. Build scalable voice applications with a consistent, provider-agnostic API.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Voice Providers**: LiveKit and Retell AI support with unified interface
|
|
8
|
+
- **Outbound Calling**: Initiate phone calls via SIP trunks
|
|
9
|
+
- **Speech-to-Text**: Transcribe audio with OpenAI Whisper
|
|
10
|
+
- **Text-to-Speech**: Low-latency streaming TTS via Together AI or OpenAI
|
|
11
|
+
- **Recording & Streaming**: Save to S3 or stream to RTMP
|
|
12
|
+
- **Participant Management**: Tokens, muting, kick controls
|
|
13
|
+
- **Real-time Messaging**: Send data packets during calls
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
### Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Minimal installation (core dependencies only)
|
|
21
|
+
pip install intellema-vdk
|
|
22
|
+
|
|
23
|
+
# Install with specific provider support
|
|
24
|
+
pip install intellema-vdk[livekit] # LiveKit voice provider
|
|
25
|
+
pip install intellema-vdk[retell] # Retell voice provider
|
|
26
|
+
pip install intellema-vdk[stt] # Speech-to-Text features
|
|
27
|
+
pip install intellema-vdk[tts] # Text-to-Speech features
|
|
28
|
+
pip install intellema-vdk[audio] # Audio playback (PyAudio)
|
|
29
|
+
|
|
30
|
+
# Install all features
|
|
31
|
+
pip install intellema-vdk[all]
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
**Requirements:** Python 3.8+
|
|
35
|
+
|
|
36
|
+
**Note on PyAudio:** The `audio` extra requires PortAudio to be installed on your system:
|
|
37
|
+
- **Windows**: Usually works with `pip install pyaudio`, or use `pipwin install pyaudio`
|
|
38
|
+
- **macOS**: `brew install portaudio && pip install pyaudio`
|
|
39
|
+
- **Linux**: `sudo apt-get install portaudio19-dev && pip install pyaudio`
|
|
40
|
+
|
|
41
|
+
The package will automatically install required dependencies when you first use a feature.
|
|
42
|
+
|
|
43
|
+
### Minimal Example
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
import asyncio
|
|
47
|
+
from intellema_vdk import VoiceClient
|
|
48
|
+
|
|
49
|
+
async def main() -> None:
|
|
50
|
+
client = VoiceClient("livekit") # or "retell"
|
|
51
|
+
|
|
52
|
+
call_id: str = await client.start_outbound_call(
|
|
53
|
+
phone_number="+15551234567",
|
|
54
|
+
prompt_content="Hello from VoxChain!"
|
|
55
|
+
)
|
|
56
|
+
print(f"Call started: {call_id}")
|
|
57
|
+
|
|
58
|
+
await client.close()
|
|
59
|
+
|
|
60
|
+
if __name__ == "__main__":
|
|
61
|
+
asyncio.run(main())
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Configuration
|
|
65
|
+
|
|
66
|
+
Create a `.env` file with your credentials:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
# LiveKit (if using)
|
|
70
|
+
LIVEKIT_URL=wss://your-livekit-server.com
|
|
71
|
+
LIVEKIT_API_KEY=your_api_key
|
|
72
|
+
LIVEKIT_API_SECRET=your_api_secret
|
|
73
|
+
SIP_OUTBOUND_TRUNK_ID=your_trunk_id
|
|
74
|
+
|
|
75
|
+
# Retell + Twilio (if using)
|
|
76
|
+
TWILIO_ACCOUNT_SID=your_sid
|
|
77
|
+
TWILIO_AUTH_TOKEN=your_token
|
|
78
|
+
TWILIO_PHONE_NUMBER=+15551234567
|
|
79
|
+
RETELL_API_KEY=your_retell_key
|
|
80
|
+
RETELL_AGENT_ID=your_agent_id
|
|
81
|
+
|
|
82
|
+
# STT
|
|
83
|
+
OPENAI_API_KEY=sk-your-key
|
|
84
|
+
AGENT_API_URL=https://your-agent-api.com/process # Optional
|
|
85
|
+
|
|
86
|
+
# TTS (set appropriate API key according to provider)
|
|
87
|
+
TOGETHER_API_KEY=your_together_key
|
|
88
|
+
OPENAI_API_KEY=your_openai_key  # Same variable as in the STT section above; define it only once
|
|
89
|
+
|
|
90
|
+
# Optional: AWS for recordings
|
|
91
|
+
AWS_ACCESS_KEY_ID=your_key
|
|
92
|
+
AWS_SECRET_ACCESS_KEY=your_secret
|
|
93
|
+
AWS_REGION=us-east-1
|
|
94
|
+
AWS_S3_BUCKET=your-bucket
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
See [docs/guides/configuration.md](docs/guides/configuration.md) for detailed setup.
|
|
98
|
+
|
|
99
|
+
## Core Modules
|
|
100
|
+
|
|
101
|
+
### Voice Providers
|
|
102
|
+
|
|
103
|
+
Choose between LiveKit or Retell for voice calls.
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from intellema_vdk import VoiceClient
|
|
107
|
+
|
|
108
|
+
# LiveKit for advanced features
|
|
109
|
+
livekit = VoiceClient("livekit")
|
|
110
|
+
|
|
111
|
+
# Retell for quick setup
|
|
112
|
+
retell = VoiceClient("retell")
|
|
113
|
+
|
|
114
|
+
# Common interface
|
|
115
|
+
call_id: str = await livekit.start_outbound_call("+15551234567", "Hello!")
|
|
116
|
+
await livekit.start_recording(call_id)
|
|
117
|
+
await livekit.delete_room(call_id)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
**Detailed Documentation:**
|
|
121
|
+
- [docs/api/providers.md](docs/api/providers.md) - Full API reference with examples
|
|
122
|
+
- [docs/guides/examples.md](docs/guides/examples.md) - Complete usage patterns
|
|
123
|
+
|
|
124
|
+
**Important for Retell:**
|
|
125
|
+
Before making calls, register your Twilio number:
|
|
126
|
+
```bash
|
|
127
|
+
python import_phone_number.py
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Speech-to-Text (STT)
|
|
131
|
+
|
|
132
|
+
Transcribe audio files with OpenAI Whisper - supports single files and batch processing:
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from intellema_vdk import STTManager
|
|
136
|
+
|
|
137
|
+
async def transcribe() -> None:
|
|
138
|
+
stt = STTManager()
|
|
139
|
+
try:
|
|
140
|
+
# Single file
|
|
141
|
+
result = await stt.transcribe_audio("recording.wav")
|
|
142
|
+
print(result["text"])
|
|
143
|
+
|
|
144
|
+
# Batch process folder
|
|
145
|
+
results = await stt.transcribe_audio(
|
|
146
|
+
"recordings/",
|
|
147
|
+
batch_process=True,
|
|
148
|
+
output_file="transcripts.json"
|
|
149
|
+
)
|
|
150
|
+
finally:
|
|
151
|
+
await stt.close()
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**Detailed Documentation:** [docs/api/stt.md](docs/api/stt.md)
|
|
155
|
+
|
|
156
|
+
### Text-to-Speech (TTS)
|
|
157
|
+
|
|
158
|
+
Stream text to audio in real-time with support for multiple providers:
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
from intellema_vdk import TTSStreamer
|
|
162
|
+
|
|
163
|
+
# Together AI (low latency)
|
|
164
|
+
tts = TTSStreamer(provider="together")
|
|
165
|
+
|
|
166
|
+
# OpenAI (high quality, 6 voices)
|
|
167
|
+
tts = TTSStreamer(
|
|
168
|
+
provider="openai",
|
|
169
|
+
voice="nova", # alloy, echo, fable, onyx, nova, shimmer
|
|
170
|
+
model="tts-1-hd" # tts-1 or tts-1-hd
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
# Feed text as it's generated
|
|
174
|
+
for chunk in llm_stream:
|
|
175
|
+
tts.feed(chunk)
|
|
176
|
+
|
|
177
|
+
tts.flush() # Wait for completion
|
|
178
|
+
tts.close()
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
**Detailed Documentation:** [docs/api/tts.md](docs/api/tts.md)
|
|
182
|
+
|
|
183
|
+
**Sample Implementation:** Run the included chatbot demo:
|
|
184
|
+
```bash
|
|
185
|
+
python sample_implementation.py
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Advanced Usage
|
|
189
|
+
|
|
190
|
+
### Logging
|
|
191
|
+
|
|
192
|
+
Configure logging to see VDK internals:
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
from intellema_vdk import setup_logging
|
|
196
|
+
|
|
197
|
+
setup_logging() # INFO level by default
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Custom configuration:
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
import logging
|
|
204
|
+
setup_logging(
|
|
205
|
+
log_level=logging.DEBUG,
|
|
206
|
+
log_format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
|
207
|
+
)
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Recording Calls
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
# LiveKit or Retell
|
|
214
|
+
recording_id: str = await client.start_recording(
|
|
215
|
+
call_id=call_id,
|
|
216
|
+
upload_to_s3=True,
|
|
217
|
+
wait_for_completion=False
|
|
218
|
+
)
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### Streaming to RTMP
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
await client.start_stream(
|
|
225
|
+
call_id=call_id,
|
|
226
|
+
rtmp_urls=["rtmp://your-server.com/live/key"]
|
|
227
|
+
)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
## Documentation
|
|
231
|
+
|
|
232
|
+
- **[Getting Started Guide](docs/guides/getting_started.md)** - Setup and first steps
|
|
233
|
+
- **[Configuration Guide](docs/guides/configuration.md)** - Environment variables
|
|
234
|
+
- **[Examples](docs/guides/examples.md)** - Common usage patterns
|
|
235
|
+
- **API Reference:**
|
|
236
|
+
- [Voice Providers](docs/api/providers.md) - LiveKit & Retell
|
|
237
|
+
- [STT](docs/api/stt.md) - Speech-to-Text
|
|
238
|
+
- [TTS](docs/api/tts.md) - Text-to-Speech
|
|
239
|
+
|
|
240
|
+
## Important Notes
|
|
241
|
+
|
|
242
|
+
- **Retell `delete_room` Limitation**: Only works if the user speaks, triggering the agent to check the termination variable. For immediate hangup, use the Twilio API directly.
|
|
243
|
+
- **Retell Recording**: Retell automatically records calls. The `start_recording` method retrieves the recording URL after the call ends (no need to explicitly start recording during the call). Ensure recording is enabled for your Retell agent in the dashboard.
|
|
244
|
+
- **Retell Audio Streaming**: Real-time audio streaming (`start_stream`) is **not supported** for Retell phone calls. Retell deprecated their Audio WebSocket API at the end of 2024. Use `start_recording()` to retrieve recordings after the call ends.
|
|
245
|
+
- **Type Safety**: All examples include type annotations for better IDE support.
|
|
246
|
+
- **Async Required**: All voice and STT operations are async; use `asyncio.run()`.
|
|
247
|
+
|
|
248
|
+
## License
|
|
249
|
+
|
|
250
|
+
See [LICENSE](LICENSE) file for details.
|
|
251
|
+
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from typing import Optional, List, Any
|
|
2
|
+
|
|
3
|
+
from .providers import (
|
|
4
|
+
VoiceProvider,
|
|
5
|
+
LiveKitManager,
|
|
6
|
+
RetellManager,
|
|
7
|
+
# LiveKit Exceptions
|
|
8
|
+
LiveKitError,
|
|
9
|
+
LiveKitConfigurationError,
|
|
10
|
+
LiveKitRoomError,
|
|
11
|
+
LiveKitSIPError,
|
|
12
|
+
LiveKitDispatchError,
|
|
13
|
+
LiveKitEgressError,
|
|
14
|
+
# Retell Exceptions
|
|
15
|
+
RetellError,
|
|
16
|
+
RetellConfigurationError,
|
|
17
|
+
RetellAPIError,
|
|
18
|
+
RetellPhoneNumberError,
|
|
19
|
+
RetellCallError,
|
|
20
|
+
)
|
|
21
|
+
from .stt import (
|
|
22
|
+
STTManager,
|
|
23
|
+
STTAgentError,
|
|
24
|
+
STTConfigurationError,
|
|
25
|
+
STTError,
|
|
26
|
+
STTFileError,
|
|
27
|
+
STTTranscriptionError
|
|
28
|
+
)
|
|
29
|
+
from .tts import (
|
|
30
|
+
TTSStreamer,
|
|
31
|
+
TTSError,
|
|
32
|
+
TTSConfigurationError,
|
|
33
|
+
TTSStreamError,
|
|
34
|
+
TTSAPIError,
|
|
35
|
+
)
|
|
36
|
+
from .utils.logger_config import setup_logging
|
|
37
|
+
|
|
38
|
+
# Public API of the package, grouped by area (same names, same order as before).
__all__ = [
    # Factory and convenience helpers defined in this module
    "VoiceClient", "start_outbound_call",
    # Provider protocol and concrete managers
    "VoiceProvider", "LiveKitManager", "RetellManager",
    # Speech and logging entry points
    "STTManager", "TTSStreamer", "setup_logging",
    # LiveKit exceptions
    "LiveKitError", "LiveKitConfigurationError", "LiveKitRoomError",
    "LiveKitSIPError", "LiveKitDispatchError", "LiveKitEgressError",
    # Retell exceptions
    "RetellError", "RetellConfigurationError", "RetellAPIError",
    "RetellPhoneNumberError", "RetellCallError",
    # STT exceptions
    "STTAgentError", "STTConfigurationError", "STTError",
    "STTFileError", "STTTranscriptionError",
    # TTS exceptions
    "TTSError", "TTSConfigurationError", "TTSStreamError", "TTSAPIError",
]
|
|
68
|
+
|
|
69
|
+
def VoiceClient(provider: str, **kwargs) -> VoiceProvider:
    """
    Build and return the voice provider client selected by name.

    Args:
        provider: Provider identifier; either "livekit" or "retell".
        **kwargs: Forwarded unchanged to the selected manager's constructor.

    Returns:
        A LiveKitManager for "livekit" or a RetellManager for "retell".

    Raises:
        ValueError: If ``provider`` is not one of the supported names.
    """
    # Each supported name returns immediately; anything else falls through
    # to the error at the bottom.
    if provider == "livekit":
        return LiveKitManager(**kwargs)
    if provider == "retell":
        return RetellManager(**kwargs)
    raise ValueError(f"Unknown provider: {provider}. Supported providers: 'livekit', 'retell'")
|
|
86
|
+
|
|
87
|
+
async def start_outbound_call(provider: str, *args, **kwargs):
    """
    Convenience wrapper to start an outbound call with a short-lived client.

    Args:
        provider: Provider name passed to ``VoiceClient`` ("livekit" or "retell").
        *args: Positional arguments forwarded to the client's
            ``start_outbound_call``.
        **kwargs: Keyword arguments forwarded to the client's
            ``start_outbound_call``.

    Returns:
        Whatever the provider client's ``start_outbound_call`` returns.

    Raises:
        ValueError: If ``provider`` is not supported (raised by ``VoiceClient``).
    """
    client = VoiceClient(provider)
    try:
        return await client.start_outbound_call(*args, **kwargs)
    finally:
        # The client is created here and never handed back to the caller, so
        # this is the only place it can be released.
        # NOTE(review): the package README awaits ``client.close()`` on both
        # providers; confirm every manager exposes an async ``close``.
        close = getattr(client, "close", None)
        if close is not None:
            await close()
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import os
from typing import Optional

from dotenv import load_dotenv
|
|
3
|
+
|
|
4
|
+
# Load environment variables when this module is imported.
# ".env.local" is read first, then the default ".env" lookup runs.
# NOTE(review): python-dotenv does not override already-set variables by
# default, so values from ".env.local" (and the real process environment)
# presumably take precedence over ".env" — confirm against the installed version.
load_dotenv(dotenv_path=".env.local")
load_dotenv()
|
|
7
|
+
|
|
8
|
+
def get_env(key: str, default: Optional[str] = None) -> Optional[str]:
    """Return the value of environment variable ``key``.

    Args:
        key: Name of the environment variable to look up.
        default: Value returned when the variable is not set.

    Returns:
        The variable's value, or ``default`` (``None`` unless given) when the
        variable is not set.
    """
    return os.getenv(key, default)
|
|
11
|
+
|
|
12
|
+
# TTS Configuration
# Sample rate for TTS audio; presumably in Hz — confirm against the TTS client.
TTS_AUDIO_SAMPLE_RATE = 24000
# Size of a WAV header; presumably in bytes (44 is the usual length of a plain
# PCM WAV header) — confirm how the TTS code uses it.
WAV_HEADER_SIZE = 44
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from .livekit import (
|
|
2
|
+
LiveKitManager,
|
|
3
|
+
LiveKitError,
|
|
4
|
+
LiveKitConfigurationError,
|
|
5
|
+
LiveKitRoomError,
|
|
6
|
+
LiveKitSIPError,
|
|
7
|
+
LiveKitDispatchError,
|
|
8
|
+
LiveKitEgressError,
|
|
9
|
+
)
|
|
10
|
+
from .retell import (
|
|
11
|
+
RetellManager,
|
|
12
|
+
RetellError,
|
|
13
|
+
RetellConfigurationError,
|
|
14
|
+
RetellAPIError,
|
|
15
|
+
RetellPhoneNumberError,
|
|
16
|
+
RetellCallError,
|
|
17
|
+
)
|
|
18
|
+
from .protocols import VoiceProvider
|
|
19
|
+
|
|
20
|
+
# Names re-exported by the providers package (same names, same order as before).
__all__ = [
    # LiveKit manager and its exceptions
    "LiveKitManager",
    "LiveKitError", "LiveKitConfigurationError", "LiveKitRoomError",
    "LiveKitSIPError", "LiveKitDispatchError", "LiveKitEgressError",
    # Retell manager and its exceptions
    "RetellManager",
    "RetellError", "RetellConfigurationError", "RetellAPIError",
    "RetellPhoneNumberError", "RetellCallError",
    # Shared provider protocol
    "VoiceProvider",
]
|