intellema-vdk 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. intellema_vdk/__init__.py +67 -10
  2. intellema_vdk/config.py +14 -0
  3. intellema_vdk/providers/__init__.py +35 -0
  4. intellema_vdk/providers/livekit/__init__.py +19 -0
  5. intellema_vdk/providers/livekit/client.py +612 -0
  6. intellema_vdk/providers/livekit/exceptions.py +23 -0
  7. intellema_vdk/providers/protocols.py +33 -0
  8. intellema_vdk/providers/retell/__init__.py +17 -0
  9. intellema_vdk/providers/retell/client.py +468 -0
  10. intellema_vdk/providers/retell/exceptions.py +19 -0
  11. intellema_vdk/{retell_lib → providers/retell}/import_phone_number.py +1 -1
  12. intellema_vdk/stt/__init__.py +17 -0
  13. intellema_vdk/stt/client.py +482 -0
  14. intellema_vdk/stt/exceptions.py +19 -0
  15. intellema_vdk/tts/__init__.py +15 -0
  16. intellema_vdk/tts/__pycache__/__init__.cpython-312.pyc +0 -0
  17. intellema_vdk/tts/__pycache__/client.cpython-312.pyc +0 -0
  18. intellema_vdk/tts/__pycache__/exceptions.cpython-312.pyc +0 -0
  19. intellema_vdk/tts/__pycache__/providers.cpython-312.pyc +0 -0
  20. intellema_vdk/tts/client.py +541 -0
  21. intellema_vdk/tts/exceptions.py +15 -0
  22. intellema_vdk/tts/providers.py +293 -0
  23. intellema_vdk/utils/logger_config.py +41 -0
  24. intellema_vdk-0.2.2.dist-info/METADATA +311 -0
  25. intellema_vdk-0.2.2.dist-info/RECORD +29 -0
  26. {intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/WHEEL +1 -1
  27. intellema_vdk/livekit_lib/__init__.py +0 -3
  28. intellema_vdk/livekit_lib/client.py +0 -280
  29. intellema_vdk/retell_lib/retell_client.py +0 -248
  30. intellema_vdk/speech_lib/__init__.py +0 -2
  31. intellema_vdk/speech_lib/stt_client.py +0 -108
  32. intellema_vdk/speech_lib/tts_streamer.py +0 -188
  33. intellema_vdk-0.2.0.dist-info/METADATA +0 -221
  34. intellema_vdk-0.2.0.dist-info/RECORD +0 -14
  35. /intellema_vdk/{retell_lib/__init__.py → stt/providers.py} +0 -0
  36. {intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/licenses/LICENSE +0 -0
  37. {intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/top_level.txt +0 -0
intellema_vdk/tts/providers.py
@@ -0,0 +1,293 @@
+ """TTS provider implementations for Together AI and OpenAI."""
+
+ from typing import Iterator, Protocol, runtime_checkable, Literal, TypedDict
+ import logging
+ from ..config import (
+     WAV_HEADER_SIZE
+ )
+
+ logger = logging.getLogger(__name__)
+
+ # Type definitions for provider-specific configurations
+
+ TogetherTTSModel = Literal["canopylabs/orpheus-3b-0.1-ft"]
+ TogetherTTSVoice = Literal["tara"]
+
+ OpenAITTSModel = Literal["tts-1", "tts-1-hd"]
+ OpenAITTSVoice = Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
+
+
+ class TogetherTTSConfig(TypedDict, total=False):
+     """Configuration options for Together AI TTS provider."""
+     model: TogetherTTSModel
+     voice: TogetherTTSVoice
+
+
+ class OpenAITTSConfig(TypedDict, total=False):
+     """Configuration options for OpenAI TTS provider."""
+     model: OpenAITTSModel
+     voice: OpenAITTSVoice
+
+
+ @runtime_checkable
+ class TTSProvider(Protocol):
+     """
+     Protocol defining the interface for Text-to-Speech providers.
+
+     All TTS providers must implement the stream method to generate
+     audio from text input.
+     """
+
+     def stream(self, text: str) -> Iterator[bytes]:
+         """
+         Generate audio data from text input as a stream of bytes.
+
+         Args:
+             text: The text to convert to speech.
+
+         Returns:
+             An iterator yielding raw audio bytes (PCM format).
+
+         Raises:
+             Exception: If the TTS API call fails.
+         """
+         ...
+
+
+ class TogetherTTSProvider:
+     """
+     Together AI TTS provider implementation.
+
+     Uses the Together API to generate speech from text with the Orpheus model.
+     Supports streaming audio generation with low latency.
+
+     Attributes:
+         model: The TTS model identifier.
+         voice: The voice identifier for speech generation.
+         client: The Together API client instance.
+
+     Example:
+         >>> provider = TogetherTTSProvider(
+         ...     api_key="your-api-key",
+         ...     model="canopylabs/orpheus-3b-0.1-ft",
+         ...     voice="tara"
+         ... )
+         >>> for audio_chunk in provider.stream("Hello world"):
+         ...     # Process audio chunk
+         ...     pass
+     """
+
+     def __init__(
+         self,
+         api_key: str,
+         model: TogetherTTSModel = "canopylabs/orpheus-3b-0.1-ft",
+         voice: TogetherTTSVoice = "tara"
+     ) -> None:
+         """
+         Initialize the Together TTS provider.
+
+         Args:
+             api_key: Together API key for authentication.
+             model: The TTS model to use. Currently supports:
+                 - "canopylabs/orpheus-3b-0.1-ft" (default)
+             voice: The voice to use for speech generation. Currently supports:
+                 - "tara" (default)
+
+         Raises:
+             ImportError: If the together package is not installed.
+         """
+         try:
+             from together import Together
+         except ImportError:
+             import subprocess
+             import sys
+             print("Together AI SDK is not installed. Installing now...")
+             print("Run: pip install intellema-vdk[tts]")
+             try:
+                 subprocess.check_call([sys.executable, "-m", "pip", "install", "together>=1.0.0"])
+                 from together import Together
+                 print("✓ Together AI SDK installed successfully!")
+             except Exception as e:
+                 raise ImportError(
+                     "Failed to install together. Please install manually:\n"
+                     " pip install intellema-vdk[tts]\n"
+                     "or:\n"
+                     " pip install together>=1.0.0"
+                 ) from e
+
+         self.api_key = api_key
+         self.model = model
+         self.voice = voice
+         self.client = Together(api_key=api_key)
+
+     def stream(self, text: str) -> Iterator[bytes]:
+         """
+         Stream audio from Together API.
+
+         Args:
+             text: The text to convert to speech.
+
+         Yields:
+             Raw audio bytes in PCM format.
+         """
+         try:
+             response = self.client.audio.speech.create(
+                 model=self.model,
+                 input=text,
+                 voice=self.voice,
+                 stream=True,
+                 response_format="raw",
+                 response_encoding="pcm_s16le",
+             )
+
+             # The Together API returns a generator directly
+             # Try to handle it as a byte stream first
+             for chunk in response:
+                 # Handle tuple format first (most common for Together API)
+                 if isinstance(chunk, tuple):
+                     if len(chunk) > 1:
+                         # Tuple format (metadata, data)
+                         sub_iterator = chunk[1]
+                         if isinstance(sub_iterator, (bytes, bytearray)):
+                             processed = self._strip_wav_header(sub_iterator)
+                             if len(processed) > 0:
+                                 yield processed
+                         else:
+                             try:
+                                 for sub_chunk in sub_iterator:
+                                     if isinstance(sub_chunk, (bytes, bytearray)):
+                                         processed = self._strip_wav_header(sub_chunk)
+                                         if len(processed) > 0:
+                                             yield processed
+                                     elif hasattr(sub_chunk, "data") and sub_chunk.data:
+                                         # TogetherResponse objects have data attribute
+                                         processed = self._strip_wav_header(sub_chunk.data)
+                                         if len(processed) > 0:
+                                             yield processed
+                                     elif hasattr(sub_chunk, "content") and sub_chunk.content:
+                                         processed = self._strip_wav_header(sub_chunk.content)
+                                         if len(processed) > 0:
+                                             yield processed
+                             except TypeError as te:
+                                 logger.warning(f"Non-iterable sub-iterator: {type(sub_iterator)}, error: {te}")
+                 # Handle different response structures
+                 elif isinstance(chunk, (bytes, bytearray)):
+                     # Direct bytes - this is what we expect
+                     processed = self._strip_wav_header(chunk)
+                     if len(processed) > 0:
+                         yield processed
+                 elif hasattr(chunk, "content"):
+                     # Object with content attribute
+                     processed = self._strip_wav_header(chunk.content)
+                     if len(processed) > 0:
+                         yield processed
+
+         except Exception as e:
+             logger.error(f"Together TTS stream error: {e}", exc_info=True)
+             raise
+
+     def _strip_wav_header(self, audio_data: bytes) -> bytes:
+         """Remove WAV header if present, returning raw PCM data."""
+         if len(audio_data) >= WAV_HEADER_SIZE and audio_data[:4] == b"RIFF":
+             return audio_data[WAV_HEADER_SIZE:]
+         return audio_data
+
+
+ class OpenAITTSProvider:
+     """
+     OpenAI TTS provider implementation.
+
+     Uses the OpenAI API to generate high-quality speech from text.
+     Supports multiple voices and quality levels.
+
+     Attributes:
+         model: The TTS model identifier.
+         voice: The voice identifier for speech generation.
+         client: The OpenAI API client instance.
+
+     Example:
+         >>> provider = OpenAITTSProvider(
+         ...     api_key="your-api-key",
+         ...     model="tts-1-hd",
+         ...     voice="nova"
+         ... )
+         >>> for audio_chunk in provider.stream("Hello world"):
+         ...     # Process audio chunk
+         ...     pass
+     """
+
+     def __init__(
+         self,
+         api_key: str,
+         model: OpenAITTSModel = "tts-1",
+         voice: OpenAITTSVoice = "alloy"
+     ) -> None:
+         """
+         Initialize the OpenAI TTS provider.
+
+         Args:
+             api_key: OpenAI API key for authentication.
+             model: The TTS model to use:
+                 - "tts-1": Standard quality, lower latency (default)
+                 - "tts-1-hd": High definition quality, higher latency
+             voice: The voice to use for speech generation:
+                 - "alloy": Neutral and balanced (default)
+                 - "echo": Male voice
+                 - "fable": British accent
+                 - "onyx": Deep and authoritative
+                 - "nova": Energetic and youthful
+                 - "shimmer": Warm and expressive
+
+         Raises:
+             ImportError: If the openai package is not installed.
+         """
+         try:
+             from openai import OpenAI
+         except ImportError:
+             import subprocess
+             import sys
+             print("OpenAI SDK is not installed. Installing now...")
+             print("Run: pip install intellema-vdk[tts]")
+             try:
+                 subprocess.check_call([sys.executable, "-m", "pip", "install", "openai>=1.0.0"])
+                 from openai import OpenAI
+                 print("✓ OpenAI SDK installed successfully!")
+             except Exception as e:
+                 raise ImportError(
+                     "Failed to install openai. Please install manually:\n"
+                     " pip install intellema-vdk[tts]\n"
+                     "or:\n"
+                     " pip install openai>=1.0.0"
+                 ) from e
+
+         self.api_key = api_key
+         self.model = model
+         self.voice = voice
+         self.client = OpenAI(api_key=api_key)
+
+     def stream(self, text: str) -> Iterator[bytes]:
+         """
+         Stream audio from OpenAI API.
+
+         Args:
+             text: The text to convert to speech.
+
+         Yields:
+             Raw audio bytes in PCM format (converted from MP3/Opus).
+         """
+         try:
+             response = self.client.audio.speech.create(
+                 model=self.model,
+                 voice=self.voice,
+                 input=text,
+                 response_format="pcm" # Request PCM format directly
+             )
+
+             # OpenAI streaming response
+             for chunk in response.iter_bytes(chunk_size=4096):
+                 if chunk:
+                     yield chunk
+
+         except Exception as e:
+             logger.error(f"OpenAI TTS stream error: {e}", exc_info=True)
+             raise
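Because `TTSProvider` above is a `runtime_checkable` `Protocol`, any object with a compatible `stream` method satisfies it structurally; no subclassing is required. A minimal sketch (the `SilenceTTSProvider` class is hypothetical and not part of the package; the import path is taken from the file layout in this diff):

```python
from typing import Iterator

from intellema_vdk.tts.providers import TTSProvider  # module path per the file list above


class SilenceTTSProvider:
    """Toy provider: yields silent 16-bit mono PCM instead of calling a TTS API."""

    def __init__(self, sample_rate: int = 16000, chunk_ms: int = 100) -> None:
        self.sample_rate = sample_rate
        self.chunk_ms = chunk_ms

    def stream(self, text: str) -> Iterator[bytes]:
        samples_per_chunk = self.sample_rate * self.chunk_ms // 1000
        for _ in text.split():
            # Two zero bytes per sample == one silent s16le frame; one chunk per word.
            yield b"\x00\x00" * samples_per_chunk


provider = SilenceTTSProvider()
# isinstance works only because TTSProvider is decorated with @runtime_checkable.
assert isinstance(provider, TTSProvider)
```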
intellema_vdk/utils/logger_config.py
@@ -0,0 +1,41 @@
+ import logging # Standard library for logging events.
+ import sys # Provides access to system-specific parameters and functions.
+
+
+ def setup_logging(
+     log_level: int = logging.INFO,
+     log_format: str = "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+     date_format: str = "%Y-%m-%d %H:%M:%S",
+ ) -> None:
+     """Configures basic console logging for the application.
+
+     This function should be called once at the beginning of the application's
+     execution to set up a consistent logging format and level. It configures
+     the root logger to output messages to the standard output.
+
+     Args:
+         log_level (int): The logging level for the root logger.
+             Defaults to `logging.INFO`.
+             Example accepted values: `logging.DEBUG`, `logging.INFO`,
+             `logging.WARNING`, `logging.ERROR`.
+         log_format (str): The format string for log messages.
+             Defaults to a standard format including timestamp, level, and name.
+         date_format (str): The format string for the date/time in log messages.
+             Defaults to "%Y-%m-%d %H:%M:%S".
+
+     Example:
+         >>> import logging
+         >>> # Set up logging with a DEBUG level
+         >>> setup_logging(log_level=logging.DEBUG)
+         >>>
+         >>> # Now, any logger will inherit this configuration
+         >>> logger = logging.getLogger("my_app")
+         >>> logger.debug("This is a debug message.")
+         >>> logger.info("This is an info message.")
+     """
+     logging.basicConfig(
+         level=log_level,
+         format=log_format,
+         datefmt=date_format,
+         stream=sys.stdout, # Direct logs to standard output.
+     )
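`setup_logging` only configures the root logger via `basicConfig`, and the SDK modules above create their loggers with `logging.getLogger(__name__)` (e.g. `intellema_vdk.tts.providers`), so the package's verbosity can also be tuned through the `intellema_vdk` parent logger. A small sketch, assuming the top-level `setup_logging` export documented in the 0.2.2 README further down:

```python
import logging

from intellema_vdk import setup_logging  # export shown in the README below

setup_logging(log_level=logging.DEBUG)

# Keep the application chatty but quiet the SDK's own module loggers.
logging.getLogger("intellema_vdk").setLevel(logging.WARNING)
```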
intellema_vdk-0.2.2.dist-info/METADATA
@@ -0,0 +1,311 @@
+ Metadata-Version: 2.4
+ Name: intellema-vdk
+ Version: 0.2.2
+ Summary: A Voice Development Kit for different Voice Agent Platforms
+ Author: Intellema
+ License: MIT License
+
+ Copyright (c) 2026 Intellema
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: python-dotenv>=1.0.0
+ Requires-Dist: requests>=2.31.0
+ Requires-Dist: httpx>=0.24.0
+ Provides-Extra: livekit
+ Requires-Dist: livekit-api>=1.1.0; extra == "livekit"
+ Requires-Dist: boto3>=1.28.0; extra == "livekit"
+ Provides-Extra: retell
+ Requires-Dist: retell-sdk>=2.0.0; extra == "retell"
+ Requires-Dist: twilio>=8.0.0; extra == "retell"
+ Requires-Dist: boto3>=1.28.0; extra == "retell"
+ Provides-Extra: stt
+ Requires-Dist: openai>=1.0.0; extra == "stt"
+ Provides-Extra: tts
+ Requires-Dist: together>=1.0.0; extra == "tts"
+ Requires-Dist: openai>=1.0.0; extra == "tts"
+ Provides-Extra: audio
+ Requires-Dist: pyaudio>=0.2.13; extra == "audio"
+ Provides-Extra: all
+ Requires-Dist: livekit-api>=1.1.0; extra == "all"
+ Requires-Dist: retell-sdk>=2.0.0; extra == "all"
+ Requires-Dist: twilio>=8.0.0; extra == "all"
+ Requires-Dist: boto3>=1.28.0; extra == "all"
+ Requires-Dist: openai>=1.0.0; extra == "all"
+ Requires-Dist: together>=1.0.0; extra == "all"
+ Requires-Dist: pyaudio>=0.2.13; extra == "all"
+ Dynamic: license-file
+
+ # Intellema VDK
+
+ Intellema VDK is a unified Voice Development Kit that simplifies integration with voice agent platforms like LiveKit and Retell AI. Build scalable voice applications with a consistent, provider-agnostic API.
+
+ ## Features
+
+ - **Voice Providers**: LiveKit and Retell AI support with unified interface
+ - **Outbound Calling**: Initiate phone calls via SIP trunks
+ - **Speech-to-Text**: Transcribe audio with OpenAI Whisper
+ - **Text-to-Speech**: Low-latency streaming TTS via Together AI
+ - **Recording & Streaming**: Save to S3 or stream to RTMP
+ - **Participant Management**: Tokens, muting, kick controls
+ - **Real-time Messaging**: Send data packets during calls
+
+ ## Quick Start
+
+ ### Installation
+
+ ```bash
+ # Minimal installation (core dependencies only)
+ pip install intellema-vdk
+
+ # Install with specific provider support
+ pip install intellema-vdk[livekit] # LiveKit voice provider
+ pip install intellema-vdk[retell] # Retell voice provider
+ pip install intellema-vdk[stt] # Speech-to-Text features
+ pip install intellema-vdk[tts] # Text-to-Speech features
+ pip install intellema-vdk[audio] # Audio playback (PyAudio)
+
+ # Install all features
+ pip install intellema-vdk[all]
+ ```
+
+ **Requirements:** Python 3.8+
+
+ **Note on PyAudio:** The `audio` extra requires PortAudio to be installed on your system:
+ - **Windows**: Usually works with `pip install pyaudio`, or use `pipwin install pyaudio`
+ - **macOS**: `brew install portaudio && pip install pyaudio`
+ - **Linux**: `sudo apt-get install portaudio19-dev && pip install pyaudio`
+
+ The package will automatically install required dependencies when you first use a feature.
+
+ ### Minimal Example
+
+ ```python
+ import asyncio
+ from intellema_vdk import VoiceClient
+
+ async def main() -> None:
+     client = VoiceClient("livekit") # or "retell"
+
+     call_id: str = await client.start_outbound_call(
+         phone_number="+15551234567",
+         prompt_content="Hello from VoxChain!"
+     )
+     print(f"Call started: {call_id}")
+
+     await client.close()
+
+ if __name__ == "__main__":
+     asyncio.run(main())
+ ```
+
+ ### Configuration
+
+ Create a `.env` file with your credentials:
+
+ ```bash
+ # LiveKit (if using)
+ LIVEKIT_URL=wss://your-livekit-server.com
+ LIVEKIT_API_KEY=your_api_key
+ LIVEKIT_API_SECRET=your_api_secret
+ SIP_OUTBOUND_TRUNK_ID=your_trunk_id
+
+ # Retell + Twilio (if using)
+ TWILIO_ACCOUNT_SID=your_sid
+ TWILIO_AUTH_TOKEN=your_token
+ TWILIO_PHONE_NUMBER=+15551234567
+ RETELL_API_KEY=your_retell_key
+ RETELL_AGENT_ID=your_agent_id
+
+ # STT
+ OPENAI_API_KEY=sk-your-key
+ AGENT_API_URL=https://your-agent-api.com/process # Optional
+
+ # TTS (set appropriate API key according to provider)
+ TOGETHER_API_KEY=your_together_key
+ OPENAI_API_KEY=your_openai_key
+
+ # Optional: AWS for recordings
+ AWS_ACCESS_KEY_ID=your_key
+ AWS_SECRET_ACCESS_KEY=your_secret
+ AWS_REGION=us-east-1
+ AWS_S3_BUCKET=your-bucket
+ ```
+
+ See [docs/guides/configuration.md](docs/guides/configuration.md) for detailed setup.
+
+ ## Core Modules
+
+ ### Voice Providers
+
+ Choose between LiveKit or Retell for voice calls.
+
+ ```python
+ from intellema_vdk import VoiceClient
+
+ # LiveKit for advanced features
+ livekit = VoiceClient("livekit")
+
+ # Retell for quick setup
+ retell = VoiceClient("retell")
+
+ # Common interface
+ call_id: str = await livekit.start_outbound_call("+15551234567", "Hello!")
+ await livekit.start_recording(call_id)
+ await livekit.delete_room(call_id)
+ ```
+
+ **Detailed Documentation:**
+ - [docs/api/providers.md](docs/api/providers.md) - Full API reference with examples
+ - [docs/guides/examples.md](docs/guides/examples.md) - Complete usage patterns
+
+ **Important for Retell:**
+ Before making calls, register your Twilio number:
+ ```bash
+ python import_phone_number.py
+ ```
+
+ ### Speech-to-Text (STT)
+
+ Transcribe audio files with OpenAI Whisper - supports single files and batch processing:
+
+ ```python
+ from intellema_vdk import STTManager
+
+ async def transcribe() -> None:
+     stt = STTManager()
+     try:
+         # Single file
+         result = await stt.transcribe_audio("recording.wav")
+         print(result["text"])
+
+         # Batch process folder
+         results = await stt.transcribe_audio(
+             "recordings/",
+             batch_process=True,
+             output_file="transcripts.json"
+         )
+     finally:
+         await stt.close()
+ ```
+
+ **Detailed Documentation:** [docs/api/stt.md](docs/api/stt.md)
+
+ ### Text-to-Speech (TTS)
+
+ Stream text to audio in real-time with support for multiple providers:
+
+ ```python
+ from intellema_vdk import TTSStreamer
+
+ # Together AI (low latency)
+ tts = TTSStreamer(provider="together")
+
+ # OpenAI (high quality, 6 voices)
+ tts = TTSStreamer(
+     provider="openai",
+     voice="nova", # alloy, echo, fable, onyx, nova, shimmer
+     model="tts-1-hd" # tts-1 or tts-1-hd
+ )
+
+ # Feed text as it's generated
+ for chunk in llm_stream:
+     tts.feed(chunk)
+
+ tts.flush() # Wait for completion
+ tts.close()
+ ```
+
+ **Detailed Documentation:** [docs/api/tts.md](docs/api/tts.md)
+
+ **Sample Implementation:** Run the included chatbot demo:
+ ```bash
+ python sample_implementation.py
+ ```
+
+ ## Advanced Usage
+
+ ### Logging
+
+ Configure logging to see VDK internals:
+
+ ```python
+ from intellema_vdk import setup_logging
+
+ setup_logging() # INFO level by default
+ ```
+
+ Custom configuration:
+
+ ```python
+ import logging
+ setup_logging(
+     log_level=logging.DEBUG,
+     log_format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+ )
+ ```
+
+ ### Recording Calls
+
+ ```python
+ # LiveKit or Retell
+ recording_id: str = await client.start_recording(
+     call_id=call_id,
+     upload_to_s3=True,
+     wait_for_completion=False
+ )
+ ```
+
+ ### Streaming to RTMP
+
+ ```python
+ await client.start_stream(
+     call_id=call_id,
+     rtmp_urls=["rtmp://your-server.com/live/key"]
+ )
+ ```
+
+ ## Documentation
+
+ - **[Getting Started Guide](docs/guides/getting_started.md)** - Setup and first steps
+ - **[Configuration Guide](docs/guides/configuration.md)** - Environment variables
+ - **[Examples](docs/guides/examples.md)** - Common usage patterns
+ - **API Reference:**
+   - [Voice Providers](docs/api/providers.md) - LiveKit & Retell
+   - [STT](docs/api/stt.md) - Speech-to-Text
+   - [TTS](docs/api/tts.md) - Text-to-Speech
+
+ ## Important Notes
+
+ - **Retell `delete_room` Limitation**: Only works if the user speaks, triggering the agent to check the termination variable. For immediate hangup, use Twilio API directly.
+ - **Retell Recording**: Retell automatically records calls. The `start_recording` method retrieves the recording URL after the call ends (no need to explicitly start recording during the call). Ensure recording is enabled for your Retell agent in the dashboard.
+ - **Retell Audio Streaming**: Real-time audio streaming (`start_stream`) is **not supported** for Retell phone calls. Retell deprecated their Audio WebSocket API at the end of 2024. Use `start_recording()` to retrieve recordings after the call ends.
+ - **Type Safety**: All examples include type annotations for better IDE support.
+ - **Async Required**: All voice and STT operations are async; use `asyncio.run()`.
+
+ ## License
+
+ See [LICENSE](LICENSE) file for details.
+
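The Important Notes in the README above defer immediate Retell hangups to the Twilio API. A hedged sketch of that direct call, assuming you track the Twilio Call SID of the active call yourself (the SID below is a placeholder, not from the package):

```python
import os

from twilio.rest import Client  # twilio>=8.0.0 is pulled in by the "retell" extra

twilio = Client(os.environ["TWILIO_ACCOUNT_SID"], os.environ["TWILIO_AUTH_TOKEN"])

# Setting an in-progress call's status to "completed" hangs it up immediately.
twilio.calls("CAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX").update(status="completed")
```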
intellema_vdk-0.2.2.dist-info/RECORD
@@ -0,0 +1,29 @@
+ intellema_vdk/__init__.py,sha256=zP113PCAKvkBJytAp-BlaOWM0wHMhWWI2JTraGbQ3AA,2299
+ intellema_vdk/config.py,sha256=ziQA2AQunE_FIi7r5xACZPEtQZkViOkx_GdgCscJs7k,335
+ intellema_vdk/providers/__init__.py,sha256=rojH0sZvWP3cPxCxdKZeRxT8dXHIrJjZYg-E1zgr6P4,777
+ intellema_vdk/providers/protocols.py,sha256=GzLQUdWZYzvV4ftfj_p80pfX8etMoQ5OpyDa59Jpwhw,1390
+ intellema_vdk/providers/livekit/__init__.py,sha256=yICha10muvINORDuIO6pdgHs69ubgBJ9TJ90ju2r6as,415
+ intellema_vdk/providers/livekit/client.py,sha256=osVtkRWoSRo3VI2jDSd4f2eHKLPOZhgE7RKBbBlLtrI,24758
+ intellema_vdk/providers/livekit/exceptions.py,sha256=TaDgkzfPaDFk0gUqU6LuH62hk4jIfjsJd_JH58npg5c,724
+ intellema_vdk/providers/retell/__init__.py,sha256=PqWbcugL5Zx40YXldIyPd4w8slD6um6QuDWw3MlJzxA,357
+ intellema_vdk/providers/retell/client.py,sha256=MdIU3eC6Z9u56HHrGS-4Mk5OKhU-jrUaR1pcfE_swsM,20497
+ intellema_vdk/providers/retell/exceptions.py,sha256=91XHePf_zz6toAakYW7gxOwPrNKNulg16iw-Bgzu-OI,598
+ intellema_vdk/providers/retell/import_phone_number.py,sha256=y1E3J0PykCVDsEuJAyC5xE_xCvWLV5KsuxFpPwHrE80,3056
+ intellema_vdk/stt/__init__.py,sha256=FUQGQoA1i7mUcD33Oi4QY7JqaPImZk96Ebk4TATr4AA,327
+ intellema_vdk/stt/client.py,sha256=a4WFpYOeL01rVTNbfdA_B-kUgyLnTtpx3Uif1zEzPrE,21043
+ intellema_vdk/stt/exceptions.py,sha256=d3uqA8EOubfz7_uSHUi2e2nRsr61v60TMlp8BdSNqpM,546
+ intellema_vdk/stt/providers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ intellema_vdk/tts/__init__.py,sha256=jv8bbauP2M6oJeq07VkGGP8eJ4PO7muj6icwC5JmWi8,273
+ intellema_vdk/tts/client.py,sha256=h6CwhFUXHcJJfCreRHubyG667q1Wn6QDH15mSOyXsqI,21590
+ intellema_vdk/tts/exceptions.py,sha256=7YZPeMXpEStWtZmp2S192Xmc6QmjzzgIRU_0mmq2rCQ,427
+ intellema_vdk/tts/providers.py,sha256=mUw5my-x4gF7mhL-pTr01PQa4u3IgPP3XrPctvhTRAE,10894
+ intellema_vdk/tts/__pycache__/__init__.cpython-312.pyc,sha256=_nxULtaXwbWZDOyroh9YiuhpUvsyzzHF8el8Fifmz3Y,377
+ intellema_vdk/tts/__pycache__/client.cpython-312.pyc,sha256=Vs2Ms9AyG7Qjt9HwKU82w0u7xzcu1p01UrRXzbAC9tg,22581
+ intellema_vdk/tts/__pycache__/exceptions.cpython-312.pyc,sha256=ySAYE4I73IQ-oIDwMcX15fBZQYkwMLta2ukK_A6XUKo,1041
+ intellema_vdk/tts/__pycache__/providers.cpython-312.pyc,sha256=ICStMr12B2QhxZ5-hRq8CnUUijt_as6EUAxuPIyOacQ,11681
+ intellema_vdk/utils/logger_config.py,sha256=fXiQbbYFq5o_XH9Q8Fgh_rdMTAa2QQcwjtWnlwSpDnM,1687
+ intellema_vdk-0.2.2.dist-info/licenses/LICENSE,sha256=41qw3yuvY1SpTkwLebZTVYOKk9OIe1Kr6I1S6Y5mp8Y,1087
+ intellema_vdk-0.2.2.dist-info/METADATA,sha256=E7xJ0uIbt3eEDZDzSfIXzmivrI7Mf1FsC4H0RLZedcY,9989
+ intellema_vdk-0.2.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ intellema_vdk-0.2.2.dist-info/top_level.txt,sha256=nQ_0rJRkEthHH0bJYoPAVVgQiO6Uw6c_mHnfeROG14U,14
+ intellema_vdk-0.2.2.dist-info/RECORD,,
{intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.10.1)
+ Generator: setuptools (80.10.2)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
intellema_vdk/livekit_lib/__init__.py
@@ -1,3 +0,0 @@
- from .client import LiveKitManager
-
- __all__ = ["LiveKitManager"]
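Taken together with the file list at the top, removing `livekit_lib` (along with `retell_lib` and `speech_lib`) points callers at the unified client documented in the new README. A hedged before/after sketch; the exact replacement mapping is an assumption, not stated anywhere in this diff:

```python
# 0.2.0 (module removed in 0.2.2)
# from intellema_vdk.livekit_lib import LiveKitManager
# manager = LiveKitManager()

# 0.2.2 -- provider selected by name through the unified facade
from intellema_vdk import VoiceClient

client = VoiceClient("livekit")  # or "retell"
```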