intellema-vdk 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. intellema_vdk/__init__.py +67 -10
  2. intellema_vdk/config.py +14 -0
  3. intellema_vdk/providers/__init__.py +35 -0
  4. intellema_vdk/providers/livekit/__init__.py +19 -0
  5. intellema_vdk/providers/livekit/client.py +612 -0
  6. intellema_vdk/providers/livekit/exceptions.py +23 -0
  7. intellema_vdk/providers/protocols.py +33 -0
  8. intellema_vdk/providers/retell/__init__.py +17 -0
  9. intellema_vdk/providers/retell/client.py +468 -0
  10. intellema_vdk/providers/retell/exceptions.py +19 -0
  11. intellema_vdk/{retell_lib → providers/retell}/import_phone_number.py +1 -1
  12. intellema_vdk/stt/__init__.py +17 -0
  13. intellema_vdk/stt/client.py +482 -0
  14. intellema_vdk/stt/exceptions.py +19 -0
  15. intellema_vdk/tts/__init__.py +15 -0
  16. intellema_vdk/tts/__pycache__/__init__.cpython-312.pyc +0 -0
  17. intellema_vdk/tts/__pycache__/client.cpython-312.pyc +0 -0
  18. intellema_vdk/tts/__pycache__/exceptions.cpython-312.pyc +0 -0
  19. intellema_vdk/tts/__pycache__/providers.cpython-312.pyc +0 -0
  20. intellema_vdk/tts/client.py +541 -0
  21. intellema_vdk/tts/exceptions.py +15 -0
  22. intellema_vdk/tts/providers.py +293 -0
  23. intellema_vdk/utils/logger_config.py +41 -0
  24. intellema_vdk-0.2.2.dist-info/METADATA +311 -0
  25. intellema_vdk-0.2.2.dist-info/RECORD +29 -0
  26. {intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/WHEEL +1 -1
  27. intellema_vdk/livekit_lib/__init__.py +0 -3
  28. intellema_vdk/livekit_lib/client.py +0 -280
  29. intellema_vdk/retell_lib/retell_client.py +0 -248
  30. intellema_vdk/speech_lib/__init__.py +0 -2
  31. intellema_vdk/speech_lib/stt_client.py +0 -108
  32. intellema_vdk/speech_lib/tts_streamer.py +0 -188
  33. intellema_vdk-0.2.0.dist-info/METADATA +0 -221
  34. intellema_vdk-0.2.0.dist-info/RECORD +0 -14
  35. /intellema_vdk/{retell_lib/__init__.py → stt/providers.py} +0 -0
  36. {intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/licenses/LICENSE +0 -0
  37. {intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/top_level.txt +0 -0
intellema_vdk/speech_lib/tts_streamer.py
@@ -1,188 +0,0 @@
- import os
- import queue
- import threading
- import time
- import pyaudio
- from together import Together
-
-
- class TTSStreamer:
-     def __init__(self, api_key=None):
-         self.api_key = api_key or os.environ.get("TOGETHER_API_KEY")
-         if not self.api_key:
-             raise ValueError(
-                 "Together API Key is missing. Set TOGETHER_API_KEY env var."
-             )
-
-         self.client = Together(api_key=self.api_key)
-
-         # Audio Config
-         self.p = pyaudio.PyAudio()
-         self.stream = self.p.open(
-             format=pyaudio.paInt16, channels=1, rate=24000, output=True
-         )
-
-         # Queues
-         self.text_queue = queue.Queue()
-         self.audio_queue = queue.Queue()
-
-         # State
-         self.text_buffer = ""
-         self.is_running = True
-         self.playback_finished = threading.Event()
-
-         # Start Threads
-         self.fetcher_thread = threading.Thread(target=self._tts_fetcher, daemon=True)
-         self.player_thread = threading.Thread(target=self._audio_player, daemon=True)
-
-         self.fetcher_thread.start()
-         self.player_thread.start()
-
-     def feed(self, text_chunk):
-         """Feed text tokens from LLM."""
-         if not self.is_running or not text_chunk:
-             return
-
-         self.text_buffer += text_chunk
-         sentence_endings = [".", "!", "?", "\n"]
-
-         for ending in sentence_endings:
-             if ending in self.text_buffer:
-                 parts = self.text_buffer.split(ending)
-
-                 # Send all complete sentences
-                 for sentence in parts[:-1]:
-                     if sentence.strip():
-                         self.text_queue.put(sentence.strip() + ending)
-
-                 # Keep the remainder
-                 self.text_buffer = parts[-1]
-
-     def flush(self):
-         """
-         Graceful finish: Push remaining text, signal end, and wait for audio to finish playing.
-         """
-         # 1. Push remaining buffer
-         if self.text_buffer.strip():
-             self.text_queue.put(self.text_buffer.strip())
-
-         # 2. Signal Fetcher to stop expecting text
-         self.text_queue.put(None)
-
-         # 3. Wait for the player to signal it's done
-         # We use a timeout to prevent infinite hanging
-         self.playback_finished.wait(timeout=10.0)
-
-     def close(self):
-         """
-         Immediate kill: Stop threads and close audio stream.
-         """
-         if not self.is_running:
-             return
-
-         self.is_running = False
-
-         # Clear queues to unblock threads if they are stuck
-         with self.text_queue.mutex:
-             self.text_queue.queue.clear()
-         with self.audio_queue.mutex:
-             self.audio_queue.queue.clear()
-
-         try:
-             self.stream.stop_stream()
-             self.stream.close()
-             self.p.terminate()
-         except Exception:
-             pass
-
-     def stop(self):
-         """Alias for close"""
-         self.close()
-
-     def _tts_fetcher(self):
-         while self.is_running:
-             try:
-                 text = self.text_queue.get(timeout=0.5)
-             except queue.Empty:
-                 continue
-
-             if text is None:
-                 self.audio_queue.put(None)  # Signal player to finish
-                 break
-
-             try:
-                 response = self.client.audio.speech.create(
-                     model="canopylabs/orpheus-3b-0.1-ft",
-                     input=text,
-                     voice="tara",
-                     stream=True,
-                     response_format="raw",
-                     response_encoding="pcm_s16le",
-                 )
-
-                 for chunk in response:
-                     if not self.is_running:
-                         break
-
-                     if isinstance(chunk, tuple):
-                         if len(chunk) > 1:
-                             sub_iterator = chunk[1]
-                             # Check if explicitly bytes (non-iterable in this context intended for iteration)
-                             if isinstance(sub_iterator, bytes):
-                                 self._process_audio_bytes(sub_iterator)
-                             else:
-                                 try:
-                                     for sub_chunk in sub_iterator:
-                                         if isinstance(sub_chunk, bytes):
-                                             self._process_audio_bytes(sub_chunk)
-                                         elif hasattr(sub_chunk, "content"):
-                                             self._process_audio_bytes(sub_chunk.content)
-                                         elif hasattr(sub_chunk, "data"):
-                                             self._process_audio_bytes(sub_chunk.data)
-                                 except TypeError:
-                                     pass
-
-                     elif hasattr(chunk, "content"):
-                         audio_data = chunk.content
-                         if audio_data:
-                             self._process_audio_bytes(audio_data)
-
-                     elif isinstance(chunk, bytes):
-                         self._process_audio_bytes(chunk)
-
-             except Exception as e:
-                 print(f"TTS Error: {e}")
-             finally:
-                 self.text_queue.task_done()
-
-     def _process_audio_bytes(self, audio_data):
-         """Helper to strip headers and push to queue"""
-         # Strip WAV header if present (RIFF...WAVE)
-         if len(audio_data) >= 44 and audio_data[:4] == b"RIFF":
-             audio_data = audio_data[44:]
-         self.audio_queue.put(audio_data)
-
-     def _audio_player(self):
-         buffer = b""
-         while self.is_running:
-             try:
-                 audio_data = self.audio_queue.get(timeout=0.5)
-             except queue.Empty:
-                 continue
-
-             if audio_data is None:
-                 self.playback_finished.set()
-                 break
-
-             buffer += audio_data
-
-             if len(buffer) >= 2:
-                 frame_count = len(buffer) // 2
-                 bytes_to_play = frame_count * 2
-                 play_chunk = buffer[:bytes_to_play]
-                 buffer = buffer[bytes_to_play:]
-
-                 try:
-                     self.stream.write(play_chunk)
-                 except OSError:
-                     break
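The removed playback path above strips a 44-byte RIFF/WAV header when a chunk begins with one and only ever writes a whole number of 16-bit frames to PyAudio, carrying any odd trailing byte into the next chunk. A minimal standalone sketch of that framing logic (the helper names here are illustrative, not part of the package):

```python
def strip_wav_header(audio: bytes) -> bytes:
    """Drop a standard 44-byte RIFF/WAV header if the chunk begins with one."""
    if len(audio) >= 44 and audio[:4] == b"RIFF":
        return audio[44:]
    return audio


def split_whole_frames(buffer: bytes, sample_width: int = 2) -> tuple[bytes, bytes]:
    """Split a byte buffer into (playable, remainder), where playable holds whole frames."""
    playable_len = (len(buffer) // sample_width) * sample_width
    return buffer[:playable_len], buffer[playable_len:]


# A 7-byte chunk of 16-bit mono PCM yields 6 playable bytes; 1 byte carries over.
playable, remainder = split_whole_frames(strip_wav_header(b"\x01\x02\x03\x04\x05\x06\x07"))
assert (len(playable), len(remainder)) == (6, 1)
```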
intellema_vdk-0.2.0.dist-info/METADATA
@@ -1,221 +0,0 @@
- Metadata-Version: 2.4
- Name: intellema-vdk
- Version: 0.2.0
- Summary: A Voice Development Kit for different Voice Agent Platforms
- Author: Intellema
- License: MIT License
-
- Copyright (c) 2026 Intellema
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-
- Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
- Requires-Python: >=3.8
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Requires-Dist: livekit-api>=1.1.0
- Requires-Dist: python-dotenv>=1.0.0
- Requires-Dist: boto3>=1.28.0
- Requires-Dist: twilio
- Requires-Dist: retell-sdk
- Requires-Dist: requests
- Requires-Dist: openai
- Requires-Dist: httpx
- Requires-Dist: pyaudio
- Requires-Dist: together
- Requires-Dist: langchain-openai
- Requires-Dist: langchain-core
- Dynamic: license-file
-
- # Intellema VDK
-
- Intellema VDK is a unified Voice Development Kit designed to simplify the integration and management of various voice agent platforms. It provides a consistent, factory-based API to interact with providers like LiveKit and Retell AI, enabling developers to build scalable voice applications with ease. Whether you need real-time streaming, outbound calling, or participant management, Intellema VDK abstracts the complexity into a single, intuitive interface.
-
- ## Features
-
- - **Room Management**: Create and delete rooms dynamically.
- - **Participant Management**: Generate tokens, kick users, and mute tracks.
- - **SIP Outbound Calling**: Initiate calls to phone numbers via SIP trunks.
- - **Streaming & Recording**: Stream to RTMP destinations and record room sessions directly to AWS S3.
- - **Real-time Alerts**: Send data packets (alerts) to participants.
-
- ## Prerequisites
-
- - Python 3.8+
- - A SIP Provider (for outbound calls)
-
- ## Installation
-
- ```bash
- pip install intellema-vdk
- ```
-
- ## Usage
-
- ### Unified Wrapper (Factory Pattern)
-
- The recommended way to use the library is via the `VoiceClient` factory:
-
- ```python
- import asyncio
- from intellema_vdk import VoiceClient
-
- async def main():
-     # 1. Initialize the client
-     client = VoiceClient("livekit")
-
-     # 2. Use methods directly
-     call_id = await client.start_outbound_call(
-         phone_number="+15551234567",
-         prompt_content="Hello from LiveKit"
-     )
-
-     # 3. Clean API calls
-     await client.mute_participant(call_id, "user-1", "track-1", True)
-     await client.close()
-
- if __name__ == "__main__":
-     asyncio.run(main())
- ```
-
- ### Convenience Function
-
- For quick one-off calls, you can still use the helper:
-
- ```python
- from intellema_vdk import start_outbound_call
-
- await start_outbound_call("livekit", phone_number="+1...")
- ```
-
- ## Speech To Text (STT)
-
- The `STTManager` class provides an interface for transcribing audio files using OpenAI's Whisper model and optionally posting the transcribed text to a specified agent API.
-
- ### Usage
-
- Here's how to use the `STTManager` to transcribe an audio file and post the result:
- Ensure to set OPENAI_API_KEY and AGENT_API_URL in your `.env` file.
-
- ```python
- import asyncio
- from intellema_vdk import STTManager
-
- async def main():
-     # 1- Initialize the STTManager
-     stt_manager = STTManager()
-
-     try:
-         # 2- Transcribe an audio file and post the result to your agent API URL (if provided)
-         # Replace "path/to/your/audio.mp3" with the actual file path
-         transcript = await stt_manager.transcribe_and_post("path/to/your/audio.mp3")
-         print(f"Transcription: {transcript}")
-
-     except FileNotFoundError:
-         print("The audio file was not found.")
-     except Exception as e:
-         print(f"An error occurred: {e}")
-     finally:
-         # 3- Clean up
-         await stt_manager.close()
-
- if __name__ == "__main__":
-     asyncio.run(main())
- ```
-
- ## TTS Streaming
-
- The `TTSStreamer` class provides low-latency text-to-speech streaming using Together AI's inference engine. It enables real-time voice synthesis from streaming LLM responses.
-
- ### Running the Sample implementation
-
- We provide a ready-to-use sample that connects LangChain (OpenAI) with the TTS Streamer.
-
- 1. **Configure Keys**: Ensure `OPENAI_API_KEY` and `TOGETHER_API_KEY` are set in your `.env`.
- 2. **Run the script**:
-    ```bash
-    python sample_implementation.py
-    ```
-
- ### Library Usage
-
- You can integrate the streamer into your own loops:
-
- ```python
- from intellema_vdk import TTSStreamer
-
- # 1. Initialize per turn
- tts = TTSStreamer()
-
- # 2. Feed text chunks as they are generated
- for chunk in llm_response_stream:
-     tts.feed(chunk)
-
- # 3. Flush and clean up
- tts.flush()
- tts.close()
- ```
-
- ## Configuration
-
- Create a `.env` file in the root directory:
-
- ```bash
- LIVEKIT_URL=wss://your-livekit-domain.com
- LIVEKIT_API_KEY=your-key
- LIVEKIT_API_SECRET=your-secret
- SIP_OUTBOUND_TRUNK_ID=your-trunk-id
- TWILIO_ACCOUNT_SID=your-sid
- TWILIO_AUTH_TOKEN=your-token
- TWILIO_PHONE_NUMBER=your-number
- RETELL_API_KEY=your-retell-key
- RETELL_AGENT_ID=your-agent-id
- TOGETHER_API_KEY=your-together-key
- OPENAI_API_KEY=your-openai-key
- AGENT_API_URL=https://your-agent-api.com/endpoint
- ```
-
- ## Retell Setup
-
- **Important:** Before initiating calls with Retell, you must register your Twilio phone number with Retell. This binds your agent to the number and allows Retell to handle the call flow.
-
- You can register your number in two ways:
-
- 1. **Using the Helper Script:**
-    We provide an interactive script to guide you through the process:
-    ```bash
-    python import_phone_number.py
-    ```
-
- 2. **Programmatically:**
-    ```python
-    from intellema_vdk.retell_lib.retell_client import RetellManager
-
-    manager = RetellManager()
-    # Optional: Pass termination_uri if you have a SIP trunk
-    manager.import_phone_number(nickname="My Twilio Number")
-    ```
-
- ## Notes
-
- - **Retell `delete_room` Limitation**: The `delete_room` method for Retell relies on updating dynamic variables during the conversation loop. As a result, it **only works if the user speaks something** which triggers the agent to check the variable and terminate the call.
-
-
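The Configuration section of the removed README expects these credentials in a `.env` file, and `python-dotenv` is a declared dependency of the 0.2.0 wheel. A minimal sketch of loading a few of those variables before constructing a client, assuming the variable names listed in that section:

```python
import os

from dotenv import load_dotenv

# Pull KEY=value pairs from a local .env file into os.environ.
load_dotenv()

required = ["LIVEKIT_URL", "LIVEKIT_API_KEY", "LIVEKIT_API_SECRET", "TOGETHER_API_KEY"]
missing = [name for name in required if not os.environ.get(name)]
if missing:
    raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")
```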
intellema_vdk-0.2.0.dist-info/RECORD
@@ -1,14 +0,0 @@
- intellema_vdk/__init__.py,sha256=64pm2TLqhGG225JLddco1kSOpLaD3eGByWvMpaHUUX0,1231
- intellema_vdk/livekit_lib/__init__.py,sha256=9JsOBswDivM8tRw9EF1ql0wwFnHvwjcPWT-umqad98o,68
- intellema_vdk/livekit_lib/client.py,sha256=UxOuT9I-YPtHopx4dXoGKRAJvLXKFgUdtrAcHdR4a-Q,10687
- intellema_vdk/retell_lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- intellema_vdk/retell_lib/import_phone_number.py,sha256=pOt1k6De6-bt2xOPzMMR0nI4Ha6BzLjm19qenyy9RN8,3081
- intellema_vdk/retell_lib/retell_client.py,sha256=qT00NJWi2rJyLWswWTx5fGl5mwPdy6QurQt1Enac0rU,10793
- intellema_vdk/speech_lib/__init__.py,sha256=TXdyAAS6AfQfln_QlIvx_uXU-ksugXzC2N9hrjW1_MQ,73
- intellema_vdk/speech_lib/stt_client.py,sha256=YB8-mJUtQKhqEC4zhipJUb6Y8LqJx0Vv_c4iIxuUjJM,4054
- intellema_vdk/speech_lib/tts_streamer.py,sha256=qs2mzP0vKqv2eKvGJSCTee3mzeJGS9nji0Yy3Y-sOTc,6453
- intellema_vdk-0.2.0.dist-info/licenses/LICENSE,sha256=41qw3yuvY1SpTkwLebZTVYOKk9OIe1Kr6I1S6Y5mp8Y,1087
- intellema_vdk-0.2.0.dist-info/METADATA,sha256=j53oDXr8Xcq7nkP4v6bWeK62z0yR-Pa2yivGaCw-abc,7363
- intellema_vdk-0.2.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
- intellema_vdk-0.2.0.dist-info/top_level.txt,sha256=nQ_0rJRkEthHH0bJYoPAVVgQiO6Uw6c_mHnfeROG14U,14
- intellema_vdk-0.2.0.dist-info/RECORD,,