intellema-vdk 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. intellema_vdk/__init__.py +67 -10
  2. intellema_vdk/config.py +14 -0
  3. intellema_vdk/providers/__init__.py +35 -0
  4. intellema_vdk/providers/livekit/__init__.py +19 -0
  5. intellema_vdk/providers/livekit/client.py +612 -0
  6. intellema_vdk/providers/livekit/exceptions.py +23 -0
  7. intellema_vdk/providers/protocols.py +33 -0
  8. intellema_vdk/providers/retell/__init__.py +17 -0
  9. intellema_vdk/providers/retell/client.py +468 -0
  10. intellema_vdk/providers/retell/exceptions.py +19 -0
  11. intellema_vdk/{retell_lib → providers/retell}/import_phone_number.py +1 -1
  12. intellema_vdk/stt/__init__.py +17 -0
  13. intellema_vdk/stt/client.py +482 -0
  14. intellema_vdk/stt/exceptions.py +19 -0
  15. intellema_vdk/tts/__init__.py +15 -0
  16. intellema_vdk/tts/__pycache__/__init__.cpython-312.pyc +0 -0
  17. intellema_vdk/tts/__pycache__/client.cpython-312.pyc +0 -0
  18. intellema_vdk/tts/__pycache__/exceptions.cpython-312.pyc +0 -0
  19. intellema_vdk/tts/__pycache__/providers.cpython-312.pyc +0 -0
  20. intellema_vdk/tts/client.py +541 -0
  21. intellema_vdk/tts/exceptions.py +15 -0
  22. intellema_vdk/tts/providers.py +293 -0
  23. intellema_vdk/utils/logger_config.py +41 -0
  24. intellema_vdk-0.2.2.dist-info/METADATA +311 -0
  25. intellema_vdk-0.2.2.dist-info/RECORD +29 -0
  26. {intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/WHEEL +1 -1
  27. intellema_vdk/livekit_lib/__init__.py +0 -3
  28. intellema_vdk/livekit_lib/client.py +0 -280
  29. intellema_vdk/retell_lib/retell_client.py +0 -248
  30. intellema_vdk/speech_lib/__init__.py +0 -2
  31. intellema_vdk/speech_lib/stt_client.py +0 -108
  32. intellema_vdk/speech_lib/tts_streamer.py +0 -188
  33. intellema_vdk-0.2.0.dist-info/METADATA +0 -221
  34. intellema_vdk-0.2.0.dist-info/RECORD +0 -14
  35. /intellema_vdk/{retell_lib/__init__.py → stt/providers.py} +0 -0
  36. {intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/licenses/LICENSE +0 -0
  37. {intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/top_level.txt +0 -0
intellema_vdk/speech_lib/tts_streamer.py
@@ -1,188 +0,0 @@
- import os
- import queue
- import threading
- import time
- import pyaudio
- from together import Together
-
-
- class TTSStreamer:
-     def __init__(self, api_key=None):
-         self.api_key = api_key or os.environ.get("TOGETHER_API_KEY")
-         if not self.api_key:
-             raise ValueError(
-                 "Together API Key is missing. Set TOGETHER_API_KEY env var."
-             )
-
-         self.client = Together(api_key=self.api_key)
-
-         # Audio Config
-         self.p = pyaudio.PyAudio()
-         self.stream = self.p.open(
-             format=pyaudio.paInt16, channels=1, rate=24000, output=True
-         )
-
-         # Queues
-         self.text_queue = queue.Queue()
-         self.audio_queue = queue.Queue()
-
-         # State
-         self.text_buffer = ""
-         self.is_running = True
-         self.playback_finished = threading.Event()
-
-         # Start Threads
-         self.fetcher_thread = threading.Thread(target=self._tts_fetcher, daemon=True)
-         self.player_thread = threading.Thread(target=self._audio_player, daemon=True)
-
-         self.fetcher_thread.start()
-         self.player_thread.start()
-
-     def feed(self, text_chunk):
-         """Feed text tokens from LLM."""
-         if not self.is_running or not text_chunk:
-             return
-
-         self.text_buffer += text_chunk
-         sentence_endings = [".", "!", "?", "\n"]
-
-         for ending in sentence_endings:
-             if ending in self.text_buffer:
-                 parts = self.text_buffer.split(ending)
-
-                 # Send all complete sentences
-                 for sentence in parts[:-1]:
-                     if sentence.strip():
-                         self.text_queue.put(sentence.strip() + ending)
-
-                 # Keep the remainder
-                 self.text_buffer = parts[-1]
-
-     def flush(self):
-         """
-         Graceful finish: Push remaining text, signal end, and wait for audio to finish playing.
-         """
-         # 1. Push remaining buffer
-         if self.text_buffer.strip():
-             self.text_queue.put(self.text_buffer.strip())
-
-         # 2. Signal Fetcher to stop expecting text
-         self.text_queue.put(None)
-
-         # 3. Wait for the player to signal it's done
-         # We use a timeout to prevent infinite hanging
-         self.playback_finished.wait(timeout=10.0)
-
-     def close(self):
-         """
-         Immediate kill: Stop threads and close audio stream.
-         """
-         if not self.is_running:
-             return
-
-         self.is_running = False
-
-         # Clear queues to unblock threads if they are stuck
-         with self.text_queue.mutex:
-             self.text_queue.queue.clear()
-         with self.audio_queue.mutex:
-             self.audio_queue.queue.clear()
-
-         try:
-             self.stream.stop_stream()
-             self.stream.close()
-             self.p.terminate()
-         except Exception:
-             pass
-
-     def stop(self):
-         """Alias for close"""
-         self.close()
-
-     def _tts_fetcher(self):
-         while self.is_running:
-             try:
-                 text = self.text_queue.get(timeout=0.5)
-             except queue.Empty:
-                 continue
-
-             if text is None:
-                 self.audio_queue.put(None)  # Signal player to finish
-                 break
-
-             try:
-                 response = self.client.audio.speech.create(
-                     model="canopylabs/orpheus-3b-0.1-ft",
-                     input=text,
-                     voice="tara",
-                     stream=True,
-                     response_format="raw",
-                     response_encoding="pcm_s16le",
-                 )
-
-                 for chunk in response:
-                     if not self.is_running:
-                         break
-
-                     if isinstance(chunk, tuple):
-                         if len(chunk) > 1:
-                             sub_iterator = chunk[1]
-                             # Check if explicitly bytes (non-iterable in this context intended for iteration)
-                             if isinstance(sub_iterator, bytes):
-                                 self._process_audio_bytes(sub_iterator)
-                             else:
-                                 try:
-                                     for sub_chunk in sub_iterator:
-                                         if isinstance(sub_chunk, bytes):
-                                             self._process_audio_bytes(sub_chunk)
-                                         elif hasattr(sub_chunk, "content"):
-                                             self._process_audio_bytes(sub_chunk.content)
-                                         elif hasattr(sub_chunk, "data"):
-                                             self._process_audio_bytes(sub_chunk.data)
-                                 except TypeError:
-                                     pass
-
-                     elif hasattr(chunk, "content"):
-                         audio_data = chunk.content
-                         if audio_data:
-                             self._process_audio_bytes(audio_data)
-
-                     elif isinstance(chunk, bytes):
-                         self._process_audio_bytes(chunk)
-
-             except Exception as e:
-                 print(f"TTS Error: {e}")
-             finally:
-                 self.text_queue.task_done()
-
-     def _process_audio_bytes(self, audio_data):
-         """Helper to strip headers and push to queue"""
-         # Strip WAV header if present (RIFF...WAVE)
-         if len(audio_data) >= 44 and audio_data[:4] == b"RIFF":
-             audio_data = audio_data[44:]
-         self.audio_queue.put(audio_data)
-
-     def _audio_player(self):
-         buffer = b""
-         while self.is_running:
-             try:
-                 audio_data = self.audio_queue.get(timeout=0.5)
-             except queue.Empty:
-                 continue
-
-             if audio_data is None:
-                 self.playback_finished.set()
-                 break
-
-             buffer += audio_data
-
-             if len(buffer) >= 2:
-                 frame_count = len(buffer) // 2
-                 bytes_to_play = frame_count * 2
-                 play_chunk = buffer[:bytes_to_play]
-                 buffer = buffer[bytes_to_play:]
-
-                 try:
-                     self.stream.write(play_chunk)
-                 except OSError:
-                     break
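The removed playback path above strips a 44-byte RIFF/WAV header when a chunk begins with one and only ever writes a whole number of 16-bit frames to PyAudio, carrying any odd trailing byte into the next chunk. A minimal standalone sketch of that framing logic (the helper names here are illustrative, not part of the package):

```python
def strip_wav_header(audio: bytes) -> bytes:
    """Drop a standard 44-byte RIFF/WAV header if the chunk begins with one."""
    if len(audio) >= 44 and audio[:4] == b"RIFF":
        return audio[44:]
    return audio


def split_whole_frames(buffer: bytes, sample_width: int = 2) -> tuple[bytes, bytes]:
    """Split a byte buffer into (playable, remainder), where playable holds whole frames."""
    playable_len = (len(buffer) // sample_width) * sample_width
    return buffer[:playable_len], buffer[playable_len:]


# A 7-byte chunk of 16-bit mono PCM yields 6 playable bytes; 1 byte carries over.
playable, remainder = split_whole_frames(strip_wav_header(b"\x01\x02\x03\x04\x05\x06\x07"))
assert (len(playable), len(remainder)) == (6, 1)
```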
intellema_vdk-0.2.0.dist-info/METADATA
@@ -1,221 +0,0 @@
- Metadata-Version: 2.4
- Name: intellema-vdk
- Version: 0.2.0
- Summary: A Voice Development Kit for different Voice Agent Platforms
- Author: Intellema
- License: MIT License
-
- Copyright (c) 2026 Intellema
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-
- Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
- Requires-Python: >=3.8
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Requires-Dist: livekit-api>=1.1.0
- Requires-Dist: python-dotenv>=1.0.0
- Requires-Dist: boto3>=1.28.0
- Requires-Dist: twilio
- Requires-Dist: retell-sdk
- Requires-Dist: requests
- Requires-Dist: openai
- Requires-Dist: httpx
- Requires-Dist: pyaudio
- Requires-Dist: together
- Requires-Dist: langchain-openai
- Requires-Dist: langchain-core
- Dynamic: license-file
-
- # Intellema VDK
-
- Intellema VDK is a unified Voice Development Kit designed to simplify the integration and management of various voice agent platforms. It provides a consistent, factory-based API to interact with providers like LiveKit and Retell AI, enabling developers to build scalable voice applications with ease. Whether you need real-time streaming, outbound calling, or participant management, Intellema VDK abstracts the complexity into a single, intuitive interface.
-
- ## Features
-
- - **Room Management**: Create and delete rooms dynamically.
- - **Participant Management**: Generate tokens, kick users, and mute tracks.
- - **SIP Outbound Calling**: Initiate calls to phone numbers via SIP trunks.
- - **Streaming & Recording**: Stream to RTMP destinations and record room sessions directly to AWS S3.
- - **Real-time Alerts**: Send data packets (alerts) to participants.
-
- ## Prerequisites
-
- - Python 3.8+
- - A SIP Provider (for outbound calls)
-
- ## Installation
-
- ```bash
- pip install intellema-vdk
- ```
-
- ## Usage
-
- ### Unified Wrapper (Factory Pattern)
-
- The recommended way to use the library is via the `VoiceClient` factory:
-
- ```python
- import asyncio
- from intellema_vdk import VoiceClient
-
- async def main():
-     # 1. Initialize the client
-     client = VoiceClient("livekit")
-
-     # 2. Use methods directly
-     call_id = await client.start_outbound_call(
-         phone_number="+15551234567",
-         prompt_content="Hello from LiveKit"
-     )
-
-     # 3. Clean API calls
-     await client.mute_participant(call_id, "user-1", "track-1", True)
-     await client.close()
-
- if __name__ == "__main__":
-     asyncio.run(main())
- ```
-
- ### Convenience Function
-
- For quick one-off calls, you can still use the helper:
-
- ```python
- from intellema_vdk import start_outbound_call
-
- await start_outbound_call("livekit", phone_number="+1...")
- ```
-
- ## Speech To Text (STT)
-
- The `STTManager` class provides an interface for transcribing audio files using OpenAI's Whisper model and optionally posting the transcribed text to a specified agent API.
-
- ### Usage
-
- Here's how to use the `STTManager` to transcribe an audio file and post the result:
- Ensure to set OPENAI_API_KEY and AGENT_API_URL in your `.env` file.
-
- ```python
- import asyncio
- from intellema_vdk import STTManager
-
- async def main():
-     # 1- Initialize the STTManager
-     stt_manager = STTManager()
-
-     try:
-         # 2- Transcribe an audio file and post the result to your agent API URL (if provided)
-         # Replace "path/to/your/audio.mp3" with the actual file path
-         transcript = await stt_manager.transcribe_and_post("path/to/your/audio.mp3")
-         print(f"Transcription: {transcript}")
-
-     except FileNotFoundError:
-         print("The audio file was not found.")
-     except Exception as e:
-         print(f"An error occurred: {e}")
-     finally:
-         # 3- Clean up
-         await stt_manager.close()
-
- if __name__ == "__main__":
-     asyncio.run(main())
- ```
-
- ## TTS Streaming
-
- The `TTSStreamer` class provides low-latency text-to-speech streaming using Together AI's inference engine. It enables real-time voice synthesis from streaming LLM responses.
-
- ### Running the Sample implementation
-
- We provide a ready-to-use sample that connects LangChain (OpenAI) with the TTS Streamer.
-
- 1. **Configure Keys**: Ensure `OPENAI_API_KEY` and `TOGETHER_API_KEY` are set in your `.env`.
- 2. **Run the script**:
-    ```bash
-    python sample_implementation.py
-    ```
-
- ### Library Usage
-
- You can integrate the streamer into your own loops:
-
- ```python
- from intellema_vdk import TTSStreamer
-
- # 1. Initialize per turn
- tts = TTSStreamer()
-
- # 2. Feed text chunks as they are generated
- for chunk in llm_response_stream:
-     tts.feed(chunk)
-
- # 3. Flush and clean up
- tts.flush()
- tts.close()
- ```
-
- ## Configuration
-
- Create a `.env` file in the root directory:
-
- ```bash
- LIVEKIT_URL=wss://your-livekit-domain.com
- LIVEKIT_API_KEY=your-key
- LIVEKIT_API_SECRET=your-secret
- SIP_OUTBOUND_TRUNK_ID=your-trunk-id
- TWILIO_ACCOUNT_SID=your-sid
- TWILIO_AUTH_TOKEN=your-token
- TWILIO_PHONE_NUMBER=your-number
- RETELL_API_KEY=your-retell-key
- RETELL_AGENT_ID=your-agent-id
- TOGETHER_API_KEY=your-together-key
- OPENAI_API_KEY=your-openai-key
- AGENT_API_URL=https://your-agent-api.com/endpoint
- ```
-
- ## Retell Setup
-
- **Important:** Before initiating calls with Retell, you must register your Twilio phone number with Retell. This binds your agent to the number and allows Retell to handle the call flow.
-
- You can register your number in two ways:
-
- 1. **Using the Helper Script:**
-    We provide an interactive script to guide you through the process:
-    ```bash
-    python import_phone_number.py
-    ```
-
- 2. **Programmatically:**
-    ```python
-    from intellema_vdk.retell_lib.retell_client import RetellManager
-
-    manager = RetellManager()
-    # Optional: Pass termination_uri if you have a SIP trunk
-    manager.import_phone_number(nickname="My Twilio Number")
-    ```
-
- ## Notes
-
- - **Retell `delete_room` Limitation**: The `delete_room` method for Retell relies on updating dynamic variables during the conversation loop. As a result, it **only works if the user speaks something** which triggers the agent to check the variable and terminate the call.
-
-
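The Configuration section of the removed README expects these credentials in a `.env` file, and `python-dotenv` is a declared dependency of the 0.2.0 wheel. A minimal sketch of loading a few of those variables before constructing a client, assuming the variable names listed in that section:

```python
import os

from dotenv import load_dotenv

# Pull KEY=value pairs from a local .env file into os.environ.
load_dotenv()

required = ["LIVEKIT_URL", "LIVEKIT_API_KEY", "LIVEKIT_API_SECRET", "TOGETHER_API_KEY"]
missing = [name for name in required if not os.environ.get(name)]
if missing:
    raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")
```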
intellema_vdk-0.2.0.dist-info/RECORD
@@ -1,14 +0,0 @@
- intellema_vdk/__init__.py,sha256=64pm2TLqhGG225JLddco1kSOpLaD3eGByWvMpaHUUX0,1231
- intellema_vdk/livekit_lib/__init__.py,sha256=9JsOBswDivM8tRw9EF1ql0wwFnHvwjcPWT-umqad98o,68
- intellema_vdk/livekit_lib/client.py,sha256=UxOuT9I-YPtHopx4dXoGKRAJvLXKFgUdtrAcHdR4a-Q,10687
- intellema_vdk/retell_lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- intellema_vdk/retell_lib/import_phone_number.py,sha256=pOt1k6De6-bt2xOPzMMR0nI4Ha6BzLjm19qenyy9RN8,3081
- intellema_vdk/retell_lib/retell_client.py,sha256=qT00NJWi2rJyLWswWTx5fGl5mwPdy6QurQt1Enac0rU,10793
- intellema_vdk/speech_lib/__init__.py,sha256=TXdyAAS6AfQfln_QlIvx_uXU-ksugXzC2N9hrjW1_MQ,73
- intellema_vdk/speech_lib/stt_client.py,sha256=YB8-mJUtQKhqEC4zhipJUb6Y8LqJx0Vv_c4iIxuUjJM,4054
- intellema_vdk/speech_lib/tts_streamer.py,sha256=qs2mzP0vKqv2eKvGJSCTee3mzeJGS9nji0Yy3Y-sOTc,6453
- intellema_vdk-0.2.0.dist-info/licenses/LICENSE,sha256=41qw3yuvY1SpTkwLebZTVYOKk9OIe1Kr6I1S6Y5mp8Y,1087
- intellema_vdk-0.2.0.dist-info/METADATA,sha256=j53oDXr8Xcq7nkP4v6bWeK62z0yR-Pa2yivGaCw-abc,7363
- intellema_vdk-0.2.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
- intellema_vdk-0.2.0.dist-info/top_level.txt,sha256=nQ_0rJRkEthHH0bJYoPAVVgQiO6Uw6c_mHnfeROG14U,14
- intellema_vdk-0.2.0.dist-info/RECORD,,