intellema-vdk 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Intellema
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,4 @@
1
+ include README.md
2
+ include requirements.txt
3
+ include LICENSE
4
+ recursive-include intellema_vdk *
@@ -0,0 +1,120 @@
1
+ Metadata-Version: 2.4
2
+ Name: intellema-vdk
3
+ Version: 0.1.0
4
+ Summary: A Voice Development Kit for different Voice Agent Platforms
5
+ Author: Intellema
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Intellema
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Classifier: Programming Language :: Python :: 3
29
+ Classifier: License :: OSI Approved :: MIT License
30
+ Classifier: Operating System :: OS Independent
31
+ Requires-Python: >=3.8
32
+ Description-Content-Type: text/markdown
33
+ License-File: LICENSE
34
+ Requires-Dist: livekit-api>=1.1.0
35
+ Requires-Dist: python-dotenv>=1.0.0
36
+ Requires-Dist: boto3>=1.28.0
37
+ Requires-Dist: twilio
38
+ Requires-Dist: retell-sdk
39
+ Requires-Dist: requests
40
+ Dynamic: license-file
41
+
42
+ # Intellema VDK
43
+
44
+ Intellema VDK is a unified Voice Development Kit designed to simplify the integration and management of various voice agent platforms. It provides a consistent, factory-based API to interact with providers like LiveKit and Retell AI, enabling developers to build scalable voice applications with ease. Whether you need real-time streaming, outbound calling, or participant management, Intellema VDK abstracts the complexity into a single, intuitive interface.
45
+
46
+ ## Features
47
+
48
+ - **Room Management**: Create and delete rooms dynamically.
49
+ - **Participant Management**: Generate tokens, kick users, and mute tracks.
50
+ - **SIP Outbound Calling**: Initiate calls to phone numbers via SIP trunks.
51
+ - **Streaming & Recording**: Stream to RTMP destinations and record room sessions directly to AWS S3.
52
+ - **Real-time Alerts**: Send data packets (alerts) to participants.
53
+
54
+ ## Prerequisites
55
+
56
+ - Python 3.8+
57
+ - A SIP Provider (for outbound calls)
58
+
59
+ ## Installation
60
+
61
+ ```bash
62
+ pip install intellema-vdk
63
+ ```
64
+
65
+ ## Usage
66
+
67
+ ### Unified Wrapper (Factory Pattern)
68
+
69
+ The recommended way to use the library is via the `VoiceClient` factory:
70
+
71
+ ```python
72
+ import asyncio
73
+ from intellema_vdk import VoiceClient
74
+
75
+ async def main():
76
+ # 1. Initialize the client
77
+ client = VoiceClient("livekit")
78
+
79
+ # 2. Use methods directly
80
+ room = await client.start_outbound_call(
81
+ phone_number="+15551234567",
82
+ prompt_content="Hello from LiveKit"
83
+ )
84
+
85
+ # 3. Clean API calls (the LiveKit client returns the created room; its name is the call ID)
86
+ await client.mute_participant(room.name, "user-1", "track-1", True)
87
+ await client.close()
88
+
89
+ if __name__ == "__main__":
90
+ asyncio.run(main())
91
+ ```
92
+
93
+ ### Convenience Function
94
+
95
+ For quick one-off calls, you can still use the helper:
96
+
97
+ ```python
98
+ from intellema_vdk import start_outbound_call
99
+
100
+ await start_outbound_call("livekit", phone_number="+1...", prompt_content="Hello from LiveKit")
101
+ ```
102
+
103
+
104
+ ## Configuration
105
+
106
+ Create a `.env` file in the root directory:
107
+
108
+ ```bash
109
+ LIVEKIT_URL=wss://your-livekit-domain.com
110
+ LIVEKIT_API_KEY=your-key
111
+ LIVEKIT_API_SECRET=your-secret
112
+ SIP_OUTBOUND_TRUNK_ID=your-trunk-id
113
+ TWILIO_ACCOUNT_SID=your-sid
114
+ TWILIO_AUTH_TOKEN=your-token
115
+ TWILIO_PHONE_NUMBER=your-number
116
+ RETELL_API_KEY=your-retell-key
117
+ RETELL_AGENT_ID=your-agent-id
118
+ ```
119
+
120
+
@@ -0,0 +1,79 @@
1
+ # Intellema VDK
2
+
3
+ Intellema VDK is a unified Voice Development Kit designed to simplify the integration and management of various voice agent platforms. It provides a consistent, factory-based API to interact with providers like LiveKit and Retell AI, enabling developers to build scalable voice applications with ease. Whether you need real-time streaming, outbound calling, or participant management, Intellema VDK abstracts the complexity into a single, intuitive interface.
4
+
5
+ ## Features
6
+
7
+ - **Room Management**: Create and delete rooms dynamically.
8
+ - **Participant Management**: Generate tokens, kick users, and mute tracks.
9
+ - **SIP Outbound Calling**: Initiate calls to phone numbers via SIP trunks.
10
+ - **Streaming & Recording**: Stream to RTMP destinations and record room sessions directly to AWS S3.
11
+ - **Real-time Alerts**: Send data packets (alerts) to participants.
12
+
13
+ ## Prerequisites
14
+
15
+ - Python 3.8+
16
+ - A SIP Provider (for outbound calls)
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install intellema-vdk
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ ### Unified Wrapper (Factory Pattern)
27
+
28
+ The recommended way to use the library is via the `VoiceClient` factory:
29
+
30
+ ```python
31
+ import asyncio
32
+ from intellema_vdk import VoiceClient
33
+
34
+ async def main():
35
+ # 1. Initialize the client
36
+ client = VoiceClient("livekit")
37
+
38
+ # 2. Use methods directly
39
+ room = await client.start_outbound_call(
40
+ phone_number="+15551234567",
41
+ prompt_content="Hello from LiveKit"
42
+ )
43
+
44
+ # 3. Clean API calls (the LiveKit client returns the created room; its name is the call ID)
45
+ await client.mute_participant(room.name, "user-1", "track-1", True)
46
+ await client.close()
47
+
48
+ if __name__ == "__main__":
49
+ asyncio.run(main())
50
+ ```
51
+
52
+ ### Convenience Function
53
+
54
+ For quick one-off calls, you can still use the helper:
55
+
56
+ ```python
57
+ from intellema_vdk import start_outbound_call
58
+
59
+ await start_outbound_call("livekit", phone_number="+1...", prompt_content="Hello from LiveKit")
60
+ ```
61
+
62
+
63
+ ## Configuration
64
+
65
+ Create a `.env` file in the root directory:
66
+
67
+ ```bash
68
+ LIVEKIT_URL=wss://your-livekit-domain.com
69
+ LIVEKIT_API_KEY=your-key
70
+ LIVEKIT_API_SECRET=your-secret
71
+ SIP_OUTBOUND_TRUNK_ID=your-trunk-id
72
+ TWILIO_ACCOUNT_SID=your-sid
73
+ TWILIO_AUTH_TOKEN=your-token
74
+ TWILIO_PHONE_NUMBER=your-number
75
+ RETELL_API_KEY=your-retell-key
76
+ RETELL_AGENT_ID=your-agent-id
77
+ ```
78
+
79
+
@@ -0,0 +1,38 @@
1
+ from typing import Optional, List, Any
2
+ import os
3
+ from dotenv import load_dotenv
4
+
5
+ # Load environment variables
6
+ load_dotenv()
7
+
8
+ from .livekit_lib.client import LiveKitManager
9
+ from .retell_lib.retell_client import RetellManager
10
+
11
def VoiceClient(provider: str, **kwargs) -> Any:
    """
    Build the manager object for the requested voice provider.

    Args:
        provider: Either "livekit" or "retell" (exact, case-sensitive match).
        **kwargs: Forwarded unchanged to the selected manager's constructor.

    Returns:
        A LiveKitManager for "livekit", a RetellManager for "retell".

    Raises:
        ValueError: If ``provider`` is neither of the supported names.
    """
    # Reject unsupported names up front so the happy path is a single lookup.
    if provider not in ("livekit", "retell"):
        raise ValueError(f"Unknown provider: {provider}. Supported providers: 'livekit', 'retell'")

    manager_cls = LiveKitManager if provider == "livekit" else RetellManager
    return manager_cls(**kwargs)
28
+
29
async def start_outbound_call(provider: str, *args, **kwargs):
    """
    Convenience wrapper that creates a client, starts one outbound call and
    releases the client again.

    Args:
        provider: Provider name understood by ``VoiceClient``.
        *args: Positional arguments for the manager's ``start_outbound_call``.
        **kwargs: Keyword arguments for the manager's ``start_outbound_call``.

    Returns:
        Whatever the selected manager's ``start_outbound_call`` returns.
    """
    # Local import: only this helper needs it, so the module's import block stays untouched.
    import inspect

    client = VoiceClient(provider)
    try:
        outcome = client.start_outbound_call(*args, **kwargs)
        # Some managers expose a coroutine, others a plain method; inspect the
        # result instead of hard-coding which provider is async.
        if inspect.isawaitable(outcome):
            outcome = await outcome
        return outcome
    finally:
        # The client is created here and never handed to the caller, so it must
        # be closed here as well (when the manager offers a close()).
        closer = getattr(client, "close", None)
        if closer is not None:
            pending = closer()
            if inspect.isawaitable(pending):
                await pending
@@ -0,0 +1,3 @@
1
+ from .client import LiveKitManager
2
+
3
+ __all__ = ["LiveKitManager"]
@@ -0,0 +1,280 @@
1
+ import os
2
+ import json
3
+ import uuid
4
+ import asyncio
5
+ import time
6
+ import boto3
7
+ from typing import List, Optional
8
+ from dotenv import load_dotenv
9
+ from livekit import api
10
+
11
+ # Load environment variables
12
+ load_dotenv(dotenv_path=".env.local")
13
+ load_dotenv()
14
+
15
class LiveKitManager:
    """Async helper around the LiveKit server API for outbound voice calls.

    Bundles room creation, agent dispatch, SIP dialing, egress (streaming and
    recording), participant moderation and data messages. Call ``close()`` when
    done so the underlying API client is released.
    """

    # Seconds between egress status polls in start_recording().
    _EGRESS_POLL_INTERVAL = 5
    # Numeric values of the server protocol's TrackType enum.
    _TRACK_TYPE_NAMES = {0: "audio", 1: "video", 2: "data"}
    # DataPacket.Kind in the server protocol: RELIABLE = 0, LOSSY = 1.
    _DATA_KIND_RELIABLE = 0

    def __init__(self, url: Optional[str] = None, api_key: Optional[str] = None, api_secret: Optional[str] = None, sip_trunk_id: Optional[str] = None):
        """Create the API client.

        Each argument falls back to its environment variable (LIVEKIT_URL,
        LIVEKIT_API_KEY, LIVEKIT_API_SECRET, SIP_OUTBOUND_TRUNK_ID) when omitted.

        Raises:
            ValueError: If URL, API key or API secret cannot be resolved.
        """
        self.url = url or os.getenv("LIVEKIT_URL")
        self.api_key = api_key or os.getenv("LIVEKIT_API_KEY")
        self.api_secret = api_secret or os.getenv("LIVEKIT_API_SECRET")
        self.sip_trunk_id = sip_trunk_id or os.getenv("SIP_OUTBOUND_TRUNK_ID")

        if not self.url or not self.api_key or not self.api_secret:
            raise ValueError("LIVEKIT_URL, LIVEKIT_API_KEY, and LIVEKIT_API_SECRET must be set.")

        self.lk_api = api.LiveKitAPI(
            url=self.url,
            api_key=self.api_key,
            api_secret=self.api_secret,
        )

    async def close(self):
        """Close the underlying LiveKit API client."""
        await self.lk_api.aclose()

    @staticmethod
    def _is_busy_error(error: Exception) -> bool:
        """Return True when ``error`` looks like a SIP 486 "Busy Here" failure."""
        # NOTE(review): LiveKit's error type presumably exposes SIP details in a
        # ``metadata`` mapping; the text checks below cover the case where it does not.
        metadata = getattr(error, "metadata", None)
        if hasattr(metadata, "get") and str(metadata.get("sip_status_code", "")) == "486":
            return True

        text = str(error)
        if "Busy Here" in text:
            return True
        # Compare whole digit runs so a phone number that merely contains "486"
        # is not mistaken for the SIP status code.
        digit_runs = "".join(ch if ch.isdigit() else " " for ch in text).split()
        return "486" in digit_runs

    async def start_outbound_call(self, phone_number: str, prompt_content: str, call_id: Optional[str] = None, timeout: int = 600):
        """Create a room, dispatch the agent and dial ``phone_number`` over SIP.

        Args:
            phone_number: Number to dial; also stored in the room metadata.
            prompt_content: Prompt text stored in the room/dispatch metadata.
            call_id: Room name to use; a random ``outbound_call_<hex>`` name is
                generated when omitted.
            timeout: Passed as the room's ``empty_timeout``.

        Returns:
            The room object returned by the create-room call.

        Raises:
            ValueError: If no SIP trunk is configured, or the callee is busy.
        """
        # Validate configuration before creating anything on the server, so a
        # missing trunk does not leave a room and agent dispatch behind.
        if not self.sip_trunk_id:
            raise ValueError("SIP_OUTBOUND_TRUNK_ID is not configured in environment.")

        if not call_id:
            call_id = f"outbound_call_{uuid.uuid4().hex[:12]}"

        metadata = json.dumps({
            "phone_number": phone_number,
            "prompt_content": prompt_content
        })

        # 1. Create room with metadata
        room = await self.lk_api.room.create_room(
            api.CreateRoomRequest(
                name=call_id,
                empty_timeout=timeout,
                metadata=metadata
            )
        )

        # 2. Dispatch agent
        await self.lk_api.agent_dispatch.create_dispatch(
            api.CreateAgentDispatchRequest(
                room=call_id,
                agent_name="outbound-caller",
                metadata=metadata
            )
        )

        # 3. Initiate Outbound Call (SIP/PSTN)
        sip_participant_identity = f"phone-{phone_number}"

        try:
            await self.lk_api.sip.create_sip_participant(
                api.CreateSIPParticipantRequest(
                    room_name=call_id,
                    sip_trunk_id=self.sip_trunk_id,
                    sip_call_to=phone_number,
                    participant_identity=sip_participant_identity,
                    wait_until_answered=True,
                )
            )
        except Exception as e:
            if self._is_busy_error(e):
                print(f"Call failed: User is busy ({phone_number})")
                # Best-effort cleanup: a failed delete must not hide the busy signal.
                try:
                    await self.delete_room(call_id)
                except Exception as cleanup_error:
                    print(f"Failed to delete room {call_id}: {cleanup_error}")
                raise ValueError("User is busy") from e
            raise

        return room

    async def create_token(self, call_id: str, participant_name: str) -> str:
        """Return a JWT that lets ``participant_name`` join room ``call_id``."""
        token = api.AccessToken(self.api_key, self.api_secret)
        token.with_identity(participant_name)
        token.with_name(participant_name)
        token.with_grants(api.VideoGrants(
            room_join=True,
            room=call_id,
        ))
        return token.to_jwt()

    async def delete_room(self, call_id: str):
        """Delete the room named ``call_id``."""
        await self.lk_api.room.delete_room(api.DeleteRoomRequest(room=call_id))

    async def start_stream(self, call_id: str, rtmp_urls: List[str]):
        """Start a room-composite egress streaming room ``call_id`` to ``rtmp_urls``."""
        await self.lk_api.egress.start_room_composite_egress(
            api.RoomCompositeEgressRequest(
                room_name=call_id,
                layout="speaker",
                stream_outputs=[
                    api.StreamOutput(
                        protocol=api.StreamProtocol.RTMP,
                        urls=rtmp_urls
                    )
                ]
            )
        )

    async def start_recording(self, call_id: str, output_filepath: Optional[str] = None, upload_to_s3: bool = True, wait_for_completion: bool = True):
        """
        Start recording a room.

        Args:
            call_id: Name of the room/call to record.
            output_filepath: Optional path/filename for the recording.
            upload_to_s3: If True, uploads to S3 (requires env vars). If False, saves locally on Egress server.
            wait_for_completion: If True, waits for the recording to finish and downloads it locally (if upload_to_s3 is True).

        Returns:
            The egress ID of the started recording.

        Raises:
            ValueError: If S3 upload is requested but the AWS settings are missing.
            RuntimeError: If the egress ends as failed, aborted or limit-reached
                while waiting for completion.
        """
        filename = output_filepath if output_filepath else f"{call_id}-{uuid.uuid4().hex[:6]}.mp4"
        access_key = secret_key = bucket = region = None

        if upload_to_s3:
            access_key = os.getenv("AWS_ACCESS_KEY_ID")
            secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
            bucket = os.getenv("AWS_S3_BUCKET")
            region = os.getenv("AWS_REGION")

            if not access_key or not secret_key or not bucket:
                raise ValueError("AWS credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_S3_BUCKET) are required for S3 upload.")

            file_output = api.EncodedFileOutput(
                file_type=api.EncodedFileType.MP4,
                filepath=filename,
                s3=api.S3Upload(
                    access_key=access_key,
                    secret=secret_key,
                    bucket=bucket,
                    region=region,
                ),
            )
            print(f"Starting recording. File will be saved to S3: s3://{bucket}/{filename}")
        else:
            file_output = api.EncodedFileOutput(
                file_type=api.EncodedFileType.MP4,
                filepath=filename,
            )
            print(f"Starting recording. File will be saved locally: {filename}")

        egress_info = await self.lk_api.egress.start_room_composite_egress(
            api.RoomCompositeEgressRequest(
                room_name=call_id,
                layout="grid",
                preset=api.EncodingOptionsPreset.H264_720P_30,
                file_outputs=[file_output]
            )
        )
        egress_id = egress_info.egress_id

        if wait_for_completion and upload_to_s3:
            await self._wait_for_egress(egress_id)
            await self._download_recording(bucket, filename, access_key, secret_key, region)

        return egress_id

    async def _wait_for_egress(self, egress_id: str):
        """Poll the egress until it completes; raise RuntimeError on a terminal failure."""
        print(f"Waiting for egress {egress_id} to complete...")

        while True:
            try:
                egress_list = await self.lk_api.egress.list_egress(api.ListEgressRequest(egress_id=egress_id))
            except Exception as e:
                # Status lookups are retried; a failed poll is not treated as a failed recording.
                print(f"Error checking egress status: {e}")
                await asyncio.sleep(self._EGRESS_POLL_INTERVAL)
                continue

            if not egress_list.items:
                print("Egress info not found during polling.")
                return

            info = egress_list.items[0]
            if info.status == api.EgressStatus.EGRESS_COMPLETE:
                print("Egress completed successfully.")
                return
            if info.status == api.EgressStatus.EGRESS_FAILED:
                raise RuntimeError(f"Egress failed: {info.error}")
            if info.status == api.EgressStatus.EGRESS_ABORTED:
                # Aborted is terminal too; without this branch the loop would never end.
                raise RuntimeError(f"Egress aborted: {info.error}")
            if info.status == api.EgressStatus.EGRESS_LIMIT_REACHED:
                raise RuntimeError(f"Egress limit reached: {info.error}")

            await asyncio.sleep(self._EGRESS_POLL_INTERVAL)

    async def _download_recording(self, bucket: str, filename: str, access_key: str, secret_key: str, region: Optional[str]):
        """Download ``filename`` from ``bucket`` into the local ``recordings`` directory."""
        print(f"Downloading {filename} from S3 bucket {bucket}...")
        s3 = boto3.client(
            's3',
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            region_name=region
        )

        local_path = os.path.join("recordings", filename)
        # filename may contain sub-directories (custom output_filepath), so
        # create the full parent path rather than only "recordings".
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        try:
            # download_file blocks; run it in the default executor so the event
            # loop keeps serving other tasks.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, s3.download_file, bucket, filename, local_path)
            print(f"Recording downloaded to: {local_path}")
        except Exception as e:
            print(f"Failed to download recording: {e}")
            raise

    async def kick_participant(self, call_id: str, identity: str):
        """Remove participant ``identity`` from room ``call_id``."""
        await self.lk_api.room.remove_participant(
            api.RoomParticipantIdentity(
                room=call_id,
                identity=identity
            )
        )

    async def mute_participant(self, call_id: str, identity: str, track_sid: str, muted: bool):
        """Set the muted state of track ``track_sid`` published by ``identity``."""
        await self.lk_api.room.mute_published_track(
            api.MuteRoomTrackRequest(
                room=call_id,
                identity=identity,
                track_sid=track_sid,
                muted=muted
            )
        )

    async def send_alert(self, call_id: str, message: str, participant_identity: Optional[str] = None):
        """Send a JSON ``{"type": "alert", "message": ...}`` data packet to the room.

        When ``participant_identity`` is given only that participant is
        addressed; otherwise the destination list is left empty.
        """
        destination_identities = [participant_identity] if participant_identity else []
        data_packet = json.dumps({"type": "alert", "message": message}).encode('utf-8')

        await self.lk_api.room.send_data(
            api.SendDataRequest(
                room=call_id,
                data=data_packet,
                kind=self._DATA_KIND_RELIABLE,
                destination_identities=destination_identities
            )
        )

    @classmethod
    def _track_type_name(cls, track_type: int) -> str:
        """Map a numeric track type to "audio", "video", "data" or "unknown"."""
        return cls._TRACK_TYPE_NAMES.get(track_type, "unknown")

    async def get_participant_identities(self, call_id: str) -> List[dict]:
        """
        Get a list of all participants in a room with their identities and tracks.

        Returns:
            List of dicts with participant info:
            [
                {
                    "identity": str,
                    "name": str,
                    "tracks": [
                        {"sid": str, "type": str, "muted": bool, "source": str},
                        ...
                    ]
                },
                ...
            ]
        """
        response = await self.lk_api.room.list_participants(
            api.ListParticipantsRequest(room=call_id)
        )
        return [
            {
                "identity": p.identity,
                "name": p.name,
                "tracks": [
                    {
                        "sid": track.sid,
                        "type": self._track_type_name(track.type),
                        "muted": track.muted,
                        "source": track.source.name if hasattr(track.source, 'name') else str(track.source),
                    }
                    for track in p.tracks
                ],
            }
            for p in response.participants
        ]
279
+
280
+
@@ -0,0 +1,190 @@
1
+ import os
2
+ from typing import List, Optional
3
+ from dotenv import load_dotenv
4
+ from twilio.rest import Client
5
+ from retell import Retell
6
+ import time
7
+ import uuid
8
+ import requests
9
+ import boto3
10
+
11
+ # Load environment variables
12
+ load_dotenv(dotenv_path=".env.local")
13
+ load_dotenv()
14
+
15
class RetellManager:
    """Synchronous helper that places Twilio calls bridged to a Retell agent.

    The Twilio call SID is used as the "room name" throughout this class.
    """

    # Seconds between recording status polls in start_recording().
    _RECORDING_POLL_INTERVAL = 5
    # Seconds to wait for the recording download before giving up.
    _DOWNLOAD_TIMEOUT = 60

    def __init__(self, twilio_account_sid: Optional[str] = None, twilio_auth_token: Optional[str] = None, twilio_number: Optional[str] = None, retell_api_key: Optional[str] = None, retell_agent_id: Optional[str] = None):
        """Create the Twilio and Retell clients.

        Each argument falls back to its environment variable
        (TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN, TWILIO_PHONE_NUMBER,
        RETELL_API_KEY, RETELL_AGENT_ID) when omitted.

        Raises:
            ValueError: If any of the five settings cannot be resolved.
        """
        self.twilio_account_sid = twilio_account_sid or os.getenv("TWILIO_ACCOUNT_SID")
        self.twilio_auth_token = twilio_auth_token or os.getenv("TWILIO_AUTH_TOKEN")
        self.twilio_number = twilio_number or os.getenv("TWILIO_PHONE_NUMBER")
        self.retell_api_key = retell_api_key or os.getenv("RETELL_API_KEY")
        self.retell_agent_id = retell_agent_id or os.getenv("RETELL_AGENT_ID")

        if not all([self.twilio_account_sid, self.twilio_auth_token, self.twilio_number, self.retell_api_key, self.retell_agent_id]):
            raise ValueError("Missing necessary environment variables for RetellManager")

        self.twilio_client = Client(self.twilio_account_sid, self.twilio_auth_token)
        self.retell_client = Retell(api_key=self.retell_api_key)

    def start_outbound_call(self, phone_number: str, prompt_content: Optional[str] = None, call_id: Optional[str] = None) -> str:
        """
        Initiates an outbound call using Twilio.
        Registers the call with Retell first, then uses TwiML to connect Twilio to Retell's WebSocket.

        Args:
            phone_number: The number to call.
            prompt_content: Content to override the agent's prompt (passed as 'prompt_content' dynamic variable).
            call_id: Custom ID for metadata (optional).

        Returns:
            The SID of the created Twilio call.
        """
        # 1. Register call with Retell to obtain the Retell call ID
        register_response = self.retell_client.call.register_phone_call(
            agent_id=self.retell_agent_id,
            direction="outbound",
            from_number=self.twilio_number,
            to_number=phone_number,
            metadata={"call_id": call_id} if call_id else None,
            retell_llm_dynamic_variables={"prompt_content": prompt_content} if prompt_content else None
        )

        # 2. Construct the audio WebSocket URL using the Retell call ID
        audio_websocket_url = f"wss://api.retellai.com/audio-websocket/{register_response.call_id}"

        # 3. Construct TwiML to connect Twilio to Retell
        # Note: We construct the XML string manually to avoid extra dependencies like twilio.twiml
        twiml = f"""<Response>
    <Connect>
        <Stream url="{audio_websocket_url}" />
    </Connect>
</Response>"""

        # 4. Create the call with Twilio using the generated TwiML
        call = self.twilio_client.calls.create(
            to=phone_number,
            from_=self.twilio_number,
            twiml=twiml
        )
        return call.sid

    def delete_room(self, room_name: str):
        """
        Ends the call. 'room_name' is interpreted as the Twilio Call SID.
        Tries to end the Retell call first, then completes the Twilio call.
        Errors are printed rather than raised.
        """
        try:
            # Attempt to end Retell call if mapped, but primarily hang up Twilio
            # Note: Retell SDK end_call requires retell call id, not twilio sid.
            # If we don't have the mapping, hanging up Twilio is the most effective way to stop everything.
            try:
                self.retell_client.call.end_call(call_id=room_name)
            except Exception:
                pass  # Deliberate best effort: ignore Retell failures, ensure Twilio hangs up

            self.twilio_client.calls(room_name).update(status='completed')
        except Exception as e:
            print(f"Error ending call {room_name}: {e}")

    def start_stream(self, room_name: str, rtmp_urls: List[str]):
        """
        Starts a Twilio Media Stream on the call, using only the first URL given.
        Note: Twilio streams are WebSocket-based. If rtmp_urls contains a WSS URL, it will work.

        Raises:
            ValueError: If ``rtmp_urls`` is empty.
        """
        if not rtmp_urls:
            raise ValueError("No stream URLs provided")

        self.twilio_client.calls(room_name).streams.create(
            url=rtmp_urls[0]
        )

    @staticmethod
    def _load_aws_settings():
        """Read the S3 settings from the environment; raise ValueError if incomplete."""
        access_key = os.getenv("AWS_ACCESS_KEY_ID")
        secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
        bucket = os.getenv("AWS_S3_BUCKET")
        region = os.getenv("AWS_REGION")

        if not access_key or not secret_key or not bucket:
            raise ValueError("AWS credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_S3_BUCKET) are required for S3 upload.")
        return access_key, secret_key, bucket, region

    def start_recording(self, room_name: str, output_filepath: Optional[str] = None, upload_to_s3: bool = True, wait_for_completion: bool = True):
        """
        Triggers a recording on the active Twilio call.

        Args:
            room_name: The Twilio Call SID.
            output_filepath: Optional filename for the recording.
            upload_to_s3: If True, uploads to S3.
            wait_for_completion: If True, waits for recording to finish and then uploads.

        Returns:
            The Twilio Recording SID.

        Raises:
            ValueError: If an upload will be needed but the AWS settings are missing.
            RuntimeError: If the recording fails or cannot be downloaded.
        """
        # Fail fast: check the upload settings before the recording starts, not
        # after the whole call has been recorded and downloaded.
        aws_settings = None
        if wait_for_completion and upload_to_s3:
            aws_settings = self._load_aws_settings()

        # Start Twilio recording
        recording = self.twilio_client.calls(room_name).recordings.create()
        print(f"Recording started: {recording.sid}")

        if not wait_for_completion:
            return recording.sid

        # Poll for recording completion
        print("Waiting for recording to complete...")
        while True:
            rec_status = self.twilio_client.recordings(recording.sid).fetch()
            if rec_status.status == 'completed':
                print("Recording completed.")
                break
            if rec_status.status in ['failed', 'absent']:
                raise RuntimeError(f"Recording failed with status: {rec_status.status}")
            time.sleep(self._RECORDING_POLL_INTERVAL)

        if not upload_to_s3:
            return recording.sid

        # Download recording from Twilio
        media_url = f"https://api.twilio.com/2010-04-01/Accounts/{self.twilio_account_sid}/Recordings/{recording.sid}.mp3"
        print(f"Downloading recording from: {media_url}")

        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(
            media_url,
            auth=(self.twilio_account_sid, self.twilio_auth_token),
            timeout=self._DOWNLOAD_TIMEOUT,
        )
        if response.status_code != 200:
            raise RuntimeError(f"Failed to download recording: {response.status_code} {response.text}")

        # Upload to S3
        access_key, secret_key, bucket, region = aws_settings
        filename = output_filepath if output_filepath else f"{room_name}-{uuid.uuid4().hex[:6]}.mp3"

        s3 = boto3.client(
            's3',
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            region_name=region
        )

        print(f"Uploading to S3: s3://{bucket}/{filename}")
        s3.put_object(Bucket=bucket, Key=filename, Body=response.content)
        print(f"Upload complete: s3://{bucket}/{filename}")

        # Also save locally; filename may contain sub-directories, so create
        # the full parent path rather than only "recordings".
        local_path = os.path.join("recordings", filename)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, 'wb') as f:
            f.write(response.content)
        print(f"Recording saved locally: {local_path}")

        return recording.sid

    def mute_participant(self, room_name: str, identity: str, track_sid: str, muted: bool):
        """
        Requests a mute state change on the Twilio call.
        ``identity`` and ``track_sid`` are accepted for interface parity and ignored.
        """
        # NOTE(review): Twilio's Call update does not appear to accept a `muted`
        # argument (muting is a conference-participant feature) - confirm
        # against the Twilio SDK before relying on this.
        self.twilio_client.calls(room_name).update(muted=muted)

    def kick_participant(self, room_name: str, identity: str):
        """
        Alias for delete_room (hangup); ``identity`` is ignored.
        """
        self.delete_room(room_name)

    def send_alert(self, room_name: str, message: str, participant_identity: Optional[str] = None):
        """
        Not supported in this hybrid model; always raises NotImplementedError.
        """
        raise NotImplementedError("send_alert is not currently supported in RetellManager")
@@ -0,0 +1,120 @@
1
+ Metadata-Version: 2.4
2
+ Name: intellema-vdk
3
+ Version: 0.1.0
4
+ Summary: A Voice Development Kit for different Voice Agent Platforms
5
+ Author: Intellema
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Intellema
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Classifier: Programming Language :: Python :: 3
29
+ Classifier: License :: OSI Approved :: MIT License
30
+ Classifier: Operating System :: OS Independent
31
+ Requires-Python: >=3.8
32
+ Description-Content-Type: text/markdown
33
+ License-File: LICENSE
34
+ Requires-Dist: livekit-api>=1.1.0
35
+ Requires-Dist: python-dotenv>=1.0.0
36
+ Requires-Dist: boto3>=1.28.0
37
+ Requires-Dist: twilio
38
+ Requires-Dist: retell-sdk
39
+ Requires-Dist: requests
40
+ Dynamic: license-file
41
+
42
+ # Intellema VDK
43
+
44
+ Intellema VDK is a unified Voice Development Kit designed to simplify the integration and management of various voice agent platforms. It provides a consistent, factory-based API to interact with providers like LiveKit and Retell AI, enabling developers to build scalable voice applications with ease. Whether you need real-time streaming, outbound calling, or participant management, Intellema VDK abstracts the complexity into a single, intuitive interface.
45
+
46
+ ## Features
47
+
48
+ - **Room Management**: Create and delete rooms dynamically.
49
+ - **Participant Management**: Generate tokens, kick users, and mute tracks.
50
+ - **SIP Outbound Calling**: Initiate calls to phone numbers via SIP trunks.
51
+ - **Streaming & Recording**: Stream to RTMP destinations and record room sessions directly to AWS S3.
52
+ - **Real-time Alerts**: Send data packets (alerts) to participants.
53
+
54
+ ## Prerequisites
55
+
56
+ - Python 3.8+
57
+ - A SIP Provider (for outbound calls)
58
+
59
+ ## Installation
60
+
61
+ ```bash
62
+ pip install intellema-vdk
63
+ ```
64
+
65
+ ## Usage
66
+
67
+ ### Unified Wrapper (Factory Pattern)
68
+
69
+ The recommended way to use the library is via the `VoiceClient` factory:
70
+
71
+ ```python
72
+ import asyncio
73
+ from intellema_vdk import VoiceClient
74
+
75
+ async def main():
76
+ # 1. Initialize the client
77
+ client = VoiceClient("livekit")
78
+
79
+ # 2. Use methods directly
80
+ room = await client.start_outbound_call(
81
+ phone_number="+15551234567",
82
+ prompt_content="Hello from LiveKit"
83
+ )
84
+
85
+ # 3. Clean API calls (the LiveKit client returns the created room; its name is the call ID)
86
+ await client.mute_participant(room.name, "user-1", "track-1", True)
87
+ await client.close()
88
+
89
+ if __name__ == "__main__":
90
+ asyncio.run(main())
91
+ ```
92
+
93
+ ### Convenience Function
94
+
95
+ For quick one-off calls, you can still use the helper:
96
+
97
+ ```python
98
+ from intellema_vdk import start_outbound_call
99
+
100
+ await start_outbound_call("livekit", phone_number="+1...", prompt_content="Hello from LiveKit")
101
+ ```
102
+
103
+
104
+ ## Configuration
105
+
106
+ Create a `.env` file in the root directory:
107
+
108
+ ```bash
109
+ LIVEKIT_URL=wss://your-livekit-domain.com
110
+ LIVEKIT_API_KEY=your-key
111
+ LIVEKIT_API_SECRET=your-secret
112
+ SIP_OUTBOUND_TRUNK_ID=your-trunk-id
113
+ TWILIO_ACCOUNT_SID=your-sid
114
+ TWILIO_AUTH_TOKEN=your-token
115
+ TWILIO_PHONE_NUMBER=your-number
116
+ RETELL_API_KEY=your-retell-key
117
+ RETELL_AGENT_ID=your-agent-id
118
+ ```
119
+
120
+
@@ -0,0 +1,20 @@
1
+ LICENSE
2
+ MANIFEST.in
3
+ README.md
4
+ pyproject.toml
5
+ requirements.txt
6
+ intellema_vdk/__init__.py
7
+ intellema_vdk.egg-info/PKG-INFO
8
+ intellema_vdk.egg-info/SOURCES.txt
9
+ intellema_vdk.egg-info/dependency_links.txt
10
+ intellema_vdk.egg-info/requires.txt
11
+ intellema_vdk.egg-info/top_level.txt
12
+ intellema_vdk/livekit_lib/__init__.py
13
+ intellema_vdk/livekit_lib/client.py
14
+ intellema_vdk/livekit_lib/__pycache__/__init__.cpython-312.pyc
15
+ intellema_vdk/livekit_lib/__pycache__/client.cpython-312.pyc
16
+ intellema_vdk/retell_lib/__init__.py
17
+ intellema_vdk/retell_lib/retell_client.py
18
+ intellema_vdk/retell_lib/__pycache__/__init__.cpython-312.pyc
19
+ intellema_vdk/retell_lib/__pycache__/retell_client.cpython-312.pyc
20
+ tests/test_retell_hybrid.py
@@ -0,0 +1,6 @@
1
+ livekit-api>=1.1.0
2
+ python-dotenv>=1.0.0
3
+ boto3>=1.28.0
4
+ twilio
5
+ retell-sdk
6
+ requests
@@ -0,0 +1 @@
1
+ intellema_vdk
@@ -0,0 +1,32 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "intellema-vdk"
7
+ version = "0.1.0"
8
+ description = "A Voice Development Kit for different Voice Agent Platforms"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = {file = "LICENSE"}
12
+ authors = [
13
+ {name = "Intellema"},
14
+ ]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ ]
20
+ dependencies = [
21
+ "livekit-api>=1.1.0",
22
+ "python-dotenv>=1.0.0",
23
+ "boto3>=1.28.0",
24
+ "twilio",
25
+ "retell-sdk",
26
+ "requests"
27
+ ]
28
+
29
+
30
+
31
+ [tool.setuptools.packages.find]
32
+ include = ["intellema_vdk*"]
@@ -0,0 +1,6 @@
1
+ livekit-api>=1.1.0
2
+ python-dotenv>=1.0.0
3
+ boto3>=1.28.0
4
+ twilio
5
+ retell-sdk
6
+ requests
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,71 @@
1
+ import unittest
2
+ from unittest.mock import MagicMock, patch
3
+ import os
4
+ import sys
5
+
6
# Add the project root to the python path so the intellema_vdk package is
# importable when this file is run directly from the tests/ directory.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Mock environment variables before importing RetellManager
with patch.dict(os.environ, {
    "TWILIO_ACCOUNT_SID": "ACmock",
    "TWILIO_AUTH_TOKEN": "mock_token",
    "TWILIO_PHONE_NUMBER": "+1234567890",
    "RETELL_API_KEY": "mock_retell_key",
    "WEBHOOK_URL": "https://example.com"
}):
    # retell_lib lives inside the intellema_vdk package, not at the project root.
    from intellema_vdk.retell_lib.retell_client import RetellManager
18
+
19
class TestRetellManager(unittest.TestCase):
    """Unit tests for RetellManager with the Twilio and Retell clients replaced by mocks."""

    @patch.dict(os.environ, {
        "TWILIO_ACCOUNT_SID": "ACmock",
        "TWILIO_AUTH_TOKEN": "mock_token",
        "TWILIO_PHONE_NUMBER": "+1234567890",
        "RETELL_API_KEY": "mock_retell_key",
        "RETELL_AGENT_ID": "mock_agent_id"
    })
    def setUp(self):
        self.manager = RetellManager()
        # Mock the clients so no test touches the network
        self.manager.twilio_client = MagicMock()
        self.manager.retell_client = MagicMock()

    def test_start_outbound_call(self):
        # The manager registers the call via register_phone_call and builds the
        # stream URL from the returned call_id, so that is what gets mocked.
        register = self.manager.retell_client.call.register_phone_call
        register.return_value.call_id = "retell_call_123"

        # Mock Twilio call creation
        self.manager.twilio_client.calls.create.return_value.sid = "CA123"

        sid = self.manager.start_outbound_call("+15550000000")

        # Verify Retell registration happened exactly once
        register.assert_called_once()

        # Verify Twilio create called with TwiML pointing at the Retell socket
        self.manager.twilio_client.calls.create.assert_called_once()
        call_args = self.manager.twilio_client.calls.create.call_args[1]
        self.assertEqual(call_args['to'], "+15550000000")
        self.assertIn(
            "<Stream url=\"wss://api.retellai.com/audio-websocket/retell_call_123\" />",
            call_args['twiml'],
        )
        self.assertEqual(sid, "CA123")

    def test_delete_room(self):
        self.manager.delete_room("CA123")
        # Retell client end_call should be called
        self.manager.retell_client.call.end_call.assert_called_with(call_id="CA123")
        # Twilio client update should be called
        self.manager.twilio_client.calls.assert_called_with("CA123")
        self.manager.twilio_client.calls("CA123").update.assert_called_with(status='completed')

    def test_start_recording(self):
        recordings = self.manager.twilio_client.calls("CA123").recordings
        recordings.create.return_value.sid = "RE123"

        # wait_for_completion=False: with mocked clients the status never
        # becomes 'completed', so waiting would poll forever.
        sid = self.manager.start_recording("CA123", wait_for_completion=False)

        recordings.create.assert_called_once()
        self.assertEqual(sid, "RE123")

    def test_mute_participant(self):
        self.manager.mute_participant("CA123", "user", "track", True)
        self.manager.twilio_client.calls("CA123").update.assert_called_with(muted=True)
69
+
70
+ if __name__ == '__main__':
71
+ unittest.main()