intellema-vdk 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- intellema_vdk/__init__.py +67 -10
- intellema_vdk/config.py +14 -0
- intellema_vdk/providers/__init__.py +35 -0
- intellema_vdk/providers/livekit/__init__.py +19 -0
- intellema_vdk/providers/livekit/client.py +612 -0
- intellema_vdk/providers/livekit/exceptions.py +23 -0
- intellema_vdk/providers/protocols.py +33 -0
- intellema_vdk/providers/retell/__init__.py +17 -0
- intellema_vdk/providers/retell/client.py +468 -0
- intellema_vdk/providers/retell/exceptions.py +19 -0
- intellema_vdk/{retell_lib → providers/retell}/import_phone_number.py +1 -1
- intellema_vdk/stt/__init__.py +17 -0
- intellema_vdk/stt/client.py +482 -0
- intellema_vdk/stt/exceptions.py +19 -0
- intellema_vdk/tts/__init__.py +15 -0
- intellema_vdk/tts/__pycache__/__init__.cpython-312.pyc +0 -0
- intellema_vdk/tts/__pycache__/client.cpython-312.pyc +0 -0
- intellema_vdk/tts/__pycache__/exceptions.cpython-312.pyc +0 -0
- intellema_vdk/tts/__pycache__/providers.cpython-312.pyc +0 -0
- intellema_vdk/tts/client.py +541 -0
- intellema_vdk/tts/exceptions.py +15 -0
- intellema_vdk/tts/providers.py +293 -0
- intellema_vdk/utils/logger_config.py +41 -0
- intellema_vdk-0.2.2.dist-info/METADATA +311 -0
- intellema_vdk-0.2.2.dist-info/RECORD +29 -0
- {intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/WHEEL +1 -1
- intellema_vdk/livekit_lib/__init__.py +0 -3
- intellema_vdk/livekit_lib/client.py +0 -280
- intellema_vdk/retell_lib/retell_client.py +0 -248
- intellema_vdk/speech_lib/__init__.py +0 -2
- intellema_vdk/speech_lib/stt_client.py +0 -108
- intellema_vdk/speech_lib/tts_streamer.py +0 -188
- intellema_vdk-0.2.0.dist-info/METADATA +0 -221
- intellema_vdk-0.2.0.dist-info/RECORD +0 -14
- /intellema_vdk/{retell_lib/__init__.py → stt/providers.py} +0 -0
- {intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/licenses/LICENSE +0 -0
- {intellema_vdk-0.2.0.dist-info → intellema_vdk-0.2.2.dist-info}/top_level.txt +0 -0
|
@@ -1,280 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import json
|
|
3
|
-
import uuid
|
|
4
|
-
import asyncio
|
|
5
|
-
import time
|
|
6
|
-
import boto3
|
|
7
|
-
from typing import List, Optional
|
|
8
|
-
from dotenv import load_dotenv
|
|
9
|
-
from livekit import api
|
|
10
|
-
|
|
11
|
-
# Load environment variables
|
|
12
|
-
load_dotenv(dotenv_path=".env.local")
|
|
13
|
-
load_dotenv()
|
|
14
|
-
|
|
15
|
-
class LiveKitManager:
    """Async helper around the LiveKit server API for outbound calls.

    Connection settings are read from environment variables when the manager
    is constructed. Every operation addresses a LiveKit room by ``call_id``.
    """

    def __init__(self):
        """Read LiveKit settings from the environment and build the API client.

        Raises:
            ValueError: If LIVEKIT_URL, LIVEKIT_API_KEY or LIVEKIT_API_SECRET
                is unset or empty.
        """
        self.url = os.getenv("LIVEKIT_URL")
        self.api_key = os.getenv("LIVEKIT_API_KEY")
        self.api_secret = os.getenv("LIVEKIT_API_SECRET")
        # Optional here; only required by start_outbound_call().
        self.sip_trunk_id = os.getenv("SIP_OUTBOUND_TRUNK_ID")

        if not self.url or not self.api_key or not self.api_secret:
            raise ValueError("LIVEKIT_URL, LIVEKIT_API_KEY, and LIVEKIT_API_SECRET must be set.")

        self.lk_api = api.LiveKitAPI(
            url=self.url,
            api_key=self.api_key,
            api_secret=self.api_secret,
        )

    async def close(self):
        """Close the underlying LiveKit API client."""
        await self.lk_api.aclose()

    async def start_outbound_call(self, phone_number: str, prompt_content: str, call_id: Optional[str] = None, timeout: int = 600):
        """Create a room, dispatch the agent and dial ``phone_number`` over SIP.

        Args:
            phone_number: Number to dial; also stored in the room metadata.
            prompt_content: Prompt text passed to the agent via metadata.
            call_id: Room name. A random ``outbound_call_<hex>`` name is
                generated when omitted.
            timeout: Value passed as the room's ``empty_timeout``.

        Returns:
            The room object returned by ``create_room``.

        Raises:
            ValueError: If no SIP trunk is configured, or if the SIP error
                text indicates the callee is busy ("Busy Here" / "486").
        """
        # Validate first so a missing trunk never leaves an orphaned room
        # and agent dispatch behind.
        if not self.sip_trunk_id:
            raise ValueError("SIP_OUTBOUND_TRUNK_ID is not configured in environment.")

        if not call_id:
            call_id = f"outbound_call_{uuid.uuid4().hex[:12]}"

        metadata = json.dumps({
            "phone_number": phone_number,
            "prompt_content": prompt_content,
        })

        # 1. Create room with metadata
        room = await self.lk_api.room.create_room(
            api.CreateRoomRequest(
                name=call_id,
                empty_timeout=timeout,
                metadata=metadata,
            )
        )

        # 2. Dispatch agent
        await self.lk_api.agent_dispatch.create_dispatch(
            api.CreateAgentDispatchRequest(
                room=call_id,
                agent_name="outbound-caller",
                metadata=metadata,
            )
        )

        # 3. Initiate outbound call (SIP/PSTN)
        sip_participant_identity = f"phone-{phone_number}"
        try:
            await self.lk_api.sip.create_sip_participant(
                api.CreateSIPParticipantRequest(
                    room_name=call_id,
                    sip_trunk_id=self.sip_trunk_id,
                    sip_call_to=phone_number,
                    participant_identity=sip_participant_identity,
                    wait_until_answered=True,
                )
            )
        except Exception as e:
            error_text = str(e)
            if "Busy Here" in error_text or "486" in error_text:
                print(f"Call failed: User is busy ({phone_number})")
                # The call never connected, so remove the room created above.
                await self.delete_room(call_id)
                raise ValueError("User is busy") from e
            raise

        return room

    async def create_token(self, call_id: str, participant_name: str) -> str:
        """Return a JWT that lets ``participant_name`` join room ``call_id``.

        The participant name is used as both the identity and display name.
        """
        token = api.AccessToken(self.api_key, self.api_secret)
        token.with_identity(participant_name)
        token.with_name(participant_name)
        token.with_grants(api.VideoGrants(
            room_join=True,
            room=call_id,
        ))
        return token.to_jwt()

    async def delete_room(self, call_id: str):
        """Delete the room named ``call_id``."""
        await self.lk_api.room.delete_room(api.DeleteRoomRequest(room=call_id))

    async def start_stream(self, call_id: str, rtmp_urls: List[str]):
        """Start a room-composite egress that streams the room to ``rtmp_urls``."""
        await self.lk_api.egress.start_room_composite_egress(
            api.RoomCompositeEgressRequest(
                room_name=call_id,
                layout="speaker",
                stream_outputs=[
                    api.StreamOutput(
                        protocol=api.StreamProtocol.RTMP,
                        urls=rtmp_urls,
                    )
                ],
            )
        )

    async def _wait_for_egress(self, egress_id: str, poll_interval: float = 5):
        """Poll until the egress completes, disappears, or ends in an error state.

        Raises:
            RuntimeError: If the egress status is failed, aborted or
                limit-reached.
        """
        while True:
            try:
                egress_list = await self.lk_api.egress.list_egress(
                    api.ListEgressRequest(egress_id=egress_id)
                )
            except Exception as e:
                # Best-effort polling: report the error and try again.
                print(f"Error checking egress status: {e}")
                await asyncio.sleep(poll_interval)
                continue

            if not egress_list.items:
                print("Egress info not found during polling.")
                return

            info = egress_list.items[0]
            if info.status == api.EgressStatus.EGRESS_COMPLETE:
                print("Egress completed successfully.")
                return
            if info.status == api.EgressStatus.EGRESS_FAILED:
                raise RuntimeError(f"Egress failed: {info.error}")
            if info.status == api.EgressStatus.EGRESS_ABORTED:
                # Without this branch an aborted egress would be polled forever.
                raise RuntimeError(f"Egress aborted: {info.error}")
            if info.status == api.EgressStatus.EGRESS_LIMIT_REACHED:
                raise RuntimeError(f"Egress limit reached: {info.error}")

            await asyncio.sleep(poll_interval)

    async def start_recording(self, call_id: str, output_filepath: Optional[str] = None, upload_to_s3: bool = True, wait_for_completion: bool = True):
        """
        Start recording a room.

        Args:
            call_id: Name of the room/call to record.
            output_filepath: Optional path/filename for the recording. Defaults
                to ``<call_id>-<random hex>.mp4``.
            upload_to_s3: If True, uploads to S3 (requires env vars). If False,
                no S3 upload is configured on the file output.
            wait_for_completion: If True and upload_to_s3 is True, waits for
                the egress to finish and downloads the file into the local
                ``recordings`` directory.

        Raises:
            ValueError: If S3 upload is requested but AWS_ACCESS_KEY_ID,
                AWS_SECRET_ACCESS_KEY or AWS_S3_BUCKET is missing.
            RuntimeError: If the egress fails, is aborted, or hits a limit
                while waiting.
        """
        filename = output_filepath if output_filepath else f"{call_id}-{uuid.uuid4().hex[:6]}.mp4"

        if upload_to_s3:
            access_key = os.getenv("AWS_ACCESS_KEY_ID")
            secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
            bucket = os.getenv("AWS_S3_BUCKET")
            region = os.getenv("AWS_REGION")

            if not access_key or not secret_key or not bucket:
                raise ValueError("AWS credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_S3_BUCKET) are required for S3 upload.")

            file_output = api.EncodedFileOutput(
                file_type=api.EncodedFileType.MP4,
                filepath=filename,
                s3=api.S3Upload(
                    access_key=access_key,
                    secret=secret_key,
                    bucket=bucket,
                    region=region,
                ),
            )
            print(f"Starting recording. File will be saved to S3: s3://{bucket}/{filename}")
        else:
            file_output = api.EncodedFileOutput(
                file_type=api.EncodedFileType.MP4,
                filepath=filename,
            )
            print(f"Starting recording. File will be saved locally: {filename}")

        egress_info = await self.lk_api.egress.start_room_composite_egress(
            api.RoomCompositeEgressRequest(
                room_name=call_id,
                layout="grid",
                preset=api.EncodingOptionsPreset.H264_720P_30,
                file_outputs=[file_output],
            )
        )

        if not (wait_for_completion and upload_to_s3):
            return

        egress_id = egress_info.egress_id
        print(f"Waiting for egress {egress_id} to complete...")
        await self._wait_for_egress(egress_id)

        # Download from S3
        print(f"Downloading {filename} from S3 bucket {bucket}...")
        s3 = boto3.client(
            's3',
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            region_name=region,
        )

        local_path = os.path.join("recordings", filename)
        # filename may contain sub-directories; create the full parent path.
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        try:
            # boto3 is blocking; run it off the event loop thread.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, s3.download_file, bucket, filename, local_path)
            print(f"Recording downloaded to: {local_path}")
        except Exception as e:
            print(f"Failed to download recording: {e}")
            raise

    async def kick_participant(self, call_id: str, identity: str):
        """Remove the participant ``identity`` from room ``call_id``."""
        await self.lk_api.room.remove_participant(
            api.RoomParticipantIdentity(
                room=call_id,
                identity=identity,
            )
        )

    async def mute_participant(self, call_id: str, identity: str, track_sid: str, muted: bool):
        """Set the muted state of one published track of a participant."""
        await self.lk_api.room.mute_published_track(
            api.MuteRoomTrackRequest(
                room=call_id,
                identity=identity,
                track_sid=track_sid,
                muted=muted,
            )
        )

    async def send_alert(self, call_id: str, message: str, participant_identity: Optional[str] = None):
        """Send a JSON ``{"type": "alert", "message": ...}`` data packet to the room.

        Args:
            call_id: Room to send to.
            message: Alert text.
            participant_identity: If given, the only destination identity;
                otherwise the destination list is left empty.
        """
        destination_identities = [participant_identity] if participant_identity else []
        data_packet = json.dumps({"type": "alert", "message": message}).encode('utf-8')

        await self.lk_api.room.send_data(
            api.SendDataRequest(
                room=call_id,
                data=data_packet,
                # Named constant: in the LiveKit protocol RELIABLE is 0 and
                # LOSSY is 1, so the previous literal 1 sent alerts lossy.
                kind=api.DataPacket.Kind.RELIABLE,
                destination_identities=destination_identities,
            )
        )

    async def get_participant_identities(self, call_id: str) -> List[dict]:
        """
        Get a list of all participants in a room with their identities and tracks.

        Returns:
            List of dicts with participant info:
            [
                {
                    "identity": str,
                    "name": str,
                    "tracks": [
                        {"sid": str, "type": str, "muted": bool, "source": str},
                        ...
                    ]
                },
                ...
            ]
            ``type`` is "audio", "video" or "unknown".
        """
        response = await self.lk_api.room.list_participants(
            api.ListParticipantsRequest(room=call_id)
        )

        # Map via the protocol enum rather than literal numbers
        # (AUDIO is 0 and VIDEO is 1 in the LiveKit protocol).
        track_type_names = {
            api.TrackType.AUDIO: "audio",
            api.TrackType.VIDEO: "video",
        }

        participants = []
        for p in response.participants:
            tracks = [
                {
                    "sid": track.sid,
                    "type": track_type_names.get(track.type, "unknown"),
                    "muted": track.muted,
                    "source": track.source.name if hasattr(track.source, 'name') else str(track.source),
                }
                for track in p.tracks
            ]
            participants.append({
                "identity": p.identity,
                "name": p.name,
                "tracks": tracks,
            })
        return participants
|
|
279
|
-
|
|
280
|
-
|
|
@@ -1,248 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from typing import List, Optional
|
|
3
|
-
from dotenv import load_dotenv
|
|
4
|
-
from twilio.rest import Client
|
|
5
|
-
from retell import Retell
|
|
6
|
-
import time
|
|
7
|
-
import uuid
|
|
8
|
-
import requests
|
|
9
|
-
import boto3
|
|
10
|
-
|
|
11
|
-
# Load environment variables
|
|
12
|
-
load_dotenv(dotenv_path=".env.local")
|
|
13
|
-
load_dotenv()
|
|
14
|
-
|
|
15
|
-
class RetellManager:
    """Call-control helper that combines the Retell and Twilio clients.

    Settings are read from environment variables when the manager is
    constructed.
    """

    def __init__(self):
        """Read Twilio/Retell settings from the environment and build both clients.

        Raises:
            ValueError: If any of TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN,
                TWILIO_PHONE_NUMBER, RETELL_API_KEY or RETELL_AGENT_ID is
                unset or empty.
        """
        self.twilio_account_sid = os.getenv("TWILIO_ACCOUNT_SID")
        self.twilio_auth_token = os.getenv("TWILIO_AUTH_TOKEN")
        self.twilio_number = os.getenv("TWILIO_PHONE_NUMBER")
        self.retell_api_key = os.getenv("RETELL_API_KEY")
        self.retell_agent_id = os.getenv("RETELL_AGENT_ID")

        if not all([self.twilio_account_sid, self.twilio_auth_token, self.twilio_number, self.retell_api_key, self.retell_agent_id]):
            raise ValueError("Missing necessary environment variables for RetellManager")

        self.twilio_client = Client(self.twilio_account_sid, self.twilio_auth_token)
        self.retell_client = Retell(api_key=self.retell_api_key)

    def import_phone_number(self, termination_uri: Optional[str] = None, outbound_agent_id: Optional[str] = None, inbound_agent_id: Optional[str] = None, nickname: Optional[str] = None, sip_trunk_auth_username: Optional[str] = None, sip_trunk_auth_password: Optional[str] = None):
        """
        Import/register your Twilio phone number with Retell.
        This is required before you can make outbound calls using the phone number.

        Args:
            termination_uri: Twilio SIP trunk termination URI (e.g., "yourtrunk.pstn.twilio.com").
                Omitted from the request when not provided.
            outbound_agent_id: Agent ID to use for outbound calls. Defaults to self.retell_agent_id.
            inbound_agent_id: Agent ID to use for inbound calls. Defaults to None (no inbound).
            nickname: Optional nickname for the phone number.
            sip_trunk_auth_username: Username for SIP trunk authentication (if using credential list).
            sip_trunk_auth_password: Password for SIP trunk authentication (if using credential list).
                Credentials are only sent when both username and password are given.

        Returns:
            The phone number registration response from Retell.
        """
        import_kwargs = {
            "phone_number": self.twilio_number,
        }

        if termination_uri:
            import_kwargs["termination_uri"] = termination_uri

        if sip_trunk_auth_username and sip_trunk_auth_password:
            import_kwargs["sip_trunk_auth_username"] = sip_trunk_auth_username
            import_kwargs["sip_trunk_auth_password"] = sip_trunk_auth_password

        # An explicit outbound agent wins over the configured default.
        effective_outbound_agent = outbound_agent_id or self.retell_agent_id
        if effective_outbound_agent:
            import_kwargs["outbound_agent_id"] = effective_outbound_agent

        if inbound_agent_id:
            import_kwargs["inbound_agent_id"] = inbound_agent_id

        if nickname:
            import_kwargs["nickname"] = nickname

        try:
            response = self.retell_client.phone_number.import_(**import_kwargs)
            print(f"✓ Phone number {self.twilio_number} successfully imported to Retell!")
            print(f"  Phone Number: {response.phone_number}")
            print(f"  Type: {response.phone_number_type}")
            if hasattr(response, 'outbound_agent_id') and response.outbound_agent_id:
                print(f"  Outbound Agent: {response.outbound_agent_id}")
            if hasattr(response, 'inbound_agent_id') and response.inbound_agent_id:
                print(f"  Inbound Agent: {response.inbound_agent_id}")
            return response
        except Exception as e:
            print(f"✗ Error importing phone number: {e}")
            print("\nNote: If you're using Twilio, you may need to:")
            print("  1. Create an Elastic SIP Trunk in Twilio console")
            print("  2. Provide the termination_uri parameter (e.g., 'yourtrunk.pstn.twilio.com')")
            print("  3. Or purchase the number directly through Retell dashboard")
            raise

    def start_outbound_call(self, phone_number: str, prompt_content: Optional[str] = None, call_id: Optional[str] = None) -> str:
        """Create a Retell phone call from the configured number to ``phone_number``.

        Args:
            phone_number: Number to call.
            prompt_content: Optional custom prompt. When given it is extended
                with a system instruction about the ``force_end`` dynamic
                variable and sent as ``retell_llm_dynamic_variables``.
            call_id: Optional caller-side identifier stored in the call metadata.

        Returns:
            The Retell call ID of the created call.
        """
        call_kwargs = {
            "from_number": self.twilio_number,
            "to_number": phone_number,
            "override_agent_id": self.retell_agent_id,
        }

        if call_id:
            call_kwargs["metadata"] = {"call_id": call_id}

        # Only set up dynamic variables if we have a custom prompt
        if prompt_content:
            # The {{force_end}} placeholders sit in plain (non-f) strings, so
            # the double braces are sent literally.
            enhanced_prompt = (
                f"{prompt_content}\n\n"
                "SYSTEM INSTRUCTION: Continually check the dynamic variable {{force_end}}. "
                "If {{force_end}} == 'true', you must IMMEDIATELY say goodbye and activate your end_call tool to terminate the call."
            )
            call_kwargs["retell_llm_dynamic_variables"] = {
                "prompt_content": enhanced_prompt,
                "force_end": "false",
            }

        call_response = self.retell_client.call.create_phone_call(**call_kwargs)

        print("Call created successfully!")
        print(f"Retell Call ID: {call_response.call_id}")
        print(f"Call Status: {call_response.call_status}")

        return call_response.call_id

    def delete_room(self, call_id: str):
        """Ask the agent to end a call by overriding ``force_end`` to "true".

        The override is only sent while the call status is 'registered',
        'ongoing' or 'dialing'. Errors are printed and re-raised.
        """
        try:
            call_data = self.retell_client.call.retrieve(call_id)
            print(f"Current call status: {call_data.call_status}")

            if call_data.call_status in ['registered', 'ongoing', 'dialing']:
                print(f"Triggering end for Retell call {call_id}...")

                self.retell_client.call.update(
                    call_id,
                    override_dynamic_variables={"force_end": "true"},
                )

                print("✓ force_end override sent to Retell API")
            else:
                print(f"Call already ended: {call_data.call_status}")

        except Exception as e:
            print(f"Error ending call {call_id}: {e}")
            raise

    def start_stream(self, call_id: str, rtmp_urls: List[str]):
        """
        Starts a Twilio Media Stream.
        Note: Twilio streams are WebSocket-based. If rtmp_urls contains a WSS URL, it will work.
        Only the first URL in ``rtmp_urls`` is used.

        Raises:
            ValueError: If ``rtmp_urls`` is empty.
        """
        if not rtmp_urls:
            raise ValueError("No stream URLs provided")

        self.twilio_client.calls(call_id).streams.create(
            url=rtmp_urls[0]
        )

    def start_recording(self, call_id: str, output_filepath: Optional[str] = None, upload_to_s3: bool = True, wait_for_completion: bool = True):
        """
        Triggers a recording on the active Twilio call.

        Args:
            call_id: The Twilio Call SID.
            output_filepath: Optional filename for the recording.
            upload_to_s3: If True, uploads to S3.
            wait_for_completion: If True, waits for recording to finish and then uploads.

        Returns:
            The Twilio Recording SID.

        Raises:
            ValueError: If the recording is to be uploaded but
                AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY or AWS_S3_BUCKET is missing.
            RuntimeError: If the recording ends as 'failed'/'absent' or the
                download does not return HTTP 200.
        """
        upload_after_wait = wait_for_completion and upload_to_s3
        if upload_after_wait:
            # Fail fast: check the S3 settings before recording, polling and
            # downloading, not after.
            access_key = os.getenv("AWS_ACCESS_KEY_ID")
            secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
            bucket = os.getenv("AWS_S3_BUCKET")
            region = os.getenv("AWS_REGION")

            if not access_key or not secret_key or not bucket:
                raise ValueError("AWS credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_S3_BUCKET) are required for S3 upload.")

        # Start Twilio recording
        recording = self.twilio_client.calls(call_id).recordings.create()
        print(f"Recording started: {recording.sid}")

        if not wait_for_completion:
            return recording.sid

        # Poll for recording completion
        print("Waiting for recording to complete...")
        while True:
            rec_status = self.twilio_client.recordings(recording.sid).fetch()
            if rec_status.status == 'completed':
                print("Recording completed.")
                break
            if rec_status.status in ['failed', 'absent']:
                raise RuntimeError(f"Recording failed with status: {rec_status.status}")
            time.sleep(5)

        if not upload_to_s3:
            return recording.sid

        # Download recording from Twilio
        media_url = f"https://api.twilio.com/2010-04-01/Accounts/{self.twilio_account_sid}/Recordings/{recording.sid}.mp3"
        print(f"Downloading recording from: {media_url}")

        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(
            media_url,
            auth=(self.twilio_account_sid, self.twilio_auth_token),
            timeout=60,
        )
        if response.status_code != 200:
            raise RuntimeError(f"Failed to download recording: {response.status_code} {response.text}")

        filename = output_filepath if output_filepath else f"{call_id}-{uuid.uuid4().hex[:6]}.mp3"

        # Upload to S3
        s3 = boto3.client(
            's3',
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            region_name=region,
        )

        print(f"Uploading to S3: s3://{bucket}/{filename}")
        s3.put_object(Bucket=bucket, Key=filename, Body=response.content)
        print(f"Upload complete: s3://{bucket}/{filename}")

        # Also save locally
        local_path = os.path.join("recordings", filename)
        # filename may contain sub-directories; create the full parent path.
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, 'wb') as f:
            f.write(response.content)
        print(f"Recording saved locally: {local_path}")

        return recording.sid

    def mute_participant(self, call_id: str, identity: str, track_sid: str, muted: bool):
        """
        Mutes the participant on the Twilio call.
        This prevents audio from reaching the Retell AI.

        ``identity`` and ``track_sid`` are accepted but not used.
        """
        # NOTE(review): confirm that Twilio's call update accepts a `muted`
        # argument; it may only be supported on conference participants.
        self.twilio_client.calls(call_id).update(muted=muted)

    def kick_participant(self, call_id: str, identity: str):
        """
        Alias for delete_room (hangup). ``identity`` is not used.
        """
        self.delete_room(call_id)

    def send_alert(self, call_id: str, message: str, participant_identity: Optional[str] = None):
        """
        Not fully supported in this hybrid model

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("send_alert is not currently supported in RetellManager")
|
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import logging
|
|
3
|
-
import httpx
|
|
4
|
-
from dotenv import load_dotenv
|
|
5
|
-
from openai import AsyncOpenAI
|
|
6
|
-
|
|
7
|
-
load_dotenv()
|
|
8
|
-
logger = logging.getLogger(__name__)
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class STTManager:
    """Transcribe audio files with OpenAI and optionally post the text to an agent API."""

    def __init__(self):
        """
        Initializes the STTManager.

        Note:
            The following must be set in your .env file:
            - OPENAI_API_KEY
            - AGENT_API_URL (If not set, posting to agent will be disabled)

        Raises:
            ValueError: If OPENAI_API_KEY is unset or empty.
        """
        self._api_key = os.getenv("OPENAI_API_KEY")
        if not self._api_key:
            raise ValueError("OPENAI_API_KEY must be set in your .env file.")

        self._agent_api_url = os.getenv("AGENT_API_URL")
        if not self._agent_api_url:
            logger.warning("AGENT_API_URL is not set in .env. Posting to agent will be disabled.")

        self._openai_client = AsyncOpenAI(api_key=self._api_key)
        self._http_client = httpx.AsyncClient()

    async def close(self):
        """
        Cleans up resources used by the STTManager.

        The OpenAI client is closed even if closing the HTTP client raises.
        """
        try:
            await self._http_client.aclose()
        finally:
            await self._openai_client.close()

    async def transcribe_audio(self, file_path: str, model: str = "whisper-1") -> str:
        """
        Transcribes an audio file using OpenAI's whisper model.

        Args:
            file_path: The path to the audio file to transcribe.
                       Supported formats: mp3, mp4, mpeg, mpga, m4a, wav, and webm.
            model: The name of the whisper model to use.
                   Note: The OpenAI API currently only supports "whisper-1".
        Returns:
            The transcribed text as a string.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
        """
        logger.info(f"Starting transcription for file: {file_path}")
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Audio file not found at: {file_path}")

        with open(file_path, "rb") as audio_file:
            transcript = await self._openai_client.audio.transcriptions.create(
                model=model,
                file=audio_file,
            )
        logger.info(f"Successfully transcribed file: {file_path}")

        return transcript.text

    async def transcribe_and_post(self, file_path: str, model: str = "whisper-1"):
        """
        Processes an audio file by transcribing it and posting the result to the agent API under a 'message' key.

        Args:
            file_path: The path to the audio file to process.
                       Supported formats: mp3, mp4, mpeg, mpga, m4a, wav, and webm.
            model: Transcription model name, forwarded to transcribe_audio.
        Returns:
            The transcribed text as a string.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            Exception: Any other error from transcription or posting is
                logged and re-raised unchanged.
        """
        try:
            # Transcribe the audio file
            transcript_text = await self.transcribe_audio(file_path, model=model)

            # Post the transcribed text to the agent API
            if self._agent_api_url:
                await self._post_to_agent(transcript_text)
            else:
                logger.info("AGENT_API_URL not set, skipping post to agent.")

            return transcript_text

        except FileNotFoundError:
            logger.error(f"Audio file not found at: {file_path}", exc_info=True)
            raise
        except Exception as e:
            logger.error(f"An error occurred during processing of {file_path}: {e}", exc_info=True)
            raise

    async def _post_to_agent(self, text: str):
        """
        Posts the transcribed text to the agent API under a 'message' key.

        Args:
            text: The transcribed text to post.

        Raises:
            httpx.HTTPError: If the request fails or returns an error status.
        """
        payload = {"message": text}
        try:
            logger.info(f"Posting to agent with payload: {payload}")
            response = await self._http_client.post(self._agent_api_url, json=payload)
            response.raise_for_status()
            logger.info(f"Successfully posted to agent. Status: {response.status_code}")
        except httpx.HTTPError as e:
            logger.error(f"Failed to post to agent API: {e}", exc_info=True)
            raise
|