dv-pipecat-ai 0.0.85.dev5__py3-none-any.whl → 0.0.85.dev698__py3-none-any.whl
This diff reflects the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/METADATA +78 -117
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/RECORD +157 -123
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +5 -0
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +120 -87
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +12 -4
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +85 -24
- pipecat/processors/aggregators/dtmf_aggregator.py +28 -22
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_response.py +6 -7
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/filters/stt_mute_filter.py +2 -0
- pipecat/processors/frame_processor.py +103 -17
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +209 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +4 -4
- pipecat/processors/user_idle_processor.py +3 -6
- pipecat/runner/run.py +270 -50
- pipecat/runner/types.py +2 -0
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +6 -9
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/asyncai/tts.py +2 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +122 -97
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +367 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1155 -0
- pipecat/services/aws/stt.py +1 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +13 -355
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/tts.py +2 -2
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +636 -0
- pipecat/services/elevenlabs/__init__.py +2 -1
- pipecat/services/elevenlabs/stt.py +254 -276
- pipecat/services/elevenlabs/tts.py +5 -5
- pipecat/services/fish/tts.py +2 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +2 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +2 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +16 -8
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/playht/tts.py +31 -4
- pipecat/services/rime/tts.py +3 -4
- pipecat/services/sarvam/tts.py +2 -6
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +1 -7
- pipecat/services/stt_service.py +34 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +9 -9
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +4 -0
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +17 -42
- pipecat/transports/base_output.py +42 -26
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +98 -19
- pipecat/transports/smallwebrtc/request_handler.py +204 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/string.py +12 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/transports/livekit/transport.py
CHANGED

@@ -12,6 +12,7 @@ event handling for conversational AI applications.
"""

import asyncio
+import json
from dataclasses import dataclass
from typing import Any, Awaitable, Callable, List, Optional

@@ -24,13 +25,15 @@ from pipecat.frames.frames import (
    AudioRawFrame,
    CancelFrame,
    EndFrame,
+    ImageRawFrame,
    OutputAudioRawFrame,
    OutputDTMFFrame,
    OutputDTMFUrgentFrame,
+    OutputTransportMessageFrame,
+    OutputTransportMessageUrgentFrame,
    StartFrame,
-    TransportMessageFrame,
-    TransportMessageUrgentFrame,
    UserAudioRawFrame,
+    UserImageRawFrame,
)
from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup
from pipecat.transports.base_input import BaseInputTransport
@@ -40,6 +43,7 @@ from pipecat.utils.asyncio.task_manager import BaseTaskManager

try:
    from livekit import rtc
+    from livekit.rtc._proto import video_frame_pb2 as proto_video_frame
    from tenacity import retry, stop_after_attempt, wait_exponential
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
@@ -64,7 +68,7 @@ DTMF_CODE_MAP = {


@dataclass
-class LiveKitTransportMessageFrame(TransportMessageFrame):
+class LiveKitOutputTransportMessageFrame(OutputTransportMessageFrame):
    """Frame for transport messages in LiveKit rooms.

    Parameters:
@@ -75,7 +79,7 @@ class LiveKitTransportMessageFrame(TransportMessageFrame):


@dataclass
-class LiveKitTransportMessageUrgentFrame(TransportMessageUrgentFrame):
+class LiveKitOutputTransportMessageUrgentFrame(OutputTransportMessageUrgentFrame):
    """Frame for urgent transport messages in LiveKit rooms.

    Parameters:
@@ -85,6 +89,50 @@ class LiveKitTransportMessageUrgentFrame(TransportMessageUrgentFrame):
    participant_id: Optional[str] = None


+@dataclass
+class LiveKitTransportMessageFrame(LiveKitOutputTransportMessageFrame):
+    """Frame for transport messages in LiveKit rooms.
+
+    Parameters:
+        participant_id: Optional ID of the participant this message is for/from.
+    """
+
+    def __post_init__(self):
+        super().__post_init__()
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "LiveKitTransportMessageFrame is deprecated and will be removed in a future version. "
+                "Instead, use LiveKitOutputTransportMessageFrame.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
+
+@dataclass
+class LiveKitTransportMessageUrgentFrame(LiveKitOutputTransportMessageUrgentFrame):
+    """Frame for urgent transport messages in LiveKit rooms.
+
+    Parameters:
+        participant_id: Optional ID of the participant this message is for/from.
+    """
+
+    def __post_init__(self):
+        super().__post_init__()
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "LiveKitTransportMessageUrgentFrame is deprecated and will be removed in a future version. "
+                "Instead, use LiveKitOutputTransportMessageUrgentFrame.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
+
class LiveKitParams(TransportParams):
    """Configuration parameters for LiveKit transport.

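The old frame names are kept as deprecated subclasses that emit a DeprecationWarning from __post_init__, so existing code keeps working while callers move to the new Output* names. A minimal migration sketch (only the class names and fields come from this diff; the values are illustrative):

    # Hypothetical usage sketch; the module path follows this diff's file list.
    from pipecat.transports.livekit.transport import (
        LiveKitOutputTransportMessageFrame,  # new name
        LiveKitTransportMessageFrame,  # deprecated alias, warns when constructed
    )

    # Before: still works, but construction now emits a DeprecationWarning.
    legacy = LiveKitTransportMessageFrame(message="hello", participant_id="PA_123")

    # After: same fields, no warning.
    frame = LiveKitOutputTransportMessageFrame(message="hello", participant_id="PA_123")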
@@ -110,10 +158,13 @@ class LiveKitCallbacks(BaseModel):

    on_connected: Callable[[], Awaitable[None]]
    on_disconnected: Callable[[], Awaitable[None]]
+    on_before_disconnect: Callable[[], Awaitable[None]]
    on_participant_connected: Callable[[str], Awaitable[None]]
    on_participant_disconnected: Callable[[str], Awaitable[None]]
    on_audio_track_subscribed: Callable[[str], Awaitable[None]]
    on_audio_track_unsubscribed: Callable[[str], Awaitable[None]]
+    on_video_track_subscribed: Callable[[str], Awaitable[None]]
+    on_video_track_unsubscribed: Callable[[str], Awaitable[None]]
    on_data_received: Callable[[bytes, str], Awaitable[None]]
    on_first_participant_joined: Callable[[str], Awaitable[None]]

@@ -158,8 +209,11 @@ class LiveKitTransportClient:
        self._audio_track: Optional[rtc.LocalAudioTrack] = None
        self._audio_tracks = {}
        self._audio_queue = asyncio.Queue()
+        self._video_tracks = {}
+        self._video_queue = asyncio.Queue()
        self._other_participant_has_joined = False
        self._task_manager: Optional[BaseTaskManager] = None
+        self._async_lock = asyncio.Lock()

    @property
    def participant_id(self) -> str:
@@ -220,61 +274,64 @@ class LiveKitTransportClient:
    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    async def connect(self):
        """Connect to the LiveKit room with retry logic."""
-        [previous connect body not captured in this diff view]
+        async with self._async_lock:
+            if self._connected:
+                # Increment disconnect counter if already connected.
+                self._disconnect_counter += 1
+                return
+
+            logger.info(f"Connecting to {self._room_name}")
+
+            try:
+                await self.room.connect(
+                    self._url,
+                    self._token,
+                    options=rtc.RoomOptions(auto_subscribe=True),
+                )
+                self._connected = True
+                # Increment disconnect counter if we successfully connected.
+                self._disconnect_counter += 1
+
+                self._participant_id = self.room.local_participant.sid
+                logger.info(f"Connected to {self._room_name}")
+
+                # Set up audio source and track
+                self._audio_source = rtc.AudioSource(
+                    self._out_sample_rate, self._params.audio_out_channels
+                )
+                self._audio_track = rtc.LocalAudioTrack.create_audio_track(
+                    "pipecat-audio", self._audio_source
+                )
+                options = rtc.TrackPublishOptions()
+                options.source = rtc.TrackSource.SOURCE_MICROPHONE
+                await self.room.local_participant.publish_track(self._audio_track, options)
+
+                await self._callbacks.on_connected()
+
+                # Check if there are already participants in the room
+                participants = self.get_participants()
+                if participants and not self._other_participant_has_joined:
+                    self._other_participant_has_joined = True
+                    await self._callbacks.on_first_participant_joined(participants[0])
+            except Exception as e:
+                logger.error(f"Error connecting to {self._room_name}: {e}")
+                raise

    async def disconnect(self):
        """Disconnect from the LiveKit room."""
-        [previous disconnect body not captured in this diff view]
+        async with self._async_lock:
+            # Decrement leave counter when leaving.
+            self._disconnect_counter -= 1
+
+            if not self._connected or self._disconnect_counter > 0:
+                return
+
+            logger.info(f"Disconnecting from {self._room_name}")
+            await self._callbacks.on_before_disconnect()
+            await self.room.disconnect()
+            self._connected = False
+            logger.info(f"Disconnected from {self._room_name}")
+            await self._callbacks.on_disconnected()

    async def send_data(self, data: bytes, participant_id: Optional[str] = None):
        """Send data to participants in the room.
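Since the input and output transports share one client, connect() and disconnect() are now serialized with an asyncio.Lock and guarded by a disconnect counter: every caller of connect() bumps the counter, and disconnect() only tears the room down once the last caller has left. A condensed sketch of just that pattern (the class below is illustrative, not part of the diff):

    # Reference-counted connect/disconnect, condensed from the logic shown above.
    import asyncio

    class SharedConnection:
        def __init__(self):
            self._lock = asyncio.Lock()
            self._connected = False
            self._disconnect_counter = 0

        async def connect(self):
            async with self._lock:
                if self._connected:
                    self._disconnect_counter += 1  # another user of the shared client
                    return
                # ... real connection work happens here, only once ...
                self._connected = True
                self._disconnect_counter += 1

        async def disconnect(self):
            async with self._lock:
                self._disconnect_counter -= 1
                if not self._connected or self._disconnect_counter > 0:
                    return  # other users still need the connection
                # ... real teardown happens here, only for the last caller ...
                self._connected = False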
@@ -297,10 +354,10 @@ class LiveKitTransportClient:
            logger.error(f"Error sending data: {e}")

    async def send_dtmf(self, digit: str):
-        """Send DTMF tone to the room.
+        r"""Send DTMF tone to the room.

        Args:
-            digit: The DTMF digit to send (0-9, *, #).
+            digit: The DTMF digit to send (0-9, \*, #).
        """
        if not self._connected:
            return
@@ -316,19 +373,21 @@ class LiveKitTransportClient:
        except Exception as e:
            logger.error(f"Error sending DTMF tone {digit}: {e}")

-    async def publish_audio(self, audio_frame: rtc.AudioFrame):
+    async def publish_audio(self, audio_frame: rtc.AudioFrame) -> bool:
        """Publish an audio frame to the room.

        Args:
            audio_frame: The LiveKit audio frame to publish.
        """
        if not self._connected or not self._audio_source:
-            return
+            return False

        try:
            await self._audio_source.capture_frame(audio_frame)
+            return True
        except Exception as e:
            logger.error(f"Error publishing audio: {e}")
+            return False

    def get_participants(self) -> List[str]:
        """Get list of participant IDs in the room.
@@ -477,6 +536,15 @@ class LiveKitTransportClient:
                f"{self}::_process_audio_stream",
            )
            await self._callbacks.on_audio_track_subscribed(participant.sid)
+        elif track.kind == rtc.TrackKind.KIND_VIDEO:
+            logger.info(f"Video track subscribed: {track.sid} from participant {participant.sid}")
+            self._video_tracks[participant.sid] = track
+            video_stream = rtc.VideoStream(track)
+            self._task_manager.create_task(
+                self._process_video_stream(video_stream, participant.sid),
+                f"{self}::_process_video_stream",
+            )
+            await self._callbacks.on_video_track_subscribed(participant.sid)

    async def _async_on_track_unsubscribed(
        self,
@@ -488,6 +556,8 @@ class LiveKitTransportClient:
        logger.info(f"Track unsubscribed: {publication.sid} from {participant.identity}")
        if track.kind == rtc.TrackKind.KIND_AUDIO:
            await self._callbacks.on_audio_track_unsubscribed(participant.sid)
+        elif track.kind == rtc.TrackKind.KIND_VIDEO:
+            await self._callbacks.on_video_track_unsubscribed(participant.sid)

    async def _async_on_data_received(self, data: rtc.DataPacket):
        """Handle data received events."""
@@ -518,6 +588,21 @@ class LiveKitTransportClient:
            frame, participant_id = await self._audio_queue.get()
            yield frame, participant_id

+    async def _process_video_stream(self, video_stream: rtc.VideoStream, participant_id: str):
+        """Process incoming video stream from a participant."""
+        logger.info(f"Started processing video stream for participant {participant_id}")
+        async for event in video_stream:
+            if isinstance(event, rtc.VideoFrameEvent):
+                await self._video_queue.put((event, participant_id))
+            else:
+                logger.warning(f"Received unexpected event type: {type(event)}")
+
+    async def get_next_video_frame(self):
+        """Get the next video frame from the queue."""
+        while True:
+            frame, participant_id = await self._video_queue.get()
+            yield frame, participant_id
+
    def __str__(self):
        """String representation of the LiveKit transport client."""
        return f"{self._transport_name}::LiveKitTransportClient"
@@ -550,6 +635,7 @@ class LiveKitInputTransport(BaseInputTransport):
        self._client = client

        self._audio_in_task = None
+        self._video_in_task = None
        self._vad_analyzer: Optional[VADAnalyzer] = params.vad_analyzer
        self._resampler = create_stream_resampler()

@@ -582,6 +668,8 @@ class LiveKitInputTransport(BaseInputTransport):
        await self._client.connect()
        if not self._audio_in_task and self._params.audio_in_enabled:
            self._audio_in_task = self.create_task(self._audio_in_task_handler())
+        if not self._video_in_task and self._params.video_in_enabled:
+            self._video_in_task = self.create_task(self._video_in_task_handler())
        await self.set_transport_ready(frame)
        logger.info("LiveKitInputTransport started")

@@ -595,6 +683,8 @@ class LiveKitInputTransport(BaseInputTransport):
        await self._client.disconnect()
        if self._audio_in_task:
            await self.cancel_task(self._audio_in_task)
+        if self._video_in_task:
+            await self.cancel_task(self._video_in_task)
        logger.info("LiveKitInputTransport stopped")

    async def cancel(self, frame: CancelFrame):
@@ -607,6 +697,8 @@ class LiveKitInputTransport(BaseInputTransport):
        await self._client.disconnect()
        if self._audio_in_task and self._params.audio_in_enabled:
            await self.cancel_task(self._audio_in_task)
+        if self._video_in_task and self._params.video_in_enabled:
+            await self.cancel_task(self._video_in_task)

    async def setup(self, setup: FrameProcessorSetup):
        """Setup the input transport with shared client setup.
@@ -629,7 +721,7 @@ class LiveKitInputTransport(BaseInputTransport):
            message: The message data to send.
            sender: ID of the message sender.
        """
-        frame = LiveKitTransportMessageUrgentFrame(message=message, participant_id=sender)
+        frame = LiveKitOutputTransportMessageUrgentFrame(message=message, participant_id=sender)
        await self.push_frame(frame)

    async def _audio_in_task_handler(self):
@@ -655,6 +747,29 @@ class LiveKitInputTransport(BaseInputTransport):
            )
            await self.push_audio_frame(input_audio_frame)

+    async def _video_in_task_handler(self):
+        """Handle incoming video frames from participants."""
+        logger.info("Video input task started")
+        video_iterator = self._client.get_next_video_frame()
+        async for video_data in video_iterator:
+            if video_data:
+                video_frame_event, participant_id = video_data
+                pipecat_video_frame = await self._convert_livekit_video_to_pipecat(
+                    video_frame_event=video_frame_event
+                )
+
+                # Skip frames with no video data
+                if len(pipecat_video_frame.image) == 0:
+                    continue
+
+                input_video_frame = UserImageRawFrame(
+                    user_id=participant_id,
+                    image=pipecat_video_frame.image,
+                    size=pipecat_video_frame.size,
+                    format=pipecat_video_frame.format,
+                )
+                await self.push_video_frame(input_video_frame)
+
    async def _convert_livekit_audio_to_pipecat(
        self, audio_frame_event: rtc.AudioFrameEvent
    ) -> AudioRawFrame:
@@ -671,6 +786,19 @@ class LiveKitInputTransport(BaseInputTransport):
            num_channels=audio_frame.num_channels,
        )

+    async def _convert_livekit_video_to_pipecat(
+        self,
+        video_frame_event: rtc.VideoFrameEvent,
+    ) -> ImageRawFrame:
+        """Convert LiveKit video frame to Pipecat video frame."""
+        rgb_frame = video_frame_event.frame.convert(proto_video_frame.VideoBufferType.RGB24)
+        image_frame = ImageRawFrame(
+            image=rgb_frame.data,
+            size=(rgb_frame.width, rgb_frame.height),
+            format="RGB",
+        )
+        return image_frame
+

class LiveKitOutputTransport(BaseOutputTransport):
    """Handles outgoing media streams and events to LiveKit rooms.
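With video_in_enabled set on the transport params, the input transport converts each rtc.VideoFrameEvent to RGB and pushes it downstream as a UserImageRawFrame. A hypothetical downstream processor consuming those frames could look like this (the processor itself is not part of this diff; the field names follow the frames shown above):

    # Illustrative consumer of the UserImageRawFrame frames produced above.
    from pipecat.frames.frames import UserImageRawFrame
    from pipecat.processors.frame_processor import FrameDirection, FrameProcessor

    class UserVideoLogger(FrameProcessor):
        async def process_frame(self, frame, direction: FrameDirection):
            await super().process_frame(frame, direction)
            if isinstance(frame, UserImageRawFrame):
                width, height = frame.size
                print(f"{frame.user_id}: {width}x{height} {frame.format} frame")
            await self.push_frame(frame, direction)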
@@ -752,25 +880,36 @@ class LiveKitOutputTransport(BaseOutputTransport):
        await super().cleanup()
        await self._transport.cleanup()

-    async def send_message(
+    async def send_message(
+        self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
+    ):
        """Send a transport message to participants.

        Args:
            frame: The transport message frame to send.
        """
+        message = frame.message
+        if isinstance(message, dict):
+            # fix message encoding for dict-like messages, e.g. RTVI messages.
+            message = json.dumps(message, ensure_ascii=False)
+        if isinstance(
+            frame, (LiveKitOutputTransportMessageFrame, LiveKitOutputTransportMessageUrgentFrame)
+        ):
+            await self._client.send_data(message.encode(), frame.participant_id)
        else:
-            await self._client.send_data(
+            await self._client.send_data(message.encode())

-    async def write_audio_frame(self, frame: OutputAudioRawFrame):
+    async def write_audio_frame(self, frame: OutputAudioRawFrame) -> bool:
        """Write an audio frame to the LiveKit room.

        Args:
            frame: The audio frame to write.
+
+        Returns:
+            True if the audio frame was written successfully, False otherwise.
        """
        livekit_audio = self._convert_pipecat_audio_to_livekit(frame.audio)
-        await self._client.publish_audio(livekit_audio)
+        return await self._client.publish_audio(livekit_audio)

    def _supports_native_dtmf(self) -> bool:
        """LiveKit supports native DTMF via telephone events.
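write_audio_frame() now returns a bool instead of None, presumably so callers can tell whether a frame was actually delivered; the local and Tkinter transports further down in this diff get the same treatment. A hypothetical custom output transport following the same contract (the device handle and its API are assumptions, not part of pipecat):

    # Illustrative subclass only; _device and its play() call are assumed.
    from pipecat.frames.frames import OutputAudioRawFrame
    from pipecat.transports.base_output import BaseOutputTransport

    class MyDeviceOutputTransport(BaseOutputTransport):
        async def write_audio_frame(self, frame: OutputAudioRawFrame) -> bool:
            if not self._device:  # hypothetical device handle
                return False
            try:
                await self._device.play(frame.audio)  # hypothetical device call
                return True
            except Exception:
                return False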
@@ -834,10 +973,13 @@ class LiveKitTransport(BaseTransport):
        callbacks = LiveKitCallbacks(
            on_connected=self._on_connected,
            on_disconnected=self._on_disconnected,
+            on_before_disconnect=self._on_before_disconnect,
            on_participant_connected=self._on_participant_connected,
            on_participant_disconnected=self._on_participant_disconnected,
            on_audio_track_subscribed=self._on_audio_track_subscribed,
            on_audio_track_unsubscribed=self._on_audio_track_unsubscribed,
+            on_video_track_subscribed=self._on_video_track_subscribed,
+            on_video_track_unsubscribed=self._on_video_track_unsubscribed,
            on_data_received=self._on_data_received,
            on_first_participant_joined=self._on_first_participant_joined,
        )
@@ -855,10 +997,13 @@ class LiveKitTransport(BaseTransport):
        self._register_event_handler("on_participant_disconnected")
        self._register_event_handler("on_audio_track_subscribed")
        self._register_event_handler("on_audio_track_unsubscribed")
+        self._register_event_handler("on_video_track_subscribed")
+        self._register_event_handler("on_video_track_unsubscribed")
        self._register_event_handler("on_data_received")
        self._register_event_handler("on_first_participant_joined")
        self._register_event_handler("on_participant_left")
        self._register_event_handler("on_call_state_updated")
+        self._register_event_handler("on_before_disconnect", sync=True)

    def input(self) -> LiveKitInputTransport:
        """Get the input transport for receiving media and events.
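Handlers for the new events can be attached the same way as the existing audio-track events, via the usual @transport.event_handler decorator. A hypothetical setup (the constructor arguments are assumed; only the event names come from this diff):

    # Illustrative handler registration for the events added above.
    transport = LiveKitTransport(
        url=url, token=token, room_name=room_name,
        params=LiveKitParams(video_in_enabled=True),
    )

    @transport.event_handler("on_video_track_subscribed")
    async def on_video_track_subscribed(transport, participant_id):
        print(f"Video available from {participant_id}")

    @transport.event_handler("on_before_disconnect")
    async def on_before_disconnect(transport):
        print("About to leave the LiveKit room")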
@@ -953,6 +1098,10 @@ class LiveKitTransport(BaseTransport):
        """Handle room disconnected events."""
        await self._call_event_handler("on_disconnected")

+    async def _on_before_disconnect(self):
+        """Handle before disconnection room events."""
+        await self._call_event_handler("on_before_disconnect")
+
    async def _on_participant_connected(self, participant_id: str):
        """Handle participant connected events."""
        await self._call_event_handler("on_participant_connected", participant_id)
@@ -976,6 +1125,20 @@ class LiveKitTransport(BaseTransport):
        """Handle audio track unsubscribed events."""
        await self._call_event_handler("on_audio_track_unsubscribed", participant_id)

+    async def _on_video_track_subscribed(self, participant_id: str):
+        """Handle video track subscribed events."""
+        await self._call_event_handler("on_video_track_subscribed", participant_id)
+        participant = self._client.room.remote_participants.get(participant_id)
+        if participant:
+            for publication in participant.video_tracks.values():
+                self._client._on_track_subscribed_wrapper(
+                    publication.track, publication, participant
+                )
+
+    async def _on_video_track_unsubscribed(self, participant_id: str):
+        """Handle video track unsubscribed events."""
+        await self._call_event_handler("on_video_track_unsubscribed", participant_id)
+
    async def _on_data_received(self, data: bytes, participant_id: str):
        """Handle data received events."""
        if self._input:
@@ -990,7 +1153,9 @@ class LiveKitTransport(BaseTransport):
            participant_id: Optional specific participant to send to.
        """
        if self._output:
-            frame = LiveKitTransportMessageFrame(message=message, participant_id=participant_id)
+            frame = LiveKitOutputTransportMessageFrame(
+                message=message, participant_id=participant_id
+            )
            await self._output.send_message(frame)

    async def send_message_urgent(self, message: str, participant_id: Optional[str] = None):
@@ -1001,7 +1166,7 @@ class LiveKitTransport(BaseTransport):
            participant_id: Optional specific participant to send to.
        """
        if self._output:
-            frame = LiveKitTransportMessageUrgentFrame(
+            frame = LiveKitOutputTransportMessageUrgentFrame(
                message=message, participant_id=participant_id
            )
            await self._output.send_message(frame)
pipecat/transports/local/audio.py
CHANGED

@@ -172,16 +172,21 @@ class LocalAudioOutputTransport(BaseOutputTransport):
        self._out_stream.close()
        self._out_stream = None

-    async def write_audio_frame(self, frame: OutputAudioRawFrame):
+    async def write_audio_frame(self, frame: OutputAudioRawFrame) -> bool:
        """Write an audio frame to the output stream.

        Args:
            frame: The audio frame to write to the output device.
+
+        Returns:
+            True if the audio frame was written successfully, False otherwise.
        """
        if self._out_stream:
            await self.get_event_loop().run_in_executor(
                self._executor, self._out_stream.write, frame.audio
            )
+            return True
+        return False


class LocalAudioTransport(BaseTransport):
pipecat/transports/local/tk.py
CHANGED

@@ -191,24 +191,33 @@ class TkOutputTransport(BaseOutputTransport):
        self._out_stream.close()
        self._out_stream = None

-    async def write_audio_frame(self, frame: OutputAudioRawFrame):
+    async def write_audio_frame(self, frame: OutputAudioRawFrame) -> bool:
        """Write an audio frame to the output stream.

        Args:
            frame: The audio frame to write to the output device.
+
+        Returns:
+            True if the audio frame was written successfully, False otherwise.
        """
        if self._out_stream:
            await self.get_event_loop().run_in_executor(
                self._executor, self._out_stream.write, frame.audio
            )
+            return True
+        return False

-    async def write_video_frame(self, frame: OutputImageRawFrame):
+    async def write_video_frame(self, frame: OutputImageRawFrame) -> bool:
        """Write a video frame to the Tkinter display.

        Args:
            frame: The video frame to display in the Tkinter window.
+
+        Returns:
+            True if the video frame was written successfully, False otherwise.
        """
        self.get_event_loop().call_soon(self._write_frame_to_tk, frame)
+        return True

    def _write_frame_to_tk(self, frame: OutputImageRawFrame):
|
"""Write frame data to the Tkinter image label."""
|