dv-pipecat-ai 0.0.85.dev7__py3-none-any.whl → 0.0.85.dev698__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (156)
  1. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/METADATA +78 -117
  2. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/RECORD +156 -122
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +5 -0
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  11. pipecat/audio/filters/noisereduce_filter.py +15 -0
  12. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  13. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  14. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  15. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  16. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  17. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  18. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  19. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  20. pipecat/audio/vad/data/README.md +10 -0
  21. pipecat/audio/vad/vad_analyzer.py +13 -1
  22. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  23. pipecat/frames/frames.py +120 -87
  24. pipecat/observers/loggers/debug_log_observer.py +3 -3
  25. pipecat/observers/loggers/llm_log_observer.py +7 -3
  26. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  27. pipecat/pipeline/runner.py +12 -4
  28. pipecat/pipeline/service_switcher.py +64 -36
  29. pipecat/pipeline/task.py +85 -24
  30. pipecat/processors/aggregators/dtmf_aggregator.py +28 -22
  31. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  32. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  33. pipecat/processors/aggregators/llm_response.py +6 -7
  34. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  35. pipecat/processors/aggregators/user_response.py +6 -6
  36. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  37. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  38. pipecat/processors/filters/stt_mute_filter.py +2 -0
  39. pipecat/processors/frame_processor.py +103 -17
  40. pipecat/processors/frameworks/langchain.py +8 -2
  41. pipecat/processors/frameworks/rtvi.py +209 -68
  42. pipecat/processors/frameworks/strands_agents.py +170 -0
  43. pipecat/processors/logger.py +2 -2
  44. pipecat/processors/transcript_processor.py +4 -4
  45. pipecat/processors/user_idle_processor.py +3 -6
  46. pipecat/runner/run.py +270 -50
  47. pipecat/runner/types.py +2 -0
  48. pipecat/runner/utils.py +51 -10
  49. pipecat/serializers/exotel.py +5 -5
  50. pipecat/serializers/livekit.py +20 -0
  51. pipecat/serializers/plivo.py +6 -9
  52. pipecat/serializers/protobuf.py +6 -5
  53. pipecat/serializers/telnyx.py +2 -2
  54. pipecat/serializers/twilio.py +43 -23
  55. pipecat/services/ai_service.py +2 -6
  56. pipecat/services/anthropic/llm.py +2 -25
  57. pipecat/services/asyncai/tts.py +2 -3
  58. pipecat/services/aws/__init__.py +1 -0
  59. pipecat/services/aws/llm.py +122 -97
  60. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  61. pipecat/services/aws/nova_sonic/context.py +367 -0
  62. pipecat/services/aws/nova_sonic/frames.py +25 -0
  63. pipecat/services/aws/nova_sonic/llm.py +1155 -0
  64. pipecat/services/aws/stt.py +1 -3
  65. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  66. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  67. pipecat/services/aws_nova_sonic/context.py +13 -355
  68. pipecat/services/aws_nova_sonic/frames.py +13 -17
  69. pipecat/services/azure/realtime/__init__.py +0 -0
  70. pipecat/services/azure/realtime/llm.py +65 -0
  71. pipecat/services/azure/stt.py +15 -0
  72. pipecat/services/cartesia/tts.py +2 -2
  73. pipecat/services/deepgram/__init__.py +1 -0
  74. pipecat/services/deepgram/flux/__init__.py +0 -0
  75. pipecat/services/deepgram/flux/stt.py +636 -0
  76. pipecat/services/elevenlabs/__init__.py +2 -1
  77. pipecat/services/elevenlabs/stt.py +254 -276
  78. pipecat/services/elevenlabs/tts.py +5 -5
  79. pipecat/services/fish/tts.py +2 -2
  80. pipecat/services/gemini_multimodal_live/events.py +38 -524
  81. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  82. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  83. pipecat/services/gladia/stt.py +56 -72
  84. pipecat/services/google/__init__.py +1 -0
  85. pipecat/services/google/gemini_live/__init__.py +3 -0
  86. pipecat/services/google/gemini_live/file_api.py +189 -0
  87. pipecat/services/google/gemini_live/llm.py +1582 -0
  88. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  89. pipecat/services/google/llm.py +15 -11
  90. pipecat/services/google/llm_openai.py +3 -3
  91. pipecat/services/google/llm_vertex.py +86 -16
  92. pipecat/services/google/tts.py +7 -3
  93. pipecat/services/heygen/api.py +2 -0
  94. pipecat/services/heygen/client.py +8 -4
  95. pipecat/services/heygen/video.py +2 -0
  96. pipecat/services/hume/__init__.py +5 -0
  97. pipecat/services/hume/tts.py +220 -0
  98. pipecat/services/inworld/tts.py +6 -6
  99. pipecat/services/llm_service.py +15 -5
  100. pipecat/services/lmnt/tts.py +2 -2
  101. pipecat/services/mcp_service.py +4 -2
  102. pipecat/services/mem0/memory.py +6 -5
  103. pipecat/services/mistral/llm.py +29 -8
  104. pipecat/services/moondream/vision.py +42 -16
  105. pipecat/services/neuphonic/tts.py +2 -2
  106. pipecat/services/openai/__init__.py +1 -0
  107. pipecat/services/openai/base_llm.py +27 -20
  108. pipecat/services/openai/realtime/__init__.py +0 -0
  109. pipecat/services/openai/realtime/context.py +272 -0
  110. pipecat/services/openai/realtime/events.py +1106 -0
  111. pipecat/services/openai/realtime/frames.py +37 -0
  112. pipecat/services/openai/realtime/llm.py +829 -0
  113. pipecat/services/openai/tts.py +16 -8
  114. pipecat/services/openai_realtime/__init__.py +27 -0
  115. pipecat/services/openai_realtime/azure.py +21 -0
  116. pipecat/services/openai_realtime/context.py +21 -0
  117. pipecat/services/openai_realtime/events.py +21 -0
  118. pipecat/services/openai_realtime/frames.py +21 -0
  119. pipecat/services/openai_realtime_beta/azure.py +16 -0
  120. pipecat/services/openai_realtime_beta/openai.py +17 -5
  121. pipecat/services/playht/tts.py +31 -4
  122. pipecat/services/rime/tts.py +3 -4
  123. pipecat/services/sarvam/tts.py +2 -6
  124. pipecat/services/simli/video.py +2 -2
  125. pipecat/services/speechmatics/stt.py +1 -7
  126. pipecat/services/stt_service.py +34 -0
  127. pipecat/services/tavus/video.py +2 -2
  128. pipecat/services/tts_service.py +9 -9
  129. pipecat/services/vision_service.py +7 -6
  130. pipecat/tests/utils.py +4 -4
  131. pipecat/transcriptions/language.py +41 -1
  132. pipecat/transports/base_input.py +17 -42
  133. pipecat/transports/base_output.py +42 -26
  134. pipecat/transports/daily/transport.py +199 -26
  135. pipecat/transports/heygen/__init__.py +0 -0
  136. pipecat/transports/heygen/transport.py +381 -0
  137. pipecat/transports/livekit/transport.py +228 -63
  138. pipecat/transports/local/audio.py +6 -1
  139. pipecat/transports/local/tk.py +11 -2
  140. pipecat/transports/network/fastapi_websocket.py +1 -1
  141. pipecat/transports/smallwebrtc/connection.py +98 -19
  142. pipecat/transports/smallwebrtc/request_handler.py +204 -0
  143. pipecat/transports/smallwebrtc/transport.py +65 -23
  144. pipecat/transports/tavus/transport.py +23 -12
  145. pipecat/transports/websocket/client.py +41 -5
  146. pipecat/transports/websocket/fastapi.py +21 -11
  147. pipecat/transports/websocket/server.py +14 -7
  148. pipecat/transports/whatsapp/api.py +8 -0
  149. pipecat/transports/whatsapp/client.py +47 -0
  150. pipecat/utils/base_object.py +54 -22
  151. pipecat/utils/string.py +12 -1
  152. pipecat/utils/tracing/service_decorators.py +21 -21
  153. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/WHEEL +0 -0
  154. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/licenses/LICENSE +0 -0
  155. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/top_level.txt +0 -0
  156. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
@@ -12,6 +12,7 @@ event handling for conversational AI applications.
12
12
  """
13
13
 
14
14
  import asyncio
15
+ import json
15
16
  from dataclasses import dataclass
16
17
  from typing import Any, Awaitable, Callable, List, Optional
17
18
 
@@ -24,13 +25,15 @@ from pipecat.frames.frames import (
24
25
  AudioRawFrame,
25
26
  CancelFrame,
26
27
  EndFrame,
28
+ ImageRawFrame,
27
29
  OutputAudioRawFrame,
28
30
  OutputDTMFFrame,
29
31
  OutputDTMFUrgentFrame,
32
+ OutputTransportMessageFrame,
33
+ OutputTransportMessageUrgentFrame,
30
34
  StartFrame,
31
- TransportMessageFrame,
32
- TransportMessageUrgentFrame,
33
35
  UserAudioRawFrame,
36
+ UserImageRawFrame,
34
37
  )
35
38
  from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup
36
39
  from pipecat.transports.base_input import BaseInputTransport
@@ -40,6 +43,7 @@ from pipecat.utils.asyncio.task_manager import BaseTaskManager
40
43
 
41
44
  try:
42
45
  from livekit import rtc
46
+ from livekit.rtc._proto import video_frame_pb2 as proto_video_frame
43
47
  from tenacity import retry, stop_after_attempt, wait_exponential
44
48
  except ModuleNotFoundError as e:
45
49
  logger.error(f"Exception: {e}")
@@ -64,7 +68,7 @@ DTMF_CODE_MAP = {
64
68
 
65
69
 
66
70
  @dataclass
67
- class LiveKitTransportMessageFrame(TransportMessageFrame):
71
+ class LiveKitOutputTransportMessageFrame(OutputTransportMessageFrame):
68
72
  """Frame for transport messages in LiveKit rooms.
69
73
 
70
74
  Parameters:
@@ -75,7 +79,7 @@ class LiveKitTransportMessageFrame(TransportMessageFrame):
75
79
 
76
80
 
77
81
  @dataclass
78
- class LiveKitTransportMessageUrgentFrame(TransportMessageUrgentFrame):
82
+ class LiveKitOutputTransportMessageUrgentFrame(OutputTransportMessageUrgentFrame):
79
83
  """Frame for urgent transport messages in LiveKit rooms.
80
84
 
81
85
  Parameters:
@@ -85,6 +89,50 @@ class LiveKitTransportMessageUrgentFrame(TransportMessageUrgentFrame):
85
89
  participant_id: Optional[str] = None
86
90
 
87
91
 
92
+ @dataclass
93
+ class LiveKitTransportMessageFrame(LiveKitOutputTransportMessageFrame):
94
+ """Frame for transport messages in LiveKit rooms.
95
+
96
+ Parameters:
97
+ participant_id: Optional ID of the participant this message is for/from.
98
+ """
99
+
100
+ def __post_init__(self):
101
+ super().__post_init__()
102
+ import warnings
103
+
104
+ with warnings.catch_warnings():
105
+ warnings.simplefilter("always")
106
+ warnings.warn(
107
+ "LiveKitTransportMessageFrame is deprecated and will be removed in a future version. "
108
+ "Instead, use LiveKitOutputTransportMessageFrame.",
109
+ DeprecationWarning,
110
+ stacklevel=2,
111
+ )
112
+
113
+
114
+ @dataclass
115
+ class LiveKitTransportMessageUrgentFrame(LiveKitOutputTransportMessageUrgentFrame):
116
+ """Frame for urgent transport messages in LiveKit rooms.
117
+
118
+ Parameters:
119
+ participant_id: Optional ID of the participant this message is for/from.
120
+ """
121
+
122
+ def __post_init__(self):
123
+ super().__post_init__()
124
+ import warnings
125
+
126
+ with warnings.catch_warnings():
127
+ warnings.simplefilter("always")
128
+ warnings.warn(
129
+ "LiveKitTransportMessageUrgentFrame is deprecated and will be removed in a future version. "
130
+ "Instead, use LiveKitOutputTransportMessageUrgentFrame.",
131
+ DeprecationWarning,
132
+ stacklevel=2,
133
+ )
134
+
135
+
88
136
  class LiveKitParams(TransportParams):
89
137
  """Configuration parameters for LiveKit transport.
90
138
 
@@ -110,10 +158,13 @@ class LiveKitCallbacks(BaseModel):
110
158
 
111
159
  on_connected: Callable[[], Awaitable[None]]
112
160
  on_disconnected: Callable[[], Awaitable[None]]
161
+ on_before_disconnect: Callable[[], Awaitable[None]]
113
162
  on_participant_connected: Callable[[str], Awaitable[None]]
114
163
  on_participant_disconnected: Callable[[str], Awaitable[None]]
115
164
  on_audio_track_subscribed: Callable[[str], Awaitable[None]]
116
165
  on_audio_track_unsubscribed: Callable[[str], Awaitable[None]]
166
+ on_video_track_subscribed: Callable[[str], Awaitable[None]]
167
+ on_video_track_unsubscribed: Callable[[str], Awaitable[None]]
117
168
  on_data_received: Callable[[bytes, str], Awaitable[None]]
118
169
  on_first_participant_joined: Callable[[str], Awaitable[None]]
119
170
 
@@ -158,8 +209,11 @@ class LiveKitTransportClient:
158
209
  self._audio_track: Optional[rtc.LocalAudioTrack] = None
159
210
  self._audio_tracks = {}
160
211
  self._audio_queue = asyncio.Queue()
212
+ self._video_tracks = {}
213
+ self._video_queue = asyncio.Queue()
161
214
  self._other_participant_has_joined = False
162
215
  self._task_manager: Optional[BaseTaskManager] = None
216
+ self._async_lock = asyncio.Lock()
163
217
 
164
218
  @property
165
219
  def participant_id(self) -> str:
@@ -220,61 +274,64 @@ class LiveKitTransportClient:
220
274
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
221
275
  async def connect(self):
222
276
  """Connect to the LiveKit room with retry logic."""
223
- if self._connected:
224
- # Increment disconnect counter if already connected.
225
- self._disconnect_counter += 1
226
- return
227
-
228
- logger.info(f"Connecting to {self._room_name}")
229
-
230
- try:
231
- await self.room.connect(
232
- self._url,
233
- self._token,
234
- options=rtc.RoomOptions(auto_subscribe=True),
235
- )
236
- self._connected = True
237
- # Increment disconnect counter if we successfully connected.
238
- self._disconnect_counter += 1
277
+ async with self._async_lock:
278
+ if self._connected:
279
+ # Increment disconnect counter if already connected.
280
+ self._disconnect_counter += 1
281
+ return
282
+
283
+ logger.info(f"Connecting to {self._room_name}")
284
+
285
+ try:
286
+ await self.room.connect(
287
+ self._url,
288
+ self._token,
289
+ options=rtc.RoomOptions(auto_subscribe=True),
290
+ )
291
+ self._connected = True
292
+ # Increment disconnect counter if we successfully connected.
293
+ self._disconnect_counter += 1
239
294
 
240
- self._participant_id = self.room.local_participant.sid
241
- logger.info(f"Connected to {self._room_name}")
295
+ self._participant_id = self.room.local_participant.sid
296
+ logger.info(f"Connected to {self._room_name}")
242
297
 
243
- # Set up audio source and track
244
- self._audio_source = rtc.AudioSource(
245
- self._out_sample_rate, self._params.audio_out_channels
246
- )
247
- self._audio_track = rtc.LocalAudioTrack.create_audio_track(
248
- "pipecat-audio", self._audio_source
249
- )
250
- options = rtc.TrackPublishOptions()
251
- options.source = rtc.TrackSource.SOURCE_MICROPHONE
252
- await self.room.local_participant.publish_track(self._audio_track, options)
298
+ # Set up audio source and track
299
+ self._audio_source = rtc.AudioSource(
300
+ self._out_sample_rate, self._params.audio_out_channels
301
+ )
302
+ self._audio_track = rtc.LocalAudioTrack.create_audio_track(
303
+ "pipecat-audio", self._audio_source
304
+ )
305
+ options = rtc.TrackPublishOptions()
306
+ options.source = rtc.TrackSource.SOURCE_MICROPHONE
307
+ await self.room.local_participant.publish_track(self._audio_track, options)
253
308
 
254
- await self._callbacks.on_connected()
309
+ await self._callbacks.on_connected()
255
310
 
256
- # Check if there are already participants in the room
257
- participants = self.get_participants()
258
- if participants and not self._other_participant_has_joined:
259
- self._other_participant_has_joined = True
260
- await self._callbacks.on_first_participant_joined(participants[0])
261
- except Exception as e:
262
- logger.error(f"Error connecting to {self._room_name}: {e}")
263
- raise
311
+ # Check if there are already participants in the room
312
+ participants = self.get_participants()
313
+ if participants and not self._other_participant_has_joined:
314
+ self._other_participant_has_joined = True
315
+ await self._callbacks.on_first_participant_joined(participants[0])
316
+ except Exception as e:
317
+ logger.error(f"Error connecting to {self._room_name}: {e}")
318
+ raise
264
319
 
265
320
  async def disconnect(self):
266
321
  """Disconnect from the LiveKit room."""
267
- # Decrement leave counter when leaving.
268
- self._disconnect_counter -= 1
322
+ async with self._async_lock:
323
+ # Decrement leave counter when leaving.
324
+ self._disconnect_counter -= 1
269
325
 
270
- if not self._connected or self._disconnect_counter > 0:
271
- return
326
+ if not self._connected or self._disconnect_counter > 0:
327
+ return
272
328
 
273
- logger.info(f"Disconnecting from {self._room_name}")
274
- await self.room.disconnect()
275
- self._connected = False
276
- logger.info(f"Disconnected from {self._room_name}")
277
- await self._callbacks.on_disconnected()
329
+ logger.info(f"Disconnecting from {self._room_name}")
330
+ await self._callbacks.on_before_disconnect()
331
+ await self.room.disconnect()
332
+ self._connected = False
333
+ logger.info(f"Disconnected from {self._room_name}")
334
+ await self._callbacks.on_disconnected()
278
335
 
279
336
  async def send_data(self, data: bytes, participant_id: Optional[str] = None):
280
337
  """Send data to participants in the room.
@@ -297,10 +354,10 @@ class LiveKitTransportClient:
297
354
  logger.error(f"Error sending data: {e}")
298
355
 
299
356
  async def send_dtmf(self, digit: str):
300
- """Send DTMF tone to the room.
357
+ r"""Send DTMF tone to the room.
301
358
 
302
359
  Args:
303
- digit: The DTMF digit to send (0-9, *, #).
360
+ digit: The DTMF digit to send (0-9, \*, #).
304
361
  """
305
362
  if not self._connected:
306
363
  return
@@ -316,19 +373,21 @@ class LiveKitTransportClient:
316
373
  except Exception as e:
317
374
  logger.error(f"Error sending DTMF tone {digit}: {e}")
318
375
 
319
- async def publish_audio(self, audio_frame: rtc.AudioFrame):
376
+ async def publish_audio(self, audio_frame: rtc.AudioFrame) -> bool:
320
377
  """Publish an audio frame to the room.
321
378
 
322
379
  Args:
323
380
  audio_frame: The LiveKit audio frame to publish.
324
381
  """
325
382
  if not self._connected or not self._audio_source:
326
- return
383
+ return False
327
384
 
328
385
  try:
329
386
  await self._audio_source.capture_frame(audio_frame)
387
+ return True
330
388
  except Exception as e:
331
389
  logger.error(f"Error publishing audio: {e}")
390
+ return False
332
391
 
333
392
  def get_participants(self) -> List[str]:
334
393
  """Get list of participant IDs in the room.
@@ -477,6 +536,15 @@ class LiveKitTransportClient:
477
536
  f"{self}::_process_audio_stream",
478
537
  )
479
538
  await self._callbacks.on_audio_track_subscribed(participant.sid)
539
+ elif track.kind == rtc.TrackKind.KIND_VIDEO:
540
+ logger.info(f"Video track subscribed: {track.sid} from participant {participant.sid}")
541
+ self._video_tracks[participant.sid] = track
542
+ video_stream = rtc.VideoStream(track)
543
+ self._task_manager.create_task(
544
+ self._process_video_stream(video_stream, participant.sid),
545
+ f"{self}::_process_video_stream",
546
+ )
547
+ await self._callbacks.on_video_track_subscribed(participant.sid)
480
548
 
481
549
  async def _async_on_track_unsubscribed(
482
550
  self,
@@ -488,6 +556,8 @@ class LiveKitTransportClient:
488
556
  logger.info(f"Track unsubscribed: {publication.sid} from {participant.identity}")
489
557
  if track.kind == rtc.TrackKind.KIND_AUDIO:
490
558
  await self._callbacks.on_audio_track_unsubscribed(participant.sid)
559
+ elif track.kind == rtc.TrackKind.KIND_VIDEO:
560
+ await self._callbacks.on_video_track_unsubscribed(participant.sid)
491
561
 
492
562
  async def _async_on_data_received(self, data: rtc.DataPacket):
493
563
  """Handle data received events."""
@@ -518,6 +588,21 @@ class LiveKitTransportClient:
518
588
  frame, participant_id = await self._audio_queue.get()
519
589
  yield frame, participant_id
520
590
 
591
+ async def _process_video_stream(self, video_stream: rtc.VideoStream, participant_id: str):
592
+ """Process incoming video stream from a participant."""
593
+ logger.info(f"Started processing video stream for participant {participant_id}")
594
+ async for event in video_stream:
595
+ if isinstance(event, rtc.VideoFrameEvent):
596
+ await self._video_queue.put((event, participant_id))
597
+ else:
598
+ logger.warning(f"Received unexpected event type: {type(event)}")
599
+
600
+ async def get_next_video_frame(self):
601
+ """Get the next video frame from the queue."""
602
+ while True:
603
+ frame, participant_id = await self._video_queue.get()
604
+ yield frame, participant_id
605
+
521
606
  def __str__(self):
522
607
  """String representation of the LiveKit transport client."""
523
608
  return f"{self._transport_name}::LiveKitTransportClient"
@@ -550,6 +635,7 @@ class LiveKitInputTransport(BaseInputTransport):
550
635
  self._client = client
551
636
 
552
637
  self._audio_in_task = None
638
+ self._video_in_task = None
553
639
  self._vad_analyzer: Optional[VADAnalyzer] = params.vad_analyzer
554
640
  self._resampler = create_stream_resampler()
555
641
 
@@ -582,6 +668,8 @@ class LiveKitInputTransport(BaseInputTransport):
582
668
  await self._client.connect()
583
669
  if not self._audio_in_task and self._params.audio_in_enabled:
584
670
  self._audio_in_task = self.create_task(self._audio_in_task_handler())
671
+ if not self._video_in_task and self._params.video_in_enabled:
672
+ self._video_in_task = self.create_task(self._video_in_task_handler())
585
673
  await self.set_transport_ready(frame)
586
674
  logger.info("LiveKitInputTransport started")
587
675
 
@@ -595,6 +683,8 @@ class LiveKitInputTransport(BaseInputTransport):
595
683
  await self._client.disconnect()
596
684
  if self._audio_in_task:
597
685
  await self.cancel_task(self._audio_in_task)
686
+ if self._video_in_task:
687
+ await self.cancel_task(self._video_in_task)
598
688
  logger.info("LiveKitInputTransport stopped")
599
689
 
600
690
  async def cancel(self, frame: CancelFrame):
@@ -607,6 +697,8 @@ class LiveKitInputTransport(BaseInputTransport):
607
697
  await self._client.disconnect()
608
698
  if self._audio_in_task and self._params.audio_in_enabled:
609
699
  await self.cancel_task(self._audio_in_task)
700
+ if self._video_in_task and self._params.video_in_enabled:
701
+ await self.cancel_task(self._video_in_task)
610
702
 
611
703
  async def setup(self, setup: FrameProcessorSetup):
612
704
  """Setup the input transport with shared client setup.
@@ -629,7 +721,7 @@ class LiveKitInputTransport(BaseInputTransport):
629
721
  message: The message data to send.
630
722
  sender: ID of the message sender.
631
723
  """
632
- frame = LiveKitTransportMessageUrgentFrame(message=message, participant_id=sender)
724
+ frame = LiveKitOutputTransportMessageUrgentFrame(message=message, participant_id=sender)
633
725
  await self.push_frame(frame)
634
726
 
635
727
  async def _audio_in_task_handler(self):
@@ -655,6 +747,29 @@ class LiveKitInputTransport(BaseInputTransport):
655
747
  )
656
748
  await self.push_audio_frame(input_audio_frame)
657
749
 
750
+ async def _video_in_task_handler(self):
751
+ """Handle incoming video frames from participants."""
752
+ logger.info("Video input task started")
753
+ video_iterator = self._client.get_next_video_frame()
754
+ async for video_data in video_iterator:
755
+ if video_data:
756
+ video_frame_event, participant_id = video_data
757
+ pipecat_video_frame = await self._convert_livekit_video_to_pipecat(
758
+ video_frame_event=video_frame_event
759
+ )
760
+
761
+ # Skip frames with no video data
762
+ if len(pipecat_video_frame.image) == 0:
763
+ continue
764
+
765
+ input_video_frame = UserImageRawFrame(
766
+ user_id=participant_id,
767
+ image=pipecat_video_frame.image,
768
+ size=pipecat_video_frame.size,
769
+ format=pipecat_video_frame.format,
770
+ )
771
+ await self.push_video_frame(input_video_frame)
772
+
658
773
  async def _convert_livekit_audio_to_pipecat(
659
774
  self, audio_frame_event: rtc.AudioFrameEvent
660
775
  ) -> AudioRawFrame:
@@ -671,6 +786,19 @@ class LiveKitInputTransport(BaseInputTransport):
671
786
  num_channels=audio_frame.num_channels,
672
787
  )
673
788
 
789
+ async def _convert_livekit_video_to_pipecat(
790
+ self,
791
+ video_frame_event: rtc.VideoFrameEvent,
792
+ ) -> ImageRawFrame:
793
+ """Convert LiveKit video frame to Pipecat video frame."""
794
+ rgb_frame = video_frame_event.frame.convert(proto_video_frame.VideoBufferType.RGB24)
795
+ image_frame = ImageRawFrame(
796
+ image=rgb_frame.data,
797
+ size=(rgb_frame.width, rgb_frame.height),
798
+ format="RGB",
799
+ )
800
+ return image_frame
801
+
674
802
 
675
803
  class LiveKitOutputTransport(BaseOutputTransport):
676
804
  """Handles outgoing media streams and events to LiveKit rooms.
@@ -752,25 +880,36 @@ class LiveKitOutputTransport(BaseOutputTransport):
752
880
  await super().cleanup()
753
881
  await self._transport.cleanup()
754
882
 
755
- async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
883
+ async def send_message(
884
+ self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
885
+ ):
756
886
  """Send a transport message to participants.
757
887
 
758
888
  Args:
759
889
  frame: The transport message frame to send.
760
890
  """
761
- if isinstance(frame, (LiveKitTransportMessageFrame, LiveKitTransportMessageUrgentFrame)):
762
- await self._client.send_data(frame.message.encode(), frame.participant_id)
891
+ message = frame.message
892
+ if isinstance(message, dict):
893
+ # fix message encoding for dict-like messages, e.g. RTVI messages.
894
+ message = json.dumps(message, ensure_ascii=False)
895
+ if isinstance(
896
+ frame, (LiveKitOutputTransportMessageFrame, LiveKitOutputTransportMessageUrgentFrame)
897
+ ):
898
+ await self._client.send_data(message.encode(), frame.participant_id)
763
899
  else:
764
- await self._client.send_data(frame.message.encode())
900
+ await self._client.send_data(message.encode())
765
901
 
766
- async def write_audio_frame(self, frame: OutputAudioRawFrame):
902
+ async def write_audio_frame(self, frame: OutputAudioRawFrame) -> bool:
767
903
  """Write an audio frame to the LiveKit room.
768
904
 
769
905
  Args:
770
906
  frame: The audio frame to write.
907
+
908
+ Returns:
909
+ True if the audio frame was written successfully, False otherwise.
771
910
  """
772
911
  livekit_audio = self._convert_pipecat_audio_to_livekit(frame.audio)
773
- await self._client.publish_audio(livekit_audio)
912
+ return await self._client.publish_audio(livekit_audio)
774
913
 
775
914
  def _supports_native_dtmf(self) -> bool:
776
915
  """LiveKit supports native DTMF via telephone events.
@@ -834,10 +973,13 @@ class LiveKitTransport(BaseTransport):
834
973
  callbacks = LiveKitCallbacks(
835
974
  on_connected=self._on_connected,
836
975
  on_disconnected=self._on_disconnected,
976
+ on_before_disconnect=self._on_before_disconnect,
837
977
  on_participant_connected=self._on_participant_connected,
838
978
  on_participant_disconnected=self._on_participant_disconnected,
839
979
  on_audio_track_subscribed=self._on_audio_track_subscribed,
840
980
  on_audio_track_unsubscribed=self._on_audio_track_unsubscribed,
981
+ on_video_track_subscribed=self._on_video_track_subscribed,
982
+ on_video_track_unsubscribed=self._on_video_track_unsubscribed,
841
983
  on_data_received=self._on_data_received,
842
984
  on_first_participant_joined=self._on_first_participant_joined,
843
985
  )
@@ -855,10 +997,13 @@ class LiveKitTransport(BaseTransport):
855
997
  self._register_event_handler("on_participant_disconnected")
856
998
  self._register_event_handler("on_audio_track_subscribed")
857
999
  self._register_event_handler("on_audio_track_unsubscribed")
1000
+ self._register_event_handler("on_video_track_subscribed")
1001
+ self._register_event_handler("on_video_track_unsubscribed")
858
1002
  self._register_event_handler("on_data_received")
859
1003
  self._register_event_handler("on_first_participant_joined")
860
1004
  self._register_event_handler("on_participant_left")
861
1005
  self._register_event_handler("on_call_state_updated")
1006
+ self._register_event_handler("on_before_disconnect", sync=True)
862
1007
 
863
1008
  def input(self) -> LiveKitInputTransport:
864
1009
  """Get the input transport for receiving media and events.
@@ -953,6 +1098,10 @@ class LiveKitTransport(BaseTransport):
953
1098
  """Handle room disconnected events."""
954
1099
  await self._call_event_handler("on_disconnected")
955
1100
 
1101
+ async def _on_before_disconnect(self):
1102
+ """Handle before disconnection room events."""
1103
+ await self._call_event_handler("on_before_disconnect")
1104
+
956
1105
  async def _on_participant_connected(self, participant_id: str):
957
1106
  """Handle participant connected events."""
958
1107
  await self._call_event_handler("on_participant_connected", participant_id)
@@ -976,6 +1125,20 @@ class LiveKitTransport(BaseTransport):
976
1125
  """Handle audio track unsubscribed events."""
977
1126
  await self._call_event_handler("on_audio_track_unsubscribed", participant_id)
978
1127
 
1128
+ async def _on_video_track_subscribed(self, participant_id: str):
1129
+ """Handle video track subscribed events."""
1130
+ await self._call_event_handler("on_video_track_subscribed", participant_id)
1131
+ participant = self._client.room.remote_participants.get(participant_id)
1132
+ if participant:
1133
+ for publication in participant.video_tracks.values():
1134
+ self._client._on_track_subscribed_wrapper(
1135
+ publication.track, publication, participant
1136
+ )
1137
+
1138
+ async def _on_video_track_unsubscribed(self, participant_id: str):
1139
+ """Handle video track unsubscribed events."""
1140
+ await self._call_event_handler("on_video_track_unsubscribed", participant_id)
1141
+
979
1142
  async def _on_data_received(self, data: bytes, participant_id: str):
980
1143
  """Handle data received events."""
981
1144
  if self._input:
@@ -990,7 +1153,9 @@ class LiveKitTransport(BaseTransport):
990
1153
  participant_id: Optional specific participant to send to.
991
1154
  """
992
1155
  if self._output:
993
- frame = LiveKitTransportMessageFrame(message=message, participant_id=participant_id)
1156
+ frame = LiveKitOutputTransportMessageFrame(
1157
+ message=message, participant_id=participant_id
1158
+ )
994
1159
  await self._output.send_message(frame)
995
1160
 
996
1161
  async def send_message_urgent(self, message: str, participant_id: Optional[str] = None):
@@ -1001,7 +1166,7 @@ class LiveKitTransport(BaseTransport):
1001
1166
  participant_id: Optional specific participant to send to.
1002
1167
  """
1003
1168
  if self._output:
1004
- frame = LiveKitTransportMessageUrgentFrame(
1169
+ frame = LiveKitOutputTransportMessageUrgentFrame(
1005
1170
  message=message, participant_id=participant_id
1006
1171
  )
1007
1172
  await self._output.send_message(frame)
@@ -172,16 +172,21 @@ class LocalAudioOutputTransport(BaseOutputTransport):
172
172
  self._out_stream.close()
173
173
  self._out_stream = None
174
174
 
175
- async def write_audio_frame(self, frame: OutputAudioRawFrame):
175
+ async def write_audio_frame(self, frame: OutputAudioRawFrame) -> bool:
176
176
  """Write an audio frame to the output stream.
177
177
 
178
178
  Args:
179
179
  frame: The audio frame to write to the output device.
180
+
181
+ Returns:
182
+ True if the audio frame was written successfully, False otherwise.
180
183
  """
181
184
  if self._out_stream:
182
185
  await self.get_event_loop().run_in_executor(
183
186
  self._executor, self._out_stream.write, frame.audio
184
187
  )
188
+ return True
189
+ return False
185
190
 
186
191
 
187
192
  class LocalAudioTransport(BaseTransport):
@@ -191,24 +191,33 @@ class TkOutputTransport(BaseOutputTransport):
191
191
  self._out_stream.close()
192
192
  self._out_stream = None
193
193
 
194
- async def write_audio_frame(self, frame: OutputAudioRawFrame):
194
+ async def write_audio_frame(self, frame: OutputAudioRawFrame) -> bool:
195
195
  """Write an audio frame to the output stream.
196
196
 
197
197
  Args:
198
198
  frame: The audio frame to write to the output device.
199
+
200
+ Returns:
201
+ True if the audio frame was written successfully, False otherwise.
199
202
  """
200
203
  if self._out_stream:
201
204
  await self.get_event_loop().run_in_executor(
202
205
  self._executor, self._out_stream.write, frame.audio
203
206
  )
207
+ return True
208
+ return False
204
209
 
205
- async def write_video_frame(self, frame: OutputImageRawFrame):
210
+ async def write_video_frame(self, frame: OutputImageRawFrame) -> bool:
206
211
  """Write a video frame to the Tkinter display.
207
212
 
208
213
  Args:
209
214
  frame: The video frame to display in the Tkinter window.
215
+
216
+ Returns:
217
+ True if the video frame was written successfully, False otherwise.
210
218
  """
211
219
  self.get_event_loop().call_soon(self._write_frame_to_tk, frame)
220
+ return True
212
221
 
213
222
  def _write_frame_to_tk(self, frame: OutputImageRawFrame):
214
223
  """Write frame data to the Tkinter image label."""
@@ -22,4 +22,4 @@ with warnings.catch_warnings():
22
22
  "use `pipecat.transports.websocket.fastapi` instead.",
23
23
  DeprecationWarning,
24
24
  stacklevel=2,
25
- )
25
+ )