dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
@@ -4,6 +4,13 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Small WebRTC transport implementation for Pipecat.
8
+
9
+ This module provides a WebRTC transport implementation using aiortc for
10
+ real-time audio and video communication. It supports bidirectional media
11
+ streaming, application messaging, and client connection management.
12
+ """
13
+
7
14
  import asyncio
8
15
  import fractions
9
16
  import time
@@ -33,7 +40,6 @@ from pipecat.transports.base_input import BaseInputTransport
33
40
  from pipecat.transports.base_output import BaseOutputTransport
34
41
  from pipecat.transports.base_transport import BaseTransport, TransportParams
35
42
  from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
36
- from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
37
43
 
38
44
  try:
39
45
  import cv2
@@ -45,15 +51,38 @@ except ModuleNotFoundError as e:
45
51
  logger.error("In order to use the SmallWebRTC, you need to `pip install pipecat-ai[webrtc]`.")
46
52
  raise Exception(f"Missing module: {e}")
47
53
 
54
+ CAM_VIDEO_SOURCE = "camera"
55
+ SCREEN_VIDEO_SOURCE = "screenVideo"
56
+ MIC_AUDIO_SOURCE = "microphone"
57
+
48
58
 
49
59
  class SmallWebRTCCallbacks(BaseModel):
60
+ """Callback handlers for SmallWebRTC events.
61
+
62
+ Parameters:
63
+ on_app_message: Called when an application message is received.
64
+ on_client_connected: Called when a client establishes connection.
65
+ on_client_disconnected: Called when a client disconnects.
66
+ """
67
+
50
68
  on_app_message: Callable[[Any], Awaitable[None]]
51
69
  on_client_connected: Callable[[SmallWebRTCConnection], Awaitable[None]]
52
70
  on_client_disconnected: Callable[[SmallWebRTCConnection], Awaitable[None]]
53
71
 
54
72
 
55
73
  class RawAudioTrack(AudioStreamTrack):
74
+ """Custom audio stream track for WebRTC output.
75
+
76
+ Handles audio frame generation and timing for WebRTC transmission,
77
+ supporting queued audio data with proper synchronization.
78
+ """
79
+
56
80
  def __init__(self, sample_rate):
81
+ """Initialize the raw audio track.
82
+
83
+ Args:
84
+ sample_rate: The audio sample rate in Hz.
85
+ """
57
86
  super().__init__()
58
87
  self._sample_rate = sample_rate
59
88
  self._samples_per_10ms = sample_rate * 10 // 1000
@@ -64,7 +93,17 @@ class RawAudioTrack(AudioStreamTrack):
64
93
  self._chunk_queue = deque()
65
94
 
66
95
  def add_audio_bytes(self, audio_bytes: bytes):
67
- """Adds bytes to the audio buffer and returns a Future that completes when the data is processed."""
96
+ """Add audio bytes to the buffer for transmission.
97
+
98
+ Args:
99
+ audio_bytes: Raw audio data to queue for transmission.
100
+
101
+ Returns:
102
+ A Future that completes when the data is processed.
103
+
104
+ Raises:
105
+ ValueError: If audio bytes are not a multiple of 10ms size.
106
+ """
68
107
  if len(audio_bytes) % self._bytes_per_10ms != 0:
69
108
  raise ValueError("Audio bytes must be a multiple of 10ms size.")
70
109
  future = asyncio.get_running_loop().create_future()
@@ -79,7 +118,11 @@ class RawAudioTrack(AudioStreamTrack):
79
118
  return future
80
119
 
81
120
  async def recv(self):
82
- """Returns the next audio frame, generating silence if needed."""
121
+ """Return the next audio frame for WebRTC transmission.
122
+
123
+ Returns:
124
+ An AudioFrame containing the next audio data or silence.
125
+ """
83
126
  # Compute required wait time for synchronization
84
127
  if self._timestamp > 0:
85
128
  wait = self._start + (self._timestamp / self._sample_rate) - time.time()
@@ -106,18 +149,37 @@ class RawAudioTrack(AudioStreamTrack):
106
149
 
107
150
 
108
151
  class RawVideoTrack(VideoStreamTrack):
152
+ """Custom video stream track for WebRTC output.
153
+
154
+ Handles video frame queuing and conversion for WebRTC transmission.
155
+ """
156
+
109
157
  def __init__(self, width, height):
158
+ """Initialize the raw video track.
159
+
160
+ Args:
161
+ width: Video frame width in pixels.
162
+ height: Video frame height in pixels.
163
+ """
110
164
  super().__init__()
111
165
  self._width = width
112
166
  self._height = height
113
167
  self._video_buffer = asyncio.Queue()
114
168
 
115
169
  def add_video_frame(self, frame):
116
- """Adds a raw video frame to the buffer."""
170
+ """Add a video frame to the transmission buffer.
171
+
172
+ Args:
173
+ frame: The video frame to queue for transmission.
174
+ """
117
175
  self._video_buffer.put_nowait(frame)
118
176
 
119
177
  async def recv(self):
120
- """Returns the next video frame, waiting if the buffer is empty."""
178
+ """Return the next video frame for WebRTC transmission.
179
+
180
+ Returns:
181
+ A VideoFrame ready for WebRTC transmission.
182
+ """
121
183
  raw_frame = await self._video_buffer.get()
122
184
 
123
185
  # Convert bytes to NumPy array
@@ -134,6 +196,12 @@ class RawVideoTrack(VideoStreamTrack):
134
196
 
135
197
 
136
198
  class SmallWebRTCClient:
199
+ """WebRTC client implementation for handling connections and media streams.
200
+
201
+ Manages WebRTC peer connections, audio/video streaming, and application
202
+ messaging through the SmallWebRTCConnection interface.
203
+ """
204
+
137
205
  FORMAT_CONVERSIONS = {
138
206
  "yuv420p": cv2.COLOR_YUV2RGB_I420,
139
207
  "yuvj420p": cv2.COLOR_YUV2RGB_I420, # OpenCV treats both the same
@@ -142,6 +210,12 @@ class SmallWebRTCClient:
142
210
  }
143
211
 
144
212
  def __init__(self, webrtc_connection: SmallWebRTCConnection, callbacks: SmallWebRTCCallbacks):
213
+ """Initialize the WebRTC client.
214
+
215
+ Args:
216
+ webrtc_connection: The underlying WebRTC connection handler.
217
+ callbacks: Event callbacks for connection and message handling.
218
+ """
145
219
  self._webrtc_connection = webrtc_connection
146
220
  self._closing = False
147
221
  self._callbacks = callbacks
@@ -150,11 +224,13 @@ class SmallWebRTCClient:
150
224
  self._video_output_track = None
151
225
  self._audio_input_track: Optional[AudioStreamTrack] = None
152
226
  self._video_input_track: Optional[VideoStreamTrack] = None
227
+ self._screen_video_track: Optional[VideoStreamTrack] = None
153
228
 
154
229
  self._params = None
155
230
  self._audio_in_channels = None
156
231
  self._in_sample_rate = None
157
232
  self._out_sample_rate = None
233
+ self._leave_counter = 0
158
234
 
159
235
  # We are always resampling it for 16000 if the sample_rate that we receive is bigger than that.
160
236
  # otherwise we face issues with Silero VAD
@@ -180,14 +256,14 @@ class SmallWebRTCClient:
180
256
  await self._handle_app_message(message)
181
257
 
182
258
  def _convert_frame(self, frame_array: np.ndarray, format_name: str) -> np.ndarray:
183
- """Convert a given frame to RGB format based on the input format.
259
+ """Convert a video frame to RGB format based on the input format.
184
260
 
185
261
  Args:
186
- frame_array (np.ndarray): The input frame.
187
- format_name (str): The format of the input frame.
262
+ frame_array: The input frame as a NumPy array.
263
+ format_name: The format of the input frame.
188
264
 
189
265
  Returns:
190
- np.ndarray: The converted RGB frame.
266
+ The converted RGB frame as a NumPy array.
191
267
 
192
268
  Raises:
193
269
  ValueError: If the format is unsupported.
@@ -202,17 +278,30 @@ class SmallWebRTCClient:
202
278
 
203
279
  return cv2.cvtColor(frame_array, conversion_code)
204
280
 
205
- async def read_video_frame(self):
206
- """Reads a video frame from the given MediaStreamTrack, converts it to RGB,
281
+ async def read_video_frame(self, video_source: str):
282
+ """Read video frames from the WebRTC connection.
283
+
284
+ Reads a video frame from the given MediaStreamTrack, converts it to RGB,
207
285
  and creates an InputImageRawFrame.
286
+
287
+ Args:
288
+ video_source: Video source to capture ("camera" or "screenVideo").
289
+
290
+ Yields:
291
+ UserImageRawFrame objects containing video data from the peer.
208
292
  """
209
293
  while True:
210
- if self._video_input_track is None:
294
+ video_track = (
295
+ self._video_input_track
296
+ if video_source == CAM_VIDEO_SOURCE
297
+ else self._screen_video_track
298
+ )
299
+ if video_track is None:
211
300
  await asyncio.sleep(0.01)
212
301
  continue
213
302
 
214
303
  try:
215
- frame = await asyncio.wait_for(self._video_input_track.recv(), timeout=2.0)
304
+ frame = await asyncio.wait_for(video_track.recv(), timeout=2.0)
216
305
  except asyncio.TimeoutError:
217
306
  if self._webrtc_connection.is_connected():
218
307
  logger.warning("Timeout: No video frame received within the specified time.")
@@ -238,11 +327,18 @@ class SmallWebRTCClient:
238
327
  size=(frame.width, frame.height),
239
328
  format="RGB",
240
329
  )
330
+ image_frame.transport_source = video_source
241
331
 
242
332
  yield image_frame
243
333
 
244
334
  async def read_audio_frame(self):
245
- """Reads 20ms of audio from the given MediaStreamTrack and creates an InputAudioRawFrame."""
335
+ """Read audio frames from the WebRTC connection.
336
+
337
+ Reads 20ms of audio from the given MediaStreamTrack and creates an InputAudioRawFrame.
338
+
339
+ Yields:
340
+ InputAudioRawFrame objects containing audio data from the peer.
341
+ """
246
342
  while True:
247
343
  if self._audio_input_track is None:
248
344
  await asyncio.sleep(0.01)
@@ -285,20 +381,38 @@ class SmallWebRTCClient:
285
381
  yield audio_frame
286
382
 
287
383
  async def write_audio_frame(self, frame: OutputAudioRawFrame):
384
+ """Write an audio frame to the WebRTC connection.
385
+
386
+ Args:
387
+ frame: The audio frame to transmit.
388
+ """
288
389
  if self._can_send() and self._audio_output_track:
289
390
  await self._audio_output_track.add_audio_bytes(frame.audio)
290
391
 
291
392
  async def write_video_frame(self, frame: OutputImageRawFrame):
393
+ """Write a video frame to the WebRTC connection.
394
+
395
+ Args:
396
+ frame: The video frame to transmit.
397
+ """
292
398
  if self._can_send() and self._video_output_track:
293
399
  self._video_output_track.add_video_frame(frame)
294
400
 
295
401
  async def setup(self, _params: TransportParams, frame):
402
+ """Set up the client with transport parameters.
403
+
404
+ Args:
405
+ _params: Transport configuration parameters.
406
+ frame: The initialization frame containing setup data.
407
+ """
296
408
  self._audio_in_channels = _params.audio_in_channels
297
409
  self._in_sample_rate = _params.audio_in_sample_rate or frame.audio_in_sample_rate
298
410
  self._out_sample_rate = _params.audio_out_sample_rate or frame.audio_out_sample_rate
299
411
  self._params = _params
412
+ self._leave_counter += 1
300
413
 
301
414
  async def connect(self):
415
+ """Establish the WebRTC connection."""
302
416
  if self._webrtc_connection.is_connected():
303
417
  # already initialized
304
418
  return
@@ -307,6 +421,11 @@ class SmallWebRTCClient:
307
421
  await self._webrtc_connection.connect()
308
422
 
309
423
  async def disconnect(self):
424
+ """Disconnect from the WebRTC peer."""
425
+ self._leave_counter -= 1
426
+ if self._leave_counter > 0:
427
+ return
428
+
310
429
  if self.is_connected and not self.is_closing:
311
430
  logger.info(f"Disconnecting to Small WebRTC")
312
431
  self._closing = True
@@ -314,16 +433,23 @@ class SmallWebRTCClient:
314
433
  await self._handle_peer_disconnected()
315
434
 
316
435
  async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
436
+ """Send an application message through the WebRTC connection.
437
+
438
+ Args:
439
+ frame: The message frame to send.
440
+ """
317
441
  if self._can_send():
318
442
  self._webrtc_connection.send_app_message(frame.message)
319
443
 
320
444
  async def _handle_client_connected(self):
445
+ """Handle client connection establishment."""
321
446
  # There is nothing to do here yet, the pipeline is still not ready
322
447
  if not self._params:
323
448
  return
324
449
 
325
450
  self._audio_input_track = self._webrtc_connection.audio_input_track()
326
451
  self._video_input_track = self._webrtc_connection.video_input_track()
452
+ self._screen_video_track = self._webrtc_connection.screen_video_input_track()
327
453
  if self._params.audio_out_enabled:
328
454
  self._audio_output_track = RawAudioTrack(sample_rate=self._out_sample_rate)
329
455
  self._webrtc_connection.replace_audio_track(self._audio_output_track)
@@ -337,57 +463,98 @@ class SmallWebRTCClient:
337
463
  await self._callbacks.on_client_connected(self._webrtc_connection)
338
464
 
339
465
  async def _handle_peer_disconnected(self):
466
+ """Handle peer disconnection cleanup."""
340
467
  self._audio_input_track = None
341
468
  self._video_input_track = None
469
+ self._screen_video_track = None
342
470
  self._audio_output_track = None
343
471
  self._video_output_track = None
344
472
 
345
473
  async def _handle_client_closed(self):
474
+ """Handle client connection closure."""
346
475
  self._audio_input_track = None
347
476
  self._video_input_track = None
477
+ self._screen_video_track = None
348
478
  self._audio_output_track = None
349
479
  self._video_output_track = None
350
480
  await self._callbacks.on_client_disconnected(self._webrtc_connection)
351
481
 
352
482
  async def _handle_app_message(self, message: Any):
483
+ """Handle incoming application messages."""
353
484
  await self._callbacks.on_app_message(message)
354
485
 
355
486
  def _can_send(self):
487
+ """Check if the connection is ready for sending data."""
356
488
  return self.is_connected and not self.is_closing
357
489
 
358
490
  @property
359
491
  def is_connected(self) -> bool:
492
+ """Check if the WebRTC connection is established.
493
+
494
+ Returns:
495
+ True if connected to the peer.
496
+ """
360
497
  return self._webrtc_connection.is_connected()
361
498
 
362
499
  @property
363
500
  def is_closing(self) -> bool:
501
+ """Check if the connection is in the process of closing.
502
+
503
+ Returns:
504
+ True if the connection is closing.
505
+ """
364
506
  return self._closing
365
507
 
366
508
 
367
509
  class SmallWebRTCInputTransport(BaseInputTransport):
510
+ """Input transport implementation for SmallWebRTC.
511
+
512
+ Handles incoming audio and video streams from WebRTC peers,
513
+ including user image requests and application message handling.
514
+ """
515
+
368
516
  def __init__(
369
517
  self,
370
518
  client: SmallWebRTCClient,
371
519
  params: TransportParams,
372
520
  **kwargs,
373
521
  ):
522
+ """Initialize the WebRTC input transport.
523
+
524
+ Args:
525
+ client: The WebRTC client instance.
526
+ params: Transport configuration parameters.
527
+ **kwargs: Additional arguments passed to parent class.
528
+ """
374
529
  super().__init__(params, **kwargs)
375
530
  self._client = client
376
531
  self._params = params
377
532
  self._receive_audio_task = None
378
533
  self._receive_video_task = None
534
+ self._receive_screen_video_task = None
379
535
  self._image_requests = {}
380
536
 
381
537
  # Whether we have seen a StartFrame already.
382
538
  self._initialized = False
383
539
 
384
540
  async def process_frame(self, frame: Frame, direction: FrameDirection):
541
+ """Process incoming frames including user image requests.
542
+
543
+ Args:
544
+ frame: The frame to process.
545
+ direction: The direction of frame flow in the pipeline.
546
+ """
385
547
  await super().process_frame(frame, direction)
386
548
 
387
549
  if isinstance(frame, UserImageRequestFrame):
388
550
  await self.request_participant_image(frame)
389
551
 
390
552
  async def start(self, frame: StartFrame):
553
+ """Start the input transport and establish WebRTC connection.
554
+
555
+ Args:
556
+ frame: The start frame containing initialization parameters.
557
+ """
391
558
  await super().start(frame)
392
559
 
393
560
  if self._initialized:
@@ -397,13 +564,14 @@ class SmallWebRTCInputTransport(BaseInputTransport):
397
564
 
398
565
  await self._client.setup(self._params, frame)
399
566
  await self._client.connect()
567
+ await self.set_transport_ready(frame)
400
568
  if not self._receive_audio_task and self._params.audio_in_enabled:
401
569
  self._receive_audio_task = self.create_task(self._receive_audio())
402
570
  if not self._receive_video_task and self._params.video_in_enabled:
403
- self._receive_video_task = self.create_task(self._receive_video())
404
- await self.set_transport_ready(frame)
571
+ self._receive_video_task = self.create_task(self._receive_video(CAM_VIDEO_SOURCE))
405
572
 
406
573
  async def _stop_tasks(self):
574
+ """Stop all background tasks."""
407
575
  if self._receive_audio_task:
408
576
  await self.cancel_task(self._receive_audio_task)
409
577
  self._receive_audio_task = None
@@ -412,66 +580,88 @@ class SmallWebRTCInputTransport(BaseInputTransport):
412
580
  self._receive_video_task = None
413
581
 
414
582
  async def stop(self, frame: EndFrame):
583
+ """Stop the input transport and disconnect from WebRTC.
584
+
585
+ Args:
586
+ frame: The end frame signaling transport shutdown.
587
+ """
415
588
  await super().stop(frame)
416
589
  await self._stop_tasks()
417
590
  await self._client.disconnect()
418
591
 
419
592
  async def cancel(self, frame: CancelFrame):
593
+ """Cancel the input transport and disconnect immediately.
594
+
595
+ Args:
596
+ frame: The cancel frame signaling immediate cancellation.
597
+ """
420
598
  await super().cancel(frame)
421
599
  await self._stop_tasks()
422
600
  await self._client.disconnect()
423
601
 
424
602
  async def _receive_audio(self):
603
+ """Background task for receiving audio frames from WebRTC."""
425
604
  try:
426
605
  audio_iterator = self._client.read_audio_frame()
427
- async for audio_frame in WatchdogAsyncIterator(
428
- audio_iterator, manager=self.task_manager
429
- ):
606
+ async for audio_frame in audio_iterator:
430
607
  if audio_frame:
431
608
  await self.push_audio_frame(audio_frame)
432
609
 
433
610
  except Exception as e:
434
611
  logger.error(f"{self} exception receiving data: {e.__class__.__name__} ({e})")
435
612
 
436
- async def _receive_video(self):
613
+ async def _receive_video(self, video_source: str):
614
+ """Background task for receiving video frames from WebRTC.
615
+
616
+ Args:
617
+ video_source: Video source to capture ("camera" or "screenVideo").
618
+ """
437
619
  try:
438
- video_iterator = self._client.read_video_frame()
439
- async for video_frame in WatchdogAsyncIterator(
440
- video_iterator, manager=self.task_manager
441
- ):
620
+ video_iterator = self._client.read_video_frame(video_source)
621
+ async for video_frame in video_iterator:
442
622
  if video_frame:
443
623
  await self.push_video_frame(video_frame)
444
624
 
445
625
  # Check if there are any pending image requests and create UserImageRawFrame
446
626
  if self._image_requests:
447
627
  for req_id, request_frame in list(self._image_requests.items()):
448
- # Create UserImageRawFrame using the current video frame
449
- image_frame = UserImageRawFrame(
450
- user_id=request_frame.user_id,
451
- request=request_frame,
452
- image=video_frame.image,
453
- size=video_frame.size,
454
- format=video_frame.format,
455
- )
456
- # Push the frame to the pipeline
457
- await self.push_video_frame(image_frame)
458
- # Remove from pending requests
459
- del self._image_requests[req_id]
628
+ if request_frame.video_source == video_source:
629
+ # Create UserImageRawFrame using the current video frame
630
+ image_frame = UserImageRawFrame(
631
+ user_id=request_frame.user_id,
632
+ request=request_frame,
633
+ image=video_frame.image,
634
+ size=video_frame.size,
635
+ format=video_frame.format,
636
+ )
637
+ image_frame.transport_source = video_source
638
+ # Push the frame to the pipeline
639
+ await self.push_video_frame(image_frame)
640
+ # Remove from pending requests
641
+ del self._image_requests[req_id]
460
642
 
461
643
  except Exception as e:
462
644
  logger.error(f"{self} exception receiving data: {e.__class__.__name__} ({e})")
463
645
 
464
646
  async def push_app_message(self, message: Any):
647
+ """Push an application message into the pipeline.
648
+
649
+ Args:
650
+ message: The application message to process.
651
+ """
465
652
  logger.debug(f"Received app message inside SmallWebRTCInputTransport {message}")
466
653
  frame = TransportMessageUrgentFrame(message=message)
467
654
  await self.push_frame(frame)
468
655
 
469
656
  # Add this method similar to DailyInputTransport.request_participant_image
470
657
  async def request_participant_image(self, frame: UserImageRequestFrame):
471
- """Requests an image frame from the participant's video stream.
658
+ """Request an image frame from the participant's video stream.
472
659
 
473
660
  When a UserImageRequestFrame is received, this method will store the request
474
661
  and the next video frame received will be converted to a UserImageRawFrame.
662
+
663
+ Args:
664
+ frame: The user image request frame.
475
665
  """
476
666
  logger.debug(f"Requesting image from participant: {frame.user_id}")
477
667
 
@@ -479,19 +669,82 @@ class SmallWebRTCInputTransport(BaseInputTransport):
479
669
  request_id = f"{frame.function_name}:{frame.tool_call_id}"
480
670
  self._image_requests[request_id] = frame
481
671
 
672
+ # Default to camera if no source specified
673
+ if frame.video_source is None:
674
+ frame.video_source = CAM_VIDEO_SOURCE
482
675
  # If we're not already receiving video, try to get a frame now
483
- if not self._receive_video_task and self._params.video_in_enabled:
676
+ if (
677
+ frame.video_source == CAM_VIDEO_SOURCE
678
+ and not self._receive_video_task
679
+ and self._params.video_in_enabled
680
+ ):
484
681
  # Start video reception if it's not already running
485
- self._receive_video_task = self.create_task(self._receive_video())
682
+ self._receive_video_task = self.create_task(self._receive_video(CAM_VIDEO_SOURCE))
683
+ elif (
684
+ frame.video_source == SCREEN_VIDEO_SOURCE
685
+ and not self._receive_screen_video_task
686
+ and self._params.video_in_enabled
687
+ ):
688
+ # Start screen video reception if it's not already running
689
+ self._receive_screen_video_task = self.create_task(
690
+ self._receive_video(SCREEN_VIDEO_SOURCE)
691
+ )
692
+
693
+ async def capture_participant_media(
694
+ self,
695
+ source: str = CAM_VIDEO_SOURCE,
696
+ ):
697
+ """Capture media from a specific participant.
698
+
699
+ Args:
700
+ source: Media source to capture from. ("camera", "microphone", or "screenVideo")
701
+ """
702
+ # If we're not already receiving video, try to get a frame now
703
+ if (
704
+ source == MIC_AUDIO_SOURCE
705
+ and not self._receive_audio_task
706
+ and self._params.audio_in_enabled
707
+ ):
708
+ # Start audio reception if it's not already running
709
+ self._receive_audio_task = self.create_task(self._receive_audio())
710
+ elif (
711
+ source == CAM_VIDEO_SOURCE
712
+ and not self._receive_video_task
713
+ and self._params.video_in_enabled
714
+ ):
715
+ # Start video reception if it's not already running
716
+ self._receive_video_task = self.create_task(self._receive_video(CAM_VIDEO_SOURCE))
717
+ elif (
718
+ source == SCREEN_VIDEO_SOURCE
719
+ and not self._receive_screen_video_task
720
+ and self._params.video_in_enabled
721
+ ):
722
+ # Start screen video reception if it's not already running
723
+ self._receive_screen_video_task = self.create_task(
724
+ self._receive_video(SCREEN_VIDEO_SOURCE)
725
+ )
486
726
 
487
727
 
488
728
  class SmallWebRTCOutputTransport(BaseOutputTransport):
729
+ """Output transport implementation for SmallWebRTC.
730
+
731
+ Handles outgoing audio and video streams to WebRTC peers,
732
+ including transport message sending.
733
+ """
734
+
489
735
  def __init__(
490
736
  self,
491
737
  client: SmallWebRTCClient,
492
738
  params: TransportParams,
493
739
  **kwargs,
494
740
  ):
741
+ """Initialize the WebRTC output transport.
742
+
743
+ Args:
744
+ client: The WebRTC client instance.
745
+ params: Transport configuration parameters.
746
+ **kwargs: Additional arguments passed to parent class.
747
+ """
495
748
  super().__init__(params, **kwargs)
496
749
  self._client = client
497
750
  self._params = params
@@ -500,6 +753,11 @@ class SmallWebRTCOutputTransport(BaseOutputTransport):
500
753
  self._initialized = False
501
754
 
502
755
  async def start(self, frame: StartFrame):
756
+ """Start the output transport and establish WebRTC connection.
757
+
758
+ Args:
759
+ frame: The start frame containing initialization parameters.
760
+ """
503
761
  await super().start(frame)
504
762
 
505
763
  if self._initialized:
@@ -512,24 +770,55 @@ class SmallWebRTCOutputTransport(BaseOutputTransport):
512
770
  await self.set_transport_ready(frame)
513
771
 
514
772
  async def stop(self, frame: EndFrame):
773
+ """Stop the output transport and disconnect from WebRTC.
774
+
775
+ Args:
776
+ frame: The end frame signaling transport shutdown.
777
+ """
515
778
  await super().stop(frame)
516
779
  await self._client.disconnect()
517
780
 
518
781
  async def cancel(self, frame: CancelFrame):
782
+ """Cancel the output transport and disconnect immediately.
783
+
784
+ Args:
785
+ frame: The cancel frame signaling immediate cancellation.
786
+ """
519
787
  await super().cancel(frame)
520
788
  await self._client.disconnect()
521
789
 
522
790
  async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
791
+ """Send a transport message through the WebRTC connection.
792
+
793
+ Args:
794
+ frame: The transport message frame to send.
795
+ """
523
796
  await self._client.send_message(frame)
524
797
 
525
798
  async def write_audio_frame(self, frame: OutputAudioRawFrame):
799
+ """Write an audio frame to the WebRTC connection.
800
+
801
+ Args:
802
+ frame: The output audio frame to transmit.
803
+ """
526
804
  await self._client.write_audio_frame(frame)
527
805
 
528
806
  async def write_video_frame(self, frame: OutputImageRawFrame):
807
+ """Write a video frame to the WebRTC connection.
808
+
809
+ Args:
810
+ frame: The output video frame to transmit.
811
+ """
529
812
  await self._client.write_video_frame(frame)
530
813
 
531
814
 
532
815
  class SmallWebRTCTransport(BaseTransport):
816
+ """WebRTC transport implementation for real-time communication.
817
+
818
+ Provides bidirectional audio and video streaming over WebRTC connections
819
+ with support for application messaging and connection event handling.
820
+ """
821
+
533
822
  def __init__(
534
823
  self,
535
824
  webrtc_connection: SmallWebRTCConnection,
@@ -537,6 +826,14 @@ class SmallWebRTCTransport(BaseTransport):
537
826
  input_name: Optional[str] = None,
538
827
  output_name: Optional[str] = None,
539
828
  ):
829
+ """Initialize the WebRTC transport.
830
+
831
+ Args:
832
+ webrtc_connection: The underlying WebRTC connection handler.
833
+ params: Transport configuration parameters.
834
+ input_name: Optional name for the input processor.
835
+ output_name: Optional name for the output processor.
836
+ """
540
837
  super().__init__(input_name=input_name, output_name=output_name)
541
838
  self._params = params
542
839
 
@@ -558,6 +855,11 @@ class SmallWebRTCTransport(BaseTransport):
558
855
  self._register_event_handler("on_client_disconnected")
559
856
 
560
857
  def input(self) -> SmallWebRTCInputTransport:
858
+ """Get the input transport processor.
859
+
860
+ Returns:
861
+ The input transport for handling incoming media streams.
862
+ """
561
863
  if not self._input:
562
864
  self._input = SmallWebRTCInputTransport(
563
865
  self._client, self._params, name=self._input_name
@@ -565,6 +867,11 @@ class SmallWebRTCTransport(BaseTransport):
565
867
  return self._input
566
868
 
567
869
  def output(self) -> SmallWebRTCOutputTransport:
870
+ """Get the output transport processor.
871
+
872
+ Returns:
873
+ The output transport for handling outgoing media streams.
874
+ """
568
875
  if not self._output:
569
876
  self._output = SmallWebRTCOutputTransport(
570
877
  self._client, self._params, name=self._input_name
@@ -572,20 +879,57 @@ class SmallWebRTCTransport(BaseTransport):
572
879
  return self._output
573
880
 
574
881
  async def send_image(self, frame: OutputImageRawFrame | SpriteFrame):
882
+ """Send an image frame through the transport.
883
+
884
+ Args:
885
+ frame: The image frame to send.
886
+ """
575
887
  if self._output:
576
888
  await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM)
577
889
 
578
890
  async def send_audio(self, frame: OutputAudioRawFrame):
891
+ """Send an audio frame through the transport.
892
+
893
+ Args:
894
+ frame: The audio frame to send.
895
+ """
579
896
  if self._output:
580
897
  await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM)
581
898
 
582
899
  async def _on_app_message(self, message: Any):
900
+ """Handle incoming application messages."""
583
901
  if self._input:
584
902
  await self._input.push_app_message(message)
585
903
  await self._call_event_handler("on_app_message", message)
586
904
 
587
905
  async def _on_client_connected(self, webrtc_connection):
906
+ """Handle client connection events."""
588
907
  await self._call_event_handler("on_client_connected", webrtc_connection)
589
908
 
590
909
  async def _on_client_disconnected(self, webrtc_connection):
910
+ """Handle client disconnection events."""
591
911
  await self._call_event_handler("on_client_disconnected", webrtc_connection)
912
+
913
+ async def capture_participant_video(
914
+ self,
915
+ video_source: str = CAM_VIDEO_SOURCE,
916
+ ):
917
+ """Capture video from a specific participant.
918
+
919
+ Args:
920
+ video_source: Video source to capture from ("camera" or "screenVideo").
921
+ """
922
+ if self._input:
923
+ await self._input.capture_participant_media(source=video_source)
924
+
925
+ async def capture_participant_audio(
926
+ self,
927
+ audio_source: str = MIC_AUDIO_SOURCE,
928
+ ):
929
+ """Capture audio from a specific participant.
930
+
931
+ Args:
932
+ audio_source: Audio source to capture from. (currently, "microphone" is the only supported option)
933
+ """
934
+ if self._input:
935
+ await self._input.capture_participant_media(source=audio_source)