dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of dv-pipecat-ai has been flagged as potentially problematic.

Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/transports/base_output.py
@@ -4,9 +4,14 @@
  # SPDX-License-Identifier: BSD 2-Clause License
  #

+ """Base output transport implementation for Pipecat.
+
+ This module provides the BaseOutputTransport class which handles audio and video
+ output processing, including frame buffering, mixing, timing, and media streaming.
+ """
+
  import asyncio
  import itertools
- import sys
  import time
  from concurrent.futures import ThreadPoolExecutor
  from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional
@@ -15,7 +20,7 @@ from loguru import logger
  from PIL import Image

  from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer
- from pipecat.audio.utils import create_default_resampler
+ from pipecat.audio.utils import create_stream_resampler, is_silence
  from pipecat.frames.frames import (
  BotSpeakingFrame,
  BotStartedSpeakingFrame,
@@ -28,6 +33,8 @@ from pipecat.frames.frames import (
  OutputDTMFFrame,
  OutputDTMFUrgentFrame,
  OutputImageRawFrame,
+ OutputTransportReadyFrame,
+ SpeechOutputAudioRawFrame,
  SpriteFrame,
  StartFrame,
  StartInterruptionFrame,
@@ -39,7 +46,6 @@ from pipecat.frames.frames import (
  )
  from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
  from pipecat.transports.base_transport import TransportParams
- from pipecat.utils.asyncio.watchdog_priority_queue import WatchdogPriorityQueue
  from pipecat.utils.time import nanoseconds_to_seconds

  # TODO: When we use GeminiMultimodalLiveLLMService, we need to change this to 0.35 but that creates issue for faster TTS.
@@ -47,7 +53,20 @@ BOT_VAD_STOP_SECS = 0.30


  class BaseOutputTransport(FrameProcessor):
+ """Base class for output transport implementations.
+
+ Handles audio and video output processing including frame buffering, audio mixing,
+ timing coordination, and media streaming. Supports multiple output destinations
+ and provides interruption handling for real-time communication.
+ """
+
  def __init__(self, params: TransportParams, **kwargs):
+ """Initialize the base output transport.
+
+ Args:
+ params: Transport configuration parameters.
+ **kwargs: Additional arguments passed to parent class.
+ """
  super().__init__(**kwargs)

  self._params = params
@@ -68,13 +87,28 @@ class BaseOutputTransport(FrameProcessor):

  @property
  def sample_rate(self) -> int:
+ """Get the current audio sample rate.
+
+ Returns:
+ The sample rate in Hz.
+ """
  return self._sample_rate

  @property
  def audio_chunk_size(self) -> int:
+ """Get the audio chunk size for output processing.
+
+ Returns:
+ The size of audio chunks in bytes.
+ """
  return self._audio_chunk_size

  async def start(self, frame: StartFrame):
+ """Start the output transport and initialize components.
+
+ Args:
+ frame: The start frame containing initialization parameters.
+ """
  self._sample_rate = self._params.audio_out_sample_rate or frame.audio_out_sample_rate

  # We will write 10ms*CHUNKS of audio at a time (where CHUNKS is the
@@ -84,15 +118,29 @@ class BaseOutputTransport(FrameProcessor):
  self._audio_chunk_size = audio_bytes_10ms * self._params.audio_out_10ms_chunks

  async def stop(self, frame: EndFrame):
+ """Stop the output transport and cleanup resources.
+
+ Args:
+ frame: The end frame signaling transport shutdown.
+ """
  for _, sender in self._media_senders.items():
  await sender.stop(frame)

  async def cancel(self, frame: CancelFrame):
+ """Cancel the output transport and stop all processing.
+
+ Args:
+ frame: The cancel frame signaling immediate cancellation.
+ """
  for _, sender in self._media_senders.items():
  await sender.cancel(frame)

  async def set_transport_ready(self, frame: StartFrame):
- """To be called when the transport is ready to stream."""
+ """Called when the transport is ready to stream.
+
+ Args:
+ frame: The start frame containing initialization parameters.
+ """
  # Register destinations.
  for destination in self._params.audio_out_destinations:
  await self.register_audio_destination(destination)
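
Note on the chunking above: start() sizes each output write as audio_bytes_10ms * audio_out_10ms_chunks. The byte count for 10 ms of linear 16-bit PCM is not shown in this hunk, but it follows directly from the sample rate and channel count. The sketch below uses an assumed 24 kHz mono configuration; all values are illustrative, not taken from this release.

    # Back-of-envelope sizing of the 10 ms output chunks, assuming 16-bit
    # (2-byte) linear PCM. The sample rate and chunk count are example values.
    BYTES_PER_SAMPLE = 2

    def audio_bytes_10ms(sample_rate: int, num_channels: int) -> int:
        """Bytes needed for 10 ms of 16-bit PCM audio."""
        return (sample_rate // 100) * BYTES_PER_SAMPLE * num_channels

    # 24 kHz mono -> 480 bytes per 10 ms; with audio_out_10ms_chunks = 4,
    # each write is 4 * 480 = 1920 bytes, i.e. 40 ms of audio.
    assert audio_bytes_10ms(24000, 1) * 4 == 1920
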
@@ -127,28 +175,71 @@ class BaseOutputTransport(FrameProcessor):
  )
  await self._media_senders[destination].start(frame)

+ # Sending a frame indicating that the output transport is ready and able to receive frames.
+ await self.push_frame(OutputTransportReadyFrame(), FrameDirection.UPSTREAM)
+
  async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
+ """Send a transport message.
+
+ Args:
+ frame: The transport message frame to send.
+ """
  pass

  async def register_video_destination(self, destination: str):
+ """Register a video output destination.
+
+ Args:
+ destination: The destination identifier to register.
+ """
  pass

  async def register_audio_destination(self, destination: str):
+ """Register an audio output destination.
+
+ Args:
+ destination: The destination identifier to register.
+ """
  pass

  async def write_video_frame(self, frame: OutputImageRawFrame):
+ """Write a video frame to the transport.
+
+ Args:
+ frame: The output video frame to write.
+ """
  pass

  async def write_audio_frame(self, frame: OutputAudioRawFrame):
+ """Write an audio frame to the transport.
+
+ Args:
+ frame: The output audio frame to write.
+ """
  pass

  async def write_dtmf(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
+ """Write a DTMF tone to the transport.
+
+ Args:
+ frame: The DTMF frame to write.
+ """
  pass

  async def send_audio(self, frame: OutputAudioRawFrame):
+ """Send an audio frame downstream.
+
+ Args:
+ frame: The audio frame to send.
+ """
  await self.queue_frame(frame, FrameDirection.DOWNSTREAM)

  async def send_image(self, frame: OutputImageRawFrame | SpriteFrame):
+ """Send an image frame downstream.
+
+ Args:
+ frame: The image frame to send.
+ """
  await self.queue_frame(frame, FrameDirection.DOWNSTREAM)

  #
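
set_transport_ready() now pushes an OutputTransportReadyFrame upstream once every destination and media sender has started, so upstream processors can hold back media until the transport can actually deliver it. The gate below is a hypothetical sketch built only on the FrameProcessor API visible in this diff; its buffering policy (holding OutputAudioRawFrame only) is an assumption, not part of the release.

    from pipecat.frames.frames import Frame, OutputAudioRawFrame, OutputTransportReadyFrame
    from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


    class WaitForOutputReady(FrameProcessor):
        """Hypothetical gate: holds audio until the output transport is ready."""

        def __init__(self, **kwargs):
            super().__init__(**kwargs)
            self._ready = False
            self._pending: list[Frame] = []

        async def process_frame(self, frame: Frame, direction: FrameDirection):
            await super().process_frame(frame, direction)

            if isinstance(frame, OutputTransportReadyFrame):
                self._ready = True
                # Flush audio held back while the transport was starting up.
                for held in self._pending:
                    await self.push_frame(held, FrameDirection.DOWNSTREAM)
                self._pending.clear()
                await self.push_frame(frame, direction)
            elif isinstance(frame, OutputAudioRawFrame) and not self._ready:
                self._pending.append(frame)
            else:
                await self.push_frame(frame, direction)
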
@@ -156,6 +247,12 @@ class BaseOutputTransport(FrameProcessor):
  #

  async def process_frame(self, frame: Frame, direction: FrameDirection):
+ """Process incoming frames and handle transport-specific logic.
+
+ Args:
+ frame: The frame to process.
+ direction: The direction of frame flow in the pipeline.
+ """
  await super().process_frame(frame, direction)

  #
@@ -201,6 +298,7 @@ class BaseOutputTransport(FrameProcessor):
  await self._handle_frame(frame)

  async def _handle_frame(self, frame: Frame):
+ """Handle frames by routing them to appropriate media senders."""
  if frame.transport_destination not in self._media_senders:
  logger.warning(
  f"{self} destination [{frame.transport_destination}] not registered for frame {frame}"
@@ -227,6 +325,12 @@ class BaseOutputTransport(FrameProcessor):
  #

  class MediaSender:
+ """Handles media streaming for a specific destination.
+
+ Manages audio and video output processing including buffering, timing,
+ mixing, and frame delivery for a single output destination.
+ """
+
  def __init__(
  self,
  transport: "BaseOutputTransport",
@@ -236,6 +340,15 @@ class BaseOutputTransport(FrameProcessor):
  audio_chunk_size: int,
  params: TransportParams,
  ):
+ """Initialize the media sender.
+
+ Args:
+ transport: The parent transport instance.
+ destination: The destination identifier for this sender.
+ sample_rate: The audio sample rate in Hz.
+ audio_chunk_size: The size of audio chunks in bytes.
+ params: Transport configuration parameters.
+ """
  self._transport = transport
  self._destination = destination
  self._sample_rate = sample_rate
@@ -249,7 +362,7 @@ class BaseOutputTransport(FrameProcessor):
  self._audio_buffer = bytearray()

  # This will be used to resample incoming audio to the output sample rate.
- self._resampler = create_default_resampler()
+ self._resampler = create_stream_resampler()

  # The user can provide a single mixer, to be used by the default
  # destination, or a destination/mixer mapping.
@@ -267,13 +380,28 @@ class BaseOutputTransport(FrameProcessor):

  @property
  def sample_rate(self) -> int:
+ """Get the audio sample rate.
+
+ Returns:
+ The sample rate in Hz.
+ """
  return self._sample_rate

  @property
  def audio_chunk_size(self) -> int:
+ """Get the audio chunk size.
+
+ Returns:
+ The size of audio chunks in bytes.
+ """
  return self._audio_chunk_size

  async def start(self, frame: StartFrame):
+ """Start the media sender and initialize components.
+
+ Args:
+ frame: The start frame containing initialization parameters.
+ """
  self._audio_buffer = bytearray()

  # Create all tasks.
@@ -294,8 +422,13 @@ class BaseOutputTransport(FrameProcessor):
  await self._mixer.start(self._sample_rate)

  async def stop(self, frame: EndFrame):
+ """Stop the media sender and cleanup resources.
+
+ Args:
+ frame: The end frame signaling sender shutdown.
+ """
  # Let the sink tasks process the queue until they reach this EndFrame.
- await self._clock_queue.put((sys.maxsize, frame.id, frame))
+ await self._clock_queue.put((float("inf"), frame.id, frame))
  await self._audio_queue.put(frame)

  # At this point we have enqueued an EndFrame and we need to wait for
@@ -303,9 +436,9 @@ class BaseOutputTransport(FrameProcessor):
  # also need to wait for these tasks before cancelling the video task
  # because it might be still rendering.
  if self._audio_task:
- await self._transport.wait_for_task(self._audio_task)
+ await self._audio_task
  if self._clock_task:
- await self._transport.wait_for_task(self._clock_task)
+ await self._clock_task

  # Stop audio mixer.
  if self._mixer:
@@ -315,12 +448,22 @@ class BaseOutputTransport(FrameProcessor):
  await self._cancel_video_task()

  async def cancel(self, frame: CancelFrame):
+ """Cancel the media sender and stop all processing.
+
+ Args:
+ frame: The cancel frame signaling immediate cancellation.
+ """
  # Since we are cancelling everything it doesn't matter what task we cancel first.
  await self._cancel_audio_task()
  await self._cancel_clock_task()
  await self._cancel_video_task()

  async def handle_interruptions(self, _: StartInterruptionFrame):
+ """Handle interruption events by restarting tasks and clearing buffers.
+
+ Args:
+ _: The start interruption frame (unused).
+ """
  if not self._transport.interruptions_allowed:
  return

@@ -336,6 +479,11 @@ class BaseOutputTransport(FrameProcessor):
  await self._bot_stopped_speaking()

  async def handle_audio_frame(self, frame: OutputAudioRawFrame):
+ """Handle incoming audio frames by buffering and chunking.
+
+ Args:
+ frame: The output audio frame to handle.
+ """
  if not self._params.audio_out_enabled:
  return

@@ -358,6 +506,11 @@ class BaseOutputTransport(FrameProcessor):
  self._audio_buffer = self._audio_buffer[self._audio_chunk_size :]

  async def handle_image_frame(self, frame: OutputImageRawFrame | SpriteFrame):
+ """Handle incoming image frames for video output.
+
+ Args:
+ frame: The output image or sprite frame to handle.
+ """
  if not self._params.video_out_enabled:
  return

@@ -369,12 +522,27 @@ class BaseOutputTransport(FrameProcessor):
  await self._set_video_images(frame.images)

  async def handle_timed_frame(self, frame: Frame):
+ """Handle frames with presentation timestamps.
+
+ Args:
+ frame: The frame with timing information to handle.
+ """
  await self._clock_queue.put((frame.pts, frame.id, frame))

  async def handle_sync_frame(self, frame: Frame):
+ """Handle frames that need synchronized processing.
+
+ Args:
+ frame: The frame to handle synchronously.
+ """
  await self._audio_queue.put(frame)

  async def handle_mixer_control_frame(self, frame: MixerControlFrame):
+ """Handle audio mixer control frames.
+
+ Args:
+ frame: The mixer control frame to handle.
+ """
  if self._mixer:
  await self._mixer.process_frame(frame)

@@ -383,16 +551,19 @@ class BaseOutputTransport(FrameProcessor):
  #

  def _create_audio_task(self):
+ """Create the audio processing task."""
  if not self._audio_task:
  self._audio_queue = asyncio.Queue()
  self._audio_task = self._transport.create_task(self._audio_task_handler())

  async def _cancel_audio_task(self):
+ """Cancel and cleanup the audio processing task."""
  if self._audio_task:
  await self._transport.cancel_task(self._audio_task)
  self._audio_task = None

  async def _bot_started_speaking(self):
+ """Handle bot started speaking event."""
  if not self._bot_speaking:
  self._transport.logger.debug(
  f"Bot{f' [{self._destination}]' if self._destination else ''} started speaking"
@@ -408,6 +579,7 @@ class BaseOutputTransport(FrameProcessor):
  self._bot_speaking = True

  async def _bot_stopped_speaking(self):
+ """Handle bot stopped speaking event."""
  if self._bot_speaking:
  self._transport.logger.debug(
  f"Bot{f' [{self._destination}]' if self._destination else ''} stopped speaking"
@@ -427,6 +599,11 @@ class BaseOutputTransport(FrameProcessor):
  self._audio_buffer = bytearray()

  async def _handle_frame(self, frame: Frame):
+ """Handle various frame types with appropriate processing.
+
+ Args:
+ frame: The frame to handle.
+ """
  if isinstance(frame, OutputImageRawFrame):
  await self._set_video_image(frame)
  elif isinstance(frame, SpriteFrame):
@@ -437,16 +614,20 @@ class BaseOutputTransport(FrameProcessor):
  await self._transport.write_dtmf(frame)

  def _next_frame(self) -> AsyncGenerator[Frame, None]:
+ """Generate the next frame for audio processing.
+
+ Returns:
+ An async generator yielding frames for processing.
+ """
+
  async def without_mixer(vad_stop_secs: float) -> AsyncGenerator[Frame, None]:
  while True:
  try:
  frame = await asyncio.wait_for(
  self._audio_queue.get(), timeout=vad_stop_secs
  )
- self._transport.reset_watchdog()
  yield frame
  except asyncio.TimeoutError:
- self._transport.reset_watchdog()
  # Notify the bot stopped speaking upstream if necessary.
  await self._bot_stopped_speaking()

@@ -456,13 +637,11 @@ class BaseOutputTransport(FrameProcessor):
  while True:
  try:
  frame = self._audio_queue.get_nowait()
- self._transport.reset_watchdog()
  if isinstance(frame, OutputAudioRawFrame):
  frame.audio = await self._mixer.mix(frame.audio)
  last_frame_time = time.time()
  yield frame
  except asyncio.QueueEmpty:
- self._transport.reset_watchdog()
  # Notify the bot stopped speaking upstream if necessary.
  diff_time = time.time() - last_frame_time
  if diff_time > vad_stop_secs:
@@ -474,6 +653,11 @@ class BaseOutputTransport(FrameProcessor):
  num_channels=self._params.audio_out_channels,
  )
  yield frame
+ # Allow other asyncio tasks to execute by adding a small sleep
+ # Without this sleep, in task cancellation scenarios, this loop would
+ # continuously return without any delay, leading to 100% CPU utilization
+ # and preventing cancel/stop signals from being processed properly
+ await asyncio.sleep(0)

  if self._mixer:
  return with_mixer(BOT_VAD_STOP_SECS)
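
The new await asyncio.sleep(0) in the mixer loop addresses a common asyncio pitfall: a coroutine that spins without ever awaiting something that yields never gives the event loop a chance to run other tasks or deliver cancellation. The standalone snippet below (plain asyncio, not pipecat code) shows the effect of that zero-length sleep.

    import asyncio


    async def busy_poll(queue: asyncio.Queue) -> None:
        """Drain a queue with get_nowait(); sleep(0) yields control so
        cancellation (and other tasks) can run between iterations."""
        while True:
            try:
                item = queue.get_nowait()
                print("got", item)
            except asyncio.QueueEmpty:
                pass
            # Without this line the loop would monopolize the event loop and
            # task.cancel() below would never be delivered.
            await asyncio.sleep(0)


    async def main() -> None:
        task = asyncio.create_task(busy_poll(asyncio.Queue()))
        await asyncio.sleep(0.01)
        task.cancel()  # delivered at the next await point inside the loop
        try:
            await task
        except asyncio.CancelledError:
            print("cancelled cleanly")


    asyncio.run(main())
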
@@ -481,16 +665,31 @@ class BaseOutputTransport(FrameProcessor):
  return without_mixer(BOT_VAD_STOP_SECS)

  async def _audio_task_handler(self):
+ """Main audio processing task handler."""
  # Push a BotSpeakingFrame every 200ms, we don't really need to push it
  # at every audio chunk. If the audio chunk is bigger than 200ms, push at
  # every audio chunk.
  TOTAL_CHUNK_MS = self._params.audio_out_10ms_chunks * 10
  BOT_SPEAKING_CHUNK_PERIOD = max(int(200 / TOTAL_CHUNK_MS), 1)
  bot_speaking_counter = 0
+ speech_last_speaking_time = 0
+
  async for frame in self._next_frame():
  # Notify the bot started speaking upstream if necessary and that
  # it's actually speaking.
+ is_speaking = False
  if isinstance(frame, TTSAudioRawFrame):
+ is_speaking = True
+ elif isinstance(frame, SpeechOutputAudioRawFrame):
+ if not is_silence(frame.audio):
+ is_speaking = True
+ speech_last_speaking_time = time.time()
+ else:
+ silence_duration = time.time() - speech_last_speaking_time
+ if silence_duration > BOT_VAD_STOP_SECS:
+ await self._bot_stopped_speaking()
+
+ if is_speaking:
  await self._bot_started_speaking()
  if bot_speaking_counter % BOT_SPEAKING_CHUNK_PERIOD == 0:
  await self._transport.push_frame(BotSpeakingFrame())
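
Two things are worth unpacking in this hunk. First, the BotSpeakingFrame cadence: with audio_out_10ms_chunks set to 2 (an example value, not from this release), TOTAL_CHUNK_MS is 20 and BOT_SPEAKING_CHUNK_PERIOD is max(int(200 / 20), 1) = 10, i.e. one BotSpeakingFrame roughly every 200 ms. Second, speech frames are now gated on the new is_silence() helper from pipecat.audio.utils, whose implementation is not shown here; a minimal RMS-threshold stand-in for 16-bit PCM might look like the following (the threshold value is an arbitrary assumption).

    import array
    import math


    def is_silence_sketch(audio: bytes, threshold: float = 500.0) -> bool:
        """Rough stand-in for pipecat.audio.utils.is_silence: treat a 16-bit
        PCM chunk as silent when its RMS energy falls below a fixed threshold."""
        if len(audio) < 2:
            return True
        samples = array.array("h")  # signed 16-bit samples
        samples.frombytes(audio[: len(audio) // 2 * 2])
        rms = math.sqrt(sum(s * s for s in samples) / len(samples))
        return rms < threshold
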
@@ -519,23 +718,36 @@ class BaseOutputTransport(FrameProcessor):
  #

  def _create_video_task(self):
+ """Create the video processing task if video output is enabled."""
  if not self._video_task and self._params.video_out_enabled:
  self._video_queue = asyncio.Queue()
  self._video_task = self._transport.create_task(self._video_task_handler())

  async def _cancel_video_task(self):
+ """Cancel and cleanup the video processing task."""
  # Stop video output task.
  if self._video_task:
  await self._transport.cancel_task(self._video_task)
  self._video_task = None

  async def _set_video_image(self, image: OutputImageRawFrame):
+ """Set a single video image for cycling output.
+
+ Args:
+ image: The image frame to cycle for video output.
+ """
  self._video_images = itertools.cycle([image])

  async def _set_video_images(self, images: List[OutputImageRawFrame]):
+ """Set multiple video images for cycling output.
+
+ Args:
+ images: The list of image frames to cycle for video output.
+ """
  self._video_images = itertools.cycle(images)

  async def _video_task_handler(self):
+ """Main video processing task handler."""
  self._video_start_time = None
  self._video_frame_index = 0
  self._video_frame_duration = 1 / self._params.video_out_framerate
@@ -551,6 +763,7 @@ class BaseOutputTransport(FrameProcessor):
  await asyncio.sleep(self._video_frame_duration)

  async def _video_is_live_handler(self):
+ """Handle live video streaming with frame timing."""
  image = await self._video_queue.get()

  # We get the start time as soon as we get the first image.
@@ -576,6 +789,12 @@ class BaseOutputTransport(FrameProcessor):
  self._video_queue.task_done()

  async def _draw_image(self, frame: OutputImageRawFrame):
+ """Draw/render an image frame with resizing if needed.
+
+ Args:
+ frame: The image frame to draw.
+ """
+
  def resize_frame(frame: OutputImageRawFrame) -> OutputImageRawFrame:
  desired_size = (self._params.video_out_width, self._params.video_out_height)

@@ -602,16 +821,19 @@ class BaseOutputTransport(FrameProcessor):
  #

  def _create_clock_task(self):
+ """Create the clock/timing processing task."""
  if not self._clock_task:
- self._clock_queue = WatchdogPriorityQueue(self._transport.task_manager)
+ self._clock_queue = asyncio.PriorityQueue()
  self._clock_task = self._transport.create_task(self._clock_task_handler())

  async def _cancel_clock_task(self):
+ """Cancel and cleanup the clock processing task."""
  if self._clock_task:
  await self._transport.cancel_task(self._clock_task)
  self._clock_task = None

  async def _clock_task_handler(self):
+ """Main clock/timing task handler for timed frame delivery."""
  running = True
  while running:
  timestamp, _, frame = await self._clock_queue.get()
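
With the watchdog machinery removed, the clock path is a plain asyncio.PriorityQueue of (timestamp, frame id, frame) tuples, and stop() now enqueues its EndFrame with a float("inf") timestamp so it sorts after every real presentation time (the removed sys.maxsize sentinel served the same purpose). A small self-contained illustration of that ordering, using made-up timestamps:

    import asyncio


    async def main() -> None:
        # Mirror of the clock queue: (timestamp, id, payload) tuples ordered by
        # timestamp, with float("inf") as an always-last end-of-stream sentinel.
        queue: asyncio.PriorityQueue = asyncio.PriorityQueue()
        await queue.put((2_000_000_000, 2, "frame-b"))
        await queue.put((float("inf"), 3, "end"))
        await queue.put((1_000_000_000, 1, "frame-a"))

        while True:
            timestamp, _, payload = await queue.get()
            print(timestamp, payload)
            if payload == "end":
                break


    asyncio.run(main())
    # Prints frame-a, frame-b, then end: the sentinel always drains last.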