dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
The rest of the page is the per-file diff for pipecat/transports/base_output.py (+235 -13), reconstructed below as a unified diff. Removed lines whose tails the diff viewer elided are left truncated as shown.

```diff
--- a/pipecat/transports/base_output.py
+++ b/pipecat/transports/base_output.py
@@ -4,9 +4,14 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Base output transport implementation for Pipecat.
+
+This module provides the BaseOutputTransport class which handles audio and video
+output processing, including frame buffering, mixing, timing, and media streaming.
+"""
+
 import asyncio
 import itertools
-import sys
 import time
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional
@@ -15,7 +20,7 @@ from loguru import logger
 from PIL import Image
 
 from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer
-from pipecat.audio.utils import
+from pipecat.audio.utils import create_stream_resampler, is_silence
 from pipecat.frames.frames import (
     BotSpeakingFrame,
     BotStartedSpeakingFrame,
@@ -28,6 +33,8 @@ from pipecat.frames.frames import (
     OutputDTMFFrame,
     OutputDTMFUrgentFrame,
     OutputImageRawFrame,
+    OutputTransportReadyFrame,
+    SpeechOutputAudioRawFrame,
     SpriteFrame,
     StartFrame,
     StartInterruptionFrame,
@@ -39,7 +46,6 @@ from pipecat.frames.frames import (
 )
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.transports.base_transport import TransportParams
-from pipecat.utils.asyncio.watchdog_priority_queue import WatchdogPriorityQueue
 from pipecat.utils.time import nanoseconds_to_seconds
 
 # TODO: When we use GeminiMultimodalLiveLLMService, we need to change this to 0.35 but that creates issue for faster TTS.
@@ -47,7 +53,20 @@ BOT_VAD_STOP_SECS = 0.30
 
 
 class BaseOutputTransport(FrameProcessor):
+    """Base class for output transport implementations.
+
+    Handles audio and video output processing including frame buffering, audio mixing,
+    timing coordination, and media streaming. Supports multiple output destinations
+    and provides interruption handling for real-time communication.
+    """
+
     def __init__(self, params: TransportParams, **kwargs):
+        """Initialize the base output transport.
+
+        Args:
+            params: Transport configuration parameters.
+            **kwargs: Additional arguments passed to parent class.
+        """
         super().__init__(**kwargs)
 
         self._params = params
@@ -68,13 +87,28 @@ class BaseOutputTransport(FrameProcessor):
 
     @property
     def sample_rate(self) -> int:
+        """Get the current audio sample rate.
+
+        Returns:
+            The sample rate in Hz.
+        """
         return self._sample_rate
 
     @property
     def audio_chunk_size(self) -> int:
+        """Get the audio chunk size for output processing.
+
+        Returns:
+            The size of audio chunks in bytes.
+        """
         return self._audio_chunk_size
 
     async def start(self, frame: StartFrame):
+        """Start the output transport and initialize components.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         self._sample_rate = self._params.audio_out_sample_rate or frame.audio_out_sample_rate
 
         # We will write 10ms*CHUNKS of audio at a time (where CHUNKS is the
@@ -84,15 +118,29 @@ class BaseOutputTransport(FrameProcessor):
         self._audio_chunk_size = audio_bytes_10ms * self._params.audio_out_10ms_chunks
 
     async def stop(self, frame: EndFrame):
+        """Stop the output transport and cleanup resources.
+
+        Args:
+            frame: The end frame signaling transport shutdown.
+        """
         for _, sender in self._media_senders.items():
             await sender.stop(frame)
 
     async def cancel(self, frame: CancelFrame):
+        """Cancel the output transport and stop all processing.
+
+        Args:
+            frame: The cancel frame signaling immediate cancellation.
+        """
         for _, sender in self._media_senders.items():
             await sender.cancel(frame)
 
     async def set_transport_ready(self, frame: StartFrame):
-        """
+        """Called when the transport is ready to stream.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         # Register destinations.
         for destination in self._params.audio_out_destinations:
             await self.register_audio_destination(destination)
```
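A note on the chunk sizing in the `start()` hunk above: the transport writes `audio_out_10ms_chunks` blocks of 10 ms audio per call, so `audio_chunk_size = audio_bytes_10ms * audio_out_10ms_chunks`. A minimal worked example of that arithmetic, assuming 16 kHz mono 16-bit PCM and four 10 ms chunks (the real values come from `TransportParams` and the `StartFrame`):

```python
SAMPLE_RATE = 16_000   # Hz (assumed for illustration)
NUM_CHANNELS = 1       # mono (assumed)
BYTES_PER_SAMPLE = 2   # 16-bit PCM (assumed)

# 10 ms of audio is sample_rate / 100 samples.
audio_bytes_10ms = (SAMPLE_RATE // 100) * NUM_CHANNELS * BYTES_PER_SAMPLE
assert audio_bytes_10ms == 320  # 160 samples * 2 bytes

audio_out_10ms_chunks = 4  # assumed configuration value
audio_chunk_size = audio_bytes_10ms * audio_out_10ms_chunks
assert audio_chunk_size == 1280  # i.e. the transport writes 40 ms at a time
```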
```diff
@@ -127,28 +175,71 @@ class BaseOutputTransport(FrameProcessor):
         )
         await self._media_senders[destination].start(frame)
 
+        # Sending a frame indicating that the output transport is ready and able to receive frames.
+        await self.push_frame(OutputTransportReadyFrame(), FrameDirection.UPSTREAM)
+
     async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
+        """Send a transport message.
+
+        Args:
+            frame: The transport message frame to send.
+        """
         pass
 
     async def register_video_destination(self, destination: str):
+        """Register a video output destination.
+
+        Args:
+            destination: The destination identifier to register.
+        """
         pass
 
     async def register_audio_destination(self, destination: str):
+        """Register an audio output destination.
+
+        Args:
+            destination: The destination identifier to register.
+        """
         pass
 
     async def write_video_frame(self, frame: OutputImageRawFrame):
+        """Write a video frame to the transport.
+
+        Args:
+            frame: The output video frame to write.
+        """
         pass
 
     async def write_audio_frame(self, frame: OutputAudioRawFrame):
+        """Write an audio frame to the transport.
+
+        Args:
+            frame: The output audio frame to write.
+        """
         pass
 
     async def write_dtmf(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
+        """Write a DTMF tone to the transport.
+
+        Args:
+            frame: The DTMF frame to write.
+        """
         pass
 
     async def send_audio(self, frame: OutputAudioRawFrame):
+        """Send an audio frame downstream.
+
+        Args:
+            frame: The audio frame to send.
+        """
         await self.queue_frame(frame, FrameDirection.DOWNSTREAM)
 
     async def send_image(self, frame: OutputImageRawFrame | SpriteFrame):
+        """Send an image frame downstream.
+
+        Args:
+            frame: The image frame to send.
+        """
         await self.queue_frame(frame, FrameDirection.DOWNSTREAM)
 
     #
@@ -156,6 +247,12 @@ class BaseOutputTransport(FrameProcessor):
     #
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames and handle transport-specific logic.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         await super().process_frame(frame, direction)
 
         #
@@ -201,6 +298,7 @@ class BaseOutputTransport(FrameProcessor):
         await self._handle_frame(frame)
 
     async def _handle_frame(self, frame: Frame):
+        """Handle frames by routing them to appropriate media senders."""
         if frame.transport_destination not in self._media_senders:
             logger.warning(
                 f"{self} destination [{frame.transport_destination}] not registered for frame {frame}"
@@ -227,6 +325,12 @@ class BaseOutputTransport(FrameProcessor):
     #
 
     class MediaSender:
+        """Handles media streaming for a specific destination.
+
+        Manages audio and video output processing including buffering, timing,
+        mixing, and frame delivery for a single output destination.
+        """
+
         def __init__(
             self,
             transport: "BaseOutputTransport",
@@ -236,6 +340,15 @@ class BaseOutputTransport(FrameProcessor):
             audio_chunk_size: int,
             params: TransportParams,
         ):
+            """Initialize the media sender.
+
+            Args:
+                transport: The parent transport instance.
+                destination: The destination identifier for this sender.
+                sample_rate: The audio sample rate in Hz.
+                audio_chunk_size: The size of audio chunks in bytes.
+                params: Transport configuration parameters.
+            """
             self._transport = transport
             self._destination = destination
             self._sample_rate = sample_rate
@@ -249,7 +362,7 @@ class BaseOutputTransport(FrameProcessor):
             self._audio_buffer = bytearray()
 
             # This will be used to resample incoming audio to the output sample rate.
-            self._resampler =
+            self._resampler = create_stream_resampler()
 
             # The user can provide a single mixer, to be used by the default
             # destination, or a destination/mixer mapping.
@@ -267,13 +380,28 @@ class BaseOutputTransport(FrameProcessor):
 
         @property
         def sample_rate(self) -> int:
+            """Get the audio sample rate.
+
+            Returns:
+                The sample rate in Hz.
+            """
             return self._sample_rate
 
         @property
         def audio_chunk_size(self) -> int:
+            """Get the audio chunk size.
+
+            Returns:
+                The size of audio chunks in bytes.
+            """
             return self._audio_chunk_size
 
         async def start(self, frame: StartFrame):
+            """Start the media sender and initialize components.
+
+            Args:
+                frame: The start frame containing initialization parameters.
+            """
             self._audio_buffer = bytearray()
 
             # Create all tasks.
@@ -294,8 +422,13 @@ class BaseOutputTransport(FrameProcessor):
                 await self._mixer.start(self._sample_rate)
 
         async def stop(self, frame: EndFrame):
+            """Stop the media sender and cleanup resources.
+
+            Args:
+                frame: The end frame signaling sender shutdown.
+            """
             # Let the sink tasks process the queue until they reach this EndFrame.
-            await self._clock_queue.put((
+            await self._clock_queue.put((float("inf"), frame.id, frame))
             await self._audio_queue.put(frame)
 
             # At this point we have enqueued an EndFrame and we need to wait for
```
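In the `stop()` hunk above, the `EndFrame` sentinel is enqueued with a priority of `float("inf")`, which sorts after every real presentation timestamp already waiting in the clock queue; `frame.id` acts as a tiebreaker so the queue never has to compare frame objects themselves. A self-contained sketch of that ordering (payloads are illustrative strings, not real frames):

```python
import asyncio

async def main() -> None:
    queue: asyncio.PriorityQueue = asyncio.PriorityQueue()
    # Entries mirror the (pts, id, frame) tuples used by the clock queue.
    await queue.put((float("inf"), 3, "EndFrame sentinel"))
    await queue.put((20_000_000, 2, "frame B at pts 20 ms"))
    await queue.put((10_000_000, 1, "frame A at pts 10 ms"))

    while not queue.empty():
        pts, _, payload = await queue.get()
        print(pts, payload)  # frame A, frame B, then the sentinel last

asyncio.run(main())
```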
```diff
@@ -303,9 +436,9 @@ class BaseOutputTransport(FrameProcessor):
             # also need to wait for these tasks before cancelling the video task
             # because it might be still rendering.
             if self._audio_task:
-                await self.
+                await self._audio_task
             if self._clock_task:
-                await self.
+                await self._clock_task
 
             # Stop audio mixer.
             if self._mixer:
@@ -315,12 +448,22 @@ class BaseOutputTransport(FrameProcessor):
             await self._cancel_video_task()
 
         async def cancel(self, frame: CancelFrame):
+            """Cancel the media sender and stop all processing.
+
+            Args:
+                frame: The cancel frame signaling immediate cancellation.
+            """
             # Since we are cancelling everything it doesn't matter what task we cancel first.
             await self._cancel_audio_task()
             await self._cancel_clock_task()
             await self._cancel_video_task()
 
         async def handle_interruptions(self, _: StartInterruptionFrame):
+            """Handle interruption events by restarting tasks and clearing buffers.
+
+            Args:
+                _: The start interruption frame (unused).
+            """
             if not self._transport.interruptions_allowed:
                 return
 
@@ -336,6 +479,11 @@ class BaseOutputTransport(FrameProcessor):
             await self._bot_stopped_speaking()
 
         async def handle_audio_frame(self, frame: OutputAudioRawFrame):
+            """Handle incoming audio frames by buffering and chunking.
+
+            Args:
+                frame: The output audio frame to handle.
+            """
             if not self._params.audio_out_enabled:
                 return
 
@@ -358,6 +506,11 @@ class BaseOutputTransport(FrameProcessor):
                 self._audio_buffer = self._audio_buffer[self._audio_chunk_size :]
 
         async def handle_image_frame(self, frame: OutputImageRawFrame | SpriteFrame):
+            """Handle incoming image frames for video output.
+
+            Args:
+                frame: The output image or sprite frame to handle.
+            """
             if not self._params.video_out_enabled:
                 return
 
@@ -369,12 +522,27 @@ class BaseOutputTransport(FrameProcessor):
                 await self._set_video_images(frame.images)
 
         async def handle_timed_frame(self, frame: Frame):
+            """Handle frames with presentation timestamps.
+
+            Args:
+                frame: The frame with timing information to handle.
+            """
             await self._clock_queue.put((frame.pts, frame.id, frame))
 
         async def handle_sync_frame(self, frame: Frame):
+            """Handle frames that need synchronized processing.
+
+            Args:
+                frame: The frame to handle synchronously.
+            """
             await self._audio_queue.put(frame)
 
         async def handle_mixer_control_frame(self, frame: MixerControlFrame):
+            """Handle audio mixer control frames.
+
+            Args:
+                frame: The mixer control frame to handle.
+            """
             if self._mixer:
                 await self._mixer.process_frame(frame)
 
@@ -383,16 +551,19 @@ class BaseOutputTransport(FrameProcessor):
         #
 
         def _create_audio_task(self):
+            """Create the audio processing task."""
             if not self._audio_task:
                 self._audio_queue = asyncio.Queue()
                 self._audio_task = self._transport.create_task(self._audio_task_handler())
 
         async def _cancel_audio_task(self):
+            """Cancel and cleanup the audio processing task."""
             if self._audio_task:
                 await self._transport.cancel_task(self._audio_task)
                 self._audio_task = None
 
         async def _bot_started_speaking(self):
+            """Handle bot started speaking event."""
             if not self._bot_speaking:
                 self._transport.logger.debug(
                     f"Bot{f' [{self._destination}]' if self._destination else ''} started speaking"
@@ -408,6 +579,7 @@ class BaseOutputTransport(FrameProcessor):
             self._bot_speaking = True
 
         async def _bot_stopped_speaking(self):
+            """Handle bot stopped speaking event."""
             if self._bot_speaking:
                 self._transport.logger.debug(
                     f"Bot{f' [{self._destination}]' if self._destination else ''} stopped speaking"
@@ -427,6 +599,11 @@ class BaseOutputTransport(FrameProcessor):
             self._audio_buffer = bytearray()
 
         async def _handle_frame(self, frame: Frame):
+            """Handle various frame types with appropriate processing.
+
+            Args:
+                frame: The frame to handle.
+            """
             if isinstance(frame, OutputImageRawFrame):
                 await self._set_video_image(frame)
             elif isinstance(frame, SpriteFrame):
@@ -437,16 +614,20 @@ class BaseOutputTransport(FrameProcessor):
                 await self._transport.write_dtmf(frame)
 
         def _next_frame(self) -> AsyncGenerator[Frame, None]:
+            """Generate the next frame for audio processing.
+
+            Returns:
+                An async generator yielding frames for processing.
+            """
+
             async def without_mixer(vad_stop_secs: float) -> AsyncGenerator[Frame, None]:
                 while True:
                     try:
                         frame = await asyncio.wait_for(
                             self._audio_queue.get(), timeout=vad_stop_secs
                         )
-                        self._transport.reset_watchdog()
                         yield frame
                     except asyncio.TimeoutError:
-                        self._transport.reset_watchdog()
                         # Notify the bot stopped speaking upstream if necessary.
                         await self._bot_stopped_speaking()
 
@@ -456,13 +637,11 @@ class BaseOutputTransport(FrameProcessor):
                 while True:
                     try:
                         frame = self._audio_queue.get_nowait()
-                        self._transport.reset_watchdog()
                         if isinstance(frame, OutputAudioRawFrame):
                             frame.audio = await self._mixer.mix(frame.audio)
                         last_frame_time = time.time()
                         yield frame
                     except asyncio.QueueEmpty:
-                        self._transport.reset_watchdog()
                         # Notify the bot stopped speaking upstream if necessary.
                         diff_time = time.time() - last_frame_time
                         if diff_time > vad_stop_secs:
@@ -474,6 +653,11 @@ class BaseOutputTransport(FrameProcessor):
                             num_channels=self._params.audio_out_channels,
                         )
                         yield frame
+                        # Allow other asyncio tasks to execute by adding a small sleep
+                        # Without this sleep, in task cancellation scenarios, this loop would
+                        # continuously return without any delay, leading to 100% CPU utilization
+                        # and preventing cancel/stop signals from being processed properly
+                        await asyncio.sleep(0)
 
             if self._mixer:
                 return with_mixer(BOT_VAD_STOP_SECS)
```
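The `await asyncio.sleep(0)` added to the mixer path above matters because `get_nowait()` never suspends: without at least one await per iteration the generator can spin at 100% CPU and pending cancellations are never delivered. A standalone sketch (hypothetical names) of the zero-second sleep giving the event loop room to act:

```python
import asyncio

async def drain_forever() -> None:
    # Stands in for the with_mixer() loop: pure get_nowait()-style polling
    # has no suspension point, so a cancel() would never be observed.
    # The zero-second sleep yields control once per iteration.
    while True:
        await asyncio.sleep(0)

async def main() -> None:
    task = asyncio.create_task(drain_forever())
    await asyncio.sleep(0.01)  # let the busy task run briefly
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        print("cancelled promptly")  # only reachable because of sleep(0)

asyncio.run(main())
```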
```diff
@@ -481,16 +665,31 @@ class BaseOutputTransport(FrameProcessor):
             return without_mixer(BOT_VAD_STOP_SECS)
 
         async def _audio_task_handler(self):
+            """Main audio processing task handler."""
             # Push a BotSpeakingFrame every 200ms, we don't really need to push it
             # at every audio chunk. If the audio chunk is bigger than 200ms, push at
             # every audio chunk.
             TOTAL_CHUNK_MS = self._params.audio_out_10ms_chunks * 10
             BOT_SPEAKING_CHUNK_PERIOD = max(int(200 / TOTAL_CHUNK_MS), 1)
             bot_speaking_counter = 0
+            speech_last_speaking_time = 0
+
             async for frame in self._next_frame():
                 # Notify the bot started speaking upstream if necessary and that
                 # it's actually speaking.
+                is_speaking = False
                 if isinstance(frame, TTSAudioRawFrame):
+                    is_speaking = True
+                elif isinstance(frame, SpeechOutputAudioRawFrame):
+                    if not is_silence(frame.audio):
+                        is_speaking = True
+                        speech_last_speaking_time = time.time()
+                    else:
+                        silence_duration = time.time() - speech_last_speaking_time
+                        if silence_duration > BOT_VAD_STOP_SECS:
+                            await self._bot_stopped_speaking()
+
+                if is_speaking:
                     await self._bot_started_speaking()
                     if bot_speaking_counter % BOT_SPEAKING_CHUNK_PERIOD == 0:
                         await self._transport.push_frame(BotSpeakingFrame())
```
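Two details in the handler above are worth unpacking. First, the `BotSpeakingFrame` cadence: with, say, `audio_out_10ms_chunks = 4`, `TOTAL_CHUNK_MS` is 40, so `BOT_SPEAKING_CHUNK_PERIOD = max(int(200 / 40), 1) = 5` and the frame is pushed on every fifth chunk, roughly every 200 ms. Second, the new silence gating: `TTSAudioRawFrame` always counts as bot speech, while `SpeechOutputAudioRawFrame` counts only when `is_silence()` says the audio is non-silent, with `_bot_stopped_speaking()` fired once silence outlasts `BOT_VAD_STOP_SECS`. A minimal sketch of that gate with a stand-in silence check (the real one is `pipecat.audio.utils.is_silence`):

```python
import time

BOT_VAD_STOP_SECS = 0.30  # same constant as at the top of base_output.py

def is_silence(audio: bytes) -> bool:
    # Stand-in: treats all-zero PCM bytes as silence; the real helper
    # would look at signal energy.
    return not any(audio)

def classify_speech_audio(audio: bytes, state: dict) -> str:
    """Sketch of the new SpeechOutputAudioRawFrame branch: non-silent
    audio counts as speaking, silence longer than BOT_VAD_STOP_SECS means
    the bot stopped, and shorter silence is just a pause."""
    now = time.time()
    if not is_silence(audio):
        state["last_speaking"] = now
        return "speaking"
    if now - state["last_speaking"] > BOT_VAD_STOP_SECS:
        return "stopped"
    return "pause"

state = {"last_speaking": time.time()}
print(classify_speech_audio(b"\x10\x00\x22\x00", state))  # speaking
print(classify_speech_audio(b"\x00" * 4, state))          # pause
```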
```diff
@@ -519,23 +718,36 @@ class BaseOutputTransport(FrameProcessor):
         #
 
         def _create_video_task(self):
+            """Create the video processing task if video output is enabled."""
             if not self._video_task and self._params.video_out_enabled:
                 self._video_queue = asyncio.Queue()
                 self._video_task = self._transport.create_task(self._video_task_handler())
 
         async def _cancel_video_task(self):
+            """Cancel and cleanup the video processing task."""
             # Stop video output task.
             if self._video_task:
                 await self._transport.cancel_task(self._video_task)
                 self._video_task = None
 
         async def _set_video_image(self, image: OutputImageRawFrame):
+            """Set a single video image for cycling output.
+
+            Args:
+                image: The image frame to cycle for video output.
+            """
             self._video_images = itertools.cycle([image])
 
         async def _set_video_images(self, images: List[OutputImageRawFrame]):
+            """Set multiple video images for cycling output.
+
+            Args:
+                images: The list of image frames to cycle for video output.
+            """
             self._video_images = itertools.cycle(images)
 
         async def _video_task_handler(self):
+            """Main video processing task handler."""
             self._video_start_time = None
             self._video_frame_index = 0
             self._video_frame_duration = 1 / self._params.video_out_framerate
@@ -551,6 +763,7 @@ class BaseOutputTransport(FrameProcessor):
                 await asyncio.sleep(self._video_frame_duration)
 
         async def _video_is_live_handler(self):
+            """Handle live video streaming with frame timing."""
             image = await self._video_queue.get()
 
             # We get the start time as soon as we get the first image.
@@ -576,6 +789,12 @@ class BaseOutputTransport(FrameProcessor):
             self._video_queue.task_done()
 
         async def _draw_image(self, frame: OutputImageRawFrame):
+            """Draw/render an image frame with resizing if needed.
+
+            Args:
+                frame: The image frame to draw.
+            """
+
             def resize_frame(frame: OutputImageRawFrame) -> OutputImageRawFrame:
                 desired_size = (self._params.video_out_width, self._params.video_out_height)
 
```
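`_video_task_handler` above paces output at `1 / video_out_framerate` seconds per frame, and the live handler additionally anchors timing to the first image it receives. A minimal fixed-rate pacing sketch under those assumptions (the write step is elided):

```python
import asyncio
import time

async def pace_video_frames(frames, framerate: float) -> None:
    # Each iteration writes one frame, then sleeps for whatever is left
    # of the frame duration so output stays close to the target rate.
    frame_duration = 1 / framerate
    for frame in frames:
        started = time.monotonic()
        ...  # write `frame` to the transport here
        elapsed = time.monotonic() - started
        await asyncio.sleep(max(frame_duration - elapsed, 0))

asyncio.run(pace_video_frames(["frame-1", "frame-2", "frame-3"], framerate=30))
```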
```diff
@@ -602,16 +821,19 @@ class BaseOutputTransport(FrameProcessor):
         #
 
         def _create_clock_task(self):
+            """Create the clock/timing processing task."""
             if not self._clock_task:
-                self._clock_queue =
+                self._clock_queue = asyncio.PriorityQueue()
                 self._clock_task = self._transport.create_task(self._clock_task_handler())
 
         async def _cancel_clock_task(self):
+            """Cancel and cleanup the clock processing task."""
             if self._clock_task:
                 await self._transport.cancel_task(self._clock_task)
                 self._clock_task = None
 
         async def _clock_task_handler(self):
+            """Main clock/timing task handler for timed frame delivery."""
             running = True
             while running:
                 timestamp, _, frame = await self._clock_queue.get()
```