dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/xtts/tts.py
CHANGED
@@ -4,12 +4,18 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""XTTS text-to-speech service implementation.
+
+This module provides integration with Coqui XTTS streaming server for
+text-to-speech synthesis using local Docker deployment.
+"""
+
 from typing import Any, AsyncGenerator, Dict, Optional
 
 import aiohttp
 from loguru import logger
 
-from pipecat.audio.utils import create_default_resampler
+from pipecat.audio.utils import create_stream_resampler
 from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
@@ -31,6 +37,14 @@ from pipecat.utils.tracing.service_decorators import traced_tts
 
 
 def language_to_xtts_language(language: Language) -> Optional[str]:
+    """Convert a Language enum to XTTS language code.
+
+    Args:
+        language: The Language enum value to convert.
+
+    Returns:
+        The corresponding XTTS language code, or None if not supported.
+    """
     BASE_LANGUAGES = {
         Language.CS: "cs",
         Language.DE: "de",
@@ -70,6 +84,13 @@ def language_to_xtts_language(language: Language) -> Optional[str]:
 
 
 class XTTSService(TTSService):
+    """Coqui XTTS text-to-speech service.
+
+    Provides text-to-speech synthesis using a locally running Coqui XTTS
+    streaming server. Supports multiple languages and voice cloning through
+    studio speakers configuration.
+    """
+
     def __init__(
         self,
         *,
@@ -80,6 +101,16 @@ class XTTSService(TTSService):
         sample_rate: Optional[int] = None,
         **kwargs,
     ):
+        """Initialize the XTTS service.
+
+        Args:
+            voice_id: ID of the voice/speaker to use for synthesis.
+            base_url: Base URL of the XTTS streaming server.
+            aiohttp_session: HTTP session for making requests to the server.
+            language: Language for synthesis. Defaults to English.
+            sample_rate: Audio sample rate. If None, uses default.
+            **kwargs: Additional arguments passed to parent TTSService.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)
 
         self._settings = {
@@ -90,15 +121,33 @@ class XTTSService(TTSService):
         self._studio_speakers: Optional[Dict[str, Any]] = None
         self._aiohttp_session = aiohttp_session
 
-        self._resampler = create_default_resampler()
+        self._resampler = create_stream_resampler()
 
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as XTTS service supports metrics generation.
+        """
         return True
 
     def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to XTTS service language format.
+
+        Args:
+            language: The language to convert.
+
+        Returns:
+            The XTTS-specific language code, or None if not supported.
+        """
         return language_to_xtts_language(language)
 
     async def start(self, frame: StartFrame):
+        """Start the XTTS service and load studio speakers.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
        await super().start(frame)
 
         if self._studio_speakers:
@@ -120,6 +169,14 @@ class XTTSService(TTSService):
 
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using XTTS streaming server.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
         logger.debug(f"{self}: Generating TTS [{text}]")
 
         if not self._studio_speakers:
pipecat/sync/base_notifier.py
CHANGED
@@ -4,14 +4,33 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Base notifier interface for Pipecat."""
+
 from abc import ABC, abstractmethod
 
 
 class BaseNotifier(ABC):
+    """Abstract base class for notification mechanisms.
+
+    Provides a standard interface for implementing notification and waiting
+    patterns used for event coordination and signaling between components
+    in the Pipecat framework.
+    """
+
     @abstractmethod
     async def notify(self):
+        """Send a notification signal.
+
+        Implementations should trigger any waiting coroutines or processes
+        that are blocked on this notifier.
+        """
         pass
 
     @abstractmethod
     async def wait(self):
+        """Wait for a notification signal.
+
+        Implementations should block until a notification is received
+        from the corresponding notify() call.
+        """
         pass
pipecat/sync/event_notifier.py
CHANGED
@@ -4,18 +4,42 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Event-based notifier implementation using asyncio Event primitives."""
+
 import asyncio
 
 from pipecat.sync.base_notifier import BaseNotifier
 
 
 class EventNotifier(BaseNotifier):
+    """Event-based notifier using asyncio.Event for task synchronization.
+
+    Provides a simple notification mechanism where one task can signal
+    an event and other tasks can wait for that event to occur. The event
+    is automatically cleared after each wait operation.
+    """
+
     def __init__(self):
+        """Initialize the event notifier.
+
+        Creates an internal asyncio.Event for managing notifications.
+        """
         self._event = asyncio.Event()
 
     async def notify(self):
+        """Signal the event to notify waiting tasks.
+
+        Sets the internal event, causing any tasks waiting on this
+        notifier to be awakened.
+        """
         self._event.set()
 
     async def wait(self):
+        """Wait for the event to be signaled.
+
+        Blocks until another task calls notify(). Automatically clears
+        the event after being awakened so subsequent calls will wait
+        for the next notification.
+        """
         await self._event.wait()
         self._event.clear()
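The notify/wait contract is easiest to see in isolation. A minimal sketch using only what this diff shows: EventNotifier with async notify() and wait(), and the event auto-clearing after each wait:

import asyncio

from pipecat.sync.event_notifier import EventNotifier


async def main():
    notifier = EventNotifier()

    async def waiter():
        await notifier.wait()  # blocks until notify() is called
        print("woke up")

    task = asyncio.create_task(waiter())
    await asyncio.sleep(0.1)  # let the waiter start blocking
    await notifier.notify()  # sets the event; wait() returns, then clears it
    await task


asyncio.run(main())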
pipecat/tests/utils.py
CHANGED
@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Testing utilities for Pipecat pipeline components."""
+
 import asyncio
 from dataclasses import dataclass
 from typing import Any, Awaitable, Callable, Dict, List, Optional, Sequence, Tuple
@@ -24,15 +26,27 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 
 @dataclass
 class SleepFrame(SystemFrame):
-    """
-
-    control
+    """A system frame that introduces a sleep delay in the test pipeline.
+
+    This frame is used by the test framework to control timing between
+    frame processing, allowing tests to separate system frames from
+    data or control frames.
+
+    Parameters:
+        sleep: Duration to sleep in seconds before processing the next frame.
     """
 
-    sleep: float = 0.
+    sleep: float = 0.2
 
 
 class HeartbeatsObserver(BaseObserver):
+    """Observer that monitors heartbeat frames from a specific processor.
+
+    This observer watches for HeartbeatFrames from a target processor and
+    invokes a callback when they are detected, useful for testing timing
+    and lifecycle events.
+    """
+
     def __init__(
         self,
         *,
@@ -40,11 +54,23 @@ class HeartbeatsObserver(BaseObserver):
         heartbeat_callback: Callable[[FrameProcessor, HeartbeatFrame], Awaitable[None]],
         **kwargs,
     ):
+        """Initialize the heartbeats observer.
+
+        Args:
+            target: The frame processor to monitor for heartbeat frames.
+            heartbeat_callback: Async callback function to invoke when heartbeats are detected.
+            **kwargs: Additional arguments passed to the parent observer.
+        """
         super().__init__(**kwargs)
         self._target = target
         self._callback = heartbeat_callback
 
     async def on_push_frame(self, data: FramePushed):
+        """Handle frame push events and detect heartbeats from target processor.
+
+        Args:
+            data: The frame push event data containing source and frame information.
+        """
         src = data.source
         frame = data.frame
 
@@ -53,6 +79,13 @@ class HeartbeatsObserver(BaseObserver):
 
 
 class QueuedFrameProcessor(FrameProcessor):
+    """A processor that captures frames in a queue for testing purposes.
+
+    This processor intercepts frames flowing in a specific direction and
+    stores them in a queue for later inspection during testing, while
+    still allowing the frames to continue through the pipeline.
+    """
+
     def __init__(
         self,
         *,
@@ -60,12 +93,25 @@ class QueuedFrameProcessor(FrameProcessor):
         queue_direction: FrameDirection,
         ignore_start: bool = True,
     ):
-        super().__init__()
+        """Initialize the queued frame processor.
+
+        Args:
+            queue: The asyncio queue to store captured frames.
+            queue_direction: The direction of frames to capture (UPSTREAM or DOWNSTREAM).
+            ignore_start: Whether to ignore StartFrames when capturing.
+        """
+        super().__init__(enable_direct_mode=True)
         self._queue = queue
         self._queue_direction = queue_direction
         self._ignore_start = ignore_start
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames and capture them in the queue if they match the direction.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction the frame is flowing.
+        """
         await super().process_frame(frame, direction)
 
         if direction == self._queue_direction:
@@ -85,6 +131,28 @@ async def run_test(
     start_metadata: Optional[Dict[str, Any]] = None,
     send_end_frame: bool = True,
 ) -> Tuple[Sequence[Frame], Sequence[Frame]]:
+    """Run a test pipeline with the specified processor and validate frame flow.
+
+    This function creates a test pipeline with the given processor, sends the
+    specified frames through it, and validates that the expected frames are
+    received in both upstream and downstream directions.
+
+    Args:
+        processor: The frame processor to test.
+        frames_to_send: Sequence of frames to send through the processor.
+        expected_down_frames: Expected frame types flowing downstream (optional).
+        expected_up_frames: Expected frame types flowing upstream (optional).
+        ignore_start: Whether to ignore StartFrames in frame validation.
+        observers: Optional list of observers to attach to the pipeline.
+        start_metadata: Optional metadata to include with the StartFrame.
+        send_end_frame: Whether to send an EndFrame at the end of the test.
+
+    Returns:
+        Tuple containing (downstream_frames, upstream_frames) that were received.
+
+    Raises:
+        AssertionError: If the received frames don't match the expected frame types.
+    """
     observers = observers or []
     start_metadata = start_metadata or {}
 
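Putting run_test together with the pieces above, a hedged sketch of a unit test. IdentityFilter (a passthrough processor also touched in this release) and the exact calling convention are assumptions inferred from the docstrings, not copied from pipecat's own test suite:

import asyncio

from pipecat.frames.frames import TextFrame
from pipecat.processors.filters.identity_filter import IdentityFilter
from pipecat.tests.utils import SleepFrame, run_test


async def main():
    # Per the docstring, expected frames are given as frame types and the
    # helper asserts the received frames match them in order; SleepFrame
    # only controls timing and is consumed by the test framework.
    down, up = await run_test(
        IdentityFilter(),
        frames_to_send=[TextFrame("hello"), SleepFrame(sleep=0.1)],
        expected_down_frames=[TextFrame],
    )
    print([type(f).__name__ for f in down])


asyncio.run(main())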
pipecat/transcriptions/language.py
CHANGED
@@ -4,13 +4,23 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Language code enumerations for Pipecat.
+
+This module provides comprehensive language code constants following ISO 639
+and BCP 47 standards, supporting both language-only and language-region
+combinations for various speech and text processing services.
+"""
+
 import sys
 from enum import Enum
 
 if sys.version_info < (3, 11):
 
     class StrEnum(str, Enum):
+        """String enumeration base class for Python < 3.11 compatibility."""
+
         def __new__(cls, value):
+            """Create a new instance of the StrEnum."""
            obj = str.__new__(cls, value)
             obj._value_ = value
             return obj
@@ -19,6 +29,14 @@ else:
 
 
 class Language(StrEnum):
+    """Language codes for speech and text processing services.
+
+    Provides comprehensive language code constants following ISO 639 and BCP 47
+    standards. Includes both language-only codes (e.g., 'en') and language-region
+    combinations (e.g., 'en-US') to support various speech synthesis, recognition,
+    and translation services.
+    """
+
     # Afrikaans
     AF = "af"
     AF_ZA = "af-ZA"
@@ -127,6 +145,9 @@ class Language(StrEnum):
     EN_US = "en-US"
     EN_ZA = "en-ZA"
 
+    # Esperanto
+    EO = "eo"
+
     # Spanish
     ES = "es"
     ES_AR = "es-AR"
@@ -456,6 +477,9 @@ class Language(StrEnum):
     # Tatar
     TT = "tt"
 
+    # Uyghur
+    UG = "ug"
+
     # Ukrainian
     UK = "uk"
     UK_UA = "uk-UA"
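Because Language is a StrEnum (with the pre-3.11 shim defined above), members compare and hash like their plain string codes, which is how new entries such as Esperanto and Uyghur slot into service language maps. A small illustrative sketch:

from pipecat.transcriptions.language import Language

# New in this release: Esperanto and Uyghur.
assert Language.EO == "eo"
assert Language.UG == "ug"

# Members behave like strings, so they key directly into plain code tables.
names = {"eo": "Esperanto", "ug": "Uyghur"}
print(names[Language.UG])  # -> Uyghur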
pipecat/transports/base_input.py
CHANGED
@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Base input transport implementation for Pipecat.
+
+This module provides the BaseInputTransport class which handles audio and video
+input processing, including VAD, turn analysis, and interruption management.
+"""
+
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
 from typing import Optional
@@ -28,6 +34,7 @@ from pipecat.frames.frames import (
     InputAudioRawFrame,
     InputImageRawFrame,
     MetricsFrame,
+    SpeechControlParamsFrame,
     StartFrame,
     StartInterruptionFrame,
     StopFrame,
@@ -47,7 +54,20 @@ AUDIO_INPUT_TIMEOUT_SECS = 0.5
 
 
 class BaseInputTransport(FrameProcessor):
+    """Base class for input transport implementations.
+
+    Handles audio and video input processing including Voice Activity Detection,
+    turn analysis, audio filtering, and user interaction management. Supports
+    interruption handling and provides hooks for transport-specific implementations.
+    """
+
     def __init__(self, params: TransportParams, **kwargs):
+        """Initialize the base input transport.
+
+        Args:
+            params: Transport configuration parameters.
+            **kwargs: Additional arguments passed to parent class.
+        """
         super().__init__(**kwargs)
 
         self._params = params
@@ -115,25 +135,54 @@ class BaseInputTransport(FrameProcessor):
             self._params.video_out_color_format = self._params.camera_out_color_format
 
     def enable_audio_in_stream_on_start(self, enabled: bool) -> None:
+        """Enable or disable audio streaming on transport start.
+
+        Args:
+            enabled: Whether to start audio streaming immediately on transport start.
+        """
         self.logger.debug(f"Enabling audio on start. {enabled}")
         self._params.audio_in_stream_on_start = enabled
 
     async def start_audio_in_streaming(self):
+        """Start audio input streaming.
+
+        Override in subclasses to implement transport-specific audio streaming.
+        """
         pass
 
     @property
     def sample_rate(self) -> int:
+        """Get the current audio sample rate.
+
+        Returns:
+            The sample rate in Hz.
+        """
         return self._sample_rate
 
     @property
     def vad_analyzer(self) -> Optional[VADAnalyzer]:
+        """Get the Voice Activity Detection analyzer.
+
+        Returns:
+            The VAD analyzer instance if configured, None otherwise.
+        """
         return self._params.vad_analyzer
 
     @property
     def turn_analyzer(self) -> Optional[BaseTurnAnalyzer]:
+        """Get the turn-taking analyzer.
+
+        Returns:
+            The turn analyzer instance if configured, None otherwise.
+        """
         return self._params.turn_analyzer
 
     async def start(self, frame: StartFrame):
+        """Start the input transport and initialize components.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         self._paused = False
         self._user_speaking = False
 
@@ -147,11 +196,23 @@ class BaseInputTransport(FrameProcessor):
         if self._params.turn_analyzer:
             self._params.turn_analyzer.set_sample_rate(self._sample_rate)
 
+        if self._params.vad_analyzer or self._params.turn_analyzer:
+            vad_params = self._params.vad_analyzer.params if self._params.vad_analyzer else None
+            turn_params = self._params.turn_analyzer.params if self._params.turn_analyzer else None
+
+            speech_frame = SpeechControlParamsFrame(vad_params=vad_params, turn_params=turn_params)
+            await self.push_frame(speech_frame)
+
         # Start audio filter.
         if self._params.audio_in_filter:
             await self._params.audio_in_filter.start(self._sample_rate)
 
     async def stop(self, frame: EndFrame):
+        """Stop the input transport and cleanup resources.
+
+        Args:
+            frame: The end frame signaling transport shutdown.
+        """
         # Cancel and wait for the audio input task to finish.
         await self._cancel_audio_task()
         # Stop audio filter.
@@ -159,6 +220,11 @@ class BaseInputTransport(FrameProcessor):
             await self._params.audio_in_filter.stop()
 
     async def pause(self, frame: StopFrame):
+        """Pause the input transport temporarily.
+
+        Args:
+            frame: The stop frame signaling transport pause.
+        """
         self._paused = True
         # Cancel task so we clear the queue
         await self._cancel_audio_task()
@@ -166,19 +232,38 @@ class BaseInputTransport(FrameProcessor):
         self._create_audio_task()
 
     async def cancel(self, frame: CancelFrame):
+        """Cancel the input transport and stop all processing.
+
+        Args:
+            frame: The cancel frame signaling immediate cancellation.
+        """
         # Cancel and wait for the audio input task to finish.
         await self._cancel_audio_task()
 
     async def set_transport_ready(self, frame: StartFrame):
-        """
+        """Called when the transport is ready to stream.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         # Create audio input queue and task if needed.
         self._create_audio_task()
 
     async def push_video_frame(self, frame: InputImageRawFrame):
+        """Push a video frame downstream if video input is enabled.
+
+        Args:
+            frame: The input video frame to process.
+        """
         if self._params.video_in_enabled and not self._paused:
             await self.push_frame(frame)
 
     async def push_audio_frame(self, frame: InputAudioRawFrame):
+        """Push an audio frame to the processing queue if audio input is enabled.
+
+        Args:
+            frame: The input audio frame to process.
+        """
         if self._params.audio_in_enabled and not self._paused:
             await self._audio_in_queue.put(frame)
 
@@ -187,6 +272,12 @@ class BaseInputTransport(FrameProcessor):
     #
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames and handle transport-specific logic.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         await super().process_frame(frame, direction)
 
         # Specific system frames
@@ -216,6 +307,13 @@ class BaseInputTransport(FrameProcessor):
         elif isinstance(frame, VADParamsUpdateFrame):
             if self.vad_analyzer:
                 self.vad_analyzer.set_params(frame.params, bot_logger=self.logger)
+                speech_frame = SpeechControlParamsFrame(
+                    vad_params=frame.params,
+                    turn_params=self._params.turn_analyzer.params
+                    if self._params.turn_analyzer
+                    else None,
+                )
+                await self.push_frame(speech_frame)
         elif isinstance(frame, SystemFrame):
             await self.push_frame(frame, direction)
         # Control frames
@@ -238,12 +336,14 @@ class BaseInputTransport(FrameProcessor):
     #
 
     async def _handle_bot_interruption(self, frame: BotInterruptionFrame):
+        """Handle bot interruption frames."""
         self.logger.debug("Bot interruption")
         if self.interruptions_allowed:
             await self._start_interruption()
             await self.push_frame(StartInterruptionFrame())
 
     async def _handle_user_interruption(self, frame: Frame):
+        """Handle user interruption events based on speaking state."""
         if isinstance(frame, UserStartedSpeakingFrame):
             self.logger.debug("User started speaking")
             self._user_speaking = True
@@ -281,9 +381,11 @@ class BaseInputTransport(FrameProcessor):
     #
 
     async def _handle_bot_started_speaking(self, frame: BotStartedSpeakingFrame):
+        """Update bot speaking state when bot starts speaking."""
         self._bot_speaking = True
 
     async def _handle_bot_stopped_speaking(self, frame: BotStoppedSpeakingFrame):
+        """Update bot speaking state when bot stops speaking."""
         self._bot_speaking = False
 
     #
@@ -291,16 +393,19 @@ class BaseInputTransport(FrameProcessor):
     #
 
     def _create_audio_task(self):
+        """Create the audio processing task if audio input is enabled."""
         if not self._audio_task and self._params.audio_in_enabled:
             self._audio_in_queue = asyncio.Queue()
             self._audio_task = self.create_task(self._audio_task_handler())
 
     async def _cancel_audio_task(self):
+        """Cancel and cleanup the audio processing task."""
         if self._audio_task:
             await self.cancel_task(self._audio_task)
             self._audio_task = None
 
     async def _vad_analyze(self, audio_frame: InputAudioRawFrame) -> VADState:
+        """Analyze audio frame for voice activity."""
         state = VADState.QUIET
         if self.vad_analyzer:
             state = await self.get_event_loop().run_in_executor(
@@ -309,6 +414,7 @@ class BaseInputTransport(FrameProcessor):
         return state
 
     async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState):
+        """Handle Voice Activity Detection results and generate appropriate frames."""
         new_vad_state = await self._vad_analyze(audio_frame)
         if (
             new_vad_state != vad_state
@@ -339,18 +445,21 @@ class BaseInputTransport(FrameProcessor):
         return vad_state
 
     async def _handle_end_of_turn(self):
+        """Handle end-of-turn analysis and generate prediction results."""
         if self.turn_analyzer:
             state, prediction = await self.turn_analyzer.analyze_end_of_turn()
             await self._handle_prediction_result(prediction)
             await self._handle_end_of_turn_complete(state)
 
     async def _handle_end_of_turn_complete(self, state: EndOfTurnState):
+        """Handle completion of end-of-turn analysis."""
         if state == EndOfTurnState.COMPLETE:
             await self._handle_user_interruption(UserStoppedSpeakingFrame())
 
     async def _run_turn_analyzer(
         self, frame: InputAudioRawFrame, vad_state: VADState, previous_vad_state: VADState
     ):
+        """Run turn analysis on audio frame and handle results."""
         is_speech = vad_state == VADState.SPEAKING or vad_state == VADState.STARTING
         # If silence exceeds threshold, we are going to receive EndOfTurnState.COMPLETE
         end_of_turn_state = self._params.turn_analyzer.append_audio(frame.audio, is_speech)
@@ -361,6 +470,7 @@ class BaseInputTransport(FrameProcessor):
         await self._handle_end_of_turn()
 
     async def _audio_task_handler(self):
+        """Main audio processing task handler for VAD and turn analysis."""
         vad_state: VADState = VADState.QUIET
         while True:
             try:
@@ -395,13 +505,7 @@ class BaseInputTransport(FrameProcessor):
                 if self._params.turn_analyzer:
                     self._params.turn_analyzer.clear()
                 await self._handle_user_interruption(UserStoppedSpeakingFrame())
-            finally:
-                self.reset_watchdog()
 
     async def _handle_prediction_result(self, result: MetricsData):
-        """Handle a prediction result event from the turn analyzer.
-
-        Args:
-            result: The prediction result MetricsData.
-        """
+        """Handle a prediction result event from the turn analyzer."""
         await self.push_frame(MetricsFrame(data=[result]))
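The new SpeechControlParamsFrame gives downstream processors a way to learn the transport's current VAD and turn-analyzer settings without reaching into the transport itself. A minimal consumer sketch, assuming only the frame fields visible in this diff (vad_params, turn_params):

from pipecat.frames.frames import Frame, SpeechControlParamsFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


class SpeechParamsLogger(FrameProcessor):
    """Logs the speech control parameters announced by the input transport."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)
        if isinstance(frame, SpeechControlParamsFrame):
            # Emitted on start() and again whenever a VADParamsUpdateFrame
            # changes the analyzer settings (see the hunks above).
            print(f"VAD: {frame.vad_params} turn: {frame.turn_params}")
        await self.push_frame(frame, direction)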
|