dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/processors/aggregators/llm_response.py

```diff
@@ -4,8 +4,16 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""LLM response aggregators for handling conversation context and message aggregation.
+
+This module provides aggregators that process and accumulate LLM responses, user inputs,
+and conversation context. These aggregators handle the flow between speech-to-text,
+LLM processing, and text-to-speech components in conversational AI pipelines.
+"""
+
 import asyncio
 import time
+import warnings
 from abc import abstractmethod
 from dataclasses import dataclass
 from typing import Dict, List, Literal, Optional, Set
@@ -13,6 +21,8 @@ from typing import Dict, List, Literal, Optional, Set
 from loguru import logger
 
 from pipecat.audio.interruptions.base_interruption_strategy import BaseInterruptionStrategy
+from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
+from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.frames.frames import (
     BotInterruptionFrame,
     BotStartedSpeakingFrame,
@@ -37,6 +47,7 @@ from pipecat.frames.frames import (
     LLMSetToolsFrame,
     LLMTextFrame,
     OpenAILLMContextAssistantTimestampFrame,
+    SpeechControlParamsFrame,
     StartFrame,
     StartInterruptionFrame,
     TextFrame,
@@ -55,30 +66,63 @@ from pipecat.utils.time import time_now_iso8601
 
 @dataclass
 class LLMUserAggregatorParams:
+    """Parameters for configuring LLM user aggregation behavior.
+
+    Parameters:
+        aggregation_timeout: Maximum time in seconds to wait for additional
+            transcription content before pushing aggregated result. This
+            timeout is used only when the transcription is slow to arrive.
+        turn_emulated_vad_timeout: Maximum time in seconds to wait for emulated
+            VAD when using turn-based analysis. Applied when transcription is
+            received but VAD didn't detect speech (e.g., whispered utterances).
+        enable_emulated_vad_interruptions: When True, allows emulated VAD events
+            to interrupt the bot when it's speaking. When False, emulated speech
+            is ignored while the bot is speaking.
+    """
+
     aggregation_timeout: float = 0.5
+    turn_emulated_vad_timeout: float = 0.8
+    enable_emulated_vad_interruptions: bool = False
 
 
 @dataclass
 class LLMAssistantAggregatorParams:
+    """Parameters for configuring LLM assistant aggregation behavior.
+
+    Parameters:
+        expect_stripped_words: Whether to expect and handle stripped words
+            in text frames by adding spaces between tokens.
+    """
+
     expect_stripped_words: bool = True
 
 
 class LLMFullResponseAggregator(FrameProcessor):
-    """
-
-
-
-
-
-
-
-
-
-
-
+    """Aggregates complete LLM responses between start and end frames.
+
+    This aggregator collects LLM text frames (tokens) received between
+    `LLMFullResponseStartFrame` and `LLMFullResponseEndFrame` and provides
+    the complete response via an event handler.
+
+    The aggregator provides an "on_completion" event that fires when a full
+    completion is available::
+
+        @aggregator.event_handler("on_completion")
+        async def on_completion(
+            aggregator: LLMFullResponseAggregator,
+            completion: str,
+            completed: bool,
+        ):
+            # Handle the completion
+            pass
     """
 
     def __init__(self, **kwargs):
+        """Initialize the LLM full response aggregator.
+
+        Args:
+            **kwargs: Additional arguments passed to parent FrameProcessor.
+        """
         super().__init__(**kwargs)
 
         self._aggregation = ""
```
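Both new fields ship with defaults, so existing `LLMUserAggregatorParams` call sites keep working. A minimal sketch of opting into the new behavior — the constructor signatures are taken from the hunks in this diff, while the seed messages are purely illustrative:

```python
from pipecat.processors.aggregators.llm_response import (
    LLMUserAggregatorParams,
    LLMUserContextAggregator,
)
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext

# Illustrative seed context; real pipelines build this from their own prompt.
context = OpenAILLMContext([{"role": "system", "content": "You are a helpful assistant."}])

params = LLMUserAggregatorParams(
    aggregation_timeout=0.5,  # buffer for late transcriptions after a real VAD stop
    turn_emulated_vad_timeout=0.8,  # emulated-VAD wait when a turn analyzer is configured
    enable_emulated_vad_interruptions=True,  # let emulated speech interrupt the bot
)

user_aggregator = LLMUserContextAggregator(context=context, params=params)
```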
```diff
@@ -87,6 +131,12 @@ class LLMFullResponseAggregator(FrameProcessor):
         self._register_event_handler("on_completion")
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames and aggregate LLM text content.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         await super().process_frame(frame, direction)
 
         if isinstance(frame, StartInterruptionFrame):
@@ -117,83 +167,123 @@
 
 
 class BaseLLMResponseAggregator(FrameProcessor):
-    """
-    aggregators process incoming frames and aggregate content until they are
-    ready to push the aggregation. In the case of a user, an aggregation might
-    be a full transcription received from the STT service.
+    """Base class for all LLM response aggregators.
 
-
-
-
+    These aggregators process incoming frames and aggregate content until they are
+    ready to push the aggregation downstream. They maintain conversation state
+    and handle message flow between different components in the pipeline.
 
+    The aggregators keep a store (e.g. message list or LLM context) of the current
+    conversation, storing messages from both users and the bot.
     """
 
     def __init__(self, **kwargs):
+        """Initialize the base LLM response aggregator.
+
+        Args:
+            **kwargs: Additional arguments passed to parent FrameProcessor.
+        """
         super().__init__(**kwargs)
 
     @property
     @abstractmethod
     def messages(self) -> List[dict]:
-        """
+        """Get the messages from the current conversation.
+
+        Returns:
+            List of message dictionaries representing the conversation history.
+        """
         pass
 
     @property
     @abstractmethod
     def role(self) -> str:
-        """
+        """Get the role for this aggregator.
+
+        Returns:
+            The role string (e.g. "user", "assistant") for this aggregator.
+        """
         pass
 
     @abstractmethod
     def add_messages(self, messages):
-        """Add the given messages to the conversation.
+        """Add the given messages to the conversation.
+
+        Args:
+            messages: Messages to append to the conversation history.
+        """
         pass
 
     @abstractmethod
     def set_messages(self, messages):
-        """Reset the conversation with the given messages.
+        """Reset the conversation with the given messages.
+
+        Args:
+            messages: Messages to replace the current conversation history.
+        """
         pass
 
     @abstractmethod
     def set_tools(self, tools):
-        """Set LLM tools to be used in the current conversation.
+        """Set LLM tools to be used in the current conversation.
+
+        Args:
+            tools: List of tool definitions for the LLM to use.
+        """
         pass
 
     @abstractmethod
     def set_tool_choice(self, tool_choice):
-        """Set the tool choice
+        """Set the tool choice for the LLM.
+
+        Args:
+            tool_choice: Tool choice configuration for the LLM context.
+        """
         pass
 
     @abstractmethod
    async def reset(self):
-        """Reset the
-
+        """Reset the internal state of this aggregator.
+
+        This should clear aggregation state but not modify the conversation messages.
         """
         pass
 
     @abstractmethod
     async def handle_aggregation(self, aggregation: str):
-        """
-        a simple list of message or a context. It doesn't not push any frames.
+        """Add the given aggregation to the conversation store.
 
+        Args:
+            aggregation: The aggregated text content to add to the conversation.
         """
         pass
 
     @abstractmethod
     async def push_aggregation(self):
-        """
-        aggregation this might push a new context frame.
+        """Push the current aggregation downstream.
 
+        The specific frame type pushed depends on the aggregator implementation
+        (e.g. context frame, messages frame).
         """
         pass
 
 
 class LLMContextResponseAggregator(BaseLLMResponseAggregator):
-    """
-    conversation. It pushes `OpenAILLMContextFrame` as an aggregation frame.
+    """Base LLM aggregator that uses an OpenAI LLM context for conversation storage.
 
+    This aggregator maintains conversation state using an OpenAILLMContext and
+    pushes OpenAILLMContextFrame objects as aggregation frames. It provides
+    common functionality for context-based conversation management.
     """
 
     def __init__(self, *, context: OpenAILLMContext, role: str, **kwargs):
+        """Initialize the context response aggregator.
+
+        Args:
+            context: The OpenAI LLM context to use for conversation storage.
+            role: The role this aggregator represents (e.g. "user", "assistant").
+            **kwargs: Additional arguments passed to parent class.
+        """
         super().__init__(**kwargs)
         self._context = context
         self._role = role
@@ -202,46 +292,99 @@ class LLMContextResponseAggregator(BaseLLMResponseAggregator):
 
     @property
     def messages(self) -> List[dict]:
+        """Get messages from the LLM context.
+
+        Returns:
+            List of message dictionaries from the context.
+        """
         return self._context.get_messages()
 
     @property
     def role(self) -> str:
+        """Get the role for this aggregator.
+
+        Returns:
+            The role string for this aggregator.
+        """
         return self._role
 
     @property
     def context(self):
+        """Get the OpenAI LLM context.
+
+        Returns:
+            The OpenAILLMContext instance used by this aggregator.
+        """
         return self._context
 
     def get_context_frame(self) -> OpenAILLMContextFrame:
+        """Create a context frame with the current context.
+
+        Returns:
+            OpenAILLMContextFrame containing the current context.
+        """
         return OpenAILLMContextFrame(context=self._context)
 
     async def push_context_frame(self, direction: FrameDirection = FrameDirection.DOWNSTREAM):
+        """Push a context frame in the specified direction.
+
+        Args:
+            direction: The direction to push the frame (upstream or downstream).
+        """
         frame = self.get_context_frame()
         await self.push_frame(frame, direction)
 
     def add_messages(self, messages):
+        """Add messages to the context.
+
+        Args:
+            messages: Messages to add to the conversation context.
+        """
         self._context.add_messages(messages)
 
     def set_messages(self, messages):
+        """Set the context messages.
+
+        Args:
+            messages: Messages to replace the current context messages.
+        """
         self._context.set_messages(messages)
 
     def set_tools(self, tools: List):
+        """Set tools in the context.
+
+        Args:
+            tools: List of tool definitions to set in the context.
+        """
         self._context.set_tools(tools)
 
     def set_tool_choice(self, tool_choice: Literal["none", "auto", "required"] | dict):
+        """Set tool choice in the context.
+
+        Args:
+            tool_choice: Tool choice configuration for the context.
+        """
         self._context.set_tool_choice(tool_choice)
 
     async def reset(self):
+        """Reset the aggregation state."""
         self._aggregation = ""
 
 
 class LLMUserContextAggregator(LLMContextResponseAggregator):
-    """
-
-
-
-
+    """User LLM aggregator that processes speech-to-text transcriptions.
+
+    This aggregator handles the complex logic of aggregating user speech transcriptions
+    from STT services. It manages multiple scenarios including:
+
+    - Transcriptions received between VAD events
+    - Transcriptions received outside VAD events
+    - Interim vs final transcriptions
+    - User interruptions during bot speech
+    - Emulated VAD for whispered or short utterances
 
+    The aggregator uses timeouts to handle cases where transcriptions arrive
+    after VAD events or when no VAD is available.
     """
 
     def __init__(
@@ -251,8 +394,18 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         params: Optional[LLMUserAggregatorParams] = None,
         **kwargs,
     ):
+        """Initialize the user context aggregator.
+
+        Args:
+            context: The OpenAI LLM context for conversation storage.
+            params: Configuration parameters for aggregation behavior.
+            **kwargs: Additional arguments. Supports deprecated 'aggregation_timeout'.
+        """
         super().__init__(context=context, role="user", **kwargs)
         self._params = params or LLMUserAggregatorParams()
+        self._vad_params: Optional[VADParams] = None
+        self._turn_params: Optional[SmartTurnParams] = None
+
         if "aggregation_timeout" in kwargs:
             import warnings
 
@@ -280,6 +433,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         self._last_aggregation_push_time = 0
 
     async def reset(self):
+        """Reset the aggregation state and interruption strategies."""
         await super().reset()
         self._was_bot_speaking = False
         self._seen_interim_results = False
@@ -287,12 +441,22 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         [await s.reset() for s in self._interruption_strategies]
 
     async def handle_aggregation(self, aggregation: str):
+        """Add the aggregated user text to the context.
+
+        Args:
+            aggregation: The aggregated user text to add as a user message.
+        """
         self._context.add_message({"role": self.role, "content": aggregation})
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames for user speech aggregation and context management.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         if isinstance(frame, StartInterruptionFrame):
             self.logger.debug("Received StartInterruptionFrame")
-
         await super().process_frame(frame, direction)
 
         if isinstance(frame, StartFrame):
@@ -328,9 +492,9 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         elif isinstance(frame, InterimTranscriptionFrame):
             await self._handle_interim_transcription(frame)
         elif isinstance(frame, LLMMessagesAppendFrame):
-            self.
+            await self._handle_llm_messages_append(frame)
         elif isinstance(frame, LLMMessagesUpdateFrame):
-            self.
+            await self._handle_llm_messages_update(frame)
         elif isinstance(frame, LLMSetToolsFrame):
             self.set_tools(frame.tools)
         elif isinstance(frame, LLMSetToolChoiceFrame):
@@ -338,6 +502,10 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         elif isinstance(frame, LLMFullResponseStartFrame):
             self._last_llm_response_start_time = time.time()
             self._latest_final_transcript = ""
+        elif isinstance(frame, SpeechControlParamsFrame):
+            self._vad_params = frame.vad_params
+            self._turn_params = frame.turn_params
+            await self.push_frame(frame, direction)
         else:
             await self.push_frame(frame, direction)
 
```
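The new `SpeechControlParamsFrame` branch lets the aggregator pick up the pipeline's VAD and smart-turn settings at runtime and forward the frame unchanged; the cached values feed the timeout selection later in this diff. A hedged sketch of such a frame — the field names come from the handler above, and it is an assumption that the frame accepts them as constructor keywords (in practice the input transport, not application code, would emit it):

```python
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import SpeechControlParamsFrame

# stop_secs is the VAD silence window the aggregator falls back to when it
# emulates VAD without a turn analyzer (see the timeout selection below).
frame = SpeechControlParamsFrame(
    vad_params=VADParams(stop_secs=0.2),
    turn_params=None,  # no smart-turn analyzer configured in this sketch
)
```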
```diff
@@ -353,7 +521,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         self._last_aggregation_push_time = time.time()
 
     async def push_aggregation(self, trigger_interruption: bool = False):
-        """
+        """Push the current aggregation based on interruption strategies and conditions."""
         if len(self._aggregation) > 0:
             if self.interruption_strategies and self._bot_speaking:
                 should_interrupt = await self._should_interrupt_based_on_strategies()
@@ -373,7 +541,8 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
                 self.logger.debug(
                     "Triggering interruption - pushing BotInterruptionFrame and aggregation"
                 )
-                await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)
+                # await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)
+                await self.push_frame(StartInterruptionFrame(), FrameDirection.DOWNSTREAM)
                 self.logger.debug("Pushed BotInterruptionFrame")
             # No interruption config - normal behavior (always push aggregation)
             await self._process_aggregation()
@@ -393,7 +562,11 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
             # await self.push_frame(OpenAILLMContextFrame(self._context))
 
     async def _should_interrupt_based_on_strategies(self) -> bool:
-        """Check if interruption should occur based on configured strategies.
+        """Check if interruption should occur based on configured strategies.
+
+        Returns:
+            True if any interruption strategy indicates interruption should occur.
+        """
 
         async def should_interrupt(strategy: BaseInterruptionStrategy):
             await strategy.append_text(self._aggregation)
@@ -410,6 +583,16 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
     async def _cancel(self, frame: CancelFrame):
         await self._cancel_aggregation_task()
 
+    async def _handle_llm_messages_append(self, frame: LLMMessagesAppendFrame):
+        self.add_messages(frame.messages)
+        if frame.run_llm:
+            await self.push_context_frame()
+
+    async def _handle_llm_messages_update(self, frame: LLMMessagesUpdateFrame):
+        self.set_messages(frame.messages)
+        if frame.run_llm:
+            await self.push_context_frame()
+
     async def _handle_input_audio(self, frame: InputAudioRawFrame):
         for s in self.interruption_strategies:
             await s.append_audio(frame.audio, frame.sample_rate)
```
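The previously truncated `LLMMessagesAppendFrame`/`LLMMessagesUpdateFrame` branches now resolve to these handlers: mutate the context and, when the frame's `run_llm` flag is set, push a fresh context frame so the LLM responds immediately. A short sketch of driving that from application code, assuming `task` is an existing pipecat `PipelineTask` and that the frame exposes `messages` and `run_llm` as constructor fields:

```python
from pipecat.frames.frames import LLMMessagesAppendFrame

async def notify_verified(task):
    # Append a message mid-conversation; run_llm=True asks the user-side
    # aggregator to push an updated context frame so the LLM runs again.
    await task.queue_frame(
        LLMMessagesAppendFrame(
            messages=[{"role": "system", "content": "The caller has been verified."}],
            run_llm=True,
        )
    )
```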
```diff
@@ -524,9 +707,40 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
     async def _aggregation_task_handler(self):
         while True:
             try:
-
-
-
+                # The _aggregation_task_handler handles two distinct timeout scenarios:
+                #
+                # 1. When emulating_vad=True: Wait for emulated VAD timeout before
+                #    pushing aggregation (simulating VAD behavior when no actual VAD
+                #    detection occurred).
+                #
+                # 2. When emulating_vad=False: Use aggregation_timeout as a buffer
+                #    to wait for potential late-arriving transcription frames after
+                #    a real VAD event.
+                #
+                # For emulated VAD scenarios, the timeout strategy depends on whether
+                # a turn analyzer is configured:
+                #
+                # - WITH turn analyzer: Use turn_emulated_vad_timeout parameter because
+                #   the VAD's stop_secs is set very low (e.g. 0.2s) for rapid speech
+                #   chunking to feed the turn analyzer. This low value is too fast
+                #   for emulated VAD scenarios where we need to allow users time to
+                #   finish speaking (e.g. 0.8s).
+                #
+                # - WITHOUT turn analyzer: Use VAD's stop_secs directly to maintain
+                #   consistent user experience between real VAD detection and
+                #   emulated VAD scenarios.
+                if not self._emulating_vad:
+                    timeout = self._params.aggregation_timeout
+                elif self._turn_params:
+                    timeout = self._params.turn_emulated_vad_timeout
+                else:
+                    # Use VAD stop_secs when no turn analyzer is present, fallback if no VAD params
+                    timeout = (
+                        self._vad_params.stop_secs
+                        if self._vad_params
+                        else self._params.turn_emulated_vad_timeout
+                    )
+                await asyncio.wait_for(self._aggregation_event.wait(), timeout=timeout)
                 await self._maybe_emulate_user_speaking()
             except asyncio.TimeoutError:
                 if not self._user_speaking:
```
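The comment block boils down to a three-way choice. Restated as a standalone helper for readability — a sketch only; the shipped logic stays inline in `_aggregation_task_handler`:

```python
from typing import Optional

def select_timeout(
    emulating_vad: bool,
    has_turn_analyzer: bool,
    aggregation_timeout: float,
    turn_emulated_vad_timeout: float,
    vad_stop_secs: Optional[float],
) -> float:
    if not emulating_vad:
        # Real VAD event: just buffer briefly for late transcriptions.
        return aggregation_timeout
    if has_turn_analyzer:
        # VAD stop_secs is tuned too low for emulated VAD; use the dedicated timeout.
        return turn_emulated_vad_timeout
    # Match the real-VAD feel; fall back if no VAD params were ever received.
    return vad_stop_secs if vad_stop_secs is not None else turn_emulated_vad_timeout
```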
```diff
@@ -540,43 +754,47 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
                 )
                 self._emulating_vad = False
             finally:
-                self.reset_watchdog()
                 self._aggregation_event.clear()
 
     async def _maybe_emulate_user_speaking(self):
-        """
-        detected by VAD. Only do that if the bot is not speaking.
+        """Maybe emulate user speaking based on transcription.
 
+        Emulate user speaking if we got a transcription but it was not
+        detected by VAD. Behavior when bot is speaking depends on the
+        enable_emulated_vad_interruptions parameter.
         """
-        if not self._user_speaking:
-            diff_time = time.time() - self._last_user_speaking_time
-            if diff_time < self._aggregation_timeout:
-                self.logger.debug("Pushing aggregation")
-                await self.push_aggregation()
-        # Commenting the original pipecat code.
         # Check if we received a transcription but VAD was not able to detect
         # voice (e.g. when you whisper a short utterance). In that case, we need
-        # to emulate VAD (i.e. user start/stopped speaking)
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # to emulate VAD (i.e. user start/stopped speaking).
+        if (
+            not self._user_speaking
+            and not self._waiting_for_aggregation
+            and len(self._aggregation) > 0
+        ):
+            if self._bot_speaking and not self._params.enable_emulated_vad_interruptions:
+                # If emulated VAD interruptions are disabled and bot is speaking, ignore
+                logger.debug("Ignoring user speaking emulation, bot is speaking.")
+                await self.reset()
+            else:
+                # Either bot is not speaking, or emulated VAD interruptions are enabled
+                # - trigger user speaking emulation.
+                await self.push_frame(EmulateUserStartedSpeakingFrame(), FrameDirection.UPSTREAM)
+                self._emulating_vad = True
 
 
 class LLMAssistantContextAggregator(LLMContextResponseAggregator):
-    """
-
-
+    """Assistant LLM aggregator that processes bot responses and function calls.
+
+    This aggregator handles the complex logic of processing assistant responses including:
+
+    - Text frame aggregation between response start/end markers
+    - Function call lifecycle management
+    - Context updates with timestamps
+    - Tool execution and result handling
+    - Interruption handling during responses
 
+    The aggregator manages function calls in progress and coordinates between
+    text generation and tool execution phases of LLM responses.
     """
 
     def __init__(
@@ -586,6 +804,13 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
         params: Optional[LLMAssistantAggregatorParams] = None,
         **kwargs,
     ):
+        """Initialize the assistant context aggregator.
+
+        Args:
+            context: The OpenAI LLM context for conversation storage.
+            params: Configuration parameters for aggregation behavior.
+            **kwargs: Additional arguments. Supports deprecated 'expect_stripped_words'.
+        """
         super().__init__(context=context, role="assistant", **kwargs)
         self._params = params or LLMAssistantAggregatorParams()
 
@@ -610,26 +835,57 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
         """Check if there are any function calls currently in progress.
 
         Returns:
-
+            True if function calls are in progress, False otherwise.
         """
         return bool(self._function_calls_in_progress)
 
     async def handle_aggregation(self, aggregation: str):
+        """Add the aggregated assistant text to the context.
+
+        Args:
+            aggregation: The aggregated assistant text to add as an assistant message.
+        """
         self._context.add_message({"role": "assistant", "content": aggregation})
 
     async def handle_function_call_in_progress(self, frame: FunctionCallInProgressFrame):
+        """Handle a function call that is in progress.
+
+        Args:
+            frame: The function call in progress frame to handle.
+        """
         pass
 
     async def handle_function_call_result(self, frame: FunctionCallResultFrame):
+        """Handle the result of a completed function call.
+
+        Args:
+            frame: The function call result frame to handle.
+        """
         pass
 
     async def handle_function_call_cancel(self, frame: FunctionCallCancelFrame):
+        """Handle cancellation of a function call.
+
+        Args:
+            frame: The function call cancel frame to handle.
+        """
         pass
 
     async def handle_user_image_frame(self, frame: UserImageRawFrame):
+        """Handle a user image frame associated with a function call.
+
+        Args:
+            frame: The user image frame to handle.
+        """
         pass
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames for assistant response aggregation and function call management.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         await super().process_frame(frame, direction)
 
         if isinstance(frame, StartInterruptionFrame):
@@ -642,9 +898,9 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
         elif isinstance(frame, TextFrame):
             await self._handle_text(frame)
         elif isinstance(frame, LLMMessagesAppendFrame):
-            self.
+            await self._handle_llm_messages_append(frame)
         elif isinstance(frame, LLMMessagesUpdateFrame):
-            self.
+            await self._handle_llm_messages_update(frame)
         elif isinstance(frame, LLMSetToolsFrame):
             self.set_tools(frame.tools)
         elif isinstance(frame, LLMSetToolChoiceFrame):
@@ -666,6 +922,7 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
             await self.push_frame(frame, direction)
 
     async def push_aggregation(self):
+        """Push the current assistant aggregation with timestamp."""
         if not self._aggregation:
             return
 
@@ -682,6 +939,16 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
         timestamp_frame = OpenAILLMContextAssistantTimestampFrame(timestamp=time_now_iso8601())
         await self.push_frame(timestamp_frame)
 
+    async def _handle_llm_messages_append(self, frame: LLMMessagesAppendFrame):
+        self.add_messages(frame.messages)
+        if frame.run_llm:
+            await self.push_context_frame(FrameDirection.UPSTREAM)
+
+    async def _handle_llm_messages_update(self, frame: LLMMessagesUpdateFrame):
+        self.set_messages(frame.messages)
+        if frame.run_llm:
+            await self.push_context_frame(FrameDirection.UPSTREAM)
+
     async def _handle_interruptions(self, frame: StartInterruptionFrame):
         await self.push_aggregation()
         self._started = 0
@@ -788,13 +1055,20 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
 
     def _context_updated_task_finished(self, task: asyncio.Task):
         self._context_updated_tasks.discard(task)
-        # The task is finished so this should exit immediately. We need to do
-        # this because otherwise the task manager would report a dangling task
-        # if we don't remove it.
-        asyncio.run_coroutine_threadsafe(self.wait_for_task(task), self.get_event_loop())
 
 
 class LLMUserResponseAggregator(LLMUserContextAggregator):
+    """User response aggregator that outputs LLMMessagesFrame instead of context frames.
+
+    .. deprecated:: 0.0.79
+        This class is deprecated and will be removed in a future version.
+        Use `LLMUserContextAggregator` or another LLM-specific subclass instead.
+
+    This aggregator extends LLMUserContextAggregator but pushes LLMMessagesFrame
+    objects downstream instead of OpenAILLMContextFrame objects. This is useful
+    when you need message-based output rather than context-based output.
+    """
+
     def __init__(
         self,
         messages: Optional[List[dict]] = None,
@@ -802,21 +1076,42 @@ class LLMUserResponseAggregator(LLMUserContextAggregator):
         params: Optional[LLMUserAggregatorParams] = None,
         **kwargs,
     ):
+        """Initialize the user response aggregator.
+
+        Args:
+            messages: Initial messages for the conversation context.
+            params: Configuration parameters for aggregation behavior.
+            **kwargs: Additional arguments passed to parent class.
+        """
+        warnings.warn(
+            "LLMUserResponseAggregator is deprecated and will be removed in a future version. "
+            "Use LLMUserContextAggregator or another LLM-specific subclass instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs)
 
-    async def
-
-
+    async def _process_aggregation(self):
+        """Process the current aggregation and push it downstream."""
+        aggregation = self._aggregation
+        await self.reset()
+        await self.handle_aggregation(aggregation)
+        frame = LLMMessagesFrame(self._context.messages)
+        await self.push_frame(frame)
 
-        # Reset the aggregation. Reset it before pushing it down, otherwise
-        # if the tasks gets cancelled we won't be able to clear things up.
-        await self.reset()
 
-
-
+class LLMAssistantResponseAggregator(LLMAssistantContextAggregator):
+    """Assistant response aggregator that outputs LLMMessagesFrame instead of context frames.
 
+    .. deprecated:: 0.0.79
+        This class is deprecated and will be removed in a future version.
+        Use `LLMAssistantContextAggregator` or another LLM-specific subclass instead.
+
+    This aggregator extends LLMAssistantContextAggregator but pushes LLMMessagesFrame
+    objects downstream instead of OpenAILLMContextFrame objects. This is useful
+    when you need message-based output rather than context-based output.
+    """
 
-class LLMAssistantResponseAggregator(LLMAssistantContextAggregator):
     def __init__(
         self,
         messages: Optional[List[dict]] = None,
@@ -824,9 +1119,23 @@ class LLMAssistantResponseAggregator(LLMAssistantContextAggregator):
         params: Optional[LLMAssistantAggregatorParams] = None,
         **kwargs,
     ):
+        """Initialize the assistant response aggregator.
+
+        Args:
+            messages: Initial messages for the conversation context.
+            params: Configuration parameters for aggregation behavior.
+            **kwargs: Additional arguments passed to parent class.
+        """
+        warnings.warn(
+            "LLMAssistantResponseAggregator is deprecated and will be removed in a future version. "
+            "Use LLMAssistantContextAggregator or another LLM-specific subclass instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs)
 
     async def push_aggregation(self):
+        """Push the aggregated assistant response as an LLMMessagesFrame."""
         if len(self._aggregation) > 0:
             await self.handle_aggregation(self._aggregation)
 
```
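Both response aggregators now emit `DeprecationWarning` at construction, so pipelines built on them should move to the context-based pair, which shares one `OpenAILLMContext` instead of each aggregator owning its own messages. A hedged migration sketch (direct construction shown; real pipelines may instead obtain these from their LLM service):

```python
from pipecat.processors.aggregators.llm_response import (
    LLMAssistantContextAggregator,
    LLMUserContextAggregator,
)
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext

messages = [{"role": "system", "content": "You are a helpful assistant."}]

# Before (deprecated since 0.0.79): each aggregator owned its own context and
# pushed LLMMessagesFrame objects downstream.
#   user = LLMUserResponseAggregator(messages)
#   assistant = LLMAssistantResponseAggregator(messages)

# After: one shared context; context frames instead of LLMMessagesFrame output.
context = OpenAILLMContext(messages)
user = LLMUserContextAggregator(context=context)
assistant = LLMAssistantContextAggregator(context=context)
```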