dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +20 -4
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +2 -2
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
pipecat/pipeline/task.py
CHANGED

@@ -32,15 +32,11 @@ from pipecat.frames.frames import (
     Frame,
     HeartbeatFrame,
     InputAudioRawFrame,
-    InterimTranscriptionFrame,
-    LLMFullResponseEndFrame,
     MetricsFrame,
     StartFrame,
     StopFrame,
     StopTaskFrame,
-    TranscriptionFrame,
-    UserStartedSpeakingFrame,
-    UserStoppedSpeakingFrame,
+    UserSpeakingFrame,
 )
 from pipecat.metrics.metrics import ProcessingMetricsData, TTFBMetricsData
 from pipecat.observers.base_observer import BaseObserver

@@ -53,8 +49,12 @@ from pipecat.utils.asyncio.task_manager import BaseTaskManager, TaskManager, Tas
 from pipecat.utils.tracing.setup import is_tracing_available
 from pipecat.utils.tracing.turn_trace_observer import TurnTraceObserver
 
-
-
+HEARTBEAT_SECS = 1.0
+HEARTBEAT_MONITOR_SECS = HEARTBEAT_SECS * 10
+
+IDLE_TIMEOUT_SECS = 300
+
+CANCEL_TIMEOUT_SECS = 20.0
 
 
 class PipelineParams(BaseModel):

@@ -91,7 +91,7 @@ class PipelineParams(BaseModel):
     enable_heartbeats: bool = False
     enable_metrics: bool = False
     enable_usage_metrics: bool = False
-    heartbeats_period_secs: float = 1.0
+    heartbeats_period_secs: float = HEARTBEAT_SECS
     interruption_strategies: List[BaseInterruptionStrategy] = Field(default_factory=list)
     observers: List[BaseObserver] = Field(default_factory=list)
     report_only_initial_ttfb: bool = False

@@ -135,20 +135,14 @@ class PipelineTask(BasePipelineTask):
         params: Optional[PipelineParams] = None,
         additional_span_attributes: Optional[dict] = None,
         cancel_on_idle_timeout: bool = True,
+        cancel_timeout_secs: float = CANCEL_TIMEOUT_SECS,
         check_dangling_tasks: bool = True,
         clock: Optional[BaseClock] = None,
         conversation_id: Optional[str] = None,
         enable_tracing: bool = False,
         enable_turn_tracking: bool = True,
-        idle_timeout_frames: Tuple[Type[Frame], ...] = (
-            BotSpeakingFrame,
-            InterimTranscriptionFrame,
-            LLMFullResponseEndFrame,
-            TranscriptionFrame,
-            UserStartedSpeakingFrame,
-            UserStoppedSpeakingFrame,
-        ),
-        idle_timeout_secs: Optional[float] = 300,
+        idle_timeout_frames: Tuple[Type[Frame], ...] = (BotSpeakingFrame, UserSpeakingFrame),
+        idle_timeout_secs: Optional[float] = IDLE_TIMEOUT_SECS,
         observers: Optional[List[BaseObserver]] = None,
         task_manager: Optional[BaseTaskManager] = None,
     ):

@@ -161,6 +155,8 @@ class PipelineTask(BasePipelineTask):
                 OpenTelemetry conversation span attributes.
             cancel_on_idle_timeout: Whether the pipeline task should be cancelled if
                 the idle timeout is reached.
+            cancel_timeout_secs: Timeout (in seconds) to wait for cancellation to happen
+                cleanly.
             check_dangling_tasks: Whether to check for processors' tasks finishing properly.
             clock: Clock implementation for timing operations.
             conversation_id: Optional custom ID for the conversation.

@@ -178,6 +174,7 @@ class PipelineTask(BasePipelineTask):
         self._params = params or PipelineParams()
         self._additional_span_attributes = additional_span_attributes or {}
         self._cancel_on_idle_timeout = cancel_on_idle_timeout
+        self._cancel_timeout_secs = cancel_timeout_secs
         self._check_dangling_tasks = check_dangling_tasks
         self._clock = clock or SystemClock()
         self._conversation_id = conversation_id

@@ -228,8 +225,13 @@ class PipelineTask(BasePipelineTask):
         # idle.
         self._idle_queue = asyncio.Queue()
         self._idle_monitor_task: Optional[asyncio.Task] = None
+
+        # This event is used to indicate the StartFrame has been received at the
+        # end of the pipeline.
+        self._pipeline_start_event = asyncio.Event()
+
         # This event is used to indicate a finalize frame (e.g. EndFrame,
-        # StopFrame) has been received
+        # StopFrame) has been received at the end of the pipeline.
         self._pipeline_end_event = asyncio.Event()
 
         # This is the final pipeline. It is composed of a source processor,

@@ -394,12 +396,13 @@
         # `PipelineRunner` which will call `PipelineTask.cancel()` and
         # therefore becoming case (2).
         if self._finished or self._cancelled:
-            logger.debug(f"Pipeline task {self}
+            logger.debug(f"Pipeline task {self} is finishing cleanup...")
             await self._cancel_tasks()
             await self._cleanup(cleanup_pipeline)
             if self._check_dangling_tasks:
                 self._print_dangling_tasks()
             self._finished = True
+        logger.debug(f"Pipeline task {self} has finished")
 
     async def queue_frame(self, frame: Frame):
         """Queue a single frame to be pushed down the pipeline.

@@ -427,12 +430,13 @@
         if not self._cancelled:
             logger.debug(f"Canceling pipeline task {self}", call_id=self._conversation_id)
             self._cancelled = True
+            cancel_frame = CancelFrame()
             # Make sure everything is cleaned up downstream. This is sent
             # out-of-band from the main streaming task which is what we want since
             # we want to cancel right away.
-            await self._pipeline.queue_frame(CancelFrame())
-            # Wait for CancelFrame to make it
-            await self._wait_for_pipeline_end()
+            await self._pipeline.queue_frame(cancel_frame)
+            # Wait for CancelFrame to make it through the pipeline.
+            await self._wait_for_pipeline_end(cancel_frame)
             # Only cancel the push task, we don't want to be able to process any
             # other frame after cancel. Everything else will be cancelled in
             # run().

@@ -506,9 +510,37 @@
             data.append(ProcessingMetricsData(processor=p.name, value=0.0))
         return MetricsFrame(data=data)
 
-    async def _wait_for_pipeline_end(self):
-        """Wait for the
-        await self._pipeline_end_event.wait()
+    async def _wait_for_pipeline_start(self, frame: Frame):
+        """Wait for the specified start frame to reach the end of the pipeline."""
+        logger.debug(f"{self}: Starting. Waiting for {frame} to reach the end of the pipeline...")
+        await self._pipeline_start_event.wait()
+        self._pipeline_start_event.clear()
+        logger.debug(f"{self}: {frame} reached the end of the pipeline, pipeline is now ready.")
+
+    async def _wait_for_pipeline_end(self, frame: Frame):
+        """Wait for the specified frame to reach the end of the pipeline."""
+
+        async def wait_for_cancel():
+            try:
+                await asyncio.wait_for(
+                    self._pipeline_end_event.wait(), timeout=self._cancel_timeout_secs
+                )
+                logger.debug(f"{self}: {frame} reached the end of the pipeline.")
+            except asyncio.TimeoutError:
+                logger.warning(
+                    f"{self}: timeout waiting for {frame} to reach the end of the pipeline (being blocked somewhere?)."
+                )
+            finally:
+                await self._call_event_handler("on_pipeline_cancelled", frame)
+
+        logger.debug(f"{self}: Closing. Waiting for {frame} to reach the end of the pipeline...")
+
+        if isinstance(frame, CancelFrame):
+            await wait_for_cancel()
+        else:
+            await self._pipeline_end_event.wait()
+            logger.debug(f"{self}: {frame} reached the end of the pipeline, pipeline is closing.")
+
         self._pipeline_end_event.clear()
 
     async def _setup(self, params: PipelineTaskParams):

@@ -528,6 +560,10 @@ class PipelineTask(BasePipelineTask):
         # Cleanup base object.
         await self.cleanup()
 
+        # Cleanup observers.
+        if self._observer:
+            await self._observer.cleanup()
+
         # End conversation tracing if it's active - this will also close any active turn span
         if self._enable_tracing and hasattr(self, "_turn_trace_observer"):
             self._turn_trace_observer.end_conversation_tracing()

@@ -560,6 +596,9 @@ class PipelineTask(BasePipelineTask):
         start_frame.metadata = self._params.start_metadata
         await self._pipeline.queue_frame(start_frame)
 
+        # Wait for the pipeline to be started before pushing any other frame.
+        await self._wait_for_pipeline_start(start_frame)
+
         if self._params.enable_metrics and self._params.send_initial_empty_metrics:
             await self._pipeline.queue_frame(self._initial_metrics_frame())
 

@@ -569,7 +608,7 @@ class PipelineTask(BasePipelineTask):
             frame = await self._push_queue.get()
             await self._pipeline.queue_frame(frame)
             if isinstance(frame, (CancelFrame, EndFrame, StopFrame)):
-                await self._wait_for_pipeline_end()
+                await self._wait_for_pipeline_end(frame)
             running = not isinstance(frame, (CancelFrame, EndFrame, StopFrame))
             cleanup_pipeline = not isinstance(frame, StopFrame)
             self._push_queue.task_done()

@@ -626,6 +665,8 @@ class PipelineTask(BasePipelineTask):
             # Start heartbeat tasks now that StartFrame has been processed
             # by all processors in the pipeline
             self._maybe_start_heartbeat_tasks()
+
+            self._pipeline_start_event.set()
         elif isinstance(frame, EndFrame):
             await self._call_event_handler("on_pipeline_ended", frame)
             self._pipeline_end_event.set()

@@ -633,7 +674,6 @@ class PipelineTask(BasePipelineTask):
             await self._call_event_handler("on_pipeline_stopped", frame)
             self._pipeline_end_event.set()
         elif isinstance(frame, CancelFrame):
-            await self._call_event_handler("on_pipeline_cancelled", frame)
             self._pipeline_end_event.set()
         elif isinstance(frame, HeartbeatFrame):
             await self._heartbeat_queue.put(frame)

@@ -655,7 +695,7 @@ class PipelineTask(BasePipelineTask):
         the time that a heartbeat frame takes to processes, that is how long it
         takes for the heartbeat frame to traverse all the pipeline.
         """
-        wait_time =
+        wait_time = HEARTBEAT_MONITOR_SECS
         while True:
             try:
                 frame = await asyncio.wait_for(self._heartbeat_queue.get(), timeout=wait_time)
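The task.py changes above add a cancel_timeout_secs constructor argument (backed by the new CANCEL_TIMEOUT_SECS constant), replace the literal heartbeat and idle-timeout defaults with module-level constants, and narrow the default idle_timeout_frames to (BotSpeakingFrame, UserSpeakingFrame). A minimal sketch of how a caller might pick these up; the pipeline construction is omitted and the values shown simply restate the new defaults:

from pipecat.pipeline.task import PipelineParams, PipelineTask

def make_task(pipeline):
    # `pipeline` is an already-assembled pipecat Pipeline (construction omitted).
    return PipelineTask(
        pipeline,
        params=PipelineParams(enable_heartbeats=True),  # heartbeats_period_secs now defaults to HEARTBEAT_SECS (1.0)
        cancel_timeout_secs=20.0,  # new: how long cancel() waits for the CancelFrame to reach the end of the pipeline
        idle_timeout_secs=300,  # now sourced from IDLE_TIMEOUT_SECS
        # idle_timeout_frames defaults to (BotSpeakingFrame, UserSpeakingFrame)
    )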
pipecat/pipeline/task_observer.py
CHANGED

@@ -119,6 +119,16 @@ class TaskObserver(BaseObserver):
         for proxy in self._proxies.values():
             await self._task_manager.cancel_task(proxy.task)
 
+    async def cleanup(self):
+        """Cleanup all proxy observers."""
+        await super().cleanup()
+
+        if not self._proxies:
+            return
+
+        for proxy in self._proxies:
+            await proxy.cleanup()
+
     async def on_process_frame(self, data: FramePushed):
         """Queue frame data for all managed observers.
 
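With TaskObserver.cleanup() in place, PipelineTask teardown (see the "# Cleanup observers." hunk in task.py above) now propagates cleanup to every attached observer. A hedged sketch of an observer that releases resources at that point, assuming the rest of the BaseObserver interface is implemented as usual:

from pipecat.observers.base_observer import BaseObserver, FramePushed

class ClosingObserver(BaseObserver):
    # Hypothetical observer, for illustration only.
    async def on_push_frame(self, data: FramePushed):
        pass  # observe frames as usual

    async def cleanup(self):
        await super().cleanup()
        # Release files, sockets, etc.; called once during task teardown.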
pipecat/processors/aggregators/dtmf_aggregator.py
CHANGED

@@ -14,13 +14,13 @@ for downstream processing by LLM context aggregators.
 import asyncio
 from typing import Optional
 
+from pipecat.audio.dtmf.types import KeypadEntry
 from pipecat.frames.frames import (
     BotInterruptionFrame,
     CancelFrame,
     EndFrame,
     Frame,
     InputDTMFFrame,
-    KeypadEntry,
     StartFrame,
     TranscriptionFrame,
 )

@@ -103,7 +103,7 @@ class DTMFAggregator(FrameProcessor):
         digit_value = frame.button.value
         self._aggregation += digit_value
 
-        # For first digit, schedule interruption
+        # For first digit, schedule interruption.
         if is_first_digit:
             await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)
 
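The DTMFAggregator change is just an import move: KeypadEntry now lives in the new pipecat/audio/dtmf/types.py module instead of pipecat.frames.frames. Downstream code would update accordingly:

# Before (0.0.82.dev815):
# from pipecat.frames.frames import KeypadEntry

# After (0.0.82.dev857):
from pipecat.audio.dtmf.types import KeypadEntry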
pipecat/processors/aggregators/llm_context.py
ADDED

@@ -0,0 +1,277 @@
+#
+# Copyright (c) 2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""Universal LLM context management for LLM services in Pipecat.
+
+Context contents are represented in a universal format (based on OpenAI)
+that supports a union of known Pipecat LLM service functionality.
+
+Whenever an LLM service needs to access context, it does a just-in-time
+translation from this universal context into whatever format it needs, using a
+service-specific adapter.
+"""
+
+import base64
+import io
+from dataclasses import dataclass
+from typing import Any, List, Optional, TypeAlias, Union
+
+from loguru import logger
+from openai._types import NOT_GIVEN as OPEN_AI_NOT_GIVEN
+from openai._types import NotGiven as OpenAINotGiven
+from openai.types.chat import (
+    ChatCompletionMessageParam,
+    ChatCompletionToolChoiceOptionParam,
+)
+from PIL import Image
+
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.frames.frames import AudioRawFrame
+
+# "Re-export" types from OpenAI that we're using as universal context types.
+# NOTE: if universal message types need to someday diverge from OpenAI's, we
+# should consider managing our own definitions. But we should do so carefully,
+# as the OpenAI messages are somewhat of a standard and we want to continue
+# supporting them.
+LLMStandardMessage = ChatCompletionMessageParam
+LLMContextToolChoice = ChatCompletionToolChoiceOptionParam
+NOT_GIVEN = OPEN_AI_NOT_GIVEN
+NotGiven = OpenAINotGiven
+
+
+@dataclass
+class LLMSpecificMessage:
+    """A container for a context message that is specific to a particular LLM service.
+
+    Enables the use of service-specific message types while maintaining
+    compatibility with the universal LLM context format.
+    """
+
+    llm: str
+    message: Any
+
+
+LLMContextMessage: TypeAlias = Union[LLMStandardMessage, LLMSpecificMessage]
+
+
+class LLMContext:
+    """Manages conversation context for LLM interactions.
+
+    Handles message history, tool definitions, tool choices, and multimedia
+    content for LLM conversations. Provides methods for message manipulation,
+    and content formatting.
+    """
+
+    def __init__(
+        self,
+        messages: Optional[List[LLMContextMessage]] = None,
+        tools: ToolsSchema | NotGiven = NOT_GIVEN,
+        tool_choice: LLMContextToolChoice | NotGiven = NOT_GIVEN,
+    ):
+        """Initialize the LLM context.
+
+        Args:
+            messages: Initial list of conversation messages.
+            tools: Available tools for the LLM to use.
+            tool_choice: Tool selection strategy for the LLM.
+        """
+        self._messages: List[LLMContextMessage] = messages if messages else []
+        self._tools: ToolsSchema | NotGiven = LLMContext._normalize_and_validate_tools(tools)
+        self._tool_choice: LLMContextToolChoice | NotGiven = tool_choice
+
+    def get_messages(self, llm_specific_filter: Optional[str] = None) -> List[LLMContextMessage]:
+        """Get the current messages list.
+
+        Args:
+            llm_specific_filter: Optional filter to return LLM-specific
+                messages for the given LLM, in addition to the standard
+                messages. If messages end up being filtered, an error will be
+                logged.
+
+        Returns:
+            List of conversation messages.
+        """
+        if llm_specific_filter is None:
+            return self._messages
+        filtered_messages = [
+            msg
+            for msg in self._messages
+            if not isinstance(msg, LLMSpecificMessage) or msg.llm == llm_specific_filter
+        ]
+        if len(filtered_messages) < len(self._messages):
+            logger.error(
+                f"Attempted to use incompatible LLMSpecificMessages with LLM '{llm_specific_filter}'."
+            )
+        return filtered_messages
+
+    @property
+    def tools(self) -> ToolsSchema | NotGiven:
+        """Get the tools list.
+
+        Returns:
+            Tools list.
+        """
+        return self._tools
+
+    @property
+    def tool_choice(self) -> LLMContextToolChoice | NotGiven:
+        """Get the current tool choice setting.
+
+        Returns:
+            The tool choice configuration.
+        """
+        return self._tool_choice
+
+    def add_message(self, message: LLMContextMessage):
+        """Add a single message to the context.
+
+        Args:
+            message: The message to add to the conversation history.
+        """
+        self._messages.append(message)
+
+    def add_messages(self, messages: List[LLMContextMessage]):
+        """Add multiple messages to the context.
+
+        Args:
+            messages: List of messages to add to the conversation history.
+        """
+        self._messages.extend(messages)
+
+    def set_messages(self, messages: List[LLMContextMessage]):
+        """Replace all messages in the context.
+
+        Args:
+            messages: New list of messages to replace the current history.
+        """
+        self._messages[:] = messages
+
+    def set_tools(self, tools: ToolsSchema | NotGiven = NOT_GIVEN):
+        """Set the available tools for the LLM.
+
+        Args:
+            tools: A ToolsSchema or NOT_GIVEN to disable tools.
+        """
+        self._tools = LLMContext._normalize_and_validate_tools(tools)
+
+    def set_tool_choice(self, tool_choice: LLMContextToolChoice | NotGiven):
+        """Set the tool choice configuration.
+
+        Args:
+            tool_choice: Tool selection strategy for the LLM.
+        """
+        self._tool_choice = tool_choice
+
+    def add_image_frame_message(
+        self, *, format: str, size: tuple[int, int], image: bytes, text: str = None
+    ):
+        """Add a message containing an image frame.
+
+        Args:
+            format: Image format (e.g., 'RGB', 'RGBA').
+            size: Image dimensions as (width, height) tuple.
+            image: Raw image bytes.
+            text: Optional text to include with the image.
+        """
+        buffer = io.BytesIO()
+        Image.frombytes(format, size, image).save(buffer, format="JPEG")
+        encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
+
+        content = []
+        if text:
+            content.append({"type": "text", "text": text})
+        content.append(
+            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}},
+        )
+        self.add_message({"role": "user", "content": content})
+
+    def add_audio_frames_message(
+        self, *, audio_frames: list[AudioRawFrame], text: str = "Audio follows"
+    ):
+        """Add a message containing audio frames.
+
+        Args:
+            audio_frames: List of audio frame objects to include.
+            text: Optional text to include with the audio.
+        """
+        if not audio_frames:
+            return
+
+        sample_rate = audio_frames[0].sample_rate
+        num_channels = audio_frames[0].num_channels
+
+        content = []
+        content.append({"type": "text", "text": text})
+        data = b"".join(frame.audio for frame in audio_frames)
+        data = bytes(
+            self._create_wav_header(
+                sample_rate,
+                num_channels,
+                16,
+                len(data),
+            )
+            + data
+        )
+        encoded_audio = base64.b64encode(data).decode("utf-8")
+        content.append(
+            {
+                "type": "input_audio",
+                "input_audio": {"data": encoded_audio, "format": "wav"},
+            }
+        )
+        self.add_message({"role": "user", "content": content})
+
+    def _create_wav_header(self, sample_rate, num_channels, bits_per_sample, data_size):
+        """Create a WAV file header for audio data.
+
+        Args:
+            sample_rate: Audio sample rate in Hz.
+            num_channels: Number of audio channels.
+            bits_per_sample: Bits per audio sample.
+            data_size: Size of audio data in bytes.
+
+        Returns:
+            WAV header as a bytearray.
+        """
+        # RIFF chunk descriptor
+        header = bytearray()
+        header.extend(b"RIFF")  # ChunkID
+        header.extend((data_size + 36).to_bytes(4, "little"))  # ChunkSize: total size - 8
+        header.extend(b"WAVE")  # Format
+        # "fmt " sub-chunk
+        header.extend(b"fmt ")  # Subchunk1ID
+        header.extend((16).to_bytes(4, "little"))  # Subchunk1Size (16 for PCM)
+        header.extend((1).to_bytes(2, "little"))  # AudioFormat (1 for PCM)
+        header.extend(num_channels.to_bytes(2, "little"))  # NumChannels
+        header.extend(sample_rate.to_bytes(4, "little"))  # SampleRate
+        # Calculate byte rate and block align
+        byte_rate = sample_rate * num_channels * (bits_per_sample // 8)
+        block_align = num_channels * (bits_per_sample // 8)
+        header.extend(byte_rate.to_bytes(4, "little"))  # ByteRate
+        header.extend(block_align.to_bytes(2, "little"))  # BlockAlign
+        header.extend(bits_per_sample.to_bytes(2, "little"))  # BitsPerSample
+        # "data" sub-chunk
+        header.extend(b"data")  # Subchunk2ID
+        header.extend(data_size.to_bytes(4, "little"))  # Subchunk2Size
+        return header
+
+    @staticmethod
+    def _normalize_and_validate_tools(tools: ToolsSchema | NotGiven) -> ToolsSchema | NotGiven:
+        """Normalize and validate the given tools.
+
+        Raises:
+            TypeError: If tools are not a ToolsSchema or NotGiven.
+        """
+        if isinstance(tools, ToolsSchema):
+            if not tools.standard_tools and not tools.custom_tools:
+                return NOT_GIVEN
+            return tools
+        elif tools is NOT_GIVEN:
+            return NOT_GIVEN
+        else:
+            raise TypeError(
+                f"In LLMContext, tools must be a ToolsSchema object or NOT_GIVEN. Got type: {type(tools)}",
+            )
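A short usage sketch for the new LLMContext; the message contents and the "anthropic" filter key are illustrative only:

from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage

context = LLMContext(messages=[{"role": "system", "content": "You are a helpful assistant."}])
context.add_message({"role": "user", "content": "Hello!"})

# A message that only a hypothetical "anthropic" adapter should see.
context.add_message(
    LLMSpecificMessage(llm="anthropic", message={"role": "user", "content": "provider-specific payload"})
)

# Returns the standard messages plus the "anthropic"-specific one; specific
# messages belonging to any other LLM would be dropped (and an error logged).
messages = context.get_messages(llm_specific_filter="anthropic")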
pipecat/processors/aggregators/llm_response.py
CHANGED

@@ -13,7 +13,6 @@ LLM processing, and text-to-speech components in conversational AI pipelines.
 
 import asyncio
 import time
-import warnings
 from abc import abstractmethod
 from dataclasses import dataclass
 from typing import Dict, List, Literal, Optional, Set

@@ -43,6 +42,7 @@ from pipecat.frames.frames import (
     LLMMessagesAppendFrame,
     LLMMessagesFrame,
     LLMMessagesUpdateFrame,
+    LLMRunFrame,
     LLMSetToolChoiceFrame,
     LLMSetToolsFrame,
     LLMTextFrame,

@@ -320,9 +320,24 @@ class LLMContextResponseAggregator(BaseLLMResponseAggregator):
     def get_context_frame(self) -> OpenAILLMContextFrame:
         """Create a context frame with the current context.
 
+        .. deprecated:: 0.0.82
+            This method is deprecated and will be removed in a future version.
+
         Returns:
-            OpenAILLMContextFrame containing the current context.
+            LLMContextFrame containing the current context.
         """
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "get_context_frame() is deprecated and will be removed in a future version. To trigger an LLM response, use LLMRunFrame instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+        return self._get_context_frame()
+
+    def _get_context_frame(self) -> OpenAILLMContextFrame:
         return OpenAILLMContextFrame(context=self._context)
 
     async def push_context_frame(self, direction: FrameDirection = FrameDirection.DOWNSTREAM):

@@ -331,7 +346,7 @@ class LLMContextResponseAggregator(BaseLLMResponseAggregator):
         Args:
             direction: The direction to push the frame (upstream or downstream).
         """
-        frame = self.get_context_frame()
+        frame = self._get_context_frame()
         await self.push_frame(frame, direction)
 
     def add_messages(self, messages):

@@ -491,6 +506,8 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
             await self._handle_transcription(frame)
         elif isinstance(frame, InterimTranscriptionFrame):
             await self._handle_interim_transcription(frame)
+        elif isinstance(frame, LLMRunFrame):
+            await self._handle_llm_run(frame)
         elif isinstance(frame, LLMMessagesAppendFrame):
             await self._handle_llm_messages_append(frame)
         elif isinstance(frame, LLMMessagesUpdateFrame):

@@ -583,6 +600,9 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
     async def _cancel(self, frame: CancelFrame):
         await self._cancel_aggregation_task()
 
+    async def _handle_llm_run(self, frame: LLMRunFrame):
+        await self.push_context_frame()
+
     async def _handle_llm_messages_append(self, frame: LLMMessagesAppendFrame):
         self.add_messages(frame.messages)
         if frame.run_llm:

@@ -897,6 +917,8 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
             await self._handle_llm_end(frame)
         elif isinstance(frame, TextFrame):
             await self._handle_text(frame)
+        elif isinstance(frame, LLMRunFrame):
+            await self._handle_llm_run(frame)
         elif isinstance(frame, LLMMessagesAppendFrame):
             await self._handle_llm_messages_append(frame)
         elif isinstance(frame, LLMMessagesUpdateFrame):

@@ -939,6 +961,9 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
         timestamp_frame = OpenAILLMContextAssistantTimestampFrame(timestamp=time_now_iso8601())
         await self.push_frame(timestamp_frame)
 
+    async def _handle_llm_run(self, frame: LLMRunFrame):
+        await self.push_context_frame(FrameDirection.UPSTREAM)
+
     async def _handle_llm_messages_append(self, frame: LLMMessagesAppendFrame):
         self.add_messages(frame.messages)
         if frame.run_llm:

@@ -1083,12 +1108,16 @@ class LLMUserResponseAggregator(LLMUserContextAggregator):
             params: Configuration parameters for aggregation behavior.
             **kwargs: Additional arguments passed to parent class.
         """
-        warnings
-
-
-
-
-
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "LLMUserResponseAggregator is deprecated and will be removed in a future version. "
+                "Use LLMUserContextAggregator or another LLM-specific subclass instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs)
 
     async def _process_aggregation(self):

@@ -1126,12 +1155,16 @@ class LLMAssistantResponseAggregator(LLMAssistantContextAggregator):
             params: Configuration parameters for aggregation behavior.
             **kwargs: Additional arguments passed to parent class.
         """
-        warnings
-
-
-
-
-
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "LLMAssistantResponseAggregator is deprecated and will be removed in a future version. "
+                "Use LLMAssistantContextAggregator or another LLM-specific subclass instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs)
 
     async def push_aggregation(self):