dv-pipecat-ai 0.0.75.dev883__py3-none-any.whl → 0.0.82.dev19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/RECORD +121 -80
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +145 -0
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/azure/llm.py +77 -1
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/elevenlabs/__init__.py +2 -0
- pipecat/services/elevenlabs/stt.py +351 -0
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +4 -4
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/top_level.txt +0 -0
pipecat/services/aws_nova_sonic/aws.py
CHANGED

@@ -34,6 +34,7 @@ from pipecat.frames.frames import (
     FunctionCallFromLLM,
     InputAudioRawFrame,
     InterimTranscriptionFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMTextFrame,
@@ -322,6 +323,10 @@ class AWSNovaSonicLLMService(LLMService):

         if isinstance(frame, OpenAILLMContextFrame):
             await self._handle_context(frame.context)
+        elif isinstance(frame, LLMContextFrame):
+            raise NotImplementedError(
+                "Universal LLMContext is not yet supported for AWS Nova Sonic."
+            )
         elif isinstance(frame, InputAudioRawFrame):
             await self._handle_input_audio_frame(frame)
         elif isinstance(frame, BotStoppedSpeakingFrame):
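Note on the hunk above: pipelines that adopt the new universal LLM context elsewhere must keep feeding AWS Nova Sonic the OpenAI-style context, since the LLMContextFrame branch now fails fast instead of being silently ignored. A minimal sketch of the still-supported path (class and import path are taken from this diff; the message content is illustrative):

    from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext

    # Still-supported path: the OpenAI-style context. Pushing the universal
    # LLMContextFrame at AWSNovaSonicLLMService raises NotImplementedError
    # in this release.
    context = OpenAILLMContext(
        messages=[{"role": "system", "content": "You are a helpful voice assistant."}]
    )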
pipecat/services/azure/llm.py
CHANGED

@@ -1,4 +1,3 @@
-#
 # Copyright (c) 2024–2025, Daily
 #
 # SPDX-License-Identifier: BSD 2-Clause License
@@ -6,9 +5,14 @@

 """Azure OpenAI service implementation for the Pipecat AI framework."""

+from typing import Any, Dict, List, Optional
+
 from loguru import logger
 from openai import AsyncAzureOpenAI
+from openai._streaming import AsyncStream
+from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam

+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.services.openai.llm import OpenAILLMService


@@ -17,6 +21,16 @@ class AzureLLMService(OpenAILLMService):

     This service extends OpenAILLMService to connect to Azure's OpenAI endpoint while
     maintaining full compatibility with OpenAI's interface and functionality.
+
+
+    Args:
+        api_key: The API key for accessing Azure OpenAI.
+        endpoint: The Azure endpoint URL.
+        model: The model identifier to use.
+        api_version: Azure API version. Defaults to "2024-09-01-preview".
+        reasoning_effort: If provided for reasoning models, sets the effort (e.g. "minimal").
+        **kwargs: Additional keyword arguments passed to OpenAILLMService.
+
     """

     def __init__(
@@ -26,6 +40,7 @@ class AzureLLMService(OpenAILLMService):
         endpoint: str,
         model: str,
         api_version: str = "2024-09-01-preview",
+        reasoning_effort: Optional[str] = None,
         **kwargs,
     ):
         """Initialize the Azure LLM service.
@@ -41,6 +56,7 @@ class AzureLLMService(OpenAILLMService):
         # will call create_client() and we need those values there.
         self._endpoint = endpoint
         self._api_version = api_version
+        self._reasoning_effort = reasoning_effort
         super().__init__(api_key=api_key, model=model, **kwargs)

     def create_client(self, api_key=None, base_url=None, **kwargs):
@@ -62,3 +78,63 @@ class AzureLLMService(OpenAILLMService):
             api_version=self._api_version,
             azure_deployment=azure_deployment,
         )
+
+    def _is_reasoning_model(self) -> bool:
+        """Check if the current model supports reasoning parameters.
+
+        Based on search results:
+        - GPT-5, GPT-5-mini, and GPT-5-nano are reasoning models
+        - GPT-5-chat is a standard chat model that doesn't use reasoning by default
+
+        Returns:
+            True if model supports reasoning parameters.
+        """
+        model_name_lower = self.model_name.lower()
+
+        # Reasoning-capable models
+        reasoning_models = {"gpt-5-nano", "gpt-5", "gpt-5-mini"}
+        return model_name_lower in reasoning_models
+
+    async def get_chat_completions(
+        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
+    ) -> AsyncStream[ChatCompletionChunk]:
+        """Get streaming chat completions from Azure OpenAI API.
+
+        Handles both reasoning and standard models according to Azure AI Foundry documentation.
+        Reasoning models use automatic chain of thought and have parameter limitations.
+        """
+        params = {
+            "model": self.model_name,
+            "stream": True,
+            "messages": messages,
+            "tools": context.tools,
+            "tool_choice": context.tool_choice,
+            "stream_options": {"include_usage": True},
+            "max_tokens": self._settings["max_tokens"],
+            "max_completion_tokens": self._settings["max_completion_tokens"],
+        }
+
+        if self._is_reasoning_model():
+            # Reasoning models generally do NOT support temperature, presence_penalty, top_p
+            if self._reasoning_effort:
+                params["reasoning_effort"] = self._reasoning_effort
+            if self._settings.get("seed"):
+                params["seed"] = self._settings["seed"]
+        else:
+            # Standard models support all parameters
+            params.update(
+                {
+                    "frequency_penalty": self._settings["frequency_penalty"],
+                    "presence_penalty": self._settings["presence_penalty"],
+                    "seed": self._settings["seed"],
+                    "temperature": self._settings["temperature"],
+                    "top_p": self._settings["top_p"],
+                }
+            )
+
+        # Add any extra parameters from settings
+        extra_params = self._settings.get("extra", {})
+        params.update(extra_params)
+
+        chunks = await self._client.chat.completions.create(**params)
+        return chunks
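The net effect of the Azure changes is one new constructor knob plus model-aware parameter filtering. A hedged usage sketch (the constructor signature matches the diff; key, endpoint, and model values are placeholders):

    from pipecat.services.azure.llm import AzureLLMService

    llm = AzureLLMService(
        api_key="YOUR_AZURE_OPENAI_KEY",  # placeholder
        endpoint="https://your-resource.openai.azure.com/",  # placeholder
        model="gpt-5-mini",  # member of the reasoning_models set above
        api_version="2024-09-01-preview",
        reasoning_effort="minimal",  # only sent for reasoning-capable models
    )

For a non-reasoning deployment, reasoning_effort can simply be omitted; get_chat_completions() then sends the full temperature/top_p/penalty parameter set instead.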
pipecat/services/cartesia/tts.py
CHANGED

@@ -10,7 +10,7 @@ import base64
 import json
 import uuid
 import warnings
-from typing import AsyncGenerator, List, Optional, Union
+from typing import AsyncGenerator, List, Literal, Optional, Union

 from loguru import logger
 from pydantic import BaseModel, Field
@@ -102,7 +102,7 @@ class CartesiaTTSService(AudioContextWordTTSService):

     Parameters:
         language: Language to use for synthesis.
-        speed: Voice speed control
+        speed: Voice speed control.
         emotion: List of emotion controls.

     .. deprecated:: 0.0.68
@@ -110,7 +110,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
     """

     language: Optional[Language] = Language.EN
-    speed: Optional[
+    speed: Optional[Literal["slow", "normal", "fast"]] = None
     emotion: Optional[List[str]] = []

     def __init__(
@@ -272,11 +272,13 @@ class CartesiaTTSService(AudioContextWordTTSService):
         voice_config["id"] = self._voice_id

         if self._settings["emotion"]:
-            warnings.
-
-
-
-
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'emotion' parameter in __experimental_controls is deprecated and will be removed in a future version.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             voice_config["__experimental_controls"] = {}
             if self._settings["emotion"]:
                 voice_config["__experimental_controls"]["emotion"] = self._settings["emotion"]
@@ -387,7 +389,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         await self._websocket.send(msg)
         self._context_id = None

-    async def
+    async def _process_messages(self):
         async for message in self._get_websocket():
             msg = json.loads(message)
             if not msg or not self.audio_context_available(msg["context_id"]):
@@ -421,6 +423,14 @@ class CartesiaTTSService(AudioContextWordTTSService):
             else:
                 logger.error(f"{self} error, unknown message type: {msg}")

+    async def _receive_messages(self):
+        while True:
+            await self._process_messages()
+            # Cartesia times out after 5 minutes of innactivity (no keepalive
+            # mechanism is available). So, we try to reconnect.
+            logger.debug(f"{self} Cartesia connection was disconnected (timeout?), reconnecting")
+            await self._connect_websocket()
+
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
         """Generate speech from text using Cartesia's streaming API.
@@ -472,7 +482,7 @@ class CartesiaHttpTTSService(TTSService):

     Parameters:
         language: Language to use for synthesis.
-        speed: Voice speed control
+        speed: Voice speed control.
         emotion: List of emotion controls.

     .. deprecated:: 0.0.68
@@ -480,7 +490,7 @@ class CartesiaHttpTTSService(TTSService):
     """

     language: Optional[Language] = Language.EN
-    speed: Optional[
+    speed: Optional[Literal["slow", "normal", "fast"]] = None
     emotion: Optional[List[str]] = Field(default_factory=list)

     def __init__(
@@ -600,11 +610,13 @@ class CartesiaHttpTTSService(TTSService):
         voice_config = {"mode": "id", "id": self._voice_id}

         if self._settings["emotion"]:
-            warnings.
-
-
-
-
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'emotion' parameter in voice.__experimental_controls is deprecated and will be removed in a future version.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             voice_config["__experimental_controls"] = {"emotion": self._settings["emotion"]}

         await self.start_ttfb_metrics()
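The new _receive_messages wrapper is a plain reconnect loop around the message reader. A self-contained sketch of the same pattern (using the websockets library and a placeholder URL for illustration; this is not the Cartesia client code):

    import asyncio

    import websockets

    async def receive_forever(url: str):
        while True:
            try:
                async with websockets.connect(url) as ws:
                    # Read until the server closes the connection.
                    async for message in ws:
                        print("received:", message)
            except websockets.ConnectionClosed:
                pass
            # Connection ended (e.g. the server's idle timeout); loop around
            # and reconnect rather than surfacing the error to callers.
            await asyncio.sleep(1)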
pipecat/services/cerebras/llm.py
CHANGED

@@ -9,9 +9,8 @@
 from typing import List

 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam

-from pipecat.
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService


@@ -27,7 +26,7 @@ class CerebrasLLMService(OpenAILLMService):
         *,
         api_key: str,
         base_url: str = "https://api.cerebras.ai/v1",
-        model: str = "
+        model: str = "gpt-oss-120b",
         **kwargs,
     ):
         """Initialize the Cerebras LLM service.
@@ -35,7 +34,7 @@ class CerebrasLLMService(OpenAILLMService):
         Args:
             api_key: The API key for accessing Cerebras's API.
             base_url: The base URL for Cerebras API. Defaults to "https://api.cerebras.ai/v1".
-            model: The model identifier to use. Defaults to "
+            model: The model identifier to use. Defaults to "gpt-oss-120b".
             **kwargs: Additional keyword arguments passed to OpenAILLMService.
         """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
@@ -54,25 +53,31 @@ class CerebrasLLMService(OpenAILLMService):
         logger.debug(f"Creating Cerebras client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)

-    def build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for Cerebras chat completion request.

         Cerebras supports a subset of OpenAI parameters, focusing on core
         completion settings without advanced features like frequency/presence penalties.
+
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "seed": self._settings["seed"],
             "temperature": self._settings["temperature"],
             "top_p": self._settings["top_p"],
             "max_completion_tokens": self._settings["max_completion_tokens"],
         }

+        # Messages, tools, tool_choice
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
         return params
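The refactor replaces the separate (context, messages) arguments with a single params_from_context mapping derived from the LLM context by the new OpenAI adapter. A small sketch of how the merge behaves (treating OpenAILLMInvocationParams as a dict-like bundle of messages, tools, and tool_choice, which is what params.update() in the diff implies; the values are illustrative):

    # Hypothetical context-derived parameters.
    params_from_context = {
        "messages": [{"role": "user", "content": "Hello"}],
        "tools": [],
        "tool_choice": "none",
    }

    # Service-level settings, as in build_chat_completion_params().
    params = {
        "model": "gpt-oss-120b",
        "stream": True,
        "temperature": 0.7,
    }

    params.update(params_from_context)  # merges messages/tools/tool_choice in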
pipecat/services/deepgram/stt.py
CHANGED

@@ -276,6 +276,14 @@ class DeepgramSTTService(STTService):
     async def _disconnect(self):
         if self._connection.is_connected:
             self.logger.debug("Disconnecting from Deepgram")
+            # Deepgram swallows asyncio.CancelledError internally which prevents
+            # proper cancellation propagation. This issue was found with
+            # parallel pipelines where `CancelFrame` was not awaited for to
+            # finish in all branches and it was pushed downstream reaching the
+            # end of the pipeline, which caused `cleanup()` to be called while
+            # Deepgram disconnection was still finishing and therefore
+            # preventing the task cancellation that occurs during `cleanup()`.
+            # GH issue: https://github.com/deepgram/deepgram-python-sdk/issues/570
             await self._connection.finish()

     async def start_metrics(self):
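The comment block added above describes a cancellation-propagation bug. As a generic asyncio illustration (not Deepgram SDK code), a library that swallows CancelledError internally makes task.cancel() appear to succeed while the caller never observes the cancellation:

    import asyncio

    async def library_call():
        try:
            await asyncio.sleep(10)
        except asyncio.CancelledError:
            pass  # swallowed: the caller never sees the cancellation

    async def main():
        task = asyncio.create_task(library_call())
        await asyncio.sleep(0.1)
        task.cancel()
        await task  # returns normally instead of raising CancelledError

    asyncio.run(main())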
pipecat/services/deepseek/llm.py
CHANGED

@@ -9,9 +9,8 @@
 from typing import List

 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam

-from pipecat.
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService


@@ -54,19 +53,22 @@ class DeepSeekLLMService(OpenAILLMService):
         logger.debug(f"Creating DeepSeek client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)

-    def _build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def _build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for DeepSeek chat completion request.

         DeepSeek doesn't support some OpenAI parameters like seed and max_completion_tokens.
+
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "stream_options": {"include_usage": True},
             "frequency_penalty": self._settings["frequency_penalty"],
             "presence_penalty": self._settings["presence_penalty"],
@@ -75,5 +77,8 @@ class DeepSeekLLMService(OpenAILLMService):
             "max_tokens": self._settings["max_tokens"],
         }

+        # Messages, tools, tool_choice
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
         return params