dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +20 -4
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +2 -2
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
@@ -24,13 +24,17 @@ from loguru import logger
 from PIL import Image
 from pydantic import BaseModel, Field
 
-from pipecat.adapters.services.anthropic_adapter import
+from pipecat.adapters.services.anthropic_adapter import (
+    AnthropicLLMAdapter,
+    AnthropicLLMInvocationParams,
+)
 from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
     FunctionCallCancelFrame,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
+    LLMContextFrame,
     LLMEnablePromptCachingFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
@@ -41,6 +45,7 @@ from pipecat.frames.frames import (
     VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMAssistantContextAggregator,
@@ -110,7 +115,12 @@ class AnthropicLLMService(LLMService):
         """Input parameters for Anthropic model inference.
 
         Parameters:
-
+            enable_prompt_caching: Whether to enable the prompt caching feature.
+            enable_prompt_caching_beta (deprecated): Whether to enable the beta prompt caching feature.
+
+                .. deprecated:: 0.0.84
+                    Use the `enable_prompt_caching` parameter instead.
+
             max_tokens: Maximum tokens to generate. Must be at least 1.
             temperature: Sampling temperature between 0.0 and 1.0.
             top_k: Top-k sampling parameter.
@@ -118,13 +128,26 @@ class AnthropicLLMService(LLMService):
             extra: Additional parameters to pass to the API.
         """
 
-
+        enable_prompt_caching: Optional[bool] = None
+        enable_prompt_caching_beta: Optional[bool] = None
         max_tokens: Optional[int] = Field(default_factory=lambda: 4096, ge=1)
         temperature: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
         top_k: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=0)
         top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
         extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
 
+        def model_post_init(self, __context):
+            """Post-initialization to handle deprecated parameters."""
+            if self.enable_prompt_caching_beta is not None:
+                import warnings
+
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "enable_prompt_caching_beta is deprecated. Use enable_prompt_caching instead.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
+
     def __init__(
         self,
         *,
@@ -157,7 +180,15 @@ class AnthropicLLMService(LLMService):
         self._retry_on_timeout = retry_on_timeout
         self._settings = {
             "max_tokens": params.max_tokens,
-            "
+            "enable_prompt_caching": (
+                params.enable_prompt_caching
+                if params.enable_prompt_caching is not None
+                else (
+                    params.enable_prompt_caching_beta
+                    if params.enable_prompt_caching_beta is not None
+                    else False
+                )
+            ),
             "temperature": params.temperature,
             "top_k": params.top_k,
             "top_p": params.top_p,
@@ -197,14 +228,39 @@ class AnthropicLLMService(LLMService):
             response = await api_call(**params)
             return response
 
-
-
-
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Args:
+            context: The LLM context containing conversation history.
 
         Returns:
-
+            The LLM's response as a string, or None if no response is generated.
         """
-
+        messages = []
+        system = NOT_GIVEN
+        if isinstance(context, LLMContext):
+            adapter: AnthropicLLMAdapter = self.get_llm_adapter()
+            params = adapter.get_llm_invocation_params(
+                context, enable_prompt_caching=self._settings["enable_prompt_caching"]
+            )
+            messages = params["messages"]
+            system = params["system"]
+        else:
+            context = AnthropicLLMContext.upgrade_to_anthropic(context)
+            messages = context.messages
+            system = getattr(context, "system", NOT_GIVEN)
+
+        # LLM completion
+        response = await self._client.messages.create(
+            model=self.model_name,
+            messages=messages,
+            system=system,
+            max_tokens=8192,
+            stream=False,
+        )
+
+        return response.content[0].text
 
     def create_context_aggregator(
         self,
@@ -235,8 +291,31 @@ class AnthropicLLMService(LLMService):
         assistant = AnthropicAssistantContextAggregator(context, params=assistant_params)
         return AnthropicContextAggregatorPair(_user=user, _assistant=assistant)
 
+    def _get_llm_invocation_params(
+        self, context: OpenAILLMContext | LLMContext
+    ) -> AnthropicLLMInvocationParams:
+        # Universal LLMContext
+        if isinstance(context, LLMContext):
+            adapter: AnthropicLLMAdapter = self.get_llm_adapter()
+            params = adapter.get_llm_invocation_params(
+                context, enable_prompt_caching=self._settings["enable_prompt_caching"]
+            )
+            return params
+
+        # Anthropic-specific context
+        messages = (
+            context.get_messages_with_cache_control_markers()
+            if self._settings["enable_prompt_caching"]
+            else context.messages
+        )
+        return AnthropicLLMInvocationParams(
+            system=context.system,
+            messages=messages,
+            tools=context.tools or [],
+        )
+
     @traced_llm
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         # Usage tracking. We track the usage reported by Anthropic in prompt_tokens and
         # completion_tokens. We also estimate the completion tokens from output text
         # and use that estimate if we are interrupted, because we almost certainly won't
@@ -252,24 +331,22 @@ class AnthropicLLMService(LLMService):
         await self.push_frame(LLMFullResponseStartFrame())
         await self.start_processing_metrics()
 
+        params_from_context = self._get_llm_invocation_params(context)
+
+        if isinstance(context, LLMContext):
+            adapter = self.get_llm_adapter()
+            context_type_for_logging = "universal"
+            messages_for_logging = adapter.get_messages_for_logging(context)
+        else:
+            context_type_for_logging = "LLM-specific"
+            messages_for_logging = context.get_messages_for_logging()
         self.logger.debug(
-            f"{self}: Generating chat [{
+            f"{self}: Generating chat from {context_type_for_logging} context [{params_from_context['system']}] | {messages_for_logging}"
         )
 
-        messages = context.messages
-        if self._settings["enable_prompt_caching_beta"]:
-            messages = context.get_messages_with_cache_control_markers()
-
-        api_call = self._client.messages.create
-        if self._settings["enable_prompt_caching_beta"]:
-            api_call = self._client.beta.prompt_caching.messages.create
-
         await self.start_ttfb_metrics()
 
         params = {
-            "tools": context.tools or [],
-            "system": context.system,
-            "messages": messages,
             "model": self.model_name,
             "max_tokens": self._settings["max_tokens"],
             "stream": True,
@@ -278,9 +355,12 @@ class AnthropicLLMService(LLMService):
             "top_p": self._settings["top_p"],
         }
 
+        # Messages, system, tools
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
 
-        response = await self._create_message_stream(
+        response = await self._create_message_stream(self._client.messages.create, params)
 
         await self.stop_ttfb_metrics()
 
@@ -363,7 +443,10 @@ class AnthropicLLMService(LLMService):
                 prompt_tokens + cache_creation_input_tokens + cache_read_input_tokens
             )
             if total_input_tokens >= 1024:
-
+                if hasattr(
+                    context, "turns_above_cache_threshold"
+                ):  # LLMContext doesn't have this attribute
+                    context.turns_above_cache_threshold += 1
 
         await self.run_function_calls(function_calls)
 
@@ -408,6 +491,8 @@ class AnthropicLLMService(LLMService):
         context = None
         if isinstance(frame, OpenAILLMContextFrame):
            context: "AnthropicLLMContext" = AnthropicLLMContext.upgrade_to_anthropic(frame.context)
+        elif isinstance(frame, LLMContextFrame):
+            context = frame.context
         elif isinstance(frame, LLMMessagesFrame):
             context = AnthropicLLMContext.from_messages(frame.messages)
         elif isinstance(frame, VisionImageRawFrame):
@@ -420,7 +505,7 @@ class AnthropicLLMService(LLMService):
             await self._update_settings(frame.settings)
         elif isinstance(frame, LLMEnablePromptCachingFrame):
             self.logger.debug(f"Setting enable prompt caching to: [{frame.enable}]")
-            self._settings["
+            self._settings["enable_prompt_caching"] = frame.enable
         else:
             await self.push_frame(frame, direction)
 
@@ -889,13 +974,13 @@ class AnthropicLLMContext(OpenAILLMContext):
             messages.insert(0, {"role": "system", "content": self.system})
         return messages
 
-    def get_messages_for_logging(self) -> str:
+    def get_messages_for_logging(self) -> List[Dict[str, Any]]:
         """Get messages formatted for logging with sensitive data redacted.
 
         Replaces image data with placeholder text for cleaner logs.
 
         Returns:
-
+            List of messages in a format ready for logging.
         """
         msgs = []
         for message in self.messages:
@@ -906,7 +991,7 @@ class AnthropicLLMContext(OpenAILLMContext):
                     if item["type"] == "image":
                         item["source"]["data"] = "..."
             msgs.append(msg)
-        return
+        return msgs
 
 
 class AnthropicUserContextAggregator(LLMUserContextAggregator):

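Note: the `run_inference` method added above gives `AnthropicLLMService` a one-shot, out-of-pipeline completion path that accepts either the new universal `LLMContext` or the legacy `OpenAILLMContext`. A minimal usage sketch (the API key and model name are placeholders, not taken from this diff):

import asyncio

from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.anthropic.llm import AnthropicLLMService


async def main():
    # Placeholder credentials and model; run_inference also accepts the new LLMContext.
    llm = AnthropicLLMService(api_key="sk-ant-...", model="claude-3-5-sonnet-latest")
    context = OpenAILLMContext(messages=[{"role": "user", "content": "Say hello."}])
    reply = await llm.run_inference(context)  # response text, or None
    print(reply)


asyncio.run(main())
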
pipecat/services/asyncai/tts.py
CHANGED
@@ -52,6 +52,10 @@ def language_to_async_language(language: Language) -> Optional[str]:
     """
     BASE_LANGUAGES = {
         Language.EN: "en",
+        Language.FR: "fr",
+        Language.ES: "es",
+        Language.DE: "de",
+        Language.IT: "it",
     }
 
     result = BASE_LANGUAGES.get(language)
pipecat/services/aws/llm.py
CHANGED
@@ -16,6 +16,7 @@ import base64
 import copy
 import io
 import json
+import os
 import re
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
@@ -31,6 +32,7 @@ from pipecat.frames.frames import (
     FunctionCallFromLLM,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesFrame,
@@ -40,6 +42,7 @@ from pipecat.frames.frames import (
     VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMAssistantContextAggregator,
@@ -553,11 +556,11 @@ class AWSBedrockLLMContext(OpenAILLMContext):
             messages.insert(0, {"role": "system", "content": self.system})
         return messages
 
-    def get_messages_for_logging(self) -> str:
+    def get_messages_for_logging(self) -> List[Dict[str, Any]]:
         """Get messages formatted for logging with sensitive data redacted.
 
         Returns:
-
+            List of messages in a format ready for logging.
         """
         msgs = []
         for message in self.messages:
@@ -568,7 +571,7 @@ class AWSBedrockLLMContext(OpenAILLMContext):
                     if item.get("image"):
                         item["source"]["bytes"] = "..."
             msgs.append(msg)
-        return
+        return msgs
 
 
 class AWSBedrockUserContextAggregator(LLMUserContextAggregator):
@@ -759,10 +762,10 @@ class AWSBedrockLLMService(LLMService):
 
         # Store AWS session parameters for creating client in async context
         self._aws_params = {
-            "aws_access_key_id": aws_access_key,
-            "aws_secret_access_key": aws_secret_key,
-            "aws_session_token": aws_session_token,
-            "region_name": aws_region,
+            "aws_access_key_id": aws_access_key or os.getenv("AWS_ACCESS_KEY_ID"),
+            "aws_secret_access_key": aws_secret_key or os.getenv("AWS_SECRET_ACCESS_KEY"),
+            "aws_session_token": aws_session_token or os.getenv("AWS_SESSION_TOKEN"),
+            "region_name": aws_region or os.getenv("AWS_REGION", "us-east-1"),
             "config": client_config,
         }
 
@@ -789,6 +792,75 @@
         """
         return True
 
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Args:
+            context: The LLM context containing conversation history.
+
+        Returns:
+            The LLM's response as a string, or None if no response is generated.
+        """
+        try:
+            messages = []
+            system = []
+            if isinstance(context, LLMContext):
+                # Future code will be something like this:
+                # adapter = self.get_llm_adapter()
+                # params: AWSBedrockLLMInvocationParams = adapter.get_llm_invocation_params(context)
+                # messages = params["messages"]
+                # system = params["system_instruction"]  # [{"text": "system message"}]
+                raise NotImplementedError(
+                    "Universal LLMContext is not yet supported for AWS Bedrock."
+                )
+            else:
+                context = AWSBedrockLLMContext.upgrade_to_bedrock(context)
+                messages = context.messages
+                system = getattr(context, "system", None)  # [{"text": "system message"}]
+
+            # Determine if we're using Claude or Nova based on model ID
+            model_id = self.model_name
+
+            # Prepare request parameters
+            request_params = {
+                "modelId": model_id,
+                "messages": messages,
+                "inferenceConfig": {
+                    "maxTokens": 8192,
+                    "temperature": 0.7,
+                    "topP": 0.9,
+                },
+            }
+
+            if system:
+                request_params["system"] = system
+
+            async with self._aws_session.client(
+                service_name="bedrock-runtime", **self._aws_params
+            ) as client:
+                # Call Bedrock without streaming
+                response = await client.converse(**request_params)
+
+                # Extract the response text
+                if (
+                    "output" in response
+                    and "message" in response["output"]
+                    and "content" in response["output"]["message"]
+                ):
+                    content = response["output"]["message"]["content"]
+                    if isinstance(content, list):
+                        for item in content:
+                            if item.get("text"):
+                                return item["text"]
+                    elif isinstance(content, str):
+                        return content
+
+                return None
+
+        except Exception as e:
+            logger.error(f"Bedrock summary generation failed: {e}", exc_info=True)
+            return None
+
     async def _create_converse_stream(self, client, request_params):
         """Create converse stream with optional timeout and retry.
 
@@ -802,7 +874,7 @@ class AWSBedrockLLMService(LLMService):
         if self._retry_on_timeout:
             try:
                 response = await asyncio.wait_for(
-
+                    client.converse_stream(**request_params), timeout=self._retry_timeout_secs
                 )
                 return response
             except (ReadTimeoutError, asyncio.TimeoutError) as e:
@@ -1044,6 +1116,8 @@
         context = None
         if isinstance(frame, OpenAILLMContextFrame):
             context = AWSBedrockLLMContext.upgrade_to_bedrock(frame.context)
+        if isinstance(frame, LLMContextFrame):
+            raise NotImplementedError("Universal LLMContext is not yet supported for AWS Bedrock.")
         elif isinstance(frame, LLMMessagesFrame):
             context = AWSBedrockLLMContext.from_messages(frame.messages)
         elif isinstance(frame, VisionImageRawFrame):

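Because the stored session parameters above now fall back to environment variables, `AWSBedrockLLMService` can be constructed without passing credentials explicitly. A hedged sketch, assuming the standard AWS variables are exported and that the constructor's credential arguments default to None (the model ID below is a placeholder):

from pipecat.services.aws.llm import AWSBedrockLLMService

# Relies on AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY being set in the environment;
# AWS_REGION falls back to "us-east-1" per the change above.
llm = AWSBedrockLLMService(model="us.anthropic.claude-3-5-haiku-20241022-v1:0")
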
pipecat/services/aws/tts.py
CHANGED
@@ -185,16 +185,6 @@ class AWSPollyTTSService(TTSService):
             "region_name": region or os.getenv("AWS_REGION", "us-east-1"),
         }
 
-        # Validate that we have the required credentials
-        if (
-            not self._aws_params["aws_access_key_id"]
-            or not self._aws_params["aws_secret_access_key"]
-        ):
-            raise ValueError(
-                "AWS credentials not found. Please provide them either through constructor parameters "
-                "or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables."
-            )
-
         self._aws_session = aioboto3.Session()
         self._settings = {
             "engine": params.engine,

pipecat/services/aws_nova_sonic/aws.py
CHANGED
@@ -34,6 +34,7 @@ from pipecat.frames.frames import (
     FunctionCallFromLLM,
     InputAudioRawFrame,
     InterimTranscriptionFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMTextFrame,
@@ -322,6 +323,10 @@ class AWSNovaSonicLLMService(LLMService):
 
         if isinstance(frame, OpenAILLMContextFrame):
             await self._handle_context(frame.context)
+        elif isinstance(frame, LLMContextFrame):
+            raise NotImplementedError(
+                "Universal LLMContext is not yet supported for AWS Nova Sonic."
+            )
         elif isinstance(frame, InputAudioRawFrame):
             await self._handle_input_audio_frame(frame)
         elif isinstance(frame, BotStoppedSpeakingFrame):

pipecat/services/cartesia/tts.py
CHANGED
@@ -10,7 +10,7 @@ import base64
 import json
 import uuid
 import warnings
-from typing import AsyncGenerator, List, Optional, Union
+from typing import AsyncGenerator, List, Literal, Optional, Union
 
 from loguru import logger
 from pydantic import BaseModel, Field
@@ -102,7 +102,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
 
         Parameters:
             language: Language to use for synthesis.
-            speed: Voice speed control
+            speed: Voice speed control.
             emotion: List of emotion controls.
 
             .. deprecated:: 0.0.68
@@ -110,7 +110,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         """
 
         language: Optional[Language] = Language.EN
-        speed: Optional[
+        speed: Optional[Literal["slow", "normal", "fast"]] = None
         emotion: Optional[List[str]] = []
 
     def __init__(
@@ -272,11 +272,13 @@ class CartesiaTTSService(AudioContextWordTTSService):
         voice_config["id"] = self._voice_id
 
         if self._settings["emotion"]:
-            warnings.
-
-
-
-
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'emotion' parameter in __experimental_controls is deprecated and will be removed in a future version.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             voice_config["__experimental_controls"] = {}
             if self._settings["emotion"]:
                 voice_config["__experimental_controls"]["emotion"] = self._settings["emotion"]
@@ -387,7 +389,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
             await self._websocket.send(msg)
             self._context_id = None
 
-    async def
+    async def _process_messages(self):
         async for message in self._get_websocket():
             msg = json.loads(message)
             if not msg or not self.audio_context_available(msg["context_id"]):
@@ -421,6 +423,14 @@ class CartesiaTTSService(AudioContextWordTTSService):
             else:
                 logger.error(f"{self} error, unknown message type: {msg}")
 
+    async def _receive_messages(self):
+        while True:
+            await self._process_messages()
+            # Cartesia times out after 5 minutes of innactivity (no keepalive
+            # mechanism is available). So, we try to reconnect.
+            logger.debug(f"{self} Cartesia connection was disconnected (timeout?), reconnecting")
+            await self._connect_websocket()
+
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
         """Generate speech from text using Cartesia's streaming API.
@@ -472,7 +482,7 @@ class CartesiaHttpTTSService(TTSService):
 
         Parameters:
             language: Language to use for synthesis.
-            speed: Voice speed control
+            speed: Voice speed control.
             emotion: List of emotion controls.
 
             .. deprecated:: 0.0.68
@@ -480,7 +490,7 @@ class CartesiaHttpTTSService(TTSService):
         """
 
         language: Optional[Language] = Language.EN
-        speed: Optional[
+        speed: Optional[Literal["slow", "normal", "fast"]] = None
         emotion: Optional[List[str]] = Field(default_factory=list)
 
     def __init__(
@@ -600,11 +610,13 @@ class CartesiaHttpTTSService(TTSService):
         voice_config = {"mode": "id", "id": self._voice_id}
 
         if self._settings["emotion"]:
-            warnings.
-
-
-
-
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'emotion' parameter in voice.__experimental_controls is deprecated and will be removed in a future version.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             voice_config["__experimental_controls"] = {"emotion": self._settings["emotion"]}
 
         await self.start_ttfb_metrics()

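The `speed` setting in both Cartesia services is now typed as a literal of "slow", "normal", or "fast", so pydantic validation rejects other strings. A small illustration (note the surrounding docstring still marks these experimental controls as deprecated since 0.0.68):

from pydantic import ValidationError

from pipecat.services.cartesia.tts import CartesiaTTSService

CartesiaTTSService.InputParams(speed="fast")        # accepted
try:
    CartesiaTTSService.InputParams(speed="faster")  # not one of "slow" | "normal" | "fast"
except ValidationError as e:
    print(e)
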
pipecat/services/cerebras/llm.py
CHANGED
@@ -9,9 +9,8 @@
 from typing import List
 
 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam
 
-from pipecat.
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService
 
 
@@ -27,7 +26,7 @@ class CerebrasLLMService(OpenAILLMService):
         *,
         api_key: str,
         base_url: str = "https://api.cerebras.ai/v1",
-        model: str = "
+        model: str = "gpt-oss-120b",
         **kwargs,
     ):
         """Initialize the Cerebras LLM service.
@@ -35,7 +34,7 @@ class CerebrasLLMService(OpenAILLMService):
         Args:
             api_key: The API key for accessing Cerebras's API.
             base_url: The base URL for Cerebras API. Defaults to "https://api.cerebras.ai/v1".
-            model: The model identifier to use. Defaults to "
+            model: The model identifier to use. Defaults to "gpt-oss-120b".
             **kwargs: Additional keyword arguments passed to OpenAILLMService.
         """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
@@ -54,25 +53,31 @@ class CerebrasLLMService(OpenAILLMService):
         logger.debug(f"Creating Cerebras client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    def build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for Cerebras chat completion request.
 
         Cerebras supports a subset of OpenAI parameters, focusing on core
         completion settings without advanced features like frequency/presence penalties.
+
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "seed": self._settings["seed"],
             "temperature": self._settings["temperature"],
             "top_p": self._settings["top_p"],
             "max_completion_tokens": self._settings["max_completion_tokens"],
         }
 
+        # Messages, tools, tool_choice
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
         return params

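For reference, the constructor change above means a bare `CerebrasLLMService` now targets `gpt-oss-120b` unless a model is passed explicitly (the API key below is a placeholder):

from pipecat.services.cerebras.llm import CerebrasLLMService

# Defaults per this diff: base_url="https://api.cerebras.ai/v1", model="gpt-oss-120b".
llm = CerebrasLLMService(api_key="csk-...")
assert llm.model_name == "gpt-oss-120b"
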
pipecat/services/deepgram/stt.py
CHANGED
@@ -276,6 +276,14 @@ class DeepgramSTTService(STTService):
     async def _disconnect(self):
         if self._connection.is_connected:
             self.logger.debug("Disconnecting from Deepgram")
+            # Deepgram swallows asyncio.CancelledError internally which prevents
+            # proper cancellation propagation. This issue was found with
+            # parallel pipelines where `CancelFrame` was not awaited for to
+            # finish in all branches and it was pushed downstream reaching the
+            # end of the pipeline, which caused `cleanup()` to be called while
+            # Deepgram disconnection was still finishing and therefore
+            # preventing the task cancellation that occurs during `cleanup()`.
+            # GH issue: https://github.com/deepgram/deepgram-python-sdk/issues/570
             await self._connection.finish()
 
     async def start_metrics(self):