PyPI - dv-pipecat-ai - Versions diffs - 0.0.75.dev887__py3-none-any.whl → 0.0.82.dev19__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.75.dev887__py3-none-any.whl → 0.0.82.dev19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (121) hide show

{dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/METADATA +8 -3
{dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/RECORD +121 -81
pipecat/adapters/base_llm_adapter.py +44 -6
pipecat/adapters/services/anthropic_adapter.py +302 -2
pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
pipecat/adapters/services/bedrock_adapter.py +40 -2
pipecat/adapters/services/gemini_adapter.py +276 -6
pipecat/adapters/services/open_ai_adapter.py +88 -7
pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
pipecat/audio/dtmf/__init__.py +0 -0
pipecat/audio/dtmf/dtmf-0.wav +0 -0
pipecat/audio/dtmf/dtmf-1.wav +0 -0
pipecat/audio/dtmf/dtmf-2.wav +0 -0
pipecat/audio/dtmf/dtmf-3.wav +0 -0
pipecat/audio/dtmf/dtmf-4.wav +0 -0
pipecat/audio/dtmf/dtmf-5.wav +0 -0
pipecat/audio/dtmf/dtmf-6.wav +0 -0
pipecat/audio/dtmf/dtmf-7.wav +0 -0
pipecat/audio/dtmf/dtmf-8.wav +0 -0
pipecat/audio/dtmf/dtmf-9.wav +0 -0
pipecat/audio/dtmf/dtmf-pound.wav +0 -0
pipecat/audio/dtmf/dtmf-star.wav +0 -0
pipecat/audio/dtmf/types.py +47 -0
pipecat/audio/dtmf/utils.py +70 -0
pipecat/audio/filters/aic_filter.py +199 -0
pipecat/audio/utils.py +9 -7
pipecat/extensions/ivr/__init__.py +0 -0
pipecat/extensions/ivr/ivr_navigator.py +452 -0
pipecat/frames/frames.py +156 -43
pipecat/pipeline/llm_switcher.py +76 -0
pipecat/pipeline/parallel_pipeline.py +3 -3
pipecat/pipeline/service_switcher.py +144 -0
pipecat/pipeline/task.py +68 -28
pipecat/pipeline/task_observer.py +10 -0
pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
pipecat/processors/aggregators/llm_context.py +277 -0
pipecat/processors/aggregators/llm_response.py +48 -15
pipecat/processors/aggregators/llm_response_universal.py +840 -0
pipecat/processors/aggregators/openai_llm_context.py +3 -3
pipecat/processors/dtmf_aggregator.py +0 -2
pipecat/processors/filters/stt_mute_filter.py +0 -2
pipecat/processors/frame_processor.py +18 -11
pipecat/processors/frameworks/rtvi.py +17 -10
pipecat/processors/metrics/sentry.py +2 -0
pipecat/runner/daily.py +137 -36
pipecat/runner/run.py +1 -1
pipecat/runner/utils.py +7 -7
pipecat/serializers/asterisk.py +20 -4
pipecat/serializers/exotel.py +1 -1
pipecat/serializers/plivo.py +1 -1
pipecat/serializers/telnyx.py +1 -1
pipecat/serializers/twilio.py +1 -1
pipecat/services/__init__.py +2 -2
pipecat/services/anthropic/llm.py +113 -28
pipecat/services/asyncai/tts.py +4 -0
pipecat/services/aws/llm.py +82 -8
pipecat/services/aws/tts.py +0 -10
pipecat/services/aws_nova_sonic/aws.py +5 -0
pipecat/services/azure/llm.py +77 -1
pipecat/services/cartesia/tts.py +28 -16
pipecat/services/cerebras/llm.py +15 -10
pipecat/services/deepgram/stt.py +8 -0
pipecat/services/deepseek/llm.py +13 -8
pipecat/services/elevenlabs/__init__.py +2 -0
pipecat/services/elevenlabs/stt.py +351 -0
pipecat/services/fireworks/llm.py +13 -8
pipecat/services/fish/tts.py +8 -6
pipecat/services/gemini_multimodal_live/gemini.py +5 -0
pipecat/services/gladia/config.py +7 -1
pipecat/services/gladia/stt.py +23 -15
pipecat/services/google/llm.py +159 -59
pipecat/services/google/llm_openai.py +18 -3
pipecat/services/grok/llm.py +2 -1
pipecat/services/llm_service.py +38 -3
pipecat/services/mem0/memory.py +2 -1
pipecat/services/mistral/llm.py +5 -6
pipecat/services/nim/llm.py +2 -1
pipecat/services/openai/base_llm.py +88 -26
pipecat/services/openai/image.py +6 -1
pipecat/services/openai_realtime_beta/openai.py +5 -2
pipecat/services/openpipe/llm.py +6 -8
pipecat/services/perplexity/llm.py +13 -8
pipecat/services/playht/tts.py +9 -6
pipecat/services/rime/tts.py +1 -1
pipecat/services/sambanova/llm.py +18 -13
pipecat/services/sarvam/tts.py +415 -10
pipecat/services/speechmatics/stt.py +4 -4
pipecat/services/tavus/video.py +1 -1
pipecat/services/tts_service.py +15 -5
pipecat/services/vistaar/llm.py +2 -5
pipecat/transports/base_input.py +32 -19
pipecat/transports/base_output.py +39 -5
pipecat/transports/daily/__init__.py +0 -0
pipecat/transports/daily/transport.py +2371 -0
pipecat/transports/daily/utils.py +410 -0
pipecat/transports/livekit/__init__.py +0 -0
pipecat/transports/livekit/transport.py +1042 -0
pipecat/transports/network/fastapi_websocket.py +12 -546
pipecat/transports/network/small_webrtc.py +12 -922
pipecat/transports/network/webrtc_connection.py +9 -595
pipecat/transports/network/websocket_client.py +12 -481
pipecat/transports/network/websocket_server.py +12 -487
pipecat/transports/services/daily.py +9 -2334
pipecat/transports/services/helpers/daily_rest.py +12 -396
pipecat/transports/services/livekit.py +12 -975
pipecat/transports/services/tavus.py +12 -757
pipecat/transports/smallwebrtc/__init__.py +0 -0
pipecat/transports/smallwebrtc/connection.py +612 -0
pipecat/transports/smallwebrtc/transport.py +936 -0
pipecat/transports/tavus/__init__.py +0 -0
pipecat/transports/tavus/transport.py +770 -0
pipecat/transports/websocket/__init__.py +0 -0
pipecat/transports/websocket/client.py +494 -0
pipecat/transports/websocket/fastapi.py +559 -0
pipecat/transports/websocket/server.py +500 -0
pipecat/transports/whatsapp/__init__.py +0 -0
pipecat/transports/whatsapp/api.py +345 -0
pipecat/transports/whatsapp/client.py +364 -0
{dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/top_level.txt +0 -0

pipecat/services/cartesia/tts.py CHANGED Viewed

@@ -10,7 +10,7 @@ import base64
 import json
 import uuid
 import warnings
-from typing import AsyncGenerator, List, Optional, Union
+from typing import AsyncGenerator, List, Literal, Optional, Union
 from loguru import logger
 from pydantic import BaseModel, Field
@@ -102,7 +102,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         Parameters:
             language: Language to use for synthesis.
-            speed: Voice speed control (string or float).
+            speed: Voice speed control.
             emotion: List of emotion controls.
                 .. deprecated:: 0.0.68
@@ -110,7 +110,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         """
         language: Optional[Language] = Language.EN
-        speed: Optional[Union[str, float]] = ""
+        speed: Optional[Literal["slow", "normal", "fast"]] = None
         emotion: Optional[List[str]] = []
     def __init__(
@@ -272,11 +272,13 @@ class CartesiaTTSService(AudioContextWordTTSService):
         voice_config["id"] = self._voice_id
         if self._settings["emotion"]:
-            warnings.warn(
-                "The 'emotion' parameter in __experimental_controls is deprecated and will be removed in a future version.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'emotion' parameter in __experimental_controls is deprecated and will be removed in a future version.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             voice_config["__experimental_controls"] = {}
             if self._settings["emotion"]:
                 voice_config["__experimental_controls"]["emotion"] = self._settings["emotion"]
@@ -387,7 +389,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         await self._websocket.send(msg)
         self._context_id = None
-    async def _receive_messages(self):
+    async def _process_messages(self):
         async for message in self._get_websocket():
             msg = json.loads(message)
             if not msg or not self.audio_context_available(msg["context_id"]):
@@ -421,6 +423,14 @@ class CartesiaTTSService(AudioContextWordTTSService):
             else:
                 logger.error(f"{self} error, unknown message type: {msg}")
+    async def _receive_messages(self):
+        while True:
+            await self._process_messages()
+            # Cartesia times out after 5 minutes of inactivity (no keepalive
+            # mechanism is available). So, we try to reconnect.
+            logger.debug(f"{self} Cartesia connection was disconnected (timeout?), reconnecting")
+            await self._connect_websocket()
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
         """Generate speech from text using Cartesia's streaming API.
@@ -472,7 +482,7 @@ class CartesiaHttpTTSService(TTSService):
         Parameters:
             language: Language to use for synthesis.
-            speed: Voice speed control (string or float).
+            speed: Voice speed control.
             emotion: List of emotion controls.
                 .. deprecated:: 0.0.68
@@ -480,7 +490,7 @@ class CartesiaHttpTTSService(TTSService):
         """
         language: Optional[Language] = Language.EN
-        speed: Optional[Union[str, float]] = ""
+        speed: Optional[Literal["slow", "normal", "fast"]] = None
         emotion: Optional[List[str]] = Field(default_factory=list)
     def __init__(
@@ -600,11 +610,13 @@ class CartesiaHttpTTSService(TTSService):
             voice_config = {"mode": "id", "id": self._voice_id}
             if self._settings["emotion"]:
-                warnings.warn(
-                    "The 'emotion' parameter in voice.__experimental_controls is deprecated and will be removed in a future version.",
-                    DeprecationWarning,
-                    stacklevel=2,
-                )
+                with warnings.catch_warnings():
+                    warnings.simplefilter("always")
+                    warnings.warn(
+                        "The 'emotion' parameter in voice.__experimental_controls is deprecated and will be removed in a future version.",
+                        DeprecationWarning,
+                        stacklevel=2,
+                    )
                 voice_config["__experimental_controls"] = {"emotion": self._settings["emotion"]}
             await self.start_ttfb_metrics()

pipecat/services/cerebras/llm.py CHANGED Viewed

@@ -9,9 +9,8 @@
 from typing import List
 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService
@@ -27,7 +26,7 @@ class CerebrasLLMService(OpenAILLMService):
         *,
         api_key: str,
         base_url: str = "https://api.cerebras.ai/v1",
-        model: str = "llama-3.3-70b",
+        model: str = "gpt-oss-120b",
         **kwargs,
     ):
         """Initialize the Cerebras LLM service.
@@ -35,7 +34,7 @@ class CerebrasLLMService(OpenAILLMService):
         Args:
             api_key: The API key for accessing Cerebras's API.
             base_url: The base URL for Cerebras API. Defaults to "https://api.cerebras.ai/v1".
-            model: The model identifier to use. Defaults to "llama-3.3-70b".
+            model: The model identifier to use. Defaults to "gpt-oss-120b".
             **kwargs: Additional keyword arguments passed to OpenAILLMService.
         """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
@@ -54,25 +53,31 @@ class CerebrasLLMService(OpenAILLMService):
         logger.debug(f"Creating Cerebras client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
-    def build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for Cerebras chat completion request.
         Cerebras supports a subset of OpenAI parameters, focusing on core
         completion settings without advanced features like frequency/presence penalties.
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "seed": self._settings["seed"],
             "temperature": self._settings["temperature"],
             "top_p": self._settings["top_p"],
             "max_completion_tokens": self._settings["max_completion_tokens"],
         }
+        # Messages, tools, tool_choice
+        params.update(params_from_context)
         params.update(self._settings["extra"])
         return params

pipecat/services/deepgram/stt.py CHANGED Viewed

@@ -276,6 +276,14 @@ class DeepgramSTTService(STTService):
     async def _disconnect(self):
         if self._connection.is_connected:
             self.logger.debug("Disconnecting from Deepgram")
+            # Deepgram swallows asyncio.CancelledError internally which prevents
+            # proper cancellation propagation. This issue was found with
+            # parallel pipelines where `CancelFrame` was not awaited to finish
+            # in all branches and was pushed downstream, reaching the
+            # end of the pipeline, which caused `cleanup()` to be called while
+            # Deepgram disconnection was still finishing and therefore
+            # preventing the task cancellation that occurs during `cleanup()`.
+            # GH issue: https://github.com/deepgram/deepgram-python-sdk/issues/570
             await self._connection.finish()
     async def start_metrics(self):

pipecat/services/deepseek/llm.py CHANGED Viewed

@@ -9,9 +9,8 @@
 from typing import List
 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService
@@ -54,19 +53,22 @@ class DeepSeekLLMService(OpenAILLMService):
         logger.debug(f"Creating DeepSeek client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
-    def _build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def _build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for DeepSeek chat completion request.
         DeepSeek doesn't support some OpenAI parameters like seed and max_completion_tokens.
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "stream_options": {"include_usage": True},
             "frequency_penalty": self._settings["frequency_penalty"],
             "presence_penalty": self._settings["presence_penalty"],
@@ -75,5 +77,8 @@ class DeepSeekLLMService(OpenAILLMService):
             "max_tokens": self._settings["max_tokens"],
         }
+        # Messages, tools, tool_choice
+        params.update(params_from_context)
         params.update(self._settings["extra"])
         return params

pipecat/services/elevenlabs/__init__.py CHANGED Viewed

@@ -9,5 +9,7 @@ import sys
 from pipecat.services import DeprecatedModuleProxy
 from .tts import *
+from .stt import *
+# Old
 sys.modules[__name__] = DeprecatedModuleProxy(globals(), "elevenlabs", "elevenlabs.tts")

pipecat/services/elevenlabs/stt.py ADDED Viewed

@@ -0,0 +1,351 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+"""ElevenLabs speech-to-text service implementation."""
+import asyncio
+from typing import AsyncGenerator, Optional
+from loguru import logger
+from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame
+from pipecat.services.stt_service import SegmentedSTTService
+from pipecat.transcriptions.language import Language
+from pipecat.utils.time import time_now_iso8601
+from pipecat.utils.tracing.service_decorators import traced_stt
+try:
+    from elevenlabs.client import ElevenLabs
+except ModuleNotFoundError as e:
+    logger.error(f"Exception: {e}")
+    logger.error("In order to use ElevenLabs, you need to `pip install pipecat-ai[elevenlabs]`.")
+    raise Exception(f"Missing module: {e}")
+def language_to_elevenlabs_language(language: Language) -> Optional[str]:
+    """Maps pipecat Language enum to ElevenLabs language codes.
+    Args:
+        language: A Language enum value representing the input language.
+    Returns:
+        str or None: The corresponding ElevenLabs language code, or None if not supported.
+    """
+    language_map = {
+        # English
+        Language.EN: "eng",
+        Language.EN_US: "eng",
+        Language.EN_GB: "eng",
+        Language.EN_AU: "eng",
+        Language.EN_CA: "eng",
+        Language.EN_IN: "eng",
+        Language.EN_IE: "eng",
+        Language.EN_NZ: "eng",
+        Language.EN_ZA: "eng",
+        Language.EN_SG: "eng",
+        Language.EN_HK: "eng",
+        Language.EN_PH: "eng",
+        Language.EN_KE: "eng",
+        Language.EN_NG: "eng",
+        Language.EN_TZ: "eng",
+        # Spanish
+        Language.ES: "spa",
+        Language.ES_ES: "spa",
+        Language.ES_MX: "spa",
+        Language.ES_AR: "spa",
+        Language.ES_CO: "spa",
+        Language.ES_CL: "spa",
+        Language.ES_VE: "spa",
+        Language.ES_PE: "spa",
+        Language.ES_EC: "spa",
+        Language.ES_GT: "spa",
+        Language.ES_CU: "spa",
+        Language.ES_BO: "spa",
+        Language.ES_DO: "spa",
+        Language.ES_HN: "spa",
+        Language.ES_PY: "spa",
+        Language.ES_SV: "spa",
+        Language.ES_NI: "spa",
+        Language.ES_CR: "spa",
+        Language.ES_PA: "spa",
+        Language.ES_UY: "spa",
+        Language.ES_PR: "spa",
+        Language.ES_US: "spa",
+        Language.ES_GQ: "spa",
+        # French
+        Language.FR: "fra",
+        Language.FR_FR: "fra",
+        Language.FR_CA: "fra",
+        Language.FR_BE: "fra",
+        Language.FR_CH: "fra",
+        # German
+        Language.DE: "deu",
+        Language.DE_DE: "deu",
+        Language.DE_AT: "deu",
+        Language.DE_CH: "deu",
+        # Italian
+        Language.IT: "ita",
+        Language.IT_IT: "ita",
+        # Portuguese
+        Language.PT: "por",
+        Language.PT_PT: "por",
+        Language.PT_BR: "por",
+        # Hindi
+        Language.HI: "hin",
+        Language.HI_IN: "hin",
+        # Arabic
+        Language.AR: "ara",
+        Language.AR_SA: "ara",
+        Language.AR_EG: "ara",
+        Language.AR_AE: "ara",
+        Language.AR_BH: "ara",
+        Language.AR_DZ: "ara",
+        Language.AR_IQ: "ara",
+        Language.AR_JO: "ara",
+        Language.AR_KW: "ara",
+        Language.AR_LB: "ara",
+        Language.AR_LY: "ara",
+        Language.AR_MA: "ara",
+        Language.AR_OM: "ara",
+        Language.AR_QA: "ara",
+        Language.AR_SY: "ara",
+        Language.AR_TN: "ara",
+        Language.AR_YE: "ara",
+        # Japanese
+        Language.JA: "jpn",
+        Language.JA_JP: "jpn",
+        # Korean
+        Language.KO: "kor",
+        Language.KO_KR: "kor",
+        # Chinese
+        Language.ZH: "cmn",
+        Language.ZH_CN: "cmn",
+        Language.ZH_TW: "cmn",
+        Language.ZH_HK: "cmn",
+        # Russian
+        Language.RU: "rus",
+        Language.RU_RU: "rus",
+        # Dutch
+        Language.NL: "nld",
+        Language.NL_NL: "nld",
+        Language.NL_BE: "nld",
+        # Polish
+        Language.PL: "pol",
+        Language.PL_PL: "pol",
+        # Turkish
+        Language.TR: "tur",
+        Language.TR_TR: "tur",
+        # Swedish
+        Language.SV: "swe",
+        Language.SV_SE: "swe",
+        # Norwegian
+        Language.NO: "nor",
+        Language.NB: "nor",
+        Language.NN: "nor",
+        # Danish
+        Language.DA: "dan",
+        Language.DA_DK: "dan",
+        # Finnish
+        Language.FI: "fin",
+        Language.FI_FI: "fin",
+        # Czech
+        Language.CS: "ces",
+        Language.CS_CZ: "ces",
+        # Hungarian
+        Language.HU: "hun",
+        Language.HU_HU: "hun",
+        # Greek
+        Language.EL: "ell",
+        Language.EL_GR: "ell",
+        # Hebrew
+        Language.HE: "heb",
+        Language.HE_IL: "heb",
+        # Thai
+        Language.TH: "tha",
+        Language.TH_TH: "tha",
+        # Vietnamese
+        Language.VI: "vie",
+        Language.VI_VN: "vie",
+        # Indonesian
+        Language.ID: "ind",
+        Language.ID_ID: "ind",
+        # Malay
+        Language.MS: "msa",
+        Language.MS_MY: "msa",
+        # Ukrainian
+        Language.UK: "ukr",
+        Language.UK_UA: "ukr",
+        # Bulgarian
+        Language.BG: "bul",
+        Language.BG_BG: "bul",
+        # Croatian
+        Language.HR: "hrv",
+        Language.HR_HR: "hrv",
+        # Slovak
+        Language.SK: "slk",
+        Language.SK_SK: "slk",
+        # Slovenian
+        Language.SL: "slv",
+        Language.SL_SI: "slv",
+        # Estonian
+        Language.ET: "est",
+        Language.ET_EE: "est",
+        # Latvian
+        Language.LV: "lav",
+        Language.LV_LV: "lav",
+        # Lithuanian
+        Language.LT: "lit",
+        Language.LT_LT: "lit",
+    }
+    return language_map.get(language)
+class ElevenlabsSTTService(SegmentedSTTService):
+    """ElevenLabs speech-to-text service using Scribe v1 model.
+    This service uses ElevenLabs' batch STT API to transcribe audio segments.
+    It extends SegmentedSTTService to handle VAD-based audio segmentation.
+    Args:
+        api_key: ElevenLabs API key for authentication.
+        model_id: Model to use for transcription (default: "scribe_v1").
+        language: Default language for transcription.
+        tag_audio_events: Whether to tag audio events like laughter (default: False).
+        diarize: Whether to enable speaker diarization (default: False).
+        **kwargs: Additional arguments passed to SegmentedSTTService.
+    """
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        model_id: str = "scribe_v1",
+        language: Language = Language.EN,
+        tag_audio_events: bool = False,
+        sample_rate: Optional[int] = None,
+        diarize: bool = False,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self._client = ElevenLabs(api_key=api_key)
+        self._model_id = model_id
+        self._tag_audio_events = tag_audio_events
+        self._diarize = diarize
+        self._settings = {
+            "language": language,
+            "model_id": self._model_id,
+            "tag_audio_events": self._tag_audio_events,
+            "diarize": self._diarize,
+        }
+        self.set_model_name(model_id)
+    def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+        Returns:
+            True, as ElevenLabs service supports metrics generation.
+        """
+        return True
+    def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert from pipecat Language to ElevenLabs language code.
+        Args:
+            language: The Language enum value to convert.
+        Returns:
+            str or None: The corresponding ElevenLabs language code, or None if not supported.
+        """
+        return language_to_elevenlabs_language(language)
+    async def set_language(self, language: Language):
+        """Set the language for transcription.
+        Args:
+            language: The Language enum value to use for transcription.
+        """
+        self.logger.info(f"Switching STT language to: [{language}]")
+        self._settings["language"] = language
+    @traced_stt
+    async def _handle_transcription(
+        self, transcript: str, is_final: bool, language: Optional[Language] = None
+    ):
+        """Handle a transcription result with tracing."""
+        pass
+    async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
+        """Transcribe the provided audio using ElevenLabs STT.
+        Args:
+            audio: Audio data (WAV format) to transcribe.
+        Yields:
+            Frame: TranscriptionFrame containing the transcribed text or ErrorFrame on failure.
+        """
+        try:
+            await self.start_processing_metrics()
+            await self.start_ttfb_metrics()
+            # Get language code for ElevenLabs API
+            language = self._settings["language"]
+            elevenlabs_lang = self.language_to_service_language(language)
+            # Prepare API parameters
+            params = {
+                "file": audio,
+                "model_id": self._model_id,
+                "tag_audio_events": self._tag_audio_events,
+                "diarize": self._diarize,
+            }
+            # Add language if specified
+            if elevenlabs_lang:
+                params["language_code"] = elevenlabs_lang
+            # Call ElevenLabs STT API in thread pool to avoid blocking
+            transcription = await asyncio.to_thread(self._client.speech_to_text.convert, **params)
+            await self.stop_ttfb_metrics()
+            # Process transcription result
+            if transcription and hasattr(transcription, "text") and transcription.text:
+                transcript_text = transcription.text.strip()
+                if transcript_text:
+                    # Determine language if available from response
+                    response_language = language
+                    if hasattr(transcription, "language_code") and transcription.language_code:
+                        # Try to map back from ElevenLabs language code to pipecat Language
+                        try:
+                            # This is a simplified mapping - you might want to create a reverse map
+                            response_language = language  # For now, keep the original
+                        except ValueError:
+                            self.logger.warning(
+                                f"Unknown language detected: {transcription.language_code}"
+                            )
+                    # Handle transcription with tracing
+                    await self._handle_transcription(transcript_text, True, response_language)
+                    self.logger.debug(f"ElevenLabs transcription: [{transcript_text}]")
+                    yield TranscriptionFrame(
+                        text=transcript_text,
+                        user_id="",
+                        timestamp=time_now_iso8601(),
+                        language=response_language,
+                        result=transcription,
+                    )
+            await self.stop_processing_metrics()
+        except Exception as e:
+            self.logger.error(f"ElevenLabs STT error: {e}")
+            await self.stop_all_metrics()
+            yield ErrorFrame(f"ElevenLabs STT error: {str(e)}")

pipecat/services/fireworks/llm.py CHANGED Viewed

@@ -9,9 +9,8 @@
 from typing import List
 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService
@@ -54,20 +53,23 @@ class FireworksLLMService(OpenAILLMService):
         logger.debug(f"Creating Fireworks client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
-    def build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for Fireworks chat completion request.
         Fireworks doesn't support some OpenAI parameters like seed, max_completion_tokens,
         and stream_options.
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "frequency_penalty": self._settings["frequency_penalty"],
             "presence_penalty": self._settings["presence_penalty"],
             "temperature": self._settings["temperature"],
@@ -75,5 +77,8 @@ class FireworksLLMService(OpenAILLMService):
             "max_tokens": self._settings["max_tokens"],
         }
+        # Messages, tools, tool_choice
+        params.update(params_from_context)
         params.update(self._settings["extra"])
         return params

pipecat/services/fish/tts.py CHANGED Viewed

@@ -120,12 +120,14 @@ class FishAudioTTSService(InterruptibleTTSService):
         if model:
             import warnings
-            warnings.warn(
-                "Parameter 'model' is deprecated and will be removed in a future version. "
-                "Use 'reference_id' instead.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "Parameter 'model' is deprecated and will be removed in a future version. "
+                    "Use 'reference_id' instead.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             reference_id = model
         self._api_key = api_key

pipecat/services/gemini_multimodal_live/gemini.py CHANGED Viewed

@@ -33,6 +33,7 @@ from pipecat.frames.frames import (
     InputAudioRawFrame,
     InputImageRawFrame,
     InputTextRawFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesAppendFrame,
@@ -738,6 +739,10 @@ class GeminiMultimodalLiveLLMService(LLMService):
                 # Support just one tool call per context frame for now
                 tool_result_message = context.messages[-1]
                 await self._tool_result(tool_result_message)
+        elif isinstance(frame, LLMContextFrame):
+            raise NotImplementedError(
+                "Universal LLMContext is not yet supported for Gemini Multimodal Live."
+            )
         elif isinstance(frame, InputTextRawFrame):
             await self._send_user_text(frame.text)
             await self.push_frame(frame, direction)

dv-pipecat-ai 0.0.75.dev887__py3-none-any.whl → 0.0.82.dev19__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.75.dev887__py3-none-any.whl → 0.0.82.dev19__py3-none-any.whl