dv-pipecat-ai 0.0.85.dev844__py3-none-any.whl → 0.0.85.dev848__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of dv-pipecat-ai might be problematic.

dv_pipecat_ai-0.0.85.dev848.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dv-pipecat-ai
-Version: 0.0.85.dev844
+Version: 0.0.85.dev848
 Summary: An open source framework for voice (and multimodal) assistants
 License-Expression: BSD-2-Clause
 Project-URL: Source, https://github.com/pipecat-ai/pipecat
dv_pipecat_ai-0.0.85.dev848.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
-dv_pipecat_ai-0.0.85.dev844.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
+dv_pipecat_ai-0.0.85.dev848.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
 pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
 pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -79,7 +79,7 @@ pipecat/extensions/voicemail/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
 pipecat/extensions/voicemail/voicemail_detector.py,sha256=JxmU2752iWP_1_GmzZReNESUTFAeyEa4XBPL20_C208,30004
 pipecat/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pipecat/frames/frames.proto,sha256=JXZm3VXLR8zMOUcOuhVoe2mhM3MQIQGMJXLopdJO_5Y,839
-pipecat/frames/frames.py,sha256=vuYtmyK1QSU2AWx2c_pFQhcmpXqSTnfqAXF6DXKzTG8,49605
+pipecat/frames/frames.py,sha256=248d54lNOyO04dq9ni51yUTWUItmGw8b9QKarrDGNeo,50354
 pipecat/frames/protobufs/frames_pb2.py,sha256=VHgGV_W7qQ4sfQK6RHb5_DggLm3PiSYMr6aBZ8_p1cQ,2590
 pipecat/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pipecat/metrics/metrics.py,sha256=bdZNciEtLTtA-xgoKDz2RJAy6fKrXkTwz3pryVHzc2M,2713
@@ -217,8 +217,8 @@ pipecat/services/deepgram/flux/stt.py,sha256=yCZodrHAOShgYy_GbdviX8iAuh36dBgDL41
 pipecat/services/deepseek/__init__.py,sha256=bU5z_oNGzgrF_YpsD9pYIMtEibeZFaUobbRjJ9WcYyE,259
 pipecat/services/deepseek/llm.py,sha256=5KjpU2blmhUTM3LcRE1ymdsk6OmoFkIzeQgyNOGwQh8,3112
 pipecat/services/elevenlabs/__init__.py,sha256=cMx5v0HEMh4WetMm5byR9tIjG6_wNVs9UxqWyB3tjlM,313
-pipecat/services/elevenlabs/stt.py,sha256=F3xD82eOIy5OyyE-5StdoFFvKjIXlos2yyP0cyNQj6Y,12214
-pipecat/services/elevenlabs/tts.py,sha256=Okctydqoz2HG9B69l-bDunNHcqoul-kUxhEZjYt539U,45188
+pipecat/services/elevenlabs/stt.py,sha256=_RhBKpUYEGKMpcO7y4RLxmEOMK11LZFdZqDFIA-DZXk,27303
+pipecat/services/elevenlabs/tts.py,sha256=skUndgUatx2F5rjg2tBZLutB8k9B9Cjy-cUeglCDdwc,45314
 pipecat/services/fal/__init__.py,sha256=z_kfZETvUcKy68Lyvni4B-RtdkOvz3J3eh6sFDVKq6M,278
 pipecat/services/fal/image.py,sha256=vArKLKrIGoZfw_xeZY_E7zbUzfzVsScj-R7mOmVqjRQ,4585
 pipecat/services/fal/stt.py,sha256=-5tw7N8srBJTS0Q65SN4csmLkIB6cLHR9pXKimxg55o,9678
@@ -280,7 +280,7 @@ pipecat/services/nim/llm.py,sha256=o4WPGI6kOmSiMV7WwOZ0cNEAoq9hW4Aqs2R8X7c9i94,4
 pipecat/services/ollama/__init__.py,sha256=aw-25zYsR8LR74OFFlMKMTnJjaKwOzdPWVsClueNRkI,255
 pipecat/services/ollama/llm.py,sha256=rfpG92LRHGJlpENKhF6ld8CLVS9DxlKW-WRVNldOIGs,1605
 pipecat/services/openai/__init__.py,sha256=V0ZVa8PzEm3hmcStYICbAsYwfgk4ytZ6kiQoq9UZPmI,354
-pipecat/services/openai/base_llm.py,sha256=J4Ltg1KOXciiUIMBFLn0SmDTZereEE-1LKrPfBsLzFw,19127
+pipecat/services/openai/base_llm.py,sha256=jOiWacimREywCMZZwAwH8RAHCbwnnXvbqAjWQUYA0yM,20727
 pipecat/services/openai/image.py,sha256=3e3h-dVQ6DQuQE7fp8akXwRMd-oYOdGuZg7RCOjHu9A,2994
 pipecat/services/openai/llm.py,sha256=_aKtz1VebSFUUenT3tH6mBW9pSCm65_u45cDu_dkTzs,7396
 pipecat/services/openai/stt.py,sha256=Idf0k73kxFyDgNRBt62MFpoKKNsBV9bwvJteJ6MGWzQ,2419
@@ -329,7 +329,7 @@ pipecat/services/sarvam/tts.py,sha256=lrwfdC53kZ7f2QPgNRxzryISNkrJCvNtlZ-19-iXg9
 pipecat/services/simli/__init__.py,sha256=cbDcqOaGsEgKbGYKpJ1Vv7LN4ZjOWA04sE84WW5vgQI,257
 pipecat/services/simli/video.py,sha256=Zu2XLvl2Y6VHaWzT9wEdzW9d0EYoZyzYLxjQFyV8vho,8320
 pipecat/services/soniox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pipecat/services/soniox/stt.py,sha256=Ndml6QvPQ1WZBvdGT3LSg-LLWwrZ8KlqW8wBBFsQrrM,16509
+pipecat/services/soniox/stt.py,sha256=zRp5qWU051hEAikt0vB0rbHrkQkH5sT-IOe-o5vCurQ,16650
 pipecat/services/speechmatics/__init__.py,sha256=Jgq1fqrZVkpWC21D79L1cn5Ub8PnYgnnCaqC5pOlbIc,89
 pipecat/services/speechmatics/stt.py,sha256=GLGJzlMSeZ1WzTOMjhKXDl5JYkqGhnFTbP3o0ez0hSw,44696
 pipecat/services/tavus/__init__.py,sha256=SNyyi2Xq6tXIihDG2Bwvmg6Srbd-uWd1RwG-NKWcPuI,257
@@ -416,7 +416,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
 pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
 pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
 pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
-dv_pipecat_ai-0.0.85.dev844.dist-info/METADATA,sha256=JgW9PLS_gplsOlHfyohgocRxrsiivvsAEySMY214f4U,32955
-dv_pipecat_ai-0.0.85.dev844.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dv_pipecat_ai-0.0.85.dev844.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
-dv_pipecat_ai-0.0.85.dev844.dist-info/RECORD,,
+dv_pipecat_ai-0.0.85.dev848.dist-info/METADATA,sha256=T2IPoO2Nmt09lIxM0rKmJRa5ZIBQ-9fcbswOy90lkJg,32955
+dv_pipecat_ai-0.0.85.dev848.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dv_pipecat_ai-0.0.85.dev848.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
+dv_pipecat_ai-0.0.85.dev848.dist-info/RECORD,,
pipecat/frames/frames.py CHANGED
@@ -586,6 +586,27 @@ class LLMRunFrame(DataFrame):
     pass


+@dataclass
+class WarmupLLMFrame(DataFrame):
+    """Frame to trigger prompt caching/warmup in supported LLM providers.
+
+    This frame instructs the LLM service to cache the provided messages
+    without generating a visible response. Primarily used for warming up provider
+    caches (e.g., Claude's prompt caching, OpenAI's prompt caching) to improve
+    latency for subsequent requests.
+
+    The LLM service should:
+    1. Send the messages to the provider to trigger caching
+    2. Generate a minimal response (e.g., single word)
+    3. Discard the response without emitting LLM output frames
+
+    Parameters:
+        messages: List of messages to send for cache warming (should match conversation structure).
+    """
+
+    messages: List[dict]
+
+
 @dataclass
 class LLMMessagesAppendFrame(DataFrame):
     """Frame containing LLM messages to append to current context.
pipecat/services/elevenlabs/stt.py CHANGED
@@ -4,26 +4,43 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

-"""ElevenLabs speech-to-text service implementation.
-
-This module provides integration with ElevenLabs' Speech-to-Text API for transcription
-using segmented audio processing. The service uploads audio files and receives
-transcription results directly.
-"""
+"""ElevenLabs speech-to-text service implementations."""

+import asyncio
+import base64
 import io
-from typing import AsyncGenerator, Optional
+import json
+import urllib.parse
+from typing import Any, AsyncGenerator, Dict, Literal, Optional

 import aiohttp
 from loguru import logger
 from pydantic import BaseModel

-from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame
-from pipecat.services.stt_service import SegmentedSTTService
+from pipecat.frames.frames import (
+    CancelFrame,
+    EndFrame,
+    ErrorFrame,
+    Frame,
+    InterimTranscriptionFrame,
+    StartFrame,
+    TranscriptionFrame,
+    UserStartedSpeakingFrame,
+    UserStoppedSpeakingFrame,
+)
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.services.stt_service import SegmentedSTTService, WebsocketSTTService
 from pipecat.transcriptions.language import Language
 from pipecat.utils.time import time_now_iso8601
 from pipecat.utils.tracing.service_decorators import traced_stt

+try:
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
+except ModuleNotFoundError:
+    websocket_connect = None  # type: ignore[assignment]
+    State = None  # type: ignore[assignment]
+

 def language_to_elevenlabs_language(language: Language) -> Optional[str]:
     """Convert a Language enum to ElevenLabs language code.
@@ -150,6 +167,19 @@ def language_to_elevenlabs_language(language: Language) -> Optional[str]:
     return result


+def elevenlabs_language_code_to_language(language_code: Optional[str]) -> Optional[Language]:
+    """Convert an ElevenLabs language code back to a Language enum value."""
+    if not language_code:
+        return None
+
+    normalized = language_code.lower()
+    for language in Language:
+        code = language_to_elevenlabs_language(language)
+        if code and code.lower() == normalized:
+            return language
+    return None
+
+
 class ElevenLabsSTTService(SegmentedSTTService):
     """Speech-to-text service using ElevenLabs' file-based API.

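Note: the helper simply inverts the existing forward mapping by scanning the Language enum, so lookups are case-insensitive. An illustrative check (assuming the forward mapping sends Language.EN to "en"):

    from pipecat.transcriptions.language import Language

    assert language_to_elevenlabs_language(Language.EN) == "en"  # assumed forward mapping
    assert elevenlabs_language_code_to_language("EN") == Language.EN
    assert elevenlabs_language_code_to_language(None) is None
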
@@ -337,3 +367,376 @@ class ElevenLabsSTTService(SegmentedSTTService):
         except Exception as e:
             self.logger.error(f"ElevenLabs STT error: {e}")
             yield ErrorFrame(f"ElevenLabs STT error: {str(e)}")
+
+
+class ElevenLabsRealtimeSTTService(WebsocketSTTService):
+    """Realtime speech-to-text service using ElevenLabs Scribe v2 WebSocket API."""
+
+    class InputParams(BaseModel):
+        """Realtime connection parameters derived from ElevenLabs documentation."""
+
+        language: Optional[Language] = None
+        commit_strategy: Literal["manual", "vad"] = "manual"
+        vad_silence_threshold_secs: Optional[float] = None
+        vad_threshold: Optional[float] = None
+        min_speech_duration_ms: Optional[int] = None
+        min_silence_duration_ms: Optional[int] = None
+
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        sample_rate: Optional[int] = None,
+        model: str = "scribe_v2_realtime",
+        url: str = "wss://api.elevenlabs.io/v1/speech-to-text/realtime",
+        params: Optional["ElevenLabsRealtimeSTTService.InputParams"] = None,
+        reconnect_on_error: bool = True,
+        **kwargs,
+    ):
+        """Initialize the realtime STT service.
+
+        Args:
+            api_key: ElevenLabs API key for authentication.
+            sample_rate: Optional input sample rate. Defaults to pipeline sample rate.
+            model: Scribe realtime model identifier.
+            url: WebSocket endpoint for realtime transcription.
+            params: Optional realtime configuration options.
+            reconnect_on_error: Whether to auto-reconnect on transient failures.
+            **kwargs: Additional arguments forwarded to WebsocketSTTService.
+        """
+        if websocket_connect is None or State is None:
+            logger.error(
+                "In order to use ElevenLabsRealtimeSTTService, you need to "
+                "`pip install pipecat-ai[elevenlabs]` (websockets extra)."
+            )
+            raise ModuleNotFoundError("Missing optional dependency: websockets")
+
+        super().__init__(sample_rate=sample_rate, reconnect_on_error=reconnect_on_error, **kwargs)
+
+        self._api_key = api_key
+        self._url = url
+        self.set_model_name(model)
+        self._model = model
+        self._params = params or ElevenLabsRealtimeSTTService.InputParams()
+        self._language_override = self._params.language
+        self._encoding = None
+        self._receive_task: Optional[asyncio.Task] = None
+        self._pending_final_message: Optional[Dict[str, Any]] = None
+        self._pending_final_task: Optional[asyncio.Task] = None
+        self._timestamp_merge_delay_s = 0.25
+        self._ttfb_started = False
+
+    @property
+    def commit_strategy(self) -> str:
+        """Return the configured commit strategy (manual or vad)."""
+        return (self._params.commit_strategy or "manual").lower()
+
+    def can_generate_metrics(self) -> bool:
+        """Realtime ElevenLabs service supports latency metrics."""
+        return True
+
+    async def start(self, frame: StartFrame):
+        """Start the realtime STT service and establish WebSocket connection."""
+        await super().start(frame)
+        self._encoding = self._determine_encoding(self.sample_rate)
+        await self._connect()
+
+    async def stop(self, frame: EndFrame):
+        """Stop the realtime STT service and close WebSocket connection."""
+        await super().stop(frame)
+        await self._disconnect()
+
+    async def cancel(self, frame: CancelFrame):
+        """Cancel the realtime STT service and close WebSocket connection."""
+        await super().cancel(frame)
+        await self._disconnect()
+
+    async def set_language(self, language: Language):
+        """Update preferred transcription language (requires reconnect)."""
+        self._language_override = language
+        self._params.language = language
+        if self._websocket:
+            await self._disconnect()
+            await self._connect()
+
+    async def set_model(self, model: str):
+        """Set the STT model and reconnect the WebSocket."""
+        await super().set_model(model)
+        self._model = model
+        if self._websocket:
+            await self._disconnect()
+            await self._connect()
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames and handle VAD events for commit strategy."""
+        await super().process_frame(frame, direction)
+
+        if isinstance(frame, UserStartedSpeakingFrame):
+            if frame.emulated:
+                return
+            self._ttfb_started = False
+            await self.start_processing_metrics()
+        elif isinstance(frame, UserStoppedSpeakingFrame):
+            if frame.emulated:
+                return
+            if self.commit_strategy == "manual":
+                await self._send_commit()
+
+    async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
+        """Stream audio chunks over the ElevenLabs realtime WebSocket."""
+        if not audio:
+            yield None
+            return
+
+        await self._ensure_connection()
+        await self._send_audio_chunk(audio)
+        yield None
+
+    async def _ensure_connection(self):
+        if not self._websocket or self._websocket.state is State.CLOSED:
+            await self._connect()
+
+    async def _connect(self):
+        await self._connect_websocket()
+        if self._websocket and not self._receive_task:
+            self._receive_task = asyncio.create_task(self._receive_task_handler(self._report_error))
+
+    async def _disconnect(self):
+        if self._receive_task:
+            await self.cancel_task(self._receive_task)
+            self._receive_task = None
+
+        await self._clear_pending_final()
+        await self._disconnect_websocket()
+
+    async def _connect_websocket(self):
+        try:
+            if self._websocket and self._websocket.state is State.OPEN:
+                return
+
+            ws_url = self._build_websocket_url()
+            headers = {"xi-api-key": self._api_key}
+            self.logger.debug(f"Connecting to ElevenLabs realtime STT at {ws_url}")
+            self._websocket = await websocket_connect(ws_url, additional_headers=headers)
+            await self._call_event_handler("on_connected")
+        except Exception as e:
+            self.logger.error(f"{self} unable to connect to ElevenLabs realtime STT: {e}")
+            self._websocket = None
+            await self._call_event_handler("on_connection_error", f"{e}")
+
+    async def _disconnect_websocket(self):
+        try:
+            await self.stop_all_metrics()
+            if self._websocket and self._websocket.state is State.OPEN:
+                self.logger.debug("Disconnecting from ElevenLabs realtime STT")
+                await self._websocket.close()
+        except Exception as e:
+            self.logger.error(f"{self} error closing ElevenLabs realtime websocket: {e}")
+        finally:
+            self._websocket = None
+            await self._call_event_handler("on_disconnected")
+
+    async def _receive_messages(self):
+        async for message in self._get_websocket():
+            await self._process_event(message)
+
+    def _get_websocket(self):
+        if not self._websocket:
+            raise RuntimeError("ElevenLabs realtime websocket not connected")
+        return self._websocket
+
+    async def _process_event(self, message: Any):
+        try:
+            data = json.loads(message)
+        except json.JSONDecodeError:
+            self.logger.warning(f"ElevenLabs realtime STT sent invalid JSON: {message}")
+            return
+
+        message_type = data.get("message_type")
+
+        if message_type == "session_started":
+            self.logger.debug("ElevenLabs realtime session started")
+            return
+
+        if message_type == "partial_transcript":
+            await self._emit_partial_transcript(data)
+        elif message_type == "committed_transcript":
+            await self._handle_committed_transcript(data)
+        elif message_type == "committed_transcript_with_timestamps":
+            await self._handle_committed_transcript_with_timestamps(data)
+        elif message_type in {
+            "auth_error",
+            "quota_exceeded",
+            "transcriber_error",
+            "input_error",
+            "error",
+        }:
+            fatal = message_type in {"auth_error", "quota_exceeded", "error"}
+            description = data.get("error", data)
+            await self.push_error(
+                ErrorFrame(f"ElevenLabs realtime error: {description}", fatal=fatal)
+            )
+        else:
+            self.logger.debug(f"Unhandled ElevenLabs realtime message: {data}")
+
+    async def _emit_partial_transcript(self, data: Dict[str, Any]):
+        text = (data.get("text") or data.get("transcript") or "").strip()
+        if not text:
+            return
+
+        language = (
+            elevenlabs_language_code_to_language(data.get("language_code"))
+            or self._language_override
+        )
+        await self.stop_ttfb_metrics()
+
+        await self.push_frame(
+            InterimTranscriptionFrame(
+                text,
+                self._user_id,
+                time_now_iso8601(),
+                language,
+                result=data,
+            )
+        )
+
+    async def _handle_committed_transcript(self, data: Dict[str, Any]):
+        if self._pending_final_message:
+            await self._emit_transcription(self._pending_final_message)
+            self._pending_final_message = None
+
+        self._pending_final_message = data
+        await self._schedule_pending_final_emit()
+
+    async def _handle_committed_transcript_with_timestamps(self, data: Dict[str, Any]):
+        if self._pending_final_message:
+            merged = {**self._pending_final_message, **data}
+            await self._emit_transcription(merged)
+            await self._clear_pending_final()
+        else:
+            await self._emit_transcription(data)
+
+    async def _schedule_pending_final_emit(self):
+        await self._clear_pending_final(timer_only=True)
+        self._pending_final_task = asyncio.create_task(self._emit_pending_after_delay())
+
+    async def _emit_pending_after_delay(self):
+        try:
+            await asyncio.sleep(self._timestamp_merge_delay_s)
+            if self._pending_final_message:
+                await self._emit_transcription(self._pending_final_message)
+                self._pending_final_message = None
+        except asyncio.CancelledError:
+            pass
+        finally:
+            self._pending_final_task = None
+
+    async def _clear_pending_final(self, timer_only: bool = False):
+        if self._pending_final_task:
+            await self.cancel_task(self._pending_final_task)
+            self._pending_final_task = None
+
+        if not timer_only:
+            self._pending_final_message = None
+
+    async def _emit_transcription(self, data: Dict[str, Any]):
+        text = (data.get("text") or data.get("transcript") or "").strip()
+        if not text:
+            return
+
+        language = (
+            elevenlabs_language_code_to_language(data.get("language_code"))
+            or self._language_override
+        )
+        await self.stop_ttfb_metrics()
+
+        frame = TranscriptionFrame(
+            text,
+            self._user_id,
+            time_now_iso8601(),
+            language,
+            result=data,
+        )
+
+        await self.push_frame(frame)
+        await self._handle_transcription(text, True, language)
+        await self.stop_processing_metrics()
+
+    async def _send_audio_chunk(self, audio: bytes):
+        if not audio or not self._websocket:
+            return
+
+        if not self._ttfb_started:
+            await self.start_ttfb_metrics()
+            self._ttfb_started = True
+
+        payload = {
+            "message_type": "input_audio_chunk",
+            "audio_base_64": base64.b64encode(audio).decode("ascii"),
+            "commit": False,
+            "sample_rate": self.sample_rate,
+        }
+        await self._websocket.send(json.dumps(payload))
+
+    async def _send_commit(self):
+        if not self._websocket:
+            return
+        payload = {
+            "message_type": "input_audio_chunk",
+            "audio_base_64": "",
+            "commit": True,
+            "sample_rate": self.sample_rate,
+        }
+        await self._websocket.send(json.dumps(payload))
+
+    def _build_websocket_url(self) -> str:
+        if not self.sample_rate:
+            raise ValueError(
+                "ElevenLabs realtime STT requires a valid sample rate (start() must run first)."
+            )
+
+        params = {
+            "model_id": self._model,
+            "encoding": self._encoding or "pcm_16000",
+            "sample_rate": str(self.sample_rate),
+            "commit_strategy": self.commit_strategy,
+        }
+
+        language_code = (
+            language_to_elevenlabs_language(self._language_override)
+            if self._language_override
+            else None
+        )
+        if language_code:
+            params["language_code"] = language_code
+
+        if self._params.vad_silence_threshold_secs is not None:
+            params["vad_silence_threshold_secs"] = str(self._params.vad_silence_threshold_secs)
+        if self._params.vad_threshold is not None:
+            params["vad_threshold"] = str(self._params.vad_threshold)
+        if self._params.min_speech_duration_ms is not None:
+            params["min_speech_duration_ms"] = str(self._params.min_speech_duration_ms)
+        if self._params.min_silence_duration_ms is not None:
+            params["min_silence_duration_ms"] = str(self._params.min_silence_duration_ms)
+
+        return f"{self._url}?{urllib.parse.urlencode(params)}"
+
+    def _determine_encoding(self, sample_rate: int) -> str:
+        if not sample_rate:
+            raise ValueError("ElevenLabs realtime STT requires a valid sample rate.")
+
+        supported_rates = {8000, 16000, 22050, 24000, 44100, 48000}
+        if sample_rate not in supported_rates:
+            raise ValueError(
+                f"ElevenLabs realtime STT supports sample rates {sorted(supported_rates)}. "
+                f"Received {sample_rate} Hz."
+            )
+        return f"pcm_{sample_rate}"
+
+    @traced_stt
+    async def _handle_transcription(
+        self, transcript: str, is_final: bool, language: Optional[Language] = None
+    ):
+        """Handle a transcription result with tracing."""
+        # Metrics are stopped by the caller when needed.
+        return
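
Note: a minimal construction sketch for the new realtime service (illustrative values, assuming the elevenlabs extra is installed and the pipeline provides VAD events):

    import os

    from pipecat.services.elevenlabs.stt import ElevenLabsRealtimeSTTService
    from pipecat.transcriptions.language import Language

    stt = ElevenLabsRealtimeSTTService(
        api_key=os.environ["ELEVENLABS_API_KEY"],
        params=ElevenLabsRealtimeSTTService.InputParams(
            language=Language.EN,
            # "manual" commits on UserStoppedSpeakingFrame from the pipeline VAD;
            # "vad" lets ElevenLabs segment server-side via the vad_* thresholds.
            commit_strategy="manual",
        ),
    )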
pipecat/services/elevenlabs/tts.py CHANGED
@@ -14,7 +14,17 @@ import asyncio
 import base64
 import json
 import uuid
-from typing import Any, AsyncGenerator, Dict, List, Literal, Mapping, Optional, Tuple, Union
+from typing import (
+    Any,
+    AsyncGenerator,
+    Dict,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Tuple,
+    Union,
+)

 import aiohttp
 from loguru import logger
@@ -157,7 +167,13 @@ def build_elevenlabs_voice_settings(
     Returns:
         Dictionary of voice settings or None if no valid settings are provided.
     """
-    voice_setting_keys = ["stability", "similarity_boost", "style", "use_speaker_boost", "speed"]
+    voice_setting_keys = [
+        "stability",
+        "similarity_boost",
+        "style",
+        "use_speaker_boost",
+        "speed",
+    ]

     voice_settings = {}
     for key in voice_setting_keys:
@@ -530,7 +546,9 @@ class ElevenLabsTTSService(AudioContextWordTTSService):

         # Set max websocket message size to 16MB for large audio responses
         self._websocket = await websocket_connect(
-            url, max_size=16 * 1024 * 1024, additional_headers={"xi-api-key": self._api_key}
+            url,
+            max_size=16 * 1024 * 1024,
+            additional_headers={"xi-api-key": self._api_key},
         )

         await self._call_event_handler("on_connected")
@@ -549,7 +567,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService):
             if self._context_id:
                 await self._websocket.send(json.dumps({"close_socket": True}))
             await self._websocket.close()
-            logger.debug("Disconnected from ElevenLabs")
+            self.logger.debug("Disconnected from ElevenLabs")
         except Exception as e:
             self.logger.error(f"{self} error closing websocket: {e}")
         finally:
pipecat/services/openai/base_llm.py CHANGED
@@ -32,6 +32,7 @@ from pipecat.frames.frames import (
     LLMMessagesFrame,
     LLMTextFrame,
     LLMUpdateSettingsFrame,
+    WarmupLLMFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
 from pipecat.processors.aggregators.llm_context import LLMContext
@@ -438,14 +439,19 @@ class BaseOpenAILLMService(LLMService):
         completions and manage settings.
         >>>>>>> dv-stage

-       Args:
+        Args:
             frame: The frame to process.
             direction: The direction of frame processing.
         """
         await super().process_frame(frame, direction)

         context = None
-        if isinstance(frame, OpenAILLMContextFrame):
+        if isinstance(frame, WarmupLLMFrame):
+            # Handle warmup frame - prime cache without emitting response
+            # Run in background to avoid blocking the pipeline
+            asyncio.create_task(self._handle_warmup_frame(frame))
+            return  # Don't process further, warmup is silent
+        elif isinstance(frame, OpenAILLMContextFrame):
             # Handle OpenAI-specific context frames
             context = frame.context
         elif isinstance(frame, LLMContextFrame):
@@ -470,3 +476,32 @@ class BaseOpenAILLMService(LLMService):
         finally:
             await self.stop_processing_metrics()
             await self.push_frame(LLMFullResponseEndFrame())
+
+    async def _handle_warmup_frame(self, frame: WarmupLLMFrame):
+        """Handle WarmupLLMFrame to prime the LLM cache without emitting responses.
+
+        This method sends a minimal request to the LLM to warm up any provider-side
+        caches (like prompt caching). The response is discarded and no frames are emitted.
+
+        Args:
+            frame: WarmupLLMFrame containing the messages to cache.
+        """
+        try:
+            # Use the provided messages for warmup
+            messages: List[ChatCompletionMessageParam] = frame.messages  # type: ignore
+
+            # Make a non-streaming call to warm the cache
+            # We use a minimal max_tokens to reduce latency and cost
+            await self._client.chat.completions.create(
+                model=self.model_name,  # Use the property, not self._model
+                messages=messages,
+                max_tokens=10,  # Minimal response
+                stream=False,
+            )
+
+            self.logger.info("LLM cache warmed successfully")
+            # Intentionally don't emit any frames - this is a silent warmup
+
+        except Exception as e:
+            self.logger.error(f"Failed to warm LLM cache: {e}")
+            # Don't propagate error - warmup failure shouldn't break the bot
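
Note: process_frame fires this handler with asyncio.create_task and returns immediately, so warmup never blocks real turns; failures are logged and deliberately not propagated. The request itself is conceptually just a tiny non-streaming completion, roughly equivalent to this standalone sketch (model name illustrative, reusing warmup_messages from the earlier sketch, client assumed to read OPENAI_API_KEY from the environment):

    from openai import AsyncOpenAI

    client = AsyncOpenAI()

    # The provider caches the shared prompt prefix; the short reply is discarded.
    await client.chat.completions.create(
        model="gpt-4o",  # illustrative
        messages=warmup_messages,
        max_tokens=10,
        stream=False,
    )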
pipecat/services/soniox/stt.py CHANGED
@@ -241,6 +241,7 @@ class SonioxSTTService(STTService):
             if self._receive_task != asyncio.current_task():
                 await self._receive_task
             self._receive_task = None
+        self.logger.debug("Disconnected from Soniox STT")

     async def stop(self, frame: EndFrame):
         """Stop the Soniox STT websocket connection.
@@ -382,7 +383,10 @@ class SonioxSTTService(STTService):

         if self._final_transcription_buffer or non_final_transcription:
             final_text = "".join(
-                map(lambda token: token["text"], self._final_transcription_buffer)
+                map(
+                    lambda token: token["text"],
+                    self._final_transcription_buffer,
+                )
             )
             non_final_text = "".join(
                 map(lambda token: token["text"], non_final_transcription)