dv-pipecat-ai 0.0.85.dev824__py3-none-any.whl → 0.0.85.dev858__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dv-pipecat-ai might be problematic.

Files changed (31)
  1. {dv_pipecat_ai-0.0.85.dev824.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/METADATA +2 -1
  2. {dv_pipecat_ai-0.0.85.dev824.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/RECORD +31 -29
  3. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +5 -1
  4. pipecat/frames/frames.py +22 -0
  5. pipecat/metrics/connection_metrics.py +45 -0
  6. pipecat/processors/aggregators/llm_response.py +15 -9
  7. pipecat/processors/dtmf_aggregator.py +17 -21
  8. pipecat/processors/frame_processor.py +44 -1
  9. pipecat/processors/metrics/frame_processor_metrics.py +108 -0
  10. pipecat/processors/transcript_processor.py +2 -1
  11. pipecat/serializers/__init__.py +2 -0
  12. pipecat/serializers/asterisk.py +16 -2
  13. pipecat/serializers/convox.py +2 -2
  14. pipecat/serializers/custom.py +2 -2
  15. pipecat/serializers/vi.py +326 -0
  16. pipecat/services/cartesia/tts.py +75 -10
  17. pipecat/services/deepgram/stt.py +317 -17
  18. pipecat/services/elevenlabs/stt.py +487 -19
  19. pipecat/services/elevenlabs/tts.py +28 -4
  20. pipecat/services/google/llm.py +26 -11
  21. pipecat/services/openai/base_llm.py +79 -14
  22. pipecat/services/salesforce/llm.py +64 -59
  23. pipecat/services/sarvam/tts.py +0 -1
  24. pipecat/services/soniox/stt.py +45 -10
  25. pipecat/services/vistaar/llm.py +97 -6
  26. pipecat/transcriptions/language.py +50 -0
  27. pipecat/transports/base_input.py +15 -11
  28. pipecat/transports/base_output.py +26 -3
  29. {dv_pipecat_ai-0.0.85.dev824.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/WHEEL +0 -0
  30. {dv_pipecat_ai-0.0.85.dev824.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/licenses/LICENSE +0 -0
  31. {dv_pipecat_ai-0.0.85.dev824.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,7 @@ from openai import (
     APITimeoutError,
     AsyncOpenAI,
     AsyncStream,
+    BadRequestError,
     DefaultAsyncHttpxClient,
 )
 from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam
@@ -32,6 +33,7 @@ from pipecat.frames.frames import (
     LLMMessagesFrame,
     LLMTextFrame,
     LLMUpdateSettingsFrame,
+    WarmupLLMFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
 from pipecat.processors.aggregators.llm_context import LLMContext
@@ -99,6 +101,7 @@ class BaseOpenAILLMService(LLMService):
         params: Optional[InputParams] = None,
         retry_timeout_secs: Optional[float] = 5.0,
         retry_on_timeout: Optional[bool] = False,
+        enable_warmup: bool = False,
         **kwargs,
     ):
         """Initialize the BaseOpenAILLMService.
@@ -113,6 +116,7 @@ class BaseOpenAILLMService(LLMService):
             params: Input parameters for model configuration and behavior.
             retry_timeout_secs: Request timeout in seconds. Defaults to 5.0 seconds.
             retry_on_timeout: Whether to retry the request once if it times out.
+            enable_warmup: Whether to enable LLM cache warmup. Defaults to False.
             **kwargs: Additional arguments passed to the parent LLMService.
         """
         super().__init__(**kwargs)
@@ -132,6 +136,7 @@ class BaseOpenAILLMService(LLMService):
         }
         self._retry_timeout_secs = retry_timeout_secs
         self._retry_on_timeout = retry_on_timeout
+        self._enable_warmup = enable_warmup
         self.set_model_name(model)
         self._client = self.create_client(
             api_key=api_key,
@@ -200,20 +205,29 @@ class BaseOpenAILLMService(LLMService):
         """
         params = self.build_chat_completion_params(params_from_context)
 
-        if self._retry_on_timeout:
-            try:
-                chunks = await asyncio.wait_for(
-                    self._client.chat.completions.create(**params), timeout=self._retry_timeout_secs
-                )
-                return chunks
-            except (APITimeoutError, asyncio.TimeoutError):
-                # Retry, this time without a timeout so we get a response
-                logger.debug(f"{self}: Retrying chat completion due to timeout")
+        await self.start_connection_metrics()
+
+        try:
+            if self._retry_on_timeout:
+                try:
+                    chunks = await asyncio.wait_for(
+                        self._client.chat.completions.create(**params), timeout=self._retry_timeout_secs
+                    )
+                    await self.stop_connection_metrics(success=True, connection_type="http")
+                    return chunks
+                except (APITimeoutError, asyncio.TimeoutError):
+                    # Retry, this time without a timeout so we get a response
+                    logger.debug(f"{self}: Retrying chat completion due to timeout")
+                    chunks = await self._client.chat.completions.create(**params)
+                    await self.stop_connection_metrics(success=True, connection_type="http")
+                    return chunks
+            else:
                 chunks = await self._client.chat.completions.create(**params)
+                await self.stop_connection_metrics(success=True, connection_type="http")
                 return chunks
-        else:
-            chunks = await self._client.chat.completions.create(**params)
-            return chunks
+        except Exception as e:
+            await self.stop_connection_metrics(success=False, error=str(e), connection_type="http")
+            raise
 
     def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for chat completion request.
@@ -438,14 +452,19 @@ class BaseOpenAILLMService(LLMService):
         completions and manage settings.
 >>>>>>> dv-stage
 
-        Args:
+        Args:
             frame: The frame to process.
             direction: The direction of frame processing.
         """
         await super().process_frame(frame, direction)
 
         context = None
-        if isinstance(frame, OpenAILLMContextFrame):
+        if isinstance(frame, WarmupLLMFrame):
+            # Handle warmup frame - prime cache without emitting response
+            # Run in background to avoid blocking the pipeline
+            asyncio.create_task(self._handle_warmup_frame(frame))
+            return  # Don't process further, warmup is silent
+        elif isinstance(frame, OpenAILLMContextFrame):
             # Handle OpenAI-specific context frames
             context = frame.context
         elif isinstance(frame, LLMContextFrame):
@@ -470,3 +489,49 @@ class BaseOpenAILLMService(LLMService):
         finally:
             await self.stop_processing_metrics()
             await self.push_frame(LLMFullResponseEndFrame())
+
+    def _is_gpt5_model(self) -> bool:
+        """Check if the current model is a GPT-5 series model that requires max_completion_tokens."""
+        model = (self.model_name or "").lower()
+        return model.startswith("gpt-5")
+
+    async def _handle_warmup_frame(self, frame: WarmupLLMFrame):
+        """Handle WarmupLLMFrame to prime the LLM cache without emitting responses.
+
+        This method sends a minimal request to the LLM to warm up any provider-side
+        caches (like prompt caching). The response is discarded and no frames are emitted.
+
+        Args:
+            frame: WarmupLLMFrame containing the messages to cache.
+        """
+        # Skip warmup if disabled
+        if not self._enable_warmup:
+            self.logger.debug("LLM warmup is disabled, skipping")
+            return
+
+        try:
+            # Use the provided messages for warmup
+            messages: List[ChatCompletionMessageParam] = frame.messages  # type: ignore
+
+            # Make a non-streaming call to warm the cache
+            # We use a minimal token limit to reduce latency and cost
+            # GPT-5 series models require max_completion_tokens instead of max_tokens
+            warmup_params = {
+                "model": self.model_name,
+                "messages": messages,
+                "stream": False,
+            }
+
+            if self._is_gpt5_model():
+                warmup_params["max_completion_tokens"] = 10
+            else:
+                warmup_params["max_tokens"] = 10
+
+            await self._client.chat.completions.create(**warmup_params)
+
+            self.logger.info("LLM cache warmed successfully")
+            # Intentionally don't emit any frames - this is a silent warmup
+
+        except Exception as e:
+            self.logger.error(f"Failed to warm LLM cache: {e}")
+            # Don't propagate error - warmup failure shouldn't break the bot
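
Putting the pieces together, the new `enable_warmup` flag and `WarmupLLMFrame` are meant to be used before the first user turn. The sketch below is illustrative only: it assumes `WarmupLLMFrame` accepts the message list it later exposes as `frame.messages`, and that `OpenAILLMService` forwards `enable_warmup` to `BaseOpenAILLMService`; the API key and prompt are placeholders.

```python
# Illustrative warmup sketch (not taken from the diff). Assumes WarmupLLMFrame
# accepts the message list it exposes as frame.messages, and that
# OpenAILLMService forwards enable_warmup to BaseOpenAILLMService.
import asyncio

from pipecat.frames.frames import WarmupLLMFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineTask
from pipecat.services.openai.llm import OpenAILLMService


async def main():
    llm = OpenAILLMService(api_key="sk-...", model="gpt-4o", enable_warmup=True)
    task = PipelineTask(Pipeline([llm]))

    # Queue the system prompt before the first user turn so any provider-side
    # prompt cache is primed; the service handles the frame silently in a
    # background task and emits no response frames.
    await task.queue_frame(
        WarmupLLMFrame(
            messages=[{"role": "system", "content": "You are a helpful voice agent."}]
        )
    )


asyncio.run(main())
```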
@@ -13,6 +13,7 @@ from dataclasses import dataclass
 from typing import AsyncGenerator, Dict, Optional
 
 import httpx
+from env_config import api_config
 from loguru import logger
 
 from pipecat.frames.frames import (
@@ -23,6 +24,10 @@ from pipecat.frames.frames import (
     LLMTextFrame,
     LLMUpdateSettingsFrame,
 )
+from pipecat.processors.aggregators.llm_response import (
+    LLMAssistantAggregatorParams,
+    LLMUserAggregatorParams,
+)
 from pipecat.processors.aggregators.openai_llm_context import (
     OpenAILLMContext,
     OpenAILLMContextFrame,
@@ -34,11 +39,6 @@ from pipecat.services.openai.llm import (
     OpenAIContextAggregatorPair,
     OpenAIUserContextAggregator,
 )
-from pipecat.processors.aggregators.llm_response import (
-    LLMAssistantAggregatorParams,
-    LLMUserAggregatorParams,
-)
-from env_config import api_config
 from pipecat.utils.redis import create_async_redis_client
 
 
@@ -96,12 +96,11 @@ class SalesforceAgentLLMService(LLMService):
         # Initialize parent LLM service
         super().__init__(**kwargs)
         self._agent_id = agent_id
-        self._org_domain = org_domain
+        self._org_domain = org_domain
         self._client_id = client_id
         self._client_secret = client_secret
         self._api_host = api_host
 
-
         # Validate required environment variables
         required_vars = {
             "SALESFORCE_AGENT_ID": self._agent_id,
@@ -145,7 +144,6 @@ class SalesforceAgentLLMService(LLMService):
         )
 
         self._schedule_session_warmup()
-
 
     async def __aenter__(self):
         """Async context manager entry."""
@@ -237,7 +235,7 @@ class SalesforceAgentLLMService(LLMService):
             return
 
         ttl_seconds = 3600  # Default fallback
-
+
         # Try to get expiration from expires_in parameter first
         if expires_in is not None:
             try:
@@ -246,7 +244,7 @@ class SalesforceAgentLLMService(LLMService):
             except (TypeError, ValueError):
                 logger.debug("Unable to parse expires_in parameter")
                 expires_in = None
-
+
         # If no expires_in available, use default TTL
         if expires_in is None:
             logger.debug("No expiration info found, using default TTL")
@@ -271,7 +269,7 @@ class SalesforceAgentLLMService(LLMService):
 
     async def _get_access_token(self, *, force_refresh: bool = False) -> str:
         """Get OAuth access token using client credentials.
-
+
         Args:
             force_refresh: If True, skip cache and fetch fresh token from Salesforce.
         """
@@ -301,15 +299,15 @@ class SalesforceAgentLLMService(LLMService):
 
     async def _make_authenticated_request(self, method: str, url: str, **kwargs):
         """Make an authenticated HTTP request with automatic token refresh on auth errors.
-
+
         Args:
             method: HTTP method (GET, POST, DELETE, etc.)
             url: Request URL
             **kwargs: Additional arguments passed to httpx request
-
+
         Returns:
             httpx.Response: The HTTP response
-
+
         Raises:
             Exception: If request fails after token refresh attempt
         """
@@ -318,7 +316,7 @@ class SalesforceAgentLLMService(LLMService):
         headers = kwargs.get("headers", {})
         headers["Authorization"] = f"Bearer {access_token}"
         kwargs["headers"] = headers
-
+
         try:
             response = await self._http_client.request(method, url, **kwargs)
             response.raise_for_status()
@@ -326,14 +324,16 @@ class SalesforceAgentLLMService(LLMService):
         except httpx.HTTPStatusError as e:
             # If authentication error, clear cache and retry with fresh token
             if e.response.status_code in (401, 403):
-                logger.warning(f"Salesforce authentication error ({e.response.status_code}), refreshing token")
+                logger.warning(
+                    f"Salesforce authentication error ({e.response.status_code}), refreshing token"
+                )
                 await self._clear_cached_access_token()
-
+
                 # Retry with fresh token
                 fresh_token = await self._get_access_token(force_refresh=True)
                 headers["Authorization"] = f"Bearer {fresh_token}"
                 kwargs["headers"] = headers
-
+
                 response = await self._http_client.request(method, url, **kwargs)
                 response.raise_for_status()
                 return response
@@ -359,9 +359,7 @@ class SalesforceAgentLLMService(LLMService):
 
         try:
             response = await self._make_authenticated_request(
-                "POST", session_url,
-                headers={"Content-Type": "application/json"},
-                json=payload
+                "POST", session_url, headers={"Content-Type": "application/json"}, json=payload
             )
             session_data = response.json()
             session_id = session_data["sessionId"]
@@ -419,8 +417,7 @@ class SalesforceAgentLLMService(LLMService):
             # End the session via API
             url = f"{self._api_host}/einstein/ai-agent/v1/sessions/{session_id}"
             await self._make_authenticated_request(
-                "DELETE", url,
-                headers={"x-session-end-reason": "UserRequest"}
+                "DELETE", url, headers={"x-session-end-reason": "UserRequest"}
             )
         except Exception as e:
             logger.warning(f"Failed to end session {session_id}: {e}")
@@ -431,32 +428,32 @@ class SalesforceAgentLLMService(LLMService):
 
     def _extract_user_message(self, context: OpenAILLMContext) -> str:
         """Extract the last user message from context.
-
+
         Similar to Vistaar pattern - extract only the most recent user message.
-
+
         Args:
             context: The OpenAI LLM context containing messages.
-
+
         Returns:
             The last user message as a string.
         """
         messages = context.get_messages()
-
+
         # Find the last user message (iterate in reverse for efficiency)
        for message in reversed(messages):
            if message.get("role") == "user":
                content = message.get("content", "")
-
+
                # Handle content that might be a list (for multimodal messages)
                if isinstance(content, list):
                    text_parts = [
                        item.get("text", "") for item in content if item.get("type") == "text"
                    ]
                    content = " ".join(text_parts)
-
+
                if isinstance(content, str):
                    return content.strip()
-
+
        return ""
 
     def _generate_sequence_id(self) -> int:
@@ -464,7 +461,9 @@ class SalesforceAgentLLMService(LLMService):
         self._sequence_counter += 1
         return self._sequence_counter
 
-    async def _stream_salesforce_response(self, session_id: str, user_message: str) -> AsyncGenerator[str, None]:
+    async def _stream_salesforce_response(
+        self, session_id: str, user_message: str
+    ) -> AsyncGenerator[str, None]:
         """Stream response from Salesforce Agent API."""
         url = f"{self._api_host}/einstein/ai-agent/v1/sessions/{session_id}/messages/stream"
 
@@ -472,15 +471,9 @@ class SalesforceAgentLLMService(LLMService):
             "message": {
                 "sequenceId": self._generate_sequence_id(),
                 "type": "Text",
-                "text": user_message
+                "text": user_message,
             },
-            "variables": [
-                {
-                    "name": "$Context.EndUserLanguage",
-                    "type": "Text",
-                    "value": "en_US"
-                }
-            ]
+            "variables": [{"name": "$Context.EndUserLanguage", "type": "Text", "value": "en_US"}],
         }
 
         # First attempt with current token
@@ -493,9 +486,11 @@ class SalesforceAgentLLMService(LLMService):
 
         try:
             logger.info(f"🌐 Salesforce API request: {user_message[:50]}...")
-            async with self._http_client.stream("POST", url, headers=headers, json=message_data) as response:
+            async with self._http_client.stream(
+                "POST", url, headers=headers, json=message_data
+            ) as response:
                 response.raise_for_status()
-
+
                 async for line in response.aiter_lines():
                     if not line:
                         continue
@@ -525,17 +520,23 @@ class SalesforceAgentLLMService(LLMService):
         except httpx.HTTPStatusError as e:
             # If authentication error, retry with fresh token
             if e.response.status_code in (401, 403):
-                logger.warning(f"Salesforce streaming authentication error ({e.response.status_code}), refreshing token")
+                logger.warning(
+                    f"Salesforce streaming authentication error ({e.response.status_code}), refreshing token"
+                )
                 await self._clear_cached_access_token()
-
+
                 # Retry with fresh token
                 fresh_token = await self._get_access_token(force_refresh=True)
                 headers["Authorization"] = f"Bearer {fresh_token}"
-
-                logger.info(f"🔄 Retrying Salesforce stream with fresh token: {user_message[:50]}...")
-                async with self._http_client.stream("POST", url, headers=headers, json=message_data) as response:
+
+                logger.info(
+                    f"🔄 Retrying Salesforce stream with fresh token: {user_message[:50]}..."
+                )
+                async with self._http_client.stream(
+                    "POST", url, headers=headers, json=message_data
+                ) as response:
                     response.raise_for_status()
-
+
                     async for line in response.aiter_lines():
                         if not line:
                             continue
@@ -576,40 +577,41 @@ class SalesforceAgentLLMService(LLMService):
             context: The OpenAI LLM context containing messages to process.
         """
         logger.info(f"🔄 Salesforce processing context with {len(context.get_messages())} messages")
-
+
         # Extract user message from context first
         user_message = self._extract_user_message(context)
-
+
         if not user_message:
             logger.warning("Salesforce: No user message found in context")
             return
-
+
         try:
             logger.info(f"🎯 Salesforce extracted query: {user_message}")
-
-            # Start response
+
+            # Start response
             await self.push_frame(LLMFullResponseStartFrame())
-            await self.push_frame(LLMFullResponseStartFrame(),FrameDirection.UPSTREAM)
+            await self.push_frame(LLMFullResponseStartFrame(), FrameDirection.UPSTREAM)
             await self.start_processing_metrics()
             await self.start_ttfb_metrics()
-
+
             # Get or create session
             session_id = await self._get_or_create_session()
-
+
             first_chunk = True
-
+
             # Stream the response
             async for text_chunk in self._stream_salesforce_response(session_id, user_message):
                 if first_chunk:
                     await self.stop_ttfb_metrics()
                     first_chunk = False
-
+
                 # Push each text chunk as it arrives
                 await self.push_frame(LLMTextFrame(text=text_chunk))
-
+
         except Exception as e:
             logger.error(f"Salesforce context processing error: {type(e).__name__}: {str(e)}")
             import traceback
+
             logger.error(f"Salesforce traceback: {traceback.format_exc()}")
             raise
         finally:
@@ -627,7 +629,9 @@ class SalesforceAgentLLMService(LLMService):
         context = None
         if isinstance(frame, OpenAILLMContextFrame):
             context = frame.context
-            logger.info(f"🔍 Received OpenAILLMContextFrame with {len(context.get_messages())} messages")
+            logger.info(
+                f"🔍 Received OpenAILLMContextFrame with {len(context.get_messages())} messages"
+            )
         elif isinstance(frame, LLMMessagesFrame):
             context = OpenAILLMContext.from_messages(frame.messages)
             logger.info(f"🔍 Received LLMMessagesFrame with {len(frame.messages)} messages")
@@ -680,6 +684,7 @@ class SalesforceAgentLLMService(LLMService):
     def get_llm_adapter(self):
         """Get the LLM adapter for this service."""
         from pipecat.adapters.services.open_ai_adapter import OpenAILLMAdapter
+
         return OpenAILLMAdapter()
 
     async def close(self):
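
Most of the changes above are formatting, but the refresh-and-retry behaviour in `_make_authenticated_request` is worth calling out: on a 401/403 the cached token is cleared, a fresh one is fetched, and the request is retried exactly once. A standalone restatement of that pattern with httpx is sketched below; the `get_token`/`clear_token` callables are hypothetical stand-ins for the service's token helpers.

```python
# Generic restatement of the 401/403 refresh-and-retry flow used by
# _make_authenticated_request above; get_token/clear_token are hypothetical
# stand-ins for the service's cached-token helpers.
from typing import Awaitable, Callable

import httpx


async def authenticated_request(
    client: httpx.AsyncClient,
    get_token: Callable[[bool], Awaitable[str]],  # called as get_token(force_refresh)
    clear_token: Callable[[], Awaitable[None]],
    method: str,
    url: str,
    **kwargs,
) -> httpx.Response:
    headers = kwargs.pop("headers", {})
    headers["Authorization"] = f"Bearer {await get_token(False)}"
    try:
        response = await client.request(method, url, headers=headers, **kwargs)
        response.raise_for_status()
        return response
    except httpx.HTTPStatusError as e:
        if e.response.status_code not in (401, 403):
            raise
        # The token is stale or revoked: drop the cached value, mint a fresh
        # one, and retry the request a single time.
        await clear_token()
        headers["Authorization"] = f"Bearer {await get_token(True)}"
        response = await client.request(method, url, headers=headers, **kwargs)
        response.raise_for_status()
        return response
```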
@@ -23,7 +23,6 @@ from pipecat.frames.frames import (
     InterruptionFrame,
     LLMFullResponseEndFrame,
     StartFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStartedFrame,
     TTSStoppedFrame,
@@ -49,6 +49,33 @@ END_TOKEN = "<end>"
 FINALIZED_TOKEN = "<fin>"
 
 
+class SonioxContextGeneralItem(BaseModel):
+    """Represents a key-value pair for structured general context information."""
+
+    key: str
+    value: str
+
+
+class SonioxContextTranslationTerm(BaseModel):
+    """Represents a custom translation mapping for ambiguous or domain-specific terms."""
+
+    source: str
+    target: str
+
+
+class SonioxContextObject(BaseModel):
+    """Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.
+
+    Learn more about context in the documentation:
+    https://soniox.com/docs/stt/concepts/context
+    """
+
+    general: Optional[List[SonioxContextGeneralItem]] = None
+    text: Optional[str] = None
+    terms: Optional[List[str]] = None
+    translation_terms: Optional[List[SonioxContextTranslationTerm]] = None
+
+
 class SonioxInputParams(BaseModel):
     """Real-time transcription settings.
 
@@ -60,9 +87,9 @@ class SonioxInputParams(BaseModel):
         audio_format: Audio format to use for transcription.
         num_channels: Number of channels to use for transcription.
         language_hints: List of language hints to use for transcription.
-        context: Customization for transcription.
-        enable_non_final_tokens: Whether to enable non-final tokens. If false, only final tokens will be returned.
-        max_non_final_tokens_duration_ms: Maximum duration of non-final tokens.
+        context: Customization for transcription. String for models with context_version 1 and ContextObject for models with context_version 2.
+        enable_speaker_diarization: Whether to enable speaker diarization. Tokens are annotated with speaker IDs.
+        enable_language_identification: Whether to enable language identification. Tokens are annotated with language IDs.
         client_reference_id: Client reference ID to use for transcription.
     """
 
@@ -72,10 +99,10 @@ class SonioxInputParams(BaseModel):
     num_channels: Optional[int] = 1
 
     language_hints: Optional[List[Language]] = None
-    context: Optional[str] = None
+    context: Optional[SonioxContextObject | str] = None
 
-    enable_non_final_tokens: Optional[bool] = True
-    max_non_final_tokens_duration_ms: Optional[int] = None
+    enable_speaker_diarization: Optional[bool] = False
+    enable_language_identification: Optional[bool] = False
 
     client_reference_id: Optional[str] = None
 
@@ -173,6 +200,10 @@ class SonioxSTTService(STTService):
         # Either one or the other is required.
         enable_endpoint_detection = not self._vad_force_turn_endpoint
 
+        context = self._params.context
+        if isinstance(context, SonioxContextObject):
+            context = context.model_dump()
+
         # Send the initial configuration message.
         config = {
             "api_key": self._api_key,
@@ -182,9 +213,9 @@ class SonioxSTTService(STTService):
             "enable_endpoint_detection": enable_endpoint_detection,
             "sample_rate": self.sample_rate,
             "language_hints": _prepare_language_hints(self._params.language_hints),
-            "context": self._params.context,
-            "enable_non_final_tokens": self._params.enable_non_final_tokens,
-            "max_non_final_tokens_duration_ms": self._params.max_non_final_tokens_duration_ms,
+            "context": context,
+            "enable_speaker_diarization": self._params.enable_speaker_diarization,
+            "enable_language_identification": self._params.enable_language_identification,
             "client_reference_id": self._params.client_reference_id,
         }
 
@@ -210,6 +241,7 @@ class SonioxSTTService(STTService):
             if self._receive_task != asyncio.current_task():
                 await self._receive_task
             self._receive_task = None
+        self.logger.debug("Disconnected from Soniox STT")
 
     async def stop(self, frame: EndFrame):
         """Stop the Soniox STT websocket connection.
@@ -351,7 +383,10 @@ class SonioxSTTService(STTService):
 
         if self._final_transcription_buffer or non_final_transcription:
             final_text = "".join(
-                map(lambda token: token["text"], self._final_transcription_buffer)
+                map(
+                    lambda token: token["text"],
+                    self._final_transcription_buffer,
+                )
             )
             non_final_text = "".join(
                 map(lambda token: token["text"], non_final_transcription)
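
The newer Soniox models accept structured context instead of a plain string, which is what the config change above forwards to the websocket. The sketch below shows how the models defined in this diff could be wired into `SonioxInputParams`; the model classes and field names come from the diff, while the API key, context values, and the `api_key`/`params` constructor keywords are placeholders and assumptions.

```python
# Sketch of passing structured context to the Soniox STT service. The model
# classes and parameter names come from the diff above; the api_key value,
# context content, and constructor keywords are assumptions for illustration.
from pipecat.services.soniox.stt import (
    SonioxContextGeneralItem,
    SonioxContextObject,
    SonioxContextTranslationTerm,
    SonioxInputParams,
    SonioxSTTService,
)

context = SonioxContextObject(
    general=[SonioxContextGeneralItem(key="domain", value="healthcare")],
    text="The caller is booking a cardiology appointment.",
    terms=["Dr. Amara Patel", "echocardiogram"],
    translation_terms=[SonioxContextTranslationTerm(source="ECG", target="electrocardiogram")],
)

stt = SonioxSTTService(
    api_key="YOUR_SONIOX_API_KEY",
    params=SonioxInputParams(
        context=context,  # context_version 1 models still take a plain string here
        enable_speaker_diarization=True,
        enable_language_identification=True,
    ),
)
```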