dv-pipecat-ai 0.0.85.dev818__py3-none-any.whl → 0.0.85.dev858__py3-none-any.whl
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/METADATA +2 -1
- {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/RECORD +32 -29
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +5 -1
- pipecat/frames/frames.py +34 -0
- pipecat/metrics/connection_metrics.py +45 -0
- pipecat/processors/aggregators/llm_response.py +25 -4
- pipecat/processors/dtmf_aggregator.py +17 -21
- pipecat/processors/frame_processor.py +51 -8
- pipecat/processors/metrics/frame_processor_metrics.py +108 -0
- pipecat/processors/transcript_processor.py +22 -1
- pipecat/serializers/__init__.py +2 -0
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +2 -2
- pipecat/serializers/custom.py +2 -2
- pipecat/serializers/vi.py +326 -0
- pipecat/services/cartesia/tts.py +75 -10
- pipecat/services/deepgram/stt.py +317 -17
- pipecat/services/elevenlabs/stt.py +487 -19
- pipecat/services/elevenlabs/tts.py +28 -4
- pipecat/services/google/llm.py +26 -11
- pipecat/services/openai/base_llm.py +79 -14
- pipecat/services/salesforce/llm.py +321 -86
- pipecat/services/sarvam/tts.py +0 -1
- pipecat/services/soniox/stt.py +45 -10
- pipecat/services/vistaar/llm.py +97 -6
- pipecat/transcriptions/language.py +50 -0
- pipecat/transports/base_input.py +15 -11
- pipecat/transports/base_output.py +29 -3
- pipecat/utils/redis.py +58 -0
- {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/top_level.txt +0 -0
pipecat/services/openai/base_llm.py

```diff
@@ -18,6 +18,7 @@ from openai import (
     APITimeoutError,
     AsyncOpenAI,
     AsyncStream,
+    BadRequestError,
     DefaultAsyncHttpxClient,
 )
 from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam
@@ -32,6 +33,7 @@ from pipecat.frames.frames import (
     LLMMessagesFrame,
     LLMTextFrame,
     LLMUpdateSettingsFrame,
+    WarmupLLMFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
 from pipecat.processors.aggregators.llm_context import LLMContext
@@ -99,6 +101,7 @@ class BaseOpenAILLMService(LLMService):
         params: Optional[InputParams] = None,
         retry_timeout_secs: Optional[float] = 5.0,
         retry_on_timeout: Optional[bool] = False,
+        enable_warmup: bool = False,
         **kwargs,
     ):
         """Initialize the BaseOpenAILLMService.
@@ -113,6 +116,7 @@ class BaseOpenAILLMService(LLMService):
             params: Input parameters for model configuration and behavior.
             retry_timeout_secs: Request timeout in seconds. Defaults to 5.0 seconds.
             retry_on_timeout: Whether to retry the request once if it times out.
+            enable_warmup: Whether to enable LLM cache warmup. Defaults to False.
             **kwargs: Additional arguments passed to the parent LLMService.
         """
         super().__init__(**kwargs)
@@ -132,6 +136,7 @@ class BaseOpenAILLMService(LLMService):
         }
         self._retry_timeout_secs = retry_timeout_secs
         self._retry_on_timeout = retry_on_timeout
+        self._enable_warmup = enable_warmup
         self.set_model_name(model)
         self._client = self.create_client(
             api_key=api_key,
@@ -200,20 +205,29 @@ class BaseOpenAILLMService(LLMService):
         """
         params = self.build_chat_completion_params(params_from_context)

-
-
-
-
-
-
-
-
-
+        await self.start_connection_metrics()
+
+        try:
+            if self._retry_on_timeout:
+                try:
+                    chunks = await asyncio.wait_for(
+                        self._client.chat.completions.create(**params), timeout=self._retry_timeout_secs
+                    )
+                    await self.stop_connection_metrics(success=True, connection_type="http")
+                    return chunks
+                except (APITimeoutError, asyncio.TimeoutError):
+                    # Retry, this time without a timeout so we get a response
+                    logger.debug(f"{self}: Retrying chat completion due to timeout")
+                    chunks = await self._client.chat.completions.create(**params)
+                    await self.stop_connection_metrics(success=True, connection_type="http")
+                    return chunks
+            else:
                 chunks = await self._client.chat.completions.create(**params)
+                await self.stop_connection_metrics(success=True, connection_type="http")
                 return chunks
-
-
-
+        except Exception as e:
+            await self.stop_connection_metrics(success=False, error=str(e), connection_type="http")
+            raise

     def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for chat completion request.
@@ -438,14 +452,19 @@ class BaseOpenAILLMService(LLMService):
         completions and manage settings.
 >>>>>>> dv-stage

-
+        Args:
             frame: The frame to process.
             direction: The direction of frame processing.
         """
         await super().process_frame(frame, direction)

         context = None
-        if isinstance(frame,
+        if isinstance(frame, WarmupLLMFrame):
+            # Handle warmup frame - prime cache without emitting response
+            # Run in background to avoid blocking the pipeline
+            asyncio.create_task(self._handle_warmup_frame(frame))
+            return  # Don't process further, warmup is silent
+        elif isinstance(frame, OpenAILLMContextFrame):
             # Handle OpenAI-specific context frames
             context = frame.context
         elif isinstance(frame, LLMContextFrame):
@@ -470,3 +489,49 @@ class BaseOpenAILLMService(LLMService):
         finally:
             await self.stop_processing_metrics()
             await self.push_frame(LLMFullResponseEndFrame())
+
+    def _is_gpt5_model(self) -> bool:
+        """Check if the current model is a GPT-5 series model that requires max_completion_tokens."""
+        model = (self.model_name or "").lower()
+        return model.startswith("gpt-5")
+
+    async def _handle_warmup_frame(self, frame: WarmupLLMFrame):
+        """Handle WarmupLLMFrame to prime the LLM cache without emitting responses.
+
+        This method sends a minimal request to the LLM to warm up any provider-side
+        caches (like prompt caching). The response is discarded and no frames are emitted.
+
+        Args:
+            frame: WarmupLLMFrame containing the messages to cache.
+        """
+        # Skip warmup if disabled
+        if not self._enable_warmup:
+            self.logger.debug("LLM warmup is disabled, skipping")
+            return
+
+        try:
+            # Use the provided messages for warmup
+            messages: List[ChatCompletionMessageParam] = frame.messages  # type: ignore
+
+            # Make a non-streaming call to warm the cache
+            # We use a minimal token limit to reduce latency and cost
+            # GPT-5 series models require max_completion_tokens instead of max_tokens
+            warmup_params = {
+                "model": self.model_name,
+                "messages": messages,
+                "stream": False,
+            }
+
+            if self._is_gpt5_model():
+                warmup_params["max_completion_tokens"] = 10
+            else:
+                warmup_params["max_tokens"] = 10
+
+            await self._client.chat.completions.create(**warmup_params)
+
+            self.logger.info("LLM cache warmed successfully")
+            # Intentionally don't emit any frames - this is a silent warmup
+
+        except Exception as e:
+            self.logger.error(f"Failed to warm LLM cache: {e}")
+            # Don't propagate error - warmup failure shouldn't break the bot
```