dv-pipecat-ai 0.0.75.dev887__py3-none-any.whl → 0.0.82.dev23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev23.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev23.dist-info}/RECORD +121 -81
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +20 -4
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/azure/llm.py +53 -1
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/elevenlabs/__init__.py +2 -0
- pipecat/services/elevenlabs/stt.py +351 -0
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +4 -4
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev23.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev23.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev23.dist-info}/top_level.txt +0 -0
pipecat/services/gladia/stt.py
CHANGED
@@ -14,11 +14,12 @@ import asyncio
 import base64
 import json
 import warnings
-from typing import Any, AsyncGenerator, Dict,
+from typing import Any, AsyncGenerator, Dict, List, Literal, Optional
 
 import aiohttp
 from loguru import logger
 
+from pipecat import __version__ as pipecat_version
 from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
@@ -179,12 +180,16 @@ class _InputParamsDescriptor:
     """Descriptor for backward compatibility with deprecation warning."""
 
     def __get__(self, obj, objtype=None):
-        warnings
-
-
-
-
-
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "GladiaSTTService.InputParams is deprecated and will be removed in a future version. "
+                "Import and use GladiaInputParams directly instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         return GladiaInputParams
 
 
@@ -241,12 +246,14 @@ class GladiaSTTService(STTService):
 
         # Warn about deprecated language parameter if it's used
         if params.language is not None:
-            warnings.
-
-
-
-
-
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'language' parameter is deprecated and will be removed in a future version. "
+                    "Use 'language_config' instead.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
 
         self._api_key = api_key
         self._region = region
@@ -322,8 +329,8 @@ class GladiaSTTService(STTService):
         }
 
         # Add custom_metadata if provided
-
-
+        settings["custom_metadata"] = dict(self._params.custom_metadata or {})
+        settings["custom_metadata"]["pipecat"] = pipecat_version
 
         # Add endpointing parameters if provided
         if self._params.endpointing is not None:
@@ -449,6 +456,7 @@ class GladiaSTTService(STTService):
         response = await self._setup_gladia(settings)
         self._session_url = response["url"]
         self._reconnection_attempts = 0
+        logger.info(f"Session URL : {self._session_url}")
 
         # Connect with automatic reconnection
         async with websocket_connect(self._session_url) as websocket:
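The Gladia changes above deprecate GladiaSTTService.InputParams in favor of GladiaInputParams, deprecate the `language` parameter in favor of `language_config`, and stamp the running pipecat version into the session's `custom_metadata`. A minimal construction sketch on the non-deprecated path, assuming GladiaInputParams is importable from pipecat.services.gladia.config and that fields not visible in this diff behave as their names suggest:

    from pipecat.services.gladia.config import GladiaInputParams  # assumed import location
    from pipecat.services.gladia.stt import GladiaSTTService

    # Prefer GladiaInputParams over the deprecated GladiaSTTService.InputParams,
    # and language_config over the deprecated `language` keyword.
    stt = GladiaSTTService(
        api_key="YOUR_GLADIA_API_KEY",
        params=GladiaInputParams(
            # custom_metadata is copied into the session settings and merged with
            # {"pipecat": <version>} by the service, per the hunk above.
            custom_metadata={"call_id": "example-123"},
        ),
    )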
pipecat/services/google/llm.py
CHANGED
@@ -16,19 +16,20 @@ import json
 import os
 import uuid
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional
+from typing import Any, AsyncIterator, Dict, List, Optional
 
 from loguru import logger
 from PIL import Image
 from pydantic import BaseModel, Field
 
-from pipecat.adapters.services.gemini_adapter import GeminiLLMAdapter
+from pipecat.adapters.services.gemini_adapter import GeminiLLMAdapter, GeminiLLMInvocationParams
 from pipecat.frames.frames import (
     AudioRawFrame,
     Frame,
     FunctionCallCancelFrame,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesFrame,
@@ -38,6 +39,7 @@ from pipecat.frames.frames import (
     VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMUserAggregatorParams,
@@ -67,6 +69,7 @@ try:
     FunctionCall,
     FunctionResponse,
     GenerateContentConfig,
+    GenerateContentResponse,
     HttpOptions,
     Part,
 )
@@ -289,11 +292,11 @@ class GoogleLLMContext(OpenAILLMContext):
         # Add the converted messages to our existing messages
         self._messages.extend(converted_messages)
 
-    def get_messages_for_logging(self):
+    def get_messages_for_logging(self) -> List[Dict[str, Any]]:
         """Get messages formatted for logging with sensitive data redacted.
 
         Returns:
-            List of
+            List of messages in a format ready for logging.
         """
         msgs = []
         for message in self.messages:
@@ -418,7 +421,14 @@ class GoogleLLMContext(OpenAILLMContext):
         role = message["role"]
         content = message.get("content", [])
         if role == "system":
-
+            # System instructions are returned as plain text
+            if isinstance(content, str):
+                self.system_message = content
+            elif isinstance(content, list):
+                # If content is a list, we assume it's a list of text parts, per the standard
+                self.system_message = " ".join(
+                    part["text"] for part in content if part.get("type") == "text"
+                )
             return None
         elif role == "assistant":
             role = "model"
@@ -436,11 +446,20 @@ class GoogleLLMContext(OpenAILLMContext):
             )
         elif role == "tool":
             role = "model"
+            try:
+                response = json.loads(message["content"])
+                if isinstance(response, dict):
+                    response_dict = response
+                else:
+                    response_dict = {"value": response}
+            except Exception as e:
+                # Response might not be JSON-deserializable (e.g. plain text).
+                response_dict = {"value": message["content"]}
             parts.append(
                 Part(
                     function_response=FunctionResponse(
                         name="tool_call_result",  # seems to work to hard-code the same name every time
-                        response=
+                        response=response_dict,
                     )
                 )
             )
@@ -636,9 +655,8 @@ class GoogleLLMService(LLMService):
     """Google AI (Gemini) LLM service implementation.
 
     This class implements inference with Google's AI models, translating internally
-    from OpenAILLMContext
-
-    easy switching between different LLMs.
+    from an OpenAILLMContext or a universal LLMContext to the messages format
+    expected by the Google AI model.
     """
 
     # Overriding the default adapter to use the Gemini one.
@@ -715,6 +733,44 @@ class GoogleLLMService(LLMService):
     def _create_client(self, api_key: str, http_options: Optional[HttpOptions] = None):
         self._client = genai.Client(api_key=api_key, http_options=http_options)
 
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Args:
+            context: The LLM context containing conversation history.
+
+        Returns:
+            The LLM's response as a string, or None if no response is generated.
+        """
+        messages = []
+        system = []
+        if isinstance(context, LLMContext):
+            adapter = self.get_llm_adapter()
+            params: GeminiLLMInvocationParams = adapter.get_llm_invocation_params(context)
+            messages = params["messages"]
+            system = params["system_instruction"]
+        else:
+            context = GoogleLLMContext.upgrade_to_google(context)
+            messages = context.messages
+            system = getattr(context, "system_message", None)
+
+        generation_config = GenerateContentConfig(system_instruction=system)
+
+        # Use the new google-genai client's async method
+        response = await self._client.aio.models.generate_content(
+            model=self._model_name,
+            contents=messages,
+            config=generation_config,
+        )
+
+        # Extract text from response
+        if response.candidates and response.candidates[0].content:
+            for part in response.candidates[0].content.parts:
+                if part.text:
+                    return part.text
+
+        return None
+
     def needs_mcp_alternate_schema(self) -> bool:
         """Check if this LLM service requires alternate MCP schema.
 
@@ -740,8 +796,87 @@ class GoogleLLMService(LLMService):
         except Exception as e:
             logger.exception(f"Failed to unset thinking budget: {e}")
 
+    async def _stream_content(
+        self, params_from_context: GeminiLLMInvocationParams
+    ) -> AsyncIterator[GenerateContentResponse]:
+        messages = params_from_context["messages"]
+        if (
+            params_from_context["system_instruction"]
+            and self._system_instruction != params_from_context["system_instruction"]
+        ):
+            logger.debug(f"System instruction changed: {params_from_context['system_instruction']}")
+            self._system_instruction = params_from_context["system_instruction"]
+
+        tools = []
+        if params_from_context["tools"]:
+            tools = params_from_context["tools"]
+        elif self._tools:
+            tools = self._tools
+        tool_config = None
+        if self._tool_config:
+            tool_config = self._tool_config
+
+        # Filter out None values and create GenerationContentConfig
+        generation_params = {
+            k: v
+            for k, v in {
+                "system_instruction": self._system_instruction,
+                "temperature": self._settings["temperature"],
+                "top_p": self._settings["top_p"],
+                "top_k": self._settings["top_k"],
+                "max_output_tokens": self._settings["max_tokens"],
+                "tools": tools,
+                "tool_config": tool_config,
+            }.items()
+            if v is not None
+        }
+
+        if self._settings["extra"]:
+            generation_params.update(self._settings["extra"])
+
+        # possibly modify generation_params (in place) to set thinking to off by default
+        self._maybe_unset_thinking_budget(generation_params)
+
+        generation_config = (
+            GenerateContentConfig(**generation_params) if generation_params else None
+        )
+
+        await self.start_ttfb_metrics()
+        return await self._client.aio.models.generate_content_stream(
+            model=self._model_name,
+            contents=messages,
+            config=generation_config,
+        )
+
+    async def _stream_content_specific_context(
+        self, context: OpenAILLMContext
+    ) -> AsyncIterator[GenerateContentResponse]:
+        logger.debug(
+            f"{self}: Generating chat from LLM-specific context [{context.system_message}] | {context.get_messages_for_logging()}"
+        )
+
+        params = GeminiLLMInvocationParams(
+            messages=context.messages,
+            system_instruction=context.system_message,
+            tools=context.tools,
+        )
+
+        return await self._stream_content(params)
+
+    async def _stream_content_universal_context(
+        self, context: LLMContext
+    ) -> AsyncIterator[GenerateContentResponse]:
+        adapter = self.get_llm_adapter()
+        params: GeminiLLMInvocationParams = adapter.get_llm_invocation_params(context)
+
+        logger.debug(
+            f"{self}: Generating chat from universal context [{params['system_instruction']}] | {adapter.get_messages_for_logging(context)}"
+        )
+
+        return await self._stream_content(params)
+
     @traced_llm
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         await self.push_frame(LLMFullResponseStartFrame())
 
         prompt_tokens = 0
@@ -754,55 +889,11 @@ class GoogleLLMService(LLMService):
         search_result = ""
 
         try:
-
-
-
-
-
-            messages = context.messages
-            if context.system_message and self._system_instruction != context.system_message:
-                self.logger.debug(f"System instruction changed: {context.system_message}")
-                self._system_instruction = context.system_message
-
-            tools = []
-            if context.tools:
-                tools = context.tools
-            elif self._tools:
-                tools = self._tools
-            tool_config = None
-            if self._tool_config:
-                tool_config = self._tool_config
-
-            # Filter out None values and create GenerationContentConfig
-            generation_params = {
-                k: v
-                for k, v in {
-                    "system_instruction": self._system_instruction,
-                    "temperature": self._settings["temperature"],
-                    "top_p": self._settings["top_p"],
-                    "top_k": self._settings["top_k"],
-                    "max_output_tokens": self._settings["max_tokens"],
-                    "tools": tools,
-                    "tool_config": tool_config,
-                }.items()
-                if v is not None
-            }
-
-            if self._settings["extra"]:
-                generation_params.update(self._settings["extra"])
-
-            # possibly modify generation_params (in place) to set thinking to off by default
-            self._maybe_unset_thinking_budget(generation_params)
-
-            generation_config = (
-                GenerateContentConfig(**generation_params) if generation_params else None
-            )
-
-            await self.start_ttfb_metrics()
-            response = await self._client.aio.models.generate_content_stream(
-                model=self._model_name,
-                contents=messages,
-                config=generation_config,
+            # Generate content using either OpenAILLMContext or universal LLMContext
+            response = await (
+                self._stream_content_specific_context(context)
+                if isinstance(context, OpenAILLMContext)
+                else self._stream_content_universal_context(context)
             )
 
         function_calls = []
@@ -915,9 +1006,18 @@ class GoogleLLMService(LLMService):
 
         if isinstance(frame, OpenAILLMContextFrame):
             context = GoogleLLMContext.upgrade_to_google(frame.context)
+        elif isinstance(frame, LLMContextFrame):
+            # Handle universal (LLM-agnostic) LLM context frames
+            context = frame.context
         elif isinstance(frame, LLMMessagesFrame):
+            # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal
+            # LLMContext with it
            context = GoogleLLMContext(frame.messages)
         elif isinstance(frame, VisionImageRawFrame):
+            # This is only useful in very simple pipelines because it creates
+            # a new context. Generally we want a context manager to catch
+            # UserImageRawFrames coming through the pipeline and add them
+            # to the context.
            context = GoogleLLMContext()
            context.add_image_frame_message(
                format=frame.format, size=frame.size, image=frame.image, text=frame.text
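The new GoogleLLMService.run_inference() runs a single out-of-pipeline completion with the google-genai async client and returns the first text part, or None. A usage sketch, assuming GoogleLLMService accepts api_key/model keyword arguments and that LLMContext can be seeded with an OpenAI-style messages list (neither constructor is shown in this diff):

    import asyncio

    from pipecat.processors.aggregators.llm_context import LLMContext
    from pipecat.services.google.llm import GoogleLLMService


    async def main():
        # Constructor arguments are assumptions; the diff only shows the service's
        # internal client and model name being used.
        llm = GoogleLLMService(api_key="YOUR_GOOGLE_API_KEY", model="gemini-2.0-flash")

        # Assumption: LLMContext takes an initial OpenAI-style messages list.
        context = LLMContext(messages=[{"role": "user", "content": "Say hello in one word."}])

        # One-shot, out-of-band inference: no pipeline, frames, or aggregators involved.
        print(await llm.run_inference(context))  # None if no text part is returned


    asyncio.run(main())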
pipecat/services/google/llm_openai.py
CHANGED
@@ -41,6 +41,10 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
     Note: This service includes a workaround for a Google API bug where function
     call indices may be incorrectly set to None, resulting in empty function names.
 
+    .. deprecated:: 0.0.82
+        GoogleLLMOpenAIBetaService is deprecated and will be removed in a future version.
+        Use GoogleLLMService instead for better integration with Google's native API.
+
     Reference:
         https://ai.google.dev/gemini-api/docs/openai
     """
@@ -61,6 +65,17 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
             model: Google model name to use (e.g., "gemini-2.0-flash").
             **kwargs: Additional arguments passed to the parent OpenAILLMService.
         """
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "GoogleLLMOpenAIBetaService is deprecated and will be removed in a future version. "
+                "Use GoogleLLMService instead for better integration with Google's native API.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
 
     async def _process_context(self, context: OpenAILLMContext):
@@ -74,9 +89,9 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
 
         await self.start_ttfb_metrics()
 
-        chunk_stream: AsyncStream[
-
-        )
+        chunk_stream: AsyncStream[
+            ChatCompletionChunk
+        ] = await self._stream_chat_completions_specific_context(context)
 
         async for chunk in chunk_stream:
             if chunk.usage:
pipecat/services/grok/llm.py
CHANGED
@@ -16,6 +16,7 @@ from dataclasses import dataclass
 from loguru import logger
 
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMUserAggregatorParams,
@@ -107,7 +108,7 @@ class GrokLLMService(OpenAILLMService):
         logger.debug(f"Creating Grok client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         """Process a context through the LLM and accumulate token usage metrics.
 
         This method overrides the parent class implementation to handle Grok's
pipecat/services/llm_service.py
CHANGED
@@ -36,10 +36,15 @@ from pipecat.frames.frames import (
     FunctionCallResultFrame,
     FunctionCallResultProperties,
     FunctionCallsStartedFrame,
+    LLMConfigureOutputFrame,
+    LLMFullResponseEndFrame,
+    LLMFullResponseStartFrame,
+    LLMTextFrame,
     StartFrame,
     StartInterruptionFrame,
     UserImageRequestFrame,
 )
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMUserAggregatorParams,
@@ -88,7 +93,7 @@ class FunctionCallParams:
     tool_call_id: str
     arguments: Mapping[str, Any]
     llm: "LLMService"
-    context: OpenAILLMContext
+    context: OpenAILLMContext | LLMContext
     result_callback: FunctionCallResultCallback
 
 
@@ -129,7 +134,7 @@ class FunctionCallRunnerItem:
     function_name: str
     tool_call_id: str
     arguments: Mapping[str, Any]
-    context: OpenAILLMContext
+    context: OpenAILLMContext | LLMContext
     run_llm: Optional[bool] = None
 
 
@@ -177,6 +182,7 @@ class LLMService(AIService):
         self._function_call_tasks: Dict[asyncio.Task, FunctionCallRunnerItem] = {}
         self._sequential_runner_task: Optional[asyncio.Task] = None
         self._tracing_enabled: bool = False
+        self._skip_tts: bool = False
 
         self._register_event_handler("on_function_calls_started")
         self._register_event_handler("on_completion_timeout")
@@ -189,6 +195,19 @@ class LLMService(AIService):
         """
         return self._adapter
 
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Must be implemented by subclasses.
+
+        Args:
+            context: The LLM context containing conversation history.
+
+        Returns:
+            The LLM's response as a string, or None if no response is generated.
+        """
+        raise NotImplementedError(f"run_inference() not supported by {self.__class__.__name__}")
+
     def create_context_aggregator(
         self,
         context: OpenAILLMContext,
@@ -252,6 +271,20 @@ class LLMService(AIService):
 
         if isinstance(frame, StartInterruptionFrame):
             await self._handle_interruptions(frame)
+        elif isinstance(frame, LLMConfigureOutputFrame):
+            self._skip_tts = frame.skip_tts
+
+    async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
+        """Pushes a frame.
+
+        Args:
+            frame: The frame to push.
+            direction: The direction of frame pushing.
+        """
+        if isinstance(frame, (LLMTextFrame, LLMFullResponseStartFrame, LLMFullResponseEndFrame)):
+            frame.skip_tts = self._skip_tts
+
+        await super().push_frame(frame, direction)
 
     async def _handle_interruptions(self, _: StartInterruptionFrame):
         # logger.info("In LLM Handling interruptions")
@@ -434,7 +467,9 @@ class LLMService(AIService):
         else:
             await self._sequential_runner_queue.put(runner_item)
 
-    async def _call_start_function(
+    async def _call_start_function(
+        self, context: OpenAILLMContext | LLMContext, function_name: str
+    ):
         if function_name in self._start_callbacks.keys():
             await self._start_callbacks[function_name](function_name, self, context)
         elif None in self._start_callbacks.keys():
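The LLMService changes add a `_skip_tts` flag that is set from LLMConfigureOutputFrame and copied onto every outgoing LLMTextFrame, LLMFullResponseStartFrame, and LLMFullResponseEndFrame, so a downstream TTS service can ignore a response. A sketch of a processor that could drive this; the LLMConfigureOutputFrame constructor (a `skip_tts` field) is an assumption based on the attribute read in the hunk above:

    from pipecat.frames.frames import Frame, LLMConfigureOutputFrame, TranscriptionFrame
    from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


    class SilentTurnGate(FrameProcessor):
        """Marks LLM output as skip_tts while a hypothetical `silent` flag is set."""

        def __init__(self, silent: bool = False):
            super().__init__()
            self._silent = silent

        async def process_frame(self, frame: Frame, direction: FrameDirection):
            await super().process_frame(frame, direction)
            if isinstance(frame, TranscriptionFrame):
                # Assumed constructor: LLMConfigureOutputFrame(skip_tts=...).
                await self.push_frame(LLMConfigureOutputFrame(skip_tts=self._silent))
            await self.push_frame(frame, direction)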
pipecat/services/mem0/memory.py
CHANGED
@@ -120,6 +120,7 @@ class Mem0MemoryService(FrameProcessor):
         try:
             logger.debug(f"Storing {len(messages)} messages in Mem0")
             params = {
+                "async_mode": True,
                 "messages": messages,
                 "metadata": {"platform": "pipecat"},
                 "output_format": "v1.1",
@@ -163,7 +164,7 @@ class Mem0MemoryService(FrameProcessor):
                 ("run_id", self.run_id),
             ]
             clauses = [{name: value} for name, value in id_pairs if value is not None]
-            filters = {"
+            filters = {"OR": clauses} if clauses else {}
             results = self.memory_client.search(
                 query=query,
                 filters=filters,
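The Mem0 search change ORs together whichever of user_id, agent_id, and run_id are set, and falls back to an empty filter when none are. A standalone illustration of the resulting `filters` value:

    # Same logic as the hunk above, outside the service, with example IDs.
    id_pairs = [("user_id", "user-42"), ("agent_id", None), ("run_id", "run-7")]
    clauses = [{name: value} for name, value in id_pairs if value is not None]
    filters = {"OR": clauses} if clauses else {}
    print(filters)  # {'OR': [{'user_id': 'user-42'}, {'run_id': 'run-7'}]}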
pipecat/services/mistral/llm.py
CHANGED
@@ -12,6 +12,7 @@ from loguru import logger
 from openai import AsyncStream
 from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam
 
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.frames.frames import FunctionCallFromLLM
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.services.openai.llm import OpenAILLMService
@@ -148,9 +149,7 @@ class MistralLLMService(OpenAILLMService):
         if calls_to_execute:
             await super().run_function_calls(calls_to_execute)
 
-    def build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for Mistral chat completion request.
 
         Handles Mistral-specific requirements including:
@@ -159,14 +158,14 @@ class MistralLLMService(OpenAILLMService):
         - Core completion settings
         """
         # Apply Mistral's assistant prefix requirement for API compatibility
-        fixed_messages = self._apply_mistral_assistant_prefix(messages)
+        fixed_messages = self._apply_mistral_assistant_prefix(params_from_context["messages"])
 
         params = {
             "model": self.model_name,
             "stream": True,
             "messages": fixed_messages,
-            "tools":
-            "tool_choice":
+            "tools": params_from_context["tools"],
+            "tool_choice": params_from_context["tool_choice"],
             "frequency_penalty": self._settings["frequency_penalty"],
             "presence_penalty": self._settings["presence_penalty"],
             "temperature": self._settings["temperature"],
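build_chat_completion_params() now takes a single OpenAILLMInvocationParams mapping rather than a context plus message list; the hunks above only read its "messages", "tools", and "tool_choice" keys. A sketch of calling it directly with such a mapping; the MistralLLMService constructor arguments and any other keys of OpenAILLMInvocationParams are assumptions:

    from pipecat.services.mistral.llm import MistralLLMService

    llm = MistralLLMService(api_key="YOUR_MISTRAL_API_KEY")  # constructor args assumed

    # Treated here as a plain dict with the keys the method actually reads.
    params_from_context = {
        "messages": [{"role": "user", "content": "Summarize our last call."}],
        "tools": [],
        "tool_choice": None,
    }

    completion_params = llm.build_chat_completion_params(params_from_context)
    print(completion_params["model"], completion_params["stream"])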
pipecat/services/nim/llm.py
CHANGED
@@ -11,6 +11,7 @@ Microservice) API while maintaining compatibility with the OpenAI-style interfac
 """
 
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.services.openai.llm import OpenAILLMService
 
@@ -47,7 +48,7 @@ class NimLLMService(OpenAILLMService):
         self._has_reported_prompt_tokens = False
         self._is_processing = False
 
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         """Process a context through the LLM and accumulate token usage metrics.
 
         This method overrides the parent class implementation to handle NVIDIA's
|