dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +277 -86
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +18 -6
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +125 -79
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_context.py +40 -2
- pipecat/processors/aggregators/llm_response.py +32 -15
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/dtmf_aggregator.py +174 -77
- pipecat/processors/filters/stt_mute_filter.py +17 -0
- pipecat/processors/frame_processor.py +110 -24
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +210 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +26 -5
- pipecat/processors/user_idle_processor.py +35 -11
- pipecat/runner/daily.py +59 -20
- pipecat/runner/run.py +395 -93
- pipecat/runner/types.py +6 -4
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/__init__.py +5 -1
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +41 -4
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +5 -5
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/serializers/vi.py +324 -0
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/assemblyai/models.py +6 -0
- pipecat/services/assemblyai/stt.py +13 -5
- pipecat/services/asyncai/tts.py +5 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +147 -105
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +436 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1265 -0
- pipecat/services/aws/stt.py +3 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +8 -354
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/llm.py +51 -1
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/stt.py +77 -70
- pipecat/services/cartesia/tts.py +80 -13
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +640 -0
- pipecat/services/elevenlabs/__init__.py +4 -1
- pipecat/services/elevenlabs/stt.py +339 -0
- pipecat/services/elevenlabs/tts.py +87 -46
- pipecat/services/fish/tts.py +5 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/stt.py +4 -0
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +4 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +5 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +49 -10
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/piper/tts.py +7 -9
- pipecat/services/playht/tts.py +34 -4
- pipecat/services/rime/tts.py +12 -12
- pipecat/services/riva/stt.py +3 -1
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +700 -0
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +97 -13
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +22 -10
- pipecat/services/stt_service.py +47 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +75 -22
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +51 -9
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +13 -34
- pipecat/transports/base_output.py +140 -104
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +103 -19
- pipecat/transports/smallwebrtc/request_handler.py +246 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/redis.py +58 -0
- pipecat/utils/string.py +13 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- pipecat/services/openai.py +0 -698
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/services/hume/tts.py
ADDED

```diff
@@ -0,0 +1,220 @@
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+
+"""Hume Text-to-Speech service implementation."""
+
+import base64
+import os
+from typing import Any, AsyncGenerator, Optional
+
+from loguru import logger
+from pydantic import BaseModel
+
+from pipecat.frames.frames import (
+    ErrorFrame,
+    Frame,
+    StartFrame,
+    TTSAudioRawFrame,
+    TTSStartedFrame,
+    TTSStoppedFrame,
+)
+from pipecat.services.tts_service import TTSService
+from pipecat.utils.tracing.service_decorators import traced_tts
+
+try:
+    from hume import AsyncHumeClient
+    from hume.tts import (
+        FormatPcm,
+        PostedUtterance,
+        PostedUtteranceVoiceWithId,
+    )
+except ModuleNotFoundError as e:  # pragma: no cover - import-time guidance
+    logger.error(f"Exception: {e}")
+    logger.error("In order to use Hume, you need to `pip install pipecat-ai[hume]`.")
+    raise Exception(f"Missing module: {e}")
+
+
+HUME_SAMPLE_RATE = 48_000  # Hume TTS streams at 48 kHz
+
+
+class HumeTTSService(TTSService):
+    """Hume Octave Text-to-Speech service.
+
+    Streams PCM audio via Hume's HTTP output streaming (JSON chunks) endpoint
+    using the Python SDK and emits ``TTSAudioRawFrame`` frames suitable for Pipecat transports.
+
+    Supported features:
+
+    - Generates speech from text using Hume TTS.
+    - Streams PCM audio.
+    - Supports dynamic updates of voice and synthesis parameters at runtime.
+    - Provides metrics for Time To First Byte (TTFB) and TTS usage.
+    """
+
+    class InputParams(BaseModel):
+        """Optional synthesis parameters for Hume TTS.
+
+        Parameters:
+            description: Natural-language acting directions (up to 100 characters).
+            speed: Speaking-rate multiplier (0.5-2.0).
+            trailing_silence: Seconds of silence to append at the end (0-5).
+        """
+
+        description: Optional[str] = None
+        speed: Optional[float] = None
+        trailing_silence: Optional[float] = None
+
+    def __init__(
+        self,
+        *,
+        api_key: Optional[str] = None,
+        voice_id: str,
+        params: Optional[InputParams] = None,
+        sample_rate: Optional[int] = HUME_SAMPLE_RATE,
+        **kwargs,
+    ) -> None:
+        """Initialize the HumeTTSService.
+
+        Args:
+            api_key: Hume API key. If omitted, reads the ``HUME_API_KEY`` environment variable.
+            voice_id: ID of the voice to use. Only voice IDs are supported; voice names are not.
+            params: Optional synthesis controls (acting instructions, speed, trailing silence).
+            sample_rate: Output sample rate for emitted PCM frames. Defaults to 48_000 (Hume).
+            **kwargs: Additional arguments passed to the parent class.
+        """
+        api_key = api_key or os.getenv("HUME_API_KEY")
+        if not api_key:
+            raise ValueError("HumeTTSService requires an API key (env HUME_API_KEY or api_key=)")
+
+        if sample_rate != HUME_SAMPLE_RATE:
+            logger.warning(
+                f"Hume TTS streams at {HUME_SAMPLE_RATE} Hz; configured sample_rate={sample_rate}"
+            )
+
+        super().__init__(sample_rate=sample_rate, **kwargs)
+
+        self._client = AsyncHumeClient(api_key=api_key)
+        self._params = params or HumeTTSService.InputParams()
+
+        # Store voice in the base class (mirrors other services)
+        self.set_voice(voice_id)
+
+        self._audio_bytes = b""
+
+    def can_generate_metrics(self) -> bool:
+        """Can generate metrics.
+
+        Returns:
+            True if metrics can be generated, False otherwise.
+        """
+        return True
+
+    async def start(self, frame: StartFrame) -> None:
+        """Start the service.
+
+        Args:
+            frame: The start frame.
+        """
+        await super().start(frame)
+
+    async def update_setting(self, key: str, value: Any) -> None:
+        """Runtime updates via `TTSUpdateSettingsFrame`.
+
+        Args:
+            key: The name of the setting to update. Recognized keys are:
+                - "voice_id"
+                - "description"
+                - "speed"
+                - "trailing_silence"
+            value: The new value for the setting.
+        """
+        key_l = (key or "").lower()
+
+        if key_l == "voice_id":
+            self.set_voice(str(value))
+            logger.info(f"HumeTTSService voice_id set to: {self.voice}")
+        elif key_l == "description":
+            self._params.description = None if value is None else str(value)
+        elif key_l == "speed":
+            self._params.speed = None if value is None else float(value)
+        elif key_l == "trailing_silence":
+            self._params.trailing_silence = None if value is None else float(value)
+        else:
+            # Defer unknown keys to the base class
+            await super().update_setting(key, value)
+
+    @traced_tts
+    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using Hume TTS.
+
+        Args:
+            text: The text to be synthesized.
+
+        Returns:
+            An async generator that yields `Frame` objects, including
+            `TTSStartedFrame`, `TTSAudioRawFrame`, `ErrorFrame`, and
+            `TTSStoppedFrame`.
+        """
+        logger.debug(f"{self}: Generating Hume TTS: [{text}]")
+
+        # Build the request payload
+        utterance_kwargs: dict[str, Any] = {
+            "text": text,
+            "voice": PostedUtteranceVoiceWithId(id=self._voice_id),
+        }
+        if self._params.description is not None:
+            utterance_kwargs["description"] = self._params.description
+        if self._params.speed is not None:
+            utterance_kwargs["speed"] = self._params.speed
+        if self._params.trailing_silence is not None:
+            utterance_kwargs["trailing_silence"] = self._params.trailing_silence
+
+        utterance = PostedUtterance(**utterance_kwargs)
+
+        # Request raw PCM chunks in the streaming JSON
+        pcm_fmt = FormatPcm(type="pcm")
+
+        await self.start_ttfb_metrics()
+        await self.start_tts_usage_metrics(text)
+        yield TTSStartedFrame()
+
+        try:
+            # Instant mode is always enabled here (not user-configurable)
+            # Hume emits mono PCM at 48 kHz; downstream can resample if needed.
+            # We buffer audio bytes before sending to prevent glitches.
+            self._audio_bytes = b""
+            async for chunk in self._client.tts.synthesize_json_streaming(
+                utterances=[utterance],
+                format=pcm_fmt,
+                instant_mode=True,
+                version="2",
+            ):
+                audio_b64 = getattr(chunk, "audio", None)
+                if not audio_b64:
+                    continue
+
+                pcm_bytes = base64.b64decode(audio_b64)
+                self._audio_bytes += pcm_bytes
+
+                # Buffer audio until we have enough to avoid glitches
+                if len(self._audio_bytes) < self.chunk_size:
+                    continue
+
+                frame = TTSAudioRawFrame(
+                    audio=self._audio_bytes,
+                    sample_rate=self.sample_rate,
+                    num_channels=1,
+                )
+
+                yield frame
+
+                self._audio_bytes = b""
+
+        except Exception as e:
+            logger.exception(f"{self} error generating TTS: {e}")
+            await self.push_error(ErrorFrame(f"Error generating TTS: {e}"))
+        finally:
+            # Ensure TTFB timer is stopped even on early failures
+            await self.stop_ttfb_metrics()
+            yield TTSStoppedFrame()
```
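A minimal usage sketch of the new service, based on the constructor shown in the diff above; the voice ID is a placeholder and the pipeline wiring is left out:

```python
# Sketch only. Assumes HUME_API_KEY is set in the environment;
# the voice ID below is a placeholder, not a real Hume voice.
from pipecat.services.hume.tts import HumeTTSService

tts = HumeTTSService(
    voice_id="YOUR-HUME-VOICE-ID",  # only voice IDs are supported, not names
    params=HumeTTSService.InputParams(
        description="calm, measured narrator",  # acting directions (<= 100 chars)
        speed=1.0,             # speaking-rate multiplier (0.5-2.0)
        trailing_silence=0.5,  # seconds of silence appended (0-5)
    ),
)
# The service emits 48 kHz mono PCM as TTSAudioRawFrame frames; place it
# between the LLM and the transport output like any other Pipecat TTSService.
```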
pipecat/services/inworld/tts.py
CHANGED

```diff
@@ -38,7 +38,7 @@ Examples::
         model="inworld-tts-1",
         streaming=True,  # Default
         params=InworldTTSService.InputParams(
-            temperature=
+            temperature=1.1,  # Optional: control synthesis variability (range: [0, 2])
         ),
     )
 
@@ -50,7 +50,7 @@ Examples::
         model="inworld-tts-1",
         streaming=False,
         params=InworldTTSService.InputParams(
-            temperature=
+            temperature=1.1,
         ),
     )
 """
@@ -123,7 +123,7 @@ class InworldTTSService(TTSService):
             model="inworld-tts-1",
             streaming=True,  # Default behavior
             params=InworldTTSService.InputParams(
-                temperature=
+                temperature=1.1,  # Add variability to speech synthesis (range: [0, 2])
             ),
         )
 
@@ -135,7 +135,7 @@ class InworldTTSService(TTSService):
             model="inworld-tts-1-max",
             streaming=False,
            params=InworldTTSService.InputParams(
-                temperature=
+                temperature=1.1,
             ),
         )
     """
@@ -144,7 +144,7 @@ class InworldTTSService(TTSService):
        """Optional input parameters for Inworld TTS configuration.
 
         Parameters:
-            temperature: Voice temperature control for synthesis variability (e.g.,
+            temperature: Voice temperature control for synthesis variability (e.g., 1.1).
                 Valid range: [0, 2]. Higher values increase variability.
 
         Note:
@@ -197,7 +197,7 @@ class InworldTTSService(TTSService):
                 - "LINEAR16" (default) - Uncompressed PCM, best quality
                 - Other formats as supported by Inworld API
             params: Optional input parameters for additional configuration. Use this to specify:
-                - temperature: Voice temperature control for variability (range: [0, 2], e.g.,
+                - temperature: Voice temperature control for variability (range: [0, 2], e.g., 1.1, optional)
             Language is automatically inferred from input text.
             **kwargs: Additional arguments passed to the parent TTSService class.
```
pipecat/services/llm_service.py
CHANGED

```diff
@@ -36,15 +36,15 @@ from pipecat.frames.frames import (
     FunctionCallResultFrame,
     FunctionCallResultProperties,
     FunctionCallsStartedFrame,
+    InterruptionFrame,
     LLMConfigureOutputFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMTextFrame,
     StartFrame,
-    StartInterruptionFrame,
     UserImageRequestFrame,
 )
-from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMUserAggregatorParams,
@@ -195,6 +195,17 @@ class LLMService(AIService):
         """
         return self._adapter
 
+    def create_llm_specific_message(self, message: Any) -> LLMSpecificMessage:
+        """Create an LLM-specific message (as opposed to a standard message) for use in an LLMContext.
+
+        Args:
+            message: The message content.
+
+        Returns:
+            A LLMSpecificMessage instance.
+        """
+        return self.get_llm_adapter().create_llm_specific_message(message)
+
     async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
         """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
 
@@ -269,7 +280,7 @@ class LLMService(AIService):
         """
         await super().process_frame(frame, direction)
 
-        if isinstance(frame, StartInterruptionFrame):
+        if isinstance(frame, InterruptionFrame):
             await self._handle_interruptions(frame)
         elif isinstance(frame, LLMConfigureOutputFrame):
             self._skip_tts = frame.skip_tts
@@ -286,8 +297,7 @@ class LLMService(AIService):
 
         await super().push_frame(frame, direction)
 
-    async def _handle_interruptions(self, _: StartInterruptionFrame):
-        # logger.info("In LLM Handling interruptions")
+    async def _handle_interruptions(self, _: InterruptionFrame):
         for function_name, entry in self._functions.items():
             if entry.cancel_on_interruption:
                 await self._cancel_function_call(function_name)
```
pipecat/services/lmnt/tts.py
CHANGED

```diff
@@ -16,8 +16,8 @@ from pipecat.frames.frames import (
     EndFrame,
     ErrorFrame,
     Frame,
+    InterruptionFrame,
     StartFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStartedFrame,
     TTSStoppedFrame,
@@ -180,7 +180,7 @@ class LmntTTSService(InterruptibleTTSService):
             direction: The direction to push the frame.
         """
         await super().push_frame(frame, direction)
-        if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
+        if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
             self._started = False
 
     async def _connect(self):
@@ -222,6 +222,7 @@ class LmntTTSService(InterruptibleTTSService):
             # Send initialization message
             await self._websocket.send(json.dumps(init_msg))
 
+            await self._call_event_handler("on_connected")
         except Exception as e:
             logger.error(f"{self} initialization error: {e}")
             self._websocket = None
@@ -243,6 +244,7 @@ class LmntTTSService(InterruptibleTTSService):
         finally:
             self._started = False
             self._websocket = None
+            await self._call_event_handler("on_disconnected")
 
     def _get_websocket(self):
         """Get the WebSocket connection if available."""
```
pipecat/services/mcp_service.py
CHANGED

```diff
@@ -7,7 +7,7 @@
 """MCP (Model Context Protocol) client for integrating external tools with LLMs."""
 
 import json
-from typing import Any, Dict, List,
+from typing import Any, Dict, List, TypeAlias
 
 from loguru import logger
 
@@ -28,6 +28,8 @@ except ModuleNotFoundError as e:
     logger.error("In order to use an MCP client, you need to `pip install pipecat-ai[mcp]`.")
     raise Exception(f"Missing module: {e}")
 
+ServerParameters: TypeAlias = StdioServerParameters | SseServerParameters | StreamableHttpParameters
+
 
 class MCPClient(BaseObject):
     """Client for Model Context Protocol (MCP) servers.
@@ -42,7 +44,7 @@ class MCPClient(BaseObject):
 
     def __init__(
         self,
-        server_params:
+        server_params: ServerParameters,
         **kwargs,
     ):
         """Initialize the MCP client with server parameters.
```
pipecat/services/mem0/memory.py
CHANGED

```diff
@@ -16,7 +16,8 @@ from typing import Any, Dict, List, Optional
 from loguru import logger
 from pydantic import BaseModel, Field
 
-from pipecat.frames.frames import ErrorFrame, Frame, LLMMessagesFrame
+from pipecat.frames.frames import ErrorFrame, Frame, LLMContextFrame, LLMMessagesFrame
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.openai_llm_context import (
     OpenAILLMContext,
     OpenAILLMContextFrame,
@@ -180,11 +181,11 @@ class Mem0MemoryService(FrameProcessor):
             logger.error(f"Error retrieving memories from Mem0: {e}")
             return []
 
-    def _enhance_context_with_memories(self, context: OpenAILLMContext, query: str):
+    def _enhance_context_with_memories(self, context: LLMContext | OpenAILLMContext, query: str):
         """Enhance the LLM context with relevant memories.
 
         Args:
-            context: The
+            context: The LLM context to enhance with memory information.
             query: The query to search for relevant memories.
         """
         # Skip if this is the same query we just processed
@@ -222,11 +223,11 @@ class Mem0MemoryService(FrameProcessor):
         context = None
         messages = None
 
-        if isinstance(frame, OpenAILLMContextFrame):
+        if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)):
             context = frame.context
         elif isinstance(frame, LLMMessagesFrame):
             messages = frame.messages
-            context =
+            context = LLMContext(messages)
 
         if context:
             try:
```
pipecat/services/mistral/llm.py
CHANGED

```diff
@@ -57,16 +57,18 @@ class MistralLLMService(OpenAILLMService):
         logger.debug(f"Creating Mistral client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    def
+    def _apply_mistral_fixups(
         self, messages: List[ChatCompletionMessageParam]
     ) -> List[ChatCompletionMessageParam]:
-        """Apply
+        """Apply fixups to messages to meet Mistral-specific requirements.
 
-
-
-        -
-
-
+        1. A "tool"-role message must be followed by an assistant message.
+
+        2. "system"-role messages must only appear at the start of a
+           conversation.
+
+        3. Assistant messages must have prefix=True when they are the final
+           message in a conversation (but at no other point).
 
         Args:
             messages: The original list of messages.
@@ -80,6 +82,25 @@ class MistralLLMService(OpenAILLMService):
         # Create a copy to avoid modifying the original
         fixed_messages = [dict(msg) for msg in messages]
 
+        # Ensure all tool responses are followed by an assistant message
+        assistant_insert_indices = []
+        for i, msg in enumerate(fixed_messages):
+            if msg.get("role") == "tool":
+                # If this is the last message or the next message is not assistant
+                if i == len(fixed_messages) - 1 or fixed_messages[i + 1].get("role") != "assistant":
+                    assistant_insert_indices.append(i + 1)
+        for idx in reversed(assistant_insert_indices):
+            fixed_messages.insert(idx, {"role": "assistant", "content": " "})
+
+        # Convert any "system" messages that aren't at the start (i.e., after the initial contiguous block) to "user"
+        first_non_system_idx = next(
+            (i for i, msg in enumerate(fixed_messages) if msg.get("role") != "system"),
+            len(fixed_messages),
+        )
+        for i, msg in enumerate(fixed_messages):
+            if msg.get("role") == "system" and i >= first_non_system_idx:
+                msg["role"] = "user"
+
         # Get the last message
         last_message = fixed_messages[-1]
 
@@ -158,7 +179,7 @@ class MistralLLMService(OpenAILLMService):
         - Core completion settings
         """
         # Apply Mistral's assistant prefix requirement for API compatibility
-        fixed_messages = self.
+        fixed_messages = self._apply_mistral_fixups(params_from_context["messages"])
 
         params = {
             "model": self.model_name,
```
pipecat/services/moondream/vision.py
CHANGED

```diff
@@ -11,17 +11,20 @@ for image analysis and description generation.
 """
 
 import asyncio
-
+import base64
+from io import BytesIO
+from typing import AsyncGenerator, Optional
 
 from loguru import logger
 from PIL import Image
 
-from pipecat.frames.frames import ErrorFrame, Frame, TextFrame
+from pipecat.frames.frames import ErrorFrame, Frame, TextFrame
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.services.vision_service import VisionService
 
 try:
     import torch
-    from transformers import AutoModelForCausalLM
+    from transformers import AutoModelForCausalLM
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error("In order to use Moondream, you need to `pip install pipecat-ai[moondream]`.")
@@ -94,11 +97,11 @@ class MoondreamService(VisionService):
 
         logger.debug("Loaded Moondream model")
 
-    async def run_vision(self,
+    async def run_vision(self, context: LLMContext) -> AsyncGenerator[Frame, None]:
         """Analyze an image and generate a description.
 
         Args:
-
+            context: The context to process, containing image data.
 
         Yields:
             Frame: TextFrame containing the generated image description, or ErrorFrame
@@ -109,22 +112,45 @@ class MoondreamService(VisionService):
             yield ErrorFrame("Moondream model not available")
             return
 
-
+        image_bytes = None
+        text = None
+        try:
+            messages = context.get_messages()
+            last_message = messages[-1]
+            last_message_content = last_message.get("content")
+
+            for item in last_message_content:
+                if isinstance(item, dict):
+                    if (
+                        "image_url" in item
+                        and isinstance(item["image_url"], dict)
+                        and item["image_url"].get("url")
+                    ):
+                        image_bytes = base64.b64decode(item["image_url"]["url"].split(",")[1])
+                    elif "text" in item and isinstance(item["text"], str):
+                        text = item["text"]
+
+        except Exception as e:
+            logger.error(f"Exception during image extraction: {e}")
+            yield ErrorFrame("Failed to extract image from context")
+            return
 
-
-        "
+        if not image_bytes:
+            logger.error("No image found in context")
+            yield ErrorFrame("No image found in context")
+            return
 
-
-
+        logger.debug(
+            f"Analyzing image (bytes length: {len(image_bytes) if image_bytes else 'None'})"
+        )
 
-
-
-
-        image = Image.frombytes(frame.format, frame.size, frame.image)
+        def get_image_description(bytes: bytes, text: Optional[str]) -> str:
+            image_buffer = BytesIO(bytes)
+            image = Image.open(image_buffer)
             image_embeds = self._model.encode_image(image)
-            description = self._model.query(image_embeds,
+            description = self._model.query(image_embeds, text)["answer"]
             return description
 
-        description = await asyncio.to_thread(get_image_description,
+        description = await asyncio.to_thread(get_image_description, image_bytes, text)
 
         yield TextFrame(text=description)
```
pipecat/services/neuphonic/tts.py
CHANGED

```diff
@@ -25,9 +25,9 @@ from pipecat.frames.frames import (
     EndFrame,
     ErrorFrame,
     Frame,
+    InterruptionFrame,
     LLMFullResponseEndFrame,
     StartFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSSpeakFrame,
     TTSStartedFrame,
@@ -224,7 +224,7 @@ class NeuphonicTTSService(InterruptibleTTSService):
             direction: The direction to push the frame.
         """
         await super().push_frame(frame, direction)
-        if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
+        if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
             self._started = False
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
@@ -293,6 +293,8 @@ class NeuphonicTTSService(InterruptibleTTSService):
             headers = {"x-api-key": self._api_key}
 
             self._websocket = await websocket_connect(url, additional_headers=headers)
+
+            await self._call_event_handler("on_connected")
         except Exception as e:
             logger.error(f"{self} initialization error: {e}")
             self._websocket = None
@@ -311,6 +313,7 @@ class NeuphonicTTSService(InterruptibleTTSService):
         finally:
             self._started = False
             self._websocket = None
+            await self._call_event_handler("on_disconnected")
 
     async def _receive_messages(self):
         """Receive and process messages from Neuphonic WebSocket."""
```