PyPI - dv-pipecat-ai - Versions diffs - 0.0.85.dev5__py3-none-any.whl → 0.0.85.dev698__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.85.dev5__py3-none-any.whl → 0.0.85.dev698__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (157) hide show

{dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/METADATA +78 -117
{dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/RECORD +157 -123
pipecat/adapters/base_llm_adapter.py +38 -1
pipecat/adapters/services/anthropic_adapter.py +9 -14
pipecat/adapters/services/aws_nova_sonic_adapter.py +5 -0
pipecat/adapters/services/bedrock_adapter.py +236 -13
pipecat/adapters/services/gemini_adapter.py +12 -8
pipecat/adapters/services/open_ai_adapter.py +19 -7
pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
pipecat/audio/filters/krisp_viva_filter.py +193 -0
pipecat/audio/filters/noisereduce_filter.py +15 -0
pipecat/audio/turn/base_turn_analyzer.py +9 -1
pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
pipecat/audio/vad/data/README.md +10 -0
pipecat/audio/vad/vad_analyzer.py +13 -1
pipecat/extensions/voicemail/voicemail_detector.py +5 -5
pipecat/frames/frames.py +120 -87
pipecat/observers/loggers/debug_log_observer.py +3 -3
pipecat/observers/loggers/llm_log_observer.py +7 -3
pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
pipecat/pipeline/runner.py +12 -4
pipecat/pipeline/service_switcher.py +64 -36
pipecat/pipeline/task.py +85 -24
pipecat/processors/aggregators/dtmf_aggregator.py +28 -22
pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
pipecat/processors/aggregators/llm_response.py +6 -7
pipecat/processors/aggregators/llm_response_universal.py +19 -15
pipecat/processors/aggregators/user_response.py +6 -6
pipecat/processors/aggregators/vision_image_frame.py +24 -2
pipecat/processors/audio/audio_buffer_processor.py +43 -8
pipecat/processors/filters/stt_mute_filter.py +2 -0
pipecat/processors/frame_processor.py +103 -17
pipecat/processors/frameworks/langchain.py +8 -2
pipecat/processors/frameworks/rtvi.py +209 -68
pipecat/processors/frameworks/strands_agents.py +170 -0
pipecat/processors/logger.py +2 -2
pipecat/processors/transcript_processor.py +4 -4
pipecat/processors/user_idle_processor.py +3 -6
pipecat/runner/run.py +270 -50
pipecat/runner/types.py +2 -0
pipecat/runner/utils.py +51 -10
pipecat/serializers/exotel.py +5 -5
pipecat/serializers/livekit.py +20 -0
pipecat/serializers/plivo.py +6 -9
pipecat/serializers/protobuf.py +6 -5
pipecat/serializers/telnyx.py +2 -2
pipecat/serializers/twilio.py +43 -23
pipecat/services/ai_service.py +2 -6
pipecat/services/anthropic/llm.py +2 -25
pipecat/services/asyncai/tts.py +2 -3
pipecat/services/aws/__init__.py +1 -0
pipecat/services/aws/llm.py +122 -97
pipecat/services/aws/nova_sonic/__init__.py +0 -0
pipecat/services/aws/nova_sonic/context.py +367 -0
pipecat/services/aws/nova_sonic/frames.py +25 -0
pipecat/services/aws/nova_sonic/llm.py +1155 -0
pipecat/services/aws/stt.py +1 -3
pipecat/services/aws_nova_sonic/__init__.py +19 -1
pipecat/services/aws_nova_sonic/aws.py +11 -1151
pipecat/services/aws_nova_sonic/context.py +13 -355
pipecat/services/aws_nova_sonic/frames.py +13 -17
pipecat/services/azure/realtime/__init__.py +0 -0
pipecat/services/azure/realtime/llm.py +65 -0
pipecat/services/azure/stt.py +15 -0
pipecat/services/cartesia/tts.py +2 -2
pipecat/services/deepgram/__init__.py +1 -0
pipecat/services/deepgram/flux/__init__.py +0 -0
pipecat/services/deepgram/flux/stt.py +636 -0
pipecat/services/elevenlabs/__init__.py +2 -1
pipecat/services/elevenlabs/stt.py +254 -276
pipecat/services/elevenlabs/tts.py +5 -5
pipecat/services/fish/tts.py +2 -2
pipecat/services/gemini_multimodal_live/events.py +38 -524
pipecat/services/gemini_multimodal_live/file_api.py +23 -173
pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
pipecat/services/gladia/stt.py +56 -72
pipecat/services/google/__init__.py +1 -0
pipecat/services/google/gemini_live/__init__.py +3 -0
pipecat/services/google/gemini_live/file_api.py +189 -0
pipecat/services/google/gemini_live/llm.py +1582 -0
pipecat/services/google/gemini_live/llm_vertex.py +184 -0
pipecat/services/google/llm.py +15 -11
pipecat/services/google/llm_openai.py +3 -3
pipecat/services/google/llm_vertex.py +86 -16
pipecat/services/google/tts.py +7 -3
pipecat/services/heygen/api.py +2 -0
pipecat/services/heygen/client.py +8 -4
pipecat/services/heygen/video.py +2 -0
pipecat/services/hume/__init__.py +5 -0
pipecat/services/hume/tts.py +220 -0
pipecat/services/inworld/tts.py +6 -6
pipecat/services/llm_service.py +15 -5
pipecat/services/lmnt/tts.py +2 -2
pipecat/services/mcp_service.py +4 -2
pipecat/services/mem0/memory.py +6 -5
pipecat/services/mistral/llm.py +29 -8
pipecat/services/moondream/vision.py +42 -16
pipecat/services/neuphonic/tts.py +2 -2
pipecat/services/openai/__init__.py +1 -0
pipecat/services/openai/base_llm.py +27 -20
pipecat/services/openai/realtime/__init__.py +0 -0
pipecat/services/openai/realtime/context.py +272 -0
pipecat/services/openai/realtime/events.py +1106 -0
pipecat/services/openai/realtime/frames.py +37 -0
pipecat/services/openai/realtime/llm.py +829 -0
pipecat/services/openai/tts.py +16 -8
pipecat/services/openai_realtime/__init__.py +27 -0
pipecat/services/openai_realtime/azure.py +21 -0
pipecat/services/openai_realtime/context.py +21 -0
pipecat/services/openai_realtime/events.py +21 -0
pipecat/services/openai_realtime/frames.py +21 -0
pipecat/services/openai_realtime_beta/azure.py +16 -0
pipecat/services/openai_realtime_beta/openai.py +17 -5
pipecat/services/playht/tts.py +31 -4
pipecat/services/rime/tts.py +3 -4
pipecat/services/sarvam/tts.py +2 -6
pipecat/services/simli/video.py +2 -2
pipecat/services/speechmatics/stt.py +1 -7
pipecat/services/stt_service.py +34 -0
pipecat/services/tavus/video.py +2 -2
pipecat/services/tts_service.py +9 -9
pipecat/services/vision_service.py +7 -6
pipecat/services/vistaar/llm.py +4 -0
pipecat/tests/utils.py +4 -4
pipecat/transcriptions/language.py +41 -1
pipecat/transports/base_input.py +17 -42
pipecat/transports/base_output.py +42 -26
pipecat/transports/daily/transport.py +199 -26
pipecat/transports/heygen/__init__.py +0 -0
pipecat/transports/heygen/transport.py +381 -0
pipecat/transports/livekit/transport.py +228 -63
pipecat/transports/local/audio.py +6 -1
pipecat/transports/local/tk.py +11 -2
pipecat/transports/network/fastapi_websocket.py +1 -1
pipecat/transports/smallwebrtc/connection.py +98 -19
pipecat/transports/smallwebrtc/request_handler.py +204 -0
pipecat/transports/smallwebrtc/transport.py +65 -23
pipecat/transports/tavus/transport.py +23 -12
pipecat/transports/websocket/client.py +41 -5
pipecat/transports/websocket/fastapi.py +21 -11
pipecat/transports/websocket/server.py +14 -7
pipecat/transports/whatsapp/api.py +8 -0
pipecat/transports/whatsapp/client.py +47 -0
pipecat/utils/base_object.py +54 -22
pipecat/utils/string.py +12 -1
pipecat/utils/tracing/service_decorators.py +21 -21
{dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/top_level.txt +0 -0
/pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0

pipecat/services/openai/tts.py CHANGED Viewed

@@ -64,6 +64,7 @@ class OpenAITTSService(TTSService):
         model: str = "gpt-4o-mini-tts",
         sample_rate: Optional[int] = None,
         instructions: Optional[str] = None,
+        speed: Optional[float] = None,
         **kwargs,
     ):
         """Initialize OpenAI TTS service.
@@ -75,6 +76,7 @@ class OpenAITTSService(TTSService):
             model: TTS model to use. Defaults to "gpt-4o-mini-tts".
             sample_rate: Output audio sample rate in Hz. If None, uses OpenAI's default 24kHz.
             instructions: Optional instructions to guide voice synthesis behavior.
+            speed: Voice speed control (0.25 to 4.0, default 1.0).
             **kwargs: Additional keyword arguments passed to TTSService.
         """
         if sample_rate and sample_rate != self.OPENAI_SAMPLE_RATE:
@@ -84,6 +86,7 @@ class OpenAITTSService(TTSService):
             )
         super().__init__(sample_rate=sample_rate, **kwargs)
+        self._speed = speed
         self.set_model_name(model)
         self.set_voice(voice)
         self._instructions = instructions
@@ -133,17 +136,22 @@ class OpenAITTSService(TTSService):
         try:
             await self.start_ttfb_metrics()
-            # Setup extra body parameters
-            extra_body = {}
+            # Setup API parameters
+            create_params = {
+                "input": text,
+                "model": self.model_name,
+                "voice": VALID_VOICES[self._voice_id],
+                "response_format": "pcm",
+            }
             if self._instructions:
-                extra_body["instructions"] = self._instructions
+                create_params["instructions"] = self._instructions
+            if self._speed:
+                create_params["speed"] = self._speed
             async with self._client.audio.speech.with_streaming_response.create(
-                input=text,
-                model=self.model_name,
-                voice=VALID_VOICES[self._voice_id],
-                response_format="pcm",
-                extra_body=extra_body,
+                **create_params
             ) as r:
                 if r.status_code != 200:
                     error = await r.text()

pipecat/services/openai_realtime/__init__.py ADDED Viewed

@@ -0,0 +1,27 @@
+#
+# Copyright (c) 2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+import warnings
+from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService
+from pipecat.services.openai.realtime.events import (
+    InputAudioNoiseReduction,
+    InputAudioTranscription,
+    SemanticTurnDetection,
+    SessionProperties,
+    TurnDetection,
+)
+from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
+with warnings.catch_warnings():
+    warnings.simplefilter("always")
+    warnings.warn(
+        "Types in pipecat.services.openai_realtime are deprecated. "
+        "Please use the equivalent types from "
+        "pipecat.services.openai.realtime instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )

pipecat/services/openai_realtime/azure.py ADDED Viewed

@@ -0,0 +1,21 @@
+#
+# Copyright (c) 2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+"""Azure OpenAI Realtime LLM service implementation."""
+import warnings
+from pipecat.services.azure.realtime.llm import *
+with warnings.catch_warnings():
+    warnings.simplefilter("always")
+    warnings.warn(
+        "Types in pipecat.services.openai_realtime.azure are deprecated. "
+        "Please use the equivalent types from "
+        "pipecat.services.azure.realtime.llm instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )

pipecat/services/openai_realtime/context.py ADDED Viewed

@@ -0,0 +1,21 @@
+#
+# Copyright (c) 2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+"""OpenAI Realtime LLM context and aggregator implementations."""
+import warnings
+from pipecat.services.openai.realtime.context import *
+with warnings.catch_warnings():
+    warnings.simplefilter("always")
+    warnings.warn(
+        "Types in pipecat.services.openai_realtime.context are deprecated. "
+        "Please use the equivalent types from "
+        "pipecat.services.openai.realtime.context instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )

pipecat/services/openai_realtime/events.py ADDED Viewed

@@ -0,0 +1,21 @@
+#
+# Copyright (c) 2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+"""Event models and data structures for OpenAI Realtime API communication."""
+import warnings
+from pipecat.services.openai.realtime.events import *
+with warnings.catch_warnings():
+    warnings.simplefilter("always")
+    warnings.warn(
+        "Types in pipecat.services.openai_realtime.events are deprecated. "
+        "Please use the equivalent types from "
+        "pipecat.services.openai.realtime.events instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )

pipecat/services/openai_realtime/frames.py ADDED Viewed

@@ -0,0 +1,21 @@
+#
+# Copyright (c) 2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+"""Custom frame types for OpenAI Realtime API integration."""
+import warnings
+from pipecat.services.openai.realtime.frames import *
+with warnings.catch_warnings():
+    warnings.simplefilter("always")
+    warnings.warn(
+        "Types in pipecat.services.openai_realtime.frames are deprecated. "
+        "Please use the equivalent types from "
+        "pipecat.services.openai.realtime.frames instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )

pipecat/services/openai_realtime_beta/azure.py CHANGED Viewed

@@ -6,6 +6,8 @@
 """Azure OpenAI Realtime Beta LLM service implementation."""
+import warnings
 from loguru import logger
 from .openai import OpenAIRealtimeBetaLLMService
@@ -23,6 +25,10 @@ except ModuleNotFoundError as e:
 class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
     """Azure OpenAI Realtime Beta LLM service with Azure-specific authentication.
+    .. deprecated:: 0.0.84
+        `AzureRealtimeBetaLLMService` is deprecated, use `AzureRealtimeLLMService` instead.
+        This class will be removed in version 1.0.0.
     Extends the OpenAI Realtime service to work with Azure OpenAI endpoints,
     using Azure's authentication headers and endpoint format. Provides the same
     real-time audio and text communication capabilities as the base OpenAI service.
@@ -44,6 +50,16 @@ class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
             **kwargs: Additional arguments passed to parent OpenAIRealtimeBetaLLMService.
         """
         super().__init__(base_url=base_url, api_key=api_key, **kwargs)
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "AzureRealtimeBetaLLMService is deprecated and will be removed in version 1.0.0. "
+                "Use AzureRealtimeLLMService instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         self.api_key = api_key
         self.base_url = base_url

pipecat/services/openai_realtime_beta/openai.py CHANGED Viewed

@@ -9,6 +9,7 @@
 import base64
 import json
 import time
+import warnings
 from dataclasses import dataclass
 from typing import Optional
@@ -23,6 +24,7 @@ from pipecat.frames.frames import (
     Frame,
     InputAudioRawFrame,
     InterimTranscriptionFrame,
+    InterruptionFrame,
     LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
@@ -31,7 +33,6 @@ from pipecat.frames.frames import (
     LLMTextFrame,
     LLMUpdateSettingsFrame,
     StartFrame,
-    StartInterruptionFrame,
     TranscriptionFrame,
     TTSAudioRawFrame,
     TTSStartedFrame,
@@ -92,6 +93,10 @@ class CurrentAudioResponse:
 class OpenAIRealtimeBetaLLMService(LLMService):
     """OpenAI Realtime Beta LLM service providing real-time audio and text communication.
+    .. deprecated:: 0.0.84
+        `OpenAIRealtimeBetaLLMService` is deprecated, use `OpenAIRealtimeLLMService` instead.
+        This class will be removed in version 1.0.0.
     Implements the OpenAI Realtime API Beta with WebSocket communication for low-latency
     bidirectional audio and text interactions. Supports function calling, conversation
     management, and real-time transcription.
@@ -124,6 +129,15 @@ class OpenAIRealtimeBetaLLMService(LLMService):
             send_transcription_frames: Whether to emit transcription frames. Defaults to True.
             **kwargs: Additional arguments passed to parent LLMService.
         """
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "OpenAIRealtimeBetaLLMService is deprecated and will be removed in version 1.0.0. "
+                "Use OpenAIRealtimeLLMService instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         full_url = f"{base_url}?model={model}"
         super().__init__(base_url=full_url, **kwargs)
@@ -350,7 +364,7 @@ class OpenAIRealtimeBetaLLMService(LLMService):
         elif isinstance(frame, InputAudioRawFrame):
             if not self._audio_input_paused:
                 await self._send_user_audio(frame)
-        elif isinstance(frame, StartInterruptionFrame):
+        elif isinstance(frame, InterruptionFrame):
             await self._handle_interruption()
         elif isinstance(frame, UserStartedSpeakingFrame):
             await self._handle_user_started_speaking(frame)
@@ -644,14 +658,12 @@ class OpenAIRealtimeBetaLLMService(LLMService):
     async def _handle_evt_speech_started(self, evt):
         await self._truncate_current_audio_response()
-        await self._start_interruption()  # cancels this processor task
-        await self.push_frame(StartInterruptionFrame())  # cancels downstream tasks
+        await self.push_interruption_task_frame_and_wait()
         await self.push_frame(UserStartedSpeakingFrame())
     async def _handle_evt_speech_stopped(self, evt):
         await self.start_ttfb_metrics()
         await self.start_processing_metrics()
-        await self._stop_interruption()
         await self.push_frame(UserStoppedSpeakingFrame())
     async def _maybe_handle_evt_retrieve_conversation_item_error(self, evt: events.ErrorEvent):

pipecat/services/playht/tts.py CHANGED Viewed

@@ -14,6 +14,7 @@ import io
 import json
 import struct
 import uuid
+import warnings
 from typing import AsyncGenerator, Optional
 import aiohttp
@@ -25,8 +26,8 @@ from pipecat.frames.frames import (
     EndFrame,
     ErrorFrame,
     Frame,
+    InterruptionFrame,
     StartFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStartedFrame,
     TTSStoppedFrame,
@@ -110,6 +111,11 @@ def language_to_playht_language(language: Language) -> Optional[str]:
 class PlayHTTTSService(InterruptibleTTSService):
     """PlayHT WebSocket-based text-to-speech service.
+    .. deprecated:: 0.0.88
+        This class is deprecated and will be removed in a future version.
+        PlayHT is shutting down their API on December 31st, 2025.
     Provides real-time text-to-speech synthesis using PlayHT's WebSocket API.
     Supports streaming audio generation with configurable voice engines and
     language settings.
@@ -158,6 +164,15 @@ class PlayHTTTSService(InterruptibleTTSService):
             **kwargs,
         )
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "PlayHT is shutting down their API on December 31st, 2025. "
+                "'PlayHTTTSService' is deprecated and will be removed in a future version.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         params = params or PlayHTTTSService.InputParams()
         self._api_key = api_key
@@ -312,7 +327,7 @@ class PlayHTTTSService(InterruptibleTTSService):
             return self._websocket
         raise Exception("Websocket not connected")
-    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         """Handle interruption by stopping metrics and clearing request ID."""
         await super()._handle_interruption(frame, direction)
         await self.stop_all_metrics()
@@ -401,6 +416,11 @@ class PlayHTTTSService(InterruptibleTTSService):
 class PlayHTHttpTTSService(TTSService):
     """PlayHT HTTP-based text-to-speech service.
+    .. deprecated:: 0.0.88
+        This class is deprecated and will be removed in a future version.
+        PlayHT is shutting down their API on December 31st, 2025.
     Provides text-to-speech synthesis using PlayHT's HTTP API for simpler,
     non-streaming synthesis. Suitable for use cases where streaming is not
     required and simpler integration is preferred.
@@ -454,8 +474,6 @@ class PlayHTHttpTTSService(TTSService):
         # Warn about deprecated protocol parameter if explicitly provided
         if protocol:
-            import warnings
             with warnings.catch_warnings():
                 warnings.simplefilter("always")
                 warnings.warn(
@@ -464,6 +482,15 @@ class PlayHTHttpTTSService(TTSService):
                     stacklevel=2,
                 )
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "PlayHT is shutting down their API on December 31st, 2025. "
+                "'PlayHTHttpTTSService' is deprecated and will be removed in a future version.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         params = params or PlayHTHttpTTSService.InputParams()
         self._user_id = user_id

pipecat/services/rime/tts.py CHANGED Viewed

@@ -24,15 +24,14 @@ from pipecat.frames.frames import (
     EndFrame,
     ErrorFrame,
     Frame,
+    InterruptionFrame,
     StartFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStartedFrame,
     TTSStoppedFrame,
 )
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.tts_service import AudioContextWordTTSService, TTSService
-from pipecat.transcriptions import language
 from pipecat.transcriptions.language import Language
 from pipecat.utils.text.base_text_aggregator import BaseTextAggregator
 from pipecat.utils.text.skip_tags_aggregator import SkipTagsAggregator
@@ -280,7 +279,7 @@ class RimeTTSService(AudioContextWordTTSService):
             return self._websocket
         raise Exception("Websocket not connected")
-    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         """Handle interruption by clearing current context."""
         await super()._handle_interruption(frame, direction)
         await self.stop_all_metrics()
@@ -375,7 +374,7 @@ class RimeTTSService(AudioContextWordTTSService):
             direction: The direction to push the frame.
         """
         await super().push_frame(frame, direction)
-        if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
+        if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
             if isinstance(frame, TTSStoppedFrame):
                 await self.add_word_timestamps([("Reset", 0)])

pipecat/services/sarvam/tts.py CHANGED Viewed

@@ -20,6 +20,7 @@ from pipecat.frames.frames import (
     EndFrame,
     ErrorFrame,
     Frame,
+    InterruptionFrame,
     LLMFullResponseEndFrame,
     StartFrame,
     StartInterruptionFrame,
@@ -455,7 +456,7 @@ class SarvamTTSService(InterruptibleTTSService):
             direction: The direction to push the frame.
         """
         await super().push_frame(frame, direction)
-        if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
+        if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
             self._started = False
     async def process_frame(self, frame: Frame, direction: FrameDirection):
@@ -632,11 +633,6 @@ class SarvamTTSService(InterruptibleTTSService):
         """
         logger.debug(f"Generating TTS: [{text}]")
-        # Validate text input
-        if not text or not isinstance(text, str) or not text.strip():
-            logger.warning(f"Invalid text input for Sarvam TTS run_tts: {repr(text)}")
-            return
         try:
             if not self._websocket or self._websocket.state is State.CLOSED:
                 await self._connect()

pipecat/services/simli/video.py CHANGED Viewed

@@ -15,8 +15,8 @@ from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
     Frame,
+    InterruptionFrame,
     OutputImageRawFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStoppedFrame,
     UserStartedSpeakingFrame,
@@ -179,7 +179,7 @@ class SimliVideoService(FrameProcessor):
             return
         elif isinstance(frame, (EndFrame, CancelFrame)):
             await self._stop()
-        elif isinstance(frame, (StartInterruptionFrame, UserStartedSpeakingFrame)):
+        elif isinstance(frame, (InterruptionFrame, UserStartedSpeakingFrame)):
             if not self._previously_interrupted:
                 await self._simli_client.clearBuffer()
             self._previously_interrupted = self._is_trinity_avatar

pipecat/services/speechmatics/stt.py CHANGED Viewed

@@ -19,7 +19,6 @@ from loguru import logger
 from pydantic import BaseModel
 from pipecat.frames.frames import (
-    BotInterruptionFrame,
     CancelFrame,
     EndFrame,
     ErrorFrame,
@@ -749,14 +748,13 @@ class SpeechmaticsSTTService(STTService):
             return
         # Frames to send
-        upstream_frames: list[Frame] = []
         downstream_frames: list[Frame] = []
         # If VAD is enabled, then send a speaking frame
         if self._params.enable_vad and not self._is_speaking:
             logger.debug("User started speaking")
             self._is_speaking = True
-            upstream_frames += [BotInterruptionFrame()]
+            await self.push_interruption_task_frame_and_wait()
             downstream_frames += [UserStartedSpeakingFrame()]
         # If final, then re-parse into TranscriptionFrame
@@ -794,10 +792,6 @@ class SpeechmaticsSTTService(STTService):
             self._is_speaking = False
             downstream_frames += [UserStoppedSpeakingFrame()]
-        # Send UPSTREAM frames
-        for frame in upstream_frames:
-            await self.push_frame(frame, FrameDirection.UPSTREAM)
         # Send the DOWNSTREAM frames
         for frame in downstream_frames:
             await self.push_frame(frame, FrameDirection.DOWNSTREAM)

pipecat/services/stt_service.py CHANGED Viewed

@@ -16,6 +16,7 @@ from loguru import logger
 from pipecat.frames.frames import (
     AudioRawFrame,
     BotStoppedSpeakingFrame,
+    ErrorFrame,
     Frame,
     StartFrame,
     STTMuteFrame,
@@ -25,6 +26,7 @@ from pipecat.frames.frames import (
 )
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.ai_service import AIService
+from pipecat.services.websocket_service import WebsocketService
 from pipecat.transcriptions.language import Language
@@ -298,3 +300,35 @@ class SegmentedSTTService(STTService):
         if not self._user_speaking and len(self._audio_buffer) > self._audio_buffer_size_1s:
             discarded = len(self._audio_buffer) - self._audio_buffer_size_1s
             self._audio_buffer = self._audio_buffer[discarded:]
+class WebsocketSTTService(STTService, WebsocketService):
+    """Base class for websocket-based STT services.
+    Combines STT functionality with websocket connectivity, providing automatic
+    error handling and reconnection capabilities.
+    Event handlers:
+        on_connection_error: Called when a websocket connection error occurs.
+    Example::
+        @stt.event_handler("on_connection_error")
+        async def on_connection_error(stt: STTService, error: str):
+            logger.error(f"STT connection error: {error}")
+    """
+    def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
+        """Initialize the Websocket STT service.
+        Args:
+            reconnect_on_error: Whether to automatically reconnect on websocket errors.
+            **kwargs: Additional arguments passed to parent classes.
+        """
+        STTService.__init__(self, **kwargs)
+        WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
+        self._register_event_handler("on_connection_error")
+    async def _report_error(self, error: ErrorFrame):
+        await self._call_event_handler("on_connection_error", error.error)
+        await self.push_error(error)

pipecat/services/tavus/video.py CHANGED Viewed

@@ -23,12 +23,12 @@ from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
     Frame,
+    InterruptionFrame,
     OutputAudioRawFrame,
     OutputImageRawFrame,
     OutputTransportReadyFrame,
     SpeechOutputAudioRawFrame,
     StartFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStartedFrame,
 )
@@ -222,7 +222,7 @@ class TavusVideoService(AIService):
         """
         await super().process_frame(frame, direction)
-        if isinstance(frame, StartInterruptionFrame):
+        if isinstance(frame, InterruptionFrame):
             await self._handle_interruptions()
             await self.push_frame(frame, direction)
         elif isinstance(frame, TTSAudioRawFrame):

pipecat/services/tts_service.py CHANGED Viewed

@@ -20,10 +20,10 @@ from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
     InterimTranscriptionFrame,
+    InterruptionFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     StartFrame,
-    StartInterruptionFrame,
     TextFrame,
     TranscriptionFrame,
     TTSAudioRawFrame,
@@ -319,7 +319,7 @@ class TTSService(AIService):
             and not isinstance(frame, TranscriptionFrame)
         ):
             await self._process_text_frame(frame)
-        elif isinstance(frame, StartInterruptionFrame):
+        elif isinstance(frame, InterruptionFrame):
             await self._handle_interruption(frame, direction)
             await self.push_frame(frame, direction)
         elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
@@ -377,14 +377,14 @@ class TTSService(AIService):
         await super().push_frame(frame, direction)
         if self._push_stop_frames and (
-            isinstance(frame, StartInterruptionFrame)
+            isinstance(frame, InterruptionFrame)
             or isinstance(frame, TTSStartedFrame)
             or isinstance(frame, TTSAudioRawFrame)
             or isinstance(frame, TTSStoppedFrame)
         ):
             await self._stop_frame_queue.put(frame)
-    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         self._processing_text = False
         await self._text_aggregator.handle_interruption()
         for filter in self._text_filters:
@@ -465,7 +465,7 @@ class TTSService(AIService):
                 )
                 if isinstance(frame, TTSStartedFrame):
                     has_started = True
-                elif isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
+                elif isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
                     has_started = False
             except asyncio.TimeoutError:
                 if has_started:
@@ -550,7 +550,7 @@ class WordTTSService(TTSService):
         elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
             await self.flush_audio()
-    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         await super()._handle_interruption(frame, direction)
         self._llm_response_started = False
         self.reset_word_timestamps()
@@ -640,7 +640,7 @@ class InterruptibleTTSService(WebsocketTTSService):
         # user interrupts we need to reconnect.
         self._bot_speaking = False
-    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         await super()._handle_interruption(frame, direction)
         if self._bot_speaking:
             await self._disconnect()
@@ -712,7 +712,7 @@ class InterruptibleWordTTSService(WebsocketWordTTSService):
         # user interrupts we need to reconnect.
         self._bot_speaking = False
-    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         await super()._handle_interruption(frame, direction)
         if self._bot_speaking:
             await self._disconnect()
@@ -840,7 +840,7 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
         await super().cancel(frame)
         await self._stop_audio_context_task()
-    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         await super()._handle_interruption(frame, direction)
         await self._stop_audio_context_task()
         self._create_audio_context_task()

dv-pipecat-ai 0.0.85.dev5__py3-none-any.whl → 0.0.85.dev698__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.85.dev5__py3-none-any.whl → 0.0.85.dev698__py3-none-any.whl