PyPI - dv-pipecat-ai - Versions diffs - 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.82.dev857py3-none-any.whl → 0.0.85.dev837py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (195) hide show

{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
pipecat/adapters/base_llm_adapter.py +38 -1
pipecat/adapters/services/anthropic_adapter.py +9 -14
pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
pipecat/adapters/services/bedrock_adapter.py +236 -13
pipecat/adapters/services/gemini_adapter.py +12 -8
pipecat/adapters/services/open_ai_adapter.py +19 -7
pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
pipecat/audio/dtmf/dtmf-0.wav +0 -0
pipecat/audio/dtmf/dtmf-1.wav +0 -0
pipecat/audio/dtmf/dtmf-2.wav +0 -0
pipecat/audio/dtmf/dtmf-3.wav +0 -0
pipecat/audio/dtmf/dtmf-4.wav +0 -0
pipecat/audio/dtmf/dtmf-5.wav +0 -0
pipecat/audio/dtmf/dtmf-6.wav +0 -0
pipecat/audio/dtmf/dtmf-7.wav +0 -0
pipecat/audio/dtmf/dtmf-8.wav +0 -0
pipecat/audio/dtmf/dtmf-9.wav +0 -0
pipecat/audio/dtmf/dtmf-pound.wav +0 -0
pipecat/audio/dtmf/dtmf-star.wav +0 -0
pipecat/audio/filters/krisp_viva_filter.py +193 -0
pipecat/audio/filters/noisereduce_filter.py +15 -0
pipecat/audio/turn/base_turn_analyzer.py +9 -1
pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
pipecat/audio/vad/data/README.md +10 -0
pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
pipecat/audio/vad/silero.py +9 -3
pipecat/audio/vad/vad_analyzer.py +13 -1
pipecat/extensions/voicemail/voicemail_detector.py +5 -5
pipecat/frames/frames.py +277 -86
pipecat/observers/loggers/debug_log_observer.py +3 -3
pipecat/observers/loggers/llm_log_observer.py +7 -3
pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
pipecat/pipeline/runner.py +18 -6
pipecat/pipeline/service_switcher.py +64 -36
pipecat/pipeline/task.py +125 -79
pipecat/pipeline/tts_switcher.py +30 -0
pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
pipecat/processors/aggregators/llm_context.py +40 -2
pipecat/processors/aggregators/llm_response.py +32 -15
pipecat/processors/aggregators/llm_response_universal.py +19 -15
pipecat/processors/aggregators/user_response.py +6 -6
pipecat/processors/aggregators/vision_image_frame.py +24 -2
pipecat/processors/audio/audio_buffer_processor.py +43 -8
pipecat/processors/dtmf_aggregator.py +174 -77
pipecat/processors/filters/stt_mute_filter.py +17 -0
pipecat/processors/frame_processor.py +110 -24
pipecat/processors/frameworks/langchain.py +8 -2
pipecat/processors/frameworks/rtvi.py +210 -68
pipecat/processors/frameworks/strands_agents.py +170 -0
pipecat/processors/logger.py +2 -2
pipecat/processors/transcript_processor.py +26 -5
pipecat/processors/user_idle_processor.py +35 -11
pipecat/runner/daily.py +59 -20
pipecat/runner/run.py +395 -93
pipecat/runner/types.py +6 -4
pipecat/runner/utils.py +51 -10
pipecat/serializers/__init__.py +5 -1
pipecat/serializers/asterisk.py +16 -2
pipecat/serializers/convox.py +41 -4
pipecat/serializers/custom.py +257 -0
pipecat/serializers/exotel.py +5 -5
pipecat/serializers/livekit.py +20 -0
pipecat/serializers/plivo.py +5 -5
pipecat/serializers/protobuf.py +6 -5
pipecat/serializers/telnyx.py +2 -2
pipecat/serializers/twilio.py +43 -23
pipecat/serializers/vi.py +324 -0
pipecat/services/ai_service.py +2 -6
pipecat/services/anthropic/llm.py +2 -25
pipecat/services/assemblyai/models.py +6 -0
pipecat/services/assemblyai/stt.py +13 -5
pipecat/services/asyncai/tts.py +5 -3
pipecat/services/aws/__init__.py +1 -0
pipecat/services/aws/llm.py +147 -105
pipecat/services/aws/nova_sonic/__init__.py +0 -0
pipecat/services/aws/nova_sonic/context.py +436 -0
pipecat/services/aws/nova_sonic/frames.py +25 -0
pipecat/services/aws/nova_sonic/llm.py +1265 -0
pipecat/services/aws/stt.py +3 -3
pipecat/services/aws_nova_sonic/__init__.py +19 -1
pipecat/services/aws_nova_sonic/aws.py +11 -1151
pipecat/services/aws_nova_sonic/context.py +8 -354
pipecat/services/aws_nova_sonic/frames.py +13 -17
pipecat/services/azure/llm.py +51 -1
pipecat/services/azure/realtime/__init__.py +0 -0
pipecat/services/azure/realtime/llm.py +65 -0
pipecat/services/azure/stt.py +15 -0
pipecat/services/cartesia/stt.py +77 -70
pipecat/services/cartesia/tts.py +80 -13
pipecat/services/deepgram/__init__.py +1 -0
pipecat/services/deepgram/flux/__init__.py +0 -0
pipecat/services/deepgram/flux/stt.py +640 -0
pipecat/services/elevenlabs/__init__.py +4 -1
pipecat/services/elevenlabs/stt.py +339 -0
pipecat/services/elevenlabs/tts.py +87 -46
pipecat/services/fish/tts.py +5 -2
pipecat/services/gemini_multimodal_live/events.py +38 -524
pipecat/services/gemini_multimodal_live/file_api.py +23 -173
pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
pipecat/services/gladia/stt.py +56 -72
pipecat/services/google/__init__.py +1 -0
pipecat/services/google/gemini_live/__init__.py +3 -0
pipecat/services/google/gemini_live/file_api.py +189 -0
pipecat/services/google/gemini_live/llm.py +1582 -0
pipecat/services/google/gemini_live/llm_vertex.py +184 -0
pipecat/services/google/llm.py +15 -11
pipecat/services/google/llm_openai.py +3 -3
pipecat/services/google/llm_vertex.py +86 -16
pipecat/services/google/stt.py +4 -0
pipecat/services/google/tts.py +7 -3
pipecat/services/heygen/api.py +2 -0
pipecat/services/heygen/client.py +8 -4
pipecat/services/heygen/video.py +2 -0
pipecat/services/hume/__init__.py +5 -0
pipecat/services/hume/tts.py +220 -0
pipecat/services/inworld/tts.py +6 -6
pipecat/services/llm_service.py +15 -5
pipecat/services/lmnt/tts.py +4 -2
pipecat/services/mcp_service.py +4 -2
pipecat/services/mem0/memory.py +6 -5
pipecat/services/mistral/llm.py +29 -8
pipecat/services/moondream/vision.py +42 -16
pipecat/services/neuphonic/tts.py +5 -2
pipecat/services/openai/__init__.py +1 -0
pipecat/services/openai/base_llm.py +27 -20
pipecat/services/openai/realtime/__init__.py +0 -0
pipecat/services/openai/realtime/context.py +272 -0
pipecat/services/openai/realtime/events.py +1106 -0
pipecat/services/openai/realtime/frames.py +37 -0
pipecat/services/openai/realtime/llm.py +829 -0
pipecat/services/openai/tts.py +49 -10
pipecat/services/openai_realtime/__init__.py +27 -0
pipecat/services/openai_realtime/azure.py +21 -0
pipecat/services/openai_realtime/context.py +21 -0
pipecat/services/openai_realtime/events.py +21 -0
pipecat/services/openai_realtime/frames.py +21 -0
pipecat/services/openai_realtime_beta/azure.py +16 -0
pipecat/services/openai_realtime_beta/openai.py +17 -5
pipecat/services/piper/tts.py +7 -9
pipecat/services/playht/tts.py +34 -4
pipecat/services/rime/tts.py +12 -12
pipecat/services/riva/stt.py +3 -1
pipecat/services/salesforce/__init__.py +9 -0
pipecat/services/salesforce/llm.py +700 -0
pipecat/services/sarvam/__init__.py +7 -0
pipecat/services/sarvam/stt.py +540 -0
pipecat/services/sarvam/tts.py +97 -13
pipecat/services/simli/video.py +2 -2
pipecat/services/speechmatics/stt.py +22 -10
pipecat/services/stt_service.py +47 -0
pipecat/services/tavus/video.py +2 -2
pipecat/services/tts_service.py +75 -22
pipecat/services/vision_service.py +7 -6
pipecat/services/vistaar/llm.py +51 -9
pipecat/tests/utils.py +4 -4
pipecat/transcriptions/language.py +41 -1
pipecat/transports/base_input.py +13 -34
pipecat/transports/base_output.py +140 -104
pipecat/transports/daily/transport.py +199 -26
pipecat/transports/heygen/__init__.py +0 -0
pipecat/transports/heygen/transport.py +381 -0
pipecat/transports/livekit/transport.py +228 -63
pipecat/transports/local/audio.py +6 -1
pipecat/transports/local/tk.py +11 -2
pipecat/transports/network/fastapi_websocket.py +1 -1
pipecat/transports/smallwebrtc/connection.py +103 -19
pipecat/transports/smallwebrtc/request_handler.py +246 -0
pipecat/transports/smallwebrtc/transport.py +65 -23
pipecat/transports/tavus/transport.py +23 -12
pipecat/transports/websocket/client.py +41 -5
pipecat/transports/websocket/fastapi.py +21 -11
pipecat/transports/websocket/server.py +14 -7
pipecat/transports/whatsapp/api.py +8 -0
pipecat/transports/whatsapp/client.py +47 -0
pipecat/utils/base_object.py +54 -22
pipecat/utils/redis.py +58 -0
pipecat/utils/string.py +13 -1
pipecat/utils/tracing/service_decorators.py +21 -21
pipecat/serializers/genesys.py +0 -95
pipecat/services/google/test-google-chirp.py +0 -45
pipecat/services/openai.py +0 -698
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
/pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0

pipecat/services/cartesia/stt.py CHANGED Viewed

@@ -28,13 +28,12 @@ from pipecat.frames.frames import (
     UserStoppedSpeakingFrame,
 )
 from pipecat.processors.frame_processor import FrameDirection
-from pipecat.services.stt_service import STTService
+from pipecat.services.stt_service import WebsocketSTTService
 from pipecat.transcriptions.language import Language
 from pipecat.utils.time import time_now_iso8601
 from pipecat.utils.tracing.service_decorators import traced_stt
 try:
-    import websockets
     from websockets.asyncio.client import connect as websocket_connect
     from websockets.protocol import State
 except ModuleNotFoundError as e:
@@ -124,7 +123,7 @@ class CartesiaLiveOptions:
         return cls(**json.loads(json_str))
-class CartesiaSTTService(STTService):
+class CartesiaSTTService(WebsocketSTTService):
     """Speech-to-text service using Cartesia Live API.
     Provides real-time speech transcription through WebSocket connection
@@ -176,8 +175,7 @@ class CartesiaSTTService(STTService):
         self.set_model_name(merged_options.model)
         self._api_key = api_key
         self._base_url = base_url or "api.cartesia.ai"
-        self._connection = None
-        self._receiver_task = None
+        self._receive_task = None
     def can_generate_metrics(self) -> bool:
         """Check if the service can generate processing metrics.
@@ -214,6 +212,27 @@ class CartesiaSTTService(STTService):
         await super().cancel(frame)
         await self._disconnect()
+    async def start_metrics(self):
+        """Start performance metrics collection for transcription processing."""
+        await self.start_ttfb_metrics()
+        await self.start_processing_metrics()
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames and handle speech events.
+        Args:
+            frame: The frame to process.
+            direction: Direction of frame flow in the pipeline.
+        """
+        await super().process_frame(frame, direction)
+        if isinstance(frame, UserStartedSpeakingFrame):
+            await self.start_metrics()
+        elif isinstance(frame, UserStoppedSpeakingFrame):
+            # Send finalize command to flush the transcription session
+            if self._websocket and self._websocket.state is State.OPEN:
+                await self._websocket.send("finalize")
     async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
         """Process audio data for speech-to-text transcription.
@@ -224,45 +243,71 @@ class CartesiaSTTService(STTService):
             None - transcription results are handled via WebSocket responses.
         """
         # If the connection is closed, due to timeout, we need to reconnect when the user starts speaking again
-        if not self._connection or self._connection.state is State.CLOSED:
+        if not self._websocket or self._websocket.state is State.CLOSED:
             await self._connect()
-        await self._connection.send(audio)
+        await self._websocket.send(audio)
         yield None
     async def _connect(self):
-        params = self._settings.to_dict()
-        ws_url = f"wss://{self._base_url}/stt/websocket?{urllib.parse.urlencode(params)}"
-        logger.debug(f"Connecting to Cartesia: {ws_url}")
-        headers = {"Cartesia-Version": "2025-04-16", "X-API-Key": self._api_key}
+        await self._connect_websocket()
+        if self._websocket and not self._receive_task:
+            self._receive_task = asyncio.create_task(self._receive_task_handler(self._report_error))
+    async def _disconnect(self):
+        if self._receive_task:
+            await self.cancel_task(self._receive_task)
+            self._receive_task = None
+        await self._disconnect_websocket()
+    async def _connect_websocket(self):
         try:
-            self._connection = await websocket_connect(ws_url, additional_headers=headers)
-            # Setup the receiver task to handle the incoming messages from the Cartesia server
-            if self._receiver_task is None or self._receiver_task.done():
-                self._receiver_task = asyncio.create_task(self._receive_messages())
-            logger.debug(f"Connected to Cartesia")
+            if self._websocket and self._websocket.state is State.OPEN:
+                return
+            logger.debug("Connecting to Cartesia STT")
+            params = self._settings.to_dict()
+            ws_url = f"wss://{self._base_url}/stt/websocket?{urllib.parse.urlencode(params)}"
+            headers = {"Cartesia-Version": "2025-04-16", "X-API-Key": self._api_key}
+            self._websocket = await websocket_connect(ws_url, additional_headers=headers)
+            await self._call_event_handler("on_connected")
         except Exception as e:
             logger.error(f"{self}: unable to connect to Cartesia: {e}")
-    async def _receive_messages(self):
+    async def _disconnect_websocket(self):
         try:
-            while True:
-                if not self._connection or self._connection.state is State.CLOSED:
-                    break
-                message = await self._connection.recv()
-                try:
-                    data = json.loads(message)
-                    await self._process_response(data)
-                except json.JSONDecodeError:
-                    logger.warning(f"Received non-JSON message: {message}")
-        except asyncio.CancelledError:
-            pass
-        except websockets.exceptions.ConnectionClosed as e:
-            logger.debug(f"WebSocket connection closed: {e}")
+            if self._websocket and self._websocket.state is State.OPEN:
+                logger.debug("Disconnecting from Cartesia STT")
+                await self._websocket.close()
         except Exception as e:
-            logger.error(f"Error in message receiver: {e}")
+            logger.error(f"{self} error closing websocket: {e}")
+        finally:
+            self._websocket = None
+            await self._call_event_handler("on_disconnected")
+    def _get_websocket(self):
+        if self._websocket:
+            return self._websocket
+        raise Exception("Websocket not connected")
+    async def _process_messages(self):
+        async for message in self._get_websocket():
+            try:
+                data = json.loads(message)
+                await self._process_response(data)
+            except json.JSONDecodeError:
+                logger.warning(f"Received non-JSON message: {message}")
+    async def _receive_messages(self):
+        while True:
+            await self._process_messages()
+            # Cartesia times out after 5 minutes of innactivity (no keepalive
+            # mechanism is available). So, we try to reconnect.
+            logger.debug(f"{self} Cartesia connection was disconnected (timeout?), reconnecting")
+            await self._connect_websocket()
     async def _process_response(self, data):
         if "type" in data:
@@ -316,41 +361,3 @@ class CartesiaSTTService(STTService):
                         language,
                     )
                 )
-    async def _disconnect(self):
-        if self._receiver_task:
-            self._receiver_task.cancel()
-            try:
-                await self._receiver_task
-            except asyncio.CancelledError:
-                pass
-            except Exception as e:
-                logger.exception(f"Unexpected exception while cancelling task: {e}")
-            self._receiver_task = None
-        if self._connection and self._connection.state is State.OPEN:
-            logger.debug("Disconnecting from Cartesia")
-            await self._connection.close()
-            self._connection = None
-    async def start_metrics(self):
-        """Start performance metrics collection for transcription processing."""
-        await self.start_ttfb_metrics()
-        await self.start_processing_metrics()
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        """Process incoming frames and handle speech events.
-        Args:
-            frame: The frame to process.
-            direction: Direction of frame flow in the pipeline.
-        """
-        await super().process_frame(frame, direction)
-        if isinstance(frame, UserStartedSpeakingFrame):
-            await self.start_metrics()
-        elif isinstance(frame, UserStoppedSpeakingFrame):
-            # Send finalize command to flush the transcription session
-            if self._connection and self._connection.state is State.OPEN:
-                await self._connection.send("finalize")

pipecat/services/cartesia/tts.py CHANGED Viewed

@@ -15,14 +15,13 @@ from typing import AsyncGenerator, List, Literal, Optional, Union
 from loguru import logger
 from pydantic import BaseModel, Field
 from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
     ErrorFrame,
     Frame,
+    InterruptionFrame,
     StartFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStartedFrame,
     TTSStoppedFrame,
@@ -49,6 +48,26 @@ except ModuleNotFoundError as e:
     raise Exception(f"Missing module: {e}")
+class GenerationConfig(BaseModel):
+    """Configuration for Cartesia Sonic-3 generation parameters.
+    Sonic-3 interprets these parameters as guidance to ensure natural speech.
+    Test against your content for best results.
+    Parameters:
+        volume: Volume multiplier for generated speech. Valid range: [0.5, 2.0]. Default is 1.0.
+        speed: Speed multiplier for generated speech. Valid range: [0.6, 1.5]. Default is 1.0.
+        emotion: Single emotion string to guide the emotional tone. Examples include neutral,
+            angry, excited, content, sad, scared. Over 60 emotions are supported. For best
+            results, use with recommended voices: Leo, Jace, Kyle, Gavin, Maya, Tessa, Dana,
+            and Marian.
+    """
+    volume: Optional[float] = None
+    speed: Optional[float] = None
+    emotion: Optional[str] = None
 def language_to_cartesia_language(language: Language) -> Optional[str]:
     """Convert a Language enum to Cartesia language code.
@@ -74,6 +93,33 @@ def language_to_cartesia_language(language: Language) -> Optional[str]:
         Language.SV: "sv",
         Language.TR: "tr",
         Language.ZH: "zh",
+        Language.TL: "tl",
+        Language.BG: "bg",
+        Language.RO: "ro",
+        Language.AR: "ar",
+        Language.CS: "cs",
+        Language.EL: "el",
+        Language.FI: "fi",
+        Language.HR: "hr",
+        Language.MS: "ms",
+        Language.SK: "sk",
+        Language.DA: "da",
+        Language.TA: "ta",
+        Language.UK: "uk",
+        Language.HU: "hu",
+        Language.NO: "no",
+        Language.VI: "vi",
+        Language.BN: "bn",
+        Language.TH: "th",
+        Language.HE: "he",
+        Language.KA: "ka",
+        Language.ID: "id",
+        Language.TE: "te",
+        Language.GU: "gu",
+        Language.KN: "kn",
+        Language.ML: "ml",
+        Language.MR: "mr",
+        Language.PA: "pa",
     }
     result = BASE_LANGUAGES.get(language)
@@ -102,16 +148,20 @@ class CartesiaTTSService(AudioContextWordTTSService):
         Parameters:
             language: Language to use for synthesis.
-            speed: Voice speed control.
-            emotion: List of emotion controls.
+            speed: Voice speed control for non-Sonic-3 models (literal values).
+            emotion: List of emotion controls for non-Sonic-3 models.
                 .. deprecated:: 0.0.68
                         The `emotion` parameter is deprecated and will be removed in a future version.
+            generation_config: Generation configuration for Sonic-3 models. Includes volume,
+                speed (numeric), and emotion (string) parameters.
         """
         language: Optional[Language] = Language.EN
         speed: Optional[Literal["slow", "normal", "fast"]] = None
         emotion: Optional[List[str]] = []
+        generation_config: Optional[GenerationConfig] = None
     def __init__(
         self,
@@ -120,7 +170,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         voice_id: str,
         cartesia_version: str = "2025-04-16",
         url: str = "wss://api.cartesia.ai/tts/websocket",
-        model: str = "sonic-2",
+        model: str = "sonic-3",
         sample_rate: Optional[int] = None,
         encoding: str = "pcm_s16le",
         container: str = "raw",
@@ -136,7 +186,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
             voice_id: ID of the voice to use for synthesis.
             cartesia_version: API version string for Cartesia service.
             url: WebSocket URL for Cartesia TTS API.
-            model: TTS model to use (e.g., "sonic-2").
+            model: TTS model to use (e.g., "sonic-3").
             sample_rate: Audio sample rate. If None, uses default.
             encoding: Audio encoding format.
             container: Audio container format.
@@ -180,6 +230,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
             else "en",
             "speed": params.speed,
             "emotion": params.emotion,
+            "generation_config": params.generation_config,
         }
         self.set_model_name(model)
         self.set_voice(voice_id)
@@ -298,6 +349,11 @@ class CartesiaTTSService(AudioContextWordTTSService):
         if self._settings["speed"]:
             msg["speed"] = self._settings["speed"]
+        if self._settings["generation_config"]:
+            msg["generation_config"] = self._settings["generation_config"].model_dump(
+                exclude_none=True
+            )
         return json.dumps(msg)
     async def start(self, frame: StartFrame):
@@ -345,10 +401,11 @@ class CartesiaTTSService(AudioContextWordTTSService):
         try:
             if self._websocket and self._websocket.state is State.OPEN:
                 return
-            logger.debug("Connecting to Cartesia")
+            logger.debug("Connecting to Cartesia TTS")
             self._websocket = await websocket_connect(
                 f"{self._url}?api_key={self._api_key}&cartesia_version={self._cartesia_version}"
             )
+            await self._call_event_handler("on_connected")
         except Exception as e:
             logger.error(f"{self} initialization error: {e}")
             self._websocket = None
@@ -366,13 +423,14 @@ class CartesiaTTSService(AudioContextWordTTSService):
         finally:
             self._context_id = None
             self._websocket = None
+            await self._call_event_handler("on_disconnected")
     def _get_websocket(self):
         if self._websocket:
             return self._websocket
         raise Exception("Websocket not connected")
-    async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         await super()._handle_interruption(frame, direction)
         await self.stop_all_metrics()
         if self._context_id:
@@ -417,7 +475,6 @@ class CartesiaTTSService(AudioContextWordTTSService):
                 logger.error(f"{self} error: {msg}")
                 await self.push_frame(TTSStoppedFrame())
                 await self.stop_all_metrics()
                 await self.push_error(ErrorFrame(f"{self} error: {msg['error']}"))
                 self._context_id = None
             else:
@@ -482,23 +539,27 @@ class CartesiaHttpTTSService(TTSService):
         Parameters:
             language: Language to use for synthesis.
-            speed: Voice speed control.
-            emotion: List of emotion controls.
+            speed: Voice speed control for non-Sonic-3 models (literal values).
+            emotion: List of emotion controls for non-Sonic-3 models.
                 .. deprecated:: 0.0.68
                         The `emotion` parameter is deprecated and will be removed in a future version.
+            generation_config: Generation configuration for Sonic-3 models. Includes volume,
+                speed (numeric), and emotion (string) parameters.
         """
         language: Optional[Language] = Language.EN
         speed: Optional[Literal["slow", "normal", "fast"]] = None
         emotion: Optional[List[str]] = Field(default_factory=list)
+        generation_config: Optional[GenerationConfig] = None
     def __init__(
         self,
         *,
         api_key: str,
         voice_id: str,
-        model: str = "sonic-2",
+        model: str = "sonic-3",
         base_url: str = "https://api.cartesia.ai",
         cartesia_version: str = "2024-11-13",
         sample_rate: Optional[int] = None,
@@ -512,7 +573,7 @@ class CartesiaHttpTTSService(TTSService):
         Args:
             api_key: Cartesia API key for authentication.
             voice_id: ID of the voice to use for synthesis.
-            model: TTS model to use (e.g., "sonic-2").
+            model: TTS model to use (e.g., "sonic-3").
             base_url: Base URL for Cartesia HTTP API.
             cartesia_version: API version string for Cartesia service.
             sample_rate: Audio sample rate. If None, uses default.
@@ -539,6 +600,7 @@ class CartesiaHttpTTSService(TTSService):
             else "en",
             "speed": params.speed,
             "emotion": params.emotion,
+            "generation_config": params.generation_config,
         }
         self.set_voice(voice_id)
         self.set_model_name(model)
@@ -632,6 +694,11 @@ class CartesiaHttpTTSService(TTSService):
             if self._settings["speed"]:
                 payload["speed"] = self._settings["speed"]
+            if self._settings["generation_config"]:
+                payload["generation_config"] = self._settings["generation_config"].model_dump(
+                    exclude_none=True
+                )
             yield TTSStartedFrame()
             session = await self._client._get_session()

pipecat/services/deepgram/__init__.py CHANGED Viewed

@@ -8,6 +8,7 @@ import sys
 from pipecat.services import DeprecatedModuleProxy
+from .flux import *
 from .stt import *
 from .tts import *

pipecat/services/deepgram/flux/__init__.py ADDED Viewed

File without changes

dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.82.dev857py3-none-any.whl → 0.0.85.dev837py3-none-any.whl