PyPI - dv-pipecat-ai - Versions diffs - 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.74.dev770-py3-none-any.whl → 0.0.82.dev776-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244) hide show

{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
pipecat/__init__.py +17 -0
pipecat/adapters/base_llm_adapter.py +36 -1
pipecat/adapters/schemas/direct_function.py +296 -0
pipecat/adapters/schemas/function_schema.py +15 -6
pipecat/adapters/schemas/tools_schema.py +55 -7
pipecat/adapters/services/anthropic_adapter.py +22 -3
pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
pipecat/adapters/services/bedrock_adapter.py +22 -3
pipecat/adapters/services/gemini_adapter.py +16 -3
pipecat/adapters/services/open_ai_adapter.py +17 -2
pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
pipecat/audio/filters/base_audio_filter.py +30 -6
pipecat/audio/filters/koala_filter.py +37 -2
pipecat/audio/filters/krisp_filter.py +59 -6
pipecat/audio/filters/noisereduce_filter.py +37 -0
pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
pipecat/audio/mixers/base_audio_mixer.py +30 -7
pipecat/audio/mixers/soundfile_mixer.py +53 -6
pipecat/audio/resamplers/base_audio_resampler.py +17 -9
pipecat/audio/resamplers/resampy_resampler.py +26 -1
pipecat/audio/resamplers/soxr_resampler.py +32 -1
pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
pipecat/audio/utils.py +194 -1
pipecat/audio/vad/silero.py +60 -3
pipecat/audio/vad/vad_analyzer.py +114 -30
pipecat/clocks/base_clock.py +19 -0
pipecat/clocks/system_clock.py +25 -0
pipecat/extensions/voicemail/__init__.py +0 -0
pipecat/extensions/voicemail/voicemail_detector.py +707 -0
pipecat/frames/frames.py +590 -156
pipecat/metrics/metrics.py +64 -1
pipecat/observers/base_observer.py +58 -19
pipecat/observers/loggers/debug_log_observer.py +56 -64
pipecat/observers/loggers/llm_log_observer.py +8 -1
pipecat/observers/loggers/transcription_log_observer.py +19 -7
pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
pipecat/observers/turn_tracking_observer.py +26 -1
pipecat/pipeline/base_pipeline.py +5 -7
pipecat/pipeline/base_task.py +52 -9
pipecat/pipeline/parallel_pipeline.py +121 -177
pipecat/pipeline/pipeline.py +129 -20
pipecat/pipeline/runner.py +50 -1
pipecat/pipeline/sync_parallel_pipeline.py +132 -32
pipecat/pipeline/task.py +263 -280
pipecat/pipeline/task_observer.py +85 -34
pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
pipecat/processors/aggregators/gated.py +25 -24
pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
pipecat/processors/aggregators/llm_response.py +398 -89
pipecat/processors/aggregators/openai_llm_context.py +161 -13
pipecat/processors/aggregators/sentence.py +25 -14
pipecat/processors/aggregators/user_response.py +28 -3
pipecat/processors/aggregators/vision_image_frame.py +24 -14
pipecat/processors/async_generator.py +28 -0
pipecat/processors/audio/audio_buffer_processor.py +78 -37
pipecat/processors/consumer_processor.py +25 -6
pipecat/processors/filters/frame_filter.py +23 -0
pipecat/processors/filters/function_filter.py +30 -0
pipecat/processors/filters/identity_filter.py +17 -2
pipecat/processors/filters/null_filter.py +24 -1
pipecat/processors/filters/stt_mute_filter.py +56 -21
pipecat/processors/filters/wake_check_filter.py +46 -3
pipecat/processors/filters/wake_notifier_filter.py +21 -3
pipecat/processors/frame_processor.py +488 -131
pipecat/processors/frameworks/langchain.py +38 -3
pipecat/processors/frameworks/rtvi.py +719 -34
pipecat/processors/gstreamer/pipeline_source.py +41 -0
pipecat/processors/idle_frame_processor.py +26 -3
pipecat/processors/logger.py +23 -0
pipecat/processors/metrics/frame_processor_metrics.py +77 -4
pipecat/processors/metrics/sentry.py +42 -4
pipecat/processors/producer_processor.py +34 -14
pipecat/processors/text_transformer.py +22 -10
pipecat/processors/transcript_processor.py +48 -29
pipecat/processors/user_idle_processor.py +31 -21
pipecat/runner/__init__.py +1 -0
pipecat/runner/daily.py +132 -0
pipecat/runner/livekit.py +148 -0
pipecat/runner/run.py +543 -0
pipecat/runner/types.py +67 -0
pipecat/runner/utils.py +515 -0
pipecat/serializers/base_serializer.py +42 -0
pipecat/serializers/exotel.py +17 -6
pipecat/serializers/genesys.py +95 -0
pipecat/serializers/livekit.py +33 -0
pipecat/serializers/plivo.py +16 -15
pipecat/serializers/protobuf.py +37 -1
pipecat/serializers/telnyx.py +18 -17
pipecat/serializers/twilio.py +32 -16
pipecat/services/ai_service.py +5 -3
pipecat/services/anthropic/llm.py +113 -43
pipecat/services/assemblyai/models.py +63 -5
pipecat/services/assemblyai/stt.py +64 -11
pipecat/services/asyncai/__init__.py +0 -0
pipecat/services/asyncai/tts.py +501 -0
pipecat/services/aws/llm.py +185 -111
pipecat/services/aws/stt.py +217 -23
pipecat/services/aws/tts.py +118 -52
pipecat/services/aws/utils.py +101 -5
pipecat/services/aws_nova_sonic/aws.py +82 -64
pipecat/services/aws_nova_sonic/context.py +15 -6
pipecat/services/azure/common.py +10 -2
pipecat/services/azure/image.py +32 -0
pipecat/services/azure/llm.py +9 -7
pipecat/services/azure/stt.py +65 -2
pipecat/services/azure/tts.py +154 -23
pipecat/services/cartesia/stt.py +125 -8
pipecat/services/cartesia/tts.py +102 -38
pipecat/services/cerebras/llm.py +15 -23
pipecat/services/deepgram/stt.py +19 -11
pipecat/services/deepgram/tts.py +36 -0
pipecat/services/deepseek/llm.py +14 -23
pipecat/services/elevenlabs/tts.py +330 -64
pipecat/services/fal/image.py +43 -0
pipecat/services/fal/stt.py +48 -10
pipecat/services/fireworks/llm.py +14 -21
pipecat/services/fish/tts.py +109 -9
pipecat/services/gemini_multimodal_live/__init__.py +1 -0
pipecat/services/gemini_multimodal_live/events.py +83 -2
pipecat/services/gemini_multimodal_live/file_api.py +189 -0
pipecat/services/gemini_multimodal_live/gemini.py +218 -21
pipecat/services/gladia/config.py +17 -10
pipecat/services/gladia/stt.py +82 -36
pipecat/services/google/frames.py +40 -0
pipecat/services/google/google.py +2 -0
pipecat/services/google/image.py +39 -2
pipecat/services/google/llm.py +176 -58
pipecat/services/google/llm_openai.py +26 -4
pipecat/services/google/llm_vertex.py +37 -15
pipecat/services/google/rtvi.py +41 -0
pipecat/services/google/stt.py +65 -17
pipecat/services/google/test-google-chirp.py +45 -0
pipecat/services/google/tts.py +390 -19
pipecat/services/grok/llm.py +8 -6
pipecat/services/groq/llm.py +8 -6
pipecat/services/groq/stt.py +13 -9
pipecat/services/groq/tts.py +40 -0
pipecat/services/hamsa/__init__.py +9 -0
pipecat/services/hamsa/stt.py +241 -0
pipecat/services/heygen/__init__.py +5 -0
pipecat/services/heygen/api.py +281 -0
pipecat/services/heygen/client.py +620 -0
pipecat/services/heygen/video.py +338 -0
pipecat/services/image_service.py +5 -3
pipecat/services/inworld/__init__.py +1 -0
pipecat/services/inworld/tts.py +592 -0
pipecat/services/llm_service.py +127 -45
pipecat/services/lmnt/tts.py +80 -7
pipecat/services/mcp_service.py +85 -44
pipecat/services/mem0/memory.py +42 -13
pipecat/services/minimax/tts.py +74 -15
pipecat/services/mistral/__init__.py +0 -0
pipecat/services/mistral/llm.py +185 -0
pipecat/services/moondream/vision.py +55 -10
pipecat/services/neuphonic/tts.py +275 -48
pipecat/services/nim/llm.py +8 -6
pipecat/services/ollama/llm.py +27 -7
pipecat/services/openai/base_llm.py +54 -16
pipecat/services/openai/image.py +30 -0
pipecat/services/openai/llm.py +7 -5
pipecat/services/openai/stt.py +13 -9
pipecat/services/openai/tts.py +42 -10
pipecat/services/openai_realtime_beta/azure.py +11 -9
pipecat/services/openai_realtime_beta/context.py +7 -5
pipecat/services/openai_realtime_beta/events.py +10 -7
pipecat/services/openai_realtime_beta/openai.py +37 -18
pipecat/services/openpipe/llm.py +30 -24
pipecat/services/openrouter/llm.py +9 -7
pipecat/services/perplexity/llm.py +15 -19
pipecat/services/piper/tts.py +26 -12
pipecat/services/playht/tts.py +227 -65
pipecat/services/qwen/llm.py +8 -6
pipecat/services/rime/tts.py +128 -17
pipecat/services/riva/stt.py +160 -22
pipecat/services/riva/tts.py +67 -2
pipecat/services/sambanova/llm.py +19 -17
pipecat/services/sambanova/stt.py +14 -8
pipecat/services/sarvam/tts.py +60 -13
pipecat/services/simli/video.py +82 -21
pipecat/services/soniox/__init__.py +0 -0
pipecat/services/soniox/stt.py +398 -0
pipecat/services/speechmatics/stt.py +29 -17
pipecat/services/stt_service.py +47 -11
pipecat/services/tavus/video.py +94 -25
pipecat/services/together/llm.py +8 -6
pipecat/services/tts_service.py +77 -53
pipecat/services/ultravox/stt.py +46 -43
pipecat/services/vision_service.py +5 -3
pipecat/services/websocket_service.py +12 -11
pipecat/services/whisper/base_stt.py +58 -12
pipecat/services/whisper/stt.py +69 -58
pipecat/services/xtts/tts.py +59 -2
pipecat/sync/base_notifier.py +19 -0
pipecat/sync/event_notifier.py +24 -0
pipecat/tests/utils.py +73 -5
pipecat/transcriptions/language.py +24 -0
pipecat/transports/base_input.py +112 -8
pipecat/transports/base_output.py +235 -13
pipecat/transports/base_transport.py +119 -0
pipecat/transports/local/audio.py +76 -0
pipecat/transports/local/tk.py +84 -0
pipecat/transports/network/fastapi_websocket.py +174 -15
pipecat/transports/network/small_webrtc.py +383 -39
pipecat/transports/network/webrtc_connection.py +214 -8
pipecat/transports/network/websocket_client.py +171 -1
pipecat/transports/network/websocket_server.py +147 -9
pipecat/transports/services/daily.py +792 -70
pipecat/transports/services/helpers/daily_rest.py +122 -129
pipecat/transports/services/livekit.py +339 -4
pipecat/transports/services/tavus.py +273 -38
pipecat/utils/asyncio/task_manager.py +92 -186
pipecat/utils/base_object.py +83 -1
pipecat/utils/network.py +2 -0
pipecat/utils/string.py +114 -58
pipecat/utils/text/base_text_aggregator.py +44 -13
pipecat/utils/text/base_text_filter.py +46 -0
pipecat/utils/text/markdown_text_filter.py +70 -14
pipecat/utils/text/pattern_pair_aggregator.py +18 -14
pipecat/utils/text/simple_text_aggregator.py +43 -2
pipecat/utils/text/skip_tags_aggregator.py +21 -13
pipecat/utils/time.py +36 -0
pipecat/utils/tracing/class_decorators.py +32 -7
pipecat/utils/tracing/conversation_context_provider.py +12 -2
pipecat/utils/tracing/service_attributes.py +80 -64
pipecat/utils/tracing/service_decorators.py +48 -21
pipecat/utils/tracing/setup.py +13 -7
pipecat/utils/tracing/turn_context_provider.py +12 -2
pipecat/utils/tracing/turn_trace_observer.py +27 -0
pipecat/utils/utils.py +14 -14
dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
pipecat/examples/daily_runner.py +0 -64
pipecat/examples/run.py +0 -265
pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
pipecat/utils/asyncio/watchdog_event.py +0 -42
pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
pipecat/utils/asyncio/watchdog_queue.py +0 -48
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
pipecat/{examples → extensions}/__init__.py +0 -0

pipecat/services/tavus/video.py CHANGED Viewed

@@ -4,7 +4,11 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
-"""This module implements Tavus as a sink transport layer"""
+"""Tavus video service implementation for avatar-based video generation.
+This module implements Tavus as a sink transport layer, providing video
+avatar functionality through Tavus's streaming API.
+"""
 import asyncio
 from typing import Optional
@@ -13,41 +17,37 @@ import aiohttp
 from daily.daily import AudioData, VideoFrame
 from loguru import logger
-from pipecat.audio.utils import create_default_resampler
+from pipecat.audio.utils import create_stream_resampler
 from pipecat.frames.frames import (
+    BotStartedSpeakingFrame,
     CancelFrame,
     EndFrame,
     Frame,
     OutputAudioRawFrame,
     OutputImageRawFrame,
+    OutputTransportReadyFrame,
+    SpeechOutputAudioRawFrame,
     StartFrame,
     StartInterruptionFrame,
     TTSAudioRawFrame,
+    TTSStartedFrame,
 )
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup
 from pipecat.services.ai_service import AIService
 from pipecat.transports.services.tavus import TavusCallbacks, TavusParams, TavusTransportClient
-from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
 class TavusVideoService(AIService):
-    """
-    Service class that proxies audio to Tavus and receives both audio and video in return.
-    It uses the `TavusTransportClient` to manage the session and handle communication. When
-    audio is sent, Tavus responds with both audio and video streams, which are then routed
-    through Pipecat’s media pipeline.
-    In use cases such as with `DailyTransport`, this results in two distinct virtual rooms:
-        - **Tavus room**: Contains the Tavus Avatar and the Pipecat Bot.
-        - **User room**: Contains the Pipecat Bot and the user.
-    Args:
-        api_key (str): Tavus API key used for authentication.
-        replica_id (str): ID of the Tavus voice replica to use for speech synthesis.
-        persona_id (str): ID of the Tavus persona. Defaults to "pipecat-stream" to use the Pipecat TTS voice.
-        session (aiohttp.ClientSession): Async HTTP session used for communication with Tavus.
-        **kwargs: Additional arguments passed to the parent `AIService` class.
+    """Service that proxies audio to Tavus and receives audio and video in return.
+    Uses the TavusTransportClient to manage sessions and handle communication.
+    When audio is sent, Tavus responds with both audio and video streams, which
+    are routed through Pipecat's media pipeline.
+    In use cases with DailyTransport, this creates two distinct virtual rooms:
+    - Tavus room: Contains the Tavus Avatar and the Pipecat Bot
+    - User room: Contains the Pipecat Bot and the user
     """
     def __init__(
@@ -59,6 +59,15 @@ class TavusVideoService(AIService):
         session: aiohttp.ClientSession,
         **kwargs,
     ) -> None:
+        """Initialize the Tavus video service.
+        Args:
+            api_key: Tavus API key used for authentication.
+            replica_id: ID of the Tavus voice replica to use for speech synthesis.
+            persona_id: ID of the Tavus persona. Defaults to "pipecat-stream" for Pipecat TTS voice.
+            session: Async HTTP session used for communication with Tavus.
+            **kwargs: Additional arguments passed to the parent AIService class.
+        """
         super().__init__(**kwargs)
         self._api_key = api_key
         self._session = session
@@ -69,14 +78,20 @@ class TavusVideoService(AIService):
         self._client: Optional[TavusTransportClient] = None
         self._conversation_id: str
-        self._resampler = create_default_resampler()
+        self._resampler = create_stream_resampler()
         self._audio_buffer = bytearray()
         self._send_task: Optional[asyncio.Task] = None
         # This is the custom track destination expected by Tavus
         self._transport_destination: Optional[str] = "stream"
+        self._transport_ready = False
     async def setup(self, setup: FrameProcessorSetup):
+        """Set up the Tavus video service.
+        Args:
+            setup: Frame processor setup configuration.
+        """
         await super().setup(setup)
         callbacks = TavusCallbacks(
             on_participant_joined=self._on_participant_joined,
@@ -99,15 +114,18 @@ class TavusVideoService(AIService):
         await self._client.setup(setup)
     async def cleanup(self):
+        """Clean up the service and release resources."""
         await super().cleanup()
         await self._client.cleanup()
         self._client = None
     async def _on_participant_left(self, participant, reason):
+        """Handle participant leaving the session."""
         participant_id = participant["id"]
         logger.info(f"Participant left {participant_id}, reason: {reason}")
     async def _on_participant_joined(self, participant):
+        """Handle participant joining the session."""
         participant_id = participant["id"]
         logger.info(f"Participant joined {participant_id}")
         if not self._other_participant_has_joined:
@@ -124,32 +142,51 @@ class TavusVideoService(AIService):
     async def _on_participant_video_frame(
         self, participant_id: str, video_frame: VideoFrame, video_source: str
     ):
+        """Handle incoming video frames from participants."""
         frame = OutputImageRawFrame(
             image=video_frame.buffer,
             size=(video_frame.width, video_frame.height),
             format=video_frame.color_format,
         )
         frame.transport_source = video_source
-        await self.push_frame(frame)
+        if self._transport_ready:
+            await self.push_frame(frame)
     async def _on_participant_audio_data(
         self, participant_id: str, audio: AudioData, audio_source: str
     ):
-        frame = OutputAudioRawFrame(
+        """Handle incoming audio data from participants."""
+        frame = SpeechOutputAudioRawFrame(
             audio=audio.audio_frames,
             sample_rate=audio.sample_rate,
             num_channels=audio.num_channels,
         )
         frame.transport_source = audio_source
-        await self.push_frame(frame)
+        if self._transport_ready:
+            await self.push_frame(frame)
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+        Returns:
+            True, as Tavus service supports metrics generation.
+        """
         return True
     async def get_persona_name(self) -> str:
+        """Get the name of the current persona.
+        Returns:
+            The persona name from the Tavus client.
+        """
         return await self._client.get_persona_name()
     async def start(self, frame: StartFrame):
+        """Start the Tavus video service.
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
         await self._client.start(frame)
         if self._transport_destination:
@@ -157,16 +194,32 @@ class TavusVideoService(AIService):
         await self._create_send_task()
     async def stop(self, frame: EndFrame):
+        """Stop the Tavus video service.
+        Args:
+            frame: The end frame.
+        """
         await super().stop(frame)
         await self._end_conversation()
         await self._cancel_send_task()
     async def cancel(self, frame: CancelFrame):
+        """Cancel the Tavus video service.
+        Args:
+            frame: The cancel frame.
+        """
         await super().cancel(frame)
         await self._end_conversation()
         await self._cancel_send_task()
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames through the service.
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame processing.
+        """
         await super().process_frame(frame, direction)
         if isinstance(frame, StartInterruptionFrame):
@@ -174,29 +227,44 @@ class TavusVideoService(AIService):
             await self.push_frame(frame, direction)
         elif isinstance(frame, TTSAudioRawFrame):
             await self._handle_audio_frame(frame)
+        elif isinstance(frame, OutputTransportReadyFrame):
+            self._transport_ready = True
+            await self.push_frame(frame, direction)
+        elif isinstance(frame, TTSStartedFrame):
+            await self.start_ttfb_metrics()
+        elif isinstance(frame, BotStartedSpeakingFrame):
+            # We constantly receive audio through WebRTC, but most of the time it is silence.
+            # As soon as we receive actual audio, the base output transport will create a
+            # BotStartedSpeakingFrame, which we can use as a signal for the TTFB metrics.
+            await self.stop_ttfb_metrics()
         else:
             await self.push_frame(frame, direction)
     async def _handle_interruptions(self):
+        """Handle interruption events by resetting send tasks and notifying client."""
         await self._cancel_send_task()
         await self._create_send_task()
         await self._client.send_interrupt_message()
     async def _end_conversation(self):
+        """End the current conversation and reset state."""
         await self._client.stop()
         self._other_participant_has_joined = False
     async def _create_send_task(self):
+        """Create the audio sending task if it doesn't exist."""
         if not self._send_task:
-            self._queue = WatchdogQueue(self.task_manager)
+            self._queue = asyncio.Queue()
             self._send_task = self.create_task(self._send_task_handler())
     async def _cancel_send_task(self):
+        """Cancel the audio sending task if it exists."""
         if self._send_task:
             await self.cancel_task(self._send_task)
             self._send_task = None
     async def _handle_audio_frame(self, frame: OutputAudioRawFrame):
+        """Process audio frames for sending to Tavus."""
         sample_rate = self._client.out_sample_rate
         # 40 ms of audio
         chunk_size = int((sample_rate * 2) / 25)
@@ -215,6 +283,7 @@ class TavusVideoService(AIService):
             self._audio_buffer = self._audio_buffer[chunk_size:]
     async def _send_task_handler(self):
+        """Handle sending audio frames to the Tavus client."""
         while True:
             frame = await self._queue.get()
             if isinstance(frame, OutputAudioRawFrame) and self._client:

pipecat/services/together/llm.py CHANGED Viewed

@@ -16,12 +16,6 @@ class TogetherLLMService(OpenAILLMService):
     This service extends OpenAILLMService to connect to Together.ai's API endpoint while
     maintaining full compatibility with OpenAI's interface and functionality.
-    Args:
-        api_key: The API key for accessing Together.ai's API.
-        base_url: The base URL for Together.ai API. Defaults to "https://api.together.xyz/v1".
-        model: The model identifier to use. Defaults to "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo".
-        **kwargs: Additional keyword arguments passed to OpenAILLMService.
     """
     def __init__(
@@ -32,6 +26,14 @@ class TogetherLLMService(OpenAILLMService):
         model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
         **kwargs,
     ):
+        """Initialize Together.ai LLM service.
+        Args:
+            api_key: The API key for accessing Together.ai's API.
+            base_url: The base URL for Together.ai API. Defaults to "https://api.together.xyz/v1".
+            model: The model identifier to use. Defaults to "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo".
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
     def create_client(self, api_key=None, base_url=None, **kwargs):

pipecat/services/tts_service.py CHANGED Viewed

@@ -37,7 +37,6 @@ from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.ai_service import AIService
 from pipecat.services.websocket_service import WebsocketService
 from pipecat.transcriptions.language import Language
-from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
 from pipecat.utils.text.base_text_aggregator import BaseTextAggregator
 from pipecat.utils.text.base_text_filter import BaseTextFilter
 from pipecat.utils.text.simple_text_aggregator import SimpleTextAggregator
@@ -50,21 +49,6 @@ class TTSService(AIService):
     Provides common functionality for TTS services including text aggregation,
     filtering, audio generation, and frame management. Supports configurable
     sentence aggregation, silence insertion, and frame processing control.
-    Args:
-        aggregate_sentences: Whether to aggregate text into sentences before synthesis.
-        push_text_frames: Whether to push TextFrames and LLMFullResponseEndFrames.
-        push_stop_frames: Whether to automatically push TTSStoppedFrames.
-        stop_frame_timeout_s: Idle time before pushing TTSStoppedFrame when push_stop_frames is True.
-        push_silence_after_stop: Whether to push silence audio after TTSStoppedFrame.
-        silence_time_s: Duration of silence to push when push_silence_after_stop is True.
-        pause_frame_processing: Whether to pause frame processing during audio generation.
-        sample_rate: Output sample rate for generated audio.
-        text_aggregator: Custom text aggregator for processing incoming text.
-        text_filters: Sequence of text filters to apply after aggregation.
-        text_filter: Single text filter (deprecated, use text_filters).
-        transport_destination: Destination for generated audio frames.
-        **kwargs: Additional arguments passed to the parent AIService.
     """
     def __init__(
@@ -97,6 +81,27 @@ class TTSService(AIService):
         transport_destination: Optional[str] = None,
         **kwargs,
     ):
+        """Initialize the TTS service.
+        Args:
+            aggregate_sentences: Whether to aggregate text into sentences before synthesis.
+            push_text_frames: Whether to push TextFrames and LLMFullResponseEndFrames.
+            push_stop_frames: Whether to automatically push TTSStoppedFrames.
+            stop_frame_timeout_s: Idle time before pushing TTSStoppedFrame when push_stop_frames is True.
+            push_silence_after_stop: Whether to push silence audio after TTSStoppedFrame.
+            silence_time_s: Duration of silence to push when push_silence_after_stop is True.
+            pause_frame_processing: Whether to pause frame processing during audio generation.
+            sample_rate: Output sample rate for generated audio.
+            text_aggregator: Custom text aggregator for processing incoming text.
+            text_filters: Sequence of text filters to apply after aggregation.
+            text_filter: Single text filter (deprecated, use text_filters).
+                .. deprecated:: 0.0.59
+                    Use `text_filters` instead, which allows multiple filters.
+            transport_destination: Destination for generated audio frames.
+            **kwargs: Additional arguments passed to the parent AIService.
+        """
         super().__init__(**kwargs)
         self._aggregate_sentences: bool = aggregate_sentences
         self._push_text_frames: bool = push_text_frames
@@ -112,9 +117,10 @@ class TTSService(AIService):
         self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator()
         self._text_filters: Sequence[BaseTextFilter] = text_filters or []
         self._transport_destination: Optional[str] = transport_destination
         self._tracing_enabled: bool = False
+        self._voice_config: Dict[str, Any] = {}
+        self._voice = None
+        self._voice_clone_params = None
         if text_filter:
             import warnings
@@ -225,6 +231,7 @@ class TTSService(AIService):
         self._sample_rate = self._init_sample_rate or frame.audio_out_sample_rate
         if self._push_stop_frames and not self._stop_frame_task:
             self._stop_frame_task = self.create_task(self._stop_frame_handler())
+        self._tracing_enabled = frame.enable_tracing
     async def stop(self, frame: EndFrame):
         """Stop the TTS service.
@@ -257,7 +264,7 @@ class TTSService(AIService):
                     self._settings[key] = self.language_to_service_language(value)
             elif key == "model":
                 self.set_model_name(value)
-            elif key == "voice":
+            elif key == "voice" or key == "voice_id":
                 self.set_voice(value)
             elif key == "text_filter":
                 for filter in self._text_filters:
@@ -268,9 +275,20 @@ class TTSService(AIService):
     async def say(self, text: str):
         """Immediately speak the provided text.
+        .. deprecated:: 0.0.79
+            Push a `TTSSpeakFrame` instead to ensure frame ordering is maintained.
         Args:
             text: The text to speak.
         """
+        import warnings
+        warnings.warn(
+            "`TTSService.say()` is deprecated. Push a `TTSSpeakFrame` instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         await self.queue_frame(TTSSpeakFrame(text))
     async def process_frame(self, frame: Frame, direction: FrameDirection):
@@ -433,7 +451,7 @@ class TTSService(AIService):
         while True:
             try:
                 frame = await asyncio.wait_for(
-                    self._stop_frame_queue.get(), self._stop_frame_timeout_s
+                    self._stop_frame_queue.get(), timeout=self._stop_frame_timeout_s
                 )
                 if isinstance(frame, TTSStartedFrame):
                     has_started = True
@@ -443,8 +461,6 @@ class TTSService(AIService):
                 if has_started:
                     await self.push_frame(TTSStoppedFrame())
                     has_started = False
-            finally:
-                self.reset_watchdog()
 class WordTTSService(TTSService):
@@ -452,12 +468,14 @@ class WordTTSService(TTSService):
     Word timestamps are useful to synchronize audio with text of the spoken
     words. This way only the spoken words are added to the conversation context.
-    Args:
-        **kwargs: Additional arguments passed to the parent TTSService.
     """
     def __init__(self, **kwargs):
+        """Initialize the Word TTS service.
+        Args:
+            **kwargs: Additional arguments passed to the parent TTSService.
+        """
         super().__init__(**kwargs)
         self._initial_word_timestamp = -1
         self._words_task = None
@@ -529,7 +547,7 @@ class WordTTSService(TTSService):
     def _create_words_task(self):
         if not self._words_task:
-            self._words_queue = WatchdogQueue(self.task_manager)
+            self._words_queue = asyncio.Queue()
             self._words_task = self.create_task(self._words_task_handler())
     async def _stop_words_task(self):
@@ -566,22 +584,23 @@ class WebsocketTTSService(TTSService, WebsocketService):
     Combines TTS functionality with websocket connectivity, providing automatic
     error handling and reconnection capabilities.
-    Args:
-        reconnect_on_error: Whether to automatically reconnect on websocket errors.
-        **kwargs: Additional arguments passed to parent classes.
     Event handlers:
         on_connection_error: Called when a websocket connection error occurs.
-    Example:
-        ```python
+    Example::
         @tts.event_handler("on_connection_error")
         async def on_connection_error(tts: TTSService, error: str):
             logger.error(f"TTS connection error: {error}")
-        ```
     """
     def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
+        """Initialize the Websocket TTS service.
+        Args:
+            reconnect_on_error: Whether to automatically reconnect on websocket errors.
+            **kwargs: Additional arguments passed to parent classes.
+        """
         TTSService.__init__(self, **kwargs)
         WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
         self._register_event_handler("on_connection_error")
@@ -596,12 +615,14 @@ class InterruptibleTTSService(WebsocketTTSService):
     Designed for TTS services that don't support word timestamps. Handles interruptions
     by reconnecting the websocket when the bot is speaking and gets interrupted.
-    Args:
-        **kwargs: Additional arguments passed to the parent WebsocketTTSService.
     """
     def __init__(self, **kwargs):
+        """Initialize the Interruptible TTS service.
+        Args:
+            **kwargs: Additional arguments passed to the parent WebsocketTTSService.
+        """
         super().__init__(**kwargs)
         # Indicates if the bot is speaking. If the bot is not speaking we don't
@@ -635,22 +656,23 @@ class WebsocketWordTTSService(WordTTSService, WebsocketService):
     Combines word timestamp functionality with websocket connectivity.
-    Args:
-        reconnect_on_error: Whether to automatically reconnect on websocket errors.
-        **kwargs: Additional arguments passed to parent classes.
     Event handlers:
         on_connection_error: Called when a websocket connection error occurs.
-    Example:
-        ```python
+    Example::
         @tts.event_handler("on_connection_error")
         async def on_connection_error(tts: TTSService, error: str):
             logger.error(f"TTS connection error: {error}")
-        ```
     """
     def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
+        """Initialize the Websocket Word TTS service.
+        Args:
+            reconnect_on_error: Whether to automatically reconnect on websocket errors.
+            **kwargs: Additional arguments passed to parent classes.
+        """
         WordTTSService.__init__(self, **kwargs)
         WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
         self._register_event_handler("on_connection_error")
@@ -665,12 +687,14 @@ class InterruptibleWordTTSService(WebsocketWordTTSService):
     For TTS services that support word timestamps but can't correlate generated
     audio with requested text. Handles interruptions by reconnecting when needed.
-    Args:
-        **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
     """
     def __init__(self, **kwargs):
+        """Initialize the Interruptible Word TTS service.
+        Args:
+            **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
+        """
         super().__init__(**kwargs)
         # Indicates if the bot is speaking. If the bot is not speaking we don't
@@ -713,12 +737,14 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
     The audio received from the TTS will be played in context order. That is, if
     we requested audio for a context "A" and then audio for context "B", the
     audio from context ID "A" will be played first.
-    Args:
-        **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
     """
     def __init__(self, **kwargs):
+        """Initialize the Audio Context Word TTS service.
+        Args:
+            **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
+        """
         super().__init__(**kwargs)
         self._contexts: Dict[str, asyncio.Queue] = {}
         self._audio_context_task = None
@@ -792,7 +818,7 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
             # Indicate no more audio contexts are available. this will end the
             # task cleanly after all contexts have been processed.
             await self._contexts_queue.put(None)
-            await self.wait_for_task(self._audio_context_task)
+            await self._audio_context_task
             self._audio_context_task = None
     async def cancel(self, frame: CancelFrame):
@@ -811,7 +837,7 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
     def _create_audio_context_task(self):
         if not self._audio_context_task:
-            self._contexts_queue = WatchdogQueue(self.task_manager)
+            self._contexts_queue = asyncio.Queue()
             self._contexts: Dict[str, asyncio.Queue] = {}
             self._audio_context_task = self.create_task(self._audio_context_task_handler())
@@ -853,12 +879,10 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
         while running:
             try:
                 frame = await asyncio.wait_for(queue.get(), timeout=AUDIO_CONTEXT_TIMEOUT)
-                self.reset_watchdog()
                 if frame:
                     await self.push_frame(frame)
                 running = frame is not None
             except asyncio.TimeoutError:
-                self.reset_watchdog()
                 # We didn't get audio, so let's consider this context finished.
                 logger.trace(f"{self} time out on audio context {context_id}")
                 break

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl