PyPI - dv-pipecat-ai - Versions diffs - 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244) hide show

{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
pipecat/__init__.py +17 -0
pipecat/adapters/base_llm_adapter.py +36 -1
pipecat/adapters/schemas/direct_function.py +296 -0
pipecat/adapters/schemas/function_schema.py +15 -6
pipecat/adapters/schemas/tools_schema.py +55 -7
pipecat/adapters/services/anthropic_adapter.py +22 -3
pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
pipecat/adapters/services/bedrock_adapter.py +22 -3
pipecat/adapters/services/gemini_adapter.py +16 -3
pipecat/adapters/services/open_ai_adapter.py +17 -2
pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
pipecat/audio/filters/base_audio_filter.py +30 -6
pipecat/audio/filters/koala_filter.py +37 -2
pipecat/audio/filters/krisp_filter.py +59 -6
pipecat/audio/filters/noisereduce_filter.py +37 -0
pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
pipecat/audio/mixers/base_audio_mixer.py +30 -7
pipecat/audio/mixers/soundfile_mixer.py +53 -6
pipecat/audio/resamplers/base_audio_resampler.py +17 -9
pipecat/audio/resamplers/resampy_resampler.py +26 -1
pipecat/audio/resamplers/soxr_resampler.py +32 -1
pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
pipecat/audio/utils.py +194 -1
pipecat/audio/vad/silero.py +60 -3
pipecat/audio/vad/vad_analyzer.py +114 -30
pipecat/clocks/base_clock.py +19 -0
pipecat/clocks/system_clock.py +25 -0
pipecat/extensions/voicemail/__init__.py +0 -0
pipecat/extensions/voicemail/voicemail_detector.py +707 -0
pipecat/frames/frames.py +590 -156
pipecat/metrics/metrics.py +64 -1
pipecat/observers/base_observer.py +58 -19
pipecat/observers/loggers/debug_log_observer.py +56 -64
pipecat/observers/loggers/llm_log_observer.py +8 -1
pipecat/observers/loggers/transcription_log_observer.py +19 -7
pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
pipecat/observers/turn_tracking_observer.py +26 -1
pipecat/pipeline/base_pipeline.py +5 -7
pipecat/pipeline/base_task.py +52 -9
pipecat/pipeline/parallel_pipeline.py +121 -177
pipecat/pipeline/pipeline.py +129 -20
pipecat/pipeline/runner.py +50 -1
pipecat/pipeline/sync_parallel_pipeline.py +132 -32
pipecat/pipeline/task.py +263 -280
pipecat/pipeline/task_observer.py +85 -34
pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
pipecat/processors/aggregators/gated.py +25 -24
pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
pipecat/processors/aggregators/llm_response.py +398 -89
pipecat/processors/aggregators/openai_llm_context.py +161 -13
pipecat/processors/aggregators/sentence.py +25 -14
pipecat/processors/aggregators/user_response.py +28 -3
pipecat/processors/aggregators/vision_image_frame.py +24 -14
pipecat/processors/async_generator.py +28 -0
pipecat/processors/audio/audio_buffer_processor.py +78 -37
pipecat/processors/consumer_processor.py +25 -6
pipecat/processors/filters/frame_filter.py +23 -0
pipecat/processors/filters/function_filter.py +30 -0
pipecat/processors/filters/identity_filter.py +17 -2
pipecat/processors/filters/null_filter.py +24 -1
pipecat/processors/filters/stt_mute_filter.py +56 -21
pipecat/processors/filters/wake_check_filter.py +46 -3
pipecat/processors/filters/wake_notifier_filter.py +21 -3
pipecat/processors/frame_processor.py +488 -131
pipecat/processors/frameworks/langchain.py +38 -3
pipecat/processors/frameworks/rtvi.py +719 -34
pipecat/processors/gstreamer/pipeline_source.py +41 -0
pipecat/processors/idle_frame_processor.py +26 -3
pipecat/processors/logger.py +23 -0
pipecat/processors/metrics/frame_processor_metrics.py +77 -4
pipecat/processors/metrics/sentry.py +42 -4
pipecat/processors/producer_processor.py +34 -14
pipecat/processors/text_transformer.py +22 -10
pipecat/processors/transcript_processor.py +48 -29
pipecat/processors/user_idle_processor.py +31 -21
pipecat/runner/__init__.py +1 -0
pipecat/runner/daily.py +132 -0
pipecat/runner/livekit.py +148 -0
pipecat/runner/run.py +543 -0
pipecat/runner/types.py +67 -0
pipecat/runner/utils.py +515 -0
pipecat/serializers/base_serializer.py +42 -0
pipecat/serializers/exotel.py +17 -6
pipecat/serializers/genesys.py +95 -0
pipecat/serializers/livekit.py +33 -0
pipecat/serializers/plivo.py +16 -15
pipecat/serializers/protobuf.py +37 -1
pipecat/serializers/telnyx.py +18 -17
pipecat/serializers/twilio.py +32 -16
pipecat/services/ai_service.py +5 -3
pipecat/services/anthropic/llm.py +113 -43
pipecat/services/assemblyai/models.py +63 -5
pipecat/services/assemblyai/stt.py +64 -11
pipecat/services/asyncai/__init__.py +0 -0
pipecat/services/asyncai/tts.py +501 -0
pipecat/services/aws/llm.py +185 -111
pipecat/services/aws/stt.py +217 -23
pipecat/services/aws/tts.py +118 -52
pipecat/services/aws/utils.py +101 -5
pipecat/services/aws_nova_sonic/aws.py +82 -64
pipecat/services/aws_nova_sonic/context.py +15 -6
pipecat/services/azure/common.py +10 -2
pipecat/services/azure/image.py +32 -0
pipecat/services/azure/llm.py +9 -7
pipecat/services/azure/stt.py +65 -2
pipecat/services/azure/tts.py +154 -23
pipecat/services/cartesia/stt.py +125 -8
pipecat/services/cartesia/tts.py +102 -38
pipecat/services/cerebras/llm.py +15 -23
pipecat/services/deepgram/stt.py +19 -11
pipecat/services/deepgram/tts.py +36 -0
pipecat/services/deepseek/llm.py +14 -23
pipecat/services/elevenlabs/tts.py +330 -64
pipecat/services/fal/image.py +43 -0
pipecat/services/fal/stt.py +48 -10
pipecat/services/fireworks/llm.py +14 -21
pipecat/services/fish/tts.py +109 -9
pipecat/services/gemini_multimodal_live/__init__.py +1 -0
pipecat/services/gemini_multimodal_live/events.py +83 -2
pipecat/services/gemini_multimodal_live/file_api.py +189 -0
pipecat/services/gemini_multimodal_live/gemini.py +218 -21
pipecat/services/gladia/config.py +17 -10
pipecat/services/gladia/stt.py +82 -36
pipecat/services/google/frames.py +40 -0
pipecat/services/google/google.py +2 -0
pipecat/services/google/image.py +39 -2
pipecat/services/google/llm.py +176 -58
pipecat/services/google/llm_openai.py +26 -4
pipecat/services/google/llm_vertex.py +37 -15
pipecat/services/google/rtvi.py +41 -0
pipecat/services/google/stt.py +65 -17
pipecat/services/google/test-google-chirp.py +45 -0
pipecat/services/google/tts.py +390 -19
pipecat/services/grok/llm.py +8 -6
pipecat/services/groq/llm.py +8 -6
pipecat/services/groq/stt.py +13 -9
pipecat/services/groq/tts.py +40 -0
pipecat/services/hamsa/__init__.py +9 -0
pipecat/services/hamsa/stt.py +241 -0
pipecat/services/heygen/__init__.py +5 -0
pipecat/services/heygen/api.py +281 -0
pipecat/services/heygen/client.py +620 -0
pipecat/services/heygen/video.py +338 -0
pipecat/services/image_service.py +5 -3
pipecat/services/inworld/__init__.py +1 -0
pipecat/services/inworld/tts.py +592 -0
pipecat/services/llm_service.py +127 -45
pipecat/services/lmnt/tts.py +80 -7
pipecat/services/mcp_service.py +85 -44
pipecat/services/mem0/memory.py +42 -13
pipecat/services/minimax/tts.py +74 -15
pipecat/services/mistral/__init__.py +0 -0
pipecat/services/mistral/llm.py +185 -0
pipecat/services/moondream/vision.py +55 -10
pipecat/services/neuphonic/tts.py +275 -48
pipecat/services/nim/llm.py +8 -6
pipecat/services/ollama/llm.py +27 -7
pipecat/services/openai/base_llm.py +54 -16
pipecat/services/openai/image.py +30 -0
pipecat/services/openai/llm.py +7 -5
pipecat/services/openai/stt.py +13 -9
pipecat/services/openai/tts.py +42 -10
pipecat/services/openai_realtime_beta/azure.py +11 -9
pipecat/services/openai_realtime_beta/context.py +7 -5
pipecat/services/openai_realtime_beta/events.py +10 -7
pipecat/services/openai_realtime_beta/openai.py +37 -18
pipecat/services/openpipe/llm.py +30 -24
pipecat/services/openrouter/llm.py +9 -7
pipecat/services/perplexity/llm.py +15 -19
pipecat/services/piper/tts.py +26 -12
pipecat/services/playht/tts.py +227 -65
pipecat/services/qwen/llm.py +8 -6
pipecat/services/rime/tts.py +128 -17
pipecat/services/riva/stt.py +160 -22
pipecat/services/riva/tts.py +67 -2
pipecat/services/sambanova/llm.py +19 -17
pipecat/services/sambanova/stt.py +14 -8
pipecat/services/sarvam/tts.py +60 -13
pipecat/services/simli/video.py +82 -21
pipecat/services/soniox/__init__.py +0 -0
pipecat/services/soniox/stt.py +398 -0
pipecat/services/speechmatics/stt.py +29 -17
pipecat/services/stt_service.py +47 -11
pipecat/services/tavus/video.py +94 -25
pipecat/services/together/llm.py +8 -6
pipecat/services/tts_service.py +77 -53
pipecat/services/ultravox/stt.py +46 -43
pipecat/services/vision_service.py +5 -3
pipecat/services/websocket_service.py +12 -11
pipecat/services/whisper/base_stt.py +58 -12
pipecat/services/whisper/stt.py +69 -58
pipecat/services/xtts/tts.py +59 -2
pipecat/sync/base_notifier.py +19 -0
pipecat/sync/event_notifier.py +24 -0
pipecat/tests/utils.py +73 -5
pipecat/transcriptions/language.py +24 -0
pipecat/transports/base_input.py +112 -8
pipecat/transports/base_output.py +235 -13
pipecat/transports/base_transport.py +119 -0
pipecat/transports/local/audio.py +76 -0
pipecat/transports/local/tk.py +84 -0
pipecat/transports/network/fastapi_websocket.py +174 -15
pipecat/transports/network/small_webrtc.py +383 -39
pipecat/transports/network/webrtc_connection.py +214 -8
pipecat/transports/network/websocket_client.py +171 -1
pipecat/transports/network/websocket_server.py +147 -9
pipecat/transports/services/daily.py +792 -70
pipecat/transports/services/helpers/daily_rest.py +122 -129
pipecat/transports/services/livekit.py +339 -4
pipecat/transports/services/tavus.py +273 -38
pipecat/utils/asyncio/task_manager.py +92 -186
pipecat/utils/base_object.py +83 -1
pipecat/utils/network.py +2 -0
pipecat/utils/string.py +114 -58
pipecat/utils/text/base_text_aggregator.py +44 -13
pipecat/utils/text/base_text_filter.py +46 -0
pipecat/utils/text/markdown_text_filter.py +70 -14
pipecat/utils/text/pattern_pair_aggregator.py +18 -14
pipecat/utils/text/simple_text_aggregator.py +43 -2
pipecat/utils/text/skip_tags_aggregator.py +21 -13
pipecat/utils/time.py +36 -0
pipecat/utils/tracing/class_decorators.py +32 -7
pipecat/utils/tracing/conversation_context_provider.py +12 -2
pipecat/utils/tracing/service_attributes.py +80 -64
pipecat/utils/tracing/service_decorators.py +48 -21
pipecat/utils/tracing/setup.py +13 -7
pipecat/utils/tracing/turn_context_provider.py +12 -2
pipecat/utils/tracing/turn_trace_observer.py +27 -0
pipecat/utils/utils.py +14 -14
dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
pipecat/examples/daily_runner.py +0 -64
pipecat/examples/run.py +0 -265
pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
pipecat/utils/asyncio/watchdog_event.py +0 -42
pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
pipecat/utils/asyncio/watchdog_queue.py +0 -48
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
/pipecat/{examples → extensions}/__init__.py +0 -0

pipecat/services/google/stt.py CHANGED Viewed

@@ -4,12 +4,18 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""Google Cloud Speech-to-Text V2 service implementation for Pipecat.
+This module provides a Google Cloud Speech-to-Text V2 service with streaming
+support, enabling real-time speech recognition with features like automatic
+punctuation, voice activity detection, and multi-language support.
+"""
 import asyncio
 import json
 import os
 import time
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 from pipecat.utils.tracing.service_decorators import traced_stt
 # Suppress gRPC fork warnings
@@ -353,9 +359,15 @@ class GoogleSTTService(STTService):
     Provides real-time speech recognition using Google Cloud's Speech-to-Text V2 API
     with streaming support. Handles audio transcription and optional voice activity detection.
+    Implements automatic stream reconnection to handle Google's 4-minute streaming limit.
-    Attributes:
+    Parameters:
         InputParams: Configuration parameters for the STT service.
+        STREAMING_LIMIT: Google Cloud's streaming limit in milliseconds (4 minutes).
+    Raises:
+        ValueError: If neither credentials nor credentials_path is provided.
+        ValueError: If project ID is not found in credentials.
     """
     # Google Cloud's STT service has a connection time limit of 5 minutes per stream.
@@ -367,7 +379,7 @@ class GoogleSTTService(STTService):
     class InputParams(BaseModel):
         """Configuration parameters for Google Speech-to-Text.
-        Attributes:
+        Parameters:
             languages: Single language or list of recognition languages. First language is primary.
             model: Speech recognition model to use.
             use_separate_recognition_per_channel: Process each audio channel separately.
@@ -396,13 +408,25 @@ class GoogleSTTService(STTService):
         @field_validator("languages", mode="before")
         @classmethod
         def validate_languages(cls, v) -> List[Language]:
+            """Ensure languages is always a list.
+            Args:
+                v: Single Language enum or list of Language enums.
+            Returns:
+                List[Language]: List of configured languages.
+            """
             if isinstance(v, Language):
                 return [v]
             return v
         @property
         def language_list(self) -> List[Language]:
-            """Get languages as a guaranteed list."""
+            """Get languages as a guaranteed list.
+            Returns:
+                List[Language]: List of configured languages.
+            """
             assert isinstance(self.languages, list)
             return self.languages
@@ -425,10 +449,6 @@ class GoogleSTTService(STTService):
             sample_rate: Audio sample rate in Hertz.
             params: Configuration parameters for the service.
             **kwargs: Additional arguments passed to STTService.
-        Raises:
-            ValueError: If neither credentials nor credentials_path is provided.
-            ValueError: If project ID is not found in credentials.
         """
         super().__init__(sample_rate=sample_rate, **kwargs)
@@ -501,6 +521,11 @@ class GoogleSTTService(STTService):
         }
     def can_generate_metrics(self) -> bool:
+        """Check if the service can generate metrics.
+        Returns:
+            bool: True, as this service supports metrics generation.
+        """
         return True
     def language_to_service_language(self, language: Language | List[Language]) -> str | List[str]:
@@ -548,7 +573,11 @@ class GoogleSTTService(STTService):
         await self._reconnect_if_needed()
     async def set_model(self, model: str):
-        """Update the service's recognition model."""
+        """Update the service's recognition model.
+        Args:
+            model: The new recognition model to use.
+        """
         logger.debug(f"Switching STT model to: {model}")
         await super().set_model(model)
         self._settings["model"] = model
@@ -556,14 +585,29 @@ class GoogleSTTService(STTService):
         await self._reconnect_if_needed()
     async def start(self, frame: StartFrame):
+        """Start the STT service and establish connection.
+        Args:
+            frame: The start frame triggering the service start.
+        """
         await super().start(frame)
         await self._connect()
     async def stop(self, frame: EndFrame):
+        """Stop the STT service and clean up resources.
+        Args:
+            frame: The end frame triggering the service stop.
+        """
         await super().stop(frame)
         await self._disconnect()
     async def cancel(self, frame: CancelFrame):
+        """Cancel the STT service and clean up resources.
+        Args:
+            frame: The cancel frame triggering the service cancellation.
+        """
         await super().cancel(frame)
         await self._disconnect()
@@ -585,7 +629,7 @@ class GoogleSTTService(STTService):
         """Update service options dynamically.
         Args:
-            languages: New list of recongition languages.
+            languages: New list of recognition languages.
             model: New recognition model.
             enable_automatic_punctuation: Enable/disable automatic punctuation.
             enable_spoken_punctuation: Enable/disable spoken punctuation.
@@ -736,7 +780,6 @@ class GoogleSTTService(STTService):
                     if self._request_queue.empty():
                         # wait for 10ms in case we don't have audio
                         await asyncio.sleep(0.01)
-                        self.reset_watchdog()
                         continue
                     # Start bi-directional streaming
@@ -767,7 +810,14 @@ class GoogleSTTService(STTService):
             await self.push_frame(ErrorFrame(str(e)))
     async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
-        """Process an audio chunk for STT transcription."""
+        """Process an audio chunk for STT transcription.
+        Args:
+            audio: Raw audio bytes to transcribe.
+        Yields:
+            Frame: None (actual transcription frames are pushed via internal processing).
+        """
         if self._streaming_task:
             # Queue the audio data
             await self.start_ttfb_metrics()
@@ -784,9 +834,7 @@ class GoogleSTTService(STTService):
     async def _process_responses(self, streaming_recognize):
         """Process streaming recognition responses."""
         try:
-            async for response in WatchdogAsyncIterator(
-                streaming_recognize, manager=self.task_manager
-            ):
+            async for response in streaming_recognize:
                 # Check streaming limit
                 if (int(time.time() * 1000) - self._stream_start_time) > self.STREAMING_LIMIT:
                     logger.debug("Stream timeout reached in response processing")
@@ -810,7 +858,7 @@ class GoogleSTTService(STTService):
                         await self.push_frame(
                             TranscriptionFrame(
                                 transcript,
-                                "",
+                                self._user_id,
                                 time_now_iso8601(),
                                 primary_language,
                                 result=result,
@@ -828,7 +876,7 @@ class GoogleSTTService(STTService):
                         await self.push_frame(
                             InterimTranscriptionFrame(
                                 transcript,
-                                "",
+                                self._user_id,
                                 time_now_iso8601(),
                                 primary_language,
                                 result=result,

pipecat/services/google/test-google-chirp.py ADDED Viewed

@@ -0,0 +1,45 @@
+import asyncio
+import os
+from pipecat.frames.frames import TTSAudioRawFrame
+from pipecat.services.google.tts import GoogleTTSService
+async def test_chirp_tts():
+    """Manually smoke-test Google TTS with a Chirp3 HD voice.
+
+    Reads the service-account key path from the GOOGLE_APPLICATION_CREDENTIALS
+    environment variable, synthesizes one fixed sentence and prints one line
+    per frame yielded by ``run_tts``.
+
+    Raises:
+        ValueError: If GOOGLE_APPLICATION_CREDENTIALS is unset or does not
+            point to an existing file.
+    """
+    # Get credentials from environment variable. A machine-specific absolute
+    # path must not be baked into the package, and the error below already
+    # tells the user to set this variable.
+    credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+    if not credentials_path or not os.path.exists(credentials_path):
+        raise ValueError(
+            "Please set GOOGLE_APPLICATION_CREDENTIALS environment variable to your service account key file"
+        )
+    # Initialize the TTS service with Chirp voice
+    tts = GoogleTTSService(
+        credentials_path=credentials_path,
+        voice_id="en-US-Chirp3-HD-Charon",  # Using Chirp3 HD Charon voice
+        sample_rate=24000,
+    )
+    # Test text
+    test_text = "Hello, this is a test of the Google TTS service with Chirp voice."
+    print(f"Testing TTS with text: {test_text}")
+    # Generate speech
+    try:
+        async for frame in tts.run_tts(test_text):
+            if isinstance(frame, TTSAudioRawFrame):
+                print(f"Received audio chunk of size: {len(frame.audio)} bytes")
+            else:
+                print(f"Received frame: {frame.__class__.__name__}")
+        print("TTS generation completed successfully!")
+    except Exception as e:
+        # Manual script: report the failure on stdout instead of propagating it.
+        print(f"Error during TTS generation: {str(e)}")
+# Run the smoke test only when this file is executed directly as a script.
+if __name__ == "__main__":
+    asyncio.run(test_chirp_tts())

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl