dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0

pipecat/services/gladia/config.py CHANGED
@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

+"""Configuration for the Gladia STT service."""
+
 from typing import Any, Dict, List, Optional, Union

 from pydantic import BaseModel
@@ -14,7 +16,7 @@ from pipecat.transcriptions.language import Language
 class LanguageConfig(BaseModel):
     """Configuration for language detection and handling.

-
+    Parameters:
         languages: List of language codes to use for transcription
         code_switching: Whether to auto-detect language changes during transcription
     """
@@ -26,7 +28,7 @@ class LanguageConfig(BaseModel):
 class PreProcessingConfig(BaseModel):
     """Configuration for audio pre-processing options.

-
+    Parameters:
         speech_threshold: Sensitivity for speech detection (0-1)
     """

@@ -36,7 +38,7 @@ class PreProcessingConfig(BaseModel):
 class CustomVocabularyItem(BaseModel):
     """Represents a custom vocabulary item with an intensity value.

-
+    Parameters:
         value: The vocabulary word or phrase
         intensity: The bias intensity for this vocabulary item (0-1)
     """
@@ -48,7 +50,7 @@ class CustomVocabularyItem(BaseModel):
 class CustomVocabularyConfig(BaseModel):
     """Configuration for custom vocabulary.

-
+    Parameters:
         vocabulary: List of words/phrases or CustomVocabularyItem objects
         default_intensity: Default intensity for simple string vocabulary items
     """
@@ -60,7 +62,7 @@ class CustomVocabularyConfig(BaseModel):
 class CustomSpellingConfig(BaseModel):
     """Configuration for custom spelling rules.

-
+    Parameters:
         spelling_dictionary: Mapping of correct spellings to phonetic variations
     """

@@ -70,7 +72,7 @@ class CustomSpellingConfig(BaseModel):
 class TranslationConfig(BaseModel):
     """Configuration for real-time translation.

-
+    Parameters:
         target_languages: List of target language codes for translation
         model: Translation model to use ("base" or "enhanced")
         match_original_utterances: Whether to align translations with original utterances
@@ -92,7 +94,7 @@ class TranslationConfig(BaseModel):
 class RealtimeProcessingConfig(BaseModel):
     """Configuration for real-time processing features.

-
+    Parameters:
         words_accurate_timestamps: Whether to provide per-word timestamps
         custom_vocabulary: Whether to enable custom vocabulary
         custom_vocabulary_config: Custom vocabulary configuration
@@ -118,7 +120,7 @@ class RealtimeProcessingConfig(BaseModel):
 class MessagesConfig(BaseModel):
     """Configuration for controlling which message types are sent via WebSocket.

-
+    Parameters:
         receive_partial_transcripts: Whether to receive intermediate transcription results
         receive_final_transcripts: Whether to receive final transcription results
         receive_speech_events: Whether to receive speech begin/end events
@@ -144,14 +146,19 @@ class MessagesConfig(BaseModel):
 class GladiaInputParams(BaseModel):
     """Configuration parameters for the Gladia STT service.

-
+    Parameters:
         encoding: Audio encoding format
         bit_depth: Audio bit depth
         channels: Number of audio channels
         custom_metadata: Additional metadata to include with requests
         endpointing: Silence duration in seconds to mark end of speech
         maximum_duration_without_endpointing: Maximum utterance duration without silence
-        language:
+        language: Language code for transcription
+
+            .. deprecated:: 0.0.62
+                The 'language' parameter is deprecated and will be removed in a future version.
+                Use 'language_config' instead.
+
         language_config: Detailed language configuration
         pre_processing: Audio pre-processing options
         realtime_processing: Real-time processing features
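The hunk above deprecates the single `language` field in favor of `language_config`. A minimal sketch of the newer style, assuming only the field names documented in the docstring above (`language_config`, `languages`, `code_switching`):

```python
from pipecat.services.gladia.config import GladiaInputParams, LanguageConfig

# Replace the deprecated `language=...` single code with a LanguageConfig,
# which supports several languages plus automatic code switching.
params = GladiaInputParams(
    language_config=LanguageConfig(
        languages=["en", "fr"],  # language codes to transcribe
        code_switching=True,     # auto-detect language changes during transcription
    )
)
```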
pipecat/services/gladia/stt.py CHANGED

@@ -4,11 +4,17 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

+"""Gladia Speech-to-Text (STT) service implementation.
+
+This module provides a Speech-to-Text service using Gladia's real-time WebSocket API,
+supporting multiple languages, custom vocabulary, and various audio processing options.
+"""
+
 import asyncio
 import base64
 import json
 import warnings
-from typing import Any, AsyncGenerator, Dict, List, Optional
+from typing import Any, AsyncGenerator, Dict, Literal, List, Optional

 import aiohttp
 from loguru import logger
@@ -31,12 +37,13 @@ from pipecat.services.gladia.config import (
 )
 from pipecat.services.stt_service import STTService
 from pipecat.transcriptions.language import Language
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 from pipecat.utils.time import time_now_iso8601
 from pipecat.utils.tracing.service_decorators import traced_stt

 try:
     import websockets
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error("In order to use Gladia, you need to `pip install pipecat-ai[gladia]`.")
@@ -47,10 +54,10 @@ def language_to_gladia_language(language: Language) -> Optional[str]:
     """Convert a Language enum to Gladia's language code format.

     Args:
-        language: The Language enum value to convert
+        language: The Language enum value to convert.

     Returns:
-        The Gladia language code string or None if not supported
+        The Gladia language code string or None if not supported.
     """
     BASE_LANGUAGES = {
         Language.AF: "af",
@@ -186,8 +193,12 @@ class GladiaSTTService(STTService):

     This service connects to Gladia's WebSocket API for real-time transcription
     with support for multiple languages, custom vocabulary, and various processing options.
+    Provides automatic reconnection, audio buffering, and comprehensive error handling.

     For complete API documentation, see: https://docs.gladia.io/api-reference/v2/live/init
+
+    .. deprecated:: 0.0.62
+        Use :class:`~pipecat.services.gladia.config.GladiaInputParams` directly instead.
     """

     # Maintain backward compatibility
@@ -197,6 +208,7 @@ class GladiaSTTService(STTService):
         self,
         *,
         api_key: str,
+        region: Literal["us-west", "eu-west"] | None = None,
         url: str = "https://api.gladia.io/v2/live",
         confidence: float = 0.5,
         sample_rate: Optional[int] = None,
@@ -210,16 +222,17 @@ class GladiaSTTService(STTService):
         """Initialize the Gladia STT service.

         Args:
-            api_key: Gladia API key
-
-
-
-
-
-
-
-
-
+            api_key: Gladia API key for authentication.
+            region: Region used to process audio. eu-west or us-west. Defaults to eu-west.
+            url: Gladia API URL. Defaults to "https://api.gladia.io/v2/live".
+            confidence: Minimum confidence threshold for transcriptions (0.0-1.0).
+            sample_rate: Audio sample rate in Hz. If None, uses service default.
+            model: Model to use for transcription. Defaults to "solaria-1".
+            params: Additional configuration parameters for Gladia service.
+            max_reconnection_attempts: Maximum number of reconnection attempts. Defaults to 5.
+            reconnection_delay: Initial delay between reconnection attempts in seconds.
+            max_buffer_size: Maximum size of audio buffer in bytes. Defaults to 20MB.
+            **kwargs: Additional arguments passed to the STTService parent class.
         """
         super().__init__(sample_rate=sample_rate, **kwargs)
         vocab: Optional[List[str]] = kwargs.pop("vocab", None)  # Get vocab from kwargs
@@ -236,6 +249,7 @@ class GladiaSTTService(STTService):
         )

         self._api_key = api_key
+        self._region = region
         self._url = url
         self.set_model_name(model)
         self._confidence = confidence
@@ -280,10 +294,22 @@ class GladiaSTTService(STTService):
         self._should_reconnect = True

     def can_generate_metrics(self) -> bool:
+        """Check if the service can generate performance metrics.
+
+        Returns:
+            True, indicating this service supports metrics generation.
+        """
         return True

     def language_to_service_language(self, language: Language) -> Optional[str]:
-        """Convert pipecat Language enum to Gladia's language code.
+        """Convert pipecat Language enum to Gladia's language code.
+
+        Args:
+            language: The Language enum value to convert.
+
+        Returns:
+            The Gladia language code string or None if not supported.
+        """
         return language_to_gladia_language(language)

     def _prepare_settings(self) -> Dict[str, Any]:
@@ -338,7 +364,11 @@ class GladiaSTTService(STTService):
         return settings

     async def start(self, frame: StartFrame):
-        """Start the Gladia STT websocket connection.
+        """Start the Gladia STT websocket connection.
+
+        Args:
+            frame: The start frame triggering service startup.
+        """
         await super().start(frame)
         if self._connection_task:
             return
@@ -347,7 +377,11 @@ class GladiaSTTService(STTService):
         self._connection_task = self.create_task(self._connection_handler())

     async def stop(self, frame: EndFrame):
-        """Stop the Gladia STT websocket connection.
+        """Stop the Gladia STT websocket connection.
+
+        Args:
+            frame: The end frame triggering service shutdown.
+        """
         await super().stop(frame)
         self._should_reconnect = False
         await self._send_stop_recording()
@@ -359,7 +393,11 @@ class GladiaSTTService(STTService):
         await self._cleanup_connection()

     async def cancel(self, frame: CancelFrame):
-        """Cancel the Gladia STT websocket connection.
+        """Cancel the Gladia STT websocket connection.
+
+        Args:
+            frame: The cancel frame triggering service cancellation.
+        """
         await super().cancel(frame)
         self._should_reconnect = False

@@ -370,7 +408,14 @@ class GladiaSTTService(STTService):
         await self._cleanup_connection()

     async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
-        """Run speech-to-text on audio data.
+        """Run speech-to-text on audio data.
+
+        Args:
+            audio: Raw audio bytes to transcribe.
+
+        Yields:
+            None (processing is handled asynchronously via WebSocket).
+        """
         await self.start_ttfb_metrics()
         await self.start_processing_metrics()

@@ -385,7 +430,7 @@ class GladiaSTTService(STTService):
             logger.warning(f"Audio buffer exceeded max size, trimmed {trim_size} bytes")

         # Send audio if connected
-        if self._connection_active and self._websocket and
+        if self._connection_active and self._websocket and self._websocket.state is State.OPEN:
             try:
                 await self._send_audio(audio)
             except websockets.exceptions.ConnectionClosed as e:
@@ -406,11 +451,11 @@ class GladiaSTTService(STTService):
                 self._reconnection_attempts = 0

                 # Connect with automatic reconnection
-                async with
+                async with websocket_connect(self._session_url) as websocket:
                     try:
                         self._websocket = websocket
                         self._connection_active = True
-                        logger.
+                        logger.debug(f"{self} Connected to Gladia WebSocket")

                         # Send buffered audio if any
                         await self._send_buffered_audio()
@@ -465,10 +510,14 @@ class GladiaSTTService(STTService):

     async def _setup_gladia(self, settings: Dict[str, Any]):
         async with aiohttp.ClientSession() as session:
+            params = {}
+            if self._region:
+                params["region"] = self._region
             async with session.post(
                 self._url,
-                headers={"X-Gladia-Key": self._api_key
+                headers={"X-Gladia-Key": self._api_key},
                 json=settings,
+                params=params,
             ) as response:
                 if response.ok:
                     return await response.json()
@@ -490,7 +539,7 @@ class GladiaSTTService(STTService):

     async def _send_audio(self, audio: bytes):
         """Send audio chunk with proper message format."""
-        if self._websocket and
+        if self._websocket and self._websocket.state is State.OPEN:
             data = base64.b64encode(audio).decode("utf-8")
             message = {"type": "audio_chunk", "data": {"chunk": data}}
             await self._websocket.send(json.dumps(message))
@@ -499,22 +548,21 @@ class GladiaSTTService(STTService):
         """Send any buffered audio after reconnection."""
         async with self._buffer_lock:
             if self._audio_buffer:
-                logger.
+                logger.debug(f"{self} Sending {len(self._audio_buffer)} bytes of buffered audio")
                 await self._send_audio(bytes(self._audio_buffer))

     async def _send_stop_recording(self):
-        if self._websocket and
+        if self._websocket and self._websocket.state is State.OPEN:
             await self._websocket.send(json.dumps({"type": "stop_recording"}))

     async def _keepalive_task_handler(self):
         """Send periodic empty audio chunks to keep the connection alive."""
         try:
-            KEEPALIVE_SLEEP = 20
+            KEEPALIVE_SLEEP = 20
             while self._connection_active:
-                self.reset_watchdog()
                 # Send keepalive (Gladia times out after 30 seconds)
                 await asyncio.sleep(KEEPALIVE_SLEEP)
-                if self._websocket and
+                if self._websocket and self._websocket.state is State.OPEN:
                     # Send an empty audio chunk as keepalive
                     empty_audio = b""
                     await self._send_audio(empty_audio)
@@ -528,7 +576,7 @@ class GladiaSTTService(STTService):

     async def _receive_task_handler(self):
         try:
-            async for message in
+            async for message in self._websocket:
                 content = json.loads(message)

                 # Handle audio chunk acknowledgments
@@ -553,7 +601,7 @@ class GladiaSTTService(STTService):
                         await self.push_frame(
                             TranscriptionFrame(
                                 transcript,
-
+                                self._user_id,
                                 time_now_iso8601(),
                                 language,
                                 result=content,
@@ -568,7 +616,7 @@ class GladiaSTTService(STTService):
                         await self.push_frame(
                             InterimTranscriptionFrame(
                                 transcript,
-
+                                self._user_id,
                                 time_now_iso8601(),
                                 language,
                                 result=content,
@@ -586,8 +634,6 @@ class GladiaSTTService(STTService):
                                 translation, "", time_now_iso8601(), translated_language
                             )
                         )
-
-                self.reset_watchdog()
         except websockets.exceptions.ConnectionClosed:
             # Expected when closing the connection
             pass
@@ -604,8 +650,8 @@ class GladiaSTTService(STTService):
             self._should_reconnect = False
             return False
         delay = self._reconnection_delay * (2 ** (self._reconnection_attempts - 1))
-        logger.
-            f"Reconnecting in {delay} seconds (attempt {self._reconnection_attempts}/{self._max_reconnection_attempts})"
+        logger.debug(
+            f"{self} Reconnecting in {delay} seconds (attempt {self._reconnection_attempts}/{self._max_reconnection_attempts})"
         )
         await asyncio.sleep(delay)
         return True
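Taken together, these hunks add a `region` selector and move the WebSocket open-state checks to the newer `websockets` API (`state is State.OPEN`). A minimal construction sketch using the new keyword; the environment variable name and language choices are illustrative:

```python
import os

from pipecat.services.gladia.config import GladiaInputParams, LanguageConfig
from pipecat.services.gladia.stt import GladiaSTTService

stt = GladiaSTTService(
    api_key=os.environ["GLADIA_API_KEY"],  # illustrative env var name
    region="us-west",                      # new in this release; defaults to eu-west when omitted
    params=GladiaInputParams(
        language_config=LanguageConfig(languages=["en"], code_switching=False),
    ),
)
```

When `region` is set, `_setup_gladia()` forwards it as a query parameter on the session-init request, as shown in the hunk above.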
pipecat/services/google/frames.py CHANGED

@@ -4,6 +4,13 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

+"""Google AI service frames for search and grounding functionality.
+
+This module defines specialized frame types for handling search results
+and grounding metadata from Google AI models, particularly for Gemini
+models that support web search and fact grounding capabilities.
+"""
+
 from dataclasses import dataclass, field
 from typing import List, Optional

@@ -12,12 +19,27 @@ from pipecat.frames.frames import DataFrame

 @dataclass
 class LLMSearchResult:
+    """Represents a single search result with confidence scores.
+
+    Parameters:
+        text: The search result text content.
+        confidence: List of confidence scores associated with the result.
+    """
+
     text: str
     confidence: List[float] = field(default_factory=list)


 @dataclass
 class LLMSearchOrigin:
+    """Represents the origin source of search results.
+
+    Parameters:
+        site_uri: URI of the source website.
+        site_title: Title of the source website.
+        results: List of search results from this origin.
+    """
+
     site_uri: Optional[str] = None
     site_title: Optional[str] = None
     results: List[LLMSearchResult] = field(default_factory=list)
@@ -25,9 +47,27 @@ class LLMSearchOrigin:

 @dataclass
 class LLMSearchResponseFrame(DataFrame):
+    """Frame containing search results and grounding information from Google AI models.
+
+    This frame is used to convey search results and grounding metadata
+    from Google AI models that support web search capabilities. It includes
+    the search result text, rendered content, and detailed origin information
+    with confidence scores.
+
+    Parameters:
+        search_result: The main search result text.
+        rendered_content: Rendered content from the search entry point.
+        origins: List of search result origins with detailed information.
+    """
+
     search_result: Optional[str] = None
     rendered_content: Optional[str] = None
     origins: List[LLMSearchOrigin] = field(default_factory=list)

     def __str__(self):
+        """Return string representation of the search response frame.
+
+        Returns:
+            String representation showing search result and origins.
+        """
         return f"LLMSearchResponseFrame(search_result={self.search_result}, origins={self.origins})"
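The new docstrings spell out the shape of the grounding data. A small consumer sketch that uses only the dataclass fields shown above, assuming the frame can be constructed directly like the other dataclass frames in this diff:

```python
from pipecat.services.google.frames import LLMSearchOrigin, LLMSearchResponseFrame, LLMSearchResult


def format_citations(frame: LLMSearchResponseFrame) -> str:
    """Flatten grounding origins into human-readable citation lines."""
    lines = []
    for origin in frame.origins:
        source = origin.site_title or origin.site_uri or "unknown source"
        for result in origin.results:
            lines.append(f"- {source}: {result.text} (confidence: {result.confidence})")
    return "\n".join(lines)


frame = LLMSearchResponseFrame(
    search_result="Example answer text",
    origins=[
        LLMSearchOrigin(
            site_title="Example site",
            site_uri="https://example.com",
            results=[LLMSearchResult(text="Supporting snippet", confidence=[0.9])],
        )
    ],
)
print(format_citations(frame))
```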
pipecat/services/google/image.py CHANGED

@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

+"""Google AI image generation service implementation.
+
+This module provides integration with Google's Imagen model for generating
+images from text prompts using the Google AI API.
+"""
+
 import io
 import os

@@ -29,7 +35,22 @@ except ModuleNotFoundError as e:


 class GoogleImageGenService(ImageGenService):
+    """Google AI image generation service using Imagen models.
+
+    Provides text-to-image generation capabilities using Google's Imagen models
+    through the Google AI API. Supports multiple image generation and negative
+    prompting for enhanced control over generated content.
+    """
+
     class InputParams(BaseModel):
+        """Configuration parameters for Google image generation.
+
+        Parameters:
+            number_of_images: Number of images to generate (1-8). Defaults to 1.
+            model: Google Imagen model to use. Defaults to "imagen-3.0-generate-002".
+            negative_prompt: Optional negative prompt to guide what not to include.
+        """
+
         number_of_images: int = Field(default=1, ge=1, le=8)
         model: str = Field(default="imagen-3.0-generate-002")
         negative_prompt: Optional[str] = Field(default=None)
@@ -41,22 +62,38 @@ class GoogleImageGenService(ImageGenService):
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the GoogleImageGenService with API key and parameters.
+
+        Args:
+            api_key: Google AI API key for authentication.
+            params: Configuration parameters for image generation. Defaults to InputParams().
+            **kwargs: Additional arguments passed to the parent ImageGenService.
+        """
         super().__init__(**kwargs)
         self._params = params or GoogleImageGenService.InputParams()
         self._client = genai.Client(api_key=api_key)
         self.set_model_name(self._params.model)

     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as Google image generation service supports metrics.
+        """
         return True

     async def run_image_gen(self, prompt: str) -> AsyncGenerator[Frame, None]:
         """Generate images from a text prompt using Google's Imagen model.

         Args:
-            prompt
+            prompt: The text description to generate images from.

         Yields:
-            Frame: Generated
+            Frame: Generated URLImageRawFrame objects containing the generated
+                images, or ErrorFrame objects if generation fails.
+
+        Raises:
+            Exception: If there are issues with the Google AI API or image processing.
         """
         logger.debug(f"Generating image from prompt: {prompt}")
         await self.start_ttfb_metrics()