dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/fal/image.py
CHANGED
@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Fal's image generation service implementation.
+
+This module provides integration with Fal's image generation API
+for creating images from text prompts using various AI models.
+"""
+
 import asyncio
 import io
 import os
@@ -26,7 +32,25 @@ except ModuleNotFoundError as e:
 
 
 class FalImageGenService(ImageGenService):
+    """Fal's image generation service.
+
+    Provides text-to-image generation using Fal.ai's API with configurable
+    parameters for image quality, safety, and format options.
+    """
+
     class InputParams(BaseModel):
+        """Input parameters for Fal.ai image generation.
+
+        Parameters:
+            seed: Random seed for reproducible generation. If None, uses random seed.
+            num_inference_steps: Number of inference steps for generation. Defaults to 8.
+            num_images: Number of images to generate. Defaults to 1.
+            image_size: Image dimensions as string preset or dict with width/height. Defaults to "square_hd".
+            expand_prompt: Whether to automatically expand/enhance the prompt. Defaults to False.
+            enable_safety_checker: Whether to enable content safety filtering. Defaults to True.
+            format: Output image format. Defaults to "png".
+        """
+
         seed: Optional[int] = None
         num_inference_steps: int = 8
         num_images: int = 1
@@ -44,6 +68,15 @@ class FalImageGenService(ImageGenService):
         key: Optional[str] = None,
         **kwargs,
     ):
+        """Initialize the FalImageGenService.
+
+        Args:
+            params: Input parameters for image generation configuration.
+            aiohttp_session: HTTP client session for downloading generated images.
+            model: The Fal.ai model to use for generation. Defaults to "fal-ai/fast-sdxl".
+            key: Optional API key for Fal.ai. If provided, sets FAL_KEY environment variable.
+            **kwargs: Additional arguments passed to parent ImageGenService.
+        """
         super().__init__(**kwargs)
         self.set_model_name(model)
         self._params = params
@@ -52,6 +85,16 @@ class FalImageGenService(ImageGenService):
             os.environ["FAL_KEY"] = key
 
     async def run_image_gen(self, prompt: str) -> AsyncGenerator[Frame, None]:
+        """Generate an image from a text prompt.
+
+        Args:
+            prompt: The text prompt to generate an image from.
+
+        Yields:
+            URLImageRawFrame: Frame containing the generated image data and metadata.
+            ErrorFrame: If image generation fails.
+        """
+
         def load_image_bytes(encoded_image: bytes):
             buffer = io.BytesIO(encoded_image)
             image = Image.open(buffer)
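
For reference, a minimal usage sketch built from the constructor and run_image_gen signatures documented above (the prompt, session handling, and printed output are illustrative, not part of the package):

import asyncio

import aiohttp

from pipecat.services.fal.image import FalImageGenService


async def main():
    async with aiohttp.ClientSession() as session:
        service = FalImageGenService(
            params=FalImageGenService.InputParams(num_images=1, format="png"),
            aiohttp_session=session,
            model="fal-ai/fast-sdxl",  # documented default
            # key=... is optional; the FAL_KEY environment variable is used otherwise
        )
        # Yields URLImageRawFrame on success, ErrorFrame on failure.
        async for frame in service.run_image_gen("a lighthouse at dawn"):
            print(type(frame).__name__)


asyncio.run(main())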
pipecat/services/fal/stt.py
CHANGED
@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Fal speech-to-text service implementation.
+
+This module provides integration with Fal's Wizper API for speech-to-text
+transcription using segmented audio processing.
+"""
+
 import os
 from typing import AsyncGenerator, Optional
 
@@ -27,7 +33,14 @@ except ModuleNotFoundError as e:
 
 
 def language_to_fal_language(language: Language) -> Optional[str]:
-    """Language
+    """Convert a Language enum to Fal's Wizper language code.
+
+    Args:
+        language: The Language enum value to convert.
+
+    Returns:
+        The corresponding Fal Wizper language code, or None if not supported.
+    """
     BASE_LANGUAGES = {
         Language.AF: "af",
         Language.AM: "am",
@@ -145,18 +158,12 @@ class FalSTTService(SegmentedSTTService):
 
     This service uses Fal's Wizper API to perform speech-to-text transcription on audio
     segments. It inherits from SegmentedSTTService to handle audio buffering and speech detection.
-
-    Args:
-        api_key: Fal API key. If not provided, will check FAL_KEY environment variable.
-        sample_rate: Audio sample rate in Hz. If not provided, uses the pipeline's rate.
-        params: Configuration parameters for the Wizper API.
-        **kwargs: Additional arguments passed to SegmentedSTTService.
     """
 
     class InputParams(BaseModel):
         """Configuration parameters for Fal's Wizper API.
 
-
+        Parameters:
             language: Language of the audio input. Defaults to English.
             task: Task to perform ('transcribe' or 'translate'). Defaults to 'transcribe'.
             chunk_level: Level of chunking ('segment'). Defaults to 'segment'.
@@ -176,6 +183,14 @@ class FalSTTService(SegmentedSTTService):
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the FalSTTService with API key and parameters.
+
+        Args:
+            api_key: Fal API key. If not provided, will check FAL_KEY environment variable.
+            sample_rate: Audio sample rate in Hz. If not provided, uses the pipeline's rate.
+            params: Configuration parameters for the Wizper API.
+            **kwargs: Additional arguments passed to SegmentedSTTService.
+        """
         super().__init__(
             sample_rate=sample_rate,
             **kwargs,
@@ -201,16 +216,39 @@ class FalSTTService(SegmentedSTTService):
         }
 
     def can_generate_metrics(self) -> bool:
+        """Check if the service can generate processing metrics.
+
+        Returns:
+            True, as Fal STT service supports metrics generation.
+        """
         return True
 
     def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to Fal's service-specific language code.
+
+        Args:
+            language: The language to convert.
+
+        Returns:
+            The Fal-specific language code, or None if not supported.
+        """
         return language_to_fal_language(language)
 
     async def set_language(self, language: Language):
+        """Set the transcription language.
+
+        Args:
+            language: The language to use for speech-to-text transcription.
+        """
        logger.info(f"Switching STT language to: [{language}]")
        self._settings["language"] = self.language_to_service_language(language)
 
     async def set_model(self, model: str):
+        """Set the STT model.
+
+        Args:
+            model: The model name to use for transcription.
+        """
         await super().set_model(model)
         logger.info(f"Switching STT model to: [{model}]")
 
@@ -229,7 +267,7 @@ class FalSTTService(SegmentedSTTService):
             audio: Raw audio bytes in WAV format (already converted by base class).
 
         Yields:
-            Frame: TranscriptionFrame containing the transcribed text.
+            Frame: TranscriptionFrame containing the transcribed text, or ErrorFrame on failure.
 
         Note:
             The audio is already in WAV format from the SegmentedSTTService.
@@ -253,7 +291,7 @@ class FalSTTService(SegmentedSTTService):
             logger.debug(f"Transcription: [{text}]")
             yield TranscriptionFrame(
                 text,
-
+                self._user_id,
                 time_now_iso8601(),
                 Language(self._settings["language"]),
                 result=response,
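
A minimal configuration sketch based on the constructor and InputParams documented above (values are illustrative; per the docstring, api_key falls back to the FAL_KEY environment variable when omitted):

from pipecat.services.fal.stt import FalSTTService
from pipecat.transcriptions.language import Language

stt = FalSTTService(
    api_key=None,  # resolved from FAL_KEY if not provided
    params=FalSTTService.InputParams(
        language=Language.EN,  # default
        task="transcribe",     # or "translate"
    ),
)

# The language can also be switched at runtime (from an async context):
# await stt.set_language(Language.FR)

Note the behavioral change in the last hunk: TranscriptionFrame is now constructed with self._user_id, so transcriptions are attributed to the active user.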
pipecat/services/fireworks/llm.py
CHANGED
@@ -20,12 +20,6 @@ class FireworksLLMService(OpenAILLMService):
 
     This service extends OpenAILLMService to connect to Fireworks' API endpoint while
     maintaining full compatibility with OpenAI's interface and functionality.
-
-    Args:
-        api_key: The API key for accessing Fireworks AI.
-        model: The model identifier to use. Defaults to "accounts/fireworks/models/firefunction-v2".
-        base_url: The base URL for Fireworks API. Defaults to "https://api.fireworks.ai/inference/v1".
-        **kwargs: Additional keyword arguments passed to OpenAILLMService.
     """
 
     def __init__(
@@ -36,6 +30,14 @@ class FireworksLLMService(OpenAILLMService):
         base_url: str = "https://api.fireworks.ai/inference/v1",
         **kwargs,
     ):
+        """Initialize the Fireworks LLM service.
+
+        Args:
+            api_key: The API key for accessing Fireworks AI.
+            model: The model identifier to use. Defaults to "accounts/fireworks/models/firefunction-v2".
+            base_url: The base URL for Fireworks API. Defaults to "https://api.fireworks.ai/inference/v1".
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
 
     def create_client(self, api_key=None, base_url=None, **kwargs):
@@ -52,20 +54,13 @@ class FireworksLLMService(OpenAILLMService):
         logger.debug(f"Creating Fireworks client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-
+    def build_chat_completion_params(
         self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ):
-        """
+    ) -> dict:
+        """Build parameters for Fireworks chat completion request.
 
-
-
-
-        Args:
-            context: The OpenAI LLM context containing tools and settings.
-            messages: List of chat completion message parameters.
-
-        Returns:
-            Async generator yielding chat completion chunks from Fireworks API.
+        Fireworks doesn't support some OpenAI parameters like seed, max_completion_tokens,
+        and stream_options.
         """
         params = {
             "model": self.model_name,
@@ -81,6 +76,4 @@ class FireworksLLMService(OpenAILLMService):
         }
 
         params.update(self._settings["extra"])
-
-        chunks = await self._client.chat.completions.create(**params)
-        return chunks
+        return params
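
The refactor above narrows this subclass's responsibility: instead of issuing the streaming call itself (the removed chat.completions.create call), it now only returns the request parameter dict via build_chat_completion_params, presumably consumed by the base OpenAILLMService. Construction is unchanged; a minimal sketch using the documented defaults (the API key is a placeholder):

from pipecat.services.fireworks.llm import FireworksLLMService

llm = FireworksLLMService(
    api_key="<FIREWORKS_API_KEY>",  # placeholder
    model="accounts/fireworks/models/firefunction-v2",  # documented default
    base_url="https://api.fireworks.ai/inference/v1",  # documented default
)

Because it extends OpenAILLMService, the service drops into a pipeline anywhere an OpenAI-compatible LLM processor is expected.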
pipecat/services/fish/tts.py
CHANGED
@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Fish Audio text-to-speech service implementation.
+
+This module provides integration with Fish Audio's real-time TTS WebSocket API
+for streaming text-to-speech synthesis with customizable voice parameters.
+"""
+
 import uuid
 from typing import AsyncGenerator, Literal, Optional
 
@@ -28,7 +34,8 @@ from pipecat.utils.tracing.service_decorators import traced_tts
 
 try:
     import ormsgpack
-    import
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error("In order to use Fish Audio, you need to `pip install pipecat-ai[fish]`.")
@@ -39,9 +46,27 @@ FishAudioOutputFormat = Literal["opus", "mp3", "pcm", "wav"]
 
 
 class FishAudioTTSService(InterruptibleTTSService):
+    """Fish Audio text-to-speech service with WebSocket streaming.
+
+    Provides real-time text-to-speech synthesis using Fish Audio's WebSocket API.
+    Supports various audio formats, customizable prosody controls, and streaming
+    audio generation with interruption handling.
+    """
+
     class InputParams(BaseModel):
+        """Input parameters for Fish Audio TTS configuration.
+
+        Parameters:
+            language: Language for synthesis. Defaults to English.
+            latency: Latency mode ("normal" or "balanced"). Defaults to "normal".
+            normalize: Whether to normalize audio output. Defaults to True.
+            prosody_speed: Speech speed multiplier (0.5-2.0). Defaults to 1.0.
+            prosody_volume: Volume adjustment in dB. Defaults to 0.
+        """
+
         language: Optional[Language] = Language.EN
         latency: Optional[str] = "normal"  # "normal" or "balanced"
+        normalize: Optional[bool] = True
         prosody_speed: Optional[float] = 1.0  # Speech speed (0.5-2.0)
         prosody_volume: Optional[int] = 0  # Volume adjustment in dB
 
@@ -49,12 +74,31 @@ class FishAudioTTSService(InterruptibleTTSService):
         self,
         *,
         api_key: str,
-
+        reference_id: Optional[str] = None,  # This is the voice ID
+        model: Optional[str] = None,  # Deprecated
+        model_id: str = "speech-1.5",
         output_format: FishAudioOutputFormat = "pcm",
         sample_rate: Optional[int] = None,
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the Fish Audio TTS service.
+
+        Args:
+            api_key: Fish Audio API key for authentication.
+            reference_id: Reference ID of the voice model to use for synthesis.
+            model: Deprecated. Reference ID of the voice model to use for synthesis.
+
+                .. deprecated:: 0.0.74
+                    The `model` parameter is deprecated and will be removed in version 0.1.0.
+                    Use `reference_id` instead to specify the voice model.
+
+            model_id: Specify which Fish Audio TTS model to use (e.g. "speech-1.5").
+            output_format: Audio output format. Defaults to "pcm".
+            sample_rate: Audio sample rate. If None, uses default.
+            params: Additional input parameters for voice customization.
+            **kwargs: Additional arguments passed to the parent service.
+        """
         super().__init__(
             push_stop_frames=True,
             pause_frame_processing=True,
@@ -64,6 +108,26 @@ class FishAudioTTSService(InterruptibleTTSService):
 
         params = params or FishAudioTTSService.InputParams()
 
+        # Validation for model and reference_id parameters
+        if model and reference_id:
+            raise ValueError(
+                "Cannot specify both 'model' and 'reference_id'. Use 'reference_id' only."
+            )
+
+        if model is None and reference_id is None:
+            raise ValueError("Must specify 'reference_id' (or deprecated 'model') parameter.")
+
+        if model:
+            import warnings
+
+            warnings.warn(
+                "Parameter 'model' is deprecated and will be removed in a future version. "
+                "Use 'reference_id' instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            reference_id = model
+
         self._api_key = api_key
         self._base_url = "wss://api.fish.audio/v1/tts/live"
         self._websocket = None
@@ -75,33 +139,60 @@ class FishAudioTTSService(InterruptibleTTSService):
             "sample_rate": 0,
             "latency": params.latency,
             "format": output_format,
+            "normalize": params.normalize,
             "prosody": {
                 "speed": params.prosody_speed,
                 "volume": params.prosody_volume,
             },
-            "reference_id":
+            "reference_id": reference_id,
         }
 
-        self.set_model_name(
+        self.set_model_name(model_id)
 
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as Fish Audio service supports metrics generation.
+        """
         return True
 
     async def set_model(self, model: str):
-
+        """Set the TTS model and reconnect.
+
+        Args:
+            model: The model name to use for synthesis.
+        """
         await super().set_model(model)
         logger.info(f"Switching TTS model to: [{model}]")
+        await self._disconnect()
+        await self._connect()
 
     async def start(self, frame: StartFrame):
+        """Start the Fish Audio TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
         self._settings["sample_rate"] = self.sample_rate
         await self._connect()
 
     async def stop(self, frame: EndFrame):
+        """Stop the Fish Audio TTS service.
+
+        Args:
+            frame: The end frame.
+        """
         await super().stop(frame)
         await self._disconnect()
 
     async def cancel(self, frame: CancelFrame):
+        """Cancel the Fish Audio TTS service.
+
+        Args:
+            frame: The cancel frame.
+        """
         await super().cancel(frame)
         await self._disconnect()
 
@@ -120,12 +211,13 @@ class FishAudioTTSService(InterruptibleTTSService):
 
     async def _connect_websocket(self):
         try:
-            if self._websocket and self._websocket.
+            if self._websocket and self._websocket.state is State.OPEN:
                 return
 
             logger.debug("Connecting to Fish Audio")
             headers = {"Authorization": f"Bearer {self._api_key}"}
-
+            headers["model"] = self.model_name
+            self._websocket = await websocket_connect(self._base_url, additional_headers=headers)
 
             # Send initial start message with ormsgpack
             start_message = {"event": "start", "request": {"text": "", **self._settings}}
@@ -155,7 +247,7 @@ class FishAudioTTSService(InterruptibleTTSService):
     async def flush_audio(self):
         """Flush any buffered audio by sending a flush event to Fish Audio."""
         logger.trace(f"{self}: Flushing audio buffers")
-        if not self._websocket or self._websocket.
+        if not self._websocket or self._websocket.state is State.CLOSED:
            return
        flush_message = {"event": "flush"}
        await self._get_websocket().send(ormsgpack.packb(flush_message))
@@ -191,9 +283,17 @@ class FishAudioTTSService(InterruptibleTTSService):
 
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using Fish Audio's streaming API.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: Audio frames and control frames for the synthesized speech.
+        """
         logger.debug(f"{self}: Generating Fish TTS: [{text}]")
         try:
-            if not self._websocket or self._websocket.
+            if not self._websocket or self._websocket.state is State.CLOSED:
                 await self._connect()
 
             if not self._request_id:
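
Given the deprecation above, a minimal migration sketch (key and voice ID are placeholders; passing both model and reference_id, or neither, now raises ValueError):

from pipecat.services.fish.tts import FishAudioTTSService

# Before: still works, but emits DeprecationWarning and is remapped to reference_id.
# tts = FishAudioTTSService(api_key="<FISH_API_KEY>", model="<voice-reference-id>")

# After: reference_id selects the voice; model_id selects the TTS model.
tts = FishAudioTTSService(
    api_key="<FISH_API_KEY>",  # placeholder
    reference_id="<voice-reference-id>",
    model_id="speech-1.5",  # documented default
    params=FishAudioTTSService.InputParams(
        latency="balanced",  # "normal" or "balanced"
        normalize=True,      # new in this release; defaults to True
    ),
)

As _connect_websocket above shows, model_id is also sent as a "model" header when the WebSocket connects.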
pipecat/services/gemini_multimodal_live/events.py
CHANGED
@@ -44,6 +44,17 @@ class ContentPart(BaseModel):
 
     text: Optional[str] = Field(default=None, validate_default=False)
     inlineData: Optional[MediaChunk] = Field(default=None, validate_default=False)
+    fileData: Optional["FileData"] = Field(default=None, validate_default=False)
+
+
+class FileData(BaseModel):
+    """Represents a file reference in the Gemini File API."""
+
+    mimeType: str
+    fileUri: str
+
+
+ContentPart.model_rebuild()  # Rebuild model to resolve forward reference
 
 
 class Turn(BaseModel):
@@ -103,13 +114,15 @@ class RealtimeInputConfig(BaseModel):
 
 
 class RealtimeInput(BaseModel):
-    """Contains realtime input media chunks.
+    """Contains realtime input media chunks and text.
 
     Parameters:
         mediaChunks: List of media chunks for realtime processing.
+        text: Text for realtime processing.
     """
 
-    mediaChunks: List[MediaChunk]
+    mediaChunks: Optional[List[MediaChunk]] = None
+    text: Optional[str] = None
 
 
 class ClientContent(BaseModel):
@@ -179,6 +192,24 @@ class VideoInputMessage(BaseModel):
     )
 
 
+class TextInputMessage(BaseModel):
+    """Message containing text input data."""
+
+    realtimeInput: RealtimeInput
+
+    @classmethod
+    def from_text(cls, text: str) -> "TextInputMessage":
+        """Create a text input message from a string.
+
+        Args:
+            text: The text to send.
+
+        Returns:
+            A TextInputMessage instance.
+        """
+        return cls(realtimeInput=RealtimeInput(text=text))
+
+
 class ClientContentMessage(BaseModel):
     """Message containing client content for the API.
 
@@ -237,6 +268,55 @@ class Config(BaseModel):
     setup: Setup
 
 
+#
+# Grounding metadata models
+#
+
+
+class SearchEntryPoint(BaseModel):
+    """Represents the search entry point with rendered content for search suggestions."""
+
+    renderedContent: Optional[str] = None
+
+
+class WebSource(BaseModel):
+    """Represents a web source from grounding chunks."""
+
+    uri: Optional[str] = None
+    title: Optional[str] = None
+
+
+class GroundingChunk(BaseModel):
+    """Represents a grounding chunk containing web source information."""
+
+    web: Optional[WebSource] = None
+
+
+class GroundingSegment(BaseModel):
+    """Represents a segment of text that is grounded."""
+
+    startIndex: Optional[int] = None
+    endIndex: Optional[int] = None
+    text: Optional[str] = None
+
+
+class GroundingSupport(BaseModel):
+    """Represents support information for grounded text segments."""
+
+    segment: Optional[GroundingSegment] = None
+    groundingChunkIndices: Optional[List[int]] = None
+    confidenceScores: Optional[List[float]] = None
+
+
+class GroundingMetadata(BaseModel):
+    """Represents grounding metadata from Google Search."""
+
+    searchEntryPoint: Optional[SearchEntryPoint] = None
+    groundingChunks: Optional[List[GroundingChunk]] = None
+    groundingSupports: Optional[List[GroundingSupport]] = None
+    webSearchQueries: Optional[List[str]] = None
+
+
 #
 # Server events
 #
@@ -328,6 +408,7 @@ class ServerContent(BaseModel):
     turnComplete: Optional[bool] = None
     inputTranscription: Optional[BidiGenerateContentTranscription] = None
    outputTranscription: Optional[BidiGenerateContentTranscription] = None
+    groundingMetadata: Optional[GroundingMetadata] = None
 
 
 class FunctionCall(BaseModel):
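
A short sketch of how the new text-input path serializes, using the models above (model_dump_json is standard Pydantic v2; the transport send is illustrative):

from pipecat.services.gemini_multimodal_live.events import TextInputMessage

msg = TextInputMessage.from_text("What's the weather in Lisbon?")
payload = msg.model_dump_json(exclude_none=True)
# -> {"realtimeInput":{"text":"What's the weather in Lisbon?"}}
# await websocket.send(payload)  # illustrative transport call

On the receiving side, ServerContent can now carry groundingMetadata; for example, serverContent.groundingMetadata.webSearchQueries lists the Google Search queries used to ground a response, and each GroundingChunk.web gives the source uri and title.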