dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +277 -86
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +18 -6
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +125 -79
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_context.py +40 -2
- pipecat/processors/aggregators/llm_response.py +32 -15
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/dtmf_aggregator.py +174 -77
- pipecat/processors/filters/stt_mute_filter.py +17 -0
- pipecat/processors/frame_processor.py +110 -24
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +210 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +26 -5
- pipecat/processors/user_idle_processor.py +35 -11
- pipecat/runner/daily.py +59 -20
- pipecat/runner/run.py +395 -93
- pipecat/runner/types.py +6 -4
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/__init__.py +5 -1
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +41 -4
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +5 -5
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/serializers/vi.py +324 -0
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/assemblyai/models.py +6 -0
- pipecat/services/assemblyai/stt.py +13 -5
- pipecat/services/asyncai/tts.py +5 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +147 -105
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +436 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1265 -0
- pipecat/services/aws/stt.py +3 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +8 -354
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/llm.py +51 -1
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/stt.py +77 -70
- pipecat/services/cartesia/tts.py +80 -13
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +640 -0
- pipecat/services/elevenlabs/__init__.py +4 -1
- pipecat/services/elevenlabs/stt.py +339 -0
- pipecat/services/elevenlabs/tts.py +87 -46
- pipecat/services/fish/tts.py +5 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/stt.py +4 -0
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +4 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +5 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +49 -10
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/piper/tts.py +7 -9
- pipecat/services/playht/tts.py +34 -4
- pipecat/services/rime/tts.py +12 -12
- pipecat/services/riva/stt.py +3 -1
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +700 -0
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +97 -13
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +22 -10
- pipecat/services/stt_service.py +47 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +75 -22
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +51 -9
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +13 -34
- pipecat/transports/base_output.py +140 -104
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +103 -19
- pipecat/transports/smallwebrtc/request_handler.py +246 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/redis.py +58 -0
- pipecat/utils/string.py +13 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- pipecat/services/openai.py +0 -698
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/services/google/gemini_live/llm_vertex.py
ADDED
@@ -0,0 +1,184 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""Service for accessing Gemini Live via Google Vertex AI.
+
+This module provides integration with Google's Gemini Live model via
+Vertex AI, supporting both text and audio modalities with voice transcription,
+streaming responses, and tool usage.
+"""
+
+import json
+from typing import List, Optional, Union
+
+from loguru import logger
+
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.services.google.gemini_live.llm import (
+    GeminiLiveLLMService,
+    HttpOptions,
+    InputParams,
+)
+
+try:
+    from google.auth import default
+    from google.auth.exceptions import GoogleAuthError
+    from google.auth.transport.requests import Request
+    from google.genai import Client
+    from google.oauth2 import service_account
+
+except ModuleNotFoundError as e:
+    logger.error(f"Exception: {e}")
+    logger.error("In order to use Google Vertex AI, you need to `pip install pipecat-ai[google]`.")
+    raise Exception(f"Missing module: {e}")
+
+
+class GeminiLiveVertexLLMService(GeminiLiveLLMService):
+    """Provides access to Google's Gemini Live model via Vertex AI.
+
+    This service enables real-time conversations with Gemini, supporting both
+    text and audio modalities. It handles voice transcription, streaming audio
+    responses, and tool usage.
+    """
+
+    def __init__(
+        self,
+        *,
+        credentials: Optional[str] = None,
+        credentials_path: Optional[str] = None,
+        location: str,
+        project_id: str,
+        model="google/gemini-2.0-flash-live-preview-04-09",
+        voice_id: str = "Charon",
+        start_audio_paused: bool = False,
+        start_video_paused: bool = False,
+        system_instruction: Optional[str] = None,
+        tools: Optional[Union[List[dict], ToolsSchema]] = None,
+        params: Optional[InputParams] = None,
+        inference_on_context_initialization: bool = True,
+        file_api_base_url: str = "https://generativelanguage.googleapis.com/v1beta/files",
+        http_options: Optional[HttpOptions] = None,
+        **kwargs,
+    ):
+        """Initialize the service for accessing Gemini Live via Google Vertex AI.
+
+        Args:
+            credentials: JSON string of service account credentials.
+            credentials_path: Path to the service account JSON file.
+            location: GCP region for Vertex AI endpoint (e.g., "us-east4").
+            project_id: Google Cloud project ID.
+            model: Model identifier to use. Defaults to "models/gemini-2.0-flash-live-preview-04-09".
+            voice_id: TTS voice identifier. Defaults to "Charon".
+            start_audio_paused: Whether to start with audio input paused. Defaults to False.
+            start_video_paused: Whether to start with video input paused. Defaults to False.
+            system_instruction: System prompt for the model. Defaults to None.
+            tools: Tools/functions available to the model. Defaults to None.
+            params: Configuration parameters for the model along with Vertex AI
+                location and project ID.
+            inference_on_context_initialization: Whether to generate a response when context
+                is first set. Defaults to True.
+            file_api_base_url: Base URL for the Gemini File API. Defaults to the official endpoint.
+            http_options: HTTP options for the client.
+            **kwargs: Additional arguments passed to parent GeminiLiveLLMService.
+        """
+        # Check if user incorrectly passed api_key, which is used by parent
+        # class but not here.
+        if "api_key" in kwargs:
+            logger.error(
+                "GeminiLiveVertexLLMService does not accept 'api_key' parameter. "
+                "Use 'credentials' or 'credentials_path' instead for Vertex AI authentication."
+            )
+            raise ValueError(
+                "Invalid parameter 'api_key'. Use 'credentials' or 'credentials_path' for Vertex AI authentication."
+            )
+
+        # These need to be set before calling super().__init__() because
+        # super().__init__() invokes create_client(), which needs these.
+        self._credentials = self._get_credentials(credentials, credentials_path)
+        self._project_id = project_id
+        self._location = location
+
+        # Call parent constructor with the obtained API key
+        super().__init__(
+            # api_key is required by parent class, but actually not used with
+            # Vertex
+            api_key="dummy",
+            model=model,
+            voice_id=voice_id,
+            start_audio_paused=start_audio_paused,
+            start_video_paused=start_video_paused,
+            system_instruction=system_instruction,
+            tools=tools,
+            params=params,
+            inference_on_context_initialization=inference_on_context_initialization,
+            file_api_base_url=file_api_base_url,
+            http_options=http_options,
+            **kwargs,
+        )
+
+    def create_client(self):
+        """Create the Gemini client instance."""
+        self._client = Client(
+            vertexai=True,
+            credentials=self._credentials,
+            project=self._project_id,
+            location=self._location,
+        )
+
+    @property
+    def file_api(self):
+        """Gemini File API is not supported with Vertex AI."""
+        raise NotImplementedError(
+            "When using Vertex AI, the recommended approach is to use Google Cloud Storage for file handling. The Gemini File API is not directly supported in this context."
+        )
+
+    @staticmethod
+    def _get_credentials(credentials: Optional[str], credentials_path: Optional[str]) -> str:
+        """Retrieve Credentials using Google service account credentials JSON.
+
+        Supports multiple authentication methods:
+        1. Direct JSON credentials string
+        2. Path to service account JSON file
+        3. Default application credentials (ADC)
+
+        Args:
+            credentials: JSON string of service account credentials.
+            credentials_path: Path to the service account JSON file.
+
+        Returns:
+            OAuth token for API authentication.
+
+        Raises:
+            ValueError: If no valid credentials are provided or found.
+        """
+        creds: Optional[service_account.Credentials] = None
+
+        if credentials:
+            # Parse and load credentials from JSON string
+            creds = service_account.Credentials.from_service_account_info(
+                json.loads(credentials),
+                scopes=["https://www.googleapis.com/auth/cloud-platform"],
+            )
+        elif credentials_path:
+            # Load credentials from JSON file
+            creds = service_account.Credentials.from_service_account_file(
+                credentials_path,
+                scopes=["https://www.googleapis.com/auth/cloud-platform"],
+            )
+        else:
+            try:
+                creds, project_id = default(
+                    scopes=["https://www.googleapis.com/auth/cloud-platform"]
+                )
+            except GoogleAuthError:
+                pass
+
+        if not creds:
+            raise ValueError("No valid credentials provided.")
+
+        creds.refresh(Request())  # Ensure token is up-to-date, lifetime is 1 hour.
+
+        return creds
pipecat/services/google/llm.py
CHANGED
@@ -35,8 +35,8 @@ from pipecat.frames.frames import (
     LLMMessagesFrame,
     LLMTextFrame,
     LLMUpdateSettingsFrame,
+    OutputImageRawFrame,
     UserImageRawFrame,
-    VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
 from pipecat.processors.aggregators.llm_context import LLMContext
@@ -73,6 +73,9 @@ try:
         HttpOptions,
         Part,
     )
+
+    # Temporary hack to be able to process Nano Banana returned images.
+    genai._api_client.READ_BUFFER_SIZE = 5 * 1024 * 1024
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error("In order to use Google AI, you need to `pip install pipecat-ai[google]`.")
@@ -683,7 +686,7 @@ class GoogleLLMService(LLMService):
         self,
         *,
         api_key: str,
-        model: str = "gemini-2.
+        model: str = "gemini-2.5-flash",
         params: Optional[InputParams] = None,
         system_instruction: Optional[str] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
@@ -711,6 +714,7 @@ class GoogleLLMService(LLMService):
         self._api_key = api_key
         self._system_instruction = system_instruction
         self._http_options = http_options
+
         self._create_client(api_key, http_options)
         self._settings = {
             "max_tokens": params.max_tokens,
@@ -789,6 +793,9 @@ class GoogleLLMService(LLMService):
         # and can be configured to turn it off.
         if not self._model_name.startswith("gemini-2.5-flash"):
             return
+        # If we have an image model, we don't use a budget either.
+        if "image" in self._model_name:
+            return
         # If thinking_config is already set, don't override it.
         if "thinking_config" in generation_params:
             return
@@ -928,6 +935,12 @@ class GoogleLLMService(LLMService):
                         arguments=function_call.args or {},
                     )
                 )
+            elif part.inline_data and part.inline_data.data:
+                image = Image.open(io.BytesIO(part.inline_data.data))
+                frame = OutputImageRawFrame(
+                    image=image.tobytes(), size=image.size, format="RGB"
+                )
+                await self.push_frame(frame)

         if (
             candidate.grounding_metadata
@@ -1013,15 +1026,6 @@ class GoogleLLMService(LLMService):
             # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal
             # LLMContext with it
             context = GoogleLLMContext(frame.messages)
-        elif isinstance(frame, VisionImageRawFrame):
-            # This is only useful in very simple pipelines because it creates
-            # a new context. Generally we want a context manager to catch
-            # UserImageRawFrames coming through the pipeline and add them
-            # to the context.
-            context = GoogleLLMContext()
-            context.add_image_frame_message(
-                format=frame.format, size=frame.size, image=frame.image, text=frame.text
-            )
         elif isinstance(frame, LLMUpdateSettingsFrame):
             await self._update_settings(frame.settings)
         else:
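With the `inline_data` branch above, image-capable Gemini models now push `OutputImageRawFrame`s downstream. A minimal sketch of a pipeline processor that could consume them; the processor class and output path are illustrative, not part of the diff:

    from PIL import Image

    from pipecat.frames.frames import Frame, OutputImageRawFrame
    from pipecat.processors.frame_processor import FrameDirection, FrameProcessor

    class SaveGeneratedImages(FrameProcessor):
        """Illustrative processor: writes each generated image to disk."""

        async def process_frame(self, frame: Frame, direction: FrameDirection):
            await super().process_frame(frame, direction)
            if isinstance(frame, OutputImageRawFrame):
                # The hunk above pushes raw RGB bytes plus the image size.
                Image.frombytes(frame.format, frame.size, frame.image).save("generated.png")
            await self.push_frame(frame, direction)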
pipecat/services/google/llm_openai.py
CHANGED
@@ -96,9 +96,9 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
         async for chunk in chunk_stream:
             if chunk.usage:
                 tokens = LLMTokenUsage(
-                    prompt_tokens=chunk.usage.prompt_tokens,
-                    completion_tokens=chunk.usage.completion_tokens,
-                    total_tokens=chunk.usage.total_tokens,
+                    prompt_tokens=chunk.usage.prompt_tokens or 0,
+                    completion_tokens=chunk.usage.completion_tokens or 0,
+                    total_tokens=chunk.usage.total_tokens or 0,
                 )
                 await self.start_llm_usage_metrics(tokens)

pipecat/services/google/llm_vertex.py
CHANGED
@@ -53,12 +53,44 @@ class GoogleVertexLLMService(OpenAILLMService):

         Parameters:
             location: GCP region for Vertex AI endpoint (e.g., "us-east4").
+
+                .. deprecated:: 0.0.90
+                    Use `location` as a direct argument to
+                    `GoogleVertexLLMService.__init__()` instead.
+
             project_id: Google Cloud project ID.
+
+                .. deprecated:: 0.0.90
+                    Use `project_id` as a direct argument to
+                    `GoogleVertexLLMService.__init__()` instead.
         """

         # https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations
-        location: str =
-        project_id: str
+        location: Optional[str] = None
+        project_id: Optional[str] = None
+
+        def __init__(self, **kwargs):
+            """Initializes the InputParams."""
+            import warnings
+
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                if "location" in kwargs and kwargs["location"] is not None:
+                    warnings.warn(
+                        "GoogleVertexLLMService.InputParams.location is deprecated. "
+                        "Please provide 'location' as a direct argument to GoogleVertexLLMService.__init__() instead.",
+                        DeprecationWarning,
+                        stacklevel=2,
+                    )
+
+                if "project_id" in kwargs and kwargs["project_id"] is not None:
+                    warnings.warn(
+                        "GoogleVertexLLMService.InputParams.project_id is deprecated. "
+                        "Please provide 'project_id' as a direct argument to GoogleVertexLLMService.__init__() instead.",
+                        DeprecationWarning,
+                        stacklevel=2,
+                    )
+            super().__init__(**kwargs)

     def __init__(
         self,
@@ -66,7 +98,8 @@ class GoogleVertexLLMService(OpenAILLMService):
         credentials: Optional[str] = None,
         credentials_path: Optional[str] = None,
         model: str = "google/gemini-2.0-flash-001",
-
+        location: Optional[str] = None,
+        project_id: Optional[str] = None,
         **kwargs,
     ):
         """Initializes the VertexLLMService.
@@ -75,25 +108,60 @@ class GoogleVertexLLMService(OpenAILLMService):
             credentials: JSON string of service account credentials.
             credentials_path: Path to the service account JSON file.
             model: Model identifier (e.g., "google/gemini-2.0-flash-001").
-
+            location: GCP region for Vertex AI endpoint (e.g., "us-east4").
+            project_id: Google Cloud project ID.
             **kwargs: Additional arguments passed to OpenAILLMService.
         """
-
-
+        # Handle deprecated InputParams fields
+        if "params" in kwargs and isinstance(kwargs["params"], GoogleVertexLLMService.InputParams):
+            params = kwargs["params"]
+            # Extract location and project_id from params if not provided
+            # directly, for backward compatibility
+            if project_id is None:
+                project_id = params.project_id
+            if location is None:
+                location = params.location
+            # Convert to base InputParams
+            params = OpenAILLMService.InputParams(
+                **params.model_dump(exclude={"location", "project_id"}, exclude_unset=True)
+            )
+            kwargs["params"] = params
+
+        # Validate project_id and location parameters
+        # NOTE: once we remove Vertex-spcific InputParams class, we can update
+        # __init__() signature as follows:
+        # - location: str = "us-east4",
+        # - project_id: str,
+        # But for now, we need them as-is to maintain proper backward
+        # compatibility.
+        if project_id is None:
+            raise ValueError("project_id is required")
+        if location is None:
+            # If location is not provided, default to "us-east4".
+            # Note: this is legacy behavior; ideally location would be
+            # required.
+            logger.warning("location is not provided. Defaulting to 'us-east4'.")
+            location = "us-east4"  # Default location if not provided
+
+        base_url = self._get_base_url(location, project_id)
         self._api_key = self._get_api_token(credentials, credentials_path)

         super().__init__(
-            api_key=self._api_key,
+            api_key=self._api_key,
+            base_url=base_url,
+            model=model,
+            **kwargs,
         )

     @staticmethod
-    def _get_base_url(
-        """
-
-
-
-
+    def _get_base_url(location: str, project_id: str) -> str:
+        """Construct the base URL for Vertex AI API."""
+        # Determine the correct API host based on location
+        if location == "global":
+            api_host = "aiplatform.googleapis.com"
+        else:
+            api_host = f"{location}-aiplatform.googleapis.com"
+        return f"https://{api_host}/v1/projects/{project_id}/locations/{location}/endpoints/openapi"

     @staticmethod
     def _get_api_token(credentials: Optional[str], credentials_path: Optional[str]) -> str:
@@ -119,12 +187,14 @@ class GoogleVertexLLMService(OpenAILLMService):
         if credentials:
             # Parse and load credentials from JSON string
             creds = service_account.Credentials.from_service_account_info(
-                json.loads(credentials),
+                json.loads(credentials),
+                scopes=["https://www.googleapis.com/auth/cloud-platform"],
             )
         elif credentials_path:
             # Load credentials from JSON file
             creds = service_account.Credentials.from_service_account_file(
-                credentials_path,
+                credentials_path,
+                scopes=["https://www.googleapis.com/auth/cloud-platform"],
             )
         else:
             try:
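These changes deprecate the routing fields on `InputParams` in favor of direct constructor arguments. A before/after sketch, with placeholder credentials path, project, and region values:

    from pipecat.services.google.llm_vertex import GoogleVertexLLMService

    # Deprecated: location/project_id via InputParams (still works for
    # backward compatibility, but emits DeprecationWarnings).
    llm = GoogleVertexLLMService(
        credentials_path="/path/to/service-account.json",
        params=GoogleVertexLLMService.InputParams(
            location="us-east4", project_id="my-gcp-project"
        ),
    )

    # Preferred: pass them directly to __init__().
    llm = GoogleVertexLLMService(
        credentials_path="/path/to/service-account.json",
        location="us-east4",
        project_id="my-gcp-project",
    )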
pipecat/services/google/stt.py
CHANGED
@@ -730,6 +730,8 @@ class GoogleSTTService(STTService):
         self._request_queue = asyncio.Queue()
         self._streaming_task = self.create_task(self._stream_audio())

+        await self._call_event_handler("on_connected")
+
     async def _disconnect(self):
         """Clean up streaming recognition resources."""
         if self._streaming_task:
@@ -737,6 +739,8 @@ class GoogleSTTService(STTService):
             await self.cancel_task(self._streaming_task)
             self._streaming_task = None

+        await self._call_event_handler("on_disconnected")
+
     async def _request_generator(self):
         """Generates requests for the streaming recognize method."""
         recognizer_path = f"projects/{self._project_id}/locations/{self._location}/recognizers/_"
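The new `_call_event_handler` calls surface the streaming connection lifecycle to user code. A sketch of subscribing to these events, assuming the service registers `on_connected`/`on_disconnected` (registration is not visible in these hunks) and using pipecat's usual `event_handler` decorator; the credentials path is a placeholder:

    from pipecat.services.google.stt import GoogleSTTService

    stt = GoogleSTTService(credentials_path="/path/to/service-account.json")  # placeholder

    @stt.event_handler("on_connected")
    async def on_connected(service):
        print("Google STT streaming connection established")

    @stt.event_handler("on_disconnected")
    async def on_disconnected(service):
        print("Google STT streaming connection closed")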
pipecat/services/google/tts.py
CHANGED
@@ -500,10 +500,11 @@ class GoogleTTSService(TTSService):

         Parameters:
             language: Language for synthesis. Defaults to English.
+            speaking_rate: The speaking rate, in the range [0.25, 4.0].
         """

         language: Optional[Language] = Language.EN
-
+        speaking_rate: Optional[float] = None

     def __init__(
         self,
@@ -511,6 +512,7 @@ class GoogleTTSService(TTSService):
         credentials: Optional[str] = None,
         credentials_path: Optional[str] = None,
         voice_id: str = "en-US-Chirp3-HD-Charon",
+        voice_cloning_key: Optional[str] = None,
         sample_rate: Optional[int] = None,
         params: InputParams = InputParams(),
         **kwargs,
@@ -521,6 +523,7 @@ class GoogleTTSService(TTSService):
             credentials: JSON string containing Google Cloud service account credentials.
             credentials_path: Path to Google Cloud service account JSON file.
             voice_id: Google TTS voice identifier (e.g., "en-US-Chirp3-HD-Charon").
+            voice_cloning_key: The voice cloning key for Chirp 3 custom voices.
             sample_rate: Audio sample rate in Hz. If None, uses default.
             params: Language configuration parameters.
             **kwargs: Additional arguments passed to parent TTSService.
@@ -536,7 +539,7 @@ class GoogleTTSService(TTSService):
             "language": self.language_to_service_language(params.language)
             if params.language
             else "en-US",
-            "
+            "speaking_rate": params.speaking_rate,
         }
         self._voice_clone_params = None
         if self._voice_config.get("is_clone", False):
@@ -550,6 +553,7 @@ class GoogleTTSService(TTSService):
             language_code=self._settings["language"], voice_clone=self._voice_clone_params
         )
         self.set_voice(voice_id)
+        self._voice_cloning_key = voice_cloning_key
         self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client(
             credentials, credentials_path
         )
@@ -628,7 +632,7 @@ class GoogleTTSService(TTSService):
             streaming_audio_config=texttospeech_v1.StreamingAudioConfig(
                 audio_encoding=texttospeech_v1.AudioEncoding.PCM,
                 sample_rate_hertz=self.sample_rate,
-                speaking_rate=self._settings["
+                speaking_rate=self._settings["speaking_rate"],
             ),
         )
         config_request = texttospeech_v1.StreamingSynthesizeRequest(
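For reference, a construction sketch using the new `speaking_rate` parameter via the nested `InputParams` shown above; the credentials path is a placeholder:

    from pipecat.services.google.tts import GoogleTTSService
    from pipecat.transcriptions.language import Language

    tts = GoogleTTSService(
        credentials_path="/path/to/service-account.json",  # placeholder
        voice_id="en-US-Chirp3-HD-Charon",
        params=GoogleTTSService.InputParams(
            language=Language.EN,
            speaking_rate=1.2,  # valid range is [0.25, 4.0] per the docstring above
        ),
    )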
pipecat/services/heygen/api.py
CHANGED
@@ -108,12 +108,14 @@ class HeyGenSession(BaseModel):
     Parameters:
         session_id (str): Unique identifier for the streaming session.
         access_token (str): Token for accessing the session securely.
+        livekit_agent_token (str): Token for HeyGen's audio agents (Pipecat).
         realtime_endpoint (str): Real-time communication endpoint URL.
         url (str): Direct URL for the session.
     """

     session_id: str
     access_token: str
+    livekit_agent_token: str
     realtime_endpoint: str
     url: str

pipecat/services/heygen/client.py
CHANGED
@@ -393,7 +393,9 @@ class HeyGenClient:
             participant_id: Identifier of the participant to capture audio from
             callback: Async function to handle received audio frames
         """
-        logger.debug(
+        logger.debug(
+            f"capture_participant_audio: {participant_id}, sample_rate: {self._in_sample_rate}"
+        )
         self._audio_frame_callback = callback
         if self._audio_task is not None:
             logger.warning(
@@ -407,7 +409,9 @@ class HeyGenClient:
         for track_pub in participant.track_publications.values():
             if track_pub.kind == rtc.TrackKind.KIND_AUDIO and track_pub.track is not None:
                 logger.debug(f"Starting audio capture for existing track: {track_pub.sid}")
-                audio_stream = rtc.AudioStream(
+                audio_stream = rtc.AudioStream(
+                    track=track_pub.track, sample_rate=self._in_sample_rate
+                )
                 self._audio_task = self._task_manager.create_task(
                     self._process_audio_frames(audio_stream), name="HeyGenClient_Receive_Audio"
                 )
@@ -536,7 +540,7 @@ class HeyGenClient:
             and self._audio_task is None
         ):
             logger.debug(f"Creating audio stream processor for track: {publication.sid}")
-            audio_stream = rtc.AudioStream(track)
+            audio_stream = rtc.AudioStream(track=track, sample_rate=self._in_sample_rate)
             self._audio_task = self._task_manager.create_task(
                 self._process_audio_frames(audio_stream), name="HeyGenClient_Receive_Audio"
             )
@@ -559,7 +563,7 @@ class HeyGenClient:
         )

         await self._livekit_room.connect(
-            self._heyGen_session.url, self._heyGen_session.
+            self._heyGen_session.url, self._heyGen_session.livekit_agent_token
         )
         logger.debug(f"Successfully connected to LiveKit room: {self._livekit_room.name}")
         logger.debug(f"Local participant SID: {self._livekit_room.local_participant.sid}")
pipecat/services/heygen/video.py
CHANGED
@@ -110,6 +110,7 @@ class HeyGenVideoService(AIService):
             api_key=self._api_key,
             session=self._session,
             params=TransportParams(
+                audio_in_sample_rate=48000,
                 audio_in_enabled=True,
                 video_in_enabled=True,
                 audio_out_enabled=True,
@@ -240,6 +241,7 @@ class HeyGenVideoService(AIService):
             # As soon as we receive actual audio, the base output transport will create a
             # BotStartedSpeakingFrame, which we can use as a signal for the TTFB metrics.
             await self.stop_ttfb_metrics()
+            await self.push_frame(frame, direction)
         else:
             await self.push_frame(frame, direction)