PyPI - dv-pipecat-ai - Versions diffs - 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (195) hide show

{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
pipecat/adapters/base_llm_adapter.py +38 -1
pipecat/adapters/services/anthropic_adapter.py +9 -14
pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
pipecat/adapters/services/bedrock_adapter.py +236 -13
pipecat/adapters/services/gemini_adapter.py +12 -8
pipecat/adapters/services/open_ai_adapter.py +19 -7
pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
pipecat/audio/dtmf/dtmf-0.wav +0 -0
pipecat/audio/dtmf/dtmf-1.wav +0 -0
pipecat/audio/dtmf/dtmf-2.wav +0 -0
pipecat/audio/dtmf/dtmf-3.wav +0 -0
pipecat/audio/dtmf/dtmf-4.wav +0 -0
pipecat/audio/dtmf/dtmf-5.wav +0 -0
pipecat/audio/dtmf/dtmf-6.wav +0 -0
pipecat/audio/dtmf/dtmf-7.wav +0 -0
pipecat/audio/dtmf/dtmf-8.wav +0 -0
pipecat/audio/dtmf/dtmf-9.wav +0 -0
pipecat/audio/dtmf/dtmf-pound.wav +0 -0
pipecat/audio/dtmf/dtmf-star.wav +0 -0
pipecat/audio/filters/krisp_viva_filter.py +193 -0
pipecat/audio/filters/noisereduce_filter.py +15 -0
pipecat/audio/turn/base_turn_analyzer.py +9 -1
pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
pipecat/audio/vad/data/README.md +10 -0
pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
pipecat/audio/vad/silero.py +9 -3
pipecat/audio/vad/vad_analyzer.py +13 -1
pipecat/extensions/voicemail/voicemail_detector.py +5 -5
pipecat/frames/frames.py +277 -86
pipecat/observers/loggers/debug_log_observer.py +3 -3
pipecat/observers/loggers/llm_log_observer.py +7 -3
pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
pipecat/pipeline/runner.py +18 -6
pipecat/pipeline/service_switcher.py +64 -36
pipecat/pipeline/task.py +125 -79
pipecat/pipeline/tts_switcher.py +30 -0
pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
pipecat/processors/aggregators/llm_context.py +40 -2
pipecat/processors/aggregators/llm_response.py +32 -15
pipecat/processors/aggregators/llm_response_universal.py +19 -15
pipecat/processors/aggregators/user_response.py +6 -6
pipecat/processors/aggregators/vision_image_frame.py +24 -2
pipecat/processors/audio/audio_buffer_processor.py +43 -8
pipecat/processors/dtmf_aggregator.py +174 -77
pipecat/processors/filters/stt_mute_filter.py +17 -0
pipecat/processors/frame_processor.py +110 -24
pipecat/processors/frameworks/langchain.py +8 -2
pipecat/processors/frameworks/rtvi.py +210 -68
pipecat/processors/frameworks/strands_agents.py +170 -0
pipecat/processors/logger.py +2 -2
pipecat/processors/transcript_processor.py +26 -5
pipecat/processors/user_idle_processor.py +35 -11
pipecat/runner/daily.py +59 -20
pipecat/runner/run.py +395 -93
pipecat/runner/types.py +6 -4
pipecat/runner/utils.py +51 -10
pipecat/serializers/__init__.py +5 -1
pipecat/serializers/asterisk.py +16 -2
pipecat/serializers/convox.py +41 -4
pipecat/serializers/custom.py +257 -0
pipecat/serializers/exotel.py +5 -5
pipecat/serializers/livekit.py +20 -0
pipecat/serializers/plivo.py +5 -5
pipecat/serializers/protobuf.py +6 -5
pipecat/serializers/telnyx.py +2 -2
pipecat/serializers/twilio.py +43 -23
pipecat/serializers/vi.py +324 -0
pipecat/services/ai_service.py +2 -6
pipecat/services/anthropic/llm.py +2 -25
pipecat/services/assemblyai/models.py +6 -0
pipecat/services/assemblyai/stt.py +13 -5
pipecat/services/asyncai/tts.py +5 -3
pipecat/services/aws/__init__.py +1 -0
pipecat/services/aws/llm.py +147 -105
pipecat/services/aws/nova_sonic/__init__.py +0 -0
pipecat/services/aws/nova_sonic/context.py +436 -0
pipecat/services/aws/nova_sonic/frames.py +25 -0
pipecat/services/aws/nova_sonic/llm.py +1265 -0
pipecat/services/aws/stt.py +3 -3
pipecat/services/aws_nova_sonic/__init__.py +19 -1
pipecat/services/aws_nova_sonic/aws.py +11 -1151
pipecat/services/aws_nova_sonic/context.py +8 -354
pipecat/services/aws_nova_sonic/frames.py +13 -17
pipecat/services/azure/llm.py +51 -1
pipecat/services/azure/realtime/__init__.py +0 -0
pipecat/services/azure/realtime/llm.py +65 -0
pipecat/services/azure/stt.py +15 -0
pipecat/services/cartesia/stt.py +77 -70
pipecat/services/cartesia/tts.py +80 -13
pipecat/services/deepgram/__init__.py +1 -0
pipecat/services/deepgram/flux/__init__.py +0 -0
pipecat/services/deepgram/flux/stt.py +640 -0
pipecat/services/elevenlabs/__init__.py +4 -1
pipecat/services/elevenlabs/stt.py +339 -0
pipecat/services/elevenlabs/tts.py +87 -46
pipecat/services/fish/tts.py +5 -2
pipecat/services/gemini_multimodal_live/events.py +38 -524
pipecat/services/gemini_multimodal_live/file_api.py +23 -173
pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
pipecat/services/gladia/stt.py +56 -72
pipecat/services/google/__init__.py +1 -0
pipecat/services/google/gemini_live/__init__.py +3 -0
pipecat/services/google/gemini_live/file_api.py +189 -0
pipecat/services/google/gemini_live/llm.py +1582 -0
pipecat/services/google/gemini_live/llm_vertex.py +184 -0
pipecat/services/google/llm.py +15 -11
pipecat/services/google/llm_openai.py +3 -3
pipecat/services/google/llm_vertex.py +86 -16
pipecat/services/google/stt.py +4 -0
pipecat/services/google/tts.py +7 -3
pipecat/services/heygen/api.py +2 -0
pipecat/services/heygen/client.py +8 -4
pipecat/services/heygen/video.py +2 -0
pipecat/services/hume/__init__.py +5 -0
pipecat/services/hume/tts.py +220 -0
pipecat/services/inworld/tts.py +6 -6
pipecat/services/llm_service.py +15 -5
pipecat/services/lmnt/tts.py +4 -2
pipecat/services/mcp_service.py +4 -2
pipecat/services/mem0/memory.py +6 -5
pipecat/services/mistral/llm.py +29 -8
pipecat/services/moondream/vision.py +42 -16
pipecat/services/neuphonic/tts.py +5 -2
pipecat/services/openai/__init__.py +1 -0
pipecat/services/openai/base_llm.py +27 -20
pipecat/services/openai/realtime/__init__.py +0 -0
pipecat/services/openai/realtime/context.py +272 -0
pipecat/services/openai/realtime/events.py +1106 -0
pipecat/services/openai/realtime/frames.py +37 -0
pipecat/services/openai/realtime/llm.py +829 -0
pipecat/services/openai/tts.py +49 -10
pipecat/services/openai_realtime/__init__.py +27 -0
pipecat/services/openai_realtime/azure.py +21 -0
pipecat/services/openai_realtime/context.py +21 -0
pipecat/services/openai_realtime/events.py +21 -0
pipecat/services/openai_realtime/frames.py +21 -0
pipecat/services/openai_realtime_beta/azure.py +16 -0
pipecat/services/openai_realtime_beta/openai.py +17 -5
pipecat/services/piper/tts.py +7 -9
pipecat/services/playht/tts.py +34 -4
pipecat/services/rime/tts.py +12 -12
pipecat/services/riva/stt.py +3 -1
pipecat/services/salesforce/__init__.py +9 -0
pipecat/services/salesforce/llm.py +700 -0
pipecat/services/sarvam/__init__.py +7 -0
pipecat/services/sarvam/stt.py +540 -0
pipecat/services/sarvam/tts.py +97 -13
pipecat/services/simli/video.py +2 -2
pipecat/services/speechmatics/stt.py +22 -10
pipecat/services/stt_service.py +47 -0
pipecat/services/tavus/video.py +2 -2
pipecat/services/tts_service.py +75 -22
pipecat/services/vision_service.py +7 -6
pipecat/services/vistaar/llm.py +51 -9
pipecat/tests/utils.py +4 -4
pipecat/transcriptions/language.py +41 -1
pipecat/transports/base_input.py +13 -34
pipecat/transports/base_output.py +140 -104
pipecat/transports/daily/transport.py +199 -26
pipecat/transports/heygen/__init__.py +0 -0
pipecat/transports/heygen/transport.py +381 -0
pipecat/transports/livekit/transport.py +228 -63
pipecat/transports/local/audio.py +6 -1
pipecat/transports/local/tk.py +11 -2
pipecat/transports/network/fastapi_websocket.py +1 -1
pipecat/transports/smallwebrtc/connection.py +103 -19
pipecat/transports/smallwebrtc/request_handler.py +246 -0
pipecat/transports/smallwebrtc/transport.py +65 -23
pipecat/transports/tavus/transport.py +23 -12
pipecat/transports/websocket/client.py +41 -5
pipecat/transports/websocket/fastapi.py +21 -11
pipecat/transports/websocket/server.py +14 -7
pipecat/transports/whatsapp/api.py +8 -0
pipecat/transports/whatsapp/client.py +47 -0
pipecat/utils/base_object.py +54 -22
pipecat/utils/redis.py +58 -0
pipecat/utils/string.py +13 -1
pipecat/utils/tracing/service_decorators.py +21 -21
pipecat/serializers/genesys.py +0 -95
pipecat/services/google/test-google-chirp.py +0 -45
pipecat/services/openai.py +0 -698
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
/pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0

pipecat/services/gladia/stt.py CHANGED Viewed

@@ -14,7 +14,7 @@ import asyncio
 import base64
 import json
 import warnings
-from typing import Any, AsyncGenerator, Dict, List, Literal, Optional
+from typing import Any, AsyncGenerator, Dict, Literal, Optional
 import aiohttp
 from loguru import logger
@@ -29,13 +29,7 @@ from pipecat.frames.frames import (
     TranscriptionFrame,
     TranslationFrame,
 )
-# Import nested config models
-from pipecat.services.gladia.config import (
-    CustomVocabularyConfig,
-    GladiaInputParams,
-    RealtimeProcessingConfig,
-)
+from pipecat.services.gladia.config import GladiaInputParams
 from pipecat.services.stt_service import STTService
 from pipecat.transcriptions.language import Language
 from pipecat.utils.time import time_now_iso8601
@@ -180,8 +174,6 @@ class _InputParamsDescriptor:
     """Descriptor for backward compatibility with deprecation warning."""
     def __get__(self, obj, objtype=None):
-        import warnings
         with warnings.catch_warnings():
             warnings.simplefilter("always")
             warnings.warn(
@@ -215,7 +207,7 @@ class GladiaSTTService(STTService):
         api_key: str,
         region: Literal["us-west", "eu-west"] | None = None,
         url: str = "https://api.gladia.io/v2/live",
-        confidence: float = 0.5,
+        confidence: Optional[float] = None,
         sample_rate: Optional[int] = None,
         model: str = "solaria-1",
         params: Optional[GladiaInputParams] = None,
@@ -231,6 +223,11 @@ class GladiaSTTService(STTService):
             region: Region used to process audio. eu-west or us-west. Defaults to eu-west.
             url: Gladia API URL. Defaults to "https://api.gladia.io/v2/live".
             confidence: Minimum confidence threshold for transcriptions (0.0-1.0).
+                .. deprecated:: 0.0.86
+                    The 'confidence' parameter is deprecated and will be removed in a future version.
+                    No confidence threshold is applied.
             sample_rate: Audio sample rate in Hz. If None, uses service default.
             model: Model to use for transcription. Defaults to "solaria-1".
             params: Additional configuration parameters for Gladia service.
@@ -240,11 +237,9 @@ class GladiaSTTService(STTService):
             **kwargs: Additional arguments passed to the STTService parent class.
         """
         super().__init__(sample_rate=sample_rate, **kwargs)
-        vocab: Optional[List[str]] = kwargs.pop("vocab", None)  # Get vocab from kwargs
         params = params or GladiaInputParams()
-        # Warn about deprecated language parameter if it's used
         if params.language is not None:
             with warnings.catch_warnings():
                 warnings.simplefilter("always")
@@ -255,29 +250,21 @@ class GladiaSTTService(STTService):
                     stacklevel=2,
                 )
+        if confidence:
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'confidence' parameter is deprecated and will be removed in a future version. "
+                    "No confidence threshold is applied.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
         self._api_key = api_key
         self._region = region
         self._url = url
         self.set_model_name(model)
-        self._confidence = confidence
-        self._params = params  # This is GladiaInputParams instance
-        # TODO: To be tested.
-        if vocab:
-            # Filter out any non-string or empty items
-            valid_vocab = [item for item in vocab if isinstance(item, str) and item.strip()]
-            if valid_vocab:
-                if self._params.realtime_processing is None:
-                    self._params.realtime_processing = RealtimeProcessingConfig()
-                if self._params.realtime_processing.custom_vocabulary_config is None:
-                    self._params.realtime_processing.custom_vocabulary_config = (
-                        CustomVocabularyConfig()
-                    )
-                self._params.realtime_processing.custom_vocabulary_config.vocabulary = valid_vocab
-                self._params.realtime_processing.custom_vocabulary = True  # Explicitly enable
-                self.logger.info(f"Set Gladia custom vocabulary: {valid_vocab}")
+        self._params = params
         self._websocket = None
         self._receive_task = None
         self._keepalive_task = None
@@ -434,14 +421,14 @@ class GladiaSTTService(STTService):
                 trim_size = len(self._audio_buffer) - self._max_buffer_size
                 self._audio_buffer = self._audio_buffer[trim_size:]
                 self._bytes_sent = max(0, self._bytes_sent - trim_size)
-                logger.warning(f"Audio buffer exceeded max size, trimmed {trim_size} bytes")
+                self.logger.warning(f"Audio buffer exceeded max size, trimmed {trim_size} bytes")
         # Send audio if connected
         if self._connection_active and self._websocket and self._websocket.state is State.OPEN:
             try:
                 await self._send_audio(audio)
             except websockets.exceptions.ConnectionClosed as e:
-                logger.warning(f"Websocket closed while sending audio chunk: {e}")
+                self.logger.warning(f"Websocket closed while sending audio chunk: {e}")
                 self._connection_active = False
         yield None
@@ -456,14 +443,14 @@ class GladiaSTTService(STTService):
                     response = await self._setup_gladia(settings)
                     self._session_url = response["url"]
                     self._reconnection_attempts = 0
-                    logger.info(f"Session URL : {self._session_url}")
+                    self.logger.info(f"Session URL : {self._session_url}")
                 # Connect with automatic reconnection
                 async with websocket_connect(self._session_url) as websocket:
                     try:
                         self._websocket = websocket
                         self._connection_active = True
-                        logger.debug(f"{self} Connected to Gladia WebSocket")
+                        self.logger.debug(f"{self} Connected to Gladia WebSocket")
                         # Send buffered audio if any
                         await self._send_buffered_audio()
@@ -476,7 +463,7 @@ class GladiaSTTService(STTService):
                         await asyncio.gather(self._receive_task, self._keepalive_task)
                     except websockets.exceptions.ConnectionClosed as e:
-                        logger.warning(f"WebSocket connection closed: {e}")
+                        self.logger.warning(f"WebSocket connection closed: {e}")
                         self._connection_active = False
                         # Clean up tasks
@@ -490,7 +477,7 @@ class GladiaSTTService(STTService):
                             break
             except Exception as e:
-                logger.error(f"Error in connection handler: {e}")
+                self.logger.error(f"Error in connection handler: {e}")
                 self._connection_active = False
                 if not self._should_reconnect:
@@ -556,7 +543,7 @@ class GladiaSTTService(STTService):
         """Send any buffered audio after reconnection."""
         async with self._buffer_lock:
             if self._audio_buffer:
-                logger.debug(f"{self} Sending {len(self._audio_buffer)} bytes of buffered audio")
+                self.logger.debug(f"{self} Sending {len(self._audio_buffer)} bytes of buffered audio")
                 await self._send_audio(bytes(self._audio_buffer))
     async def _send_stop_recording(self):
@@ -575,12 +562,12 @@ class GladiaSTTService(STTService):
                     empty_audio = b""
                     await self._send_audio(empty_audio)
                 else:
-                    logger.debug("Websocket closed, stopping keepalive")
+                    self.logger.debug("Websocket closed, stopping keepalive")
                     break
         except websockets.exceptions.ConnectionClosed:
-            logger.debug("Connection closed during keepalive")
+            self.logger.debug("Connection closed during keepalive")
         except Exception as e:
-            logger.error(f"Error in Gladia keepalive task: {e}")
+            self.logger.error(f"Error in Gladia keepalive task: {e}")
     async def _receive_task_handler(self):
         try:
@@ -600,43 +587,40 @@ class GladiaSTTService(STTService):
                 elif content["type"] == "transcript":
                     utterance = content["data"]["utterance"]
-                    confidence = utterance.get("confidence", 0)
                     language = utterance["language"]
                     transcript = utterance["text"]
                     is_final = content["data"]["is_final"]
-                    if confidence >= self._confidence:
-                        if is_final:
-                            await self.push_frame(
-                                TranscriptionFrame(
-                                    transcript,
-                                    self._user_id,
-                                    time_now_iso8601(),
-                                    language,
-                                    result=content,
-                                )
-                            )
-                            await self._handle_transcription(
-                                transcript=transcript,
-                                is_final=is_final,
-                                language=language,
+                    if is_final:
+                        await self.push_frame(
+                            TranscriptionFrame(
+                                transcript,
+                                self._user_id,
+                                time_now_iso8601(),
+                                language,
+                                result=content,
                             )
-                        else:
-                            await self.push_frame(
-                                InterimTranscriptionFrame(
-                                    transcript,
-                                    self._user_id,
-                                    time_now_iso8601(),
-                                    language,
-                                    result=content,
-                                )
+                        )
+                        await self._handle_transcription(
+                            transcript=transcript,
+                            is_final=is_final,
+                            language=language,
+                        )
+                    else:
+                        await self.push_frame(
+                            InterimTranscriptionFrame(
+                                transcript,
+                                self._user_id,
+                                time_now_iso8601(),
+                                language,
+                                result=content,
                             )
+                        )
                 elif content["type"] == "translation":
                     translated_utterance = content["data"]["translated_utterance"]
                     original_language = content["data"]["original_language"]
                     translated_language = translated_utterance["language"]
-                    confidence = translated_utterance.get("confidence", 0)
                     translation = translated_utterance["text"]
-                    if translated_language != original_language and confidence >= self._confidence:
+                    if translated_language != original_language:
                         await self.push_frame(
                             TranslationFrame(
                                 translation, "", time_now_iso8601(), translated_language
@@ -646,7 +630,7 @@ class GladiaSTTService(STTService):
             # Expected when closing the connection
             pass
         except Exception as e:
-            logger.error(f"Error in Gladia WebSocket handler: {e}")
+            self.logger.error(f"Error in Gladia WebSocket handler: {e}")
     async def _maybe_reconnect(self) -> bool:
         """Handle exponential backoff reconnection logic."""
@@ -654,11 +638,11 @@ class GladiaSTTService(STTService):
             return False
         self._reconnection_attempts += 1
         if self._reconnection_attempts > self._max_reconnection_attempts:
-            logger.error(f"Max reconnection attempts ({self._max_reconnection_attempts}) reached")
+            self.logger.error(f"Max reconnection attempts ({self._max_reconnection_attempts}) reached")
             self._should_reconnect = False
             return False
         delay = self._reconnection_delay * (2 ** (self._reconnection_attempts - 1))
-        logger.debug(
+        self.logger.debug(
             f"{self} Reconnecting in {delay} seconds (attempt {self._reconnection_attempts}/{self._max_reconnection_attempts})"
         )
         await asyncio.sleep(delay)

pipecat/services/google/__init__.py CHANGED Viewed

@@ -9,6 +9,7 @@ import sys
 from pipecat.services import DeprecatedModuleProxy
 from .frames import *
+from .gemini_live import *
 from .image import *
 from .llm import *
 from .llm_openai import *

pipecat/services/google/gemini_live/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .file_api import GeminiFileAPI
+from .llm import GeminiLiveLLMService
+from .llm_vertex import GeminiLiveVertexLLMService

pipecat/services/google/gemini_live/file_api.py ADDED Viewed

@@ -0,0 +1,189 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+"""Gemini File API client for uploading and managing files.
+This module provides a client for Google's Gemini File API, enabling file
+uploads, metadata retrieval, listing, and deletion. Files uploaded through
+this API can be referenced in Gemini generative model calls.
+"""
+import mimetypes
+from typing import Any, Dict, Optional
+import aiohttp
+from loguru import logger
+class GeminiFileAPI:
+    """Client for the Gemini File API.
+    This class provides methods for uploading, fetching, listing, and deleting files
+    through Google's Gemini File API.
+    Files uploaded through this API remain available for 48 hours and can be referenced
+    in calls to the Gemini generative models. Maximum file size is 2GB, with total
+    project storage limited to 20GB.
+    """
+    def __init__(
+        self, api_key: str, base_url: str = "https://generativelanguage.googleapis.com/v1beta/files"
+    ):
+        """Initialize the Gemini File API client.
+        Args:
+            api_key: Google AI API key
+            base_url: Base URL for the Gemini File API (default is the v1beta endpoint)
+        """
+        self._api_key = api_key
+        self._base_url = base_url
+        # Upload URL uses the /upload/ path
+        self.upload_base_url = "https://generativelanguage.googleapis.com/upload/v1beta/files"
+    async def upload_file(
+        self, file_path: str, display_name: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Upload a file to the Gemini File API using the correct resumable upload protocol.
+        Args:
+            file_path: Path to the file to upload
+            display_name: Optional display name for the file
+        Returns:
+            File metadata including uri, name, and display_name
+        """
+        logger.info(f"Uploading file: {file_path}")
+        async with aiohttp.ClientSession() as session:
+            # Determine the file's MIME type
+            mime_type, _ = mimetypes.guess_type(file_path)
+            if not mime_type:
+                mime_type = "application/octet-stream"
+            # Read the file
+            with open(file_path, "rb") as f:
+                file_data = f.read()
+            # Create the metadata payload
+            metadata = {}
+            if display_name:
+                metadata = {"file": {"display_name": display_name}}
+            # Step 1: Initial resumable request to get upload URL
+            headers = {
+                "X-Goog-Upload-Protocol": "resumable",
+                "X-Goog-Upload-Command": "start",
+                "X-Goog-Upload-Header-Content-Length": str(len(file_data)),
+                "X-Goog-Upload-Header-Content-Type": mime_type,
+                "Content-Type": "application/json",
+            }
+            logger.debug(f"Step 1: Getting upload URL from {self.upload_base_url}")
+            async with session.post(
+                f"{self.upload_base_url}?key={self._api_key}", headers=headers, json=metadata
+            ) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    logger.error(f"Error initiating file upload: {error_text}")
+                    raise Exception(f"Failed to initiate upload: {response.status} - {error_text}")
+                # Get the upload URL from the response header
+                upload_url = response.headers.get("X-Goog-Upload-URL")
+                if not upload_url:
+                    logger.error(f"Response headers: {dict(response.headers)}")
+                    raise Exception("No upload URL in response headers")
+                logger.debug(f"Got upload URL: {upload_url}")
+            # Step 2: Upload the actual file data
+            upload_headers = {
+                "Content-Length": str(len(file_data)),
+                "X-Goog-Upload-Offset": "0",
+                "X-Goog-Upload-Command": "upload, finalize",
+            }
+            logger.debug(f"Step 2: Uploading file data to {upload_url}")
+            async with session.post(upload_url, headers=upload_headers, data=file_data) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    logger.error(f"Error uploading file data: {error_text}")
+                    raise Exception(f"Failed to upload file: {response.status} - {error_text}")
+                file_info = await response.json()
+                logger.info(f"File uploaded successfully: {file_info.get('file', {}).get('name')}")
+                return file_info
+    async def get_file(self, name: str) -> Dict[str, Any]:
+        """Get metadata for a file.
+        Args:
+            name: File name (or full path)
+        Returns:
+            File metadata
+        """
+        # Extract just the name part if a full path is provided
+        if "/" in name:
+            name = name.split("/")[-1]
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{self._base_url}/{name}?key={self._api_key}") as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    logger.error(f"Error getting file metadata: {error_text}")
+                    raise Exception(f"Failed to get file metadata: {response.status}")
+                file_info = await response.json()
+                return file_info
+    async def list_files(
+        self, page_size: int = 10, page_token: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """List uploaded files.
+        Args:
+            page_size: Number of files to return per page
+            page_token: Token for pagination
+        Returns:
+            List of files and next page token if available
+        """
+        params = {"key": self._api_key, "pageSize": page_size}
+        if page_token:
+            params["pageToken"] = page_token
+        async with aiohttp.ClientSession() as session:
+            async with session.get(self._base_url, params=params) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    logger.error(f"Error listing files: {error_text}")
+                    raise Exception(f"Failed to list files: {response.status}")
+                result = await response.json()
+                return result
+    async def delete_file(self, name: str) -> bool:
+        """Delete a file.
+        Args:
+            name: File name (or full path)
+        Returns:
+            True if deleted successfully
+        """
+        # Extract just the name part if a full path is provided
+        if "/" in name:
+            name = name.split("/")[-1]
+        async with aiohttp.ClientSession() as session:
+            async with session.delete(f"{self._base_url}/{name}?key={self._api_key}") as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    logger.error(f"Error deleting file: {error_text}")
+                    raise Exception(f"Failed to delete file: {response.status}")
+                return True

dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl