dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +277 -86
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +18 -6
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +125 -79
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_context.py +40 -2
- pipecat/processors/aggregators/llm_response.py +32 -15
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/dtmf_aggregator.py +174 -77
- pipecat/processors/filters/stt_mute_filter.py +17 -0
- pipecat/processors/frame_processor.py +110 -24
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +210 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +26 -5
- pipecat/processors/user_idle_processor.py +35 -11
- pipecat/runner/daily.py +59 -20
- pipecat/runner/run.py +395 -93
- pipecat/runner/types.py +6 -4
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/__init__.py +5 -1
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +41 -4
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +5 -5
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/serializers/vi.py +324 -0
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/assemblyai/models.py +6 -0
- pipecat/services/assemblyai/stt.py +13 -5
- pipecat/services/asyncai/tts.py +5 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +147 -105
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +436 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1265 -0
- pipecat/services/aws/stt.py +3 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +8 -354
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/llm.py +51 -1
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/stt.py +77 -70
- pipecat/services/cartesia/tts.py +80 -13
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +640 -0
- pipecat/services/elevenlabs/__init__.py +4 -1
- pipecat/services/elevenlabs/stt.py +339 -0
- pipecat/services/elevenlabs/tts.py +87 -46
- pipecat/services/fish/tts.py +5 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/stt.py +4 -0
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +4 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +5 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +49 -10
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/piper/tts.py +7 -9
- pipecat/services/playht/tts.py +34 -4
- pipecat/services/rime/tts.py +12 -12
- pipecat/services/riva/stt.py +3 -1
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +700 -0
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +97 -13
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +22 -10
- pipecat/services/stt_service.py +47 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +75 -22
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +51 -9
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +13 -34
- pipecat/transports/base_output.py +140 -104
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +103 -19
- pipecat/transports/smallwebrtc/request_handler.py +246 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/redis.py +58 -0
- pipecat/utils/string.py +13 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- pipecat/services/openai.py +0 -698
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
- pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/services/gladia/stt.py
CHANGED
|
@@ -14,7 +14,7 @@ import asyncio
|
|
|
14
14
|
import base64
|
|
15
15
|
import json
|
|
16
16
|
import warnings
|
|
17
|
-
from typing import Any, AsyncGenerator, Dict, List, Literal, Optional
|
|
17
|
+
from typing import Any, AsyncGenerator, Dict, Literal, Optional
|
|
18
18
|
|
|
19
19
|
import aiohttp
|
|
20
20
|
from loguru import logger
|
|
@@ -29,13 +29,7 @@ from pipecat.frames.frames import (
|
|
|
29
29
|
TranscriptionFrame,
|
|
30
30
|
TranslationFrame,
|
|
31
31
|
)
|
|
32
|
-
|
|
33
|
-
# Import nested config models
|
|
34
|
-
from pipecat.services.gladia.config import (
|
|
35
|
-
CustomVocabularyConfig,
|
|
36
|
-
GladiaInputParams,
|
|
37
|
-
RealtimeProcessingConfig,
|
|
38
|
-
)
|
|
32
|
+
from pipecat.services.gladia.config import GladiaInputParams
|
|
39
33
|
from pipecat.services.stt_service import STTService
|
|
40
34
|
from pipecat.transcriptions.language import Language
|
|
41
35
|
from pipecat.utils.time import time_now_iso8601
|
|
@@ -180,8 +174,6 @@ class _InputParamsDescriptor:
|
|
|
180
174
|
"""Descriptor for backward compatibility with deprecation warning."""
|
|
181
175
|
|
|
182
176
|
def __get__(self, obj, objtype=None):
|
|
183
|
-
import warnings
|
|
184
|
-
|
|
185
177
|
with warnings.catch_warnings():
|
|
186
178
|
warnings.simplefilter("always")
|
|
187
179
|
warnings.warn(
|
|
@@ -215,7 +207,7 @@ class GladiaSTTService(STTService):
|
|
|
215
207
|
api_key: str,
|
|
216
208
|
region: Literal["us-west", "eu-west"] | None = None,
|
|
217
209
|
url: str = "https://api.gladia.io/v2/live",
|
|
218
|
-
confidence: float =
|
|
210
|
+
confidence: Optional[float] = None,
|
|
219
211
|
sample_rate: Optional[int] = None,
|
|
220
212
|
model: str = "solaria-1",
|
|
221
213
|
params: Optional[GladiaInputParams] = None,
|
|
@@ -231,6 +223,11 @@ class GladiaSTTService(STTService):
|
|
|
231
223
|
region: Region used to process audio. eu-west or us-west. Defaults to eu-west.
|
|
232
224
|
url: Gladia API URL. Defaults to "https://api.gladia.io/v2/live".
|
|
233
225
|
confidence: Minimum confidence threshold for transcriptions (0.0-1.0).
|
|
226
|
+
|
|
227
|
+
.. deprecated:: 0.0.86
|
|
228
|
+
The 'confidence' parameter is deprecated and will be removed in a future version.
|
|
229
|
+
No confidence threshold is applied.
|
|
230
|
+
|
|
234
231
|
sample_rate: Audio sample rate in Hz. If None, uses service default.
|
|
235
232
|
model: Model to use for transcription. Defaults to "solaria-1".
|
|
236
233
|
params: Additional configuration parameters for Gladia service.
|
|
@@ -240,11 +237,9 @@ class GladiaSTTService(STTService):
|
|
|
240
237
|
**kwargs: Additional arguments passed to the STTService parent class.
|
|
241
238
|
"""
|
|
242
239
|
super().__init__(sample_rate=sample_rate, **kwargs)
|
|
243
|
-
vocab: Optional[List[str]] = kwargs.pop("vocab", None) # Get vocab from kwargs
|
|
244
240
|
|
|
245
241
|
params = params or GladiaInputParams()
|
|
246
242
|
|
|
247
|
-
# Warn about deprecated language parameter if it's used
|
|
248
243
|
if params.language is not None:
|
|
249
244
|
with warnings.catch_warnings():
|
|
250
245
|
warnings.simplefilter("always")
|
|
@@ -255,29 +250,21 @@ class GladiaSTTService(STTService):
|
|
|
255
250
|
stacklevel=2,
|
|
256
251
|
)
|
|
257
252
|
|
|
253
|
+
if confidence:
|
|
254
|
+
with warnings.catch_warnings():
|
|
255
|
+
warnings.simplefilter("always")
|
|
256
|
+
warnings.warn(
|
|
257
|
+
"The 'confidence' parameter is deprecated and will be removed in a future version. "
|
|
258
|
+
"No confidence threshold is applied.",
|
|
259
|
+
DeprecationWarning,
|
|
260
|
+
stacklevel=2,
|
|
261
|
+
)
|
|
262
|
+
|
|
258
263
|
self._api_key = api_key
|
|
259
264
|
self._region = region
|
|
260
265
|
self._url = url
|
|
261
266
|
self.set_model_name(model)
|
|
262
|
-
self.
|
|
263
|
-
self._params = params # This is GladiaInputParams instance
|
|
264
|
-
|
|
265
|
-
# TODO: To be tested.
|
|
266
|
-
if vocab:
|
|
267
|
-
# Filter out any non-string or empty items
|
|
268
|
-
valid_vocab = [item for item in vocab if isinstance(item, str) and item.strip()]
|
|
269
|
-
if valid_vocab:
|
|
270
|
-
if self._params.realtime_processing is None:
|
|
271
|
-
self._params.realtime_processing = RealtimeProcessingConfig()
|
|
272
|
-
if self._params.realtime_processing.custom_vocabulary_config is None:
|
|
273
|
-
self._params.realtime_processing.custom_vocabulary_config = (
|
|
274
|
-
CustomVocabularyConfig()
|
|
275
|
-
)
|
|
276
|
-
|
|
277
|
-
self._params.realtime_processing.custom_vocabulary_config.vocabulary = valid_vocab
|
|
278
|
-
self._params.realtime_processing.custom_vocabulary = True # Explicitly enable
|
|
279
|
-
self.logger.info(f"Set Gladia custom vocabulary: {valid_vocab}")
|
|
280
|
-
|
|
267
|
+
self._params = params
|
|
281
268
|
self._websocket = None
|
|
282
269
|
self._receive_task = None
|
|
283
270
|
self._keepalive_task = None
|
|
@@ -434,14 +421,14 @@ class GladiaSTTService(STTService):
|
|
|
434
421
|
trim_size = len(self._audio_buffer) - self._max_buffer_size
|
|
435
422
|
self._audio_buffer = self._audio_buffer[trim_size:]
|
|
436
423
|
self._bytes_sent = max(0, self._bytes_sent - trim_size)
|
|
437
|
-
logger.warning(f"Audio buffer exceeded max size, trimmed {trim_size} bytes")
|
|
424
|
+
self.logger.warning(f"Audio buffer exceeded max size, trimmed {trim_size} bytes")
|
|
438
425
|
|
|
439
426
|
# Send audio if connected
|
|
440
427
|
if self._connection_active and self._websocket and self._websocket.state is State.OPEN:
|
|
441
428
|
try:
|
|
442
429
|
await self._send_audio(audio)
|
|
443
430
|
except websockets.exceptions.ConnectionClosed as e:
|
|
444
|
-
logger.warning(f"Websocket closed while sending audio chunk: {e}")
|
|
431
|
+
self.logger.warning(f"Websocket closed while sending audio chunk: {e}")
|
|
445
432
|
self._connection_active = False
|
|
446
433
|
|
|
447
434
|
yield None
|
|
@@ -456,14 +443,14 @@ class GladiaSTTService(STTService):
|
|
|
456
443
|
response = await self._setup_gladia(settings)
|
|
457
444
|
self._session_url = response["url"]
|
|
458
445
|
self._reconnection_attempts = 0
|
|
459
|
-
logger.info(f"Session URL : {self._session_url}")
|
|
446
|
+
self.logger.info(f"Session URL : {self._session_url}")
|
|
460
447
|
|
|
461
448
|
# Connect with automatic reconnection
|
|
462
449
|
async with websocket_connect(self._session_url) as websocket:
|
|
463
450
|
try:
|
|
464
451
|
self._websocket = websocket
|
|
465
452
|
self._connection_active = True
|
|
466
|
-
logger.debug(f"{self} Connected to Gladia WebSocket")
|
|
453
|
+
self.logger.debug(f"{self} Connected to Gladia WebSocket")
|
|
467
454
|
|
|
468
455
|
# Send buffered audio if any
|
|
469
456
|
await self._send_buffered_audio()
|
|
@@ -476,7 +463,7 @@ class GladiaSTTService(STTService):
|
|
|
476
463
|
await asyncio.gather(self._receive_task, self._keepalive_task)
|
|
477
464
|
|
|
478
465
|
except websockets.exceptions.ConnectionClosed as e:
|
|
479
|
-
logger.warning(f"WebSocket connection closed: {e}")
|
|
466
|
+
self.logger.warning(f"WebSocket connection closed: {e}")
|
|
480
467
|
self._connection_active = False
|
|
481
468
|
|
|
482
469
|
# Clean up tasks
|
|
@@ -490,7 +477,7 @@ class GladiaSTTService(STTService):
|
|
|
490
477
|
break
|
|
491
478
|
|
|
492
479
|
except Exception as e:
|
|
493
|
-
logger.error(f"Error in connection handler: {e}")
|
|
480
|
+
self.logger.error(f"Error in connection handler: {e}")
|
|
494
481
|
self._connection_active = False
|
|
495
482
|
|
|
496
483
|
if not self._should_reconnect:
|
|
@@ -556,7 +543,7 @@ class GladiaSTTService(STTService):
|
|
|
556
543
|
"""Send any buffered audio after reconnection."""
|
|
557
544
|
async with self._buffer_lock:
|
|
558
545
|
if self._audio_buffer:
|
|
559
|
-
logger.debug(f"{self} Sending {len(self._audio_buffer)} bytes of buffered audio")
|
|
546
|
+
self.logger.debug(f"{self} Sending {len(self._audio_buffer)} bytes of buffered audio")
|
|
560
547
|
await self._send_audio(bytes(self._audio_buffer))
|
|
561
548
|
|
|
562
549
|
async def _send_stop_recording(self):
|
|
@@ -575,12 +562,12 @@ class GladiaSTTService(STTService):
|
|
|
575
562
|
empty_audio = b""
|
|
576
563
|
await self._send_audio(empty_audio)
|
|
577
564
|
else:
|
|
578
|
-
logger.debug("Websocket closed, stopping keepalive")
|
|
565
|
+
self.logger.debug("Websocket closed, stopping keepalive")
|
|
579
566
|
break
|
|
580
567
|
except websockets.exceptions.ConnectionClosed:
|
|
581
|
-
logger.debug("Connection closed during keepalive")
|
|
568
|
+
self.logger.debug("Connection closed during keepalive")
|
|
582
569
|
except Exception as e:
|
|
583
|
-
logger.error(f"Error in Gladia keepalive task: {e}")
|
|
570
|
+
self.logger.error(f"Error in Gladia keepalive task: {e}")
|
|
584
571
|
|
|
585
572
|
async def _receive_task_handler(self):
|
|
586
573
|
try:
|
|
@@ -600,43 +587,40 @@ class GladiaSTTService(STTService):
|
|
|
600
587
|
|
|
601
588
|
elif content["type"] == "transcript":
|
|
602
589
|
utterance = content["data"]["utterance"]
|
|
603
|
-
confidence = utterance.get("confidence", 0)
|
|
604
590
|
language = utterance["language"]
|
|
605
591
|
transcript = utterance["text"]
|
|
606
592
|
is_final = content["data"]["is_final"]
|
|
607
|
-
if
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
result=content,
|
|
616
|
-
)
|
|
617
|
-
)
|
|
618
|
-
await self._handle_transcription(
|
|
619
|
-
transcript=transcript,
|
|
620
|
-
is_final=is_final,
|
|
621
|
-
language=language,
|
|
593
|
+
if is_final:
|
|
594
|
+
await self.push_frame(
|
|
595
|
+
TranscriptionFrame(
|
|
596
|
+
transcript,
|
|
597
|
+
self._user_id,
|
|
598
|
+
time_now_iso8601(),
|
|
599
|
+
language,
|
|
600
|
+
result=content,
|
|
622
601
|
)
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
602
|
+
)
|
|
603
|
+
await self._handle_transcription(
|
|
604
|
+
transcript=transcript,
|
|
605
|
+
is_final=is_final,
|
|
606
|
+
language=language,
|
|
607
|
+
)
|
|
608
|
+
else:
|
|
609
|
+
await self.push_frame(
|
|
610
|
+
InterimTranscriptionFrame(
|
|
611
|
+
transcript,
|
|
612
|
+
self._user_id,
|
|
613
|
+
time_now_iso8601(),
|
|
614
|
+
language,
|
|
615
|
+
result=content,
|
|
632
616
|
)
|
|
617
|
+
)
|
|
633
618
|
elif content["type"] == "translation":
|
|
634
619
|
translated_utterance = content["data"]["translated_utterance"]
|
|
635
620
|
original_language = content["data"]["original_language"]
|
|
636
621
|
translated_language = translated_utterance["language"]
|
|
637
|
-
confidence = translated_utterance.get("confidence", 0)
|
|
638
622
|
translation = translated_utterance["text"]
|
|
639
|
-
if translated_language != original_language
|
|
623
|
+
if translated_language != original_language:
|
|
640
624
|
await self.push_frame(
|
|
641
625
|
TranslationFrame(
|
|
642
626
|
translation, "", time_now_iso8601(), translated_language
|
|
@@ -646,7 +630,7 @@ class GladiaSTTService(STTService):
|
|
|
646
630
|
# Expected when closing the connection
|
|
647
631
|
pass
|
|
648
632
|
except Exception as e:
|
|
649
|
-
logger.error(f"Error in Gladia WebSocket handler: {e}")
|
|
633
|
+
self.logger.error(f"Error in Gladia WebSocket handler: {e}")
|
|
650
634
|
|
|
651
635
|
async def _maybe_reconnect(self) -> bool:
|
|
652
636
|
"""Handle exponential backoff reconnection logic."""
|
|
@@ -654,11 +638,11 @@ class GladiaSTTService(STTService):
|
|
|
654
638
|
return False
|
|
655
639
|
self._reconnection_attempts += 1
|
|
656
640
|
if self._reconnection_attempts > self._max_reconnection_attempts:
|
|
657
|
-
logger.error(f"Max reconnection attempts ({self._max_reconnection_attempts}) reached")
|
|
641
|
+
self.logger.error(f"Max reconnection attempts ({self._max_reconnection_attempts}) reached")
|
|
658
642
|
self._should_reconnect = False
|
|
659
643
|
return False
|
|
660
644
|
delay = self._reconnection_delay * (2 ** (self._reconnection_attempts - 1))
|
|
661
|
-
logger.debug(
|
|
645
|
+
self.logger.debug(
|
|
662
646
|
f"{self} Reconnecting in {delay} seconds (attempt {self._reconnection_attempts}/{self._max_reconnection_attempts})"
|
|
663
647
|
)
|
|
664
648
|
await asyncio.sleep(delay)
|
|
pipecat/services/google/gemini_live/file_api.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024–2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Gemini File API client for uploading and managing files.
|
|
8
|
+
|
|
9
|
+
This module provides a client for Google's Gemini File API, enabling file
|
|
10
|
+
uploads, metadata retrieval, listing, and deletion. Files uploaded through
|
|
11
|
+
this API can be referenced in Gemini generative model calls.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import mimetypes
|
|
15
|
+
from typing import Any, Dict, Optional
|
|
16
|
+
|
|
17
|
+
import aiohttp
|
|
18
|
+
from loguru import logger
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class GeminiFileAPI:
|
|
22
|
+
"""Client for the Gemini File API.
|
|
23
|
+
|
|
24
|
+
This class provides methods for uploading, fetching, listing, and deleting files
|
|
25
|
+
through Google's Gemini File API.
|
|
26
|
+
|
|
27
|
+
Files uploaded through this API remain available for 48 hours and can be referenced
|
|
28
|
+
in calls to the Gemini generative models. Maximum file size is 2GB, with total
|
|
29
|
+
project storage limited to 20GB.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
def __init__(
|
|
33
|
+
self, api_key: str, base_url: str = "https://generativelanguage.googleapis.com/v1beta/files"
|
|
34
|
+
):
|
|
35
|
+
"""Initialize the Gemini File API client.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
api_key: Google AI API key
|
|
39
|
+
base_url: Base URL for the Gemini File API (default is the v1beta endpoint)
|
|
40
|
+
"""
|
|
41
|
+
self._api_key = api_key
|
|
42
|
+
self._base_url = base_url
|
|
43
|
+
# Upload URL uses the /upload/ path
|
|
44
|
+
self.upload_base_url = "https://generativelanguage.googleapis.com/upload/v1beta/files"
|
|
45
|
+
|
|
46
|
+
async def upload_file(
|
|
47
|
+
self, file_path: str, display_name: Optional[str] = None
|
|
48
|
+
) -> Dict[str, Any]:
|
|
49
|
+
"""Upload a file to the Gemini File API using the correct resumable upload protocol.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
file_path: Path to the file to upload
|
|
53
|
+
display_name: Optional display name for the file
|
|
54
|
+
|
|
55
|
+
Returns:
|
|
56
|
+
File metadata including uri, name, and display_name
|
|
57
|
+
"""
|
|
58
|
+
logger.info(f"Uploading file: {file_path}")
|
|
59
|
+
|
|
60
|
+
async with aiohttp.ClientSession() as session:
|
|
61
|
+
# Determine the file's MIME type
|
|
62
|
+
mime_type, _ = mimetypes.guess_type(file_path)
|
|
63
|
+
if not mime_type:
|
|
64
|
+
mime_type = "application/octet-stream"
|
|
65
|
+
|
|
66
|
+
# Read the file
|
|
67
|
+
with open(file_path, "rb") as f:
|
|
68
|
+
file_data = f.read()
|
|
69
|
+
|
|
70
|
+
# Create the metadata payload
|
|
71
|
+
metadata = {}
|
|
72
|
+
if display_name:
|
|
73
|
+
metadata = {"file": {"display_name": display_name}}
|
|
74
|
+
|
|
75
|
+
# Step 1: Initial resumable request to get upload URL
|
|
76
|
+
headers = {
|
|
77
|
+
"X-Goog-Upload-Protocol": "resumable",
|
|
78
|
+
"X-Goog-Upload-Command": "start",
|
|
79
|
+
"X-Goog-Upload-Header-Content-Length": str(len(file_data)),
|
|
80
|
+
"X-Goog-Upload-Header-Content-Type": mime_type,
|
|
81
|
+
"Content-Type": "application/json",
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
logger.debug(f"Step 1: Getting upload URL from {self.upload_base_url}")
|
|
85
|
+
async with session.post(
|
|
86
|
+
f"{self.upload_base_url}?key={self._api_key}", headers=headers, json=metadata
|
|
87
|
+
) as response:
|
|
88
|
+
if response.status != 200:
|
|
89
|
+
error_text = await response.text()
|
|
90
|
+
logger.error(f"Error initiating file upload: {error_text}")
|
|
91
|
+
raise Exception(f"Failed to initiate upload: {response.status} - {error_text}")
|
|
92
|
+
|
|
93
|
+
# Get the upload URL from the response header
|
|
94
|
+
upload_url = response.headers.get("X-Goog-Upload-URL")
|
|
95
|
+
if not upload_url:
|
|
96
|
+
logger.error(f"Response headers: {dict(response.headers)}")
|
|
97
|
+
raise Exception("No upload URL in response headers")
|
|
98
|
+
|
|
99
|
+
logger.debug(f"Got upload URL: {upload_url}")
|
|
100
|
+
|
|
101
|
+
# Step 2: Upload the actual file data
|
|
102
|
+
upload_headers = {
|
|
103
|
+
"Content-Length": str(len(file_data)),
|
|
104
|
+
"X-Goog-Upload-Offset": "0",
|
|
105
|
+
"X-Goog-Upload-Command": "upload, finalize",
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
logger.debug(f"Step 2: Uploading file data to {upload_url}")
|
|
109
|
+
async with session.post(upload_url, headers=upload_headers, data=file_data) as response:
|
|
110
|
+
if response.status != 200:
|
|
111
|
+
error_text = await response.text()
|
|
112
|
+
logger.error(f"Error uploading file data: {error_text}")
|
|
113
|
+
raise Exception(f"Failed to upload file: {response.status} - {error_text}")
|
|
114
|
+
|
|
115
|
+
file_info = await response.json()
|
|
116
|
+
logger.info(f"File uploaded successfully: {file_info.get('file', {}).get('name')}")
|
|
117
|
+
return file_info
|
|
118
|
+
|
|
119
|
+
async def get_file(self, name: str) -> Dict[str, Any]:
|
|
120
|
+
"""Get metadata for a file.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
name: File name (or full path)
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
File metadata
|
|
127
|
+
"""
|
|
128
|
+
# Extract just the name part if a full path is provided
|
|
129
|
+
if "/" in name:
|
|
130
|
+
name = name.split("/")[-1]
|
|
131
|
+
|
|
132
|
+
async with aiohttp.ClientSession() as session:
|
|
133
|
+
async with session.get(f"{self._base_url}/{name}?key={self._api_key}") as response:
|
|
134
|
+
if response.status != 200:
|
|
135
|
+
error_text = await response.text()
|
|
136
|
+
logger.error(f"Error getting file metadata: {error_text}")
|
|
137
|
+
raise Exception(f"Failed to get file metadata: {response.status}")
|
|
138
|
+
|
|
139
|
+
file_info = await response.json()
|
|
140
|
+
return file_info
|
|
141
|
+
|
|
142
|
+
async def list_files(
|
|
143
|
+
self, page_size: int = 10, page_token: Optional[str] = None
|
|
144
|
+
) -> Dict[str, Any]:
|
|
145
|
+
"""List uploaded files.
|
|
146
|
+
|
|
147
|
+
Args:
|
|
148
|
+
page_size: Number of files to return per page
|
|
149
|
+
page_token: Token for pagination
|
|
150
|
+
|
|
151
|
+
Returns:
|
|
152
|
+
List of files and next page token if available
|
|
153
|
+
"""
|
|
154
|
+
params = {"key": self._api_key, "pageSize": page_size}
|
|
155
|
+
|
|
156
|
+
if page_token:
|
|
157
|
+
params["pageToken"] = page_token
|
|
158
|
+
|
|
159
|
+
async with aiohttp.ClientSession() as session:
|
|
160
|
+
async with session.get(self._base_url, params=params) as response:
|
|
161
|
+
if response.status != 200:
|
|
162
|
+
error_text = await response.text()
|
|
163
|
+
logger.error(f"Error listing files: {error_text}")
|
|
164
|
+
raise Exception(f"Failed to list files: {response.status}")
|
|
165
|
+
|
|
166
|
+
result = await response.json()
|
|
167
|
+
return result
|
|
168
|
+
|
|
169
|
+
async def delete_file(self, name: str) -> bool:
|
|
170
|
+
"""Delete a file.
|
|
171
|
+
|
|
172
|
+
Args:
|
|
173
|
+
name: File name (or full path)
|
|
174
|
+
|
|
175
|
+
Returns:
|
|
176
|
+
True if deleted successfully
|
|
177
|
+
"""
|
|
178
|
+
# Extract just the name part if a full path is provided
|
|
179
|
+
if "/" in name:
|
|
180
|
+
name = name.split("/")[-1]
|
|
181
|
+
|
|
182
|
+
async with aiohttp.ClientSession() as session:
|
|
183
|
+
async with session.delete(f"{self._base_url}/{name}?key={self._api_key}") as response:
|
|
184
|
+
if response.status != 200:
|
|
185
|
+
error_text = await response.text()
|
|
186
|
+
logger.error(f"Error deleting file: {error_text}")
|
|
187
|
+
raise Exception(f"Failed to delete file: {response.status}")
|
|
188
|
+
|
|
189
|
+
return True
|