dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (195)
  1. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
  2. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  11. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  22. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  23. pipecat/audio/filters/noisereduce_filter.py +15 -0
  24. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  25. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  26. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  27. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  28. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  29. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  30. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  31. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  32. pipecat/audio/vad/data/README.md +10 -0
  33. pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
  34. pipecat/audio/vad/silero.py +9 -3
  35. pipecat/audio/vad/vad_analyzer.py +13 -1
  36. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  37. pipecat/frames/frames.py +277 -86
  38. pipecat/observers/loggers/debug_log_observer.py +3 -3
  39. pipecat/observers/loggers/llm_log_observer.py +7 -3
  40. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  41. pipecat/pipeline/runner.py +18 -6
  42. pipecat/pipeline/service_switcher.py +64 -36
  43. pipecat/pipeline/task.py +125 -79
  44. pipecat/pipeline/tts_switcher.py +30 -0
  45. pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
  46. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  47. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  48. pipecat/processors/aggregators/llm_context.py +40 -2
  49. pipecat/processors/aggregators/llm_response.py +32 -15
  50. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  51. pipecat/processors/aggregators/user_response.py +6 -6
  52. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  53. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  54. pipecat/processors/dtmf_aggregator.py +174 -77
  55. pipecat/processors/filters/stt_mute_filter.py +17 -0
  56. pipecat/processors/frame_processor.py +110 -24
  57. pipecat/processors/frameworks/langchain.py +8 -2
  58. pipecat/processors/frameworks/rtvi.py +210 -68
  59. pipecat/processors/frameworks/strands_agents.py +170 -0
  60. pipecat/processors/logger.py +2 -2
  61. pipecat/processors/transcript_processor.py +26 -5
  62. pipecat/processors/user_idle_processor.py +35 -11
  63. pipecat/runner/daily.py +59 -20
  64. pipecat/runner/run.py +395 -93
  65. pipecat/runner/types.py +6 -4
  66. pipecat/runner/utils.py +51 -10
  67. pipecat/serializers/__init__.py +5 -1
  68. pipecat/serializers/asterisk.py +16 -2
  69. pipecat/serializers/convox.py +41 -4
  70. pipecat/serializers/custom.py +257 -0
  71. pipecat/serializers/exotel.py +5 -5
  72. pipecat/serializers/livekit.py +20 -0
  73. pipecat/serializers/plivo.py +5 -5
  74. pipecat/serializers/protobuf.py +6 -5
  75. pipecat/serializers/telnyx.py +2 -2
  76. pipecat/serializers/twilio.py +43 -23
  77. pipecat/serializers/vi.py +324 -0
  78. pipecat/services/ai_service.py +2 -6
  79. pipecat/services/anthropic/llm.py +2 -25
  80. pipecat/services/assemblyai/models.py +6 -0
  81. pipecat/services/assemblyai/stt.py +13 -5
  82. pipecat/services/asyncai/tts.py +5 -3
  83. pipecat/services/aws/__init__.py +1 -0
  84. pipecat/services/aws/llm.py +147 -105
  85. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  86. pipecat/services/aws/nova_sonic/context.py +436 -0
  87. pipecat/services/aws/nova_sonic/frames.py +25 -0
  88. pipecat/services/aws/nova_sonic/llm.py +1265 -0
  89. pipecat/services/aws/stt.py +3 -3
  90. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  91. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  92. pipecat/services/aws_nova_sonic/context.py +8 -354
  93. pipecat/services/aws_nova_sonic/frames.py +13 -17
  94. pipecat/services/azure/llm.py +51 -1
  95. pipecat/services/azure/realtime/__init__.py +0 -0
  96. pipecat/services/azure/realtime/llm.py +65 -0
  97. pipecat/services/azure/stt.py +15 -0
  98. pipecat/services/cartesia/stt.py +77 -70
  99. pipecat/services/cartesia/tts.py +80 -13
  100. pipecat/services/deepgram/__init__.py +1 -0
  101. pipecat/services/deepgram/flux/__init__.py +0 -0
  102. pipecat/services/deepgram/flux/stt.py +640 -0
  103. pipecat/services/elevenlabs/__init__.py +4 -1
  104. pipecat/services/elevenlabs/stt.py +339 -0
  105. pipecat/services/elevenlabs/tts.py +87 -46
  106. pipecat/services/fish/tts.py +5 -2
  107. pipecat/services/gemini_multimodal_live/events.py +38 -524
  108. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  109. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  110. pipecat/services/gladia/stt.py +56 -72
  111. pipecat/services/google/__init__.py +1 -0
  112. pipecat/services/google/gemini_live/__init__.py +3 -0
  113. pipecat/services/google/gemini_live/file_api.py +189 -0
  114. pipecat/services/google/gemini_live/llm.py +1582 -0
  115. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  116. pipecat/services/google/llm.py +15 -11
  117. pipecat/services/google/llm_openai.py +3 -3
  118. pipecat/services/google/llm_vertex.py +86 -16
  119. pipecat/services/google/stt.py +4 -0
  120. pipecat/services/google/tts.py +7 -3
  121. pipecat/services/heygen/api.py +2 -0
  122. pipecat/services/heygen/client.py +8 -4
  123. pipecat/services/heygen/video.py +2 -0
  124. pipecat/services/hume/__init__.py +5 -0
  125. pipecat/services/hume/tts.py +220 -0
  126. pipecat/services/inworld/tts.py +6 -6
  127. pipecat/services/llm_service.py +15 -5
  128. pipecat/services/lmnt/tts.py +4 -2
  129. pipecat/services/mcp_service.py +4 -2
  130. pipecat/services/mem0/memory.py +6 -5
  131. pipecat/services/mistral/llm.py +29 -8
  132. pipecat/services/moondream/vision.py +42 -16
  133. pipecat/services/neuphonic/tts.py +5 -2
  134. pipecat/services/openai/__init__.py +1 -0
  135. pipecat/services/openai/base_llm.py +27 -20
  136. pipecat/services/openai/realtime/__init__.py +0 -0
  137. pipecat/services/openai/realtime/context.py +272 -0
  138. pipecat/services/openai/realtime/events.py +1106 -0
  139. pipecat/services/openai/realtime/frames.py +37 -0
  140. pipecat/services/openai/realtime/llm.py +829 -0
  141. pipecat/services/openai/tts.py +49 -10
  142. pipecat/services/openai_realtime/__init__.py +27 -0
  143. pipecat/services/openai_realtime/azure.py +21 -0
  144. pipecat/services/openai_realtime/context.py +21 -0
  145. pipecat/services/openai_realtime/events.py +21 -0
  146. pipecat/services/openai_realtime/frames.py +21 -0
  147. pipecat/services/openai_realtime_beta/azure.py +16 -0
  148. pipecat/services/openai_realtime_beta/openai.py +17 -5
  149. pipecat/services/piper/tts.py +7 -9
  150. pipecat/services/playht/tts.py +34 -4
  151. pipecat/services/rime/tts.py +12 -12
  152. pipecat/services/riva/stt.py +3 -1
  153. pipecat/services/salesforce/__init__.py +9 -0
  154. pipecat/services/salesforce/llm.py +700 -0
  155. pipecat/services/sarvam/__init__.py +7 -0
  156. pipecat/services/sarvam/stt.py +540 -0
  157. pipecat/services/sarvam/tts.py +97 -13
  158. pipecat/services/simli/video.py +2 -2
  159. pipecat/services/speechmatics/stt.py +22 -10
  160. pipecat/services/stt_service.py +47 -0
  161. pipecat/services/tavus/video.py +2 -2
  162. pipecat/services/tts_service.py +75 -22
  163. pipecat/services/vision_service.py +7 -6
  164. pipecat/services/vistaar/llm.py +51 -9
  165. pipecat/tests/utils.py +4 -4
  166. pipecat/transcriptions/language.py +41 -1
  167. pipecat/transports/base_input.py +13 -34
  168. pipecat/transports/base_output.py +140 -104
  169. pipecat/transports/daily/transport.py +199 -26
  170. pipecat/transports/heygen/__init__.py +0 -0
  171. pipecat/transports/heygen/transport.py +381 -0
  172. pipecat/transports/livekit/transport.py +228 -63
  173. pipecat/transports/local/audio.py +6 -1
  174. pipecat/transports/local/tk.py +11 -2
  175. pipecat/transports/network/fastapi_websocket.py +1 -1
  176. pipecat/transports/smallwebrtc/connection.py +103 -19
  177. pipecat/transports/smallwebrtc/request_handler.py +246 -0
  178. pipecat/transports/smallwebrtc/transport.py +65 -23
  179. pipecat/transports/tavus/transport.py +23 -12
  180. pipecat/transports/websocket/client.py +41 -5
  181. pipecat/transports/websocket/fastapi.py +21 -11
  182. pipecat/transports/websocket/server.py +14 -7
  183. pipecat/transports/whatsapp/api.py +8 -0
  184. pipecat/transports/whatsapp/client.py +47 -0
  185. pipecat/utils/base_object.py +54 -22
  186. pipecat/utils/redis.py +58 -0
  187. pipecat/utils/string.py +13 -1
  188. pipecat/utils/tracing/service_decorators.py +21 -21
  189. pipecat/serializers/genesys.py +0 -95
  190. pipecat/services/google/test-google-chirp.py +0 -45
  191. pipecat/services/openai.py +0 -698
  192. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
  193. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
  194. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
  195. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
@@ -14,7 +14,7 @@ import asyncio
14
14
  import base64
15
15
  import json
16
16
  import warnings
17
- from typing import Any, AsyncGenerator, Dict, List, Literal, Optional
17
+ from typing import Any, AsyncGenerator, Dict, Literal, Optional
18
18
 
19
19
  import aiohttp
20
20
  from loguru import logger
@@ -29,13 +29,7 @@ from pipecat.frames.frames import (
29
29
  TranscriptionFrame,
30
30
  TranslationFrame,
31
31
  )
32
-
33
- # Import nested config models
34
- from pipecat.services.gladia.config import (
35
- CustomVocabularyConfig,
36
- GladiaInputParams,
37
- RealtimeProcessingConfig,
38
- )
32
+ from pipecat.services.gladia.config import GladiaInputParams
39
33
  from pipecat.services.stt_service import STTService
40
34
  from pipecat.transcriptions.language import Language
41
35
  from pipecat.utils.time import time_now_iso8601
@@ -180,8 +174,6 @@ class _InputParamsDescriptor:
180
174
  """Descriptor for backward compatibility with deprecation warning."""
181
175
 
182
176
  def __get__(self, obj, objtype=None):
183
- import warnings
184
-
185
177
  with warnings.catch_warnings():
186
178
  warnings.simplefilter("always")
187
179
  warnings.warn(
@@ -215,7 +207,7 @@ class GladiaSTTService(STTService):
215
207
  api_key: str,
216
208
  region: Literal["us-west", "eu-west"] | None = None,
217
209
  url: str = "https://api.gladia.io/v2/live",
218
- confidence: float = 0.5,
210
+ confidence: Optional[float] = None,
219
211
  sample_rate: Optional[int] = None,
220
212
  model: str = "solaria-1",
221
213
  params: Optional[GladiaInputParams] = None,
@@ -231,6 +223,11 @@ class GladiaSTTService(STTService):
231
223
  region: Region used to process audio. eu-west or us-west. Defaults to eu-west.
232
224
  url: Gladia API URL. Defaults to "https://api.gladia.io/v2/live".
233
225
  confidence: Minimum confidence threshold for transcriptions (0.0-1.0).
226
+
227
+ .. deprecated:: 0.0.86
228
+ The 'confidence' parameter is deprecated and will be removed in a future version.
229
+ No confidence threshold is applied.
230
+
234
231
  sample_rate: Audio sample rate in Hz. If None, uses service default.
235
232
  model: Model to use for transcription. Defaults to "solaria-1".
236
233
  params: Additional configuration parameters for Gladia service.
@@ -240,11 +237,9 @@ class GladiaSTTService(STTService):
240
237
  **kwargs: Additional arguments passed to the STTService parent class.
241
238
  """
242
239
  super().__init__(sample_rate=sample_rate, **kwargs)
243
- vocab: Optional[List[str]] = kwargs.pop("vocab", None) # Get vocab from kwargs
244
240
 
245
241
  params = params or GladiaInputParams()
246
242
 
247
- # Warn about deprecated language parameter if it's used
248
243
  if params.language is not None:
249
244
  with warnings.catch_warnings():
250
245
  warnings.simplefilter("always")
@@ -255,29 +250,21 @@ class GladiaSTTService(STTService):
255
250
  stacklevel=2,
256
251
  )
257
252
 
253
+ if confidence:
254
+ with warnings.catch_warnings():
255
+ warnings.simplefilter("always")
256
+ warnings.warn(
257
+ "The 'confidence' parameter is deprecated and will be removed in a future version. "
258
+ "No confidence threshold is applied.",
259
+ DeprecationWarning,
260
+ stacklevel=2,
261
+ )
262
+
258
263
  self._api_key = api_key
259
264
  self._region = region
260
265
  self._url = url
261
266
  self.set_model_name(model)
262
- self._confidence = confidence
263
- self._params = params # This is GladiaInputParams instance
264
-
265
- # TODO: To be tested.
266
- if vocab:
267
- # Filter out any non-string or empty items
268
- valid_vocab = [item for item in vocab if isinstance(item, str) and item.strip()]
269
- if valid_vocab:
270
- if self._params.realtime_processing is None:
271
- self._params.realtime_processing = RealtimeProcessingConfig()
272
- if self._params.realtime_processing.custom_vocabulary_config is None:
273
- self._params.realtime_processing.custom_vocabulary_config = (
274
- CustomVocabularyConfig()
275
- )
276
-
277
- self._params.realtime_processing.custom_vocabulary_config.vocabulary = valid_vocab
278
- self._params.realtime_processing.custom_vocabulary = True # Explicitly enable
279
- self.logger.info(f"Set Gladia custom vocabulary: {valid_vocab}")
280
-
267
+ self._params = params
281
268
  self._websocket = None
282
269
  self._receive_task = None
283
270
  self._keepalive_task = None
@@ -434,14 +421,14 @@ class GladiaSTTService(STTService):
434
421
  trim_size = len(self._audio_buffer) - self._max_buffer_size
435
422
  self._audio_buffer = self._audio_buffer[trim_size:]
436
423
  self._bytes_sent = max(0, self._bytes_sent - trim_size)
437
- logger.warning(f"Audio buffer exceeded max size, trimmed {trim_size} bytes")
424
+ self.logger.warning(f"Audio buffer exceeded max size, trimmed {trim_size} bytes")
438
425
 
439
426
  # Send audio if connected
440
427
  if self._connection_active and self._websocket and self._websocket.state is State.OPEN:
441
428
  try:
442
429
  await self._send_audio(audio)
443
430
  except websockets.exceptions.ConnectionClosed as e:
444
- logger.warning(f"Websocket closed while sending audio chunk: {e}")
431
+ self.logger.warning(f"Websocket closed while sending audio chunk: {e}")
445
432
  self._connection_active = False
446
433
 
447
434
  yield None
@@ -456,14 +443,14 @@ class GladiaSTTService(STTService):
456
443
  response = await self._setup_gladia(settings)
457
444
  self._session_url = response["url"]
458
445
  self._reconnection_attempts = 0
459
- logger.info(f"Session URL : {self._session_url}")
446
+ self.logger.info(f"Session URL : {self._session_url}")
460
447
 
461
448
  # Connect with automatic reconnection
462
449
  async with websocket_connect(self._session_url) as websocket:
463
450
  try:
464
451
  self._websocket = websocket
465
452
  self._connection_active = True
466
- logger.debug(f"{self} Connected to Gladia WebSocket")
453
+ self.logger.debug(f"{self} Connected to Gladia WebSocket")
467
454
 
468
455
  # Send buffered audio if any
469
456
  await self._send_buffered_audio()
@@ -476,7 +463,7 @@ class GladiaSTTService(STTService):
476
463
  await asyncio.gather(self._receive_task, self._keepalive_task)
477
464
 
478
465
  except websockets.exceptions.ConnectionClosed as e:
479
- logger.warning(f"WebSocket connection closed: {e}")
466
+ self.logger.warning(f"WebSocket connection closed: {e}")
480
467
  self._connection_active = False
481
468
 
482
469
  # Clean up tasks
@@ -490,7 +477,7 @@ class GladiaSTTService(STTService):
490
477
  break
491
478
 
492
479
  except Exception as e:
493
- logger.error(f"Error in connection handler: {e}")
480
+ self.logger.error(f"Error in connection handler: {e}")
494
481
  self._connection_active = False
495
482
 
496
483
  if not self._should_reconnect:
@@ -556,7 +543,7 @@ class GladiaSTTService(STTService):
556
543
  """Send any buffered audio after reconnection."""
557
544
  async with self._buffer_lock:
558
545
  if self._audio_buffer:
559
- logger.debug(f"{self} Sending {len(self._audio_buffer)} bytes of buffered audio")
546
+ self.logger.debug(f"{self} Sending {len(self._audio_buffer)} bytes of buffered audio")
560
547
  await self._send_audio(bytes(self._audio_buffer))
561
548
 
562
549
  async def _send_stop_recording(self):
@@ -575,12 +562,12 @@ class GladiaSTTService(STTService):
575
562
  empty_audio = b""
576
563
  await self._send_audio(empty_audio)
577
564
  else:
578
- logger.debug("Websocket closed, stopping keepalive")
565
+ self.logger.debug("Websocket closed, stopping keepalive")
579
566
  break
580
567
  except websockets.exceptions.ConnectionClosed:
581
- logger.debug("Connection closed during keepalive")
568
+ self.logger.debug("Connection closed during keepalive")
582
569
  except Exception as e:
583
- logger.error(f"Error in Gladia keepalive task: {e}")
570
+ self.logger.error(f"Error in Gladia keepalive task: {e}")
584
571
 
585
572
  async def _receive_task_handler(self):
586
573
  try:
@@ -600,43 +587,40 @@ class GladiaSTTService(STTService):
600
587
 
601
588
  elif content["type"] == "transcript":
602
589
  utterance = content["data"]["utterance"]
603
- confidence = utterance.get("confidence", 0)
604
590
  language = utterance["language"]
605
591
  transcript = utterance["text"]
606
592
  is_final = content["data"]["is_final"]
607
- if confidence >= self._confidence:
608
- if is_final:
609
- await self.push_frame(
610
- TranscriptionFrame(
611
- transcript,
612
- self._user_id,
613
- time_now_iso8601(),
614
- language,
615
- result=content,
616
- )
617
- )
618
- await self._handle_transcription(
619
- transcript=transcript,
620
- is_final=is_final,
621
- language=language,
593
+ if is_final:
594
+ await self.push_frame(
595
+ TranscriptionFrame(
596
+ transcript,
597
+ self._user_id,
598
+ time_now_iso8601(),
599
+ language,
600
+ result=content,
622
601
  )
623
- else:
624
- await self.push_frame(
625
- InterimTranscriptionFrame(
626
- transcript,
627
- self._user_id,
628
- time_now_iso8601(),
629
- language,
630
- result=content,
631
- )
602
+ )
603
+ await self._handle_transcription(
604
+ transcript=transcript,
605
+ is_final=is_final,
606
+ language=language,
607
+ )
608
+ else:
609
+ await self.push_frame(
610
+ InterimTranscriptionFrame(
611
+ transcript,
612
+ self._user_id,
613
+ time_now_iso8601(),
614
+ language,
615
+ result=content,
632
616
  )
617
+ )
633
618
  elif content["type"] == "translation":
634
619
  translated_utterance = content["data"]["translated_utterance"]
635
620
  original_language = content["data"]["original_language"]
636
621
  translated_language = translated_utterance["language"]
637
- confidence = translated_utterance.get("confidence", 0)
638
622
  translation = translated_utterance["text"]
639
- if translated_language != original_language and confidence >= self._confidence:
623
+ if translated_language != original_language:
640
624
  await self.push_frame(
641
625
  TranslationFrame(
642
626
  translation, "", time_now_iso8601(), translated_language
@@ -646,7 +630,7 @@ class GladiaSTTService(STTService):
646
630
  # Expected when closing the connection
647
631
  pass
648
632
  except Exception as e:
649
- logger.error(f"Error in Gladia WebSocket handler: {e}")
633
+ self.logger.error(f"Error in Gladia WebSocket handler: {e}")
650
634
 
651
635
  async def _maybe_reconnect(self) -> bool:
652
636
  """Handle exponential backoff reconnection logic."""
@@ -654,11 +638,11 @@ class GladiaSTTService(STTService):
654
638
  return False
655
639
  self._reconnection_attempts += 1
656
640
  if self._reconnection_attempts > self._max_reconnection_attempts:
657
- logger.error(f"Max reconnection attempts ({self._max_reconnection_attempts}) reached")
641
+ self.logger.error(f"Max reconnection attempts ({self._max_reconnection_attempts}) reached")
658
642
  self._should_reconnect = False
659
643
  return False
660
644
  delay = self._reconnection_delay * (2 ** (self._reconnection_attempts - 1))
661
- logger.debug(
645
+ self.logger.debug(
662
646
  f"{self} Reconnecting in {delay} seconds (attempt {self._reconnection_attempts}/{self._max_reconnection_attempts})"
663
647
  )
664
648
  await asyncio.sleep(delay)
@@ -9,6 +9,7 @@ import sys
9
9
  from pipecat.services import DeprecatedModuleProxy
10
10
 
11
11
  from .frames import *
12
+ from .gemini_live import *
12
13
  from .image import *
13
14
  from .llm import *
14
15
  from .llm_openai import *
@@ -0,0 +1,3 @@
1
+ from .file_api import GeminiFileAPI
2
+ from .llm import GeminiLiveLLMService
3
+ from .llm_vertex import GeminiLiveVertexLLMService
@@ -0,0 +1,189 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Gemini File API client for uploading and managing files.
8
+
9
+ This module provides a client for Google's Gemini File API, enabling file
10
+ uploads, metadata retrieval, listing, and deletion. Files uploaded through
11
+ this API can be referenced in Gemini generative model calls.
12
+ """
13
+
14
+ import mimetypes
15
+ from typing import Any, Dict, Optional
16
+
17
+ import aiohttp
18
+ from loguru import logger
19
+
20
+
21
+ class GeminiFileAPI:
22
+ """Client for the Gemini File API.
23
+
24
+ This class provides methods for uploading, fetching, listing, and deleting files
25
+ through Google's Gemini File API.
26
+
27
+ Files uploaded through this API remain available for 48 hours and can be referenced
28
+ in calls to the Gemini generative models. Maximum file size is 2GB, with total
29
+ project storage limited to 20GB.
30
+ """
31
+
32
+ def __init__(
33
+ self, api_key: str, base_url: str = "https://generativelanguage.googleapis.com/v1beta/files"
34
+ ):
35
+ """Initialize the Gemini File API client.
36
+
37
+ Args:
38
+ api_key: Google AI API key
39
+ base_url: Base URL for the Gemini File API (default is the v1beta endpoint)
40
+ """
41
+ self._api_key = api_key
42
+ self._base_url = base_url
43
+ # Upload URL uses the /upload/ path
44
+ self.upload_base_url = "https://generativelanguage.googleapis.com/upload/v1beta/files"
45
+
46
+ async def upload_file(
47
+ self, file_path: str, display_name: Optional[str] = None
48
+ ) -> Dict[str, Any]:
49
+ """Upload a file to the Gemini File API using the correct resumable upload protocol.
50
+
51
+ Args:
52
+ file_path: Path to the file to upload
53
+ display_name: Optional display name for the file
54
+
55
+ Returns:
56
+ File metadata including uri, name, and display_name
57
+ """
58
+ logger.info(f"Uploading file: {file_path}")
59
+
60
+ async with aiohttp.ClientSession() as session:
61
+ # Determine the file's MIME type
62
+ mime_type, _ = mimetypes.guess_type(file_path)
63
+ if not mime_type:
64
+ mime_type = "application/octet-stream"
65
+
66
+ # Read the file
67
+ with open(file_path, "rb") as f:
68
+ file_data = f.read()
69
+
70
+ # Create the metadata payload
71
+ metadata = {}
72
+ if display_name:
73
+ metadata = {"file": {"display_name": display_name}}
74
+
75
+ # Step 1: Initial resumable request to get upload URL
76
+ headers = {
77
+ "X-Goog-Upload-Protocol": "resumable",
78
+ "X-Goog-Upload-Command": "start",
79
+ "X-Goog-Upload-Header-Content-Length": str(len(file_data)),
80
+ "X-Goog-Upload-Header-Content-Type": mime_type,
81
+ "Content-Type": "application/json",
82
+ }
83
+
84
+ logger.debug(f"Step 1: Getting upload URL from {self.upload_base_url}")
85
+ async with session.post(
86
+ f"{self.upload_base_url}?key={self._api_key}", headers=headers, json=metadata
87
+ ) as response:
88
+ if response.status != 200:
89
+ error_text = await response.text()
90
+ logger.error(f"Error initiating file upload: {error_text}")
91
+ raise Exception(f"Failed to initiate upload: {response.status} - {error_text}")
92
+
93
+ # Get the upload URL from the response header
94
+ upload_url = response.headers.get("X-Goog-Upload-URL")
95
+ if not upload_url:
96
+ logger.error(f"Response headers: {dict(response.headers)}")
97
+ raise Exception("No upload URL in response headers")
98
+
99
+ logger.debug(f"Got upload URL: {upload_url}")
100
+
101
+ # Step 2: Upload the actual file data
102
+ upload_headers = {
103
+ "Content-Length": str(len(file_data)),
104
+ "X-Goog-Upload-Offset": "0",
105
+ "X-Goog-Upload-Command": "upload, finalize",
106
+ }
107
+
108
+ logger.debug(f"Step 2: Uploading file data to {upload_url}")
109
+ async with session.post(upload_url, headers=upload_headers, data=file_data) as response:
110
+ if response.status != 200:
111
+ error_text = await response.text()
112
+ logger.error(f"Error uploading file data: {error_text}")
113
+ raise Exception(f"Failed to upload file: {response.status} - {error_text}")
114
+
115
+ file_info = await response.json()
116
+ logger.info(f"File uploaded successfully: {file_info.get('file', {}).get('name')}")
117
+ return file_info
118
+
119
+ async def get_file(self, name: str) -> Dict[str, Any]:
120
+ """Get metadata for a file.
121
+
122
+ Args:
123
+ name: File name (or full path)
124
+
125
+ Returns:
126
+ File metadata
127
+ """
128
+ # Extract just the name part if a full path is provided
129
+ if "/" in name:
130
+ name = name.split("/")[-1]
131
+
132
+ async with aiohttp.ClientSession() as session:
133
+ async with session.get(f"{self._base_url}/{name}?key={self._api_key}") as response:
134
+ if response.status != 200:
135
+ error_text = await response.text()
136
+ logger.error(f"Error getting file metadata: {error_text}")
137
+ raise Exception(f"Failed to get file metadata: {response.status}")
138
+
139
+ file_info = await response.json()
140
+ return file_info
141
+
142
+ async def list_files(
143
+ self, page_size: int = 10, page_token: Optional[str] = None
144
+ ) -> Dict[str, Any]:
145
+ """List uploaded files.
146
+
147
+ Args:
148
+ page_size: Number of files to return per page
149
+ page_token: Token for pagination
150
+
151
+ Returns:
152
+ List of files and next page token if available
153
+ """
154
+ params = {"key": self._api_key, "pageSize": page_size}
155
+
156
+ if page_token:
157
+ params["pageToken"] = page_token
158
+
159
+ async with aiohttp.ClientSession() as session:
160
+ async with session.get(self._base_url, params=params) as response:
161
+ if response.status != 200:
162
+ error_text = await response.text()
163
+ logger.error(f"Error listing files: {error_text}")
164
+ raise Exception(f"Failed to list files: {response.status}")
165
+
166
+ result = await response.json()
167
+ return result
168
+
169
+ async def delete_file(self, name: str) -> bool:
170
+ """Delete a file.
171
+
172
+ Args:
173
+ name: File name (or full path)
174
+
175
+ Returns:
176
+ True if deleted successfully
177
+ """
178
+ # Extract just the name part if a full path is provided
179
+ if "/" in name:
180
+ name = name.split("/")[-1]
181
+
182
+ async with aiohttp.ClientSession() as session:
183
+ async with session.delete(f"{self._base_url}/{name}?key={self._api_key}") as response:
184
+ if response.status != 200:
185
+ error_text = await response.text()
186
+ logger.error(f"Error deleting file: {error_text}")
187
+ raise Exception(f"Failed to delete file: {response.status}")
188
+
189
+ return True