dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244) hide show
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
@@ -53,7 +53,6 @@ from pipecat.services.openai.llm import (
53
53
  OpenAIAssistantContextAggregator,
54
54
  OpenAIUserContextAggregator,
55
55
  )
56
- from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
57
56
  from pipecat.utils.tracing.service_decorators import traced_llm
58
57
 
59
58
  # Suppress gRPC fork warnings
@@ -68,6 +67,7 @@ try:
68
67
  FunctionCall,
69
68
  FunctionResponse,
70
69
  GenerateContentConfig,
70
+ HttpOptions,
71
71
  Part,
72
72
  )
73
73
  except ModuleNotFoundError as e:
@@ -83,21 +83,13 @@ class GoogleUserContextAggregator(OpenAIUserContextAggregator):
83
83
  Content and Part message format for user messages.
84
84
  """
85
85
 
86
- async def push_aggregation(self):
87
- """Push aggregated user text as a Google Content message."""
88
- if len(self._aggregation) > 0:
89
- self._context.add_message(Content(role="user", parts=[Part(text=self._aggregation)]))
90
-
91
- # Reset the aggregation. Reset it before pushing it down, otherwise
92
- # if the tasks gets cancelled we won't be able to clear things up.
93
- self._aggregation = ""
94
-
95
- # Push context frame
96
- frame = OpenAILLMContextFrame(self._context)
97
- await self.push_frame(frame)
86
+ async def handle_aggregation(self, aggregation: str):
87
+ """Add the aggregated user text to the context as a Google Content message.
98
88
 
99
- # Reset our accumulator state.
100
- await self.reset()
89
+ Args:
90
+ aggregation: The aggregated user text to add as a user message.
91
+ """
92
+ self._context.add_message(Content(role="user", parts=[Part(text=aggregation)]))
101
93
 
102
94
 
103
95
  class GoogleAssistantContextAggregator(OpenAIAssistantContextAggregator):
@@ -233,11 +225,6 @@ class GoogleLLMContext(OpenAILLMContext):
233
225
 
234
226
  This class handles conversion between OpenAI-style messages and Google AI's
235
227
  Content/Part format, including system messages, function calls, and media.
236
-
237
- Args:
238
- messages: Initial messages in OpenAI format.
239
- tools: Available tools/functions for the model.
240
- tool_choice: Tool choice configuration.
241
228
  """
242
229
 
243
230
  def __init__(
@@ -246,6 +233,13 @@ class GoogleLLMContext(OpenAILLMContext):
246
233
  tools: Optional[List[dict]] = None,
247
234
  tool_choice: Optional[dict] = None,
248
235
  ):
236
+ """Initialize GoogleLLMContext.
237
+
238
+ Args:
239
+ messages: Initial messages in OpenAI format.
240
+ tools: Available tools/functions for the model.
241
+ tool_choice: Tool choice configuration.
242
+ """
249
243
  super().__init__(messages=messages, tools=tools, tool_choice=tool_choice)
250
244
  self.system_message = None
251
245
 
@@ -378,18 +372,48 @@ class GoogleLLMContext(OpenAILLMContext):
378
372
  System messages are stored separately and return None.
379
373
 
380
374
  Args:
381
- message: Message in standard format:
375
+ message: Message in standard format.
376
+
377
+ Returns:
378
+ Content object with role and parts, or None for system messages.
379
+
380
+ Examples:
381
+ Standard text message::
382
+
382
383
  {
383
- "role": "user/assistant/system/tool",
384
- "content": str | [{"type": "text/image_url", ...}] | None,
385
- "tool_calls": [{"function": {"name": str, "arguments": str}}]
384
+ "role": "user",
385
+ "content": "Hello there"
386
386
  }
387
387
 
388
- Returns:
389
- Content object with:
390
- - role: "user" or "model" (converted from "assistant")
391
- - parts: List[Part] containing text, inline_data, or function calls
392
- Returns None for system messages.
388
+ Converts to Google Content with::
389
+
390
+ Content(
391
+ role="user",
392
+ parts=[Part(text="Hello there")]
393
+ )
394
+
395
+ Standard function call message::
396
+
397
+ {
398
+ "role": "assistant",
399
+ "tool_calls": [
400
+ {
401
+ "function": {
402
+ "name": "search",
403
+ "arguments": '{"query": "test"}'
404
+ }
405
+ }
406
+ ]
407
+ }
408
+
409
+ Converts to Google Content with::
410
+
411
+ Content(
412
+ role="model",
413
+ parts=[Part(function_call=FunctionCall(name="search", args={"query": "test"}))]
414
+ )
415
+
416
+ System message returns None and stores content in self.system_message.
393
417
  """
394
418
  role = message["role"]
395
419
  content = message.get("content", [])
@@ -445,21 +469,73 @@ class GoogleLLMContext(OpenAILLMContext):
445
469
  Handles text, images, and function calls from Google's Content/Part objects.
446
470
 
447
471
  Args:
448
- obj: Google Content object with:
449
- - role: "model" (converted to "assistant") or "user"
450
- - parts: List[Part] containing text, inline_data, or function calls
472
+ obj: Google Content object with role and parts.
451
473
 
452
474
  Returns:
453
- List of messages in standard format:
454
- [
455
- {
456
- "role": "user/assistant/tool",
457
- "content": [
458
- {"type": "text", "text": str} |
459
- {"type": "image_url", "image_url": {"url": str}}
460
- ]
461
- }
462
- ]
475
+ List containing a single message in standard format.
476
+
477
+ Examples:
478
+ Google Content with text::
479
+
480
+ Content(
481
+ role="user",
482
+ parts=[Part(text="Hello")]
483
+ )
484
+
485
+ Converts to::
486
+
487
+ [
488
+ {
489
+ "role": "user",
490
+ "content": [{"type": "text", "text": "Hello"}]
491
+ }
492
+ ]
493
+
494
+ Google Content with function call::
495
+
496
+ Content(
497
+ role="model",
498
+ parts=[Part(function_call=FunctionCall(name="search", args={"q": "test"}))]
499
+ )
500
+
501
+ Converts to::
502
+
503
+ [
504
+ {
505
+ "role": "assistant",
506
+ "tool_calls": [
507
+ {
508
+ "id": "search",
509
+ "type": "function",
510
+ "function": {
511
+ "name": "search",
512
+ "arguments": '{"q": "test"}'
513
+ }
514
+ }
515
+ ]
516
+ }
517
+ ]
518
+
519
+ Google Content with image::
520
+
521
+ Content(
522
+ role="user",
523
+ parts=[Part(inline_data=Blob(mime_type="image/jpeg", data=bytes_data))]
524
+ )
525
+
526
+ Converts to::
527
+
528
+ [
529
+ {
530
+ "role": "user",
531
+ "content": [
532
+ {
533
+ "type": "image_url",
534
+ "image_url": {"url": "data:image/jpeg;base64,<encoded_data>"}
535
+ }
536
+ ]
537
+ }
538
+ ]
463
539
  """
464
540
  msg = {"role": obj.role, "content": []}
465
541
  if msg["role"] == "model":
@@ -542,9 +618,9 @@ class GoogleLLMContext(OpenAILLMContext):
542
618
  # Check if we only have function-related messages (no regular text)
543
619
  has_regular_messages = any(
544
620
  len(msg.parts) == 1
545
- and not getattr(msg.parts[0], "text", None)
546
- and getattr(msg.parts[0], "function_call", None)
547
- and getattr(msg.parts[0], "function_response", None)
621
+ and getattr(msg.parts[0], "text", None)
622
+ and not getattr(msg.parts[0], "function_call", None)
623
+ and not getattr(msg.parts[0], "function_response", None)
548
624
  for msg in self._messages
549
625
  )
550
626
 
@@ -563,15 +639,6 @@ class GoogleLLMService(LLMService):
563
639
  from OpenAILLMContext to the messages format expected by the Google AI model.
564
640
  We use OpenAILLMContext as a lingua franca for all LLM services to enable
565
641
  easy switching between different LLMs.
566
-
567
- Args:
568
- api_key: Google AI API key for authentication.
569
- model: Model name to use. Defaults to "gemini-2.0-flash".
570
- params: Input parameters for the model.
571
- system_instruction: System instruction/prompt for the model.
572
- tools: List of available tools/functions.
573
- tool_config: Configuration for tool usage.
574
- **kwargs: Additional arguments passed to parent class.
575
642
  """
576
643
 
577
644
  # Overriding the default adapter to use the Gemini one.
@@ -603,8 +670,21 @@ class GoogleLLMService(LLMService):
603
670
  system_instruction: Optional[str] = None,
604
671
  tools: Optional[List[Dict[str, Any]]] = None,
605
672
  tool_config: Optional[Dict[str, Any]] = None,
673
+ http_options: Optional[HttpOptions] = None,
606
674
  **kwargs,
607
675
  ):
676
+ """Initialize the Google LLM service.
677
+
678
+ Args:
679
+ api_key: Google AI API key for authentication.
680
+ model: Model name to use. Defaults to "gemini-2.0-flash".
681
+ params: Input parameters for the model.
682
+ system_instruction: System instruction/prompt for the model.
683
+ tools: List of available tools/functions.
684
+ tool_config: Configuration for tool usage.
685
+ http_options: HTTP options for the client.
686
+ **kwargs: Additional arguments passed to parent class.
687
+ """
608
688
  super().__init__(**kwargs)
609
689
 
610
690
  params = params or GoogleLLMService.InputParams()
@@ -612,7 +692,8 @@ class GoogleLLMService(LLMService):
612
692
  self.set_model_name(model)
613
693
  self._api_key = api_key
614
694
  self._system_instruction = system_instruction
615
- self._create_client(api_key)
695
+ self._http_options = http_options
696
+ self._create_client(api_key, http_options)
616
697
  self._settings = {
617
698
  "max_tokens": params.max_tokens,
618
699
  "temperature": params.temperature,
@@ -631,8 +712,33 @@ class GoogleLLMService(LLMService):
631
712
  """
632
713
  return True
633
714
 
634
- def _create_client(self, api_key: str):
635
- self._client = genai.Client(api_key=api_key)
715
+ def _create_client(self, api_key: str, http_options: Optional[HttpOptions] = None):
716
+ self._client = genai.Client(api_key=api_key, http_options=http_options)
717
+
718
+ def needs_mcp_alternate_schema(self) -> bool:
719
+ """Check if this LLM service requires alternate MCP schema.
720
+
721
+ Google/Gemini has stricter JSON schema validation and requires
722
+ certain properties to be removed or modified for compatibility.
723
+
724
+ Returns:
725
+ True for Google/Gemini services.
726
+ """
727
+ return True
728
+
729
+ def _maybe_unset_thinking_budget(self, generation_params: Dict[str, Any]):
730
+ try:
731
+ # There's no way to introspect on model capabilities, so
732
+ # we check for models that we know default to thinking on
733
+ # and can be configured to turn it off.
734
+ if not self._model_name.startswith("gemini-2.5-flash"):
735
+ return
736
+ # If thinking_config is already set, don't override it.
737
+ if "thinking_config" in generation_params:
738
+ return
739
+ generation_params.setdefault("thinking_config", {})["thinking_budget"] = 0
740
+ except Exception as e:
741
+ logger.exception(f"Failed to unset thinking budget: {e}")
636
742
 
637
743
  @traced_llm
638
744
  async def _process_context(self, context: OpenAILLMContext):
@@ -641,6 +747,8 @@ class GoogleLLMService(LLMService):
641
747
  prompt_tokens = 0
642
748
  completion_tokens = 0
643
749
  total_tokens = 0
750
+ cache_read_input_tokens = 0
751
+ reasoning_tokens = 0
644
752
 
645
753
  grounding_metadata = None
646
754
  search_result = ""
@@ -680,6 +788,12 @@ class GoogleLLMService(LLMService):
680
788
  if v is not None
681
789
  }
682
790
 
791
+ if self._settings["extra"]:
792
+ generation_params.update(self._settings["extra"])
793
+
794
+ # possibly modify generation_params (in place) to set thinking to off by default
795
+ self._maybe_unset_thinking_budget(generation_params)
796
+
683
797
  generation_config = (
684
798
  GenerateContentConfig(**generation_params) if generation_params else None
685
799
  )
@@ -692,13 +806,15 @@ class GoogleLLMService(LLMService):
692
806
  )
693
807
 
694
808
  function_calls = []
695
- async for chunk in WatchdogAsyncIterator(response, manager=self.task_manager):
809
+ async for chunk in response:
696
810
  # Stop TTFB metrics after the first chunk
697
811
  await self.stop_ttfb_metrics()
698
812
  if chunk.usage_metadata:
699
813
  prompt_tokens += chunk.usage_metadata.prompt_token_count or 0
700
814
  completion_tokens += chunk.usage_metadata.candidates_token_count or 0
701
815
  total_tokens += chunk.usage_metadata.total_token_count or 0
816
+ cache_read_input_tokens += chunk.usage_metadata.cached_content_token_count or 0
817
+ reasoning_tokens += chunk.usage_metadata.thoughts_token_count or 0
702
818
 
703
819
  if not chunk.candidates:
704
820
  continue
@@ -780,6 +896,8 @@ class GoogleLLMService(LLMService):
780
896
  prompt_tokens=prompt_tokens,
781
897
  completion_tokens=completion_tokens,
782
898
  total_tokens=total_tokens,
899
+ cache_read_input_tokens=cache_read_input_tokens,
900
+ reasoning_tokens=reasoning_tokens,
783
901
  )
784
902
  )
785
903
  await self.push_frame(LLMFullResponseEndFrame())
@@ -4,6 +4,12 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Google LLM service using OpenAI-compatible API format.
8
+
9
+ This module provides integration with Google's AI LLM models using the OpenAI
10
+ API format through Google's Gemini API OpenAI compatibility layer.
11
+ """
12
+
7
13
  import json
8
14
  import os
9
15
 
@@ -11,7 +17,6 @@ from openai import AsyncStream
11
17
  from openai.types.chat import ChatCompletionChunk
12
18
 
13
19
  from pipecat.services.llm_service import FunctionCallFromLLM
14
- from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
15
20
 
16
21
  # Suppress gRPC fork warnings
17
22
  os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false"
@@ -27,8 +32,17 @@ from pipecat.services.openai.llm import OpenAILLMService
27
32
 
28
33
 
29
34
  class GoogleLLMOpenAIBetaService(OpenAILLMService):
30
- """This class implements inference with Google's AI LLM models using the OpenAI format.
31
- Ref - https://ai.google.dev/gemini-api/docs/openai
35
+ """Google LLM service using OpenAI-compatible API format.
36
+
37
+ This service provides access to Google's AI LLM models (like Gemini) through
38
+ the OpenAI API format. It handles streaming responses, function calls, and
39
+ tool usage while maintaining compatibility with OpenAI's interface.
40
+
41
+ Note: This service includes a workaround for a Google API bug where function
42
+ call indices may be incorrectly set to None, resulting in empty function names.
43
+
44
+ Reference:
45
+ https://ai.google.dev/gemini-api/docs/openai
32
46
  """
33
47
 
34
48
  def __init__(
@@ -39,6 +53,14 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
39
53
  model: str = "gemini-2.0-flash",
40
54
  **kwargs,
41
55
  ):
56
+ """Initialize the Google LLM service.
57
+
58
+ Args:
59
+ api_key: Google API key for authentication.
60
+ base_url: Base URL for Google's OpenAI-compatible API.
61
+ model: Google model name to use (e.g., "gemini-2.0-flash").
62
+ **kwargs: Additional arguments passed to the parent OpenAILLMService.
63
+ """
42
64
  super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
43
65
 
44
66
  async def _process_context(self, context: OpenAILLMContext):
@@ -56,7 +78,7 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
56
78
  context
57
79
  )
58
80
 
59
- async for chunk in WatchdogAsyncIterator(chunk_stream, manager=self.task_manager):
81
+ async for chunk in chunk_stream:
60
82
  if chunk.usage:
61
83
  tokens = LLMTokenUsage(
62
84
  prompt_tokens=chunk.usage.prompt_tokens,
@@ -4,6 +4,12 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Google Vertex AI LLM service implementation.
8
+
9
+ This module provides integration with Google's AI models via Vertex AI while
10
+ maintaining OpenAI API compatibility through Google's OpenAI-compatible endpoint.
11
+ """
12
+
7
13
  import json
8
14
  import os
9
15
 
@@ -31,16 +37,24 @@ except ModuleNotFoundError as e:
31
37
 
32
38
 
33
39
  class GoogleVertexLLMService(OpenAILLMService):
34
- """Implements inference with Google's AI models via Vertex AI while
35
- maintaining OpenAI API compatibility.
40
+ """Google Vertex AI LLM service with OpenAI API compatibility.
36
41
 
37
- Reference:
38
- https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/call-vertex-using-openai-library
42
+ Provides access to Google's AI models via Vertex AI while maintaining
43
+ OpenAI API compatibility. Handles authentication using Google service
44
+ account credentials and constructs appropriate endpoint URLs for
45
+ different GCP regions and projects.
39
46
 
47
+ Reference:
48
+ https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/call-vertex-using-openai-library
40
49
  """
41
50
 
42
51
  class InputParams(OpenAILLMService.InputParams):
43
- """Input parameters specific to Vertex AI."""
52
+ """Input parameters specific to Vertex AI.
53
+
54
+ Parameters:
55
+ location: GCP region for Vertex AI endpoint (e.g., "us-east4").
56
+ project_id: Google Cloud project ID.
57
+ """
44
58
 
45
59
  # https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations
46
60
  location: str = "us-east4"
@@ -58,11 +72,11 @@ class GoogleVertexLLMService(OpenAILLMService):
58
72
  """Initializes the VertexLLMService.
59
73
 
60
74
  Args:
61
- credentials (Optional[str]): JSON string of service account credentials.
62
- credentials_path (Optional[str]): Path to the service account JSON file.
63
- model (str): Model identifier. Defaults to "google/gemini-2.0-flash-001".
64
- params (InputParams): Vertex AI input parameters.
65
- **kwargs: Additional arguments for OpenAILLMService.
75
+ credentials: JSON string of service account credentials.
76
+ credentials_path: Path to the service account JSON file.
77
+ model: Model identifier (e.g., "google/gemini-2.0-flash-001").
78
+ params: Vertex AI input parameters including location and project.
79
+ **kwargs: Additional arguments passed to OpenAILLMService.
66
80
  """
67
81
  params = params or OpenAILLMService.InputParams()
68
82
  base_url = self._get_base_url(params)
@@ -74,7 +88,7 @@ class GoogleVertexLLMService(OpenAILLMService):
74
88
 
75
89
  @staticmethod
76
90
  def _get_base_url(params: InputParams) -> str:
77
- """Constructs the base URL for the Vertex AI API."""
91
+ """Constructs the base URL for Vertex AI API."""
78
92
  hostname_prefix = "" if params.location == "global" else f"{params.location}-"
79
93
  return (
80
94
  f"https://{hostname_prefix}aiplatform.googleapis.com/v1/"
@@ -83,14 +97,22 @@ class GoogleVertexLLMService(OpenAILLMService):
83
97
 
84
98
  @staticmethod
85
99
  def _get_api_token(credentials: Optional[str], credentials_path: Optional[str]) -> str:
86
- """Retrieves an authentication token using Google service account credentials.
100
+ """Retrieve an authentication token using Google service account credentials.
101
+
102
+ Supports multiple authentication methods:
103
+ 1. Direct JSON credentials string
104
+ 2. Path to service account JSON file
105
+ 3. Default application credentials (ADC)
87
106
 
88
107
  Args:
89
- credentials (Optional[str]): JSON string of service account credentials.
90
- credentials_path (Optional[str]): Path to the service account JSON file.
108
+ credentials: JSON string of service account credentials.
109
+ credentials_path: Path to the service account JSON file.
91
110
 
92
111
  Returns:
93
- str: OAuth token for API authentication.
112
+ OAuth token for API authentication.
113
+
114
+ Raises:
115
+ ValueError: If no valid credentials are provided or found.
94
116
  """
95
117
  creds: Optional[service_account.Credentials] = None
96
118
 
@@ -4,6 +4,13 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Google RTVI integration models and observer implementation.
8
+
9
+ This module provides integration with Google's services through the RTVI framework,
10
+ including models for search responses and an observer for handling Google-specific
11
+ frame types.
12
+ """
13
+
7
14
  from typing import List, Literal, Optional
8
15
 
9
16
  from pydantic import BaseModel
@@ -16,22 +23,56 @@ from pipecat.services.google.frames import LLMSearchOrigin, LLMSearchResponseFra
16
23
 
17
24
 
18
25
  class RTVISearchResponseMessageData(BaseModel):
26
+ """Data payload for search response messages in RTVI protocol.
27
+
28
+ Parameters:
29
+ search_result: The search result text, if available.
30
+ rendered_content: The rendered content from the search, if available.
31
+ origins: List of search result origins with metadata.
32
+ """
33
+
19
34
  search_result: Optional[str]
20
35
  rendered_content: Optional[str]
21
36
  origins: List[LLMSearchOrigin]
22
37
 
23
38
 
24
39
  class RTVIBotLLMSearchResponseMessage(BaseModel):
40
+ """RTVI message for bot LLM search responses.
41
+
42
+ Parameters:
43
+ label: Always "rtvi-ai" for RTVI protocol messages.
44
+ type: Always "bot-llm-search-response" for this message type.
45
+ data: The search response data payload.
46
+ """
47
+
25
48
  label: Literal["rtvi-ai"] = "rtvi-ai"
26
49
  type: Literal["bot-llm-search-response"] = "bot-llm-search-response"
27
50
  data: RTVISearchResponseMessageData
28
51
 
29
52
 
30
53
  class GoogleRTVIObserver(RTVIObserver):
54
+ """RTVI observer for Google service integration.
55
+
56
+ Extends the base RTVIObserver to handle Google-specific frame types,
57
+ particularly LLM search response frames from Google services.
58
+ """
59
+
31
60
  def __init__(self, rtvi: RTVIProcessor):
61
+ """Initialize the Google RTVI observer.
62
+
63
+ Args:
64
+ rtvi: The RTVI processor to send messages through.
65
+ """
32
66
  super().__init__(rtvi)
33
67
 
34
68
  async def on_push_frame(self, data: FramePushed):
69
+ """Process frames being pushed through the pipeline.
70
+
71
+ Handles Google-specific frames in addition to the base RTVI frame types.
72
+
73
+ Args:
74
+ data: Frame push event data containing frame and metadata.
75
+ """
35
76
  await super().on_push_frame(data)
36
77
 
37
78
  frame = data.frame