dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/google/llm.py
CHANGED

@@ -53,7 +53,6 @@ from pipecat.services.openai.llm import (
     OpenAIAssistantContextAggregator,
     OpenAIUserContextAggregator,
 )
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 from pipecat.utils.tracing.service_decorators import traced_llm
 
 # Suppress gRPC fork warnings
@@ -68,6 +67,7 @@ try:
         FunctionCall,
         FunctionResponse,
         GenerateContentConfig,
+        HttpOptions,
         Part,
     )
 except ModuleNotFoundError as e:
@@ -83,21 +83,13 @@ class GoogleUserContextAggregator(OpenAIUserContextAggregator):
     Content and Part message format for user messages.
     """
 
-    async def
-        """
-        if len(self._aggregation) > 0:
-            self._context.add_message(Content(role="user", parts=[Part(text=self._aggregation)]))
-
-            # Reset the aggregation. Reset it before pushing it down, otherwise
-            # if the tasks gets cancelled we won't be able to clear things up.
-            self._aggregation = ""
-
-            # Push context frame
-            frame = OpenAILLMContextFrame(self._context)
-            await self.push_frame(frame)
+    async def handle_aggregation(self, aggregation: str):
+        """Add the aggregated user text to the context as a Google Content message.
 
-
-
+        Args:
+            aggregation: The aggregated user text to add as a user message.
+        """
+        self._context.add_message(Content(role="user", parts=[Part(text=aggregation)]))
 
 
 class GoogleAssistantContextAggregator(OpenAIAssistantContextAggregator):
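The hunk above narrows the aggregator's responsibility: resetting the aggregation and pushing the context frame now live in the base aggregator, and subclasses only decide how aggregated text enters the context. A minimal sketch of a custom subclass built on the new hook (the TrimmedUserContextAggregator name is hypothetical, not part of this release):

    from pipecat.services.google.llm import GoogleUserContextAggregator

    class TrimmedUserContextAggregator(GoogleUserContextAggregator):
        """Hypothetical subclass that normalizes text before it is stored."""

        async def handle_aggregation(self, aggregation: str):
            # The base class wraps the text in a Google
            # Content(role="user", parts=[Part(text=...)]) message.
            await super().handle_aggregation(aggregation.strip())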
@@ -233,11 +225,6 @@ class GoogleLLMContext(OpenAILLMContext):
 
     This class handles conversion between OpenAI-style messages and Google AI's
     Content/Part format, including system messages, function calls, and media.
-
-    Args:
-        messages: Initial messages in OpenAI format.
-        tools: Available tools/functions for the model.
-        tool_choice: Tool choice configuration.
     """
 
     def __init__(
@@ -246,6 +233,13 @@ class GoogleLLMContext(OpenAILLMContext):
         tools: Optional[List[dict]] = None,
         tool_choice: Optional[dict] = None,
     ):
+        """Initialize GoogleLLMContext.
+
+        Args:
+            messages: Initial messages in OpenAI format.
+            tools: Available tools/functions for the model.
+            tool_choice: Tool choice configuration.
+        """
         super().__init__(messages=messages, tools=tools, tool_choice=tool_choice)
         self.system_message = None
 
@@ -378,18 +372,48 @@ class GoogleLLMContext(OpenAILLMContext):
         System messages are stored separately and return None.
 
         Args:
-            message: Message in standard format
+            message: Message in standard format.
+
+        Returns:
+            Content object with role and parts, or None for system messages.
+
+        Examples:
+            Standard text message::
+
                 {
-                    "role": "user
-                    "content":
-                    "tool_calls": [{"function": {"name": str, "arguments": str}}]
+                    "role": "user",
+                    "content": "Hello there"
                 }
 
-
-
-
-
-
+            Converts to Google Content with::
+
+                Content(
+                    role="user",
+                    parts=[Part(text="Hello there")]
+                )
+
+            Standard function call message::
+
+                {
+                    "role": "assistant",
+                    "tool_calls": [
+                        {
+                            "function": {
+                                "name": "search",
+                                "arguments": '{"query": "test"}'
+                            }
+                        }
+                    ]
+                }
+
+            Converts to Google Content with::
+
+                Content(
+                    role="model",
+                    parts=[Part(function_call=FunctionCall(name="search", args={"query": "test"}))]
+                )
+
+            System message returns None and stores content in self.system_message.
         """
         role = message["role"]
         content = message.get("content", [])
@@ -445,21 +469,73 @@ class GoogleLLMContext(OpenAILLMContext):
         Handles text, images, and function calls from Google's Content/Part objects.
 
         Args:
-            obj: Google Content object with
-                - role: "model" (converted to "assistant") or "user"
-                - parts: List[Part] containing text, inline_data, or function calls
+            obj: Google Content object with role and parts.
 
         Returns:
-            List
-
-
-
-
-
-
-            ]
-
-
+            List containing a single message in standard format.
+
+        Examples:
+            Google Content with text::
+
+                Content(
+                    role="user",
+                    parts=[Part(text="Hello")]
+                )
+
+            Converts to::
+
+                [
+                    {
+                        "role": "user",
+                        "content": [{"type": "text", "text": "Hello"}]
+                    }
+                ]
+
+            Google Content with function call::
+
+                Content(
+                    role="model",
+                    parts=[Part(function_call=FunctionCall(name="search", args={"q": "test"}))]
+                )
+
+            Converts to::
+
+                [
+                    {
+                        "role": "assistant",
+                        "tool_calls": [
+                            {
+                                "id": "search",
+                                "type": "function",
+                                "function": {
+                                    "name": "search",
+                                    "arguments": '{"q": "test"}'
+                                }
+                            }
+                        ]
+                    }
+                ]
+
+            Google Content with image::
+
+                Content(
+                    role="user",
+                    parts=[Part(inline_data=Blob(mime_type="image/jpeg", data=bytes_data))]
+                )
+
+            Converts to::
+
+                [
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "image_url",
+                                "image_url": {"url": "data:image/jpeg;base64,<encoded_data>"}
+                            }
+                        ]
+                    }
+                ]
         """
         msg = {"role": obj.role, "content": []}
         if msg["role"] == "model":
@@ -542,9 +618,9 @@ class GoogleLLMContext(OpenAILLMContext):
         # Check if we only have function-related messages (no regular text)
         has_regular_messages = any(
             len(msg.parts) == 1
-            and
-            and getattr(msg.parts[0], "function_call", None)
-            and getattr(msg.parts[0], "function_response", None)
+            and getattr(msg.parts[0], "text", None)
+            and not getattr(msg.parts[0], "function_call", None)
+            and not getattr(msg.parts[0], "function_response", None)
             for msg in self._messages
         )
 
@@ -563,15 +639,6 @@ class GoogleLLMService(LLMService):
     from OpenAILLMContext to the messages format expected by the Google AI model.
     We use OpenAILLMContext as a lingua franca for all LLM services to enable
     easy switching between different LLMs.
-
-    Args:
-        api_key: Google AI API key for authentication.
-        model: Model name to use. Defaults to "gemini-2.0-flash".
-        params: Input parameters for the model.
-        system_instruction: System instruction/prompt for the model.
-        tools: List of available tools/functions.
-        tool_config: Configuration for tool usage.
-        **kwargs: Additional arguments passed to parent class.
     """
 
     # Overriding the default adapter to use the Gemini one.
@@ -603,8 +670,21 @@ class GoogleLLMService(LLMService):
         system_instruction: Optional[str] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_config: Optional[Dict[str, Any]] = None,
+        http_options: Optional[HttpOptions] = None,
         **kwargs,
     ):
+        """Initialize the Google LLM service.
+
+        Args:
+            api_key: Google AI API key for authentication.
+            model: Model name to use. Defaults to "gemini-2.0-flash".
+            params: Input parameters for the model.
+            system_instruction: System instruction/prompt for the model.
+            tools: List of available tools/functions.
+            tool_config: Configuration for tool usage.
+            http_options: HTTP options for the client.
+            **kwargs: Additional arguments passed to parent class.
+        """
         super().__init__(**kwargs)
 
         params = params or GoogleLLMService.InputParams()
@@ -612,7 +692,8 @@ class GoogleLLMService(LLMService):
         self.set_model_name(model)
         self._api_key = api_key
         self._system_instruction = system_instruction
-        self._create_client(api_key)
+        self._http_options = http_options
+        self._create_client(api_key, http_options)
         self._settings = {
             "max_tokens": params.max_tokens,
             "temperature": params.temperature,
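The new http_options parameter is stored and forwarded to genai.Client, so client-level HTTP behavior can be tuned at construction time. A sketch under the assumption that HttpOptions comes from google.genai.types; the API version and timeout values are illustrative, not defaults from this release:

    from google.genai.types import HttpOptions
    from pipecat.services.google.llm import GoogleLLMService

    llm = GoogleLLMService(
        api_key="YOUR_GOOGLE_API_KEY",  # placeholder
        model="gemini-2.0-flash",
        # Illustrative options: pin an API version and set a 30s timeout (ms).
        http_options=HttpOptions(api_version="v1beta", timeout=30_000),
    )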
@@ -631,8 +712,33 @@
         """
         return True
 
-    def _create_client(self, api_key: str):
-        self._client = genai.Client(api_key=api_key)
+    def _create_client(self, api_key: str, http_options: Optional[HttpOptions] = None):
+        self._client = genai.Client(api_key=api_key, http_options=http_options)
+
+    def needs_mcp_alternate_schema(self) -> bool:
+        """Check if this LLM service requires alternate MCP schema.
+
+        Google/Gemini has stricter JSON schema validation and requires
+        certain properties to be removed or modified for compatibility.
+
+        Returns:
+            True for Google/Gemini services.
+        """
+        return True
+
+    def _maybe_unset_thinking_budget(self, generation_params: Dict[str, Any]):
+        try:
+            # There's no way to introspect on model capabilities, so we
+            # check for models that we know default to thinking on
+            # and can be configured to turn it off.
+            if not self._model_name.startswith("gemini-2.5-flash"):
+                return
+            # If thinking_config is already set, don't override it.
+            if "thinking_config" in generation_params:
+                return
+            generation_params.setdefault("thinking_config", {})["thinking_budget"] = 0
+        except Exception as e:
+            logger.exception(f"Failed to unset thinking budget: {e}")
 
     @traced_llm
     async def _process_context(self, context: OpenAILLMContext):
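_maybe_unset_thinking_budget only applies to model names starting with "gemini-2.5-flash" and backs off when a thinking_config is already present, so thinking is off by default but remains opt-in. A sketch of the opt-in path, assuming the service's InputParams exposes the extra dict that _process_context merges into generation_params below:

    from pipecat.services.google.llm import GoogleLLMService

    # Because "thinking_config" is already present, the helper leaves the
    # budget alone and thinking stays enabled with the given budget.
    params = GoogleLLMService.InputParams(
        extra={"thinking_config": {"thinking_budget": 1024}},  # illustrative value
    )
    llm = GoogleLLMService(
        api_key="YOUR_GOOGLE_API_KEY",  # placeholder
        model="gemini-2.5-flash",
        params=params,
    )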
@@ -641,6 +747,8 @@
         prompt_tokens = 0
         completion_tokens = 0
         total_tokens = 0
+        cache_read_input_tokens = 0
+        reasoning_tokens = 0
 
         grounding_metadata = None
         search_result = ""
@@ -680,6 +788,12 @@
                 if v is not None
             }
 
+            if self._settings["extra"]:
+                generation_params.update(self._settings["extra"])
+
+            # possibly modify generation_params (in place) to set thinking to off by default
+            self._maybe_unset_thinking_budget(generation_params)
+
             generation_config = (
                 GenerateContentConfig(**generation_params) if generation_params else None
             )
@@ -692,13 +806,15 @@
             )
 
             function_calls = []
-            async for chunk in
+            async for chunk in response:
                 # Stop TTFB metrics after the first chunk
                 await self.stop_ttfb_metrics()
                 if chunk.usage_metadata:
                     prompt_tokens += chunk.usage_metadata.prompt_token_count or 0
                     completion_tokens += chunk.usage_metadata.candidates_token_count or 0
                     total_tokens += chunk.usage_metadata.total_token_count or 0
+                    cache_read_input_tokens += chunk.usage_metadata.cached_content_token_count or 0
+                    reasoning_tokens += chunk.usage_metadata.thoughts_token_count or 0
 
                 if not chunk.candidates:
                     continue
@@ -780,6 +896,8 @@
                         prompt_tokens=prompt_tokens,
                         completion_tokens=completion_tokens,
                         total_tokens=total_tokens,
+                        cache_read_input_tokens=cache_read_input_tokens,
+                        reasoning_tokens=reasoning_tokens,
                     )
                 )
             await self.push_frame(LLMFullResponseEndFrame())
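With the two new counters threaded through the stream loop, the usage report gains cache and reasoning detail. A sketch of the resulting structure, assuming the fields live on pipecat's LLMTokenUsage as the call above suggests (values are illustrative):

    from pipecat.metrics.metrics import LLMTokenUsage

    usage = LLMTokenUsage(
        prompt_tokens=1200,
        completion_tokens=85,
        total_tokens=1285,
        cache_read_input_tokens=900,  # sums cached_content_token_count
        reasoning_tokens=64,          # sums thoughts_token_count
    )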

pipecat/services/google/llm_openai.py
CHANGED

@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Google LLM service using OpenAI-compatible API format.
+
+This module provides integration with Google's AI LLM models using the OpenAI
+API format through Google's Gemini API OpenAI compatibility layer.
+"""
+
 import json
 import os
 
@@ -11,7 +17,6 @@ from openai import AsyncStream
 from openai.types.chat import ChatCompletionChunk
 
 from pipecat.services.llm_service import FunctionCallFromLLM
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 
 # Suppress gRPC fork warnings
 os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false"
@@ -27,8 +32,17 @@ from pipecat.services.openai.llm import OpenAILLMService
 
 
 class GoogleLLMOpenAIBetaService(OpenAILLMService):
-    """
-
+    """Google LLM service using OpenAI-compatible API format.
+
+    This service provides access to Google's AI LLM models (like Gemini) through
+    the OpenAI API format. It handles streaming responses, function calls, and
+    tool usage while maintaining compatibility with OpenAI's interface.
+
+    Note: This service includes a workaround for a Google API bug where function
+    call indices may be incorrectly set to None, resulting in empty function names.
+
+    Reference:
+        https://ai.google.dev/gemini-api/docs/openai
     """
 
     def __init__(
@@ -39,6 +53,14 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
         model: str = "gemini-2.0-flash",
         **kwargs,
     ):
+        """Initialize the Google LLM service.
+
+        Args:
+            api_key: Google API key for authentication.
+            base_url: Base URL for Google's OpenAI-compatible API.
+            model: Google model name to use (e.g., "gemini-2.0-flash").
+            **kwargs: Additional arguments passed to the parent OpenAILLMService.
+        """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
 
     async def _process_context(self, context: OpenAILLMContext):
@@ -56,7 +78,7 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
             context
         )
 
-        async for chunk in
+        async for chunk in chunk_stream:
             if chunk.usage:
                 tokens = LLMTokenUsage(
                     prompt_tokens=chunk.usage.prompt_tokens,
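A minimal construction sketch for the OpenAI-compatible service (the API key is a placeholder; per the diff, streaming chunks are now consumed directly rather than through the removed WatchdogAsyncIterator wrapper):

    from pipecat.services.google.llm_openai import GoogleLLMOpenAIBetaService

    llm = GoogleLLMOpenAIBetaService(
        api_key="YOUR_GOOGLE_API_KEY",  # placeholder
        model="gemini-2.0-flash",
    )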

pipecat/services/google/llm_vertex.py
CHANGED

@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Google Vertex AI LLM service implementation.
+
+This module provides integration with Google's AI models via Vertex AI while
+maintaining OpenAI API compatibility through Google's OpenAI-compatible endpoint.
+"""
+
 import json
 import os
 
@@ -31,16 +37,24 @@ except ModuleNotFoundError as e:
 
 
 class GoogleVertexLLMService(OpenAILLMService):
-    """
-    maintaining OpenAI API compatibility.
+    """Google Vertex AI LLM service with OpenAI API compatibility.
 
-
-
+    Provides access to Google's AI models via Vertex AI while maintaining
+    OpenAI API compatibility. Handles authentication using Google service
+    account credentials and constructs appropriate endpoint URLs for
+    different GCP regions and projects.
 
+    Reference:
+        https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/call-vertex-using-openai-library
     """
 
     class InputParams(OpenAILLMService.InputParams):
-        """Input parameters specific to Vertex AI.
+        """Input parameters specific to Vertex AI.
+
+        Parameters:
+            location: GCP region for Vertex AI endpoint (e.g., "us-east4").
+            project_id: Google Cloud project ID.
+        """
 
         # https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations
         location: str = "us-east4"
@@ -58,11 +72,11 @@ class GoogleVertexLLMService(OpenAILLMService):
         """Initializes the VertexLLMService.
 
         Args:
-            credentials
-            credentials_path
-            model
-            params
-            **kwargs: Additional arguments
+            credentials: JSON string of service account credentials.
+            credentials_path: Path to the service account JSON file.
+            model: Model identifier (e.g., "google/gemini-2.0-flash-001").
+            params: Vertex AI input parameters including location and project.
+            **kwargs: Additional arguments passed to OpenAILLMService.
         """
         params = params or OpenAILLMService.InputParams()
         base_url = self._get_base_url(params)
@@ -74,7 +88,7 @@ class GoogleVertexLLMService(OpenAILLMService):
 
     @staticmethod
     def _get_base_url(params: InputParams) -> str:
-        """Constructs the base URL for
+        """Constructs the base URL for Vertex AI API."""
         hostname_prefix = "" if params.location == "global" else f"{params.location}-"
         return (
             f"https://{hostname_prefix}aiplatform.googleapis.com/v1/"
@@ -83,14 +97,22 @@ class GoogleVertexLLMService(OpenAILLMService):
 
     @staticmethod
     def _get_api_token(credentials: Optional[str], credentials_path: Optional[str]) -> str:
-        """
+        """Retrieve an authentication token using Google service account credentials.
+
+        Supports multiple authentication methods:
+        1. Direct JSON credentials string
+        2. Path to service account JSON file
+        3. Default application credentials (ADC)
 
         Args:
-            credentials
-            credentials_path
+            credentials: JSON string of service account credentials.
+            credentials_path: Path to the service account JSON file.
 
         Returns:
-
+            OAuth token for API authentication.
+
+        Raises:
+            ValueError: If no valid credentials are provided or found.
         """
         creds: Optional[service_account.Credentials] = None
 
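A construction sketch for the Vertex service using a service-account file (project, region, and path are placeholders). With location "us-east4", _get_base_url produces an endpoint under https://us-east4-aiplatform.googleapis.com/v1/, while location "global" drops the region prefix:

    from pipecat.services.google.llm_vertex import GoogleVertexLLMService

    llm = GoogleVertexLLMService(
        credentials_path="/path/to/service-account.json",  # placeholder path
        model="google/gemini-2.0-flash-001",
        params=GoogleVertexLLMService.InputParams(
            location="us-east4",          # GCP region for the endpoint
            project_id="my-gcp-project",  # placeholder project ID
        ),
    )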
pipecat/services/google/rtvi.py
CHANGED

@@ -4,6 +4,13 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Google RTVI integration models and observer implementation.
+
+This module provides integration with Google's services through the RTVI framework,
+including models for search responses and an observer for handling Google-specific
+frame types.
+"""
+
 from typing import List, Literal, Optional
 
 from pydantic import BaseModel
@@ -16,22 +23,56 @@ from pipecat.services.google.frames import LLMSearchOrigin, LLMSearchResponseFra
 
 
 class RTVISearchResponseMessageData(BaseModel):
+    """Data payload for search response messages in RTVI protocol.
+
+    Parameters:
+        search_result: The search result text, if available.
+        rendered_content: The rendered content from the search, if available.
+        origins: List of search result origins with metadata.
+    """
+
     search_result: Optional[str]
     rendered_content: Optional[str]
     origins: List[LLMSearchOrigin]
 
 
 class RTVIBotLLMSearchResponseMessage(BaseModel):
+    """RTVI message for bot LLM search responses.
+
+    Parameters:
+        label: Always "rtvi-ai" for RTVI protocol messages.
+        type: Always "bot-llm-search-response" for this message type.
+        data: The search response data payload.
+    """
+
     label: Literal["rtvi-ai"] = "rtvi-ai"
     type: Literal["bot-llm-search-response"] = "bot-llm-search-response"
     data: RTVISearchResponseMessageData
 
 
 class GoogleRTVIObserver(RTVIObserver):
+    """RTVI observer for Google service integration.
+
+    Extends the base RTVIObserver to handle Google-specific frame types,
+    particularly LLM search response frames from Google services.
+    """
+
     def __init__(self, rtvi: RTVIProcessor):
+        """Initialize the Google RTVI observer.
+
+        Args:
+            rtvi: The RTVI processor to send messages through.
+        """
         super().__init__(rtvi)
 
     async def on_push_frame(self, data: FramePushed):
+        """Process frames being pushed through the pipeline.
+
+        Handles Google-specific frames in addition to the base RTVI frame types.
+
+        Args:
+            data: Frame push event data containing frame and metadata.
+        """
         await super().on_push_frame(data)
 
         frame = data.frame