dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +20 -4
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +2 -2
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
pipecat/services/deepseek/llm.py
CHANGED
@@ -9,9 +9,8 @@
 from typing import List
 
 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam
 
-from pipecat.
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService
 
 
@@ -54,19 +53,22 @@ class DeepSeekLLMService(OpenAILLMService):
         logger.debug(f"Creating DeepSeek client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    def _build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def _build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for DeepSeek chat completion request.
 
         DeepSeek doesn't support some OpenAI parameters like seed and max_completion_tokens.
+
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "stream_options": {"include_usage": True},
             "frequency_penalty": self._settings["frequency_penalty"],
             "presence_penalty": self._settings["presence_penalty"],
@@ -75,5 +77,8 @@ class DeepSeekLLMService(OpenAILLMService):
             "max_tokens": self._settings["max_tokens"],
         }
 
+        # Messages, tools, tool_choice
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
         return params
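The change above (and the matching Fireworks change below) replaces the old (context, messages) arguments with a single params_from_context mapping that is merged into the request with params.update(). A minimal sketch of that merge pattern follows; the TypedDict is illustrative only, with field names taken from the docstring above (messages, tools, tool choice). The real OpenAILLMInvocationParams lives in pipecat/adapters/services/open_ai_adapter.py and may differ.

# Minimal sketch of the invocation-params pattern (the TypedDict below is a
# local stand-in, not the real OpenAILLMInvocationParams).
from typing import Any, Dict, List, TypedDict


class InvocationParams(TypedDict):
    messages: List[Dict[str, Any]]
    tools: List[Dict[str, Any]]
    tool_choice: Any


def build_chat_completion_params(params_from_context: InvocationParams) -> dict:
    # Service-specific settings first, then merge in the context-derived
    # messages/tools/tool_choice, mirroring params.update(params_from_context).
    params: dict = {"model": "deepseek-chat", "stream": True}
    params.update(params_from_context)
    return params


print(
    build_chat_completion_params(
        {
            "messages": [{"role": "user", "content": "Hello"}],
            "tools": [],
            "tool_choice": "none",
        }
    )
)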
pipecat/services/fireworks/llm.py
CHANGED
@@ -9,9 +9,8 @@
 from typing import List
 
 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam
 
-from pipecat.
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService
 
 
@@ -54,20 +53,23 @@ class FireworksLLMService(OpenAILLMService):
         logger.debug(f"Creating Fireworks client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    def build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for Fireworks chat completion request.
 
         Fireworks doesn't support some OpenAI parameters like seed, max_completion_tokens,
         and stream_options.
+
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "frequency_penalty": self._settings["frequency_penalty"],
             "presence_penalty": self._settings["presence_penalty"],
             "temperature": self._settings["temperature"],
@@ -75,5 +77,8 @@ class FireworksLLMService(OpenAILLMService):
             "max_tokens": self._settings["max_tokens"],
         }
 
+        # Messages, tools, tool_choice
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
         return params
pipecat/services/fish/tts.py
CHANGED
@@ -120,12 +120,14 @@ class FishAudioTTSService(InterruptibleTTSService):
         if model:
             import warnings
 
-            warnings.
-
-
-
-
-
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "Parameter 'model' is deprecated and will be removed in a future version. "
+                    "Use 'reference_id' instead.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             reference_id = model
 
         self._api_key = api_key
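The Fish Audio change wraps its deprecation notice in warnings.catch_warnings() with simplefilter("always"), so the warning fires even though Python hides DeprecationWarning by default, and the filter change stays scoped to the block. A self-contained sketch of the same pattern, using an illustrative helper rather than the actual FishAudioTTSService constructor:

# Sketch of the scoped deprecation-warning pattern; the function and parameter
# names here are illustrative, not the actual FishAudioTTSService API.
import warnings
from typing import Optional


def resolve_reference_id(model: Optional[str], reference_id: Optional[str]) -> Optional[str]:
    if model:
        with warnings.catch_warnings():
            # "always" ensures the warning is emitted even though Python hides
            # DeprecationWarning by default; the filter change is limited to
            # this block and does not leak into global state.
            warnings.simplefilter("always")
            warnings.warn(
                "Parameter 'model' is deprecated. Use 'reference_id' instead.",
                DeprecationWarning,
                stacklevel=2,
            )
        reference_id = model
    return reference_id


print(resolve_reference_id(model="speaker-123", reference_id=None))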
pipecat/services/gemini_multimodal_live/gemini.py
CHANGED
@@ -33,6 +33,7 @@ from pipecat.frames.frames import (
     InputAudioRawFrame,
     InputImageRawFrame,
     InputTextRawFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesAppendFrame,
@@ -738,6 +739,10 @@ class GeminiMultimodalLiveLLMService(LLMService):
             # Support just one tool call per context frame for now
             tool_result_message = context.messages[-1]
             await self._tool_result(tool_result_message)
+        elif isinstance(frame, LLMContextFrame):
+            raise NotImplementedError(
+                "Universal LLMContext is not yet supported for Gemini Multimodal Live."
+            )
         elif isinstance(frame, InputTextRawFrame):
             await self._send_user_text(frame.text)
         await self.push_frame(frame, direction)
pipecat/services/gladia/config.py
CHANGED
@@ -29,9 +29,11 @@ class PreProcessingConfig(BaseModel):
     """Configuration for audio pre-processing options.
 
     Parameters:
+        audio_enhancer: Apply pre-processing to the audio stream to enhance quality
         speech_threshold: Sensitivity for speech detection (0-1)
     """
 
+    audio_enhancer: Optional[bool] = None
     speech_threshold: Optional[float] = None
 
 
@@ -41,10 +43,14 @@ class CustomVocabularyItem(BaseModel):
     Parameters:
         value: The vocabulary word or phrase
        intensity: The bias intensity for this vocabulary item (0-1)
+        pronunciations: The pronunciations used in the transcription.
+        language: Specify the language in which it will be pronounced when sound comparison occurs. Default to transcription language.
     """
 
     value: str
     intensity: float
+    pronunciations: Optional[List[str]] = None
+    language: Optional[str] = None
 
 
 class CustomVocabularyConfig(BaseModel):
@@ -170,7 +176,7 @@ class GladiaInputParams(BaseModel):
     channels: Optional[int] = 1
     custom_metadata: Optional[Dict[str, Any]] = None
     endpointing: Optional[float] = None
-    maximum_duration_without_endpointing: Optional[int] =
+    maximum_duration_without_endpointing: Optional[int] = 5
     language: Optional[Language] = None  # Deprecated
     language_config: Optional[LanguageConfig] = None
     pre_processing: Optional[PreProcessingConfig] = None
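For reference, a standalone sketch of how the extended Gladia config models would be populated. The classes below are redeclared locally so the example runs on its own; they only mirror the Pydantic fields shown in the diff, and the real definitions live in pipecat/services/gladia/config.py.

# Standalone sketch; these local classes mirror the fields shown above and are
# not the real pipecat.services.gladia.config classes.
from typing import List, Optional

from pydantic import BaseModel


class CustomVocabularyItem(BaseModel):
    value: str
    intensity: float
    pronunciations: Optional[List[str]] = None  # new: alternate spoken forms
    language: Optional[str] = None  # new: language used for sound comparison


class PreProcessingConfig(BaseModel):
    audio_enhancer: Optional[bool] = None  # new: enhance the audio stream
    speech_threshold: Optional[float] = None


item = CustomVocabularyItem(
    value="Pipecat",
    intensity=0.8,
    pronunciations=["pipe cat"],
    language="en",
)
pre = PreProcessingConfig(audio_enhancer=True, speech_threshold=0.6)
print(item.model_dump(exclude_none=True), pre.model_dump(exclude_none=True))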
pipecat/services/gladia/stt.py
CHANGED
@@ -14,11 +14,12 @@ import asyncio
 import base64
 import json
 import warnings
-from typing import Any, AsyncGenerator, Dict,
+from typing import Any, AsyncGenerator, Dict, List, Literal, Optional
 
 import aiohttp
 from loguru import logger
 
+from pipecat import __version__ as pipecat_version
 from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
@@ -179,12 +180,16 @@ class _InputParamsDescriptor:
     """Descriptor for backward compatibility with deprecation warning."""
 
     def __get__(self, obj, objtype=None):
-        warnings
-
-
-
-
-
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "GladiaSTTService.InputParams is deprecated and will be removed in a future version. "
+                "Import and use GladiaInputParams directly instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         return GladiaInputParams
 
 
@@ -241,12 +246,14 @@ class GladiaSTTService(STTService):
 
         # Warn about deprecated language parameter if it's used
        if params.language is not None:
-            warnings.
-
-
-
-
-
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'language' parameter is deprecated and will be removed in a future version. "
+                    "Use 'language_config' instead.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
 
         self._api_key = api_key
         self._region = region
@@ -322,8 +329,8 @@ class GladiaSTTService(STTService):
         }
 
         # Add custom_metadata if provided
-
-
+        settings["custom_metadata"] = dict(self._params.custom_metadata or {})
+        settings["custom_metadata"]["pipecat"] = pipecat_version
 
         # Add endpointing parameters if provided
         if self._params.endpointing is not None:
@@ -449,6 +456,7 @@ class GladiaSTTService(STTService):
            response = await self._setup_gladia(settings)
            self._session_url = response["url"]
            self._reconnection_attempts = 0
+            logger.info(f"Session URL : {self._session_url}")
 
            # Connect with automatic reconnection
            async with websocket_connect(self._session_url) as websocket:
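GladiaSTTService.InputParams is kept as a backward-compatible alias through a descriptor whose __get__ warns and then returns GladiaInputParams. A generic sketch of that pattern, with placeholder class names rather than the actual Gladia classes:

# Generic sketch of the descriptor-based alias pattern; class names here are
# placeholders (the real code uses _InputParamsDescriptor and GladiaInputParams).
import warnings


class NewParams:
    """The replacement class that callers should import directly."""


class _DeprecatedAlias:
    def __get__(self, obj, objtype=None):
        with warnings.catch_warnings():
            warnings.simplefilter("always")
            warnings.warn(
                "Service.InputParams is deprecated; use NewParams directly instead.",
                DeprecationWarning,
                stacklevel=2,
            )
        # Returning the new class keeps old call sites like Service.InputParams(...)
        # working while steering users toward the replacement.
        return NewParams


class Service:
    InputParams = _DeprecatedAlias()


assert Service.InputParams is NewParams  # access emits a DeprecationWarning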
pipecat/services/google/llm.py
CHANGED
@@ -16,19 +16,20 @@ import json
 import os
 import uuid
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional
+from typing import Any, AsyncIterator, Dict, List, Optional
 
 from loguru import logger
 from PIL import Image
 from pydantic import BaseModel, Field
 
-from pipecat.adapters.services.gemini_adapter import GeminiLLMAdapter
+from pipecat.adapters.services.gemini_adapter import GeminiLLMAdapter, GeminiLLMInvocationParams
 from pipecat.frames.frames import (
     AudioRawFrame,
     Frame,
     FunctionCallCancelFrame,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesFrame,
@@ -38,6 +39,7 @@ from pipecat.frames.frames import (
     VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMUserAggregatorParams,
@@ -67,6 +69,7 @@ try:
         FunctionCall,
         FunctionResponse,
         GenerateContentConfig,
+        GenerateContentResponse,
         HttpOptions,
         Part,
     )
@@ -289,11 +292,11 @@ class GoogleLLMContext(OpenAILLMContext):
         # Add the converted messages to our existing messages
         self._messages.extend(converted_messages)
 
-    def get_messages_for_logging(self):
+    def get_messages_for_logging(self) -> List[Dict[str, Any]]:
         """Get messages formatted for logging with sensitive data redacted.
 
         Returns:
-            List of
+            List of messages in a format ready for logging.
         """
         msgs = []
         for message in self.messages:
@@ -418,7 +421,14 @@ class GoogleLLMContext(OpenAILLMContext):
         role = message["role"]
         content = message.get("content", [])
         if role == "system":
-
+            # System instructions are returned as plain text
+            if isinstance(content, str):
+                self.system_message = content
+            elif isinstance(content, list):
+                # If content is a list, we assume it's a list of text parts, per the standard
+                self.system_message = " ".join(
+                    part["text"] for part in content if part.get("type") == "text"
+                )
             return None
         elif role == "assistant":
             role = "model"
@@ -436,11 +446,20 @@ class GoogleLLMContext(OpenAILLMContext):
             )
         elif role == "tool":
             role = "model"
+            try:
+                response = json.loads(message["content"])
+                if isinstance(response, dict):
+                    response_dict = response
+                else:
+                    response_dict = {"value": response}
+            except Exception as e:
+                # Response might not be JSON-deserializable (e.g. plain text).
+                response_dict = {"value": message["content"]}
             parts.append(
                 Part(
                     function_response=FunctionResponse(
                         name="tool_call_result",  # seems to work to hard-code the same name every time
-                        response=
+                        response=response_dict,
                     )
                 )
             )
@@ -636,9 +655,8 @@ class GoogleLLMService(LLMService):
     """Google AI (Gemini) LLM service implementation.
 
     This class implements inference with Google's AI models, translating internally
-    from OpenAILLMContext
-
-    easy switching between different LLMs.
+    from an OpenAILLMContext or a universal LLMContext to the messages format
+    expected by the Google AI model.
     """
 
     # Overriding the default adapter to use the Gemini one.
@@ -715,6 +733,44 @@ class GoogleLLMService(LLMService):
     def _create_client(self, api_key: str, http_options: Optional[HttpOptions] = None):
         self._client = genai.Client(api_key=api_key, http_options=http_options)
 
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Args:
+            context: The LLM context containing conversation history.
+
+        Returns:
+            The LLM's response as a string, or None if no response is generated.
+        """
+        messages = []
+        system = []
+        if isinstance(context, LLMContext):
+            adapter = self.get_llm_adapter()
+            params: GeminiLLMInvocationParams = adapter.get_llm_invocation_params(context)
+            messages = params["messages"]
+            system = params["system_instruction"]
+        else:
+            context = GoogleLLMContext.upgrade_to_google(context)
+            messages = context.messages
+            system = getattr(context, "system_message", None)
+
+        generation_config = GenerateContentConfig(system_instruction=system)
+
+        # Use the new google-genai client's async method
+        response = await self._client.aio.models.generate_content(
+            model=self._model_name,
+            contents=messages,
+            config=generation_config,
+        )
+
+        # Extract text from response
+        if response.candidates and response.candidates[0].content:
+            for part in response.candidates[0].content.parts:
+                if part.text:
+                    return part.text
+
+        return None
+
     def needs_mcp_alternate_schema(self) -> bool:
         """Check if this LLM service requires alternate MCP schema.
 
@@ -740,8 +796,87 @@ class GoogleLLMService(LLMService):
         except Exception as e:
             logger.exception(f"Failed to unset thinking budget: {e}")
 
+    async def _stream_content(
+        self, params_from_context: GeminiLLMInvocationParams
+    ) -> AsyncIterator[GenerateContentResponse]:
+        messages = params_from_context["messages"]
+        if (
+            params_from_context["system_instruction"]
+            and self._system_instruction != params_from_context["system_instruction"]
+        ):
+            logger.debug(f"System instruction changed: {params_from_context['system_instruction']}")
+            self._system_instruction = params_from_context["system_instruction"]
+
+        tools = []
+        if params_from_context["tools"]:
+            tools = params_from_context["tools"]
+        elif self._tools:
+            tools = self._tools
+        tool_config = None
+        if self._tool_config:
+            tool_config = self._tool_config
+
+        # Filter out None values and create GenerationContentConfig
+        generation_params = {
+            k: v
+            for k, v in {
+                "system_instruction": self._system_instruction,
+                "temperature": self._settings["temperature"],
+                "top_p": self._settings["top_p"],
+                "top_k": self._settings["top_k"],
+                "max_output_tokens": self._settings["max_tokens"],
+                "tools": tools,
+                "tool_config": tool_config,
+            }.items()
+            if v is not None
+        }
+
+        if self._settings["extra"]:
+            generation_params.update(self._settings["extra"])
+
+        # possibly modify generation_params (in place) to set thinking to off by default
+        self._maybe_unset_thinking_budget(generation_params)
+
+        generation_config = (
+            GenerateContentConfig(**generation_params) if generation_params else None
+        )
+
+        await self.start_ttfb_metrics()
+        return await self._client.aio.models.generate_content_stream(
+            model=self._model_name,
+            contents=messages,
+            config=generation_config,
+        )
+
+    async def _stream_content_specific_context(
+        self, context: OpenAILLMContext
+    ) -> AsyncIterator[GenerateContentResponse]:
+        logger.debug(
+            f"{self}: Generating chat from LLM-specific context [{context.system_message}] | {context.get_messages_for_logging()}"
+        )
+
+        params = GeminiLLMInvocationParams(
+            messages=context.messages,
+            system_instruction=context.system_message,
+            tools=context.tools,
+        )
+
+        return await self._stream_content(params)
+
+    async def _stream_content_universal_context(
+        self, context: LLMContext
+    ) -> AsyncIterator[GenerateContentResponse]:
+        adapter = self.get_llm_adapter()
+        params: GeminiLLMInvocationParams = adapter.get_llm_invocation_params(context)
+
+        logger.debug(
+            f"{self}: Generating chat from universal context [{params['system_instruction']}] | {adapter.get_messages_for_logging(context)}"
+        )
+
+        return await self._stream_content(params)
+
     @traced_llm
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         await self.push_frame(LLMFullResponseStartFrame())
 
         prompt_tokens = 0
@@ -754,55 +889,11 @@ class GoogleLLMService(LLMService):
         search_result = ""
 
         try:
-
-
-
-
-
-            messages = context.messages
-            if context.system_message and self._system_instruction != context.system_message:
-                self.logger.debug(f"System instruction changed: {context.system_message}")
-                self._system_instruction = context.system_message
-
-            tools = []
-            if context.tools:
-                tools = context.tools
-            elif self._tools:
-                tools = self._tools
-            tool_config = None
-            if self._tool_config:
-                tool_config = self._tool_config
-
-            # Filter out None values and create GenerationContentConfig
-            generation_params = {
-                k: v
-                for k, v in {
-                    "system_instruction": self._system_instruction,
-                    "temperature": self._settings["temperature"],
-                    "top_p": self._settings["top_p"],
-                    "top_k": self._settings["top_k"],
-                    "max_output_tokens": self._settings["max_tokens"],
-                    "tools": tools,
-                    "tool_config": tool_config,
-                }.items()
-                if v is not None
-            }
-
-            if self._settings["extra"]:
-                generation_params.update(self._settings["extra"])
-
-            # possibly modify generation_params (in place) to set thinking to off by default
-            self._maybe_unset_thinking_budget(generation_params)
-
-            generation_config = (
-                GenerateContentConfig(**generation_params) if generation_params else None
-            )
-
-            await self.start_ttfb_metrics()
-            response = await self._client.aio.models.generate_content_stream(
-                model=self._model_name,
-                contents=messages,
-                config=generation_config,
+            # Generate content using either OpenAILLMContext or universal LLMContext
+            response = await (
+                self._stream_content_specific_context(context)
+                if isinstance(context, OpenAILLMContext)
+                else self._stream_content_universal_context(context)
             )
 
             function_calls = []
@@ -915,9 +1006,18 @@ class GoogleLLMService(LLMService):
 
         if isinstance(frame, OpenAILLMContextFrame):
             context = GoogleLLMContext.upgrade_to_google(frame.context)
+        elif isinstance(frame, LLMContextFrame):
+            # Handle universal (LLM-agnostic) LLM context frames
+            context = frame.context
         elif isinstance(frame, LLMMessagesFrame):
+            # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal
+            # LLMContext with it
            context = GoogleLLMContext(frame.messages)
         elif isinstance(frame, VisionImageRawFrame):
+            # This is only useful in very simple pipelines because it creates
+            # a new context. Generally we want a context manager to catch
+            # UserImageRawFrames coming through the pipeline and add them
+            # to the context.
            context = GoogleLLMContext()
            context.add_image_frame_message(
                format=frame.format, size=frame.size, image=frame.image, text=frame.text
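The new GoogleLLMService.run_inference() performs a one-shot completion outside the pipeline and accepts either an OpenAILLMContext or the new universal LLMContext. A hypothetical usage sketch follows; the GoogleLLMService constructor arguments and the OpenAILLMContext construction shown here are assumptions, so check the actual signatures before relying on them.

# Hypothetical usage sketch (assumptions: GoogleLLMService accepts api_key/model
# keyword arguments and OpenAILLMContext accepts an initial message list).
import asyncio

from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.google.llm import GoogleLLMService


async def main() -> None:
    llm = GoogleLLMService(api_key="GEMINI_API_KEY", model="gemini-2.0-flash")
    context = OpenAILLMContext(
        messages=[{"role": "user", "content": "Reply with a single word: hello."}]
    )
    # run_inference() runs a one-shot, out-of-band completion and returns the
    # generated text, or None if the model produced no text parts.
    reply = await llm.run_inference(context)
    print(reply)


asyncio.run(main())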
pipecat/services/google/llm_openai.py
CHANGED
@@ -41,6 +41,10 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
     Note: This service includes a workaround for a Google API bug where function
     call indices may be incorrectly set to None, resulting in empty function names.
 
+    .. deprecated:: 0.0.82
+        GoogleLLMOpenAIBetaService is deprecated and will be removed in a future version.
+        Use GoogleLLMService instead for better integration with Google's native API.
+
     Reference:
         https://ai.google.dev/gemini-api/docs/openai
     """
@@ -61,6 +65,17 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
             model: Google model name to use (e.g., "gemini-2.0-flash").
             **kwargs: Additional arguments passed to the parent OpenAILLMService.
         """
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "GoogleLLMOpenAIBetaService is deprecated and will be removed in a future version. "
+                "Use GoogleLLMService instead for better integration with Google's native API.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
 
     async def _process_context(self, context: OpenAILLMContext):
@@ -74,9 +89,9 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
 
         await self.start_ttfb_metrics()
 
-        chunk_stream: AsyncStream[
-
-        )
+        chunk_stream: AsyncStream[
+            ChatCompletionChunk
+        ] = await self._stream_chat_completions_specific_context(context)
 
         async for chunk in chunk_stream:
             if chunk.usage:
pipecat/services/grok/llm.py
CHANGED
@@ -16,6 +16,7 @@ from dataclasses import dataclass
 from loguru import logger
 
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMUserAggregatorParams,
@@ -107,7 +108,7 @@ class GrokLLMService(OpenAILLMService):
         logger.debug(f"Creating Grok client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         """Process a context through the LLM and accumulate token usage metrics.
 
         This method overrides the parent class implementation to handle Grok's