dv-pipecat-ai 0.0.75.dev887__py3-none-any.whl → 0.0.82.dev23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev23.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev23.dist-info}/RECORD +121 -81
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +20 -4
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/azure/llm.py +53 -1
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/elevenlabs/__init__.py +2 -0
- pipecat/services/elevenlabs/stt.py +351 -0
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +4 -4
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev23.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev23.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev23.dist-info}/top_level.txt +0 -0
pipecat/services/gladia/stt.py
CHANGED
@@ -14,11 +14,12 @@ import asyncio
 import base64
 import json
 import warnings
-from typing import Any, AsyncGenerator, Dict,
+from typing import Any, AsyncGenerator, Dict, List, Literal, Optional
 
 import aiohttp
 from loguru import logger
 
+from pipecat import __version__ as pipecat_version
 from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
@@ -179,12 +180,16 @@ class _InputParamsDescriptor:
     """Descriptor for backward compatibility with deprecation warning."""
 
     def __get__(self, obj, objtype=None):
-        warnings
-
-
-
-
-
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "GladiaSTTService.InputParams is deprecated and will be removed in a future version. "
+                "Import and use GladiaInputParams directly instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         return GladiaInputParams
 
 
@@ -241,12 +246,14 @@ class GladiaSTTService(STTService):
 
         # Warn about deprecated language parameter if it's used
         if params.language is not None:
-            warnings.
-
-
-
-
-
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'language' parameter is deprecated and will be removed in a future version. "
+                    "Use 'language_config' instead.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
 
         self._api_key = api_key
         self._region = region
@@ -322,8 +329,8 @@ class GladiaSTTService(STTService):
         }
 
         # Add custom_metadata if provided
-
-
+        settings["custom_metadata"] = dict(self._params.custom_metadata or {})
+        settings["custom_metadata"]["pipecat"] = pipecat_version
 
         # Add endpointing parameters if provided
         if self._params.endpointing is not None:
@@ -449,6 +456,7 @@ class GladiaSTTService(STTService):
         response = await self._setup_gladia(settings)
         self._session_url = response["url"]
         self._reconnection_attempts = 0
+        logger.info(f"Session URL : {self._session_url}")
 
         # Connect with automatic reconnection
         async with websocket_connect(self._session_url) as websocket:
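The Gladia changes above deprecate GladiaSTTService.InputParams in favor of GladiaInputParams, deprecate the `language` parameter in favor of `language_config`, and stamp the running pipecat version into the session's `custom_metadata`. A minimal construction sketch on the non-deprecated path, assuming GladiaInputParams is importable from pipecat.services.gladia.config and that fields not visible in this diff behave as their names suggest:

    from pipecat.services.gladia.config import GladiaInputParams  # assumed import location
    from pipecat.services.gladia.stt import GladiaSTTService

    # Prefer GladiaInputParams over the deprecated GladiaSTTService.InputParams,
    # and language_config over the deprecated `language` keyword.
    stt = GladiaSTTService(
        api_key="YOUR_GLADIA_API_KEY",
        params=GladiaInputParams(
            # custom_metadata is copied into the session settings and merged with
            # {"pipecat": <version>} by the service, per the hunk above.
            custom_metadata={"call_id": "example-123"},
        ),
    )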
pipecat/services/google/llm.py
CHANGED
@@ -16,19 +16,20 @@ import json
 import os
 import uuid
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional
+from typing import Any, AsyncIterator, Dict, List, Optional
 
 from loguru import logger
 from PIL import Image
 from pydantic import BaseModel, Field
 
-from pipecat.adapters.services.gemini_adapter import GeminiLLMAdapter
+from pipecat.adapters.services.gemini_adapter import GeminiLLMAdapter, GeminiLLMInvocationParams
 from pipecat.frames.frames import (
     AudioRawFrame,
     Frame,
     FunctionCallCancelFrame,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesFrame,
@@ -38,6 +39,7 @@ from pipecat.frames.frames import (
     VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMUserAggregatorParams,
@@ -67,6 +69,7 @@ try:
     FunctionCall,
     FunctionResponse,
     GenerateContentConfig,
+    GenerateContentResponse,
     HttpOptions,
     Part,
 )
@@ -289,11 +292,11 @@ class GoogleLLMContext(OpenAILLMContext):
         # Add the converted messages to our existing messages
         self._messages.extend(converted_messages)
 
-    def get_messages_for_logging(self):
+    def get_messages_for_logging(self) -> List[Dict[str, Any]]:
         """Get messages formatted for logging with sensitive data redacted.
 
         Returns:
-            List of
+            List of messages in a format ready for logging.
         """
         msgs = []
         for message in self.messages:
@@ -418,7 +421,14 @@ class GoogleLLMContext(OpenAILLMContext):
         role = message["role"]
         content = message.get("content", [])
         if role == "system":
-
+            # System instructions are returned as plain text
+            if isinstance(content, str):
+                self.system_message = content
+            elif isinstance(content, list):
+                # If content is a list, we assume it's a list of text parts, per the standard
+                self.system_message = " ".join(
+                    part["text"] for part in content if part.get("type") == "text"
+                )
             return None
         elif role == "assistant":
             role = "model"
@@ -436,11 +446,20 @@ class GoogleLLMContext(OpenAILLMContext):
             )
         elif role == "tool":
             role = "model"
+            try:
+                response = json.loads(message["content"])
+                if isinstance(response, dict):
+                    response_dict = response
+                else:
+                    response_dict = {"value": response}
+            except Exception as e:
+                # Response might not be JSON-deserializable (e.g. plain text).
+                response_dict = {"value": message["content"]}
             parts.append(
                 Part(
                     function_response=FunctionResponse(
                         name="tool_call_result",  # seems to work to hard-code the same name every time
-                        response=
+                        response=response_dict,
                     )
                 )
             )
@@ -636,9 +655,8 @@ class GoogleLLMService(LLMService):
     """Google AI (Gemini) LLM service implementation.
 
     This class implements inference with Google's AI models, translating internally
-    from OpenAILLMContext
-
-    easy switching between different LLMs.
+    from an OpenAILLMContext or a universal LLMContext to the messages format
+    expected by the Google AI model.
     """
 
     # Overriding the default adapter to use the Gemini one.
@@ -715,6 +733,44 @@ class GoogleLLMService(LLMService):
     def _create_client(self, api_key: str, http_options: Optional[HttpOptions] = None):
         self._client = genai.Client(api_key=api_key, http_options=http_options)
 
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Args:
+            context: The LLM context containing conversation history.
+
+        Returns:
+            The LLM's response as a string, or None if no response is generated.
+        """
+        messages = []
+        system = []
+        if isinstance(context, LLMContext):
+            adapter = self.get_llm_adapter()
+            params: GeminiLLMInvocationParams = adapter.get_llm_invocation_params(context)
+            messages = params["messages"]
+            system = params["system_instruction"]
+        else:
+            context = GoogleLLMContext.upgrade_to_google(context)
+            messages = context.messages
+            system = getattr(context, "system_message", None)
+
+        generation_config = GenerateContentConfig(system_instruction=system)
+
+        # Use the new google-genai client's async method
+        response = await self._client.aio.models.generate_content(
+            model=self._model_name,
+            contents=messages,
+            config=generation_config,
+        )
+
+        # Extract text from response
+        if response.candidates and response.candidates[0].content:
+            for part in response.candidates[0].content.parts:
+                if part.text:
+                    return part.text
+
+        return None
+
     def needs_mcp_alternate_schema(self) -> bool:
         """Check if this LLM service requires alternate MCP schema.
 
@@ -740,8 +796,87 @@ class GoogleLLMService(LLMService):
         except Exception as e:
             logger.exception(f"Failed to unset thinking budget: {e}")
 
+    async def _stream_content(
+        self, params_from_context: GeminiLLMInvocationParams
+    ) -> AsyncIterator[GenerateContentResponse]:
+        messages = params_from_context["messages"]
+        if (
+            params_from_context["system_instruction"]
+            and self._system_instruction != params_from_context["system_instruction"]
+        ):
+            logger.debug(f"System instruction changed: {params_from_context['system_instruction']}")
+            self._system_instruction = params_from_context["system_instruction"]
+
+        tools = []
+        if params_from_context["tools"]:
+            tools = params_from_context["tools"]
+        elif self._tools:
+            tools = self._tools
+        tool_config = None
+        if self._tool_config:
+            tool_config = self._tool_config
+
+        # Filter out None values and create GenerationContentConfig
+        generation_params = {
+            k: v
+            for k, v in {
+                "system_instruction": self._system_instruction,
+                "temperature": self._settings["temperature"],
+                "top_p": self._settings["top_p"],
+                "top_k": self._settings["top_k"],
+                "max_output_tokens": self._settings["max_tokens"],
+                "tools": tools,
+                "tool_config": tool_config,
+            }.items()
+            if v is not None
+        }
+
+        if self._settings["extra"]:
+            generation_params.update(self._settings["extra"])
+
+        # possibly modify generation_params (in place) to set thinking to off by default
+        self._maybe_unset_thinking_budget(generation_params)
+
+        generation_config = (
+            GenerateContentConfig(**generation_params) if generation_params else None
+        )
+
+        await self.start_ttfb_metrics()
+        return await self._client.aio.models.generate_content_stream(
+            model=self._model_name,
+            contents=messages,
+            config=generation_config,
+        )
+
+    async def _stream_content_specific_context(
+        self, context: OpenAILLMContext
+    ) -> AsyncIterator[GenerateContentResponse]:
+        logger.debug(
+            f"{self}: Generating chat from LLM-specific context [{context.system_message}] | {context.get_messages_for_logging()}"
+        )
+
+        params = GeminiLLMInvocationParams(
+            messages=context.messages,
+            system_instruction=context.system_message,
+            tools=context.tools,
+        )
+
+        return await self._stream_content(params)
+
+    async def _stream_content_universal_context(
+        self, context: LLMContext
+    ) -> AsyncIterator[GenerateContentResponse]:
+        adapter = self.get_llm_adapter()
+        params: GeminiLLMInvocationParams = adapter.get_llm_invocation_params(context)
+
+        logger.debug(
+            f"{self}: Generating chat from universal context [{params['system_instruction']}] | {adapter.get_messages_for_logging(context)}"
+        )
+
+        return await self._stream_content(params)
+
     @traced_llm
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         await self.push_frame(LLMFullResponseStartFrame())
 
         prompt_tokens = 0
@@ -754,55 +889,11 @@ class GoogleLLMService(LLMService):
         search_result = ""
 
         try:
-
-
-
-
-
-            messages = context.messages
-            if context.system_message and self._system_instruction != context.system_message:
-                self.logger.debug(f"System instruction changed: {context.system_message}")
-                self._system_instruction = context.system_message
-
-            tools = []
-            if context.tools:
-                tools = context.tools
-            elif self._tools:
-                tools = self._tools
-            tool_config = None
-            if self._tool_config:
-                tool_config = self._tool_config
-
-            # Filter out None values and create GenerationContentConfig
-            generation_params = {
-                k: v
-                for k, v in {
-                    "system_instruction": self._system_instruction,
-                    "temperature": self._settings["temperature"],
-                    "top_p": self._settings["top_p"],
-                    "top_k": self._settings["top_k"],
-                    "max_output_tokens": self._settings["max_tokens"],
-                    "tools": tools,
-                    "tool_config": tool_config,
-                }.items()
-                if v is not None
-            }
-
-            if self._settings["extra"]:
-                generation_params.update(self._settings["extra"])
-
-            # possibly modify generation_params (in place) to set thinking to off by default
-            self._maybe_unset_thinking_budget(generation_params)
-
-            generation_config = (
-                GenerateContentConfig(**generation_params) if generation_params else None
-            )
-
-            await self.start_ttfb_metrics()
-            response = await self._client.aio.models.generate_content_stream(
-                model=self._model_name,
-                contents=messages,
-                config=generation_config,
+            # Generate content using either OpenAILLMContext or universal LLMContext
+            response = await (
+                self._stream_content_specific_context(context)
+                if isinstance(context, OpenAILLMContext)
+                else self._stream_content_universal_context(context)
             )
 
         function_calls = []
@@ -915,9 +1006,18 @@ class GoogleLLMService(LLMService):
 
         if isinstance(frame, OpenAILLMContextFrame):
             context = GoogleLLMContext.upgrade_to_google(frame.context)
+        elif isinstance(frame, LLMContextFrame):
+            # Handle universal (LLM-agnostic) LLM context frames
+            context = frame.context
         elif isinstance(frame, LLMMessagesFrame):
+            # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal
+            # LLMContext with it
            context = GoogleLLMContext(frame.messages)
         elif isinstance(frame, VisionImageRawFrame):
+            # This is only useful in very simple pipelines because it creates
+            # a new context. Generally we want a context manager to catch
+            # UserImageRawFrames coming through the pipeline and add them
+            # to the context.
            context = GoogleLLMContext()
            context.add_image_frame_message(
                format=frame.format, size=frame.size, image=frame.image, text=frame.text
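The new GoogleLLMService.run_inference() runs a single out-of-pipeline completion with the google-genai async client and returns the first text part, or None. A usage sketch, assuming GoogleLLMService accepts api_key/model keyword arguments and that LLMContext can be seeded with an OpenAI-style messages list (neither constructor is shown in this diff):

    import asyncio

    from pipecat.processors.aggregators.llm_context import LLMContext
    from pipecat.services.google.llm import GoogleLLMService


    async def main():
        # Constructor arguments are assumptions; the diff only shows the service's
        # internal client and model name being used.
        llm = GoogleLLMService(api_key="YOUR_GOOGLE_API_KEY", model="gemini-2.0-flash")

        # Assumption: LLMContext takes an initial OpenAI-style messages list.
        context = LLMContext(messages=[{"role": "user", "content": "Say hello in one word."}])

        # One-shot, out-of-band inference: no pipeline, frames, or aggregators involved.
        print(await llm.run_inference(context))  # None if no text part is returned


    asyncio.run(main())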
pipecat/services/google/llm_openai.py
CHANGED
@@ -41,6 +41,10 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
     Note: This service includes a workaround for a Google API bug where function
     call indices may be incorrectly set to None, resulting in empty function names.
 
+    .. deprecated:: 0.0.82
+        GoogleLLMOpenAIBetaService is deprecated and will be removed in a future version.
+        Use GoogleLLMService instead for better integration with Google's native API.
+
     Reference:
         https://ai.google.dev/gemini-api/docs/openai
     """
@@ -61,6 +65,17 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
             model: Google model name to use (e.g., "gemini-2.0-flash").
             **kwargs: Additional arguments passed to the parent OpenAILLMService.
         """
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "GoogleLLMOpenAIBetaService is deprecated and will be removed in a future version. "
+                "Use GoogleLLMService instead for better integration with Google's native API.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
 
     async def _process_context(self, context: OpenAILLMContext):
@@ -74,9 +89,9 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
 
         await self.start_ttfb_metrics()
 
-        chunk_stream: AsyncStream[
-
-        )
+        chunk_stream: AsyncStream[
+            ChatCompletionChunk
+        ] = await self._stream_chat_completions_specific_context(context)
 
         async for chunk in chunk_stream:
             if chunk.usage:
pipecat/services/grok/llm.py
CHANGED
@@ -16,6 +16,7 @@ from dataclasses import dataclass
 from loguru import logger
 
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMUserAggregatorParams,
@@ -107,7 +108,7 @@ class GrokLLMService(OpenAILLMService):
         logger.debug(f"Creating Grok client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         """Process a context through the LLM and accumulate token usage metrics.
 
         This method overrides the parent class implementation to handle Grok's
pipecat/services/llm_service.py
CHANGED
@@ -36,10 +36,15 @@ from pipecat.frames.frames import (
     FunctionCallResultFrame,
     FunctionCallResultProperties,
     FunctionCallsStartedFrame,
+    LLMConfigureOutputFrame,
+    LLMFullResponseEndFrame,
+    LLMFullResponseStartFrame,
+    LLMTextFrame,
     StartFrame,
     StartInterruptionFrame,
     UserImageRequestFrame,
 )
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMUserAggregatorParams,
@@ -88,7 +93,7 @@ class FunctionCallParams:
     tool_call_id: str
     arguments: Mapping[str, Any]
     llm: "LLMService"
-    context: OpenAILLMContext
+    context: OpenAILLMContext | LLMContext
     result_callback: FunctionCallResultCallback
 
 
@@ -129,7 +134,7 @@ class FunctionCallRunnerItem:
     function_name: str
     tool_call_id: str
     arguments: Mapping[str, Any]
-    context: OpenAILLMContext
+    context: OpenAILLMContext | LLMContext
     run_llm: Optional[bool] = None
 
 
@@ -177,6 +182,7 @@ class LLMService(AIService):
         self._function_call_tasks: Dict[asyncio.Task, FunctionCallRunnerItem] = {}
         self._sequential_runner_task: Optional[asyncio.Task] = None
         self._tracing_enabled: bool = False
+        self._skip_tts: bool = False
 
         self._register_event_handler("on_function_calls_started")
         self._register_event_handler("on_completion_timeout")
@@ -189,6 +195,19 @@ class LLMService(AIService):
         """
         return self._adapter
 
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Must be implemented by subclasses.
+
+        Args:
+            context: The LLM context containing conversation history.
+
+        Returns:
+            The LLM's response as a string, or None if no response is generated.
+        """
+        raise NotImplementedError(f"run_inference() not supported by {self.__class__.__name__}")
+
     def create_context_aggregator(
         self,
         context: OpenAILLMContext,
@@ -252,6 +271,20 @@ class LLMService(AIService):
 
         if isinstance(frame, StartInterruptionFrame):
             await self._handle_interruptions(frame)
+        elif isinstance(frame, LLMConfigureOutputFrame):
+            self._skip_tts = frame.skip_tts
+
+    async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
+        """Pushes a frame.
+
+        Args:
+            frame: The frame to push.
+            direction: The direction of frame pushing.
+        """
+        if isinstance(frame, (LLMTextFrame, LLMFullResponseStartFrame, LLMFullResponseEndFrame)):
+            frame.skip_tts = self._skip_tts
+
+        await super().push_frame(frame, direction)
 
     async def _handle_interruptions(self, _: StartInterruptionFrame):
         # logger.info("In LLM Handling interruptions")
@@ -434,7 +467,9 @@ class LLMService(AIService):
         else:
             await self._sequential_runner_queue.put(runner_item)
 
-    async def _call_start_function(
+    async def _call_start_function(
+        self, context: OpenAILLMContext | LLMContext, function_name: str
+    ):
         if function_name in self._start_callbacks.keys():
             await self._start_callbacks[function_name](function_name, self, context)
         elif None in self._start_callbacks.keys():
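The LLMService changes add a `_skip_tts` flag that is set from LLMConfigureOutputFrame and copied onto every outgoing LLMTextFrame, LLMFullResponseStartFrame, and LLMFullResponseEndFrame, so a downstream TTS service can ignore a response. A sketch of a processor that could drive this; the LLMConfigureOutputFrame constructor (a `skip_tts` field) is an assumption based on the attribute read in the hunk above:

    from pipecat.frames.frames import Frame, LLMConfigureOutputFrame, TranscriptionFrame
    from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


    class SilentTurnGate(FrameProcessor):
        """Marks LLM output as skip_tts while a hypothetical `silent` flag is set."""

        def __init__(self, silent: bool = False):
            super().__init__()
            self._silent = silent

        async def process_frame(self, frame: Frame, direction: FrameDirection):
            await super().process_frame(frame, direction)
            if isinstance(frame, TranscriptionFrame):
                # Assumed constructor: LLMConfigureOutputFrame(skip_tts=...).
                await self.push_frame(LLMConfigureOutputFrame(skip_tts=self._silent))
            await self.push_frame(frame, direction)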
pipecat/services/mem0/memory.py
CHANGED
@@ -120,6 +120,7 @@ class Mem0MemoryService(FrameProcessor):
         try:
             logger.debug(f"Storing {len(messages)} messages in Mem0")
             params = {
+                "async_mode": True,
                 "messages": messages,
                 "metadata": {"platform": "pipecat"},
                 "output_format": "v1.1",
@@ -163,7 +164,7 @@ class Mem0MemoryService(FrameProcessor):
                 ("run_id", self.run_id),
             ]
             clauses = [{name: value} for name, value in id_pairs if value is not None]
-            filters = {"
+            filters = {"OR": clauses} if clauses else {}
             results = self.memory_client.search(
                 query=query,
                 filters=filters,
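The Mem0 search change ORs together whichever of user_id, agent_id, and run_id are set, and falls back to an empty filter when none are. A standalone illustration of the resulting `filters` value:

    # Same logic as the hunk above, outside the service, with example IDs.
    id_pairs = [("user_id", "user-42"), ("agent_id", None), ("run_id", "run-7")]
    clauses = [{name: value} for name, value in id_pairs if value is not None]
    filters = {"OR": clauses} if clauses else {}
    print(filters)  # {'OR': [{'user_id': 'user-42'}, {'run_id': 'run-7'}]}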
pipecat/services/mistral/llm.py
CHANGED
@@ -12,6 +12,7 @@ from loguru import logger
 from openai import AsyncStream
 from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam
 
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.frames.frames import FunctionCallFromLLM
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.services.openai.llm import OpenAILLMService
@@ -148,9 +149,7 @@ class MistralLLMService(OpenAILLMService):
         if calls_to_execute:
             await super().run_function_calls(calls_to_execute)
 
-    def build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for Mistral chat completion request.
 
         Handles Mistral-specific requirements including:
@@ -159,14 +158,14 @@ class MistralLLMService(OpenAILLMService):
         - Core completion settings
         """
         # Apply Mistral's assistant prefix requirement for API compatibility
-        fixed_messages = self._apply_mistral_assistant_prefix(messages)
+        fixed_messages = self._apply_mistral_assistant_prefix(params_from_context["messages"])
 
         params = {
             "model": self.model_name,
             "stream": True,
             "messages": fixed_messages,
-            "tools":
-            "tool_choice":
+            "tools": params_from_context["tools"],
+            "tool_choice": params_from_context["tool_choice"],
             "frequency_penalty": self._settings["frequency_penalty"],
             "presence_penalty": self._settings["presence_penalty"],
             "temperature": self._settings["temperature"],
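build_chat_completion_params() now takes a single OpenAILLMInvocationParams mapping rather than a context plus message list; the hunks above only read its "messages", "tools", and "tool_choice" keys. A sketch of calling it directly with such a mapping; the MistralLLMService constructor arguments and any other keys of OpenAILLMInvocationParams are assumptions:

    from pipecat.services.mistral.llm import MistralLLMService

    llm = MistralLLMService(api_key="YOUR_MISTRAL_API_KEY")  # constructor args assumed

    # Treated here as a plain dict with the keys the method actually reads.
    params_from_context = {
        "messages": [{"role": "user", "content": "Summarize our last call."}],
        "tools": [],
        "tool_choice": None,
    }

    completion_params = llm.build_chat_completion_params(params_from_context)
    print(completion_params["model"], completion_params["stream"])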
pipecat/services/nim/llm.py
CHANGED
@@ -11,6 +11,7 @@ Microservice) API while maintaining compatibility with the OpenAI-style interfac
 """
 
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.services.openai.llm import OpenAILLMService
 
@@ -47,7 +48,7 @@ class NimLLMService(OpenAILLMService):
         self._has_reported_prompt_tokens = False
         self._is_processing = False
 
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         """Process a context through the LLM and accumulate token usage metrics.
 
         This method overrides the parent class implementation to handle NVIDIA's
|