dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +277 -86
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +18 -6
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +125 -79
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_context.py +40 -2
- pipecat/processors/aggregators/llm_response.py +32 -15
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/dtmf_aggregator.py +174 -77
- pipecat/processors/filters/stt_mute_filter.py +17 -0
- pipecat/processors/frame_processor.py +110 -24
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +210 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +26 -5
- pipecat/processors/user_idle_processor.py +35 -11
- pipecat/runner/daily.py +59 -20
- pipecat/runner/run.py +395 -93
- pipecat/runner/types.py +6 -4
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/__init__.py +5 -1
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +41 -4
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +5 -5
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/serializers/vi.py +324 -0
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/assemblyai/models.py +6 -0
- pipecat/services/assemblyai/stt.py +13 -5
- pipecat/services/asyncai/tts.py +5 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +147 -105
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +436 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1265 -0
- pipecat/services/aws/stt.py +3 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +8 -354
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/llm.py +51 -1
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/stt.py +77 -70
- pipecat/services/cartesia/tts.py +80 -13
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +640 -0
- pipecat/services/elevenlabs/__init__.py +4 -1
- pipecat/services/elevenlabs/stt.py +339 -0
- pipecat/services/elevenlabs/tts.py +87 -46
- pipecat/services/fish/tts.py +5 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/stt.py +4 -0
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +4 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +5 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +49 -10
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/piper/tts.py +7 -9
- pipecat/services/playht/tts.py +34 -4
- pipecat/services/rime/tts.py +12 -12
- pipecat/services/riva/stt.py +3 -1
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +700 -0
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +97 -13
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +22 -10
- pipecat/services/stt_service.py +47 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +75 -22
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +51 -9
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +13 -34
- pipecat/transports/base_output.py +140 -104
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +103 -19
- pipecat/transports/smallwebrtc/request_handler.py +246 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/redis.py +58 -0
- pipecat/utils/string.py +13 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- pipecat/services/openai.py +0 -698
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
|
@@ -16,7 +16,12 @@ from typing import Any, Dict, Generic, List, TypeVar
|
|
|
16
16
|
from loguru import logger
|
|
17
17
|
|
|
18
18
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
|
19
|
-
from pipecat.processors.aggregators.llm_context import
|
|
19
|
+
from pipecat.processors.aggregators.llm_context import (
|
|
20
|
+
LLMContext,
|
|
21
|
+
LLMContextMessage,
|
|
22
|
+
LLMSpecificMessage,
|
|
23
|
+
NotGiven,
|
|
24
|
+
)
|
|
20
25
|
|
|
21
26
|
# Should be a TypedDict
|
|
22
27
|
TLLMInvocationParams = TypeVar("TLLMInvocationParams", bound=dict[str, Any])
|
|
@@ -38,6 +43,16 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]):
|
|
|
38
43
|
Subclasses must implement provider-specific conversion logic.
|
|
39
44
|
"""
|
|
40
45
|
|
|
46
|
+
@property
|
|
47
|
+
@abstractmethod
|
|
48
|
+
def id_for_llm_specific_messages(self) -> str:
|
|
49
|
+
"""Get the identifier used in LLMSpecificMessage instances for this LLM provider.
|
|
50
|
+
|
|
51
|
+
Returns:
|
|
52
|
+
The identifier string.
|
|
53
|
+
"""
|
|
54
|
+
pass
|
|
55
|
+
|
|
41
56
|
@abstractmethod
|
|
42
57
|
def get_llm_invocation_params(self, context: LLMContext, **kwargs) -> TLLMInvocationParams:
|
|
43
58
|
"""Get provider-specific LLM invocation parameters from a universal LLM context.
|
|
@@ -76,6 +91,28 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]):
|
|
|
76
91
|
"""
|
|
77
92
|
pass
|
|
78
93
|
|
|
94
|
+
def create_llm_specific_message(self, message: Any) -> LLMSpecificMessage:
|
|
95
|
+
"""Create an LLM-specific message (as opposed to a standard message) for use in an LLMContext.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
message: The message content.
|
|
99
|
+
|
|
100
|
+
Returns:
|
|
101
|
+
A LLMSpecificMessage instance.
|
|
102
|
+
"""
|
|
103
|
+
return LLMSpecificMessage(llm=self.id_for_llm_specific_messages, message=message)
|
|
104
|
+
|
|
105
|
+
def get_messages(self, context: LLMContext) -> List[LLMContextMessage]:
|
|
106
|
+
"""Get messages from the LLM context, including standard and LLM-specific messages.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
context: The LLM context containing messages.
|
|
110
|
+
|
|
111
|
+
Returns:
|
|
112
|
+
List of messages including standard and LLM-specific messages.
|
|
113
|
+
"""
|
|
114
|
+
return context.get_messages(self.id_for_llm_specific_messages)
|
|
115
|
+
|
|
79
116
|
def from_standard_tools(self, tools: Any) -> List[Any] | NotGiven:
|
|
80
117
|
"""Convert tools from standard format to provider format.
|
|
81
118
|
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
import copy
|
|
10
10
|
import json
|
|
11
11
|
from dataclasses import dataclass
|
|
12
|
-
from typing import Any, Dict, List,
|
|
12
|
+
from typing import Any, Dict, List, TypedDict
|
|
13
13
|
|
|
14
14
|
from anthropic import NOT_GIVEN, NotGiven
|
|
15
15
|
from anthropic.types.message_param import MessageParam
|
|
@@ -28,10 +28,7 @@ from pipecat.processors.aggregators.llm_context import (
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
class AnthropicLLMInvocationParams(TypedDict):
|
|
31
|
-
"""Context-based parameters for invoking Anthropic's LLM API.
|
|
32
|
-
|
|
33
|
-
This is a placeholder until support for universal LLMContext machinery is added for Anthropic.
|
|
34
|
-
"""
|
|
31
|
+
"""Context-based parameters for invoking Anthropic's LLM API."""
|
|
35
32
|
|
|
36
33
|
system: str | NotGiven
|
|
37
34
|
messages: List[MessageParam]
|
|
@@ -45,13 +42,16 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
|
|
|
45
42
|
to the specific format required by Anthropic's Claude models for function calling.
|
|
46
43
|
"""
|
|
47
44
|
|
|
45
|
+
@property
|
|
46
|
+
def id_for_llm_specific_messages(self) -> str:
|
|
47
|
+
"""Get the identifier used in LLMSpecificMessage instances for Anthropic."""
|
|
48
|
+
return "anthropic"
|
|
49
|
+
|
|
48
50
|
def get_llm_invocation_params(
|
|
49
51
|
self, context: LLMContext, enable_prompt_caching: bool
|
|
50
52
|
) -> AnthropicLLMInvocationParams:
|
|
51
53
|
"""Get Anthropic-specific LLM invocation parameters from a universal LLM context.
|
|
52
54
|
|
|
53
|
-
This is a placeholder until support for universal LLMContext machinery is added for Anthropic.
|
|
54
|
-
|
|
55
55
|
Args:
|
|
56
56
|
context: The LLM context containing messages, tools, etc.
|
|
57
57
|
enable_prompt_caching: Whether prompt caching should be enabled.
|
|
@@ -59,7 +59,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
|
|
|
59
59
|
Returns:
|
|
60
60
|
Dictionary of parameters for invoking Anthropic's LLM API.
|
|
61
61
|
"""
|
|
62
|
-
messages = self._from_universal_context_messages(self.
|
|
62
|
+
messages = self._from_universal_context_messages(self.get_messages(context))
|
|
63
63
|
return {
|
|
64
64
|
"system": messages.system,
|
|
65
65
|
"messages": (
|
|
@@ -76,8 +76,6 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
|
|
|
76
76
|
|
|
77
77
|
Removes or truncates sensitive data like image content for safe logging.
|
|
78
78
|
|
|
79
|
-
This is a placeholder until support for universal LLMContext machinery is added for Anthropic.
|
|
80
|
-
|
|
81
79
|
Args:
|
|
82
80
|
context: The LLM context containing messages.
|
|
83
81
|
|
|
@@ -85,7 +83,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
|
|
|
85
83
|
List of messages in a format ready for logging about Anthropic.
|
|
86
84
|
"""
|
|
87
85
|
# Get messages in Anthropic's format
|
|
88
|
-
messages = self._from_universal_context_messages(self.
|
|
86
|
+
messages = self._from_universal_context_messages(self.get_messages(context)).messages
|
|
89
87
|
|
|
90
88
|
# Sanitize messages for logging
|
|
91
89
|
messages_for_logging = []
|
|
@@ -99,9 +97,6 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
|
|
|
99
97
|
messages_for_logging.append(msg)
|
|
100
98
|
return messages_for_logging
|
|
101
99
|
|
|
102
|
-
def _get_messages(self, context: LLMContext) -> List[LLMContextMessage]:
|
|
103
|
-
return context.get_messages("anthropic")
|
|
104
|
-
|
|
105
100
|
@dataclass
|
|
106
101
|
class ConvertedMessages:
|
|
107
102
|
"""Container for Anthropic-formatted messages converted from universal context."""
|
|
@@ -6,13 +6,47 @@
|
|
|
6
6
|
|
|
7
7
|
"""AWS Nova Sonic LLM adapter for Pipecat."""
|
|
8
8
|
|
|
9
|
+
import copy
|
|
9
10
|
import json
|
|
10
|
-
from
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from typing import Any, Dict, List, Optional, TypedDict
|
|
14
|
+
|
|
15
|
+
from loguru import logger
|
|
11
16
|
|
|
12
17
|
from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
|
|
13
18
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
|
14
19
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
|
15
|
-
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
20
|
+
from pipecat.processors.aggregators.llm_context import LLMContext, LLMContextMessage
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Role(Enum):
|
|
24
|
+
"""Roles supported in AWS Nova Sonic conversations.
|
|
25
|
+
|
|
26
|
+
Parameters:
|
|
27
|
+
SYSTEM: System-level messages (not used in conversation history).
|
|
28
|
+
USER: Messages sent by the user.
|
|
29
|
+
ASSISTANT: Messages sent by the assistant.
|
|
30
|
+
TOOL: Messages sent by tools (not used in conversation history).
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
SYSTEM = "SYSTEM"
|
|
34
|
+
USER = "USER"
|
|
35
|
+
ASSISTANT = "ASSISTANT"
|
|
36
|
+
TOOL = "TOOL"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
|
|
40
|
+
class AWSNovaSonicConversationHistoryMessage:
|
|
41
|
+
"""A single message in AWS Nova Sonic conversation history.
|
|
42
|
+
|
|
43
|
+
Parameters:
|
|
44
|
+
role: The role of the message sender (USER or ASSISTANT only).
|
|
45
|
+
text: The text content of the message.
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
role: Role # only USER and ASSISTANT
|
|
49
|
+
text: str
|
|
16
50
|
|
|
17
51
|
|
|
18
52
|
class AWSNovaSonicLLMInvocationParams(TypedDict):
|
|
@@ -21,7 +55,9 @@ class AWSNovaSonicLLMInvocationParams(TypedDict):
|
|
|
21
55
|
This is a placeholder until support for universal LLMContext machinery is added for AWS Nova Sonic.
|
|
22
56
|
"""
|
|
23
57
|
|
|
24
|
-
|
|
58
|
+
system_instruction: Optional[str]
|
|
59
|
+
messages: List[AWSNovaSonicConversationHistoryMessage]
|
|
60
|
+
tools: List[Dict[str, Any]]
|
|
25
61
|
|
|
26
62
|
|
|
27
63
|
class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
|
|
@@ -31,6 +67,11 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
|
|
|
31
67
|
specific function-calling format, enabling tool use with Nova Sonic models.
|
|
32
68
|
"""
|
|
33
69
|
|
|
70
|
+
@property
|
|
71
|
+
def id_for_llm_specific_messages(self) -> str:
|
|
72
|
+
"""Get the identifier used in LLMSpecificMessage instances for AWS Nova Sonic."""
|
|
73
|
+
return "aws-nova-sonic"
|
|
74
|
+
|
|
34
75
|
def get_llm_invocation_params(self, context: LLMContext) -> AWSNovaSonicLLMInvocationParams:
|
|
35
76
|
"""Get AWS Nova Sonic-specific LLM invocation parameters from a universal LLM context.
|
|
36
77
|
|
|
@@ -42,7 +83,13 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
|
|
|
42
83
|
Returns:
|
|
43
84
|
Dictionary of parameters for invoking AWS Nova Sonic's LLM API.
|
|
44
85
|
"""
|
|
45
|
-
|
|
86
|
+
messages = self._from_universal_context_messages(self.get_messages(context))
|
|
87
|
+
return {
|
|
88
|
+
"system_instruction": messages.system_instruction,
|
|
89
|
+
"messages": messages.messages,
|
|
90
|
+
# NOTE: LLMContext's tools are guaranteed to be a ToolsSchema (or NOT_GIVEN)
|
|
91
|
+
"tools": self.from_standard_tools(context.tools) or [],
|
|
92
|
+
}
|
|
46
93
|
|
|
47
94
|
def get_messages_for_logging(self, context) -> List[Dict[str, Any]]:
|
|
48
95
|
"""Get messages from a universal LLM context in a format ready for logging about AWS Nova Sonic.
|
|
@@ -57,7 +104,75 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
|
|
|
57
104
|
Returns:
|
|
58
105
|
List of messages in a format ready for logging about AWS Nova Sonic.
|
|
59
106
|
"""
|
|
60
|
-
|
|
107
|
+
return self._from_universal_context_messages(self.get_messages(context)).messages
|
|
108
|
+
|
|
109
|
+
@dataclass
class ConvertedMessages:
    """Container for AWS Nova Sonic-formatted messages converted from universal context.

    Parameters:
        messages: Conversation history messages in Nova Sonic format.
        system_instruction: System instruction extracted from the context, if any.
    """

    # No default here: callers must always pass `messages` explicitly.
    messages: List[AWSNovaSonicConversationHistoryMessage]
    system_instruction: Optional[str] = None
|
|
115
|
+
|
|
116
|
+
def _from_universal_context_messages(
    self, universal_context_messages: List[LLMContextMessage]
) -> ConvertedMessages:
    """Convert universal context messages into AWS Nova Sonic conversation history.

    A leading "system" message is removed from the history and returned
    separately as the system instruction. Every remaining message is converted
    individually; messages that cannot be converted are dropped.

    Args:
        universal_context_messages: Messages taken from the universal LLM context.

    Returns:
        ConvertedMessages holding the converted history and the system
        instruction (None when the context has no leading system message).
    """
    # Bail if there are no messages. `messages` is a required field of
    # ConvertedMessages, so it has to be passed explicitly here.
    if not universal_context_messages:
        return self.ConvertedMessages(messages=[])

    # Work on a copy so that popping the system message below never mutates
    # the caller's list.
    universal_context_messages = copy.deepcopy(universal_context_messages)

    system_instruction = None

    # If we have a "system" message as our first message, let's pull that out into "instruction"
    if universal_context_messages[0].get("role") == "system":
        system = universal_context_messages.pop(0)
        content = system.get("content")
        if isinstance(content, str):
            system_instruction = content
        elif isinstance(content, list) and content:
            # Only the first content part is used as the instruction text.
            system_instruction = content[0].get("text")
        if system_instruction:
            # NOTE(review): the instruction is also stored on the adapter instance;
            # no reader of this attribute is visible here — confirm it is still needed.
            self._system_instruction = system_instruction

    # Process remaining messages to fill out conversation history.
    # Nova Sonic supports "user" and "assistant" messages in history.
    converted = (
        self._from_universal_context_message(message) for message in universal_context_messages
    )
    messages = [message for message in converted if message]

    return self.ConvertedMessages(messages=messages, system_instruction=system_instruction)
|
|
147
|
+
|
|
148
|
+
def _from_universal_context_message(
    self, message
) -> Optional[AWSNovaSonicConversationHistoryMessage]:
    """Convert standard message format to Nova Sonic format.

    Args:
        message: Standard message dictionary to convert.

    Returns:
        Nova Sonic conversation history message, or None if not convertible
        (role other than "user"/"assistant", or no text content).
    """
    role = message.get("role")

    # NOTE: we're ignoring messages with role "tool" (and any other role) since they
    # can't be loaded into AWS Nova Sonic conversation history
    if role not in ("user", "assistant"):
        return None

    content = message.get("content")
    if isinstance(content, list):
        # Collect the text parts and join them once, so the result has no
        # leading space and a part without "text" cannot raise.
        text_parts = []
        for part in content:
            if part.get("type") == "text":
                text = part.get("text")
                if text:
                    text_parts.append(text)
            else:
                logger.error(
                    f"Unhandled content type in context message: {part.get('type')} - {message}"
                )
        content = " ".join(text_parts)

    # There won't be content if this is an assistant tool call entry.
    # We're ignoring those since they can't be loaded into AWS Nova Sonic conversation
    # history
    if not content:
        return None

    return AWSNovaSonicConversationHistoryMessage(role=Role[role.upper()], text=content)
|
|
61
176
|
|
|
62
177
|
@staticmethod
|
|
63
178
|
def _to_aws_nova_sonic_function_format(function: FunctionSchema) -> Dict[str, Any]:
|
|
@@ -6,21 +6,33 @@
|
|
|
6
6
|
|
|
7
7
|
"""AWS Bedrock LLM adapter for Pipecat."""
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
import base64
|
|
10
|
+
import copy
|
|
11
|
+
import json
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import Any, Dict, List, Literal, Optional, TypedDict
|
|
14
|
+
|
|
15
|
+
from loguru import logger
|
|
10
16
|
|
|
11
17
|
from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
|
|
12
18
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
|
13
19
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
|
14
|
-
from pipecat.processors.aggregators.llm_context import
|
|
20
|
+
from pipecat.processors.aggregators.llm_context import (
|
|
21
|
+
LLMContext,
|
|
22
|
+
LLMContextMessage,
|
|
23
|
+
LLMContextToolChoice,
|
|
24
|
+
LLMSpecificMessage,
|
|
25
|
+
LLMStandardMessage,
|
|
26
|
+
)
|
|
15
27
|
|
|
16
28
|
|
|
17
29
|
class AWSBedrockLLMInvocationParams(TypedDict):
|
|
18
|
-
"""Context-based parameters for invoking AWS Bedrock's LLM API.
|
|
19
|
-
|
|
20
|
-
This is a placeholder until support for universal LLMContext machinery is added for Bedrock.
|
|
21
|
-
"""
|
|
30
|
+
"""Context-based parameters for invoking AWS Bedrock's LLM API."""
|
|
22
31
|
|
|
23
|
-
|
|
32
|
+
system: Optional[List[dict[str, Any]]] # [{"text": "system message"}]
|
|
33
|
+
messages: List[dict[str, Any]]
|
|
34
|
+
tools: List[dict[str, Any]]
|
|
35
|
+
tool_choice: LLMContextToolChoice
|
|
24
36
|
|
|
25
37
|
|
|
26
38
|
class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]):
|
|
@@ -30,33 +42,244 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]):
|
|
|
30
42
|
into AWS Bedrock's expected tool format for function calling capabilities.
|
|
31
43
|
"""
|
|
32
44
|
|
|
45
|
+
@property
|
|
46
|
+
def id_for_llm_specific_messages(self) -> str:
|
|
47
|
+
"""Get the identifier used in LLMSpecificMessage instances for AWS Bedrock."""
|
|
48
|
+
return "aws"
|
|
49
|
+
|
|
33
50
|
def get_llm_invocation_params(self, context: LLMContext) -> AWSBedrockLLMInvocationParams:
|
|
34
51
|
"""Get AWS Bedrock-specific LLM invocation parameters from a universal LLM context.
|
|
35
52
|
|
|
36
|
-
This is a placeholder until support for universal LLMContext machinery is added for Bedrock.
|
|
37
|
-
|
|
38
53
|
Args:
|
|
39
54
|
context: The LLM context containing messages, tools, etc.
|
|
40
55
|
|
|
41
56
|
Returns:
|
|
42
57
|
Dictionary of parameters for invoking AWS Bedrock's LLM API.
|
|
43
58
|
"""
|
|
44
|
-
|
|
59
|
+
messages = self._from_universal_context_messages(self.get_messages(context))
|
|
60
|
+
return {
|
|
61
|
+
"system": messages.system,
|
|
62
|
+
"messages": messages.messages,
|
|
63
|
+
# NOTE: LLMContext's tools are guaranteed to be a ToolsSchema (or NOT_GIVEN)
|
|
64
|
+
"tools": self.from_standard_tools(context.tools) or [],
|
|
65
|
+
# To avoid refactoring in AWSBedrockLLMService, we just pass through tool_choice.
|
|
66
|
+
# Eventually (when we don't have to maintain the non-LLMContext code path) we should do
|
|
67
|
+
# the conversion to Bedrock's expected format here rather than in AWSBedrockLLMService.
|
|
68
|
+
"tool_choice": context.tool_choice,
|
|
69
|
+
}
|
|
45
70
|
|
|
46
71
|
def get_messages_for_logging(self, context) -> List[Dict[str, Any]]:
    """Get messages from a universal LLM context in a format ready for logging about AWS Bedrock.

    Removes or truncates sensitive data like image content for safe logging.

    Args:
        context: The LLM context containing messages.

    Returns:
        List of messages in a format ready for logging about AWS Bedrock.
    """
    # Get messages in AWS Bedrock's format
    messages = self._from_universal_context_messages(self.get_messages(context)).messages

    # Sanitize messages for logging. Each message is deep-copied first so the
    # redaction below never touches the data that is sent to the LLM.
    messages_for_logging = []
    for message in messages:
        msg = copy.deepcopy(message)
        content = msg.get("content") if isinstance(msg, dict) else None
        if isinstance(content, list):
            for item in content:
                image = item.get("image") if isinstance(item, dict) else None
                # Only redact well-formed image blocks; a logging helper should
                # not raise on an unexpected content shape.
                if isinstance(image, dict) and isinstance(image.get("source"), dict):
                    image["source"]["bytes"] = "..."
        messages_for_logging.append(msg)
    return messages_for_logging
|
|
96
|
+
|
|
97
|
+
@dataclass
class ConvertedMessages:
    """Container for AWS Bedrock-formatted messages converted from universal context.

    Parameters:
        messages: Converted messages in AWS Bedrock format.
        system: Content of the leading system message, or None if there was none.
    """

    messages: List[dict[str, Any]]
    # NOTE(review): annotated as a string, but the converter stores the first
    # message's "content" here as-is, which may be a list of blocks — confirm.
    system: Optional[str]
|
|
103
|
+
|
|
104
|
+
def _from_universal_context_messages(
    self, universal_context_messages: List[LLMContextMessage]
) -> ConvertedMessages:
    """Convert universal context messages into AWS Bedrock messages.

    Steps, in order:
    1. Convert every message individually.
    2. Pull a leading "system" message out into the returned `system` value.
    3. Turn any later "system" message into a "user" message.
    4. Merge consecutive messages that share a role.
    5. Replace empty content with an "(empty)" placeholder.

    Args:
        universal_context_messages: Messages taken from the universal LLM context.

    Returns:
        ConvertedMessages holding the converted messages and the system content
        (None when the context has no leading system message).
    """
    system = None
    messages = []

    # First, map messages using self._from_universal_context_message(m).
    # Best effort: if any message fails to convert, the error is logged and
    # conversion continues with an empty message list.
    try:
        messages = [self._from_universal_context_message(m) for m in universal_context_messages]
    except Exception as e:
        logger.error(f"Error mapping messages: {e}")

    # See if we should pull the system message out of our messages list
    if messages and messages[0]["role"] == "system":
        system = messages.pop(0)["content"]

    # Convert any subsequent "system"-role messages to "user"-role
    # messages, as AWS Bedrock doesn't support system input messages.
    for message in messages:
        if message["role"] == "system":
            message["role"] = "user"

    # Merge consecutive messages with the same role in a single pass
    # (avoids repeated list.pop() from the middle of the list).
    merged = []
    for message in messages:
        if merged and merged[-1]["role"] == message["role"]:
            previous = merged[-1]
            # Convert string content to a list of Bedrock text blocks so the
            # two contents can be concatenated.
            if isinstance(previous["content"], str):
                previous["content"] = [{"text": previous["content"]}]
            if isinstance(message["content"], str):
                message["content"] = [{"text": message["content"]}]
            previous["content"].extend(message["content"])
        else:
            merged.append(message)
    messages = merged

    # Avoid empty content in messages
    for message in messages:
        if isinstance(message["content"], str) and message["content"] == "":
            message["content"] = "(empty)"
        elif isinstance(message["content"], list) and len(message["content"]) == 0:
            message["content"] = [{"text": "(empty)"}]

    return self.ConvertedMessages(messages=messages, system=system)
|
|
155
|
+
|
|
156
|
+
def _from_universal_context_message(self, message: LLMContextMessage) -> dict[str, Any]:
|
|
157
|
+
if isinstance(message, LLMSpecificMessage):
|
|
158
|
+
return copy.deepcopy(message.message)
|
|
159
|
+
return self._from_standard_message(message)
|
|
160
|
+
|
|
161
|
+
def _from_standard_message(self, message: LLMStandardMessage) -> dict[str, Any]:
|
|
162
|
+
"""Convert standard format message to AWS Bedrock format.
|
|
163
|
+
|
|
164
|
+
Handles conversion of text content, tool calls, and tool results.
|
|
165
|
+
Empty text content is converted to "(empty)".
|
|
166
|
+
|
|
167
|
+
Args:
|
|
168
|
+
message: Message in standard format.
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
Message in AWS Bedrock format.
|
|
172
|
+
|
|
173
|
+
Examples:
|
|
174
|
+
Standard format input::
|
|
175
|
+
|
|
176
|
+
{
|
|
177
|
+
"role": "assistant",
|
|
178
|
+
"tool_calls": [
|
|
179
|
+
{
|
|
180
|
+
"id": "123",
|
|
181
|
+
"function": {"name": "search", "arguments": '{"q": "test"}'}
|
|
182
|
+
}
|
|
183
|
+
]
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
AWS Bedrock format output::
|
|
187
|
+
|
|
188
|
+
{
|
|
189
|
+
"role": "assistant",
|
|
190
|
+
"content": [
|
|
191
|
+
{
|
|
192
|
+
"toolUse": {
|
|
193
|
+
"toolUseId": "123",
|
|
194
|
+
"name": "search",
|
|
195
|
+
"input": {"q": "test"}
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
]
|
|
199
|
+
}
|
|
200
|
+
"""
|
|
201
|
+
message = copy.deepcopy(message)
|
|
202
|
+
if message["role"] == "tool":
|
|
203
|
+
# Try to parse the content as JSON if it looks like JSON
|
|
204
|
+
try:
|
|
205
|
+
if message["content"].strip().startswith("{") and message[
|
|
206
|
+
"content"
|
|
207
|
+
].strip().endswith("}"):
|
|
208
|
+
content_json = json.loads(message["content"])
|
|
209
|
+
tool_result_content = [{"json": content_json}]
|
|
210
|
+
else:
|
|
211
|
+
tool_result_content = [{"text": message["content"]}]
|
|
212
|
+
except:
|
|
213
|
+
tool_result_content = [{"text": message["content"]}]
|
|
214
|
+
|
|
215
|
+
return {
|
|
216
|
+
"role": "user",
|
|
217
|
+
"content": [
|
|
218
|
+
{
|
|
219
|
+
"toolResult": {
|
|
220
|
+
"toolUseId": message["tool_call_id"],
|
|
221
|
+
"content": tool_result_content,
|
|
222
|
+
},
|
|
223
|
+
},
|
|
224
|
+
],
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
if message.get("tool_calls"):
|
|
228
|
+
tc = message["tool_calls"]
|
|
229
|
+
ret = {"role": "assistant", "content": []}
|
|
230
|
+
for tool_call in tc:
|
|
231
|
+
function = tool_call["function"]
|
|
232
|
+
arguments = json.loads(function["arguments"])
|
|
233
|
+
new_tool_use = {
|
|
234
|
+
"toolUse": {
|
|
235
|
+
"toolUseId": tool_call["id"],
|
|
236
|
+
"name": function["name"],
|
|
237
|
+
"input": arguments,
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
ret["content"].append(new_tool_use)
|
|
241
|
+
return ret
|
|
242
|
+
|
|
243
|
+
# Handle text content
|
|
244
|
+
content = message.get("content")
|
|
245
|
+
if isinstance(content, str):
|
|
246
|
+
if content == "":
|
|
247
|
+
return {"role": message["role"], "content": [{"text": "(empty)"}]}
|
|
248
|
+
else:
|
|
249
|
+
return {"role": message["role"], "content": [{"text": content}]}
|
|
250
|
+
elif isinstance(content, list):
|
|
251
|
+
new_content = []
|
|
252
|
+
for item in content:
|
|
253
|
+
# fix empty text
|
|
254
|
+
if item.get("type", "") == "text":
|
|
255
|
+
text_content = item["text"] if item["text"] != "" else "(empty)"
|
|
256
|
+
new_content.append({"text": text_content})
|
|
257
|
+
# handle image_url -> image conversion
|
|
258
|
+
if item["type"] == "image_url":
|
|
259
|
+
new_item = {
|
|
260
|
+
"image": {
|
|
261
|
+
"format": "jpeg",
|
|
262
|
+
"source": {
|
|
263
|
+
"bytes": base64.b64decode(item["image_url"]["url"].split(",")[1])
|
|
264
|
+
},
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
new_content.append(new_item)
|
|
268
|
+
# In the case where there's a single image in the list (like what
|
|
269
|
+
# would result from a UserImageRawFrame), ensure that the image
|
|
270
|
+
# comes before text
|
|
271
|
+
image_indices = [i for i, item in enumerate(new_content) if "image" in item]
|
|
272
|
+
text_indices = [i for i, item in enumerate(new_content) if "text" in item]
|
|
273
|
+
if len(image_indices) == 1 and text_indices:
|
|
274
|
+
img_idx = image_indices[0]
|
|
275
|
+
first_txt_idx = text_indices[0]
|
|
276
|
+
if img_idx > first_txt_idx:
|
|
277
|
+
# Move image before the first text
|
|
278
|
+
image_item = new_content.pop(img_idx)
|
|
279
|
+
new_content.insert(first_txt_idx, image_item)
|
|
280
|
+
return {"role": message["role"], "content": new_content}
|
|
281
|
+
|
|
282
|
+
return message
|
|
60
283
|
|
|
61
284
|
@staticmethod
|
|
62
285
|
def _to_bedrock_function_format(function: FunctionSchema) -> Dict[str, Any]:
|
|
@@ -54,6 +54,11 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
|
|
|
54
54
|
- Extracting and sanitizing messages from the LLM context for logging with Gemini.
|
|
55
55
|
"""
|
|
56
56
|
|
|
57
|
+
@property
def id_for_llm_specific_messages(self) -> str:
    """Identifier used in ``LLMSpecificMessage`` instances for Google.

    Returns:
        The constant string ``"google"``.
    """
    provider_id = "google"
    return provider_id
|
|
61
|
+
|
|
57
62
|
def get_llm_invocation_params(self, context: LLMContext) -> GeminiLLMInvocationParams:
|
|
58
63
|
"""Get Gemini-specific LLM invocation parameters from a universal LLM context.
|
|
59
64
|
|
|
@@ -63,7 +68,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
|
|
|
63
68
|
Returns:
|
|
64
69
|
Dictionary of parameters for Gemini's API.
|
|
65
70
|
"""
|
|
66
|
-
messages = self._from_universal_context_messages(self.
|
|
71
|
+
messages = self._from_universal_context_messages(self.get_messages(context))
|
|
67
72
|
return {
|
|
68
73
|
"system_instruction": messages.system_instruction,
|
|
69
74
|
"messages": messages.messages,
|
|
@@ -82,9 +87,11 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
|
|
|
82
87
|
Includes both converted standard tools and any custom Gemini-specific tools.
|
|
83
88
|
"""
|
|
84
89
|
functions_schema = tools_schema.standard_tools
|
|
85
|
-
formatted_standard_tools =
|
|
86
|
-
{"function_declarations": [func.to_default_dict() for func in functions_schema]}
|
|
87
|
-
|
|
90
|
+
formatted_standard_tools = (
|
|
91
|
+
[{"function_declarations": [func.to_default_dict() for func in functions_schema]}]
|
|
92
|
+
if functions_schema
|
|
93
|
+
else []
|
|
94
|
+
)
|
|
88
95
|
custom_gemini_tools = []
|
|
89
96
|
if tools_schema.custom_tools:
|
|
90
97
|
custom_gemini_tools = tools_schema.custom_tools.get(AdapterType.GEMINI, [])
|
|
@@ -103,7 +110,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
|
|
|
103
110
|
List of messages in a format ready for logging about Gemini.
|
|
104
111
|
"""
|
|
105
112
|
# Get messages in Gemini's format
|
|
106
|
-
messages = self._from_universal_context_messages(self.
|
|
113
|
+
messages = self._from_universal_context_messages(self.get_messages(context)).messages
|
|
107
114
|
|
|
108
115
|
# Sanitize messages for logging
|
|
109
116
|
messages_for_logging = []
|
|
@@ -119,9 +126,6 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
|
|
|
119
126
|
messages_for_logging.append(obj)
|
|
120
127
|
return messages_for_logging
|
|
121
128
|
|
|
122
|
-
def _get_messages(self, context: LLMContext) -> List[LLMContextMessage]:
|
|
123
|
-
return context.get_messages("google")
|
|
124
|
-
|
|
125
129
|
@dataclass
|
|
126
130
|
class ConvertedMessages:
|
|
127
131
|
"""Container for Google-formatted messages converted from universal context."""
|