dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +20 -4
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +2 -2
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
|
@@ -6,20 +6,71 @@
|
|
|
6
6
|
|
|
7
7
|
"""Gemini LLM adapter for Pipecat."""
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
import base64
|
|
10
|
+
import json
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from typing import Any, Dict, List, Optional, TypedDict
|
|
13
|
+
|
|
14
|
+
from loguru import logger
|
|
15
|
+
from openai import NotGiven
|
|
10
16
|
|
|
11
17
|
from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
|
|
12
18
|
from pipecat.adapters.schemas.tools_schema import AdapterType, ToolsSchema
|
|
19
|
+
from pipecat.processors.aggregators.llm_context import (
|
|
20
|
+
LLMContext,
|
|
21
|
+
LLMContextMessage,
|
|
22
|
+
LLMSpecificMessage,
|
|
23
|
+
LLMStandardMessage,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
from google.genai.types import (
|
|
28
|
+
Blob,
|
|
29
|
+
Content,
|
|
30
|
+
FunctionCall,
|
|
31
|
+
FunctionResponse,
|
|
32
|
+
Part,
|
|
33
|
+
)
|
|
34
|
+
except ModuleNotFoundError as e:
|
|
35
|
+
logger.error(f"Exception: {e}")
|
|
36
|
+
logger.error("In order to use Google AI, you need to `pip install pipecat-ai[google]`.")
|
|
37
|
+
raise Exception(f"Missing module: {e}")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class GeminiLLMInvocationParams(TypedDict):
|
|
41
|
+
"""Context-based parameters for invoking Gemini LLM."""
|
|
13
42
|
|
|
43
|
+
system_instruction: Optional[str]
|
|
44
|
+
messages: List[Content]
|
|
45
|
+
tools: List[Any] | NotGiven
|
|
14
46
|
|
|
15
|
-
class GeminiLLMAdapter(BaseLLMAdapter):
|
|
16
|
-
"""LLM adapter for Google's Gemini service.
|
|
17
47
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
48
|
+
class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
|
|
49
|
+
"""Gemini-specific adapter for Pipecat.
|
|
50
|
+
|
|
51
|
+
Handles:
|
|
52
|
+
- Extracting parameters for Gemini's API from a universal LLM context
|
|
53
|
+
- Converting Pipecat's standardized tools schema to Gemini's function-calling format.
|
|
54
|
+
- Extracting and sanitizing messages from the LLM context for logging with Gemini.
|
|
21
55
|
"""
|
|
22
56
|
|
|
57
|
+
def get_llm_invocation_params(self, context: LLMContext) -> GeminiLLMInvocationParams:
|
|
58
|
+
"""Get Gemini-specific LLM invocation parameters from a universal LLM context.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
context: The LLM context containing messages, tools, etc.
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
Dictionary of parameters for Gemini's API.
|
|
65
|
+
"""
|
|
66
|
+
messages = self._from_universal_context_messages(self._get_messages(context))
|
|
67
|
+
return {
|
|
68
|
+
"system_instruction": messages.system_instruction,
|
|
69
|
+
"messages": messages.messages,
|
|
70
|
+
# NOTE: LLMContext's tools are guaranteed to be a ToolsSchema (or NOT_GIVEN)
|
|
71
|
+
"tools": self.from_standard_tools(context.tools),
|
|
72
|
+
}
|
|
73
|
+
|
|
23
74
|
def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
|
|
24
75
|
"""Convert tool schemas to Gemini's function-calling format.
|
|
25
76
|
|
|
@@ -39,3 +90,222 @@ class GeminiLLMAdapter(BaseLLMAdapter):
|
|
|
39
90
|
custom_gemini_tools = tools_schema.custom_tools.get(AdapterType.GEMINI, [])
|
|
40
91
|
|
|
41
92
|
return formatted_standard_tools + custom_gemini_tools
|
|
93
|
+
|
|
94
|
+
def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]:
|
|
95
|
+
"""Get messages from a universal LLM context in a format ready for logging about Gemini.
|
|
96
|
+
|
|
97
|
+
Removes or truncates sensitive data like image content for safe logging.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
context: The LLM context containing messages.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
List of messages in a format ready for logging about Gemini.
|
|
104
|
+
"""
|
|
105
|
+
# Get messages in Gemini's format
|
|
106
|
+
messages = self._from_universal_context_messages(self._get_messages(context)).messages
|
|
107
|
+
|
|
108
|
+
# Sanitize messages for logging
|
|
109
|
+
messages_for_logging = []
|
|
110
|
+
for message in messages:
|
|
111
|
+
obj = message.to_json_dict()
|
|
112
|
+
try:
|
|
113
|
+
if "parts" in obj:
|
|
114
|
+
for part in obj["parts"]:
|
|
115
|
+
if "inline_data" in part:
|
|
116
|
+
part["inline_data"]["data"] = "..."
|
|
117
|
+
except Exception as e:
|
|
118
|
+
logger.debug(f"Error: {e}")
|
|
119
|
+
messages_for_logging.append(obj)
|
|
120
|
+
return messages_for_logging
|
|
121
|
+
|
|
122
|
+
def _get_messages(self, context: LLMContext) -> List[LLMContextMessage]:
|
|
123
|
+
return context.get_messages("google")
|
|
124
|
+
|
|
125
|
+
@dataclass
|
|
126
|
+
class ConvertedMessages:
|
|
127
|
+
"""Container for Google-formatted messages converted from universal context."""
|
|
128
|
+
|
|
129
|
+
messages: List[Content]
|
|
130
|
+
system_instruction: Optional[str] = None
|
|
131
|
+
|
|
132
|
+
def _from_universal_context_messages(
|
|
133
|
+
self, universal_context_messages: List[LLMContextMessage]
|
|
134
|
+
) -> ConvertedMessages:
|
|
135
|
+
"""Restructures messages to ensure proper Google format and message ordering.
|
|
136
|
+
|
|
137
|
+
This method handles conversion of OpenAI-formatted messages to Google format,
|
|
138
|
+
with special handling for function calls, function responses, and system messages.
|
|
139
|
+
System messages are added back to the context as user messages when needed.
|
|
140
|
+
|
|
141
|
+
The final message order is preserved as:
|
|
142
|
+
|
|
143
|
+
1. Function calls (from model)
|
|
144
|
+
2. Function responses (from user)
|
|
145
|
+
3. Text messages (converted from system messages)
|
|
146
|
+
|
|
147
|
+
Note::
|
|
148
|
+
|
|
149
|
+
System messages are only added back when there are no regular text
|
|
150
|
+
messages in the context, ensuring proper conversation continuity
|
|
151
|
+
after function calls.
|
|
152
|
+
"""
|
|
153
|
+
system_instruction = None
|
|
154
|
+
messages = []
|
|
155
|
+
|
|
156
|
+
# Process each message, preserving Google-formatted messages and converting others
|
|
157
|
+
for message in universal_context_messages:
|
|
158
|
+
if isinstance(message, LLMSpecificMessage):
|
|
159
|
+
# Assume that LLMSpecificMessage wraps a message in Google format
|
|
160
|
+
messages.append(message.message)
|
|
161
|
+
continue
|
|
162
|
+
|
|
163
|
+
# Convert standard format to Google format
|
|
164
|
+
converted = self._from_standard_message(
|
|
165
|
+
message, already_have_system_instruction=bool(system_instruction)
|
|
166
|
+
)
|
|
167
|
+
if isinstance(converted, Content):
|
|
168
|
+
# Regular (non-system) message
|
|
169
|
+
messages.append(converted)
|
|
170
|
+
else:
|
|
171
|
+
# System instruction
|
|
172
|
+
system_instruction = converted
|
|
173
|
+
|
|
174
|
+
# Check if we only have function-related messages (no regular text)
|
|
175
|
+
has_regular_messages = any(
|
|
176
|
+
len(msg.parts) == 1
|
|
177
|
+
and getattr(msg.parts[0], "text", None)
|
|
178
|
+
and not getattr(msg.parts[0], "function_call", None)
|
|
179
|
+
and not getattr(msg.parts[0], "function_response", None)
|
|
180
|
+
for msg in messages
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# Add system instruction back as a user message if we only have function messages
|
|
184
|
+
if system_instruction and not has_regular_messages:
|
|
185
|
+
messages.append(Content(role="user", parts=[Part(text=system_instruction)]))
|
|
186
|
+
|
|
187
|
+
# Remove any empty messages
|
|
188
|
+
messages = [m for m in messages if m.parts]
|
|
189
|
+
|
|
190
|
+
return self.ConvertedMessages(messages=messages, system_instruction=system_instruction)
|
|
191
|
+
|
|
192
|
+
def _from_standard_message(
|
|
193
|
+
self, message: LLMStandardMessage, already_have_system_instruction: bool
|
|
194
|
+
) -> Content | str:
|
|
195
|
+
"""Convert standard universal context message to Google Content object.
|
|
196
|
+
|
|
197
|
+
Handles conversion of text, images, and function calls to Google's
|
|
198
|
+
format.
|
|
199
|
+
System instructions are returned as a plain string.
|
|
200
|
+
|
|
201
|
+
Args:
|
|
202
|
+
message: Message in standard universal context format.
|
|
203
|
+
already_have_system_instruction: Whether we already have a system instruction
|
|
204
|
+
|
|
205
|
+
Returns:
|
|
206
|
+
Content object with role and parts, or a plain string for system
|
|
207
|
+
messages.
|
|
208
|
+
|
|
209
|
+
Examples:
|
|
210
|
+
Standard text message::
|
|
211
|
+
|
|
212
|
+
{
|
|
213
|
+
"role": "user",
|
|
214
|
+
"content": "Hello there"
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
Converts to Google Content with::
|
|
218
|
+
|
|
219
|
+
Content(
|
|
220
|
+
role="user",
|
|
221
|
+
parts=[Part(text="Hello there")]
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
Standard function call message::
|
|
225
|
+
|
|
226
|
+
{
|
|
227
|
+
"role": "assistant",
|
|
228
|
+
"tool_calls": [
|
|
229
|
+
{
|
|
230
|
+
"function": {
|
|
231
|
+
"name": "search",
|
|
232
|
+
"arguments": '{"query": "test"}'
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
]
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
Converts to Google Content with::
|
|
239
|
+
|
|
240
|
+
Content(
|
|
241
|
+
role="model",
|
|
242
|
+
parts=[Part(function_call=FunctionCall(name="search", args={"query": "test"}))]
|
|
243
|
+
)
|
|
244
|
+
"""
|
|
245
|
+
role = message["role"]
|
|
246
|
+
content = message.get("content", [])
|
|
247
|
+
if role == "system":
|
|
248
|
+
if already_have_system_instruction:
|
|
249
|
+
role = "user" # Convert system message to user role if we already have a system instruction
|
|
250
|
+
else:
|
|
251
|
+
# System instructions are returned as plain text
|
|
252
|
+
if isinstance(content, str):
|
|
253
|
+
return content
|
|
254
|
+
elif isinstance(content, list):
|
|
255
|
+
# If content is a list, we assume it's a list of text parts, per the standard
|
|
256
|
+
return " ".join(part["text"] for part in content if part.get("type") == "text")
|
|
257
|
+
elif role == "assistant":
|
|
258
|
+
role = "model"
|
|
259
|
+
|
|
260
|
+
parts = []
|
|
261
|
+
if message.get("tool_calls"):
|
|
262
|
+
for tc in message["tool_calls"]:
|
|
263
|
+
parts.append(
|
|
264
|
+
Part(
|
|
265
|
+
function_call=FunctionCall(
|
|
266
|
+
name=tc["function"]["name"],
|
|
267
|
+
args=json.loads(tc["function"]["arguments"]),
|
|
268
|
+
)
|
|
269
|
+
)
|
|
270
|
+
)
|
|
271
|
+
elif role == "tool":
|
|
272
|
+
role = "model"
|
|
273
|
+
try:
|
|
274
|
+
response = json.loads(message["content"])
|
|
275
|
+
if isinstance(response, dict):
|
|
276
|
+
response_dict = response
|
|
277
|
+
else:
|
|
278
|
+
response_dict = {"value": response}
|
|
279
|
+
except Exception as e:
|
|
280
|
+
# Response might not be JSON-deserializable.
|
|
281
|
+
# This occurs with a UserImageFrame, for example, where we get a plain "COMPLETED" string.
|
|
282
|
+
response_dict = {"value": message["content"]}
|
|
283
|
+
parts.append(
|
|
284
|
+
Part(
|
|
285
|
+
function_response=FunctionResponse(
|
|
286
|
+
name="tool_call_result", # seems to work to hard-code the same name every time
|
|
287
|
+
response=response_dict,
|
|
288
|
+
)
|
|
289
|
+
)
|
|
290
|
+
)
|
|
291
|
+
elif isinstance(content, str):
|
|
292
|
+
parts.append(Part(text=content))
|
|
293
|
+
elif isinstance(content, list):
|
|
294
|
+
for c in content:
|
|
295
|
+
if c["type"] == "text":
|
|
296
|
+
parts.append(Part(text=c["text"]))
|
|
297
|
+
elif c["type"] == "image_url":
|
|
298
|
+
parts.append(
|
|
299
|
+
Part(
|
|
300
|
+
inline_data=Blob(
|
|
301
|
+
mime_type="image/jpeg",
|
|
302
|
+
data=base64.b64decode(c["image_url"]["url"].split(",")[1]),
|
|
303
|
+
)
|
|
304
|
+
)
|
|
305
|
+
)
|
|
306
|
+
elif c["type"] == "input_audio":
|
|
307
|
+
input_audio = c["input_audio"]
|
|
308
|
+
audio_bytes = base64.b64decode(input_audio["data"])
|
|
309
|
+
parts.append(Part(inline_data=Blob(mime_type="audio/wav", data=audio_bytes)))
|
|
310
|
+
|
|
311
|
+
return Content(role=role, parts=parts)
|
|
@@ -6,22 +6,63 @@
|
|
|
6
6
|
|
|
7
7
|
"""OpenAI LLM adapter for Pipecat."""
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
import copy
|
|
10
|
+
import json
|
|
11
|
+
from typing import Any, Dict, List, TypedDict
|
|
10
12
|
|
|
11
|
-
from openai.
|
|
13
|
+
from openai._types import NOT_GIVEN as OPEN_AI_NOT_GIVEN
|
|
14
|
+
from openai._types import NotGiven as OpenAINotGiven
|
|
15
|
+
from openai.types.chat import (
|
|
16
|
+
ChatCompletionMessageParam,
|
|
17
|
+
ChatCompletionToolChoiceOptionParam,
|
|
18
|
+
ChatCompletionToolParam,
|
|
19
|
+
)
|
|
12
20
|
|
|
13
21
|
from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
|
|
14
22
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
|
23
|
+
from pipecat.processors.aggregators.llm_context import (
|
|
24
|
+
LLMContext,
|
|
25
|
+
LLMContextMessage,
|
|
26
|
+
LLMContextToolChoice,
|
|
27
|
+
NotGiven,
|
|
28
|
+
)
|
|
15
29
|
|
|
16
30
|
|
|
17
|
-
class
|
|
18
|
-
"""
|
|
31
|
+
class OpenAILLMInvocationParams(TypedDict):
|
|
32
|
+
"""Context-based parameters for invoking OpenAI ChatCompletion API."""
|
|
19
33
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
34
|
+
messages: List[ChatCompletionMessageParam]
|
|
35
|
+
tools: List[ChatCompletionToolParam] | OpenAINotGiven
|
|
36
|
+
tool_choice: ChatCompletionToolChoiceOptionParam | OpenAINotGiven
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class OpenAILLMAdapter(BaseLLMAdapter[OpenAILLMInvocationParams]):
|
|
40
|
+
"""OpenAI-specific adapter for Pipecat.
|
|
41
|
+
|
|
42
|
+
Handles:
|
|
43
|
+
|
|
44
|
+
- Extracting parameters for OpenAI's ChatCompletion API from a universal
|
|
45
|
+
LLM context
|
|
46
|
+
- Converting Pipecat's standardized tools schema to OpenAI's function-calling format.
|
|
47
|
+
- Extracting and sanitizing messages from the LLM context for logging about OpenAI.
|
|
23
48
|
"""
|
|
24
49
|
|
|
50
|
+
def get_llm_invocation_params(self, context: LLMContext) -> OpenAILLMInvocationParams:
|
|
51
|
+
"""Get OpenAI-specific LLM invocation parameters from a universal LLM context.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
context: The LLM context containing messages, tools, etc.
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
Dictionary of parameters for OpenAI's ChatCompletion API.
|
|
58
|
+
"""
|
|
59
|
+
return {
|
|
60
|
+
"messages": self._from_universal_context_messages(self._get_messages(context)),
|
|
61
|
+
# NOTE: LLMContext's tools are guaranteed to be a ToolsSchema (or NOT_GIVEN)
|
|
62
|
+
"tools": self.from_standard_tools(context.tools),
|
|
63
|
+
"tool_choice": context.tool_choice,
|
|
64
|
+
}
|
|
65
|
+
|
|
25
66
|
def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[ChatCompletionToolParam]:
|
|
26
67
|
"""Convert function schemas to OpenAI's function-calling format.
|
|
27
68
|
|
|
@@ -37,3 +78,43 @@ class OpenAILLMAdapter(BaseLLMAdapter):
|
|
|
37
78
|
ChatCompletionToolParam(type="function", function=func.to_default_dict())
|
|
38
79
|
for func in functions_schema
|
|
39
80
|
]
|
|
81
|
+
|
|
82
|
+
def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]:
|
|
83
|
+
"""Get messages from a universal LLM context in a format ready for logging about OpenAI.
|
|
84
|
+
|
|
85
|
+
Removes or truncates sensitive data like image content for safe logging.
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
context: The LLM context containing messages.
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
List of messages in a format ready for logging about OpenAI.
|
|
92
|
+
"""
|
|
93
|
+
msgs = []
|
|
94
|
+
for message in self._get_messages(context):
|
|
95
|
+
msg = copy.deepcopy(message)
|
|
96
|
+
if "content" in msg:
|
|
97
|
+
if isinstance(msg["content"], list):
|
|
98
|
+
for item in msg["content"]:
|
|
99
|
+
if item["type"] == "image_url":
|
|
100
|
+
if item["image_url"]["url"].startswith("data:image/"):
|
|
101
|
+
item["image_url"]["url"] = "data:image/..."
|
|
102
|
+
if "mime_type" in msg and msg["mime_type"].startswith("image/"):
|
|
103
|
+
msg["data"] = "..."
|
|
104
|
+
msgs.append(msg)
|
|
105
|
+
return msgs
|
|
106
|
+
|
|
107
|
+
def _get_messages(self, context: LLMContext) -> List[LLMContextMessage]:
|
|
108
|
+
return context.get_messages("openai")
|
|
109
|
+
|
|
110
|
+
def _from_universal_context_messages(
|
|
111
|
+
self, messages: List[LLMContextMessage]
|
|
112
|
+
) -> List[ChatCompletionMessageParam]:
|
|
113
|
+
# Just a pass-through: messages are already the right type
|
|
114
|
+
return messages
|
|
115
|
+
|
|
116
|
+
def _from_standard_tool_choice(
|
|
117
|
+
self, tool_choice: LLMContextToolChoice | NotGiven
|
|
118
|
+
) -> ChatCompletionToolChoiceOptionParam | OpenAINotGiven:
|
|
119
|
+
# Just a pass-through: tool_choice is already the right type
|
|
120
|
+
return tool_choice
|
|
@@ -6,11 +6,21 @@
|
|
|
6
6
|
|
|
7
7
|
"""OpenAI Realtime LLM adapter for Pipecat."""
|
|
8
8
|
|
|
9
|
-
from typing import Any, Dict, List,
|
|
9
|
+
from typing import Any, Dict, List, TypedDict
|
|
10
10
|
|
|
11
11
|
from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
|
|
12
12
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
|
13
13
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
|
14
|
+
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class OpenAIRealtimeLLMInvocationParams(TypedDict):
|
|
18
|
+
"""Context-based parameters for invoking OpenAI Realtime API.
|
|
19
|
+
|
|
20
|
+
This is a placeholder until support for universal LLMContext machinery is added for OpenAI Realtime.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
pass
|
|
14
24
|
|
|
15
25
|
|
|
16
26
|
class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
|
|
@@ -20,6 +30,34 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
|
|
|
20
30
|
OpenAI's Realtime API for function calling capabilities.
|
|
21
31
|
"""
|
|
22
32
|
|
|
33
|
+
def get_llm_invocation_params(self, context: LLMContext) -> OpenAIRealtimeLLMInvocationParams:
|
|
34
|
+
"""Get OpenAI Realtime-specific LLM invocation parameters from a universal LLM context.
|
|
35
|
+
|
|
36
|
+
This is a placeholder until support for universal LLMContext machinery is added for OpenAI Realtime.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
context: The LLM context containing messages, tools, etc.
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
Dictionary of parameters for invoking OpenAI Realtime's API.
|
|
43
|
+
"""
|
|
44
|
+
raise NotImplementedError("Universal LLMContext is not yet supported for OpenAI Realtime.")
|
|
45
|
+
|
|
46
|
+
def get_messages_for_logging(self, context) -> List[Dict[str, Any]]:
|
|
47
|
+
"""Get messages from a universal LLM context in a format ready for logging about OpenAI Realtime.
|
|
48
|
+
|
|
49
|
+
Removes or truncates sensitive data like image content for safe logging.
|
|
50
|
+
|
|
51
|
+
This is a placeholder until support for universal LLMContext machinery is added for OpenAI Realtime.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
context: The LLM context containing messages.
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
List of messages in a format ready for logging about OpenAI Realtime.
|
|
58
|
+
"""
|
|
59
|
+
raise NotImplementedError("Universal LLMContext is not yet supported for OpenAI Realtime.")
|
|
60
|
+
|
|
23
61
|
@staticmethod
|
|
24
62
|
def _to_openai_realtime_function_format(function: FunctionSchema) -> Dict[str, Any]:
|
|
25
63
|
"""Convert a function schema to OpenAI Realtime format.
|
|
File without changes
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Copyright (c) 2024–2025, Daily
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
4
|
+
#
|
|
5
|
+
|
|
6
|
+
"""This module defines generic types for DTMF.
|
|
7
|
+
|
|
8
|
+
It defines the `KeypadEntry` enumeration, representing dual-tone multi-frequency
|
|
9
|
+
(DTMF) keypad entries for phone system integration. Each entry corresponds to a
|
|
10
|
+
key on the telephone keypad, facilitating the handling of input in
|
|
11
|
+
telecommunication applications.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from enum import Enum
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class KeypadEntry(str, Enum):
|
|
18
|
+
"""DTMF keypad entries for phone system integration.
|
|
19
|
+
|
|
20
|
+
Parameters:
|
|
21
|
+
ONE: Number key 1.
|
|
22
|
+
TWO: Number key 2.
|
|
23
|
+
THREE: Number key 3.
|
|
24
|
+
FOUR: Number key 4.
|
|
25
|
+
FIVE: Number key 5.
|
|
26
|
+
SIX: Number key 6.
|
|
27
|
+
SEVEN: Number key 7.
|
|
28
|
+
EIGHT: Number key 8.
|
|
29
|
+
NINE: Number key 9.
|
|
30
|
+
ZERO: Number key 0.
|
|
31
|
+
POUND: Pound/hash key (#).
|
|
32
|
+
STAR: Star/asterisk key (*).
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
ONE = "1"
|
|
36
|
+
TWO = "2"
|
|
37
|
+
THREE = "3"
|
|
38
|
+
FOUR = "4"
|
|
39
|
+
FIVE = "5"
|
|
40
|
+
SIX = "6"
|
|
41
|
+
SEVEN = "7"
|
|
42
|
+
EIGHT = "8"
|
|
43
|
+
NINE = "9"
|
|
44
|
+
ZERO = "0"
|
|
45
|
+
|
|
46
|
+
POUND = "#"
|
|
47
|
+
STAR = "*"
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024–2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""DTMF audio utilities.
|
|
8
|
+
|
|
9
|
+
This module provides functionality to load DTMF (Dual-Tone Multi-Frequency)
|
|
10
|
+
audio files corresponding to phone keypad entries. Audio data is cached
|
|
11
|
+
in-memory after first load to improve performance on subsequent accesses.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
import io
|
|
16
|
+
import wave
|
|
17
|
+
from importlib.resources import files
|
|
18
|
+
from typing import Dict, Optional
|
|
19
|
+
|
|
20
|
+
import aiofiles
|
|
21
|
+
|
|
22
|
+
from pipecat.audio.dtmf.types import KeypadEntry
|
|
23
|
+
from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler
|
|
24
|
+
from pipecat.audio.utils import create_file_resampler
|
|
25
|
+
|
|
26
|
+
__DTMF_LOCK__ = asyncio.Lock()
|
|
27
|
+
__DTMF_AUDIO__: Dict[KeypadEntry, bytes] = {}
|
|
28
|
+
__DTMF_RESAMPLER__: Optional[BaseAudioResampler] = None
|
|
29
|
+
|
|
30
|
+
__DTMF_FILE_NAME = {
|
|
31
|
+
KeypadEntry.POUND: "dtmf-pound.wav",
|
|
32
|
+
KeypadEntry.STAR: "dtmf-star.wav",
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
async def load_dtmf_audio(button: KeypadEntry, *, sample_rate: int = 8000) -> bytes:
|
|
37
|
+
"""Load audio for DTMF tones associated with the given button.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
button (KeypadEntry): The button for which the DTMF audio is to be loaded.
|
|
41
|
+
sample_rate (int, optional): The sample rate for the audio. Defaults to 8000.
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
bytes: The audio data for the DTMF tone as bytes.
|
|
45
|
+
"""
|
|
46
|
+
global __DTMF_AUDIO__, __DTMF_RESAMPLER__
|
|
47
|
+
|
|
48
|
+
async with __DTMF_LOCK__:
|
|
49
|
+
if button in __DTMF_AUDIO__:
|
|
50
|
+
return __DTMF_AUDIO__[button]
|
|
51
|
+
|
|
52
|
+
if not __DTMF_RESAMPLER__:
|
|
53
|
+
__DTMF_RESAMPLER__ = create_file_resampler()
|
|
54
|
+
|
|
55
|
+
dtmf_file_name = __DTMF_FILE_NAME.get(button, f"dtmf-{button.value}.wav")
|
|
56
|
+
dtmf_file_path = files("pipecat.audio.dtmf").joinpath(dtmf_file_name)
|
|
57
|
+
|
|
58
|
+
async with aiofiles.open(dtmf_file_path, "rb") as f:
|
|
59
|
+
data = await f.read()
|
|
60
|
+
|
|
61
|
+
with io.BytesIO(data) as buffer:
|
|
62
|
+
with wave.open(buffer, "rb") as wf:
|
|
63
|
+
audio = wf.readframes(wf.getnframes())
|
|
64
|
+
in_sample_rate = wf.getframerate()
|
|
65
|
+
resampled_audio = await __DTMF_RESAMPLER__.resample(
|
|
66
|
+
audio, in_sample_rate, sample_rate
|
|
67
|
+
)
|
|
68
|
+
__DTMF_AUDIO__[button] = resampled_audio
|
|
69
|
+
|
|
70
|
+
return __DTMF_AUDIO__[button]
|