dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (106) hide show
  1. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
  2. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
  3. pipecat/adapters/base_llm_adapter.py +44 -6
  4. pipecat/adapters/services/anthropic_adapter.py +302 -2
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
  6. pipecat/adapters/services/bedrock_adapter.py +40 -2
  7. pipecat/adapters/services/gemini_adapter.py +276 -6
  8. pipecat/adapters/services/open_ai_adapter.py +88 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
  10. pipecat/audio/dtmf/__init__.py +0 -0
  11. pipecat/audio/dtmf/types.py +47 -0
  12. pipecat/audio/dtmf/utils.py +70 -0
  13. pipecat/audio/filters/aic_filter.py +199 -0
  14. pipecat/audio/utils.py +9 -7
  15. pipecat/extensions/ivr/__init__.py +0 -0
  16. pipecat/extensions/ivr/ivr_navigator.py +452 -0
  17. pipecat/frames/frames.py +156 -43
  18. pipecat/pipeline/llm_switcher.py +76 -0
  19. pipecat/pipeline/parallel_pipeline.py +3 -3
  20. pipecat/pipeline/service_switcher.py +144 -0
  21. pipecat/pipeline/task.py +68 -28
  22. pipecat/pipeline/task_observer.py +10 -0
  23. pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
  24. pipecat/processors/aggregators/llm_context.py +277 -0
  25. pipecat/processors/aggregators/llm_response.py +48 -15
  26. pipecat/processors/aggregators/llm_response_universal.py +840 -0
  27. pipecat/processors/aggregators/openai_llm_context.py +3 -3
  28. pipecat/processors/dtmf_aggregator.py +0 -2
  29. pipecat/processors/filters/stt_mute_filter.py +0 -2
  30. pipecat/processors/frame_processor.py +18 -11
  31. pipecat/processors/frameworks/rtvi.py +17 -10
  32. pipecat/processors/metrics/sentry.py +2 -0
  33. pipecat/runner/daily.py +137 -36
  34. pipecat/runner/run.py +1 -1
  35. pipecat/runner/utils.py +7 -7
  36. pipecat/serializers/asterisk.py +20 -4
  37. pipecat/serializers/exotel.py +1 -1
  38. pipecat/serializers/plivo.py +1 -1
  39. pipecat/serializers/telnyx.py +1 -1
  40. pipecat/serializers/twilio.py +1 -1
  41. pipecat/services/__init__.py +2 -2
  42. pipecat/services/anthropic/llm.py +113 -28
  43. pipecat/services/asyncai/tts.py +4 -0
  44. pipecat/services/aws/llm.py +82 -8
  45. pipecat/services/aws/tts.py +0 -10
  46. pipecat/services/aws_nova_sonic/aws.py +5 -0
  47. pipecat/services/cartesia/tts.py +28 -16
  48. pipecat/services/cerebras/llm.py +15 -10
  49. pipecat/services/deepgram/stt.py +8 -0
  50. pipecat/services/deepseek/llm.py +13 -8
  51. pipecat/services/fireworks/llm.py +13 -8
  52. pipecat/services/fish/tts.py +8 -6
  53. pipecat/services/gemini_multimodal_live/gemini.py +5 -0
  54. pipecat/services/gladia/config.py +7 -1
  55. pipecat/services/gladia/stt.py +23 -15
  56. pipecat/services/google/llm.py +159 -59
  57. pipecat/services/google/llm_openai.py +18 -3
  58. pipecat/services/grok/llm.py +2 -1
  59. pipecat/services/llm_service.py +38 -3
  60. pipecat/services/mem0/memory.py +2 -1
  61. pipecat/services/mistral/llm.py +5 -6
  62. pipecat/services/nim/llm.py +2 -1
  63. pipecat/services/openai/base_llm.py +88 -26
  64. pipecat/services/openai/image.py +6 -1
  65. pipecat/services/openai_realtime_beta/openai.py +5 -2
  66. pipecat/services/openpipe/llm.py +6 -8
  67. pipecat/services/perplexity/llm.py +13 -8
  68. pipecat/services/playht/tts.py +9 -6
  69. pipecat/services/rime/tts.py +1 -1
  70. pipecat/services/sambanova/llm.py +18 -13
  71. pipecat/services/sarvam/tts.py +415 -10
  72. pipecat/services/speechmatics/stt.py +2 -2
  73. pipecat/services/tavus/video.py +1 -1
  74. pipecat/services/tts_service.py +15 -5
  75. pipecat/services/vistaar/llm.py +2 -5
  76. pipecat/transports/base_input.py +32 -19
  77. pipecat/transports/base_output.py +39 -5
  78. pipecat/transports/daily/__init__.py +0 -0
  79. pipecat/transports/daily/transport.py +2371 -0
  80. pipecat/transports/daily/utils.py +410 -0
  81. pipecat/transports/livekit/__init__.py +0 -0
  82. pipecat/transports/livekit/transport.py +1042 -0
  83. pipecat/transports/network/fastapi_websocket.py +12 -546
  84. pipecat/transports/network/small_webrtc.py +12 -922
  85. pipecat/transports/network/webrtc_connection.py +9 -595
  86. pipecat/transports/network/websocket_client.py +12 -481
  87. pipecat/transports/network/websocket_server.py +12 -487
  88. pipecat/transports/services/daily.py +9 -2334
  89. pipecat/transports/services/helpers/daily_rest.py +12 -396
  90. pipecat/transports/services/livekit.py +12 -975
  91. pipecat/transports/services/tavus.py +12 -757
  92. pipecat/transports/smallwebrtc/__init__.py +0 -0
  93. pipecat/transports/smallwebrtc/connection.py +612 -0
  94. pipecat/transports/smallwebrtc/transport.py +936 -0
  95. pipecat/transports/tavus/__init__.py +0 -0
  96. pipecat/transports/tavus/transport.py +770 -0
  97. pipecat/transports/websocket/__init__.py +0 -0
  98. pipecat/transports/websocket/client.py +494 -0
  99. pipecat/transports/websocket/fastapi.py +559 -0
  100. pipecat/transports/websocket/server.py +500 -0
  101. pipecat/transports/whatsapp/__init__.py +0 -0
  102. pipecat/transports/whatsapp/api.py +345 -0
  103. pipecat/transports/whatsapp/client.py +364 -0
  104. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
  105. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
  106. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
@@ -6,20 +6,71 @@
6
6
 
7
7
  """Gemini LLM adapter for Pipecat."""
8
8
 
9
- from typing import Any, Dict, List, Union
9
+ import base64
10
+ import json
11
+ from dataclasses import dataclass
12
+ from typing import Any, Dict, List, Optional, TypedDict
13
+
14
+ from loguru import logger
15
+ from openai import NotGiven
10
16
 
11
17
  from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
12
18
  from pipecat.adapters.schemas.tools_schema import AdapterType, ToolsSchema
19
+ from pipecat.processors.aggregators.llm_context import (
20
+ LLMContext,
21
+ LLMContextMessage,
22
+ LLMSpecificMessage,
23
+ LLMStandardMessage,
24
+ )
25
+
26
+ try:
27
+ from google.genai.types import (
28
+ Blob,
29
+ Content,
30
+ FunctionCall,
31
+ FunctionResponse,
32
+ Part,
33
+ )
34
+ except ModuleNotFoundError as e:
35
+ logger.error(f"Exception: {e}")
36
+ logger.error("In order to use Google AI, you need to `pip install pipecat-ai[google]`.")
37
+ raise Exception(f"Missing module: {e}")
38
+
39
+
40
+ class GeminiLLMInvocationParams(TypedDict):
41
+ """Context-based parameters for invoking Gemini LLM."""
13
42
 
43
+ system_instruction: Optional[str]
44
+ messages: List[Content]
45
+ tools: List[Any] | NotGiven
14
46
 
15
- class GeminiLLMAdapter(BaseLLMAdapter):
16
- """LLM adapter for Google's Gemini service.
17
47
 
18
- Provides tool schema conversion functionality to transform standard tool
19
- definitions into Gemini's specific function-calling format for use with
20
- Gemini LLM models.
48
+ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
49
+ """Gemini-specific adapter for Pipecat.
50
+
51
+ Handles:
52
+ - Extracting parameters for Gemini's API from a universal LLM context
53
+ - Converting Pipecat's standardized tools schema to Gemini's function-calling format.
54
+ - Extracting and sanitizing messages from the LLM context for logging with Gemini.
21
55
  """
22
56
 
57
+ def get_llm_invocation_params(self, context: LLMContext) -> GeminiLLMInvocationParams:
58
+ """Get Gemini-specific LLM invocation parameters from a universal LLM context.
59
+
60
+ Args:
61
+ context: The LLM context containing messages, tools, etc.
62
+
63
+ Returns:
64
+ Dictionary of parameters for Gemini's API.
65
+ """
66
+ messages = self._from_universal_context_messages(self._get_messages(context))
67
+ return {
68
+ "system_instruction": messages.system_instruction,
69
+ "messages": messages.messages,
70
+ # NOTE: LLMContext's tools are guaranteed to be a ToolsSchema (or NOT_GIVEN)
71
+ "tools": self.from_standard_tools(context.tools),
72
+ }
73
+
23
74
  def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
24
75
  """Convert tool schemas to Gemini's function-calling format.
25
76
 
@@ -39,3 +90,222 @@ class GeminiLLMAdapter(BaseLLMAdapter):
39
90
  custom_gemini_tools = tools_schema.custom_tools.get(AdapterType.GEMINI, [])
40
91
 
41
92
  return formatted_standard_tools + custom_gemini_tools
93
+
94
+ def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]:
95
+ """Get messages from a universal LLM context in a format ready for logging about Gemini.
96
+
97
+ Removes or truncates sensitive data like image content for safe logging.
98
+
99
+ Args:
100
+ context: The LLM context containing messages.
101
+
102
+ Returns:
103
+ List of messages in a format ready for logging about Gemini.
104
+ """
105
+ # Get messages in Gemini's format
106
+ messages = self._from_universal_context_messages(self._get_messages(context)).messages
107
+
108
+ # Sanitize messages for logging
109
+ messages_for_logging = []
110
+ for message in messages:
111
+ obj = message.to_json_dict()
112
+ try:
113
+ if "parts" in obj:
114
+ for part in obj["parts"]:
115
+ if "inline_data" in part:
116
+ part["inline_data"]["data"] = "..."
117
+ except Exception as e:
118
+ logger.debug(f"Error: {e}")
119
+ messages_for_logging.append(obj)
120
+ return messages_for_logging
121
+
122
+ def _get_messages(self, context: LLMContext) -> List[LLMContextMessage]:
123
+ return context.get_messages("google")
124
+
125
+ @dataclass
126
+ class ConvertedMessages:
127
+ """Container for Google-formatted messages converted from universal context."""
128
+
129
+ messages: List[Content]
130
+ system_instruction: Optional[str] = None
131
+
132
+ def _from_universal_context_messages(
133
+ self, universal_context_messages: List[LLMContextMessage]
134
+ ) -> ConvertedMessages:
135
+ """Restructures messages to ensure proper Google format and message ordering.
136
+
137
+ This method handles conversion of OpenAI-formatted messages to Google format,
138
+ with special handling for function calls, function responses, and system messages.
139
+ System messages are added back to the context as user messages when needed.
140
+
141
+ The final message order is preserved as:
142
+
143
+ 1. Function calls (from model)
144
+ 2. Function responses (from user)
145
+ 3. Text messages (converted from system messages)
146
+
147
+ Note::
148
+
149
+ System messages are only added back when there are no regular text
150
+ messages in the context, ensuring proper conversation continuity
151
+ after function calls.
152
+ """
153
+ system_instruction = None
154
+ messages = []
155
+
156
+ # Process each message, preserving Google-formatted messages and converting others
157
+ for message in universal_context_messages:
158
+ if isinstance(message, LLMSpecificMessage):
159
+ # Assume that LLMSpecificMessage wraps a message in Google format
160
+ messages.append(message.message)
161
+ continue
162
+
163
+ # Convert standard format to Google format
164
+ converted = self._from_standard_message(
165
+ message, already_have_system_instruction=bool(system_instruction)
166
+ )
167
+ if isinstance(converted, Content):
168
+ # Regular (non-system) message
169
+ messages.append(converted)
170
+ else:
171
+ # System instruction
172
+ system_instruction = converted
173
+
174
+ # Check if we only have function-related messages (no regular text)
175
+ has_regular_messages = any(
176
+ len(msg.parts) == 1
177
+ and getattr(msg.parts[0], "text", None)
178
+ and not getattr(msg.parts[0], "function_call", None)
179
+ and not getattr(msg.parts[0], "function_response", None)
180
+ for msg in messages
181
+ )
182
+
183
+ # Add system instruction back as a user message if we only have function messages
184
+ if system_instruction and not has_regular_messages:
185
+ messages.append(Content(role="user", parts=[Part(text=system_instruction)]))
186
+
187
+ # Remove any empty messages
188
+ messages = [m for m in messages if m.parts]
189
+
190
+ return self.ConvertedMessages(messages=messages, system_instruction=system_instruction)
191
+
192
+ def _from_standard_message(
193
+ self, message: LLMStandardMessage, already_have_system_instruction: bool
194
+ ) -> Content | str:
195
+ """Convert standard universal context message to Google Content object.
196
+
197
+ Handles conversion of text, images, and function calls to Google's
198
+ format.
199
+ System instructions are returned as a plain string.
200
+
201
+ Args:
202
+ message: Message in standard universal context format.
203
+ already_have_system_instruction: Whether we already have a system instruction
204
+
205
+ Returns:
206
+ Content object with role and parts, or a plain string for system
207
+ messages.
208
+
209
+ Examples:
210
+ Standard text message::
211
+
212
+ {
213
+ "role": "user",
214
+ "content": "Hello there"
215
+ }
216
+
217
+ Converts to Google Content with::
218
+
219
+ Content(
220
+ role="user",
221
+ parts=[Part(text="Hello there")]
222
+ )
223
+
224
+ Standard function call message::
225
+
226
+ {
227
+ "role": "assistant",
228
+ "tool_calls": [
229
+ {
230
+ "function": {
231
+ "name": "search",
232
+ "arguments": '{"query": "test"}'
233
+ }
234
+ }
235
+ ]
236
+ }
237
+
238
+ Converts to Google Content with::
239
+
240
+ Content(
241
+ role="model",
242
+ parts=[Part(function_call=FunctionCall(name="search", args={"query": "test"}))]
243
+ )
244
+ """
245
+ role = message["role"]
246
+ content = message.get("content", [])
247
+ if role == "system":
248
+ if already_have_system_instruction:
249
+ role = "user" # Convert system message to user role if we already have a system instruction
250
+ else:
251
+ # System instructions are returned as plain text
252
+ if isinstance(content, str):
253
+ return content
254
+ elif isinstance(content, list):
255
+ # If content is a list, we assume it's a list of text parts, per the standard
256
+ return " ".join(part["text"] for part in content if part.get("type") == "text")
257
+ elif role == "assistant":
258
+ role = "model"
259
+
260
+ parts = []
261
+ if message.get("tool_calls"):
262
+ for tc in message["tool_calls"]:
263
+ parts.append(
264
+ Part(
265
+ function_call=FunctionCall(
266
+ name=tc["function"]["name"],
267
+ args=json.loads(tc["function"]["arguments"]),
268
+ )
269
+ )
270
+ )
271
+ elif role == "tool":
272
+ role = "model"
273
+ try:
274
+ response = json.loads(message["content"])
275
+ if isinstance(response, dict):
276
+ response_dict = response
277
+ else:
278
+ response_dict = {"value": response}
279
+ except Exception as e:
280
+ # Response might not be JSON-deserializable.
281
+ # This occurs with a UserImageFrame, for example, where we get a plain "COMPLETED" string.
282
+ response_dict = {"value": message["content"]}
283
+ parts.append(
284
+ Part(
285
+ function_response=FunctionResponse(
286
+ name="tool_call_result", # seems to work to hard-code the same name every time
287
+ response=response_dict,
288
+ )
289
+ )
290
+ )
291
+ elif isinstance(content, str):
292
+ parts.append(Part(text=content))
293
+ elif isinstance(content, list):
294
+ for c in content:
295
+ if c["type"] == "text":
296
+ parts.append(Part(text=c["text"]))
297
+ elif c["type"] == "image_url":
298
+ parts.append(
299
+ Part(
300
+ inline_data=Blob(
301
+ mime_type="image/jpeg",
302
+ data=base64.b64decode(c["image_url"]["url"].split(",")[1]),
303
+ )
304
+ )
305
+ )
306
+ elif c["type"] == "input_audio":
307
+ input_audio = c["input_audio"]
308
+ audio_bytes = base64.b64decode(input_audio["data"])
309
+ parts.append(Part(inline_data=Blob(mime_type="audio/wav", data=audio_bytes)))
310
+
311
+ return Content(role=role, parts=parts)
@@ -6,22 +6,63 @@
6
6
 
7
7
  """OpenAI LLM adapter for Pipecat."""
8
8
 
9
- from typing import List
9
+ import copy
10
+ import json
11
+ from typing import Any, Dict, List, TypedDict
10
12
 
11
- from openai.types.chat import ChatCompletionToolParam
13
+ from openai._types import NOT_GIVEN as OPEN_AI_NOT_GIVEN
14
+ from openai._types import NotGiven as OpenAINotGiven
15
+ from openai.types.chat import (
16
+ ChatCompletionMessageParam,
17
+ ChatCompletionToolChoiceOptionParam,
18
+ ChatCompletionToolParam,
19
+ )
12
20
 
13
21
  from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
14
22
  from pipecat.adapters.schemas.tools_schema import ToolsSchema
23
+ from pipecat.processors.aggregators.llm_context import (
24
+ LLMContext,
25
+ LLMContextMessage,
26
+ LLMContextToolChoice,
27
+ NotGiven,
28
+ )
15
29
 
16
30
 
17
- class OpenAILLMAdapter(BaseLLMAdapter):
18
- """Adapter for converting tool schemas to OpenAI's format.
31
+ class OpenAILLMInvocationParams(TypedDict):
32
+ """Context-based parameters for invoking OpenAI ChatCompletion API."""
19
33
 
20
- Provides conversion utilities for transforming Pipecat's standard tool
21
- schemas into the format expected by OpenAI's ChatCompletion API for
22
- function calling capabilities.
34
+ messages: List[ChatCompletionMessageParam]
35
+ tools: List[ChatCompletionToolParam] | OpenAINotGiven
36
+ tool_choice: ChatCompletionToolChoiceOptionParam | OpenAINotGiven
37
+
38
+
39
+ class OpenAILLMAdapter(BaseLLMAdapter[OpenAILLMInvocationParams]):
40
+ """OpenAI-specific adapter for Pipecat.
41
+
42
+ Handles:
43
+
44
+ - Extracting parameters for OpenAI's ChatCompletion API from a universal
45
+ LLM context
46
+ - Converting Pipecat's standardized tools schema to OpenAI's function-calling format.
47
+ - Extracting and sanitizing messages from the LLM context for logging about OpenAI.
23
48
  """
24
49
 
50
+ def get_llm_invocation_params(self, context: LLMContext) -> OpenAILLMInvocationParams:
51
+ """Get OpenAI-specific LLM invocation parameters from a universal LLM context.
52
+
53
+ Args:
54
+ context: The LLM context containing messages, tools, etc.
55
+
56
+ Returns:
57
+ Dictionary of parameters for OpenAI's ChatCompletion API.
58
+ """
59
+ return {
60
+ "messages": self._from_universal_context_messages(self._get_messages(context)),
61
+ # NOTE; LLMContext's tools are guaranteed to be a ToolsSchema (or NOT_GIVEN)
62
+ "tools": self.from_standard_tools(context.tools),
63
+ "tool_choice": context.tool_choice,
64
+ }
65
+
25
66
  def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[ChatCompletionToolParam]:
26
67
  """Convert function schemas to OpenAI's function-calling format.
27
68
 
@@ -37,3 +78,43 @@ class OpenAILLMAdapter(BaseLLMAdapter):
37
78
  ChatCompletionToolParam(type="function", function=func.to_default_dict())
38
79
  for func in functions_schema
39
80
  ]
81
+
82
+ def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]:
83
+ """Get messages from a universal LLM context in a format ready for logging about OpenAI.
84
+
85
+ Removes or truncates sensitive data like image content for safe logging.
86
+
87
+ Args:
88
+ context: The LLM context containing messages.
89
+
90
+ Returns:
91
+ List of messages in a format ready for logging about OpenAI.
92
+ """
93
+ msgs = []
94
+ for message in self._get_messages(context):
95
+ msg = copy.deepcopy(message)
96
+ if "content" in msg:
97
+ if isinstance(msg["content"], list):
98
+ for item in msg["content"]:
99
+ if item["type"] == "image_url":
100
+ if item["image_url"]["url"].startswith("data:image/"):
101
+ item["image_url"]["url"] = "data:image/..."
102
+ if "mime_type" in msg and msg["mime_type"].startswith("image/"):
103
+ msg["data"] = "..."
104
+ msgs.append(msg)
105
+ return msgs
106
+
107
+ def _get_messages(self, context: LLMContext) -> List[LLMContextMessage]:
108
+ return context.get_messages("openai")
109
+
110
+ def _from_universal_context_messages(
111
+ self, messages: List[LLMContextMessage]
112
+ ) -> List[ChatCompletionMessageParam]:
113
+ # Just a pass-through: messages are already the right type
114
+ return messages
115
+
116
+ def _from_standard_tool_choice(
117
+ self, tool_choice: LLMContextToolChoice | NotGiven
118
+ ) -> ChatCompletionToolChoiceOptionParam | OpenAINotGiven:
119
+ # Just a pass-through: tool_choice is already the right type
120
+ return tool_choice
@@ -6,11 +6,21 @@
6
6
 
7
7
  """OpenAI Realtime LLM adapter for Pipecat."""
8
8
 
9
- from typing import Any, Dict, List, Union
9
+ from typing import Any, Dict, List, TypedDict
10
10
 
11
11
  from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
12
12
  from pipecat.adapters.schemas.function_schema import FunctionSchema
13
13
  from pipecat.adapters.schemas.tools_schema import ToolsSchema
14
+ from pipecat.processors.aggregators.llm_context import LLMContext
15
+
16
+
17
+ class OpenAIRealtimeLLMInvocationParams(TypedDict):
18
+ """Context-based parameters for invoking OpenAI Realtime API.
19
+
20
+ This is a placeholder until support for universal LLMContext machinery is added for OpenAI Realtime.
21
+ """
22
+
23
+ pass
14
24
 
15
25
 
16
26
  class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
@@ -20,6 +30,34 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
20
30
  OpenAI's Realtime API for function calling capabilities.
21
31
  """
22
32
 
33
+ def get_llm_invocation_params(self, context: LLMContext) -> OpenAIRealtimeLLMInvocationParams:
34
+ """Get OpenAI Realtime-specific LLM invocation parameters from a universal LLM context.
35
+
36
+ This is a placeholder until support for universal LLMContext machinery is added for OpenAI Realtime.
37
+
38
+ Args:
39
+ context: The LLM context containing messages, tools, etc.
40
+
41
+ Returns:
42
+ Dictionary of parameters for invoking OpenAI Realtime's API.
43
+ """
44
+ raise NotImplementedError("Universal LLMContext is not yet supported for OpenAI Realtime.")
45
+
46
+ def get_messages_for_logging(self, context) -> List[Dict[str, Any]]:
47
+ """Get messages from a universal LLM context in a format ready for logging about OpenAI Realtime.
48
+
49
+ Removes or truncates sensitive data like image content for safe logging.
50
+
51
+ This is a placeholder until support for universal LLMContext machinery is added for OpenAI Realtime.
52
+
53
+ Args:
54
+ context: The LLM context containing messages.
55
+
56
+ Returns:
57
+ List of messages in a format ready for logging about OpenAI Realtime.
58
+ """
59
+ raise NotImplementedError("Universal LLMContext is not yet supported for OpenAI Realtime.")
60
+
23
61
  @staticmethod
24
62
  def _to_openai_realtime_function_format(function: FunctionSchema) -> Dict[str, Any]:
25
63
  """Convert a function schema to OpenAI Realtime format.
File without changes
@@ -0,0 +1,47 @@
1
+ # Copyright (c) 2024–2025, Daily
2
+ #
3
+ # SPDX-License-Identifier: BSD 2-Clause License
4
+ #
5
+
6
+ """This module defines generic types for DTMF.
7
+
8
+ It defines the `KeypadEntry` enumeration, representing dual-tone multi-frequency
9
+ (DTMF) keypad entries for phone system integration. Each entry corresponds to a
10
+ key on the telephone keypad, facilitating the handling of input in
11
+ telecommunication applications.
12
+ """
13
+
14
+ from enum import Enum
15
+
16
+
17
+ class KeypadEntry(str, Enum):
18
+ """DTMF keypad entries for phone system integration.
19
+
20
+ Parameters:
21
+ ONE: Number key 1.
22
+ TWO: Number key 2.
23
+ THREE: Number key 3.
24
+ FOUR: Number key 4.
25
+ FIVE: Number key 5.
26
+ SIX: Number key 6.
27
+ SEVEN: Number key 7.
28
+ EIGHT: Number key 8.
29
+ NINE: Number key 9.
30
+ ZERO: Number key 0.
31
+ POUND: Pound/hash key (#).
32
+ STAR: Star/asterisk key (*).
33
+ """
34
+
35
+ ONE = "1"
36
+ TWO = "2"
37
+ THREE = "3"
38
+ FOUR = "4"
39
+ FIVE = "5"
40
+ SIX = "6"
41
+ SEVEN = "7"
42
+ EIGHT = "8"
43
+ NINE = "9"
44
+ ZERO = "0"
45
+
46
+ POUND = "#"
47
+ STAR = "*"
@@ -0,0 +1,70 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """DTMF audio utilities.
8
+
9
+ This module provides functionality to load DTMF (Dual-Tone Multi-Frequency)
10
+ audio files corresponding to phone keypad entries. Audio data is cached
11
+ in-memory after first load to improve performance on subsequent accesses.
12
+ """
13
+
14
+ import asyncio
15
+ import io
16
+ import wave
17
+ from importlib.resources import files
18
+ from typing import Dict, Optional
19
+
20
+ import aiofiles
21
+
22
+ from pipecat.audio.dtmf.types import KeypadEntry
23
+ from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler
24
+ from pipecat.audio.utils import create_file_resampler
25
+
26
+ __DTMF_LOCK__ = asyncio.Lock()
27
+ __DTMF_AUDIO__: Dict[KeypadEntry, bytes] = {}
28
+ __DTMF_RESAMPLER__: Optional[BaseAudioResampler] = None
29
+
30
+ __DTMF_FILE_NAME = {
31
+ KeypadEntry.POUND: "dtmf-pound.wav",
32
+ KeypadEntry.STAR: "dtmf-star.wav",
33
+ }
34
+
35
+
36
+ async def load_dtmf_audio(button: KeypadEntry, *, sample_rate: int = 8000) -> bytes:
37
+ """Load audio for DTMF tones associated with the given button.
38
+
39
+ Args:
40
+ button (KeypadEntry): The button for which the DTMF audio is to be loaded.
41
+ sample_rate (int, optional): The sample rate for the audio. Defaults to 8000.
42
+
43
+ Returns:
44
+ bytes: The audio data for the DTMF tone as bytes.
45
+ """
46
+ global __DTMF_AUDIO__, __DTMF_RESAMPLER__
47
+
48
+ async with __DTMF_LOCK__:
49
+ if button in __DTMF_AUDIO__:
50
+ return __DTMF_AUDIO__[button]
51
+
52
+ if not __DTMF_RESAMPLER__:
53
+ __DTMF_RESAMPLER__ = create_file_resampler()
54
+
55
+ dtmf_file_name = __DTMF_FILE_NAME.get(button, f"dtmf-{button.value}.wav")
56
+ dtmf_file_path = files("pipecat.audio.dtmf").joinpath(dtmf_file_name)
57
+
58
+ async with aiofiles.open(dtmf_file_path, "rb") as f:
59
+ data = await f.read()
60
+
61
+ with io.BytesIO(data) as buffer:
62
+ with wave.open(buffer, "rb") as wf:
63
+ audio = wf.readframes(wf.getnframes())
64
+ in_sample_rate = wf.getframerate()
65
+ resampled_audio = await __DTMF_RESAMPLER__.resample(
66
+ audio, in_sample_rate, sample_rate
67
+ )
68
+ __DTMF_AUDIO__[button] = resampled_audio
69
+
70
+ return __DTMF_AUDIO__[button]