dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. More details are linked from the original registry page.

Files changed (195)
  1. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
  2. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  11. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  22. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  23. pipecat/audio/filters/noisereduce_filter.py +15 -0
  24. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  25. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  26. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  27. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  28. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  29. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  30. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  31. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  32. pipecat/audio/vad/data/README.md +10 -0
  33. pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
  34. pipecat/audio/vad/silero.py +9 -3
  35. pipecat/audio/vad/vad_analyzer.py +13 -1
  36. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  37. pipecat/frames/frames.py +277 -86
  38. pipecat/observers/loggers/debug_log_observer.py +3 -3
  39. pipecat/observers/loggers/llm_log_observer.py +7 -3
  40. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  41. pipecat/pipeline/runner.py +18 -6
  42. pipecat/pipeline/service_switcher.py +64 -36
  43. pipecat/pipeline/task.py +125 -79
  44. pipecat/pipeline/tts_switcher.py +30 -0
  45. pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
  46. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  47. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  48. pipecat/processors/aggregators/llm_context.py +40 -2
  49. pipecat/processors/aggregators/llm_response.py +32 -15
  50. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  51. pipecat/processors/aggregators/user_response.py +6 -6
  52. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  53. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  54. pipecat/processors/dtmf_aggregator.py +174 -77
  55. pipecat/processors/filters/stt_mute_filter.py +17 -0
  56. pipecat/processors/frame_processor.py +110 -24
  57. pipecat/processors/frameworks/langchain.py +8 -2
  58. pipecat/processors/frameworks/rtvi.py +210 -68
  59. pipecat/processors/frameworks/strands_agents.py +170 -0
  60. pipecat/processors/logger.py +2 -2
  61. pipecat/processors/transcript_processor.py +26 -5
  62. pipecat/processors/user_idle_processor.py +35 -11
  63. pipecat/runner/daily.py +59 -20
  64. pipecat/runner/run.py +395 -93
  65. pipecat/runner/types.py +6 -4
  66. pipecat/runner/utils.py +51 -10
  67. pipecat/serializers/__init__.py +5 -1
  68. pipecat/serializers/asterisk.py +16 -2
  69. pipecat/serializers/convox.py +41 -4
  70. pipecat/serializers/custom.py +257 -0
  71. pipecat/serializers/exotel.py +5 -5
  72. pipecat/serializers/livekit.py +20 -0
  73. pipecat/serializers/plivo.py +5 -5
  74. pipecat/serializers/protobuf.py +6 -5
  75. pipecat/serializers/telnyx.py +2 -2
  76. pipecat/serializers/twilio.py +43 -23
  77. pipecat/serializers/vi.py +324 -0
  78. pipecat/services/ai_service.py +2 -6
  79. pipecat/services/anthropic/llm.py +2 -25
  80. pipecat/services/assemblyai/models.py +6 -0
  81. pipecat/services/assemblyai/stt.py +13 -5
  82. pipecat/services/asyncai/tts.py +5 -3
  83. pipecat/services/aws/__init__.py +1 -0
  84. pipecat/services/aws/llm.py +147 -105
  85. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  86. pipecat/services/aws/nova_sonic/context.py +436 -0
  87. pipecat/services/aws/nova_sonic/frames.py +25 -0
  88. pipecat/services/aws/nova_sonic/llm.py +1265 -0
  89. pipecat/services/aws/stt.py +3 -3
  90. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  91. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  92. pipecat/services/aws_nova_sonic/context.py +8 -354
  93. pipecat/services/aws_nova_sonic/frames.py +13 -17
  94. pipecat/services/azure/llm.py +51 -1
  95. pipecat/services/azure/realtime/__init__.py +0 -0
  96. pipecat/services/azure/realtime/llm.py +65 -0
  97. pipecat/services/azure/stt.py +15 -0
  98. pipecat/services/cartesia/stt.py +77 -70
  99. pipecat/services/cartesia/tts.py +80 -13
  100. pipecat/services/deepgram/__init__.py +1 -0
  101. pipecat/services/deepgram/flux/__init__.py +0 -0
  102. pipecat/services/deepgram/flux/stt.py +640 -0
  103. pipecat/services/elevenlabs/__init__.py +4 -1
  104. pipecat/services/elevenlabs/stt.py +339 -0
  105. pipecat/services/elevenlabs/tts.py +87 -46
  106. pipecat/services/fish/tts.py +5 -2
  107. pipecat/services/gemini_multimodal_live/events.py +38 -524
  108. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  109. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  110. pipecat/services/gladia/stt.py +56 -72
  111. pipecat/services/google/__init__.py +1 -0
  112. pipecat/services/google/gemini_live/__init__.py +3 -0
  113. pipecat/services/google/gemini_live/file_api.py +189 -0
  114. pipecat/services/google/gemini_live/llm.py +1582 -0
  115. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  116. pipecat/services/google/llm.py +15 -11
  117. pipecat/services/google/llm_openai.py +3 -3
  118. pipecat/services/google/llm_vertex.py +86 -16
  119. pipecat/services/google/stt.py +4 -0
  120. pipecat/services/google/tts.py +7 -3
  121. pipecat/services/heygen/api.py +2 -0
  122. pipecat/services/heygen/client.py +8 -4
  123. pipecat/services/heygen/video.py +2 -0
  124. pipecat/services/hume/__init__.py +5 -0
  125. pipecat/services/hume/tts.py +220 -0
  126. pipecat/services/inworld/tts.py +6 -6
  127. pipecat/services/llm_service.py +15 -5
  128. pipecat/services/lmnt/tts.py +4 -2
  129. pipecat/services/mcp_service.py +4 -2
  130. pipecat/services/mem0/memory.py +6 -5
  131. pipecat/services/mistral/llm.py +29 -8
  132. pipecat/services/moondream/vision.py +42 -16
  133. pipecat/services/neuphonic/tts.py +5 -2
  134. pipecat/services/openai/__init__.py +1 -0
  135. pipecat/services/openai/base_llm.py +27 -20
  136. pipecat/services/openai/realtime/__init__.py +0 -0
  137. pipecat/services/openai/realtime/context.py +272 -0
  138. pipecat/services/openai/realtime/events.py +1106 -0
  139. pipecat/services/openai/realtime/frames.py +37 -0
  140. pipecat/services/openai/realtime/llm.py +829 -0
  141. pipecat/services/openai/tts.py +49 -10
  142. pipecat/services/openai_realtime/__init__.py +27 -0
  143. pipecat/services/openai_realtime/azure.py +21 -0
  144. pipecat/services/openai_realtime/context.py +21 -0
  145. pipecat/services/openai_realtime/events.py +21 -0
  146. pipecat/services/openai_realtime/frames.py +21 -0
  147. pipecat/services/openai_realtime_beta/azure.py +16 -0
  148. pipecat/services/openai_realtime_beta/openai.py +17 -5
  149. pipecat/services/piper/tts.py +7 -9
  150. pipecat/services/playht/tts.py +34 -4
  151. pipecat/services/rime/tts.py +12 -12
  152. pipecat/services/riva/stt.py +3 -1
  153. pipecat/services/salesforce/__init__.py +9 -0
  154. pipecat/services/salesforce/llm.py +700 -0
  155. pipecat/services/sarvam/__init__.py +7 -0
  156. pipecat/services/sarvam/stt.py +540 -0
  157. pipecat/services/sarvam/tts.py +97 -13
  158. pipecat/services/simli/video.py +2 -2
  159. pipecat/services/speechmatics/stt.py +22 -10
  160. pipecat/services/stt_service.py +47 -0
  161. pipecat/services/tavus/video.py +2 -2
  162. pipecat/services/tts_service.py +75 -22
  163. pipecat/services/vision_service.py +7 -6
  164. pipecat/services/vistaar/llm.py +51 -9
  165. pipecat/tests/utils.py +4 -4
  166. pipecat/transcriptions/language.py +41 -1
  167. pipecat/transports/base_input.py +13 -34
  168. pipecat/transports/base_output.py +140 -104
  169. pipecat/transports/daily/transport.py +199 -26
  170. pipecat/transports/heygen/__init__.py +0 -0
  171. pipecat/transports/heygen/transport.py +381 -0
  172. pipecat/transports/livekit/transport.py +228 -63
  173. pipecat/transports/local/audio.py +6 -1
  174. pipecat/transports/local/tk.py +11 -2
  175. pipecat/transports/network/fastapi_websocket.py +1 -1
  176. pipecat/transports/smallwebrtc/connection.py +103 -19
  177. pipecat/transports/smallwebrtc/request_handler.py +246 -0
  178. pipecat/transports/smallwebrtc/transport.py +65 -23
  179. pipecat/transports/tavus/transport.py +23 -12
  180. pipecat/transports/websocket/client.py +41 -5
  181. pipecat/transports/websocket/fastapi.py +21 -11
  182. pipecat/transports/websocket/server.py +14 -7
  183. pipecat/transports/whatsapp/api.py +8 -0
  184. pipecat/transports/whatsapp/client.py +47 -0
  185. pipecat/utils/base_object.py +54 -22
  186. pipecat/utils/redis.py +58 -0
  187. pipecat/utils/string.py +13 -1
  188. pipecat/utils/tracing/service_decorators.py +21 -21
  189. pipecat/serializers/genesys.py +0 -95
  190. pipecat/services/google/test-google-chirp.py +0 -45
  191. pipecat/services/openai.py +0 -698
  192. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
  193. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
  194. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
  195. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
@@ -8,360 +8,14 @@
8
8
 
9
9
  This module provides specialized context aggregators and message handling for AWS Nova Sonic,
10
10
  including conversation history management and role-specific message processing.
11
- """
12
-
13
- import copy
14
- from dataclasses import dataclass, field
15
- from enum import Enum
16
-
17
- from loguru import logger
18
-
19
- from pipecat.frames.frames import (
20
- BotStoppedSpeakingFrame,
21
- DataFrame,
22
- Frame,
23
- FunctionCallResultFrame,
24
- LLMFullResponseEndFrame,
25
- LLMFullResponseStartFrame,
26
- LLMMessagesAppendFrame,
27
- LLMMessagesUpdateFrame,
28
- LLMSetToolChoiceFrame,
29
- LLMSetToolsFrame,
30
- StartInterruptionFrame,
31
- TextFrame,
32
- UserImageRawFrame,
33
- )
34
- from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
35
- from pipecat.processors.frame_processor import FrameDirection
36
- from pipecat.services.aws_nova_sonic.frames import AWSNovaSonicFunctionCallResultFrame
37
- from pipecat.services.openai.llm import (
38
- OpenAIAssistantContextAggregator,
39
- OpenAIUserContextAggregator,
40
- )
41
-
42
-
43
- class Role(Enum):
44
- """Roles supported in AWS Nova Sonic conversations.
45
-
46
- Parameters:
47
- SYSTEM: System-level messages (not used in conversation history).
48
- USER: Messages sent by the user.
49
- ASSISTANT: Messages sent by the assistant.
50
- TOOL: Messages sent by tools (not used in conversation history).
51
- """
52
-
53
- SYSTEM = "SYSTEM"
54
- USER = "USER"
55
- ASSISTANT = "ASSISTANT"
56
- TOOL = "TOOL"
57
-
58
-
59
- @dataclass
60
- class AWSNovaSonicConversationHistoryMessage:
61
- """A single message in AWS Nova Sonic conversation history.
62
-
63
- Parameters:
64
- role: The role of the message sender (USER or ASSISTANT only).
65
- text: The text content of the message.
66
- """
67
-
68
- role: Role # only USER and ASSISTANT
69
- text: str
70
-
71
-
72
- @dataclass
73
- class AWSNovaSonicConversationHistory:
74
- """Complete conversation history for AWS Nova Sonic initialization.
75
-
76
- Parameters:
77
- system_instruction: System-level instruction for the conversation.
78
- messages: List of conversation messages between user and assistant.
79
- """
80
-
81
- system_instruction: str = None
82
- messages: list[AWSNovaSonicConversationHistoryMessage] = field(default_factory=list)
83
-
84
-
85
- class AWSNovaSonicLLMContext(OpenAILLMContext):
86
- """Specialized LLM context for AWS Nova Sonic service.
87
-
88
- Extends OpenAI context with Nova Sonic-specific message handling,
89
- conversation history management, and text buffering capabilities.
90
- """
91
-
92
- def __init__(self, messages=None, tools=None, **kwargs):
93
- """Initialize AWS Nova Sonic LLM context.
94
-
95
- Args:
96
- messages: Initial messages for the context.
97
- tools: Available tools for the context.
98
- **kwargs: Additional arguments passed to parent class.
99
- """
100
- super().__init__(messages=messages, tools=tools, **kwargs)
101
- self.__setup_local()
102
-
103
- def __setup_local(self, system_instruction: str = ""):
104
- self._assistant_text = ""
105
- self._user_text = ""
106
- self._system_instruction = system_instruction
107
-
108
- @staticmethod
109
- def upgrade_to_nova_sonic(
110
- obj: OpenAILLMContext, system_instruction: str
111
- ) -> "AWSNovaSonicLLMContext":
112
- """Upgrade an OpenAI context to AWS Nova Sonic context.
113
-
114
- Args:
115
- obj: The OpenAI context to upgrade.
116
- system_instruction: System instruction for the context.
117
-
118
- Returns:
119
- The upgraded AWS Nova Sonic context.
120
- """
121
- if isinstance(obj, OpenAILLMContext) and not isinstance(obj, AWSNovaSonicLLMContext):
122
- obj.__class__ = AWSNovaSonicLLMContext
123
- obj.__setup_local(system_instruction)
124
- return obj
125
-
126
- # NOTE: this method has the side-effect of updating _system_instruction from messages
127
- def get_messages_for_initializing_history(self) -> AWSNovaSonicConversationHistory:
128
- """Get conversation history for initializing AWS Nova Sonic session.
129
-
130
- Processes stored messages and extracts system instruction and conversation
131
- history in the format expected by AWS Nova Sonic.
132
-
133
- Returns:
134
- Formatted conversation history with system instruction and messages.
135
- """
136
- history = AWSNovaSonicConversationHistory(system_instruction=self._system_instruction)
137
-
138
- # Bail if there are no messages
139
- if not self.messages:
140
- return history
141
-
142
- messages = copy.deepcopy(self.messages)
143
-
144
- # If we have a "system" message as our first message, let's pull that out into "instruction"
145
- if messages[0].get("role") == "system":
146
- system = messages.pop(0)
147
- content = system.get("content")
148
- if isinstance(content, str):
149
- history.system_instruction = content
150
- elif isinstance(content, list):
151
- history.system_instruction = content[0].get("text")
152
- if history.system_instruction:
153
- self._system_instruction = history.system_instruction
154
-
155
- # Process remaining messages to fill out conversation history.
156
- # Nova Sonic supports "user" and "assistant" messages in history.
157
- for message in messages:
158
- history_message = self.from_standard_message(message)
159
- if history_message:
160
- history.messages.append(history_message)
161
-
162
- return history
163
-
164
- def get_messages_for_persistent_storage(self):
165
- """Get messages formatted for persistent storage.
166
-
167
- Returns:
168
- List of messages including system instruction if present.
169
- """
170
- messages = super().get_messages_for_persistent_storage()
171
- # If we have a system instruction and messages doesn't already contain it, add it
172
- if self._system_instruction and not (messages and messages[0].get("role") == "system"):
173
- messages.insert(0, {"role": "system", "content": self._system_instruction})
174
- return messages
175
11
 
176
- def from_standard_message(self, message) -> AWSNovaSonicConversationHistoryMessage:
177
- """Convert standard message format to Nova Sonic format.
12
+ .. deprecated:: 0.0.91
13
+ AWS Nova Sonic no longer uses types from this module under the hood.
14
+ It now uses `LLMContext` and `LLMContextAggregatorPair`.
15
+ Using the new patterns should allow you to not need types from this module.
178
16
 
179
- Args:
180
- message: Standard message dictionary to convert.
181
-
182
- Returns:
183
- Nova Sonic conversation history message, or None if not convertible.
184
- """
185
- role = message.get("role")
186
- if message.get("role") == "user" or message.get("role") == "assistant":
187
- content = message.get("content")
188
- if isinstance(message.get("content"), list):
189
- content = ""
190
- for c in message.get("content"):
191
- if c.get("type") == "text":
192
- content += " " + c.get("text")
193
- else:
194
- logger.error(
195
- f"Unhandled content type in context message: {c.get('type')} - {message}"
196
- )
197
- # There won't be content if this is an assistant tool call entry.
198
- # We're ignoring those since they can't be loaded into AWS Nova Sonic conversation
199
- # history
200
- if content:
201
- return AWSNovaSonicConversationHistoryMessage(role=Role[role.upper()], text=content)
202
- # NOTE: we're ignoring messages with role "tool" since they can't be loaded into AWS Nova
203
- # Sonic conversation history
204
-
205
- def buffer_user_text(self, text):
206
- """Buffer user text for later flushing to context.
207
-
208
- Args:
209
- text: User text to buffer.
210
- """
211
- self._user_text += f" {text}" if self._user_text else text
212
- # logger.debug(f"User text buffered: {self._user_text}")
213
-
214
- def flush_aggregated_user_text(self) -> str:
215
- """Flush buffered user text to context as a complete message.
216
-
217
- Returns:
218
- The flushed user text, or empty string if no text was buffered.
219
- """
220
- if not self._user_text:
221
- return ""
222
- user_text = self._user_text
223
- message = {
224
- "role": "user",
225
- "content": [{"type": "text", "text": user_text}],
226
- }
227
- self._user_text = ""
228
- self.add_message(message)
229
- # logger.debug(f"Context updated (user): {self.get_messages_for_logging()}")
230
- return user_text
231
-
232
- def buffer_assistant_text(self, text):
233
- """Buffer assistant text for later flushing to context.
234
-
235
- Args:
236
- text: Assistant text to buffer.
237
- """
238
- self._assistant_text += text
239
- # logger.debug(f"Assistant text buffered: {self._assistant_text}")
240
-
241
- def flush_aggregated_assistant_text(self):
242
- """Flush buffered assistant text to context as a complete message."""
243
- if not self._assistant_text:
244
- return
245
- message = {
246
- "role": "assistant",
247
- "content": [{"type": "text", "text": self._assistant_text}],
248
- }
249
- self._assistant_text = ""
250
- self.add_message(message)
251
- # logger.debug(f"Context updated (assistant): {self.get_messages_for_logging()}")
252
-
253
-
254
- @dataclass
255
- class AWSNovaSonicMessagesUpdateFrame(DataFrame):
256
- """Frame containing updated AWS Nova Sonic context.
257
-
258
- Parameters:
259
- context: The updated AWS Nova Sonic LLM context.
260
- """
261
-
262
- context: AWSNovaSonicLLMContext
263
-
264
-
265
- class AWSNovaSonicUserContextAggregator(OpenAIUserContextAggregator):
266
- """Context aggregator for user messages in AWS Nova Sonic conversations.
267
-
268
- Extends the OpenAI user context aggregator to emit Nova Sonic-specific
269
- context update frames.
270
- """
271
-
272
- async def process_frame(
273
- self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM
274
- ):
275
- """Process frames and emit Nova Sonic-specific context updates.
276
-
277
- Args:
278
- frame: The frame to process.
279
- direction: The direction the frame is traveling.
280
- """
281
- await super().process_frame(frame, direction)
282
-
283
- # Parent does not push LLMMessagesUpdateFrame
284
- if isinstance(frame, LLMMessagesUpdateFrame):
285
- await self.push_frame(AWSNovaSonicMessagesUpdateFrame(context=self._context))
286
-
287
-
288
- class AWSNovaSonicAssistantContextAggregator(OpenAIAssistantContextAggregator):
289
- """Context aggregator for assistant messages in AWS Nova Sonic conversations.
290
-
291
- Provides specialized handling for assistant responses and function calls
292
- in AWS Nova Sonic context, with custom frame processing logic.
293
- """
294
-
295
- async def process_frame(self, frame: Frame, direction: FrameDirection):
296
- """Process frames with Nova Sonic-specific logic.
297
-
298
- Args:
299
- frame: The frame to process.
300
- direction: The direction the frame is traveling.
301
- """
302
- # HACK: For now, disable the context aggregator by making it just pass through all frames
303
- # that the parent handles (except the function call stuff, which we still need).
304
- # For an explanation of this hack, see
305
- # AWSNovaSonicLLMService._report_assistant_response_text_added.
306
- if isinstance(
307
- frame,
308
- (
309
- StartInterruptionFrame,
310
- LLMFullResponseStartFrame,
311
- LLMFullResponseEndFrame,
312
- TextFrame,
313
- LLMMessagesAppendFrame,
314
- LLMMessagesUpdateFrame,
315
- LLMSetToolsFrame,
316
- LLMSetToolChoiceFrame,
317
- UserImageRawFrame,
318
- BotStoppedSpeakingFrame,
319
- ),
320
- ):
321
- await self.push_frame(frame, direction)
322
- else:
323
- await super().process_frame(frame, direction)
324
-
325
- async def handle_function_call_result(self, frame: FunctionCallResultFrame):
326
- """Handle function call results for AWS Nova Sonic.
327
-
328
- Args:
329
- frame: The function call result frame to handle.
330
- """
331
- await super().handle_function_call_result(frame)
332
-
333
- # The standard function callback code path pushes the FunctionCallResultFrame from the LLM
334
- # itself, so we didn't have a chance to add the result to the AWS Nova Sonic server-side
335
- # context. Let's push a special frame to do that.
336
- await self.push_frame(
337
- AWSNovaSonicFunctionCallResultFrame(result_frame=frame), FrameDirection.UPSTREAM
338
- )
339
-
340
-
341
- @dataclass
342
- class AWSNovaSonicContextAggregatorPair:
343
- """Pair of user and assistant context aggregators for AWS Nova Sonic.
344
-
345
- Parameters:
346
- _user: The user context aggregator.
347
- _assistant: The assistant context aggregator.
348
- """
349
-
350
- _user: AWSNovaSonicUserContextAggregator
351
- _assistant: AWSNovaSonicAssistantContextAggregator
352
-
353
- def user(self) -> AWSNovaSonicUserContextAggregator:
354
- """Get the user context aggregator.
355
-
356
- Returns:
357
- The user context aggregator instance.
358
- """
359
- return self._user
360
-
361
- def assistant(self) -> AWSNovaSonicAssistantContextAggregator:
362
- """Get the assistant context aggregator.
17
+ See deprecation warning in pipecat.services.aws.nova_sonic.context for more
18
+ details.
19
+ """
363
20
 
364
- Returns:
365
- The assistant context aggregator instance.
366
- """
367
- return self._assistant
21
+ from pipecat.services.aws.nova_sonic.context import *
@@ -6,20 +6,16 @@
6
6
 
7
7
  """Custom frames for AWS Nova Sonic LLM service."""
8
8
 
9
- from dataclasses import dataclass
10
-
11
- from pipecat.frames.frames import DataFrame, FunctionCallResultFrame
12
-
13
-
14
- @dataclass
15
- class AWSNovaSonicFunctionCallResultFrame(DataFrame):
16
- """Frame containing function call result for AWS Nova Sonic processing.
17
-
18
- This frame wraps a standard function call result frame to enable
19
- AWS Nova Sonic-specific handling and context updates.
20
-
21
- Parameters:
22
- result_frame: The underlying function call result frame.
23
- """
24
-
25
- result_frame: FunctionCallResultFrame
9
+ import warnings
10
+
11
+ from pipecat.services.aws.nova_sonic.frames import *
12
+
13
+ with warnings.catch_warnings():
14
+ warnings.simplefilter("always")
15
+ warnings.warn(
16
+ "Types in pipecat.services.aws_nova_sonic.frames are deprecated. "
17
+ "Please use the equivalent types from "
18
+ "pipecat.services.aws.nova_sonic.frames instead.",
19
+ DeprecationWarning,
20
+ stacklevel=2,
21
+ )
@@ -1,4 +1,3 @@
1
- #
2
1
  # Copyright (c) 2024–2025, Daily
3
2
  #
4
3
  # SPDX-License-Identifier: BSD 2-Clause License
@@ -6,9 +5,12 @@
6
5
 
7
6
  """Azure OpenAI service implementation for the Pipecat AI framework."""
8
7
 
8
+ from typing import Optional
9
+
9
10
  from loguru import logger
10
11
  from openai import AsyncAzureOpenAI
11
12
 
13
+ from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
12
14
  from pipecat.services.openai.llm import OpenAILLMService
13
15
 
14
16
 
@@ -17,6 +19,16 @@ class AzureLLMService(OpenAILLMService):
17
19
 
18
20
  This service extends OpenAILLMService to connect to Azure's OpenAI endpoint while
19
21
  maintaining full compatibility with OpenAI's interface and functionality.
22
+
23
+
24
+ Args:
25
+ api_key: The API key for accessing Azure OpenAI.
26
+ endpoint: The Azure endpoint URL.
27
+ model: The model identifier to use.
28
+ api_version: Azure API version. Defaults to "2024-09-01-preview".
29
+ reasoning_effort: If provided for reasoning models, sets the effort (e.g. "minimal").
30
+ **kwargs: Additional keyword arguments passed to OpenAILLMService.
31
+
20
32
  """
21
33
 
22
34
  def __init__(
@@ -26,6 +38,7 @@ class AzureLLMService(OpenAILLMService):
26
38
  endpoint: str,
27
39
  model: str,
28
40
  api_version: str = "2024-09-01-preview",
41
+ reasoning_effort: Optional[str] = None,
29
42
  **kwargs,
30
43
  ):
31
44
  """Initialize the Azure LLM service.
@@ -41,6 +54,7 @@ class AzureLLMService(OpenAILLMService):
41
54
  # will call create_client() and we need those values there.
42
55
  self._endpoint = endpoint
43
56
  self._api_version = api_version
57
+ self._reasoning_effort = reasoning_effort
44
58
  super().__init__(api_key=api_key, model=model, **kwargs)
45
59
 
46
60
  def create_client(self, api_key=None, base_url=None, **kwargs):
@@ -62,3 +76,39 @@ class AzureLLMService(OpenAILLMService):
62
76
  api_version=self._api_version,
63
77
  azure_deployment=azure_deployment,
64
78
  )
79
+
80
+ def _is_reasoning_model(self) -> bool:
81
+ """Check if the current model supports reasoning parameters.
82
+
83
+ Based on search results:
84
+ - GPT-5, GPT-5-mini, and GPT-5-nano are reasoning models
85
+ - GPT-5-chat is a standard chat model that doesn't use reasoning by default
86
+
87
+ Returns:
88
+ True if model supports reasoning parameters.
89
+ """
90
+ model_name_lower = self.model_name.lower()
91
+
92
+ # Reasoning-capable models
93
+ reasoning_models = {"gpt-5-nano", "gpt-5", "gpt-5-mini"}
94
+ return model_name_lower in reasoning_models
95
+
96
+ def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
97
+ # include base params
98
+ params = super().build_chat_completion_params(params_from_context)
99
+
100
+ if self._is_reasoning_model():
101
+ # not required for reasoning models
102
+ for k in ("frequency_penalty", "presence_penalty", "temperature", "top_p"):
103
+ if k in params:
104
+ params.pop(k, None)
105
+ if self._reasoning_effort:
106
+ params["reasoning_effort"] = self._reasoning_effort
107
+ seed = self._settings.get("seed")
108
+ if seed is not None:
109
+ params["seed"] = seed
110
+ else:
111
+ # Standard models are fine with the defaults from the base class
112
+ pass
113
+
114
+ return params
File without changes
@@ -0,0 +1,65 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Azure OpenAI Realtime LLM service implementation."""
8
+
9
+ from loguru import logger
10
+
11
+ from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
12
+
13
+ try:
14
+ from websockets.asyncio.client import connect as websocket_connect
15
+ except ModuleNotFoundError as e:
16
+ logger.error(f"Exception: {e}")
17
+ logger.error("In order to use Azure Realtime, you need to `pip install pipecat-ai[openai]`.")
18
+ raise Exception(f"Missing module: {e}")
19
+
20
+
21
+ class AzureRealtimeLLMService(OpenAIRealtimeLLMService):
22
+ """Azure OpenAI Realtime LLM service with Azure-specific authentication.
23
+
24
+ Extends the OpenAI Realtime service to work with Azure OpenAI endpoints,
25
+ using Azure's authentication headers and endpoint format. Provides the same
26
+ real-time audio and text communication capabilities as the base OpenAI service.
27
+ """
28
+
29
+ def __init__(
30
+ self,
31
+ *,
32
+ api_key: str,
33
+ base_url: str,
34
+ **kwargs,
35
+ ):
36
+ """Initialize Azure Realtime LLM service.
37
+
38
+ Args:
39
+ api_key: The API key for the Azure OpenAI service.
40
+ base_url: The full Azure WebSocket endpoint URL including api-version and deployment.
41
+ Example: "wss://my-project.openai.azure.com/openai/realtime?api-version=2024-10-01-preview&deployment=my-realtime-deployment"
42
+ **kwargs: Additional arguments passed to parent OpenAIRealtimeLLMService.
43
+ """
44
+ super().__init__(base_url=base_url, api_key=api_key, **kwargs)
45
+ self.api_key = api_key
46
+ self.base_url = base_url
47
+
48
+ async def _connect(self):
49
+ try:
50
+ if self._websocket:
51
+ # Here we assume that if we have a websocket, we are connected. We
52
+ # handle disconnections in the send/recv code paths.
53
+ return
54
+
55
+ logger.info(f"Connecting to {self.base_url}, api key: {self.api_key}")
56
+ self._websocket = await websocket_connect(
57
+ uri=self.base_url,
58
+ additional_headers={
59
+ "api-key": self.api_key,
60
+ },
61
+ )
62
+ self._receive_task = self.create_task(self._receive_task_handler())
63
+ except Exception as e:
64
+ logger.error(f"{self} initialization error: {e}")
65
+ self._websocket = None
@@ -19,6 +19,7 @@ from pipecat.frames.frames import (
19
19
  CancelFrame,
20
20
  EndFrame,
21
21
  Frame,
22
+ InterimTranscriptionFrame,
22
23
  StartFrame,
23
24
  TranscriptionFrame,
24
25
  )
@@ -184,7 +185,9 @@ class AzureSTTService(STTService):
184
185
  self._apply_phrase_list(self._speech_recognizer)
185
186
 
186
187
  # Attach event handler for recognized speech
188
+ # self._speech_recognizer.recognizing.connect(self._on_handle_recognizing)
187
189
  self._speech_recognizer.recognized.connect(self._on_handle_recognized)
190
+
188
191
  self._speech_recognizer.start_continuous_recognition_async()
189
192
 
190
193
  async def stop(self, frame: EndFrame):
@@ -248,6 +251,18 @@ class AzureSTTService(STTService):
248
251
  )
249
252
  asyncio.run_coroutine_threadsafe(self.push_frame(frame), self.get_event_loop())
250
253
 
254
+ def _on_handle_recognizing(self, event):
255
+ if event.result.reason == ResultReason.RecognizingSpeech and len(event.result.text) > 0:
256
+ language = getattr(event.result, "language", None) or self._settings.get("language")
257
+ frame = InterimTranscriptionFrame(
258
+ event.result.text,
259
+ self._user_id,
260
+ time_now_iso8601(),
261
+ language,
262
+ result=event,
263
+ )
264
+ asyncio.run_coroutine_threadsafe(self.push_frame(frame), self.get_event_loop())
265
+
251
266
  def _apply_phrase_list(self, recognizer: SpeechRecognizer):
252
267
  """Applies the configured vocabulary as a phrase list to the recognizer."""
253
268
  if self._vocab and recognizer: