dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (195) hide show
  1. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
  2. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  11. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  22. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  23. pipecat/audio/filters/noisereduce_filter.py +15 -0
  24. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  25. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  26. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  27. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  28. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  29. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  30. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  31. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  32. pipecat/audio/vad/data/README.md +10 -0
  33. pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
  34. pipecat/audio/vad/silero.py +9 -3
  35. pipecat/audio/vad/vad_analyzer.py +13 -1
  36. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  37. pipecat/frames/frames.py +277 -86
  38. pipecat/observers/loggers/debug_log_observer.py +3 -3
  39. pipecat/observers/loggers/llm_log_observer.py +7 -3
  40. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  41. pipecat/pipeline/runner.py +18 -6
  42. pipecat/pipeline/service_switcher.py +64 -36
  43. pipecat/pipeline/task.py +125 -79
  44. pipecat/pipeline/tts_switcher.py +30 -0
  45. pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
  46. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  47. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  48. pipecat/processors/aggregators/llm_context.py +40 -2
  49. pipecat/processors/aggregators/llm_response.py +32 -15
  50. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  51. pipecat/processors/aggregators/user_response.py +6 -6
  52. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  53. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  54. pipecat/processors/dtmf_aggregator.py +174 -77
  55. pipecat/processors/filters/stt_mute_filter.py +17 -0
  56. pipecat/processors/frame_processor.py +110 -24
  57. pipecat/processors/frameworks/langchain.py +8 -2
  58. pipecat/processors/frameworks/rtvi.py +210 -68
  59. pipecat/processors/frameworks/strands_agents.py +170 -0
  60. pipecat/processors/logger.py +2 -2
  61. pipecat/processors/transcript_processor.py +26 -5
  62. pipecat/processors/user_idle_processor.py +35 -11
  63. pipecat/runner/daily.py +59 -20
  64. pipecat/runner/run.py +395 -93
  65. pipecat/runner/types.py +6 -4
  66. pipecat/runner/utils.py +51 -10
  67. pipecat/serializers/__init__.py +5 -1
  68. pipecat/serializers/asterisk.py +16 -2
  69. pipecat/serializers/convox.py +41 -4
  70. pipecat/serializers/custom.py +257 -0
  71. pipecat/serializers/exotel.py +5 -5
  72. pipecat/serializers/livekit.py +20 -0
  73. pipecat/serializers/plivo.py +5 -5
  74. pipecat/serializers/protobuf.py +6 -5
  75. pipecat/serializers/telnyx.py +2 -2
  76. pipecat/serializers/twilio.py +43 -23
  77. pipecat/serializers/vi.py +324 -0
  78. pipecat/services/ai_service.py +2 -6
  79. pipecat/services/anthropic/llm.py +2 -25
  80. pipecat/services/assemblyai/models.py +6 -0
  81. pipecat/services/assemblyai/stt.py +13 -5
  82. pipecat/services/asyncai/tts.py +5 -3
  83. pipecat/services/aws/__init__.py +1 -0
  84. pipecat/services/aws/llm.py +147 -105
  85. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  86. pipecat/services/aws/nova_sonic/context.py +436 -0
  87. pipecat/services/aws/nova_sonic/frames.py +25 -0
  88. pipecat/services/aws/nova_sonic/llm.py +1265 -0
  89. pipecat/services/aws/stt.py +3 -3
  90. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  91. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  92. pipecat/services/aws_nova_sonic/context.py +8 -354
  93. pipecat/services/aws_nova_sonic/frames.py +13 -17
  94. pipecat/services/azure/llm.py +51 -1
  95. pipecat/services/azure/realtime/__init__.py +0 -0
  96. pipecat/services/azure/realtime/llm.py +65 -0
  97. pipecat/services/azure/stt.py +15 -0
  98. pipecat/services/cartesia/stt.py +77 -70
  99. pipecat/services/cartesia/tts.py +80 -13
  100. pipecat/services/deepgram/__init__.py +1 -0
  101. pipecat/services/deepgram/flux/__init__.py +0 -0
  102. pipecat/services/deepgram/flux/stt.py +640 -0
  103. pipecat/services/elevenlabs/__init__.py +4 -1
  104. pipecat/services/elevenlabs/stt.py +339 -0
  105. pipecat/services/elevenlabs/tts.py +87 -46
  106. pipecat/services/fish/tts.py +5 -2
  107. pipecat/services/gemini_multimodal_live/events.py +38 -524
  108. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  109. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  110. pipecat/services/gladia/stt.py +56 -72
  111. pipecat/services/google/__init__.py +1 -0
  112. pipecat/services/google/gemini_live/__init__.py +3 -0
  113. pipecat/services/google/gemini_live/file_api.py +189 -0
  114. pipecat/services/google/gemini_live/llm.py +1582 -0
  115. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  116. pipecat/services/google/llm.py +15 -11
  117. pipecat/services/google/llm_openai.py +3 -3
  118. pipecat/services/google/llm_vertex.py +86 -16
  119. pipecat/services/google/stt.py +4 -0
  120. pipecat/services/google/tts.py +7 -3
  121. pipecat/services/heygen/api.py +2 -0
  122. pipecat/services/heygen/client.py +8 -4
  123. pipecat/services/heygen/video.py +2 -0
  124. pipecat/services/hume/__init__.py +5 -0
  125. pipecat/services/hume/tts.py +220 -0
  126. pipecat/services/inworld/tts.py +6 -6
  127. pipecat/services/llm_service.py +15 -5
  128. pipecat/services/lmnt/tts.py +4 -2
  129. pipecat/services/mcp_service.py +4 -2
  130. pipecat/services/mem0/memory.py +6 -5
  131. pipecat/services/mistral/llm.py +29 -8
  132. pipecat/services/moondream/vision.py +42 -16
  133. pipecat/services/neuphonic/tts.py +5 -2
  134. pipecat/services/openai/__init__.py +1 -0
  135. pipecat/services/openai/base_llm.py +27 -20
  136. pipecat/services/openai/realtime/__init__.py +0 -0
  137. pipecat/services/openai/realtime/context.py +272 -0
  138. pipecat/services/openai/realtime/events.py +1106 -0
  139. pipecat/services/openai/realtime/frames.py +37 -0
  140. pipecat/services/openai/realtime/llm.py +829 -0
  141. pipecat/services/openai/tts.py +49 -10
  142. pipecat/services/openai_realtime/__init__.py +27 -0
  143. pipecat/services/openai_realtime/azure.py +21 -0
  144. pipecat/services/openai_realtime/context.py +21 -0
  145. pipecat/services/openai_realtime/events.py +21 -0
  146. pipecat/services/openai_realtime/frames.py +21 -0
  147. pipecat/services/openai_realtime_beta/azure.py +16 -0
  148. pipecat/services/openai_realtime_beta/openai.py +17 -5
  149. pipecat/services/piper/tts.py +7 -9
  150. pipecat/services/playht/tts.py +34 -4
  151. pipecat/services/rime/tts.py +12 -12
  152. pipecat/services/riva/stt.py +3 -1
  153. pipecat/services/salesforce/__init__.py +9 -0
  154. pipecat/services/salesforce/llm.py +700 -0
  155. pipecat/services/sarvam/__init__.py +7 -0
  156. pipecat/services/sarvam/stt.py +540 -0
  157. pipecat/services/sarvam/tts.py +97 -13
  158. pipecat/services/simli/video.py +2 -2
  159. pipecat/services/speechmatics/stt.py +22 -10
  160. pipecat/services/stt_service.py +47 -0
  161. pipecat/services/tavus/video.py +2 -2
  162. pipecat/services/tts_service.py +75 -22
  163. pipecat/services/vision_service.py +7 -6
  164. pipecat/services/vistaar/llm.py +51 -9
  165. pipecat/tests/utils.py +4 -4
  166. pipecat/transcriptions/language.py +41 -1
  167. pipecat/transports/base_input.py +13 -34
  168. pipecat/transports/base_output.py +140 -104
  169. pipecat/transports/daily/transport.py +199 -26
  170. pipecat/transports/heygen/__init__.py +0 -0
  171. pipecat/transports/heygen/transport.py +381 -0
  172. pipecat/transports/livekit/transport.py +228 -63
  173. pipecat/transports/local/audio.py +6 -1
  174. pipecat/transports/local/tk.py +11 -2
  175. pipecat/transports/network/fastapi_websocket.py +1 -1
  176. pipecat/transports/smallwebrtc/connection.py +103 -19
  177. pipecat/transports/smallwebrtc/request_handler.py +246 -0
  178. pipecat/transports/smallwebrtc/transport.py +65 -23
  179. pipecat/transports/tavus/transport.py +23 -12
  180. pipecat/transports/websocket/client.py +41 -5
  181. pipecat/transports/websocket/fastapi.py +21 -11
  182. pipecat/transports/websocket/server.py +14 -7
  183. pipecat/transports/whatsapp/api.py +8 -0
  184. pipecat/transports/whatsapp/client.py +47 -0
  185. pipecat/utils/base_object.py +54 -22
  186. pipecat/utils/redis.py +58 -0
  187. pipecat/utils/string.py +13 -1
  188. pipecat/utils/tracing/service_decorators.py +21 -21
  189. pipecat/serializers/genesys.py +0 -95
  190. pipecat/services/google/test-google-chirp.py +0 -45
  191. pipecat/services/openai.py +0 -698
  192. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
  193. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
  194. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
  195. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
@@ -0,0 +1,170 @@
1
+ """Strands Agent integration for Pipecat.
2
+
3
+ This module provides integration with Strands Agents for handling conversational AI
4
+ interactions. It supports both single agent and multi-agent graphs.
5
+ """
6
+
7
+ from typing import Optional
8
+
9
+ from loguru import logger
10
+
11
+ from pipecat.frames.frames import (
12
+ Frame,
13
+ LLMContextFrame,
14
+ LLMFullResponseEndFrame,
15
+ LLMFullResponseStartFrame,
16
+ LLMTextFrame,
17
+ )
18
+ from pipecat.metrics.metrics import LLMTokenUsage
19
+ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame
20
+ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
21
+
22
+ try:
23
+ from strands import Agent
24
+ from strands.multiagent.graph import Graph
25
+ except ModuleNotFoundError as e:
26
+ logger.exception("In order to use Strands Agents, you need to `pip install strands-agents`.")
27
+ raise Exception(f"Missing module: {e}")
28
+
29
+
30
class StrandsAgentsProcessor(FrameProcessor):
    """Processor that integrates Strands Agents with Pipecat's frame pipeline.

    This processor takes LLM message frames, extracts the latest user message,
    and processes it through either a single Strands Agent or a multi-agent Graph.
    The response is streamed back as text frames with appropriate response markers.

    Supports both single agent streaming and graph-based multi-agent workflows.
    """

    def __init__(
        self,
        agent: Optional[Agent] = None,
        graph: Optional[Graph] = None,
        graph_exit_node: Optional[str] = None,
    ):
        """Initialize the Strands Agents processor.

        Args:
            agent: The Strands Agent to use for single-agent processing.
            graph: The Strands multi-agent Graph to use for graph-based processing.
            graph_exit_node: The exit node name when using graph-based processing.

        Raises:
            ValueError: If neither agent nor graph is provided, or if graph is
                provided without a graph_exit_node.
        """
        super().__init__()
        self.agent = agent
        self.graph = graph
        self.graph_exit_node = graph_exit_node

        # Raise (not assert) so validation survives `python -O`.
        if not (self.agent or self.graph):
            raise ValueError("Either agent or graph must be provided")
        if self.graph and not self.graph_exit_node:
            raise ValueError("graph_exit_node must be provided if graph is provided")

    @staticmethod
    def _extract_text(content) -> str:
        """Return the text portion of a message ``content`` field as a stripped string.

        Handles plain-string content as well as the list-of-blocks format used by
        multimodal messages (e.g. ``[{"text": "..."}]``); non-text blocks are ignored.
        Falls back to ``str()`` for any other content shape.
        """
        if isinstance(content, str):
            return content.strip()
        if isinstance(content, list):
            texts = [
                str(block["text"])
                for block in content
                if isinstance(block, dict) and "text" in block
            ]
            return " ".join(texts).strip()
        return str(content).strip()

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process incoming frames and handle LLM message frames.

        Context frames are consumed (not forwarded); the latest user message is
        routed to the agent or graph. All other frames pass through unchanged.

        Args:
            frame: The incoming frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)
        if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)):
            messages = frame.context.get_messages()
            if messages:
                last_message = messages[-1]
                # Use .get() so a content-less message doesn't raise KeyError, and
                # extract text properly from list-of-blocks (multimodal) content.
                await self._ainvoke(self._extract_text(last_message.get("content", "")))
        else:
            await self.push_frame(frame, direction)

    async def _ainvoke(self, text: str):
        """Invoke the Strands agent with the provided text and stream results as Pipecat frames.

        Emits LLMFullResponseStartFrame/LLMFullResponseEndFrame around the response,
        LLMTextFrame for each text chunk, and reports TTFB/processing/usage metrics.

        Args:
            text: The user input text to process through the agent or graph.
        """
        logger.debug(f"Invoking Strands agent with: {text}")
        ttfb_tracking = True
        try:
            await self.push_frame(LLMFullResponseStartFrame())
            await self.start_processing_metrics()
            await self.start_ttfb_metrics()

            if self.graph:
                # Graph does not stream; await full result then emit assistant text
                graph_result = await self.graph.invoke_async(text)
                if ttfb_tracking:
                    await self.stop_ttfb_metrics()
                    ttfb_tracking = False
                try:
                    node_result = graph_result.results[self.graph_exit_node]
                    logger.debug(f"Node result: {node_result}")
                    for agent_result in node_result.get_agent_results():
                        # Push to TTS service
                        message = getattr(agent_result, "message", None)
                        if isinstance(message, dict) and "content" in message:
                            for block in message["content"]:
                                if isinstance(block, dict) and "text" in block:
                                    await self.push_frame(LLMTextFrame(str(block["text"])))
                        # Update usage metrics
                        await self._report_usage_metrics(
                            agent_result.metrics.accumulated_usage.get("inputTokens", 0),
                            agent_result.metrics.accumulated_usage.get("outputTokens", 0),
                            agent_result.metrics.accumulated_usage.get("totalTokens", 0),
                        )
                except Exception as parse_err:
                    logger.warning(f"Failed to extract messages from GraphResult: {parse_err}")
            else:
                # Agent supports streaming events via async iterator
                async for event in self.agent.stream_async(text):
                    # Push to TTS service
                    if isinstance(event, dict) and "data" in event:
                        await self.push_frame(LLMTextFrame(str(event["data"])))
                        if ttfb_tracking:
                            await self.stop_ttfb_metrics()
                            ttfb_tracking = False

                    # Update usage metrics
                    if (
                        isinstance(event, dict)
                        and "event" in event
                        and "metadata" in event["event"]
                    ):
                        if "usage" in event["event"]["metadata"]:
                            usage = event["event"]["metadata"]["usage"]
                            await self._report_usage_metrics(
                                usage.get("inputTokens", 0),
                                usage.get("outputTokens", 0),
                                usage.get("totalTokens", 0),
                            )
        except GeneratorExit:
            logger.warning(f"{self} generator was closed prematurely")
        except Exception as e:
            logger.exception(f"{self} an unknown error occurred: {e}")
        finally:
            # Ensure TTFB is closed even if the stream errored before first token.
            if ttfb_tracking:
                await self.stop_ttfb_metrics()
                ttfb_tracking = False
            await self.stop_processing_metrics()
            await self.push_frame(LLMFullResponseEndFrame())

    def can_generate_metrics(self) -> bool:
        """Check if this service can generate performance metrics.

        Returns:
            True as this service supports metrics generation.
        """
        return True

    async def _report_usage_metrics(
        self, prompt_tokens: int, completion_tokens: int, total_tokens: int
    ):
        """Report LLM token usage to the metrics subsystem."""
        tokens = LLMTokenUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
        )
        await self.start_llm_usage_metrics(tokens)
@@ -15,7 +15,7 @@ from pipecat.frames.frames import (
15
15
  Frame,
16
16
  InputAudioRawFrame,
17
17
  OutputAudioRawFrame,
18
- TransportMessageFrame,
18
+ UserSpeakingFrame,
19
19
  )
20
20
  from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
21
21
 
@@ -36,9 +36,9 @@ class FrameLogger(FrameProcessor):
36
36
  color: Optional[str] = None,
37
37
  ignored_frame_types: Tuple[Type[Frame], ...] = (
38
38
  BotSpeakingFrame,
39
+ UserSpeakingFrame,
39
40
  InputAudioRawFrame,
40
41
  OutputAudioRawFrame,
41
- TransportMessageFrame,
42
42
  ),
43
43
  ):
44
44
  """Initialize the frame logger.
@@ -19,7 +19,8 @@ from pipecat.frames.frames import (
19
19
  CancelFrame,
20
20
  EndFrame,
21
21
  Frame,
22
- StartInterruptionFrame,
22
+ InterruptionFrame,
23
+ TranscriptDropFrame,
23
24
  TranscriptionFrame,
24
25
  TranscriptionMessage,
25
26
  TranscriptionUpdateFrame,
@@ -44,6 +45,7 @@ class BaseTranscriptProcessor(FrameProcessor):
44
45
  super().__init__(**kwargs)
45
46
  self._processed_messages: List[TranscriptionMessage] = []
46
47
  self._register_event_handler("on_transcript_update")
48
+ self._register_event_handler("on_transcript_drop")
47
49
 
48
50
  async def _emit_update(self, messages: List[TranscriptionMessage]):
49
51
  """Emit transcript updates for new messages.
@@ -57,6 +59,19 @@ class BaseTranscriptProcessor(FrameProcessor):
57
59
  await self._call_event_handler("on_transcript_update", update_frame)
58
60
  await self.push_frame(update_frame)
59
61
 
62
+ async def _handle_transcript_drop(self, frame: TranscriptDropFrame):
63
+ """Handle transcript drop notifications by removing stored messages."""
64
+ if not frame.transcript_ids:
65
+ return
66
+
67
+ await self._call_event_handler("on_transcript_drop", frame)
68
+
69
+ drop_ids = set(frame.transcript_ids)
70
+ if drop_ids:
71
+ self._processed_messages = [
72
+ msg for msg in self._processed_messages if msg.message_id not in drop_ids
73
+ ]
74
+
60
75
 
61
76
  class UserTranscriptProcessor(BaseTranscriptProcessor):
62
77
  """Processes user transcription frames into timestamped conversation messages."""
@@ -72,9 +87,15 @@ class UserTranscriptProcessor(BaseTranscriptProcessor):
72
87
 
73
88
  if isinstance(frame, TranscriptionFrame):
74
89
  message = TranscriptionMessage(
75
- role="user", user_id=frame.user_id, content=frame.text, timestamp=frame.timestamp
90
+ role="user",
91
+ user_id=frame.user_id,
92
+ content=frame.text,
93
+ timestamp=frame.timestamp,
94
+ message_id=frame.id,
76
95
  )
77
96
  await self._emit_update([message])
97
+ elif isinstance(frame, TranscriptDropFrame):
98
+ await self._handle_transcript_drop(frame)
78
99
 
79
100
  await self.push_frame(frame, direction)
80
101
 
@@ -86,7 +107,7 @@ class AssistantTranscriptProcessor(BaseTranscriptProcessor):
86
107
  transcript messages. Utterances are completed when:
87
108
 
88
109
  - The bot stops speaking (BotStoppedSpeakingFrame)
89
- - The bot is interrupted (StartInterruptionFrame)
110
+ - The bot is interrupted (InterruptionFrame)
90
111
  - The pipeline ends (EndFrame)
91
112
  """
92
113
 
@@ -185,7 +206,7 @@ class AssistantTranscriptProcessor(BaseTranscriptProcessor):
185
206
 
186
207
  - TTSTextFrame: Aggregates text for current utterance
187
208
  - BotStoppedSpeakingFrame: Completes current utterance
188
- - StartInterruptionFrame: Completes current utterance due to interruption
209
+ - InterruptionFrame: Completes current utterance due to interruption
189
210
  - EndFrame: Completes current utterance at pipeline end
190
211
  - CancelFrame: Completes current utterance due to cancellation
191
212
 
@@ -195,7 +216,7 @@ class AssistantTranscriptProcessor(BaseTranscriptProcessor):
195
216
  """
196
217
  await super().process_frame(frame, direction)
197
218
 
198
- if isinstance(frame, (StartInterruptionFrame, CancelFrame)):
219
+ if isinstance(frame, (InterruptionFrame, CancelFrame)):
199
220
  # Push frame first otherwise our emitted transcription update frame
200
221
  # might get cleaned up.
201
222
  await self.push_frame(frame, direction)
@@ -15,17 +15,18 @@ from loguru import logger
15
15
  from pipecat.frames.frames import (
16
16
  BotSpeakingFrame,
17
17
  CancelFrame,
18
+ EndDTMFCaptureFrame,
18
19
  EndFrame,
19
20
  Frame,
20
- FunctionCallCancelFrame,
21
21
  FunctionCallInProgressFrame,
22
22
  FunctionCallResultFrame,
23
23
  InputDTMFFrame,
24
+ LLMFullResponseStartFrame,
25
+ LLMFullResponseEndFrame,
26
+ StartDTMFCaptureFrame,
27
+ StartFrame,
24
28
  StartUserIdleProcessorFrame,
25
29
  StopUserIdleProcessorFrame,
26
- FunctionCallInProgressFrame,
27
- FunctionCallResultFrame,
28
- StartFrame,
29
30
  UserStartedSpeakingFrame,
30
31
  UserStoppedSpeakingFrame,
31
32
  )
@@ -83,9 +84,12 @@ class UserIdleProcessor(FrameProcessor):
83
84
  self._timeout = timeout
84
85
  self._retry_count = 0
85
86
  self._interrupted = False
87
+ self._function_call_active = False
88
+ self._dtmf_capture_active = False
86
89
  self._conversation_started = False
87
90
  self._idle_task = None
88
91
  self._idle_event = asyncio.Event()
92
+ self._llm_in_progress = False
89
93
 
90
94
  def _wrap_callback(
91
95
  self,
@@ -180,10 +184,20 @@ class UserIdleProcessor(FrameProcessor):
180
184
  self._idle_event.set()
181
185
  elif isinstance(frame, FunctionCallInProgressFrame):
182
186
  # Function calls can take longer than the timeout, so we want to prevent idle callbacks
187
+ self._function_call_active = True
183
188
  self._interrupted = True
184
189
  self._idle_event.set()
185
190
  elif isinstance(frame, FunctionCallResultFrame):
186
- self._interrupted = False
191
+ self._function_call_active = False
192
+ self._interrupted = self._dtmf_capture_active
193
+ self._idle_event.set()
194
+ elif isinstance(frame, StartDTMFCaptureFrame):
195
+ self._dtmf_capture_active = True
196
+ self._interrupted = True
197
+ self._idle_event.set()
198
+ elif isinstance(frame, EndDTMFCaptureFrame):
199
+ self._dtmf_capture_active = False
200
+ self._interrupted = self._function_call_active
187
201
  self._idle_event.set()
188
202
  elif isinstance(frame, StartUserIdleProcessorFrame):
189
203
  if not self._idle_task:
@@ -192,6 +206,18 @@ class UserIdleProcessor(FrameProcessor):
192
206
  elif isinstance(frame, StopUserIdleProcessorFrame):
193
207
  self.logger.debug("Stopping Idle Processor")
194
208
  await self._stop()
209
+ elif isinstance(frame, LLMFullResponseStartFrame):
210
+ self.logger.debug(
211
+ "LLM FullResponseStartFrame received, making llm_in_progress to True"
212
+ )
213
+ self._idle_event.set()
214
+ self._llm_in_progress = True
215
+ elif isinstance(frame, LLMFullResponseEndFrame):
216
+ self.logger.debug(
217
+ "LLM FullResponseEndFrame received, making llm_in_progress to False"
218
+ )
219
+ self._idle_event.set()
220
+ self._llm_in_progress = False
195
221
 
196
222
  async def cleanup(self) -> None:
197
223
  """Cleans up resources when processor is shutting down."""
@@ -206,15 +232,13 @@ class UserIdleProcessor(FrameProcessor):
206
232
 
207
233
  Runs in a loop until cancelled or callback indicates completion.
208
234
  """
209
- while True:
235
+ running = True
236
+ while running:
210
237
  try:
211
238
  await asyncio.wait_for(self._idle_event.wait(), timeout=self._timeout)
212
239
  except asyncio.TimeoutError:
213
- if not self._interrupted:
240
+ if not self._interrupted and not self._llm_in_progress:
214
241
  self._retry_count += 1
215
- should_continue = await self._callback(self, self._retry_count)
216
- if not should_continue:
217
- await self._stop()
218
- break
242
+ running = await self._callback(self, self._retry_count)
219
243
  finally:
220
244
  self._idle_event.clear()
pipecat/runner/daily.py CHANGED
@@ -76,12 +76,14 @@ class DailyRoomConfig(BaseModel):
76
76
  async def configure(
77
77
  aiohttp_session: aiohttp.ClientSession,
78
78
  *,
79
+ api_key: Optional[str] = None,
79
80
  room_exp_duration: Optional[float] = 2.0,
80
81
  token_exp_duration: Optional[float] = 2.0,
81
82
  sip_caller_phone: Optional[str] = None,
82
83
  sip_enable_video: Optional[bool] = False,
83
84
  sip_num_endpoints: Optional[int] = 1,
84
85
  sip_codecs: Optional[Dict[str, List[str]]] = None,
86
+ room_properties: Optional[DailyRoomProperties] = None,
85
87
  ) -> DailyRoomConfig:
86
88
  """Configure Daily room URL and token with optional SIP capabilities.
87
89
 
@@ -91,6 +93,7 @@ async def configure(
91
93
 
92
94
  Args:
93
95
  aiohttp_session: HTTP session for making API requests.
96
+ api_key: Daily API key.
94
97
  room_exp_duration: Room expiration time in hours.
95
98
  token_exp_duration: Token expiration time in hours.
96
99
  sip_caller_phone: Phone number or identifier for SIP display name.
@@ -99,6 +102,10 @@ async def configure(
99
102
  sip_num_endpoints: Number of allowed SIP endpoints.
100
103
  sip_codecs: Codecs to support for audio and video. If None, uses Daily defaults.
101
104
  Example: {"audio": ["OPUS"], "video": ["H264"]}
105
+ room_properties: Optional DailyRoomProperties to use instead of building from
106
+ individual parameters. When provided, this overrides room_exp_duration and
107
+ SIP-related parameters. If not provided, properties are built from the
108
+ individual parameters as before.
102
109
 
103
110
  Returns:
104
111
  DailyRoomConfig: Object with room_url, token, and optional sip_endpoint.
@@ -115,18 +122,48 @@ async def configure(
115
122
  # SIP-enabled room
116
123
  sip_config = await configure(session, sip_caller_phone="+15551234567")
117
124
  print(f"SIP endpoint: {sip_config.sip_endpoint}")
125
+
126
+ # Custom room properties with recording enabled
127
+ custom_props = DailyRoomProperties(
128
+ enable_recording="cloud",
129
+ max_participants=2,
130
+ )
131
+ config = await configure(session, room_properties=custom_props)
118
132
  """
119
133
  # Check for required API key
120
- api_key = os.getenv("DAILY_API_KEY")
134
+ api_key = api_key or os.getenv("DAILY_API_KEY")
121
135
  if not api_key:
122
136
  raise Exception(
123
137
  "DAILY_API_KEY environment variable is required. "
124
138
  "Get your API key from https://dashboard.daily.co/developers"
125
139
  )
126
140
 
141
+ # Warn if both room_properties and individual parameters are provided
142
+ if room_properties is not None:
143
+ individual_params_provided = any(
144
+ [
145
+ room_exp_duration != 2.0,
146
+ token_exp_duration != 2.0,
147
+ sip_caller_phone is not None,
148
+ sip_enable_video is not False,
149
+ sip_num_endpoints != 1,
150
+ sip_codecs is not None,
151
+ ]
152
+ )
153
+ if individual_params_provided:
154
+ logger.warning(
155
+ "Both room_properties and individual parameters (room_exp_duration, token_exp_duration, "
156
+ "sip_*) were provided. The room_properties will be used and individual parameters "
157
+ "will be ignored."
158
+ )
159
+
127
160
  # Determine if SIP mode is enabled
128
161
  sip_enabled = sip_caller_phone is not None
129
162
 
163
+ # If room_properties is provided, check if it has SIP configuration
164
+ if room_properties and room_properties.sip:
165
+ sip_enabled = True
166
+
130
167
  daily_rest_helper = DailyRESTHelper(
131
168
  daily_api_key=api_key,
132
169
  daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
@@ -150,27 +187,29 @@ async def configure(
150
187
  room_name = f"{room_prefix}-{uuid.uuid4().hex[:8]}"
151
188
  logger.info(f"Creating new Daily room: {room_name}")
152
189
 
153
- # Calculate expiration time
154
- expiration_time = time.time() + (room_exp_duration * 60 * 60)
190
+ # Use provided room_properties or build from parameters
191
+ if room_properties is None:
192
+ # Calculate expiration time
193
+ expiration_time = time.time() + (room_exp_duration * 60 * 60)
155
194
 
156
- # Create room properties
157
- room_properties = DailyRoomProperties(
158
- exp=expiration_time,
159
- eject_at_room_exp=True,
160
- )
161
-
162
- # Add SIP configuration if enabled
163
- if sip_enabled:
164
- sip_params = DailyRoomSipParams(
165
- display_name=sip_caller_phone,
166
- video=sip_enable_video,
167
- sip_mode="dial-in",
168
- num_endpoints=sip_num_endpoints,
169
- codecs=sip_codecs,
195
+ # Create room properties
196
+ room_properties = DailyRoomProperties(
197
+ exp=expiration_time,
198
+ eject_at_room_exp=True,
170
199
  )
171
- room_properties.sip = sip_params
172
- room_properties.enable_dialout = True # Enable outbound calls if needed
173
- room_properties.start_video_off = not sip_enable_video # Voice-only by default
200
+
201
+ # Add SIP configuration if enabled
202
+ if sip_enabled:
203
+ sip_params = DailyRoomSipParams(
204
+ display_name=sip_caller_phone,
205
+ video=sip_enable_video,
206
+ sip_mode="dial-in",
207
+ num_endpoints=sip_num_endpoints,
208
+ codecs=sip_codecs,
209
+ )
210
+ room_properties.sip = sip_params
211
+ room_properties.enable_dialout = True # Enable outbound calls if needed
212
+ room_properties.start_video_off = not sip_enable_video # Voice-only by default
174
213
 
175
214
  # Create room parameters
176
215
  room_params = DailyRoomParams(name=room_name, properties=room_properties)