dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (195)
  1. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
  2. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  11. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  22. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  23. pipecat/audio/filters/noisereduce_filter.py +15 -0
  24. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  25. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  26. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  27. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  28. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  29. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  30. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  31. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  32. pipecat/audio/vad/data/README.md +10 -0
  33. pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
  34. pipecat/audio/vad/silero.py +9 -3
  35. pipecat/audio/vad/vad_analyzer.py +13 -1
  36. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  37. pipecat/frames/frames.py +277 -86
  38. pipecat/observers/loggers/debug_log_observer.py +3 -3
  39. pipecat/observers/loggers/llm_log_observer.py +7 -3
  40. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  41. pipecat/pipeline/runner.py +18 -6
  42. pipecat/pipeline/service_switcher.py +64 -36
  43. pipecat/pipeline/task.py +125 -79
  44. pipecat/pipeline/tts_switcher.py +30 -0
  45. pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
  46. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  47. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  48. pipecat/processors/aggregators/llm_context.py +40 -2
  49. pipecat/processors/aggregators/llm_response.py +32 -15
  50. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  51. pipecat/processors/aggregators/user_response.py +6 -6
  52. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  53. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  54. pipecat/processors/dtmf_aggregator.py +174 -77
  55. pipecat/processors/filters/stt_mute_filter.py +17 -0
  56. pipecat/processors/frame_processor.py +110 -24
  57. pipecat/processors/frameworks/langchain.py +8 -2
  58. pipecat/processors/frameworks/rtvi.py +210 -68
  59. pipecat/processors/frameworks/strands_agents.py +170 -0
  60. pipecat/processors/logger.py +2 -2
  61. pipecat/processors/transcript_processor.py +26 -5
  62. pipecat/processors/user_idle_processor.py +35 -11
  63. pipecat/runner/daily.py +59 -20
  64. pipecat/runner/run.py +395 -93
  65. pipecat/runner/types.py +6 -4
  66. pipecat/runner/utils.py +51 -10
  67. pipecat/serializers/__init__.py +5 -1
  68. pipecat/serializers/asterisk.py +16 -2
  69. pipecat/serializers/convox.py +41 -4
  70. pipecat/serializers/custom.py +257 -0
  71. pipecat/serializers/exotel.py +5 -5
  72. pipecat/serializers/livekit.py +20 -0
  73. pipecat/serializers/plivo.py +5 -5
  74. pipecat/serializers/protobuf.py +6 -5
  75. pipecat/serializers/telnyx.py +2 -2
  76. pipecat/serializers/twilio.py +43 -23
  77. pipecat/serializers/vi.py +324 -0
  78. pipecat/services/ai_service.py +2 -6
  79. pipecat/services/anthropic/llm.py +2 -25
  80. pipecat/services/assemblyai/models.py +6 -0
  81. pipecat/services/assemblyai/stt.py +13 -5
  82. pipecat/services/asyncai/tts.py +5 -3
  83. pipecat/services/aws/__init__.py +1 -0
  84. pipecat/services/aws/llm.py +147 -105
  85. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  86. pipecat/services/aws/nova_sonic/context.py +436 -0
  87. pipecat/services/aws/nova_sonic/frames.py +25 -0
  88. pipecat/services/aws/nova_sonic/llm.py +1265 -0
  89. pipecat/services/aws/stt.py +3 -3
  90. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  91. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  92. pipecat/services/aws_nova_sonic/context.py +8 -354
  93. pipecat/services/aws_nova_sonic/frames.py +13 -17
  94. pipecat/services/azure/llm.py +51 -1
  95. pipecat/services/azure/realtime/__init__.py +0 -0
  96. pipecat/services/azure/realtime/llm.py +65 -0
  97. pipecat/services/azure/stt.py +15 -0
  98. pipecat/services/cartesia/stt.py +77 -70
  99. pipecat/services/cartesia/tts.py +80 -13
  100. pipecat/services/deepgram/__init__.py +1 -0
  101. pipecat/services/deepgram/flux/__init__.py +0 -0
  102. pipecat/services/deepgram/flux/stt.py +640 -0
  103. pipecat/services/elevenlabs/__init__.py +4 -1
  104. pipecat/services/elevenlabs/stt.py +339 -0
  105. pipecat/services/elevenlabs/tts.py +87 -46
  106. pipecat/services/fish/tts.py +5 -2
  107. pipecat/services/gemini_multimodal_live/events.py +38 -524
  108. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  109. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  110. pipecat/services/gladia/stt.py +56 -72
  111. pipecat/services/google/__init__.py +1 -0
  112. pipecat/services/google/gemini_live/__init__.py +3 -0
  113. pipecat/services/google/gemini_live/file_api.py +189 -0
  114. pipecat/services/google/gemini_live/llm.py +1582 -0
  115. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  116. pipecat/services/google/llm.py +15 -11
  117. pipecat/services/google/llm_openai.py +3 -3
  118. pipecat/services/google/llm_vertex.py +86 -16
  119. pipecat/services/google/stt.py +4 -0
  120. pipecat/services/google/tts.py +7 -3
  121. pipecat/services/heygen/api.py +2 -0
  122. pipecat/services/heygen/client.py +8 -4
  123. pipecat/services/heygen/video.py +2 -0
  124. pipecat/services/hume/__init__.py +5 -0
  125. pipecat/services/hume/tts.py +220 -0
  126. pipecat/services/inworld/tts.py +6 -6
  127. pipecat/services/llm_service.py +15 -5
  128. pipecat/services/lmnt/tts.py +4 -2
  129. pipecat/services/mcp_service.py +4 -2
  130. pipecat/services/mem0/memory.py +6 -5
  131. pipecat/services/mistral/llm.py +29 -8
  132. pipecat/services/moondream/vision.py +42 -16
  133. pipecat/services/neuphonic/tts.py +5 -2
  134. pipecat/services/openai/__init__.py +1 -0
  135. pipecat/services/openai/base_llm.py +27 -20
  136. pipecat/services/openai/realtime/__init__.py +0 -0
  137. pipecat/services/openai/realtime/context.py +272 -0
  138. pipecat/services/openai/realtime/events.py +1106 -0
  139. pipecat/services/openai/realtime/frames.py +37 -0
  140. pipecat/services/openai/realtime/llm.py +829 -0
  141. pipecat/services/openai/tts.py +49 -10
  142. pipecat/services/openai_realtime/__init__.py +27 -0
  143. pipecat/services/openai_realtime/azure.py +21 -0
  144. pipecat/services/openai_realtime/context.py +21 -0
  145. pipecat/services/openai_realtime/events.py +21 -0
  146. pipecat/services/openai_realtime/frames.py +21 -0
  147. pipecat/services/openai_realtime_beta/azure.py +16 -0
  148. pipecat/services/openai_realtime_beta/openai.py +17 -5
  149. pipecat/services/piper/tts.py +7 -9
  150. pipecat/services/playht/tts.py +34 -4
  151. pipecat/services/rime/tts.py +12 -12
  152. pipecat/services/riva/stt.py +3 -1
  153. pipecat/services/salesforce/__init__.py +9 -0
  154. pipecat/services/salesforce/llm.py +700 -0
  155. pipecat/services/sarvam/__init__.py +7 -0
  156. pipecat/services/sarvam/stt.py +540 -0
  157. pipecat/services/sarvam/tts.py +97 -13
  158. pipecat/services/simli/video.py +2 -2
  159. pipecat/services/speechmatics/stt.py +22 -10
  160. pipecat/services/stt_service.py +47 -0
  161. pipecat/services/tavus/video.py +2 -2
  162. pipecat/services/tts_service.py +75 -22
  163. pipecat/services/vision_service.py +7 -6
  164. pipecat/services/vistaar/llm.py +51 -9
  165. pipecat/tests/utils.py +4 -4
  166. pipecat/transcriptions/language.py +41 -1
  167. pipecat/transports/base_input.py +13 -34
  168. pipecat/transports/base_output.py +140 -104
  169. pipecat/transports/daily/transport.py +199 -26
  170. pipecat/transports/heygen/__init__.py +0 -0
  171. pipecat/transports/heygen/transport.py +381 -0
  172. pipecat/transports/livekit/transport.py +228 -63
  173. pipecat/transports/local/audio.py +6 -1
  174. pipecat/transports/local/tk.py +11 -2
  175. pipecat/transports/network/fastapi_websocket.py +1 -1
  176. pipecat/transports/smallwebrtc/connection.py +103 -19
  177. pipecat/transports/smallwebrtc/request_handler.py +246 -0
  178. pipecat/transports/smallwebrtc/transport.py +65 -23
  179. pipecat/transports/tavus/transport.py +23 -12
  180. pipecat/transports/websocket/client.py +41 -5
  181. pipecat/transports/websocket/fastapi.py +21 -11
  182. pipecat/transports/websocket/server.py +14 -7
  183. pipecat/transports/whatsapp/api.py +8 -0
  184. pipecat/transports/whatsapp/client.py +47 -0
  185. pipecat/utils/base_object.py +54 -22
  186. pipecat/utils/redis.py +58 -0
  187. pipecat/utils/string.py +13 -1
  188. pipecat/utils/tracing/service_decorators.py +21 -21
  189. pipecat/serializers/genesys.py +0 -95
  190. pipecat/services/google/test-google-chirp.py +0 -45
  191. pipecat/services/openai.py +0 -698
  192. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
  193. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
  194. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
  195. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
@@ -1,17 +1,18 @@
1
+ """DTMF aggregator processor for collecting and flushing DTMF input digits."""
2
+
1
3
  import asyncio
2
4
 
3
5
  from pipecat.frames.frames import (
4
6
  BotSpeakingFrame,
5
7
  CancelFrame,
8
+ DTMFUpdateSettingsFrame,
9
+ EndDTMFCaptureFrame,
6
10
  EndFrame,
7
11
  Frame,
8
12
  InputDTMFFrame,
9
- StartInterruptionFrame,
10
- StartUserIdleProcessorFrame,
11
- StopUserIdleProcessorFrame,
13
+ InterruptionFrame,
14
+ StartDTMFCaptureFrame,
12
15
  TranscriptionFrame,
13
- UserStartedSpeakingFrame,
14
- UserStoppedSpeakingFrame,
15
16
  WaitForDTMFFrame,
16
17
  )
17
18
  from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -20,10 +21,11 @@ from pipecat.utils.time import time_now_iso8601
20
21
 
21
22
  class DTMFAggregator(FrameProcessor):
22
23
  """Aggregates DTMF frames using idle wait logic.
24
+
23
25
  The aggregator accumulates digits from incoming InputDTMFFrame instances.
24
26
  It flushes the aggregated digits by emitting a TranscriptionFrame when:
25
27
  - No new digit arrives within the specified timeout period,
26
- - The termination digit (“#”) is received, or
28
+ - The termination digit ("#") is received, or
27
29
  - The number of digits aggregated equals the configured 'digits' value.
28
30
  """
29
31
 
@@ -35,7 +37,9 @@ class DTMFAggregator(FrameProcessor):
35
37
  digits: int = None,
36
38
  **kwargs,
37
39
  ):
38
- """:param timeout: Idle timeout in seconds before flushing the aggregated digits.
40
+ """Initialize the DTMF aggregator.
41
+
42
+ :param timeout: Idle timeout in seconds before flushing the aggregated digits.
39
43
  :param digits: Number of digits to aggregate before flushing.
40
44
  """
41
45
  super().__init__(**kwargs)
@@ -43,112 +47,205 @@ class DTMFAggregator(FrameProcessor):
43
47
  self._idle_timeout = timeout
44
48
  self._digits = digits
45
49
  self._digit_event = asyncio.Event()
46
- self._digit_aggregate_task = None
50
+ self._aggregation_task = None
47
51
  self._end_on = end_on if end_on else set()
48
52
  self._reset_on = reset_on if reset_on else set()
49
- self._stopped_idle_processor = False
50
-
51
- async def _start_idle_processor(self):
52
- await self.push_frame(StartUserIdleProcessorFrame(), FrameDirection.UPSTREAM)
53
- self._stopped_idle_processor = False
54
-
55
- async def _stop_idle_processor(self):
56
- await self.push_frame(StopUserIdleProcessorFrame(), FrameDirection.UPSTREAM)
57
- self._stopped_idle_processor = True
53
+ self._dtmf_capture_active = False
58
54
 
59
55
  async def process_frame(self, frame: Frame, direction: FrameDirection) -> None:
56
+ """Process incoming frames and handle DTMF input aggregation."""
60
57
  # Handle DTMF frames.
61
58
  await super().process_frame(frame, direction)
62
- await self.push_frame(frame, direction)
63
- if isinstance(frame, InputDTMFFrame):
64
- # Start the digit aggregation task if it's not running yet.
65
- if self._digit_aggregate_task is None:
66
- self._digit_aggregate_task = self.create_task(self._digit_agg_handler(direction))
67
-
68
- # Append the incoming digit.
69
- if frame.button.value in self._reset_on:
70
- self._aggregation = ""
71
- elif frame.button.value in self._end_on:
72
- await self.flush_aggregation(direction)
73
- self._aggregation = ""
74
- else:
75
- self._digit_event.set()
76
- self._aggregation += frame.button.value
77
-
78
- # Flush if the aggregated digits reach the specified length.
79
- if self._digits and len(self._aggregation) == self._digits:
80
- await self.flush_aggregation(direction)
81
- self._aggregation = ""
82
- if self._stopped_idle_processor:
83
- await self._start_idle_processor()
84
59
 
60
+ if isinstance(frame, InputDTMFFrame):
61
+ # Push the DTMF frame downstream first
62
+ await self.push_frame(frame, direction)
63
+ # Then handle it for proper frame ordering
64
+ await self._handle_dtmf_frame(frame)
85
65
  elif isinstance(frame, (EndFrame, CancelFrame)):
86
66
  # For EndFrame, flush any pending aggregation and stop the digit aggregation task.
87
67
  if self._aggregation:
88
- await self.flush_aggregation(direction)
89
- if self._digit_aggregate_task:
90
- await self._stop_digit_aggregate_task()
68
+ await self.flush_aggregation()
69
+ if self._aggregation_task:
70
+ await self._stop_aggregation_task()
71
+ await self.push_frame(frame, direction)
91
72
  elif isinstance(frame, WaitForDTMFFrame):
92
73
  self.logger.debug("Received WaitForDTMFFrame: Waiting for DTMF input")
93
- if self._digit_aggregate_task is None:
94
- self._digit_aggregate_task = self.create_task(
95
- self._digit_agg_handler(direction, raise_timeout=True)
96
- )
97
- self._digit_event.set()
98
- await self._stop_idle_processor()
99
- elif isinstance(frame, StartInterruptionFrame):
100
- self.logger.debug("Received StartInterruptionFrame: Starting idle processor")
101
- if self._stopped_idle_processor:
102
- await self._start_idle_processor()
74
+ self._create_aggregation_task(raise_timeout=True)
75
+ self._digit_event.set() # Trigger the timeout handler
76
+ await self._start_dtmf_capture()
77
+ await self.push_frame(frame, direction)
78
+ elif isinstance(frame, InterruptionFrame):
79
+ self.logger.debug("Received InterruptionFrame")
103
80
  if self._aggregation:
104
- await self.flush_aggregation(direction)
81
+ await self.flush_aggregation()
82
+ await self._end_dtmf_capture()
83
+ await self.push_frame(frame, direction)
105
84
  elif isinstance(frame, BotSpeakingFrame):
106
- if self._digit_aggregate_task is not None:
85
+ # Signal the aggregation task to continue when bot speaks
86
+ if self._aggregation_task is not None:
107
87
  self._digit_event.set()
88
+ await self.push_frame(frame, direction)
89
+ elif isinstance(frame, DTMFUpdateSettingsFrame):
90
+ await self._update_settings(frame.settings)
91
+ # Don't pass the settings frame downstream
92
+ else:
93
+ # Pass all other frames through
94
+ await self.push_frame(frame, direction)
95
+
96
+ async def _update_settings(self, settings: dict) -> None:
97
+ """Update DTMF aggregator settings dynamically.
108
98
 
109
- async def _digit_agg_handler(self, direction: FrameDirection, raise_timeout=False):
110
- """Idle task that waits for new DTMF activity. If no new digit is received within
111
- the timeout period, the current aggregation is flushed.
99
+ Args:
100
+ settings: Dictionary containing new DTMF settings
101
+ Supported keys: timeout, digits, end, reset
112
102
  """
103
+ settings_changed = False
104
+
105
+ if "timeout" in settings and settings["timeout"] is not None:
106
+ new_timeout = float(settings["timeout"])
107
+ if new_timeout != self._idle_timeout:
108
+ self.logger.debug(
109
+ f"Updating DTMF timeout from {self._idle_timeout} to {new_timeout}"
110
+ )
111
+ self._idle_timeout = new_timeout
112
+ settings_changed = True
113
+
114
+ if "digits" in settings:
115
+ new_digits = settings["digits"]
116
+ if new_digits != self._digits:
117
+ self.logger.debug(f"Updating DTMF digits from {self._digits} to {new_digits}")
118
+ self._digits = new_digits
119
+ settings_changed = True
120
+
121
+ if "end" in settings:
122
+ # Convert single string to set if needed
123
+ end_value = settings["end"]
124
+ if end_value is None:
125
+ new_end_on = set()
126
+ elif isinstance(end_value, str):
127
+ new_end_on = {end_value} if end_value else set()
128
+ else:
129
+ new_end_on = set(end_value)
130
+
131
+ if new_end_on != self._end_on:
132
+ self.logger.debug(f"Updating DTMF end_on from {self._end_on} to {new_end_on}")
133
+ self._end_on = new_end_on
134
+ settings_changed = True
135
+
136
+ if "reset" in settings:
137
+ # Convert single string to set if needed
138
+ reset_value = settings["reset"]
139
+ if reset_value is None:
140
+ new_reset_on = set()
141
+ elif isinstance(reset_value, str):
142
+ new_reset_on = {reset_value} if reset_value else set()
143
+ else:
144
+ new_reset_on = set(reset_value)
145
+
146
+ if new_reset_on != self._reset_on:
147
+ self.logger.debug(f"Updating DTMF reset_on from {self._reset_on} to {new_reset_on}")
148
+ self._reset_on = new_reset_on
149
+ settings_changed = True
150
+
151
+ if settings_changed:
152
+ self.logger.info(f"DTMF settings updated successfully")
153
+
154
+ async def _handle_dtmf_frame(self, frame: InputDTMFFrame):
155
+ """Handle DTMF input frame processing."""
156
+ # Create aggregation task if needed
157
+ if self._aggregation_task is None:
158
+ self._create_aggregation_task()
159
+
160
+ digit_value = frame.button.value
161
+
162
+ # Handle reset digits
163
+ if digit_value in self._reset_on:
164
+ self._aggregation = ""
165
+ return
166
+
167
+ # Handle end digits
168
+ if digit_value in self._end_on:
169
+ if self._aggregation: # Only flush if we have aggregation
170
+ await self.flush_aggregation()
171
+ return
172
+
173
+ # Add digit to aggregation
174
+ self._aggregation += digit_value
175
+
176
+ # Signal the aggregation task that a digit was received
177
+ self._digit_event.set()
178
+
179
+ # Check if we reached the digit limit
180
+ if self._digits and len(self._aggregation) == self._digits:
181
+ await self.flush_aggregation()
182
+
183
+ def _create_aggregation_task(self, raise_timeout: bool = False) -> None:
184
+ """Creates the aggregation task if it hasn't been created yet."""
185
+ if not self._aggregation_task:
186
+ self._aggregation_task = self.create_task(self._aggregation_task_handler(raise_timeout))
187
+
188
+ async def _stop_aggregation_task(self) -> None:
189
+ """Stops the aggregation task."""
190
+ if self._aggregation_task:
191
+ await self.cancel_task(self._aggregation_task)
192
+ self._aggregation_task = None
193
+
194
+ async def _aggregation_task_handler(self, raise_timeout=False):
195
+ """Background task that handles timeout-based flushing."""
113
196
  while True:
114
197
  try:
115
198
  # Wait for a new digit signal with a timeout.
116
199
  await asyncio.wait_for(self._digit_event.wait(), timeout=self._idle_timeout)
117
- except asyncio.TimeoutError:
118
- # No new digit arrived within the timeout period; flush aggregation if non-empty.
119
- await self.flush_aggregation(direction, raise_timeout)
120
- finally:
121
- # Clear the event for the next cycle.
122
200
  self._digit_event.clear()
201
+ except asyncio.TimeoutError:
202
+ # No new digit arrived within the timeout period; flush if needed
203
+ await self.flush_aggregation(raise_timeout=raise_timeout)
123
204
 
124
- async def flush_aggregation(self, direction: FrameDirection, raise_timeout=False):
205
+ async def flush_aggregation(self, *, raise_timeout: bool = False):
125
206
  """Flush the aggregated digits by emitting a TranscriptionFrame downstream."""
126
207
  if self._aggregation:
127
- # Todo: Change to different frame type if we decide to handle it in llm processor separately.
208
+ # Create transcription frame
128
209
  aggregated_frame = TranscriptionFrame(
129
210
  f"User inputted: {self._aggregation}.", "", time_now_iso8601()
130
211
  )
131
212
  aggregated_frame.metadata["push_aggregation"] = True
132
- await self.push_frame(StartInterruptionFrame())
133
- await self.push_frame(aggregated_frame, direction)
213
+
214
+ # Send interruption frame (as per original design)
215
+ await self.push_frame(InterruptionFrame(), FrameDirection.DOWNSTREAM)
216
+
217
+ # Push the transcription frame
218
+ await self.push_frame(aggregated_frame, FrameDirection.DOWNSTREAM)
219
+
220
+ # Reset state
134
221
  self._aggregation = ""
135
- elif raise_timeout and self._stopped_idle_processor:
222
+ await self._end_dtmf_capture()
223
+
224
+ elif raise_timeout and not self._aggregation:
225
+ # Timeout with no aggregation (WaitForDTMFFrame case)
136
226
  transcript_frame = TranscriptionFrame(
137
227
  "User didn't press any digits on the keyboard.", "", time_now_iso8601()
138
228
  )
139
229
  transcript_frame.metadata["push_aggregation"] = True
140
- await self.push_frame(transcript_frame)
141
- if self._stopped_idle_processor:
142
- await self._start_idle_processor()
230
+ await self.push_frame(transcript_frame, FrameDirection.DOWNSTREAM)
231
+ await self._end_dtmf_capture()
232
+
233
+ async def _start_dtmf_capture(self):
234
+ """Signal the start of DTMF capture upstream."""
235
+ if self._dtmf_capture_active:
236
+ return
237
+ await self.push_frame(StartDTMFCaptureFrame(), FrameDirection.UPSTREAM)
238
+ self._dtmf_capture_active = True
143
239
 
144
- async def _stop_digit_aggregate_task(self):
145
- """Cancels the digit aggregation task if it exists."""
146
- if self._digit_aggregate_task:
147
- await self.cancel_task(self._digit_aggregate_task)
148
- self._digit_aggregate_task = None
240
+ async def _end_dtmf_capture(self):
241
+ """Signal the end of DTMF capture upstream."""
242
+ if not self._dtmf_capture_active:
243
+ return
244
+ await self.push_frame(EndDTMFCaptureFrame(), FrameDirection.UPSTREAM)
245
+ self._dtmf_capture_active = False
149
246
 
150
247
  async def cleanup(self) -> None:
151
248
  """Cleans up resources, ensuring that the digit aggregation task is cancelled."""
152
249
  await super().cleanup()
153
- if self._digit_aggregate_task:
154
- await self._stop_digit_aggregate_task()
250
+ if self._aggregation_task:
251
+ await self._stop_aggregation_task()
@@ -25,14 +25,17 @@ from pipecat.frames.frames import (
25
25
  FunctionCallResultFrame,
26
26
  InputAudioRawFrame,
27
27
  InterimTranscriptionFrame,
28
+ InterruptionFrame,
28
29
  StartFrame,
29
30
  StartInterruptionFrame,
31
+ StartDTMFCaptureFrame,
30
32
  STTMuteFrame,
31
33
  TranscriptionFrame,
32
34
  UserStartedSpeakingFrame,
33
35
  UserStoppedSpeakingFrame,
34
36
  VADUserStartedSpeakingFrame,
35
37
  VADUserStoppedSpeakingFrame,
38
+ EndDTMFCaptureFrame,
36
39
  )
37
40
  from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
38
41
 
@@ -58,6 +61,7 @@ class STTMuteStrategy(Enum):
58
61
  FUNCTION_CALL = "function_call"
59
62
  ALWAYS = "always"
60
63
  CUSTOM = "custom"
64
+ DTMF_CAPTURE = "dtmf_capture"
61
65
 
62
66
 
63
67
  @dataclass
@@ -120,6 +124,7 @@ class STTMuteFilter(FrameProcessor):
120
124
  self._function_call_in_progress = False
121
125
  self._is_muted = False # Initialize as unmuted, will set state on StartFrame if needed
122
126
  self._voicemail_detection_enabled = False # Default to False
127
+ self._dtmf_capture_active = False
123
128
 
124
129
  @property
125
130
  def is_muted(self) -> bool:
@@ -165,6 +170,10 @@ class STTMuteFilter(FrameProcessor):
165
170
  if should_mute:
166
171
  return True
167
172
 
173
+ case STTMuteStrategy.DTMF_CAPTURE:
174
+ if self._dtmf_capture_active:
175
+ return True
176
+
168
177
  return False
169
178
 
170
179
  async def process_frame(self, frame: Frame, direction: FrameDirection):
@@ -205,12 +214,20 @@ class STTMuteFilter(FrameProcessor):
205
214
  self._first_speech_handled = True
206
215
  should_mute = await self._should_mute()
207
216
  self.logger.debug(f"BotStoppedSpeaking: should mute={should_mute}")
217
+ elif isinstance(frame, StartDTMFCaptureFrame):
218
+ self._dtmf_capture_active = True
219
+ should_mute = await self._should_mute()
220
+ elif isinstance(frame, EndDTMFCaptureFrame):
221
+ self._dtmf_capture_active = False
222
+ should_mute = await self._should_mute()
208
223
  elif isinstance(frame, STTMuteFrame):
224
+ # TODO: Frame duplication is actually happening. We get this frame from downstream and then push it downstream again. We are also pushing it upstream here and then pushing it upstream again in _handle_mute_state.
209
225
  should_mute = frame.mute
210
226
 
211
227
  # Then push the original frame
212
228
  # Conditionally include InputAudioRawFrame in suppression tuple based on voicemail_detection_enabled
213
229
  suppression_types = (
230
+ InterruptionFrame,
214
231
  StartInterruptionFrame,
215
232
  VADUserStartedSpeakingFrame,
216
233
  VADUserStoppedSpeakingFrame,
@@ -29,8 +29,9 @@ from pipecat.frames.frames import (
29
29
  FrameProcessorPauseUrgentFrame,
30
30
  FrameProcessorResumeFrame,
31
31
  FrameProcessorResumeUrgentFrame,
32
+ InterruptionFrame,
33
+ InterruptionTaskFrame,
32
34
  StartFrame,
33
- StartInterruptionFrame,
34
35
  SystemFrame,
35
36
  )
36
37
  from pipecat.metrics.metrics import LLMTokenUsage, MetricsData
@@ -141,6 +142,12 @@ class FrameProcessor(BaseObject):
141
142
  task. System frames are also processed in a separate task which guarantees
142
143
  frame priority.
143
144
 
145
+ Event handlers available:
146
+
147
+ - on_before_process_frame: Called before a frame is processed
148
+ - on_after_process_frame: Called after a frame is processed
149
+ - on_before_push_frame: Called before a frame is pushed
150
+ - on_after_push_frame: Called after a frame is pushed
144
151
  """
145
152
 
146
153
  def __init__(
@@ -221,6 +228,20 @@ class FrameProcessor(BaseObject):
221
228
  self.__process_frame_task: Optional[asyncio.Task] = None
222
229
  self.logger = logger # Will later be replaced with a bound logger
223
230
 
231
+ # To interrupt a pipeline, we push an `InterruptionTaskFrame` upstream.
232
+ # Then we wait for the corresponding `InterruptionFrame` to travel from
233
+ # the start of the pipeline back to the processor that sent the
234
+ # `InterruptionTaskFrame`. This wait is handled using the following
235
+ # event.
236
+ self._wait_for_interruption = False
237
+ self._wait_interruption_event = asyncio.Event()
238
+
239
+ # Frame processor events.
240
+ self._register_event_handler("on_before_process_frame", sync=True)
241
+ self._register_event_handler("on_after_process_frame", sync=True)
242
+ self._register_event_handler("on_before_push_frame", sync=True)
243
+ self._register_event_handler("on_after_push_frame", sync=True)
244
+
224
245
  @property
225
246
  def id(self) -> int:
226
247
  """Get the unique identifier for this processor.
@@ -436,9 +457,13 @@ class FrameProcessor(BaseObject):
436
457
  name = f"{self}::{coroutine.cr_code.co_name}"
437
458
  return self.task_manager.create_task(coroutine, name)
438
459
 
439
- async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = None):
460
+ async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = 1.0):
440
461
  """Cancel a task managed by this processor.
441
462
 
463
+ A default timeout of 1 second is used in order to avoid potential
464
+ freezes caused by certain libraries that swallow
465
+ `asyncio.CancelledError`.
466
+
442
467
  Args:
443
468
  task: The task to cancel.
444
469
  timeout: Optional timeout for task cancellation.
@@ -544,6 +569,14 @@ class FrameProcessor(BaseObject):
544
569
  if self._cancelling:
545
570
  return
546
571
 
572
+ # If we are waiting for an interruption we will bypass all queued system
573
+ # frames and we will process the frame right away. This is because a
574
+ # previous system frame might be waiting for the interruption frame and
575
+ # it's blocking the input task.
576
+ if self._wait_for_interruption and isinstance(frame, InterruptionFrame):
577
+ await self.__process_frame(frame, direction, callback)
578
+ return
579
+
547
580
  if self._enable_direct_mode:
548
581
  await self.__process_frame(frame, direction, callback)
549
582
  else:
@@ -553,11 +586,15 @@ class FrameProcessor(BaseObject):
553
586
  """Pause processing of queued frames."""
554
587
  self.logger.trace(f"{self}: pausing frame processing")
555
588
  self.__should_block_frames = True
589
+ if self.__process_event:
590
+ self.__process_event.clear()
556
591
 
557
592
  async def pause_processing_system_frames(self):
558
593
  """Pause processing of queued system frames."""
559
- logger.trace(f"{self}: pausing system frame processing")
594
+ self.logger.trace(f"{self}: pausing system frame processing")
560
595
  self.__should_block_system_frames = True
596
+ if self.__input_event:
597
+ self.__input_event.clear()
561
598
 
562
599
  async def resume_processing_frames(self):
563
600
  """Resume processing of queued frames."""
@@ -590,7 +627,7 @@ class FrameProcessor(BaseObject):
590
627
 
591
628
  if isinstance(frame, StartFrame):
592
629
  await self.__start(frame)
593
- elif isinstance(frame, StartInterruptionFrame):
630
+ elif isinstance(frame, InterruptionFrame):
594
631
  await self._start_interruption()
595
632
  await self.stop_all_metrics()
596
633
  elif isinstance(frame, CancelFrame):
@@ -620,8 +657,40 @@ class FrameProcessor(BaseObject):
620
657
  if not self._check_started(frame):
621
658
  return
622
659
 
660
+ await self._call_event_handler("on_before_push_frame", frame)
661
+
623
662
  await self.__internal_push_frame(frame, direction)
624
663
 
664
+ await self._call_event_handler("on_after_push_frame", frame)
665
+
666
+ # If we are waiting for an interruption and we get an interruption, then
667
+ # we can unblock `push_interruption_task_frame_and_wait()`.
668
+ if self._wait_for_interruption and isinstance(frame, InterruptionFrame):
669
+ self._wait_interruption_event.set()
670
+
671
+ async def push_interruption_task_frame_and_wait(self):
672
+ """Push an interruption task frame upstream and wait for the interruption.
673
+
674
+ This function sends an `InterruptionTaskFrame` upstream to the pipeline
675
+ task and waits to receive the corresponding `InterruptionFrame`. When
676
+ the function finishes it is guaranteed that the `InterruptionFrame` has
677
+ been pushed downstream.
678
+ """
679
+ self._wait_for_interruption = True
680
+
681
+ await self.push_frame(InterruptionTaskFrame(), FrameDirection.UPSTREAM)
682
+
683
+ # Wait for an `InterruptionFrame` to come to this processor and be
684
+ # pushed. Take a look at `push_frame()` to see how we first push the
685
+ # `InterruptionFrame` and then we set the event in order to maintain
686
+ # frame ordering.
687
+ await self._wait_interruption_event.wait()
688
+
689
+ # Clean the event.
690
+ self._wait_interruption_event.clear()
691
+
692
+ self._wait_for_interruption = False
693
+
625
694
  async def __start(self, frame: StartFrame):
626
695
  """Handle the start frame to initialize processor state.
627
696
 
@@ -674,22 +743,24 @@ class FrameProcessor(BaseObject):
674
743
  async def _start_interruption(self):
675
744
  """Start handling an interruption by cancelling current tasks."""
676
745
  try:
677
- # Cancel the process task. This will stop processing queued frames.
678
- await self.__cancel_process_task()
746
+ if self._wait_for_interruption:
747
+ # If we get here we know the process task was just waiting for
748
+ # an interruption (push_interruption_task_frame_and_wait()), so
749
+ # we can't cancel the task because it might still need to do
750
+ # more things (e.g. pushing a frame after the
751
+ # interruption). Instead we just drain the queue because this is
752
+ # an interruption.
753
+ self.__reset_process_task()
754
+ else:
755
+ # Cancel and re-create the process task including the queue.
756
+ await self.__cancel_process_task()
757
+ self.__create_process_task()
679
758
  except Exception as e:
680
759
  self.logger.exception(
681
760
  f"Uncaught exception in {self} when handling _start_interruption: {e}"
682
761
  )
683
762
  await self.push_error(ErrorFrame(str(e)))
684
763
 
685
- # Create a new process queue and task.
686
- self.__create_process_task()
687
-
688
- async def _stop_interruption(self):
689
- """Stop handling an interruption."""
690
- # Nothing to do right now.
691
- pass
692
-
693
764
  async def __internal_push_frame(self, frame: Frame, direction: FrameDirection):
694
765
  """Internal method to push frames to adjacent processors.
695
766
 
@@ -741,7 +812,7 @@ class FrameProcessor(BaseObject):
741
812
  True if the processor has been started.
742
813
  """
743
814
  if not self.__started:
744
- logger.error(f"{self} Trying to process {frame} but StartFrame not received yet")
815
+ self.logger.error(f"{self} Trying to process {frame} but StartFrame not received yet")
745
816
  return self.__started
746
817
 
747
818
  def __create_input_task(self):
@@ -774,6 +845,17 @@ class FrameProcessor(BaseObject):
774
845
  self.__process_queue = asyncio.Queue()
775
846
  self.__process_frame_task = self.create_task(self.__process_frame_task_handler())
776
847
 
848
+ def __reset_process_task(self):
849
+ """Reset non-system frame processing task."""
850
+ if self._enable_direct_mode:
851
+ return
852
+
853
+ self.__should_block_frames = False
854
+ self.__process_event = asyncio.Event()
855
+ while not self.__process_queue.empty():
856
+ self.__process_queue.get_nowait()
857
+ self.__process_queue.task_done()
858
+
777
859
  async def __cancel_process_task(self):
778
860
  """Cancel the non-system frame processing task."""
779
861
  if self.__process_frame_task:
@@ -784,13 +866,17 @@ class FrameProcessor(BaseObject):
784
866
  self, frame: Frame, direction: FrameDirection, callback: Optional[FrameCallback]
785
867
  ):
786
868
  try:
869
+ await self._call_event_handler("on_before_process_frame", frame)
870
+
787
871
  # Process the frame.
788
872
  await self.process_frame(frame, direction)
789
873
  # If this frame has an associated callback, call it now.
790
874
  if callback:
791
875
  await callback(self, frame, direction)
876
+
877
+ await self._call_event_handler("on_after_process_frame", frame)
792
878
  except Exception as e:
793
- logger.exception(f"{self}: error processing frame: {e}")
879
+ self.logger.exception(f"{self}: error processing frame: {e}")
794
880
  await self.push_error(ErrorFrame(str(e)))
795
881
 
796
882
  async def __input_frame_task_handler(self):
@@ -801,14 +887,14 @@ class FrameProcessor(BaseObject):
801
887
 
802
888
  """
803
889
  while True:
890
+ (frame, direction, callback) = await self.__input_queue.get()
891
+
804
892
  if self.__should_block_system_frames and self.__input_event:
805
- logger.trace(f"{self}: system frame processing paused")
893
+ self.logger.trace(f"{self}: system frame processing paused")
806
894
  await self.__input_event.wait()
807
895
  self.__input_event.clear()
808
896
  self.__should_block_system_frames = False
809
- logger.trace(f"{self}: system frame processing resumed")
810
-
811
- (frame, direction, callback) = await self.__input_queue.get()
897
+ self.logger.trace(f"{self}: system frame processing resumed")
812
898
 
813
899
  if isinstance(frame, SystemFrame):
814
900
  await self.__process_frame(frame, direction, callback)
@@ -824,14 +910,14 @@ class FrameProcessor(BaseObject):
824
910
  async def __process_frame_task_handler(self):
825
911
  """Handle non-system frames from the process queue."""
826
912
  while True:
913
+ (frame, direction, callback) = await self.__process_queue.get()
914
+
827
915
  if self.__should_block_frames and self.__process_event:
828
- logger.trace(f"{self}: frame processing paused")
916
+ self.logger.trace(f"{self}: frame processing paused")
829
917
  await self.__process_event.wait()
830
918
  self.__process_event.clear()
831
919
  self.__should_block_frames = False
832
- logger.trace(f"{self}: frame processing resumed")
833
-
834
- (frame, direction, callback) = await self.__process_queue.get()
920
+ self.logger.trace(f"{self}: frame processing resumed")
835
921
 
836
922
  await self.__process_frame(frame, direction, callback)
837
923
 
@@ -12,6 +12,7 @@ from loguru import logger
12
12
 
13
13
  from pipecat.frames.frames import (
14
14
  Frame,
15
+ LLMContextFrame,
15
16
  LLMFullResponseEndFrame,
16
17
  LLMFullResponseStartFrame,
17
18
  TextFrame,
@@ -64,11 +65,16 @@ class LangchainProcessor(FrameProcessor):
64
65
  """
65
66
  await super().process_frame(frame, direction)
66
67
 
67
- if isinstance(frame, OpenAILLMContextFrame):
68
+ if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)):
68
69
  # Messages are accumulated on the context as a list of messages.
69
70
  # The last one by the human is the one we want to send to the LLM.
70
71
  logger.debug(f"Got transcription frame {frame}")
71
- text: str = frame.context.messages[-1]["content"]
72
+ messages = (
73
+ frame.context.messages
74
+ if isinstance(frame, OpenAILLMContextFrame)
75
+ else frame.context.get_messages()
76
+ )
77
+ text: str = messages[-1]["content"]
72
78
 
73
79
  await self._ainvoke(text.strip())
74
80
  else: