dv-pipecat-ai 0.0.82.dev884__py3-none-any.whl → 0.0.85.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,15 +3,14 @@ import asyncio
3
3
  from pipecat.frames.frames import (
4
4
  BotSpeakingFrame,
5
5
  CancelFrame,
6
+ DTMFUpdateSettingsFrame,
7
+ EndDTMFCaptureFrame,
6
8
  EndFrame,
7
9
  Frame,
8
10
  InputDTMFFrame,
11
+ StartDTMFCaptureFrame,
9
12
  StartInterruptionFrame,
10
- StartUserIdleProcessorFrame,
11
- StopUserIdleProcessorFrame,
12
13
  TranscriptionFrame,
13
- UserStartedSpeakingFrame,
14
- UserStoppedSpeakingFrame,
15
14
  WaitForDTMFFrame,
16
15
  )
17
16
  from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -43,112 +42,214 @@ class DTMFAggregator(FrameProcessor):
43
42
  self._idle_timeout = timeout
44
43
  self._digits = digits
45
44
  self._digit_event = asyncio.Event()
46
- self._digit_aggregate_task = None
45
+ self._aggregation_task = None
47
46
  self._end_on = end_on if end_on else set()
48
47
  self._reset_on = reset_on if reset_on else set()
49
- self._stopped_idle_processor = False
50
-
51
- async def _start_idle_processor(self):
52
- await self.push_frame(StartUserIdleProcessorFrame(), FrameDirection.UPSTREAM)
53
- self._stopped_idle_processor = False
54
-
55
- async def _stop_idle_processor(self):
56
- await self.push_frame(StopUserIdleProcessorFrame(), FrameDirection.UPSTREAM)
57
- self._stopped_idle_processor = True
48
+ self._dtmf_capture_active = False
58
49
 
59
50
  async def process_frame(self, frame: Frame, direction: FrameDirection) -> None:
60
51
  # Handle DTMF frames.
61
52
  await super().process_frame(frame, direction)
62
- await self.push_frame(frame, direction)
63
- if isinstance(frame, InputDTMFFrame):
64
- # Start the digit aggregation task if it's not running yet.
65
- if self._digit_aggregate_task is None:
66
- self._digit_aggregate_task = self.create_task(self._digit_agg_handler(direction))
67
-
68
- # Append the incoming digit.
69
- if frame.button.value in self._reset_on:
70
- self._aggregation = ""
71
- elif frame.button.value in self._end_on:
72
- await self.flush_aggregation(direction)
73
- self._aggregation = ""
74
- else:
75
- self._digit_event.set()
76
- self._aggregation += frame.button.value
77
-
78
- # Flush if the aggregated digits reach the specified length.
79
- if self._digits and len(self._aggregation) == self._digits:
80
- await self.flush_aggregation(direction)
81
- self._aggregation = ""
82
- if self._stopped_idle_processor:
83
- await self._start_idle_processor()
84
53
 
54
+ if isinstance(frame, InputDTMFFrame):
55
+ # Push the DTMF frame downstream first
56
+ await self.push_frame(frame, direction)
57
+ # Then handle it for proper frame ordering
58
+ await self._handle_dtmf_frame(frame)
85
59
  elif isinstance(frame, (EndFrame, CancelFrame)):
86
60
  # For EndFrame or CancelFrame, flush any pending aggregation and stop the digit aggregation task.
87
61
  if self._aggregation:
88
- await self.flush_aggregation(direction)
89
- if self._digit_aggregate_task:
90
- await self._stop_digit_aggregate_task()
62
+ await self.flush_aggregation()
63
+ if self._aggregation_task:
64
+ await self._stop_aggregation_task()
65
+ await self.push_frame(frame, direction)
91
66
  elif isinstance(frame, WaitForDTMFFrame):
92
67
  self.logger.debug("Received WaitForDTMFFrame: Waiting for DTMF input")
93
- if self._digit_aggregate_task is None:
94
- self._digit_aggregate_task = self.create_task(
95
- self._digit_agg_handler(direction, raise_timeout=True)
96
- )
97
- self._digit_event.set()
98
- await self._stop_idle_processor()
68
+ self._create_aggregation_task(raise_timeout=True)
69
+ self._digit_event.set() # Trigger the timeout handler
70
+ await self._start_dtmf_capture()
71
+ await self.push_frame(frame, direction)
99
72
  elif isinstance(frame, StartInterruptionFrame):
100
- self.logger.debug("Received StartInterruptionFrame: Starting idle processor")
101
- if self._stopped_idle_processor:
102
- await self._start_idle_processor()
73
+ self.logger.debug("Received StartInterruptionFrame")
103
74
  if self._aggregation:
104
- await self.flush_aggregation(direction)
75
+ await self.flush_aggregation()
76
+ await self._end_dtmf_capture()
77
+ await self.push_frame(frame, direction)
105
78
  elif isinstance(frame, BotSpeakingFrame):
106
- if self._digit_aggregate_task is not None:
79
+ # Signal the aggregation task to continue when bot speaks
80
+ if self._aggregation_task is not None:
107
81
  self._digit_event.set()
82
+ await self.push_frame(frame, direction)
83
+ elif isinstance(frame, DTMFUpdateSettingsFrame):
84
+ await self._update_settings(frame.settings)
85
+ # Don't pass the settings frame downstream
86
+ else:
87
+ # Pass all other frames through
88
+ await self.push_frame(frame, direction)
108
89
 
109
- async def _digit_agg_handler(self, direction: FrameDirection, raise_timeout=False):
110
- """Idle task that waits for new DTMF activity. If no new digit is received within
111
- the timeout period, the current aggregation is flushed.
90
+ async def _update_settings(self, settings: dict) -> None:
91
+ """Update DTMF aggregator settings dynamically.
92
+
93
+ Args:
94
+ settings: Dictionary containing new DTMF settings
95
+ Supported keys: timeout, digits, end, reset
112
96
  """
97
+ settings_changed = False
98
+
99
+ if "timeout" in settings and settings["timeout"] is not None:
100
+ new_timeout = float(settings["timeout"])
101
+ if new_timeout != self._idle_timeout:
102
+ self.logger.debug(
103
+ f"Updating DTMF timeout from {self._idle_timeout} to {new_timeout}"
104
+ )
105
+ self._idle_timeout = new_timeout
106
+ settings_changed = True
107
+
108
+ if "digits" in settings:
109
+ new_digits = settings["digits"]
110
+ if new_digits != self._digits:
111
+ self.logger.debug(
112
+ f"Updating DTMF digits from {self._digits} to {new_digits}"
113
+ )
114
+ self._digits = new_digits
115
+ settings_changed = True
116
+
117
+ if "end" in settings:
118
+ # Convert single string to set if needed
119
+ end_value = settings["end"]
120
+ if end_value is None:
121
+ new_end_on = set()
122
+ elif isinstance(end_value, str):
123
+ new_end_on = {end_value} if end_value else set()
124
+ else:
125
+ new_end_on = set(end_value)
126
+
127
+ if new_end_on != self._end_on:
128
+ self.logger.debug(
129
+ f"Updating DTMF end_on from {self._end_on} to {new_end_on}"
130
+ )
131
+ self._end_on = new_end_on
132
+ settings_changed = True
133
+
134
+ if "reset" in settings:
135
+ # Convert single string to set if needed
136
+ reset_value = settings["reset"]
137
+ if reset_value is None:
138
+ new_reset_on = set()
139
+ elif isinstance(reset_value, str):
140
+ new_reset_on = {reset_value} if reset_value else set()
141
+ else:
142
+ new_reset_on = set(reset_value)
143
+
144
+ if new_reset_on != self._reset_on:
145
+ self.logger.debug(
146
+ f"Updating DTMF reset_on from {self._reset_on} to {new_reset_on}"
147
+ )
148
+ self._reset_on = new_reset_on
149
+ settings_changed = True
150
+
151
+ if settings_changed:
152
+ self.logger.info(f"DTMF settings updated successfully")
153
+
154
+ async def _handle_dtmf_frame(self, frame: InputDTMFFrame):
155
+ """Handle DTMF input frame processing."""
156
+ # Create aggregation task if needed
157
+ if self._aggregation_task is None:
158
+ self._create_aggregation_task()
159
+
160
+ digit_value = frame.button.value
161
+
162
+ # Handle reset digits
163
+ if digit_value in self._reset_on:
164
+ self._aggregation = ""
165
+ return
166
+
167
+ # Handle end digits
168
+ if digit_value in self._end_on:
169
+ if self._aggregation: # Only flush if we have aggregation
170
+ await self.flush_aggregation()
171
+ return
172
+
173
+ # Add digit to aggregation
174
+ self._aggregation += digit_value
175
+
176
+ # Signal the aggregation task that a digit was received
177
+ self._digit_event.set()
178
+
179
+ # Check if we reached the digit limit
180
+ if self._digits and len(self._aggregation) == self._digits:
181
+ await self.flush_aggregation()
182
+
183
+ def _create_aggregation_task(self, raise_timeout: bool = False) -> None:
184
+ """Creates the aggregation task if it hasn't been created yet."""
185
+ if not self._aggregation_task:
186
+ self._aggregation_task = self.create_task(
187
+ self._aggregation_task_handler(raise_timeout)
188
+ )
189
+
190
+ async def _stop_aggregation_task(self) -> None:
191
+ """Stops the aggregation task."""
192
+ if self._aggregation_task:
193
+ await self.cancel_task(self._aggregation_task)
194
+ self._aggregation_task = None
195
+
196
+ async def _aggregation_task_handler(self, raise_timeout=False):
197
+ """Background task that handles timeout-based flushing."""
113
198
  while True:
114
199
  try:
115
200
  # Wait for a new digit signal with a timeout.
116
- await asyncio.wait_for(self._digit_event.wait(), timeout=self._idle_timeout)
117
- except asyncio.TimeoutError:
118
- # No new digit arrived within the timeout period; flush aggregation if non-empty.
119
- await self.flush_aggregation(direction, raise_timeout)
120
- finally:
121
- # Clear the event for the next cycle.
201
+ await asyncio.wait_for(
202
+ self._digit_event.wait(), timeout=self._idle_timeout
203
+ )
122
204
  self._digit_event.clear()
205
+ except asyncio.TimeoutError:
206
+ # No new digit arrived within the timeout period; flush if needed
207
+ await self.flush_aggregation(raise_timeout=raise_timeout)
123
208
 
124
- async def flush_aggregation(self, direction: FrameDirection, raise_timeout=False):
209
+ async def flush_aggregation(self, *, raise_timeout: bool = False):
125
210
  """Flush the aggregated digits by emitting a TranscriptionFrame downstream."""
126
211
  if self._aggregation:
127
- # Todo: Change to different frame type if we decide to handle it in llm processor separately.
212
+ # Create transcription frame
128
213
  aggregated_frame = TranscriptionFrame(
129
214
  f"User inputted: {self._aggregation}.", "", time_now_iso8601()
130
215
  )
131
216
  aggregated_frame.metadata["push_aggregation"] = True
132
- await self.push_frame(StartInterruptionFrame())
133
- await self.push_frame(aggregated_frame, direction)
217
+
218
+ # Send interruption frame (as per original design)
219
+ await self.push_frame(StartInterruptionFrame(), FrameDirection.DOWNSTREAM)
220
+
221
+ # Push the transcription frame
222
+ await self.push_frame(aggregated_frame, FrameDirection.DOWNSTREAM)
223
+
224
+ # Reset state
134
225
  self._aggregation = ""
135
- elif raise_timeout and self._stopped_idle_processor:
226
+ await self._end_dtmf_capture()
227
+
228
+ elif raise_timeout and not self._aggregation:
229
+ # Timeout with no aggregation (WaitForDTMFFrame case)
136
230
  transcript_frame = TranscriptionFrame(
137
231
  "User didn't press any digits on the keyboard.", "", time_now_iso8601()
138
232
  )
139
233
  transcript_frame.metadata["push_aggregation"] = True
140
- await self.push_frame(transcript_frame)
141
- if self._stopped_idle_processor:
142
- await self._start_idle_processor()
234
+ await self.push_frame(transcript_frame, FrameDirection.DOWNSTREAM)
235
+ await self._end_dtmf_capture()
236
+
237
+ async def _start_dtmf_capture(self):
238
+ """Signal the start of DTMF capture upstream."""
239
+ if self._dtmf_capture_active:
240
+ return
241
+ await self.push_frame(StartDTMFCaptureFrame(), FrameDirection.UPSTREAM)
242
+ self._dtmf_capture_active = True
143
243
 
144
- async def _stop_digit_aggregate_task(self):
145
- """Cancels the digit aggregation task if it exists."""
146
- if self._digit_aggregate_task:
147
- await self.cancel_task(self._digit_aggregate_task)
148
- self._digit_aggregate_task = None
244
+ async def _end_dtmf_capture(self):
245
+ """Signal the end of DTMF capture upstream."""
246
+ if not self._dtmf_capture_active:
247
+ return
248
+ await self.push_frame(EndDTMFCaptureFrame(), FrameDirection.UPSTREAM)
249
+ self._dtmf_capture_active = False
149
250
 
150
251
  async def cleanup(self) -> None:
151
252
  """Cleans up resources, ensuring that the digit aggregation task is cancelled."""
152
253
  await super().cleanup()
153
- if self._digit_aggregate_task:
154
- await self._stop_digit_aggregate_task()
254
+ if self._aggregation_task:
255
+ await self._stop_aggregation_task()
@@ -27,12 +27,14 @@ from pipecat.frames.frames import (
27
27
  InterimTranscriptionFrame,
28
28
  StartFrame,
29
29
  StartInterruptionFrame,
30
+ StartDTMFCaptureFrame,
30
31
  STTMuteFrame,
31
32
  TranscriptionFrame,
32
33
  UserStartedSpeakingFrame,
33
34
  UserStoppedSpeakingFrame,
34
35
  VADUserStartedSpeakingFrame,
35
36
  VADUserStoppedSpeakingFrame,
37
+ EndDTMFCaptureFrame,
36
38
  )
37
39
  from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
38
40
 
@@ -58,6 +60,7 @@ class STTMuteStrategy(Enum):
58
60
  FUNCTION_CALL = "function_call"
59
61
  ALWAYS = "always"
60
62
  CUSTOM = "custom"
63
+ DTMF_CAPTURE = "dtmf_capture"
61
64
 
62
65
 
63
66
  @dataclass
@@ -120,6 +123,7 @@ class STTMuteFilter(FrameProcessor):
120
123
  self._function_call_in_progress = False
121
124
  self._is_muted = False # Initialize as unmuted, will set state on StartFrame if needed
122
125
  self._voicemail_detection_enabled = False # Default to False
126
+ self._dtmf_capture_active = False
123
127
 
124
128
  @property
125
129
  def is_muted(self) -> bool:
@@ -165,6 +169,10 @@ class STTMuteFilter(FrameProcessor):
165
169
  if should_mute:
166
170
  return True
167
171
 
172
+ case STTMuteStrategy.DTMF_CAPTURE:
173
+ if self._dtmf_capture_active:
174
+ return True
175
+
168
176
  return False
169
177
 
170
178
  async def process_frame(self, frame: Frame, direction: FrameDirection):
@@ -205,7 +213,14 @@ class STTMuteFilter(FrameProcessor):
205
213
  self._first_speech_handled = True
206
214
  should_mute = await self._should_mute()
207
215
  self.logger.debug(f"BotStoppedSpeaking: should mute={should_mute}")
216
+ elif isinstance(frame, StartDTMFCaptureFrame):
217
+ self._dtmf_capture_active = True
218
+ should_mute = await self._should_mute()
219
+ elif isinstance(frame, EndDTMFCaptureFrame):
220
+ self._dtmf_capture_active = False
221
+ should_mute = await self._should_mute()
208
222
  elif isinstance(frame, STTMuteFrame):
223
+ # TODO: Frame duplication is actually happening here. We receive this frame from downstream and then push it downstream again. We are also pushing it upstream, and then push it upstream again in _handle_mute_state.
209
224
  should_mute = frame.mute
210
225
 
211
226
  # Then push the original frame
@@ -15,17 +15,19 @@ from loguru import logger
15
15
  from pipecat.frames.frames import (
16
16
  BotSpeakingFrame,
17
17
  CancelFrame,
18
+ EndDTMFCaptureFrame,
18
19
  EndFrame,
19
20
  Frame,
20
21
  FunctionCallCancelFrame,
21
22
  FunctionCallInProgressFrame,
22
23
  FunctionCallResultFrame,
23
24
  InputDTMFFrame,
25
+ LLMFullResponseStartFrame,
26
+ LLMFullResponseEndFrame,
27
+ StartDTMFCaptureFrame,
28
+ StartFrame,
24
29
  StartUserIdleProcessorFrame,
25
30
  StopUserIdleProcessorFrame,
26
- FunctionCallInProgressFrame,
27
- FunctionCallResultFrame,
28
- StartFrame,
29
31
  UserStartedSpeakingFrame,
30
32
  UserStoppedSpeakingFrame,
31
33
  )
@@ -83,9 +85,12 @@ class UserIdleProcessor(FrameProcessor):
83
85
  self._timeout = timeout
84
86
  self._retry_count = 0
85
87
  self._interrupted = False
88
+ self._function_call_active = False
89
+ self._dtmf_capture_active = False
86
90
  self._conversation_started = False
87
91
  self._idle_task = None
88
92
  self._idle_event = asyncio.Event()
93
+ self._llm_in_progress = False
89
94
 
90
95
  def _wrap_callback(
91
96
  self,
@@ -180,10 +185,20 @@ class UserIdleProcessor(FrameProcessor):
180
185
  self._idle_event.set()
181
186
  elif isinstance(frame, FunctionCallInProgressFrame):
182
187
  # Function calls can take longer than the timeout, so we want to prevent idle callbacks
188
+ self._function_call_active = True
183
189
  self._interrupted = True
184
190
  self._idle_event.set()
185
191
  elif isinstance(frame, FunctionCallResultFrame):
186
- self._interrupted = False
192
+ self._function_call_active = False
193
+ self._interrupted = self._dtmf_capture_active
194
+ self._idle_event.set()
195
+ elif isinstance(frame, StartDTMFCaptureFrame):
196
+ self._dtmf_capture_active = True
197
+ self._interrupted = True
198
+ self._idle_event.set()
199
+ elif isinstance(frame, EndDTMFCaptureFrame):
200
+ self._dtmf_capture_active = False
201
+ self._interrupted = self._function_call_active
187
202
  self._idle_event.set()
188
203
  elif isinstance(frame, StartUserIdleProcessorFrame):
189
204
  if not self._idle_task:
@@ -192,6 +207,18 @@ class UserIdleProcessor(FrameProcessor):
192
207
  elif isinstance(frame, StopUserIdleProcessorFrame):
193
208
  self.logger.debug("Stopping Idle Processor")
194
209
  await self._stop()
210
+ elif isinstance(frame, LLMFullResponseStartFrame):
211
+ self.logger.debug(
212
+ "LLM FullResponseStartFrame received, making llm_in_progress to True"
213
+ )
214
+ self._idle_event.set()
215
+ self._llm_in_progress = True
216
+ elif isinstance(frame, LLMFullResponseEndFrame):
217
+ self.logger.debug(
218
+ "LLM FullResponseEndFrame received, making llm_in_progress to False"
219
+ )
220
+ self._idle_event.set()
221
+ self._llm_in_progress = False
195
222
 
196
223
  async def cleanup(self) -> None:
197
224
  """Cleans up resources when processor is shutting down."""
@@ -210,7 +237,7 @@ class UserIdleProcessor(FrameProcessor):
210
237
  try:
211
238
  await asyncio.wait_for(self._idle_event.wait(), timeout=self._timeout)
212
239
  except asyncio.TimeoutError:
213
- if not self._interrupted:
240
+ if not self._interrupted and not self._llm_in_progress:
214
241
  self._retry_count += 1
215
242
  should_continue = await self._callback(self, self._retry_count)
216
243
  if not should_continue:
@@ -1,5 +1,6 @@
1
1
  from .base_serializer import FrameSerializer, FrameSerializerType
2
2
  from .convox import ConVoxFrameSerializer
3
+ from .custom import CustomFrameSerializer
3
4
  from .exotel import ExotelFrameSerializer
4
5
  from .plivo import PlivoFrameSerializer
5
6
  from .telnyx import TelnyxFrameSerializer
@@ -7,8 +8,9 @@ from .twilio import TwilioFrameSerializer
7
8
 
8
9
  __all__ = [
9
10
  "FrameSerializer",
10
- "FrameSerializerType",
11
+ "FrameSerializerType",
11
12
  "ConVoxFrameSerializer",
13
+ "CustomFrameSerializer",
12
14
  "ExotelFrameSerializer",
13
15
  "PlivoFrameSerializer",
14
16
  "TelnyxFrameSerializer",
@@ -4,9 +4,11 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """ConVox WebSocket frame serializer for audio streaming and call management."""
8
+
7
9
  import base64
8
- import datetime
9
10
  import json
11
+ from datetime import datetime, timezone
10
12
  from typing import Optional
11
13
 
12
14
  from loguru import logger
@@ -99,6 +101,7 @@ class ConVoxFrameSerializer(FrameSerializer):
99
101
  """Serializes a Pipecat frame to ConVox WebSocket format.
100
102
 
101
103
  Handles conversion of various frame types to ConVox WebSocket messages.
104
+ For EndFrames, initiates call termination if auto_hang_up is enabled.
102
105
 
103
106
  Args:
104
107
  frame: The Pipecat frame to serialize.
@@ -106,7 +109,15 @@ class ConVoxFrameSerializer(FrameSerializer):
106
109
  Returns:
107
110
  Serialized data as JSON string, or None if the frame isn't handled.
108
111
  """
109
- if isinstance(frame, StartInterruptionFrame):
112
+ if (
113
+ self._params.auto_hang_up
114
+ and not self._call_ended
115
+ and isinstance(frame, (EndFrame, CancelFrame))
116
+ ):
117
+ self._call_ended = True
118
+ # Return the callEnd event to be sent via the WebSocket
119
+ return await self._send_call_end_event()
120
+ elif isinstance(frame, StartInterruptionFrame):
110
121
  # Clear/interrupt command for ConVox
111
122
  message = {
112
123
  "event": "clear",
@@ -138,7 +149,7 @@ class ConVoxFrameSerializer(FrameSerializer):
138
149
  payload = base64.b64encode(serialized_data).decode("ascii")
139
150
 
140
151
  # ConVox expects play_audio event format according to the documentation
141
- timestamp = datetime.datetime.now().isoformat()
152
+ timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
142
153
 
143
154
  message = {
144
155
  "event": "play_audio",
@@ -164,6 +175,32 @@ class ConVoxFrameSerializer(FrameSerializer):
164
175
 
165
176
  return None
166
177
 
178
+ async def _send_call_end_event(self):
179
+ """Send a callEnd event to ConVox to terminate the call.
180
+
181
+ This method is called when auto_hang_up is enabled and an EndFrame or
182
+ CancelFrame is received, similar to the logic in end_call_handler.py.
183
+ """
184
+ try:
185
+ call_end_event = {
186
+ "event": "callEnd",
187
+ "details": {
188
+ "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
189
+ "direction": "WSS",
190
+ "message": "Event trigger request",
191
+ },
192
+ }
193
+
194
+ logger.info(
195
+ f"ConVox auto_hang_up: Sending callEnd event for stream_id: {self._stream_id}, call_id: {self._call_id}"
196
+ )
197
+ # Note: The actual sending will be handled by the transport layer
198
+ # when this method returns the JSON string
199
+ return json.dumps(call_end_event)
200
+ except Exception as e:
201
+ logger.error(f"ConVox auto_hang_up: Failed to create callEnd event: {e}")
202
+ return None
203
+
167
204
  async def deserialize(self, data: str | bytes) -> Frame | None:
168
205
  """Deserializes ConVox WebSocket data to Pipecat frames.
169
206