dv-pipecat-ai 0.0.82.dev881__py3-none-any.whl → 0.0.85.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/METADATA +2 -1
- {dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/RECORD +24 -22
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/frames/frames.py +49 -0
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +22 -29
- pipecat/processors/aggregators/llm_response.py +2 -0
- pipecat/processors/dtmf_aggregator.py +175 -74
- pipecat/processors/filters/stt_mute_filter.py +15 -0
- pipecat/processors/user_idle_processor.py +32 -5
- pipecat/serializers/__init__.py +3 -1
- pipecat/serializers/convox.py +40 -3
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/plivo.py +4 -1
- pipecat/services/elevenlabs/stt.py +18 -8
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +13 -1
- pipecat/services/speechmatics/stt.py +16 -0
- pipecat/services/vistaar/llm.py +45 -7
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- {dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/top_level.txt +0 -0
|
@@ -3,15 +3,14 @@ import asyncio
|
|
|
3
3
|
from pipecat.frames.frames import (
|
|
4
4
|
BotSpeakingFrame,
|
|
5
5
|
CancelFrame,
|
|
6
|
+
DTMFUpdateSettingsFrame,
|
|
7
|
+
EndDTMFCaptureFrame,
|
|
6
8
|
EndFrame,
|
|
7
9
|
Frame,
|
|
8
10
|
InputDTMFFrame,
|
|
11
|
+
StartDTMFCaptureFrame,
|
|
9
12
|
StartInterruptionFrame,
|
|
10
|
-
StartUserIdleProcessorFrame,
|
|
11
|
-
StopUserIdleProcessorFrame,
|
|
12
13
|
TranscriptionFrame,
|
|
13
|
-
UserStartedSpeakingFrame,
|
|
14
|
-
UserStoppedSpeakingFrame,
|
|
15
14
|
WaitForDTMFFrame,
|
|
16
15
|
)
|
|
17
16
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
|
@@ -43,112 +42,214 @@ class DTMFAggregator(FrameProcessor):
|
|
|
43
42
|
self._idle_timeout = timeout
|
|
44
43
|
self._digits = digits
|
|
45
44
|
self._digit_event = asyncio.Event()
|
|
46
|
-
self.
|
|
45
|
+
self._aggregation_task = None
|
|
47
46
|
self._end_on = end_on if end_on else set()
|
|
48
47
|
self._reset_on = reset_on if reset_on else set()
|
|
49
|
-
self.
|
|
50
|
-
|
|
51
|
-
async def _start_idle_processor(self):
|
|
52
|
-
await self.push_frame(StartUserIdleProcessorFrame(), FrameDirection.UPSTREAM)
|
|
53
|
-
self._stopped_idle_processor = False
|
|
54
|
-
|
|
55
|
-
async def _stop_idle_processor(self):
|
|
56
|
-
await self.push_frame(StopUserIdleProcessorFrame(), FrameDirection.UPSTREAM)
|
|
57
|
-
self._stopped_idle_processor = True
|
|
48
|
+
self._dtmf_capture_active = False
|
|
58
49
|
|
|
59
50
|
async def process_frame(self, frame: Frame, direction: FrameDirection) -> None:
|
|
60
51
|
# Handle DTMF frames.
|
|
61
52
|
await super().process_frame(frame, direction)
|
|
62
|
-
await self.push_frame(frame, direction)
|
|
63
|
-
if isinstance(frame, InputDTMFFrame):
|
|
64
|
-
# Start the digit aggregation task if it's not running yet.
|
|
65
|
-
if self._digit_aggregate_task is None:
|
|
66
|
-
self._digit_aggregate_task = self.create_task(self._digit_agg_handler(direction))
|
|
67
|
-
|
|
68
|
-
# Append the incoming digit.
|
|
69
|
-
if frame.button.value in self._reset_on:
|
|
70
|
-
self._aggregation = ""
|
|
71
|
-
elif frame.button.value in self._end_on:
|
|
72
|
-
await self.flush_aggregation(direction)
|
|
73
|
-
self._aggregation = ""
|
|
74
|
-
else:
|
|
75
|
-
self._digit_event.set()
|
|
76
|
-
self._aggregation += frame.button.value
|
|
77
|
-
|
|
78
|
-
# Flush if the aggregated digits reach the specified length.
|
|
79
|
-
if self._digits and len(self._aggregation) == self._digits:
|
|
80
|
-
await self.flush_aggregation(direction)
|
|
81
|
-
self._aggregation = ""
|
|
82
|
-
if self._stopped_idle_processor:
|
|
83
|
-
await self._start_idle_processor()
|
|
84
53
|
|
|
54
|
+
if isinstance(frame, InputDTMFFrame):
|
|
55
|
+
# Push the DTMF frame downstream first
|
|
56
|
+
await self.push_frame(frame, direction)
|
|
57
|
+
# Then handle it for proper frame ordering
|
|
58
|
+
await self._handle_dtmf_frame(frame)
|
|
85
59
|
elif isinstance(frame, (EndFrame, CancelFrame)):
|
|
86
60
|
# For EndFrame or CancelFrame, flush any pending aggregation and stop the digit aggregation task.
|
|
87
61
|
if self._aggregation:
|
|
88
|
-
await self.flush_aggregation(
|
|
89
|
-
if self.
|
|
90
|
-
await self.
|
|
62
|
+
await self.flush_aggregation()
|
|
63
|
+
if self._aggregation_task:
|
|
64
|
+
await self._stop_aggregation_task()
|
|
65
|
+
await self.push_frame(frame, direction)
|
|
91
66
|
elif isinstance(frame, WaitForDTMFFrame):
|
|
92
67
|
self.logger.debug("Received WaitForDTMFFrame: Waiting for DTMF input")
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
self._digit_event.set()
|
|
98
|
-
await self._stop_idle_processor()
|
|
68
|
+
self._create_aggregation_task(raise_timeout=True)
|
|
69
|
+
self._digit_event.set() # Trigger the timeout handler
|
|
70
|
+
await self._start_dtmf_capture()
|
|
71
|
+
await self.push_frame(frame, direction)
|
|
99
72
|
elif isinstance(frame, StartInterruptionFrame):
|
|
100
|
-
self.logger.debug("Received StartInterruptionFrame
|
|
101
|
-
if self._stopped_idle_processor:
|
|
102
|
-
await self._start_idle_processor()
|
|
73
|
+
self.logger.debug("Received StartInterruptionFrame")
|
|
103
74
|
if self._aggregation:
|
|
104
|
-
await self.flush_aggregation(
|
|
75
|
+
await self.flush_aggregation()
|
|
76
|
+
await self._end_dtmf_capture()
|
|
77
|
+
await self.push_frame(frame, direction)
|
|
105
78
|
elif isinstance(frame, BotSpeakingFrame):
|
|
106
|
-
|
|
79
|
+
# Signal the aggregation task to continue when bot speaks
|
|
80
|
+
if self._aggregation_task is not None:
|
|
107
81
|
self._digit_event.set()
|
|
82
|
+
await self.push_frame(frame, direction)
|
|
83
|
+
elif isinstance(frame, DTMFUpdateSettingsFrame):
|
|
84
|
+
await self._update_settings(frame.settings)
|
|
85
|
+
# Don't pass the settings frame downstream
|
|
86
|
+
else:
|
|
87
|
+
# Pass all other frames through
|
|
88
|
+
await self.push_frame(frame, direction)
|
|
108
89
|
|
|
109
|
-
async def
|
|
110
|
-
"""
|
|
111
|
-
|
|
90
|
+
async def _update_settings(self, settings: dict) -> None:
|
|
91
|
+
"""Update DTMF aggregator settings dynamically.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
settings: Dictionary containing new DTMF settings
|
|
95
|
+
Supported keys: timeout, digits, end, reset
|
|
112
96
|
"""
|
|
97
|
+
settings_changed = False
|
|
98
|
+
|
|
99
|
+
if "timeout" in settings and settings["timeout"] is not None:
|
|
100
|
+
new_timeout = float(settings["timeout"])
|
|
101
|
+
if new_timeout != self._idle_timeout:
|
|
102
|
+
self.logger.debug(
|
|
103
|
+
f"Updating DTMF timeout from {self._idle_timeout} to {new_timeout}"
|
|
104
|
+
)
|
|
105
|
+
self._idle_timeout = new_timeout
|
|
106
|
+
settings_changed = True
|
|
107
|
+
|
|
108
|
+
if "digits" in settings:
|
|
109
|
+
new_digits = settings["digits"]
|
|
110
|
+
if new_digits != self._digits:
|
|
111
|
+
self.logger.debug(
|
|
112
|
+
f"Updating DTMF digits from {self._digits} to {new_digits}"
|
|
113
|
+
)
|
|
114
|
+
self._digits = new_digits
|
|
115
|
+
settings_changed = True
|
|
116
|
+
|
|
117
|
+
if "end" in settings:
|
|
118
|
+
# Convert single string to set if needed
|
|
119
|
+
end_value = settings["end"]
|
|
120
|
+
if end_value is None:
|
|
121
|
+
new_end_on = set()
|
|
122
|
+
elif isinstance(end_value, str):
|
|
123
|
+
new_end_on = {end_value} if end_value else set()
|
|
124
|
+
else:
|
|
125
|
+
new_end_on = set(end_value)
|
|
126
|
+
|
|
127
|
+
if new_end_on != self._end_on:
|
|
128
|
+
self.logger.debug(
|
|
129
|
+
f"Updating DTMF end_on from {self._end_on} to {new_end_on}"
|
|
130
|
+
)
|
|
131
|
+
self._end_on = new_end_on
|
|
132
|
+
settings_changed = True
|
|
133
|
+
|
|
134
|
+
if "reset" in settings:
|
|
135
|
+
# Convert single string to set if needed
|
|
136
|
+
reset_value = settings["reset"]
|
|
137
|
+
if reset_value is None:
|
|
138
|
+
new_reset_on = set()
|
|
139
|
+
elif isinstance(reset_value, str):
|
|
140
|
+
new_reset_on = {reset_value} if reset_value else set()
|
|
141
|
+
else:
|
|
142
|
+
new_reset_on = set(reset_value)
|
|
143
|
+
|
|
144
|
+
if new_reset_on != self._reset_on:
|
|
145
|
+
self.logger.debug(
|
|
146
|
+
f"Updating DTMF reset_on from {self._reset_on} to {new_reset_on}"
|
|
147
|
+
)
|
|
148
|
+
self._reset_on = new_reset_on
|
|
149
|
+
settings_changed = True
|
|
150
|
+
|
|
151
|
+
if settings_changed:
|
|
152
|
+
self.logger.info(f"DTMF settings updated successfully")
|
|
153
|
+
|
|
154
|
+
async def _handle_dtmf_frame(self, frame: InputDTMFFrame):
|
|
155
|
+
"""Handle DTMF input frame processing."""
|
|
156
|
+
# Create aggregation task if needed
|
|
157
|
+
if self._aggregation_task is None:
|
|
158
|
+
self._create_aggregation_task()
|
|
159
|
+
|
|
160
|
+
digit_value = frame.button.value
|
|
161
|
+
|
|
162
|
+
# Handle reset digits
|
|
163
|
+
if digit_value in self._reset_on:
|
|
164
|
+
self._aggregation = ""
|
|
165
|
+
return
|
|
166
|
+
|
|
167
|
+
# Handle end digits
|
|
168
|
+
if digit_value in self._end_on:
|
|
169
|
+
if self._aggregation: # Only flush if we have aggregation
|
|
170
|
+
await self.flush_aggregation()
|
|
171
|
+
return
|
|
172
|
+
|
|
173
|
+
# Add digit to aggregation
|
|
174
|
+
self._aggregation += digit_value
|
|
175
|
+
|
|
176
|
+
# Signal the aggregation task that a digit was received
|
|
177
|
+
self._digit_event.set()
|
|
178
|
+
|
|
179
|
+
# Check if we reached the digit limit
|
|
180
|
+
if self._digits and len(self._aggregation) == self._digits:
|
|
181
|
+
await self.flush_aggregation()
|
|
182
|
+
|
|
183
|
+
def _create_aggregation_task(self, raise_timeout: bool = False) -> None:
|
|
184
|
+
"""Creates the aggregation task if it hasn't been created yet."""
|
|
185
|
+
if not self._aggregation_task:
|
|
186
|
+
self._aggregation_task = self.create_task(
|
|
187
|
+
self._aggregation_task_handler(raise_timeout)
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
async def _stop_aggregation_task(self) -> None:
|
|
191
|
+
"""Stops the aggregation task."""
|
|
192
|
+
if self._aggregation_task:
|
|
193
|
+
await self.cancel_task(self._aggregation_task)
|
|
194
|
+
self._aggregation_task = None
|
|
195
|
+
|
|
196
|
+
async def _aggregation_task_handler(self, raise_timeout=False):
|
|
197
|
+
"""Background task that handles timeout-based flushing."""
|
|
113
198
|
while True:
|
|
114
199
|
try:
|
|
115
200
|
# Wait for a new digit signal with a timeout.
|
|
116
|
-
await asyncio.wait_for(
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
await self.flush_aggregation(direction, raise_timeout)
|
|
120
|
-
finally:
|
|
121
|
-
# Clear the event for the next cycle.
|
|
201
|
+
await asyncio.wait_for(
|
|
202
|
+
self._digit_event.wait(), timeout=self._idle_timeout
|
|
203
|
+
)
|
|
122
204
|
self._digit_event.clear()
|
|
205
|
+
except asyncio.TimeoutError:
|
|
206
|
+
# No new digit arrived within the timeout period; flush if needed
|
|
207
|
+
await self.flush_aggregation(raise_timeout=raise_timeout)
|
|
123
208
|
|
|
124
|
-
async def flush_aggregation(self,
|
|
209
|
+
async def flush_aggregation(self, *, raise_timeout: bool = False):
|
|
125
210
|
"""Flush the aggregated digits by emitting a TranscriptionFrame downstream."""
|
|
126
211
|
if self._aggregation:
|
|
127
|
-
#
|
|
212
|
+
# Create transcription frame
|
|
128
213
|
aggregated_frame = TranscriptionFrame(
|
|
129
214
|
f"User inputted: {self._aggregation}.", "", time_now_iso8601()
|
|
130
215
|
)
|
|
131
216
|
aggregated_frame.metadata["push_aggregation"] = True
|
|
132
|
-
|
|
133
|
-
|
|
217
|
+
|
|
218
|
+
# Send interruption frame (as per original design)
|
|
219
|
+
await self.push_frame(StartInterruptionFrame(), FrameDirection.DOWNSTREAM)
|
|
220
|
+
|
|
221
|
+
# Push the transcription frame
|
|
222
|
+
await self.push_frame(aggregated_frame, FrameDirection.DOWNSTREAM)
|
|
223
|
+
|
|
224
|
+
# Reset state
|
|
134
225
|
self._aggregation = ""
|
|
135
|
-
|
|
226
|
+
await self._end_dtmf_capture()
|
|
227
|
+
|
|
228
|
+
elif raise_timeout and not self._aggregation:
|
|
229
|
+
# Timeout with no aggregation (WaitForDTMFFrame case)
|
|
136
230
|
transcript_frame = TranscriptionFrame(
|
|
137
231
|
"User didn't press any digits on the keyboard.", "", time_now_iso8601()
|
|
138
232
|
)
|
|
139
233
|
transcript_frame.metadata["push_aggregation"] = True
|
|
140
|
-
await self.push_frame(transcript_frame)
|
|
141
|
-
|
|
142
|
-
|
|
234
|
+
await self.push_frame(transcript_frame, FrameDirection.DOWNSTREAM)
|
|
235
|
+
await self._end_dtmf_capture()
|
|
236
|
+
|
|
237
|
+
async def _start_dtmf_capture(self):
|
|
238
|
+
"""Signal the start of DTMF capture upstream."""
|
|
239
|
+
if self._dtmf_capture_active:
|
|
240
|
+
return
|
|
241
|
+
await self.push_frame(StartDTMFCaptureFrame(), FrameDirection.UPSTREAM)
|
|
242
|
+
self._dtmf_capture_active = True
|
|
143
243
|
|
|
144
|
-
async def
|
|
145
|
-
"""
|
|
146
|
-
if self.
|
|
147
|
-
|
|
148
|
-
|
|
244
|
+
async def _end_dtmf_capture(self):
|
|
245
|
+
"""Signal the end of DTMF capture upstream."""
|
|
246
|
+
if not self._dtmf_capture_active:
|
|
247
|
+
return
|
|
248
|
+
await self.push_frame(EndDTMFCaptureFrame(), FrameDirection.UPSTREAM)
|
|
249
|
+
self._dtmf_capture_active = False
|
|
149
250
|
|
|
150
251
|
async def cleanup(self) -> None:
|
|
151
252
|
"""Cleans up resources, ensuring that the digit aggregation task is cancelled."""
|
|
152
253
|
await super().cleanup()
|
|
153
|
-
if self.
|
|
154
|
-
await self.
|
|
254
|
+
if self._aggregation_task:
|
|
255
|
+
await self._stop_aggregation_task()
|
|
@@ -27,12 +27,14 @@ from pipecat.frames.frames import (
|
|
|
27
27
|
InterimTranscriptionFrame,
|
|
28
28
|
StartFrame,
|
|
29
29
|
StartInterruptionFrame,
|
|
30
|
+
StartDTMFCaptureFrame,
|
|
30
31
|
STTMuteFrame,
|
|
31
32
|
TranscriptionFrame,
|
|
32
33
|
UserStartedSpeakingFrame,
|
|
33
34
|
UserStoppedSpeakingFrame,
|
|
34
35
|
VADUserStartedSpeakingFrame,
|
|
35
36
|
VADUserStoppedSpeakingFrame,
|
|
37
|
+
EndDTMFCaptureFrame,
|
|
36
38
|
)
|
|
37
39
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
|
38
40
|
|
|
@@ -58,6 +60,7 @@ class STTMuteStrategy(Enum):
|
|
|
58
60
|
FUNCTION_CALL = "function_call"
|
|
59
61
|
ALWAYS = "always"
|
|
60
62
|
CUSTOM = "custom"
|
|
63
|
+
DTMF_CAPTURE = "dtmf_capture"
|
|
61
64
|
|
|
62
65
|
|
|
63
66
|
@dataclass
|
|
@@ -120,6 +123,7 @@ class STTMuteFilter(FrameProcessor):
|
|
|
120
123
|
self._function_call_in_progress = False
|
|
121
124
|
self._is_muted = False # Initialize as unmuted, will set state on StartFrame if needed
|
|
122
125
|
self._voicemail_detection_enabled = False # Default to False
|
|
126
|
+
self._dtmf_capture_active = False
|
|
123
127
|
|
|
124
128
|
@property
|
|
125
129
|
def is_muted(self) -> bool:
|
|
@@ -165,6 +169,10 @@ class STTMuteFilter(FrameProcessor):
|
|
|
165
169
|
if should_mute:
|
|
166
170
|
return True
|
|
167
171
|
|
|
172
|
+
case STTMuteStrategy.DTMF_CAPTURE:
|
|
173
|
+
if self._dtmf_capture_active:
|
|
174
|
+
return True
|
|
175
|
+
|
|
168
176
|
return False
|
|
169
177
|
|
|
170
178
|
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
@@ -205,7 +213,14 @@ class STTMuteFilter(FrameProcessor):
|
|
|
205
213
|
self._first_speech_handled = True
|
|
206
214
|
should_mute = await self._should_mute()
|
|
207
215
|
self.logger.debug(f"BotStoppedSpeaking: should mute={should_mute}")
|
|
216
|
+
elif isinstance(frame, StartDTMFCaptureFrame):
|
|
217
|
+
self._dtmf_capture_active = True
|
|
218
|
+
should_mute = await self._should_mute()
|
|
219
|
+
elif isinstance(frame, EndDTMFCaptureFrame):
|
|
220
|
+
self._dtmf_capture_active = False
|
|
221
|
+
should_mute = await self._should_mute()
|
|
208
222
|
elif isinstance(frame, STTMuteFrame):
|
|
223
|
+
# TODO: This frame is currently being duplicated. We receive it from downstream and then push it downstream again. We also push it upstream here and then push it upstream again in _handle_mute_state.
|
|
209
224
|
should_mute = frame.mute
|
|
210
225
|
|
|
211
226
|
# Then push the original frame
|
|
@@ -15,17 +15,19 @@ from loguru import logger
|
|
|
15
15
|
from pipecat.frames.frames import (
|
|
16
16
|
BotSpeakingFrame,
|
|
17
17
|
CancelFrame,
|
|
18
|
+
EndDTMFCaptureFrame,
|
|
18
19
|
EndFrame,
|
|
19
20
|
Frame,
|
|
20
21
|
FunctionCallCancelFrame,
|
|
21
22
|
FunctionCallInProgressFrame,
|
|
22
23
|
FunctionCallResultFrame,
|
|
23
24
|
InputDTMFFrame,
|
|
25
|
+
LLMFullResponseStartFrame,
|
|
26
|
+
LLMFullResponseEndFrame,
|
|
27
|
+
StartDTMFCaptureFrame,
|
|
28
|
+
StartFrame,
|
|
24
29
|
StartUserIdleProcessorFrame,
|
|
25
30
|
StopUserIdleProcessorFrame,
|
|
26
|
-
FunctionCallInProgressFrame,
|
|
27
|
-
FunctionCallResultFrame,
|
|
28
|
-
StartFrame,
|
|
29
31
|
UserStartedSpeakingFrame,
|
|
30
32
|
UserStoppedSpeakingFrame,
|
|
31
33
|
)
|
|
@@ -83,9 +85,12 @@ class UserIdleProcessor(FrameProcessor):
|
|
|
83
85
|
self._timeout = timeout
|
|
84
86
|
self._retry_count = 0
|
|
85
87
|
self._interrupted = False
|
|
88
|
+
self._function_call_active = False
|
|
89
|
+
self._dtmf_capture_active = False
|
|
86
90
|
self._conversation_started = False
|
|
87
91
|
self._idle_task = None
|
|
88
92
|
self._idle_event = asyncio.Event()
|
|
93
|
+
self._llm_in_progress = False
|
|
89
94
|
|
|
90
95
|
def _wrap_callback(
|
|
91
96
|
self,
|
|
@@ -180,10 +185,20 @@ class UserIdleProcessor(FrameProcessor):
|
|
|
180
185
|
self._idle_event.set()
|
|
181
186
|
elif isinstance(frame, FunctionCallInProgressFrame):
|
|
182
187
|
# Function calls can take longer than the timeout, so we want to prevent idle callbacks
|
|
188
|
+
self._function_call_active = True
|
|
183
189
|
self._interrupted = True
|
|
184
190
|
self._idle_event.set()
|
|
185
191
|
elif isinstance(frame, FunctionCallResultFrame):
|
|
186
|
-
self.
|
|
192
|
+
self._function_call_active = False
|
|
193
|
+
self._interrupted = self._dtmf_capture_active
|
|
194
|
+
self._idle_event.set()
|
|
195
|
+
elif isinstance(frame, StartDTMFCaptureFrame):
|
|
196
|
+
self._dtmf_capture_active = True
|
|
197
|
+
self._interrupted = True
|
|
198
|
+
self._idle_event.set()
|
|
199
|
+
elif isinstance(frame, EndDTMFCaptureFrame):
|
|
200
|
+
self._dtmf_capture_active = False
|
|
201
|
+
self._interrupted = self._function_call_active
|
|
187
202
|
self._idle_event.set()
|
|
188
203
|
elif isinstance(frame, StartUserIdleProcessorFrame):
|
|
189
204
|
if not self._idle_task:
|
|
@@ -192,6 +207,18 @@ class UserIdleProcessor(FrameProcessor):
|
|
|
192
207
|
elif isinstance(frame, StopUserIdleProcessorFrame):
|
|
193
208
|
self.logger.debug("Stopping Idle Processor")
|
|
194
209
|
await self._stop()
|
|
210
|
+
elif isinstance(frame, LLMFullResponseStartFrame):
|
|
211
|
+
self.logger.debug(
|
|
212
|
+
"LLM FullResponseStartFrame received, making llm_in_progress to True"
|
|
213
|
+
)
|
|
214
|
+
self._idle_event.set()
|
|
215
|
+
self._llm_in_progress = True
|
|
216
|
+
elif isinstance(frame, LLMFullResponseEndFrame):
|
|
217
|
+
self.logger.debug(
|
|
218
|
+
"LLM FullResponseEndFrame received, making llm_in_progress to False"
|
|
219
|
+
)
|
|
220
|
+
self._idle_event.set()
|
|
221
|
+
self._llm_in_progress = False
|
|
195
222
|
|
|
196
223
|
async def cleanup(self) -> None:
|
|
197
224
|
"""Cleans up resources when processor is shutting down."""
|
|
@@ -210,7 +237,7 @@ class UserIdleProcessor(FrameProcessor):
|
|
|
210
237
|
try:
|
|
211
238
|
await asyncio.wait_for(self._idle_event.wait(), timeout=self._timeout)
|
|
212
239
|
except asyncio.TimeoutError:
|
|
213
|
-
if not self._interrupted:
|
|
240
|
+
if not self._interrupted and not self._llm_in_progress:
|
|
214
241
|
self._retry_count += 1
|
|
215
242
|
should_continue = await self._callback(self, self._retry_count)
|
|
216
243
|
if not should_continue:
|
pipecat/serializers/__init__.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from .base_serializer import FrameSerializer, FrameSerializerType
|
|
2
2
|
from .convox import ConVoxFrameSerializer
|
|
3
|
+
from .custom import CustomFrameSerializer
|
|
3
4
|
from .exotel import ExotelFrameSerializer
|
|
4
5
|
from .plivo import PlivoFrameSerializer
|
|
5
6
|
from .telnyx import TelnyxFrameSerializer
|
|
@@ -7,8 +8,9 @@ from .twilio import TwilioFrameSerializer
|
|
|
7
8
|
|
|
8
9
|
__all__ = [
|
|
9
10
|
"FrameSerializer",
|
|
10
|
-
"FrameSerializerType",
|
|
11
|
+
"FrameSerializerType",
|
|
11
12
|
"ConVoxFrameSerializer",
|
|
13
|
+
"CustomFrameSerializer",
|
|
12
14
|
"ExotelFrameSerializer",
|
|
13
15
|
"PlivoFrameSerializer",
|
|
14
16
|
"TelnyxFrameSerializer",
|
pipecat/serializers/convox.py
CHANGED
|
@@ -4,9 +4,11 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""ConVox WebSocket frame serializer for audio streaming and call management."""
|
|
8
|
+
|
|
7
9
|
import base64
|
|
8
|
-
import datetime
|
|
9
10
|
import json
|
|
11
|
+
from datetime import datetime, timezone
|
|
10
12
|
from typing import Optional
|
|
11
13
|
|
|
12
14
|
from loguru import logger
|
|
@@ -99,6 +101,7 @@ class ConVoxFrameSerializer(FrameSerializer):
|
|
|
99
101
|
"""Serializes a Pipecat frame to ConVox WebSocket format.
|
|
100
102
|
|
|
101
103
|
Handles conversion of various frame types to ConVox WebSocket messages.
|
|
104
|
+
For EndFrames, initiates call termination if auto_hang_up is enabled.
|
|
102
105
|
|
|
103
106
|
Args:
|
|
104
107
|
frame: The Pipecat frame to serialize.
|
|
@@ -106,7 +109,15 @@ class ConVoxFrameSerializer(FrameSerializer):
|
|
|
106
109
|
Returns:
|
|
107
110
|
Serialized data as JSON string, or None if the frame isn't handled.
|
|
108
111
|
"""
|
|
109
|
-
if
|
|
112
|
+
if (
|
|
113
|
+
self._params.auto_hang_up
|
|
114
|
+
and not self._call_ended
|
|
115
|
+
and isinstance(frame, (EndFrame, CancelFrame))
|
|
116
|
+
):
|
|
117
|
+
self._call_ended = True
|
|
118
|
+
# Return the callEnd event to be sent via the WebSocket
|
|
119
|
+
return await self._send_call_end_event()
|
|
120
|
+
elif isinstance(frame, StartInterruptionFrame):
|
|
110
121
|
# Clear/interrupt command for ConVox
|
|
111
122
|
message = {
|
|
112
123
|
"event": "clear",
|
|
@@ -138,7 +149,7 @@ class ConVoxFrameSerializer(FrameSerializer):
|
|
|
138
149
|
payload = base64.b64encode(serialized_data).decode("ascii")
|
|
139
150
|
|
|
140
151
|
# ConVox expects play_audio event format according to the documentation
|
|
141
|
-
timestamp = datetime.
|
|
152
|
+
timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
|
|
142
153
|
|
|
143
154
|
message = {
|
|
144
155
|
"event": "play_audio",
|
|
@@ -164,6 +175,32 @@ class ConVoxFrameSerializer(FrameSerializer):
|
|
|
164
175
|
|
|
165
176
|
return None
|
|
166
177
|
|
|
178
|
+
async def _send_call_end_event(self):
|
|
179
|
+
"""Send a callEnd event to ConVox to terminate the call.
|
|
180
|
+
|
|
181
|
+
This method is called when auto_hang_up is enabled and an EndFrame or
|
|
182
|
+
CancelFrame is received, similar to the logic in end_call_handler.py.
|
|
183
|
+
"""
|
|
184
|
+
try:
|
|
185
|
+
call_end_event = {
|
|
186
|
+
"event": "callEnd",
|
|
187
|
+
"details": {
|
|
188
|
+
"timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
|
|
189
|
+
"direction": "WSS",
|
|
190
|
+
"message": "Event trigger request",
|
|
191
|
+
},
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
logger.info(
|
|
195
|
+
f"ConVox auto_hang_up: Sending callEnd event for stream_id: {self._stream_id}, call_id: {self._call_id}"
|
|
196
|
+
)
|
|
197
|
+
# Note: The actual sending will be handled by the transport layer
|
|
198
|
+
# when this method returns the JSON string
|
|
199
|
+
return json.dumps(call_end_event)
|
|
200
|
+
except Exception as e:
|
|
201
|
+
logger.error(f"ConVox auto_hang_up: Failed to create callEnd event: {e}")
|
|
202
|
+
return None
|
|
203
|
+
|
|
167
204
|
async def deserialize(self, data: str | bytes) -> Frame | None:
|
|
168
205
|
"""Deserializes ConVox WebSocket data to Pipecat frames.
|
|
169
206
|
|