dv-pipecat-ai 0.0.85.dev5__py3-none-any.whl → 0.0.85.dev698__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/METADATA +78 -117
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/RECORD +157 -123
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +5 -0
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +120 -87
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +12 -4
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +85 -24
- pipecat/processors/aggregators/dtmf_aggregator.py +28 -22
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_response.py +6 -7
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/filters/stt_mute_filter.py +2 -0
- pipecat/processors/frame_processor.py +103 -17
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +209 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +4 -4
- pipecat/processors/user_idle_processor.py +3 -6
- pipecat/runner/run.py +270 -50
- pipecat/runner/types.py +2 -0
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +6 -9
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/asyncai/tts.py +2 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +122 -97
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +367 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1155 -0
- pipecat/services/aws/stt.py +1 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +13 -355
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/tts.py +2 -2
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +636 -0
- pipecat/services/elevenlabs/__init__.py +2 -1
- pipecat/services/elevenlabs/stt.py +254 -276
- pipecat/services/elevenlabs/tts.py +5 -5
- pipecat/services/fish/tts.py +2 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +2 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +2 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +16 -8
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/playht/tts.py +31 -4
- pipecat/services/rime/tts.py +3 -4
- pipecat/services/sarvam/tts.py +2 -6
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +1 -7
- pipecat/services/stt_service.py +34 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +9 -9
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +4 -0
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +17 -42
- pipecat/transports/base_output.py +42 -26
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +98 -19
- pipecat/transports/smallwebrtc/request_handler.py +204 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/string.py +12 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/processors/aggregators/vision_image_frame.py

```diff
@@ -10,13 +10,22 @@ This module provides frame aggregation functionality to combine text and image
 frames into vision frames for multimodal processing.
 """
 
-from pipecat.frames.frames import Frame, InputImageRawFrame, TextFrame
+from pipecat.frames.frames import Frame, InputImageRawFrame, TextFrame
+from pipecat.processors.aggregators.openai_llm_context import (
+    OpenAILLMContext,
+    OpenAILLMContextFrame,
+)
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 
 
 class VisionImageFrameAggregator(FrameProcessor):
     """Aggregates consecutive text and image frames into vision frames.
 
+    .. deprecated:: 0.0.85
+        VisionImageRawFrame has been removed in favor of context frames
+        (LLMContextFrame or OpenAILLMContextFrame), so this aggregator is not
+        needed anymore. See the 12* examples for the new recommended pattern.
+
     This aggregator waits for a consecutive TextFrame and an InputImageRawFrame.
     After the InputImageRawFrame arrives it will output a VisionImageRawFrame
     combining both the text and image data for multimodal processing.
@@ -28,6 +37,17 @@ class VisionImageFrameAggregator(FrameProcessor):
         The aggregator starts with no cached text, waiting for the first
         TextFrame to arrive before it can create vision frames.
         """
+        import warnings
+
+        warnings.warn(
+            "VisionImageFrameAggregator is deprecated. "
+            "VisionImageRawFrame has been removed in favor of context frames "
+            "(LLMContextFrame or OpenAILLMContextFrame), so this aggregator is "
+            "not needed anymore. See the 12* examples for the new recommended "
+            "pattern.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         super().__init__()
         self._describe_text = None
 
@@ -47,12 +67,14 @@ class VisionImageFrameAggregator(FrameProcessor):
             self._describe_text = frame.text
         elif isinstance(frame, InputImageRawFrame):
            if self._describe_text:
-                frame = VisionImageRawFrame(
+                context = OpenAILLMContext()
+                context.add_image_frame_message(
                     text=self._describe_text,
                     image=frame.image,
                     size=frame.size,
                     format=frame.format,
                 )
+                frame = OpenAILLMContextFrame(context)
                 await self.push_frame(frame)
                 self._describe_text = None
             else:
```
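The hunks above show the migration path the deprecation notice describes. As a minimal sketch, here is the same pattern outside the aggregator, assuming the hypothetical helper name `push_image_as_context` and an `InputImageRawFrame` as the image source (the imports mirror the ones added above):

```python
from pipecat.frames.frames import InputImageRawFrame
from pipecat.processors.aggregators.openai_llm_context import (
    OpenAILLMContext,
    OpenAILLMContextFrame,
)
from pipecat.processors.frame_processor import FrameProcessor


async def push_image_as_context(processor: FrameProcessor, frame: InputImageRawFrame, text: str):
    # Build a context message that carries both the prompt text and the image,
    # which is what VisionImageRawFrame used to bundle.
    context = OpenAILLMContext()
    context.add_image_frame_message(
        text=text,
        image=frame.image,
        size=frame.size,
        format=frame.format,
    )
    # Push a context frame downstream instead of a VisionImageRawFrame.
    await processor.push_frame(OpenAILLMContextFrame(context))
```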
pipecat/processors/audio/audio_buffer_processor.py

```diff
@@ -137,12 +137,12 @@ class AudioBufferProcessor(FrameProcessor):
         return self._num_channels
 
     def has_audio(self) -> bool:
-        """Check if both user and bot audio buffers contain data.
+        """Check if either user or bot audio buffers contain data.
 
         Returns:
-            True if both buffers contain audio data.
+            True if either buffer contains audio data.
         """
-        return self._buffer_has_audio(self._user_audio_buffer) and self._buffer_has_audio(
+        return self._buffer_has_audio(self._user_audio_buffer) or self._buffer_has_audio(
             self._bot_audio_buffer
         )
 
@@ -229,9 +229,12 @@ class AudioBufferProcessor(FrameProcessor):
         # Save time of frame so we can compute silence.
         self._last_bot_frame_at = time.time()
 
-        if self._buffer_size > 0 and len(self._user_audio_buffer) > self._buffer_size:
+        if self._buffer_size > 0 and (
+            len(self._user_audio_buffer) >= self._buffer_size
+            or len(self._bot_audio_buffer) >= self._buffer_size
+        ):
             await self._call_on_audio_data_handler()
-            self._reset_audio_buffers()
+            self._reset_primary_audio_buffers()
 
         # Process turn recording with preprocessed data.
         if self._enable_turn_audio:
@@ -272,9 +275,15 @@ class AudioBufferProcessor(FrameProcessor):
 
     async def _call_on_audio_data_handler(self):
         """Call the audio data event handlers with buffered audio."""
-        if not self.has_audio() or not self._recording:
+        if not self._recording:
             return
 
+        if len(self._user_audio_buffer) == 0 and len(self._bot_audio_buffer) == 0:
+            return
+
+        self._align_track_buffers()
+        flush_time = time.time()
+
         # Call original handler with merged audio
         merged_audio = self.merge_audio_buffers()
         await self._call_event_handler(
@@ -290,23 +299,49 @@ class AudioBufferProcessor(FrameProcessor):
             self._num_channels,
         )
 
+        self._last_user_frame_at = flush_time
+        self._last_bot_frame_at = flush_time
+
     def _buffer_has_audio(self, buffer: bytearray) -> bool:
         """Check if a buffer contains audio data."""
         return buffer is not None and len(buffer) > 0
 
     def _reset_recording(self):
         """Reset recording state and buffers."""
-        self._reset_audio_buffers()
+        self._reset_all_audio_buffers()
         self._last_user_frame_at = time.time()
         self._last_bot_frame_at = time.time()
 
-    def _reset_audio_buffers(self):
+    def _reset_all_audio_buffers(self):
         """Reset all audio buffers to empty state."""
+        self._reset_primary_audio_buffers()
+        self._reset_turn_audio_buffers()
+
+    def _reset_primary_audio_buffers(self):
+        """Clear user and bot buffers while preserving turn buffers and timestamps."""
         self._user_audio_buffer = bytearray()
         self._bot_audio_buffer = bytearray()
+
+    def _reset_turn_audio_buffers(self):
+        """Clear user and bot turn buffers while preserving primary buffers and timestamps."""
         self._user_turn_audio_buffer = bytearray()
         self._bot_turn_audio_buffer = bytearray()
 
+    def _align_track_buffers(self):
+        """Pad the shorter track with silence so both tracks stay in sync."""
+        user_len = len(self._user_audio_buffer)
+        bot_len = len(self._bot_audio_buffer)
+        if user_len == bot_len:
+            return
+
+        target_len = max(user_len, bot_len)
+        if user_len < target_len:
+            self._user_audio_buffer.extend(b"\x00" * (target_len - user_len))
+            self._last_user_frame_at = max(self._last_user_frame_at, self._last_bot_frame_at)
+        if bot_len < target_len:
+            self._bot_audio_buffer.extend(b"\x00" * (target_len - bot_len))
+            self._last_bot_frame_at = max(self._last_bot_frame_at, self._last_user_frame_at)
+
     async def _resample_input_audio(self, frame: InputAudioRawFrame) -> bytes:
         """Resample audio frame to the target sample rate."""
         return await self._input_resampler.resample(
```
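With these changes the buffers flush when either track reaches `buffer_size`, and `_align_track_buffers()` silence-pads the shorter track before merging. A minimal usage sketch, assuming the processor's existing `on_audio_data` event (the handler receives the processor, the merged PCM, the sample rate, and the channel count):

```python
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor

# Flush roughly every second of 24 kHz, 16-bit mono audio per track.
audiobuffer = AudioBufferProcessor(buffer_size=48000)


@audiobuffer.event_handler("on_audio_data")
async def on_audio_data(processor, audio: bytes, sample_rate: int, num_channels: int):
    # `audio` merges the user and bot tracks; the alignment padding keeps
    # the two tracks in sync across flushes.
    with open("conversation.pcm", "ab") as f:
        f.write(audio)
```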
pipecat/processors/filters/stt_mute_filter.py

```diff
@@ -25,6 +25,7 @@ from pipecat.frames.frames import (
     FunctionCallResultFrame,
     InputAudioRawFrame,
     InterimTranscriptionFrame,
+    InterruptionFrame,
     StartFrame,
     StartInterruptionFrame,
     StartDTMFCaptureFrame,
@@ -226,6 +227,7 @@ class STTMuteFilter(FrameProcessor):
         # Then push the original frame
         # Conditionally include InputAudioRawFrame in suppression tuple based on voicemail_detection_enabled
         suppression_types = (
+            InterruptionFrame,
             StartInterruptionFrame,
             VADUserStartedSpeakingFrame,
             VADUserStoppedSpeakingFrame,
```
pipecat/processors/frame_processor.py

```diff
@@ -29,8 +29,9 @@ from pipecat.frames.frames import (
     FrameProcessorPauseUrgentFrame,
     FrameProcessorResumeFrame,
     FrameProcessorResumeUrgentFrame,
+    InterruptionFrame,
+    InterruptionTaskFrame,
     StartFrame,
-    StartInterruptionFrame,
     SystemFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage, MetricsData
@@ -141,6 +142,12 @@ class FrameProcessor(BaseObject):
     task. System frames are also processed in a separate task which guarantees
     frame priority.
 
+    Event handlers available:
+
+    - on_before_process_frame: Called before a frame is processed
+    - on_after_process_frame: Called after a frame is processed
+    - on_before_push_frame: Called before a frame is pushed
+    - on_after_push_frame: Called after a frame is pushed
     """
 
     def __init__(
```
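A minimal sketch of observing the new per-processor events, assuming Pipecat's usual `event_handler` decorator from `BaseObject` and a handler signature of processor plus frame:

```python
# `my_processor` stands in for any FrameProcessor instance in a pipeline.
@my_processor.event_handler("on_before_push_frame")
async def on_before_push_frame(processor, frame):
    # Fires just before `processor` hands `frame` to the next processor.
    print(f"{processor} is about to push {frame}")


@my_processor.event_handler("on_after_process_frame")
async def on_after_process_frame(processor, frame):
    # Fires after process_frame() and any per-frame callback complete.
    print(f"{processor} finished processing {frame}")
```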
pipecat/processors/frame_processor.py (continued)

```diff
@@ -221,6 +228,20 @@ class FrameProcessor(BaseObject):
         self.__process_frame_task: Optional[asyncio.Task] = None
         self.logger = logger  # Will later be replaced with a bound logger
 
+        # To interrupt a pipeline, we push an `InterruptionTaskFrame` upstream.
+        # Then we wait for the corresponding `InterruptionFrame` to travel from
+        # the start of the pipeline back to the processor that sent the
+        # `InterruptionTaskFrame`. This wait is handled using the following
+        # event.
+        self._wait_for_interruption = False
+        self._wait_interruption_event = asyncio.Event()
+
+        # Frame processor events.
+        self._register_event_handler("on_before_process_frame", sync=True)
+        self._register_event_handler("on_after_process_frame", sync=True)
+        self._register_event_handler("on_before_push_frame", sync=True)
+        self._register_event_handler("on_after_push_frame", sync=True)
+
     @property
     def id(self) -> int:
         """Get the unique identifier for this processor.
@@ -436,9 +457,13 @@ class FrameProcessor(BaseObject):
         name = f"{self}::{coroutine.cr_code.co_name}"
         return self.task_manager.create_task(coroutine, name)
 
-    async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = None):
+    async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = 1.0):
         """Cancel a task managed by this processor.
 
+        A default timeout of 1 second is used in order to avoid potential
+        freezes caused by certain libraries that swallow
+        `asyncio.CancelledError`.
+
         Args:
             task: The task to cancel.
             timeout: Optional timeout for task cancellation.
```
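The new default guards against a real hazard: awaiting a cancelled task never returns if a library catches and swallows `asyncio.CancelledError`. A standalone sketch of the bounded-cancel pattern (not the exact Pipecat implementation):

```python
import asyncio
from typing import Optional


async def cancel_with_timeout(task: asyncio.Task, timeout: Optional[float] = 1.0):
    task.cancel()
    try:
        if timeout is not None:
            # A task that swallows CancelledError surfaces here as a
            # TimeoutError instead of freezing the caller.
            await asyncio.wait_for(task, timeout)
        else:
            await task
    except asyncio.CancelledError:
        pass  # Normal, clean cancellation.
    except asyncio.TimeoutError:
        pass  # The task ignored cancellation; give up waiting on it.
```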
pipecat/processors/frame_processor.py (continued)

```diff
@@ -544,6 +569,14 @@ class FrameProcessor(BaseObject):
         if self._cancelling:
             return
 
+        # If we are waiting for an interruption we will bypass all queued system
+        # frames and we will process the frame right away. This is because a
+        # previous system frame might be waiting for the interruption frame and
+        # it's blocking the input task.
+        if self._wait_for_interruption and isinstance(frame, InterruptionFrame):
+            await self.__process_frame(frame, direction, callback)
+            return
+
         if self._enable_direct_mode:
             await self.__process_frame(frame, direction, callback)
         else:
@@ -553,11 +586,15 @@ class FrameProcessor(BaseObject):
         """Pause processing of queued frames."""
         self.logger.trace(f"{self}: pausing frame processing")
         self.__should_block_frames = True
+        if self.__process_event:
+            self.__process_event.clear()
 
     async def pause_processing_system_frames(self):
         """Pause processing of queued system frames."""
         logger.trace(f"{self}: pausing system frame processing")
         self.__should_block_system_frames = True
+        if self.__input_event:
+            self.__input_event.clear()
 
     async def resume_processing_frames(self):
         """Resume processing of queued frames."""
@@ -590,7 +627,7 @@ class FrameProcessor(BaseObject):
 
         if isinstance(frame, StartFrame):
             await self.__start(frame)
-        elif isinstance(frame, StartInterruptionFrame):
+        elif isinstance(frame, InterruptionFrame):
             await self._start_interruption()
             await self.stop_all_metrics()
         elif isinstance(frame, CancelFrame):
@@ -620,8 +657,40 @@ class FrameProcessor(BaseObject):
         if not self._check_started(frame):
             return
 
+        await self._call_event_handler("on_before_push_frame", frame)
+
         await self.__internal_push_frame(frame, direction)
 
+        await self._call_event_handler("on_after_push_frame", frame)
+
+        # If we are waiting for an interruption and we get an interruption, then
+        # we can unblock `push_interruption_task_frame_and_wait()`.
+        if self._wait_for_interruption and isinstance(frame, InterruptionFrame):
+            self._wait_interruption_event.set()
+
+    async def push_interruption_task_frame_and_wait(self):
+        """Push an interruption task frame upstream and wait for the interruption.
+
+        This function sends an `InterruptionTaskFrame` upstream to the pipeline
+        task and waits to receive the corresponding `InterruptionFrame`. When
+        the function finishes it is guaranteed that the `InterruptionFrame` has
+        been pushed downstream.
+        """
+        self._wait_for_interruption = True
+
+        await self.push_frame(InterruptionTaskFrame(), FrameDirection.UPSTREAM)
+
+        # Wait for an `InterruptionFrame` to come to this processor and be
+        # pushed. Take a look at `push_frame()` to see how we first push the
+        # `InterruptionFrame` and then we set the event in order to maintain
+        # frame ordering.
+        await self._wait_interruption_event.wait()
+
+        # Clean the event.
+        self._wait_interruption_event.clear()
+
+        self._wait_for_interruption = False
+
     async def __start(self, frame: StartFrame):
         """Handle the start frame to initialize processor state.
 
```
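A sketch of how a processor might use the new method: interrupt first, then push follow-up frames with the guarantee that the `InterruptionFrame` is already downstream. `TriggerFrame` and `RestartOnTrigger` are hypothetical names for illustration:

```python
from dataclasses import dataclass

from pipecat.frames.frames import Frame, TTSSpeakFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


@dataclass
class TriggerFrame(Frame):  # hypothetical custom frame
    pass


class RestartOnTrigger(FrameProcessor):
    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)
        if isinstance(frame, TriggerFrame):
            # Blocks until this processor has pushed the corresponding
            # InterruptionFrame downstream.
            await self.push_interruption_task_frame_and_wait()
            # Ordering relative to the interruption is now guaranteed.
            await self.push_frame(TTSSpeakFrame("Let's start over."))
        else:
            await self.push_frame(frame, direction)
```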
pipecat/processors/frame_processor.py (continued)

```diff
@@ -674,22 +743,24 @@ class FrameProcessor(BaseObject):
     async def _start_interruption(self):
         """Start handling an interruption by cancelling current tasks."""
         try:
-            # Cancel the process task.
-            await self.__cancel_process_task()
+            if self._wait_for_interruption:
+                # If we get here we know the process task was just waiting for
+                # an interruption (push_interruption_task_frame_and_wait()), so
+                # we can't cancel the task because it might still need to do
+                # more things (e.g. pushing a frame after the
+                # interruption). Instead we just drain the queue because this is
+                # an interruption.
+                self.__reset_process_task()
+            else:
+                # Cancel and re-create the process task including the queue.
+                await self.__cancel_process_task()
+                self.__create_process_task()
         except Exception as e:
             self.logger.exception(
                 f"Uncaught exception in {self} when handling _start_interruption: {e}"
             )
             await self.push_error(ErrorFrame(str(e)))
 
-        # Create a new process queue and task.
-        self.__create_process_task()
-
-    async def _stop_interruption(self):
-        """Stop handling an interruption."""
-        # Nothing to do right now.
-        pass
-
     async def __internal_push_frame(self, frame: Frame, direction: FrameDirection):
         """Internal method to push frames to adjacent processors.
@@ -774,6 +845,17 @@ class FrameProcessor(BaseObject):
         self.__process_queue = asyncio.Queue()
         self.__process_frame_task = self.create_task(self.__process_frame_task_handler())
 
+    def __reset_process_task(self):
+        """Reset non-system frame processing task."""
+        if self._enable_direct_mode:
+            return
+
+        self.__should_block_frames = False
+        self.__process_event = asyncio.Event()
+        while not self.__process_queue.empty():
+            self.__process_queue.get_nowait()
+            self.__process_queue.task_done()
+
     async def __cancel_process_task(self):
         """Cancel the non-system frame processing task."""
         if self.__process_frame_task:
@@ -784,11 +866,15 @@ class FrameProcessor(BaseObject):
         self, frame: Frame, direction: FrameDirection, callback: Optional[FrameCallback]
     ):
         try:
+            await self._call_event_handler("on_before_process_frame", frame)
+
             # Process the frame.
             await self.process_frame(frame, direction)
             # If this frame has an associated callback, call it now.
             if callback:
                 await callback(self, frame, direction)
+
+            await self._call_event_handler("on_after_process_frame", frame)
         except Exception as e:
             logger.exception(f"{self}: error processing frame: {e}")
             await self.push_error(ErrorFrame(str(e)))
@@ -801,6 +887,8 @@ class FrameProcessor(BaseObject):
 
         """
         while True:
+            (frame, direction, callback) = await self.__input_queue.get()
+
             if self.__should_block_system_frames and self.__input_event:
                 logger.trace(f"{self}: system frame processing paused")
                 await self.__input_event.wait()
@@ -808,8 +896,6 @@ class FrameProcessor(BaseObject):
                 self.__should_block_system_frames = False
                 logger.trace(f"{self}: system frame processing resumed")
 
-            (frame, direction, callback) = await self.__input_queue.get()
-
             if isinstance(frame, SystemFrame):
                 await self.__process_frame(frame, direction, callback)
             elif self.__process_queue:
@@ -824,6 +910,8 @@ class FrameProcessor(BaseObject):
     async def __process_frame_task_handler(self):
         """Handle non-system frames from the process queue."""
         while True:
+            (frame, direction, callback) = await self.__process_queue.get()
+
             if self.__should_block_frames and self.__process_event:
                 logger.trace(f"{self}: frame processing paused")
                 await self.__process_event.wait()
@@ -831,8 +919,6 @@ class FrameProcessor(BaseObject):
                 self.__should_block_frames = False
                 logger.trace(f"{self}: frame processing resumed")
 
-            (frame, direction, callback) = await self.__process_queue.get()
-
             await self.__process_frame(frame, direction, callback)
 
             self.__process_queue.task_done()
```
pipecat/processors/frameworks/langchain.py

```diff
@@ -12,6 +12,7 @@ from loguru import logger
 
 from pipecat.frames.frames import (
     Frame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     TextFrame,
@@ -64,11 +65,16 @@ class LangchainProcessor(FrameProcessor):
         """
         await super().process_frame(frame, direction)
 
-        if isinstance(frame, OpenAILLMContextFrame):
+        if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)):
             # Messages are accumulated on the context as a list of messages.
             # The last one by the human is the one we want to send to the LLM.
             logger.debug(f"Got transcription frame {frame}")
-            text: str = frame.context.messages[-1]["content"]
+            messages = (
+                frame.context.messages
+                if isinstance(frame, OpenAILLMContextFrame)
+                else frame.context.get_messages()
+            )
+            text: str = messages[-1]["content"]
 
             await self._ainvoke(text.strip())
         else:
```