dv-pipecat-ai 0.0.85.dev7__py3-none-any.whl → 0.0.85.dev698__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/METADATA +78 -117
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/RECORD +156 -122
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +5 -0
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +120 -87
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +12 -4
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +85 -24
- pipecat/processors/aggregators/dtmf_aggregator.py +28 -22
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_response.py +6 -7
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/filters/stt_mute_filter.py +2 -0
- pipecat/processors/frame_processor.py +103 -17
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +209 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +4 -4
- pipecat/processors/user_idle_processor.py +3 -6
- pipecat/runner/run.py +270 -50
- pipecat/runner/types.py +2 -0
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +6 -9
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/asyncai/tts.py +2 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +122 -97
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +367 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1155 -0
- pipecat/services/aws/stt.py +1 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +13 -355
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/tts.py +2 -2
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +636 -0
- pipecat/services/elevenlabs/__init__.py +2 -1
- pipecat/services/elevenlabs/stt.py +254 -276
- pipecat/services/elevenlabs/tts.py +5 -5
- pipecat/services/fish/tts.py +2 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +2 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +2 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +16 -8
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/playht/tts.py +31 -4
- pipecat/services/rime/tts.py +3 -4
- pipecat/services/sarvam/tts.py +2 -6
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +1 -7
- pipecat/services/stt_service.py +34 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +9 -9
- pipecat/services/vision_service.py +7 -6
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +17 -42
- pipecat/transports/base_output.py +42 -26
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +98 -19
- pipecat/transports/smallwebrtc/request_handler.py +204 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/string.py +12 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
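Several services move to new package locations in this release (for example aws_nova_sonic → aws/nova_sonic, gemini_multimodal_live → google/gemini_live, and the new openai/realtime package), while the old modules shrink to small shim files (+19 to +27 lines each), which suggests backward-compatibility re-exports. A hypothetical sketch of what such a shim means for imports; the service class name below is an assumption for illustration, not taken from the diff:

    # New location (assumed class name for illustration):
    from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService

    # Old location keeps working through the compatibility shim module:
    from pipecat.services.openai_realtime import OpenAIRealtimeLLMService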
pipecat/pipeline/task.py
CHANGED
@@ -13,8 +13,7 @@ including heartbeats, idle detection, and observer integration.
 
 import asyncio
 import time
-from collections import deque
-from typing import Any, AsyncIterable, Deque, Dict, Iterable, List, Optional, Tuple, Type
+from typing import Any, AsyncIterable, Dict, Iterable, List, Optional, Tuple, Type
 
 from loguru import logger
 from pydantic import BaseModel, ConfigDict, Field
@@ -31,7 +30,8 @@ from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
     HeartbeatFrame,
-    InputAudioRawFrame,
+    InterruptionFrame,
+    InterruptionTaskFrame,
     MetricsFrame,
     StartFrame,
     StopFrame,
@@ -113,9 +113,32 @@ class PipelineTask(BasePipelineTask):
        - on_frame_reached_downstream: Called when downstream frames reach the sink
        - on_idle_timeout: Called when pipeline is idle beyond timeout threshold
        - on_pipeline_started: Called when pipeline starts with StartFrame
-       - on_pipeline_stopped: Called when pipeline stops with StopFrame
-       - on_pipeline_ended: Called when pipeline ends with EndFrame
-       - on_pipeline_cancelled: Called when pipeline is cancelled with CancelFrame
+       - on_pipeline_stopped: [deprecated] Called when pipeline stops with StopFrame
+
+         .. deprecated:: 0.0.86
+             Use `on_pipeline_finished` instead.
+
+       - on_pipeline_ended: [deprecated] Called when pipeline ends with EndFrame
+
+         .. deprecated:: 0.0.86
+             Use `on_pipeline_finished` instead.
+
+       - on_pipeline_cancelled: [deprecated] Called when pipeline is cancelled with CancelFrame
+
+         .. deprecated:: 0.0.86
+             Use `on_pipeline_finished` instead.
+
+       - on_pipeline_finished: Called after the pipeline has reached any terminal state.
+         This includes:
+
+         - StopFrame: pipeline was stopped (processors keep connections open)
+         - EndFrame: pipeline ended normally
+         - CancelFrame: pipeline was cancelled
+
+         Use this event for cleanup, logging, or post-processing tasks. Users can inspect
+         the frame if they need to handle specific cases.
+
+       - on_pipeline_error: Called when an error occurs with ErrorFrame
 
     Example::
 
@@ -126,6 +149,18 @@ class PipelineTask(BasePipelineTask):
        @task.event_handler("on_idle_timeout")
        async def on_pipeline_idle_timeout(task):
            ...
+
+       @task.event_handler("on_pipeline_started")
+       async def on_pipeline_started(task, frame):
+           ...
+
+       @task.event_handler("on_pipeline_finished")
+       async def on_pipeline_finished(task, frame):
+           ...
+
+       @task.event_handler("on_pipeline_error")
+       async def on_pipeline_error(task, frame):
+           ...
    """

    def __init__(
@@ -262,6 +297,8 @@ class PipelineTask(BasePipelineTask):
        self._register_event_handler("on_pipeline_stopped")
        self._register_event_handler("on_pipeline_ended")
        self._register_event_handler("on_pipeline_cancelled")
+        self._register_event_handler("on_pipeline_finished")
+        self._register_event_handler("on_pipeline_error")

    @property
    def params(self) -> PipelineParams:
@@ -290,6 +327,27 @@ class PipelineTask(BasePipelineTask):
        """
        return self._turn_trace_observer

+    def event_handler(self, event_name: str):
+        """Decorator for registering event handlers.
+
+        Args:
+            event_name: The name of the event to handle.
+
+        Returns:
+            The decorator function that registers the handler.
+        """
+        if event_name in ["on_pipeline_stopped", "on_pipeline_ended", "on_pipeline_cancelled"]:
+            import warnings
+
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    f"Event '{event_name}' is deprecated, use 'on_pipeline_finished' instead.",
+                    DeprecationWarning,
+                )
+
+        return super().event_handler(event_name)
+
    def add_observer(self, observer: BaseObserver):
        """Add an observer to monitor pipeline execution.

@@ -348,7 +406,8 @@ class PipelineTask(BasePipelineTask):
        Cancels all running tasks and stops frame processing without
        waiting for completion.
        """
-        await self._cancel()
+        if not self._finished:
+            await self._cancel()

    async def run(self, params: PipelineTaskParams):
        """Start and manage the pipeline execution until completion or cancellation.
@@ -532,6 +591,7 @@ class PipelineTask(BasePipelineTask):
            )
        finally:
            await self._call_event_handler("on_pipeline_cancelled", frame)
+            await self._call_event_handler("on_pipeline_finished", frame)

        logger.debug(f"{self}: Closing. Waiting for {frame} to reach the end of the pipeline...")

@@ -627,22 +687,31 @@ class PipelineTask(BasePipelineTask):

        if isinstance(frame, EndTaskFrame):
            # Tell the task we should end nicely.
+            logger.debug(f"{self}: received end task frame {frame}")
            await self.queue_frame(EndFrame())
        elif isinstance(frame, CancelTaskFrame):
            # Tell the task we should end right away.
+            logger.debug(f"{self}: received cancel task frame {frame}")
            await self.queue_frame(CancelFrame())
        elif isinstance(frame, StopTaskFrame):
            # Tell the task we should stop nicely.
+            logger.debug(f"{self}: received stop task frame {frame}")
            await self.queue_frame(StopFrame())
+        elif isinstance(frame, InterruptionTaskFrame):
+            # Tell the task we should interrupt the pipeline. Note that we are
+            # bypassing the push queue and directly queue into the
+            # pipeline. This is in case the push task is blocked waiting for a
+            # pipeline-ending frame to finish traversing the pipeline.
+            logger.debug(f"{self}: received interruption task frame {frame}")
+            await self._pipeline.queue_frame(InterruptionFrame())
        elif isinstance(frame, ErrorFrame):
+            await self._call_event_handler("on_pipeline_error", frame)
            if frame.fatal:
                logger.error(f"A fatal error occurred: {frame}")
                # Cancel all tasks downstream.
                await self.queue_frame(CancelFrame())
-                # Tell the task we should stop.
-                await self.queue_frame(StopTaskFrame())
            else:
-                logger.warning(f"Something went wrong: {frame}")
+                logger.warning(f"{self}: Something went wrong: {frame}")

    async def _sink_push_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames coming downstream from the pipeline.
@@ -669,9 +738,11 @@ class PipelineTask(BasePipelineTask):
            self._pipeline_start_event.set()
        elif isinstance(frame, EndFrame):
            await self._call_event_handler("on_pipeline_ended", frame)
+            await self._call_event_handler("on_pipeline_finished", frame)
            self._pipeline_end_event.set()
        elif isinstance(frame, StopFrame):
            await self._call_event_handler("on_pipeline_stopped", frame)
+            await self._call_event_handler("on_pipeline_finished", frame)
            self._pipeline_end_event.set()
        elif isinstance(frame, CancelFrame):
            self._pipeline_end_event.set()
@@ -718,7 +789,6 @@
        """
        running = True
        last_frame_time = 0
-        frame_buffer = deque(maxlen=10)  # Store last 10 frames

        while running:
            try:
@@ -726,9 +796,6 @@
                    self._idle_queue.get(), timeout=self._idle_timeout_secs
                )

-                if not isinstance(frame, InputAudioRawFrame):
-                    frame_buffer.append(frame)
-
                if isinstance(frame, StartFrame) or isinstance(frame, self._idle_timeout_frames):
                    # If we find a StartFrame or one of the frames that prevents a
                    # time out we update the time.
@@ -739,7 +806,7 @@
                    # valid frames.
                    diff_time = time.time() - last_frame_time
                    if diff_time >= self._idle_timeout_secs:
-                        running = await self._idle_timeout_detected(frame_buffer)
+                        running = await self._idle_timeout_detected()
                    # Reset `last_frame_time` so we don't trigger another
                    # immediate idle timeout if we are not cancelling. For
                    # example, we might want to force the bot to say goodbye
@@ -749,14 +816,11 @@
                self._idle_queue.task_done()

            except asyncio.TimeoutError:
-                running = await self._idle_timeout_detected(frame_buffer)
+                running = await self._idle_timeout_detected()

-    async def _idle_timeout_detected(self, last_frames: Deque[Frame]) -> bool:
+    async def _idle_timeout_detected(self) -> bool:
        """Handle idle timeout detection and optional cancellation.

-        Args:
-            last_frames: Recent frames received before timeout for debugging.
-
        Returns:
            Whether the pipeline task should continue running.
        """
@@ -764,10 +828,7 @@
        if self._cancelled:
            return True

-        logger.warning("Idle timeout detected. Last frames received:")
-        for i, frame in enumerate(last_frames, 1):
-            logger.warning(f"Frame {i}: {frame}")
-
+        logger.warning("Idle timeout detected.")
        await self._call_event_handler("on_idle_timeout")
        if self._cancel_on_idle_timeout:
            logger.warning(
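The consolidated on_pipeline_finished event fires for all three terminal frames, so the three older events can collapse into one handler. A minimal sketch based on the docstring example added above; `task` is assumed to be an existing PipelineTask:

    from pipecat.frames.frames import CancelFrame, EndFrame, StopFrame

    @task.event_handler("on_pipeline_finished")
    async def on_pipeline_finished(task, frame):
        # Fired for StopFrame, EndFrame, and CancelFrame alike; inspect the
        # frame to distinguish the terminal states when it matters.
        if isinstance(frame, EndFrame):
            print("pipeline ended normally")
        elif isinstance(frame, StopFrame):
            print("pipeline stopped; processors keep connections open")
        elif isinstance(frame, CancelFrame):
            print("pipeline was cancelled")

Registering a handler for on_pipeline_stopped, on_pipeline_ended, or on_pipeline_cancelled still works, but the overridden event_handler decorator now emits a DeprecationWarning for those names.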
pipecat/processors/aggregators/dtmf_aggregator.py
CHANGED
@@ -4,12 +4,18 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

+"""DTMF aggregation processor for converting keypad input to transcription.
+
+This module provides a frame processor that aggregates DTMF (Dual-Tone Multi-Frequency)
+keypad inputs into meaningful sequences and converts them to transcription frames
+for downstream processing by LLM context aggregators.
+"""
+
 import asyncio
 from typing import Optional

 from pipecat.audio.dtmf.types import KeypadEntry
 from pipecat.frames.frames import (
-    BotInterruptionFrame,
     CancelFrame,
     EndFrame,
     Frame,
@@ -26,16 +32,12 @@ class DTMFAggregator(FrameProcessor):

    The aggregator accumulates digits from InputDTMFFrame instances and flushes
    when:
+
    - Timeout occurs (configurable idle period)
    - Termination digit is received (default: '#')
    - EndFrame or CancelFrame is received

    Emits TranscriptionFrame for compatibility with existing LLM context aggregators.
-
-    Args:
-        timeout: Idle timeout in seconds before flushing
-        termination_digit: Digit that triggers immediate flush
-        prefix: Prefix added to DTMF sequence in transcription
    """

    def __init__(
@@ -45,6 +47,14 @@ class DTMFAggregator(FrameProcessor):
        prefix: str = "DTMF: ",
        **kwargs,
    ):
+        """Initialize the DTMF aggregator.
+
+        Args:
+            timeout: Idle timeout in seconds before flushing
+            termination_digit: Digit that triggers immediate flush
+            prefix: Prefix added to DTMF sequence in transcription
+            **kwargs: Additional arguments passed to FrameProcessor
+        """
        super().__init__(**kwargs)
        self._aggregation = ""
        self._idle_timeout = timeout
@@ -54,7 +64,18 @@ class DTMFAggregator(FrameProcessor):
        self._digit_event = asyncio.Event()
        self._aggregation_task: Optional[asyncio.Task] = None

+    async def cleanup(self) -> None:
+        """Clean up resources."""
+        await super().cleanup()
+        await self._stop_aggregation_task()
+
    async def process_frame(self, frame: Frame, direction: FrameDirection) -> None:
+        """Process incoming frames and handle DTMF aggregation.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
        await super().process_frame(frame, direction)

        if isinstance(frame, StartFrame):
@@ -83,7 +104,7 @@ class DTMFAggregator(FrameProcessor):

        # For first digit, schedule interruption.
        if is_first_digit:
-            self.create_task(self._send_interruption_task())
+            await self.push_interruption_task_frame_and_wait()

        # Check for immediate flush conditions
        if frame.button == self._termination_digit:
@@ -92,15 +113,6 @@ class DTMFAggregator(FrameProcessor):
        # Signal digit received for timeout handling
        self._digit_event.set()

-    async def _send_interruption_task(self):
-        """Send interruption frame safely in a separate task."""
-        try:
-            # Send the interruption frame
-            await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)
-        except Exception as e:
-            # Log error but don't propagate
-            print(f"Error sending interruption: {e}")
-
    def _create_aggregation_task(self) -> None:
        """Creates the aggregation task if it hasn't been created yet."""
        if not self._aggregation_task:
@@ -119,7 +131,6 @@ class DTMFAggregator(FrameProcessor):
            await asyncio.wait_for(self._digit_event.wait(), timeout=self._idle_timeout)
            self._digit_event.clear()
        except asyncio.TimeoutError:
-            self.reset_watchdog()
            if self._aggregation:
                await self._flush_aggregation()

@@ -137,8 +148,3 @@ class DTMFAggregator(FrameProcessor):
        await self.push_frame(transcription_frame)

        self._aggregation = ""
-
-    async def cleanup(self) -> None:
-        """Clean up resources."""
-        await super().cleanup()
-        await self._stop_aggregation_task()
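A short configuration sketch based on the constructor arguments documented in the diff above; the values are illustrative and pipeline placement is not shown:

    from pipecat.processors.aggregators.dtmf_aggregator import DTMFAggregator

    dtmf = DTMFAggregator(
        timeout=2.0,            # flush after 2 seconds of keypad inactivity
        termination_digit="#",  # '#' triggers an immediate flush
        prefix="DTMF: ",        # flushed sequence is emitted as a TranscriptionFrame
    )

Note the behavioral change above: on the first digit the aggregator now calls push_interruption_task_frame_and_wait() instead of scheduling an upstream BotInterruptionFrame in a separate task, matching the new InterruptionTaskFrame handling in PipelineTask.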
pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py}
RENAMED
@@ -4,20 +4,20 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

-"""Gated OpenAI LLM context aggregator for controlled message flow."""
+"""Gated LLM context aggregator for controlled message flow."""

-from pipecat.frames.frames import CancelFrame, EndFrame, Frame, StartFrame
+from pipecat.frames.frames import CancelFrame, EndFrame, Frame, LLMContextFrame, StartFrame
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.sync.base_notifier import BaseNotifier


-class GatedOpenAILLMContextAggregator(FrameProcessor):
-    """Aggregator that gates OpenAI LLM context frames until notified.
+class GatedLLMContextAggregator(FrameProcessor):
+    """Aggregator that gates LLM context frames until notified.

-    This aggregator captures OpenAI LLM context frames and holds them until a notifier
-    signals that they can be released. This is useful for controlling the flow
-    of context frames based on external conditions or timing.
+    This aggregator captures LLM context frames and holds them until a notifier
+    signals that they can be released. This is useful for controlling the flow
+    of context frames based on external conditions or timing.
    """

    def __init__(self, *, notifier: BaseNotifier, start_open: bool = False, **kwargs):
@@ -35,7 +35,7 @@ class GatedOpenAILLMContextAggregator(FrameProcessor):
        self._gate_task = None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        """Process incoming frames, gating OpenAI LLM context frames.
+        """Process incoming frames, gating LLM context frames.

        Args:
            frame: The frame to process.
@@ -49,7 +49,7 @@ class GatedOpenAILLMContextAggregator(FrameProcessor):
        if isinstance(frame, (EndFrame, CancelFrame)):
            await self._stop()
            await self.push_frame(frame)
-        elif isinstance(frame, OpenAILLMContextFrame):
+        elif isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)):
            if self._start_open:
                self._start_open = False
                await self.push_frame(frame, direction)
pipecat/processors/aggregators/gated_open_ai_llm_context.py
ADDED
@@ -0,0 +1,12 @@
+#
+# Copyright (c) 2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""Gated OpenAI LLM context aggregator for controlled message flow."""
+
+from pipecat.processors.aggregators.gated_llm_context import GatedLLMContextAggregator
+
+# Alias for backward compatibility with the previous name
+GatedOpenAILLMContextAggregator = GatedLLMContextAggregator
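A usage sketch of the renamed aggregator. EventNotifier is assumed to be the stock BaseNotifier implementation in pipecat.sync.event_notifier; verify the import against this release:

    from pipecat.processors.aggregators.gated_llm_context import GatedLLMContextAggregator
    from pipecat.sync.event_notifier import EventNotifier  # assumed helper

    notifier = EventNotifier()

    # Holds LLMContextFrame / OpenAILLMContextFrame instances until notified.
    gated = GatedLLMContextAggregator(notifier=notifier, start_open=False)

    # Elsewhere in the application, release the held context frame:
    # await notifier.notify()

Code that imports GatedOpenAILLMContextAggregator from gated_open_ai_llm_context keeps working through the alias shown above.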
pipecat/processors/aggregators/llm_response.py
CHANGED
@@ -23,7 +23,6 @@ from pipecat.audio.interruptions.base_interruption_strategy import BaseInterruptionStrategy
 from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
 from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.frames.frames import (
-    BotInterruptionFrame,
     BotStartedSpeakingFrame,
     BotStoppedSpeakingFrame,
     CancelFrame,
@@ -37,6 +36,7 @@ from pipecat.frames.frames import (
     FunctionCallsStartedFrame,
     InputAudioRawFrame,
     InterimTranscriptionFrame,
+    InterruptionFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesAppendFrame,
@@ -49,7 +49,6 @@ from pipecat.frames.frames import (
     OpenAILLMContextAssistantTimestampFrame,
     SpeechControlParamsFrame,
     StartFrame,
-    StartInterruptionFrame,
     TextFrame,
     TranscriptionFrame,
     UserImageRawFrame,
@@ -139,7 +138,7 @@ class LLMFullResponseAggregator(FrameProcessor):
        """
        await super().process_frame(frame, direction)

-        if isinstance(frame, StartInterruptionFrame):
+        if isinstance(frame, InterruptionFrame):
            await self._call_event_handler("on_completion", self._aggregation, False)
            self._aggregation = ""
            self._started = False
@@ -542,9 +541,9 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):

        if should_interrupt:
            self.logger.debug(
-                "Interruption conditions met - pushing BotInterruptionFrame and aggregation"
+                "Interruption conditions met - pushing interruption and aggregation"
            )
-            await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)
+            await self.push_interruption_task_frame_and_wait()
            await self._process_aggregation()
        else:
            self.logger.debug("Interruption conditions not met - not pushing aggregation")
@@ -905,7 +904,7 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
        """
        await super().process_frame(frame, direction)

-        if isinstance(frame, StartInterruptionFrame):
+        if isinstance(frame, InterruptionFrame):
            await self._handle_interruptions(frame)
            await self.push_frame(frame, direction)
        elif isinstance(frame, LLMFullResponseStartFrame):
@@ -971,7 +970,7 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
        if frame.run_llm:
            await self.push_context_frame(FrameDirection.UPSTREAM)

-    async def _handle_interruptions(self, frame: StartInterruptionFrame):
+    async def _handle_interruptions(self, frame: InterruptionFrame):
        await self.push_aggregation()
        self._started = 0
        await self.reset()
pipecat/processors/aggregators/llm_response_universal.py
CHANGED
@@ -13,7 +13,7 @@ LLM processing, and text-to-speech components in conversational AI pipelines.
 
 import asyncio
 import json
-from
+from abc import abstractmethod
 from typing import Any, Dict, List, Literal, Optional, Set

 from loguru import logger
@@ -23,7 +23,6 @@ from pipecat.audio.interruptions.base_interruption_strategy import BaseInterruptionStrategy
 from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
 from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.frames.frames import (
-    BotInterruptionFrame,
     BotStartedSpeakingFrame,
     BotStoppedSpeakingFrame,
     CancelFrame,
@@ -37,6 +36,7 @@ from pipecat.frames.frames import (
     FunctionCallsStartedFrame,
     InputAudioRawFrame,
     InterimTranscriptionFrame,
+    InterruptionFrame,
     LLMContextAssistantTimestampFrame,
     LLMContextFrame,
     LLMFullResponseEndFrame,
@@ -48,7 +48,6 @@ from pipecat.frames.frames import (
     LLMSetToolsFrame,
     SpeechControlParamsFrame,
     StartFrame,
-    StartInterruptionFrame,
     TextFrame,
     TranscriptionFrame,
     UserImageRawFrame,
@@ -171,6 +170,11 @@ class LLMContextAggregator(FrameProcessor):
        """Reset the aggregation state."""
        self._aggregation = ""

+    @abstractmethod
+    async def push_aggregation(self):
+        """Push the current aggregation downstream."""
+        pass
+

class LLMUserAggregator(LLMContextAggregator):
    """User LLM aggregator that processes speech-to-text transcriptions.
@@ -303,7 +307,7 @@ class LLMUserAggregator(LLMContextAggregator):
            frame = LLMContextFrame(self._context)
            await self.push_frame(frame)

-    async def _push_aggregation(self):
+    async def push_aggregation(self):
        """Push the current aggregation based on interruption strategies and conditions."""
        if len(self._aggregation) > 0:
            if self.interruption_strategies and self._bot_speaking:
@@ -311,9 +315,9 @@ class LLMUserAggregator(LLMContextAggregator):

                if should_interrupt:
                    logger.debug(
-                        "Interruption conditions met - pushing BotInterruptionFrame and aggregation"
+                        "Interruption conditions met - pushing interruption and aggregation"
                    )
-                    await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)
+                    await self.push_interruption_task_frame_and_wait()
                    await self._process_aggregation()
                else:
                    logger.debug("Interruption conditions not met - not pushing aggregation")
@@ -394,7 +398,7 @@ class LLMUserAggregator(LLMContextAggregator):
        # pushing the aggregation as we will probably get a final transcription.
        if len(self._aggregation) > 0:
            if not self._seen_interim_results:
-                await self._push_aggregation()
+                await self.push_aggregation()
        # Handles the case where both the user and the bot are not speaking,
        # and the bot was previously speaking before the user interruption.
        # So in this case we are resetting the aggregation timer
@@ -473,7 +477,7 @@ class LLMUserAggregator(LLMContextAggregator):
            await self._maybe_emulate_user_speaking()
        except asyncio.TimeoutError:
            if not self._user_speaking:
-                await self._push_aggregation()
+                await self.push_aggregation()

        # If we are emulating VAD we still need to send the user stopped
        # speaking frame.
@@ -579,7 +583,7 @@ class LLMAssistantAggregator(LLMContextAggregator):
        """
        await super().process_frame(frame, direction)

-        if isinstance(frame, StartInterruptionFrame):
+        if isinstance(frame, InterruptionFrame):
            await self._handle_interruptions(frame)
            await self.push_frame(frame, direction)
        elif isinstance(frame, LLMFullResponseStartFrame):
@@ -609,12 +613,12 @@ class LLMAssistantAggregator(LLMContextAggregator):
        elif isinstance(frame, UserImageRawFrame) and frame.request and frame.request.tool_call_id:
            await self._handle_user_image_frame(frame)
        elif isinstance(frame, BotStoppedSpeakingFrame):
-            await self._push_aggregation()
+            await self.push_aggregation()
            await self.push_frame(frame, direction)
        else:
            await self.push_frame(frame, direction)

-    async def _push_aggregation(self):
+    async def push_aggregation(self):
        """Push the current assistant aggregation with timestamp."""
        if not self._aggregation:
            return
@@ -645,8 +649,8 @@ class LLMAssistantAggregator(LLMContextAggregator):
        if frame.run_llm:
            await self.push_context_frame(FrameDirection.UPSTREAM)

-    async def _handle_interruptions(self, frame: StartInterruptionFrame):
-        await self._push_aggregation()
+    async def _handle_interruptions(self, frame: InterruptionFrame):
+        await self.push_aggregation()
        self._started = 0
        await self.reset()

@@ -780,7 +784,7 @@ class LLMAssistantAggregator(LLMContextAggregator):
                text=frame.request.context,
            )

-        await self._push_aggregation()
+        await self.push_aggregation()
        await self.push_context_frame(FrameDirection.UPSTREAM)

    async def _handle_llm_start(self, _: LLMFullResponseStartFrame):
@@ -788,7 +792,7 @@

    async def _handle_llm_end(self, _: LLMFullResponseEndFrame):
        self._started -= 1
-        await self._push_aggregation()
+        await self.push_aggregation()

    async def _handle_text(self, frame: TextFrame):
        if not self._started:
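Since push_aggregation() is now a public abstract method on the base aggregator (renamed from the private _push_aggregation), custom subclasses must implement it. A hypothetical minimal subclass illustrating the contract; the internal _aggregation attribute usage mirrors the diff above:

    from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregator

    class PrintingAggregator(LLMContextAggregator):
        """Hypothetical aggregator that just prints what it would flush."""

        async def push_aggregation(self):
            if self._aggregation:
                print(f"flushing: {self._aggregation!r}")
                self._aggregation = ""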
pipecat/processors/aggregators/user_response.py
CHANGED
@@ -12,14 +12,14 @@ in conversational pipelines.
 """

 from pipecat.frames.frames import TextFrame
-from pipecat.processors.aggregators.llm_response import LLMUserContextAggregator
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import LLMUserAggregator


-class UserResponseAggregator(LLMUserContextAggregator):
+class UserResponseAggregator(LLMUserAggregator):
    """Aggregates user responses into TextFrame objects.

-    This aggregator extends LLMUserContextAggregator to specifically handle
+    This aggregator extends LLMUserAggregator to specifically handle
    user input by collecting text responses and outputting them as TextFrame
    objects when the aggregation is complete.
    """
@@ -28,9 +28,9 @@ class UserResponseAggregator(LLMUserContextAggregator):
        """Initialize the user response aggregator.

        Args:
-            **kwargs: Additional arguments passed to parent LLMUserContextAggregator.
+            **kwargs: Additional arguments passed to parent LLMUserAggregator.
        """
-        super().__init__(context=OpenAILLMContext(), **kwargs)
+        super().__init__(context=LLMContext(), **kwargs)

    async def push_aggregation(self):
        """Push the aggregated user response as a TextFrame.