PyPI - dv-pipecat-ai - Versions diffs - 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (195) hide show

{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
pipecat/adapters/base_llm_adapter.py +38 -1
pipecat/adapters/services/anthropic_adapter.py +9 -14
pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
pipecat/adapters/services/bedrock_adapter.py +236 -13
pipecat/adapters/services/gemini_adapter.py +12 -8
pipecat/adapters/services/open_ai_adapter.py +19 -7
pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
pipecat/audio/dtmf/dtmf-0.wav +0 -0
pipecat/audio/dtmf/dtmf-1.wav +0 -0
pipecat/audio/dtmf/dtmf-2.wav +0 -0
pipecat/audio/dtmf/dtmf-3.wav +0 -0
pipecat/audio/dtmf/dtmf-4.wav +0 -0
pipecat/audio/dtmf/dtmf-5.wav +0 -0
pipecat/audio/dtmf/dtmf-6.wav +0 -0
pipecat/audio/dtmf/dtmf-7.wav +0 -0
pipecat/audio/dtmf/dtmf-8.wav +0 -0
pipecat/audio/dtmf/dtmf-9.wav +0 -0
pipecat/audio/dtmf/dtmf-pound.wav +0 -0
pipecat/audio/dtmf/dtmf-star.wav +0 -0
pipecat/audio/filters/krisp_viva_filter.py +193 -0
pipecat/audio/filters/noisereduce_filter.py +15 -0
pipecat/audio/turn/base_turn_analyzer.py +9 -1
pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
pipecat/audio/vad/data/README.md +10 -0
pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
pipecat/audio/vad/silero.py +9 -3
pipecat/audio/vad/vad_analyzer.py +13 -1
pipecat/extensions/voicemail/voicemail_detector.py +5 -5
pipecat/frames/frames.py +277 -86
pipecat/observers/loggers/debug_log_observer.py +3 -3
pipecat/observers/loggers/llm_log_observer.py +7 -3
pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
pipecat/pipeline/runner.py +18 -6
pipecat/pipeline/service_switcher.py +64 -36
pipecat/pipeline/task.py +125 -79
pipecat/pipeline/tts_switcher.py +30 -0
pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
pipecat/processors/aggregators/llm_context.py +40 -2
pipecat/processors/aggregators/llm_response.py +32 -15
pipecat/processors/aggregators/llm_response_universal.py +19 -15
pipecat/processors/aggregators/user_response.py +6 -6
pipecat/processors/aggregators/vision_image_frame.py +24 -2
pipecat/processors/audio/audio_buffer_processor.py +43 -8
pipecat/processors/dtmf_aggregator.py +174 -77
pipecat/processors/filters/stt_mute_filter.py +17 -0
pipecat/processors/frame_processor.py +110 -24
pipecat/processors/frameworks/langchain.py +8 -2
pipecat/processors/frameworks/rtvi.py +210 -68
pipecat/processors/frameworks/strands_agents.py +170 -0
pipecat/processors/logger.py +2 -2
pipecat/processors/transcript_processor.py +26 -5
pipecat/processors/user_idle_processor.py +35 -11
pipecat/runner/daily.py +59 -20
pipecat/runner/run.py +395 -93
pipecat/runner/types.py +6 -4
pipecat/runner/utils.py +51 -10
pipecat/serializers/__init__.py +5 -1
pipecat/serializers/asterisk.py +16 -2
pipecat/serializers/convox.py +41 -4
pipecat/serializers/custom.py +257 -0
pipecat/serializers/exotel.py +5 -5
pipecat/serializers/livekit.py +20 -0
pipecat/serializers/plivo.py +5 -5
pipecat/serializers/protobuf.py +6 -5
pipecat/serializers/telnyx.py +2 -2
pipecat/serializers/twilio.py +43 -23
pipecat/serializers/vi.py +324 -0
pipecat/services/ai_service.py +2 -6
pipecat/services/anthropic/llm.py +2 -25
pipecat/services/assemblyai/models.py +6 -0
pipecat/services/assemblyai/stt.py +13 -5
pipecat/services/asyncai/tts.py +5 -3
pipecat/services/aws/__init__.py +1 -0
pipecat/services/aws/llm.py +147 -105
pipecat/services/aws/nova_sonic/__init__.py +0 -0
pipecat/services/aws/nova_sonic/context.py +436 -0
pipecat/services/aws/nova_sonic/frames.py +25 -0
pipecat/services/aws/nova_sonic/llm.py +1265 -0
pipecat/services/aws/stt.py +3 -3
pipecat/services/aws_nova_sonic/__init__.py +19 -1
pipecat/services/aws_nova_sonic/aws.py +11 -1151
pipecat/services/aws_nova_sonic/context.py +8 -354
pipecat/services/aws_nova_sonic/frames.py +13 -17
pipecat/services/azure/llm.py +51 -1
pipecat/services/azure/realtime/__init__.py +0 -0
pipecat/services/azure/realtime/llm.py +65 -0
pipecat/services/azure/stt.py +15 -0
pipecat/services/cartesia/stt.py +77 -70
pipecat/services/cartesia/tts.py +80 -13
pipecat/services/deepgram/__init__.py +1 -0
pipecat/services/deepgram/flux/__init__.py +0 -0
pipecat/services/deepgram/flux/stt.py +640 -0
pipecat/services/elevenlabs/__init__.py +4 -1
pipecat/services/elevenlabs/stt.py +339 -0
pipecat/services/elevenlabs/tts.py +87 -46
pipecat/services/fish/tts.py +5 -2
pipecat/services/gemini_multimodal_live/events.py +38 -524
pipecat/services/gemini_multimodal_live/file_api.py +23 -173
pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
pipecat/services/gladia/stt.py +56 -72
pipecat/services/google/__init__.py +1 -0
pipecat/services/google/gemini_live/__init__.py +3 -0
pipecat/services/google/gemini_live/file_api.py +189 -0
pipecat/services/google/gemini_live/llm.py +1582 -0
pipecat/services/google/gemini_live/llm_vertex.py +184 -0
pipecat/services/google/llm.py +15 -11
pipecat/services/google/llm_openai.py +3 -3
pipecat/services/google/llm_vertex.py +86 -16
pipecat/services/google/stt.py +4 -0
pipecat/services/google/tts.py +7 -3
pipecat/services/heygen/api.py +2 -0
pipecat/services/heygen/client.py +8 -4
pipecat/services/heygen/video.py +2 -0
pipecat/services/hume/__init__.py +5 -0
pipecat/services/hume/tts.py +220 -0
pipecat/services/inworld/tts.py +6 -6
pipecat/services/llm_service.py +15 -5
pipecat/services/lmnt/tts.py +4 -2
pipecat/services/mcp_service.py +4 -2
pipecat/services/mem0/memory.py +6 -5
pipecat/services/mistral/llm.py +29 -8
pipecat/services/moondream/vision.py +42 -16
pipecat/services/neuphonic/tts.py +5 -2
pipecat/services/openai/__init__.py +1 -0
pipecat/services/openai/base_llm.py +27 -20
pipecat/services/openai/realtime/__init__.py +0 -0
pipecat/services/openai/realtime/context.py +272 -0
pipecat/services/openai/realtime/events.py +1106 -0
pipecat/services/openai/realtime/frames.py +37 -0
pipecat/services/openai/realtime/llm.py +829 -0
pipecat/services/openai/tts.py +49 -10
pipecat/services/openai_realtime/__init__.py +27 -0
pipecat/services/openai_realtime/azure.py +21 -0
pipecat/services/openai_realtime/context.py +21 -0
pipecat/services/openai_realtime/events.py +21 -0
pipecat/services/openai_realtime/frames.py +21 -0
pipecat/services/openai_realtime_beta/azure.py +16 -0
pipecat/services/openai_realtime_beta/openai.py +17 -5
pipecat/services/piper/tts.py +7 -9
pipecat/services/playht/tts.py +34 -4
pipecat/services/rime/tts.py +12 -12
pipecat/services/riva/stt.py +3 -1
pipecat/services/salesforce/__init__.py +9 -0
pipecat/services/salesforce/llm.py +700 -0
pipecat/services/sarvam/__init__.py +7 -0
pipecat/services/sarvam/stt.py +540 -0
pipecat/services/sarvam/tts.py +97 -13
pipecat/services/simli/video.py +2 -2
pipecat/services/speechmatics/stt.py +22 -10
pipecat/services/stt_service.py +47 -0
pipecat/services/tavus/video.py +2 -2
pipecat/services/tts_service.py +75 -22
pipecat/services/vision_service.py +7 -6
pipecat/services/vistaar/llm.py +51 -9
pipecat/tests/utils.py +4 -4
pipecat/transcriptions/language.py +41 -1
pipecat/transports/base_input.py +13 -34
pipecat/transports/base_output.py +140 -104
pipecat/transports/daily/transport.py +199 -26
pipecat/transports/heygen/__init__.py +0 -0
pipecat/transports/heygen/transport.py +381 -0
pipecat/transports/livekit/transport.py +228 -63
pipecat/transports/local/audio.py +6 -1
pipecat/transports/local/tk.py +11 -2
pipecat/transports/network/fastapi_websocket.py +1 -1
pipecat/transports/smallwebrtc/connection.py +103 -19
pipecat/transports/smallwebrtc/request_handler.py +246 -0
pipecat/transports/smallwebrtc/transport.py +65 -23
pipecat/transports/tavus/transport.py +23 -12
pipecat/transports/websocket/client.py +41 -5
pipecat/transports/websocket/fastapi.py +21 -11
pipecat/transports/websocket/server.py +14 -7
pipecat/transports/whatsapp/api.py +8 -0
pipecat/transports/whatsapp/client.py +47 -0
pipecat/utils/base_object.py +54 -22
pipecat/utils/redis.py +58 -0
pipecat/utils/string.py +13 -1
pipecat/utils/tracing/service_decorators.py +21 -21
pipecat/serializers/genesys.py +0 -95
pipecat/services/google/test-google-chirp.py +0 -45
pipecat/services/openai.py +0 -698
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
/pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0

pipecat/processors/dtmf_aggregator.py CHANGED Viewed

@@ -1,17 +1,18 @@
+"""DTMF aggregator processor for collecting and flushing DTMF input digits."""
 import asyncio
 from pipecat.frames.frames import (
     BotSpeakingFrame,
     CancelFrame,
+    DTMFUpdateSettingsFrame,
+    EndDTMFCaptureFrame,
     EndFrame,
     Frame,
     InputDTMFFrame,
-    StartInterruptionFrame,
-    StartUserIdleProcessorFrame,
-    StopUserIdleProcessorFrame,
+    InterruptionFrame,
+    StartDTMFCaptureFrame,
     TranscriptionFrame,
-    UserStartedSpeakingFrame,
-    UserStoppedSpeakingFrame,
     WaitForDTMFFrame,
 )
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -20,10 +21,11 @@ from pipecat.utils.time import time_now_iso8601
 class DTMFAggregator(FrameProcessor):
     """Aggregates DTMF frames using idle wait logic.
     The aggregator accumulates digits from incoming InputDTMFFrame instances.
     It flushes the aggregated digits by emitting a TranscriptionFrame when:
       - No new digit arrives within the specified timeout period,
-      - The termination digit (“#”) is received, or
+      - The termination digit ("#") is received, or
       - The number of digits aggregated equals the configured 'digits' value.
     """
@@ -35,7 +37,9 @@ class DTMFAggregator(FrameProcessor):
         digits: int = None,
         **kwargs,
     ):
-        """:param timeout: Idle timeout in seconds before flushing the aggregated digits.
+        """Initialize the DTMF aggregator.
+        :param timeout: Idle timeout in seconds before flushing the aggregated digits.
         :param digits: Number of digits to aggregate before flushing.
         """
         super().__init__(**kwargs)
@@ -43,112 +47,205 @@ class DTMFAggregator(FrameProcessor):
         self._idle_timeout = timeout
         self._digits = digits
         self._digit_event = asyncio.Event()
-        self._digit_aggregate_task = None
+        self._aggregation_task = None
         self._end_on = end_on if end_on else set()
         self._reset_on = reset_on if reset_on else set()
-        self._stopped_idle_processor = False
-    async def _start_idle_processor(self):
-        await self.push_frame(StartUserIdleProcessorFrame(), FrameDirection.UPSTREAM)
-        self._stopped_idle_processor = False
-    async def _stop_idle_processor(self):
-        await self.push_frame(StopUserIdleProcessorFrame(), FrameDirection.UPSTREAM)
-        self._stopped_idle_processor = True
+        self._dtmf_capture_active = False
     async def process_frame(self, frame: Frame, direction: FrameDirection) -> None:
+        """Process incoming frames and handle DTMF input aggregation."""
         # Handle DTMF frames.
         await super().process_frame(frame, direction)
-        await self.push_frame(frame, direction)
-        if isinstance(frame, InputDTMFFrame):
-            # Start the digit aggregation task if it's not running yet.
-            if self._digit_aggregate_task is None:
-                self._digit_aggregate_task = self.create_task(self._digit_agg_handler(direction))
-            # Append the incoming digit.
-            if frame.button.value in self._reset_on:
-                self._aggregation = ""
-            elif frame.button.value in self._end_on:
-                await self.flush_aggregation(direction)
-                self._aggregation = ""
-            else:
-                self._digit_event.set()
-                self._aggregation += frame.button.value
-                # Flush if the aggregated digits reach the specified length.
-                if self._digits and len(self._aggregation) == self._digits:
-                    await self.flush_aggregation(direction)
-                    self._aggregation = ""
-            if self._stopped_idle_processor:
-                await self._start_idle_processor()
+        if isinstance(frame, InputDTMFFrame):
+            # Push the DTMF frame downstream first
+            await self.push_frame(frame, direction)
+            # Then handle it for proper frame ordering
+            await self._handle_dtmf_frame(frame)
         elif isinstance(frame, (EndFrame, CancelFrame)):
             # For EndFrame, flush any pending aggregation and stop the digit aggregation task.
             if self._aggregation:
-                await self.flush_aggregation(direction)
-            if self._digit_aggregate_task:
-                await self._stop_digit_aggregate_task()
+                await self.flush_aggregation()
+            if self._aggregation_task:
+                await self._stop_aggregation_task()
+            await self.push_frame(frame, direction)
         elif isinstance(frame, WaitForDTMFFrame):
             self.logger.debug("Received WaitForDTMFFrame: Waiting for DTMF input")
-            if self._digit_aggregate_task is None:
-                self._digit_aggregate_task = self.create_task(
-                    self._digit_agg_handler(direction, raise_timeout=True)
-                )
-                self._digit_event.set()
-            await self._stop_idle_processor()
-        elif isinstance(frame, StartInterruptionFrame):
-            self.logger.debug("Received StartInterruptionFrame: Starting idle processor")
-            if self._stopped_idle_processor:
-                await self._start_idle_processor()
+            self._create_aggregation_task(raise_timeout=True)
+            self._digit_event.set()  # Trigger the timeout handler
+            await self._start_dtmf_capture()
+            await self.push_frame(frame, direction)
+        elif isinstance(frame, InterruptionFrame):
+            self.logger.debug("Received InterruptionFrame")
             if self._aggregation:
-                await self.flush_aggregation(direction)
+                await self.flush_aggregation()
+            await self._end_dtmf_capture()
+            await self.push_frame(frame, direction)
         elif isinstance(frame, BotSpeakingFrame):
-            if self._digit_aggregate_task is not None:
+            # Signal the aggregation task to continue when bot speaks
+            if self._aggregation_task is not None:
                 self._digit_event.set()
+            await self.push_frame(frame, direction)
+        elif isinstance(frame, DTMFUpdateSettingsFrame):
+            await self._update_settings(frame.settings)
+            # Don't pass the settings frame downstream
+        else:
+            # Pass all other frames through
+            await self.push_frame(frame, direction)
+    async def _update_settings(self, settings: dict) -> None:
+        """Update DTMF aggregator settings dynamically.
-    async def _digit_agg_handler(self, direction: FrameDirection, raise_timeout=False):
-        """Idle task that waits for new DTMF activity. If no new digit is received within
-        the timeout period, the current aggregation is flushed.
+        Args:
+            settings: Dictionary containing new DTMF settings
+                     Supported keys: timeout, digits, end, reset
         """
+        settings_changed = False
+        if "timeout" in settings and settings["timeout"] is not None:
+            new_timeout = float(settings["timeout"])
+            if new_timeout != self._idle_timeout:
+                self.logger.debug(
+                    f"Updating DTMF timeout from {self._idle_timeout} to {new_timeout}"
+                )
+                self._idle_timeout = new_timeout
+                settings_changed = True
+        if "digits" in settings:
+            new_digits = settings["digits"]
+            if new_digits != self._digits:
+                self.logger.debug(f"Updating DTMF digits from {self._digits} to {new_digits}")
+                self._digits = new_digits
+                settings_changed = True
+        if "end" in settings:
+            # Convert single string to set if needed
+            end_value = settings["end"]
+            if end_value is None:
+                new_end_on = set()
+            elif isinstance(end_value, str):
+                new_end_on = {end_value} if end_value else set()
+            else:
+                new_end_on = set(end_value)
+            if new_end_on != self._end_on:
+                self.logger.debug(f"Updating DTMF end_on from {self._end_on} to {new_end_on}")
+                self._end_on = new_end_on
+                settings_changed = True
+        if "reset" in settings:
+            # Convert single string to set if needed
+            reset_value = settings["reset"]
+            if reset_value is None:
+                new_reset_on = set()
+            elif isinstance(reset_value, str):
+                new_reset_on = {reset_value} if reset_value else set()
+            else:
+                new_reset_on = set(reset_value)
+            if new_reset_on != self._reset_on:
+                self.logger.debug(f"Updating DTMF reset_on from {self._reset_on} to {new_reset_on}")
+                self._reset_on = new_reset_on
+                settings_changed = True
+        if settings_changed:
+            self.logger.info(f"DTMF settings updated successfully")
+    async def _handle_dtmf_frame(self, frame: InputDTMFFrame):
+        """Handle DTMF input frame processing."""
+        # Create aggregation task if needed
+        if self._aggregation_task is None:
+            self._create_aggregation_task()
+        digit_value = frame.button.value
+        # Handle reset digits
+        if digit_value in self._reset_on:
+            self._aggregation = ""
+            return
+        # Handle end digits
+        if digit_value in self._end_on:
+            if self._aggregation:  # Only flush if we have aggregation
+                await self.flush_aggregation()
+            return
+        # Add digit to aggregation
+        self._aggregation += digit_value
+        # Signal the aggregation task that a digit was received
+        self._digit_event.set()
+        # Check if we reached the digit limit
+        if self._digits and len(self._aggregation) == self._digits:
+            await self.flush_aggregation()
+    def _create_aggregation_task(self, raise_timeout: bool = False) -> None:
+        """Creates the aggregation task if it hasn't been created yet."""
+        if not self._aggregation_task:
+            self._aggregation_task = self.create_task(self._aggregation_task_handler(raise_timeout))
+    async def _stop_aggregation_task(self) -> None:
+        """Stops the aggregation task."""
+        if self._aggregation_task:
+            await self.cancel_task(self._aggregation_task)
+            self._aggregation_task = None
+    async def _aggregation_task_handler(self, raise_timeout=False):
+        """Background task that handles timeout-based flushing."""
         while True:
             try:
                 # Wait for a new digit signal with a timeout.
                 await asyncio.wait_for(self._digit_event.wait(), timeout=self._idle_timeout)
-            except asyncio.TimeoutError:
-                # No new digit arrived within the timeout period; flush aggregation if non-empty.
-                await self.flush_aggregation(direction, raise_timeout)
-            finally:
-                # Clear the event for the next cycle.
                 self._digit_event.clear()
+            except asyncio.TimeoutError:
+                # No new digit arrived within the timeout period; flush if needed
+                await self.flush_aggregation(raise_timeout=raise_timeout)
-    async def flush_aggregation(self, direction: FrameDirection, raise_timeout=False):
+    async def flush_aggregation(self, *, raise_timeout: bool = False):
         """Flush the aggregated digits by emitting a TranscriptionFrame downstream."""
         if self._aggregation:
-            # Todo: Change to different frame type if we decide to handle it in llm processor separately.
+            # Create transcription frame
             aggregated_frame = TranscriptionFrame(
                 f"User inputted: {self._aggregation}.", "", time_now_iso8601()
             )
             aggregated_frame.metadata["push_aggregation"] = True
-            await self.push_frame(StartInterruptionFrame())
-            await self.push_frame(aggregated_frame, direction)
+            # Send interruption frame (as per original design)
+            await self.push_frame(InterruptionFrame(), FrameDirection.DOWNSTREAM)
+            # Push the transcription frame
+            await self.push_frame(aggregated_frame, FrameDirection.DOWNSTREAM)
+            # Reset state
             self._aggregation = ""
-        elif raise_timeout and self._stopped_idle_processor:
+            await self._end_dtmf_capture()
+        elif raise_timeout and not self._aggregation:
+            # Timeout with no aggregation (WaitForDTMFFrame case)
             transcript_frame = TranscriptionFrame(
                 "User didn't press any digits on the keyboard.", "", time_now_iso8601()
             )
             transcript_frame.metadata["push_aggregation"] = True
-            await self.push_frame(transcript_frame)
-            if self._stopped_idle_processor:
-                await self._start_idle_processor()
+            await self.push_frame(transcript_frame, FrameDirection.DOWNSTREAM)
+            await self._end_dtmf_capture()
+    async def _start_dtmf_capture(self):
+        """Signal the start of DTMF capture upstream."""
+        if self._dtmf_capture_active:
+            return
+        await self.push_frame(StartDTMFCaptureFrame(), FrameDirection.UPSTREAM)
+        self._dtmf_capture_active = True
-    async def _stop_digit_aggregate_task(self):
-        """Cancels the digit aggregation task if it exists."""
-        if self._digit_aggregate_task:
-            await self.cancel_task(self._digit_aggregate_task)
-            self._digit_aggregate_task = None
+    async def _end_dtmf_capture(self):
+        """Signal the end of DTMF capture upstream."""
+        if not self._dtmf_capture_active:
+            return
+        await self.push_frame(EndDTMFCaptureFrame(), FrameDirection.UPSTREAM)
+        self._dtmf_capture_active = False
     async def cleanup(self) -> None:
         """Cleans up resources, ensuring that the digit aggregation task is cancelled."""
         await super().cleanup()
-        if self._digit_aggregate_task:
-            await self._stop_digit_aggregate_task()
+        if self._aggregation_task:
+            await self._stop_aggregation_task()

pipecat/processors/filters/stt_mute_filter.py CHANGED Viewed

@@ -25,14 +25,17 @@ from pipecat.frames.frames import (
     FunctionCallResultFrame,
     InputAudioRawFrame,
     InterimTranscriptionFrame,
+    InterruptionFrame,
     StartFrame,
     StartInterruptionFrame,
+    StartDTMFCaptureFrame,
     STTMuteFrame,
     TranscriptionFrame,
     UserStartedSpeakingFrame,
     UserStoppedSpeakingFrame,
     VADUserStartedSpeakingFrame,
     VADUserStoppedSpeakingFrame,
+    EndDTMFCaptureFrame,
 )
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -58,6 +61,7 @@ class STTMuteStrategy(Enum):
     FUNCTION_CALL = "function_call"
     ALWAYS = "always"
     CUSTOM = "custom"
+    DTMF_CAPTURE = "dtmf_capture"
 @dataclass
@@ -120,6 +124,7 @@ class STTMuteFilter(FrameProcessor):
         self._function_call_in_progress = False
         self._is_muted = False  # Initialize as unmuted, will set state on StartFrame if needed
         self._voicemail_detection_enabled = False  # Default to False
+        self._dtmf_capture_active = False
     @property
     def is_muted(self) -> bool:
@@ -165,6 +170,10 @@ class STTMuteFilter(FrameProcessor):
                         if should_mute:
                             return True
+                case STTMuteStrategy.DTMF_CAPTURE:
+                    if self._dtmf_capture_active:
+                        return True
         return False
     async def process_frame(self, frame: Frame, direction: FrameDirection):
@@ -205,12 +214,20 @@ class STTMuteFilter(FrameProcessor):
                 self._first_speech_handled = True
             should_mute = await self._should_mute()
             self.logger.debug(f"BotStoppedSpeaking: should mute={should_mute}")
+        elif isinstance(frame, StartDTMFCaptureFrame):
+            self._dtmf_capture_active = True
+            should_mute = await self._should_mute()
+        elif isinstance(frame, EndDTMFCaptureFrame):
+            self._dtmf_capture_active = False
+            should_mute = await self._should_mute()
         elif isinstance(frame, STTMuteFrame):
+            # TODO: Duplication of frame is actually happening. We get this frame from the downstream and then we again push it downstream. Also we're pushing it upstream and then pushing it upstream again in _handle_mute_state.
             should_mute = frame.mute
         # Then push the original frame
         # Conditionally include InputAudioRawFrame in suppression tuple based on voicemail_detection_enabled
         suppression_types = (
+            InterruptionFrame,
             StartInterruptionFrame,
             VADUserStartedSpeakingFrame,
             VADUserStoppedSpeakingFrame,

pipecat/processors/frame_processor.py CHANGED Viewed

@@ -29,8 +29,9 @@ from pipecat.frames.frames import (
     FrameProcessorPauseUrgentFrame,
     FrameProcessorResumeFrame,
     FrameProcessorResumeUrgentFrame,
+    InterruptionFrame,
+    InterruptionTaskFrame,
     StartFrame,
-    StartInterruptionFrame,
     SystemFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage, MetricsData
@@ -141,6 +142,12 @@ class FrameProcessor(BaseObject):
     task. System frames are also processed in a separate task which guarantees
     frame priority.
+    Event handlers available:
+    - on_before_process_frame: Called before a frame is processed
+    - on_after_process_frame: Called after a frame is processed
+    - on_before_push_frame: Called before a frame is pushed
+    - on_after_push_frame: Called after a frame is pushed
     """
     def __init__(
@@ -221,6 +228,20 @@ class FrameProcessor(BaseObject):
         self.__process_frame_task: Optional[asyncio.Task] = None
         self.logger = logger  # Will later be replaced with a bound logger
+        # To interrupt a pipeline, we push an `InterruptionTaskFrame` upstream.
+        # Then we wait for the corresponding `InterruptionFrame` to travel from
+        # the start of the pipeline back to the processor that sent the
+        # `InterruptionTaskFrame`. This wait is handled using the following
+        # event.
+        self._wait_for_interruption = False
+        self._wait_interruption_event = asyncio.Event()
+        # Frame processor events.
+        self._register_event_handler("on_before_process_frame", sync=True)
+        self._register_event_handler("on_after_process_frame", sync=True)
+        self._register_event_handler("on_before_push_frame", sync=True)
+        self._register_event_handler("on_after_push_frame", sync=True)
     @property
     def id(self) -> int:
         """Get the unique identifier for this processor.
@@ -436,9 +457,13 @@ class FrameProcessor(BaseObject):
             name = f"{self}::{coroutine.cr_code.co_name}"
         return self.task_manager.create_task(coroutine, name)
-    async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = None):
+    async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = 1.0):
         """Cancel a task managed by this processor.
+        A default timeout of 1 second is used in order to avoid potential
+        freezes caused by certain libraries that swallow
+        `asyncio.CancelledError`.
         Args:
             task: The task to cancel.
             timeout: Optional timeout for task cancellation.
@@ -544,6 +569,14 @@ class FrameProcessor(BaseObject):
         if self._cancelling:
             return
+        # If we are waiting for an interruption we will bypass all queued system
+        # frames and we will process the frame right away. This is because a
+        # previous system frame might be waiting for the interruption frame and
+        # it's blocking the input task.
+        if self._wait_for_interruption and isinstance(frame, InterruptionFrame):
+            await self.__process_frame(frame, direction, callback)
+            return
         if self._enable_direct_mode:
             await self.__process_frame(frame, direction, callback)
         else:
@@ -553,11 +586,15 @@ class FrameProcessor(BaseObject):
         """Pause processing of queued frames."""
         self.logger.trace(f"{self}: pausing frame processing")
         self.__should_block_frames = True
+        if self.__process_event:
+            self.__process_event.clear()
     async def pause_processing_system_frames(self):
         """Pause processing of queued system frames."""
-        logger.trace(f"{self}: pausing system frame processing")
+        self.logger.trace(f"{self}: pausing system frame processing")
         self.__should_block_system_frames = True
+        if self.__input_event:
+            self.__input_event.clear()
     async def resume_processing_frames(self):
         """Resume processing of queued frames."""
@@ -590,7 +627,7 @@ class FrameProcessor(BaseObject):
         if isinstance(frame, StartFrame):
             await self.__start(frame)
-        elif isinstance(frame, StartInterruptionFrame):
+        elif isinstance(frame, InterruptionFrame):
             await self._start_interruption()
             await self.stop_all_metrics()
         elif isinstance(frame, CancelFrame):
@@ -620,8 +657,40 @@ class FrameProcessor(BaseObject):
         if not self._check_started(frame):
             return
+        await self._call_event_handler("on_before_push_frame", frame)
         await self.__internal_push_frame(frame, direction)
+        await self._call_event_handler("on_after_push_frame", frame)
+        # If we are waiting for an interruption and we get an interruption, then
+        # we can unblock `push_interruption_task_frame_and_wait()`.
+        if self._wait_for_interruption and isinstance(frame, InterruptionFrame):
+            self._wait_interruption_event.set()
+    async def push_interruption_task_frame_and_wait(self):
+        """Push an interruption task frame upstream and wait for the interruption.
+        This function sends an `InterruptionTaskFrame` upstream to the pipeline
+        task and waits to receive the corresponding `InterruptionFrame`. When
+        the function finishes it is guaranteed that the `InterruptionFrame` has
+        been pushed downstream.
+        """
+        self._wait_for_interruption = True
+        await self.push_frame(InterruptionTaskFrame(), FrameDirection.UPSTREAM)
+        # Wait for an `InterruptionFrame` to come to this processor and be
+        # pushed. Take a look at `push_frame()` to see how we first push the
+        # `InterruptionFrame` and then we set the event in order to maintain
+        # frame ordering.
+        await self._wait_interruption_event.wait()
+        # Clean the event.
+        self._wait_interruption_event.clear()
+        self._wait_for_interruption = False
     async def __start(self, frame: StartFrame):
         """Handle the start frame to initialize processor state.
@@ -674,22 +743,24 @@ class FrameProcessor(BaseObject):
     async def _start_interruption(self):
         """Start handling an interruption by cancelling current tasks."""
         try:
-            # Cancel the process task. This will stop processing queued frames.
-            await self.__cancel_process_task()
+            if self._wait_for_interruption:
+                # If we get here we know the process task was just waiting for
+                # an interruption (push_interruption_task_frame_and_wait()), so
+                # we can't cancel the task because it might still need to do
+                # more things (e.g. pushing a frame after the
+                # interruption). Instead we just drain the queue because this is
+                # an interruption.
+                self.__reset_process_task()
+            else:
+                # Cancel and re-create the process task including the queue.
+                await self.__cancel_process_task()
+                self.__create_process_task()
         except Exception as e:
             self.logger.exception(
                 f"Uncaught exception in {self} when handling _start_interruption: {e}"
             )
             await self.push_error(ErrorFrame(str(e)))
-        # Create a new process queue and task.
-        self.__create_process_task()
-    async def _stop_interruption(self):
-        """Stop handling an interruption."""
-        # Nothing to do right now.
-        pass
     async def __internal_push_frame(self, frame: Frame, direction: FrameDirection):
         """Internal method to push frames to adjacent processors.
@@ -741,7 +812,7 @@ class FrameProcessor(BaseObject):
             True if the processor has been started.
         """
         if not self.__started:
-            logger.error(f"{self} Trying to process {frame} but StartFrame not received yet")
+            self.logger.error(f"{self} Trying to process {frame} but StartFrame not received yet")
         return self.__started
     def __create_input_task(self):
@@ -774,6 +845,17 @@ class FrameProcessor(BaseObject):
             self.__process_queue = asyncio.Queue()
             self.__process_frame_task = self.create_task(self.__process_frame_task_handler())
+    def __reset_process_task(self):
+        """Reset non-system frame processing task."""
+        if self._enable_direct_mode:
+            return
+        self.__should_block_frames = False
+        self.__process_event = asyncio.Event()
+        while not self.__process_queue.empty():
+            self.__process_queue.get_nowait()
+            self.__process_queue.task_done()
     async def __cancel_process_task(self):
         """Cancel the non-system frame processing task."""
         if self.__process_frame_task:
@@ -784,13 +866,17 @@ class FrameProcessor(BaseObject):
         self, frame: Frame, direction: FrameDirection, callback: Optional[FrameCallback]
     ):
         try:
+            await self._call_event_handler("on_before_process_frame", frame)
             # Process the frame.
             await self.process_frame(frame, direction)
             # If this frame has an associated callback, call it now.
             if callback:
                 await callback(self, frame, direction)
+            await self._call_event_handler("on_after_process_frame", frame)
         except Exception as e:
-            logger.exception(f"{self}: error processing frame: {e}")
+            self.logger.exception(f"{self}: error processing frame: {e}")
             await self.push_error(ErrorFrame(str(e)))
     async def __input_frame_task_handler(self):
@@ -801,14 +887,14 @@ class FrameProcessor(BaseObject):
         """
         while True:
+            (frame, direction, callback) = await self.__input_queue.get()
             if self.__should_block_system_frames and self.__input_event:
-                logger.trace(f"{self}: system frame processing paused")
+                self.logger.trace(f"{self}: system frame processing paused")
                 await self.__input_event.wait()
                 self.__input_event.clear()
                 self.__should_block_system_frames = False
-                logger.trace(f"{self}: system frame processing resumed")
-            (frame, direction, callback) = await self.__input_queue.get()
+                self.logger.trace(f"{self}: system frame processing resumed")
             if isinstance(frame, SystemFrame):
                 await self.__process_frame(frame, direction, callback)
@@ -824,14 +910,14 @@ class FrameProcessor(BaseObject):
     async def __process_frame_task_handler(self):
         """Handle non-system frames from the process queue."""
         while True:
+            (frame, direction, callback) = await self.__process_queue.get()
             if self.__should_block_frames and self.__process_event:
-                logger.trace(f"{self}: frame processing paused")
+                self.logger.trace(f"{self}: frame processing paused")
                 await self.__process_event.wait()
                 self.__process_event.clear()
                 self.__should_block_frames = False
-                logger.trace(f"{self}: frame processing resumed")
-            (frame, direction, callback) = await self.__process_queue.get()
+                self.logger.trace(f"{self}: frame processing resumed")
             await self.__process_frame(frame, direction, callback)

pipecat/processors/frameworks/langchain.py CHANGED Viewed

@@ -12,6 +12,7 @@ from loguru import logger
 from pipecat.frames.frames import (
     Frame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     TextFrame,
@@ -64,11 +65,16 @@ class LangchainProcessor(FrameProcessor):
         """
         await super().process_frame(frame, direction)
-        if isinstance(frame, OpenAILLMContextFrame):
+        if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)):
             # Messages are accumulated on the context as a list of messages.
             # The last one by the human is the one we want to send to the LLM.
             logger.debug(f"Got transcription frame {frame}")
-            text: str = frame.context.messages[-1]["content"]
+            messages = (
+                frame.context.messages
+                if isinstance(frame, OpenAILLMContextFrame)
+                else frame.context.get_messages()
+            )
+            text: str = messages[-1]["content"]
             await self._ainvoke(text.strip())
         else:

dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl