dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +20 -4
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +2 -2
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
pipecat/transports/base_input.py
CHANGED
@@ -38,8 +38,8 @@ from pipecat.frames.frames import (
     StartFrame,
     StartInterruptionFrame,
     StopFrame,
-    StopInterruptionFrame,
     SystemFrame,
+    UserSpeakingFrame,
     UserStartedSpeakingFrame,
     UserStoppedSpeakingFrame,
     VADParamsUpdateFrame,
@@ -298,11 +298,11 @@ class BaseInputTransport(FrameProcessor):
             await self._handle_bot_stopped_speaking(frame)
             await self.push_frame(frame, direction)
         elif isinstance(frame, EmulateUserStartedSpeakingFrame):
-
-            await self._handle_user_interruption(
+            logger.debug("Emulating user started speaking")
+            await self._handle_user_interruption(VADState.SPEAKING, emulated=True)
         elif isinstance(frame, EmulateUserStoppedSpeakingFrame):
-
-            await self._handle_user_interruption(
+            logger.debug("Emulating user stopped speaking")
+            await self._handle_user_interruption(VADState.QUIET, emulated=True)
         # All other system frames
         elif isinstance(frame, VADParamsUpdateFrame):
             if self.vad_analyzer:
@@ -342,12 +342,16 @@ class BaseInputTransport(FrameProcessor):
         await self._start_interruption()
         await self.push_frame(StartInterruptionFrame())

-    async def _handle_user_interruption(self,
+    async def _handle_user_interruption(self, vad_state: VADState, emulated: bool = False):
         """Handle user interruption events based on speaking state."""
-        if
+        if vad_state == VADState.SPEAKING:
             self.logger.debug("User started speaking")
             self._user_speaking = True
-
+
+            upstream_frame = UserStartedSpeakingFrame(emulated=emulated)
+            downstream_frame = UserStartedSpeakingFrame(emulated=emulated)
+            await self.push_frame(downstream_frame)
+            await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)

             # Only push StartInterruptionFrame if:
             # 1. No interruption config is set, OR
@@ -368,13 +372,17 @@ class BaseInputTransport(FrameProcessor):
                 "User started speaking while bot is speaking with interruption config - "
                 "deferring interruption to aggregator"
             )
-        elif
+        elif vad_state == VADState.QUIET:
             self.logger.debug("User stopped speaking")
             self._user_speaking = False
-
+
+            upstream_frame = UserStoppedSpeakingFrame(emulated=emulated)
+            downstream_frame = UserStoppedSpeakingFrame(emulated=emulated)
+            await self.push_frame(downstream_frame)
+            await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)
+
             if self.interruptions_allowed:
                 await self._stop_interruption()
-                await self.push_frame(StopInterruptionFrame())

         #
         # Handle bot speaking state
@@ -413,7 +421,7 @@ class BaseInputTransport(FrameProcessor):
         )
         return state

-    async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState):
+    async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState) -> VADState:
         """Handle Voice Activity Detection results and generate appropriate frames."""
         new_vad_state = await self._vad_analyze(audio_frame)
         if (
@@ -421,7 +429,8 @@ class BaseInputTransport(FrameProcessor):
             and new_vad_state != VADState.STARTING
             and new_vad_state != VADState.STOPPING
         ):
-
+            interruption_state = None
+
             # If the turn analyser is enabled, this will prevent:
             # - Creating the UserStoppedSpeakingFrame
             # - Creating the UserStartedSpeakingFrame multiple times
@@ -432,14 +441,14 @@ class BaseInputTransport(FrameProcessor):
             if new_vad_state == VADState.SPEAKING:
                 await self.push_frame(VADUserStartedSpeakingFrame())
                 if can_create_user_frames:
-
+                    interruption_state = VADState.SPEAKING
             elif new_vad_state == VADState.QUIET:
                 await self.push_frame(VADUserStoppedSpeakingFrame())
                 if can_create_user_frames:
-
+                    interruption_state = VADState.QUIET

-            if
-                await self._handle_user_interruption(
+            if interruption_state:
+                await self._handle_user_interruption(interruption_state)

             vad_state = new_vad_state
         return vad_state
@@ -454,7 +463,7 @@ class BaseInputTransport(FrameProcessor):
    async def _handle_end_of_turn_complete(self, state: EndOfTurnState):
        """Handle completion of end-of-turn analysis."""
        if state == EndOfTurnState.COMPLETE:
-            await self._handle_user_interruption(
+            await self._handle_user_interruption(VADState.QUIET)

    async def _run_turn_analyzer(
        self, frame: InputAudioRawFrame, vad_state: VADState, previous_vad_state: VADState
@@ -491,6 +500,10 @@ class BaseInputTransport(FrameProcessor):
        if self._params.turn_analyzer:
            await self._run_turn_analyzer(frame, vad_state, previous_vad_state)

+        if vad_state == VADState.SPEAKING:
+            await self.push_frame(UserSpeakingFrame())
+            await self.push_frame(UserSpeakingFrame(), FrameDirection.UPSTREAM)
+
        # Push audio downstream if passthrough is set.
        if self._params.audio_in_passthrough:
            await self.push_frame(frame)
@@ -504,7 +517,7 @@ class BaseInputTransport(FrameProcessor):
            vad_state = VADState.QUIET
            if self._params.turn_analyzer:
                self._params.turn_analyzer.clear()
-            await self._handle_user_interruption(
+            await self._handle_user_interruption(VADState.QUIET)

    async def _handle_prediction_result(self, result: MetricsData):
        """Handle a prediction result event from the turn analyzer."""
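Taken together, the base_input.py changes collapse three call sites (real VAD transitions, the EmulateUser*SpeakingFrame handlers, and end-of-turn completion) into a single _handle_user_interruption(vad_state, emulated=...) entry point, retire StopInterruptionFrame, and push the started/stopped frames both downstream and upstream. Below is a minimal, self-contained sketch of that pattern, using stand-in classes rather than pipecat's real ones:

import asyncio
from dataclasses import dataclass
from enum import Enum, auto


class VADState(Enum):
    QUIET = auto()
    STARTING = auto()
    SPEAKING = auto()
    STOPPING = auto()


class FrameDirection(Enum):
    DOWNSTREAM = auto()
    UPSTREAM = auto()


@dataclass
class UserStartedSpeakingFrame:
    emulated: bool = False


@dataclass
class UserStoppedSpeakingFrame:
    emulated: bool = False


class InputTransportSketch:
    """Stand-in for BaseInputTransport, reduced to the interruption path."""

    def __init__(self):
        self._user_speaking = False

    async def push_frame(self, frame, direction=FrameDirection.DOWNSTREAM):
        print(f"{direction.name}: {frame}")

    async def _handle_user_interruption(self, vad_state: VADState, emulated: bool = False):
        # One entry point for real VAD results, emulated speaking frames, and
        # end-of-turn completion; the old per-call-site branches are gone.
        if vad_state == VADState.SPEAKING:
            self._user_speaking = True
            frame_cls = UserStartedSpeakingFrame
        elif vad_state == VADState.QUIET:
            self._user_speaking = False
            frame_cls = UserStoppedSpeakingFrame
        else:
            return  # STARTING/STOPPING never reach this method
        # As in the diff, separate instances travel downstream and upstream.
        await self.push_frame(frame_cls(emulated=emulated))
        await self.push_frame(frame_cls(emulated=emulated), FrameDirection.UPSTREAM)


asyncio.run(InputTransportSketch()._handle_user_interruption(VADState.SPEAKING, emulated=True))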
pipecat/transports/base_output.py
CHANGED
@@ -19,6 +19,7 @@ from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional
 from loguru import logger
 from PIL import Image

+from pipecat.audio.dtmf.utils import load_dtmf_audio
 from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer
 from pipecat.audio.utils import create_stream_resampler, is_silence
 from pipecat.frames.frames import (
@@ -28,6 +29,7 @@ from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
     Frame,
+    InputTransportMessageUrgentFrame,
     MixerControlFrame,
     OutputAudioRawFrame,
     OutputDTMFFrame,
@@ -38,7 +40,6 @@ from pipecat.frames.frames import (
     SpriteFrame,
     StartFrame,
     StartInterruptionFrame,
-    StopInterruptionFrame,
     SystemFrame,
     TransportMessageFrame,
     TransportMessageUrgentFrame,
@@ -219,12 +220,43 @@ class BaseOutputTransport(FrameProcessor):
         pass

     async def write_dtmf(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
-        """Write a DTMF tone
+        """Write a DTMF tone using the transport's preferred method.

         Args:
             frame: The DTMF frame to write.
         """
-
+        if self._supports_native_dtmf():
+            await self._write_dtmf_native(frame)
+        else:
+            await self._write_dtmf_audio(frame)
+
+    def _supports_native_dtmf(self) -> bool:
+        """Override in transport implementations that support native DTMF.
+
+        Returns:
+            True if the transport supports native DTMF, False otherwise.
+        """
+        return False
+
+    async def _write_dtmf_native(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
+        """Override in transport implementations for native DTMF.
+
+        Args:
+            frame: The DTMF frame to write.
+        """
+        raise NotImplementedError("Transport claims native DTMF support but doesn't implement it")
+
+    async def _write_dtmf_audio(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
+        """Generate and send audio tones for DTMF.
+
+        Args:
+            frame: The DTMF frame to write.
+        """
+        dtmf_audio = await load_dtmf_audio(frame.button, sample_rate=self._sample_rate)
+        dtmf_audio_frame = OutputAudioRawFrame(
+            audio=dtmf_audio, sample_rate=self._sample_rate, num_channels=1
+        )
+        await self.write_audio_frame(dtmf_audio_frame)

     async def send_audio(self, frame: OutputAudioRawFrame):
         """Send an audio frame downstream.
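The rewritten write_dtmf is a template method: the base class checks _supports_native_dtmf() and dispatches to _write_dtmf_native() when a transport can signal the keypress itself, otherwise it synthesizes the tone via load_dtmf_audio and writes it as raw audio. A runnable sketch of that dispatch with stand-in classes follows; only the hook names come from the diff, while NativeDtmfTransport and its print calls are hypothetical:

import asyncio


class OutputTransportSketch:
    """Stand-in for BaseOutputTransport's DTMF template method."""

    async def write_dtmf(self, button: str):
        if self._supports_native_dtmf():
            await self._write_dtmf_native(button)
        else:
            await self._write_dtmf_audio(button)

    def _supports_native_dtmf(self) -> bool:
        # Default: no native support, fall back to synthesized tones.
        return False

    async def _write_dtmf_native(self, button: str):
        raise NotImplementedError("Transport claims native DTMF support but doesn't implement it")

    async def _write_dtmf_audio(self, button: str):
        print(f"synthesizing and writing audio tone for '{button}'")


class NativeDtmfTransport(OutputTransportSketch):
    """Hypothetical transport whose provider accepts DTMF events directly."""

    def _supports_native_dtmf(self) -> bool:
        return True

    async def _write_dtmf_native(self, button: str):
        print(f"sending '{button}' over the provider's signaling channel")


asyncio.run(NativeDtmfTransport().write_dtmf("5"))    # native path
asyncio.run(OutputTransportSketch().write_dtmf("5"))  # audio fallback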
@@ -268,10 +300,12 @@
         elif isinstance(frame, CancelFrame):
             await self.cancel(frame)
             await self.push_frame(frame, direction)
-        elif isinstance(frame,
+        elif isinstance(frame, StartInterruptionFrame):
             await self.push_frame(frame, direction)
             await self._handle_frame(frame)
-        elif isinstance(frame, TransportMessageUrgentFrame)
+        elif isinstance(frame, TransportMessageUrgentFrame) and not isinstance(
+            frame, InputTransportMessageUrgentFrame
+        ):
             await self.send_message(frame)
         elif isinstance(frame, OutputDTMFUrgentFrame):
             await self.write_dtmf(frame)
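The guard added to process_frame keeps input-originated urgent messages from being echoed back out of the transport. It implies that InputTransportMessageUrgentFrame subclasses TransportMessageUrgentFrame, so a bare isinstance check would match both; a minimal sketch with stand-in classes:

class TransportMessageUrgentFrame:
    """Stand-in for an outbound urgent transport message."""


class InputTransportMessageUrgentFrame(TransportMessageUrgentFrame):
    """Stand-in for a message that arrived *from* the transport.

    Assumed to subclass the urgent frame, as the added guard implies.
    """


def should_send(frame) -> bool:
    # Mirrors the new condition: send outbound urgent messages, but do not
    # echo input-originated ones back to the transport.
    return isinstance(frame, TransportMessageUrgentFrame) and not isinstance(
        frame, InputTransportMessageUrgentFrame
    )


assert should_send(TransportMessageUrgentFrame())
assert not should_send(InputTransportMessageUrgentFrame())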