PyPI - dv-pipecat-ai - Versions diffs - 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.74.dev770-py3-none-any.whl → 0.0.82.dev776-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244)

{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
pipecat/__init__.py +17 -0
pipecat/adapters/base_llm_adapter.py +36 -1
pipecat/adapters/schemas/direct_function.py +296 -0
pipecat/adapters/schemas/function_schema.py +15 -6
pipecat/adapters/schemas/tools_schema.py +55 -7
pipecat/adapters/services/anthropic_adapter.py +22 -3
pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
pipecat/adapters/services/bedrock_adapter.py +22 -3
pipecat/adapters/services/gemini_adapter.py +16 -3
pipecat/adapters/services/open_ai_adapter.py +17 -2
pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
pipecat/audio/filters/base_audio_filter.py +30 -6
pipecat/audio/filters/koala_filter.py +37 -2
pipecat/audio/filters/krisp_filter.py +59 -6
pipecat/audio/filters/noisereduce_filter.py +37 -0
pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
pipecat/audio/mixers/base_audio_mixer.py +30 -7
pipecat/audio/mixers/soundfile_mixer.py +53 -6
pipecat/audio/resamplers/base_audio_resampler.py +17 -9
pipecat/audio/resamplers/resampy_resampler.py +26 -1
pipecat/audio/resamplers/soxr_resampler.py +32 -1
pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
pipecat/audio/utils.py +194 -1
pipecat/audio/vad/silero.py +60 -3
pipecat/audio/vad/vad_analyzer.py +114 -30
pipecat/clocks/base_clock.py +19 -0
pipecat/clocks/system_clock.py +25 -0
pipecat/extensions/voicemail/__init__.py +0 -0
pipecat/extensions/voicemail/voicemail_detector.py +707 -0
pipecat/frames/frames.py +590 -156
pipecat/metrics/metrics.py +64 -1
pipecat/observers/base_observer.py +58 -19
pipecat/observers/loggers/debug_log_observer.py +56 -64
pipecat/observers/loggers/llm_log_observer.py +8 -1
pipecat/observers/loggers/transcription_log_observer.py +19 -7
pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
pipecat/observers/turn_tracking_observer.py +26 -1
pipecat/pipeline/base_pipeline.py +5 -7
pipecat/pipeline/base_task.py +52 -9
pipecat/pipeline/parallel_pipeline.py +121 -177
pipecat/pipeline/pipeline.py +129 -20
pipecat/pipeline/runner.py +50 -1
pipecat/pipeline/sync_parallel_pipeline.py +132 -32
pipecat/pipeline/task.py +263 -280
pipecat/pipeline/task_observer.py +85 -34
pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
pipecat/processors/aggregators/gated.py +25 -24
pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
pipecat/processors/aggregators/llm_response.py +398 -89
pipecat/processors/aggregators/openai_llm_context.py +161 -13
pipecat/processors/aggregators/sentence.py +25 -14
pipecat/processors/aggregators/user_response.py +28 -3
pipecat/processors/aggregators/vision_image_frame.py +24 -14
pipecat/processors/async_generator.py +28 -0
pipecat/processors/audio/audio_buffer_processor.py +78 -37
pipecat/processors/consumer_processor.py +25 -6
pipecat/processors/filters/frame_filter.py +23 -0
pipecat/processors/filters/function_filter.py +30 -0
pipecat/processors/filters/identity_filter.py +17 -2
pipecat/processors/filters/null_filter.py +24 -1
pipecat/processors/filters/stt_mute_filter.py +56 -21
pipecat/processors/filters/wake_check_filter.py +46 -3
pipecat/processors/filters/wake_notifier_filter.py +21 -3
pipecat/processors/frame_processor.py +488 -131
pipecat/processors/frameworks/langchain.py +38 -3
pipecat/processors/frameworks/rtvi.py +719 -34
pipecat/processors/gstreamer/pipeline_source.py +41 -0
pipecat/processors/idle_frame_processor.py +26 -3
pipecat/processors/logger.py +23 -0
pipecat/processors/metrics/frame_processor_metrics.py +77 -4
pipecat/processors/metrics/sentry.py +42 -4
pipecat/processors/producer_processor.py +34 -14
pipecat/processors/text_transformer.py +22 -10
pipecat/processors/transcript_processor.py +48 -29
pipecat/processors/user_idle_processor.py +31 -21
pipecat/runner/__init__.py +1 -0
pipecat/runner/daily.py +132 -0
pipecat/runner/livekit.py +148 -0
pipecat/runner/run.py +543 -0
pipecat/runner/types.py +67 -0
pipecat/runner/utils.py +515 -0
pipecat/serializers/base_serializer.py +42 -0
pipecat/serializers/exotel.py +17 -6
pipecat/serializers/genesys.py +95 -0
pipecat/serializers/livekit.py +33 -0
pipecat/serializers/plivo.py +16 -15
pipecat/serializers/protobuf.py +37 -1
pipecat/serializers/telnyx.py +18 -17
pipecat/serializers/twilio.py +32 -16
pipecat/services/ai_service.py +5 -3
pipecat/services/anthropic/llm.py +113 -43
pipecat/services/assemblyai/models.py +63 -5
pipecat/services/assemblyai/stt.py +64 -11
pipecat/services/asyncai/__init__.py +0 -0
pipecat/services/asyncai/tts.py +501 -0
pipecat/services/aws/llm.py +185 -111
pipecat/services/aws/stt.py +217 -23
pipecat/services/aws/tts.py +118 -52
pipecat/services/aws/utils.py +101 -5
pipecat/services/aws_nova_sonic/aws.py +82 -64
pipecat/services/aws_nova_sonic/context.py +15 -6
pipecat/services/azure/common.py +10 -2
pipecat/services/azure/image.py +32 -0
pipecat/services/azure/llm.py +9 -7
pipecat/services/azure/stt.py +65 -2
pipecat/services/azure/tts.py +154 -23
pipecat/services/cartesia/stt.py +125 -8
pipecat/services/cartesia/tts.py +102 -38
pipecat/services/cerebras/llm.py +15 -23
pipecat/services/deepgram/stt.py +19 -11
pipecat/services/deepgram/tts.py +36 -0
pipecat/services/deepseek/llm.py +14 -23
pipecat/services/elevenlabs/tts.py +330 -64
pipecat/services/fal/image.py +43 -0
pipecat/services/fal/stt.py +48 -10
pipecat/services/fireworks/llm.py +14 -21
pipecat/services/fish/tts.py +109 -9
pipecat/services/gemini_multimodal_live/__init__.py +1 -0
pipecat/services/gemini_multimodal_live/events.py +83 -2
pipecat/services/gemini_multimodal_live/file_api.py +189 -0
pipecat/services/gemini_multimodal_live/gemini.py +218 -21
pipecat/services/gladia/config.py +17 -10
pipecat/services/gladia/stt.py +82 -36
pipecat/services/google/frames.py +40 -0
pipecat/services/google/google.py +2 -0
pipecat/services/google/image.py +39 -2
pipecat/services/google/llm.py +176 -58
pipecat/services/google/llm_openai.py +26 -4
pipecat/services/google/llm_vertex.py +37 -15
pipecat/services/google/rtvi.py +41 -0
pipecat/services/google/stt.py +65 -17
pipecat/services/google/test-google-chirp.py +45 -0
pipecat/services/google/tts.py +390 -19
pipecat/services/grok/llm.py +8 -6
pipecat/services/groq/llm.py +8 -6
pipecat/services/groq/stt.py +13 -9
pipecat/services/groq/tts.py +40 -0
pipecat/services/hamsa/__init__.py +9 -0
pipecat/services/hamsa/stt.py +241 -0
pipecat/services/heygen/__init__.py +5 -0
pipecat/services/heygen/api.py +281 -0
pipecat/services/heygen/client.py +620 -0
pipecat/services/heygen/video.py +338 -0
pipecat/services/image_service.py +5 -3
pipecat/services/inworld/__init__.py +1 -0
pipecat/services/inworld/tts.py +592 -0
pipecat/services/llm_service.py +127 -45
pipecat/services/lmnt/tts.py +80 -7
pipecat/services/mcp_service.py +85 -44
pipecat/services/mem0/memory.py +42 -13
pipecat/services/minimax/tts.py +74 -15
pipecat/services/mistral/__init__.py +0 -0
pipecat/services/mistral/llm.py +185 -0
pipecat/services/moondream/vision.py +55 -10
pipecat/services/neuphonic/tts.py +275 -48
pipecat/services/nim/llm.py +8 -6
pipecat/services/ollama/llm.py +27 -7
pipecat/services/openai/base_llm.py +54 -16
pipecat/services/openai/image.py +30 -0
pipecat/services/openai/llm.py +7 -5
pipecat/services/openai/stt.py +13 -9
pipecat/services/openai/tts.py +42 -10
pipecat/services/openai_realtime_beta/azure.py +11 -9
pipecat/services/openai_realtime_beta/context.py +7 -5
pipecat/services/openai_realtime_beta/events.py +10 -7
pipecat/services/openai_realtime_beta/openai.py +37 -18
pipecat/services/openpipe/llm.py +30 -24
pipecat/services/openrouter/llm.py +9 -7
pipecat/services/perplexity/llm.py +15 -19
pipecat/services/piper/tts.py +26 -12
pipecat/services/playht/tts.py +227 -65
pipecat/services/qwen/llm.py +8 -6
pipecat/services/rime/tts.py +128 -17
pipecat/services/riva/stt.py +160 -22
pipecat/services/riva/tts.py +67 -2
pipecat/services/sambanova/llm.py +19 -17
pipecat/services/sambanova/stt.py +14 -8
pipecat/services/sarvam/tts.py +60 -13
pipecat/services/simli/video.py +82 -21
pipecat/services/soniox/__init__.py +0 -0
pipecat/services/soniox/stt.py +398 -0
pipecat/services/speechmatics/stt.py +29 -17
pipecat/services/stt_service.py +47 -11
pipecat/services/tavus/video.py +94 -25
pipecat/services/together/llm.py +8 -6
pipecat/services/tts_service.py +77 -53
pipecat/services/ultravox/stt.py +46 -43
pipecat/services/vision_service.py +5 -3
pipecat/services/websocket_service.py +12 -11
pipecat/services/whisper/base_stt.py +58 -12
pipecat/services/whisper/stt.py +69 -58
pipecat/services/xtts/tts.py +59 -2
pipecat/sync/base_notifier.py +19 -0
pipecat/sync/event_notifier.py +24 -0
pipecat/tests/utils.py +73 -5
pipecat/transcriptions/language.py +24 -0
pipecat/transports/base_input.py +112 -8
pipecat/transports/base_output.py +235 -13
pipecat/transports/base_transport.py +119 -0
pipecat/transports/local/audio.py +76 -0
pipecat/transports/local/tk.py +84 -0
pipecat/transports/network/fastapi_websocket.py +174 -15
pipecat/transports/network/small_webrtc.py +383 -39
pipecat/transports/network/webrtc_connection.py +214 -8
pipecat/transports/network/websocket_client.py +171 -1
pipecat/transports/network/websocket_server.py +147 -9
pipecat/transports/services/daily.py +792 -70
pipecat/transports/services/helpers/daily_rest.py +122 -129
pipecat/transports/services/livekit.py +339 -4
pipecat/transports/services/tavus.py +273 -38
pipecat/utils/asyncio/task_manager.py +92 -186
pipecat/utils/base_object.py +83 -1
pipecat/utils/network.py +2 -0
pipecat/utils/string.py +114 -58
pipecat/utils/text/base_text_aggregator.py +44 -13
pipecat/utils/text/base_text_filter.py +46 -0
pipecat/utils/text/markdown_text_filter.py +70 -14
pipecat/utils/text/pattern_pair_aggregator.py +18 -14
pipecat/utils/text/simple_text_aggregator.py +43 -2
pipecat/utils/text/skip_tags_aggregator.py +21 -13
pipecat/utils/time.py +36 -0
pipecat/utils/tracing/class_decorators.py +32 -7
pipecat/utils/tracing/conversation_context_provider.py +12 -2
pipecat/utils/tracing/service_attributes.py +80 -64
pipecat/utils/tracing/service_decorators.py +48 -21
pipecat/utils/tracing/setup.py +13 -7
pipecat/utils/tracing/turn_context_provider.py +12 -2
pipecat/utils/tracing/turn_trace_observer.py +27 -0
pipecat/utils/utils.py +14 -14
dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
pipecat/examples/daily_runner.py +0 -64
pipecat/examples/run.py +0 -265
pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
pipecat/utils/asyncio/watchdog_event.py +0 -42
pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
pipecat/utils/asyncio/watchdog_queue.py +0 -48
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
/pipecat/{examples → extensions}/__init__.py +0 -0

pipecat/audio/mixers/soundfile_mixer.py CHANGED Viewed

@@ -4,6 +4,13 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""Soundfile-based audio mixer for file playback integration.
+Provides an audio mixer that combines incoming audio with audio loaded from
+files using the soundfile library. Supports multiple audio formats and
+runtime configuration changes.
+"""
 import asyncio
 from typing import Any, Dict, Mapping
@@ -24,7 +31,9 @@ except ModuleNotFoundError as e:
 class SoundfileMixer(BaseAudioMixer):
-    """This is an audio mixer that mixes incoming audio with audio from a
+    """Audio mixer that combines incoming audio with file-based audio.
+    This is an audio mixer that mixes incoming audio with audio from a
     file. It uses the soundfile library to load files so it supports multiple
     formats. The audio files need to only have one channel (mono) and it needs
     to match the sample rate of the output transport.
@@ -33,7 +42,6 @@ class SoundfileMixer(BaseAudioMixer):
     `MixerUpdateSettingsFrame` has the following settings available: `sound`
     (str) and `volume` (float) to be able to update to a different sound file or
     to change the volume at runtime.
     """
     def __init__(
@@ -46,6 +54,16 @@ class SoundfileMixer(BaseAudioMixer):
         loop: bool = True,
         **kwargs,
     ):
+        """Initialize the soundfile mixer.
+        Args:
+            sound_files: Mapping of sound names to file paths for loading.
+            default_sound: Name of the default sound to play initially.
+            volume: Mixing volume level (0.0 to 1.0). Defaults to 0.4.
+            mixing: Whether mixing is initially enabled. Defaults to True.
+            loop: Whether to loop audio files when they end. Defaults to True.
+            **kwargs: Additional arguments passed to parent class.
+        """
         super().__init__(**kwargs)
         self._sound_files = sound_files
         self._volume = volume
@@ -58,14 +76,28 @@ class SoundfileMixer(BaseAudioMixer):
         self._loop = loop
     async def start(self, sample_rate: int):
+        """Initialize the mixer and load all sound files.
+        Args:
+            sample_rate: The sample rate of the output transport in Hz.
+        """
         self._sample_rate = sample_rate
         for sound_name, file_name in self._sound_files.items():
             await asyncio.to_thread(self._load_sound_file, sound_name, file_name)
     async def stop(self):
+        """Clean up mixer resources.
+        Currently performs no cleanup as sound data is managed by garbage collection.
+        """
         pass
     async def process_frame(self, frame: MixerControlFrame):
+        """Process mixer control frames to update settings or enable/disable mixing.
+        Args:
+            frame: The mixer control frame to process.
+        """
         if isinstance(frame, MixerUpdateSettingsFrame):
             await self._update_settings(frame)
         elif isinstance(frame, MixerEnableFrame):
@@ -73,12 +105,22 @@ class SoundfileMixer(BaseAudioMixer):
         pass
     async def mix(self, audio: bytes) -> bytes:
+        """Mix transport audio with the current sound file.
+        Args:
+            audio: Raw audio bytes from the transport to mix.
+        Returns:
+            Mixed audio bytes combining transport and file audio.
+        """
         return self._mix_with_sound(audio)
     async def _enable_mixing(self, enable: bool):
+        """Enable or disable audio mixing."""
         self._mixing = enable
     async def _update_settings(self, frame: MixerUpdateSettingsFrame):
+        """Update mixer settings from a control frame."""
         for setting, value in frame.settings.items():
             match setting:
                 case "sound":
@@ -89,6 +131,11 @@ class SoundfileMixer(BaseAudioMixer):
                     await self._update_loop(value)
     async def _change_sound(self, sound: str):
+        """Change the currently playing sound file.
+        Args:
+            sound: Name of the sound file to switch to.
+        """
         if sound in self._sound_files:
             self._current_sound = sound
             self._sound_pos = 0
@@ -96,12 +143,15 @@ class SoundfileMixer(BaseAudioMixer):
             logger.error(f"Sound {sound} is not available")
     async def _update_volume(self, volume: float):
+        """Update the mixing volume level."""
         self._volume = volume
     async def _update_loop(self, loop: bool):
+        """Update the looping behavior."""
         self._loop = loop
     def _load_sound_file(self, sound_name: str, file_name: str):
+        """Load an audio file into memory for mixing."""
         try:
             logger.debug(f"Loading mixer sound from {file_name}")
             sound, sample_rate = sf.read(file_name, dtype="int16")
@@ -118,10 +168,7 @@ class SoundfileMixer(BaseAudioMixer):
             logger.error(f"Unable to open file {file_name}: {e}")
     def _mix_with_sound(self, audio: bytes):
-        """Mixes raw audio frames with chunks of the same length from the sound
-        file.
-        """
+        """Mix raw audio frames with chunks of the same length from the sound file."""
         if not self._mixing or not self._current_sound in self._sounds:
             return audio

pipecat/audio/resamplers/base_audio_resampler.py CHANGED Viewed

@@ -4,27 +4,35 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""Base audio resampler interface for Pipecat.
+This module defines the abstract base class for audio resampling implementations,
+providing a common interface for converting audio between different sample rates.
+"""
 from abc import ABC, abstractmethod
 class BaseAudioResampler(ABC):
-    """Abstract base class for audio resampling. This class defines an
-    interface for audio resampling implementations.
+    """Abstract base class for audio resampling implementations.
+    This class defines the interface that all audio resampling implementations
+    must follow, providing a standardized way to convert audio data between
+    different sample rates.
     """
     @abstractmethod
     async def resample(self, audio: bytes, in_rate: int, out_rate: int) -> bytes:
-        """
-        Resamples the given audio data to a different sample rate.
+        """Resamples the given audio data to a different sample rate.
         This is an abstract method that must be implemented in subclasses.
-        Parameters:
-            audio (bytes): The audio data to be resampled, represented as a byte string.
-            in_rate (int): The original sample rate of the audio data (in Hz).
-            out_rate (int): The desired sample rate for the resampled audio data (in Hz).
+        Args:
+            audio: The audio data to be resampled, as raw bytes.
+            in_rate: The original sample rate of the audio data in Hz.
+            out_rate: The desired sample rate for the output audio in Hz.
         Returns:
-            bytes: The resampled audio data as a byte string.
+            The resampled audio data as raw bytes.
         """
         pass

pipecat/audio/resamplers/resampy_resampler.py CHANGED Viewed

@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""Resampy-based audio resampler implementation.
+This module provides an audio resampler that uses the resampy library
+for high-quality audio sample rate conversion.
+"""
 import numpy as np
 import resampy
@@ -11,12 +17,31 @@ from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler
 class ResampyResampler(BaseAudioResampler):
-    """Audio resampler implementation using the resampy library."""
+    """Audio resampler implementation using the resampy library.
+    This resampler uses the resampy library's Kaiser windowing filter
+    for high-quality audio resampling with good performance characteristics.
+    """
     def __init__(self, **kwargs):
+        """Initialize the resampy resampler.
+        Args:
+            **kwargs: Additional keyword arguments (currently unused).
+        """
         pass
     async def resample(self, audio: bytes, in_rate: int, out_rate: int) -> bytes:
+        """Resample audio data using resampy library.
+        Args:
+            audio: Input audio data as raw bytes (16-bit signed integers).
+            in_rate: Original sample rate in Hz.
+            out_rate: Target sample rate in Hz.
+        Returns:
+            Resampled audio data as raw bytes (16-bit signed integers).
+        """
         if in_rate == out_rate:
             return audio
         audio_data = np.frombuffer(audio, dtype=np.int16)

pipecat/audio/resamplers/soxr_resampler.py CHANGED Viewed

@@ -4,6 +4,17 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""SoX-based audio resampler implementation.
+This module provides an audio resampler that uses the SoX resampler library
+for very high-quality audio sample rate conversion.
+When to use the SOXRAudioResampler:
+1. For batch processing of complete audio files
+2. When you have all the audio data available at once
+"""
 import numpy as np
 import soxr
@@ -11,12 +22,32 @@ from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler
 class SOXRAudioResampler(BaseAudioResampler):
-    """Audio resampler implementation using the SoX resampler library."""
+    """Audio resampler implementation using the SoX resampler library.
+    This resampler uses the SoX resampler library configured for very high
+    quality (VHQ) resampling, providing excellent audio quality at the cost
+    of additional computational overhead.
+    """
     def __init__(self, **kwargs):
+        """Initialize the SoX audio resampler.
+        Args:
+            **kwargs: Additional keyword arguments (currently unused).
+        """
         pass
     async def resample(self, audio: bytes, in_rate: int, out_rate: int) -> bytes:
+        """Resample audio data using SoX resampler library.
+        Args:
+            audio: Input audio data as raw bytes (16-bit signed integers).
+            in_rate: Original sample rate in Hz.
+            out_rate: Target sample rate in Hz.
+        Returns:
+            Resampled audio data as raw bytes (16-bit signed integers).
+        """
         if in_rate == out_rate:
             return audio
         audio_data = np.frombuffer(audio, dtype=np.int16)

pipecat/audio/resamplers/soxr_stream_resampler.py ADDED Viewed

@@ -0,0 +1,101 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+"""SoX-based audio resampler stream implementation.
+This module provides an audio resampler that uses the SoX ResampleStream library
+for very high quality audio sample rate conversion.
+When to use the SOXRStreamAudioResampler:
+1. For real-time processing scenarios
+2. When dealing with very long audio signals
+3. When processing audio in chunks or streams
+4. When you need to reuse the same resampler configuration multiple times, as it saves initialization overhead
+"""
+import time
+import numpy as np
+import soxr
+from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler
+CLEAR_STREAM_AFTER_SECS = 0.2
+class SOXRStreamAudioResampler(BaseAudioResampler):
+    """Audio resampler implementation using the SoX ResampleStream library.
+    This resampler uses the SoX ResampleStream library configured for very high
+    quality (VHQ) resampling, providing excellent audio quality at the cost
+    of additional computational overhead.
+    It keeps an internal history which avoids clicks at chunk boundaries.
+    Notes:
+        - Only supports mono audio (1 channel).
+        - Input must be 16-bit signed PCM audio as raw bytes.
+    """
+    def __init__(self, **kwargs):
+        """Initialize the resampler.
+        Args:
+            **kwargs: Additional keyword arguments (currently unused).
+        """
+        self._in_rate: float | None = None
+        self._out_rate: float | None = None
+        self._last_resample_time: float = 0
+        self._soxr_stream: soxr.ResampleStream | None = None
+    def _initialize(self, in_rate: float, out_rate: float):
+        self._in_rate = in_rate
+        self._out_rate = out_rate
+        self._last_resample_time = time.time()
+        self._soxr_stream = soxr.ResampleStream(
+            in_rate=in_rate, out_rate=out_rate, num_channels=1, quality="VHQ", dtype="int16"
+        )
+    def _maybe_clear_internal_state(self):
+        current_time = time.time()
+        time_since_last_resample = current_time - self._last_resample_time
+        # If more than CLEAR_STREAM_AFTER_SECS seconds have passed, clear the resampler state
+        if time_since_last_resample > CLEAR_STREAM_AFTER_SECS:
+            if self._soxr_stream:
+                self._soxr_stream.clear()
+        self._last_resample_time = current_time
+    def _maybe_initialize_sox_stream(self, in_rate: int, out_rate: int):
+        if self._soxr_stream is None:
+            self._initialize(in_rate, out_rate)
+        else:
+            self._maybe_clear_internal_state()
+        if self._in_rate != in_rate or self._out_rate != out_rate:
+            raise ValueError(
+                f"SOXRStreamAudioResampler cannot be reused with different sample rates: "
+                f"expected {self._in_rate}->{self._out_rate}, got {in_rate}->{out_rate}"
+            )
+    async def resample(self, audio: bytes, in_rate: int, out_rate: int) -> bytes:
+        """Resample audio data using soxr.ResampleStream resampler library.
+        Args:
+            audio: Input audio data as raw bytes (16-bit signed integers).
+            in_rate: Original sample rate in Hz.
+            out_rate: Target sample rate in Hz.
+        Returns:
+            Resampled audio data as raw bytes (16-bit signed integers).
+        """
+        if in_rate == out_rate:
+            return audio
+        self._maybe_initialize_sox_stream(in_rate, out_rate)
+        audio_data = np.frombuffer(audio, dtype=np.int16)
+        resampled_audio = self._soxr_stream.resample_chunk(audio_data)
+        result = resampled_audio.astype(np.int16).tobytes()
+        return result

pipecat/audio/utils.py CHANGED Viewed

@@ -4,21 +4,91 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""Audio utility functions for Pipecat.
+This module provides common audio processing utilities including mixing,
+format conversion, volume calculation, and codec transformations for
+various audio formats used in Pipecat pipelines.
+"""
 import audioop
 import numpy as np
 import pyloudnorm as pyln
-import soxr
 from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler
 from pipecat.audio.resamplers.soxr_resampler import SOXRAudioResampler
+from pipecat.audio.resamplers.soxr_stream_resampler import SOXRStreamAudioResampler
+# Normal speech usually results in many samples between ±500 to ±5000, depending on loudness and mic gain.
+# So we are using a threshold that is well below what real speech produces.
+SPEAKING_THRESHOLD = 20
 def create_default_resampler(**kwargs) -> BaseAudioResampler:
+    """Create a default audio resampler instance.
+    .. deprecated:: 0.0.74
+        This function is deprecated and will be removed in a future version.
+        Use `create_stream_resampler` for real-time processing scenarios or
+        `create_file_resampler` for batch processing of complete audio files.
+    Args:
+        **kwargs: Additional keyword arguments passed to the resampler constructor.
+    Returns:
+        A configured SOXRAudioResampler instance.
+    """
+    import warnings
+    warnings.warn(
+        "`create_default_resampler` is deprecated. "
+        "Use `create_stream_resampler` for real-time processing scenarios or "
+        "`create_file_resampler` for batch processing of complete audio files.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
     return SOXRAudioResampler(**kwargs)
+def create_file_resampler(**kwargs) -> BaseAudioResampler:
+    """Create an audio resampler instance for batch processing of complete audio files.
+    Args:
+        **kwargs: Additional keyword arguments passed to the resampler constructor.
+    Returns:
+        A configured SOXRAudioResampler instance.
+    """
+    return SOXRAudioResampler(**kwargs)
+def create_stream_resampler(**kwargs) -> BaseAudioResampler:
+    """Create a stream audio resampler instance.
+    Args:
+        **kwargs: Additional keyword arguments passed to the resampler constructor.
+    Returns:
+        A configured SOXRStreamAudioResampler instance.
+    """
+    return SOXRStreamAudioResampler(**kwargs)
 def mix_audio(audio1: bytes, audio2: bytes) -> bytes:
+    """Mix two audio streams together by adding their samples.
+    Both audio streams are assumed to be 16-bit signed integer PCM data.
+    If the streams have different lengths, the shorter one is zero-padded
+    to match the longer stream.
+    Args:
+        audio1: First audio stream as raw bytes (16-bit signed integers).
+        audio2: Second audio stream as raw bytes (16-bit signed integers).
+    Returns:
+        Mixed audio data as raw bytes with samples clipped to 16-bit range.
+    """
     data1 = np.frombuffer(audio1, dtype=np.int16)
     data2 = np.frombuffer(audio2, dtype=np.int16)
@@ -37,6 +107,19 @@ def mix_audio(audio1: bytes, audio2: bytes) -> bytes:
 def interleave_stereo_audio(left_audio: bytes, right_audio: bytes) -> bytes:
+    """Interleave left and right mono audio channels into stereo audio.
+    Takes two mono audio streams and combines them into a single stereo
+    stream by interleaving the samples (L, R, L, R, ...). If the channels
+    have different lengths, both are truncated to the shorter length.
+    Args:
+        left_audio: Left channel audio as raw bytes (16-bit signed integers).
+        right_audio: Right channel audio as raw bytes (16-bit signed integers).
+    Returns:
+        Interleaved stereo audio data as raw bytes.
+    """
     left = np.frombuffer(left_audio, dtype=np.int16)
     right = np.frombuffer(right_audio, dtype=np.int16)
@@ -50,12 +133,34 @@ def interleave_stereo_audio(left_audio: bytes, right_audio: bytes) -> bytes:
 def normalize_value(value, min_value, max_value):
+    """Normalize a value to the range [0, 1] and clamp it to bounds.
+    Args:
+        value: The value to normalize.
+        min_value: The minimum value of the input range.
+        max_value: The maximum value of the input range.
+    Returns:
+        Normalized value clamped to the range [0, 1].
+    """
     normalized = (value - min_value) / (max_value - min_value)
     normalized_clamped = max(0, min(1, normalized))
     return normalized_clamped
 def calculate_audio_volume(audio: bytes, sample_rate: int) -> float:
+    """Calculate the loudness level of audio data using EBU R128 standard.
+    Uses the pyloudnorm library to calculate integrated loudness according
+    to the EBU R128 recommendation, then normalizes the result to [0, 1].
+    Args:
+        audio: Audio data as raw bytes (16-bit signed integers).
+        sample_rate: Sample rate of the audio in Hz.
+    Returns:
+        Normalized loudness value between 0 (quiet) and 1 (loud).
+    """
     audio_np = np.frombuffer(audio, dtype=np.int16)
     audio_float = audio_np.astype(np.float64)
@@ -71,12 +176,37 @@ def calculate_audio_volume(audio: bytes, sample_rate: int) -> float:
 def exp_smoothing(value: float, prev_value: float, factor: float) -> float:
+    """Apply exponential smoothing to a value.
+    Exponential smoothing is used to reduce noise in time-series data by
+    giving more weight to recent values while still considering historical data.
+    Args:
+        value: The new value to incorporate.
+        prev_value: The previous smoothed value.
+        factor: Smoothing factor between 0 and 1. Higher values give more
+                weight to the new value.
+    Returns:
+        The exponentially smoothed value.
+    """
     return prev_value + factor * (value - prev_value)
 async def ulaw_to_pcm(
     ulaw_bytes: bytes, in_rate: int, out_rate: int, resampler: BaseAudioResampler
 ):
+    """Convert μ-law encoded audio to PCM and optionally resample.
+    Args:
+        ulaw_bytes: μ-law encoded audio data as raw bytes.
+        in_rate: Original sample rate of the μ-law audio in Hz.
+        out_rate: Desired output sample rate in Hz.
+        resampler: Audio resampler instance for rate conversion.
+    Returns:
+        PCM audio data as raw bytes at the specified output rate.
+    """
     # Convert μ-law to PCM
     in_pcm_bytes = audioop.ulaw2lin(ulaw_bytes, 2)
@@ -87,6 +217,17 @@ async def ulaw_to_pcm(
 async def pcm_to_ulaw(pcm_bytes: bytes, in_rate: int, out_rate: int, resampler: BaseAudioResampler):
+    """Convert PCM audio to μ-law encoding and optionally resample.
+    Args:
+        pcm_bytes: PCM audio data as raw bytes (16-bit signed integers).
+        in_rate: Original sample rate of the PCM audio in Hz.
+        out_rate: Desired output sample rate in Hz.
+        resampler: Audio resampler instance for rate conversion.
+    Returns:
+        μ-law encoded audio data as raw bytes at the specified output rate.
+    """
     # Resample
     in_pcm_bytes = await resampler.resample(pcm_bytes, in_rate, out_rate)
@@ -99,6 +240,17 @@ async def pcm_to_ulaw(pcm_bytes: bytes, in_rate: int, out_rate: int, resampler:
 async def alaw_to_pcm(
     alaw_bytes: bytes, in_rate: int, out_rate: int, resampler: BaseAudioResampler
 ) -> bytes:
+    """Convert A-law encoded audio to PCM and optionally resample.
+    Args:
+        alaw_bytes: A-law encoded audio data as raw bytes.
+        in_rate: Original sample rate of the A-law audio in Hz.
+        out_rate: Desired output sample rate in Hz.
+        resampler: Audio resampler instance for rate conversion.
+    Returns:
+        PCM audio data as raw bytes at the specified output rate.
+    """
     # Convert a-law to PCM
     in_pcm_bytes = audioop.alaw2lin(alaw_bytes, 2)
@@ -109,6 +261,17 @@ async def alaw_to_pcm(
 async def pcm_to_alaw(pcm_bytes: bytes, in_rate: int, out_rate: int, resampler: BaseAudioResampler):
+    """Convert PCM audio to A-law encoding and optionally resample.
+    Args:
+        pcm_bytes: PCM audio data as raw bytes (16-bit signed integers).
+        in_rate: Original sample rate of the PCM audio in Hz.
+        out_rate: Desired output sample rate in Hz.
+        resampler: Audio resampler instance for rate conversion.
+    Returns:
+        A-law encoded audio data as raw bytes at the specified output rate.
+    """
     # Resample
     in_pcm_bytes = await resampler.resample(pcm_bytes, in_rate, out_rate)
@@ -116,3 +279,33 @@ async def pcm_to_alaw(pcm_bytes: bytes, in_rate: int, out_rate: int, resampler:
     out_alaw_bytes = audioop.lin2alaw(in_pcm_bytes, 2)
     return out_alaw_bytes
def is_silence(pcm_bytes: bytes) -> bool:
    """Report whether a chunk of 16-bit PCM audio is silent.

    The bytes are decoded as int16 samples and the largest absolute
    amplitude is compared against the module-level ``SPEAKING_THRESHOLD``.
    The audio is expected to be clean speech or complete silence, without
    background noise.

    Args:
        pcm_bytes: Raw PCM audio data as bytes (16-bit signed integers).

    Returns:
        True if no sample's magnitude exceeds ``SPEAKING_THRESHOLD``
        (an empty buffer counts as silence), False otherwise.

    Raises:
        ValueError: Propagated from ``np.frombuffer`` when the buffer length
            is not a whole number of 16-bit samples.

    Note:
        Normal speech typically produces amplitude values between ±500 and
        ±5000, depending on factors like loudness and microphone gain. The
        threshold is meant to sit well below typical speech levels.
    """
    # An empty chunk has no sample above the threshold; without this guard
    # the max() reduction below raises ValueError on a zero-size array.
    if not pcm_bytes:
        return True

    # Convert raw audio bytes to a NumPy array of int16 samples.
    samples = np.frombuffer(pcm_bytes, dtype=np.int16)

    # Widen before abs(): in int16, abs(-32768) wraps back to -32768, which
    # would make a full-scale negative sample look quieter than silence.
    peak = np.abs(samples.astype(np.int32)).max()

    # Peak at or below the threshold counts as silence; return a plain bool
    # rather than a NumPy scalar to match the annotation.
    return bool(peak <= SPEAKING_THRESHOLD)

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl