dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
|
@@ -3,6 +3,9 @@
|
|
|
3
3
|
#
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
|
+
|
|
7
|
+
"""OpenAI Realtime LLM adapter for Pipecat."""
|
|
8
|
+
|
|
6
9
|
from typing import Any, Dict, List, Union
|
|
7
10
|
|
|
8
11
|
from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
|
|
@@ -11,8 +14,22 @@ from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
|
|
11
14
|
|
|
12
15
|
|
|
13
16
|
class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
|
|
17
|
+
"""LLM adapter for OpenAI Realtime API function calling.
|
|
18
|
+
|
|
19
|
+
Converts Pipecat's tool schemas into the specific format required by
|
|
20
|
+
OpenAI's Realtime API for function calling capabilities.
|
|
21
|
+
"""
|
|
22
|
+
|
|
14
23
|
@staticmethod
|
|
15
24
|
def _to_openai_realtime_function_format(function: FunctionSchema) -> Dict[str, Any]:
|
|
25
|
+
"""Convert a function schema to OpenAI Realtime format.
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
function: The function schema to convert.
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
Dictionary in OpenAI Realtime function format.
|
|
32
|
+
"""
|
|
16
33
|
return {
|
|
17
34
|
"type": "function",
|
|
18
35
|
"name": function.name,
|
|
@@ -25,10 +42,13 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
|
|
|
25
42
|
}
|
|
26
43
|
|
|
27
44
|
def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
|
|
28
|
-
"""
|
|
45
|
+
"""Convert tool schemas to OpenAI Realtime function-calling format.
|
|
29
46
|
|
|
30
|
-
:
|
|
31
|
-
|
|
47
|
+
Args:
|
|
48
|
+
tools_schema: The tools schema containing functions to convert.
|
|
32
49
|
|
|
50
|
+
Returns:
|
|
51
|
+
List of function definitions in OpenAI Realtime format.
|
|
52
|
+
"""
|
|
33
53
|
functions_schema = tools_schema.standard_tools
|
|
34
54
|
return [self._to_openai_realtime_function_format(func) for func in functions_schema]
|
|
@@ -4,44 +4,68 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""Base audio filter interface for input transport audio processing.
|
|
8
|
+
|
|
9
|
+
This module provides the abstract base class for implementing audio filters
|
|
10
|
+
that process audio data before VAD and downstream processing in input transports.
|
|
11
|
+
"""
|
|
12
|
+
|
|
7
13
|
from abc import ABC, abstractmethod
|
|
8
14
|
|
|
9
15
|
from pipecat.frames.frames import FilterControlFrame
|
|
10
16
|
|
|
11
17
|
|
|
12
18
|
class BaseAudioFilter(ABC):
|
|
13
|
-
"""
|
|
19
|
+
"""Base class for input transport audio filters.
|
|
20
|
+
|
|
21
|
+
This is a base class for input transport audio filters. If an audio
|
|
14
22
|
filter is provided to the input transport it will be used to process audio
|
|
15
23
|
before VAD and before pushing it downstream. There are control frames to
|
|
16
24
|
update filter settings or to enable or disable the filter at runtime.
|
|
17
|
-
|
|
18
25
|
"""
|
|
19
26
|
|
|
20
27
|
@abstractmethod
|
|
21
28
|
async def start(self, sample_rate: int):
|
|
22
|
-
"""
|
|
29
|
+
"""Initialize the filter when the input transport starts.
|
|
30
|
+
|
|
31
|
+
This will be called from the input transport when the transport is
|
|
23
32
|
started. It can be used to initialize the filter. The input transport
|
|
24
33
|
sample rate is provided so the filter can adjust to that sample rate.
|
|
25
34
|
|
|
35
|
+
Args:
|
|
36
|
+
sample_rate: The sample rate of the input transport in Hz.
|
|
26
37
|
"""
|
|
27
38
|
pass
|
|
28
39
|
|
|
29
40
|
@abstractmethod
|
|
30
41
|
async def stop(self):
|
|
31
|
-
"""
|
|
32
|
-
stopping.
|
|
42
|
+
"""Clean up the filter when the input transport stops.
|
|
33
43
|
|
|
44
|
+
This will be called from the input transport when the transport is
|
|
45
|
+
stopping.
|
|
34
46
|
"""
|
|
35
47
|
pass
|
|
36
48
|
|
|
37
49
|
@abstractmethod
|
|
38
50
|
async def process_frame(self, frame: FilterControlFrame):
|
|
39
|
-
"""
|
|
51
|
+
"""Process control frames for runtime filter configuration.
|
|
52
|
+
|
|
53
|
+
This will be called when the input transport receives a
|
|
40
54
|
FilterControlFrame.
|
|
41
55
|
|
|
56
|
+
Args:
|
|
57
|
+
frame: The control frame containing filter commands or settings.
|
|
42
58
|
"""
|
|
43
59
|
pass
|
|
44
60
|
|
|
45
61
|
@abstractmethod
|
|
46
62
|
async def filter(self, audio: bytes) -> bytes:
|
|
63
|
+
"""Apply the audio filter to the provided audio data.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
audio: Raw audio data as bytes to be filtered.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
Filtered audio data as bytes.
|
|
70
|
+
"""
|
|
47
71
|
pass
|
|
@@ -4,6 +4,12 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""Koala noise suppression audio filter for Pipecat.
|
|
8
|
+
|
|
9
|
+
This module provides an audio filter implementation using PicoVoice's Koala
|
|
10
|
+
Noise Suppression engine to reduce background noise in audio streams.
|
|
11
|
+
"""
|
|
12
|
+
|
|
7
13
|
from typing import Sequence
|
|
8
14
|
|
|
9
15
|
import numpy as np
|
|
@@ -21,12 +27,19 @@ except ModuleNotFoundError as e:
|
|
|
21
27
|
|
|
22
28
|
|
|
23
29
|
class KoalaFilter(BaseAudioFilter):
|
|
24
|
-
"""
|
|
25
|
-
PicoVoice).
|
|
30
|
+
"""Audio filter using Koala Noise Suppression from PicoVoice.
|
|
26
31
|
|
|
32
|
+
Provides real-time noise suppression for audio streams using PicoVoice's
|
|
33
|
+
Koala engine. The filter buffers audio data to match Koala's required
|
|
34
|
+
frame length and processes it in chunks.
|
|
27
35
|
"""
|
|
28
36
|
|
|
29
37
|
def __init__(self, *, access_key: str) -> None:
|
|
38
|
+
"""Initialize the Koala noise suppression filter.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
access_key: PicoVoice access key for Koala engine authentication.
|
|
42
|
+
"""
|
|
30
43
|
self._access_key = access_key
|
|
31
44
|
|
|
32
45
|
self._filtering = True
|
|
@@ -36,6 +49,11 @@ class KoalaFilter(BaseAudioFilter):
|
|
|
36
49
|
self._audio_buffer = bytearray()
|
|
37
50
|
|
|
38
51
|
async def start(self, sample_rate: int):
|
|
52
|
+
"""Initialize the filter with the transport's sample rate.
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
sample_rate: The sample rate of the input transport in Hz.
|
|
56
|
+
"""
|
|
39
57
|
self._sample_rate = sample_rate
|
|
40
58
|
if self._sample_rate != self._koala.sample_rate:
|
|
41
59
|
logger.warning(
|
|
@@ -44,13 +62,30 @@ class KoalaFilter(BaseAudioFilter):
|
|
|
44
62
|
self._koala_ready = False
|
|
45
63
|
|
|
46
64
|
async def stop(self):
|
|
65
|
+
"""Clean up the Koala engine when stopping."""
|
|
47
66
|
self._koala.reset()
|
|
48
67
|
|
|
49
68
|
async def process_frame(self, frame: FilterControlFrame):
|
|
69
|
+
"""Process control frames to enable/disable filtering.
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
frame: The control frame containing filter commands.
|
|
73
|
+
"""
|
|
50
74
|
if isinstance(frame, FilterEnableFrame):
|
|
51
75
|
self._filtering = frame.enable
|
|
52
76
|
|
|
53
77
|
async def filter(self, audio: bytes) -> bytes:
|
|
78
|
+
"""Apply Koala noise suppression to audio data.
|
|
79
|
+
|
|
80
|
+
Buffers incoming audio and processes it in chunks that match Koala's
|
|
81
|
+
required frame length. Returns filtered audio data.
|
|
82
|
+
|
|
83
|
+
Args:
|
|
84
|
+
audio: Raw audio data as bytes to be filtered.
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
Noise-suppressed audio data as bytes.
|
|
88
|
+
"""
|
|
54
89
|
if not self._koala_ready or not self._filtering:
|
|
55
90
|
return audio
|
|
56
91
|
|
|
@@ -4,6 +4,12 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""Krisp noise reduction audio filter for Pipecat.
|
|
8
|
+
|
|
9
|
+
This module provides an audio filter implementation using Krisp's noise
|
|
10
|
+
reduction technology to suppress background noise in audio streams.
|
|
11
|
+
"""
|
|
12
|
+
|
|
7
13
|
import os
|
|
8
14
|
|
|
9
15
|
import numpy as np
|
|
@@ -21,14 +27,27 @@ except ModuleNotFoundError as e:
|
|
|
21
27
|
|
|
22
28
|
|
|
23
29
|
class KrispProcessorManager:
|
|
24
|
-
"""
|
|
25
|
-
|
|
30
|
+
"""Singleton manager for KrispAudioProcessor instances.
|
|
31
|
+
|
|
32
|
+
Ensures that only one KrispAudioProcessor instance exists for the entire
|
|
33
|
+
program.
|
|
26
34
|
"""
|
|
27
35
|
|
|
28
36
|
_krisp_instance = None
|
|
29
37
|
|
|
30
38
|
@classmethod
|
|
31
39
|
def get_processor(cls, sample_rate: int, sample_type: str, channels: int, model_path: str):
|
|
40
|
+
"""Get or create a KrispAudioProcessor instance.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
sample_rate: Audio sample rate in Hz.
|
|
44
|
+
sample_type: Audio sample type (e.g., "PCM_16").
|
|
45
|
+
channels: Number of audio channels.
|
|
46
|
+
model_path: Path to the Krisp model file.
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
Shared KrispAudioProcessor instance.
|
|
50
|
+
"""
|
|
32
51
|
if cls._krisp_instance is None:
|
|
33
52
|
cls._krisp_instance = KrispAudioProcessor(
|
|
34
53
|
sample_rate, sample_type, channels, model_path
|
|
@@ -37,14 +56,26 @@ class KrispProcessorManager:
|
|
|
37
56
|
|
|
38
57
|
|
|
39
58
|
class KrispFilter(BaseAudioFilter):
|
|
59
|
+
"""Audio filter using Krisp noise reduction technology.
|
|
60
|
+
|
|
61
|
+
Provides real-time noise reduction for audio streams using Krisp's
|
|
62
|
+
proprietary noise suppression algorithms. Requires a Krisp model file
|
|
63
|
+
for operation.
|
|
64
|
+
"""
|
|
65
|
+
|
|
40
66
|
def __init__(
|
|
41
67
|
self, sample_type: str = "PCM_16", channels: int = 1, model_path: str = None
|
|
42
68
|
) -> None:
|
|
43
|
-
"""
|
|
69
|
+
"""Initialize the Krisp noise reduction filter.
|
|
44
70
|
|
|
45
|
-
:
|
|
46
|
-
|
|
47
|
-
|
|
71
|
+
Args:
|
|
72
|
+
sample_type: The audio sample format. Defaults to "PCM_16".
|
|
73
|
+
channels: Number of audio channels. Defaults to 1.
|
|
74
|
+
model_path: Path to the Krisp model file. If None, uses KRISP_MODEL_PATH
|
|
75
|
+
environment variable.
|
|
76
|
+
|
|
77
|
+
Raises:
|
|
78
|
+
ValueError: If model_path is not provided and KRISP_MODEL_PATH is not set.
|
|
48
79
|
"""
|
|
49
80
|
super().__init__()
|
|
50
81
|
|
|
@@ -63,19 +94,41 @@ class KrispFilter(BaseAudioFilter):
|
|
|
63
94
|
self._krisp_processor = None
|
|
64
95
|
|
|
65
96
|
async def start(self, sample_rate: int):
|
|
97
|
+
"""Initialize the Krisp processor with the transport's sample rate.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
sample_rate: The sample rate of the input transport in Hz.
|
|
101
|
+
"""
|
|
66
102
|
self._sample_rate = sample_rate
|
|
67
103
|
self._krisp_processor = KrispProcessorManager.get_processor(
|
|
68
104
|
self._sample_rate, self._sample_type, self._channels, self._model_path
|
|
69
105
|
)
|
|
70
106
|
|
|
71
107
|
async def stop(self):
|
|
108
|
+
"""Clean up the Krisp processor when stopping."""
|
|
72
109
|
self._krisp_processor = None
|
|
73
110
|
|
|
74
111
|
async def process_frame(self, frame: FilterControlFrame):
|
|
112
|
+
"""Process control frames to enable/disable filtering.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
frame: The control frame containing filter commands.
|
|
116
|
+
"""
|
|
75
117
|
if isinstance(frame, FilterEnableFrame):
|
|
76
118
|
self._filtering = frame.enable
|
|
77
119
|
|
|
78
120
|
async def filter(self, audio: bytes) -> bytes:
|
|
121
|
+
"""Apply Krisp noise reduction to audio data.
|
|
122
|
+
|
|
123
|
+
Converts audio to float32, applies Krisp noise reduction processing,
|
|
124
|
+
and returns the filtered audio clipped to int16 range.
|
|
125
|
+
|
|
126
|
+
Args:
|
|
127
|
+
audio: Raw audio data as bytes to be filtered.
|
|
128
|
+
|
|
129
|
+
Returns:
|
|
130
|
+
Noise-reduced audio data as bytes.
|
|
131
|
+
"""
|
|
79
132
|
if not self._filtering:
|
|
80
133
|
return audio
|
|
81
134
|
|
|
@@ -4,6 +4,13 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""Noisereduce audio filter for Pipecat.
|
|
8
|
+
|
|
9
|
+
This module provides an audio filter implementation using the noisereduce
|
|
10
|
+
library to reduce background noise in audio streams through spectral
|
|
11
|
+
gating algorithms.
|
|
12
|
+
"""
|
|
13
|
+
|
|
7
14
|
import numpy as np
|
|
8
15
|
from loguru import logger
|
|
9
16
|
|
|
@@ -21,21 +28,51 @@ except ModuleNotFoundError as e:
|
|
|
21
28
|
|
|
22
29
|
|
|
23
30
|
class NoisereduceFilter(BaseAudioFilter):
|
|
31
|
+
"""Audio filter using the noisereduce library for noise suppression.
|
|
32
|
+
|
|
33
|
+
Applies spectral gating noise reduction algorithms to suppress background
|
|
34
|
+
noise in audio streams. Uses the noisereduce library's default noise
|
|
35
|
+
reduction parameters.
|
|
36
|
+
"""
|
|
37
|
+
|
|
24
38
|
def __init__(self) -> None:
|
|
39
|
+
"""Initialize the noisereduce filter."""
|
|
25
40
|
self._filtering = True
|
|
26
41
|
self._sample_rate = 0
|
|
27
42
|
|
|
28
43
|
async def start(self, sample_rate: int):
|
|
44
|
+
"""Initialize the filter with the transport's sample rate.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
sample_rate: The sample rate of the input transport in Hz.
|
|
48
|
+
"""
|
|
29
49
|
self._sample_rate = sample_rate
|
|
30
50
|
|
|
31
51
|
async def stop(self):
|
|
52
|
+
"""Clean up the filter when stopping."""
|
|
32
53
|
pass
|
|
33
54
|
|
|
34
55
|
async def process_frame(self, frame: FilterControlFrame):
|
|
56
|
+
"""Process control frames to enable/disable filtering.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
frame: The control frame containing filter commands.
|
|
60
|
+
"""
|
|
35
61
|
if isinstance(frame, FilterEnableFrame):
|
|
36
62
|
self._filtering = frame.enable
|
|
37
63
|
|
|
38
64
|
async def filter(self, audio: bytes) -> bytes:
|
|
65
|
+
"""Apply noise reduction to audio data using spectral gating.
|
|
66
|
+
|
|
67
|
+
Converts audio to float32, applies noisereduce processing, and returns
|
|
68
|
+
the filtered audio clipped to int16 range.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
audio: Raw audio data as bytes to be filtered.
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
Noise-reduced audio data as bytes.
|
|
75
|
+
"""
|
|
39
76
|
if not self._filtering:
|
|
40
77
|
return audio
|
|
41
78
|
|
|
@@ -4,31 +4,51 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""Base interruption strategy for determining when users can interrupt bot speech."""
|
|
8
|
+
|
|
7
9
|
from abc import ABC, abstractmethod
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
class BaseInterruptionStrategy(ABC):
|
|
11
|
-
"""
|
|
13
|
+
"""Base class for interruption strategies.
|
|
14
|
+
|
|
15
|
+
This is a base class for interruption strategies. Interruption strategies
|
|
12
16
|
decide when the user can interrupt the bot while the bot is speaking. For
|
|
13
17
|
example, there could be strategies based on audio volume or strategies based
|
|
14
18
|
on the number of words the user spoke.
|
|
15
|
-
|
|
16
19
|
"""
|
|
17
20
|
|
|
18
21
|
async def append_audio(self, audio: bytes, sample_rate: int):
|
|
19
|
-
"""
|
|
22
|
+
"""Append audio data to the strategy for analysis.
|
|
23
|
+
|
|
24
|
+
Not all strategies handle audio. Default implementation does nothing.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
audio: Raw audio bytes to append.
|
|
28
|
+
sample_rate: Sample rate of the audio data in Hz.
|
|
29
|
+
"""
|
|
20
30
|
pass
|
|
21
31
|
|
|
22
32
|
async def append_text(self, text: str):
|
|
23
|
-
"""
|
|
33
|
+
"""Append text data to the strategy for analysis.
|
|
34
|
+
|
|
35
|
+
Not all strategies handle text. Default implementation does nothing.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
text: Text string to append for analysis.
|
|
39
|
+
"""
|
|
24
40
|
pass
|
|
25
41
|
|
|
26
42
|
@abstractmethod
|
|
27
43
|
async def should_interrupt(self) -> bool:
|
|
28
|
-
"""
|
|
44
|
+
"""Determine if the user should interrupt the bot.
|
|
45
|
+
|
|
46
|
+
This is called when the user stops speaking and it's time to decide
|
|
29
47
|
whether the user should interrupt the bot. The decision will be based on
|
|
30
48
|
the aggregated audio and/or text.
|
|
31
49
|
|
|
50
|
+
Returns:
|
|
51
|
+
True if the user should interrupt the bot, False otherwise.
|
|
32
52
|
"""
|
|
33
53
|
pass
|
|
34
54
|
|
|
@@ -4,31 +4,47 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""Minimum words interruption strategy for word count-based interruptions."""
|
|
8
|
+
|
|
7
9
|
from loguru import logger
|
|
8
10
|
|
|
9
11
|
from pipecat.audio.interruptions.base_interruption_strategy import BaseInterruptionStrategy
|
|
10
12
|
|
|
11
13
|
|
|
12
14
|
class MinWordsInterruptionStrategy(BaseInterruptionStrategy):
|
|
13
|
-
"""
|
|
15
|
+
"""Interruption strategy based on minimum number of words spoken.
|
|
16
|
+
|
|
17
|
+
This is an interruption strategy based on a minimum number of words said
|
|
14
18
|
by the user. That is, the strategy will be true if the user has said at
|
|
15
19
|
least that number of words.
|
|
16
|
-
|
|
17
20
|
"""
|
|
18
21
|
|
|
19
22
|
def __init__(self, *, min_words: int):
|
|
23
|
+
"""Initialize the minimum words interruption strategy.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
min_words: Minimum number of words required to trigger an interruption.
|
|
27
|
+
"""
|
|
20
28
|
super().__init__()
|
|
21
29
|
self._min_words = min_words
|
|
22
30
|
self._text = ""
|
|
23
31
|
|
|
24
32
|
async def append_text(self, text: str):
|
|
25
|
-
"""
|
|
26
|
-
|
|
33
|
+
"""Append text for word count analysis.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
text: Text string to append to the accumulated text.
|
|
27
37
|
|
|
38
|
+
Note: Not all strategies need to handle text.
|
|
28
39
|
"""
|
|
29
40
|
self._text += text
|
|
30
41
|
|
|
31
42
|
async def should_interrupt(self) -> bool:
|
|
43
|
+
"""Check if the minimum word count has been reached.
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
True if the user has spoken at least the minimum number of words.
|
|
47
|
+
"""
|
|
32
48
|
word_count = len(self._text.split())
|
|
33
49
|
interrupt = word_count >= self._min_words
|
|
34
50
|
logger.debug(
|
|
@@ -37,4 +53,5 @@ class MinWordsInterruptionStrategy(BaseInterruptionStrategy):
|
|
|
37
53
|
return interrupt
|
|
38
54
|
|
|
39
55
|
async def reset(self):
|
|
56
|
+
"""Reset the accumulated text for the next analysis cycle."""
|
|
40
57
|
self._text = ""
|
|
@@ -4,50 +4,73 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""Base audio mixer for output transport integration.
|
|
8
|
+
|
|
9
|
+
Provides the abstract base class for audio mixers that can be integrated with
|
|
10
|
+
output transports to mix incoming audio with generated audio from the mixer.
|
|
11
|
+
"""
|
|
12
|
+
|
|
7
13
|
from abc import ABC, abstractmethod
|
|
8
14
|
|
|
9
15
|
from pipecat.frames.frames import MixerControlFrame
|
|
10
16
|
|
|
11
17
|
|
|
12
18
|
class BaseAudioMixer(ABC):
|
|
13
|
-
"""
|
|
19
|
+
"""Base class for output transport audio mixers.
|
|
20
|
+
|
|
21
|
+
This is a base class for output transport audio mixers. If an audio mixer
|
|
14
22
|
is provided to the output transport it will be used to mix the audio frames
|
|
15
23
|
coming into to the transport with the audio generated from the mixer. There
|
|
16
24
|
are control frames to update mixer settings or to enable or disable the
|
|
17
25
|
mixer at runtime.
|
|
18
|
-
|
|
19
26
|
"""
|
|
20
27
|
|
|
21
28
|
@abstractmethod
|
|
22
29
|
async def start(self, sample_rate: int):
|
|
23
|
-
"""
|
|
30
|
+
"""Initialize the mixer when the output transport starts.
|
|
31
|
+
|
|
32
|
+
This will be called from the output transport when the transport is
|
|
24
33
|
started. It can be used to initialize the mixer. The output transport
|
|
25
34
|
sample rate is provided so the mixer can adjust to that sample rate.
|
|
26
35
|
|
|
36
|
+
Args:
|
|
37
|
+
sample_rate: The sample rate of the output transport in Hz.
|
|
27
38
|
"""
|
|
28
39
|
pass
|
|
29
40
|
|
|
30
41
|
@abstractmethod
|
|
31
42
|
async def stop(self):
|
|
32
|
-
"""
|
|
33
|
-
stopping.
|
|
43
|
+
"""Clean up the mixer when the output transport stops.
|
|
34
44
|
|
|
45
|
+
This will be called from the output transport when the transport is
|
|
46
|
+
stopping.
|
|
35
47
|
"""
|
|
36
48
|
pass
|
|
37
49
|
|
|
38
50
|
@abstractmethod
|
|
39
51
|
async def process_frame(self, frame: MixerControlFrame):
|
|
40
|
-
"""
|
|
52
|
+
"""Process mixer control frames from the transport.
|
|
53
|
+
|
|
54
|
+
This will be called when the output transport receives a
|
|
41
55
|
MixerControlFrame.
|
|
42
56
|
|
|
57
|
+
Args:
|
|
58
|
+
frame: The mixer control frame to process.
|
|
43
59
|
"""
|
|
44
60
|
pass
|
|
45
61
|
|
|
46
62
|
@abstractmethod
|
|
47
63
|
async def mix(self, audio: bytes) -> bytes:
|
|
48
|
-
"""
|
|
64
|
+
"""Mix transport audio with mixer-generated audio.
|
|
65
|
+
|
|
66
|
+
This is called with the audio that is about to be sent from the
|
|
49
67
|
output transport and that should be mixed with the mixer audio if the
|
|
50
68
|
mixer is enabled.
|
|
51
69
|
|
|
70
|
+
Args:
|
|
71
|
+
audio: Raw audio bytes from the transport to mix.
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
Mixed audio bytes combining transport and mixer audio.
|
|
52
75
|
"""
|
|
53
76
|
pass
|