dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
@@ -3,6 +3,9 @@
3
3
  #
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
+
7
+ """OpenAI Realtime LLM adapter for Pipecat."""
8
+
6
9
  from typing import Any, Dict, List, Union
7
10
 
8
11
  from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
@@ -11,8 +14,22 @@ from pipecat.adapters.schemas.tools_schema import ToolsSchema
11
14
 
12
15
 
13
16
  class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
17
+ """LLM adapter for OpenAI Realtime API function calling.
18
+
19
+ Converts Pipecat's tool schemas into the specific format required by
20
+ OpenAI's Realtime API for function calling capabilities.
21
+ """
22
+
14
23
  @staticmethod
15
24
  def _to_openai_realtime_function_format(function: FunctionSchema) -> Dict[str, Any]:
25
+ """Convert a function schema to OpenAI Realtime format.
26
+
27
+ Args:
28
+ function: The function schema to convert.
29
+
30
+ Returns:
31
+ Dictionary in OpenAI Realtime function format.
32
+ """
16
33
  return {
17
34
  "type": "function",
18
35
  "name": function.name,
@@ -25,10 +42,13 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
25
42
  }
26
43
 
27
44
  def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
28
- """Converts function schemas to Openai Realtime function-calling format.
45
+ """Convert tool schemas to OpenAI Realtime function-calling format.
29
46
 
30
- :return: Openai Realtime formatted function call definition.
31
- """
47
+ Args:
48
+ tools_schema: The tools schema containing functions to convert.
32
49
 
50
+ Returns:
51
+ List of function definitions in OpenAI Realtime format.
52
+ """
33
53
  functions_schema = tools_schema.standard_tools
34
54
  return [self._to_openai_realtime_function_format(func) for func in functions_schema]
@@ -4,44 +4,68 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Base audio filter interface for input transport audio processing.
8
+
9
+ This module provides the abstract base class for implementing audio filters
10
+ that process audio data before VAD and downstream processing in input transports.
11
+ """
12
+
7
13
  from abc import ABC, abstractmethod
8
14
 
9
15
  from pipecat.frames.frames import FilterControlFrame
10
16
 
11
17
 
12
18
  class BaseAudioFilter(ABC):
13
- """This is a base class for input transport audio filters. If an audio
19
+ """Base class for input transport audio filters.
20
+
21
+ This is a base class for input transport audio filters. If an audio
14
22
  filter is provided to the input transport it will be used to process audio
15
23
  before VAD and before pushing it downstream. There are control frames to
16
24
  update filter settings or to enable or disable the filter at runtime.
17
-
18
25
  """
19
26
 
20
27
  @abstractmethod
21
28
  async def start(self, sample_rate: int):
22
- """This will be called from the input transport when the transport is
29
+ """Initialize the filter when the input transport starts.
30
+
31
+ This will be called from the input transport when the transport is
23
32
  started. It can be used to initialize the filter. The input transport
24
33
  sample rate is provided so the filter can adjust to that sample rate.
25
34
 
35
+ Args:
36
+ sample_rate: The sample rate of the input transport in Hz.
26
37
  """
27
38
  pass
28
39
 
29
40
  @abstractmethod
30
41
  async def stop(self):
31
- """This will be called from the input transport when the transport is
32
- stopping.
42
+ """Clean up the filter when the input transport stops.
33
43
 
44
+ This will be called from the input transport when the transport is
45
+ stopping.
34
46
  """
35
47
  pass
36
48
 
37
49
  @abstractmethod
38
50
  async def process_frame(self, frame: FilterControlFrame):
39
- """This will be called when the input transport receives a
51
+ """Process control frames for runtime filter configuration.
52
+
53
+ This will be called when the input transport receives a
40
54
  FilterControlFrame.
41
55
 
56
+ Args:
57
+ frame: The control frame containing filter commands or settings.
42
58
  """
43
59
  pass
44
60
 
45
61
  @abstractmethod
46
62
  async def filter(self, audio: bytes) -> bytes:
63
+ """Apply the audio filter to the provided audio data.
64
+
65
+ Args:
66
+ audio: Raw audio data as bytes to be filtered.
67
+
68
+ Returns:
69
+ Filtered audio data as bytes.
70
+ """
47
71
  pass
@@ -4,6 +4,12 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Koala noise suppression audio filter for Pipecat.
8
+
9
+ This module provides an audio filter implementation using PicoVoice's Koala
10
+ Noise Suppression engine to reduce background noise in audio streams.
11
+ """
12
+
7
13
  from typing import Sequence
8
14
 
9
15
  import numpy as np
@@ -21,12 +27,19 @@ except ModuleNotFoundError as e:
21
27
 
22
28
 
23
29
  class KoalaFilter(BaseAudioFilter):
24
- """This is an audio filter that uses Koala Noise Suppression (from
25
- PicoVoice).
30
+ """Audio filter using Koala Noise Suppression from PicoVoice.
26
31
 
32
+ Provides real-time noise suppression for audio streams using PicoVoice's
33
+ Koala engine. The filter buffers audio data to match Koala's required
34
+ frame length and processes it in chunks.
27
35
  """
28
36
 
29
37
  def __init__(self, *, access_key: str) -> None:
38
+ """Initialize the Koala noise suppression filter.
39
+
40
+ Args:
41
+ access_key: PicoVoice access key for Koala engine authentication.
42
+ """
30
43
  self._access_key = access_key
31
44
 
32
45
  self._filtering = True
@@ -36,6 +49,11 @@ class KoalaFilter(BaseAudioFilter):
36
49
  self._audio_buffer = bytearray()
37
50
 
38
51
  async def start(self, sample_rate: int):
52
+ """Initialize the filter with the transport's sample rate.
53
+
54
+ Args:
55
+ sample_rate: The sample rate of the input transport in Hz.
56
+ """
39
57
  self._sample_rate = sample_rate
40
58
  if self._sample_rate != self._koala.sample_rate:
41
59
  logger.warning(
@@ -44,13 +62,30 @@ class KoalaFilter(BaseAudioFilter):
44
62
  self._koala_ready = False
45
63
 
46
64
  async def stop(self):
65
+ """Clean up the Koala engine when stopping."""
47
66
  self._koala.reset()
48
67
 
49
68
  async def process_frame(self, frame: FilterControlFrame):
69
+ """Process control frames to enable/disable filtering.
70
+
71
+ Args:
72
+ frame: The control frame containing filter commands.
73
+ """
50
74
  if isinstance(frame, FilterEnableFrame):
51
75
  self._filtering = frame.enable
52
76
 
53
77
  async def filter(self, audio: bytes) -> bytes:
78
+ """Apply Koala noise suppression to audio data.
79
+
80
+ Buffers incoming audio and processes it in chunks that match Koala's
81
+ required frame length. Returns filtered audio data.
82
+
83
+ Args:
84
+ audio: Raw audio data as bytes to be filtered.
85
+
86
+ Returns:
87
+ Noise-suppressed audio data as bytes.
88
+ """
54
89
  if not self._koala_ready or not self._filtering:
55
90
  return audio
56
91
 
@@ -4,6 +4,12 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Krisp noise reduction audio filter for Pipecat.
8
+
9
+ This module provides an audio filter implementation using Krisp's noise
10
+ reduction technology to suppress background noise in audio streams.
11
+ """
12
+
7
13
  import os
8
14
 
9
15
  import numpy as np
@@ -21,14 +27,27 @@ except ModuleNotFoundError as e:
21
27
 
22
28
 
23
29
  class KrispProcessorManager:
24
- """
25
- Ensures that only one KrispAudioProcessor instance exists for the entire program.
30
+ """Singleton manager for KrispAudioProcessor instances.
31
+
32
+ Ensures that only one KrispAudioProcessor instance exists for the entire
33
+ program.
26
34
  """
27
35
 
28
36
  _krisp_instance = None
29
37
 
30
38
  @classmethod
31
39
  def get_processor(cls, sample_rate: int, sample_type: str, channels: int, model_path: str):
40
+ """Get or create a KrispAudioProcessor instance.
41
+
42
+ Args:
43
+ sample_rate: Audio sample rate in Hz.
44
+ sample_type: Audio sample type (e.g., "PCM_16").
45
+ channels: Number of audio channels.
46
+ model_path: Path to the Krisp model file.
47
+
48
+ Returns:
49
+ Shared KrispAudioProcessor instance.
50
+ """
32
51
  if cls._krisp_instance is None:
33
52
  cls._krisp_instance = KrispAudioProcessor(
34
53
  sample_rate, sample_type, channels, model_path
@@ -37,14 +56,26 @@ class KrispProcessorManager:
37
56
 
38
57
 
39
58
  class KrispFilter(BaseAudioFilter):
59
+ """Audio filter using Krisp noise reduction technology.
60
+
61
+ Provides real-time noise reduction for audio streams using Krisp's
62
+ proprietary noise suppression algorithms. Requires a Krisp model file
63
+ for operation.
64
+ """
65
+
40
66
  def __init__(
41
67
  self, sample_type: str = "PCM_16", channels: int = 1, model_path: str = None
42
68
  ) -> None:
43
- """Initializes the KrispAudioProcessor with customizable audio processing settings.
69
+ """Initialize the Krisp noise reduction filter.
44
70
 
45
- :param sample_type: The type of audio sample, default is 'PCM_16'.
46
- :param channels: Number of audio channels, default is 1.
47
- :param model_path: Path to the Krisp model; defaults to environment variable KRISP_MODEL_PATH if not provided.
71
+ Args:
72
+ sample_type: The audio sample format. Defaults to "PCM_16".
73
+ channels: Number of audio channels. Defaults to 1.
74
+ model_path: Path to the Krisp model file. If None, uses KRISP_MODEL_PATH
75
+ environment variable.
76
+
77
+ Raises:
78
+ ValueError: If model_path is not provided and KRISP_MODEL_PATH is not set.
48
79
  """
49
80
  super().__init__()
50
81
 
@@ -63,19 +94,41 @@ class KrispFilter(BaseAudioFilter):
63
94
  self._krisp_processor = None
64
95
 
65
96
  async def start(self, sample_rate: int):
97
+ """Initialize the Krisp processor with the transport's sample rate.
98
+
99
+ Args:
100
+ sample_rate: The sample rate of the input transport in Hz.
101
+ """
66
102
  self._sample_rate = sample_rate
67
103
  self._krisp_processor = KrispProcessorManager.get_processor(
68
104
  self._sample_rate, self._sample_type, self._channels, self._model_path
69
105
  )
70
106
 
71
107
  async def stop(self):
108
+ """Clean up the Krisp processor when stopping."""
72
109
  self._krisp_processor = None
73
110
 
74
111
  async def process_frame(self, frame: FilterControlFrame):
112
+ """Process control frames to enable/disable filtering.
113
+
114
+ Args:
115
+ frame: The control frame containing filter commands.
116
+ """
75
117
  if isinstance(frame, FilterEnableFrame):
76
118
  self._filtering = frame.enable
77
119
 
78
120
  async def filter(self, audio: bytes) -> bytes:
121
+ """Apply Krisp noise reduction to audio data.
122
+
123
+ Converts audio to float32, applies Krisp noise reduction processing,
124
+ and returns the filtered audio clipped to int16 range.
125
+
126
+ Args:
127
+ audio: Raw audio data as bytes to be filtered.
128
+
129
+ Returns:
130
+ Noise-reduced audio data as bytes.
131
+ """
79
132
  if not self._filtering:
80
133
  return audio
81
134
 
@@ -4,6 +4,13 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Noisereduce audio filter for Pipecat.
8
+
9
+ This module provides an audio filter implementation using the noisereduce
10
+ library to reduce background noise in audio streams through spectral
11
+ gating algorithms.
12
+ """
13
+
7
14
  import numpy as np
8
15
  from loguru import logger
9
16
 
@@ -21,21 +28,51 @@ except ModuleNotFoundError as e:
21
28
 
22
29
 
23
30
  class NoisereduceFilter(BaseAudioFilter):
31
+ """Audio filter using the noisereduce library for noise suppression.
32
+
33
+ Applies spectral gating noise reduction algorithms to suppress background
34
+ noise in audio streams. Uses the noisereduce library's default noise
35
+ reduction parameters.
36
+ """
37
+
24
38
  def __init__(self) -> None:
39
+ """Initialize the noisereduce filter."""
25
40
  self._filtering = True
26
41
  self._sample_rate = 0
27
42
 
28
43
  async def start(self, sample_rate: int):
44
+ """Initialize the filter with the transport's sample rate.
45
+
46
+ Args:
47
+ sample_rate: The sample rate of the input transport in Hz.
48
+ """
29
49
  self._sample_rate = sample_rate
30
50
 
31
51
  async def stop(self):
52
+ """Clean up the filter when stopping."""
32
53
  pass
33
54
 
34
55
  async def process_frame(self, frame: FilterControlFrame):
56
+ """Process control frames to enable/disable filtering.
57
+
58
+ Args:
59
+ frame: The control frame containing filter commands.
60
+ """
35
61
  if isinstance(frame, FilterEnableFrame):
36
62
  self._filtering = frame.enable
37
63
 
38
64
  async def filter(self, audio: bytes) -> bytes:
65
+ """Apply noise reduction to audio data using spectral gating.
66
+
67
+ Converts audio to float32, applies noisereduce processing, and returns
68
+ the filtered audio clipped to int16 range.
69
+
70
+ Args:
71
+ audio: Raw audio data as bytes to be filtered.
72
+
73
+ Returns:
74
+ Noise-reduced audio data as bytes.
75
+ """
39
76
  if not self._filtering:
40
77
  return audio
41
78
 
@@ -4,31 +4,51 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Base interruption strategy for determining when users can interrupt bot speech."""
8
+
7
9
  from abc import ABC, abstractmethod
8
10
 
9
11
 
10
12
  class BaseInterruptionStrategy(ABC):
11
- """This is a base class for interruption strategies. Interruption strategies
13
+ """Base class for interruption strategies.
14
+
15
+ This is a base class for interruption strategies. Interruption strategies
12
16
  decide when the user can interrupt the bot while the bot is speaking. For
13
17
  example, there could be strategies based on audio volume or strategies based
14
18
  on the number of words the user spoke.
15
-
16
19
  """
17
20
 
18
21
  async def append_audio(self, audio: bytes, sample_rate: int):
19
- """Appends audio to the strategy. Not all strategies handle audio."""
22
+ """Append audio data to the strategy for analysis.
23
+
24
+ Not all strategies handle audio. Default implementation does nothing.
25
+
26
+ Args:
27
+ audio: Raw audio bytes to append.
28
+ sample_rate: Sample rate of the audio data in Hz.
29
+ """
20
30
  pass
21
31
 
22
32
  async def append_text(self, text: str):
23
- """Appends text to the strategy. Not all strategies handle text."""
33
+ """Append text data to the strategy for analysis.
34
+
35
+ Not all strategies handle text. Default implementation does nothing.
36
+
37
+ Args:
38
+ text: Text string to append for analysis.
39
+ """
24
40
  pass
25
41
 
26
42
  @abstractmethod
27
43
  async def should_interrupt(self) -> bool:
28
- """This is called when the user stops speaking and it's time to decide
44
+ """Determine if the user should interrupt the bot.
45
+
46
+ This is called when the user stops speaking and it's time to decide
29
47
  whether the user should interrupt the bot. The decision will be based on
30
48
  the aggregated audio and/or text.
31
49
 
50
+ Returns:
51
+ True if the user should interrupt the bot, False otherwise.
32
52
  """
33
53
  pass
34
54
 
@@ -4,31 +4,47 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Minimum words interruption strategy for word count-based interruptions."""
8
+
7
9
  from loguru import logger
8
10
 
9
11
  from pipecat.audio.interruptions.base_interruption_strategy import BaseInterruptionStrategy
10
12
 
11
13
 
12
14
  class MinWordsInterruptionStrategy(BaseInterruptionStrategy):
13
- """This is an interruption strategy based on a minimum number of words said
15
+ """Interruption strategy based on minimum number of words spoken.
16
+
17
+ This is an interruption strategy based on a minimum number of words said
14
18
  by the user. That is, the strategy will be true if the user has said at
15
19
  least that amount of words.
16
-
17
20
  """
18
21
 
19
22
  def __init__(self, *, min_words: int):
23
+ """Initialize the minimum words interruption strategy.
24
+
25
+ Args:
26
+ min_words: Minimum number of words required to trigger an interruption.
27
+ """
20
28
  super().__init__()
21
29
  self._min_words = min_words
22
30
  self._text = ""
23
31
 
24
32
  async def append_text(self, text: str):
25
- """Appends text for later analysis. Not all strategies need to handle
26
- text.
33
+ """Append text for word count analysis.
34
+
35
+ Args:
36
+ text: Text string to append to the accumulated text.
27
37
 
38
+ Note: Not all strategies need to handle text.
28
39
  """
29
40
  self._text += text
30
41
 
31
42
  async def should_interrupt(self) -> bool:
43
+ """Check if the minimum word count has been reached.
44
+
45
+ Returns:
46
+ True if the user has spoken at least the minimum number of words.
47
+ """
32
48
  word_count = len(self._text.split())
33
49
  interrupt = word_count >= self._min_words
34
50
  logger.debug(
@@ -37,4 +53,5 @@ class MinWordsInterruptionStrategy(BaseInterruptionStrategy):
37
53
  return interrupt
38
54
 
39
55
  async def reset(self):
56
+ """Reset the accumulated text for the next analysis cycle."""
40
57
  self._text = ""
@@ -4,50 +4,73 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Base audio mixer for output transport integration.
8
+
9
+ Provides the abstract base class for audio mixers that can be integrated with
10
+ output transports to mix incoming audio with generated audio from the mixer.
11
+ """
12
+
7
13
  from abc import ABC, abstractmethod
8
14
 
9
15
  from pipecat.frames.frames import MixerControlFrame
10
16
 
11
17
 
12
18
  class BaseAudioMixer(ABC):
13
- """This is a base class for output transport audio mixers. If an audio mixer
19
+ """Base class for output transport audio mixers.
20
+
21
+ This is a base class for output transport audio mixers. If an audio mixer
14
22
  is provided to the output transport it will be used to mix the audio frames
15
23
  coming into the transport with the audio generated from the mixer. There
16
24
  are control frames to update mixer settings or to enable or disable the
17
25
  mixer at runtime.
18
-
19
26
  """
20
27
 
21
28
  @abstractmethod
22
29
  async def start(self, sample_rate: int):
23
- """This will be called from the output transport when the transport is
30
+ """Initialize the mixer when the output transport starts.
31
+
32
+ This will be called from the output transport when the transport is
24
33
  started. It can be used to initialize the mixer. The output transport
25
34
  sample rate is provided so the mixer can adjust to that sample rate.
26
35
 
36
+ Args:
37
+ sample_rate: The sample rate of the output transport in Hz.
27
38
  """
28
39
  pass
29
40
 
30
41
  @abstractmethod
31
42
  async def stop(self):
32
- """This will be called from the output transport when the transport is
33
- stopping.
43
+ """Clean up the mixer when the output transport stops.
34
44
 
45
+ This will be called from the output transport when the transport is
46
+ stopping.
35
47
  """
36
48
  pass
37
49
 
38
50
  @abstractmethod
39
51
  async def process_frame(self, frame: MixerControlFrame):
40
- """This will be called when the output transport receives a
52
+ """Process mixer control frames from the transport.
53
+
54
+ This will be called when the output transport receives a
41
55
  MixerControlFrame.
42
56
 
57
+ Args:
58
+ frame: The mixer control frame to process.
43
59
  """
44
60
  pass
45
61
 
46
62
  @abstractmethod
47
63
  async def mix(self, audio: bytes) -> bytes:
48
- """This is called with the audio that is about to be sent from the
64
+ """Mix transport audio with mixer-generated audio.
65
+
66
+ This is called with the audio that is about to be sent from the
49
67
  output transport and that should be mixed with the mixer audio if the
50
68
  mixer is enabled.
51
69
 
70
+ Args:
71
+ audio: Raw audio bytes from the transport to mix.
72
+
73
+ Returns:
74
+ Mixed audio bytes combining transport and mixer audio.
52
75
  """
53
76
  pass