dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/audio/vad/silero.py

@@ -4,6 +4,13 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Silero Voice Activity Detection (VAD) implementation for Pipecat.
+
+This module provides a VAD analyzer based on the Silero VAD ONNX model,
+which can detect voice activity in audio streams with high accuracy.
+Supports 8kHz and 16kHz sample rates.
+"""
+
 import time
 from typing import Optional
 
@@ -25,11 +32,20 @@ except ModuleNotFoundError as e:
 
 
 class SileroOnnxModel:
-    def __init__(self, path, force_onnx_cpu=True):
-        import numpy as np
+    """ONNX runtime wrapper for the Silero VAD model.
 
-        global np
+    Provides voice activity detection using the pre-trained Silero VAD model
+    with ONNX runtime for efficient inference. Handles model state management
+    and input validation for audio processing.
+    """
 
+    def __init__(self, path, force_onnx_cpu=True):
+        """Initialize the Silero ONNX model.
+
+        Args:
+            path: Path to the ONNX model file.
+            force_onnx_cpu: Whether to force CPU execution provider.
+        """
         opts = onnxruntime.SessionOptions()
         opts.inter_op_num_threads = 1
         opts.intra_op_num_threads = 1
@@ -45,6 +61,7 @@ class SileroOnnxModel:
         self.sample_rates = [8000, 16000]
 
     def _validate_input(self, x, sr: int):
+        """Validate and preprocess input audio data."""
         if np.ndim(x) == 1:
             x = np.expand_dims(x, 0)
         if np.ndim(x) > 2:
@@ -60,12 +77,18 @@
         return x, sr
 
     def reset_states(self, batch_size=1):
+        """Reset the internal model states.
+
+        Args:
+            batch_size: Batch size for state initialization. Defaults to 1.
+        """
         self._state = np.zeros((2, batch_size, 128), dtype="float32")
         self._context = np.zeros((batch_size, 0), dtype="float32")
         self._last_sr = 0
         self._last_batch_size = 0
 
     def __call__(self, x, sr: int):
+        """Process audio input through the VAD model."""
         x, sr = self._validate_input(x, sr)
         num_samples = 512 if sr == 16000 else 256
 
@@ -105,7 +128,20 @@
 
 
 class SileroVADAnalyzer(VADAnalyzer):
+    """Voice Activity Detection analyzer using the Silero VAD model.
+
+    Implements VAD analysis using the pre-trained Silero ONNX model for
+    accurate voice activity detection. Supports 8kHz and 16kHz sample rates
+    with automatic model state management and periodic resets.
+    """
+
     def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None):
+        """Initialize the Silero VAD analyzer.
+
+        Args:
+            sample_rate: Audio sample rate (8000 or 16000 Hz). If None, will be set later.
+            params: VAD parameters for detection thresholds and timing.
+        """
         super().__init__(sample_rate=sample_rate, params=params)
 
         logger.debug("Loading Silero VAD model...")
@@ -137,6 +173,14 @@ class SileroVADAnalyzer(VADAnalyzer):
     #
 
    def set_sample_rate(self, sample_rate: int):
+        """Set the sample rate for audio processing.
+
+        Args:
+            sample_rate: Audio sample rate (must be 8000 or 16000 Hz).
+
+        Raises:
+            ValueError: If sample rate is not 8000 or 16000 Hz.
+        """
         if sample_rate != 16000 and sample_rate != 8000:
             raise ValueError(
                 f"Silero VAD sample rate needs to be 16000 or 8000 (sample rate: {sample_rate})"
@@ -145,9 +189,22 @@
         super().set_sample_rate(sample_rate)
 
     def num_frames_required(self) -> int:
+        """Get the number of audio frames required for VAD analysis.
+
+        Returns:
+            Number of frames required (512 for 16kHz, 256 for 8kHz).
+        """
         return 512 if self.sample_rate == 16000 else 256
 
     def voice_confidence(self, buffer) -> float:
+        """Calculate voice activity confidence for the given audio buffer.
+
+        Args:
+            buffer: Audio buffer to analyze.
+
+        Returns:
+            Voice confidence score between 0.0 and 1.0.
+        """
         try:
             audio_int16 = np.frombuffer(buffer, np.int16)
             # Divide by 32768 because we have signed 16-bit data.
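
For orientation, a minimal usage sketch (not part of the diff) of the analyzer documented above, assuming the Silero ONNX dependencies are installed; the VADParams values are illustrative, and the sample rate must be 8000 or 16000 Hz as enforced by set_sample_rate():

from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams

# Illustrative thresholds; defaults come from VADParams itself.
analyzer = SileroVADAnalyzer(params=VADParams(confidence=0.7, stop_secs=0.8))
analyzer.set_sample_rate(16000)  # Silero only supports 8000 or 16000 Hz

# At 16 kHz the model needs 512 frames of signed 16-bit mono audio (1024 bytes).
window = b"\x00\x00" * analyzer.num_frames_required()
print(analyzer.analyze_audio(window))  # VADState.QUIET for pure silence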
pipecat/audio/vad/vad_analyzer.py

@@ -4,6 +4,13 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Voice Activity Detection (VAD) analyzer base classes and utilities.
+
+This module provides the abstract base class for VAD analyzers and associated
+data structures for voice activity detection in audio streams. Includes state
+management, parameter configuration, and audio analysis framework.
+"""
+
 from abc import ABC, abstractmethod
 from enum import Enum
 from typing import Optional
@@ -21,6 +28,15 @@ Logger = type(logger)
 
 
 class VADState(Enum):
+    """Voice Activity Detection states.
+
+    Parameters:
+        QUIET: No voice activity detected.
+        STARTING: Voice activity beginning, transitioning from quiet.
+        SPEAKING: Active voice detected and confirmed.
+        STOPPING: Voice activity ending, transitioning to quiet.
+    """
+
     QUIET = 1
     STARTING = 2
     SPEAKING = 3
@@ -28,6 +44,15 @@ class VADState(Enum):
 
 
 class VADParams(BaseModel):
+    """Configuration parameters for Voice Activity Detection.
+
+    Parameters:
+        confidence: Minimum confidence threshold for voice detection.
+        start_secs: Duration to wait before confirming voice start.
+        stop_secs: Duration to wait before confirming voice stop.
+        min_volume: Minimum audio volume threshold for voice detection.
+    """
+
     confidence: float = VAD_CONFIDENCE
     start_secs: float = VAD_START_SECS
     stop_secs: float = VAD_STOP_SECS
@@ -35,7 +60,20 @@
 
 
 class VADAnalyzer(ABC):
+    """Abstract base class for Voice Activity Detection analyzers.
+
+    Provides the framework for implementing VAD analysis with configurable
+    parameters, state management, and audio processing capabilities.
+    Subclasses must implement the core voice confidence calculation.
+    """
+
     def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None):
+        """Initialize the VAD analyzer.
+
+        Args:
+            sample_rate: Audio sample rate in Hz. If None, will be set later.
+            params: VAD parameters for detection configuration.
+        """
         self._init_sample_rate = sample_rate
         self._sample_rate = 0
         self._params = params or VADParams()
@@ -49,25 +87,58 @@
 
     @property
     def sample_rate(self) -> int:
+        """Get the current sample rate.
+
+        Returns:
+            Current audio sample rate in Hz.
+        """
         return self._sample_rate
 
     @property
     def num_channels(self) -> int:
+        """Get the number of audio channels.
+
+        Returns:
+            Number of audio channels (always 1 for mono).
+        """
         return self._num_channels
 
     @property
     def params(self) -> VADParams:
+        """Get the current VAD parameters.
+
+        Returns:
+            Current VAD configuration parameters.
+        """
         return self._params
 
     @abstractmethod
     def num_frames_required(self) -> int:
+        """Get the number of audio frames required for analysis.
+
+        Returns:
+            Number of frames needed for VAD processing.
+        """
        pass
 
     @abstractmethod
     def voice_confidence(self, buffer) -> float:
+        """Calculate voice activity confidence for the given audio buffer.
+
+        Args:
+            buffer: Audio buffer to analyze.
+
+        Returns:
+            Voice confidence score between 0.0 and 1.0.
+        """
        pass
 
     def set_sample_rate(self, sample_rate: int):
+        """Set the sample rate for audio processing.
+
+        Args:
+            sample_rate: Audio sample rate in Hz.
+        """
         self._sample_rate = self._init_sample_rate or sample_rate
         self.set_params(self._params)
 
@@ -86,46 +157,59 @@
         self._vad_state: VADState = VADState.QUIET
 
     def _get_smoothed_volume(self, audio: bytes) -> float:
+        """Calculate smoothed audio volume using exponential smoothing."""
         volume = calculate_audio_volume(audio, self.sample_rate)
         return exp_smoothing(volume, self._prev_volume, self._smoothing_factor)
 
     def analyze_audio(self, buffer) -> VADState:
+        """Analyze audio buffer and return current VAD state.
+
+        Processes incoming audio data, maintains internal state, and determines
+        voice activity status based on confidence and volume thresholds.
+
+        Args:
+            buffer: Audio buffer to analyze.
+
+        Returns:
+            Current VAD state after processing the buffer.
+        """
         self._vad_buffer += buffer
 
         num_required_bytes = self._vad_frames_num_bytes
         if len(self._vad_buffer) < num_required_bytes:
             return self._vad_state
 
-        audio_frames = self._vad_buffer[:num_required_bytes]
-        self._vad_buffer = self._vad_buffer[num_required_bytes:]
-
-        confidence = self.voice_confidence(audio_frames)
-
-        volume = self._get_smoothed_volume(audio_frames)
-        self._prev_volume = volume
-
-        speaking = confidence >= self._params.confidence and volume >= self._params.min_volume
-
-        if speaking:
-            match self._vad_state:
-                case VADState.QUIET:
-                    self._vad_state = VADState.STARTING
-                    self._vad_starting_count = 1
-                case VADState.STARTING:
-                    self._vad_starting_count += 1
-                case VADState.STOPPING:
-                    self._vad_state = VADState.SPEAKING
-                    self._vad_stopping_count = 0
-        else:
-            match self._vad_state:
-                case VADState.STARTING:
-                    self._vad_state = VADState.QUIET
-                    self._vad_starting_count = 0
-                case VADState.SPEAKING:
-                    self._vad_state = VADState.STOPPING
-                    self._vad_stopping_count = 1
-                case VADState.STOPPING:
-                    self._vad_stopping_count += 1
+        while len(self._vad_buffer) >= num_required_bytes:
+            audio_frames = self._vad_buffer[:num_required_bytes]
+            self._vad_buffer = self._vad_buffer[num_required_bytes:]
+
+            confidence = self.voice_confidence(audio_frames)
+
+            volume = self._get_smoothed_volume(audio_frames)
+            self._prev_volume = volume
+
+            speaking = confidence >= self._params.confidence and volume >= self._params.min_volume
+
+            if speaking:
+                match self._vad_state:
+                    case VADState.QUIET:
+                        self._vad_state = VADState.STARTING
+                        self._vad_starting_count = 1
+                    case VADState.STARTING:
+                        self._vad_starting_count += 1
+                    case VADState.STOPPING:
+                        self._vad_state = VADState.SPEAKING
+                        self._vad_stopping_count = 0
+            else:
+                match self._vad_state:
+                    case VADState.STARTING:
+                        self._vad_state = VADState.QUIET
+                        self._vad_starting_count = 0
+                    case VADState.SPEAKING:
+                        self._vad_state = VADState.STOPPING
+                        self._vad_stopping_count = 1
+                    case VADState.STOPPING:
+                        self._vad_stopping_count += 1
 
         if (
             self._vad_state == VADState.STARTING
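
The analyze_audio() change above switches from consuming a single window per call to draining the buffer in a while loop, so every queued window is processed in one pass. Subclasses still only supply the two abstract methods. As a hedged illustration of the base class contract (this class is hypothetical and not part of the package), a crude energy-based analyzer might look like:

import numpy as np

from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADParams


class EnergyVADAnalyzer(VADAnalyzer):
    """Hypothetical analyzer: treats RMS energy as a rough voice-confidence score."""

    def num_frames_required(self) -> int:
        # Analyze ~20 ms windows (e.g. 320 frames at 16 kHz); fall back before set_sample_rate().
        return int(self.sample_rate * 0.02) if self.sample_rate else 320

    def voice_confidence(self, buffer) -> float:
        samples = np.frombuffer(buffer, np.int16).astype(np.float32) / 32768.0
        rms = float(np.sqrt(np.mean(samples**2))) if len(samples) else 0.0
        return min(1.0, rms * 10.0)  # scale into a rough 0.0-1.0 range


analyzer = EnergyVADAnalyzer(params=VADParams(confidence=0.5))
analyzer.set_sample_rate(16000)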
pipecat/clocks/base_clock.py

@@ -4,14 +4,33 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Base clock interface for Pipecat timing operations."""
+
 from abc import ABC, abstractmethod
 
 
 class BaseClock(ABC):
+    """Abstract base class for clock implementations.
+
+    Provides a common interface for timing operations used in Pipecat
+    for synchronization, scheduling, and time-based processing.
+    """
+
     @abstractmethod
     def get_time(self) -> int:
+        """Get the current time value.
+
+        Returns:
+            The current time as an integer value. The specific unit and
+            reference point depend on the concrete implementation.
+        """
         pass
 
     @abstractmethod
     def start(self):
+        """Start or initialize the clock.
+
+        Performs any necessary initialization or starts the timing mechanism.
+        This method should be called before using get_time().
+        """
        pass

pipecat/clocks/system_clock.py

@@ -4,17 +4,42 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""System clock implementation for Pipecat."""
+
 import time
 
 from pipecat.clocks.base_clock import BaseClock
 
 
 class SystemClock(BaseClock):
+    """A monotonic clock implementation using system time.
+
+    Provides high-precision timing using the system's monotonic clock,
+    which is not affected by system clock adjustments and is suitable
+    for measuring elapsed time in real-time applications.
+    """
+
     def __init__(self):
+        """Initialize the system clock.
+
+        The clock starts in an uninitialized state and must be started
+        explicitly using the start() method before time measurement begins.
+        """
         self._time = 0
 
     def get_time(self) -> int:
+        """Get the elapsed time since the clock was started.
+
+        Returns:
+            The elapsed time in nanoseconds since start() was called.
+            Returns 0 if the clock has not been started yet.
+        """
         return time.monotonic_ns() - self._time if self._time > 0 else 0
 
     def start(self):
+        """Start the clock and begin time measurement.
+
+        Records the current monotonic time as the reference point
+        for all subsequent get_time() calls.
+        """
         self._time = time.monotonic_ns()
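
A short usage sketch of the clock shown above (not part of the diff): get_time() reports nanoseconds elapsed since start(), and 0 before the clock has been started:

import time

from pipecat.clocks.system_clock import SystemClock

clock = SystemClock()
print(clock.get_time())  # 0: the clock has not been started yet

clock.start()
time.sleep(0.1)
print(f"{clock.get_time() / 1_000_000:.1f} ms")  # roughly 100 ms, reported in nanoseconds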