dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of dv-pipecat-ai has been flagged as potentially problematic.
Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/xtts/tts.py CHANGED
@@ -4,12 +4,18 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""XTTS text-to-speech service implementation.
+
+This module provides integration with Coqui XTTS streaming server for
+text-to-speech synthesis using local Docker deployment.
+"""
+
 from typing import Any, AsyncGenerator, Dict, Optional
 
 import aiohttp
 from loguru import logger
 
-from pipecat.audio.utils import create_default_resampler
+from pipecat.audio.utils import create_stream_resampler
 from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
@@ -31,6 +37,14 @@ from pipecat.utils.tracing.service_decorators import traced_tts
 
 
 def language_to_xtts_language(language: Language) -> Optional[str]:
+    """Convert a Language enum to XTTS language code.
+
+    Args:
+        language: The Language enum value to convert.
+
+    Returns:
+        The corresponding XTTS language code, or None if not supported.
+    """
     BASE_LANGUAGES = {
         Language.CS: "cs",
         Language.DE: "de",
@@ -70,6 +84,13 @@ def language_to_xtts_language(language: Language) -> Optional[str]:
 
 
 class XTTSService(TTSService):
+    """Coqui XTTS text-to-speech service.
+
+    Provides text-to-speech synthesis using a locally running Coqui XTTS
+    streaming server. Supports multiple languages and voice cloning through
+    studio speakers configuration.
+    """
+
     def __init__(
         self,
         *,
@@ -80,6 +101,16 @@ class XTTSService(TTSService):
         sample_rate: Optional[int] = None,
         **kwargs,
     ):
+        """Initialize the XTTS service.
+
+        Args:
+            voice_id: ID of the voice/speaker to use for synthesis.
+            base_url: Base URL of the XTTS streaming server.
+            aiohttp_session: HTTP session for making requests to the server.
+            language: Language for synthesis. Defaults to English.
+            sample_rate: Audio sample rate. If None, uses default.
+            **kwargs: Additional arguments passed to parent TTSService.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)
 
         self._settings = {
@@ -90,15 +121,33 @@ class XTTSService(TTSService):
         self._studio_speakers: Optional[Dict[str, Any]] = None
         self._aiohttp_session = aiohttp_session
 
-        self._resampler = create_default_resampler()
+        self._resampler = create_stream_resampler()
 
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as XTTS service supports metrics generation.
+        """
         return True
 
     def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to XTTS service language format.
+
+        Args:
+            language: The language to convert.
+
+        Returns:
+            The XTTS-specific language code, or None if not supported.
+        """
        return language_to_xtts_language(language)
 
     async def start(self, frame: StartFrame):
+        """Start the XTTS service and load studio speakers.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
 
         if self._studio_speakers:
@@ -120,6 +169,14 @@ class XTTSService(TTSService):
 
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using XTTS streaming server.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
         logger.debug(f"{self}: Generating TTS [{text}]")
 
         if not self._studio_speakers:
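The functional change in this file is the swap from create_default_resampler to the new create_stream_resampler (backed by the soxr_stream_resampler.py module added above). A minimal sketch of why that matters for chunked TTS audio; the resample() call signature here is an assumption, mirroring pipecat's existing BaseAudioResampler interface:

from pipecat.audio.utils import create_stream_resampler

# One stateful resampler per audio stream: unlike the default resampler,
# a stream resampler carries filter state across calls, which avoids
# audible discontinuities at chunk boundaries in streamed audio.
resampler = create_stream_resampler()

async def resample_chunks(chunks, in_rate: int, out_rate: int):
    for chunk in chunks:
        # Assumed signature, matching BaseAudioResampler.resample().
        yield await resampler.resample(chunk, in_rate, out_rate)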
pipecat/sync/base_notifier.py CHANGED
@@ -4,14 +4,33 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Base notifier interface for Pipecat."""
+
 from abc import ABC, abstractmethod
 
 
 class BaseNotifier(ABC):
+    """Abstract base class for notification mechanisms.
+
+    Provides a standard interface for implementing notification and waiting
+    patterns used for event coordination and signaling between components
+    in the Pipecat framework.
+    """
+
     @abstractmethod
     async def notify(self):
+        """Send a notification signal.
+
+        Implementations should trigger any waiting coroutines or processes
+        that are blocked on this notifier.
+        """
         pass
 
     @abstractmethod
     async def wait(self):
+        """Wait for a notification signal.
+
+        Implementations should block until a notification is received
+        from the corresponding notify() call.
+        """
         pass
pipecat/sync/event_notifier.py CHANGED
@@ -4,18 +4,42 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Event-based notifier implementation using asyncio Event primitives."""
+
 import asyncio
 
 from pipecat.sync.base_notifier import BaseNotifier
 
 
 class EventNotifier(BaseNotifier):
+    """Event-based notifier using asyncio.Event for task synchronization.
+
+    Provides a simple notification mechanism where one task can signal
+    an event and other tasks can wait for that event to occur. The event
+    is automatically cleared after each wait operation.
+    """
+
     def __init__(self):
+        """Initialize the event notifier.
+
+        Creates an internal asyncio.Event for managing notifications.
+        """
         self._event = asyncio.Event()
 
     async def notify(self):
+        """Signal the event to notify waiting tasks.
+
+        Sets the internal event, causing any tasks waiting on this
+        notifier to be awakened.
+        """
         self._event.set()
 
     async def wait(self):
+        """Wait for the event to be signaled.
+
+        Blocks until another task calls notify(). Automatically clears
+        the event after being awakened so subsequent calls will wait
+        for the next notification.
+        """
         await self._event.wait()
         self._event.clear()
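As a quick illustration of the notify/wait contract documented above, here is a minimal sketch of two tasks coordinating through an EventNotifier. Everything used here is visible in the diffed code; the auto-clear in wait() means each notification wakes one round of waiting and the next wait() blocks again:

import asyncio

from pipecat.sync.event_notifier import EventNotifier

async def main():
    notifier = EventNotifier()

    async def waiter():
        await notifier.wait()  # blocks until notify(); event auto-clears after
        return "woken"

    task = asyncio.create_task(waiter())
    await asyncio.sleep(0)     # give the waiter a chance to start waiting
    await notifier.notify()    # set the event, waking the waiter
    print(await task)          # -> "woken"

asyncio.run(main())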
pipecat/tests/utils.py CHANGED
@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Testing utilities for Pipecat pipeline components."""
+
 import asyncio
 from dataclasses import dataclass
 from typing import Any, Awaitable, Callable, Dict, List, Optional, Sequence, Tuple
@@ -24,15 +26,27 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 
 @dataclass
 class SleepFrame(SystemFrame):
-    """This frame is used by test framework to introduce some sleep time before
-    the next frame is pushed. This is useful to control system frames vs data or
-    control frames.
+    """A system frame that introduces a sleep delay in the test pipeline.
+
+    This frame is used by the test framework to control timing between
+    frame processing, allowing tests to separate system frames from
+    data or control frames.
+
+    Parameters:
+        sleep: Duration to sleep in seconds before processing the next frame.
     """
 
-    sleep: float = 0.1
+    sleep: float = 0.2
 
 
 class HeartbeatsObserver(BaseObserver):
+    """Observer that monitors heartbeat frames from a specific processor.
+
+    This observer watches for HeartbeatFrames from a target processor and
+    invokes a callback when they are detected, useful for testing timing
+    and lifecycle events.
+    """
+
     def __init__(
         self,
         *,
@@ -40,11 +54,23 @@ class HeartbeatsObserver(BaseObserver):
         heartbeat_callback: Callable[[FrameProcessor, HeartbeatFrame], Awaitable[None]],
         **kwargs,
     ):
+        """Initialize the heartbeats observer.
+
+        Args:
+            target: The frame processor to monitor for heartbeat frames.
+            heartbeat_callback: Async callback function to invoke when heartbeats are detected.
+            **kwargs: Additional arguments passed to the parent observer.
+        """
         super().__init__(**kwargs)
         self._target = target
         self._callback = heartbeat_callback
 
     async def on_push_frame(self, data: FramePushed):
+        """Handle frame push events and detect heartbeats from target processor.
+
+        Args:
+            data: The frame push event data containing source and frame information.
+        """
         src = data.source
         frame = data.frame
 
@@ -53,6 +79,13 @@ class HeartbeatsObserver(BaseObserver):
 
 
 class QueuedFrameProcessor(FrameProcessor):
+    """A processor that captures frames in a queue for testing purposes.
+
+    This processor intercepts frames flowing in a specific direction and
+    stores them in a queue for later inspection during testing, while
+    still allowing the frames to continue through the pipeline.
+    """
+
     def __init__(
         self,
         *,
@@ -60,12 +93,25 @@ class QueuedFrameProcessor(FrameProcessor):
         queue_direction: FrameDirection,
         ignore_start: bool = True,
     ):
-        super().__init__()
+        """Initialize the queued frame processor.
+
+        Args:
+            queue: The asyncio queue to store captured frames.
+            queue_direction: The direction of frames to capture (UPSTREAM or DOWNSTREAM).
+            ignore_start: Whether to ignore StartFrames when capturing.
+        """
+        super().__init__(enable_direct_mode=True)
         self._queue = queue
         self._queue_direction = queue_direction
         self._ignore_start = ignore_start
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames and capture them in the queue if they match the direction.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction the frame is flowing.
+        """
         await super().process_frame(frame, direction)
 
         if direction == self._queue_direction:
@@ -85,6 +131,28 @@ async def run_test(
     start_metadata: Optional[Dict[str, Any]] = None,
     send_end_frame: bool = True,
 ) -> Tuple[Sequence[Frame], Sequence[Frame]]:
+    """Run a test pipeline with the specified processor and validate frame flow.
+
+    This function creates a test pipeline with the given processor, sends the
+    specified frames through it, and validates that the expected frames are
+    received in both upstream and downstream directions.
+
+    Args:
+        processor: The frame processor to test.
+        frames_to_send: Sequence of frames to send through the processor.
+        expected_down_frames: Expected frame types flowing downstream (optional).
+        expected_up_frames: Expected frame types flowing upstream (optional).
+        ignore_start: Whether to ignore StartFrames in frame validation.
+        observers: Optional list of observers to attach to the pipeline.
+        start_metadata: Optional metadata to include with the StartFrame.
+        send_end_frame: Whether to send an EndFrame at the end of the test.
+
+    Returns:
+        Tuple containing (downstream_frames, upstream_frames) that were received.
+
+    Raises:
+        AssertionError: If the received frames don't match the expected frame types.
+    """
     observers = observers or []
     start_metadata = start_metadata or {}
 
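The newly documented run_test() helper is the main entry point here. A minimal sketch of how a test would call it, modeled on the docstring above; IdentityFilter (which passes frames through untouched, see pipecat/processors/filters/identity_filter.py in the file list) stands in for a processor under test, and the keyword-only calling convention is assumed from the signature fragment in the diff:

import asyncio

from pipecat.frames.frames import TextFrame
from pipecat.processors.filters.identity_filter import IdentityFilter
from pipecat.tests.utils import run_test

async def main():
    processor = IdentityFilter()
    # run_test() wraps the processor in a pipeline, pushes the frames, and
    # asserts that the received frame types match the expectations.
    down, up = await run_test(
        processor,
        frames_to_send=[TextFrame("hello")],
        expected_down_frames=[TextFrame],
    )
    print([type(f).__name__ for f in down])

asyncio.run(main())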
pipecat/transcriptions/language.py CHANGED
@@ -4,13 +4,23 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Language code enumerations for Pipecat.
+
+This module provides comprehensive language code constants following ISO 639
+and BCP 47 standards, supporting both language-only and language-region
+combinations for various speech and text processing services.
+"""
+
 import sys
 from enum import Enum
 
 if sys.version_info < (3, 11):
 
     class StrEnum(str, Enum):
+        """String enumeration base class for Python < 3.11 compatibility."""
+
         def __new__(cls, value):
+            """Create a new instance of the StrEnum."""
            obj = str.__new__(cls, value)
            obj._value_ = value
            return obj
@@ -19,6 +29,14 @@ else:
 
 
 class Language(StrEnum):
+    """Language codes for speech and text processing services.
+
+    Provides comprehensive language code constants following ISO 639 and BCP 47
+    standards. Includes both language-only codes (e.g., 'en') and language-region
+    combinations (e.g., 'en-US') to support various speech synthesis, recognition,
+    and translation services.
+    """
+
     # Afrikaans
     AF = "af"
     AF_ZA = "af-ZA"
@@ -127,6 +145,9 @@ class Language(StrEnum):
     EN_US = "en-US"
     EN_ZA = "en-ZA"
 
+    # Esperanto
+    EO = "eo"
+
     # Spanish
     ES = "es"
     ES_AR = "es-AR"
@@ -456,6 +477,9 @@ class Language(StrEnum):
     # Tatar
     TT = "tt"
 
+    # Uyghur
+    UG = "ug"
+
     # Ukrainian
     UK = "uk"
     UK_UA = "uk-UA"
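Because Language is a str-backed enum (via StrEnum, with the fallback shown above on Python < 3.11), the two codes added in this release behave like plain strings. A small sketch:

from pipecat.transcriptions.language import Language

# StrEnum members compare equal to their string values, so they can be
# passed anywhere a plain ISO 639 / BCP 47 code string is expected.
assert Language.EO == "eo"      # Esperanto, new in this release
assert Language.UG == "ug"      # Uyghur, new in this release
assert Language.EN_US.value == "en-US"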
pipecat/transports/base_input.py CHANGED
@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Base input transport implementation for Pipecat.
+
+This module provides the BaseInputTransport class which handles audio and video
+input processing, including VAD, turn analysis, and interruption management.
+"""
+
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
 from typing import Optional
@@ -28,6 +34,7 @@ from pipecat.frames.frames import (
     InputAudioRawFrame,
     InputImageRawFrame,
     MetricsFrame,
+    SpeechControlParamsFrame,
     StartFrame,
     StartInterruptionFrame,
     StopFrame,
@@ -47,7 +54,20 @@ AUDIO_INPUT_TIMEOUT_SECS = 0.5
 
 
 class BaseInputTransport(FrameProcessor):
+    """Base class for input transport implementations.
+
+    Handles audio and video input processing including Voice Activity Detection,
+    turn analysis, audio filtering, and user interaction management. Supports
+    interruption handling and provides hooks for transport-specific implementations.
+    """
+
     def __init__(self, params: TransportParams, **kwargs):
+        """Initialize the base input transport.
+
+        Args:
+            params: Transport configuration parameters.
+            **kwargs: Additional arguments passed to parent class.
+        """
         super().__init__(**kwargs)
 
         self._params = params
@@ -115,25 +135,54 @@ class BaseInputTransport(FrameProcessor):
             self._params.video_out_color_format = self._params.camera_out_color_format
 
     def enable_audio_in_stream_on_start(self, enabled: bool) -> None:
+        """Enable or disable audio streaming on transport start.
+
+        Args:
+            enabled: Whether to start audio streaming immediately on transport start.
+        """
         self.logger.debug(f"Enabling audio on start. {enabled}")
         self._params.audio_in_stream_on_start = enabled
 
     async def start_audio_in_streaming(self):
+        """Start audio input streaming.
+
+        Override in subclasses to implement transport-specific audio streaming.
+        """
         pass
 
     @property
     def sample_rate(self) -> int:
+        """Get the current audio sample rate.
+
+        Returns:
+            The sample rate in Hz.
+        """
         return self._sample_rate
 
     @property
     def vad_analyzer(self) -> Optional[VADAnalyzer]:
+        """Get the Voice Activity Detection analyzer.
+
+        Returns:
+            The VAD analyzer instance if configured, None otherwise.
+        """
         return self._params.vad_analyzer
 
     @property
     def turn_analyzer(self) -> Optional[BaseTurnAnalyzer]:
+        """Get the turn-taking analyzer.
+
+        Returns:
+            The turn analyzer instance if configured, None otherwise.
+        """
         return self._params.turn_analyzer
 
     async def start(self, frame: StartFrame):
+        """Start the input transport and initialize components.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         self._paused = False
         self._user_speaking = False
 
@@ -147,11 +196,23 @@ class BaseInputTransport(FrameProcessor):
         if self._params.turn_analyzer:
             self._params.turn_analyzer.set_sample_rate(self._sample_rate)
 
+        if self._params.vad_analyzer or self._params.turn_analyzer:
+            vad_params = self._params.vad_analyzer.params if self._params.vad_analyzer else None
+            turn_params = self._params.turn_analyzer.params if self._params.turn_analyzer else None
+
+            speech_frame = SpeechControlParamsFrame(vad_params=vad_params, turn_params=turn_params)
+            await self.push_frame(speech_frame)
+
         # Start audio filter.
         if self._params.audio_in_filter:
             await self._params.audio_in_filter.start(self._sample_rate)
 
     async def stop(self, frame: EndFrame):
+        """Stop the input transport and cleanup resources.
+
+        Args:
+            frame: The end frame signaling transport shutdown.
+        """
         # Cancel and wait for the audio input task to finish.
         await self._cancel_audio_task()
         # Stop audio filter.
@@ -159,6 +220,11 @@ class BaseInputTransport(FrameProcessor):
             await self._params.audio_in_filter.stop()
 
     async def pause(self, frame: StopFrame):
+        """Pause the input transport temporarily.
+
+        Args:
+            frame: The stop frame signaling transport pause.
+        """
         self._paused = True
         # Cancel task so we clear the queue
         await self._cancel_audio_task()
@@ -166,19 +232,38 @@ class BaseInputTransport(FrameProcessor):
         self._create_audio_task()
 
     async def cancel(self, frame: CancelFrame):
+        """Cancel the input transport and stop all processing.
+
+        Args:
+            frame: The cancel frame signaling immediate cancellation.
+        """
         # Cancel and wait for the audio input task to finish.
         await self._cancel_audio_task()
 
     async def set_transport_ready(self, frame: StartFrame):
-        """To be called when the transport is ready to stream."""
+        """Called when the transport is ready to stream.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         # Create audio input queue and task if needed.
         self._create_audio_task()
 
     async def push_video_frame(self, frame: InputImageRawFrame):
+        """Push a video frame downstream if video input is enabled.
+
+        Args:
+            frame: The input video frame to process.
+        """
         if self._params.video_in_enabled and not self._paused:
             await self.push_frame(frame)
 
     async def push_audio_frame(self, frame: InputAudioRawFrame):
+        """Push an audio frame to the processing queue if audio input is enabled.
+
+        Args:
+            frame: The input audio frame to process.
+        """
         if self._params.audio_in_enabled and not self._paused:
             await self._audio_in_queue.put(frame)
 
@@ -187,6 +272,12 @@ class BaseInputTransport(FrameProcessor):
     #
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames and handle transport-specific logic.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         await super().process_frame(frame, direction)
 
         # Specific system frames
@@ -216,6 +307,13 @@ class BaseInputTransport(FrameProcessor):
         elif isinstance(frame, VADParamsUpdateFrame):
             if self.vad_analyzer:
                 self.vad_analyzer.set_params(frame.params, bot_logger=self.logger)
+                speech_frame = SpeechControlParamsFrame(
+                    vad_params=frame.params,
+                    turn_params=self._params.turn_analyzer.params
+                    if self._params.turn_analyzer
+                    else None,
+                )
+                await self.push_frame(speech_frame)
         elif isinstance(frame, SystemFrame):
             await self.push_frame(frame, direction)
         # Control frames
@@ -238,12 +336,14 @@ class BaseInputTransport(FrameProcessor):
     #
 
     async def _handle_bot_interruption(self, frame: BotInterruptionFrame):
+        """Handle bot interruption frames."""
         self.logger.debug("Bot interruption")
         if self.interruptions_allowed:
             await self._start_interruption()
             await self.push_frame(StartInterruptionFrame())
 
     async def _handle_user_interruption(self, frame: Frame):
+        """Handle user interruption events based on speaking state."""
         if isinstance(frame, UserStartedSpeakingFrame):
             self.logger.debug("User started speaking")
             self._user_speaking = True
@@ -281,9 +381,11 @@ class BaseInputTransport(FrameProcessor):
     #
 
     async def _handle_bot_started_speaking(self, frame: BotStartedSpeakingFrame):
+        """Update bot speaking state when bot starts speaking."""
         self._bot_speaking = True
 
     async def _handle_bot_stopped_speaking(self, frame: BotStoppedSpeakingFrame):
+        """Update bot speaking state when bot stops speaking."""
         self._bot_speaking = False
 
     #
@@ -291,16 +393,19 @@ class BaseInputTransport(FrameProcessor):
     #
 
     def _create_audio_task(self):
+        """Create the audio processing task if audio input is enabled."""
         if not self._audio_task and self._params.audio_in_enabled:
             self._audio_in_queue = asyncio.Queue()
             self._audio_task = self.create_task(self._audio_task_handler())
 
     async def _cancel_audio_task(self):
+        """Cancel and cleanup the audio processing task."""
         if self._audio_task:
             await self.cancel_task(self._audio_task)
             self._audio_task = None
 
     async def _vad_analyze(self, audio_frame: InputAudioRawFrame) -> VADState:
+        """Analyze audio frame for voice activity."""
         state = VADState.QUIET
         if self.vad_analyzer:
             state = await self.get_event_loop().run_in_executor(
@@ -309,6 +414,7 @@ class BaseInputTransport(FrameProcessor):
         return state
 
     async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState):
+        """Handle Voice Activity Detection results and generate appropriate frames."""
         new_vad_state = await self._vad_analyze(audio_frame)
         if (
             new_vad_state != vad_state
@@ -339,18 +445,21 @@ class BaseInputTransport(FrameProcessor):
         return vad_state
 
     async def _handle_end_of_turn(self):
+        """Handle end-of-turn analysis and generate prediction results."""
         if self.turn_analyzer:
             state, prediction = await self.turn_analyzer.analyze_end_of_turn()
             await self._handle_prediction_result(prediction)
             await self._handle_end_of_turn_complete(state)
 
     async def _handle_end_of_turn_complete(self, state: EndOfTurnState):
+        """Handle completion of end-of-turn analysis."""
         if state == EndOfTurnState.COMPLETE:
             await self._handle_user_interruption(UserStoppedSpeakingFrame())
 
     async def _run_turn_analyzer(
         self, frame: InputAudioRawFrame, vad_state: VADState, previous_vad_state: VADState
     ):
+        """Run turn analysis on audio frame and handle results."""
         is_speech = vad_state == VADState.SPEAKING or vad_state == VADState.STARTING
         # If silence exceeds threshold, we are going to receive EndOfTurnState.COMPLETE
         end_of_turn_state = self._params.turn_analyzer.append_audio(frame.audio, is_speech)
@@ -361,6 +470,7 @@ class BaseInputTransport(FrameProcessor):
             await self._handle_end_of_turn()
 
     async def _audio_task_handler(self):
+        """Main audio processing task handler for VAD and turn analysis."""
         vad_state: VADState = VADState.QUIET
         while True:
             try:
@@ -395,13 +505,7 @@ class BaseInputTransport(FrameProcessor):
                 if self._params.turn_analyzer:
                     self._params.turn_analyzer.clear()
                 await self._handle_user_interruption(UserStoppedSpeakingFrame())
-            finally:
-                self.reset_watchdog()
 
     async def _handle_prediction_result(self, result: MetricsData):
-        """Handle a prediction result event from the turn analyzer.
-
-        Args:
-            result: The prediction result MetricsData.
-        """
+        """Handle a prediction result event from the turn analyzer."""
         await self.push_frame(MetricsFrame(data=[result]))
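The main behavioral change above is that BaseInputTransport now announces its VAD and turn-analyzer parameters downstream via the new SpeechControlParamsFrame, both on start() and whenever a VADParamsUpdateFrame changes them. A hypothetical downstream processor could consume it as sketched below; only the vad_params/turn_params attributes visible in the diff are assumed, and SpeechParamsLogger is an illustrative name, not part of the package:

from pipecat.frames.frames import Frame, SpeechControlParamsFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor

class SpeechParamsLogger(FrameProcessor):
    """Illustrative processor that logs speech control parameter updates."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)
        if isinstance(frame, SpeechControlParamsFrame):
            # Either field may be None when the corresponding analyzer is
            # not configured, per the start() logic in the diff above.
            print("VAD params:", frame.vad_params)
            print("Turn params:", frame.turn_params)
        await self.push_frame(frame, direction)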