dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dv-pipecat-ai might be problematic.

Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
@@ -4,7 +4,11 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
-"""This module implements Tavus as a sink transport layer"""
+"""Tavus video service implementation for avatar-based video generation.
+
+This module implements Tavus as a sink transport layer, providing video
+avatar functionality through Tavus's streaming API.
+"""
 
 import asyncio
 from typing import Optional
@@ -13,41 +17,37 @@ import aiohttp
 from daily.daily import AudioData, VideoFrame
 from loguru import logger
 
-from pipecat.audio.utils import create_default_resampler
+from pipecat.audio.utils import create_stream_resampler
 from pipecat.frames.frames import (
+    BotStartedSpeakingFrame,
     CancelFrame,
     EndFrame,
     Frame,
     OutputAudioRawFrame,
     OutputImageRawFrame,
+    OutputTransportReadyFrame,
+    SpeechOutputAudioRawFrame,
     StartFrame,
     StartInterruptionFrame,
     TTSAudioRawFrame,
+    TTSStartedFrame,
 )
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup
 from pipecat.services.ai_service import AIService
 from pipecat.transports.services.tavus import TavusCallbacks, TavusParams, TavusTransportClient
-from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
 
 
 class TavusVideoService(AIService):
-    """
-    Service class that proxies audio to Tavus and receives both audio and video in return.
-
-    It uses the `TavusTransportClient` to manage the session and handle communication. When
-    audio is sent, Tavus responds with both audio and video streams, which are then routed
-    through Pipecat’s media pipeline.
-
-    In use cases such as with `DailyTransport`, this results in two distinct virtual rooms:
-    - **Tavus room**: Contains the Tavus Avatar and the Pipecat Bot.
-    - **User room**: Contains the Pipecat Bot and the user.
-
-    Args:
-        api_key (str): Tavus API key used for authentication.
-        replica_id (str): ID of the Tavus voice replica to use for speech synthesis.
-        persona_id (str): ID of the Tavus persona. Defaults to "pipecat-stream" to use the Pipecat TTS voice.
-        session (aiohttp.ClientSession): Async HTTP session used for communication with Tavus.
-        **kwargs: Additional arguments passed to the parent `AIService` class.
+    """Service that proxies audio to Tavus and receives audio and video in return.
+
+    Uses the TavusTransportClient to manage sessions and handle communication.
+    When audio is sent, Tavus responds with both audio and video streams, which
+    are routed through Pipecat's media pipeline.
+
+    In use cases with DailyTransport, this creates two distinct virtual rooms:
+
+    - Tavus room: Contains the Tavus Avatar and the Pipecat Bot
+    - User room: Contains the Pipecat Bot and the user
     """
 
     def __init__(
@@ -59,6 +59,15 @@ class TavusVideoService(AIService):
         session: aiohttp.ClientSession,
         **kwargs,
     ) -> None:
+        """Initialize the Tavus video service.
+
+        Args:
+            api_key: Tavus API key used for authentication.
+            replica_id: ID of the Tavus voice replica to use for speech synthesis.
+            persona_id: ID of the Tavus persona. Defaults to "pipecat-stream" for Pipecat TTS voice.
+            session: Async HTTP session used for communication with Tavus.
+            **kwargs: Additional arguments passed to the parent AIService class.
+        """
         super().__init__(**kwargs)
         self._api_key = api_key
         self._session = session
@@ -69,14 +78,20 @@ class TavusVideoService(AIService):
         self._client: Optional[TavusTransportClient] = None
 
         self._conversation_id: str
-        self._resampler = create_default_resampler()
+        self._resampler = create_stream_resampler()
 
         self._audio_buffer = bytearray()
         self._send_task: Optional[asyncio.Task] = None
         # This is the custom track destination expected by Tavus
         self._transport_destination: Optional[str] = "stream"
+        self._transport_ready = False
 
     async def setup(self, setup: FrameProcessorSetup):
+        """Set up the Tavus video service.
+
+        Args:
+            setup: Frame processor setup configuration.
+        """
         await super().setup(setup)
         callbacks = TavusCallbacks(
             on_participant_joined=self._on_participant_joined,
@@ -99,15 +114,18 @@ class TavusVideoService(AIService):
         await self._client.setup(setup)
 
     async def cleanup(self):
+        """Clean up the service and release resources."""
         await super().cleanup()
         await self._client.cleanup()
         self._client = None
 
     async def _on_participant_left(self, participant, reason):
+        """Handle participant leaving the session."""
         participant_id = participant["id"]
         logger.info(f"Participant left {participant_id}, reason: {reason}")
 
     async def _on_participant_joined(self, participant):
+        """Handle participant joining the session."""
         participant_id = participant["id"]
         logger.info(f"Participant joined {participant_id}")
         if not self._other_participant_has_joined:
@@ -124,32 +142,51 @@ class TavusVideoService(AIService):
     async def _on_participant_video_frame(
         self, participant_id: str, video_frame: VideoFrame, video_source: str
     ):
+        """Handle incoming video frames from participants."""
         frame = OutputImageRawFrame(
             image=video_frame.buffer,
             size=(video_frame.width, video_frame.height),
             format=video_frame.color_format,
         )
         frame.transport_source = video_source
-        await self.push_frame(frame)
+        if self._transport_ready:
+            await self.push_frame(frame)
 
     async def _on_participant_audio_data(
         self, participant_id: str, audio: AudioData, audio_source: str
    ):
-        frame = OutputAudioRawFrame(
+        """Handle incoming audio data from participants."""
+        frame = SpeechOutputAudioRawFrame(
             audio=audio.audio_frames,
             sample_rate=audio.sample_rate,
             num_channels=audio.num_channels,
         )
         frame.transport_source = audio_source
-        await self.push_frame(frame)
+        if self._transport_ready:
+            await self.push_frame(frame)
 
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as Tavus service supports metrics generation.
+        """
         return True
 
     async def get_persona_name(self) -> str:
+        """Get the name of the current persona.
+
+        Returns:
+            The persona name from the Tavus client.
+        """
         return await self._client.get_persona_name()
 
     async def start(self, frame: StartFrame):
+        """Start the Tavus video service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
         await self._client.start(frame)
         if self._transport_destination:
@@ -157,16 +194,32 @@ class TavusVideoService(AIService):
         await self._create_send_task()
 
     async def stop(self, frame: EndFrame):
+        """Stop the Tavus video service.
+
+        Args:
+            frame: The end frame.
+        """
         await super().stop(frame)
         await self._end_conversation()
         await self._cancel_send_task()
 
     async def cancel(self, frame: CancelFrame):
+        """Cancel the Tavus video service.
+
+        Args:
+            frame: The cancel frame.
+        """
         await super().cancel(frame)
         await self._end_conversation()
         await self._cancel_send_task()
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames through the service.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame processing.
+        """
         await super().process_frame(frame, direction)
 
         if isinstance(frame, StartInterruptionFrame):
@@ -174,29 +227,44 @@ class TavusVideoService(AIService):
             await self.push_frame(frame, direction)
         elif isinstance(frame, TTSAudioRawFrame):
             await self._handle_audio_frame(frame)
+        elif isinstance(frame, OutputTransportReadyFrame):
+            self._transport_ready = True
+            await self.push_frame(frame, direction)
+        elif isinstance(frame, TTSStartedFrame):
+            await self.start_ttfb_metrics()
+        elif isinstance(frame, BotStartedSpeakingFrame):
+            # We constantly receive audio through WebRTC, but most of the time it is silence.
+            # As soon as we receive actual audio, the base output transport will create a
+            # BotStartedSpeakingFrame, which we can use as a signal for the TTFB metrics.
+            await self.stop_ttfb_metrics()
         else:
             await self.push_frame(frame, direction)
 
     async def _handle_interruptions(self):
+        """Handle interruption events by resetting send tasks and notifying client."""
         await self._cancel_send_task()
         await self._create_send_task()
         await self._client.send_interrupt_message()
 
     async def _end_conversation(self):
+        """End the current conversation and reset state."""
         await self._client.stop()
         self._other_participant_has_joined = False
 
     async def _create_send_task(self):
+        """Create the audio sending task if it doesn't exist."""
         if not self._send_task:
-            self._queue = WatchdogQueue(self.task_manager)
+            self._queue = asyncio.Queue()
             self._send_task = self.create_task(self._send_task_handler())
 
     async def _cancel_send_task(self):
+        """Cancel the audio sending task if it exists."""
         if self._send_task:
             await self.cancel_task(self._send_task)
             self._send_task = None
 
     async def _handle_audio_frame(self, frame: OutputAudioRawFrame):
+        """Process audio frames for sending to Tavus."""
         sample_rate = self._client.out_sample_rate
         # 40 ms of audio
         chunk_size = int((sample_rate * 2) / 25)
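
The chunking in `_handle_audio_frame()` above divides one second of 16-bit (2 bytes per sample) mono PCM into 25 pieces, i.e. 40 ms per chunk. A small worked example of that arithmetic, independent of the service code:

    # Worked example of the 40 ms chunk size used in _handle_audio_frame above.
    # Assumes 16-bit (2 bytes per sample) mono PCM, which is what sample_rate * 2 implies.
    def chunk_size_bytes(sample_rate: int, chunk_ms: int = 40) -> int:
        bytes_per_second = sample_rate * 2  # 2 bytes per 16-bit sample
        return int(bytes_per_second * chunk_ms / 1000)

    assert chunk_size_bytes(16000) == 1280  # 16 kHz -> 1280 bytes per 40 ms
    assert chunk_size_bytes(24000) == 1920  # 24 kHz -> 1920 bytes per 40 ms
    assert chunk_size_bytes(24000) == int((24000 * 2) / 25)  # matches the formula in the diff
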
@@ -215,6 +283,7 @@ class TavusVideoService(AIService):
         self._audio_buffer = self._audio_buffer[chunk_size:]
 
     async def _send_task_handler(self):
+        """Handle sending audio frames to the Tavus client."""
         while True:
             frame = await self._queue.get()
             if isinstance(frame, OutputAudioRawFrame) and self._client:
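
The hunks above appear to come from pipecat/services/tavus/video.py (entry 188 in the file list). The updated class docstring describes TavusVideoService sitting between a TTS service and the output transport, proxying TTS audio to Tavus and pushing the returned avatar audio and video downstream once the transport is ready. A minimal wiring sketch under those assumptions; the `tts` and `transport` objects are placeholders, not part of this diff:

    import os

    import aiohttp

    from pipecat.pipeline.pipeline import Pipeline
    from pipecat.services.tavus.video import TavusVideoService


    async def build_pipeline(tts, transport):
        # Constructor arguments follow the __init__ docstring added in this release.
        tavus = TavusVideoService(
            api_key=os.environ["TAVUS_API_KEY"],
            replica_id=os.environ["TAVUS_REPLICA_ID"],
            # persona_id defaults to "pipecat-stream", which keeps Pipecat's own TTS voice.
            session=aiohttp.ClientSession(),
        )
        # TTS audio flows into Tavus; Tavus returns avatar audio and video frames,
        # which the service pushes downstream to the output transport.
        return Pipeline([transport.input(), tts, tavus, transport.output()])
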
@@ -16,12 +16,6 @@ class TogetherLLMService(OpenAILLMService):
 
     This service extends OpenAILLMService to connect to Together.ai's API endpoint while
     maintaining full compatibility with OpenAI's interface and functionality.
-
-    Args:
-        api_key: The API key for accessing Together.ai's API.
-        base_url: The base URL for Together.ai API. Defaults to "https://api.together.xyz/v1".
-        model: The model identifier to use. Defaults to "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo".
-        **kwargs: Additional keyword arguments passed to OpenAILLMService.
     """
 
     def __init__(
@@ -32,6 +26,14 @@ class TogetherLLMService(OpenAILLMService):
         model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
         **kwargs,
     ):
+        """Initialize Together.ai LLM service.
+
+        Args:
+            api_key: The API key for accessing Together.ai's API.
+            base_url: The base URL for Together.ai API. Defaults to "https://api.together.xyz/v1".
+            model: The model identifier to use. Defaults to "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo".
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
 
     def create_client(self, api_key=None, base_url=None, **kwargs):
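
These two hunks appear to come from pipecat/services/together/llm.py (entry 189 in the file list); the constructor documentation simply moved from the class docstring into `__init__`. A minimal instantiation sketch using the documented defaults; the environment variable name is a placeholder:

    import os

    from pipecat.services.together.llm import TogetherLLMService

    # base_url and model can be omitted; the values below are the documented defaults.
    llm = TogetherLLMService(
        api_key=os.environ["TOGETHER_API_KEY"],
        base_url="https://api.together.xyz/v1",
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    )
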
@@ -37,7 +37,6 @@ from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.ai_service import AIService
 from pipecat.services.websocket_service import WebsocketService
 from pipecat.transcriptions.language import Language
-from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
 from pipecat.utils.text.base_text_aggregator import BaseTextAggregator
 from pipecat.utils.text.base_text_filter import BaseTextFilter
 from pipecat.utils.text.simple_text_aggregator import SimpleTextAggregator
@@ -50,21 +49,6 @@ class TTSService(AIService):
     Provides common functionality for TTS services including text aggregation,
     filtering, audio generation, and frame management. Supports configurable
     sentence aggregation, silence insertion, and frame processing control.
-
-    Args:
-        aggregate_sentences: Whether to aggregate text into sentences before synthesis.
-        push_text_frames: Whether to push TextFrames and LLMFullResponseEndFrames.
-        push_stop_frames: Whether to automatically push TTSStoppedFrames.
-        stop_frame_timeout_s: Idle time before pushing TTSStoppedFrame when push_stop_frames is True.
-        push_silence_after_stop: Whether to push silence audio after TTSStoppedFrame.
-        silence_time_s: Duration of silence to push when push_silence_after_stop is True.
-        pause_frame_processing: Whether to pause frame processing during audio generation.
-        sample_rate: Output sample rate for generated audio.
-        text_aggregator: Custom text aggregator for processing incoming text.
-        text_filters: Sequence of text filters to apply after aggregation.
-        text_filter: Single text filter (deprecated, use text_filters).
-        transport_destination: Destination for generated audio frames.
-        **kwargs: Additional arguments passed to the parent AIService.
     """
 
     def __init__(
@@ -97,6 +81,27 @@
         transport_destination: Optional[str] = None,
         **kwargs,
     ):
+        """Initialize the TTS service.
+
+        Args:
+            aggregate_sentences: Whether to aggregate text into sentences before synthesis.
+            push_text_frames: Whether to push TextFrames and LLMFullResponseEndFrames.
+            push_stop_frames: Whether to automatically push TTSStoppedFrames.
+            stop_frame_timeout_s: Idle time before pushing TTSStoppedFrame when push_stop_frames is True.
+            push_silence_after_stop: Whether to push silence audio after TTSStoppedFrame.
+            silence_time_s: Duration of silence to push when push_silence_after_stop is True.
+            pause_frame_processing: Whether to pause frame processing during audio generation.
+            sample_rate: Output sample rate for generated audio.
+            text_aggregator: Custom text aggregator for processing incoming text.
+            text_filters: Sequence of text filters to apply after aggregation.
+            text_filter: Single text filter (deprecated, use text_filters).
+
+                .. deprecated:: 0.0.59
+                    Use `text_filters` instead, which allows multiple filters.
+
+            transport_destination: Destination for generated audio frames.
+            **kwargs: Additional arguments passed to the parent AIService.
+        """
         super().__init__(**kwargs)
         self._aggregate_sentences: bool = aggregate_sentences
         self._push_text_frames: bool = push_text_frames
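
The remaining hunks appear to come from pipecat/services/tts_service.py (entry 190 in the file list). The `__init__` docstring above lists the base-class options every TTS service accepts. A hedged configuration sketch: CartesiaTTSService and MarkdownTextFilter are used only as plausible examples of a concrete subclass and a text filter, and their own parameters (api_key, voice_id) are assumptions; only the last three keyword arguments come from the docstring above:

    import os

    from pipecat.services.cartesia.tts import CartesiaTTSService
    from pipecat.utils.text.markdown_text_filter import MarkdownTextFilter

    tts = CartesiaTTSService(
        api_key=os.environ["CARTESIA_API_KEY"],
        voice_id="YOUR_VOICE_ID",  # placeholder
        # Base TTSService options documented in the new __init__ docstring:
        aggregate_sentences=True,             # synthesize sentence by sentence
        push_stop_frames=True,                # emit TTSStoppedFrame after an idle timeout
        text_filters=[MarkdownTextFilter()],  # strip markdown before synthesis
    )
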
@@ -112,9 +117,10 @@
         self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator()
         self._text_filters: Sequence[BaseTextFilter] = text_filters or []
         self._transport_destination: Optional[str] = transport_destination
-
         self._tracing_enabled: bool = False
-
+        self._voice_config: Dict[str, Any] = {}
+        self._voice = None
+        self._voice_clone_params = None
 
         if text_filter:
             import warnings
@@ -225,6 +231,7 @@
         self._sample_rate = self._init_sample_rate or frame.audio_out_sample_rate
         if self._push_stop_frames and not self._stop_frame_task:
             self._stop_frame_task = self.create_task(self._stop_frame_handler())
+        self._tracing_enabled = frame.enable_tracing
 
     async def stop(self, frame: EndFrame):
         """Stop the TTS service.
@@ -257,7 +264,7 @@
             self._settings[key] = self.language_to_service_language(value)
         elif key == "model":
             self.set_model_name(value)
-        elif key == "voice":
+        elif key == "voice" or key == "voice_id":
             self.set_voice(value)
         elif key == "text_filter":
             for filter in self._text_filters:
@@ -268,9 +275,20 @@
     async def say(self, text: str):
         """Immediately speak the provided text.
 
+        .. deprecated:: 0.0.79
+            Push a `TTSSpeakFrame` instead to ensure frame ordering is maintained.
+
         Args:
             text: The text to speak.
         """
+        import warnings
+
+        warnings.warn(
+            "`TTSService.say()` is deprecated. Push a `TTSSpeakFrame` instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
         await self.queue_frame(TTSSpeakFrame(text))
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
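
Since `say()` now emits a DeprecationWarning, callers are expected to push a `TTSSpeakFrame` through the pipeline instead. A minimal sketch of the replacement, assuming an existing `PipelineTask`:

    from pipecat.frames.frames import TTSSpeakFrame
    from pipecat.pipeline.task import PipelineTask


    async def greet(task: PipelineTask):
        # Previously: await tts.say("Hello there!")  (now deprecated)
        # Preferred: queue the frame so it stays ordered with the rest of the pipeline traffic.
        await task.queue_frames([TTSSpeakFrame("Hello there!")])
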
@@ -433,7 +451,7 @@
         while True:
             try:
                 frame = await asyncio.wait_for(
-                    self._stop_frame_queue.get(), self._stop_frame_timeout_s
+                    self._stop_frame_queue.get(), timeout=self._stop_frame_timeout_s
                 )
                 if isinstance(frame, TTSStartedFrame):
                     has_started = True
@@ -443,8 +461,6 @@
                 if has_started:
                     await self.push_frame(TTSStoppedFrame())
                     has_started = False
-            finally:
-                self.reset_watchdog()
 
 
 class WordTTSService(TTSService):
@@ -452,12 +468,14 @@ class WordTTSService(TTSService):
 
     Word timestamps are useful to synchronize audio with text of the spoken
     words. This way only the spoken words are added to the conversation context.
-
-    Args:
-        **kwargs: Additional arguments passed to the parent TTSService.
     """
 
     def __init__(self, **kwargs):
+        """Initialize the Word TTS service.
+
+        Args:
+            **kwargs: Additional arguments passed to the parent TTSService.
+        """
         super().__init__(**kwargs)
         self._initial_word_timestamp = -1
         self._words_task = None
@@ -529,7 +547,7 @@
 
     def _create_words_task(self):
         if not self._words_task:
-            self._words_queue = WatchdogQueue(self.task_manager)
+            self._words_queue = asyncio.Queue()
             self._words_task = self.create_task(self._words_task_handler())
 
     async def _stop_words_task(self):
@@ -566,22 +584,23 @@ class WebsocketTTSService(TTSService, WebsocketService):
     Combines TTS functionality with websocket connectivity, providing automatic
     error handling and reconnection capabilities.
 
-    Args:
-        reconnect_on_error: Whether to automatically reconnect on websocket errors.
-        **kwargs: Additional arguments passed to parent classes.
-
     Event handlers:
         on_connection_error: Called when a websocket connection error occurs.
 
-    Example:
-        ```python
+    Example::
+
         @tts.event_handler("on_connection_error")
         async def on_connection_error(tts: TTSService, error: str):
            logger.error(f"TTS connection error: {error}")
-        ```
     """
 
     def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
+        """Initialize the Websocket TTS service.
+
+        Args:
+            reconnect_on_error: Whether to automatically reconnect on websocket errors.
+            **kwargs: Additional arguments passed to parent classes.
+        """
         TTSService.__init__(self, **kwargs)
         WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
         self._register_event_handler("on_connection_error")
@@ -596,12 +615,14 @@ class InterruptibleTTSService(WebsocketTTSService):
 
     Designed for TTS services that don't support word timestamps. Handles interruptions
     by reconnecting the websocket when the bot is speaking and gets interrupted.
-
-    Args:
-        **kwargs: Additional arguments passed to the parent WebsocketTTSService.
     """
 
     def __init__(self, **kwargs):
+        """Initialize the Interruptible TTS service.
+
+        Args:
+            **kwargs: Additional arguments passed to the parent WebsocketTTSService.
+        """
         super().__init__(**kwargs)
 
         # Indicates if the bot is speaking. If the bot is not speaking we don't
@@ -635,22 +656,23 @@ class WebsocketWordTTSService(WordTTSService, WebsocketService):
 
     Combines word timestamp functionality with websocket connectivity.
 
-    Args:
-        reconnect_on_error: Whether to automatically reconnect on websocket errors.
-        **kwargs: Additional arguments passed to parent classes.
-
     Event handlers:
         on_connection_error: Called when a websocket connection error occurs.
 
-    Example:
-        ```python
+    Example::
+
         @tts.event_handler("on_connection_error")
         async def on_connection_error(tts: TTSService, error: str):
            logger.error(f"TTS connection error: {error}")
-        ```
     """
 
     def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
+        """Initialize the Websocket Word TTS service.
+
+        Args:
+            reconnect_on_error: Whether to automatically reconnect on websocket errors.
+            **kwargs: Additional arguments passed to parent classes.
+        """
         WordTTSService.__init__(self, **kwargs)
         WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
         self._register_event_handler("on_connection_error")
@@ -665,12 +687,14 @@ class InterruptibleWordTTSService(WebsocketWordTTSService):
 
     For TTS services that support word timestamps but can't correlate generated
     audio with requested text. Handles interruptions by reconnecting when needed.
-
-    Args:
-        **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
     """
 
     def __init__(self, **kwargs):
+        """Initialize the Interruptible Word TTS service.
+
+        Args:
+            **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
+        """
         super().__init__(**kwargs)
 
         # Indicates if the bot is speaking. If the bot is not speaking we don't
@@ -713,12 +737,14 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
     The audio received from the TTS will be played in context order. That is, if
     we requested audio for a context "A" and then audio for context "B", the
     audio from context ID "A" will be played first.
-
-    Args:
-        **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
     """
 
     def __init__(self, **kwargs):
+        """Initialize the Audio Context Word TTS service.
+
+        Args:
+            **kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
+        """
         super().__init__(**kwargs)
         self._contexts: Dict[str, asyncio.Queue] = {}
         self._audio_context_task = None
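
The AudioContextWordTTSService docstring above promises that audio plays back in the order the contexts were created: everything queued for context "A" plays before anything from context "B". A standalone sketch of that FIFO-of-queues idea (an illustration of the ordering guarantee, not the class's actual implementation):

    import asyncio


    async def play_in_context_order(contexts: asyncio.Queue, play) -> None:
        """Drain per-context audio queues strictly in creation order.

        `contexts` is a queue of per-context asyncio.Queue objects; `play` stands in
        for pushing a frame downstream. A None in the outer queue means no more
        contexts; a None in an inner queue means that context is finished.
        """
        while True:
            context_queue = await contexts.get()
            if context_queue is None:  # no more contexts
                return
            while True:
                frame = await context_queue.get()
                if frame is None:  # this context is done, move on to the next one
                    break
                await play(frame)
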
@@ -792,7 +818,7 @@
         # Indicate no more audio contexts are available. this will end the
         # task cleanly after all contexts have been processed.
         await self._contexts_queue.put(None)
-        await self.wait_for_task(self._audio_context_task)
+        await self._audio_context_task
         self._audio_context_task = None
 
     async def cancel(self, frame: CancelFrame):
@@ -811,7 +837,7 @@
 
     def _create_audio_context_task(self):
         if not self._audio_context_task:
-            self._contexts_queue = WatchdogQueue(self.task_manager)
+            self._contexts_queue = asyncio.Queue()
             self._contexts: Dict[str, asyncio.Queue] = {}
             self._audio_context_task = self.create_task(self._audio_context_task_handler())
 
@@ -853,12 +879,10 @@
         while running:
             try:
                 frame = await asyncio.wait_for(queue.get(), timeout=AUDIO_CONTEXT_TIMEOUT)
-                self.reset_watchdog()
                 if frame:
                     await self.push_frame(frame)
                 running = frame is not None
             except asyncio.TimeoutError:
-                self.reset_watchdog()
                 # We didn't get audio, so let's consider this context finished.
                 logger.trace(f"{self} time out on audio context {context_id}")
                 break