dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
@@ -4,14 +4,20 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """PlayHT text-to-speech service implementations.
8
+
9
+ This module provides integration with PlayHT's text-to-speech API
10
+ supporting both WebSocket streaming and HTTP-based synthesis.
11
+ """
12
+
7
13
  import io
8
14
  import json
9
15
  import struct
10
16
  import uuid
17
+ import warnings
11
18
  from typing import AsyncGenerator, Optional
12
19
 
13
20
  import aiohttp
14
- import websockets
15
21
  from loguru import logger
16
22
  from pydantic import BaseModel
17
23
 
@@ -32,16 +38,23 @@ from pipecat.transcriptions.language import Language
32
38
  from pipecat.utils.tracing.service_decorators import traced_tts
33
39
 
34
40
  try:
35
- from pyht.async_client import AsyncClient
36
- from pyht.client import Format, TTSOptions
37
- from pyht.client import Language as PlayHTLanguage
41
+ from websockets.asyncio.client import connect as websocket_connect
42
+ from websockets.protocol import State
38
43
  except ModuleNotFoundError as e:
39
44
  logger.error(f"Exception: {e}")
40
- logger.error("In order to use PlayHT, you need to `pip install pipecat-ai[playht]`.")
45
+ logger.error("In order to use PlayHTTTSService, you need to `pip install pipecat-ai[playht]`.")
41
46
  raise Exception(f"Missing module: {e}")
42
47
 
43
48
 
44
49
  def language_to_playht_language(language: Language) -> Optional[str]:
50
+ """Convert a Language enum to PlayHT language code.
51
+
52
+ Args:
53
+ language: The Language enum value to convert.
54
+
55
+ Returns:
56
+ The corresponding PlayHT language code, or None if not supported.
57
+ """
45
58
  BASE_LANGUAGES = {
46
59
  Language.AF: "afrikans",
47
60
  Language.AM: "amharic",
@@ -96,7 +109,22 @@ def language_to_playht_language(language: Language) -> Optional[str]:
96
109
 
97
110
 
98
111
  class PlayHTTTSService(InterruptibleTTSService):
112
+ """PlayHT WebSocket-based text-to-speech service.
113
+
114
+ Provides real-time text-to-speech synthesis using PlayHT's WebSocket API.
115
+ Supports streaming audio generation with configurable voice engines and
116
+ language settings.
117
+ """
118
+
99
119
  class InputParams(BaseModel):
120
+ """Input parameters for PlayHT TTS configuration.
121
+
122
+ Parameters:
123
+ language: Language for synthesis. Defaults to English.
124
+ speed: Speech speed multiplier. Defaults to 1.0.
125
+ seed: Random seed for voice consistency.
126
+ """
127
+
100
128
  language: Optional[Language] = Language.EN
101
129
  speed: Optional[float] = 1.0
102
130
  seed: Optional[int] = None
@@ -113,6 +141,18 @@ class PlayHTTTSService(InterruptibleTTSService):
113
141
  params: Optional[InputParams] = None,
114
142
  **kwargs,
115
143
  ):
144
+ """Initialize the PlayHT WebSocket TTS service.
145
+
146
+ Args:
147
+ api_key: PlayHT API key for authentication.
148
+ user_id: PlayHT user ID for authentication.
149
+ voice_url: URL of the voice to use for synthesis.
150
+ voice_engine: Voice engine to use. Defaults to "Play3.0-mini".
151
+ sample_rate: Audio sample rate. If None, uses default.
152
+ output_format: Audio output format. Defaults to "wav".
153
+ params: Additional input parameters for voice customization.
154
+ **kwargs: Additional arguments passed to parent InterruptibleTTSService.
155
+ """
116
156
  super().__init__(
117
157
  pause_frame_processing=True,
118
158
  sample_rate=sample_rate,
@@ -140,30 +180,60 @@ class PlayHTTTSService(InterruptibleTTSService):
140
180
  self.set_voice(voice_url)
141
181
 
142
182
  def can_generate_metrics(self) -> bool:
183
+ """Check if this service can generate processing metrics.
184
+
185
+ Returns:
186
+ True, as PlayHT service supports metrics generation.
187
+ """
143
188
  return True
144
189
 
145
190
  def language_to_service_language(self, language: Language) -> Optional[str]:
191
+ """Convert a Language enum to PlayHT service language format.
192
+
193
+ Args:
194
+ language: The language to convert.
195
+
196
+ Returns:
197
+ The PlayHT-specific language code, or None if not supported.
198
+ """
146
199
  return language_to_playht_language(language)
147
200
 
148
201
  async def start(self, frame: StartFrame):
202
+ """Start the PlayHT TTS service.
203
+
204
+ Args:
205
+ frame: The start frame containing initialization parameters.
206
+ """
149
207
  await super().start(frame)
150
208
  await self._connect()
151
209
 
152
210
  async def stop(self, frame: EndFrame):
211
+ """Stop the PlayHT TTS service.
212
+
213
+ Args:
214
+ frame: The end frame.
215
+ """
153
216
  await super().stop(frame)
154
217
  await self._disconnect()
155
218
 
156
219
  async def cancel(self, frame: CancelFrame):
220
+ """Cancel the PlayHT TTS service.
221
+
222
+ Args:
223
+ frame: The cancel frame.
224
+ """
157
225
  await super().cancel(frame)
158
226
  await self._disconnect()
159
227
 
160
228
  async def _connect(self):
229
+ """Connect to PlayHT WebSocket and start receive task."""
161
230
  await self._connect_websocket()
162
231
 
163
232
  if self._websocket and not self._receive_task:
164
233
  self._receive_task = self.create_task(self._receive_task_handler(self._report_error))
165
234
 
166
235
  async def _disconnect(self):
236
+ """Disconnect from PlayHT WebSocket and clean up tasks."""
167
237
  if self._receive_task:
168
238
  await self.cancel_task(self._receive_task)
169
239
  self._receive_task = None
@@ -171,8 +241,9 @@ class PlayHTTTSService(InterruptibleTTSService):
171
241
  await self._disconnect_websocket()
172
242
 
173
243
  async def _connect_websocket(self):
244
+ """Connect to PlayHT websocket."""
174
245
  try:
175
- if self._websocket and self._websocket.open:
246
+ if self._websocket and self._websocket.state is State.OPEN:
176
247
  return
177
248
 
178
249
  logger.debug("Connecting to PlayHT")
@@ -183,7 +254,7 @@ class PlayHTTTSService(InterruptibleTTSService):
183
254
  if not isinstance(self._websocket_url, str):
184
255
  raise ValueError("WebSocket URL is not a string")
185
256
 
186
- self._websocket = await websockets.connect(self._websocket_url)
257
+ self._websocket = await websocket_connect(self._websocket_url)
187
258
  except ValueError as e:
188
259
  logger.error(f"{self} initialization error: {e}")
189
260
  self._websocket = None
@@ -194,6 +265,7 @@ class PlayHTTTSService(InterruptibleTTSService):
194
265
  await self._call_event_handler("on_connection_error", f"{e}")
195
266
 
196
267
  async def _disconnect_websocket(self):
268
+ """Disconnect from PlayHT websocket."""
197
269
  try:
198
270
  await self.stop_all_metrics()
199
271
 
@@ -207,6 +279,7 @@ class PlayHTTTSService(InterruptibleTTSService):
207
279
  self._websocket = None
208
280
 
209
281
  async def _get_websocket_url(self):
282
+ """Retrieve WebSocket URL from PlayHT API."""
210
283
  async with aiohttp.ClientSession() as session:
211
284
  async with session.post(
212
285
  "https://api.play.ht/api/v4/websocket-auth",
@@ -235,16 +308,19 @@ class PlayHTTTSService(InterruptibleTTSService):
235
308
  raise Exception(f"Failed to get WebSocket URL: {response.status}")
236
309
 
237
310
  def _get_websocket(self):
311
+ """Get the WebSocket connection if available."""
238
312
  if self._websocket:
239
313
  return self._websocket
240
314
  raise Exception("Websocket not connected")
241
315
 
242
316
  async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
317
+ """Handle interruption by stopping metrics and clearing request ID."""
243
318
  await super()._handle_interruption(frame, direction)
244
319
  await self.stop_all_metrics()
245
320
  self._request_id = None
246
321
 
247
322
  async def _receive_messages(self):
323
+ """Receive messages from PlayHT websocket."""
248
324
  async for message in self._get_websocket():
249
325
  if isinstance(message, bytes):
250
326
  # Skip the WAV header message
@@ -273,11 +349,19 @@ class PlayHTTTSService(InterruptibleTTSService):
273
349
 
274
350
  @traced_tts
275
351
  async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
352
+ """Generate TTS audio from text using PlayHT's WebSocket API.
353
+
354
+ Args:
355
+ text: The text to synthesize into speech.
356
+
357
+ Yields:
358
+ Frame: Audio frames containing the synthesized speech.
359
+ """
276
360
  logger.debug(f"{self}: Generating TTS [{text}]")
277
361
 
278
362
  try:
279
363
  # Reconnect if the websocket is closed
280
- if not self._websocket or self._websocket.closed:
364
+ if not self._websocket or self._websocket.state is State.CLOSED:
281
365
  await self._connect()
282
366
 
283
367
  if not self._request_id:
@@ -316,7 +400,22 @@ class PlayHTTTSService(InterruptibleTTSService):
316
400
 
317
401
 
318
402
  class PlayHTHttpTTSService(TTSService):
403
+ """PlayHT HTTP-based text-to-speech service.
404
+
405
+ Provides text-to-speech synthesis using PlayHT's HTTP API for simpler,
406
+ non-streaming synthesis. Suitable for use cases where streaming is not
407
+ required and simpler integration is preferred.
408
+ """
409
+
319
410
  class InputParams(BaseModel):
411
+ """Input parameters for PlayHT HTTP TTS configuration.
412
+
413
+ Parameters:
414
+ language: Language for synthesis. Defaults to English.
415
+ speed: Speech speed multiplier. Defaults to 1.0.
416
+ seed: Random seed for voice consistency.
417
+ """
418
+
320
419
  language: Optional[Language] = Language.EN
321
420
  speed: Optional[float] = 1.0
322
421
  seed: Optional[int] = None
@@ -328,40 +427,59 @@ class PlayHTHttpTTSService(TTSService):
328
427
  user_id: str,
329
428
  voice_url: str,
330
429
  voice_engine: str = "Play3.0-mini",
331
- protocol: str = "http", # Options: http, ws
430
+ protocol: Optional[str] = None,
431
+ output_format: str = "wav",
332
432
  sample_rate: Optional[int] = None,
333
433
  params: Optional[InputParams] = None,
334
434
  **kwargs,
335
435
  ):
436
+ """Initialize the PlayHT HTTP TTS service.
437
+
438
+ Args:
439
+ api_key: PlayHT API key for authentication.
440
+ user_id: PlayHT user ID for authentication.
441
+ voice_url: URL of the voice to use for synthesis.
442
+ voice_engine: Voice engine to use. Defaults to "Play3.0-mini".
443
+ protocol: Protocol to use ("http" or "ws").
444
+
445
+ .. deprecated:: 0.0.80
446
+ This parameter no longer has any effect and will be removed in a future version.
447
+ Use PlayHTTTSService for WebSocket or PlayHTHttpTTSService for HTTP.
448
+
449
+ output_format: Audio output format. Defaults to "wav".
450
+ sample_rate: Audio sample rate. If None, uses default.
451
+ params: Additional input parameters for voice customization.
452
+ **kwargs: Additional arguments passed to parent TTSService.
453
+ """
336
454
  super().__init__(sample_rate=sample_rate, **kwargs)
337
455
 
456
+ # Warn about deprecated protocol parameter if explicitly provided
457
+ if protocol:
458
+ warnings.warn(
459
+ "The 'protocol' parameter is deprecated and will be removed in a future version.",
460
+ DeprecationWarning,
461
+ stacklevel=2,
462
+ )
463
+
338
464
  params = params or PlayHTHttpTTSService.InputParams()
339
465
 
340
466
  self._user_id = user_id
341
467
  self._api_key = api_key
342
468
 
343
- self._client = AsyncClient(
344
- user_id=self._user_id,
345
- api_key=self._api_key,
346
- )
347
-
348
469
  # Check if voice_engine contains protocol information (backward compatibility)
349
470
  if "-http" in voice_engine:
350
471
  # Extract the base engine name
351
472
  voice_engine = voice_engine.replace("-http", "")
352
- protocol = "http"
353
473
  elif "-ws" in voice_engine:
354
474
  # Extract the base engine name
355
475
  voice_engine = voice_engine.replace("-ws", "")
356
- protocol = "ws"
357
476
 
358
477
  self._settings = {
359
478
  "language": self.language_to_service_language(params.language)
360
479
  if params.language
361
480
  else "english",
362
- "format": Format.FORMAT_WAV,
481
+ "output_format": output_format,
363
482
  "voice_engine": voice_engine,
364
- "protocol": protocol,
365
483
  "speed": params.speed,
366
484
  "seed": params.seed,
367
485
  }
@@ -369,74 +487,118 @@ class PlayHTHttpTTSService(TTSService):
369
487
  self.set_voice(voice_url)
370
488
 
371
489
  async def start(self, frame: StartFrame):
490
+ """Start the PlayHT HTTP TTS service.
491
+
492
+ Args:
493
+ frame: The start frame containing initialization parameters.
494
+ """
372
495
  await super().start(frame)
373
496
  self._settings["sample_rate"] = self.sample_rate
374
497
 
375
- def _create_options(self) -> TTSOptions:
376
- language_str = self._settings["language"]
377
- playht_language = None
378
- if language_str:
379
- # Convert string to PlayHT Language enum
380
- for lang in PlayHTLanguage:
381
- if lang.value == language_str:
382
- playht_language = lang
383
- break
384
-
385
- return TTSOptions(
386
- voice=self._voice_id,
387
- language=playht_language,
388
- sample_rate=self.sample_rate,
389
- format=self._settings["format"],
390
- speed=self._settings["speed"],
391
- seed=self._settings["seed"],
392
- )
393
-
394
498
  def can_generate_metrics(self) -> bool:
499
+ """Check if this service can generate processing metrics.
500
+
501
+ Returns:
502
+ True, as PlayHT HTTP service supports metrics generation.
503
+ """
395
504
  return True
396
505
 
397
506
  def language_to_service_language(self, language: Language) -> Optional[str]:
507
+ """Convert a Language enum to PlayHT service language format.
508
+
509
+ Args:
510
+ language: The language to convert.
511
+
512
+ Returns:
513
+ The PlayHT-specific language code, or None if not supported.
514
+ """
398
515
  return language_to_playht_language(language)
399
516
 
400
517
  @traced_tts
401
518
  async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
519
+ """Generate TTS audio from text using PlayHT's HTTP API.
520
+
521
+ Args:
522
+ text: The text to synthesize into speech.
523
+
524
+ Yields:
525
+ Frame: Audio frames containing the synthesized speech.
526
+ """
402
527
  logger.debug(f"{self}: Generating TTS [{text}]")
403
528
 
404
529
  try:
405
- options = self._create_options()
406
-
407
530
  await self.start_ttfb_metrics()
408
531
 
409
- playht_gen = self._client.tts(
410
- text,
411
- voice_engine=self._settings["voice_engine"],
412
- protocol=self._settings["protocol"],
413
- options=options,
414
- )
532
+ # Prepare the request payload
533
+ payload = {
534
+ "text": text,
535
+ "voice": self._voice_id,
536
+ "voice_engine": self._settings["voice_engine"],
537
+ "output_format": self._settings["output_format"],
538
+ "sample_rate": self.sample_rate,
539
+ "language": self._settings["language"],
540
+ }
541
+
542
+ # Add optional parameters if they exist
543
+ if self._settings["speed"] is not None:
544
+ payload["speed"] = self._settings["speed"]
545
+ if self._settings["seed"] is not None:
546
+ payload["seed"] = self._settings["seed"]
547
+
548
+ headers = {
549
+ "Authorization": f"Bearer {self._api_key}",
550
+ "X-User-Id": self._user_id,
551
+ "Content-Type": "application/json",
552
+ "Accept": "*/*",
553
+ }
415
554
 
416
555
  await self.start_tts_usage_metrics(text)
417
556
 
418
557
  yield TTSStartedFrame()
419
558
 
420
- b = bytearray()
421
- in_header = True
422
- async for chunk in playht_gen:
423
- # skip the RIFF header.
424
- if in_header:
425
- b.extend(chunk)
426
- if len(b) <= 36:
427
- continue
428
- else:
429
- fh = io.BytesIO(b)
430
- fh.seek(36)
431
- (data, size) = struct.unpack("<4sI", fh.read(8))
432
- while data != b"data":
433
- fh.read(size)
434
- (data, size) = struct.unpack("<4sI", fh.read(8))
435
- in_header = False
436
- elif len(chunk) > 0:
437
- await self.stop_ttfb_metrics()
438
- frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)
439
- yield frame
559
+ async with aiohttp.ClientSession() as session:
560
+ async with session.post(
561
+ "https://api.play.ht/api/v2/tts/stream",
562
+ headers=headers,
563
+ json=payload,
564
+ ) as response:
565
+ if response.status not in (200, 201):
566
+ error_text = await response.text()
567
+ raise Exception(f"PlayHT API error {response.status}: {error_text}")
568
+
569
+ in_header = True
570
+ buffer = b""
571
+
572
+ CHUNK_SIZE = self.chunk_size
573
+
574
+ async for chunk in response.content.iter_chunked(CHUNK_SIZE):
575
+ if len(chunk) == 0:
576
+ continue
577
+
578
+ # Skip the RIFF header
579
+ if in_header:
580
+ buffer += chunk
581
+ if len(buffer) <= 36:
582
+ continue
583
+ else:
584
+ fh = io.BytesIO(buffer)
585
+ fh.seek(36)
586
+ (data, size) = struct.unpack("<4sI", fh.read(8))
587
+ while data != b"data":
588
+ fh.read(size)
589
+ (data, size) = struct.unpack("<4sI", fh.read(8))
590
+ # Extract audio data after header
591
+ audio_data = buffer[fh.tell() :]
592
+ if len(audio_data) > 0:
593
+ await self.stop_ttfb_metrics()
594
+ frame = TTSAudioRawFrame(audio_data, self.sample_rate, 1)
595
+ yield frame
596
+ in_header = False
597
+ elif len(chunk) > 0:
598
+ await self.stop_ttfb_metrics()
599
+ frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)
600
+ yield frame
601
+
440
602
  except Exception as e:
441
603
  logger.error(f"{self} error generating TTS: {e}")
442
604
  finally:
@@ -16,12 +16,6 @@ class QwenLLMService(OpenAILLMService):
16
16
 
17
17
  This service extends OpenAILLMService to connect to Qwen's API endpoint while
18
18
  maintaining full compatibility with OpenAI's interface and functionality.
19
-
20
- Args:
21
- api_key: The API key for accessing Qwen's API (DashScope API key).
22
- base_url: Base URL for Qwen API. Defaults to "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".
23
- model: The model identifier to use. Defaults to "qwen-plus".
24
- **kwargs: Additional keyword arguments passed to OpenAILLMService.
25
19
  """
26
20
 
27
21
  def __init__(
@@ -32,6 +26,14 @@ class QwenLLMService(OpenAILLMService):
32
26
  model: str = "qwen-plus",
33
27
  **kwargs,
34
28
  ):
29
+ """Initialize the Qwen LLM service.
30
+
31
+ Args:
32
+ api_key: The API key for accessing Qwen's API (DashScope API key).
33
+ base_url: Base URL for Qwen API. Defaults to "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".
34
+ model: The model identifier to use. Defaults to "qwen-plus".
35
+ **kwargs: Additional keyword arguments passed to OpenAILLMService.
36
+ """
35
37
  super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
36
38
  logger.info(f"Initialized Qwen LLM service with model: {model}")
37
39