dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244) hide show
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
@@ -0,0 +1,501 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Async text-to-speech service implementations."""
8
+
9
+ import asyncio
10
+ import base64
11
+ import json
12
+ from typing import AsyncGenerator, Optional
13
+
14
+ import aiohttp
15
+ from loguru import logger
16
+ from pydantic import BaseModel
17
+
18
+ from pipecat.frames.frames import (
19
+ CancelFrame,
20
+ EndFrame,
21
+ ErrorFrame,
22
+ Frame,
23
+ StartFrame,
24
+ StartInterruptionFrame,
25
+ TTSAudioRawFrame,
26
+ TTSStartedFrame,
27
+ TTSStoppedFrame,
28
+ )
29
+ from pipecat.processors.frame_processor import FrameDirection
30
+ from pipecat.services.tts_service import InterruptibleTTSService, TTSService
31
+ from pipecat.transcriptions.language import Language
32
+ from pipecat.utils.tracing.service_decorators import traced_tts
33
+
34
+ try:
35
+ import websockets
36
+ from websockets.asyncio.client import connect as websocket_connect
37
+ from websockets.protocol import State
38
+ except ModuleNotFoundError as e:
39
+ logger.error(f"Exception: {e}")
40
+ logger.error("In order to use Async, you need to `pip install pipecat-ai[asyncai]`.")
41
+ raise Exception(f"Missing module: {e}")
42
+
43
+
44
def language_to_async_language(language: Language) -> Optional[str]:
    """Map a Language enum value onto the language code used by Async.

    Args:
        language: The Language enum value to convert.

    Returns:
        The matching Async language code, or None when neither the language
        itself nor its base language is supported.
    """
    supported = {
        Language.EN: "en",
    }

    direct_match = supported.get(language)
    if direct_match:
        return direct_match

    # No exact entry: reduce a hyphenated variant to its base part
    # (a value of the form "en-XX" becomes "en") and accept it only when
    # that base code is one of the supported codes.
    base_code = str(language.value).split("-")[0].lower()
    if base_code in supported.values():
        return base_code
    return None
68
+
69
+
70
class AsyncAITTSService(InterruptibleTTSService):
    """Async TTS service with WebSocket streaming.

    Provides text-to-speech using Async's streaming WebSocket API.
    """

    class InputParams(BaseModel):
        """Input parameters for Async TTS configuration.

        Parameters:
            language: Language to use for synthesis.
        """

        language: Optional[Language] = Language.EN

    def __init__(
        self,
        *,
        api_key: str,
        voice_id: str,
        version: str = "v1",
        url: str = "wss://api.async.ai/text_to_speech/websocket/ws",
        model: str = "asyncflow_v2.0",
        sample_rate: Optional[int] = None,
        encoding: str = "pcm_s16le",
        container: str = "raw",
        params: Optional[InputParams] = None,
        aggregate_sentences: Optional[bool] = True,
        **kwargs,
    ):
        """Initialize the Async TTS service.

        Args:
            api_key: Async API key.
            voice_id: UUID of the voice to use for synthesis. See docs for a full list:
                https://docs.async.ai/list-voices-16699698e0
            version: Async API version.
            url: WebSocket URL for Async TTS API.
            model: TTS model to use (e.g., "asyncflow_v2.0").
            sample_rate: Audio sample rate.
            encoding: Audio encoding format.
            container: Audio container format.
            params: Additional input parameters for voice customization.
            aggregate_sentences: Whether to aggregate sentences within the TTSService.
            **kwargs: Additional arguments passed to the parent service.
        """
        super().__init__(
            aggregate_sentences=aggregate_sentences,
            push_text_frames=False,
            pause_frame_processing=True,
            push_stop_frames=True,
            sample_rate=sample_rate,
            **kwargs,
        )

        params = params or AsyncAITTSService.InputParams()

        self._api_key = api_key
        self._api_version = version
        self._url = url
        self._settings = {
            "output_format": {
                "container": container,
                "encoding": encoding,
                # Placeholder; start() overwrites this with self.sample_rate.
                "sample_rate": 0,
            },
            "language": self.language_to_service_language(params.language)
            if params.language
            else "en",
        }

        self.set_model_name(model)
        self.set_voice(voice_id)

        self._receive_task = None
        self._keepalive_task = None
        # True between the TTSStartedFrame yielded by run_tts() and the next
        # TTSStoppedFrame / StartInterruptionFrame seen by push_frame().
        self._started = False

    def can_generate_metrics(self) -> bool:
        """Check if this service can generate processing metrics.

        Returns:
            True, as Async service supports metrics generation.
        """
        return True

    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Async language format.

        Args:
            language: The language to convert.

        Returns:
            The Async-specific language code, or None if not supported.
        """
        return language_to_async_language(language)

    def _build_msg(self, text: str = "", force: bool = False) -> str:
        """Serialize a synthesis request to the JSON string sent on the socket.

        Args:
            text: Text placed under the "transcript" key.
            force: Value placed under the "force" key.

        Returns:
            The JSON-encoded message.
        """
        msg = {"transcript": text, "force": force}
        return json.dumps(msg)

    async def start(self, frame: StartFrame):
        """Start the Async TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        """
        await super().start(frame)
        self._settings["output_format"]["sample_rate"] = self.sample_rate
        await self._connect()

    async def stop(self, frame: EndFrame):
        """Stop the Async TTS service.

        Args:
            frame: The end frame.
        """
        await super().stop(frame)
        await self._disconnect()

    async def cancel(self, frame: CancelFrame):
        """Cancel the Async TTS service.

        Args:
            frame: The cancel frame.
        """
        await super().cancel(frame)
        await self._disconnect()

    async def _connect(self):
        """Open the websocket and start the receive and keepalive tasks.

        The tasks are only created when a websocket is available and the
        corresponding task does not already exist.
        """
        await self._connect_websocket()

        if self._websocket and not self._receive_task:
            self._receive_task = self.create_task(self._receive_task_handler(self._report_error))

        if self._websocket and not self._keepalive_task:
            self._keepalive_task = self.create_task(self._keepalive_task_handler())

    async def _disconnect(self):
        """Cancel the background tasks, then close the websocket."""
        if self._receive_task:
            await self.cancel_task(self._receive_task)
            self._receive_task = None

        if self._keepalive_task:
            await self.cancel_task(self._keepalive_task)
            self._keepalive_task = None

        await self._disconnect_websocket()

    async def _connect_websocket(self):
        """Connect to Async and send the initial configuration message.

        Does nothing when the current websocket is already open. Any failure
        is logged, the websocket is reset to None and the
        "on_connection_error" event handler is called; the exception is not
        re-raised.
        """
        try:
            if self._websocket and self._websocket.state is State.OPEN:
                return
            logger.debug("Connecting to Async")
            self._websocket = await websocket_connect(
                f"{self._url}?api_key={self._api_key}&version={self._api_version}"
            )
            init_msg = {
                "model_id": self._model_name,
                "voice": {"mode": "id", "id": self._voice_id},
                "output_format": self._settings["output_format"],
                "language": self._settings["language"],
            }

            await self._get_websocket().send(json.dumps(init_msg))
        except Exception as e:
            logger.error(f"{self} initialization error: {e}")
            self._websocket = None
            await self._call_event_handler("on_connection_error", f"{e}")

    async def _disconnect_websocket(self):
        """Stop all metrics and close the websocket, if any.

        Errors while closing are logged rather than raised. The websocket
        reference and the started flag are always reset.
        """
        try:
            await self.stop_all_metrics()

            if self._websocket:
                logger.debug("Disconnecting from Async")
                await self._websocket.close()
        except Exception as e:
            logger.error(f"{self} error closing websocket: {e}")
        finally:
            self._websocket = None
            self._started = False

    def _get_websocket(self):
        """Return the current websocket.

        Raises:
            Exception: If no websocket is connected.
        """
        if self._websocket:
            return self._websocket
        raise Exception("Websocket not connected")

    async def flush_audio(self):
        """Flush any pending audio."""
        if not self._websocket:
            return
        logger.trace(f"{self}: flushing audio")
        msg = self._build_msg(text=" ", force=True)
        await self._websocket.send(msg)

    async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
        """Push a frame downstream with special handling for stop conditions.

        Args:
            frame: The frame to push.
            direction: The direction to push the frame.
        """
        await super().push_frame(frame, direction)
        # Reset so the next run_tts() call yields a fresh TTSStartedFrame.
        if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
            self._started = False

    async def _receive_messages(self):
        """Consume websocket messages and turn them into frames.

        Messages with an "audio" field are base64-decoded and pushed as
        TTSAudioRawFrame. Messages with an "error_code" field push a
        TTSStoppedFrame, stop all metrics and push an ErrorFrame. Empty
        messages are skipped; anything else is logged as unknown.
        """
        async for message in self._get_websocket():
            msg = json.loads(message)
            if not msg:
                continue

            elif msg.get("audio"):
                await self.stop_ttfb_metrics()
                frame = TTSAudioRawFrame(
                    audio=base64.b64decode(msg["audio"]),
                    sample_rate=self.sample_rate,
                    num_channels=1,
                )
                await self.push_frame(frame)
            elif msg.get("error_code"):
                logger.error(f"{self} error: {msg}")
                await self.push_frame(TTSStoppedFrame())
                await self.stop_all_metrics()
                # An error payload without a "message" field must not raise
                # KeyError out of this coroutine; report the whole payload
                # instead so the ErrorFrame is still pushed.
                detail = msg.get("message", msg)
                await self.push_error(ErrorFrame(f"{self} error: {detail}"))
            else:
                logger.error(f"{self} error, unknown message type: {msg}")

    async def _keepalive_task_handler(self):
        """Send periodic keepalive messages to maintain WebSocket connection."""
        KEEPALIVE_SLEEP = 3
        while True:
            await asyncio.sleep(KEEPALIVE_SLEEP)
            try:
                if self._websocket and self._websocket.state is State.OPEN:
                    keepalive_message = {"transcript": " "}
                    logger.trace("Sending keepalive message")
                    await self._websocket.send(json.dumps(keepalive_message))
            except websockets.ConnectionClosed as e:
                logger.warning(f"{self} keepalive error: {e}")
                break

    @traced_tts
    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        """Generate speech from text using Async API websocket endpoint.

        Audio is not yielded from here: it arrives on the websocket and is
        pushed by _receive_messages(). This generator yields a
        TTSStartedFrame when a new utterance begins, a TTSStoppedFrame when
        sending fails, and otherwise a single None.

        Args:
            text: The text to synthesize into speech.

        Yields:
            Frame: Control frames for the synthesis request (or None).
        """
        logger.debug(f"{self}: Generating TTS [{text}]")

        try:
            if not self._websocket or self._websocket.state is State.CLOSED:
                await self._connect()

            if not self._started:
                await self.start_ttfb_metrics()
                yield TTSStartedFrame()
                self._started = True

            msg = self._build_msg(text=text, force=True)

            try:
                await self._get_websocket().send(msg)
                await self.start_tts_usage_metrics(text)
            except Exception as e:
                logger.error(f"{self} error sending message: {e}")
                yield TTSStoppedFrame()
                # Drop the broken connection and open a new one for later calls.
                await self._disconnect()
                await self._connect()
                return
            yield None
        except Exception as e:
            logger.error(f"{self} exception: {e}")
348
+
349
+
350
class AsyncAIHttpTTSService(TTSService):
    """HTTP-based Async TTS service.

    Provides text-to-speech using Async's HTTP streaming API for simpler,
    non-WebSocket integration. Suitable for use cases where streaming WebSocket
    connection is not required or desired.
    """

    class InputParams(BaseModel):
        """Input parameters for Async API.

        Parameters:
            language: Language to use for synthesis.
        """

        language: Optional[Language] = Language.EN

    def __init__(
        self,
        *,
        api_key: str,
        voice_id: str,
        aiohttp_session: aiohttp.ClientSession,
        model: str = "asyncflow_v2.0",
        url: str = "https://api.async.ai",
        version: str = "v1",
        sample_rate: Optional[int] = None,
        encoding: str = "pcm_s16le",
        container: str = "raw",
        params: Optional[InputParams] = None,
        **kwargs,
    ):
        """Initialize the Async TTS service.

        Args:
            api_key: Async API key.
            voice_id: ID of the voice to use for synthesis.
            aiohttp_session: An aiohttp session for making HTTP requests.
            model: TTS model to use (e.g., "asyncflow_v2.0").
            url: Base URL for Async API.
            version: API version string for Async API.
            sample_rate: Audio sample rate.
            encoding: Audio encoding format.
            container: Audio container format.
            params: Additional input parameters for voice customization.
            **kwargs: Additional arguments passed to the parent TTSService.
        """
        super().__init__(sample_rate=sample_rate, **kwargs)

        params = params or AsyncAIHttpTTSService.InputParams()

        self._api_key = api_key
        self._base_url = url
        self._api_version = version
        self._settings = {
            "output_format": {
                "container": container,
                "encoding": encoding,
                # Placeholder; start() overwrites this with self.sample_rate.
                "sample_rate": 0,
            },
            "language": self.language_to_service_language(params.language)
            if params.language
            else "en",
        }
        self.set_voice(voice_id)
        self.set_model_name(model)

        self._session = aiohttp_session

    def can_generate_metrics(self) -> bool:
        """Check if this service can generate processing metrics.

        Returns:
            True, as Async HTTP service supports metrics generation.
        """
        return True

    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Async language format.

        Args:
            language: The language to convert.

        Returns:
            The Async-specific language code, or None if not supported.
        """
        return language_to_async_language(language)

    async def start(self, frame: StartFrame):
        """Start the Async HTTP TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        """
        await super().start(frame)
        self._settings["output_format"]["sample_rate"] = self.sample_rate

    @traced_tts
    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        """Generate speech from text using Async's HTTP streaming API.

        The whole response body is read before a single TTSAudioRawFrame is
        yielded. Any failure is logged and reported with exactly one
        ErrorFrame; a TTSStoppedFrame is yielded in every case.

        Args:
            text: The text to synthesize into speech.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        """
        logger.debug(f"{self}: Generating TTS [{text}]")

        try:
            voice_config = {"mode": "id", "id": self._voice_id}
            await self.start_ttfb_metrics()
            payload = {
                "model_id": self._model_name,
                "transcript": text,
                "voice": voice_config,
                "output_format": self._settings["output_format"],
                "language": self._settings["language"],
            }
            yield TTSStartedFrame()
            headers = {
                "version": self._api_version,
                "x-api-key": self._api_key,
                "Content-Type": "application/json",
            }
            url = f"{self._base_url}/text_to_speech/streaming"

            async with self._session.post(url, json=payload, headers=headers) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"Async API error: {error_text}")
                    # Only raise here: the handler below pushes the ErrorFrame.
                    # Pushing one in this branch as well would report the same
                    # failure twice.
                    raise Exception(f"Async API returned status {response.status}: {error_text}")

                audio_data = await response.read()

            await self.start_tts_usage_metrics(text)

            frame = TTSAudioRawFrame(
                audio=audio_data,
                sample_rate=self.sample_rate,
                num_channels=1,
            )

            yield frame

        except Exception as e:
            logger.error(f"{self} exception: {e}")
            await self.push_error(ErrorFrame(f"Error generating TTS: {e}"))
        finally:
            await self.stop_ttfb_metrics()
            # NOTE(review): yielding inside `finally` means an early close of
            # this generator would hit this yield while GeneratorExit is
            # pending — confirm callers always exhaust the generator.
            yield TTSStoppedFrame()