dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dv-pipecat-ai might be problematic.

Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/neuphonic/tts.py

@@ -4,11 +4,18 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

+"""Neuphonic text-to-speech service implementations.
+
+This module provides WebSocket and HTTP-based integrations with Neuphonic's
+text-to-speech API for real-time audio synthesis.
+"""
+
 import asyncio
 import base64
 import json
 from typing import Any, AsyncGenerator, Mapping, Optional

+import aiohttp
 from loguru import logger
 from pydantic import BaseModel

@@ -29,12 +36,11 @@ from pipecat.frames.frames import (
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.tts_service import InterruptibleTTSService, TTSService
 from pipecat.transcriptions.language import Language
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 from pipecat.utils.tracing.service_decorators import traced_tts

 try:
-    import websockets
-    from pyneuphonic import Neuphonic, TTSConfig
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error("In order to use Neuphonic, you need to `pip install pipecat-ai[neuphonic]`.")
@@ -42,6 +48,14 @@ except ModuleNotFoundError as e:


 def language_to_neuphonic_lang_code(language: Language) -> Optional[str]:
+    """Convert a Language enum to Neuphonic language code.
+
+    Args:
+        language: The Language enum value to convert.
+
+    Returns:
+        The corresponding Neuphonic language code, or None if not supported.
+    """
     BASE_LANGUAGES = {
         Language.DE: "de",
         Language.EN: "en",
@@ -69,7 +83,21 @@ def language_to_neuphonic_lang_code(language: Language) -> Optional[str]:


 class NeuphonicTTSService(InterruptibleTTSService):
+    """Neuphonic real-time text-to-speech service using WebSocket streaming.
+
+    Provides real-time text-to-speech synthesis using Neuphonic's WebSocket API.
+    Supports interruption handling, keepalive connections, and configurable voice
+    parameters for high-quality speech generation.
+    """
+
     class InputParams(BaseModel):
+        """Input parameters for Neuphonic TTS configuration.
+
+        Parameters:
+            language: Language for synthesis. Defaults to English.
+            speed: Speech speed multiplier. Defaults to 1.0.
+        """
+
         language: Optional[Language] = Language.EN
         speed: Optional[float] = 1.0

@@ -82,10 +110,23 @@ class NeuphonicTTSService(InterruptibleTTSService):
         sample_rate: Optional[int] = 22050,
         encoding: str = "pcm_linear",
         params: Optional[InputParams] = None,
+        aggregate_sentences: Optional[bool] = True,
         **kwargs,
     ):
+        """Initialize the Neuphonic TTS service.
+
+        Args:
+            api_key: Neuphonic API key for authentication.
+            voice_id: ID of the voice to use for synthesis.
+            url: WebSocket URL for the Neuphonic API.
+            sample_rate: Audio sample rate in Hz. Defaults to 22050.
+            encoding: Audio encoding format. Defaults to "pcm_linear".
+            params: Additional input parameters for TTS configuration.
+            aggregate_sentences: Whether to aggregate sentences within the TTSService.
+            **kwargs: Additional arguments passed to parent InterruptibleTTSService.
+        """
         super().__init__(
-            aggregate_sentences=True,
+            aggregate_sentences=aggregate_sentences,
             push_text_frames=False,
             push_stop_frames=True,
             stop_frame_timeout_s=2.0,
@@ -114,12 +155,26 @@ class NeuphonicTTSService(InterruptibleTTSService):
         self._keepalive_task = None

     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as Neuphonic service supports metrics generation.
+        """
         return True

     def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to Neuphonic service language format.
+
+        Args:
+            language: The language to convert.
+
+        Returns:
+            The Neuphonic-specific language code, or None if not supported.
+        """
         return language_to_neuphonic_lang_code(language)

     async def _update_settings(self, settings: Mapping[str, Any]):
+        """Update service settings and reconnect with new configuration."""
         if "voice_id" in settings:
             self.set_voice(settings["voice_id"])

@@ -129,28 +184,56 @@ class NeuphonicTTSService(InterruptibleTTSService):
         logger.info(f"Switching TTS to settings: [{self._settings}]")

     async def start(self, frame: StartFrame):
+        """Start the Neuphonic TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
         await self._connect()

     async def stop(self, frame: EndFrame):
+        """Stop the Neuphonic TTS service.
+
+        Args:
+            frame: The end frame.
+        """
         await super().stop(frame)
         await self._disconnect()

     async def cancel(self, frame: CancelFrame):
+        """Cancel the Neuphonic TTS service.
+
+        Args:
+            frame: The cancel frame.
+        """
         await super().cancel(frame)
         await self._disconnect()

     async def flush_audio(self):
+        """Flush any pending audio synthesis by sending stop command."""
         if self._websocket:
             msg = {"text": "<STOP>"}
             await self._websocket.send(json.dumps(msg))

     async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
+        """Push a frame downstream with special handling for stop conditions.
+
+        Args:
+            frame: The frame to push.
+            direction: The direction to push the frame.
+        """
         await super().push_frame(frame, direction)
         if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
             self._started = False

     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames with special handling for speech control.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame processing.
+        """
         await super().process_frame(frame, direction)

         # If we received a TTSSpeakFrame and the LLM response included text (it
@@ -164,6 +247,7 @@ class NeuphonicTTSService(InterruptibleTTSService):
             await self.resume_processing_frames()

     async def _connect(self):
+        """Connect to Neuphonic WebSocket and start background tasks."""
         await self._connect_websocket()

         if self._websocket and not self._receive_task:
@@ -173,6 +257,7 @@ class NeuphonicTTSService(InterruptibleTTSService):
             self._keepalive_task = self.create_task(self._keepalive_task_handler())

     async def _disconnect(self):
+        """Disconnect from Neuphonic WebSocket and clean up tasks."""
         if self._receive_task:
             await self.cancel_task(self._receive_task)
             self._receive_task = None
@@ -184,8 +269,9 @@ class NeuphonicTTSService(InterruptibleTTSService):
         await self._disconnect_websocket()

     async def _connect_websocket(self):
+        """Establish WebSocket connection to Neuphonic API."""
         try:
-            if self._websocket and self._websocket.open:
+            if self._websocket and self._websocket.state is State.OPEN:
                 return

             logger.debug("Connecting to Neuphonic")
@@ -195,20 +281,25 @@ class NeuphonicTTSService(InterruptibleTTSService):
                 "voice_id": self._voice_id,
             }

-            query_params = [f"api_key={self._api_key}"]
+            query_params = []
             for key, value in tts_config.items():
                 if value is not None:
                     query_params.append(f"{key}={value}")

-            url = f"{self._url}/speak/{self._settings['lang_code']}?{'&'.join(query_params)}"
+            url = f"{self._url}/speak/{self._settings['lang_code']}"
+            if query_params:
+                url += f"?{'&'.join(query_params)}"

-            self._websocket = await websockets.connect(url)
+            headers = {"x-api-key": self._api_key}
+
+            self._websocket = await websocket_connect(url, additional_headers=headers)
         except Exception as e:
             logger.error(f"{self} initialization error: {e}")
             self._websocket = None
             await self._call_event_handler("on_connection_error", f"{e}")

     async def _disconnect_websocket(self):
+        """Close WebSocket connection and clean up state."""
         try:
             await self.stop_all_metrics()

@@ -222,10 +313,11 @@ class NeuphonicTTSService(InterruptibleTTSService):
             self._websocket = None

     async def _receive_messages(self):
-        async for message in WatchdogAsyncIterator(self._websocket, manager=self.task_manager):
+        """Receive and process messages from Neuphonic WebSocket."""
+        async for message in self._websocket:
             if isinstance(message, str):
                 msg = json.loads(message)
-                if msg.get("data", {}).get("audio") is not None:
+                if msg.get("data") and msg["data"].get("audio"):
                     await self.stop_ttfb_metrics()

                     audio = base64.b64decode(msg["data"]["audio"])
@@ -233,24 +325,40 @@ class NeuphonicTTSService(InterruptibleTTSService):
                     await self.push_frame(frame)

     async def _keepalive_task_handler(self):
-        KEEPALIVE_SLEEP = 10 if self.task_manager.task_watchdog_enabled else 3
+        """Handle keepalive messages to maintain WebSocket connection."""
+        KEEPALIVE_SLEEP = 10
         while True:
-            self.reset_watchdog()
             await asyncio.sleep(KEEPALIVE_SLEEP)
-            await self._send_text("")
+            await self._send_keepalive()
+
+    async def _send_keepalive(self):
+        """Send keepalive message to maintain connection."""
+        if self._websocket:
+            # Send empty text for keepalive
+            msg = {"text": ""}
+            await self._websocket.send(json.dumps(msg))

     async def _send_text(self, text: str):
+        """Send text to Neuphonic WebSocket for synthesis."""
         if self._websocket:
-            msg = {"text": text}
+            msg = {"text": f"{text} <STOP>"}
             logger.debug(f"Sending text to websocket: {msg}")
             await self._websocket.send(json.dumps(msg))

     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using Neuphonic's streaming API.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
         logger.debug(f"Generating TTS: [{text}]")

         try:
-            if not self._websocket or self._websocket.closed:
+            if not self._websocket or self._websocket.state is State.CLOSED:
                 await self._connect()

             try:
@@ -274,19 +382,21 @@ class NeuphonicTTSService(InterruptibleTTSService):


 class NeuphonicHttpTTSService(TTSService):
-    """Neuphonic Text-to-Speech service using HTTP streaming.
+    """Neuphonic text-to-speech service using HTTP streaming.

-    Args:
-        api_key: Neuphonic API key
-        voice_id: ID of the voice to use
-        url: Base URL for the Neuphonic API (default: "https://api.neuphonic.com")
-        sample_rate: Sample rate for audio output (default: 22050Hz)
-        encoding: Audio encoding format (default: "pcm_linear")
-        params: Additional parameters for TTS generation including language and speed
-        **kwargs: Additional keyword arguments passed to the parent class
+    Provides text-to-speech synthesis using Neuphonic's HTTP API with server-sent
+    events for streaming audio delivery. Suitable for applications that prefer
+    HTTP-based communication over WebSocket connections.
     """

     class InputParams(BaseModel):
+        """Input parameters for Neuphonic HTTP TTS configuration.
+
+        Parameters:
+            language: Language for synthesis. Defaults to English.
+            speed: Speech speed multiplier. Defaults to 1.0.
+        """
+
         language: Optional[Language] = Language.EN
         speed: Optional[float] = 1.0

@@ -295,66 +405,183 @@ class NeuphonicHttpTTSService(TTSService):
         *,
         api_key: str,
         voice_id: Optional[str] = None,
+        aiohttp_session: aiohttp.ClientSession,
         url: str = "https://api.neuphonic.com",
         sample_rate: Optional[int] = 22050,
-        encoding: str = "pcm_linear",
+        encoding: Optional[str] = "pcm_linear",
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the Neuphonic HTTP TTS service.
+
+        Args:
+            api_key: Neuphonic API key for authentication.
+            voice_id: ID of the voice to use for synthesis.
+            aiohttp_session: Shared aiohttp session for HTTP requests.
+            url: Base URL for the Neuphonic HTTP API.
+            sample_rate: Audio sample rate in Hz. Defaults to 22050.
+            encoding: Audio encoding format. Defaults to "pcm_linear".
+            params: Additional input parameters for TTS configuration.
+            **kwargs: Additional arguments passed to parent TTSService.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)

         params = params or NeuphonicHttpTTSService.InputParams()

         self._api_key = api_key
-        self._url = url
-        self._settings = {
-            "lang_code": self.language_to_service_language(params.language),
-            "speed": params.speed,
-            "encoding": encoding,
-            "sampling_rate": sample_rate,
-        }
+        self._session = aiohttp_session
+        self._base_url = url.rstrip("/")
+        self._lang_code = self.language_to_service_language(params.language) or "en"
+        self._speed = params.speed
+        self._encoding = encoding
         self.set_voice(voice_id)

     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as Neuphonic HTTP service supports metrics generation.
+        """
         return True

+    def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to Neuphonic service language format.
+
+        Args:
+            language: The language to convert.
+
+        Returns:
+            The Neuphonic-specific language code, or None if not supported.
+        """
+        return language_to_neuphonic_lang_code(language)
+
     async def start(self, frame: StartFrame):
+        """Start the Neuphonic HTTP TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)

     async def flush_audio(self):
+        """Flush any pending audio synthesis.
+
+        Note:
+            HTTP-based service doesn't require explicit flushing.
+        """
         pass

+    def _parse_sse_message(self, message: str) -> dict | None:
+        """Parse a Server-Sent Event message.
+
+        Args:
+            message: The SSE message to parse.
+
+        Returns:
+            Parsed message dictionary or None if not a data message.
+        """
+        message = message.strip()
+
+        if not message or "data" not in message:
+            return None
+
+        try:
+            # Split on ": " and take the part after "data: "
+            _, data_content = message.split(": ", 1)
+
+            if not data_content or data_content == "[DONE]":
+                return None
+
+            message_dict = json.loads(data_content)
+
+            # Check for errors in the response
+            if message_dict.get("errors") is not None:
+                raise Exception(
+                    f"Neuphonic API error {message_dict.get('status_code', 'unknown')}: {message_dict['errors']}"
+                )
+
+            return message_dict
+        except (ValueError, json.JSONDecodeError) as e:
+            logger.warning(f"Failed to parse SSE message: {e}")
+            return None
+
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
         """Generate speech from text using Neuphonic streaming API.

         Args:
-            text: The text to convert to speech
+            text: The text to convert to speech.
+
         Yields:
-            Frames containing audio data and status information
+            Frame: Audio frames containing the synthesized speech and status information.
         """
         logger.debug(f"Generating TTS: [{text}]")

-        client = Neuphonic(api_key=self._api_key, base_url=self._url.replace("https://", ""))
+        url = f"{self._base_url}/sse/speak/{self._lang_code}"

-        sse = client.tts.AsyncSSEClient()
+        headers = {
+            "X-API-KEY": self._api_key,
+            "Content-Type": "application/json",
+        }
+
+        payload = {
+            "text": text,
+            "lang_code": self._lang_code,
+            "encoding": self._encoding,
+            "sampling_rate": self.sample_rate,
+            "speed": self._speed,
+        }
+
+        if self._voice_id:
+            payload["voice_id"] = self._voice_id

         try:
             await self.start_ttfb_metrics()
-            response = sse.send(text, TTSConfig(**self._settings, voice_id=self._voice_id))

-            await self.start_tts_usage_metrics(text)
-            yield TTSStartedFrame()
+            async with self._session.post(url, json=payload, headers=headers) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    error_message = f"Neuphonic API error: HTTP {response.status} - {error_text}"
+                    logger.error(error_message)
+                    yield ErrorFrame(error=error_message)
+                    return

-            async for message in response:
-                if message.status_code != 200:
-                    logger.error(f"{self} error: {message.errors}")
-                    yield ErrorFrame(error=f"Neuphonic API error: {message.errors}")
-
-                await self.stop_ttfb_metrics()
-                yield TTSAudioRawFrame(message.data.audio, self.sample_rate, 1)
+                await self.start_tts_usage_metrics(text)
+                yield TTSStartedFrame()
+
+                # Process SSE stream line by line
+                async for line in response.content:
+                    if not line:
+                        continue
+
+                    message = line.decode("utf-8", errors="ignore")
+                    if not message.strip():
+                        continue
+
+                    try:
+                        parsed_message = self._parse_sse_message(message)
+
+                        if (
+                            parsed_message is not None
+                            and parsed_message.get("data", {}).get("audio") is not None
+                        ):
+                            audio_b64 = parsed_message["data"]["audio"]
+                            audio_bytes = base64.b64decode(audio_b64)
+
+                            await self.stop_ttfb_metrics()
+                            yield TTSAudioRawFrame(audio_bytes, self.sample_rate, 1)
+
+                    except Exception as e:
+                        logger.error(f"Error processing SSE message: {e}")
+                        # Don't yield error frame for individual message failures
+                        continue
+
+        except asyncio.CancelledError:
+            logger.debug("TTS generation cancelled")
+            raise
         except Exception as e:
-            logger.error(f"Error in run_tts: {e}")
-            yield ErrorFrame(error=str(e))
+            logger.exception(f"Error in run_tts: {e}")
+            yield ErrorFrame(error=f"Neuphonic TTS error: {str(e)}")
         finally:
+            await self.stop_ttfb_metrics()
             yield TTSStoppedFrame()
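
Only a few of the changes above affect callers directly: the WebSocket service gains an aggregate_sentences flag, and the HTTP service now requires a shared aiohttp.ClientSession. A minimal usage sketch under those assumptions (the voice ID, environment variable name, and helper function are placeholders, not values from this release):

import os

import aiohttp

from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService, NeuphonicTTSService


async def build_tts_services():
    # WebSocket variant: sentence aggregation can now be disabled explicitly.
    ws_tts = NeuphonicTTSService(
        api_key=os.environ["NEUPHONIC_API_KEY"],  # placeholder environment variable
        voice_id="example-voice-id",  # placeholder voice ID
        aggregate_sentences=False,
    )

    # HTTP/SSE variant: a shared aiohttp session is now a required constructor argument.
    session = aiohttp.ClientSession()
    http_tts = NeuphonicHttpTTSService(
        api_key=os.environ["NEUPHONIC_API_KEY"],
        voice_id="example-voice-id",
        aiohttp_session=session,
    )
    return ws_tts, http_tts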
pipecat/services/nim/llm.py

@@ -21,12 +21,6 @@ class NimLLMService(OpenAILLMService):
     This service extends OpenAILLMService to work with NVIDIA's NIM API while maintaining
     compatibility with the OpenAI-style interface. It specifically handles the difference
     in token usage reporting between NIM (incremental) and OpenAI (final summary).
-
-    Args:
-        api_key: The API key for accessing NVIDIA's NIM API.
-        base_url: The base URL for NIM API. Defaults to "https://integrate.api.nvidia.com/v1".
-        model: The model identifier to use. Defaults to "nvidia/llama-3.1-nemotron-70b-instruct".
-        **kwargs: Additional keyword arguments passed to OpenAILLMService.
     """

     def __init__(
@@ -37,6 +31,14 @@ class NimLLMService(OpenAILLMService):
         model: str = "nvidia/llama-3.1-nemotron-70b-instruct",
         **kwargs,
     ):
+        """Initialize the NimLLMService.
+
+        Args:
+            api_key: The API key for accessing NVIDIA's NIM API.
+            base_url: The base URL for NIM API. Defaults to "https://integrate.api.nvidia.com/v1".
+            model: The model identifier to use. Defaults to "nvidia/llama-3.1-nemotron-70b-instruct".
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
         # Counters for accumulating token usage metrics
         self._prompt_tokens = 0
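
This change only relocates the constructor documentation; the counters at the end of the hunk exist because NIM reports token usage incrementally rather than as a single final summary. A minimal instantiation sketch using the defaults shown in the signature above (the environment variable name is a placeholder):

import os

from pipecat.services.nim.llm import NimLLMService

# base_url and model fall back to the defaults shown above:
# "https://integrate.api.nvidia.com/v1" and "nvidia/llama-3.1-nemotron-70b-instruct".
llm = NimLLMService(api_key=os.environ["NVIDIA_NIM_API_KEY"])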
pipecat/services/ollama/llm.py

@@ -6,6 +6,8 @@

 """OLLama LLM service implementation for Pipecat AI framework."""

+from loguru import logger
+
 from pipecat.services.openai.llm import OpenAILLMService


@@ -14,12 +16,30 @@ class OLLamaLLMService(OpenAILLMService):
     This service extends OpenAILLMService to work with locally hosted OLLama models,
     providing a compatible interface for running large language models locally.
-
-    Args:
-        model: The OLLama model to use. Defaults to "llama2".
-        base_url: The base URL for the OLLama API endpoint.
-            Defaults to "http://localhost:11434/v1".
     """

-    def __init__(self, *, model: str = "llama2", base_url: str = "http://localhost:11434/v1"):
-        super().__init__(model=model, base_url=base_url, api_key="ollama")
+    def __init__(
+        self, *, model: str = "llama2", base_url: str = "http://localhost:11434/v1", **kwargs
+    ):
+        """Initialize OLLama LLM service.
+
+        Args:
+            model: The OLLama model to use. Defaults to "llama2".
+            base_url: The base URL for the OLLama API endpoint.
+                Defaults to "http://localhost:11434/v1".
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
+        super().__init__(model=model, base_url=base_url, api_key="ollama", **kwargs)
+
+    def create_client(self, base_url=None, **kwargs):
+        """Create OpenAI-compatible client for Ollama.
+
+        Args:
+            base_url: The base URL for the API. If None, uses instance base_url.
+            **kwargs: Additional keyword arguments passed to the parent create_client method.
+
+        Returns:
+            An OpenAI-compatible client configured for Ollama.
+        """
+        logger.debug(f"Creating Ollama client with api {base_url}")
+        return super().create_client(base_url=base_url, **kwargs)
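
Because the constructor now forwards **kwargs to OpenAILLMService, any extra options accepted by that base class can be passed straight through. A minimal construction sketch using the defaults from the signature above:

from pipecat.services.ollama.llm import OLLamaLLMService

# Defaults: model="llama2", base_url="http://localhost:11434/v1"; api_key is set
# to "ollama" internally. Any additional keyword arguments are now forwarded to
# OpenAILLMService via **kwargs.
llm = OLLamaLLMService(model="llama2", base_url="http://localhost:11434/v1")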