dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic; consult the registry's advisory page for more details.

Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
@@ -4,6 +4,12 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """NVIDIA Riva text-to-speech service implementation.
8
+
9
+ This module provides integration with NVIDIA Riva's TTS services through
10
+ gRPC API for high-quality speech synthesis.
11
+ """
12
+
7
13
  import asyncio
8
14
  import os
9
15
  from typing import AsyncGenerator, Mapping, Optional
@@ -37,7 +43,21 @@ RIVA_TTS_TIMEOUT_SECS = 5
37
43
 
38
44
 
39
45
  class RivaTTSService(TTSService):
46
+ """NVIDIA Riva text-to-speech service.
47
+
48
+ Provides high-quality text-to-speech synthesis using NVIDIA Riva's
49
+ cloud-based TTS models. Supports multiple voices, languages, and
50
+ configurable quality settings.
51
+ """
52
+
40
53
  class InputParams(BaseModel):
54
+ """Input parameters for Riva TTS configuration.
55
+
56
+ Parameters:
57
+ language: Language code for synthesis. Defaults to US English.
58
+ quality: Audio quality setting (0-100). Defaults to 20.
59
+ """
60
+
41
61
  language: Optional[Language] = Language.EN_US
42
62
  quality: Optional[int] = 20
43
63
 
@@ -55,6 +75,17 @@ class RivaTTSService(TTSService):
55
75
  params: Optional[InputParams] = None,
56
76
  **kwargs,
57
77
  ):
78
+ """Initialize the NVIDIA Riva TTS service.
79
+
80
+ Args:
81
+ api_key: NVIDIA API key for authentication.
82
+ server: gRPC server endpoint. Defaults to NVIDIA's cloud endpoint.
83
+ voice_id: Voice model identifier. Defaults to multilingual Ray voice.
84
+ sample_rate: Audio sample rate. If None, uses service default.
85
+ model_function_map: Dictionary containing function_id and model_name for the TTS model.
86
+ params: Additional configuration parameters for TTS synthesis.
87
+ **kwargs: Additional arguments passed to parent TTSService.
88
+ """
58
89
  super().__init__(sample_rate=sample_rate, **kwargs)
59
90
 
60
91
  params = params or RivaTTSService.InputParams()
@@ -82,6 +113,13 @@ class RivaTTSService(TTSService):
82
113
  )
83
114
 
84
115
  async def set_model(self, model: str):
116
+ """Attempt to set the TTS model.
117
+
118
+ Note: Model cannot be changed after initialization for Riva service.
119
+
120
+ Args:
121
+ model: The model name to set (operation not supported).
122
+ """
85
123
  logger.warning(f"Cannot set model after initialization. Set model and function id like so:")
86
124
  example = {"function_id": "<UUID>", "model_name": "<model_name>"}
87
125
  logger.warning(
@@ -90,6 +128,15 @@ class RivaTTSService(TTSService):
90
128
 
91
129
  @traced_tts
92
130
  async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
131
+ """Generate speech from text using NVIDIA Riva TTS.
132
+
133
+ Args:
134
+ text: The text to synthesize into speech.
135
+
136
+ Yields:
137
+ Frame: Audio frames containing the synthesized speech data.
138
+ """
139
+
93
140
  def read_audio_responses(queue: asyncio.Queue):
94
141
  def add_response(r):
95
142
  asyncio.run_coroutine_threadsafe(queue.put(r), self.get_event_loop())
@@ -121,7 +168,7 @@ class RivaTTSService(TTSService):
121
168
  await asyncio.to_thread(read_audio_responses, queue)
122
169
 
123
170
  # Wait for the thread to start.
124
- resp = await asyncio.wait_for(queue.get(), RIVA_TTS_TIMEOUT_SECS)
171
+ resp = await asyncio.wait_for(queue.get(), timeout=RIVA_TTS_TIMEOUT_SECS)
125
172
  while resp:
126
173
  await self.stop_ttfb_metrics()
127
174
  frame = TTSAudioRawFrame(
@@ -130,7 +177,7 @@ class RivaTTSService(TTSService):
130
177
  num_channels=1,
131
178
  )
132
179
  yield frame
133
- resp = await asyncio.wait_for(queue.get(), RIVA_TTS_TIMEOUT_SECS)
180
+ resp = await asyncio.wait_for(queue.get(), timeout=RIVA_TTS_TIMEOUT_SECS)
134
181
  except asyncio.TimeoutError:
135
182
  logger.error(f"{self} timeout waiting for audio response")
136
183
 
@@ -139,6 +186,13 @@ class RivaTTSService(TTSService):
139
186
 
140
187
 
141
188
  class FastPitchTTSService(RivaTTSService):
189
+ """Deprecated FastPitch TTS service.
190
+
191
+ .. deprecated:: 0.0.66
192
+ This class is deprecated. Use RivaTTSService instead for new implementations.
193
+ Provides backward compatibility for existing FastPitch TTS integrations.
194
+ """
195
+
142
196
  def __init__(
143
197
  self,
144
198
  *,
@@ -153,6 +207,17 @@ class FastPitchTTSService(RivaTTSService):
153
207
  params: Optional[RivaTTSService.InputParams] = None,
154
208
  **kwargs,
155
209
  ):
210
+ """Initialize the deprecated FastPitch TTS service.
211
+
212
+ Args:
213
+ api_key: NVIDIA API key for authentication.
214
+ server: gRPC server endpoint. Defaults to NVIDIA's cloud endpoint.
215
+ voice_id: Voice model identifier. Defaults to Female-1 voice.
216
+ sample_rate: Audio sample rate. If None, uses service default.
217
+ model_function_map: Dictionary containing function_id and model_name for FastPitch model.
218
+ params: Additional configuration parameters for TTS synthesis.
219
+ **kwargs: Additional arguments passed to parent RivaTTSService.
220
+ """
156
221
  super().__init__(
157
222
  api_key=api_key,
158
223
  server=server,
@@ -20,7 +20,6 @@ from pipecat.metrics.metrics import LLMTokenUsage
20
20
  from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
21
21
  from pipecat.services.llm_service import FunctionCallFromLLM
22
22
  from pipecat.services.openai.llm import OpenAILLMService
23
- from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
24
23
  from pipecat.utils.tracing.service_decorators import traced_llm
25
24
 
26
25
 
@@ -29,12 +28,6 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore
29
28
 
30
29
  This service extends OpenAILLMService to connect to SambaNova's API endpoint while
31
30
  maintaining full compatibility with OpenAI's interface and functionality.
32
-
33
- Args:
34
- api_key: The API key for accessing SambaNova API.
35
- model: The model identifier to use. Defaults to "Llama-4-Maverick-17B-128E-Instruct".
36
- base_url: The base URL for SambaNova API. Defaults to "https://api.sambanova.ai/v1".
37
- **kwargs: Additional keyword arguments passed to OpenAILLMService.
38
31
  """
39
32
 
40
33
  def __init__(
@@ -45,6 +38,14 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore
45
38
  base_url: str = "https://api.sambanova.ai/v1",
46
39
  **kwargs: Dict[Any, Any],
47
40
  ) -> None:
41
+ """Initialize SambaNova LLM service.
42
+
43
+ Args:
44
+ api_key: The API key for accessing SambaNova API.
45
+ model: The model identifier to use. Defaults to "Llama-4-Maverick-17B-128E-Instruct".
46
+ base_url: The base URL for SambaNova API. Defaults to "https://api.sambanova.ai/v1".
47
+ **kwargs: Additional keyword arguments passed to OpenAILLMService.
48
+ """
48
49
  super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
49
50
 
50
51
  def create_client(
@@ -66,17 +67,20 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore
66
67
  logger.debug(f"Creating SambaNova client with API {base_url}")
67
68
  return super().create_client(api_key, base_url, **kwargs)
68
69
 
69
- async def get_chat_completions(
70
+ def build_chat_completion_params(
70
71
  self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
71
- ) -> Any:
72
- """Get chat completions from SambaNova API endpoint.
72
+ ) -> dict:
73
+ """Build parameters for SambaNova chat completion request.
74
+
75
+ SambaNova doesn't support some OpenAI parameters like frequency_penalty,
76
+ presence_penalty, and seed.
73
77
 
74
78
  Args:
75
- context: OpenAI LLM context containing tools and configuration.
76
- messages: List of chat completion message parameters.
79
+ context: The LLM context containing tools and configuration.
80
+ messages: List of chat completion messages to send.
77
81
 
78
82
  Returns:
79
- Chat completion response stream from SambaNova API.
83
+ Dictionary of parameters for the chat completion request.
80
84
  """
81
85
  params = {
82
86
  "model": self.model_name,
@@ -92,9 +96,7 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore
92
96
  }
93
97
 
94
98
  params.update(self._settings["extra"])
95
-
96
- chunks = await self._client.chat.completions.create(**params)
97
- return chunks
99
+ return params
98
100
 
99
101
  @traced_llm # type: ignore
100
102
  async def _process_context(self, context: OpenAILLMContext) -> AsyncStream[ChatCompletionChunk]:
@@ -124,7 +126,7 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore
124
126
  context
125
127
  )
126
128
 
127
- async for chunk in WatchdogAsyncIterator(chunk_stream, manager=self.task_manager):
129
+ async for chunk in chunk_stream:
128
130
  if chunk.usage:
129
131
  tokens = LLMTokenUsage(
130
132
  prompt_tokens=chunk.usage.prompt_tokens,
@@ -4,6 +4,8 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """SambaNova's Speech-to-Text service implementation for real-time transcription."""
8
+
7
9
  from typing import Any, Optional
8
10
 
9
11
  from pipecat.services.whisper.base_stt import BaseWhisperSTTService, Transcription
@@ -12,16 +14,9 @@ from pipecat.transcriptions.language import Language
12
14
 
13
15
  class SambaNovaSTTService(BaseWhisperSTTService): # type: ignore
14
16
  """SambaNova Whisper speech-to-text service.
17
+
15
18
  Uses SambaNova's Whisper API to convert audio to text.
16
19
  Requires a SambaNova API key set via the api_key parameter or SAMBANOVA_API_KEY environment variable.
17
- Args:
18
- model: Whisper model to use. Defaults to "Whisper-Large-v3".
19
- api_key: SambaNova API key. Defaults to None.
20
- base_url: API base URL. Defaults to "https://api.sambanova.ai/v1".
21
- language: Language of the audio input. Defaults to English.
22
- prompt: Optional text to guide the model's style or continue a previous segment.
23
- temperature: Optional sampling temperature between 0 and 1. Defaults to 0.0.
24
- **kwargs: Additional arguments passed to `pipecat.services.whisper.base_stt.BaseWhisperSTTService`.
25
20
  """
26
21
 
27
22
  def __init__(
@@ -35,6 +30,17 @@ class SambaNovaSTTService(BaseWhisperSTTService): # type: ignore
35
30
  temperature: Optional[float] = None,
36
31
  **kwargs: Any,
37
32
  ) -> None:
33
+ """Initialize SambaNova STT service.
34
+
35
+ Args:
36
+ model: Whisper model to use. Defaults to "Whisper-Large-v3".
37
+ api_key: SambaNova API key. Defaults to None.
38
+ base_url: API base URL. Defaults to "https://api.sambanova.ai/v1".
39
+ language: Language of the audio input. Defaults to English.
40
+ prompt: Optional text to guide the model's style or continue a previous segment.
41
+ temperature: Optional sampling temperature between 0 and 1. Defaults to 0.0.
42
+ **kwargs: Additional arguments passed to `pipecat.services.whisper.base_stt.BaseWhisperSTTService`.
43
+ """
38
44
  super().__init__(
39
45
  model=model,
40
46
  api_key=api_key,
@@ -4,6 +4,8 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Sarvam AI text-to-speech service implementation."""
8
+
7
9
  import base64
8
10
  from typing import AsyncGenerator, Optional
9
11
 
@@ -25,7 +27,14 @@ from pipecat.utils.tracing.service_decorators import traced_tts
25
27
 
26
28
 
27
29
  def language_to_sarvam_language(language: Language) -> Optional[str]:
28
- """Convert Pipecat Language enum to Sarvam AI language codes."""
30
+ """Convert Pipecat Language enum to Sarvam AI language codes.
31
+
32
+ Args:
33
+ language: The Language enum value to convert.
34
+
35
+ Returns:
36
+ The corresponding Sarvam AI language code, or None if not supported.
37
+ """
29
38
  LANGUAGE_MAP = {
30
39
  Language.BN: "bn-IN", # Bengali
31
40
  Language.EN: "en-IN", # English (India)
@@ -50,17 +59,8 @@ class SarvamTTSService(TTSService):
50
59
  Indian languages. Provides control over voice characteristics like pitch, pace,
51
60
  and loudness.
52
61
 
53
- Args:
54
- api_key: Sarvam AI API subscription key.
55
- voice_id: Speaker voice ID (e.g., "anushka", "meera").
56
- model: TTS model to use ("bulbul:v1" or "bulbul:v2").
57
- aiohttp_session: Shared aiohttp session for making requests.
58
- base_url: Sarvam AI API base URL.
59
- sample_rate: Audio sample rate in Hz (8000, 16000, 22050, 24000).
60
- params: Additional voice and preprocessing parameters.
61
-
62
- Example:
63
- ```python
62
+ Example::
63
+
64
64
  tts = SarvamTTSService(
65
65
  api_key="your-api-key",
66
66
  voice_id="anushka",
@@ -72,10 +72,19 @@ class SarvamTTSService(TTSService):
72
72
  pace=1.2
73
73
  )
74
74
  )
75
- ```
76
75
  """
77
76
 
78
77
  class InputParams(BaseModel):
78
+ """Input parameters for Sarvam TTS configuration.
79
+
80
+ Parameters:
81
+ language: Language for synthesis. Defaults to English (India).
82
+ pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0.
83
+ pace: Speech pace multiplier (0.3 to 3.0). Defaults to 1.0.
84
+ loudness: Volume multiplier (0.1 to 3.0). Defaults to 1.0.
85
+ enable_preprocessing: Whether to enable text preprocessing. Defaults to False.
86
+ """
87
+
79
88
  language: Optional[Language] = Language.EN
80
89
  pitch: Optional[float] = Field(default=0.0, ge=-0.75, le=0.75)
81
90
  pace: Optional[float] = Field(default=1.0, ge=0.3, le=3.0)
@@ -94,6 +103,18 @@ class SarvamTTSService(TTSService):
94
103
  params: Optional[InputParams] = None,
95
104
  **kwargs,
96
105
  ):
106
+ """Initialize the Sarvam TTS service.
107
+
108
+ Args:
109
+ api_key: Sarvam AI API subscription key.
110
+ voice_id: Speaker voice ID (e.g., "anushka", "meera"). Defaults to "anushka".
111
+ model: TTS model to use ("bulbul:v1" or "bulbul:v2"). Defaults to "bulbul:v2".
112
+ aiohttp_session: Shared aiohttp session for making requests.
113
+ base_url: Sarvam AI API base URL. Defaults to "https://api.sarvam.ai".
114
+ sample_rate: Audio sample rate in Hz (8000, 16000, 22050, 24000). If None, uses default.
115
+ params: Additional voice and preprocessing parameters. If None, uses defaults.
116
+ **kwargs: Additional arguments passed to parent TTSService.
117
+ """
97
118
  super().__init__(sample_rate=sample_rate, **kwargs)
98
119
 
99
120
  params = params or SarvamTTSService.InputParams()
@@ -116,17 +137,43 @@ class SarvamTTSService(TTSService):
116
137
  self.set_voice(voice_id)
117
138
 
118
139
  def can_generate_metrics(self) -> bool:
140
+ """Check if this service can generate processing metrics.
141
+
142
+ Returns:
143
+ True, as Sarvam service supports metrics generation.
144
+ """
119
145
  return True
120
146
 
121
147
  def language_to_service_language(self, language: Language) -> Optional[str]:
148
+ """Convert a Language enum to Sarvam AI language format.
149
+
150
+ Args:
151
+ language: The language to convert.
152
+
153
+ Returns:
154
+ The Sarvam AI-specific language code, or None if not supported.
155
+ """
122
156
  return language_to_sarvam_language(language)
123
157
 
124
158
  async def start(self, frame: StartFrame):
159
+ """Start the Sarvam TTS service.
160
+
161
+ Args:
162
+ frame: The start frame containing initialization parameters.
163
+ """
125
164
  await super().start(frame)
126
165
  self._settings["sample_rate"] = self.sample_rate
127
166
 
128
167
  @traced_tts
129
168
  async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
169
+ """Generate speech from text using Sarvam AI's API.
170
+
171
+ Args:
172
+ text: The text to synthesize into speech.
173
+
174
+ Yields:
175
+ Frame: Audio frames containing the synthesized speech.
176
+ """
130
177
  logger.debug(f"{self}: Generating TTS [{text}]")
131
178
 
132
179
  try:
@@ -4,6 +4,8 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Simli video service for real-time avatar generation."""
8
+
7
9
  import asyncio
8
10
 
9
11
  import numpy as np
@@ -16,9 +18,10 @@ from pipecat.frames.frames import (
16
18
  OutputImageRawFrame,
17
19
  StartInterruptionFrame,
18
20
  TTSAudioRawFrame,
21
+ TTSStoppedFrame,
22
+ UserStartedSpeakingFrame,
19
23
  )
20
24
  from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, StartFrame
21
- from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
22
25
 
23
26
  try:
24
27
  from av.audio.frame import AudioFrame
@@ -31,39 +34,68 @@ except ModuleNotFoundError as e:
31
34
 
32
35
 
33
36
  class SimliVideoService(FrameProcessor):
37
+ """Simli video service for real-time avatar generation.
38
+
39
+ Provides real-time avatar video generation by processing audio frames
40
+ and producing synchronized video output using the Simli API. Handles
41
+ audio resampling, video frame processing, and connection management.
42
+ """
43
+
34
44
  def __init__(
35
45
  self,
36
46
  simli_config: SimliConfig,
37
47
  use_turn_server: bool = False,
38
48
  latency_interval: int = 0,
49
+ simli_url: str = "https://api.simli.ai",
50
+ is_trinity_avatar: bool = False,
39
51
  ):
52
+ """Initialize the Simli video service.
53
+
54
+ Args:
55
+ simli_config: Configuration object for Simli client settings.
56
+ use_turn_server: Whether to use TURN server for connection. Defaults to False.
57
+ latency_interval: Latency interval setting for sending health checks to check the latency to Simli Servers. Defaults to 0.
58
+ simli_url: URL of the simli servers. Can be changed for custom deployments of enterprise users.
59
+ is_trinity_avatar: boolean to tell simli client that this is a Trinity avatar which reduces latency when using Trinity.
60
+
61
+ """
40
62
  super().__init__()
41
- self._simli_client = SimliClient(simli_config, use_turn_server, latency_interval)
63
+ self._initialized = False
64
+ simli_config.maxIdleTime += 5
65
+ simli_config.maxSessionLength += 5
66
+ self._simli_client = SimliClient(
67
+ simli_config,
68
+ use_turn_server,
69
+ latency_interval,
70
+ simliURL=simli_url,
71
+ )
42
72
 
43
- self._pipecat_resampler_event = asyncio.Event()
44
73
  self._pipecat_resampler: AudioResampler = None
74
+ self._pipecat_resampler_event = asyncio.Event()
45
75
  self._simli_resampler = AudioResampler("s16", "mono", 16000)
46
76
 
47
- self._initialized = False
48
77
  self._audio_task: asyncio.Task = None
49
78
  self._video_task: asyncio.Task = None
79
+ self._is_trinity_avatar = is_trinity_avatar
80
+ self._previously_interrupted = is_trinity_avatar
81
+ self._audio_buffer = bytearray()
50
82
 
51
83
  async def _start_connection(self):
84
+ """Start the connection to Simli service and begin processing tasks."""
52
85
  if not self._initialized:
53
86
  await self._simli_client.Initialize()
54
87
  self._initialized = True
55
88
 
56
89
  # Create task to consume and process audio and video
57
- if not self._audio_task:
58
- self._audio_task = self.create_task(self._consume_and_process_audio())
59
-
60
- if not self._video_task:
61
- self._video_task = self.create_task(self._consume_and_process_video())
90
+ await self._simli_client.sendSilence()
91
+ self._audio_task = self.create_task(self._consume_and_process_audio())
92
+ self._video_task = self.create_task(self._consume_and_process_video())
62
93
 
63
94
  async def _consume_and_process_audio(self):
95
+ """Consume audio frames from Simli and push them downstream."""
64
96
  await self._pipecat_resampler_event.wait()
65
97
  audio_iterator = self._simli_client.getAudioStreamIterator()
66
- async for audio_frame in WatchdogAsyncIterator(audio_iterator, manager=self.task_manager):
98
+ async for audio_frame in audio_iterator:
67
99
  resampled_frames = self._pipecat_resampler.resample(audio_frame)
68
100
  for resampled_frame in resampled_frames:
69
101
  audio_array = resampled_frame.to_ndarray()
@@ -78,9 +110,10 @@ class SimliVideoService(FrameProcessor):
78
110
  )
79
111
 
80
112
  async def _consume_and_process_video(self):
113
+ """Consume video frames from Simli and convert them to output frames."""
81
114
  await self._pipecat_resampler_event.wait()
82
115
  video_iterator = self._simli_client.getVideoStreamIterator(targetFormat="rgb24")
83
- async for video_frame in WatchdogAsyncIterator(video_iterator, manager=self.task_manager):
116
+ async for video_frame in video_iterator:
84
117
  # Process the video frame
85
118
  convertedFrame: OutputImageRawFrame = OutputImageRawFrame(
86
119
  image=video_frame.to_rgb().to_image().tobytes(),
@@ -91,9 +124,14 @@ class SimliVideoService(FrameProcessor):
91
124
  await self.push_frame(convertedFrame)
92
125
 
93
126
  async def process_frame(self, frame: Frame, direction: FrameDirection):
127
+ """Process incoming frames and handle Simli video generation.
128
+
129
+ Args:
130
+ frame: The frame to process.
131
+ direction: The direction of frame processing.
132
+ """
94
133
  await super().process_frame(frame, direction)
95
134
  if isinstance(frame, StartFrame):
96
- await self.push_frame(frame, direction)
97
135
  await self._start_connection()
98
136
  elif isinstance(frame, TTSAudioRawFrame):
99
137
  # Send audio frame to Simli
@@ -112,21 +150,44 @@ class SimliVideoService(FrameProcessor):
112
150
 
113
151
  resampled_frames = self._simli_resampler.resample(old_frame)
114
152
  for resampled_frame in resampled_frames:
115
- await self._simli_client.send(
116
- resampled_frame.to_ndarray().astype(np.int16).tobytes()
117
- )
153
+ audioBytes = resampled_frame.to_ndarray().astype(np.int16).tobytes()
154
+ if self._previously_interrupted:
155
+ self._audio_buffer.extend(audioBytes)
156
+ if len(self._audio_buffer) >= 128000:
157
+ try:
158
+ for flushFrame in self._simli_resampler.resample(None):
159
+ self._audio_buffer.extend(
160
+ flushFrame.to_ndarray().astype(np.int16).tobytes()
161
+ )
162
+ finally:
163
+ await self._simli_client.playImmediate(self._audio_buffer)
164
+ self._previously_interrupted = False
165
+ self._audio_buffer = bytearray()
166
+ else:
167
+ await self._simli_client.send(audioBytes)
168
+ return
118
169
  except Exception as e:
119
170
  logger.exception(f"{self} exception: {e}")
171
+ elif isinstance(frame, TTSStoppedFrame):
172
+ try:
173
+ if self._previously_interrupted and len(self._audio_buffer) > 0:
174
+ await self._simli_client.playImmediate(self._audio_buffer)
175
+ self._previously_interrupted = False
176
+ self._audio_buffer = bytearray()
177
+ except Exception as e:
178
+ logger.exception(f"{self} exception: {e}")
179
+ return
120
180
  elif isinstance(frame, (EndFrame, CancelFrame)):
121
181
  await self._stop()
122
- await self.push_frame(frame, direction)
123
- elif isinstance(frame, StartInterruptionFrame):
124
- await self._simli_client.clearBuffer()
125
- await self.push_frame(frame, direction)
126
- else:
127
- await self.push_frame(frame, direction)
182
+ elif isinstance(frame, (StartInterruptionFrame, UserStartedSpeakingFrame)):
183
+ if not self._previously_interrupted:
184
+ await self._simli_client.clearBuffer()
185
+ self._previously_interrupted = self._is_trinity_avatar
186
+
187
+ await self.push_frame(frame, direction)
128
188
 
129
189
  async def _stop(self):
190
+ """Stop the Simli client and cancel processing tasks."""
130
191
  await self._simli_client.stop()
131
192
  if self._audio_task:
132
193
  await self.cancel_task(self._audio_task)
File without changes