dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic (the original page links to more details).

Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
@@ -4,6 +4,8 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Configuration for the Gladia STT service."""
8
+
7
9
  from typing import Any, Dict, List, Optional, Union
8
10
 
9
11
  from pydantic import BaseModel
@@ -14,7 +16,7 @@ from pipecat.transcriptions.language import Language
14
16
  class LanguageConfig(BaseModel):
15
17
  """Configuration for language detection and handling.
16
18
 
17
- Attributes:
19
+ Parameters:
18
20
  languages: List of language codes to use for transcription
19
21
  code_switching: Whether to auto-detect language changes during transcription
20
22
  """
@@ -26,7 +28,7 @@ class LanguageConfig(BaseModel):
26
28
  class PreProcessingConfig(BaseModel):
27
29
  """Configuration for audio pre-processing options.
28
30
 
29
- Attributes:
31
+ Parameters:
30
32
  speech_threshold: Sensitivity for speech detection (0-1)
31
33
  """
32
34
 
@@ -36,7 +38,7 @@ class PreProcessingConfig(BaseModel):
36
38
  class CustomVocabularyItem(BaseModel):
37
39
  """Represents a custom vocabulary item with an intensity value.
38
40
 
39
- Attributes:
41
+ Parameters:
40
42
  value: The vocabulary word or phrase
41
43
  intensity: The bias intensity for this vocabulary item (0-1)
42
44
  """
@@ -48,7 +50,7 @@ class CustomVocabularyItem(BaseModel):
48
50
  class CustomVocabularyConfig(BaseModel):
49
51
  """Configuration for custom vocabulary.
50
52
 
51
- Attributes:
53
+ Parameters:
52
54
  vocabulary: List of words/phrases or CustomVocabularyItem objects
53
55
  default_intensity: Default intensity for simple string vocabulary items
54
56
  """
@@ -60,7 +62,7 @@ class CustomVocabularyConfig(BaseModel):
60
62
  class CustomSpellingConfig(BaseModel):
61
63
  """Configuration for custom spelling rules.
62
64
 
63
- Attributes:
65
+ Parameters:
64
66
  spelling_dictionary: Mapping of correct spellings to phonetic variations
65
67
  """
66
68
 
@@ -70,7 +72,7 @@ class CustomSpellingConfig(BaseModel):
70
72
  class TranslationConfig(BaseModel):
71
73
  """Configuration for real-time translation.
72
74
 
73
- Attributes:
75
+ Parameters:
74
76
  target_languages: List of target language codes for translation
75
77
  model: Translation model to use ("base" or "enhanced")
76
78
  match_original_utterances: Whether to align translations with original utterances
@@ -92,7 +94,7 @@ class TranslationConfig(BaseModel):
92
94
  class RealtimeProcessingConfig(BaseModel):
93
95
  """Configuration for real-time processing features.
94
96
 
95
- Attributes:
97
+ Parameters:
96
98
  words_accurate_timestamps: Whether to provide per-word timestamps
97
99
  custom_vocabulary: Whether to enable custom vocabulary
98
100
  custom_vocabulary_config: Custom vocabulary configuration
@@ -118,7 +120,7 @@ class RealtimeProcessingConfig(BaseModel):
118
120
  class MessagesConfig(BaseModel):
119
121
  """Configuration for controlling which message types are sent via WebSocket.
120
122
 
121
- Attributes:
123
+ Parameters:
122
124
  receive_partial_transcripts: Whether to receive intermediate transcription results
123
125
  receive_final_transcripts: Whether to receive final transcription results
124
126
  receive_speech_events: Whether to receive speech begin/end events
@@ -144,14 +146,19 @@ class MessagesConfig(BaseModel):
144
146
  class GladiaInputParams(BaseModel):
145
147
  """Configuration parameters for the Gladia STT service.
146
148
 
147
- Attributes:
149
+ Parameters:
148
150
  encoding: Audio encoding format
149
151
  bit_depth: Audio bit depth
150
152
  channels: Number of audio channels
151
153
  custom_metadata: Additional metadata to include with requests
152
154
  endpointing: Silence duration in seconds to mark end of speech
153
155
  maximum_duration_without_endpointing: Maximum utterance duration without silence
154
- language: DEPRECATED - Use language_config instead
156
+ language: Language code for transcription
157
+
158
+ .. deprecated:: 0.0.62
159
+ The 'language' parameter is deprecated and will be removed in a future version.
160
+ Use 'language_config' instead.
161
+
155
162
  language_config: Detailed language configuration
156
163
  pre_processing: Audio pre-processing options
157
164
  realtime_processing: Real-time processing features
@@ -4,11 +4,17 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Gladia Speech-to-Text (STT) service implementation.
8
+
9
+ This module provides a Speech-to-Text service using Gladia's real-time WebSocket API,
10
+ supporting multiple languages, custom vocabulary, and various audio processing options.
11
+ """
12
+
7
13
  import asyncio
8
14
  import base64
9
15
  import json
10
16
  import warnings
11
- from typing import Any, AsyncGenerator, Dict, List, Optional # Add List
17
+ from typing import Any, AsyncGenerator, Dict, Literal, List, Optional
12
18
 
13
19
  import aiohttp
14
20
  from loguru import logger
@@ -31,12 +37,13 @@ from pipecat.services.gladia.config import (
31
37
  )
32
38
  from pipecat.services.stt_service import STTService
33
39
  from pipecat.transcriptions.language import Language
34
- from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
35
40
  from pipecat.utils.time import time_now_iso8601
36
41
  from pipecat.utils.tracing.service_decorators import traced_stt
37
42
 
38
43
  try:
39
44
  import websockets
45
+ from websockets.asyncio.client import connect as websocket_connect
46
+ from websockets.protocol import State
40
47
  except ModuleNotFoundError as e:
41
48
  logger.error(f"Exception: {e}")
42
49
  logger.error("In order to use Gladia, you need to `pip install pipecat-ai[gladia]`.")
@@ -47,10 +54,10 @@ def language_to_gladia_language(language: Language) -> Optional[str]:
47
54
  """Convert a Language enum to Gladia's language code format.
48
55
 
49
56
  Args:
50
- language: The Language enum value to convert
57
+ language: The Language enum value to convert.
51
58
 
52
59
  Returns:
53
- The Gladia language code string or None if not supported
60
+ The Gladia language code string or None if not supported.
54
61
  """
55
62
  BASE_LANGUAGES = {
56
63
  Language.AF: "af",
@@ -186,8 +193,12 @@ class GladiaSTTService(STTService):
186
193
 
187
194
  This service connects to Gladia's WebSocket API for real-time transcription
188
195
  with support for multiple languages, custom vocabulary, and various processing options.
196
+ Provides automatic reconnection, audio buffering, and comprehensive error handling.
189
197
 
190
198
  For complete API documentation, see: https://docs.gladia.io/api-reference/v2/live/init
199
+
200
+ .. deprecated:: 0.0.62
201
+ Use :class:`~pipecat.services.gladia.config.GladiaInputParams` directly instead.
191
202
  """
192
203
 
193
204
  # Maintain backward compatibility
@@ -197,6 +208,7 @@ class GladiaSTTService(STTService):
197
208
  self,
198
209
  *,
199
210
  api_key: str,
211
+ region: Literal["us-west", "eu-west"] | None = None,
200
212
  url: str = "https://api.gladia.io/v2/live",
201
213
  confidence: float = 0.5,
202
214
  sample_rate: Optional[int] = None,
@@ -210,16 +222,17 @@ class GladiaSTTService(STTService):
210
222
  """Initialize the Gladia STT service.
211
223
 
212
224
  Args:
213
- api_key: Gladia API key
214
- url: Gladia API URL
215
- confidence: Minimum confidence threshold for transcriptions
216
- sample_rate: Audio sample rate in Hz
217
- model: Model to use ("solaria-1")
218
- params: Additional configuration parameters
219
- max_reconnection_attempts: Maximum number of reconnection attempts
220
- reconnection_delay: Initial delay between reconnection attempts (exponential backoff)
221
- max_buffer_size: Maximum size of audio buffer in bytes
222
- **kwargs: Additional arguments passed to the STTService
225
+ api_key: Gladia API key for authentication.
226
+ region: Region used to process audio. eu-west or us-west. Defaults to eu-west.
227
+ url: Gladia API URL. Defaults to "https://api.gladia.io/v2/live".
228
+ confidence: Minimum confidence threshold for transcriptions (0.0-1.0).
229
+ sample_rate: Audio sample rate in Hz. If None, uses service default.
230
+ model: Model to use for transcription. Defaults to "solaria-1".
231
+ params: Additional configuration parameters for Gladia service.
232
+ max_reconnection_attempts: Maximum number of reconnection attempts. Defaults to 5.
233
+ reconnection_delay: Initial delay between reconnection attempts in seconds.
234
+ max_buffer_size: Maximum size of audio buffer in bytes. Defaults to 20MB.
235
+ **kwargs: Additional arguments passed to the STTService parent class.
223
236
  """
224
237
  super().__init__(sample_rate=sample_rate, **kwargs)
225
238
  vocab: Optional[List[str]] = kwargs.pop("vocab", None) # Get vocab from kwargs
@@ -236,6 +249,7 @@ class GladiaSTTService(STTService):
236
249
  )
237
250
 
238
251
  self._api_key = api_key
252
+ self._region = region
239
253
  self._url = url
240
254
  self.set_model_name(model)
241
255
  self._confidence = confidence
@@ -280,10 +294,22 @@ class GladiaSTTService(STTService):
280
294
  self._should_reconnect = True
281
295
 
282
296
  def can_generate_metrics(self) -> bool:
297
+ """Check if the service can generate performance metrics.
298
+
299
+ Returns:
300
+ True, indicating this service supports metrics generation.
301
+ """
283
302
  return True
284
303
 
285
304
  def language_to_service_language(self, language: Language) -> Optional[str]:
286
- """Convert pipecat Language enum to Gladia's language code."""
305
+ """Convert pipecat Language enum to Gladia's language code.
306
+
307
+ Args:
308
+ language: The Language enum value to convert.
309
+
310
+ Returns:
311
+ The Gladia language code string or None if not supported.
312
+ """
287
313
  return language_to_gladia_language(language)
288
314
 
289
315
  def _prepare_settings(self) -> Dict[str, Any]:
@@ -338,7 +364,11 @@ class GladiaSTTService(STTService):
338
364
  return settings
339
365
 
340
366
  async def start(self, frame: StartFrame):
341
- """Start the Gladia STT websocket connection."""
367
+ """Start the Gladia STT websocket connection.
368
+
369
+ Args:
370
+ frame: The start frame triggering service startup.
371
+ """
342
372
  await super().start(frame)
343
373
  if self._connection_task:
344
374
  return
@@ -347,7 +377,11 @@ class GladiaSTTService(STTService):
347
377
  self._connection_task = self.create_task(self._connection_handler())
348
378
 
349
379
  async def stop(self, frame: EndFrame):
350
- """Stop the Gladia STT websocket connection."""
380
+ """Stop the Gladia STT websocket connection.
381
+
382
+ Args:
383
+ frame: The end frame triggering service shutdown.
384
+ """
351
385
  await super().stop(frame)
352
386
  self._should_reconnect = False
353
387
  await self._send_stop_recording()
@@ -359,7 +393,11 @@ class GladiaSTTService(STTService):
359
393
  await self._cleanup_connection()
360
394
 
361
395
  async def cancel(self, frame: CancelFrame):
362
- """Cancel the Gladia STT websocket connection."""
396
+ """Cancel the Gladia STT websocket connection.
397
+
398
+ Args:
399
+ frame: The cancel frame triggering service cancellation.
400
+ """
363
401
  await super().cancel(frame)
364
402
  self._should_reconnect = False
365
403
 
@@ -370,7 +408,14 @@ class GladiaSTTService(STTService):
370
408
  await self._cleanup_connection()
371
409
 
372
410
  async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
373
- """Run speech-to-text on audio data."""
411
+ """Run speech-to-text on audio data.
412
+
413
+ Args:
414
+ audio: Raw audio bytes to transcribe.
415
+
416
+ Yields:
417
+ None (processing is handled asynchronously via WebSocket).
418
+ """
374
419
  await self.start_ttfb_metrics()
375
420
  await self.start_processing_metrics()
376
421
 
@@ -385,7 +430,7 @@ class GladiaSTTService(STTService):
385
430
  logger.warning(f"Audio buffer exceeded max size, trimmed {trim_size} bytes")
386
431
 
387
432
  # Send audio if connected
388
- if self._connection_active and self._websocket and not self._websocket.closed:
433
+ if self._connection_active and self._websocket and self._websocket.state is State.OPEN:
389
434
  try:
390
435
  await self._send_audio(audio)
391
436
  except websockets.exceptions.ConnectionClosed as e:
@@ -406,11 +451,11 @@ class GladiaSTTService(STTService):
406
451
  self._reconnection_attempts = 0
407
452
 
408
453
  # Connect with automatic reconnection
409
- async with websockets.connect(self._session_url) as websocket:
454
+ async with websocket_connect(self._session_url) as websocket:
410
455
  try:
411
456
  self._websocket = websocket
412
457
  self._connection_active = True
413
- logger.info("Connected to Gladia WebSocket")
458
+ logger.debug(f"{self} Connected to Gladia WebSocket")
414
459
 
415
460
  # Send buffered audio if any
416
461
  await self._send_buffered_audio()
@@ -465,10 +510,14 @@ class GladiaSTTService(STTService):
465
510
 
466
511
  async def _setup_gladia(self, settings: Dict[str, Any]):
467
512
  async with aiohttp.ClientSession() as session:
513
+ params = {}
514
+ if self._region:
515
+ params["region"] = self._region
468
516
  async with session.post(
469
517
  self._url,
470
- headers={"X-Gladia-Key": self._api_key, "Content-Type": "application/json"},
518
+ headers={"X-Gladia-Key": self._api_key},
471
519
  json=settings,
520
+ params=params,
472
521
  ) as response:
473
522
  if response.ok:
474
523
  return await response.json()
@@ -490,7 +539,7 @@ class GladiaSTTService(STTService):
490
539
 
491
540
  async def _send_audio(self, audio: bytes):
492
541
  """Send audio chunk with proper message format."""
493
- if self._websocket and not self._websocket.closed:
542
+ if self._websocket and self._websocket.state is State.OPEN:
494
543
  data = base64.b64encode(audio).decode("utf-8")
495
544
  message = {"type": "audio_chunk", "data": {"chunk": data}}
496
545
  await self._websocket.send(json.dumps(message))
@@ -499,22 +548,21 @@ class GladiaSTTService(STTService):
499
548
  """Send any buffered audio after reconnection."""
500
549
  async with self._buffer_lock:
501
550
  if self._audio_buffer:
502
- logger.info(f"Sending {len(self._audio_buffer)} bytes of buffered audio")
551
+ logger.debug(f"{self} Sending {len(self._audio_buffer)} bytes of buffered audio")
503
552
  await self._send_audio(bytes(self._audio_buffer))
504
553
 
505
554
  async def _send_stop_recording(self):
506
- if self._websocket and not self._websocket.closed:
555
+ if self._websocket and self._websocket.state is State.OPEN:
507
556
  await self._websocket.send(json.dumps({"type": "stop_recording"}))
508
557
 
509
558
  async def _keepalive_task_handler(self):
510
559
  """Send periodic empty audio chunks to keep the connection alive."""
511
560
  try:
512
- KEEPALIVE_SLEEP = 20 if self.task_manager.task_watchdog_enabled else 3
561
+ KEEPALIVE_SLEEP = 20
513
562
  while self._connection_active:
514
- self.reset_watchdog()
515
563
  # Send keepalive (Gladia times out after 30 seconds)
516
564
  await asyncio.sleep(KEEPALIVE_SLEEP)
517
- if self._websocket and not self._websocket.closed:
565
+ if self._websocket and self._websocket.state is State.OPEN:
518
566
  # Send an empty audio chunk as keepalive
519
567
  empty_audio = b""
520
568
  await self._send_audio(empty_audio)
@@ -528,7 +576,7 @@ class GladiaSTTService(STTService):
528
576
 
529
577
  async def _receive_task_handler(self):
530
578
  try:
531
- async for message in WatchdogAsyncIterator(self._websocket, manager=self.task_manager):
579
+ async for message in self._websocket:
532
580
  content = json.loads(message)
533
581
 
534
582
  # Handle audio chunk acknowledgments
@@ -553,7 +601,7 @@ class GladiaSTTService(STTService):
553
601
  await self.push_frame(
554
602
  TranscriptionFrame(
555
603
  transcript,
556
- "",
604
+ self._user_id,
557
605
  time_now_iso8601(),
558
606
  language,
559
607
  result=content,
@@ -568,7 +616,7 @@ class GladiaSTTService(STTService):
568
616
  await self.push_frame(
569
617
  InterimTranscriptionFrame(
570
618
  transcript,
571
- "",
619
+ self._user_id,
572
620
  time_now_iso8601(),
573
621
  language,
574
622
  result=content,
@@ -586,8 +634,6 @@ class GladiaSTTService(STTService):
586
634
  translation, "", time_now_iso8601(), translated_language
587
635
  )
588
636
  )
589
-
590
- self.reset_watchdog()
591
637
  except websockets.exceptions.ConnectionClosed:
592
638
  # Expected when closing the connection
593
639
  pass
@@ -604,8 +650,8 @@ class GladiaSTTService(STTService):
604
650
  self._should_reconnect = False
605
651
  return False
606
652
  delay = self._reconnection_delay * (2 ** (self._reconnection_attempts - 1))
607
- logger.info(
608
- f"Reconnecting in {delay} seconds (attempt {self._reconnection_attempts}/{self._max_reconnection_attempts})"
653
+ logger.debug(
654
+ f"{self} Reconnecting in {delay} seconds (attempt {self._reconnection_attempts}/{self._max_reconnection_attempts})"
609
655
  )
610
656
  await asyncio.sleep(delay)
611
657
  return True
@@ -4,6 +4,13 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Google AI service frames for search and grounding functionality.
8
+
9
+ This module defines specialized frame types for handling search results
10
+ and grounding metadata from Google AI models, particularly for Gemini
11
+ models that support web search and fact grounding capabilities.
12
+ """
13
+
7
14
  from dataclasses import dataclass, field
8
15
  from typing import List, Optional
9
16
 
@@ -12,12 +19,27 @@ from pipecat.frames.frames import DataFrame
12
19
 
13
20
  @dataclass
14
21
  class LLMSearchResult:
22
+ """Represents a single search result with confidence scores.
23
+
24
+ Parameters:
25
+ text: The search result text content.
26
+ confidence: List of confidence scores associated with the result.
27
+ """
28
+
15
29
  text: str
16
30
  confidence: List[float] = field(default_factory=list)
17
31
 
18
32
 
19
33
  @dataclass
20
34
  class LLMSearchOrigin:
35
+ """Represents the origin source of search results.
36
+
37
+ Parameters:
38
+ site_uri: URI of the source website.
39
+ site_title: Title of the source website.
40
+ results: List of search results from this origin.
41
+ """
42
+
21
43
  site_uri: Optional[str] = None
22
44
  site_title: Optional[str] = None
23
45
  results: List[LLMSearchResult] = field(default_factory=list)
@@ -25,9 +47,27 @@ class LLMSearchOrigin:
25
47
 
26
48
  @dataclass
27
49
  class LLMSearchResponseFrame(DataFrame):
50
+ """Frame containing search results and grounding information from Google AI models.
51
+
52
+ This frame is used to convey search results and grounding metadata
53
+ from Google AI models that support web search capabilities. It includes
54
+ the search result text, rendered content, and detailed origin information
55
+ with confidence scores.
56
+
57
+ Parameters:
58
+ search_result: The main search result text.
59
+ rendered_content: Rendered content from the search entry point.
60
+ origins: List of search result origins with detailed information.
61
+ """
62
+
28
63
  search_result: Optional[str] = None
29
64
  rendered_content: Optional[str] = None
30
65
  origins: List[LLMSearchOrigin] = field(default_factory=list)
31
66
 
32
67
  def __str__(self):
68
+ """Return string representation of the search response frame.
69
+
70
+ Returns:
71
+ String representation showing search result and origins.
72
+ """
33
73
  return f"LLMSearchResponseFrame(search_result={self.search_result}, origins={self.origins})"
@@ -4,6 +4,8 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Google services module for Pipecat."""
8
+
7
9
  import sys
8
10
 
9
11
  from pipecat.services import DeprecatedModuleProxy
@@ -4,6 +4,12 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """Google AI image generation service implementation.
8
+
9
+ This module provides integration with Google's Imagen model for generating
10
+ images from text prompts using the Google AI API.
11
+ """
12
+
7
13
  import io
8
14
  import os
9
15
 
@@ -29,7 +35,22 @@ except ModuleNotFoundError as e:
29
35
 
30
36
 
31
37
  class GoogleImageGenService(ImageGenService):
38
+ """Google AI image generation service using Imagen models.
39
+
40
+ Provides text-to-image generation capabilities using Google's Imagen models
41
+ through the Google AI API. Supports multiple image generation and negative
42
+ prompting for enhanced control over generated content.
43
+ """
44
+
32
45
  class InputParams(BaseModel):
46
+ """Configuration parameters for Google image generation.
47
+
48
+ Parameters:
49
+ number_of_images: Number of images to generate (1-8). Defaults to 1.
50
+ model: Google Imagen model to use. Defaults to "imagen-3.0-generate-002".
51
+ negative_prompt: Optional negative prompt to guide what not to include.
52
+ """
53
+
33
54
  number_of_images: int = Field(default=1, ge=1, le=8)
34
55
  model: str = Field(default="imagen-3.0-generate-002")
35
56
  negative_prompt: Optional[str] = Field(default=None)
@@ -41,22 +62,38 @@ class GoogleImageGenService(ImageGenService):
41
62
  params: Optional[InputParams] = None,
42
63
  **kwargs,
43
64
  ):
65
+ """Initialize the GoogleImageGenService with API key and parameters.
66
+
67
+ Args:
68
+ api_key: Google AI API key for authentication.
69
+ params: Configuration parameters for image generation. Defaults to InputParams().
70
+ **kwargs: Additional arguments passed to the parent ImageGenService.
71
+ """
44
72
  super().__init__(**kwargs)
45
73
  self._params = params or GoogleImageGenService.InputParams()
46
74
  self._client = genai.Client(api_key=api_key)
47
75
  self.set_model_name(self._params.model)
48
76
 
49
77
  def can_generate_metrics(self) -> bool:
78
+ """Check if this service can generate processing metrics.
79
+
80
+ Returns:
81
+ True, as Google image generation service supports metrics.
82
+ """
50
83
  return True
51
84
 
52
85
  async def run_image_gen(self, prompt: str) -> AsyncGenerator[Frame, None]:
53
86
  """Generate images from a text prompt using Google's Imagen model.
54
87
 
55
88
  Args:
56
- prompt (str): The text description to generate images from.
89
+ prompt: The text description to generate images from.
57
90
 
58
91
  Yields:
59
- Frame: Generated image frames or error frames.
92
+ Frame: Generated URLImageRawFrame objects containing the generated
93
+ images, or ErrorFrame objects if generation fails.
94
+
95
+ Raises:
96
+ Exception: If there are issues with the Google AI API or image processing.
60
97
  """
61
98
  logger.debug(f"Generating image from prompt: {prompt}")
62
99
  await self.start_ttfb_metrics()