dv-pipecat-ai 0.0.85.dev840__py3-none-any.whl → 0.0.85.dev842__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dv-pipecat-ai
3
- Version: 0.0.85.dev840
3
+ Version: 0.0.85.dev842
4
4
  Summary: An open source framework for voice (and multimodal) assistants
5
5
  License-Expression: BSD-2-Clause
6
6
  Project-URL: Source, https://github.com/pipecat-ai/pipecat
@@ -1,4 +1,4 @@
1
- dv_pipecat_ai-0.0.85.dev840.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
1
+ dv_pipecat_ai-0.0.85.dev842.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
2
2
  pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
3
3
  pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -210,7 +210,7 @@ pipecat/services/cartesia/tts.py,sha256=I_OZCINywkDXmYzFL35MjSN8cAuNEaJs7nj0YB_o
210
210
  pipecat/services/cerebras/__init__.py,sha256=5zBmqq9Zfcl-HC7ylekVS5qrRedbl1mAeEwUT-T-c_o,259
211
211
  pipecat/services/cerebras/llm.py,sha256=-yzSe_6YDGigwzES-LZS4vNXMPugmvsIYEpTySyr5nA,3047
212
212
  pipecat/services/deepgram/__init__.py,sha256=IjRtMI7WytRDdmYVpk2qDWClXUiNgdl7ZkvEAWg1eYE,304
213
- pipecat/services/deepgram/stt.py,sha256=IvdKvo23PxhKoWTJDxuK4Uoo0wCtkFGAE_QrMUoGdYM,13732
213
+ pipecat/services/deepgram/stt.py,sha256=fzKirTjTopwXNQEEPuUOIgk4AMvTJQcrh6H11w13q2c,16185
214
214
  pipecat/services/deepgram/tts.py,sha256=H_2WCJEx3_L4ytrHHRNkA-6GKTd1coou_vvTfiEodpQ,3745
215
215
  pipecat/services/deepgram/flux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
216
216
  pipecat/services/deepgram/flux/stt.py,sha256=yCZodrHAOShgYy_GbdviX8iAuh36dBgDL41gHMXVxEM,25887
@@ -329,7 +329,7 @@ pipecat/services/sarvam/tts.py,sha256=lrwfdC53kZ7f2QPgNRxzryISNkrJCvNtlZ-19-iXg9
329
329
  pipecat/services/simli/__init__.py,sha256=cbDcqOaGsEgKbGYKpJ1Vv7LN4ZjOWA04sE84WW5vgQI,257
330
330
  pipecat/services/simli/video.py,sha256=Zu2XLvl2Y6VHaWzT9wEdzW9d0EYoZyzYLxjQFyV8vho,8320
331
331
  pipecat/services/soniox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
332
- pipecat/services/soniox/stt.py,sha256=AhJF2YOzmqgB80x22jocgzr3neYCBMyxzP_WjkYR9Gc,15441
332
+ pipecat/services/soniox/stt.py,sha256=Ndml6QvPQ1WZBvdGT3LSg-LLWwrZ8KlqW8wBBFsQrrM,16509
333
333
  pipecat/services/speechmatics/__init__.py,sha256=Jgq1fqrZVkpWC21D79L1cn5Ub8PnYgnnCaqC5pOlbIc,89
334
334
  pipecat/services/speechmatics/stt.py,sha256=GLGJzlMSeZ1WzTOMjhKXDl5JYkqGhnFTbP3o0ez0hSw,44696
335
335
  pipecat/services/tavus/__init__.py,sha256=SNyyi2Xq6tXIihDG2Bwvmg6Srbd-uWd1RwG-NKWcPuI,257
@@ -416,7 +416,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
416
416
  pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
417
417
  pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
418
418
  pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
419
- dv_pipecat_ai-0.0.85.dev840.dist-info/METADATA,sha256=ALfdKasSbWLkqVlc0XSkl9lo6qzO7Wpior0WxVFzWZk,32955
420
- dv_pipecat_ai-0.0.85.dev840.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
421
- dv_pipecat_ai-0.0.85.dev840.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
422
- dv_pipecat_ai-0.0.85.dev840.dist-info/RECORD,,
419
+ dv_pipecat_ai-0.0.85.dev842.dist-info/METADATA,sha256=8uxfODboEJQwB04vWViiyMIo4KyT-pGag_ChfLt8STo,32955
420
+ dv_pipecat_ai-0.0.85.dev842.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
421
+ dv_pipecat_ai-0.0.85.dev842.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
422
+ dv_pipecat_ai-0.0.85.dev842.dist-info/RECORD,,
@@ -62,6 +62,8 @@ class DeepgramSTTService(STTService):
62
62
  sample_rate: Optional[int] = None,
63
63
  live_options: Optional[LiveOptions] = None,
64
64
  addons: Optional[Dict] = None,
65
+ max_connect_retries: int = 3,
66
+ connect_timeout_s: float = 2.5,
65
67
  **kwargs,
66
68
  ):
67
69
  """Initialize the Deepgram STT service.
@@ -77,6 +79,9 @@ class DeepgramSTTService(STTService):
77
79
  sample_rate: Audio sample rate. If None, uses default or live_options value.
78
80
  live_options: Deepgram LiveOptions for detailed configuration.
79
81
  addons: Additional Deepgram features to enable.
82
+ max_connect_retries: Maximum number of connection attempts before giving up.
83
+ connect_timeout_s: Maximum time in seconds to wait for a connection attempt.
84
+ Connection retries wait 100ms between attempts.
80
85
  **kwargs: Additional arguments passed to the parent STTService.
81
86
  """
82
87
  sample_rate = sample_rate or (live_options.sample_rate if live_options else None)
@@ -121,9 +126,9 @@ class DeepgramSTTService(STTService):
121
126
  self._settings = merged_options
122
127
  self._addons = addons
123
128
 
124
- # Connection retry settings
125
- self._max_connect_retries = 3
126
- self._connect_retry_delay_s = 0.1
129
+ # Connection retry settings (100ms delay between retries)
130
+ self._max_connect_retries = max_connect_retries
131
+ self._connect_timeout_s = connect_timeout_s
127
132
 
128
133
  self._client = DeepgramClient(
129
134
  api_key,
@@ -131,8 +136,8 @@ class DeepgramSTTService(STTService):
131
136
  url=base_url,
132
137
  options={
133
138
  "keepalive": "true",
134
- "open_timeout": 3, # Max wait for only 3 seconds for the connection to establish #
135
- # "termination_exception_connect": True, # Enable exception propagation
139
+ # Note: Connection timeout is enforced by asyncio.wait_for() in _connect()
140
+ # with the connect_timeout_s parameter (default 2.5s)
136
141
  },
137
142
  verbose=logging.ERROR, # Enable error level and above logging
138
143
  ),
@@ -227,6 +232,11 @@ class DeepgramSTTService(STTService):
227
232
 
228
233
  for attempt in range(self._max_connect_retries):
229
234
  try:
235
+ # Clean up any previous connection attempt in background (non-blocking)
236
+ if hasattr(self, "_connection") and self._connection is not None:
237
+ old_conn = self._connection
238
+ asyncio.create_task(self._cleanup_abandoned_connection(old_conn))
239
+
230
240
  # Create a new connection object for a clean attempt
231
241
  self._connection: AsyncListenWebSocketClient = self._client.listen.asyncwebsocket.v(
232
242
  "1"
@@ -250,10 +260,25 @@ class DeepgramSTTService(STTService):
250
260
  self._on_utterance_end,
251
261
  )
252
262
 
253
- # Attempt to start the connection (timeout handled by open_timeout config)
254
- if await self._connection.start(options=self._settings, addons=self._addons):
255
- self.logger.info("Successfully connected to Deepgram.")
256
- return # Exit the method on success
263
+ try:
264
+ start_result = await asyncio.wait_for(
265
+ self._connection.start(options=self._settings, addons=self._addons),
266
+ timeout=self._connect_timeout_s,
267
+ )
268
+ except asyncio.TimeoutError:
269
+ self.logger.warning(
270
+ f"Deepgram connection attempt {attempt + 1}/{self._max_connect_retries} timed out after {self._connect_timeout_s} second(s)."
271
+ )
272
+ start_result = False
273
+ except Exception as start_error:
274
+ self.logger.warning(
275
+ f"Deepgram connection attempt {attempt + 1}/{self._max_connect_retries} failed with an exception: {start_error}"
276
+ )
277
+ start_result = False
278
+ else:
279
+ if start_result:
280
+ self.logger.info("Successfully connected to Deepgram.")
281
+ return # Exit the method on success
257
282
 
258
283
  self.logger.warning(
259
284
  f"Deepgram connection attempt {attempt + 1}/{self._max_connect_retries} failed."
@@ -264,17 +289,21 @@ class DeepgramSTTService(STTService):
264
289
  f"Deepgram connection attempt {attempt + 1}/{self._max_connect_retries} failed with an exception: {e}"
265
290
  )
266
291
 
267
- # If this is not the last attempt, wait briefly before retrying
292
+ # If this is not the last attempt, wait 100ms before retrying
268
293
  if attempt < self._max_connect_retries - 1:
269
- self.logger.info(f"Retrying in {self._connect_retry_delay_s} second(s)...")
270
- await asyncio.sleep(self._connect_retry_delay_s)
294
+ self.logger.info("Retrying in 0.1 second(s)...")
295
+ await asyncio.sleep(0.1)
271
296
 
272
- self.logger.error(
297
+ error_msg = (
273
298
  f"{self}: unable to connect to Deepgram after {self._max_connect_retries} attempts."
274
299
  )
300
+ self.logger.error(error_msg)
301
+ await self.push_error(ErrorFrame(error_msg, fatal=True))
275
302
 
276
303
  async def _disconnect(self):
277
- if self._connection.is_connected:
304
+ # Guard against missing connection instance and ensure proper async check
305
+ connection: AsyncListenWebSocketClient = getattr(self, "_connection", None)
306
+ if connection and await connection.is_connected():
278
307
  self.logger.debug("Disconnecting from Deepgram")
279
308
  # Deepgram swallows asyncio.CancelledError internally which prevents
280
309
  # proper cancellation propagation. This issue was found with
@@ -284,7 +313,25 @@ class DeepgramSTTService(STTService):
284
313
  # Deepgram disconnection was still finishing and therefore
285
314
  # preventing the task cancellation that occurs during `cleanup()`.
286
315
  # GH issue: https://github.com/deepgram/deepgram-python-sdk/issues/570
287
- await self._connection.finish()
316
+ await connection.finish()
317
+
318
+ async def _cleanup_abandoned_connection(self, conn: AsyncListenWebSocketClient):
319
+ """Clean up abandoned connection attempt in background (non-blocking).
320
+
321
+ This prevents zombie connections from triggering spurious error events
322
+ when they eventually time out and call _on_error().
323
+
324
+ Args:
325
+ conn: The abandoned connection object to clean up.
326
+ """
327
+ try:
328
+ # Try to finish with short timeout
329
+ await asyncio.wait_for(conn.finish(), timeout=5)
330
+ self.logger.debug("Successfully cleaned up abandoned connection")
331
+ except Exception as e:
332
+ # Ignore all cleanup errors - connection might not be fully started
333
+ # This is expected and fine - we just want best-effort cleanup
334
+ self.logger.debug(f"Abandoned connection cleanup failed: {e}")
288
335
 
289
336
  async def start_metrics(self):
290
337
  """Start TTFB and processing metrics collection."""
@@ -49,6 +49,33 @@ END_TOKEN = "<end>"
49
49
  FINALIZED_TOKEN = "<fin>"
50
50
 
51
51
 
52
+ class SonioxContextGeneralItem(BaseModel):
53
+ """Represents a key-value pair for structured general context information."""
54
+
55
+ key: str
56
+ value: str
57
+
58
+
59
+ class SonioxContextTranslationTerm(BaseModel):
60
+ """Represents a custom translation mapping for ambiguous or domain-specific terms."""
61
+
62
+ source: str
63
+ target: str
64
+
65
+
66
+ class SonioxContextObject(BaseModel):
67
+ """Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.
68
+
69
+ Learn more about context in the documentation:
70
+ https://soniox.com/docs/stt/concepts/context
71
+ """
72
+
73
+ general: Optional[List[SonioxContextGeneralItem]] = None
74
+ text: Optional[str] = None
75
+ terms: Optional[List[str]] = None
76
+ translation_terms: Optional[List[SonioxContextTranslationTerm]] = None
77
+
78
+
52
79
  class SonioxInputParams(BaseModel):
53
80
  """Real-time transcription settings.
54
81
 
@@ -60,9 +87,9 @@ class SonioxInputParams(BaseModel):
60
87
  audio_format: Audio format to use for transcription.
61
88
  num_channels: Number of channels to use for transcription.
62
89
  language_hints: List of language hints to use for transcription.
63
- context: Customization for transcription.
64
- enable_non_final_tokens: Whether to enable non-final tokens. If false, only final tokens will be returned.
65
- max_non_final_tokens_duration_ms: Maximum duration of non-final tokens.
90
+ context: Customization for transcription. Use a string for models with context_version 1 and a SonioxContextObject for models with context_version 2.
91
+ enable_speaker_diarization: Whether to enable speaker diarization. Tokens are annotated with speaker IDs.
92
+ enable_language_identification: Whether to enable language identification. Tokens are annotated with language IDs.
66
93
  client_reference_id: Client reference ID to use for transcription.
67
94
  """
68
95
 
@@ -72,10 +99,10 @@ class SonioxInputParams(BaseModel):
72
99
  num_channels: Optional[int] = 1
73
100
 
74
101
  language_hints: Optional[List[Language]] = None
75
- context: Optional[str] = None
102
+ context: Optional[SonioxContextObject | str] = None
76
103
 
77
- enable_non_final_tokens: Optional[bool] = True
78
- max_non_final_tokens_duration_ms: Optional[int] = None
104
+ enable_speaker_diarization: Optional[bool] = False
105
+ enable_language_identification: Optional[bool] = False
79
106
 
80
107
  client_reference_id: Optional[str] = None
81
108
 
@@ -173,6 +200,10 @@ class SonioxSTTService(STTService):
173
200
  # Either one or the other is required.
174
201
  enable_endpoint_detection = not self._vad_force_turn_endpoint
175
202
 
203
+ context = self._params.context
204
+ if isinstance(context, SonioxContextObject):
205
+ context = context.model_dump()
206
+
176
207
  # Send the initial configuration message.
177
208
  config = {
178
209
  "api_key": self._api_key,
@@ -182,9 +213,9 @@ class SonioxSTTService(STTService):
182
213
  "enable_endpoint_detection": enable_endpoint_detection,
183
214
  "sample_rate": self.sample_rate,
184
215
  "language_hints": _prepare_language_hints(self._params.language_hints),
185
- "context": self._params.context,
186
- "enable_non_final_tokens": self._params.enable_non_final_tokens,
187
- "max_non_final_tokens_duration_ms": self._params.max_non_final_tokens_duration_ms,
216
+ "context": context,
217
+ "enable_speaker_diarization": self._params.enable_speaker_diarization,
218
+ "enable_language_identification": self._params.enable_language_identification,
188
219
  "client_reference_id": self._params.client_reference_id,
189
220
  }
190
221