dv-pipecat-ai 0.0.85.dev824__py3-none-any.whl → 0.0.85.dev858__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic; see the registry's advisory page for more details.

Files changed (31):
  1. {dv_pipecat_ai-0.0.85.dev824.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/METADATA +2 -1
  2. {dv_pipecat_ai-0.0.85.dev824.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/RECORD +31 -29
  3. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +5 -1
  4. pipecat/frames/frames.py +22 -0
  5. pipecat/metrics/connection_metrics.py +45 -0
  6. pipecat/processors/aggregators/llm_response.py +15 -9
  7. pipecat/processors/dtmf_aggregator.py +17 -21
  8. pipecat/processors/frame_processor.py +44 -1
  9. pipecat/processors/metrics/frame_processor_metrics.py +108 -0
  10. pipecat/processors/transcript_processor.py +2 -1
  11. pipecat/serializers/__init__.py +2 -0
  12. pipecat/serializers/asterisk.py +16 -2
  13. pipecat/serializers/convox.py +2 -2
  14. pipecat/serializers/custom.py +2 -2
  15. pipecat/serializers/vi.py +326 -0
  16. pipecat/services/cartesia/tts.py +75 -10
  17. pipecat/services/deepgram/stt.py +317 -17
  18. pipecat/services/elevenlabs/stt.py +487 -19
  19. pipecat/services/elevenlabs/tts.py +28 -4
  20. pipecat/services/google/llm.py +26 -11
  21. pipecat/services/openai/base_llm.py +79 -14
  22. pipecat/services/salesforce/llm.py +64 -59
  23. pipecat/services/sarvam/tts.py +0 -1
  24. pipecat/services/soniox/stt.py +45 -10
  25. pipecat/services/vistaar/llm.py +97 -6
  26. pipecat/transcriptions/language.py +50 -0
  27. pipecat/transports/base_input.py +15 -11
  28. pipecat/transports/base_output.py +26 -3
  29. {dv_pipecat_ai-0.0.85.dev824.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/WHEEL +0 -0
  30. {dv_pipecat_ai-0.0.85.dev824.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/licenses/LICENSE +0 -0
  31. {dv_pipecat_ai-0.0.85.dev824.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/top_level.txt +0 -0
@@ -15,7 +15,6 @@ from typing import AsyncGenerator, List, Literal, Optional, Union
15
15
  from loguru import logger
16
16
  from pydantic import BaseModel, Field
17
17
 
18
-
19
18
  from pipecat.frames.frames import (
20
19
  CancelFrame,
21
20
  EndFrame,
@@ -49,6 +48,26 @@ except ModuleNotFoundError as e:
49
48
  raise Exception(f"Missing module: {e}")
50
49
 
51
50
 
51
+ class GenerationConfig(BaseModel):
52
+ """Configuration for Cartesia Sonic-3 generation parameters.
53
+
54
+ Sonic-3 interprets these parameters as guidance to ensure natural speech.
55
+ Test against your content for best results.
56
+
57
+ Parameters:
58
+ volume: Volume multiplier for generated speech. Valid range: [0.5, 2.0]. Default is 1.0.
59
+ speed: Speed multiplier for generated speech. Valid range: [0.6, 1.5]. Default is 1.0.
60
+ emotion: Single emotion string to guide the emotional tone. Examples include neutral,
61
+ angry, excited, content, sad, scared. Over 60 emotions are supported. For best
62
+ results, use with recommended voices: Leo, Jace, Kyle, Gavin, Maya, Tessa, Dana,
63
+ and Marian.
64
+ """
65
+
66
+ volume: Optional[float] = None
67
+ speed: Optional[float] = None
68
+ emotion: Optional[str] = None
69
+
70
+
52
71
  def language_to_cartesia_language(language: Language) -> Optional[str]:
53
72
  """Convert a Language enum to Cartesia language code.
54
73
 
@@ -74,6 +93,33 @@ def language_to_cartesia_language(language: Language) -> Optional[str]:
74
93
  Language.SV: "sv",
75
94
  Language.TR: "tr",
76
95
  Language.ZH: "zh",
96
+ Language.TL: "tl",
97
+ Language.BG: "bg",
98
+ Language.RO: "ro",
99
+ Language.AR: "ar",
100
+ Language.CS: "cs",
101
+ Language.EL: "el",
102
+ Language.FI: "fi",
103
+ Language.HR: "hr",
104
+ Language.MS: "ms",
105
+ Language.SK: "sk",
106
+ Language.DA: "da",
107
+ Language.TA: "ta",
108
+ Language.UK: "uk",
109
+ Language.HU: "hu",
110
+ Language.NO: "no",
111
+ Language.VI: "vi",
112
+ Language.BN: "bn",
113
+ Language.TH: "th",
114
+ Language.HE: "he",
115
+ Language.KA: "ka",
116
+ Language.ID: "id",
117
+ Language.TE: "te",
118
+ Language.GU: "gu",
119
+ Language.KN: "kn",
120
+ Language.ML: "ml",
121
+ Language.MR: "mr",
122
+ Language.PA: "pa",
77
123
  }
78
124
 
79
125
  result = BASE_LANGUAGES.get(language)
@@ -102,16 +148,20 @@ class CartesiaTTSService(AudioContextWordTTSService):
102
148
 
103
149
  Parameters:
104
150
  language: Language to use for synthesis.
105
- speed: Voice speed control.
106
- emotion: List of emotion controls.
151
+ speed: Voice speed control for non-Sonic-3 models (literal values).
152
+ emotion: List of emotion controls for non-Sonic-3 models.
107
153
 
108
154
  .. deprecated:: 0.0.68
109
155
  The `emotion` parameter is deprecated and will be removed in a future version.
156
+
157
+ generation_config: Generation configuration for Sonic-3 models. Includes volume,
158
+ speed (numeric), and emotion (string) parameters.
110
159
  """
111
160
 
112
161
  language: Optional[Language] = Language.EN
113
162
  speed: Optional[Literal["slow", "normal", "fast"]] = None
114
163
  emotion: Optional[List[str]] = []
164
+ generation_config: Optional[GenerationConfig] = None
115
165
 
116
166
  def __init__(
117
167
  self,
@@ -120,7 +170,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
120
170
  voice_id: str,
121
171
  cartesia_version: str = "2025-04-16",
122
172
  url: str = "wss://api.cartesia.ai/tts/websocket",
123
- model: str = "sonic-2",
173
+ model: str = "sonic-3",
124
174
  sample_rate: Optional[int] = None,
125
175
  encoding: str = "pcm_s16le",
126
176
  container: str = "raw",
@@ -136,7 +186,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
136
186
  voice_id: ID of the voice to use for synthesis.
137
187
  cartesia_version: API version string for Cartesia service.
138
188
  url: WebSocket URL for Cartesia TTS API.
139
- model: TTS model to use (e.g., "sonic-2").
189
+ model: TTS model to use (e.g., "sonic-3").
140
190
  sample_rate: Audio sample rate. If None, uses default.
141
191
  encoding: Audio encoding format.
142
192
  container: Audio container format.
@@ -180,6 +230,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
180
230
  else "en",
181
231
  "speed": params.speed,
182
232
  "emotion": params.emotion,
233
+ "generation_config": params.generation_config,
183
234
  }
184
235
  self.set_model_name(model)
185
236
  self.set_voice(voice_id)
@@ -298,6 +349,11 @@ class CartesiaTTSService(AudioContextWordTTSService):
298
349
  if self._settings["speed"]:
299
350
  msg["speed"] = self._settings["speed"]
300
351
 
352
+ if self._settings["generation_config"]:
353
+ msg["generation_config"] = self._settings["generation_config"].model_dump(
354
+ exclude_none=True
355
+ )
356
+
301
357
  return json.dumps(msg)
302
358
 
303
359
  async def start(self, frame: StartFrame):
@@ -419,7 +475,6 @@ class CartesiaTTSService(AudioContextWordTTSService):
419
475
  logger.error(f"{self} error: {msg}")
420
476
  await self.push_frame(TTSStoppedFrame())
421
477
  await self.stop_all_metrics()
422
-
423
478
  await self.push_error(ErrorFrame(f"{self} error: {msg['error']}"))
424
479
  self._context_id = None
425
480
  else:
@@ -484,23 +539,27 @@ class CartesiaHttpTTSService(TTSService):
484
539
 
485
540
  Parameters:
486
541
  language: Language to use for synthesis.
487
- speed: Voice speed control.
488
- emotion: List of emotion controls.
542
+ speed: Voice speed control for non-Sonic-3 models (literal values).
543
+ emotion: List of emotion controls for non-Sonic-3 models.
489
544
 
490
545
  .. deprecated:: 0.0.68
491
546
  The `emotion` parameter is deprecated and will be removed in a future version.
547
+
548
+ generation_config: Generation configuration for Sonic-3 models. Includes volume,
549
+ speed (numeric), and emotion (string) parameters.
492
550
  """
493
551
 
494
552
  language: Optional[Language] = Language.EN
495
553
  speed: Optional[Literal["slow", "normal", "fast"]] = None
496
554
  emotion: Optional[List[str]] = Field(default_factory=list)
555
+ generation_config: Optional[GenerationConfig] = None
497
556
 
498
557
  def __init__(
499
558
  self,
500
559
  *,
501
560
  api_key: str,
502
561
  voice_id: str,
503
- model: str = "sonic-2",
562
+ model: str = "sonic-3",
504
563
  base_url: str = "https://api.cartesia.ai",
505
564
  cartesia_version: str = "2024-11-13",
506
565
  sample_rate: Optional[int] = None,
@@ -514,7 +573,7 @@ class CartesiaHttpTTSService(TTSService):
514
573
  Args:
515
574
  api_key: Cartesia API key for authentication.
516
575
  voice_id: ID of the voice to use for synthesis.
517
- model: TTS model to use (e.g., "sonic-2").
576
+ model: TTS model to use (e.g., "sonic-3").
518
577
  base_url: Base URL for Cartesia HTTP API.
519
578
  cartesia_version: API version string for Cartesia service.
520
579
  sample_rate: Audio sample rate. If None, uses default.
@@ -541,6 +600,7 @@ class CartesiaHttpTTSService(TTSService):
541
600
  else "en",
542
601
  "speed": params.speed,
543
602
  "emotion": params.emotion,
603
+ "generation_config": params.generation_config,
544
604
  }
545
605
  self.set_voice(voice_id)
546
606
  self.set_model_name(model)
@@ -634,6 +694,11 @@ class CartesiaHttpTTSService(TTSService):
634
694
  if self._settings["speed"]:
635
695
  payload["speed"] = self._settings["speed"]
636
696
 
697
+ if self._settings["generation_config"]:
698
+ payload["generation_config"] = self._settings["generation_config"].model_dump(
699
+ exclude_none=True
700
+ )
701
+
637
702
  yield TTSStartedFrame()
638
703
 
639
704
  session = await self._client._get_session()
@@ -8,7 +8,11 @@
8
8
 
9
9
  import asyncio
10
10
  import logging
11
- from typing import AsyncGenerator, Dict, Optional
11
+ import os
12
+ import socket
13
+ import time
14
+ from typing import AsyncGenerator, Callable, Dict, Optional
15
+ from urllib.parse import urlparse
12
16
 
13
17
  from loguru import logger
14
18
 
@@ -29,6 +33,155 @@ from pipecat.transcriptions.language import Language
29
33
  from pipecat.utils.time import time_now_iso8601
30
34
  from pipecat.utils.tracing.service_decorators import traced_stt
31
35
 
36
+ _PROCESS_START_MONOTONIC = time.monotonic()
37
+
38
+
39
+ def _read_first_numeric_file(paths):
40
+ for path in paths:
41
+ try:
42
+ with open(path, "r", encoding="utf-8") as file:
43
+ value = file.read().strip()
44
+ except FileNotFoundError:
45
+ continue
46
+ except OSError:
47
+ continue
48
+
49
+ if not value or value == "max":
50
+ return None
51
+
52
+ try:
53
+ return int(value)
54
+ except ValueError:
55
+ continue
56
+ return None
57
+
58
+
59
+ def _read_proc_status_value(key):
60
+ try:
61
+ with open("/proc/self/status", "r", encoding="utf-8") as status_file:
62
+ for line in status_file:
63
+ if line.startswith(key):
64
+ parts = line.split()
65
+ if len(parts) >= 2:
66
+ return int(parts[1]) * 1024 # kB -> bytes
67
+ except FileNotFoundError:
68
+ return None
69
+ except OSError:
70
+ return None
71
+ return None
72
+
73
+
74
+ def _read_cpu_throttling():
75
+ paths = ["/sys/fs/cgroup/cpu.stat", "/sys/fs/cgroup/cpu/cpu.stat"]
76
+ for path in paths:
77
+ try:
78
+ with open(path, "r", encoding="utf-8") as cpu_file:
79
+ for line in cpu_file:
80
+ if line.startswith("nr_throttled"):
81
+ parts = line.split()
82
+ if len(parts) >= 2:
83
+ return int(parts[1])
84
+ except FileNotFoundError:
85
+ continue
86
+ except OSError:
87
+ continue
88
+ return None
89
+
90
+
91
+ def _collect_runtime_diagnostics(
92
+ loop: Optional[asyncio.AbstractEventLoop] = None,
93
+ extra_context: Optional[Dict] = None,
94
+ context_provider: Optional[Callable[[], Dict]] = None,
95
+ ):
96
+ if loop is None:
97
+ try:
98
+ loop = asyncio.get_running_loop()
99
+ except RuntimeError:
100
+ loop = None
101
+
102
+ uptime_s = round(time.monotonic() - _PROCESS_START_MONOTONIC, 1)
103
+ rss_bytes = _read_proc_status_value("VmRSS:")
104
+ rss_mb = round(rss_bytes / (1024**2), 2) if rss_bytes else None
105
+
106
+ cgroup_usage_bytes = _read_first_numeric_file(
107
+ ["/sys/fs/cgroup/memory.current", "/sys/fs/cgroup/memory/memory.usage_in_bytes"]
108
+ )
109
+ cgroup_limit_bytes = _read_first_numeric_file(
110
+ ["/sys/fs/cgroup/memory.max", "/sys/fs/cgroup/memory/memory.limit_in_bytes"]
111
+ )
112
+ cgroup_usage_mb = (
113
+ round(cgroup_usage_bytes / (1024**2), 2) if cgroup_usage_bytes is not None else None
114
+ )
115
+ cgroup_limit_mb = (
116
+ round(cgroup_limit_bytes / (1024**2), 2) if cgroup_limit_bytes not in (None, 0) else None
117
+ )
118
+ cgroup_pct = (
119
+ round(cgroup_usage_bytes / cgroup_limit_bytes * 100, 2)
120
+ if cgroup_usage_bytes is not None and cgroup_limit_bytes not in (None, 0)
121
+ else None
122
+ )
123
+
124
+ try:
125
+ open_fds = len(os.listdir("/proc/self/fd"))
126
+ except Exception:
127
+ open_fds = None
128
+
129
+ pending_tasks = None
130
+ if loop:
131
+ try:
132
+ pending_tasks = len(asyncio.all_tasks(loop))
133
+ except Exception:
134
+ pending_tasks = None
135
+
136
+ suspected_cause = "unknown"
137
+ if cgroup_pct and cgroup_pct >= 90:
138
+ suspected_cause = "memory_pressure"
139
+ elif uptime_s < 180:
140
+ suspected_cause = "pod_cold_start"
141
+
142
+ diagnostics = {
143
+ "uptime_s": uptime_s,
144
+ "rss_mb": rss_mb,
145
+ "cgroup_usage_mb": cgroup_usage_mb,
146
+ "cgroup_limit_mb": cgroup_limit_mb,
147
+ "cgroup_usage_pct": cgroup_pct,
148
+ "open_fds": open_fds,
149
+ "pending_tasks": pending_tasks,
150
+ "suspected_cause": suspected_cause,
151
+ }
152
+ cpu_throttled = _read_cpu_throttling()
153
+ if cpu_throttled is not None:
154
+ diagnostics["cpu_nr_throttled"] = cpu_throttled
155
+
156
+ if context_provider:
157
+ try:
158
+ ctx = context_provider() or {}
159
+ if isinstance(ctx, dict):
160
+ diagnostics.update({k: v for k, v in ctx.items() if v is not None})
161
+ except Exception as exc:
162
+ diagnostics["context_provider_error"] = str(exc)
163
+
164
+ if extra_context:
165
+ diagnostics.update({k: v for k, v in extra_context.items() if v is not None})
166
+
167
+ return {k: v for k, v in diagnostics.items() if v is not None}
168
+
169
+
170
+ def _derive_connect_endpoint(base_url: str):
171
+ if not base_url:
172
+ return "api.deepgram.com", 443
173
+
174
+ parsed = urlparse(base_url)
175
+ host = parsed.hostname or "api.deepgram.com"
176
+ if parsed.port:
177
+ port = parsed.port
178
+ elif parsed.scheme in ("https", "wss"):
179
+ port = 443
180
+ else:
181
+ port = 80
182
+ return host, port
183
+
184
+
32
185
  try:
33
186
  from deepgram import (
34
187
  AsyncListenWebSocketClient,
@@ -62,6 +215,9 @@ class DeepgramSTTService(STTService):
62
215
  sample_rate: Optional[int] = None,
63
216
  live_options: Optional[LiveOptions] = None,
64
217
  addons: Optional[Dict] = None,
218
+ max_connect_retries: int = 3,
219
+ connect_timeout_s: float = 2.5,
220
+ diagnostics_context_provider: Optional[Callable[[], Dict]] = None,
65
221
  **kwargs,
66
222
  ):
67
223
  """Initialize the Deepgram STT service.
@@ -77,6 +233,12 @@ class DeepgramSTTService(STTService):
77
233
  sample_rate: Audio sample rate. If None, uses default or live_options value.
78
234
  live_options: Deepgram LiveOptions for detailed configuration.
79
235
  addons: Additional Deepgram features to enable.
236
+ max_connect_retries: Maximum number of connection attempts before giving up.
237
+ connect_timeout_s: Maximum time in seconds to wait for a connection attempt.
238
+ Connection retries wait 100ms between attempts.
239
+ diagnostics_context_provider: Optional callable returning a dict with
240
+ additional runtime diagnostics (e.g., active call counts) to append
241
+ to warning logs.
80
242
  **kwargs: Additional arguments passed to the parent STTService.
81
243
  """
82
244
  sample_rate = sample_rate or (live_options.sample_rate if live_options else None)
@@ -120,10 +282,11 @@ class DeepgramSTTService(STTService):
120
282
  self.set_model_name(merged_options["model"])
121
283
  self._settings = merged_options
122
284
  self._addons = addons
285
+ self._diagnostics_context_provider = diagnostics_context_provider
123
286
 
124
- # Connection retry settings
125
- self._max_connect_retries = 3
126
- self._connect_retry_delay_s = 0.1
287
+ # Connection retry settings (100ms delay between retries)
288
+ self._max_connect_retries = max_connect_retries
289
+ self._connect_timeout_s = connect_timeout_s
127
290
 
128
291
  self._client = DeepgramClient(
129
292
  api_key,
@@ -131,12 +294,13 @@ class DeepgramSTTService(STTService):
131
294
  url=base_url,
132
295
  options={
133
296
  "keepalive": "true",
134
- "open_timeout": 3, # Max wait for only 3 seconds for the connection to establish #
135
- # "termination_exception_connect": True, # Enable exception propagation
297
+ # Note: Connection timeout is enforced by asyncio.wait_for() in _connect()
298
+ # with the connect_timeout_s parameter (default 2.0s)
136
299
  },
137
300
  verbose=logging.ERROR, # Enable error level and above logging
138
301
  ),
139
302
  )
303
+ self._connect_host, self._connect_port = _derive_connect_endpoint(base_url)
140
304
 
141
305
  if self.vad_enabled:
142
306
  self._register_event_handler("on_speech_started")
@@ -224,9 +388,18 @@ class DeepgramSTTService(STTService):
224
388
 
225
389
  async def _connect(self):
226
390
  self.logger.debug("Attempting to connect to Deepgram...")
391
+ await self.start_connection_metrics()
227
392
 
393
+ loop = asyncio.get_running_loop()
228
394
  for attempt in range(self._max_connect_retries):
395
+ attempt_started = time.perf_counter()
396
+ dns_ms = await self._measure_dns_resolution(loop)
229
397
  try:
398
+ # Clean up any previous connection attempt in background (non-blocking)
399
+ if hasattr(self, "_connection") and self._connection is not None:
400
+ old_conn = self._connection
401
+ asyncio.create_task(self._cleanup_abandoned_connection(old_conn))
402
+
230
403
  # Create a new connection object for a clean attempt
231
404
  self._connection: AsyncListenWebSocketClient = self._client.listen.asyncwebsocket.v(
232
405
  "1"
@@ -250,31 +423,139 @@ class DeepgramSTTService(STTService):
250
423
  self._on_utterance_end,
251
424
  )
252
425
 
253
- # Attempt to start the connection (timeout handled by open_timeout config)
254
- if await self._connection.start(options=self._settings, addons=self._addons):
255
- self.logger.info("Successfully connected to Deepgram.")
256
- return # Exit the method on success
426
+ try:
427
+ start_result = await asyncio.wait_for(
428
+ self._connection.start(options=self._settings, addons=self._addons),
429
+ timeout=self._connect_timeout_s,
430
+ )
431
+ except asyncio.TimeoutError:
432
+ elapsed_ms = round((time.perf_counter() - attempt_started) * 1000, 2)
433
+ diagnostics = _collect_runtime_diagnostics(
434
+ loop,
435
+ extra_context={
436
+ "dns_ms": dns_ms,
437
+ "connect_duration_ms": elapsed_ms,
438
+ },
439
+ context_provider=self._diagnostics_context_provider,
440
+ )
441
+ self.logger.warning(
442
+ (
443
+ "Deepgram connection attempt {}/{} timed out after {:.2f} second(s). "
444
+ "runtime_diagnostics={}"
445
+ ),
446
+ attempt + 1,
447
+ self._max_connect_retries,
448
+ self._connect_timeout_s,
449
+ diagnostics,
450
+ )
451
+ start_result = False
452
+ except Exception as start_error:
453
+ elapsed_ms = round((time.perf_counter() - attempt_started) * 1000, 2)
454
+ diagnostics = _collect_runtime_diagnostics(
455
+ loop,
456
+ extra_context={
457
+ "dns_ms": dns_ms,
458
+ "connect_duration_ms": elapsed_ms,
459
+ },
460
+ context_provider=self._diagnostics_context_provider,
461
+ )
462
+ self.logger.warning(
463
+ (
464
+ "Deepgram connection attempt {}/{} failed with an exception: {}. "
465
+ "runtime_diagnostics={}"
466
+ ),
467
+ attempt + 1,
468
+ self._max_connect_retries,
469
+ start_error,
470
+ diagnostics,
471
+ )
472
+ start_result = False
473
+ else:
474
+ if start_result:
475
+ elapsed_ms = round((time.perf_counter() - attempt_started) * 1000, 2)
476
+ diagnostics = _collect_runtime_diagnostics(
477
+ loop,
478
+ extra_context={
479
+ "dns_ms": dns_ms,
480
+ "connect_duration_ms": elapsed_ms,
481
+ },
482
+ context_provider=self._diagnostics_context_provider,
483
+ )
484
+ self.logger.info(
485
+ (
486
+ "Successfully connected to Deepgram on attempt {} in {:.2f} ms. "
487
+ "runtime_diagnostics={}"
488
+ ),
489
+ attempt + 1,
490
+ elapsed_ms,
491
+ diagnostics,
492
+ )
493
+ await self.stop_connection_metrics(success=True, connection_type="websocket")
494
+ await self.stop_reconnection_metrics(success=True, reason="successful_reconnection")
495
+ return # Exit the method on success
257
496
 
258
497
  self.logger.warning(
259
498
  f"Deepgram connection attempt {attempt + 1}/{self._max_connect_retries} failed."
260
499
  )
261
500
 
262
501
  except Exception as e:
502
+ elapsed_ms = round((time.perf_counter() - attempt_started) * 1000, 2)
503
+ diagnostics = _collect_runtime_diagnostics(
504
+ loop,
505
+ extra_context={
506
+ "dns_ms": dns_ms,
507
+ "connect_duration_ms": elapsed_ms,
508
+ },
509
+ context_provider=self._diagnostics_context_provider,
510
+ )
263
511
  self.logger.warning(
264
- f"Deepgram connection attempt {attempt + 1}/{self._max_connect_retries} failed with an exception: {e}"
512
+ (
513
+ "Deepgram connection attempt {}/{} failed with an exception: {}. "
514
+ "runtime_diagnostics={}"
515
+ ),
516
+ attempt + 1,
517
+ self._max_connect_retries,
518
+ e,
519
+ diagnostics,
265
520
  )
266
521
 
267
- # If this is not the last attempt, wait briefly before retrying
522
+ # If this is not the last attempt, wait 100ms before retrying
268
523
  if attempt < self._max_connect_retries - 1:
269
- self.logger.info(f"Retrying in {self._connect_retry_delay_s} second(s)...")
270
- await asyncio.sleep(self._connect_retry_delay_s)
524
+ self.logger.info("Retrying in 0.1 second(s)...")
525
+ await asyncio.sleep(0.1)
271
526
 
272
- self.logger.error(
527
+ error_msg = (
273
528
  f"{self}: unable to connect to Deepgram after {self._max_connect_retries} attempts."
274
529
  )
530
+ await self.stop_connection_metrics(
531
+ success=False,
532
+ error=f"Failed after {self._max_connect_retries} attempts",
533
+ connection_type="websocket"
534
+ )
535
+ await self.stop_reconnection_metrics(success=False, reason="max_retries_exceeded")
536
+ self.logger.error(error_msg)
537
+ await self.push_error(ErrorFrame(error_msg, fatal=True))
538
+
539
+ async def _measure_dns_resolution(self, loop: Optional[asyncio.AbstractEventLoop]):
540
+ if not loop or not self._connect_host:
541
+ return None
542
+ try:
543
+ dns_task = loop.getaddrinfo(
544
+ self._connect_host,
545
+ self._connect_port,
546
+ type=socket.SOCK_STREAM,
547
+ proto=socket.IPPROTO_TCP,
548
+ )
549
+ start = time.perf_counter()
550
+ await asyncio.wait_for(dns_task, timeout=1.0)
551
+ return round((time.perf_counter() - start) * 1000, 2)
552
+ except Exception:
553
+ return None
275
554
 
276
555
  async def _disconnect(self):
277
- if self._connection.is_connected:
556
+ # Guard against missing connection instance and ensure proper async check
557
+ connection: AsyncListenWebSocketClient = getattr(self, "_connection", None)
558
+ if connection and await connection.is_connected():
278
559
  self.logger.debug("Disconnecting from Deepgram")
279
560
  # Deepgram swallows asyncio.CancelledError internally which prevents
280
561
  # proper cancellation propagation. This issue was found with
@@ -284,7 +565,25 @@ class DeepgramSTTService(STTService):
284
565
  # Deepgram disconnection was still finishing and therefore
285
566
  # preventing the task cancellation that occurs during `cleanup()`.
286
567
  # GH issue: https://github.com/deepgram/deepgram-python-sdk/issues/570
287
- await self._connection.finish()
568
+ await connection.finish()
569
+
570
+ async def _cleanup_abandoned_connection(self, conn: AsyncListenWebSocketClient):
571
+ """Clean up abandoned connection attempt in background (non-blocking).
572
+
573
+ This prevents zombie connections from triggering spurious error events
574
+ when they eventually timeout and call _on_error().
575
+
576
+ Args:
577
+ conn: The abandoned connection object to clean up.
578
+ """
579
+ try:
580
+ # Try to finish with short timeout
581
+ await asyncio.wait_for(conn.finish(), timeout=5)
582
+ self.logger.debug("Successfully cleaned up abandoned connection")
583
+ except Exception as e:
584
+ # Ignore all cleanup errors - connection might not be fully started
585
+ # This is expected and fine - we just want best-effort cleanup
586
+ self.logger.debug(f"Abandoned connection cleanup failed: {e}")
288
587
 
289
588
  async def start_metrics(self):
290
589
  """Start TTFB and processing metrics collection."""
@@ -299,6 +598,7 @@ class DeepgramSTTService(STTService):
299
598
  # NOTE(aleix): we don't disconnect (i.e. call finish on the connection)
300
599
  # because this triggers more errors internally in the Deepgram SDK. So,
301
600
  # we just forget about the previous connection and create a new one.
601
+ await self.start_reconnection_metrics()
302
602
  await self._connect()
303
603
 
304
604
  async def _on_speech_started(self, *args, **kwargs):