dv-pipecat-ai 0.0.85.dev848__py3-none-any.whl → 0.0.85.dev850__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev850.dist-info}/METADATA +1 -1
- {dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev850.dist-info}/RECORD +6 -6
- pipecat/services/deepgram/stt.py +248 -5
- {dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev850.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev850.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev850.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
dv_pipecat_ai-0.0.85.
|
|
1
|
+
dv_pipecat_ai-0.0.85.dev850.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
|
|
2
2
|
pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
|
|
3
3
|
pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -210,7 +210,7 @@ pipecat/services/cartesia/tts.py,sha256=I_OZCINywkDXmYzFL35MjSN8cAuNEaJs7nj0YB_o
|
|
|
210
210
|
pipecat/services/cerebras/__init__.py,sha256=5zBmqq9Zfcl-HC7ylekVS5qrRedbl1mAeEwUT-T-c_o,259
|
|
211
211
|
pipecat/services/cerebras/llm.py,sha256=-yzSe_6YDGigwzES-LZS4vNXMPugmvsIYEpTySyr5nA,3047
|
|
212
212
|
pipecat/services/deepgram/__init__.py,sha256=IjRtMI7WytRDdmYVpk2qDWClXUiNgdl7ZkvEAWg1eYE,304
|
|
213
|
-
pipecat/services/deepgram/stt.py,sha256=
|
|
213
|
+
pipecat/services/deepgram/stt.py,sha256=t7P0zWLBitSF_KQqHr5aYjKdJZRnC36styl_eL86R88,24752
|
|
214
214
|
pipecat/services/deepgram/tts.py,sha256=H_2WCJEx3_L4ytrHHRNkA-6GKTd1coou_vvTfiEodpQ,3745
|
|
215
215
|
pipecat/services/deepgram/flux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
216
216
|
pipecat/services/deepgram/flux/stt.py,sha256=yCZodrHAOShgYy_GbdviX8iAuh36dBgDL41gHMXVxEM,25887
|
|
@@ -416,7 +416,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
|
|
|
416
416
|
pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
|
|
417
417
|
pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
|
|
418
418
|
pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
|
|
419
|
-
dv_pipecat_ai-0.0.85.
|
|
420
|
-
dv_pipecat_ai-0.0.85.
|
|
421
|
-
dv_pipecat_ai-0.0.85.
|
|
422
|
-
dv_pipecat_ai-0.0.85.
|
|
419
|
+
dv_pipecat_ai-0.0.85.dev850.dist-info/METADATA,sha256=rqzfsDkrkClO-BvwwJr5_b2ggADWXFKhgzPgToBwDm0,32955
|
|
420
|
+
dv_pipecat_ai-0.0.85.dev850.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
421
|
+
dv_pipecat_ai-0.0.85.dev850.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
|
|
422
|
+
dv_pipecat_ai-0.0.85.dev850.dist-info/RECORD,,
|
pipecat/services/deepgram/stt.py
CHANGED
|
@@ -8,7 +8,11 @@
|
|
|
8
8
|
|
|
9
9
|
import asyncio
|
|
10
10
|
import logging
|
|
11
|
-
|
|
11
|
+
import os
|
|
12
|
+
import socket
|
|
13
|
+
import time
|
|
14
|
+
from typing import AsyncGenerator, Callable, Dict, Optional
|
|
15
|
+
from urllib.parse import urlparse
|
|
12
16
|
|
|
13
17
|
from loguru import logger
|
|
14
18
|
|
|
@@ -29,6 +33,155 @@ from pipecat.transcriptions.language import Language
|
|
|
29
33
|
from pipecat.utils.time import time_now_iso8601
|
|
30
34
|
from pipecat.utils.tracing.service_decorators import traced_stt
|
|
31
35
|
|
|
36
|
+
# Monotonic clock reading captured once, when this module is first executed.
# It is the baseline subtracted from time.monotonic() to compute the
# "uptime_s" diagnostic, so it measures time since import of this module
# rather than since the operating-system process was created.
_PROCESS_START_MONOTONIC = time.monotonic()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _read_first_numeric_file(paths):
    """Return the integer held by the first usable file in ``paths``.

    Candidates are tried in order. A file that cannot be opened or read, or
    whose stripped content is not a valid integer, is skipped and the next
    candidate is tried. The first file that *is* readable but is empty or
    contains the literal ``"max"`` stops the search and yields ``None``
    (presumably ``"max"`` is the cgroup "no limit" marker -- confirm against
    the files passed by the caller).

    Args:
        paths: Iterable of filesystem paths to probe, in priority order.

    Returns:
        The parsed integer, or ``None`` when no candidate produced one.
    """
    for candidate in paths:
        try:
            with open(candidate, "r", encoding="utf-8") as handle:
                text = handle.read().strip()
        except OSError:
            # FileNotFoundError is a subclass of OSError, so a missing file
            # is skipped here as well.
            continue

        if text in ("", "max"):
            return None

        try:
            number = int(text)
        except ValueError:
            # Not numeric: fall through to the next candidate path.
            continue
        return number
    return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _read_proc_status_value(key):
    """Look up a kB-valued field in ``/proc/self/status`` and return it in bytes.

    The file is scanned line by line; the first line that starts with ``key``
    and has at least two whitespace-separated fields supplies the value. The
    second field is parsed as an integer and multiplied by 1024.

    Args:
        key: Line prefix to search for, e.g. ``"VmRSS:"``.

    Returns:
        The value converted to bytes, or ``None`` when the file cannot be
        opened/read or no matching line is found.
    """
    try:
        with open("/proc/self/status", "r", encoding="utf-8") as handle:
            for entry in handle:
                if not entry.startswith(key):
                    continue
                fields = entry.split()
                if len(fields) < 2:
                    continue
                return int(fields[1]) * 1024  # kB -> bytes
    except OSError:
        # Covers FileNotFoundError too (it is an OSError subclass), e.g. on
        # platforms without procfs.
        return None
    return None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _read_cpu_throttling():
    """Return the ``nr_throttled`` counter from the cgroup ``cpu.stat`` file.

    Two locations are probed in order: ``/sys/fs/cgroup/cpu.stat`` and
    ``/sys/fs/cgroup/cpu/cpu.stat``. A location that cannot be opened or read,
    or that contains no usable ``nr_throttled`` line, is skipped.

    Returns:
        The integer following ``nr_throttled`` on the first matching line, or
        ``None`` when neither location provides one.
    """
    for stat_path in ("/sys/fs/cgroup/cpu.stat", "/sys/fs/cgroup/cpu/cpu.stat"):
        try:
            with open(stat_path, "r", encoding="utf-8") as handle:
                for entry in handle:
                    if not entry.startswith("nr_throttled"):
                        continue
                    fields = entry.split()
                    if len(fields) < 2:
                        continue
                    return int(fields[1])
        except OSError:
            # Missing or unreadable file (FileNotFoundError is an OSError
            # subclass): try the next location.
            continue
    return None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _collect_runtime_diagnostics(
    loop: Optional[asyncio.AbstractEventLoop] = None,
    extra_context: Optional[Dict] = None,
    context_provider: Optional[Callable[[], Dict]] = None,
):
    """Gather a best-effort snapshot of process and container health for log lines.

    Every probe is optional: a value that cannot be read is omitted from the
    result instead of raising.

    Args:
        loop: Event loop whose tasks are counted. When ``None`` the currently
            running loop is used, if there is one.
        extra_context: Extra key/value pairs merged into the result last, so
            they override built-in and provider-supplied keys. ``None`` values
            are ignored.
        context_provider: Optional zero-argument callable returning a dict of
            additional diagnostics. If it raises, the error text is recorded
            under ``"context_provider_error"`` instead of propagating.

    Returns:
        A dict containing only non-``None`` entries. Built-in keys are
        ``uptime_s``, ``rss_mb``, ``cgroup_usage_mb``, ``cgroup_limit_mb``,
        ``cgroup_usage_pct``, ``open_fds``, ``pending_tasks``,
        ``suspected_cause`` and ``cpu_nr_throttled``.
    """
    bytes_per_mb = 1024**2
    # cgroup v1 has no "max" literal: an unlimited memory.limit_in_bytes reads
    # back as a huge page-aligned number (about 2**63). Anything at or above
    # this threshold is treated as "no limit" so we do not report a bogus
    # multi-exabyte limit and a meaningless ~0% usage figure.
    unlimited_threshold_bytes = 1 << 60

    if loop is None:
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # Called outside of a running event loop; task count is skipped.
            loop = None

    uptime_s = round(time.monotonic() - _PROCESS_START_MONOTONIC, 1)
    rss_bytes = _read_proc_status_value("VmRSS:")
    rss_mb = round(rss_bytes / bytes_per_mb, 2) if rss_bytes else None

    # First path of each pair is the cgroup v2 layout, second is cgroup v1.
    cgroup_usage_bytes = _read_first_numeric_file(
        ["/sys/fs/cgroup/memory.current", "/sys/fs/cgroup/memory/memory.usage_in_bytes"]
    )
    cgroup_limit_bytes = _read_first_numeric_file(
        ["/sys/fs/cgroup/memory.max", "/sys/fs/cgroup/memory/memory.limit_in_bytes"]
    )
    if cgroup_limit_bytes is not None and (
        cgroup_limit_bytes <= 0 or cgroup_limit_bytes >= unlimited_threshold_bytes
    ):
        # Zero/negative is unusable as a divisor; the v1 sentinel means unlimited.
        cgroup_limit_bytes = None

    cgroup_usage_mb = (
        round(cgroup_usage_bytes / bytes_per_mb, 2) if cgroup_usage_bytes is not None else None
    )
    cgroup_limit_mb = (
        round(cgroup_limit_bytes / bytes_per_mb, 2) if cgroup_limit_bytes is not None else None
    )
    cgroup_pct = (
        round(cgroup_usage_bytes / cgroup_limit_bytes * 100, 2)
        if cgroup_usage_bytes is not None and cgroup_limit_bytes is not None
        else None
    )

    try:
        open_fds = len(os.listdir("/proc/self/fd"))
    except OSError:
        # No procfs (non-Linux) or access denied.
        open_fds = None

    pending_tasks = None
    if loop:
        try:
            pending_tasks = len(asyncio.all_tasks(loop))
        except Exception:
            # Diagnostics must never break the caller; skip the count.
            pending_tasks = None

    # Coarse heuristic only: memory pressure takes precedence over cold start.
    suspected_cause = "unknown"
    if cgroup_pct and cgroup_pct >= 90:
        suspected_cause = "memory_pressure"
    elif uptime_s < 180:
        suspected_cause = "pod_cold_start"

    diagnostics = {
        "uptime_s": uptime_s,
        "rss_mb": rss_mb,
        "cgroup_usage_mb": cgroup_usage_mb,
        "cgroup_limit_mb": cgroup_limit_mb,
        "cgroup_usage_pct": cgroup_pct,
        "open_fds": open_fds,
        "pending_tasks": pending_tasks,
        "suspected_cause": suspected_cause,
    }
    cpu_throttled = _read_cpu_throttling()
    if cpu_throttled is not None:
        diagnostics["cpu_nr_throttled"] = cpu_throttled

    if context_provider:
        try:
            ctx = context_provider() or {}
            if isinstance(ctx, dict):
                diagnostics.update({k: v for k, v in ctx.items() if v is not None})
        except Exception as exc:
            # The provider is user-supplied; record its failure rather than
            # letting it interrupt the caller.
            diagnostics["context_provider_error"] = str(exc)

    if extra_context:
        diagnostics.update({k: v for k, v in extra_context.items() if v is not None})

    return {k: v for k, v in diagnostics.items() if v is not None}
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _derive_connect_endpoint(base_url: str):
    """Work out the ``(host, port)`` pair that a Deepgram base URL points at.

    Unlike a bare ``urlparse`` call, this also understands URLs given without
    a scheme (``"api.eu.deepgram.com"``, ``"localhost:8080"``). ``urlparse``
    only recognises a network location after ``//``, so such values would
    otherwise be read as a path or as a bogus scheme, and the default host
    would be returned instead of the configured one.

    Args:
        base_url: Configured base URL; may be empty, may omit the scheme.

    Returns:
        A ``(host, port)`` tuple. The host falls back to
        ``"api.deepgram.com"`` when none can be extracted. The port is the
        explicit port if present and valid, otherwise 443 for ``https``/``wss``
        and scheme-less URLs, and 80 for any other scheme.
    """
    default_host = "api.deepgram.com"
    tls_port = 443
    plain_port = 80

    candidate = base_url.strip() if base_url else ""
    if not candidate:
        return default_host, tls_port

    has_scheme = "://" in candidate
    if not has_scheme:
        # Prefix "//" so urlparse treats the leading part as the netloc.
        candidate = "//" + candidate

    try:
        parsed = urlparse(candidate)
        host = parsed.hostname or default_host
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets). This helper
        # only feeds diagnostics, so degrade to the defaults.
        return default_host, tls_port

    try:
        explicit_port = parsed.port
    except ValueError:
        # Non-numeric or out-of-range port: ignore it and use the scheme default.
        explicit_port = None

    if explicit_port:
        return host, explicit_port

    # NOTE(review): scheme-less URLs are assumed to use TLS -- confirm against
    # how the Deepgram SDK expands a URL without a scheme.
    if not has_scheme or parsed.scheme in ("https", "wss"):
        return host, tls_port
    return host, plain_port
|
|
183
|
+
|
|
184
|
+
|
|
32
185
|
try:
|
|
33
186
|
from deepgram import (
|
|
34
187
|
AsyncListenWebSocketClient,
|
|
@@ -64,6 +217,7 @@ class DeepgramSTTService(STTService):
|
|
|
64
217
|
addons: Optional[Dict] = None,
|
|
65
218
|
max_connect_retries: int = 3,
|
|
66
219
|
connect_timeout_s: float = 2.5,
|
|
220
|
+
diagnostics_context_provider: Optional[Callable[[], Dict]] = None,
|
|
67
221
|
**kwargs,
|
|
68
222
|
):
|
|
69
223
|
"""Initialize the Deepgram STT service.
|
|
@@ -82,6 +236,9 @@ class DeepgramSTTService(STTService):
|
|
|
82
236
|
max_connect_retries: Maximum number of connection attempts before giving up.
|
|
83
237
|
connect_timeout_s: Maximum time in seconds to wait for a connection attempt.
|
|
84
238
|
Connection retries wait 100ms between attempts.
|
|
239
|
+
diagnostics_context_provider: Optional callable returning a dict with
|
|
240
|
+
additional runtime diagnostics (e.g., active call counts) to append
|
|
241
|
+
to warning logs.
|
|
85
242
|
**kwargs: Additional arguments passed to the parent STTService.
|
|
86
243
|
"""
|
|
87
244
|
sample_rate = sample_rate or (live_options.sample_rate if live_options else None)
|
|
@@ -125,6 +282,7 @@ class DeepgramSTTService(STTService):
|
|
|
125
282
|
self.set_model_name(merged_options["model"])
|
|
126
283
|
self._settings = merged_options
|
|
127
284
|
self._addons = addons
|
|
285
|
+
self._diagnostics_context_provider = diagnostics_context_provider
|
|
128
286
|
|
|
129
287
|
# Connection retry settings (100ms delay between retries)
|
|
130
288
|
self._max_connect_retries = max_connect_retries
|
|
@@ -142,6 +300,7 @@ class DeepgramSTTService(STTService):
|
|
|
142
300
|
verbose=logging.ERROR, # Enable error level and above logging
|
|
143
301
|
),
|
|
144
302
|
)
|
|
303
|
+
self._connect_host, self._connect_port = _derive_connect_endpoint(base_url)
|
|
145
304
|
|
|
146
305
|
if self.vad_enabled:
|
|
147
306
|
self._register_event_handler("on_speech_started")
|
|
@@ -230,7 +389,10 @@ class DeepgramSTTService(STTService):
|
|
|
230
389
|
async def _connect(self):
|
|
231
390
|
self.logger.debug("Attempting to connect to Deepgram...")
|
|
232
391
|
|
|
392
|
+
loop = asyncio.get_running_loop()
|
|
233
393
|
for attempt in range(self._max_connect_retries):
|
|
394
|
+
attempt_started = time.perf_counter()
|
|
395
|
+
dns_ms = await self._measure_dns_resolution(loop)
|
|
234
396
|
try:
|
|
235
397
|
# Clean up any previous connection attempt in background (non-blocking)
|
|
236
398
|
if hasattr(self, "_connection") and self._connection is not None:
|
|
@@ -266,18 +428,67 @@ class DeepgramSTTService(STTService):
|
|
|
266
428
|
timeout=self._connect_timeout_s,
|
|
267
429
|
)
|
|
268
430
|
except asyncio.TimeoutError:
|
|
431
|
+
elapsed_ms = round((time.perf_counter() - attempt_started) * 1000, 2)
|
|
432
|
+
diagnostics = _collect_runtime_diagnostics(
|
|
433
|
+
loop,
|
|
434
|
+
extra_context={
|
|
435
|
+
"dns_ms": dns_ms,
|
|
436
|
+
"connect_duration_ms": elapsed_ms,
|
|
437
|
+
},
|
|
438
|
+
context_provider=self._diagnostics_context_provider,
|
|
439
|
+
)
|
|
269
440
|
self.logger.warning(
|
|
270
|
-
|
|
441
|
+
(
|
|
442
|
+
"Deepgram connection attempt {}/{} timed out after {:.2f} second(s). "
|
|
443
|
+
"runtime_diagnostics={}"
|
|
444
|
+
),
|
|
445
|
+
attempt + 1,
|
|
446
|
+
self._max_connect_retries,
|
|
447
|
+
self._connect_timeout_s,
|
|
448
|
+
diagnostics,
|
|
271
449
|
)
|
|
272
450
|
start_result = False
|
|
273
451
|
except Exception as start_error:
|
|
452
|
+
elapsed_ms = round((time.perf_counter() - attempt_started) * 1000, 2)
|
|
453
|
+
diagnostics = _collect_runtime_diagnostics(
|
|
454
|
+
loop,
|
|
455
|
+
extra_context={
|
|
456
|
+
"dns_ms": dns_ms,
|
|
457
|
+
"connect_duration_ms": elapsed_ms,
|
|
458
|
+
},
|
|
459
|
+
context_provider=self._diagnostics_context_provider,
|
|
460
|
+
)
|
|
274
461
|
self.logger.warning(
|
|
275
|
-
|
|
462
|
+
(
|
|
463
|
+
"Deepgram connection attempt {}/{} failed with an exception: {}. "
|
|
464
|
+
"runtime_diagnostics={}"
|
|
465
|
+
),
|
|
466
|
+
attempt + 1,
|
|
467
|
+
self._max_connect_retries,
|
|
468
|
+
start_error,
|
|
469
|
+
diagnostics,
|
|
276
470
|
)
|
|
277
471
|
start_result = False
|
|
278
472
|
else:
|
|
279
473
|
if start_result:
|
|
280
|
-
|
|
474
|
+
elapsed_ms = round((time.perf_counter() - attempt_started) * 1000, 2)
|
|
475
|
+
diagnostics = _collect_runtime_diagnostics(
|
|
476
|
+
loop,
|
|
477
|
+
extra_context={
|
|
478
|
+
"dns_ms": dns_ms,
|
|
479
|
+
"connect_duration_ms": elapsed_ms,
|
|
480
|
+
},
|
|
481
|
+
context_provider=self._diagnostics_context_provider,
|
|
482
|
+
)
|
|
483
|
+
self.logger.info(
|
|
484
|
+
(
|
|
485
|
+
"Successfully connected to Deepgram on attempt {} in {:.2f} ms. "
|
|
486
|
+
"runtime_diagnostics={}"
|
|
487
|
+
),
|
|
488
|
+
attempt + 1,
|
|
489
|
+
elapsed_ms,
|
|
490
|
+
diagnostics,
|
|
491
|
+
)
|
|
281
492
|
return # Exit the method on success
|
|
282
493
|
|
|
283
494
|
self.logger.warning(
|
|
@@ -285,8 +496,24 @@ class DeepgramSTTService(STTService):
|
|
|
285
496
|
)
|
|
286
497
|
|
|
287
498
|
except Exception as e:
|
|
499
|
+
elapsed_ms = round((time.perf_counter() - attempt_started) * 1000, 2)
|
|
500
|
+
diagnostics = _collect_runtime_diagnostics(
|
|
501
|
+
loop,
|
|
502
|
+
extra_context={
|
|
503
|
+
"dns_ms": dns_ms,
|
|
504
|
+
"connect_duration_ms": elapsed_ms,
|
|
505
|
+
},
|
|
506
|
+
context_provider=self._diagnostics_context_provider,
|
|
507
|
+
)
|
|
288
508
|
self.logger.warning(
|
|
289
|
-
|
|
509
|
+
(
|
|
510
|
+
"Deepgram connection attempt {}/{} failed with an exception: {}. "
|
|
511
|
+
"runtime_diagnostics={}"
|
|
512
|
+
),
|
|
513
|
+
attempt + 1,
|
|
514
|
+
self._max_connect_retries,
|
|
515
|
+
e,
|
|
516
|
+
diagnostics,
|
|
290
517
|
)
|
|
291
518
|
|
|
292
519
|
# If this is not the last attempt, wait 100ms before retrying
|
|
@@ -300,6 +527,22 @@ class DeepgramSTTService(STTService):
|
|
|
300
527
|
self.logger.error(error_msg)
|
|
301
528
|
await self.push_error(ErrorFrame(error_msg, fatal=True))
|
|
302
529
|
|
|
530
|
+
async def _measure_dns_resolution(self, loop: Optional[asyncio.AbstractEventLoop]):
    """Time a name lookup for the configured connect host.

    Args:
        loop: Event loop used to perform the ``getaddrinfo`` call.

    Returns:
        Elapsed lookup time in milliseconds, rounded to two decimals, or
        ``None`` when no loop or host is available, or when the lookup fails
        or does not finish within the 1.0 second timeout.
    """
    if not (loop and self._connect_host):
        return None

    try:
        # Calling getaddrinfo only builds the awaitable; the clock is started
        # immediately before it is awaited.
        lookup = loop.getaddrinfo(
            self._connect_host,
            self._connect_port,
            type=socket.SOCK_STREAM,
            proto=socket.IPPROTO_TCP,
        )
        began = time.perf_counter()
        await asyncio.wait_for(lookup, timeout=1.0)
    except Exception:
        # Best-effort measurement: resolution errors and timeouts simply
        # yield no value.
        return None
    else:
        return round((time.perf_counter() - began) * 1000, 2)
|
|
545
|
+
|
|
303
546
|
async def _disconnect(self):
|
|
304
547
|
# Guard against missing connection instance and ensure proper async check
|
|
305
548
|
connection: AsyncListenWebSocketClient = getattr(self, "_connection", None)
|
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev850.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev850.dist-info}/top_level.txt
RENAMED
|
File without changes
|