dv-pipecat-ai 0.0.85.dev848__py3-none-any.whl → 0.0.85.dev851__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev851.dist-info}/METADATA +1 -1
- {dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev851.dist-info}/RECORD +7 -7
- pipecat/services/deepgram/stt.py +248 -5
- pipecat/services/elevenlabs/stt.py +87 -30
- {dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev851.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev851.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev851.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
dv_pipecat_ai-0.0.85.
|
|
1
|
+
dv_pipecat_ai-0.0.85.dev851.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
|
|
2
2
|
pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
|
|
3
3
|
pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -210,14 +210,14 @@ pipecat/services/cartesia/tts.py,sha256=I_OZCINywkDXmYzFL35MjSN8cAuNEaJs7nj0YB_o
|
|
|
210
210
|
pipecat/services/cerebras/__init__.py,sha256=5zBmqq9Zfcl-HC7ylekVS5qrRedbl1mAeEwUT-T-c_o,259
|
|
211
211
|
pipecat/services/cerebras/llm.py,sha256=-yzSe_6YDGigwzES-LZS4vNXMPugmvsIYEpTySyr5nA,3047
|
|
212
212
|
pipecat/services/deepgram/__init__.py,sha256=IjRtMI7WytRDdmYVpk2qDWClXUiNgdl7ZkvEAWg1eYE,304
|
|
213
|
-
pipecat/services/deepgram/stt.py,sha256=
|
|
213
|
+
pipecat/services/deepgram/stt.py,sha256=t7P0zWLBitSF_KQqHr5aYjKdJZRnC36styl_eL86R88,24752
|
|
214
214
|
pipecat/services/deepgram/tts.py,sha256=H_2WCJEx3_L4ytrHHRNkA-6GKTd1coou_vvTfiEodpQ,3745
|
|
215
215
|
pipecat/services/deepgram/flux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
216
216
|
pipecat/services/deepgram/flux/stt.py,sha256=yCZodrHAOShgYy_GbdviX8iAuh36dBgDL41gHMXVxEM,25887
|
|
217
217
|
pipecat/services/deepseek/__init__.py,sha256=bU5z_oNGzgrF_YpsD9pYIMtEibeZFaUobbRjJ9WcYyE,259
|
|
218
218
|
pipecat/services/deepseek/llm.py,sha256=5KjpU2blmhUTM3LcRE1ymdsk6OmoFkIzeQgyNOGwQh8,3112
|
|
219
219
|
pipecat/services/elevenlabs/__init__.py,sha256=cMx5v0HEMh4WetMm5byR9tIjG6_wNVs9UxqWyB3tjlM,313
|
|
220
|
-
pipecat/services/elevenlabs/stt.py,sha256=
|
|
220
|
+
pipecat/services/elevenlabs/stt.py,sha256=ZOVDJo3cG-f3ZugBIdxR5jrxJFtbfmDAP8Ps_KLyOgs,30117
|
|
221
221
|
pipecat/services/elevenlabs/tts.py,sha256=skUndgUatx2F5rjg2tBZLutB8k9B9Cjy-cUeglCDdwc,45314
|
|
222
222
|
pipecat/services/fal/__init__.py,sha256=z_kfZETvUcKy68Lyvni4B-RtdkOvz3J3eh6sFDVKq6M,278
|
|
223
223
|
pipecat/services/fal/image.py,sha256=vArKLKrIGoZfw_xeZY_E7zbUzfzVsScj-R7mOmVqjRQ,4585
|
|
@@ -416,7 +416,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
|
|
|
416
416
|
pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
|
|
417
417
|
pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
|
|
418
418
|
pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
|
|
419
|
-
dv_pipecat_ai-0.0.85.
|
|
420
|
-
dv_pipecat_ai-0.0.85.
|
|
421
|
-
dv_pipecat_ai-0.0.85.
|
|
422
|
-
dv_pipecat_ai-0.0.85.
|
|
419
|
+
dv_pipecat_ai-0.0.85.dev851.dist-info/METADATA,sha256=lmgj2aZSwfm8h9V1nljEVf_41rQpqAp-13HAtCuXiMw,32955
|
|
420
|
+
dv_pipecat_ai-0.0.85.dev851.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
421
|
+
dv_pipecat_ai-0.0.85.dev851.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
|
|
422
|
+
dv_pipecat_ai-0.0.85.dev851.dist-info/RECORD,,
|
pipecat/services/deepgram/stt.py
CHANGED
|
@@ -8,7 +8,11 @@
|
|
|
8
8
|
|
|
9
9
|
import asyncio
|
|
10
10
|
import logging
|
|
11
|
-
|
|
11
|
+
import os
|
|
12
|
+
import socket
|
|
13
|
+
import time
|
|
14
|
+
from typing import AsyncGenerator, Callable, Dict, Optional
|
|
15
|
+
from urllib.parse import urlparse
|
|
12
16
|
|
|
13
17
|
from loguru import logger
|
|
14
18
|
|
|
@@ -29,6 +33,155 @@ from pipecat.transcriptions.language import Language
|
|
|
29
33
|
from pipecat.utils.time import time_now_iso8601
|
|
30
34
|
from pipecat.utils.tracing.service_decorators import traced_stt
|
|
31
35
|
|
|
36
|
+
_PROCESS_START_MONOTONIC = time.monotonic()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _read_first_numeric_file(paths):
|
|
40
|
+
for path in paths:
|
|
41
|
+
try:
|
|
42
|
+
with open(path, "r", encoding="utf-8") as file:
|
|
43
|
+
value = file.read().strip()
|
|
44
|
+
except FileNotFoundError:
|
|
45
|
+
continue
|
|
46
|
+
except OSError:
|
|
47
|
+
continue
|
|
48
|
+
|
|
49
|
+
if not value or value == "max":
|
|
50
|
+
return None
|
|
51
|
+
|
|
52
|
+
try:
|
|
53
|
+
return int(value)
|
|
54
|
+
except ValueError:
|
|
55
|
+
continue
|
|
56
|
+
return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _read_proc_status_value(key):
|
|
60
|
+
try:
|
|
61
|
+
with open("/proc/self/status", "r", encoding="utf-8") as status_file:
|
|
62
|
+
for line in status_file:
|
|
63
|
+
if line.startswith(key):
|
|
64
|
+
parts = line.split()
|
|
65
|
+
if len(parts) >= 2:
|
|
66
|
+
return int(parts[1]) * 1024 # kB -> bytes
|
|
67
|
+
except FileNotFoundError:
|
|
68
|
+
return None
|
|
69
|
+
except OSError:
|
|
70
|
+
return None
|
|
71
|
+
return None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _read_cpu_throttling():
|
|
75
|
+
paths = ["/sys/fs/cgroup/cpu.stat", "/sys/fs/cgroup/cpu/cpu.stat"]
|
|
76
|
+
for path in paths:
|
|
77
|
+
try:
|
|
78
|
+
with open(path, "r", encoding="utf-8") as cpu_file:
|
|
79
|
+
for line in cpu_file:
|
|
80
|
+
if line.startswith("nr_throttled"):
|
|
81
|
+
parts = line.split()
|
|
82
|
+
if len(parts) >= 2:
|
|
83
|
+
return int(parts[1])
|
|
84
|
+
except FileNotFoundError:
|
|
85
|
+
continue
|
|
86
|
+
except OSError:
|
|
87
|
+
continue
|
|
88
|
+
return None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _collect_runtime_diagnostics(
    loop: Optional[asyncio.AbstractEventLoop] = None,
    extra_context: Optional[Dict] = None,
    context_provider: Optional[Callable[[], Dict]] = None,
):
    """Build a snapshot of process and cgroup health for log messages.

    Args:
        loop: Event loop whose tasks are counted. When omitted, the running
            loop is used if there is one.
        extra_context: Extra key/value pairs merged last; ``None`` values are
            dropped.
        context_provider: Optional callable returning a dict merged before
            ``extra_context``. If it raises, the error text is stored under
            ``"context_provider_error"``.

    Returns:
        A dict containing only the entries whose value is not ``None``.
    """
    bytes_per_mb = 1024**2

    def _merge_non_null(mapping):
        # Later sources override earlier keys; None never overwrites a value.
        diagnostics.update({k: v for k, v in mapping.items() if v is not None})

    if loop is None:
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # Called outside of a running loop: task count is simply omitted.
            loop = None

    uptime_s = round(time.monotonic() - _PROCESS_START_MONOTONIC, 1)

    rss_bytes = _read_proc_status_value("VmRSS:")
    rss_mb = round(rss_bytes / bytes_per_mb, 2) if rss_bytes else None

    # Each list names two candidate locations for the same memory figure.
    cgroup_usage_bytes = _read_first_numeric_file(
        ["/sys/fs/cgroup/memory.current", "/sys/fs/cgroup/memory/memory.usage_in_bytes"]
    )
    cgroup_limit_bytes = _read_first_numeric_file(
        ["/sys/fs/cgroup/memory.max", "/sys/fs/cgroup/memory/memory.limit_in_bytes"]
    )
    usage_known = cgroup_usage_bytes is not None
    limit_known = cgroup_limit_bytes not in (None, 0)

    cgroup_usage_mb = round(cgroup_usage_bytes / bytes_per_mb, 2) if usage_known else None
    cgroup_limit_mb = round(cgroup_limit_bytes / bytes_per_mb, 2) if limit_known else None
    if usage_known and limit_known:
        cgroup_pct = round(cgroup_usage_bytes / cgroup_limit_bytes * 100, 2)
    else:
        cgroup_pct = None

    try:
        open_fds = len(os.listdir("/proc/self/fd"))
    except Exception:
        open_fds = None

    pending_tasks = None
    if loop:
        try:
            pending_tasks = len(asyncio.all_tasks(loop))
        except Exception:
            pending_tasks = None

    # Coarse heuristic: memory pressure takes precedence over a young process.
    if cgroup_pct and cgroup_pct >= 90:
        suspected_cause = "memory_pressure"
    elif uptime_s < 180:
        suspected_cause = "pod_cold_start"
    else:
        suspected_cause = "unknown"

    diagnostics = {
        "uptime_s": uptime_s,
        "rss_mb": rss_mb,
        "cgroup_usage_mb": cgroup_usage_mb,
        "cgroup_limit_mb": cgroup_limit_mb,
        "cgroup_usage_pct": cgroup_pct,
        "open_fds": open_fds,
        "pending_tasks": pending_tasks,
        "suspected_cause": suspected_cause,
    }

    cpu_throttled = _read_cpu_throttling()
    if cpu_throttled is not None:
        diagnostics["cpu_nr_throttled"] = cpu_throttled

    if context_provider:
        try:
            provided = context_provider() or {}
            if isinstance(provided, dict):
                _merge_non_null(provided)
        except Exception as exc:
            diagnostics["context_provider_error"] = str(exc)

    if extra_context:
        _merge_non_null(extra_context)

    return {k: v for k, v in diagnostics.items() if v is not None}
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _derive_connect_endpoint(base_url: str):
|
|
171
|
+
if not base_url:
|
|
172
|
+
return "api.deepgram.com", 443
|
|
173
|
+
|
|
174
|
+
parsed = urlparse(base_url)
|
|
175
|
+
host = parsed.hostname or "api.deepgram.com"
|
|
176
|
+
if parsed.port:
|
|
177
|
+
port = parsed.port
|
|
178
|
+
elif parsed.scheme in ("https", "wss"):
|
|
179
|
+
port = 443
|
|
180
|
+
else:
|
|
181
|
+
port = 80
|
|
182
|
+
return host, port
|
|
183
|
+
|
|
184
|
+
|
|
32
185
|
try:
|
|
33
186
|
from deepgram import (
|
|
34
187
|
AsyncListenWebSocketClient,
|
|
@@ -64,6 +217,7 @@ class DeepgramSTTService(STTService):
|
|
|
64
217
|
addons: Optional[Dict] = None,
|
|
65
218
|
max_connect_retries: int = 3,
|
|
66
219
|
connect_timeout_s: float = 2.5,
|
|
220
|
+
diagnostics_context_provider: Optional[Callable[[], Dict]] = None,
|
|
67
221
|
**kwargs,
|
|
68
222
|
):
|
|
69
223
|
"""Initialize the Deepgram STT service.
|
|
@@ -82,6 +236,9 @@ class DeepgramSTTService(STTService):
|
|
|
82
236
|
max_connect_retries: Maximum number of connection attempts before giving up.
|
|
83
237
|
connect_timeout_s: Maximum time in seconds to wait for a connection attempt.
|
|
84
238
|
Connection retries wait 100ms between attempts.
|
|
239
|
+
diagnostics_context_provider: Optional callable returning a dict with
|
|
240
|
+
additional runtime diagnostics (e.g., active call counts) to append
|
|
241
|
+
to warning logs.
|
|
85
242
|
**kwargs: Additional arguments passed to the parent STTService.
|
|
86
243
|
"""
|
|
87
244
|
sample_rate = sample_rate or (live_options.sample_rate if live_options else None)
|
|
@@ -125,6 +282,7 @@ class DeepgramSTTService(STTService):
|
|
|
125
282
|
self.set_model_name(merged_options["model"])
|
|
126
283
|
self._settings = merged_options
|
|
127
284
|
self._addons = addons
|
|
285
|
+
self._diagnostics_context_provider = diagnostics_context_provider
|
|
128
286
|
|
|
129
287
|
# Connection retry settings (100ms delay between retries)
|
|
130
288
|
self._max_connect_retries = max_connect_retries
|
|
@@ -142,6 +300,7 @@ class DeepgramSTTService(STTService):
|
|
|
142
300
|
verbose=logging.ERROR, # Enable error level and above logging
|
|
143
301
|
),
|
|
144
302
|
)
|
|
303
|
+
self._connect_host, self._connect_port = _derive_connect_endpoint(base_url)
|
|
145
304
|
|
|
146
305
|
if self.vad_enabled:
|
|
147
306
|
self._register_event_handler("on_speech_started")
|
|
@@ -230,7 +389,10 @@ class DeepgramSTTService(STTService):
|
|
|
230
389
|
async def _connect(self):
|
|
231
390
|
self.logger.debug("Attempting to connect to Deepgram...")
|
|
232
391
|
|
|
392
|
+
loop = asyncio.get_running_loop()
|
|
233
393
|
for attempt in range(self._max_connect_retries):
|
|
394
|
+
attempt_started = time.perf_counter()
|
|
395
|
+
dns_ms = await self._measure_dns_resolution(loop)
|
|
234
396
|
try:
|
|
235
397
|
# Clean up any previous connection attempt in background (non-blocking)
|
|
236
398
|
if hasattr(self, "_connection") and self._connection is not None:
|
|
@@ -266,18 +428,67 @@ class DeepgramSTTService(STTService):
|
|
|
266
428
|
timeout=self._connect_timeout_s,
|
|
267
429
|
)
|
|
268
430
|
except asyncio.TimeoutError:
|
|
431
|
+
elapsed_ms = round((time.perf_counter() - attempt_started) * 1000, 2)
|
|
432
|
+
diagnostics = _collect_runtime_diagnostics(
|
|
433
|
+
loop,
|
|
434
|
+
extra_context={
|
|
435
|
+
"dns_ms": dns_ms,
|
|
436
|
+
"connect_duration_ms": elapsed_ms,
|
|
437
|
+
},
|
|
438
|
+
context_provider=self._diagnostics_context_provider,
|
|
439
|
+
)
|
|
269
440
|
self.logger.warning(
|
|
270
|
-
|
|
441
|
+
(
|
|
442
|
+
"Deepgram connection attempt {}/{} timed out after {:.2f} second(s). "
|
|
443
|
+
"runtime_diagnostics={}"
|
|
444
|
+
),
|
|
445
|
+
attempt + 1,
|
|
446
|
+
self._max_connect_retries,
|
|
447
|
+
self._connect_timeout_s,
|
|
448
|
+
diagnostics,
|
|
271
449
|
)
|
|
272
450
|
start_result = False
|
|
273
451
|
except Exception as start_error:
|
|
452
|
+
elapsed_ms = round((time.perf_counter() - attempt_started) * 1000, 2)
|
|
453
|
+
diagnostics = _collect_runtime_diagnostics(
|
|
454
|
+
loop,
|
|
455
|
+
extra_context={
|
|
456
|
+
"dns_ms": dns_ms,
|
|
457
|
+
"connect_duration_ms": elapsed_ms,
|
|
458
|
+
},
|
|
459
|
+
context_provider=self._diagnostics_context_provider,
|
|
460
|
+
)
|
|
274
461
|
self.logger.warning(
|
|
275
|
-
|
|
462
|
+
(
|
|
463
|
+
"Deepgram connection attempt {}/{} failed with an exception: {}. "
|
|
464
|
+
"runtime_diagnostics={}"
|
|
465
|
+
),
|
|
466
|
+
attempt + 1,
|
|
467
|
+
self._max_connect_retries,
|
|
468
|
+
start_error,
|
|
469
|
+
diagnostics,
|
|
276
470
|
)
|
|
277
471
|
start_result = False
|
|
278
472
|
else:
|
|
279
473
|
if start_result:
|
|
280
|
-
|
|
474
|
+
elapsed_ms = round((time.perf_counter() - attempt_started) * 1000, 2)
|
|
475
|
+
diagnostics = _collect_runtime_diagnostics(
|
|
476
|
+
loop,
|
|
477
|
+
extra_context={
|
|
478
|
+
"dns_ms": dns_ms,
|
|
479
|
+
"connect_duration_ms": elapsed_ms,
|
|
480
|
+
},
|
|
481
|
+
context_provider=self._diagnostics_context_provider,
|
|
482
|
+
)
|
|
483
|
+
self.logger.info(
|
|
484
|
+
(
|
|
485
|
+
"Successfully connected to Deepgram on attempt {} in {:.2f} ms. "
|
|
486
|
+
"runtime_diagnostics={}"
|
|
487
|
+
),
|
|
488
|
+
attempt + 1,
|
|
489
|
+
elapsed_ms,
|
|
490
|
+
diagnostics,
|
|
491
|
+
)
|
|
281
492
|
return # Exit the method on success
|
|
282
493
|
|
|
283
494
|
self.logger.warning(
|
|
@@ -285,8 +496,24 @@ class DeepgramSTTService(STTService):
|
|
|
285
496
|
)
|
|
286
497
|
|
|
287
498
|
except Exception as e:
|
|
499
|
+
elapsed_ms = round((time.perf_counter() - attempt_started) * 1000, 2)
|
|
500
|
+
diagnostics = _collect_runtime_diagnostics(
|
|
501
|
+
loop,
|
|
502
|
+
extra_context={
|
|
503
|
+
"dns_ms": dns_ms,
|
|
504
|
+
"connect_duration_ms": elapsed_ms,
|
|
505
|
+
},
|
|
506
|
+
context_provider=self._diagnostics_context_provider,
|
|
507
|
+
)
|
|
288
508
|
self.logger.warning(
|
|
289
|
-
|
|
509
|
+
(
|
|
510
|
+
"Deepgram connection attempt {}/{} failed with an exception: {}. "
|
|
511
|
+
"runtime_diagnostics={}"
|
|
512
|
+
),
|
|
513
|
+
attempt + 1,
|
|
514
|
+
self._max_connect_retries,
|
|
515
|
+
e,
|
|
516
|
+
diagnostics,
|
|
290
517
|
)
|
|
291
518
|
|
|
292
519
|
# If this is not the last attempt, wait 100ms before retrying
|
|
@@ -300,6 +527,22 @@ class DeepgramSTTService(STTService):
|
|
|
300
527
|
self.logger.error(error_msg)
|
|
301
528
|
await self.push_error(ErrorFrame(error_msg, fatal=True))
|
|
302
529
|
|
|
530
|
+
async def _measure_dns_resolution(self, loop: Optional[asyncio.AbstractEventLoop]):
|
|
531
|
+
if not loop or not self._connect_host:
|
|
532
|
+
return None
|
|
533
|
+
try:
|
|
534
|
+
dns_task = loop.getaddrinfo(
|
|
535
|
+
self._connect_host,
|
|
536
|
+
self._connect_port,
|
|
537
|
+
type=socket.SOCK_STREAM,
|
|
538
|
+
proto=socket.IPPROTO_TCP,
|
|
539
|
+
)
|
|
540
|
+
start = time.perf_counter()
|
|
541
|
+
await asyncio.wait_for(dns_task, timeout=1.0)
|
|
542
|
+
return round((time.perf_counter() - start) * 1000, 2)
|
|
543
|
+
except Exception:
|
|
544
|
+
return None
|
|
545
|
+
|
|
303
546
|
async def _disconnect(self):
|
|
304
547
|
# Guard against missing connection instance and ensure proper async check
|
|
305
548
|
connection: AsyncListenWebSocketClient = getattr(self, "_connection", None)
|
|
@@ -159,10 +159,16 @@ def language_to_elevenlabs_language(language: Language) -> Optional[str]:
|
|
|
159
159
|
result = BASE_LANGUAGES.get(language)
|
|
160
160
|
|
|
161
161
|
# If not found in base languages, try to find the base language from a variant
|
|
162
|
+
# For example, Language.EN_US (value "en-US") -> Language("en") -> "eng"
|
|
162
163
|
if not result:
|
|
163
164
|
lang_str = str(language.value)
|
|
164
|
-
base_code = lang_str.split("-")[0]
|
|
165
|
-
|
|
165
|
+
base_code = lang_str.split("-")[0] # Get "en" from "en-US"
|
|
166
|
+
try:
|
|
167
|
+
base_language = Language(base_code)
|
|
168
|
+
result = BASE_LANGUAGES.get(base_language)
|
|
169
|
+
except (ValueError, KeyError):
|
|
170
|
+
# If base language not found in Language enum, return None
|
|
171
|
+
result = None
|
|
166
172
|
|
|
167
173
|
return result
|
|
168
174
|
|
|
@@ -425,6 +431,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
|
|
|
425
431
|
self._pending_final_task: Optional[asyncio.Task] = None
|
|
426
432
|
self._timestamp_merge_delay_s = 0.25
|
|
427
433
|
self._ttfb_started = False
|
|
434
|
+
self._waiting_for_timestamps = False
|
|
428
435
|
|
|
429
436
|
@property
|
|
430
437
|
def commit_strategy(self) -> str:
|
|
@@ -474,7 +481,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
|
|
|
474
481
|
if isinstance(frame, UserStartedSpeakingFrame):
|
|
475
482
|
if frame.emulated:
|
|
476
483
|
return
|
|
477
|
-
|
|
484
|
+
# Start metrics and set flag to True so we can stop them later
|
|
485
|
+
await self.start_ttfb_metrics()
|
|
486
|
+
self._ttfb_started = True
|
|
478
487
|
await self.start_processing_metrics()
|
|
479
488
|
elif isinstance(frame, UserStoppedSpeakingFrame):
|
|
480
489
|
if frame.emulated:
|
|
@@ -488,18 +497,28 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
|
|
|
488
497
|
yield None
|
|
489
498
|
return
|
|
490
499
|
|
|
491
|
-
await self._ensure_connection()
|
|
500
|
+
if not await self._ensure_connection():
|
|
501
|
+
self.logger.error(f"{self} failed to establish connection, dropping audio")
|
|
502
|
+
yield None
|
|
503
|
+
return
|
|
504
|
+
|
|
492
505
|
await self._send_audio_chunk(audio)
|
|
493
506
|
yield None
|
|
494
507
|
|
|
495
|
-
async def _ensure_connection(self):
|
|
508
|
+
async def _ensure_connection(self) -> bool:
|
|
509
|
+
"""Ensure WebSocket connection is established and ready.
|
|
510
|
+
|
|
511
|
+
Returns:
|
|
512
|
+
bool: True if connection is ready, False otherwise.
|
|
513
|
+
"""
|
|
496
514
|
if not self._websocket or self._websocket.state is State.CLOSED:
|
|
497
515
|
await self._connect()
|
|
516
|
+
return self._websocket is not None and self._websocket.state is State.OPEN
|
|
498
517
|
|
|
499
518
|
async def _connect(self):
|
|
500
519
|
await self._connect_websocket()
|
|
501
|
-
if self._websocket and not self._receive_task:
|
|
502
|
-
self._receive_task =
|
|
520
|
+
if self._websocket and self._websocket.state is State.OPEN and not self._receive_task:
|
|
521
|
+
self._receive_task = self.create_task(self._receive_task_handler(self._report_error))
|
|
503
522
|
|
|
504
523
|
async def _disconnect(self):
|
|
505
524
|
if self._receive_task:
|
|
@@ -512,23 +531,30 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
|
|
|
512
531
|
async def _connect_websocket(self):
|
|
513
532
|
try:
|
|
514
533
|
if self._websocket and self._websocket.state is State.OPEN:
|
|
534
|
+
self.logger.debug(f"{self} already connected, skipping reconnection")
|
|
515
535
|
return
|
|
516
536
|
|
|
517
537
|
ws_url = self._build_websocket_url()
|
|
518
538
|
headers = {"xi-api-key": self._api_key}
|
|
519
|
-
self.logger.
|
|
539
|
+
self.logger.info(f"{self} connecting to ElevenLabs realtime STT (WebSocket URL built)")
|
|
520
540
|
self._websocket = await websocket_connect(ws_url, additional_headers=headers)
|
|
541
|
+
self.logger.info(f"{self} successfully connected to ElevenLabs realtime STT")
|
|
521
542
|
await self._call_event_handler("on_connected")
|
|
522
543
|
except Exception as e:
|
|
523
544
|
self.logger.error(f"{self} unable to connect to ElevenLabs realtime STT: {e}")
|
|
524
545
|
self._websocket = None
|
|
546
|
+
if self._receive_task:
|
|
547
|
+
await self.cancel_task(self._receive_task)
|
|
548
|
+
self._receive_task = None
|
|
549
|
+
# Push error to pipeline so callers know the connection failed
|
|
550
|
+
await self.push_error(ErrorFrame(f"ElevenLabs connection failed: {e}", fatal=False))
|
|
525
551
|
await self._call_event_handler("on_connection_error", f"{e}")
|
|
526
552
|
|
|
527
553
|
async def _disconnect_websocket(self):
|
|
528
554
|
try:
|
|
529
555
|
await self.stop_all_metrics()
|
|
530
556
|
if self._websocket and self._websocket.state is State.OPEN:
|
|
531
|
-
self.logger.debug("
|
|
557
|
+
self.logger.debug(f"{self} disconnecting from ElevenLabs realtime STT")
|
|
532
558
|
await self._websocket.close()
|
|
533
559
|
except Exception as e:
|
|
534
560
|
self.logger.error(f"{self} error closing ElevenLabs realtime websocket: {e}")
|
|
@@ -573,6 +599,10 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
|
|
|
573
599
|
}:
|
|
574
600
|
fatal = message_type in {"auth_error", "quota_exceeded", "error"}
|
|
575
601
|
description = data.get("error", data)
|
|
602
|
+
# Log full error details for debugging
|
|
603
|
+
self.logger.error(
|
|
604
|
+
f"{self} ElevenLabs error - Type: {message_type}, Fatal: {fatal}, Full data: {data}"
|
|
605
|
+
)
|
|
576
606
|
await self.push_error(
|
|
577
607
|
ErrorFrame(f"ElevenLabs realtime error: {description}", fatal=fatal)
|
|
578
608
|
)
|
|
@@ -588,7 +618,11 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
|
|
|
588
618
|
elevenlabs_language_code_to_language(data.get("language_code"))
|
|
589
619
|
or self._language_override
|
|
590
620
|
)
|
|
591
|
-
|
|
621
|
+
|
|
622
|
+
# Only stop TTFB metrics on first partial
|
|
623
|
+
if self._ttfb_started:
|
|
624
|
+
await self.stop_ttfb_metrics()
|
|
625
|
+
self._ttfb_started = False
|
|
592
626
|
|
|
593
627
|
await self.push_frame(
|
|
594
628
|
InterimTranscriptionFrame(
|
|
@@ -604,8 +638,10 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
|
|
|
604
638
|
if self._pending_final_message:
|
|
605
639
|
await self._emit_transcription(self._pending_final_message)
|
|
606
640
|
self._pending_final_message = None
|
|
641
|
+
self._waiting_for_timestamps = False
|
|
607
642
|
|
|
608
643
|
self._pending_final_message = data
|
|
644
|
+
self._waiting_for_timestamps = True
|
|
609
645
|
await self._schedule_pending_final_emit()
|
|
610
646
|
|
|
611
647
|
async def _handle_committed_transcript_with_timestamps(self, data: Dict[str, Any]):
|
|
@@ -613,12 +649,16 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
|
|
|
613
649
|
merged = {**self._pending_final_message, **data}
|
|
614
650
|
await self._emit_transcription(merged)
|
|
615
651
|
await self._clear_pending_final()
|
|
652
|
+
elif self._waiting_for_timestamps:
|
|
653
|
+
# Late arrival after timeout - don't emit duplicate
|
|
654
|
+
self.logger.warning(f"{self} timestamps arrived after timeout, skipping duplicate")
|
|
655
|
+
self._waiting_for_timestamps = False
|
|
616
656
|
else:
|
|
617
657
|
await self._emit_transcription(data)
|
|
618
658
|
|
|
619
659
|
async def _schedule_pending_final_emit(self):
|
|
620
660
|
await self._clear_pending_final(timer_only=True)
|
|
621
|
-
self._pending_final_task =
|
|
661
|
+
self._pending_final_task = self.create_task(self._emit_pending_after_delay())
|
|
622
662
|
|
|
623
663
|
async def _emit_pending_after_delay(self):
|
|
624
664
|
try:
|
|
@@ -626,6 +666,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
|
|
|
626
666
|
if self._pending_final_message:
|
|
627
667
|
await self._emit_transcription(self._pending_final_message)
|
|
628
668
|
self._pending_final_message = None
|
|
669
|
+
self._waiting_for_timestamps = False
|
|
629
670
|
except asyncio.CancelledError:
|
|
630
671
|
pass
|
|
631
672
|
finally:
|
|
@@ -638,6 +679,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
|
|
|
638
679
|
|
|
639
680
|
if not timer_only:
|
|
640
681
|
self._pending_final_message = None
|
|
682
|
+
self._waiting_for_timestamps = False
|
|
641
683
|
|
|
642
684
|
async def _emit_transcription(self, data: Dict[str, Any]):
|
|
643
685
|
text = (data.get("text") or data.get("transcript") or "").strip()
|
|
@@ -648,7 +690,11 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
|
|
|
648
690
|
elevenlabs_language_code_to_language(data.get("language_code"))
|
|
649
691
|
or self._language_override
|
|
650
692
|
)
|
|
651
|
-
|
|
693
|
+
|
|
694
|
+
# TTFB should already be stopped by partial, but guard just in case
|
|
695
|
+
if self._ttfb_started:
|
|
696
|
+
await self.stop_ttfb_metrics()
|
|
697
|
+
self._ttfb_started = False
|
|
652
698
|
|
|
653
699
|
frame = TranscriptionFrame(
|
|
654
700
|
text,
|
|
@@ -666,28 +712,39 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
|
|
|
666
712
|
if not audio or not self._websocket:
|
|
667
713
|
return
|
|
668
714
|
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
715
|
+
try:
|
|
716
|
+
payload = {
|
|
717
|
+
"message_type": "input_audio_chunk",
|
|
718
|
+
"audio_base_64": base64.b64encode(audio).decode("ascii"),
|
|
719
|
+
"commit": False,
|
|
720
|
+
"sample_rate": self.sample_rate,
|
|
721
|
+
}
|
|
722
|
+
await self._websocket.send(json.dumps(payload))
|
|
723
|
+
except Exception as e:
|
|
724
|
+
self.logger.error(f"{self} error sending audio chunk: {e}")
|
|
725
|
+
await self.push_error(ErrorFrame(f"Failed to send audio: {e}"))
|
|
726
|
+
# Trigger reconnection
|
|
727
|
+
await self._disconnect()
|
|
728
|
+
await self._connect()
|
|
680
729
|
|
|
681
730
|
async def _send_commit(self):
|
|
682
731
|
if not self._websocket:
|
|
683
732
|
return
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
733
|
+
|
|
734
|
+
try:
|
|
735
|
+
payload = {
|
|
736
|
+
"message_type": "input_audio_chunk",
|
|
737
|
+
"audio_base_64": "",
|
|
738
|
+
"commit": True,
|
|
739
|
+
"sample_rate": self.sample_rate,
|
|
740
|
+
}
|
|
741
|
+
await self._websocket.send(json.dumps(payload))
|
|
742
|
+
except Exception as e:
|
|
743
|
+
self.logger.error(f"{self} error sending commit: {e}")
|
|
744
|
+
await self.push_error(ErrorFrame(f"Failed to send commit: {e}"))
|
|
745
|
+
# Trigger reconnection
|
|
746
|
+
await self._disconnect()
|
|
747
|
+
await self._connect()
|
|
691
748
|
|
|
692
749
|
def _build_websocket_url(self) -> str:
|
|
693
750
|
if not self.sample_rate:
|
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev851.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev848.dist-info → dv_pipecat_ai-0.0.85.dev851.dist-info}/top_level.txt
RENAMED
|
File without changes
|