livekit-plugins-deepgram 0.3.dev0__tar.gz → 0.4.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/PKG-INFO +3 -3
- livekit_plugins_deepgram-0.4.dev0/livekit/plugins/deepgram/log.py +3 -0
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit/plugins/deepgram/stt.py +55 -58
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit/plugins/deepgram/version.py +1 -1
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit_plugins_deepgram.egg-info/PKG-INFO +3 -3
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit_plugins_deepgram.egg-info/SOURCES.txt +1 -0
- livekit_plugins_deepgram-0.4.dev0/livekit_plugins_deepgram.egg-info/requires.txt +3 -0
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/setup.py +2 -2
- livekit-plugins-deepgram-0.3.dev0/livekit_plugins_deepgram.egg-info/requires.txt +0 -3
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/README.md +0 -0
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit/plugins/deepgram/__init__.py +0 -0
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit/plugins/deepgram/models.py +0 -0
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit/plugins/deepgram/py.typed +0 -0
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit_plugins_deepgram.egg-info/dependency_links.txt +0 -0
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit_plugins_deepgram.egg-info/top_level.txt +0 -0
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/pyproject.toml +0 -0
- {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: livekit-plugins-deepgram
|
|
3
|
-
Version: 0.3.dev0
|
|
3
|
+
Version: 0.4.dev0
|
|
4
4
|
Summary: Agent Framework plugin for services using DeepGram's API.
|
|
5
5
|
Home-page: https://github.com/livekit/agents
|
|
6
6
|
License: Apache-2.0
|
|
@@ -19,8 +19,8 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
20
|
Requires-Python: >=3.9.0
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
|
-
Requires-Dist: livekit~=0.
|
|
23
|
-
Requires-Dist: livekit-agents~=0.
|
|
22
|
+
Requires-Dist: livekit~=0.11
|
|
23
|
+
Requires-Dist: livekit-agents~=0.6.dev0
|
|
24
24
|
Requires-Dist: aiohttp>=3.7.4
|
|
25
25
|
|
|
26
26
|
# LiveKit Plugins DeepGram
|
|
@@ -18,7 +18,6 @@ import asyncio
|
|
|
18
18
|
import dataclasses
|
|
19
19
|
import io
|
|
20
20
|
import json
|
|
21
|
-
import logging
|
|
22
21
|
import os
|
|
23
22
|
import wave
|
|
24
23
|
from contextlib import suppress
|
|
@@ -31,6 +30,7 @@ from livekit import rtc
|
|
|
31
30
|
from livekit.agents import stt
|
|
32
31
|
from livekit.agents.utils import AudioBuffer, merge_frames
|
|
33
32
|
|
|
33
|
+
from .log import logger
|
|
34
34
|
from .models import DeepgramLanguages, DeepgramModels
|
|
35
35
|
|
|
36
36
|
|
|
@@ -56,7 +56,7 @@ class STT(stt.STT):
|
|
|
56
56
|
smart_format: bool = True,
|
|
57
57
|
model: DeepgramModels = "nova-2-general",
|
|
58
58
|
api_key: str | None = None,
|
|
59
|
-
min_silence_duration: int =
|
|
59
|
+
min_silence_duration: int = 0,
|
|
60
60
|
) -> None:
|
|
61
61
|
super().__init__(streaming_supported=True)
|
|
62
62
|
api_key = api_key or os.environ.get("DEEPGRAM_API_KEY")
|
|
@@ -64,7 +64,7 @@ class STT(stt.STT):
|
|
|
64
64
|
raise ValueError("Deepgram API key is required")
|
|
65
65
|
self._api_key = api_key
|
|
66
66
|
|
|
67
|
-
self.
|
|
67
|
+
self._opts = STTOptions(
|
|
68
68
|
language=language,
|
|
69
69
|
detect_language=detect_language,
|
|
70
70
|
interim_results=interim_results,
|
|
@@ -132,7 +132,7 @@ class STT(stt.STT):
|
|
|
132
132
|
*,
|
|
133
133
|
language: str | None = None,
|
|
134
134
|
) -> STTOptions:
|
|
135
|
-
config = dataclasses.replace(self.
|
|
135
|
+
config = dataclasses.replace(self._opts)
|
|
136
136
|
config.language = language or config.language
|
|
137
137
|
|
|
138
138
|
if config.detect_language:
|
|
@@ -147,7 +147,7 @@ class SpeechStream(stt.SpeechStream):
|
|
|
147
147
|
|
|
148
148
|
def __init__(
|
|
149
149
|
self,
|
|
150
|
-
|
|
150
|
+
opts: STTOptions,
|
|
151
151
|
api_key: str,
|
|
152
152
|
sample_rate: int = 16000,
|
|
153
153
|
num_channels: int = 1,
|
|
@@ -155,10 +155,10 @@ class SpeechStream(stt.SpeechStream):
|
|
|
155
155
|
) -> None:
|
|
156
156
|
super().__init__()
|
|
157
157
|
|
|
158
|
-
if
|
|
158
|
+
if opts.detect_language and opts.language is None:
|
|
159
159
|
raise ValueError("language detection is not supported in streaming mode")
|
|
160
160
|
|
|
161
|
-
self.
|
|
161
|
+
self._opts = opts
|
|
162
162
|
self._sample_rate = sample_rate
|
|
163
163
|
self._num_channels = num_channels
|
|
164
164
|
self._api_key = api_key
|
|
@@ -173,19 +173,13 @@ class SpeechStream(stt.SpeechStream):
|
|
|
173
173
|
# keep a list of final transcripts to combine them inside the END_OF_SPEECH event
|
|
174
174
|
self._final_events: List[stt.SpeechEvent] = []
|
|
175
175
|
|
|
176
|
-
def log_exception(task: asyncio.Task) -> None:
|
|
177
|
-
if not task.cancelled() and task.exception():
|
|
178
|
-
logging.error(f"deepgram task failed: {task.exception()}")
|
|
179
|
-
|
|
180
|
-
self._main_task.add_done_callback(log_exception)
|
|
181
|
-
|
|
182
176
|
def push_frame(self, frame: rtc.AudioFrame) -> None:
|
|
183
177
|
if self._closed:
|
|
184
178
|
raise ValueError("cannot push frame to closed stream")
|
|
185
179
|
|
|
186
180
|
self._queue.put_nowait(frame)
|
|
187
181
|
|
|
188
|
-
async def aclose(self, wait: bool = True) -> None:
|
|
182
|
+
async def aclose(self, *, wait: bool = True) -> None:
|
|
189
183
|
self._closed = True
|
|
190
184
|
self._queue.put_nowait(SpeechStream._CLOSE_MSG)
|
|
191
185
|
|
|
@@ -208,19 +202,19 @@ class SpeechStream(stt.SpeechStream):
|
|
|
208
202
|
while not self._closed:
|
|
209
203
|
try:
|
|
210
204
|
live_config = {
|
|
211
|
-
"model": self.
|
|
212
|
-
"punctuate": self.
|
|
213
|
-
"smart_format": self.
|
|
214
|
-
"interim_results": self.
|
|
205
|
+
"model": self._opts.model,
|
|
206
|
+
"punctuate": self._opts.punctuate,
|
|
207
|
+
"smart_format": self._opts.smart_format,
|
|
208
|
+
"interim_results": self._opts.interim_results,
|
|
215
209
|
"encoding": "linear16",
|
|
216
210
|
"sample_rate": self._sample_rate,
|
|
217
211
|
"vad_events": True,
|
|
218
212
|
"channels": self._num_channels,
|
|
219
|
-
"endpointing": self.
|
|
213
|
+
"endpointing": self._opts.endpointing,
|
|
220
214
|
}
|
|
221
215
|
|
|
222
|
-
if self.
|
|
223
|
-
live_config["language"] = self.
|
|
216
|
+
if self._opts.language:
|
|
217
|
+
live_config["language"] = self._opts.language
|
|
224
218
|
|
|
225
219
|
headers = {"Authorization": f"Token {self._api_key}"}
|
|
226
220
|
|
|
@@ -229,23 +223,23 @@ class SpeechStream(stt.SpeechStream):
|
|
|
229
223
|
retry_count = 0 # connected successfully, reset the retry_count
|
|
230
224
|
|
|
231
225
|
await self._run_ws(ws)
|
|
232
|
-
except Exception as e:
|
|
226
|
+
except Exception:
|
|
233
227
|
# Something went wrong, retry the connection
|
|
234
228
|
if retry_count >= max_retry:
|
|
235
|
-
|
|
236
|
-
f"failed to connect to deepgram after {max_retry} tries"
|
|
237
|
-
exc_info=e,
|
|
229
|
+
logger.exception(
|
|
230
|
+
f"failed to connect to deepgram after {max_retry} tries"
|
|
238
231
|
)
|
|
239
232
|
break
|
|
240
233
|
|
|
241
234
|
retry_delay = min(retry_count * 2, 10) # max 10s
|
|
242
235
|
retry_count += 1 # increment after calculating the delay, the first retry should happen directly
|
|
243
236
|
|
|
244
|
-
|
|
245
|
-
f"deepgram connection failed, retrying in {retry_delay}s"
|
|
246
|
-
exc_info=e,
|
|
237
|
+
logger.warning(
|
|
238
|
+
f"deepgram connection failed, retrying in {retry_delay}s"
|
|
247
239
|
)
|
|
248
240
|
await asyncio.sleep(retry_delay)
|
|
241
|
+
except Exception:
|
|
242
|
+
logger.exception("deepgram task failed")
|
|
249
243
|
finally:
|
|
250
244
|
self._event_queue.put_nowait(None)
|
|
251
245
|
|
|
@@ -305,27 +299,26 @@ class SpeechStream(stt.SpeechStream):
|
|
|
305
299
|
) # this will trigger a reconnection, see the _run loop
|
|
306
300
|
|
|
307
301
|
if msg.type != aiohttp.WSMsgType.TEXT:
|
|
308
|
-
|
|
302
|
+
logger.warning("unexpected deepgram message type %s", msg.type)
|
|
309
303
|
continue
|
|
310
304
|
|
|
311
305
|
try:
|
|
312
306
|
# received a message from deepgram
|
|
313
307
|
data = json.loads(msg.data)
|
|
314
308
|
self._process_stream_event(data)
|
|
315
|
-
except Exception
|
|
316
|
-
|
|
309
|
+
except Exception:
|
|
310
|
+
logger.exception("failed to process deepgram message")
|
|
317
311
|
|
|
318
312
|
await asyncio.gather(send_task(), recv_task(), keepalive_task())
|
|
319
313
|
|
|
320
314
|
def _end_speech(self) -> None:
|
|
321
315
|
if not self._speaking:
|
|
322
|
-
|
|
316
|
+
logger.warning(
|
|
323
317
|
"trying to commit final events without being in the speaking state"
|
|
324
318
|
)
|
|
325
319
|
return
|
|
326
320
|
|
|
327
321
|
if len(self._final_events) == 0:
|
|
328
|
-
logging.warning("received end of speech without any final transcription")
|
|
329
322
|
return
|
|
330
323
|
|
|
331
324
|
self._speaking = False
|
|
@@ -333,9 +326,10 @@ class SpeechStream(stt.SpeechStream):
|
|
|
333
326
|
# combine all final transcripts since the start of the speech
|
|
334
327
|
sentence = ""
|
|
335
328
|
confidence = 0.0
|
|
336
|
-
for
|
|
337
|
-
|
|
338
|
-
|
|
329
|
+
for f in self._final_events:
|
|
330
|
+
alt = f.alternatives[0]
|
|
331
|
+
sentence += f"{alt.text.strip()} "
|
|
332
|
+
confidence += alt.confidence
|
|
339
333
|
|
|
340
334
|
sentence = sentence.rstrip()
|
|
341
335
|
confidence /= len(self._final_events) # avg. of confidence
|
|
@@ -344,7 +338,7 @@ class SpeechStream(stt.SpeechStream):
|
|
|
344
338
|
type=stt.SpeechEventType.END_OF_SPEECH,
|
|
345
339
|
alternatives=[
|
|
346
340
|
stt.SpeechData(
|
|
347
|
-
language=str(self.
|
|
341
|
+
language=str(self._opts.language),
|
|
348
342
|
start_time=self._final_events[0].alternatives[0].start_time,
|
|
349
343
|
end_time=self._final_events[-1].alternatives[0].end_time,
|
|
350
344
|
confidence=confidence,
|
|
@@ -356,7 +350,7 @@ class SpeechStream(stt.SpeechStream):
|
|
|
356
350
|
self._final_events = []
|
|
357
351
|
|
|
358
352
|
def _process_stream_event(self, data: dict) -> None:
|
|
359
|
-
assert self.
|
|
353
|
+
assert self._opts.language is not None
|
|
360
354
|
|
|
361
355
|
if data["type"] == "SpeechStarted":
|
|
362
356
|
# This is a normal case. Deepgram's SpeechStarted events
|
|
@@ -377,28 +371,31 @@ class SpeechStream(stt.SpeechStream):
|
|
|
377
371
|
is_final_transcript = data["is_final"]
|
|
378
372
|
is_endpoint = data["speech_final"]
|
|
379
373
|
|
|
380
|
-
alts = live_transcription_to_speech_data(self.
|
|
374
|
+
alts = live_transcription_to_speech_data(self._opts.language, data)
|
|
381
375
|
# If, for some reason, we didn't get a SpeechStarted event but we got
|
|
382
376
|
# a transcript with text, we should start speaking. It's rare but has
|
|
383
377
|
# been observed.
|
|
384
|
-
if
|
|
385
|
-
self._speaking
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
378
|
+
if len(alts) > 0 and alts[0].text:
|
|
379
|
+
if not self._speaking:
|
|
380
|
+
self._speaking = True
|
|
381
|
+
start_event = stt.SpeechEvent(
|
|
382
|
+
type=stt.SpeechEventType.START_OF_SPEECH
|
|
383
|
+
)
|
|
384
|
+
self._event_queue.put_nowait(start_event)
|
|
385
|
+
|
|
386
|
+
if is_final_transcript:
|
|
387
|
+
final_event = stt.SpeechEvent(
|
|
388
|
+
type=stt.SpeechEventType.FINAL_TRANSCRIPT,
|
|
389
|
+
alternatives=alts,
|
|
390
|
+
)
|
|
391
|
+
self._final_events.append(final_event)
|
|
392
|
+
self._event_queue.put_nowait(final_event)
|
|
393
|
+
else:
|
|
394
|
+
interim_event = stt.SpeechEvent(
|
|
395
|
+
type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
|
396
|
+
alternatives=alts,
|
|
397
|
+
)
|
|
398
|
+
self._event_queue.put_nowait(interim_event)
|
|
402
399
|
|
|
403
400
|
# if we receive an endpoint, only end the speech if
|
|
404
401
|
# we either had a SpeechStarted event or we have a seen
|
|
@@ -408,7 +405,7 @@ class SpeechStream(stt.SpeechStream):
|
|
|
408
405
|
elif data["type"] == "Metadata":
|
|
409
406
|
pass
|
|
410
407
|
else:
|
|
411
|
-
|
|
408
|
+
logger.warning("received unexpected message from deepgram %s", data)
|
|
412
409
|
|
|
413
410
|
async def __anext__(self) -> stt.SpeechEvent:
|
|
414
411
|
evt = await self._event_queue.get()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: livekit-plugins-deepgram
|
|
3
|
-
Version: 0.3.dev0
|
|
3
|
+
Version: 0.4.dev0
|
|
4
4
|
Summary: Agent Framework plugin for services using DeepGram's API.
|
|
5
5
|
Home-page: https://github.com/livekit/agents
|
|
6
6
|
License: Apache-2.0
|
|
@@ -19,8 +19,8 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
20
|
Requires-Python: >=3.9.0
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
|
-
Requires-Dist: livekit~=0.
|
|
23
|
-
Requires-Dist: livekit-agents~=0.
|
|
22
|
+
Requires-Dist: livekit~=0.11
|
|
23
|
+
Requires-Dist: livekit-agents~=0.6.dev0
|
|
24
24
|
Requires-Dist: aiohttp>=3.7.4
|
|
25
25
|
|
|
26
26
|
# LiveKit Plugins DeepGram
|
|
@@ -48,8 +48,8 @@ setuptools.setup(
|
|
|
48
48
|
packages=setuptools.find_namespace_packages(include=["livekit.*"]),
|
|
49
49
|
python_requires=">=3.9.0",
|
|
50
50
|
install_requires=[
|
|
51
|
-
"livekit ~= 0.
|
|
52
|
-
"livekit-agents~=0.
|
|
51
|
+
"livekit ~= 0.11",
|
|
52
|
+
"livekit-agents~=0.6.dev0",
|
|
53
53
|
"aiohttp >= 3.7.4",
|
|
54
54
|
],
|
|
55
55
|
package_data={
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|