livekit-plugins-deepgram 0.3.dev0__tar.gz → 0.4.dev0__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (17)
  1. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/PKG-INFO +3 -3
  2. livekit_plugins_deepgram-0.4.dev0/livekit/plugins/deepgram/log.py +3 -0
  3. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit/plugins/deepgram/stt.py +55 -58
  4. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit/plugins/deepgram/version.py +1 -1
  5. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit_plugins_deepgram.egg-info/PKG-INFO +3 -3
  6. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit_plugins_deepgram.egg-info/SOURCES.txt +1 -0
  7. livekit_plugins_deepgram-0.4.dev0/livekit_plugins_deepgram.egg-info/requires.txt +3 -0
  8. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/setup.py +2 -2
  9. livekit-plugins-deepgram-0.3.dev0/livekit_plugins_deepgram.egg-info/requires.txt +0 -3
  10. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/README.md +0 -0
  11. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit/plugins/deepgram/__init__.py +0 -0
  12. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit/plugins/deepgram/models.py +0 -0
  13. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit/plugins/deepgram/py.typed +0 -0
  14. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit_plugins_deepgram.egg-info/dependency_links.txt +0 -0
  15. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit_plugins_deepgram.egg-info/top_level.txt +0 -0
  16. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/pyproject.toml +0 -0
  17. {livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-deepgram
3
- Version: 0.3.dev0
3
+ Version: 0.4.dev0
4
4
  Summary: Agent Framework plugin for services using DeepGram's API.
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -19,8 +19,8 @@ Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: >=3.9.0
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: livekit~=0.9
23
- Requires-Dist: livekit-agents~=0.5.dev0
22
+ Requires-Dist: livekit~=0.11
23
+ Requires-Dist: livekit-agents~=0.6.dev0
24
24
  Requires-Dist: aiohttp>=3.7.4
25
25
 
26
26
  # LiveKit Plugins DeepGram
@@ -0,0 +1,3 @@
1
+ import logging
2
+
3
+ logger = logging.getLogger("livekit.plugins.deepgram")
@@ -18,7 +18,6 @@ import asyncio
18
18
  import dataclasses
19
19
  import io
20
20
  import json
21
- import logging
22
21
  import os
23
22
  import wave
24
23
  from contextlib import suppress
@@ -31,6 +30,7 @@ from livekit import rtc
31
30
  from livekit.agents import stt
32
31
  from livekit.agents.utils import AudioBuffer, merge_frames
33
32
 
33
+ from .log import logger
34
34
  from .models import DeepgramLanguages, DeepgramModels
35
35
 
36
36
 
@@ -56,7 +56,7 @@ class STT(stt.STT):
56
56
  smart_format: bool = True,
57
57
  model: DeepgramModels = "nova-2-general",
58
58
  api_key: str | None = None,
59
- min_silence_duration: int = 100, # 100ms for a RTC app seems like a strong default
59
+ min_silence_duration: int = 0,
60
60
  ) -> None:
61
61
  super().__init__(streaming_supported=True)
62
62
  api_key = api_key or os.environ.get("DEEPGRAM_API_KEY")
@@ -64,7 +64,7 @@ class STT(stt.STT):
64
64
  raise ValueError("Deepgram API key is required")
65
65
  self._api_key = api_key
66
66
 
67
- self._config = STTOptions(
67
+ self._opts = STTOptions(
68
68
  language=language,
69
69
  detect_language=detect_language,
70
70
  interim_results=interim_results,
@@ -132,7 +132,7 @@ class STT(stt.STT):
132
132
  *,
133
133
  language: str | None = None,
134
134
  ) -> STTOptions:
135
- config = dataclasses.replace(self._config)
135
+ config = dataclasses.replace(self._opts)
136
136
  config.language = language or config.language
137
137
 
138
138
  if config.detect_language:
@@ -147,7 +147,7 @@ class SpeechStream(stt.SpeechStream):
147
147
 
148
148
  def __init__(
149
149
  self,
150
- config: STTOptions,
150
+ opts: STTOptions,
151
151
  api_key: str,
152
152
  sample_rate: int = 16000,
153
153
  num_channels: int = 1,
@@ -155,10 +155,10 @@ class SpeechStream(stt.SpeechStream):
155
155
  ) -> None:
156
156
  super().__init__()
157
157
 
158
- if config.language is None:
158
+ if opts.detect_language and opts.language is None:
159
159
  raise ValueError("language detection is not supported in streaming mode")
160
160
 
161
- self._config = config
161
+ self._opts = opts
162
162
  self._sample_rate = sample_rate
163
163
  self._num_channels = num_channels
164
164
  self._api_key = api_key
@@ -173,19 +173,13 @@ class SpeechStream(stt.SpeechStream):
173
173
  # keep a list of final transcripts to combine them inside the END_OF_SPEECH event
174
174
  self._final_events: List[stt.SpeechEvent] = []
175
175
 
176
- def log_exception(task: asyncio.Task) -> None:
177
- if not task.cancelled() and task.exception():
178
- logging.error(f"deepgram task failed: {task.exception()}")
179
-
180
- self._main_task.add_done_callback(log_exception)
181
-
182
176
  def push_frame(self, frame: rtc.AudioFrame) -> None:
183
177
  if self._closed:
184
178
  raise ValueError("cannot push frame to closed stream")
185
179
 
186
180
  self._queue.put_nowait(frame)
187
181
 
188
- async def aclose(self, wait: bool = True) -> None:
182
+ async def aclose(self, *, wait: bool = True) -> None:
189
183
  self._closed = True
190
184
  self._queue.put_nowait(SpeechStream._CLOSE_MSG)
191
185
 
@@ -208,19 +202,19 @@ class SpeechStream(stt.SpeechStream):
208
202
  while not self._closed:
209
203
  try:
210
204
  live_config = {
211
- "model": self._config.model,
212
- "punctuate": self._config.punctuate,
213
- "smart_format": self._config.smart_format,
214
- "interim_results": self._config.interim_results,
205
+ "model": self._opts.model,
206
+ "punctuate": self._opts.punctuate,
207
+ "smart_format": self._opts.smart_format,
208
+ "interim_results": self._opts.interim_results,
215
209
  "encoding": "linear16",
216
210
  "sample_rate": self._sample_rate,
217
211
  "vad_events": True,
218
212
  "channels": self._num_channels,
219
- "endpointing": self._config.endpointing,
213
+ "endpointing": self._opts.endpointing,
220
214
  }
221
215
 
222
- if self._config.language:
223
- live_config["language"] = self._config.language
216
+ if self._opts.language:
217
+ live_config["language"] = self._opts.language
224
218
 
225
219
  headers = {"Authorization": f"Token {self._api_key}"}
226
220
 
@@ -229,23 +223,23 @@ class SpeechStream(stt.SpeechStream):
229
223
  retry_count = 0 # connected successfully, reset the retry_count
230
224
 
231
225
  await self._run_ws(ws)
232
- except Exception as e:
226
+ except Exception:
233
227
  # Something went wrong, retry the connection
234
228
  if retry_count >= max_retry:
235
- logging.error(
236
- f"failed to connect to deepgram after {max_retry} tries",
237
- exc_info=e,
229
+ logger.exception(
230
+ f"failed to connect to deepgram after {max_retry} tries"
238
231
  )
239
232
  break
240
233
 
241
234
  retry_delay = min(retry_count * 2, 10) # max 10s
242
235
  retry_count += 1 # increment after calculating the delay, the first retry should happen directly
243
236
 
244
- logging.warning(
245
- f"deepgram connection failed, retrying in {retry_delay}s",
246
- exc_info=e,
237
+ logger.warning(
238
+ f"deepgram connection failed, retrying in {retry_delay}s"
247
239
  )
248
240
  await asyncio.sleep(retry_delay)
241
+ except Exception:
242
+ logger.exception("deepgram task failed")
249
243
  finally:
250
244
  self._event_queue.put_nowait(None)
251
245
 
@@ -305,27 +299,26 @@ class SpeechStream(stt.SpeechStream):
305
299
  ) # this will trigger a reconnection, see the _run loop
306
300
 
307
301
  if msg.type != aiohttp.WSMsgType.TEXT:
308
- logging.warning("unexpected deepgram message type %s", msg.type)
302
+ logger.warning("unexpected deepgram message type %s", msg.type)
309
303
  continue
310
304
 
311
305
  try:
312
306
  # received a message from deepgram
313
307
  data = json.loads(msg.data)
314
308
  self._process_stream_event(data)
315
- except Exception as e:
316
- logging.error(f"failed to process deepgram message: {e}")
309
+ except Exception:
310
+ logger.exception("failed to process deepgram message")
317
311
 
318
312
  await asyncio.gather(send_task(), recv_task(), keepalive_task())
319
313
 
320
314
  def _end_speech(self) -> None:
321
315
  if not self._speaking:
322
- logging.warning(
316
+ logger.warning(
323
317
  "trying to commit final events without being in the speaking state"
324
318
  )
325
319
  return
326
320
 
327
321
  if len(self._final_events) == 0:
328
- logging.warning("received end of speech without any final transcription")
329
322
  return
330
323
 
331
324
  self._speaking = False
@@ -333,9 +326,10 @@ class SpeechStream(stt.SpeechStream):
333
326
  # combine all final transcripts since the start of the speech
334
327
  sentence = ""
335
328
  confidence = 0.0
336
- for alt in self._final_events:
337
- sentence += f"{alt.alternatives[0].text.strip()} "
338
- confidence += alt.alternatives[0].confidence
329
+ for f in self._final_events:
330
+ alt = f.alternatives[0]
331
+ sentence += f"{alt.text.strip()} "
332
+ confidence += alt.confidence
339
333
 
340
334
  sentence = sentence.rstrip()
341
335
  confidence /= len(self._final_events) # avg. of confidence
@@ -344,7 +338,7 @@ class SpeechStream(stt.SpeechStream):
344
338
  type=stt.SpeechEventType.END_OF_SPEECH,
345
339
  alternatives=[
346
340
  stt.SpeechData(
347
- language=str(self._config.language),
341
+ language=str(self._opts.language),
348
342
  start_time=self._final_events[0].alternatives[0].start_time,
349
343
  end_time=self._final_events[-1].alternatives[0].end_time,
350
344
  confidence=confidence,
@@ -356,7 +350,7 @@ class SpeechStream(stt.SpeechStream):
356
350
  self._final_events = []
357
351
 
358
352
  def _process_stream_event(self, data: dict) -> None:
359
- assert self._config.language is not None
353
+ assert self._opts.language is not None
360
354
 
361
355
  if data["type"] == "SpeechStarted":
362
356
  # This is a normal case. Deepgram's SpeechStarted events
@@ -377,28 +371,31 @@ class SpeechStream(stt.SpeechStream):
377
371
  is_final_transcript = data["is_final"]
378
372
  is_endpoint = data["speech_final"]
379
373
 
380
- alts = live_transcription_to_speech_data(self._config.language, data)
374
+ alts = live_transcription_to_speech_data(self._opts.language, data)
381
375
  # If, for some reason, we didn't get a SpeechStarted event but we got
382
376
  # a transcript with text, we should start speaking. It's rare but has
383
377
  # been observed.
384
- if not self._speaking and len(alts) and alts[0].text.strip() != "":
385
- self._speaking = True
386
- start_event = stt.SpeechEvent(type=stt.SpeechEventType.START_OF_SPEECH)
387
- self._event_queue.put_nowait(start_event)
388
-
389
- if is_final_transcript:
390
- final_event = stt.SpeechEvent(
391
- type=stt.SpeechEventType.FINAL_TRANSCRIPT,
392
- alternatives=alts,
393
- )
394
- self._final_events.append(final_event)
395
- self._event_queue.put_nowait(final_event)
396
- else:
397
- interim_event = stt.SpeechEvent(
398
- type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
399
- alternatives=alts,
400
- )
401
- self._event_queue.put_nowait(interim_event)
378
+ if len(alts) > 0 and alts[0].text:
379
+ if not self._speaking:
380
+ self._speaking = True
381
+ start_event = stt.SpeechEvent(
382
+ type=stt.SpeechEventType.START_OF_SPEECH
383
+ )
384
+ self._event_queue.put_nowait(start_event)
385
+
386
+ if is_final_transcript:
387
+ final_event = stt.SpeechEvent(
388
+ type=stt.SpeechEventType.FINAL_TRANSCRIPT,
389
+ alternatives=alts,
390
+ )
391
+ self._final_events.append(final_event)
392
+ self._event_queue.put_nowait(final_event)
393
+ else:
394
+ interim_event = stt.SpeechEvent(
395
+ type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
396
+ alternatives=alts,
397
+ )
398
+ self._event_queue.put_nowait(interim_event)
402
399
 
403
400
  # if we receive an endpoint, only end the speech if
404
401
  # we either had a SpeechStarted event or we have a seen
@@ -408,7 +405,7 @@ class SpeechStream(stt.SpeechStream):
408
405
  elif data["type"] == "Metadata":
409
406
  pass
410
407
  else:
411
- logging.warning("received unexpected message from deepgram %s", data)
408
+ logger.warning("received unexpected message from deepgram %s", data)
412
409
 
413
410
  async def __anext__(self) -> stt.SpeechEvent:
414
411
  evt = await self._event_queue.get()
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.3.dev0"
15
+ __version__ = "0.4.dev0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-deepgram
3
- Version: 0.3.dev0
3
+ Version: 0.4.dev0
4
4
  Summary: Agent Framework plugin for services using DeepGram's API.
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -19,8 +19,8 @@ Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: >=3.9.0
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: livekit~=0.9
23
- Requires-Dist: livekit-agents~=0.5.dev0
22
+ Requires-Dist: livekit~=0.11
23
+ Requires-Dist: livekit-agents~=0.6.dev0
24
24
  Requires-Dist: aiohttp>=3.7.4
25
25
 
26
26
  # LiveKit Plugins DeepGram
@@ -2,6 +2,7 @@ README.md
2
2
  pyproject.toml
3
3
  setup.py
4
4
  livekit/plugins/deepgram/__init__.py
5
+ livekit/plugins/deepgram/log.py
5
6
  livekit/plugins/deepgram/models.py
6
7
  livekit/plugins/deepgram/py.typed
7
8
  livekit/plugins/deepgram/stt.py
@@ -0,0 +1,3 @@
1
+ livekit~=0.11
2
+ livekit-agents~=0.6.dev0
3
+ aiohttp>=3.7.4
@@ -48,8 +48,8 @@ setuptools.setup(
48
48
  packages=setuptools.find_namespace_packages(include=["livekit.*"]),
49
49
  python_requires=">=3.9.0",
50
50
  install_requires=[
51
- "livekit ~= 0.9",
52
- "livekit-agents~=0.5.dev0",
51
+ "livekit ~= 0.11",
52
+ "livekit-agents~=0.6.dev0",
53
53
  "aiohttp >= 3.7.4",
54
54
  ],
55
55
  package_data={
@@ -1,3 +0,0 @@
1
- livekit~=0.9
2
- livekit-agents~=0.5.dev0
3
- aiohttp>=3.7.4