PyPI - livekit-plugins-deepgram - Versions diffs - 0.3.dev0__tar.gz → 0.4.dev0__tar.gz - Mend

livekit-plugins-deepgram 0.3.dev0__tar.gz → 0.4.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

{livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-deepgram
-Version: 0.3.dev0
+Version: 0.4.dev0
 Summary: Agent Framework plugin for services using DeepGram's API.
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
@@ -19,8 +19,8 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9.0
 Description-Content-Type: text/markdown
-Requires-Dist: livekit~=0.9
-Requires-Dist: livekit-agents~=0.5.dev0
+Requires-Dist: livekit~=0.11
+Requires-Dist: livekit-agents~=0.6.dev0
 Requires-Dist: aiohttp>=3.7.4
 # LiveKit Plugins DeepGram

livekit_plugins_deepgram-0.4.dev0/livekit/plugins/deepgram/log.py ADDED Viewed

@@ -0,0 +1,3 @@
+import logging
+logger = logging.getLogger("livekit.plugins.deepgram")

{livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit/plugins/deepgram/stt.py RENAMED Viewed

@@ -18,7 +18,6 @@ import asyncio
 import dataclasses
 import io
 import json
-import logging
 import os
 import wave
 from contextlib import suppress
@@ -31,6 +30,7 @@ from livekit import rtc
 from livekit.agents import stt
 from livekit.agents.utils import AudioBuffer, merge_frames
+from .log import logger
 from .models import DeepgramLanguages, DeepgramModels
@@ -56,7 +56,7 @@ class STT(stt.STT):
         smart_format: bool = True,
         model: DeepgramModels = "nova-2-general",
         api_key: str | None = None,
-        min_silence_duration: int = 100,  # 100ms for a RTC app seems like a strong default
+        min_silence_duration: int = 0,
     ) -> None:
         super().__init__(streaming_supported=True)
         api_key = api_key or os.environ.get("DEEPGRAM_API_KEY")
@@ -64,7 +64,7 @@ class STT(stt.STT):
             raise ValueError("Deepgram API key is required")
         self._api_key = api_key
-        self._config = STTOptions(
+        self._opts = STTOptions(
             language=language,
             detect_language=detect_language,
             interim_results=interim_results,
@@ -132,7 +132,7 @@ class STT(stt.STT):
         *,
         language: str | None = None,
     ) -> STTOptions:
-        config = dataclasses.replace(self._config)
+        config = dataclasses.replace(self._opts)
         config.language = language or config.language
         if config.detect_language:
@@ -147,7 +147,7 @@ class SpeechStream(stt.SpeechStream):
     def __init__(
         self,
-        config: STTOptions,
+        opts: STTOptions,
         api_key: str,
         sample_rate: int = 16000,
         num_channels: int = 1,
@@ -155,10 +155,10 @@ class SpeechStream(stt.SpeechStream):
     ) -> None:
         super().__init__()
-        if config.language is None:
+        if opts.detect_language and opts.language is None:
             raise ValueError("language detection is not supported in streaming mode")
-        self._config = config
+        self._opts = opts
         self._sample_rate = sample_rate
         self._num_channels = num_channels
         self._api_key = api_key
@@ -173,19 +173,13 @@ class SpeechStream(stt.SpeechStream):
         # keep a list of final transcripts to combine them inside the END_OF_SPEECH event
         self._final_events: List[stt.SpeechEvent] = []
-        def log_exception(task: asyncio.Task) -> None:
-            if not task.cancelled() and task.exception():
-                logging.error(f"deepgram task failed: {task.exception()}")
-        self._main_task.add_done_callback(log_exception)
     def push_frame(self, frame: rtc.AudioFrame) -> None:
         if self._closed:
             raise ValueError("cannot push frame to closed stream")
         self._queue.put_nowait(frame)
-    async def aclose(self, wait: bool = True) -> None:
+    async def aclose(self, *, wait: bool = True) -> None:
         self._closed = True
         self._queue.put_nowait(SpeechStream._CLOSE_MSG)
@@ -208,19 +202,19 @@ class SpeechStream(stt.SpeechStream):
             while not self._closed:
                 try:
                     live_config = {
-                        "model": self._config.model,
-                        "punctuate": self._config.punctuate,
-                        "smart_format": self._config.smart_format,
-                        "interim_results": self._config.interim_results,
+                        "model": self._opts.model,
+                        "punctuate": self._opts.punctuate,
+                        "smart_format": self._opts.smart_format,
+                        "interim_results": self._opts.interim_results,
                         "encoding": "linear16",
                         "sample_rate": self._sample_rate,
                         "vad_events": True,
                         "channels": self._num_channels,
-                        "endpointing": self._config.endpointing,
+                        "endpointing": self._opts.endpointing,
                     }
-                    if self._config.language:
-                        live_config["language"] = self._config.language
+                    if self._opts.language:
+                        live_config["language"] = self._opts.language
                     headers = {"Authorization": f"Token {self._api_key}"}
@@ -229,23 +223,23 @@ class SpeechStream(stt.SpeechStream):
                     retry_count = 0  # connected successfully, reset the retry_count
                     await self._run_ws(ws)
-                except Exception as e:
+                except Exception:
                     # Something went wrong, retry the connection
                     if retry_count >= max_retry:
-                        logging.error(
-                            f"failed to connect to deepgram after {max_retry} tries",
-                            exc_info=e,
+                        logger.exception(
+                            f"failed to connect to deepgram after {max_retry} tries"
                         )
                         break
                     retry_delay = min(retry_count * 2, 10)  # max 10s
                     retry_count += 1  # increment after calculating the delay, the first retry should happen directly
-                    logging.warning(
-                        f"deepgram connection failed, retrying in {retry_delay}s",
-                        exc_info=e,
+                    logger.warning(
+                        f"deepgram connection failed, retrying in {retry_delay}s"
                     )
                     await asyncio.sleep(retry_delay)
+        except Exception:
+            logger.exception("deepgram task failed")
         finally:
             self._event_queue.put_nowait(None)
@@ -305,27 +299,26 @@ class SpeechStream(stt.SpeechStream):
                     )  # this will trigger a reconnection, see the _run loop
                 if msg.type != aiohttp.WSMsgType.TEXT:
-                    logging.warning("unexpected deepgram message type %s", msg.type)
+                    logger.warning("unexpected deepgram message type %s", msg.type)
                     continue
                 try:
                     # received a message from deepgram
                     data = json.loads(msg.data)
                     self._process_stream_event(data)
-                except Exception as e:
-                    logging.error(f"failed to process deepgram message: {e}")
+                except Exception:
+                    logger.exception("failed to process deepgram message")
         await asyncio.gather(send_task(), recv_task(), keepalive_task())
     def _end_speech(self) -> None:
         if not self._speaking:
-            logging.warning(
+            logger.warning(
                 "trying to commit final events without being in the speaking state"
             )
             return
         if len(self._final_events) == 0:
-            logging.warning("received end of speech without any final transcription")
             return
         self._speaking = False
@@ -333,9 +326,10 @@ class SpeechStream(stt.SpeechStream):
         # combine all final transcripts since the start of the speech
         sentence = ""
         confidence = 0.0
-        for alt in self._final_events:
-            sentence += f"{alt.alternatives[0].text.strip()} "
-            confidence += alt.alternatives[0].confidence
+        for f in self._final_events:
+            alt = f.alternatives[0]
+            sentence += f"{alt.text.strip()} "
+            confidence += alt.confidence
         sentence = sentence.rstrip()
         confidence /= len(self._final_events)  # avg. of confidence
@@ -344,7 +338,7 @@ class SpeechStream(stt.SpeechStream):
             type=stt.SpeechEventType.END_OF_SPEECH,
             alternatives=[
                 stt.SpeechData(
-                    language=str(self._config.language),
+                    language=str(self._opts.language),
                     start_time=self._final_events[0].alternatives[0].start_time,
                     end_time=self._final_events[-1].alternatives[0].end_time,
                     confidence=confidence,
@@ -356,7 +350,7 @@ class SpeechStream(stt.SpeechStream):
         self._final_events = []
     def _process_stream_event(self, data: dict) -> None:
-        assert self._config.language is not None
+        assert self._opts.language is not None
         if data["type"] == "SpeechStarted":
             # This is a normal case. Deepgram's SpeechStarted events
@@ -377,28 +371,31 @@ class SpeechStream(stt.SpeechStream):
             is_final_transcript = data["is_final"]
             is_endpoint = data["speech_final"]
-            alts = live_transcription_to_speech_data(self._config.language, data)
+            alts = live_transcription_to_speech_data(self._opts.language, data)
             # If, for some reason, we didn't get a SpeechStarted event but we got
             # a transcript with text, we should start speaking. It's rare but has
             # been observed.
-            if not self._speaking and len(alts) and alts[0].text.strip() != "":
-                self._speaking = True
-                start_event = stt.SpeechEvent(type=stt.SpeechEventType.START_OF_SPEECH)
-                self._event_queue.put_nowait(start_event)
-            if is_final_transcript:
-                final_event = stt.SpeechEvent(
-                    type=stt.SpeechEventType.FINAL_TRANSCRIPT,
-                    alternatives=alts,
-                )
-                self._final_events.append(final_event)
-                self._event_queue.put_nowait(final_event)
-            else:
-                interim_event = stt.SpeechEvent(
-                    type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
-                    alternatives=alts,
-                )
-                self._event_queue.put_nowait(interim_event)
+            if len(alts) > 0 and alts[0].text:
+                if not self._speaking:
+                    self._speaking = True
+                    start_event = stt.SpeechEvent(
+                        type=stt.SpeechEventType.START_OF_SPEECH
+                    )
+                    self._event_queue.put_nowait(start_event)
+                if is_final_transcript:
+                    final_event = stt.SpeechEvent(
+                        type=stt.SpeechEventType.FINAL_TRANSCRIPT,
+                        alternatives=alts,
+                    )
+                    self._final_events.append(final_event)
+                    self._event_queue.put_nowait(final_event)
+                else:
+                    interim_event = stt.SpeechEvent(
+                        type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
+                        alternatives=alts,
+                    )
+                    self._event_queue.put_nowait(interim_event)
             # if we receive an endpoint, only end the speech if
             # we either had a SpeechStarted event or we have a seen
@@ -408,7 +405,7 @@ class SpeechStream(stt.SpeechStream):
         elif data["type"] == "Metadata":
             pass
         else:
-            logging.warning("received unexpected message from deepgram %s", data)
+            logger.warning("received unexpected message from deepgram %s", data)
     async def __anext__(self) -> stt.SpeechEvent:
         evt = await self._event_queue.get()

{livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit/plugins/deepgram/version.py RENAMED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.3.dev0"
+__version__ = "0.4.dev0"

{livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit_plugins_deepgram.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-deepgram
-Version: 0.3.dev0
+Version: 0.4.dev0
 Summary: Agent Framework plugin for services using DeepGram's API.
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
@@ -19,8 +19,8 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9.0
 Description-Content-Type: text/markdown
-Requires-Dist: livekit~=0.9
-Requires-Dist: livekit-agents~=0.5.dev0
+Requires-Dist: livekit~=0.11
+Requires-Dist: livekit-agents~=0.6.dev0
 Requires-Dist: aiohttp>=3.7.4
 # LiveKit Plugins DeepGram

{livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/livekit_plugins_deepgram.egg-info/SOURCES.txt RENAMED Viewed

@@ -2,6 +2,7 @@ README.md
 pyproject.toml
 setup.py
 livekit/plugins/deepgram/__init__.py
+livekit/plugins/deepgram/log.py
 livekit/plugins/deepgram/models.py
 livekit/plugins/deepgram/py.typed
 livekit/plugins/deepgram/stt.py

livekit_plugins_deepgram-0.4.dev0/livekit_plugins_deepgram.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,3 @@
+livekit~=0.11
+livekit-agents~=0.6.dev0
+aiohttp>=3.7.4

{livekit-plugins-deepgram-0.3.dev0 → livekit_plugins_deepgram-0.4.dev0}/setup.py RENAMED Viewed

@@ -48,8 +48,8 @@ setuptools.setup(
     packages=setuptools.find_namespace_packages(include=["livekit.*"]),
     python_requires=">=3.9.0",
     install_requires=[
-        "livekit ~= 0.9",
-        "livekit-agents~=0.5.dev0",
+        "livekit ~= 0.11",
+        "livekit-agents~=0.6.dev0",
         "aiohttp >= 3.7.4",
     ],
     package_data={