PyPI - livekit-plugins-google - Versions diffs - 0.9.0__py3-none-any.whl → 0.9.1__py3-none-any.whl - Mend

livekit-plugins-google 0.9.0__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

livekit/plugins/google/beta/realtime/realtime_api.py CHANGED Viewed

@@ -105,7 +105,7 @@ class RealtimeModel:
         Args:
             instructions (str, optional): Initial system instructions for the model. Defaults to "".
-            api_key (str or None, optional): OpenAI API key. If None, will attempt to read from the environment variable OPENAI_API_KEY
+            api_key (str or None, optional): Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
             modalities (ResponseModality): Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
             model (str or None, optional): The name of the model to use. Defaults to "gemini-2.0-flash-exp".
             voice (api_proto.Voice, optional): Voice setting for audio outputs. Defaults to "Puck".

livekit/plugins/google/stt.py CHANGED Viewed

@@ -16,6 +16,7 @@ from __future__ import annotations
 import asyncio
 import dataclasses
+import time
 import weakref
 from dataclasses import dataclass
 from typing import List, Union
@@ -44,6 +45,10 @@ from .models import SpeechLanguages, SpeechModels
 LgType = Union[SpeechLanguages, str]
 LanguageCode = Union[LgType, List[LgType]]
+# Google STT has a timeout of 5 mins, we'll attempt to restart the session
+# before that timeout is reached
+_max_session_duration = 240
 # This class is only be used internally to encapsulate the options
 @dataclass
@@ -229,8 +234,6 @@ class STT(stt.STT):
             raise APIStatusError(
                 e.message,
                 status_code=e.code or -1,
-                request_id=None,
-                body=None,
             )
         except Exception as e:
             raise APIConnectionError() from e
@@ -278,6 +281,13 @@ class STT(stt.STT):
             self._config.spoken_punctuation = spoken_punctuation
         if model is not None:
             self._config.model = model
+        client = None
+        recognizer = None
+        if location is not None:
+            self._location = location
+            # if location is changed, fetch a new client and recognizer as per the new location
+            client = self._ensure_client()
+            recognizer = self._recognizer
         if keywords is not None:
             self._config.keywords = keywords
@@ -289,8 +299,9 @@ class STT(stt.STT):
                 punctuate=punctuate,
                 spoken_punctuation=spoken_punctuation,
                 model=model,
-                location=location,
                 keywords=keywords,
+                client=client,
+                recognizer=recognizer,
             )
@@ -312,6 +323,7 @@ class SpeechStream(stt.SpeechStream):
         self._recognizer = recognizer
         self._config = config
         self._reconnect_event = asyncio.Event()
+        self._session_connected_at: float = 0
     def update_options(
         self,
@@ -322,8 +334,9 @@ class SpeechStream(stt.SpeechStream):
         punctuate: bool | None = None,
         spoken_punctuation: bool | None = None,
         model: SpeechModels | None = None,
-        location: str | None = None,
         keywords: List[tuple[str, float]] | None = None,
+        client: SpeechAsyncClient | None = None,
+        recognizer: str | None = None,
     ):
         if languages is not None:
             if isinstance(languages, str):
@@ -341,13 +354,17 @@ class SpeechStream(stt.SpeechStream):
             self._config.model = model
         if keywords is not None:
             self._config.keywords = keywords
+        if client is not None:
+            self._client = client
+        if recognizer is not None:
+            self._recognizer = recognizer
         self._reconnect_event.set()
     async def _run(self) -> None:
         # google requires a async generator when calling streaming_recognize
         # this function basically convert the queue into a async generator
-        async def input_generator():
+        async def input_generator(should_stop: asyncio.Event):
             try:
                 # first request should contain the config
                 yield cloud_speech.StreamingRecognizeRequest(
@@ -356,6 +373,12 @@ class SpeechStream(stt.SpeechStream):
                 )
                 async for frame in self._input_ch:
+                    # when the stream is aborted due to reconnect, this input_generator
+                    # needs to stop consuming frames
+                    # when the generator stops, the previous gRPC stream will close
+                    if should_stop.is_set():
+                        return
                     if isinstance(frame, rtc.AudioFrame):
                         yield cloud_speech.StreamingRecognizeRequest(
                             audio=frame.data.tobytes()
@@ -367,6 +390,7 @@ class SpeechStream(stt.SpeechStream):
                 )
         async def process_stream(stream):
+            has_started = False
             async for resp in stream:
                 if (
                     resp.speech_event_type
@@ -375,6 +399,7 @@ class SpeechStream(stt.SpeechStream):
                     self._event_ch.send_nowait(
                         stt.SpeechEvent(type=stt.SpeechEventType.START_OF_SPEECH)
                     )
+                    has_started = True
                 if (
                     resp.speech_event_type
@@ -399,6 +424,22 @@ class SpeechStream(stt.SpeechStream):
                                 alternatives=[speech_data],
                             )
                         )
+                        if (
+                            time.time() - self._session_connected_at
+                            > _max_session_duration
+                        ):
+                            logger.debug(
+                                "Google STT maximum connection time reached. Reconnecting..."
+                            )
+                            if has_started:
+                                self._event_ch.send_nowait(
+                                    stt.SpeechEvent(
+                                        type=stt.SpeechEventType.END_OF_SPEECH
+                                    )
+                                )
+                                has_started = False
+                            self._reconnect_event.set()
+                            return
                 if (
                     resp.speech_event_type
@@ -407,6 +448,7 @@ class SpeechStream(stt.SpeechStream):
                     self._event_ch.send_nowait(
                         stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH)
                     )
+                    has_started = False
         while True:
             try:
@@ -431,12 +473,15 @@ class SpeechStream(stt.SpeechStream):
                     ),
                 )
+                should_stop = asyncio.Event()
                 stream = await self._client.streaming_recognize(
-                    requests=input_generator(),
+                    requests=input_generator(should_stop),
                 )
+                self._session_connected_at = time.time()
                 process_stream_task = asyncio.create_task(process_stream(stream))
                 wait_reconnect_task = asyncio.create_task(self._reconnect_event.wait())
                 try:
                     done, _ = await asyncio.wait(
                         [process_stream_task, wait_reconnect_task],
@@ -445,14 +490,23 @@ class SpeechStream(stt.SpeechStream):
                     for task in done:
                         if task != wait_reconnect_task:
                             task.result()
+                    if wait_reconnect_task not in done:
+                        break
+                    self._reconnect_event.clear()
                 finally:
                     await utils.aio.gracefully_cancel(
                         process_stream_task, wait_reconnect_task
                     )
-            finally:
-                if not self._reconnect_event.is_set():
-                    break
-                self._reconnect_event.clear()
+                    should_stop.set()
+            except DeadlineExceeded:
+                raise APITimeoutError()
+            except GoogleAPICallError as e:
+                raise APIStatusError(
+                    e.message,
+                    status_code=e.code or -1,
+                )
+            except Exception as e:
+                raise APIConnectionError() from e
 def _recognize_response_to_speech_event(

livekit/plugins/google/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.9.0"
+__version__ = "0.9.1"

{livekit_plugins_google-0.9.0.dist-info → livekit_plugins_google-0.9.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: livekit-plugins-google
-Version: 0.9.0
+Version: 0.9.1
 Summary: Agent Framework plugin for services from Google Cloud
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
@@ -24,6 +24,16 @@ Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2
 Requires-Dist: google-genai>=0.3.0
 Requires-Dist: livekit-agents>=0.12.3
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: keywords
+Dynamic: license
+Dynamic: project-url
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
 # LiveKit Plugins Google

{livekit_plugins_google-0.9.0.dist-info → livekit_plugins_google-0.9.1.dist-info}/RECORD RENAMED Viewed

@@ -2,14 +2,14 @@ livekit/plugins/google/__init__.py,sha256=TY-5FwEX4Vs7GLO1wSegIxC5W4UPkHBthlr-__
 livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
 livekit/plugins/google/models.py,sha256=cBXhZGY9bFaSCyL9VeSng9wsxhf3peJi3AUYBKV-8GQ,1343
 livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/google/stt.py,sha256=SfmKgQotIVzk9-Hipo1X5cnLQG4uXLniTUoyM3IynwA,18712
+livekit/plugins/google/stt.py,sha256=E5kXPbicH4FEXBjyBzfqQWA-nPhKkojzcc-cbtWdmNs,21088
 livekit/plugins/google/tts.py,sha256=95qXCigVQYWNbcN3pIKBpIah4b31U_MWtXv5Ji0AMc4,9229
-livekit/plugins/google/version.py,sha256=onRKrcQ35NZG4oEg_95WGeTytHh_6VVAlQKAZhwiEe4,600
+livekit/plugins/google/version.py,sha256=4GcbYy7J7gvPMEA4wlPB0BJqg8CjF7HRVjQ-i1EH7M8,600
 livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
 livekit/plugins/google/beta/realtime/__init__.py,sha256=XnJpNIN6NRm7Y4hH2RNA8Xt-tTmkZEKCs_zzU3_koBI,251
 livekit/plugins/google/beta/realtime/api_proto.py,sha256=IHYBryuzpfGQD86Twlfq6qxrBhFHptf_IvOk36Wxo1M,2156
-livekit/plugins/google/beta/realtime/realtime_api.py,sha256=OxrbWnUOT_oFdrMruvLPHgEoXlOr6M5oGym9b2Iqz48,15958
-livekit_plugins_google-0.9.0.dist-info/METADATA,sha256=tB70OQMa7JtWLqRi1TMDUpv4y0TZEk0L609BN6y0x48,1841
-livekit_plugins_google-0.9.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-livekit_plugins_google-0.9.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_google-0.9.0.dist-info/RECORD,,
+livekit/plugins/google/beta/realtime/realtime_api.py,sha256=YUEf3iR9dIctnXRqev_qKSBM_plqcYKudodFO8nADJY,15966
+livekit_plugins_google-0.9.1.dist-info/METADATA,sha256=y5d0OEdbkoGk0IPGURiDZbt6e6sWhsxOU2cioNrPu7w,2056
+livekit_plugins_google-0.9.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+livekit_plugins_google-0.9.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_google-0.9.1.dist-info/RECORD,,

{livekit_plugins_google-0.9.0.dist-info → livekit_plugins_google-0.9.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.6.0)
+Generator: setuptools (75.8.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{livekit_plugins_google-0.9.0.dist-info → livekit_plugins_google-0.9.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

livekit-plugins-google 0.9.0__py3-none-any.whl → 0.9.1__py3-none-any.whl

livekit-plugins-google 0.9.0__py3-none-any.whl → 0.9.1__py3-none-any.whl