PyPI - livekit-plugins-google - Versions diffs - 1.2.6__tar.gz → 1.2.7__tar.gz - Mend

livekit-plugins-google 1.2.6 (tar.gz) → 1.2.7 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-google might be problematic. Click here for more details.

Files changed (19)

{livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.2.6
+Version: 1.2.7
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2.27
 Requires-Dist: google-genai>=v1.23.0
-Requires-Dist: livekit-agents>=1.2.6
+Requires-Dist: livekit-agents>=1.2.7
 Description-Content-Type: text/markdown
 # Google AI plugin for LiveKit Agents

{livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/beta/realtime/realtime_api.py RENAMED Viewed

@@ -708,7 +708,7 @@ class RealtimeSession(llm.RealtimeSession):
                             # reset the flag and still start a new generation in case it has any other content
                             response.server_content.interrupted = False
-                        if response.server_content or response.tool_call:
+                        if self._is_new_generation(response):
                             self._start_new_generation()
                     if response.session_resumption_update:
@@ -1091,3 +1091,16 @@ class RealtimeSession(llm.RealtimeSession):
                 recoverable=recoverable,
             ),
         )
+    def _is_new_generation(self, resp: types.LiveServerMessage) -> bool:
+        if resp.tool_call:
+            return True
+        if (sc := resp.server_content) and (
+            sc.model_turn
+            or (sc.output_transcription and sc.output_transcription.text is not None)
+            or (sc.input_transcription and sc.input_transcription.text is not None)
+        ):
+            return True
+        return False

{livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/llm.py RENAMED Viewed

@@ -157,10 +157,6 @@ class LLM(llm.LLM):
             if _thinking_budget is not None:
                 if not isinstance(_thinking_budget, int):
                     raise ValueError("thinking_budget inside thinking_config must be an integer")
-                if not (0 <= _thinking_budget <= 24576):
-                    raise ValueError(
-                        "thinking_budget inside thinking_config must be between 0 and 24576"
-                    )
         self._opts = _LLMOptions(
             model=model,

{livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/stt.py RENAMED Viewed

@@ -20,6 +20,7 @@ import time
 import weakref
 from collections.abc import AsyncGenerator, AsyncIterable
 from dataclasses import dataclass
+from datetime import timedelta
 from typing import Callable, Union, cast
 from google.api_core.client_options import ClientOptions
@@ -28,6 +29,7 @@ from google.auth import default as gauth_default
 from google.auth.exceptions import DefaultCredentialsError
 from google.cloud.speech_v2 import SpeechAsyncClient
 from google.cloud.speech_v2.types import cloud_speech
+from google.protobuf.duration_pb2 import Duration
 from livekit import rtc
 from livekit.agents import (
     DEFAULT_API_CONNECT_OPTIONS,
@@ -552,6 +554,14 @@ class SpeechStream(stt.SpeechStream):
                 raise APIConnectionError() from e
+def _duration_to_seconds(duration: Duration | timedelta) -> float:
+    # Proto Plus may auto-convert Duration to timedelta; handle both.
+    # https://proto-plus-python.readthedocs.io/en/latest/marshal.html
+    if isinstance(duration, timedelta):
+        return duration.total_seconds()
+    return duration.seconds + duration.nanos / 1e9
 def _recognize_response_to_speech_event(
     resp: cloud_speech.RecognizeResponse,
 ) -> stt.SpeechEvent:
@@ -561,24 +571,31 @@ def _recognize_response_to_speech_event(
         text += result.alternatives[0].transcript
         confidence += result.alternatives[0].confidence
-    # not sure why start_offset and end_offset returns a timedelta
-    start_offset = resp.results[0].alternatives[0].words[0].start_offset
-    end_offset = resp.results[-1].alternatives[0].words[-1].end_offset
+    alternatives = []
-    confidence /= len(resp.results)
-    lg = resp.results[0].language_code
-    return stt.SpeechEvent(
-        type=stt.SpeechEventType.FINAL_TRANSCRIPT,
-        alternatives=[
+    # Google STT may return empty results when spoken_lang != stt_lang
+    if resp.results:
+        try:
+            start_time = _duration_to_seconds(resp.results[0].alternatives[0].words[0].start_offset)
+            end_time = _duration_to_seconds(resp.results[-1].alternatives[0].words[-1].end_offset)
+        except IndexError:
+            # When enable_word_time_offsets=False, there are no "words" to access
+            start_time = end_time = 0
+        confidence /= len(resp.results)
+        lg = resp.results[0].language_code
+        alternatives = [
             stt.SpeechData(
                 language=lg,
-                start_time=start_offset.total_seconds(),  # type: ignore
-                end_time=end_offset.total_seconds(),  # type: ignore
+                start_time=start_time,
+                end_time=end_time,
                 confidence=confidence,
                 text=text,
             )
-        ],
-    )
+        ]
+    return stt.SpeechEvent(type=stt.SpeechEventType.FINAL_TRANSCRIPT, alternatives=alternatives)
 def _streaming_recognize_response_to_speech_data(

{livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/tts.py RENAMED Viewed

@@ -124,7 +124,7 @@ class TTS(tts.TTS):
         )
         if is_given(voice_cloning_key):
             voice_params.voice_clone = texttospeech.VoiceCloneParams(
-                voice_clone_key=voice_cloning_key,
+                voice_cloning_key=voice_cloning_key,
             )
         else:
             voice_params.name = voice_name if is_given(voice_name) else DEFAULT_VOICE_NAME

{livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/livekit/plugins/google/version.py RENAMED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.2.6"
+__version__ = "1.2.7"

{livekit_plugins_google-1.2.6 → livekit_plugins_google-1.2.7}/pyproject.toml RENAMED Viewed

@@ -27,7 +27,7 @@ dependencies = [
     "google-cloud-speech >= 2, < 3",
     "google-cloud-texttospeech >= 2.27, < 3",
     "google-genai >= v1.23.0",
-    "livekit-agents>=1.2.6",
+    "livekit-agents>=1.2.7",
 ]
 [project.urls]