PyPI - livekit-plugins-google - Versions diffs - 1.2.2__tar.gz → 1.2.4__tar.gz - Mend

livekit-plugins-google 1.2.2.tar.gz → 1.2.4.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-google might be problematic. Click here for more details.

Files changed (19) hide show

{livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.2.2
+Version: 1.2.4
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2.27
 Requires-Dist: google-genai>=v1.23.0
-Requires-Dist: livekit-agents>=1.2.2
+Requires-Dist: livekit-agents>=1.2.4
 Description-Content-Type: text/markdown
 # Google AI plugin for LiveKit Agents

{livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/beta/realtime/api_proto.py RENAMED Viewed

@@ -9,6 +9,7 @@ LiveAPIModels = Literal[
     "gemini-2.0-flash-exp",
     # models supported on Gemini API
     "gemini-2.0-flash-live-001",
+    "gemini-live-2.5-flash-preview",
     "gemini-2.5-flash-preview-native-audio-dialog",
     "gemini-2.5-flash-exp-native-audio-thinking-dialog",
 ]

{livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/beta/realtime/realtime_api.py RENAMED Viewed

@@ -8,6 +8,7 @@ import time
 import weakref
 from collections.abc import Iterator
 from dataclasses import dataclass, field
+from typing import Literal
 from google import genai
 from google.genai import types
@@ -542,7 +543,12 @@ class RealtimeSession(llm.RealtimeSession):
         self.start_user_activity()
     def truncate(
-        self, *, message_id: str, audio_end_ms: int, audio_transcript: NotGivenOr[str] = NOT_GIVEN
+        self,
+        *,
+        message_id: str,
+        modalities: list[Literal["text", "audio"]],
+        audio_end_ms: int,
+        audio_transcript: NotGivenOr[str] = NOT_GIVEN,
     ) -> None:
         logger.warning("truncate is not supported by the Google Realtime API.")
         pass
@@ -786,10 +792,6 @@ class RealtimeSession(llm.RealtimeSession):
             logger.warning("starting new generation while another is active. Finalizing previous.")
             self._mark_current_generation_done()
-        # emit input_speech_started event before starting a new generation
-        # to interrupt the previous audio playout if any
-        self._handle_input_speech_started()
         response_id = utils.shortuuid("GR_")
         self._current_generation = _ResponseGeneration(
             message_ch=utils.aio.Chan[llm.MessageGeneration](),
@@ -803,11 +805,16 @@ class RealtimeSession(llm.RealtimeSession):
         if not self._realtime_model.capabilities.audio_output:
             self._current_generation.audio_ch.close()
+        msg_modalities = asyncio.Future[list[Literal["text", "audio"]]]()
+        msg_modalities.set_result(
+            ["audio", "text"] if self._realtime_model.capabilities.audio_output else ["text"]
+        )
         self._current_generation.message_ch.send_nowait(
             llm.MessageGeneration(
                 message_id=response_id,
                 text_stream=self._current_generation.text_ch,
                 audio_stream=self._current_generation.audio_ch,
+                modalities=msg_modalities,
             )
         )
@@ -821,6 +828,10 @@ class RealtimeSession(llm.RealtimeSession):
             generation_event.user_initiated = True
             self._pending_generation_fut.set_result(generation_event)
             self._pending_generation_fut = None
+        else:
+            # emit input_speech_started event before starting an agent initiated generation
+            # to interrupt the previous audio playout if any
+            self._handle_input_speech_started()
         self.emit("generation_created", generation_event)
@@ -936,9 +947,7 @@ class RealtimeSession(llm.RealtimeSession):
     def _handle_input_speech_stopped(self) -> None:
         self.emit(
             "input_speech_stopped",
-            llm.InputSpeechStoppedEvent(
-                user_transcription_enabled=self._realtime_model.capabilities.user_transcription
-            ),
+            llm.InputSpeechStoppedEvent(user_transcription_enabled=False),
         )
     def _handle_tool_calls(self, tool_call: types.LiveServerToolCall) -> None:

{livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/models.py RENAMED Viewed

@@ -15,81 +15,174 @@ SpeechModels = Literal[
 ]
 SpeechLanguages = Literal[
-    "en-US",
-    "ja-JP",
-    "en-IN",
-    "en-GB",
-    "hi-IN",
     "af-ZA",
-    "sq-AL",
     "am-ET",
+    "ar-AE",
+    "ar-BH",
+    "ar-DZ",
     "ar-EG",
-    "hy-AM",
+    "ar-IL",
+    "ar-IQ",
+    "ar-JO",
+    "ar-KW",
+    "ar-LB",
+    "ar-MA",
+    "ar-MR",
+    "ar-OM",
+    "ar-PS",
+    "ar-QA",
+    "ar-SA",
+    "ar-TN",
+    "ar-YE",
+    "as-IN",
     "ast-ES",
     "az-AZ",
-    "eu-ES",
     "be-BY",
-    "bs-BA",
     "bg-BG",
-    "my-MM",
+    "bn-BD",
+    "bn-IN",
+    "bs-BA",
     "ca-ES",
     "ceb-PH",
     "ckb-IQ",
-    "zh-Hans-CN",
-    "yue-Hant-HK",
-    "zh-TW",
-    "hr-HR",
+    "cmn-Hans-CN",
+    "cmn-Hant-TW",
     "cs-CZ",
+    "cy-GB",
     "da-DK",
-    "nl-NL",
+    "de-AT",
+    "de-CH",
+    "de-DE",
+    "el-GR",
     "en-AU",
+    "en-CA",
+    "en-GB",
+    "en-HK",
+    "en-IE",
+    "en-IN",
+    "en-NZ",
+    "en-PK",
+    "en-SG",
+    "en-US",
+    "es-419",
+    "es-AR",
+    "es-BO",
+    "es-CL",
+    "es-CO",
+    "es-CR",
+    "es-DO",
+    "es-EC",
+    "es-ES",
+    "es-GT",
+    "es-HN",
+    "es-MX",
+    "es-NI",
+    "es-PA",
+    "es-PE",
+    "es-PR",
+    "es-SV",
+    "es-US",
+    "es-UY",
+    "es-VE",
     "et-EE",
-    "fil-PH",
+    "eu-ES",
+    "fa-IR",
+    "ff-SN",
     "fi-FI",
+    "fil-PH",
+    "fr-BE",
     "fr-CA",
+    "fr-CH",
     "fr-FR",
+    "ga-IE",
     "gl-ES",
-    "ka-GE",
-    "de-DE",
-    "el-GR",
     "gu-IN",
     "ha-NG",
-    "iw-IL",
     "hi-IN",
+    "hr-HR",
     "hu-HU",
-    "is-IS",
+    "hy-AM",
     "id-ID",
+    "ig-NG",
+    "is-IS",
+    "it-CH",
     "it-IT",
+    "iw-IL",
     "ja-JP",
     "jv-ID",
-    "kea-CV",
+    "ka-GE",
     "kam-KE",
-    "kn-IN",
+    "kea-CV",
     "kk-KZ",
     "km-KH",
+    "kn-IN",
     "ko-KR",
     "ky-KG",
-    "lo-LA",
-    "lv-LV",
+    "lb-LU",
+    "lg-UG",
     "ln-CD",
+    "lo-LA",
     "lt-LT",
     "luo-KE",
-    "lb-LU",
+    "lv-LV",
+    "mi-NZ",
     "mk-MK",
+    "ml-IN",
+    "mn-MN",
+    "mr-IN",
+    "ms-MY",
+    "mt-MT",
+    "my-MM",
+    "ne-NP",
+    "nl-BE",
+    "nl-NL",
     "no-NO",
+    "nso-ZA",
+    "ny-MW",
+    "oc-FR",
+    "om-ET",
+    "or-IN",
+    "pa-Guru-IN",
     "pl-PL",
+    "ps-AF",
     "pt-BR",
     "pt-PT",
     "ro-RO",
     "ru-RU",
-    "es-CO",
-    "es-MX",
-    "es-US",
+    "rup-BG",
+    "rw-RW",
+    "sd-IN",
+    "si-LK",
+    "sk-SK",
+    "sl-SI",
+    "sn-ZW",
+    "so-SO",
+    "sq-AL",
+    "sr-RS",
+    "ss-Latn-ZA",
+    "st-ZA",
+    "su-ID",
+    "sv-SE",
+    "sw",
+    "sw-KE",
+    "ta-IN",
+    "te-IN",
+    "tg-TJ",
     "th-TH",
+    "tn-Latn-ZA",
     "tr-TR",
+    "ts-ZA",
     "uk-UA",
+    "umb-AO",
+    "ur-PK",
+    "uz-UZ",
+    "ve-ZA",
     "vi-VN",
-    "da-DK",
+    "wo-SN",
+    "xh-ZA",
+    "yo-NG",
+    "yue-Hant-HK",
+    "zu-ZA",
 ]
 Gender = Literal["male", "female", "neutral"]

{livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/stt.py RENAMED Viewed

@@ -66,6 +66,7 @@ class STTOptions:
     interim_results: bool
     punctuate: bool
     spoken_punctuation: bool
+    enable_word_time_offsets: bool
     model: SpeechModels | str
     sample_rate: int
     min_confidence_threshold: float
@@ -97,6 +98,7 @@ class STT(stt.STT):
         interim_results: bool = True,
         punctuate: bool = True,
         spoken_punctuation: bool = False,
+        enable_word_time_offsets: bool = True,
         model: SpeechModels | str = "latest_long",
         location: str = "global",
         sample_rate: int = 16000,
@@ -119,6 +121,7 @@ class STT(stt.STT):
             interim_results(bool): whether to return interim results (default: True)
             punctuate(bool): whether to punctuate the audio (default: True)
             spoken_punctuation(bool): whether to use spoken punctuation (default: False)
+            enable_word_time_offsets(bool): whether to enable word time offsets (default: True)
             model(SpeechModels): the model to use for recognition default: "latest_long"
             location(str): the location to use for recognition default: "global"
             sample_rate(int): the sample rate of the audio default: 16000
@@ -158,6 +161,7 @@ class STT(stt.STT):
             interim_results=interim_results,
             punctuate=punctuate,
             spoken_punctuation=spoken_punctuation,
+            enable_word_time_offsets=enable_word_time_offsets,
             model=model,
             sample_rate=sample_rate,
             min_confidence_threshold=min_confidence_threshold,
@@ -238,7 +242,7 @@ class STT(stt.STT):
             features=cloud_speech.RecognitionFeatures(
                 enable_automatic_punctuation=config.punctuate,
                 enable_spoken_punctuation=config.spoken_punctuation,
-                enable_word_time_offsets=True,
+                enable_word_time_offsets=config.enable_word_time_offsets,
             ),
             model=config.model,
             language_codes=config.languages,
@@ -490,7 +494,7 @@ class SpeechStream(stt.SpeechStream):
                             model=self._config.model,
                             features=cloud_speech.RecognitionFeatures(
                                 enable_automatic_punctuation=self._config.punctuate,
-                                enable_word_time_offsets=True,
+                                enable_word_time_offsets=self._config.enable_word_time_offsets,
                                 enable_spoken_punctuation=self._config.spoken_punctuation,
                             ),
                         ),

{livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/version.py RENAMED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.2.2"
+__version__ = "1.2.4"

{livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/pyproject.toml RENAMED Viewed

@@ -27,7 +27,7 @@ dependencies = [
     "google-cloud-speech >= 2, < 3",
     "google-cloud-texttospeech >= 2.27, < 3",
     "google-genai >= v1.23.0",
-    "livekit-agents>=1.2.2",
+    "livekit-agents>=1.2.4",
 ]
 [project.urls]