PyPI - livekit-plugins-google - Versions diffs - 1.2.2__tar.gz → 1.2.3__tar.gz - Mend

livekit-plugins-google 1.2.2.tar.gz → 1.2.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-google might be problematic. Click here for more details.

Files changed (19) hide show

{livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.2.2
+Version: 1.2.3
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2.27
 Requires-Dist: google-genai>=v1.23.0
-Requires-Dist: livekit-agents>=1.2.2
+Requires-Dist: livekit-agents>=1.2.3
 Description-Content-Type: text/markdown
 # Google AI plugin for LiveKit Agents

{livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.3}/livekit/plugins/google/beta/realtime/api_proto.py RENAMED Viewed

@@ -9,6 +9,7 @@ LiveAPIModels = Literal[
     "gemini-2.0-flash-exp",
     # models supported on Gemini API
     "gemini-2.0-flash-live-001",
+    "gemini-live-2.5-flash-preview",
     "gemini-2.5-flash-preview-native-audio-dialog",
     "gemini-2.5-flash-exp-native-audio-thinking-dialog",
 ]

{livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.3}/livekit/plugins/google/beta/realtime/realtime_api.py RENAMED Viewed

@@ -8,6 +8,7 @@ import time
 import weakref
 from collections.abc import Iterator
 from dataclasses import dataclass, field
+from typing import Literal
 from google import genai
 from google.genai import types
@@ -542,7 +543,12 @@ class RealtimeSession(llm.RealtimeSession):
         self.start_user_activity()
     def truncate(
-        self, *, message_id: str, audio_end_ms: int, audio_transcript: NotGivenOr[str] = NOT_GIVEN
+        self,
+        *,
+        message_id: str,
+        modalities: list[Literal["text", "audio"]],
+        audio_end_ms: int,
+        audio_transcript: NotGivenOr[str] = NOT_GIVEN,
     ) -> None:
         logger.warning("truncate is not supported by the Google Realtime API.")
         pass
@@ -786,10 +792,6 @@ class RealtimeSession(llm.RealtimeSession):
             logger.warning("starting new generation while another is active. Finalizing previous.")
             self._mark_current_generation_done()
-        # emit input_speech_started event before starting a new generation
-        # to interrupt the previous audio playout if any
-        self._handle_input_speech_started()
         response_id = utils.shortuuid("GR_")
         self._current_generation = _ResponseGeneration(
             message_ch=utils.aio.Chan[llm.MessageGeneration](),
@@ -803,11 +805,16 @@ class RealtimeSession(llm.RealtimeSession):
         if not self._realtime_model.capabilities.audio_output:
             self._current_generation.audio_ch.close()
+        msg_modalities = asyncio.Future[list[Literal["text", "audio"]]]()
+        msg_modalities.set_result(
+            ["audio", "text"] if self._realtime_model.capabilities.audio_output else ["text"]
+        )
         self._current_generation.message_ch.send_nowait(
             llm.MessageGeneration(
                 message_id=response_id,
                 text_stream=self._current_generation.text_ch,
                 audio_stream=self._current_generation.audio_ch,
+                modalities=msg_modalities,
             )
         )
@@ -821,6 +828,10 @@ class RealtimeSession(llm.RealtimeSession):
             generation_event.user_initiated = True
             self._pending_generation_fut.set_result(generation_event)
             self._pending_generation_fut = None
+        else:
+            # emit input_speech_started event before starting an agent initiated generation
+            # to interrupt the previous audio playout if any
+            self._handle_input_speech_started()
         self.emit("generation_created", generation_event)
@@ -936,9 +947,7 @@ class RealtimeSession(llm.RealtimeSession):
     def _handle_input_speech_stopped(self) -> None:
         self.emit(
             "input_speech_stopped",
-            llm.InputSpeechStoppedEvent(
-                user_transcription_enabled=self._realtime_model.capabilities.user_transcription
-            ),
+            llm.InputSpeechStoppedEvent(user_transcription_enabled=False),
         )
     def _handle_tool_calls(self, tool_call: types.LiveServerToolCall) -> None:

{livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.3}/livekit/plugins/google/version.py RENAMED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.2.2"
+__version__ = "1.2.3"

{livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.3}/pyproject.toml RENAMED Viewed

@@ -27,7 +27,7 @@ dependencies = [
     "google-cloud-speech >= 2, < 3",
     "google-cloud-texttospeech >= 2.27, < 3",
     "google-genai >= v1.23.0",
-    "livekit-agents>=1.2.2",
+    "livekit-agents>=1.2.3",
 ]
 [project.urls]