PyPI - livekit-plugins-google - Version diff - 1.2.1.tar.gz → 1.2.3.tar.gz - Mend

livekit-plugins-google 1.2.1.tar.gz → 1.2.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-google might be problematic. Click here for more details.

Files changed (19)

{livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/.gitignore RENAMED Viewed

@@ -169,4 +169,7 @@ node_modules
 credentials.json
 pyrightconfig.json
-docs/
+docs/
+# Database files
+*.db

{livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.2.1
+Version: 1.2.3
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2.27
 Requires-Dist: google-genai>=v1.23.0
-Requires-Dist: livekit-agents>=1.2.1
+Requires-Dist: livekit-agents>=1.2.3
 Description-Content-Type: text/markdown
 # Google AI plugin for LiveKit Agents

{livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/beta/realtime/api_proto.py RENAMED Viewed

@@ -9,6 +9,7 @@ LiveAPIModels = Literal[
     "gemini-2.0-flash-exp",
     # models supported on Gemini API
     "gemini-2.0-flash-live-001",
+    "gemini-live-2.5-flash-preview",
     "gemini-2.5-flash-preview-native-audio-dialog",
     "gemini-2.5-flash-exp-native-audio-thinking-dialog",
 ]

{livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/beta/realtime/realtime_api.py RENAMED Viewed

@@ -8,6 +8,7 @@ import time
 import weakref
 from collections.abc import Iterator
 from dataclasses import dataclass, field
+from typing import Literal
 from google import genai
 from google.genai import types
@@ -542,7 +543,12 @@ class RealtimeSession(llm.RealtimeSession):
         self.start_user_activity()
     def truncate(
-        self, *, message_id: str, audio_end_ms: int, audio_transcript: NotGivenOr[str] = NOT_GIVEN
+        self,
+        *,
+        message_id: str,
+        modalities: list[Literal["text", "audio"]],
+        audio_end_ms: int,
+        audio_transcript: NotGivenOr[str] = NOT_GIVEN,
     ) -> None:
         logger.warning("truncate is not supported by the Google Realtime API.")
         pass
@@ -799,11 +805,16 @@ class RealtimeSession(llm.RealtimeSession):
         if not self._realtime_model.capabilities.audio_output:
             self._current_generation.audio_ch.close()
+        msg_modalities = asyncio.Future[list[Literal["text", "audio"]]]()
+        msg_modalities.set_result(
+            ["audio", "text"] if self._realtime_model.capabilities.audio_output else ["text"]
+        )
         self._current_generation.message_ch.send_nowait(
             llm.MessageGeneration(
                 message_id=response_id,
                 text_stream=self._current_generation.text_ch,
                 audio_stream=self._current_generation.audio_ch,
+                modalities=msg_modalities,
             )
         )
@@ -817,6 +828,10 @@ class RealtimeSession(llm.RealtimeSession):
             generation_event.user_initiated = True
             self._pending_generation_fut.set_result(generation_event)
             self._pending_generation_fut = None
+        else:
+            # emit input_speech_started event before starting an agent initiated generation
+            # to interrupt the previous audio playout if any
+            self._handle_input_speech_started()
         self.emit("generation_created", generation_event)
@@ -882,6 +897,9 @@ class RealtimeSession(llm.RealtimeSession):
         if not self._current_generation or self._current_generation._done:
             return
+        # emit input_speech_stopped event after the generation is done
+        self._handle_input_speech_stopped()
         gen = self._current_generation
         # The only way we'd know that the transcription is complete is by when they are
@@ -926,6 +944,12 @@ class RealtimeSession(llm.RealtimeSession):
     def _handle_input_speech_started(self) -> None:
         self.emit("input_speech_started", llm.InputSpeechStartedEvent())
+    def _handle_input_speech_stopped(self) -> None:
+        self.emit(
+            "input_speech_stopped",
+            llm.InputSpeechStoppedEvent(user_transcription_enabled=False),
+        )
     def _handle_tool_calls(self, tool_call: types.LiveServerToolCall) -> None:
         if not self._current_generation:
             logger.warning("received tool call but no active generation.")

{livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/version.py RENAMED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.2.1"
+__version__ = "1.2.3"

{livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/pyproject.toml RENAMED Viewed

@@ -27,7 +27,7 @@ dependencies = [
     "google-cloud-speech >= 2, < 3",
     "google-cloud-texttospeech >= 2.27, < 3",
     "google-genai >= v1.23.0",
-    "livekit-agents>=1.2.1",
+    "livekit-agents>=1.2.3",
 ]
 [project.urls]