livekit-plugins-google 1.2.1__tar.gz → 1.2.3__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-google might be problematic. Click here for more details.

Files changed (19)
  1. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/.gitignore +4 -1
  2. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/PKG-INFO +2 -2
  3. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/beta/realtime/api_proto.py +1 -0
  4. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/beta/realtime/realtime_api.py +25 -1
  5. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/version.py +1 -1
  6. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/pyproject.toml +1 -1
  7. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/README.md +0 -0
  8. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/__init__.py +0 -0
  9. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/beta/__init__.py +0 -0
  10. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/beta/gemini_tts.py +0 -0
  11. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/beta/realtime/__init__.py +0 -0
  12. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/llm.py +0 -0
  13. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/log.py +0 -0
  14. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/models.py +0 -0
  15. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/py.typed +0 -0
  16. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/stt.py +0 -0
  17. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/tools.py +0 -0
  18. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/tts.py +0 -0
  19. {livekit_plugins_google-1.2.1 → livekit_plugins_google-1.2.3}/livekit/plugins/google/utils.py +0 -0
@@ -169,4 +169,7 @@ node_modules
169
169
 
170
170
  credentials.json
171
171
  pyrightconfig.json
172
- docs/
172
+ docs/
173
+
174
+ # Database files
175
+ *.db
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 1.2.1
3
+ Version: 1.2.3
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
22
22
  Requires-Dist: google-cloud-speech<3,>=2
23
23
  Requires-Dist: google-cloud-texttospeech<3,>=2.27
24
24
  Requires-Dist: google-genai>=v1.23.0
25
- Requires-Dist: livekit-agents>=1.2.1
25
+ Requires-Dist: livekit-agents>=1.2.3
26
26
  Description-Content-Type: text/markdown
27
27
 
28
28
  # Google AI plugin for LiveKit Agents
@@ -9,6 +9,7 @@ LiveAPIModels = Literal[
9
9
  "gemini-2.0-flash-exp",
10
10
  # models supported on Gemini API
11
11
  "gemini-2.0-flash-live-001",
12
+ "gemini-live-2.5-flash-preview",
12
13
  "gemini-2.5-flash-preview-native-audio-dialog",
13
14
  "gemini-2.5-flash-exp-native-audio-thinking-dialog",
14
15
  ]
@@ -8,6 +8,7 @@ import time
8
8
  import weakref
9
9
  from collections.abc import Iterator
10
10
  from dataclasses import dataclass, field
11
+ from typing import Literal
11
12
 
12
13
  from google import genai
13
14
  from google.genai import types
@@ -542,7 +543,12 @@ class RealtimeSession(llm.RealtimeSession):
542
543
  self.start_user_activity()
543
544
 
544
545
  def truncate(
545
- self, *, message_id: str, audio_end_ms: int, audio_transcript: NotGivenOr[str] = NOT_GIVEN
546
+ self,
547
+ *,
548
+ message_id: str,
549
+ modalities: list[Literal["text", "audio"]],
550
+ audio_end_ms: int,
551
+ audio_transcript: NotGivenOr[str] = NOT_GIVEN,
546
552
  ) -> None:
547
553
  logger.warning("truncate is not supported by the Google Realtime API.")
548
554
  pass
@@ -799,11 +805,16 @@ class RealtimeSession(llm.RealtimeSession):
799
805
  if not self._realtime_model.capabilities.audio_output:
800
806
  self._current_generation.audio_ch.close()
801
807
 
808
+ msg_modalities = asyncio.Future[list[Literal["text", "audio"]]]()
809
+ msg_modalities.set_result(
810
+ ["audio", "text"] if self._realtime_model.capabilities.audio_output else ["text"]
811
+ )
802
812
  self._current_generation.message_ch.send_nowait(
803
813
  llm.MessageGeneration(
804
814
  message_id=response_id,
805
815
  text_stream=self._current_generation.text_ch,
806
816
  audio_stream=self._current_generation.audio_ch,
817
+ modalities=msg_modalities,
807
818
  )
808
819
  )
809
820
 
@@ -817,6 +828,10 @@ class RealtimeSession(llm.RealtimeSession):
817
828
  generation_event.user_initiated = True
818
829
  self._pending_generation_fut.set_result(generation_event)
819
830
  self._pending_generation_fut = None
831
+ else:
832
+ # emit input_speech_started event before starting an agent initiated generation
833
+ # to interrupt the previous audio playout if any
834
+ self._handle_input_speech_started()
820
835
 
821
836
  self.emit("generation_created", generation_event)
822
837
 
@@ -882,6 +897,9 @@ class RealtimeSession(llm.RealtimeSession):
882
897
  if not self._current_generation or self._current_generation._done:
883
898
  return
884
899
 
900
+ # emit input_speech_stopped event after the generation is done
901
+ self._handle_input_speech_stopped()
902
+
885
903
  gen = self._current_generation
886
904
 
887
905
  # The only way we'd know that the transcription is complete is by when they are
@@ -926,6 +944,12 @@ class RealtimeSession(llm.RealtimeSession):
926
944
  def _handle_input_speech_started(self) -> None:
927
945
  self.emit("input_speech_started", llm.InputSpeechStartedEvent())
928
946
 
947
+ def _handle_input_speech_stopped(self) -> None:
948
+ self.emit(
949
+ "input_speech_stopped",
950
+ llm.InputSpeechStoppedEvent(user_transcription_enabled=False),
951
+ )
952
+
929
953
  def _handle_tool_calls(self, tool_call: types.LiveServerToolCall) -> None:
930
954
  if not self._current_generation:
931
955
  logger.warning("received tool call but no active generation.")
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.1"
15
+ __version__ = "1.2.3"
@@ -27,7 +27,7 @@ dependencies = [
27
27
  "google-cloud-speech >= 2, < 3",
28
28
  "google-cloud-texttospeech >= 2.27, < 3",
29
29
  "google-genai >= v1.23.0",
30
- "livekit-agents>=1.2.1",
30
+ "livekit-agents>=1.2.3",
31
31
  ]
32
32
 
33
33
  [project.urls]