livekit-plugins-google 1.2.2__tar.gz → 1.2.4__tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-google might be problematic. Click here for more details.

Files changed (19)
  1. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/PKG-INFO +2 -2
  2. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/beta/realtime/api_proto.py +1 -0
  3. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/beta/realtime/realtime_api.py +17 -8
  4. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/models.py +123 -30
  5. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/stt.py +6 -2
  6. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/version.py +1 -1
  7. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/pyproject.toml +1 -1
  8. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/.gitignore +0 -0
  9. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/README.md +0 -0
  10. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/__init__.py +0 -0
  11. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/beta/__init__.py +0 -0
  12. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/beta/gemini_tts.py +0 -0
  13. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/beta/realtime/__init__.py +0 -0
  14. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/llm.py +0 -0
  15. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/log.py +0 -0
  16. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/py.typed +0 -0
  17. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/tools.py +0 -0
  18. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/tts.py +0 -0
  19. {livekit_plugins_google-1.2.2 → livekit_plugins_google-1.2.4}/livekit/plugins/google/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 1.2.2
3
+ Version: 1.2.4
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
22
22
  Requires-Dist: google-cloud-speech<3,>=2
23
23
  Requires-Dist: google-cloud-texttospeech<3,>=2.27
24
24
  Requires-Dist: google-genai>=v1.23.0
25
- Requires-Dist: livekit-agents>=1.2.2
25
+ Requires-Dist: livekit-agents>=1.2.4
26
26
  Description-Content-Type: text/markdown
27
27
 
28
28
  # Google AI plugin for LiveKit Agents
@@ -9,6 +9,7 @@ LiveAPIModels = Literal[
9
9
  "gemini-2.0-flash-exp",
10
10
  # models supported on Gemini API
11
11
  "gemini-2.0-flash-live-001",
12
+ "gemini-live-2.5-flash-preview",
12
13
  "gemini-2.5-flash-preview-native-audio-dialog",
13
14
  "gemini-2.5-flash-exp-native-audio-thinking-dialog",
14
15
  ]
@@ -8,6 +8,7 @@ import time
8
8
  import weakref
9
9
  from collections.abc import Iterator
10
10
  from dataclasses import dataclass, field
11
+ from typing import Literal
11
12
 
12
13
  from google import genai
13
14
  from google.genai import types
@@ -542,7 +543,12 @@ class RealtimeSession(llm.RealtimeSession):
542
543
  self.start_user_activity()
543
544
 
544
545
  def truncate(
545
- self, *, message_id: str, audio_end_ms: int, audio_transcript: NotGivenOr[str] = NOT_GIVEN
546
+ self,
547
+ *,
548
+ message_id: str,
549
+ modalities: list[Literal["text", "audio"]],
550
+ audio_end_ms: int,
551
+ audio_transcript: NotGivenOr[str] = NOT_GIVEN,
546
552
  ) -> None:
547
553
  logger.warning("truncate is not supported by the Google Realtime API.")
548
554
  pass
@@ -786,10 +792,6 @@ class RealtimeSession(llm.RealtimeSession):
786
792
  logger.warning("starting new generation while another is active. Finalizing previous.")
787
793
  self._mark_current_generation_done()
788
794
 
789
- # emit input_speech_started event before starting a new generation
790
- # to interrupt the previous audio playout if any
791
- self._handle_input_speech_started()
792
-
793
795
  response_id = utils.shortuuid("GR_")
794
796
  self._current_generation = _ResponseGeneration(
795
797
  message_ch=utils.aio.Chan[llm.MessageGeneration](),
@@ -803,11 +805,16 @@ class RealtimeSession(llm.RealtimeSession):
803
805
  if not self._realtime_model.capabilities.audio_output:
804
806
  self._current_generation.audio_ch.close()
805
807
 
808
+ msg_modalities = asyncio.Future[list[Literal["text", "audio"]]]()
809
+ msg_modalities.set_result(
810
+ ["audio", "text"] if self._realtime_model.capabilities.audio_output else ["text"]
811
+ )
806
812
  self._current_generation.message_ch.send_nowait(
807
813
  llm.MessageGeneration(
808
814
  message_id=response_id,
809
815
  text_stream=self._current_generation.text_ch,
810
816
  audio_stream=self._current_generation.audio_ch,
817
+ modalities=msg_modalities,
811
818
  )
812
819
  )
813
820
 
@@ -821,6 +828,10 @@ class RealtimeSession(llm.RealtimeSession):
821
828
  generation_event.user_initiated = True
822
829
  self._pending_generation_fut.set_result(generation_event)
823
830
  self._pending_generation_fut = None
831
+ else:
832
+ # emit input_speech_started event before starting an agent initiated generation
833
+ # to interrupt the previous audio playout if any
834
+ self._handle_input_speech_started()
824
835
 
825
836
  self.emit("generation_created", generation_event)
826
837
 
@@ -936,9 +947,7 @@ class RealtimeSession(llm.RealtimeSession):
936
947
  def _handle_input_speech_stopped(self) -> None:
937
948
  self.emit(
938
949
  "input_speech_stopped",
939
- llm.InputSpeechStoppedEvent(
940
- user_transcription_enabled=self._realtime_model.capabilities.user_transcription
941
- ),
950
+ llm.InputSpeechStoppedEvent(user_transcription_enabled=False),
942
951
  )
943
952
 
944
953
  def _handle_tool_calls(self, tool_call: types.LiveServerToolCall) -> None:
@@ -15,81 +15,174 @@ SpeechModels = Literal[
15
15
  ]
16
16
 
17
17
  SpeechLanguages = Literal[
18
- "en-US",
19
- "ja-JP",
20
- "en-IN",
21
- "en-GB",
22
- "hi-IN",
23
18
  "af-ZA",
24
- "sq-AL",
25
19
  "am-ET",
20
+ "ar-AE",
21
+ "ar-BH",
22
+ "ar-DZ",
26
23
  "ar-EG",
27
- "hy-AM",
24
+ "ar-IL",
25
+ "ar-IQ",
26
+ "ar-JO",
27
+ "ar-KW",
28
+ "ar-LB",
29
+ "ar-MA",
30
+ "ar-MR",
31
+ "ar-OM",
32
+ "ar-PS",
33
+ "ar-QA",
34
+ "ar-SA",
35
+ "ar-TN",
36
+ "ar-YE",
37
+ "as-IN",
28
38
  "ast-ES",
29
39
  "az-AZ",
30
- "eu-ES",
31
40
  "be-BY",
32
- "bs-BA",
33
41
  "bg-BG",
34
- "my-MM",
42
+ "bn-BD",
43
+ "bn-IN",
44
+ "bs-BA",
35
45
  "ca-ES",
36
46
  "ceb-PH",
37
47
  "ckb-IQ",
38
- "zh-Hans-CN",
39
- "yue-Hant-HK",
40
- "zh-TW",
41
- "hr-HR",
48
+ "cmn-Hans-CN",
49
+ "cmn-Hant-TW",
42
50
  "cs-CZ",
51
+ "cy-GB",
43
52
  "da-DK",
44
- "nl-NL",
53
+ "de-AT",
54
+ "de-CH",
55
+ "de-DE",
56
+ "el-GR",
45
57
  "en-AU",
58
+ "en-CA",
59
+ "en-GB",
60
+ "en-HK",
61
+ "en-IE",
62
+ "en-IN",
63
+ "en-NZ",
64
+ "en-PK",
65
+ "en-SG",
66
+ "en-US",
67
+ "es-419",
68
+ "es-AR",
69
+ "es-BO",
70
+ "es-CL",
71
+ "es-CO",
72
+ "es-CR",
73
+ "es-DO",
74
+ "es-EC",
75
+ "es-ES",
76
+ "es-GT",
77
+ "es-HN",
78
+ "es-MX",
79
+ "es-NI",
80
+ "es-PA",
81
+ "es-PE",
82
+ "es-PR",
83
+ "es-SV",
84
+ "es-US",
85
+ "es-UY",
86
+ "es-VE",
46
87
  "et-EE",
47
- "fil-PH",
88
+ "eu-ES",
89
+ "fa-IR",
90
+ "ff-SN",
48
91
  "fi-FI",
92
+ "fil-PH",
93
+ "fr-BE",
49
94
  "fr-CA",
95
+ "fr-CH",
50
96
  "fr-FR",
97
+ "ga-IE",
51
98
  "gl-ES",
52
- "ka-GE",
53
- "de-DE",
54
- "el-GR",
55
99
  "gu-IN",
56
100
  "ha-NG",
57
- "iw-IL",
58
101
  "hi-IN",
102
+ "hr-HR",
59
103
  "hu-HU",
60
- "is-IS",
104
+ "hy-AM",
61
105
  "id-ID",
106
+ "ig-NG",
107
+ "is-IS",
108
+ "it-CH",
62
109
  "it-IT",
110
+ "iw-IL",
63
111
  "ja-JP",
64
112
  "jv-ID",
65
- "kea-CV",
113
+ "ka-GE",
66
114
  "kam-KE",
67
- "kn-IN",
115
+ "kea-CV",
68
116
  "kk-KZ",
69
117
  "km-KH",
118
+ "kn-IN",
70
119
  "ko-KR",
71
120
  "ky-KG",
72
- "lo-LA",
73
- "lv-LV",
121
+ "lb-LU",
122
+ "lg-UG",
74
123
  "ln-CD",
124
+ "lo-LA",
75
125
  "lt-LT",
76
126
  "luo-KE",
77
- "lb-LU",
127
+ "lv-LV",
128
+ "mi-NZ",
78
129
  "mk-MK",
130
+ "ml-IN",
131
+ "mn-MN",
132
+ "mr-IN",
133
+ "ms-MY",
134
+ "mt-MT",
135
+ "my-MM",
136
+ "ne-NP",
137
+ "nl-BE",
138
+ "nl-NL",
79
139
  "no-NO",
140
+ "nso-ZA",
141
+ "ny-MW",
142
+ "oc-FR",
143
+ "om-ET",
144
+ "or-IN",
145
+ "pa-Guru-IN",
80
146
  "pl-PL",
147
+ "ps-AF",
81
148
  "pt-BR",
82
149
  "pt-PT",
83
150
  "ro-RO",
84
151
  "ru-RU",
85
- "es-CO",
86
- "es-MX",
87
- "es-US",
152
+ "rup-BG",
153
+ "rw-RW",
154
+ "sd-IN",
155
+ "si-LK",
156
+ "sk-SK",
157
+ "sl-SI",
158
+ "sn-ZW",
159
+ "so-SO",
160
+ "sq-AL",
161
+ "sr-RS",
162
+ "ss-Latn-ZA",
163
+ "st-ZA",
164
+ "su-ID",
165
+ "sv-SE",
166
+ "sw",
167
+ "sw-KE",
168
+ "ta-IN",
169
+ "te-IN",
170
+ "tg-TJ",
88
171
  "th-TH",
172
+ "tn-Latn-ZA",
89
173
  "tr-TR",
174
+ "ts-ZA",
90
175
  "uk-UA",
176
+ "umb-AO",
177
+ "ur-PK",
178
+ "uz-UZ",
179
+ "ve-ZA",
91
180
  "vi-VN",
92
- "da-DK",
181
+ "wo-SN",
182
+ "xh-ZA",
183
+ "yo-NG",
184
+ "yue-Hant-HK",
185
+ "zu-ZA",
93
186
  ]
94
187
 
95
188
  Gender = Literal["male", "female", "neutral"]
@@ -66,6 +66,7 @@ class STTOptions:
66
66
  interim_results: bool
67
67
  punctuate: bool
68
68
  spoken_punctuation: bool
69
+ enable_word_time_offsets: bool
69
70
  model: SpeechModels | str
70
71
  sample_rate: int
71
72
  min_confidence_threshold: float
@@ -97,6 +98,7 @@ class STT(stt.STT):
97
98
  interim_results: bool = True,
98
99
  punctuate: bool = True,
99
100
  spoken_punctuation: bool = False,
101
+ enable_word_time_offsets: bool = True,
100
102
  model: SpeechModels | str = "latest_long",
101
103
  location: str = "global",
102
104
  sample_rate: int = 16000,
@@ -119,6 +121,7 @@ class STT(stt.STT):
119
121
  interim_results(bool): whether to return interim results (default: True)
120
122
  punctuate(bool): whether to punctuate the audio (default: True)
121
123
  spoken_punctuation(bool): whether to use spoken punctuation (default: False)
124
+ enable_word_time_offsets(bool): whether to enable word time offsets (default: True)
122
125
  model(SpeechModels): the model to use for recognition default: "latest_long"
123
126
  location(str): the location to use for recognition default: "global"
124
127
  sample_rate(int): the sample rate of the audio default: 16000
@@ -158,6 +161,7 @@ class STT(stt.STT):
158
161
  interim_results=interim_results,
159
162
  punctuate=punctuate,
160
163
  spoken_punctuation=spoken_punctuation,
164
+ enable_word_time_offsets=enable_word_time_offsets,
161
165
  model=model,
162
166
  sample_rate=sample_rate,
163
167
  min_confidence_threshold=min_confidence_threshold,
@@ -238,7 +242,7 @@ class STT(stt.STT):
238
242
  features=cloud_speech.RecognitionFeatures(
239
243
  enable_automatic_punctuation=config.punctuate,
240
244
  enable_spoken_punctuation=config.spoken_punctuation,
241
- enable_word_time_offsets=True,
245
+ enable_word_time_offsets=config.enable_word_time_offsets,
242
246
  ),
243
247
  model=config.model,
244
248
  language_codes=config.languages,
@@ -490,7 +494,7 @@ class SpeechStream(stt.SpeechStream):
490
494
  model=self._config.model,
491
495
  features=cloud_speech.RecognitionFeatures(
492
496
  enable_automatic_punctuation=self._config.punctuate,
493
- enable_word_time_offsets=True,
497
+ enable_word_time_offsets=self._config.enable_word_time_offsets,
494
498
  enable_spoken_punctuation=self._config.spoken_punctuation,
495
499
  ),
496
500
  ),
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.2"
15
+ __version__ = "1.2.4"
@@ -27,7 +27,7 @@ dependencies = [
27
27
  "google-cloud-speech >= 2, < 3",
28
28
  "google-cloud-texttospeech >= 2.27, < 3",
29
29
  "google-genai >= v1.23.0",
30
- "livekit-agents>=1.2.2",
30
+ "livekit-agents>=1.2.4",
31
31
  ]
32
32
 
33
33
  [project.urls]