livekit-plugins-google 1.2.14__py3-none-any.whl → 1.2.16__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-google might be problematic. Click here for more details.

@@ -19,14 +19,14 @@ Supports Gemini, Cloud Speech-to-Text, and Cloud Text-to-Speech.
19
19
  See https://docs.livekit.io/agents/integrations/stt/google/ for more information.
20
20
  """
21
21
 
22
- from . import beta
22
+ from . import beta, realtime
23
23
  from .llm import LLM
24
24
  from .stt import STT, SpeechStream
25
25
  from .tools import _LLMTool
26
26
  from .tts import TTS
27
27
  from .version import __version__
28
28
 
29
- __all__ = ["STT", "TTS", "SpeechStream", "__version__", "beta", "LLM", "_LLMTool"]
29
+ __all__ = ["STT", "TTS", "realtime", "SpeechStream", "__version__", "beta", "LLM", "_LLMTool"]
30
30
  from livekit.agents import Plugin
31
31
 
32
32
  from .log import logger
@@ -1,7 +1,7 @@
1
- from . import realtime
1
+ from .. import realtime
2
2
  from .gemini_tts import TTS as GeminiTTS
3
3
 
4
- __all__ = ["realtime", "GeminiTTS"]
4
+ __all__ = ["GeminiTTS", "realtime"]
5
5
 
6
6
  # Cleanup docs of unexported modules
7
7
  _module = dir()
@@ -136,6 +136,10 @@ class LLM(llm.LLM):
136
136
  _, gcp_project = default_async( # type: ignore
137
137
  scopes=["https://www.googleapis.com/auth/cloud-platform"]
138
138
  )
139
+ if not gcp_project or not gcp_location:
140
+ raise ValueError(
141
+ "Project is required for VertexAI via project kwarg or GOOGLE_CLOUD_PROJECT environment variable" # noqa: E501
142
+ )
139
143
  gemini_api_key = None # VertexAI does not require an API key
140
144
 
141
145
  else:
@@ -10,6 +10,7 @@ SpeechModels = Literal[
10
10
  "medical_conversation",
11
11
  "chirp",
12
12
  "chirp_2",
13
+ "chirp_3",
13
14
  "latest_long",
14
15
  "latest_short",
15
16
  ]
@@ -10,11 +10,43 @@ LiveAPIModels = Literal[
10
10
  # models supported on Gemini API
11
11
  "gemini-2.0-flash-live-001",
12
12
  "gemini-live-2.5-flash-preview",
13
+ "gemini-2.5-flash-native-audio-preview-09-2025",
13
14
  "gemini-2.5-flash-preview-native-audio-dialog",
14
15
  "gemini-2.5-flash-exp-native-audio-thinking-dialog",
15
16
  ]
16
17
 
17
- Voice = Literal["Puck", "Charon", "Kore", "Fenrir", "Aoede", "Leda", "Orus", "Zephyr"]
18
+ Voice = Literal[
19
+ "Achernar",
20
+ "Achird",
21
+ "Algenib",
22
+ "Algieba",
23
+ "Alnilam",
24
+ "Aoede",
25
+ "Autonoe",
26
+ "Callirrhoe",
27
+ "Charon",
28
+ "Despina",
29
+ "Enceladus",
30
+ "Erinome",
31
+ "Fenrir",
32
+ "Gacrux",
33
+ "Iapetus",
34
+ "Kore",
35
+ "Laomedeia",
36
+ "Leda",
37
+ "Orus",
38
+ "Pulcherrima",
39
+ "Puck",
40
+ "Rasalgethi",
41
+ "Sadachbia",
42
+ "Sadaltager",
43
+ "Schedar",
44
+ "Sulafat",
45
+ "Umbriel",
46
+ "Vindemiatrix",
47
+ "Zephyr",
48
+ "Zubenelgenubi",
49
+ ]
18
50
 
19
51
 
20
52
  ClientEvents = Union[
@@ -10,6 +10,7 @@ from collections.abc import Iterator
10
10
  from dataclasses import dataclass, field
11
11
  from typing import Literal
12
12
 
13
+ from google.auth._default_async import default_async
13
14
  from google.genai import Client as GenAIClient, types
14
15
  from google.genai.live import AsyncSession
15
16
  from livekit import rtc
@@ -23,11 +24,11 @@ from livekit.agents.types import (
23
24
  NotGivenOr,
24
25
  )
25
26
  from livekit.agents.utils import audio as audio_utils, images, is_given
26
- from livekit.plugins.google.beta.realtime.api_proto import ClientEvents, LiveAPIModels, Voice
27
+ from livekit.plugins.google.realtime.api_proto import ClientEvents, LiveAPIModels, Voice
27
28
 
28
- from ...log import logger
29
- from ...tools import _LLMTool
30
- from ...utils import create_tools_config, get_tool_results_for_realtime, to_fnc_ctx
29
+ from ..log import logger
30
+ from ..tools import _LLMTool
31
+ from ..utils import create_tools_config, get_tool_results_for_realtime, to_fnc_ctx
31
32
 
32
33
  INPUT_AUDIO_SAMPLE_RATE = 16000
33
34
  INPUT_AUDIO_CHANNELS = 1
@@ -78,6 +79,7 @@ class _RealtimeOptions:
78
79
  gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN
79
80
  tool_behavior: NotGivenOr[types.Behavior] = NOT_GIVEN
80
81
  tool_response_scheduling: NotGivenOr[types.FunctionResponseScheduling] = NOT_GIVEN
82
+ thinking_config: NotGivenOr[types.ThinkingConfig] = NOT_GIVEN
81
83
 
82
84
 
83
85
  @dataclass
@@ -144,6 +146,7 @@ class RealtimeModel(llm.RealtimeModel):
144
146
  conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
145
147
  http_options: NotGivenOr[types.HttpOptions] = NOT_GIVEN,
146
148
  _gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
149
+ thinking_config: NotGivenOr[types.ThinkingConfig] = NOT_GIVEN,
147
150
  ) -> None:
148
151
  """
149
152
  Initializes a RealtimeModel instance for interacting with Google's Realtime API.
@@ -180,6 +183,7 @@ class RealtimeModel(llm.RealtimeModel):
180
183
  context_window_compression (ContextWindowCompressionConfig, optional): The configuration for context window compression. Defaults to None.
181
184
  tool_behavior (Behavior, optional): The behavior for tool call. Default behavior is BLOCK in Gemini Realtime API.
182
185
  tool_response_scheduling (FunctionResponseScheduling, optional): The scheduling for tool response. Default scheduling is WHEN_IDLE.
186
+ thinking_config (ThinkingConfig, optional): Native audio thinking configuration.
183
187
  conn_options (APIConnectOptions, optional): The configuration for the API connection. Defaults to DEFAULT_API_CONNECT_OPTIONS.
184
188
  _gemini_tools (list[LLMTool], optional): Gemini-specific tools to use for the session. This parameter is experimental and may change.
185
189
 
@@ -232,6 +236,10 @@ class RealtimeModel(llm.RealtimeModel):
232
236
  )
233
237
 
234
238
  if use_vertexai:
239
+ if not gcp_project:
240
+ _, gcp_project = default_async( # type: ignore
241
+ scopes=["https://www.googleapis.com/auth/cloud-platform"]
242
+ )
235
243
  if not gcp_project or not gcp_location:
236
244
  raise ValueError(
237
245
  "Project is required for VertexAI via project kwarg or GOOGLE_CLOUD_PROJECT environment variable" # noqa: E501
@@ -274,6 +282,7 @@ class RealtimeModel(llm.RealtimeModel):
274
282
  tool_behavior=tool_behavior,
275
283
  conn_options=conn_options,
276
284
  http_options=http_options,
285
+ thinking_config=thinking_config,
277
286
  )
278
287
 
279
288
  self._sessions = weakref.WeakSet[RealtimeSession]()
@@ -510,7 +519,12 @@ class RealtimeSession(llm.RealtimeSession):
510
519
  for f in self._resample_audio(frame):
511
520
  for nf in self._bstream.write(f.data.tobytes()):
512
521
  realtime_input = types.LiveClientRealtimeInput(
513
- media_chunks=[types.Blob(data=nf.data.tobytes(), mime_type="audio/pcm")]
522
+ media_chunks=[
523
+ types.Blob(
524
+ data=nf.data.tobytes(),
525
+ mime_type=f"audio/pcm;rate={INPUT_AUDIO_SAMPLE_RATE}",
526
+ )
527
+ ]
514
528
  )
515
529
  self._send_client_event(realtime_input)
516
530
 
@@ -814,6 +828,9 @@ class RealtimeSession(llm.RealtimeSession):
814
828
  frequency_penalty=self._opts.frequency_penalty
815
829
  if is_given(self._opts.frequency_penalty)
816
830
  else None,
831
+ thinking_config=self._opts.thinking_config
832
+ if is_given(self._opts.thinking_config)
833
+ else None,
817
834
  ),
818
835
  system_instruction=types.Content(parts=[types.Part(text=self._opts.instructions)])
819
836
  if is_given(self._opts.instructions)
@@ -618,17 +618,28 @@ def _streaming_recognize_response_to_speech_data(
618
618
  ) -> stt.SpeechData | None:
619
619
  text = ""
620
620
  confidence = 0.0
621
+ final_result = None
621
622
  for result in resp.results:
622
623
  if len(result.alternatives) == 0:
623
624
  continue
624
- text += result.alternatives[0].transcript
625
- confidence += result.alternatives[0].confidence
626
-
627
- confidence /= len(resp.results)
628
- lg = resp.results[0].language_code
625
+ else:
626
+ if result.is_final:
627
+ final_result = result
628
+ break
629
+ else:
630
+ text += result.alternatives[0].transcript
631
+ confidence += result.alternatives[0].confidence
632
+
633
+ if final_result is not None:
634
+ text = final_result.alternatives[0].transcript
635
+ confidence = final_result.alternatives[0].confidence
636
+ lg = final_result.language_code
637
+ else:
638
+ confidence /= len(resp.results)
639
+ if confidence < min_confidence_threshold:
640
+ return None
641
+ lg = resp.results[0].language_code
629
642
 
630
- if confidence < min_confidence_threshold:
631
- return None
632
643
  if text == "":
633
644
  return None
634
645
 
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.14"
15
+ __version__ = "1.2.16"
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 1.2.14
3
+ Version: 1.2.16
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
7
7
  Project-URL: Source, https://github.com/livekit/agents
8
8
  Author: LiveKit
9
9
  License-Expression: Apache-2.0
10
- Keywords: audio,livekit,realtime,video,webrtc
10
+ Keywords: ai,audio,gemini,google,livekit,realtime,video,voice
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: License :: OSI Approved :: Apache Software License
13
13
  Classifier: Programming Language :: Python :: 3
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
22
22
  Requires-Dist: google-cloud-speech<3,>=2
23
23
  Requires-Dist: google-cloud-texttospeech<3,>=2.27
24
24
  Requires-Dist: google-genai>=v1.23.0
25
- Requires-Dist: livekit-agents>=1.2.14
25
+ Requires-Dist: livekit-agents>=1.2.16
26
26
  Description-Content-Type: text/markdown
27
27
 
28
28
  # Google AI plugin for LiveKit Agents
@@ -0,0 +1,18 @@
1
+ livekit/plugins/google/__init__.py,sha256=bYHN04-Ttynj09POAnFP3mln-wrEc1vanUD_YpoWOE4,1434
2
+ livekit/plugins/google/llm.py,sha256=M2v1sUJVVNtmOOJvuWhHsGygQlCJo73pSyrwVxdjzcA,19198
3
+ livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
4
+ livekit/plugins/google/models.py,sha256=jsXHLSCDw-T5dZXeDE2nMT2lr0GooCYO4y4aW7Htps4,2816
5
+ livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ livekit/plugins/google/stt.py,sha256=fsWoNnpjgLxqY43cx6GbRI-_QLvXmMvD4WczJFjnoOA,26846
7
+ livekit/plugins/google/tools.py,sha256=tD5HVDHO5JfUF029Cx3axHMJec0Gxalkl7s1FDgxLzI,259
8
+ livekit/plugins/google/tts.py,sha256=2Ba4HjAc9RWYL3W4Z2586Ir3bYQGdSH2gfxSR7VsyY4,17454
9
+ livekit/plugins/google/utils.py,sha256=tFByjJ357A1WdCPwBQC4JABR9G5kxX0g7_FuWAIxix4,10002
10
+ livekit/plugins/google/version.py,sha256=6RxW2Q7KoSNRlDtulIUp5F0_o0atksX-Xpp45NaSCaI,601
11
+ livekit/plugins/google/beta/__init__.py,sha256=4q5dx-Y6o9peCDziB03Skf5ngH4PTBsZC86ZawWrgnk,271
12
+ livekit/plugins/google/beta/gemini_tts.py,sha256=SpKorOteQ7GYoGWsxV5YPuGeMexoosmtDXQVz_1ZeLA,8743
13
+ livekit/plugins/google/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
14
+ livekit/plugins/google/realtime/api_proto.py,sha256=oXKKlf0soMK_MA4LcqP8R5iPgpZvmqjb9KxHJFaBpgk,1261
15
+ livekit/plugins/google/realtime/realtime_api.py,sha256=mePWYDAdb3i9l5a-m7JwmTuU_mbtYngR-XRvOYrqxe4,51109
16
+ livekit_plugins_google-1.2.16.dist-info/METADATA,sha256=bBUYb-Z4kIrrK1s3ThbGr-PCt09zTkXuseOQhMjw3eA,1925
17
+ livekit_plugins_google-1.2.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ livekit_plugins_google-1.2.16.dist-info/RECORD,,
@@ -1,18 +0,0 @@
1
- livekit/plugins/google/__init__.py,sha256=XIyZ-iFnRBpaLtOJgVwojlB-a8GjdDugVFcjBpMEww8,1412
2
- livekit/plugins/google/llm.py,sha256=u9ZSSkdouPk0018UdiLfgthgTjjLLrXgseX1zrkeg64,18962
3
- livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
4
- livekit/plugins/google/models.py,sha256=poOvUBvgpqmmQV5EUQsq0RgNIRAq7nH-_IZIcIfPSBI,2801
5
- livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- livekit/plugins/google/stt.py,sha256=i99gqXAvYeuhdJ8wh6UlOqLXj6f5_cIni71EwSR4FGw,26467
7
- livekit/plugins/google/tools.py,sha256=tD5HVDHO5JfUF029Cx3axHMJec0Gxalkl7s1FDgxLzI,259
8
- livekit/plugins/google/tts.py,sha256=2Ba4HjAc9RWYL3W4Z2586Ir3bYQGdSH2gfxSR7VsyY4,17454
9
- livekit/plugins/google/utils.py,sha256=tFByjJ357A1WdCPwBQC4JABR9G5kxX0g7_FuWAIxix4,10002
10
- livekit/plugins/google/version.py,sha256=3ilX8YY1RDy023FFuAB6eNeNvQ8-OJdFmtdMR-6h7Y8,601
11
- livekit/plugins/google/beta/__init__.py,sha256=RvAUdvEiRN-fe4JrgPcN0Jkw1kZR9wPerGMFVjS1Cc0,270
12
- livekit/plugins/google/beta/gemini_tts.py,sha256=SpKorOteQ7GYoGWsxV5YPuGeMexoosmtDXQVz_1ZeLA,8743
13
- livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
14
- livekit/plugins/google/beta/realtime/api_proto.py,sha256=nb_QkVQDEH7h0SKA9vdS3JaL12a6t2Z1ja4SdnxE6a8,814
15
- livekit/plugins/google/beta/realtime/realtime_api.py,sha256=bvGLk75j6mO870PYLTZh2W3xY5IxuFkjGevltY2BhQA,50294
16
- livekit_plugins_google-1.2.14.dist-info/METADATA,sha256=BNs2mhT68ao86W-TD9s67SXy-T7sWrLDT-FAOr5TC2o,1909
17
- livekit_plugins_google-1.2.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- livekit_plugins_google-1.2.14.dist-info/RECORD,,