livekit-plugins-google 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions exactly as they appear in the public registry.
@@ -5,7 +5,7 @@ from typing import Literal, Union
5
5
 
6
6
  from google.genai import types
7
7
 
8
- LiveAPIModels = Literal["gemini-2.0-flash-exp"]
8
+ LiveAPIModels = Literal["gemini-2.0-flash-exp", "gemini-2.0-flash-live-001"]
9
9
 
10
10
  Voice = Literal["Puck", "Charon", "Kore", "Fenrir", "Aoede"]
11
11
 
@@ -95,7 +95,7 @@ class RealtimeModel(llm.RealtimeModel):
95
95
  self,
96
96
  *,
97
97
  instructions: NotGivenOr[str] = NOT_GIVEN,
98
- model: LiveAPIModels | str = "gemini-2.0-flash-exp",
98
+ model: LiveAPIModels | str = "gemini-2.0-flash-live-001",
99
99
  api_key: NotGivenOr[str] = NOT_GIVEN,
100
100
  voice: Voice | str = "Puck",
101
101
  modalities: NotGivenOr[list[Modality]] = NOT_GIVEN,
@@ -357,10 +357,10 @@ class RealtimeSession(llm.RealtimeSession):
357
357
  return fut
358
358
 
359
359
  def interrupt(self) -> None:
360
- logger.warning("interrupt() - no direct cancellation in Gemini")
360
+ pass
361
361
 
362
362
  def truncate(self, *, message_id: str, audio_end_ms: int) -> None:
363
- logger.warning(f"truncate(...) called for {message_id}, ignoring for Gemini")
363
+ pass
364
364
 
365
365
  async def aclose(self) -> None:
366
366
  self._msg_ch.close()
@@ -423,8 +423,8 @@ class RealtimeSession(llm.RealtimeSession):
423
423
  async for msg in self._msg_ch:
424
424
  if isinstance(msg, LiveClientContent):
425
425
  await session.send(input=msg, end_of_turn=True)
426
-
427
- await session.send(input=msg)
426
+ else:
427
+ await session.send(input=msg)
428
428
  await session.send(input=".", end_of_turn=True)
429
429
 
430
430
  @utils.log_exceptions(logger=logger)
@@ -543,8 +543,11 @@ class RealtimeSession(llm.RealtimeSession):
543
543
  output_transcription = server_content.output_transcription
544
544
  if output_transcription and output_transcription.text:
545
545
  item_generation.text_ch.send_nowait(output_transcription.text)
546
+ if server_content.interrupted:
547
+ self._finalize_response()
548
+ self._handle_input_speech_started()
546
549
 
547
- if server_content.interrupted or server_content.turn_complete:
550
+ if server_content.turn_complete:
548
551
  self._finalize_response()
549
552
 
550
553
  def _finalize_response(self) -> None:
@@ -560,7 +563,9 @@ class RealtimeSession(llm.RealtimeSession):
560
563
  self._current_generation = None
561
564
  self._is_interrupted = True
562
565
  self._active_response_id = None
563
- self.emit("agent_speech_stopped")
566
+
567
+ def _handle_input_speech_started(self):
568
+ self.emit("input_speech_started", llm.InputSpeechStartedEvent())
564
569
 
565
570
  def _handle_tool_calls(self, tool_call: LiveServerToolCall):
566
571
  if not self._current_generation:
@@ -53,6 +53,7 @@ class _LLMOptions:
53
53
  top_k: NotGivenOr[float]
54
54
  presence_penalty: NotGivenOr[float]
55
55
  frequency_penalty: NotGivenOr[float]
56
+ thinking_config: NotGivenOr[types.ThinkingConfigOrDict]
56
57
 
57
58
 
58
59
  class LLM(llm.LLM):
@@ -71,6 +72,7 @@ class LLM(llm.LLM):
71
72
  presence_penalty: NotGivenOr[float] = NOT_GIVEN,
72
73
  frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
73
74
  tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
75
+ thinking_config: NotGivenOr[types.ThinkingConfigOrDict] = NOT_GIVEN,
74
76
  ) -> None:
75
77
  """
76
78
  Create a new instance of Google GenAI LLM.
@@ -95,6 +97,7 @@ class LLM(llm.LLM):
95
97
  presence_penalty (float, optional): Penalizes the model for generating previously mentioned concepts. Defaults to None.
96
98
  frequency_penalty (float, optional): Penalizes the model for repeating words. Defaults to None.
97
99
  tool_choice (ToolChoice, optional): Specifies whether to use tools during response generation. Defaults to "auto".
100
+ thinking_config (ThinkingConfigOrDict, optional): The thinking configuration for response generation. Defaults to None.
98
101
  """ # noqa: E501
99
102
  super().__init__()
100
103
  gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
@@ -121,6 +124,22 @@ class LLM(llm.LLM):
121
124
  "API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable" # noqa: E501
122
125
  )
123
126
 
127
+ # Validate thinking_config
128
+ if is_given(thinking_config):
129
+ _thinking_budget = None
130
+ if isinstance(thinking_config, dict):
131
+ _thinking_budget = thinking_config.get("thinking_budget")
132
+ elif isinstance(thinking_config, types.ThinkingConfig):
133
+ _thinking_budget = thinking_config.thinking_budget
134
+
135
+ if _thinking_budget is not None:
136
+ if not isinstance(_thinking_budget, int):
137
+ raise ValueError("thinking_budget inside thinking_config must be an integer")
138
+ if not (0 <= _thinking_budget <= 24576):
139
+ raise ValueError(
140
+ "thinking_budget inside thinking_config must be between 0 and 24576"
141
+ )
142
+
124
143
  self._opts = _LLMOptions(
125
144
  model=model,
126
145
  temperature=temperature,
@@ -133,6 +152,7 @@ class LLM(llm.LLM):
133
152
  top_k=top_k,
134
153
  presence_penalty=presence_penalty,
135
154
  frequency_penalty=frequency_penalty,
155
+ thinking_config=thinking_config,
136
156
  )
137
157
  self._client = genai.Client(
138
158
  api_key=gemini_api_key,
@@ -212,6 +232,10 @@ class LLM(llm.LLM):
212
232
  if is_given(self._opts.frequency_penalty):
213
233
  extra["frequency_penalty"] = self._opts.frequency_penalty
214
234
 
235
+ # Add thinking config if thinking_budget is provided
236
+ if is_given(self._opts.thinking_config):
237
+ extra["thinking_config"] = self._opts.thinking_config
238
+
215
239
  return LLMStream(
216
240
  self,
217
241
  client=self._client,
@@ -310,7 +334,7 @@ class LLMStream(llm.LLMStream):
310
334
  raise APIStatusError(
311
335
  "gemini llm: client error",
312
336
  status_code=e.code,
313
- body=e.message + e.status,
337
+ body=f"{e.message} {e.status}",
314
338
  request_id=request_id,
315
339
  retryable=False if e.code != 429 else True,
316
340
  ) from e
@@ -318,7 +342,7 @@ class LLMStream(llm.LLMStream):
318
342
  raise APIStatusError(
319
343
  "gemini llm: server error",
320
344
  status_code=e.code,
321
- body=e.message + e.status,
345
+ body=f"{e.message} {e.status}",
322
346
  request_id=request_id,
323
347
  retryable=retryable,
324
348
  ) from e
@@ -326,7 +350,7 @@ class LLMStream(llm.LLMStream):
326
350
  raise APIStatusError(
327
351
  "gemini llm: api error",
328
352
  status_code=e.code,
329
- body=e.message + e.status,
353
+ body=f"{e.message} {e.status}",
330
354
  request_id=request_id,
331
355
  retryable=retryable,
332
356
  ) from e
@@ -35,6 +35,8 @@ from livekit.agents.types import (
35
35
  )
36
36
  from livekit.agents.utils import is_given
37
37
 
38
+ from .models import Gender, SpeechLanguages
39
+
38
40
 
39
41
  @dataclass
40
42
  class _TTSOptions:
@@ -46,7 +48,9 @@ class TTS(tts.TTS):
46
48
  def __init__(
47
49
  self,
48
50
  *,
49
- voice: NotGivenOr[texttospeech.VoiceSelectionParams] = NOT_GIVEN,
51
+ language: NotGivenOr[SpeechLanguages | str] = NOT_GIVEN,
52
+ gender: NotGivenOr[Gender | str] = NOT_GIVEN,
53
+ voice_name: NotGivenOr[str] = NOT_GIVEN,
50
54
  sample_rate: int = 24000,
51
55
  pitch: int = 0,
52
56
  effects_profile_id: str = "",
@@ -63,7 +67,9 @@ class TTS(tts.TTS):
63
67
  environmental variable.
64
68
 
65
69
  Args:
66
- voice (texttospeech.VoiceSelectionParams, optional): Voice selection parameters.
70
+ language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
71
+ gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
72
+ voice_name (str, optional): Specific voice name. Default is an empty string.
67
73
  sample_rate (int, optional): Audio sample rate in Hz. Default is 24000.
68
74
  location (str, optional): Location for the TTS client. Default is "global".
69
75
  pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
@@ -85,15 +91,19 @@ class TTS(tts.TTS):
85
91
  self._credentials_info = credentials_info
86
92
  self._credentials_file = credentials_file
87
93
  self._location = location
88
- if not is_given(voice):
89
- voice = texttospeech.VoiceSelectionParams(
90
- name="",
91
- language_code="en-US",
92
- ssml_gender=SsmlVoiceGender.NEUTRAL,
93
- )
94
+
95
+ lang = language if is_given(language) else "en-US"
96
+ ssml_gender = _gender_from_str("neutral" if not is_given(gender) else gender)
97
+ name = "" if not is_given(voice_name) else voice_name
98
+
99
+ voice_params = texttospeech.VoiceSelectionParams(
100
+ name=name,
101
+ language_code=lang,
102
+ ssml_gender=ssml_gender,
103
+ )
94
104
 
95
105
  self._opts = _TTSOptions(
96
- voice=voice,
106
+ voice=voice_params,
97
107
  audio_config=texttospeech.AudioConfig(
98
108
  audio_encoding=texttospeech.AudioEncoding.OGG_OPUS,
99
109
  sample_rate_hertz=sample_rate,
@@ -106,18 +116,31 @@ class TTS(tts.TTS):
106
116
  def update_options(
107
117
  self,
108
118
  *,
109
- voice: NotGivenOr[texttospeech.VoiceSelectionParams] = NOT_GIVEN,
119
+ language: NotGivenOr[SpeechLanguages | str] = NOT_GIVEN,
120
+ gender: NotGivenOr[Gender | str] = NOT_GIVEN,
121
+ voice_name: NotGivenOr[str] = NOT_GIVEN,
110
122
  speaking_rate: NotGivenOr[float] = NOT_GIVEN,
111
123
  ) -> None:
112
124
  """
113
125
  Update the TTS options.
114
126
 
115
127
  Args:
116
- voice (texttospeech.VoiceSelectionParams, optional): Voice selection parameters.
128
+ language (SpeechLanguages | str, optional): Language code (e.g., "en-US").
129
+ gender (Gender | str, optional): Voice gender ("male", "female", "neutral").
130
+ voice_name (str, optional): Specific voice name.
117
131
  speaking_rate (float, optional): Speed of speech.
118
132
  """ # noqa: E501
119
- if is_given(voice):
120
- self._opts.voice = voice
133
+ params = {}
134
+ if is_given(language):
135
+ params["language"] = language
136
+ if is_given(gender):
137
+ params["gender"] = gender
138
+ if is_given(voice_name):
139
+ params["voice_name"] = voice_name
140
+
141
+ if params:
142
+ self._opts.voice = texttospeech.VoiceSelectionParams(**params)
143
+
121
144
  if is_given(speaking_rate):
122
145
  self._opts.audio_config.speaking_rate = speaking_rate
123
146
 
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.0.13"
15
+ __version__ = "1.0.15"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 1.0.13
3
+ Version: 1.0.15
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
22
22
  Requires-Dist: google-cloud-speech<3,>=2
23
23
  Requires-Dist: google-cloud-texttospeech<3,>=2
24
24
  Requires-Dist: google-genai>=1.10.0
25
- Requires-Dist: livekit-agents>=1.0.13
25
+ Requires-Dist: livekit-agents>=1.0.15
26
26
  Description-Content-Type: text/markdown
27
27
 
28
28
  # LiveKit Plugins Google
@@ -1,16 +1,16 @@
1
1
  livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
2
- livekit/plugins/google/llm.py,sha256=yAm-to2ItTJ7dAHc-2mlPeI0Npz9ZxRdyuRLV8PINqg,14888
2
+ livekit/plugins/google/llm.py,sha256=SqNGg6-wlrIUo9uaismP7QW5XztkXyDivJXLVgOIZMI,16175
3
3
  livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
4
4
  livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
5
5
  livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  livekit/plugins/google/stt.py,sha256=AG_lh2fuuduJi0jFbA_QKFXLJ6NUdF1W_FfkLUJML_Q,22413
7
- livekit/plugins/google/tts.py,sha256=P8Zu2s0TfmyzlrNxzDIqyn3sGiNSW0n3nB_JlO_ojiM,7985
7
+ livekit/plugins/google/tts.py,sha256=xhINokqY8UutXn85N-cbzq68eptbM6TTtIXmLktE_RM,9004
8
8
  livekit/plugins/google/utils.py,sha256=pbLSOAdQxInWhgI2Yhsrr9KvgvpFXYDdU2yx2p03pFg,9437
9
- livekit/plugins/google/version.py,sha256=i9Tq4ZlIN5uba7xHRxp31dxAE9NuzqobM8zWhdM4QgA,601
9
+ livekit/plugins/google/version.py,sha256=wHPUkZRYx-OB6iDuwTmMNVVQXU9eg5xFSjgmKBqqwd4,601
10
10
  livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
11
11
  livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
12
- livekit/plugins/google/beta/realtime/api_proto.py,sha256=cwpFOYjN_3v5PMY0TnzoHhJoASfZ7Qt9IO281ZhJ7Ww,565
13
- livekit/plugins/google/beta/realtime/realtime_api.py,sha256=JBEEOeTl6gv6Fe6GtYJjj9C-dqvfhWpOzNAa0tnTKgM,25002
14
- livekit_plugins_google-1.0.13.dist-info/METADATA,sha256=u8ocRjsu24AzO_FRgqYZzDqc3gKnQGp1hprKBc3RFm4,3492
15
- livekit_plugins_google-1.0.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
- livekit_plugins_google-1.0.13.dist-info/RECORD,,
12
+ livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
13
+ livekit/plugins/google/beta/realtime/realtime_api.py,sha256=HvPYyQXC9OodWaDNxbRt1UAJ8IVdXZGK-PsIEr7UwbY,25078
14
+ livekit_plugins_google-1.0.15.dist-info/METADATA,sha256=wMOLBkgHx_fJ0o5s8URB7Ev6yEg2jhKHhb0OlH1_7p4,3492
15
+ livekit_plugins_google-1.0.15.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ livekit_plugins_google-1.0.15.dist-info/RECORD,,