livekit-plugins-google 1.3.8__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, exactly as they were published to their public registry. It is provided for informational purposes only.
@@ -19,14 +19,22 @@ Supports Gemini, Cloud Speech-to-Text, and Cloud Text-to-Speech.
 See https://docs.livekit.io/agents/integrations/stt/google/ for more information.
 """
 
-from . import beta, realtime
+from . import beta, realtime, tools
 from .llm import LLM
 from .stt import STT, SpeechStream
-from .tools import _LLMTool
 from .tts import TTS
 from .version import __version__
 
-__all__ = ["STT", "TTS", "realtime", "SpeechStream", "__version__", "beta", "LLM", "_LLMTool"]
+__all__ = [
+    "STT",
+    "TTS",
+    "realtime",
+    "SpeechStream",
+    "__version__",
+    "beta",
+    "LLM",
+    "tools",
+]
 from livekit.agents import Plugin
 
 from .log import logger
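The private `_LLMTool` export is dropped from the package `__init__` in favor of exposing the `tools` module itself. A minimal sketch of the resulting import surface (illustrative; the member names inside `tools` are not shown in this diff):

    # Sketch: the plugin now re-exports the `tools` submodule instead of `_LLMTool`.
    from livekit.plugins import google

    google.LLM    # unchanged public export
    google.tools  # new public export; the private `_LLMTool` symbol is gone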
@@ -24,13 +24,7 @@ from google.auth._default_async import default_async
 from google.genai import Client, types
 from google.genai.errors import APIError, ClientError, ServerError
 from livekit.agents import APIConnectionError, APIStatusError, llm, utils
-from livekit.agents.llm import FunctionTool, RawFunctionTool, ToolChoice, utils as llm_utils
-from livekit.agents.llm.tool_context import (
-    get_function_info,
-    get_raw_function_info,
-    is_function_tool,
-    is_raw_function_tool,
-)
+from livekit.agents.llm import ToolChoice, utils as llm_utils
 from livekit.agents.types import (
     DEFAULT_API_CONNECT_OPTIONS,
     NOT_GIVEN,
@@ -41,11 +35,20 @@ from livekit.agents.utils import is_given
 
 from .log import logger
 from .models import ChatModels
-from .tools import _LLMTool
-from .utils import create_tools_config, to_fnc_ctx, to_response_format
+from .utils import create_tools_config, to_response_format
 from .version import __version__
 
 
+def _is_gemini_3_model(model: str) -> bool:
+    """Check if model is Gemini 3 series"""
+    return "gemini-3" in model.lower() or model.lower().startswith("gemini-3")
+
+
+def _is_gemini_3_flash_model(model: str) -> bool:
+    """Check if model is Gemini 3 Flash"""
+    return "gemini-3-flash" in model.lower() or model.lower().startswith("gemini-3-flash")
+
+
 @dataclass
 class _LLMOptions:
     model: ChatModels | str
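These two helpers gate all of the Gemini 3 specific behavior later in this diff. They match by substring, so dated and preview variants qualify as well (the `startswith` clause is redundant with the `in` check, but harmless). Expected behavior on the model names added to `ChatModels` in this release:

    _is_gemini_3_model("gemini-3-pro-preview")          # True
    _is_gemini_3_flash_model("gemini-3-flash-preview")  # True
    _is_gemini_3_model("gemini-2.5-flash")              # False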
@@ -60,8 +63,8 @@ class _LLMOptions:
     presence_penalty: NotGivenOr[float]
     frequency_penalty: NotGivenOr[float]
     thinking_config: NotGivenOr[types.ThinkingConfigOrDict]
+    retrieval_config: NotGivenOr[types.RetrievalConfigOrDict]
     automatic_function_calling_config: NotGivenOr[types.AutomaticFunctionCallingConfigOrDict]
-    gemini_tools: NotGivenOr[list[_LLMTool]]
     http_options: NotGivenOr[types.HttpOptions]
     seed: NotGivenOr[int]
     safety_settings: NotGivenOr[list[types.SafetySettingOrDict]]
@@ -81,7 +84,7 @@ class LLM(llm.LLM):
     def __init__(
         self,
         *,
-        model: ChatModels | str = "gemini-2.0-flash-001",
+        model: ChatModels | str = "gemini-2.5-flash",
         api_key: NotGivenOr[str] = NOT_GIVEN,
         vertexai: NotGivenOr[bool] = NOT_GIVEN,
         project: NotGivenOr[str] = NOT_GIVEN,
@@ -94,10 +97,10 @@ class LLM(llm.LLM):
         frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
         thinking_config: NotGivenOr[types.ThinkingConfigOrDict] = NOT_GIVEN,
+        retrieval_config: NotGivenOr[types.RetrievalConfigOrDict] = NOT_GIVEN,
         automatic_function_calling_config: NotGivenOr[
             types.AutomaticFunctionCallingConfigOrDict
         ] = NOT_GIVEN,
-        gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
         http_options: NotGivenOr[types.HttpOptions] = NOT_GIVEN,
         seed: NotGivenOr[int] = NOT_GIVEN,
         safety_settings: NotGivenOr[list[types.SafetySettingOrDict]] = NOT_GIVEN,
@@ -126,8 +129,8 @@ class LLM(llm.LLM):
             frequency_penalty (float, optional): Penalizes the model for repeating words. Defaults to None.
             tool_choice (ToolChoice, optional): Specifies whether to use tools during response generation. Defaults to "auto".
             thinking_config (ThinkingConfigOrDict, optional): The thinking configuration for response generation. Defaults to None.
+            retrieval_config (RetrievalConfigOrDict, optional): The retrieval configuration for response generation. Defaults to None.
             automatic_function_calling_config (AutomaticFunctionCallingConfigOrDict, optional): The automatic function calling configuration for response generation. Defaults to None.
-            gemini_tools (list[LLMTool], optional): The Gemini-specific tools to use for the session.
             http_options (HttpOptions, optional): The HTTP options to use for the session.
             seed (int, optional): Random seed for reproducible generation. Defaults to None.
             safety_settings (list[SafetySettingOrDict], optional): Safety settings for content filtering. Defaults to None.
@@ -168,10 +171,13 @@ class LLM(llm.LLM):
         # Validate thinking_config
         if is_given(thinking_config):
             _thinking_budget = None
+            _thinking_level = None
             if isinstance(thinking_config, dict):
                 _thinking_budget = thinking_config.get("thinking_budget")
+                _thinking_level = thinking_config.get("thinking_level")
             elif isinstance(thinking_config, types.ThinkingConfig):
                 _thinking_budget = thinking_config.thinking_budget
+                _thinking_level = getattr(thinking_config, "thinking_level", None)
 
             if _thinking_budget is not None:
                 if not isinstance(_thinking_budget, int):
@@ -190,8 +196,8 @@ class LLM(llm.LLM):
             presence_penalty=presence_penalty,
             frequency_penalty=frequency_penalty,
             thinking_config=thinking_config,
+            retrieval_config=retrieval_config,
             automatic_function_calling_config=automatic_function_calling_config,
-            gemini_tools=gemini_tools,
             http_options=http_options,
             seed=seed,
             safety_settings=safety_settings,
@@ -202,6 +208,8 @@ class LLM(llm.LLM):
             project=gcp_project,
             location=gcp_location,
         )
+        # Store thought_signatures for Gemini 3 multi-turn function calling
+        self._thought_signatures: dict[str, bytes] = {}
 
     @property
     def model(self) -> str:
@@ -218,7 +226,7 @@ class LLM(llm.LLM):
         self,
         *,
         chat_ctx: llm.ChatContext,
-        tools: list[FunctionTool | RawFunctionTool] | None = None,
+        tools: list[llm.Tool] | None = None,
         conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
         parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
@@ -226,7 +234,6 @@ class LLM(llm.LLM):
             types.SchemaUnion | type[llm_utils.ResponseFormatT]
         ] = NOT_GIVEN,
         extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
-        gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
     ) -> LLMStream:
         extra = {}
 
@@ -236,6 +243,12 @@ class LLM(llm.LLM):
         tool_choice = (
             cast(ToolChoice, tool_choice) if is_given(tool_choice) else self._opts.tool_choice
         )
+        retrieval_config = (
+            self._opts.retrieval_config if is_given(self._opts.retrieval_config) else None
+        )
+        if isinstance(retrieval_config, dict):
+            retrieval_config = types.RetrievalConfig.model_validate(retrieval_config)
+
         if is_given(tool_choice):
             gemini_tool_choice: types.ToolConfig
             if isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
@@ -243,38 +256,44 @@ class LLM(llm.LLM):
                 gemini_tool_choice = types.ToolConfig(
                     function_calling_config=types.FunctionCallingConfig(
                         mode=types.FunctionCallingConfigMode.ANY,
                         allowed_function_names=[tool_choice["function"]["name"]],
-                    )
+                    ),
+                    retrieval_config=retrieval_config,
                 )
                 extra["tool_config"] = gemini_tool_choice
             elif tool_choice == "required":
                 tool_names = []
                 for tool in tools or []:
-                    if is_function_tool(tool):
-                        tool_names.append(get_function_info(tool).name)
-                    elif is_raw_function_tool(tool):
-                        tool_names.append(get_raw_function_info(tool).name)
+                    if isinstance(tool, (llm.FunctionTool, llm.RawFunctionTool)):
+                        tool_names.append(tool.info.name)
 
                 gemini_tool_choice = types.ToolConfig(
                     function_calling_config=types.FunctionCallingConfig(
                         mode=types.FunctionCallingConfigMode.ANY,
                         allowed_function_names=tool_names or None,
-                    )
+                    ),
+                    retrieval_config=retrieval_config,
                 )
                 extra["tool_config"] = gemini_tool_choice
             elif tool_choice == "auto":
                 gemini_tool_choice = types.ToolConfig(
                     function_calling_config=types.FunctionCallingConfig(
                         mode=types.FunctionCallingConfigMode.AUTO,
-                    )
+                    ),
+                    retrieval_config=retrieval_config,
                 )
                 extra["tool_config"] = gemini_tool_choice
             elif tool_choice == "none":
                 gemini_tool_choice = types.ToolConfig(
                     function_calling_config=types.FunctionCallingConfig(
                         mode=types.FunctionCallingConfigMode.NONE,
-                    )
+                    ),
+                    retrieval_config=retrieval_config,
                 )
                 extra["tool_config"] = gemini_tool_choice
+        elif retrieval_config:
+            extra["tool_config"] = types.ToolConfig(
+                retrieval_config=retrieval_config,
+            )
 
         if is_given(response_format):
             extra["response_schema"] = to_response_format(response_format)  # type: ignore
@@ -295,9 +314,51 @@ class LLM(llm.LLM):
         if is_given(self._opts.seed):
             extra["seed"] = self._opts.seed
 
-        # Add thinking config if thinking_budget is provided
+        # Handle thinking_config based on model version
         if is_given(self._opts.thinking_config):
-            extra["thinking_config"] = self._opts.thinking_config
+            is_gemini_3 = _is_gemini_3_model(self._opts.model)
+            is_gemini_3_flash = _is_gemini_3_flash_model(self._opts.model)
+            thinking_cfg = self._opts.thinking_config
+
+            # Extract both parameters
+            _budget = None
+            _level = None
+            if isinstance(thinking_cfg, dict):
+                _budget = thinking_cfg.get("thinking_budget")
+                _level = thinking_cfg.get("thinking_level")
+            elif isinstance(thinking_cfg, types.ThinkingConfig):
+                _budget = thinking_cfg.thinking_budget
+                _level = getattr(thinking_cfg, "thinking_level", None)
+
+            if is_gemini_3:
+                # Gemini 3: only support thinking_level
+                if _budget is not None and _level is None:
+                    logger.warning(
+                        f"Model {self._opts.model} is Gemini 3 which does not support thinking_budget. "
+                        "Please use thinking_level ('low' or 'high') instead. Ignoring thinking_budget."
+                    )
+                if _level is None:
+                    # If no thinking_level is provided, use the fastest thinking level
+                    if is_gemini_3_flash:
+                        _level = "minimal"
+                    else:
+                        _level = "low"
+                # Use thinking_level only (pass as dict since SDK may not have this field yet)
+                extra["thinking_config"] = {"thinking_level": _level}
+
+            else:
+                # Gemini 2.5 and earlier: only support thinking_budget
+                if _level is not None and _budget is None:
+                    raise ValueError(
+                        f"Model {self._opts.model} does not support thinking_level. "
+                        "Please use thinking_budget (int) instead for Gemini 2.5 and earlier models."
+                    )
+                if _budget is not None:
+                    # Use thinking_budget only
+                    extra["thinking_config"] = types.ThinkingConfig(thinking_budget=_budget)
+                else:
+                    # Pass through original config if no specific handling needed
+                    extra["thinking_config"] = self._opts.thinking_config
 
         if is_given(self._opts.automatic_function_calling_config):
             extra["automatic_function_calling"] = self._opts.automatic_function_calling_config
@@ -305,8 +366,6 @@ class LLM(llm.LLM):
         if is_given(self._opts.safety_settings):
             extra["safety_settings"] = self._opts.safety_settings
 
-        gemini_tools = gemini_tools if is_given(gemini_tools) else self._opts.gemini_tools
-
         return LLMStream(
             self,
             client=self._client,
@@ -314,7 +373,6 @@ class LLM(llm.LLM):
             chat_ctx=chat_ctx,
             tools=tools or [],
             conn_options=conn_options,
-            gemini_tools=gemini_tools,
             extra_kwargs=extra,
         )
 
@@ -322,35 +380,38 @@
 class LLMStream(llm.LLMStream):
     def __init__(
         self,
-        llm: LLM,
+        llm_v: LLM,
         *,
         client: Client,
         model: str | ChatModels,
         chat_ctx: llm.ChatContext,
         conn_options: APIConnectOptions,
-        tools: list[FunctionTool | RawFunctionTool],
+        tools: list[llm.Tool],
         extra_kwargs: dict[str, Any],
-        gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
     ) -> None:
-        super().__init__(llm, chat_ctx=chat_ctx, tools=tools, conn_options=conn_options)
+        super().__init__(llm_v, chat_ctx=chat_ctx, tools=tools, conn_options=conn_options)
         self._client = client
         self._model = model
-        self._llm: LLM = llm
+        self._llm: LLM = llm_v
         self._extra_kwargs = extra_kwargs
-        self._gemini_tools = gemini_tools
+        self._tool_ctx = llm.ToolContext(tools)
 
     async def _run(self) -> None:
         retryable = True
         request_id = utils.shortuuid()
 
         try:
-            turns_dict, extra_data = self._chat_ctx.to_provider_format(format="google")
-            turns = [types.Content.model_validate(turn) for turn in turns_dict]
-            function_declarations = to_fnc_ctx(self._tools)
-            tools_config = create_tools_config(
-                function_tools=function_declarations,
-                gemini_tools=self._gemini_tools if is_given(self._gemini_tools) else None,
+            # Pass thought_signatures for Gemini 3 multi-turn function calling
+            thought_sigs = (
+                self._llm._thought_signatures if _is_gemini_3_model(self._model) else None
             )
+            turns_dict, extra_data = self._chat_ctx.to_provider_format(
+                format="google", thought_signatures=thought_sigs
+            )
+
+            turns = [types.Content.model_validate(turn) for turn in turns_dict]
+            tool_context = llm.ToolContext(self._tools)
+            tools_config = create_tools_config(tool_context, _only_single_type=True)
             if tools_config:
                 self._extra_kwargs["tools"] = tools_config
             http_options = self._llm._opts.http_options or types.HttpOptions(
@@ -368,31 +429,25 @@ class LLMStream(llm.LLMStream):
                 http_options=http_options,
                 **self._extra_kwargs,
             )
+
             stream = await self._client.aio.models.generate_content_stream(
                 model=self._model,
                 contents=cast(types.ContentListUnion, turns),
                 config=config,
            )
 
+            response_generated = False
+            finish_reason: types.FinishReason | None = None
             async for response in stream:
                 if response.prompt_feedback:
                     raise APIStatusError(
-                        response.prompt_feedback.json(),
+                        response.prompt_feedback.model_dump_json(),
                         retryable=False,
                         request_id=request_id,
                     )
 
-                if (
-                    not response.candidates
-                    or not response.candidates[0].content
-                    or not response.candidates[0].content.parts
-                ):
-                    logger.warning(f"no content in the response: {response}")
-                    raise APIStatusError(
-                        "no content in the response",
-                        retryable=True,
-                        request_id=request_id,
-                    )
+                if not response.candidates:
+                    continue
 
                 if len(response.candidates) > 1:
                     logger.warning(
@@ -401,35 +456,25 @@ class LLMStream(llm.LLMStream):
 
                 candidate = response.candidates[0]
 
-                if candidate.finish_reason in BLOCKED_REASONS:
-                    raise APIStatusError(
-                        f"generation blocked by gemini: {candidate.finish_reason}",
-                        retryable=False,
-                        request_id=request_id,
-                    )
-
                 if not candidate.content or not candidate.content.parts:
-                    raise APIStatusError(
-                        "no content in the response",
-                        retryable=retryable,
-                        request_id=request_id,
-                    )
+                    continue
+
+                if candidate.finish_reason is not None:
+                    finish_reason = candidate.finish_reason
+                    if candidate.finish_reason in BLOCKED_REASONS:
+                        raise APIStatusError(
+                            f"generation blocked by gemini: {candidate.finish_reason}",
+                            retryable=False,
+                            request_id=request_id,
+                        )
 
-                chunks_yielded = False
                 for part in candidate.content.parts:
                     chat_chunk = self._parse_part(request_id, part)
+                    response_generated = True
                     if chat_chunk is not None:
-                        chunks_yielded = True
                         retryable = False
                         self._event_ch.send_nowait(chat_chunk)
 
-                if candidate.finish_reason == types.FinishReason.STOP and not chunks_yielded:
-                    raise APIStatusError(
-                        "no response generated",
-                        retryable=retryable,
-                        request_id=request_id,
-                    )
-
                 if response.usage_metadata is not None:
                     usage = response.usage_metadata
                     self._event_ch.send_nowait(
@@ -444,6 +489,14 @@ class LLMStream(llm.LLMStream):
                         )
                     )
 
+            if not response_generated:
+                raise APIStatusError(
+                    "no response generated",
+                    retryable=retryable,
+                    request_id=request_id,
+                    body=f"finish reason: {finish_reason}",
+                )
+
         except ClientError as e:
             raise APIStatusError(
                 "gemini llm: client error",
@@ -476,17 +529,25 @@
 
     def _parse_part(self, id: str, part: types.Part) -> llm.ChatChunk | None:
         if part.function_call:
+            tool_call = llm.FunctionToolCall(
+                arguments=json.dumps(part.function_call.args),
+                name=part.function_call.name,
+                call_id=part.function_call.id or utils.shortuuid("function_call_"),
+            )
+
+            # Store thought_signature for Gemini 3 multi-turn function calling
+            if (
+                _is_gemini_3_model(self._model)
+                and hasattr(part, "thought_signature")
+                and part.thought_signature
+            ):
+                self._llm._thought_signatures[tool_call.call_id] = part.thought_signature
+
             chat_chunk = llm.ChatChunk(
                 id=id,
                 delta=llm.ChoiceDelta(
                     role="assistant",
-                    tool_calls=[
-                        llm.FunctionToolCall(
-                            arguments=json.dumps(part.function_call.args),
-                            name=part.function_call.name,
-                            call_id=part.function_call.id or utils.shortuuid("function_call_"),
-                        )
-                    ],
+                    tool_calls=[tool_call],
                     content=part.text,
                 ),
            )
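Together with the `_run` changes above, this completes the thought-signature round trip for Gemini 3: signatures arriving on streamed function-call parts are cached on the `LLM` instance by `call_id`, then handed back to `to_provider_format` on the next request so the serialized history can carry them. A condensed sketch of the cache's lifecycle, with names taken from the diff (not a runnable excerpt of the plugin):

    # While streaming (in _parse_part): capture the opaque signature per tool call.
    self._llm._thought_signatures[tool_call.call_id] = part.thought_signature  # dict[str, bytes]

    # On the next turn (in _run): hand the cache back when serializing history.
    thought_sigs = self._llm._thought_signatures if _is_gemini_3_model(self._model) else None
    turns_dict, extra_data = self._chat_ctx.to_provider_format(
        format="google", thought_signatures=thought_sigs
    )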
@@ -1,6 +1,6 @@
 from typing import Literal
 
-# Speech to Text v2
+# Speech to Text (v1 and v2)
 
 SpeechModels = Literal[
     "long",
@@ -14,6 +14,13 @@ SpeechModels = Literal[
     "latest_long",
     "latest_short",
 ]
+# https://docs.cloud.google.com/speech-to-text/docs/transcription-model
+
+SpeechModelsV2 = Literal[
+    "telephony",
+    "chirp_2",
+    "chirp_3",
+]
 
 SpeechLanguages = Literal[
     "af-ZA",
@@ -189,6 +196,9 @@ SpeechLanguages = Literal[
 Gender = Literal["male", "female", "neutral"]
 
 ChatModels = Literal[
+    "gemini-3-pro-preview",
+    "gemini-3-flash-preview",
+    "gemini-2.5-flash",
     "gemini-2.5-pro-preview-05-06",
     "gemini-2.5-flash-preview-04-17",
     "gemini-2.5-flash-preview-05-20",
@@ -197,3 +207,7 @@ ChatModels = Literal[
     "gemini-2.0-pro-exp-02-05",
     "gemini-1.5-pro",
 ]
+
+GeminiTTSModels = Literal[
+    "gemini-2.5-flash-tts", "gemini-2.5-flash-lite-preview-tts", "gemini-2.5-pro-tts"
+]
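These `Literal` aliases are typing aids only; the corresponding parameters are annotated like `ChatModels | str`, so unlisted model names still pass at runtime. A quick sketch of how such an alias constrains annotations:

    from typing import Literal

    GeminiTTSModels = Literal[
        "gemini-2.5-flash-tts", "gemini-2.5-flash-lite-preview-tts", "gemini-2.5-pro-tts"
    ]

    def pick_tts_model(model: GeminiTTSModels) -> str:
        return model

    pick_tts_model("gemini-2.5-pro-tts")  # OK
    # pick_tts_model("gemini-1.0-tts")    # rejected by mypy/pyright: not in the Literal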
@@ -5,19 +5,21 @@ from typing import Literal, Union
 
 from google.genai import types
 
+# Gemini API deprecations: https://ai.google.dev/gemini-api/docs/deprecations
+# Gemini API release notes with preview deprecations: https://ai.google.dev/gemini-api/docs/changelog
+# live models: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/live-api
+# VertexAI retirement: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/learn/model-versions#retired-models
+# Additional references:
+# 1. https://github.com/kazunori279/adk-streaming-test/blob/main/test_report.md
 LiveAPIModels = Literal[
     # VertexAI models
-    "gemini-live-2.5-flash-native-audio",
-    "gemini-live-2.5-flash-preview-native-audio",
-    # deprecated vertexai models
-    "gemini-2.0-flash-exp",
-    "gemini-live-2.5-flash-preview-native-audio-09-2025",
+    "gemini-live-2.5-flash-native-audio",  # GA https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-live-api#live-2.5-flash
+    "gemini-live-2.5-flash-preview-native-audio-09-2025",  # Public preview https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-live-api#live-2.5-flash-preview
+    "gemini-live-2.5-flash-preview-native-audio",  # still works, possibly an alias, but not mentioned in any docs or changelog
     # Gemini API models
-    "gemini-2.5-flash-native-audio-preview-12-2025",
-    "gemini-live-2.5-flash-preview",
-    # deprecated Gemini API models
-    "gemini-2.0-flash-live-001",
-    "gemini-2.5-flash-native-audio-preview-09-2025",
+    "gemini-2.5-flash-native-audio-preview-12-2025",  # https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-live
+    "gemini-2.5-flash-native-audio-preview-09-2025",  # https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-live
+    "gemini-2.0-flash-exp",  # still works in Gemini API but not VertexAI
 ]
 
 Voice = Literal[