livekit-plugins-google 1.3.8__py3-none-any.whl → 1.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/__init__.py +11 -3
- livekit/plugins/google/llm.py +142 -81
- livekit/plugins/google/models.py +15 -1
- livekit/plugins/google/realtime/api_proto.py +12 -10
- livekit/plugins/google/realtime/realtime_api.py +25 -28
- livekit/plugins/google/stt.py +281 -93
- livekit/plugins/google/tools.py +69 -9
- livekit/plugins/google/tts.py +17 -9
- livekit/plugins/google/utils.py +21 -87
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-1.3.8.dist-info → livekit_plugins_google-1.3.11.dist-info}/METADATA +1 -1
- livekit_plugins_google-1.3.11.dist-info/RECORD +18 -0
- livekit_plugins_google-1.3.8.dist-info/RECORD +0 -18
- {livekit_plugins_google-1.3.8.dist-info → livekit_plugins_google-1.3.11.dist-info}/WHEEL +0 -0
livekit/plugins/google/__init__.py
CHANGED
@@ -19,14 +19,22 @@ Supports Gemini, Cloud Speech-to-Text, and Cloud Text-to-Speech.
 See https://docs.livekit.io/agents/integrations/stt/google/ for more information.
 """

-from . import beta, realtime
+from . import beta, realtime, tools
 from .llm import LLM
 from .stt import STT, SpeechStream
-from .tools import _LLMTool
 from .tts import TTS
 from .version import __version__

-__all__ = [
+__all__ = [
+    "STT",
+    "TTS",
+    "realtime",
+    "SpeechStream",
+    "__version__",
+    "beta",
+    "LLM",
+    "tools",
+]
 from livekit.agents import Plugin

 from .log import logger
livekit/plugins/google/llm.py
CHANGED
@@ -24,13 +24,7 @@ from google.auth._default_async import default_async
 from google.genai import Client, types
 from google.genai.errors import APIError, ClientError, ServerError
 from livekit.agents import APIConnectionError, APIStatusError, llm, utils
-from livekit.agents.llm import
-from livekit.agents.llm.tool_context import (
-    get_function_info,
-    get_raw_function_info,
-    is_function_tool,
-    is_raw_function_tool,
-)
+from livekit.agents.llm import ToolChoice, utils as llm_utils
 from livekit.agents.types import (
     DEFAULT_API_CONNECT_OPTIONS,
     NOT_GIVEN,
@@ -41,11 +35,20 @@ from livekit.agents.utils import is_given

 from .log import logger
 from .models import ChatModels
-from .
-from .utils import create_tools_config, to_fnc_ctx, to_response_format
+from .utils import create_tools_config, to_response_format
 from .version import __version__


+def _is_gemini_3_model(model: str) -> bool:
+    """Check if model is Gemini 3 series"""
+    return "gemini-3" in model.lower() or model.lower().startswith("gemini-3")
+
+
+def _is_gemini_3_flash_model(model: str) -> bool:
+    """Check if model is Gemini 3 Flash"""
+    return "gemini-3-flash" in model.lower() or model.lower().startswith("gemini-3-flash")
+
+
 @dataclass
 class _LLMOptions:
     model: ChatModels | str
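Both helpers are plain substring checks, so the bare and `-preview` Gemini 3 names added to `ChatModels` in this version all match. Illustration only; these are module-private functions, imported here purely to make the matching behaviour concrete:

```python
from livekit.plugins.google.llm import _is_gemini_3_model, _is_gemini_3_flash_model

_is_gemini_3_model("gemini-3-pro-preview")      # True
_is_gemini_3_model("gemini-3-flash-preview")    # True
_is_gemini_3_model("gemini-2.5-flash")          # False

_is_gemini_3_flash_model("gemini-3-flash-preview")  # True
_is_gemini_3_flash_model("gemini-3-pro-preview")    # False
```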
@@ -60,8 +63,8 @@ class _LLMOptions:
     presence_penalty: NotGivenOr[float]
     frequency_penalty: NotGivenOr[float]
     thinking_config: NotGivenOr[types.ThinkingConfigOrDict]
+    retrieval_config: NotGivenOr[types.RetrievalConfigOrDict]
     automatic_function_calling_config: NotGivenOr[types.AutomaticFunctionCallingConfigOrDict]
-    gemini_tools: NotGivenOr[list[_LLMTool]]
     http_options: NotGivenOr[types.HttpOptions]
     seed: NotGivenOr[int]
     safety_settings: NotGivenOr[list[types.SafetySettingOrDict]]
@@ -81,7 +84,7 @@ class LLM(llm.LLM):
     def __init__(
         self,
         *,
-        model: ChatModels | str = "gemini-2.
+        model: ChatModels | str = "gemini-2.5-flash",
         api_key: NotGivenOr[str] = NOT_GIVEN,
         vertexai: NotGivenOr[bool] = NOT_GIVEN,
         project: NotGivenOr[str] = NOT_GIVEN,
@@ -94,10 +97,10 @@ class LLM(llm.LLM):
         frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
         thinking_config: NotGivenOr[types.ThinkingConfigOrDict] = NOT_GIVEN,
+        retrieval_config: NotGivenOr[types.RetrievalConfigOrDict] = NOT_GIVEN,
         automatic_function_calling_config: NotGivenOr[
             types.AutomaticFunctionCallingConfigOrDict
         ] = NOT_GIVEN,
-        gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
         http_options: NotGivenOr[types.HttpOptions] = NOT_GIVEN,
         seed: NotGivenOr[int] = NOT_GIVEN,
         safety_settings: NotGivenOr[list[types.SafetySettingOrDict]] = NOT_GIVEN,
@@ -126,8 +129,8 @@ class LLM(llm.LLM):
             frequency_penalty (float, optional): Penalizes the model for repeating words. Defaults to None.
             tool_choice (ToolChoice, optional): Specifies whether to use tools during response generation. Defaults to "auto".
             thinking_config (ThinkingConfigOrDict, optional): The thinking configuration for response generation. Defaults to None.
+            retrieval_config (RetrievalConfigOrDict, optional): The retrieval configuration for response generation. Defaults to None.
             automatic_function_calling_config (AutomaticFunctionCallingConfigOrDict, optional): The automatic function calling configuration for response generation. Defaults to None.
-            gemini_tools (list[LLMTool], optional): The Gemini-specific tools to use for the session.
             http_options (HttpOptions, optional): The HTTP options to use for the session.
             seed (int, optional): Random seed for reproducible generation. Defaults to None.
             safety_settings (list[SafetySettingOrDict], optional): Safety settings for content filtering. Defaults to None.
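Taken together with the helpers above, the constructor now accepts a `thinking_level` for Gemini 3 models while Gemini 2.5 and earlier keep the integer `thinking_budget`. A minimal sketch (assumes the standard `livekit.plugins.google` import path and a `GOOGLE_API_KEY` in the environment):

```python
from livekit.plugins import google

# Gemini 3: thinking_level (e.g. "low" or "high" per the warning below; Flash defaults to "minimal").
llm_v3 = google.LLM(
    model="gemini-3-flash-preview",
    thinking_config={"thinking_level": "low"},
)

# Gemini 2.5 and earlier: integer thinking_budget; thinking_level is not supported.
llm_v25 = google.LLM(
    model="gemini-2.5-flash",
    thinking_config={"thinking_budget": 512},
)
```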
@@ -168,10 +171,13 @@ class LLM(llm.LLM):
         # Validate thinking_config
         if is_given(thinking_config):
             _thinking_budget = None
+            _thinking_level = None
             if isinstance(thinking_config, dict):
                 _thinking_budget = thinking_config.get("thinking_budget")
+                _thinking_level = thinking_config.get("thinking_level")
             elif isinstance(thinking_config, types.ThinkingConfig):
                 _thinking_budget = thinking_config.thinking_budget
+                _thinking_level = getattr(thinking_config, "thinking_level", None)

             if _thinking_budget is not None:
                 if not isinstance(_thinking_budget, int):
@@ -190,8 +196,8 @@ class LLM(llm.LLM):
             presence_penalty=presence_penalty,
             frequency_penalty=frequency_penalty,
             thinking_config=thinking_config,
+            retrieval_config=retrieval_config,
             automatic_function_calling_config=automatic_function_calling_config,
-            gemini_tools=gemini_tools,
             http_options=http_options,
             seed=seed,
             safety_settings=safety_settings,
@@ -202,6 +208,8 @@ class LLM(llm.LLM):
             project=gcp_project,
             location=gcp_location,
         )
+        # Store thought_signatures for Gemini 3 multi-turn function calling
+        self._thought_signatures: dict[str, bytes] = {}

     @property
     def model(self) -> str:
@@ -218,7 +226,7 @@ class LLM(llm.LLM):
         self,
         *,
         chat_ctx: llm.ChatContext,
-        tools: list[
+        tools: list[llm.Tool] | None = None,
         conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
         parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
@@ -226,7 +234,6 @@ class LLM(llm.LLM):
             types.SchemaUnion | type[llm_utils.ResponseFormatT]
         ] = NOT_GIVEN,
         extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
-        gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
     ) -> LLMStream:
         extra = {}

@@ -236,6 +243,12 @@ class LLM(llm.LLM):
         tool_choice = (
             cast(ToolChoice, tool_choice) if is_given(tool_choice) else self._opts.tool_choice
         )
+        retrieval_config = (
+            self._opts.retrieval_config if is_given(self._opts.retrieval_config) else None
+        )
+        if isinstance(retrieval_config, dict):
+            retrieval_config = types.RetrievalConfig.model_validate(retrieval_config)
+
         if is_given(tool_choice):
             gemini_tool_choice: types.ToolConfig
             if isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
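Because dicts are normalized with `types.RetrievalConfig.model_validate()` here, `retrieval_config` can be supplied either as a typed object or a plain dict. A brief sketch (same import and credential assumptions as above; fields are omitted because they depend on the installed `google.genai` version):

```python
from google.genai import types
from livekit.plugins import google

llm_a = google.LLM(model="gemini-2.5-flash", retrieval_config=types.RetrievalConfig())
llm_b = google.LLM(model="gemini-2.5-flash", retrieval_config={})  # validated per request, as above
```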
@@ -243,38 +256,44 @@ class LLM(llm.LLM):
                     function_calling_config=types.FunctionCallingConfig(
                         mode=types.FunctionCallingConfigMode.ANY,
                         allowed_function_names=[tool_choice["function"]["name"]],
-                    )
+                    ),
+                    retrieval_config=retrieval_config,
                 )
                 extra["tool_config"] = gemini_tool_choice
             elif tool_choice == "required":
                 tool_names = []
                 for tool in tools or []:
-                    if
-                        tool_names.append(
-                    elif is_raw_function_tool(tool):
-                        tool_names.append(get_raw_function_info(tool).name)
+                    if isinstance(tool, (llm.FunctionTool, llm.RawFunctionTool)):
+                        tool_names.append(tool.info.name)

                 gemini_tool_choice = types.ToolConfig(
                     function_calling_config=types.FunctionCallingConfig(
                         mode=types.FunctionCallingConfigMode.ANY,
                         allowed_function_names=tool_names or None,
-                    )
+                    ),
+                    retrieval_config=retrieval_config,
                 )
                 extra["tool_config"] = gemini_tool_choice
             elif tool_choice == "auto":
                 gemini_tool_choice = types.ToolConfig(
                     function_calling_config=types.FunctionCallingConfig(
                         mode=types.FunctionCallingConfigMode.AUTO,
-                    )
+                    ),
+                    retrieval_config=retrieval_config,
                 )
                 extra["tool_config"] = gemini_tool_choice
             elif tool_choice == "none":
                 gemini_tool_choice = types.ToolConfig(
                     function_calling_config=types.FunctionCallingConfig(
                         mode=types.FunctionCallingConfigMode.NONE,
-                    )
+                    ),
+                    retrieval_config=retrieval_config,
                 )
                 extra["tool_config"] = gemini_tool_choice
+        elif retrieval_config:
+            extra["tool_config"] = types.ToolConfig(
+                retrieval_config=retrieval_config,
+            )

         if is_given(response_format):
             extra["response_schema"] = to_response_format(response_format)  # type: ignore
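All four `tool_choice` values map onto `google.genai` `FunctionCallingConfigMode`s, and the retrieval config now rides along on the same `ToolConfig` whichever branch is taken. A hypothetical call, reusing `llm_v25` from the earlier sketch (`chat_ctx` and `my_tools` are assumed to exist elsewhere in an agent):

```python
stream = llm_v25.chat(
    chat_ctx=chat_ctx,
    tools=my_tools,          # list[llm.Tool]
    tool_choice="required",  # -> FunctionCallingConfigMode.ANY with allowed_function_names
)
```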
@@ -295,9 +314,51 @@ class LLM(llm.LLM):
         if is_given(self._opts.seed):
             extra["seed"] = self._opts.seed

-        #
+        # Handle thinking_config based on model version
         if is_given(self._opts.thinking_config):
-
+            is_gemini_3 = _is_gemini_3_model(self._opts.model)
+            is_gemini_3_flash = _is_gemini_3_flash_model(self._opts.model)
+            thinking_cfg = self._opts.thinking_config
+
+            # Extract both parameters
+            _budget = None
+            _level = None
+            if isinstance(thinking_cfg, dict):
+                _budget = thinking_cfg.get("thinking_budget")
+                _level = thinking_cfg.get("thinking_level")
+            elif isinstance(thinking_cfg, types.ThinkingConfig):
+                _budget = thinking_cfg.thinking_budget
+                _level = getattr(thinking_cfg, "thinking_level", None)
+
+            if is_gemini_3:
+                # Gemini 3: only support thinking_level
+                if _budget is not None and _level is None:
+                    logger.warning(
+                        f"Model {self._opts.model} is Gemini 3 which does not support thinking_budget. "
+                        "Please use thinking_level ('low' or 'high') instead. Ignoring thinking_budget."
+                    )
+                if _level is None:
+                    # If no thinking_level is provided, use the fastest thinking level
+                    if is_gemini_3_flash:
+                        _level = "minimal"
+                    else:
+                        _level = "low"
+                # Use thinking_level only (pass as dict since SDK may not have this field yet)
+                extra["thinking_config"] = {"thinking_level": _level}
+
+            else:
+                # Gemini 2.5 and earlier: only support thinking_budget
+                if _level is not None and _budget is None:
+                    raise ValueError(
+                        f"Model {self._opts.model} does not support thinking_level. "
+                        "Please use thinking_budget (int) instead for Gemini 2.5 and earlier models."
+                    )
+                if _budget is not None:
+                    # Use thinking_budget only
+                    extra["thinking_config"] = types.ThinkingConfig(thinking_budget=_budget)
+                else:
+                    # Pass through original config if no specific handling needed
+                    extra["thinking_config"] = self._opts.thinking_config

         if is_given(self._opts.automatic_function_calling_config):
             extra["automatic_function_calling"] = self._opts.automatic_function_calling_config
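The net effect of this resolution: mixing the two knobs is tolerated in one direction but not the other, and both reactions surface when `chat()` builds the request config, not at construction time. A short sketch of the two mismatches (same assumptions as the earlier constructor example):

```python
# Gemini 3 + thinking_budget: a warning is logged, the budget is ignored, and the level
# falls back to the fastest option ("minimal" for Flash, "low" for other Gemini 3 models).
google.LLM(model="gemini-3-pro-preview", thinking_config={"thinking_budget": 1024})

# Pre-Gemini-3 + thinking_level: ValueError once a chat() request is built.
google.LLM(model="gemini-2.5-flash", thinking_config={"thinking_level": "low"})
```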
@@ -305,8 +366,6 @@ class LLM(llm.LLM):
         if is_given(self._opts.safety_settings):
             extra["safety_settings"] = self._opts.safety_settings

-        gemini_tools = gemini_tools if is_given(gemini_tools) else self._opts.gemini_tools
-
         return LLMStream(
             self,
             client=self._client,
@@ -314,7 +373,6 @@ class LLM(llm.LLM):
             chat_ctx=chat_ctx,
             tools=tools or [],
             conn_options=conn_options,
-            gemini_tools=gemini_tools,
             extra_kwargs=extra,
         )

@@ -322,35 +380,38 @@ class LLM(llm.LLM):
 class LLMStream(llm.LLMStream):
     def __init__(
         self,
-
+        llm_v: LLM,
         *,
         client: Client,
         model: str | ChatModels,
         chat_ctx: llm.ChatContext,
         conn_options: APIConnectOptions,
-        tools: list[
+        tools: list[llm.Tool],
         extra_kwargs: dict[str, Any],
-        gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
     ) -> None:
-        super().__init__(
+        super().__init__(llm_v, chat_ctx=chat_ctx, tools=tools, conn_options=conn_options)
         self._client = client
         self._model = model
-        self._llm: LLM =
+        self._llm: LLM = llm_v
         self._extra_kwargs = extra_kwargs
-        self.
+        self._tool_ctx = llm.ToolContext(tools)

     async def _run(self) -> None:
         retryable = True
         request_id = utils.shortuuid()

         try:
-
-
-
-            tools_config = create_tools_config(
-                function_tools=function_declarations,
-                gemini_tools=self._gemini_tools if is_given(self._gemini_tools) else None,
+            # Pass thought_signatures for Gemini 3 multi-turn function calling
+            thought_sigs = (
+                self._llm._thought_signatures if _is_gemini_3_model(self._model) else None
             )
+            turns_dict, extra_data = self._chat_ctx.to_provider_format(
+                format="google", thought_signatures=thought_sigs
+            )
+
+            turns = [types.Content.model_validate(turn) for turn in turns_dict]
+            tool_context = llm.ToolContext(self._tools)
+            tools_config = create_tools_config(tool_context, _only_single_type=True)
             if tools_config:
                 self._extra_kwargs["tools"] = tools_config
             http_options = self._llm._opts.http_options or types.HttpOptions(
@@ -368,31 +429,25 @@ class LLMStream(llm.LLMStream):
                 http_options=http_options,
                 **self._extra_kwargs,
             )
+
             stream = await self._client.aio.models.generate_content_stream(
                 model=self._model,
                 contents=cast(types.ContentListUnion, turns),
                 config=config,
             )

+            response_generated = False
+            finish_reason: types.FinishReason | None = None
             async for response in stream:
                 if response.prompt_feedback:
                     raise APIStatusError(
-                        response.prompt_feedback.
+                        response.prompt_feedback.model_dump_json(),
                         retryable=False,
                         request_id=request_id,
                     )

-                if
-
-                    or not response.candidates[0].content
-                    or not response.candidates[0].content.parts
-                ):
-                    logger.warning(f"no content in the response: {response}")
-                    raise APIStatusError(
-                        "no content in the response",
-                        retryable=True,
-                        request_id=request_id,
-                    )
+                if not response.candidates:
+                    continue

                 if len(response.candidates) > 1:
                     logger.warning(
@@ -401,35 +456,25 @@ class LLMStream(llm.LLMStream):

                 candidate = response.candidates[0]

-                if candidate.finish_reason in BLOCKED_REASONS:
-                    raise APIStatusError(
-                        f"generation blocked by gemini: {candidate.finish_reason}",
-                        retryable=False,
-                        request_id=request_id,
-                    )
-
                 if not candidate.content or not candidate.content.parts:
-
-
-
-
-
+                    continue
+
+                if candidate.finish_reason is not None:
+                    finish_reason = candidate.finish_reason
+                    if candidate.finish_reason in BLOCKED_REASONS:
+                        raise APIStatusError(
+                            f"generation blocked by gemini: {candidate.finish_reason}",
+                            retryable=False,
+                            request_id=request_id,
+                        )

-                chunks_yielded = False
                 for part in candidate.content.parts:
                     chat_chunk = self._parse_part(request_id, part)
+                    response_generated = True
                     if chat_chunk is not None:
-                        chunks_yielded = True
                         retryable = False
                         self._event_ch.send_nowait(chat_chunk)

-                if candidate.finish_reason == types.FinishReason.STOP and not chunks_yielded:
-                    raise APIStatusError(
-                        "no response generated",
-                        retryable=retryable,
-                        request_id=request_id,
-                    )
-
                 if response.usage_metadata is not None:
                     usage = response.usage_metadata
                     self._event_ch.send_nowait(
@@ -444,6 +489,14 @@ class LLMStream(llm.LLMStream):
                         )
                     )

+            if not response_generated:
+                raise APIStatusError(
+                    "no response generated",
+                    retryable=retryable,
+                    request_id=request_id,
+                    body=f"finish reason: {finish_reason}",
+                )
+
         except ClientError as e:
             raise APIStatusError(
                 "gemini llm: client error",
@@ -476,17 +529,25 @@ class LLMStream(llm.LLMStream):

     def _parse_part(self, id: str, part: types.Part) -> llm.ChatChunk | None:
         if part.function_call:
+            tool_call = llm.FunctionToolCall(
+                arguments=json.dumps(part.function_call.args),
+                name=part.function_call.name,
+                call_id=part.function_call.id or utils.shortuuid("function_call_"),
+            )
+
+            # Store thought_signature for Gemini 3 multi-turn function calling
+            if (
+                _is_gemini_3_model(self._model)
+                and hasattr(part, "thought_signature")
+                and part.thought_signature
+            ):
+                self._llm._thought_signatures[tool_call.call_id] = part.thought_signature
+
             chat_chunk = llm.ChatChunk(
                 id=id,
                 delta=llm.ChoiceDelta(
                     role="assistant",
-                    tool_calls=[
-                        llm.FunctionToolCall(
-                            arguments=json.dumps(part.function_call.args),
-                            name=part.function_call.name,
-                            call_id=part.function_call.id or utils.shortuuid("function_call_"),
-                        )
-                    ],
+                    tool_calls=[tool_call],
                     content=part.text,
                 ),
             )
livekit/plugins/google/models.py
CHANGED
@@ -1,6 +1,6 @@
 from typing import Literal

-# Speech to Text v2
+# Speech to Text (v1 and v2)

 SpeechModels = Literal[
     "long",
@@ -14,6 +14,13 @@ SpeechModels = Literal[
     "latest_long",
     "latest_short",
 ]
+# https://docs.cloud.google.com/speech-to-text/docs/transcription-model
+
+SpeechModelsV2 = Literal[
+    "telephony",
+    "chirp_2",
+    "chirp_3",
+]

 SpeechLanguages = Literal[
     "af-ZA",
@@ -189,6 +196,9 @@ SpeechLanguages = Literal[
 Gender = Literal["male", "female", "neutral"]

 ChatModels = Literal[
+    "gemini-3-pro-preview",
+    "gemini-3-flash-preview",
+    "gemini-2.5-flash",
     "gemini-2.5-pro-preview-05-06",
     "gemini-2.5-flash-preview-04-17",
     "gemini-2.5-flash-preview-05-20",
@@ -197,3 +207,7 @@ ChatModels = Literal[
     "gemini-2.0-pro-exp-02-05",
     "gemini-1.5-pro",
 ]
+
+GeminiTTSModels = Literal[
+    "gemini-2.5-flash-tts", "gemini-2.5-flash-lite-preview-tts", "gemini-2.5-pro-tts"
+]
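The new names are `Literal` type aliases only; they matter for static checking and editor completion rather than runtime behaviour. For example:

```python
from livekit.plugins.google.models import ChatModels, GeminiTTSModels, SpeechModelsV2

chat_model: ChatModels = "gemini-3-flash-preview"    # new Gemini 3 entries
stt_model: SpeechModelsV2 = "chirp_3"                # new v2 recognizer models
tts_model: GeminiTTSModels = "gemini-2.5-flash-tts"  # new Gemini TTS models
```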
livekit/plugins/google/realtime/api_proto.py
CHANGED
@@ -5,19 +5,21 @@ from typing import Literal, Union

 from google.genai import types

+# Gemini API deprecations: https://ai.google.dev/gemini-api/docs/deprecations
+# Gemini API release notes with preview deprecations: https://ai.google.dev/gemini-api/docs/changelog
+# live models: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/live-api
+# VertexAI retirement: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/learn/model-versions#retired-models
+# Additional references:
+# 1. https://github.com/kazunori279/adk-streaming-test/blob/main/test_report.md
 LiveAPIModels = Literal[
     # VertexAI models
-    "gemini-live-2.5-flash-native-audio",
-    "gemini-live-2.5-flash-preview-native-audio",
-    #
-    "gemini-2.0-flash-exp",
-    "gemini-live-2.5-flash-preview-native-audio-09-2025",
+    "gemini-live-2.5-flash-native-audio",  # GA https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-live-api#live-2.5-flash
+    "gemini-live-2.5-flash-preview-native-audio-09-2025",  # Public preview https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-live-api#live-2.5-flash-preview
+    "gemini-live-2.5-flash-preview-native-audio",  # still works, possibly an alias, but not mentioned in any docs or changelog
     # Gemini API models
-    "gemini-2.5-flash-native-audio-preview-12-2025",
-    "gemini-
-    #
-    "gemini-2.0-flash-live-001",
-    "gemini-2.5-flash-native-audio-preview-09-2025",
+    "gemini-2.5-flash-native-audio-preview-12-2025",  # https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-live
+    "gemini-2.5-flash-native-audio-preview-09-2025",  # https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-live
+    "gemini-2.0-flash-exp",  # still works in Gemini API but not VertexAI
 ]

 Voice = Literal[