livekit-plugins-google 0.3.0__py3-none-any.whl → 1.3.11__py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between the two versions.
- livekit/plugins/google/__init__.py +33 -7
- livekit/plugins/google/beta/__init__.py +13 -0
- livekit/plugins/google/beta/gemini_tts.py +258 -0
- livekit/plugins/google/llm.py +562 -0
- livekit/plugins/google/log.py +3 -0
- livekit/plugins/google/models.py +160 -32
- livekit/plugins/google/realtime/__init__.py +9 -0
- livekit/plugins/google/realtime/api_proto.py +68 -0
- livekit/plugins/google/realtime/realtime_api.py +1249 -0
- livekit/plugins/google/stt.py +717 -283
- livekit/plugins/google/tools.py +71 -0
- livekit/plugins/google/tts.py +455 -0
- livekit/plugins/google/utils.py +220 -0
- livekit/plugins/google/version.py +1 -1
- livekit_plugins_google-1.3.11.dist-info/METADATA +63 -0
- livekit_plugins_google-1.3.11.dist-info/RECORD +18 -0
- {livekit_plugins_google-0.3.0.dist-info → livekit_plugins_google-1.3.11.dist-info}/WHEEL +1 -2
- livekit_plugins_google-0.3.0.dist-info/METADATA +0 -47
- livekit_plugins_google-0.3.0.dist-info/RECORD +0 -9
- livekit_plugins_google-0.3.0.dist-info/top_level.txt +0 -1
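The 1.3.11 wheel adds a full chat LLM (llm.py), a rewritten STT, new TTS and tools modules, and a realtime API client. Before the llm.py hunk below, a minimal usage sketch of the new LLM class based on its constructor and docstring; the import path follows the livekit/plugins/google package layout shown above, the project id is a placeholder, and credentials are assumed to come from GOOGLE_API_KEY or Google Cloud auth:

# Usage sketch (not part of the diff); assumes GOOGLE_API_KEY is set for the
# Gemini API path, or Google Cloud credentials for the Vertex AI path.
from livekit.plugins import google

# Gemini API: only an API key is needed.
gemini_llm = google.LLM(
    model="gemini-2.5-flash",
    temperature=0.8,
)

# Vertex AI: project/location come from kwargs or the
# GOOGLE_CLOUD_PROJECT / GOOGLE_CLOUD_LOCATION environment variables.
vertex_llm = google.LLM(
    model="gemini-2.5-flash",
    vertexai=True,
    project="my-gcp-project",  # placeholder project id
    location="us-central1",
)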
@@ -0,0 +1,562 @@
+# Copyright 2023 LiveKit, Inc.
+#
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import json
+import os
+from dataclasses import dataclass
+from typing import Any, cast
+
+from google.auth._default_async import default_async
+from google.genai import Client, types
+from google.genai.errors import APIError, ClientError, ServerError
+from livekit.agents import APIConnectionError, APIStatusError, llm, utils
+from livekit.agents.llm import ToolChoice, utils as llm_utils
+from livekit.agents.types import (
+    DEFAULT_API_CONNECT_OPTIONS,
+    NOT_GIVEN,
+    APIConnectOptions,
+    NotGivenOr,
+)
+from livekit.agents.utils import is_given
+
+from .log import logger
+from .models import ChatModels
+from .utils import create_tools_config, to_response_format
+from .version import __version__
+
+
+def _is_gemini_3_model(model: str) -> bool:
+    """Check if model is Gemini 3 series"""
+    return "gemini-3" in model.lower() or model.lower().startswith("gemini-3")
+
+
+def _is_gemini_3_flash_model(model: str) -> bool:
+    """Check if model is Gemini 3 Flash"""
+    return "gemini-3-flash" in model.lower() or model.lower().startswith("gemini-3-flash")
+
+
+@dataclass
+class _LLMOptions:
+    model: ChatModels | str
+    temperature: NotGivenOr[float]
+    tool_choice: NotGivenOr[ToolChoice]
+    vertexai: NotGivenOr[bool]
+    project: NotGivenOr[str]
+    location: NotGivenOr[str]
+    max_output_tokens: NotGivenOr[int]
+    top_p: NotGivenOr[float]
+    top_k: NotGivenOr[float]
+    presence_penalty: NotGivenOr[float]
+    frequency_penalty: NotGivenOr[float]
+    thinking_config: NotGivenOr[types.ThinkingConfigOrDict]
+    retrieval_config: NotGivenOr[types.RetrievalConfigOrDict]
+    automatic_function_calling_config: NotGivenOr[types.AutomaticFunctionCallingConfigOrDict]
+    http_options: NotGivenOr[types.HttpOptions]
+    seed: NotGivenOr[int]
+    safety_settings: NotGivenOr[list[types.SafetySettingOrDict]]
+
+
+BLOCKED_REASONS = [
+    types.FinishReason.SAFETY,
+    types.FinishReason.SPII,
+    types.FinishReason.PROHIBITED_CONTENT,
+    types.FinishReason.BLOCKLIST,
+    types.FinishReason.LANGUAGE,
+    types.FinishReason.RECITATION,
+]
+
+
+class LLM(llm.LLM):
+    def __init__(
+        self,
+        *,
+        model: ChatModels | str = "gemini-2.5-flash",
+        api_key: NotGivenOr[str] = NOT_GIVEN,
+        vertexai: NotGivenOr[bool] = NOT_GIVEN,
+        project: NotGivenOr[str] = NOT_GIVEN,
+        location: NotGivenOr[str] = NOT_GIVEN,
+        temperature: NotGivenOr[float] = NOT_GIVEN,
+        max_output_tokens: NotGivenOr[int] = NOT_GIVEN,
+        top_p: NotGivenOr[float] = NOT_GIVEN,
+        top_k: NotGivenOr[float] = NOT_GIVEN,
+        presence_penalty: NotGivenOr[float] = NOT_GIVEN,
+        frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
+        tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
+        thinking_config: NotGivenOr[types.ThinkingConfigOrDict] = NOT_GIVEN,
+        retrieval_config: NotGivenOr[types.RetrievalConfigOrDict] = NOT_GIVEN,
+        automatic_function_calling_config: NotGivenOr[
+            types.AutomaticFunctionCallingConfigOrDict
+        ] = NOT_GIVEN,
+        http_options: NotGivenOr[types.HttpOptions] = NOT_GIVEN,
+        seed: NotGivenOr[int] = NOT_GIVEN,
+        safety_settings: NotGivenOr[list[types.SafetySettingOrDict]] = NOT_GIVEN,
+    ) -> None:
+        """
+        Create a new instance of Google GenAI LLM.
+
+        Environment Requirements:
+        - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the service account key file or use any of the other Google Cloud auth methods.
+            The Google Cloud project and location can be set via `project` and `location` arguments or the environment variables
+            `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION`. By default, the project is inferred from the service account key file,
+            and the location defaults to "us-central1".
+        - For Google Gemini API: Set the `api_key` argument or the `GOOGLE_API_KEY` environment variable.
+
+        Args:
+            model (ChatModels | str, optional): The model name to use. Defaults to "gemini-2.0-flash-001".
+            api_key (str, optional): The API key for Google Gemini. If not provided, it attempts to read from the `GOOGLE_API_KEY` environment variable.
+            vertexai (bool, optional): Whether to use VertexAI. If not provided, it attempts to read from the `GOOGLE_GENAI_USE_VERTEXAI` environment variable. Defaults to False.
+            project (str, optional): The Google Cloud project to use (only for VertexAI). Defaults to None.
+            location (str, optional): The location to use for VertexAI API requests. Defaults value is "us-central1".
+            temperature (float, optional): Sampling temperature for response generation. Defaults to 0.8.
+            max_output_tokens (int, optional): Maximum number of tokens to generate in the output. Defaults to None.
+            top_p (float, optional): The nucleus sampling probability for response generation. Defaults to None.
+            top_k (int, optional): The top-k sampling value for response generation. Defaults to None.
+            presence_penalty (float, optional): Penalizes the model for generating previously mentioned concepts. Defaults to None.
+            frequency_penalty (float, optional): Penalizes the model for repeating words. Defaults to None.
+            tool_choice (ToolChoice, optional): Specifies whether to use tools during response generation. Defaults to "auto".
+            thinking_config (ThinkingConfigOrDict, optional): The thinking configuration for response generation. Defaults to None.
+            retrieval_config (RetrievalConfigOrDict, optional): The retrieval configuration for response generation. Defaults to None.
+            automatic_function_calling_config (AutomaticFunctionCallingConfigOrDict, optional): The automatic function calling configuration for response generation. Defaults to None.
+            http_options (HttpOptions, optional): The HTTP options to use for the session.
+            seed (int, optional): Random seed for reproducible generation. Defaults to None.
+            safety_settings (list[SafetySettingOrDict], optional): Safety settings for content filtering. Defaults to None.
+        """  # noqa: E501
+        super().__init__()
+        gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
+        gcp_location: str | None = (
+            location
+            if is_given(location)
+            else os.environ.get("GOOGLE_CLOUD_LOCATION") or "us-central1"
+        )
+        use_vertexai = (
+            vertexai
+            if is_given(vertexai)
+            else os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "0").lower() in ["true", "1"]
+        )
+        gemini_api_key = api_key if is_given(api_key) else os.environ.get("GOOGLE_API_KEY")
+
+        if use_vertexai:
+            if not gcp_project:
+                _, gcp_project = default_async(  # type: ignore
+                    scopes=["https://www.googleapis.com/auth/cloud-platform"]
+                )
+            if not gcp_project or not gcp_location:
+                raise ValueError(
+                    "Project is required for VertexAI via project kwarg or GOOGLE_CLOUD_PROJECT environment variable"  # noqa: E501
+                )
+            gemini_api_key = None  # VertexAI does not require an API key
+
+        else:
+            gcp_project = None
+            gcp_location = None
+            if not gemini_api_key:
+                raise ValueError(
+                    "API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"  # noqa: E501
+                )
+
+        # Validate thinking_config
+        if is_given(thinking_config):
+            _thinking_budget = None
+            _thinking_level = None
+            if isinstance(thinking_config, dict):
+                _thinking_budget = thinking_config.get("thinking_budget")
+                _thinking_level = thinking_config.get("thinking_level")
+            elif isinstance(thinking_config, types.ThinkingConfig):
+                _thinking_budget = thinking_config.thinking_budget
+                _thinking_level = getattr(thinking_config, "thinking_level", None)
+
+            if _thinking_budget is not None:
+                if not isinstance(_thinking_budget, int):
+                    raise ValueError("thinking_budget inside thinking_config must be an integer")
+
+        self._opts = _LLMOptions(
+            model=model,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            vertexai=use_vertexai,
+            project=project,
+            location=location,
+            max_output_tokens=max_output_tokens,
+            top_p=top_p,
+            top_k=top_k,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            thinking_config=thinking_config,
+            retrieval_config=retrieval_config,
+            automatic_function_calling_config=automatic_function_calling_config,
+            http_options=http_options,
+            seed=seed,
+            safety_settings=safety_settings,
+        )
+        self._client = Client(
+            api_key=gemini_api_key,
+            vertexai=use_vertexai,
+            project=gcp_project,
+            location=gcp_location,
+        )
+        # Store thought_signatures for Gemini 3 multi-turn function calling
+        self._thought_signatures: dict[str, bytes] = {}
+
+    @property
+    def model(self) -> str:
+        return self._opts.model
+
+    @property
+    def provider(self) -> str:
+        if self._client.vertexai:
+            return "Vertex AI"
+        else:
+            return "Gemini"
+
+    def chat(
+        self,
+        *,
+        chat_ctx: llm.ChatContext,
+        tools: list[llm.Tool] | None = None,
+        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
+        parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
+        tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
+        response_format: NotGivenOr[
+            types.SchemaUnion | type[llm_utils.ResponseFormatT]
+        ] = NOT_GIVEN,
+        extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
+    ) -> LLMStream:
+        extra = {}
+
+        if is_given(extra_kwargs):
+            extra.update(extra_kwargs)
+
+        tool_choice = (
+            cast(ToolChoice, tool_choice) if is_given(tool_choice) else self._opts.tool_choice
+        )
+        retrieval_config = (
+            self._opts.retrieval_config if is_given(self._opts.retrieval_config) else None
+        )
+        if isinstance(retrieval_config, dict):
+            retrieval_config = types.RetrievalConfig.model_validate(retrieval_config)
+
+        if is_given(tool_choice):
+            gemini_tool_choice: types.ToolConfig
+            if isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
+                gemini_tool_choice = types.ToolConfig(
+                    function_calling_config=types.FunctionCallingConfig(
+                        mode=types.FunctionCallingConfigMode.ANY,
+                        allowed_function_names=[tool_choice["function"]["name"]],
+                    ),
+                    retrieval_config=retrieval_config,
+                )
+                extra["tool_config"] = gemini_tool_choice
+            elif tool_choice == "required":
+                tool_names = []
+                for tool in tools or []:
+                    if isinstance(tool, (llm.FunctionTool, llm.RawFunctionTool)):
+                        tool_names.append(tool.info.name)
+
+                gemini_tool_choice = types.ToolConfig(
+                    function_calling_config=types.FunctionCallingConfig(
+                        mode=types.FunctionCallingConfigMode.ANY,
+                        allowed_function_names=tool_names or None,
+                    ),
+                    retrieval_config=retrieval_config,
+                )
+                extra["tool_config"] = gemini_tool_choice
+            elif tool_choice == "auto":
+                gemini_tool_choice = types.ToolConfig(
+                    function_calling_config=types.FunctionCallingConfig(
+                        mode=types.FunctionCallingConfigMode.AUTO,
+                    ),
+                    retrieval_config=retrieval_config,
+                )
+                extra["tool_config"] = gemini_tool_choice
+            elif tool_choice == "none":
+                gemini_tool_choice = types.ToolConfig(
+                    function_calling_config=types.FunctionCallingConfig(
+                        mode=types.FunctionCallingConfigMode.NONE,
+                    ),
+                    retrieval_config=retrieval_config,
+                )
+                extra["tool_config"] = gemini_tool_choice
+        elif retrieval_config:
+            extra["tool_config"] = types.ToolConfig(
+                retrieval_config=retrieval_config,
+            )
+
+        if is_given(response_format):
+            extra["response_schema"] = to_response_format(response_format)  # type: ignore
+            extra["response_mime_type"] = "application/json"
+
+        if is_given(self._opts.temperature):
+            extra["temperature"] = self._opts.temperature
+        if is_given(self._opts.max_output_tokens):
+            extra["max_output_tokens"] = self._opts.max_output_tokens
+        if is_given(self._opts.top_p):
+            extra["top_p"] = self._opts.top_p
+        if is_given(self._opts.top_k):
+            extra["top_k"] = self._opts.top_k
+        if is_given(self._opts.presence_penalty):
+            extra["presence_penalty"] = self._opts.presence_penalty
+        if is_given(self._opts.frequency_penalty):
+            extra["frequency_penalty"] = self._opts.frequency_penalty
+        if is_given(self._opts.seed):
+            extra["seed"] = self._opts.seed
+
+        # Handle thinking_config based on model version
+        if is_given(self._opts.thinking_config):
+            is_gemini_3 = _is_gemini_3_model(self._opts.model)
+            is_gemini_3_flash = _is_gemini_3_flash_model(self._opts.model)
+            thinking_cfg = self._opts.thinking_config
+
+            # Extract both parameters
+            _budget = None
+            _level = None
+            if isinstance(thinking_cfg, dict):
+                _budget = thinking_cfg.get("thinking_budget")
+                _level = thinking_cfg.get("thinking_level")
+            elif isinstance(thinking_cfg, types.ThinkingConfig):
+                _budget = thinking_cfg.thinking_budget
+                _level = getattr(thinking_cfg, "thinking_level", None)
+
+            if is_gemini_3:
+                # Gemini 3: only support thinking_level
+                if _budget is not None and _level is None:
+                    logger.warning(
+                        f"Model {self._opts.model} is Gemini 3 which does not support thinking_budget. "
+                        "Please use thinking_level ('low' or 'high') instead. Ignoring thinking_budget."
+                    )
+                if _level is None:
+                    # If no thinking_level is provided, use the fastest thinking level
+                    if is_gemini_3_flash:
+                        _level = "minimal"
+                    else:
+                        _level = "low"
+                # Use thinking_level only (pass as dict since SDK may not have this field yet)
+                extra["thinking_config"] = {"thinking_level": _level}
+
+            else:
+                # Gemini 2.5 and earlier: only support thinking_budget
+                if _level is not None and _budget is None:
+                    raise ValueError(
+                        f"Model {self._opts.model} does not support thinking_level. "
+                        "Please use thinking_budget (int) instead for Gemini 2.5 and earlier models."
+                    )
+                if _budget is not None:
+                    # Use thinking_budget only
+                    extra["thinking_config"] = types.ThinkingConfig(thinking_budget=_budget)
+                else:
+                    # Pass through original config if no specific handling needed
+                    extra["thinking_config"] = self._opts.thinking_config
+
+        if is_given(self._opts.automatic_function_calling_config):
+            extra["automatic_function_calling"] = self._opts.automatic_function_calling_config
+
+        if is_given(self._opts.safety_settings):
+            extra["safety_settings"] = self._opts.safety_settings
+
+        return LLMStream(
+            self,
+            client=self._client,
+            model=self._opts.model,
+            chat_ctx=chat_ctx,
+            tools=tools or [],
+            conn_options=conn_options,
+            extra_kwargs=extra,
+        )
+
+
+class LLMStream(llm.LLMStream):
+    def __init__(
+        self,
+        llm_v: LLM,
+        *,
+        client: Client,
+        model: str | ChatModels,
+        chat_ctx: llm.ChatContext,
+        conn_options: APIConnectOptions,
+        tools: list[llm.Tool],
+        extra_kwargs: dict[str, Any],
+    ) -> None:
+        super().__init__(llm_v, chat_ctx=chat_ctx, tools=tools, conn_options=conn_options)
+        self._client = client
+        self._model = model
+        self._llm: LLM = llm_v
+        self._extra_kwargs = extra_kwargs
+        self._tool_ctx = llm.ToolContext(tools)
+
+    async def _run(self) -> None:
+        retryable = True
+        request_id = utils.shortuuid()
+
+        try:
+            # Pass thought_signatures for Gemini 3 multi-turn function calling
+            thought_sigs = (
+                self._llm._thought_signatures if _is_gemini_3_model(self._model) else None
+            )
+            turns_dict, extra_data = self._chat_ctx.to_provider_format(
+                format="google", thought_signatures=thought_sigs
+            )
+
+            turns = [types.Content.model_validate(turn) for turn in turns_dict]
+            tool_context = llm.ToolContext(self._tools)
+            tools_config = create_tools_config(tool_context, _only_single_type=True)
+            if tools_config:
+                self._extra_kwargs["tools"] = tools_config
+            http_options = self._llm._opts.http_options or types.HttpOptions(
+                timeout=int(self._conn_options.timeout * 1000)
+            )
+            if not http_options.headers:
+                http_options.headers = {}
+            http_options.headers["x-goog-api-client"] = f"livekit-agents/{__version__}"
+            config = types.GenerateContentConfig(
+                system_instruction=(
+                    [types.Part(text=content) for content in extra_data.system_messages]
+                    if extra_data.system_messages
+                    else None
+                ),
+                http_options=http_options,
+                **self._extra_kwargs,
+            )
+
+            stream = await self._client.aio.models.generate_content_stream(
+                model=self._model,
+                contents=cast(types.ContentListUnion, turns),
+                config=config,
+            )
+
+            response_generated = False
+            finish_reason: types.FinishReason | None = None
+            async for response in stream:
+                if response.prompt_feedback:
+                    raise APIStatusError(
+                        response.prompt_feedback.model_dump_json(),
+                        retryable=False,
+                        request_id=request_id,
+                    )
+
+                if not response.candidates:
+                    continue
+
+                if len(response.candidates) > 1:
+                    logger.warning(
+                        "gemini llm: there are multiple candidates in the response, returning response from the first one."  # noqa: E501
+                    )
+
+                candidate = response.candidates[0]
+
+                if not candidate.content or not candidate.content.parts:
+                    continue
+
+                if candidate.finish_reason is not None:
+                    finish_reason = candidate.finish_reason
+                    if candidate.finish_reason in BLOCKED_REASONS:
+                        raise APIStatusError(
+                            f"generation blocked by gemini: {candidate.finish_reason}",
+                            retryable=False,
+                            request_id=request_id,
+                        )
+
+                for part in candidate.content.parts:
+                    chat_chunk = self._parse_part(request_id, part)
+                    response_generated = True
+                    if chat_chunk is not None:
+                        retryable = False
+                        self._event_ch.send_nowait(chat_chunk)
+
+                if response.usage_metadata is not None:
+                    usage = response.usage_metadata
+                    self._event_ch.send_nowait(
+                        llm.ChatChunk(
+                            id=request_id,
+                            usage=llm.CompletionUsage(
+                                completion_tokens=usage.candidates_token_count or 0,
+                                prompt_tokens=usage.prompt_token_count or 0,
+                                prompt_cached_tokens=usage.cached_content_token_count or 0,
+                                total_tokens=usage.total_token_count or 0,
+                            ),
+                        )
+                    )
+
+            if not response_generated:
+                raise APIStatusError(
+                    "no response generated",
+                    retryable=retryable,
+                    request_id=request_id,
+                    body=f"finish reason: {finish_reason}",
+                )
+
+        except ClientError as e:
+            raise APIStatusError(
+                "gemini llm: client error",
+                status_code=e.code,
+                body=f"{e.message} {e.status}",
+                request_id=request_id,
+                retryable=False if e.code != 429 else True,
+            ) from e
+        except ServerError as e:
+            raise APIStatusError(
+                "gemini llm: server error",
+                status_code=e.code,
+                body=f"{e.message} {e.status}",
+                request_id=request_id,
+                retryable=retryable,
+            ) from e
+        except APIError as e:
+            raise APIStatusError(
+                "gemini llm: api error",
+                status_code=e.code,
+                body=f"{e.message} {e.status}",
+                request_id=request_id,
+                retryable=retryable,
+            ) from e
+        except Exception as e:
+            raise APIConnectionError(
+                f"gemini llm: error generating content {str(e)}",
+                retryable=retryable,
+            ) from e
+
+    def _parse_part(self, id: str, part: types.Part) -> llm.ChatChunk | None:
+        if part.function_call:
+            tool_call = llm.FunctionToolCall(
+                arguments=json.dumps(part.function_call.args),
+                name=part.function_call.name,
+                call_id=part.function_call.id or utils.shortuuid("function_call_"),
+            )
+
+            # Store thought_signature for Gemini 3 multi-turn function calling
+            if (
+                _is_gemini_3_model(self._model)
+                and hasattr(part, "thought_signature")
+                and part.thought_signature
+            ):
+                self._llm._thought_signatures[tool_call.call_id] = part.thought_signature
+
+            chat_chunk = llm.ChatChunk(
+                id=id,
+                delta=llm.ChoiceDelta(
+                    role="assistant",
+                    tool_calls=[tool_call],
+                    content=part.text,
+                ),
+            )
+            return chat_chunk
+
+        if not part.text:
+            return None
+
+        return llm.ChatChunk(
+            id=id,
+            delta=llm.ChoiceDelta(content=part.text, role="assistant"),
+        )