google-genai 1.7.0__py3-none-any.whl → 1.53.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. google/genai/__init__.py +4 -2
  2. google/genai/_adapters.py +55 -0
  3. google/genai/_api_client.py +1301 -299
  4. google/genai/_api_module.py +1 -1
  5. google/genai/_automatic_function_calling_util.py +54 -33
  6. google/genai/_base_transformers.py +26 -0
  7. google/genai/_base_url.py +50 -0
  8. google/genai/_common.py +560 -59
  9. google/genai/_extra_utils.py +371 -38
  10. google/genai/_live_converters.py +1467 -0
  11. google/genai/_local_tokenizer_loader.py +214 -0
  12. google/genai/_mcp_utils.py +117 -0
  13. google/genai/_operations_converters.py +394 -0
  14. google/genai/_replay_api_client.py +204 -92
  15. google/genai/_test_api_client.py +1 -1
  16. google/genai/_tokens_converters.py +520 -0
  17. google/genai/_transformers.py +633 -233
  18. google/genai/batches.py +1733 -538
  19. google/genai/caches.py +678 -1012
  20. google/genai/chats.py +48 -38
  21. google/genai/client.py +142 -15
  22. google/genai/documents.py +532 -0
  23. google/genai/errors.py +141 -35
  24. google/genai/file_search_stores.py +1296 -0
  25. google/genai/files.py +312 -744
  26. google/genai/live.py +617 -367
  27. google/genai/live_music.py +197 -0
  28. google/genai/local_tokenizer.py +395 -0
  29. google/genai/models.py +3598 -3116
  30. google/genai/operations.py +201 -362
  31. google/genai/pagers.py +23 -7
  32. google/genai/py.typed +1 -0
  33. google/genai/tokens.py +362 -0
  34. google/genai/tunings.py +1274 -496
  35. google/genai/types.py +14535 -5454
  36. google/genai/version.py +2 -2
  37. {google_genai-1.7.0.dist-info → google_genai-1.53.0.dist-info}/METADATA +736 -234
  38. google_genai-1.53.0.dist-info/RECORD +41 -0
  39. {google_genai-1.7.0.dist-info → google_genai-1.53.0.dist-info}/WHEEL +1 -1
  40. google_genai-1.7.0.dist-info/RECORD +0 -27
  41. {google_genai-1.7.0.dist-info → google_genai-1.53.0.dist-info/licenses}/LICENSE +0 -0
  42. {google_genai-1.7.0.dist-info → google_genai-1.53.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,214 @@
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+
16
+ import dataclasses
17
+ import functools
18
+ import hashlib
19
+ import os
20
+ import tempfile
21
+ from typing import Optional, cast
22
+ import uuid
23
+
24
+ import requests # type: ignore
25
+ import sentencepiece as spm
26
+ from sentencepiece import sentencepiece_model_pb2
27
+
28
+
29
# Source of truth: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
# Maps Gemini model aliases to the tokenizer name used to look up the
# tokenizer model in `_TOKENIZERS`.
_GEMINI_MODELS_TO_TOKENIZER_NAMES = {
    "gemini-2.5-pro": "gemma3",
    "gemini-2.5-flash": "gemma3",
    "gemini-2.5-flash-lite": "gemma3",
    "gemini-2.0-flash": "gemma3",
    "gemini-2.0-flash-lite": "gemma3",
}
# Same mapping for pinned / versioned model names.
# NOTE(review): the name says "STABLE" but most keys are preview/exp
# releases — confirm the intended naming with the maintainers.
_GEMINI_STABLE_MODELS_TO_TOKENIZER_NAMES = {
    "gemini-2.5-pro-preview-06-05": "gemma3",
    "gemini-2.5-pro-preview-05-06": "gemma3",
    "gemini-2.5-pro-exp-03-25": "gemma3",
    "gemini-live-2.5-flash": "gemma3",
    "gemini-2.5-flash-preview-05-20": "gemma3",
    "gemini-2.5-flash-preview-04-17": "gemma3",
    "gemini-2.5-flash-lite-preview-06-17": "gemma3",
    "gemini-2.0-flash-001": "gemma3",
    "gemini-2.0-flash-lite-001": "gemma3",
}
48
+
49
+
50
@dataclasses.dataclass(frozen=True)
class _TokenizerConfig:
  """Download location and integrity hash for a tokenizer model file."""

  # URL of the raw tokenizer model file.
  model_url: str
  # Expected SHA-256 hex digest of the bytes served at `model_url`.
  model_hash: str
54
+
55
+
56
# TODO: update gemma3 tokenizer
# Supported tokenizers: model files pinned to a specific commit in the
# google/gemma_pytorch repo, with the expected SHA-256 of each file.
_TOKENIZERS = {
    "gemma2": _TokenizerConfig(
        model_url="https://raw.githubusercontent.com/google/gemma_pytorch/33b652c465537c6158f9a472ea5700e5e770ad3f/tokenizer/tokenizer.model",
        model_hash=(
            "61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2"
        ),
    ),
    "gemma3": _TokenizerConfig(
        model_url="https://raw.githubusercontent.com/google/gemma_pytorch/014acb7ac4563a5f77c76d7ff98f31b568c16508/tokenizer/gemma3_cleaned_262144_v2.spiece.model",
        model_hash=(
            "1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c"
        ),
    ),
}
71
+
72
+
73
def _load_file(
    file_url_path: str, *, timeout: Optional[float] = 300.0
) -> bytes:
  """Loads file bytes from the given file url path.

  Args:
    file_url_path: URL to fetch.
    timeout: Seconds to wait for the server before giving up. ``None``
      restores the previous unbounded behavior.

  Returns:
    The raw response body.

  Raises:
    requests.HTTPError: If the server responds with an error status.
  """
  # Without an explicit timeout, requests.get can block forever on a hung
  # connection; bound the wait by default.
  resp = requests.get(file_url_path, timeout=timeout)
  resp.raise_for_status()
  return cast(bytes, resp.content)
78
+
79
+
80
+ def _is_valid_model(*, model_data: bytes, expected_hash: str) -> bool:
81
+ """Returns true if the content is valid by checking the hash."""
82
+ if not expected_hash:
83
+ raise ValueError("expected_hash is required")
84
+ return hashlib.sha256(model_data).hexdigest() == expected_hash
85
+
86
+
87
+ def _maybe_remove_file(file_path: str) -> None:
88
+ """Removes the file if exists."""
89
+ if not os.path.exists(file_path):
90
+ return
91
+ try:
92
+ os.remove(file_path)
93
+ except OSError:
94
+ # Don't raise if we cannot remove file.
95
+ pass
96
+
97
+
98
def _maybe_load_from_cache(
    *, file_path: str, expected_hash: str
) -> Optional[bytes]:
  """Returns cached bytes at ``file_path`` if present and hash-valid.

  A cached file that fails the hash check is considered corrupt: it is
  removed (best effort) and ``None`` is returned so the caller re-downloads.
  """
  if not os.path.exists(file_path):
    return None
  with open(file_path, "rb") as cache_file:
    data = cache_file.read()
  if _is_valid_model(model_data=data, expected_hash=expected_hash):
    return data
  # Corrupt cache entry: drop it so the next call starts clean.
  _maybe_remove_file(file_path)
  return None
112
+
113
+
114
+ def _maybe_save_to_cache(
115
+ *, cache_dir: str, cache_path: str, content: bytes
116
+ ) -> None:
117
+ """Saves the content to the cache path."""
118
+ try:
119
+ os.makedirs(cache_dir, exist_ok=True)
120
+ tmp_path = cache_dir + "." + str(uuid.uuid4()) + ".tmp"
121
+ with open(tmp_path, "wb") as f:
122
+ f.write(content)
123
+ os.rename(tmp_path, cache_path)
124
+ except OSError:
125
+ # Don't raise if we cannot write file.
126
+ pass
127
+
128
+
129
def _load_from_url(*, file_url: str, expected_hash: str) -> bytes:
  """Downloads the model from ``file_url`` and verifies its hash.

  Raises:
    ValueError: If the downloaded bytes do not hash to ``expected_hash``.
  """
  data = _load_file(file_url)
  if _is_valid_model(model_data=data, expected_hash=expected_hash):
    return data
  actual_hash = hashlib.sha256(data).hexdigest()
  raise ValueError(
      "Downloaded model file is corrupted."
      f" Expected hash {expected_hash}. Got file hash {actual_hash}."
  )
139
+
140
+
141
def _load(*, file_url: str, expected_hash: str) -> bytes:
  """Returns the model bytes for ``file_url``, using a temp-dir cache.

  Lookup order:
    1. If a locally cached file exists for the url and its hash matches
       ``expected_hash``, the cached bytes are returned.
    2. Otherwise the file is downloaded from the url, written back to the
       local cache (best effort), and returned.
    3. If the downloaded file does not match the expected hash, ValueError
       is raised.

  Args:
    file_url: The url of the file to load.
    expected_hash: The expected hash of the file.

  Returns:
    The file bytes.
  """
  cache_dir = os.path.join(tempfile.gettempdir(), "vertexai_tokenizer_model")
  # Cache files are keyed by the SHA-1 of the url (a filename-safe digest).
  cache_name = hashlib.sha1(file_url.encode()).hexdigest()
  cache_path = os.path.join(cache_dir, cache_name)

  data = _maybe_load_from_cache(
      file_path=cache_path, expected_hash=expected_hash
  )
  if not data:
    data = _load_from_url(file_url=file_url, expected_hash=expected_hash)
  _maybe_save_to_cache(cache_dir=cache_dir, cache_path=cache_path, content=data)
  return data
173
+
174
+
175
def _load_model_proto_bytes(tokenizer_name: str) -> bytes:
  """Loads model proto bytes for the given tokenizer name.

  Args:
    tokenizer_name: A key of ``_TOKENIZERS`` (e.g. "gemma3").

  Returns:
    The raw serialized sentencepiece model bytes.

  Raises:
    ValueError: If ``tokenizer_name`` is not a supported tokenizer.
  """
  if tokenizer_name not in _TOKENIZERS:
    # Original message was missing the space between sentences:
    # "...is not supported.Supported tokenizers: ...".
    raise ValueError(
        f"Tokenizer {tokenizer_name} is not supported. "
        f"Supported tokenizers: {list(_TOKENIZERS.keys())}"
    )
  config = _TOKENIZERS[tokenizer_name]
  return _load(file_url=config.model_url, expected_hash=config.model_hash)
186
+
187
+
188
@functools.lru_cache()
def load_model_proto(
    tokenizer_name: str,
) -> sentencepiece_model_pb2.ModelProto:
  """Parses (and caches) the sentencepiece ModelProto for a tokenizer name."""
  serialized = _load_model_proto_bytes(tokenizer_name)
  proto = sentencepiece_model_pb2.ModelProto()
  proto.ParseFromString(serialized)
  return proto
196
+
197
+
198
def get_tokenizer_name(model_name: str) -> str:
  """Gets the tokenizer name for the given model name.

  Both the current model aliases and the pinned/preview model names are
  consulted.

  Args:
    model_name: A Gemini model name, e.g. "gemini-2.5-pro".

  Returns:
    The tokenizer name (a key of ``_TOKENIZERS``).

  Raises:
    ValueError: If ``model_name`` is not a supported model.
  """
  # Single dict lookup per mapping instead of `in d.keys()` plus a second
  # indexing lookup.
  for mapping in (
      _GEMINI_MODELS_TO_TOKENIZER_NAMES,
      _GEMINI_STABLE_MODELS_TO_TOKENIZER_NAMES,
  ):
    tokenizer_name = mapping.get(model_name)
    if tokenizer_name is not None:
      return tokenizer_name
  raise ValueError(
      f"Model {model_name} is not supported. Supported models: {', '.join(_GEMINI_MODELS_TO_TOKENIZER_NAMES.keys())}, {', '.join(_GEMINI_STABLE_MODELS_TO_TOKENIZER_NAMES.keys())}.\n"  # pylint: disable=line-too-long
  )
207
+
208
+
209
@functools.lru_cache()
def get_sentencepiece(tokenizer_name: str) -> spm.SentencePieceProcessor:
  """Builds (and caches) a sentencepiece processor for a tokenizer name."""
  serialized = _load_model_proto_bytes(tokenizer_name)
  sp_processor = spm.SentencePieceProcessor()
  sp_processor.LoadFromSerializedProto(serialized)
  return sp_processor
@@ -0,0 +1,117 @@
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+
16
+ """Utils for working with MCP tools."""
17
+
18
+ from importlib.metadata import PackageNotFoundError, version
19
+ import typing
20
+ from typing import Any
21
+
22
+ from . import _common
23
+ from . import types
24
+
25
+ if typing.TYPE_CHECKING:
26
+ from mcp.types import Tool as McpTool
27
+ from mcp import ClientSession as McpClientSession
28
+ else:
29
+ McpClientSession: typing.Type = Any
30
+ McpTool: typing.Type = Any
31
+ try:
32
+ from mcp.types import Tool as McpTool
33
+ from mcp import ClientSession as McpClientSession
34
+ except ImportError:
35
+ McpTool = None
36
+ McpClientSession = None
37
+
38
+
39
def mcp_to_gemini_tool(tool: McpTool) -> types.Tool:
  """Translates an MCP tool to a Google GenAI tool."""
  filtered_schema = _filter_to_supported_schema(tool.inputSchema)
  parameters = types.Schema.from_json_schema(
      json_schema=types.JSONSchema(**filtered_schema)
  )
  declaration = {
      "name": tool.name,
      "description": tool.description,
      "parameters": parameters,
  }
  return types.Tool(function_declarations=[declaration])
52
+
53
+
54
def mcp_to_gemini_tools(tools: list[McpTool]) -> list[types.Tool]:
  """Translates a list of MCP tools to a list of Google GenAI tools."""
  return list(map(mcp_to_gemini_tool, tools))
57
+
58
+
59
def has_mcp_tool_usage(tools: types.ToolListUnion) -> bool:
  """Checks whether the list of tools contains any MCP tools or sessions."""
  if McpClientSession is None:
    # The mcp package is not installed, so no MCP objects can exist.
    return False
  return any(
      isinstance(tool, (McpTool, McpClientSession)) for tool in tools
  )
67
+
68
+
69
def has_mcp_session_usage(tools: types.ToolListUnion) -> bool:
  """Checks whether the list of tools contains any MCP sessions."""
  if McpClientSession is None:
    # The mcp package is not installed, so no sessions can exist.
    return False
  return any(isinstance(tool, McpClientSession) for tool in tools)
77
+
78
+
79
def set_mcp_usage_header(headers: dict[str, str]) -> None:
  """Appends an ``mcp_used/<version>`` label to the x-goog-api-client header.

  No-op when the mcp package is not installed. Mutates ``headers`` in place.
  """
  if McpClientSession is None:
    return
  try:
    mcp_version = version("mcp")
  except PackageNotFoundError:
    # mcp is importable but its distribution metadata is missing.
    mcp_version = "0.0.0"
  previous = headers.get("x-goog-api-client", "")
  headers["x-goog-api-client"] = f"{previous} mcp_used/{mcp_version}".lstrip()
91
+
92
+
93
def _filter_to_supported_schema(
    schema: _common.StringDict,
) -> _common.StringDict:
  """Filters the schema to only include fields that are supported by JSONSchema.

  Builds a new dictionary at every level; the original implementation
  rewrote nested values of the caller's ``schema`` in place, which leaked a
  side effect into the caller's MCP tool object.

  Args:
    schema: A JSON-schema-like dictionary (e.g. an MCP tool input schema).

  Returns:
    A new dictionary containing only keys that are fields of
    ``types.JSONSchema``, with nested schemas filtered recursively.
  """
  # NOTE(review): standard JSON Schema uses camelCase keys ("anyOf"); this
  # filter matches snake_case ("any_of") — confirm callers normalize keys
  # before reaching here.
  supported_fields: set[str] = set(types.JSONSchema.model_fields.keys())
  # Keys whose value is itself a schema.
  schema_field_names: tuple[str, ...] = ("items",)  # 'additional_properties' to come
  # Keys whose value is a list of schemas.
  list_schema_field_names: tuple[str, ...] = (
      "any_of",  # 'one_of', 'all_of', 'not' to come
  )
  # Keys whose value is a mapping of name -> schema.
  dict_schema_field_names: tuple[str, ...] = ("properties",)  # 'defs' to come

  filtered: _common.StringDict = {}
  for field_name, field_value in schema.items():
    if field_name not in supported_fields:
      continue
    if field_name in schema_field_names:
      filtered[field_name] = _filter_to_supported_schema(field_value)
    elif field_name in list_schema_field_names:
      filtered[field_name] = [
          _filter_to_supported_schema(value) for value in field_value
      ]
    elif field_name in dict_schema_field_names:
      filtered[field_name] = {
          key: _filter_to_supported_schema(value)
          for key, value in field_value.items()
      }
    else:
      filtered[field_name] = field_value
  return filtered
+ }