google-genai 1.31.0__py3-none-any.whl → 1.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
google/genai/caches.py CHANGED
@@ -1363,6 +1363,10 @@ def _DeleteCachedContentResponse_from_mldev(
     parent_object: Optional[dict[str, Any]] = None,
 ) -> dict[str, Any]:
   to_object: dict[str, Any] = {}
+  if getv(from_object, ['sdkHttpResponse']) is not None:
+    setv(
+        to_object, ['sdk_http_response'], getv(from_object, ['sdkHttpResponse'])
+    )

   return to_object

@@ -1427,6 +1431,10 @@ def _DeleteCachedContentResponse_from_vertex(
     parent_object: Optional[dict[str, Any]] = None,
 ) -> dict[str, Any]:
   to_object: dict[str, Any] = {}
+  if getv(from_object, ['sdkHttpResponse']) is not None:
+    setv(
+        to_object, ['sdk_http_response'], getv(from_object, ['sdkHttpResponse'])
+    )

   return to_object

@@ -1685,7 +1693,9 @@ class Caches(_api_module.BaseModule):
     return_value = types.DeleteCachedContentResponse._from_response(
         response=response_dict, kwargs=parameter_model.model_dump()
     )
-
+    return_value.sdk_http_response = types.HttpResponse(
+        headers=response.headers
+    )
     self._api_client._verify_response(return_value)
     return return_value

@@ -2077,7 +2087,9 @@ class AsyncCaches(_api_module.BaseModule):
     return_value = types.DeleteCachedContentResponse._from_response(
         response=response_dict, kwargs=parameter_model.model_dump()
     )
-
+    return_value.sdk_http_response = types.HttpResponse(
+        headers=response.headers
+    )
     self._api_client._verify_response(return_value)
     return return_value

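The caches.py hunks above map the wire field sdkHttpResponse into sdk_http_response in the mldev and Vertex response converters, and populate it from the response headers in both the sync and async delete paths. A minimal sketch of what this exposes to callers, assuming a client configured via environment variables (the cache resource name is illustrative, not taken from the diff):

from google import genai

client = genai.Client()  # assumes GOOGLE_API_KEY (or Vertex settings) in the environment

# Delete a cached-content entry; the resource name below is illustrative.
delete_response = client.caches.delete(name='cachedContents/example-cache-id')

# As of this change, the delete response also carries the raw HTTP headers.
if delete_response.sdk_http_response is not None:
    print(delete_response.sdk_http_response.headers)
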
google/genai/files.py CHANGED
@@ -307,6 +307,10 @@ def _DeleteFileResponse_from_mldev(
     parent_object: Optional[dict[str, Any]] = None,
 ) -> dict[str, Any]:
   to_object: dict[str, Any] = {}
+  if getv(from_object, ['sdkHttpResponse']) is not None:
+    setv(
+        to_object, ['sdk_http_response'], getv(from_object, ['sdkHttpResponse'])
+    )

   return to_object

@@ -430,6 +434,7 @@ class Files(_api_module.BaseModule):
         config, 'should_return_http_response', None
     ):
       return_value = types.CreateFileResponse(sdk_http_response=response)
+      self._api_client._verify_response(return_value)
       return return_value

     response_dict = '' if not response.body else json.loads(response.body)
@@ -577,7 +582,9 @@ class Files(_api_module.BaseModule):
     return_value = types.DeleteFileResponse._from_response(
         response=response_dict, kwargs=parameter_model.model_dump()
     )
-
+    return_value.sdk_http_response = types.HttpResponse(
+        headers=response.headers
+    )
     self._api_client._verify_response(return_value)
     return return_value

@@ -910,6 +917,7 @@ class AsyncFiles(_api_module.BaseModule):
         config, 'should_return_http_response', None
     ):
       return_value = types.CreateFileResponse(sdk_http_response=response)
+      self._api_client._verify_response(return_value)
       return return_value

     response_dict = '' if not response.body else json.loads(response.body)
@@ -1059,7 +1067,9 @@ class AsyncFiles(_api_module.BaseModule):
     return_value = types.DeleteFileResponse._from_response(
         response=response_dict, kwargs=parameter_model.model_dump()
     )
-
+    return_value.sdk_http_response = types.HttpResponse(
+        headers=response.headers
+    )
     self._api_client._verify_response(return_value)
     return return_value

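The files.py changes mirror the caches.py ones: _DeleteFileResponse_from_mldev now carries sdkHttpResponse through as sdk_http_response, the sync and async delete methods attach the response headers, and the should_return_http_response early-return path now runs _verify_response before returning. A short sketch of the delete path, assuming a configured client (the file resource name is illustrative):

from google import genai

client = genai.Client()  # assumes API credentials are configured in the environment

# The file resource name below is illustrative.
delete_response = client.files.delete(name='files/example-file-id')

if delete_response.sdk_http_response is not None:
    print(delete_response.sdk_http_response.headers)
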
@@ -0,0 +1,362 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""[Experimental] Text Only Local Tokenizer."""
+
+import logging
+from typing import Any, Iterable
+from typing import Optional, Union
+
+from sentencepiece import sentencepiece_model_pb2
+
+from . import _common
+from . import _local_tokenizer_loader as loader
+from . import _transformers as t
+from . import types
+from . import types
+from ._transformers import t_contents
+
+logger = logging.getLogger("google_genai.local_tokenizer")
+
+
+class _TextsAccumulator:
+  """Accumulates countable texts from `Content` and `Tool` objects.
+
+  This class is responsible for traversing complex `Content` and `Tool`
+  objects and extracting all the text content that should be included when
+  calculating token counts.
+
+  A key feature of this class is its ability to detect unsupported fields in
+  `Content` objects. If a user provides a `Content` object with fields that
+  this local tokenizer doesn't recognize (e.g., new fields added in a future
+  API update), this class will log a warning.
+
+  The detection mechanism for `Content` objects works by recursively building
+  a "counted" version of the input object. This "counted" object only
+  contains the data that was successfully processed and added to the text
+  list for tokenization. After traversing the input, the original `Content`
+  object is compared to the "counted" object. If they don't match, it
+  signifies the presence of unsupported fields, and a warning is logged.
+  """
+
+  def __init__(self) -> None:
+    self._texts: list[str] = []
+
+  def get_texts(self) -> Iterable[str]:
+    return self._texts
+
+  def add_contents(self, contents: Iterable[types.Content]) -> None:
+    for content in contents:
+      self.add_content(content)
+
+  def add_content(self, content: types.Content) -> None:
+    counted_content = types.Content(parts=[], role=content.role)
+    if content.parts:
+      for part in content.parts:
+        assert counted_content.parts is not None
+        counted_part = types.Part()
+        if part.file_data is not None or part.inline_data is not None:
+          raise ValueError(
+              "LocalTokenizers do not support non-text content types."
+          )
+        if part.video_metadata is not None:
+          counted_part.video_metadata = part.video_metadata
+        if part.function_call is not None:
+          self.add_function_call(part.function_call)
+          counted_part.function_call = part.function_call
+        if part.function_response is not None:
+          self.add_function_response(part.function_response)
+          counted_part.function_response = part.function_response
+        if part.text is not None:
+          counted_part.text = part.text
+          self._texts.append(part.text)
+        counted_content.parts.append(counted_part)
+
+    if content.model_dump(exclude_none=True) != counted_content.model_dump(
+        exclude_none=True
+    ):
+      logger.warning(
+          "Content contains unsupported types for token counting. Supported"
+          f" fields {counted_content}. Got {content}."
+      )
+
+  def add_function_call(self, function_call: types.FunctionCall) -> None:
+    """Processes a function call and adds relevant text to the accumulator.
+
+    Args:
+      function_call: The function call to process.
+    """
+    if function_call.name:
+      self._texts.append(function_call.name)
+    counted_function_call = types.FunctionCall(name=function_call.name)
+    if function_call.args:
+      counted_args = self._dict_traverse(function_call.args)
+      counted_function_call.args = counted_args
+
+  def add_tool(self, tool: types.Tool) -> types.Tool:
+    counted_tool = types.Tool(function_declarations=[])
+    if tool.function_declarations:
+      for function_declaration in tool.function_declarations:
+        counted_function_declaration = self._function_declaration_traverse(
+            function_declaration
+        )
+        if counted_tool.function_declarations is None:
+          counted_tool.function_declarations = []
+        counted_tool.function_declarations.append(counted_function_declaration)
+
+    return counted_tool
+
+  def add_tools(self, tools: Iterable[types.Tool]) -> None:
+    for tool in tools:
+      self.add_tool(tool)
+
+  def add_function_responses(
+      self, function_responses: Iterable[types.FunctionResponse]
+  ) -> None:
+    for function_response in function_responses:
+      self.add_function_response(function_response)
+
+  def add_function_response(
+      self, function_response: types.FunctionResponse
+  ) -> None:
+    counted_function_response = types.FunctionResponse()
+    if function_response.name:
+      self._texts.append(function_response.name)
+      counted_function_response.name = function_response.name
+    if function_response.response:
+      counted_response = self._dict_traverse(function_response.response)
+      counted_function_response.response = counted_response
+
+  def _function_declaration_traverse(
+      self, function_declaration: types.FunctionDeclaration
+  ) -> types.FunctionDeclaration:
+    counted_function_declaration = types.FunctionDeclaration()
+    if function_declaration.name:
+      self._texts.append(function_declaration.name)
+      counted_function_declaration.name = function_declaration.name
+    if function_declaration.description:
+      self._texts.append(function_declaration.description)
+      counted_function_declaration.description = (
+          function_declaration.description
+      )
+    if function_declaration.parameters:
+      counted_parameters = self.add_schema(function_declaration.parameters)
+      counted_function_declaration.parameters = counted_parameters
+    if function_declaration.response:
+      counted_response = self.add_schema(function_declaration.response)
+      counted_function_declaration.response = counted_response
+    return counted_function_declaration
+
+  def add_schema(self, schema: types.Schema) -> types.Schema:
+    """Processes a schema and adds relevant text to the accumulator.
+
+    Args:
+      schema: The schema to process.
+
+    Returns:
+      The new schema object with only countable fields.
+    """
+    counted_schema = types.Schema()
+    if schema.type:
+      counted_schema.type = schema.type
+    if schema.title:
+      counted_schema.title = schema.title
+    if schema.default is not None:
+      counted_schema.default = schema.default
+    if schema.format:
+      self._texts.append(schema.format)
+      counted_schema.format = schema.format
+    if schema.description:
+      self._texts.append(schema.description)
+      counted_schema.description = schema.description
+    if schema.enum:
+      self._texts.extend(schema.enum)
+      counted_schema.enum = schema.enum
+    if schema.required:
+      self._texts.extend(schema.required)
+      counted_schema.required = schema.required
+    if schema.property_ordering:
+      counted_schema.property_ordering = schema.property_ordering
+    if schema.items:
+      counted_schema_items = self.add_schema(schema.items)
+      counted_schema.items = counted_schema_items
+    if schema.properties:
+      d = {}
+      for key, value in schema.properties.items():
+        self._texts.append(key)
+        counted_value = self.add_schema(value)
+        d[key] = counted_value
+      counted_schema.properties = d
+    if schema.example:
+      counted_schema_example = self._any_traverse(schema.example)
+      counted_schema.example = counted_schema_example
+    return counted_schema
+
+  def _dict_traverse(self, d: dict[str, Any]) -> dict[str, Any]:
+    """Processes a dict and adds relevant text to the accumulator.
+
+    Args:
+      d: The dict to process.
+
+    Returns:
+      The new dict object with only countable fields.
+    """
+    counted_dict = {}
+    self._texts.extend(list(d.keys()))
+    for key, val in d.items():
+      counted_dict[key] = self._any_traverse(val)
+    return counted_dict
+
+  def _any_traverse(self, value: Any) -> Any:
+    """Processes a value and adds relevant text to the accumulator.
+
+    Args:
+      value: The value to process.
+
+    Returns:
+      The new value with only countable fields.
+    """
+    if isinstance(value, str):
+      self._texts.append(value)
+      return value
+    elif isinstance(value, dict):
+      return self._dict_traverse(value)
+    elif isinstance(value, list):
+      return [self._any_traverse(item) for item in value]
+    else:
+      return value
+
+
+def _token_str_to_bytes(
+    token: str, type: sentencepiece_model_pb2.ModelProto.SentencePiece.Type
+) -> bytes:
+  if type == sentencepiece_model_pb2.ModelProto.SentencePiece.Type.BYTE:
+    return _parse_hex_byte(token).to_bytes(length=1, byteorder="big")
+  else:
+    return token.replace("▁", " ").encode("utf-8")
+
+
+def _parse_hex_byte(token: str) -> int:
+  """Parses a hex byte string of the form '<0xXX>' and returns the integer value.
+
+  Raises ValueError if the input is malformed or the byte value is invalid.
+  """
+
+  if len(token) != 6:
+    raise ValueError(f"Invalid byte length: {token}")
+  if not token.startswith("<0x") or not token.endswith(">"):
+    raise ValueError(f"Invalid byte format: {token}")
+
+  try:
+    val = int(token[3:5], 16)  # Parse the hex part directly
+  except ValueError:
+    raise ValueError(f"Invalid hex value: {token}")
+
+  if val >= 256:
+    raise ValueError(f"Byte value out of range: {token}")
+
+  return val
+
+
+class LocalTokenizer:
+  """[Experimental] Text Only Local Tokenizer.
+
+  This class provides a local tokenizer for text only token counting.
+
+  LIMITATIONS:
+  - Only supports text based tokenization and no multimodal tokenization.
+  - Forward compatibility depends on the open-source tokenizer models for future
+    Gemini versions.
+  - For token counting of tools and response schemas, the `LocalTokenizer` only
+    supports `types.Tool` and `types.Schema` objects. Python functions or Pydantic
+    models cannot be passed directly.
+  """
+
+  def __init__(self, model_name: str):
+    self._tokenizer_name = loader.get_tokenizer_name(model_name)
+    self._model_proto = loader.load_model_proto(self._tokenizer_name)
+    self._tokenizer = loader.get_sentencepiece(self._tokenizer_name)
+
+  @_common.experimental_warning(
+      "The SDK's local tokenizer implementation is experimental and may change"
+      " in the future. It only supports text based tokenization."
+  )
+  def count_tokens(
+      self,
+      contents: Union[types.ContentListUnion, types.ContentListUnionDict],
+      *,
+      config: Optional[types.CountTokensConfigOrDict] = None,
+  ) -> types.CountTokensResult:
+    """Counts the number of tokens in a given text.
+
+    Args:
+      contents: The contents to tokenize.
+
+    Returns:
+      A `CountTokensResult` containing the total number of tokens.
+    """
+    processed_contents = t.t_contents(contents)
+    text_accumulator = _TextsAccumulator()
+    config = types.CountTokensConfig.model_validate(config or {})
+    text_accumulator.add_contents(processed_contents)
+    if config.tools:
+      text_accumulator.add_tools(config.tools)
+    if config.generation_config and config.generation_config.response_schema:
+      text_accumulator.add_schema(config.generation_config.response_schema)
+    if config.system_instruction:
+      text_accumulator.add_contents(t.t_contents([config.system_instruction]))
+    tokens_list = self._tokenizer.encode(list(text_accumulator.get_texts()))
+    return types.CountTokensResult(
+        total_tokens=sum(len(tokens) for tokens in tokens_list)
+    )
+
+  @_common.experimental_warning(
+      "The SDK's local tokenizer implementation is experimental and may change"
+      " in the future. It only supports text based tokenization."
+  )
+  def compute_tokens(
+      self,
+      contents: Union[types.ContentListUnion, types.ContentListUnionDict],
+  ) -> types.ComputeTokensResult:
+    """Computes the tokens ids and string pieces in the input."""
+    processed_contents = t.t_contents(contents)
+    text_accumulator = _TextsAccumulator()
+    for content in processed_contents:
+      text_accumulator.add_content(content)
+    tokens_protos = self._tokenizer.EncodeAsImmutableProto(
+        text_accumulator.get_texts()
+    )
+
+    roles = []
+    for content in processed_contents:
+      if content.parts:
+        for _ in content.parts:
+          roles.append(content.role)
+
+    token_infos = []
+    for tokens_proto, role in zip(tokens_protos, roles):
+      token_infos.append(
+          types.TokensInfo(
+              token_ids=[piece.id for piece in tokens_proto.pieces],
+              tokens=[
+                  _token_str_to_bytes(
+                      piece.piece, self._model_proto.pieces[piece.id].type
+                  )
+                  for piece in tokens_proto.pieces
+              ],
+              role=role,
+          )
+      )
+    return types.ComputeTokensResult(tokens_info=token_infos)
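The new module above adds a local, text-only tokenizer built on SentencePiece: count_tokens extracts countable text via _TextsAccumulator and sums token counts, while compute_tokens returns per-content token ids and byte pieces. A usage sketch under stated assumptions: the import path is inferred from the module's logger name ("google_genai.local_tokenizer") and is not confirmed by this diff, the model name is illustrative, and tokenization happens entirely offline, with no API call.

from google.genai import types
from google.genai.local_tokenizer import LocalTokenizer  # assumed module path

tokenizer = LocalTokenizer(model_name='gemini-2.0-flash')  # illustrative model name

# Plain-string counting; count_tokens accepts the same content union as the API client.
result = tokenizer.count_tokens('The quick brown fox jumps over the lazy dog.')
print(result.total_tokens)

# Per-content token ids and byte pieces.
compute_result = tokenizer.compute_tokens(
    [types.Content(role='user', parts=[types.Part(text='Hello world')])]
)
for info in compute_result.tokens_info or []:
    print(info.role, info.token_ids, info.tokens)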