google-genai 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,15 +17,15 @@
 
 import base64
 import copy
+import datetime
 import inspect
+import io
 import json
 import os
 import re
-import datetime
 from typing import Any, Literal, Optional, Union
 
 import google.auth
-from pydantic import BaseModel
 from requests.exceptions import HTTPError
 
 from . import errors
@@ -33,7 +33,8 @@ from ._api_client import ApiClient
 from ._api_client import HttpOptions
 from ._api_client import HttpRequest
 from ._api_client import HttpResponse
-from ._api_client import RequestJsonEncoder
+from ._common import BaseModel
+
 
 def _redact_version_numbers(version_string: str) -> str:
   """Redacts version numbers in the form x.y.z from a string."""
@@ -72,6 +73,11 @@ def _redact_request_url(url: str) -> str:
       '{VERTEX_URL_PREFIX}/',
       url,
   )
+  result = re.sub(
+      r'.*-aiplatform.googleapis.com/[^/]+/',
+      '{VERTEX_URL_PREFIX}/',
+      result,
+  )
   result = re.sub(
       r'https://generativelanguage.googleapis.com/[^/]+',
       '{MLDEV_URL_PREFIX}',
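
Note: the added rule extends request-URL redaction to regional Vertex AI hosts of the form `{region}-aiplatform.googleapis.com`, collapsing the host and API version into the `{VERTEX_URL_PREFIX}` placeholder. A quick illustrative check (the sample URL is hypothetical):

    import re

    url = ('https://us-central1-aiplatform.googleapis.com/v1/'
           'projects/p/locations/us-central1/models/some-model:generateContent')
    print(re.sub(r'.*-aiplatform.googleapis.com/[^/]+/', '{VERTEX_URL_PREFIX}/', url))
    # {VERTEX_URL_PREFIX}/projects/p/locations/us-central1/models/some-model:generateContent
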
@@ -140,6 +146,7 @@ class ReplayResponse(BaseModel):
   status_code: int = 200
   headers: dict[str, str]
   body_segments: list[dict[str, object]]
+  byte_segments: Optional[list[bytes]] = None
   sdk_response_segments: list[dict[str, object]]
 
   def model_post_init(self, __context: Any) -> None:
@@ -259,26 +266,13 @@ class ReplayApiClient(ApiClient):
     replay_file_path = self._get_replay_file_path()
     os.makedirs(os.path.dirname(replay_file_path), exist_ok=True)
     with open(replay_file_path, 'w') as f:
-      replay_session_dict = self.replay_session.model_dump()
-      # Use for non-utf-8 bytes in image/video... output.
-      for interaction in replay_session_dict['interactions']:
-        segments = []
-        for response in interaction['response']['sdk_response_segments']:
-          segments.append(json.loads(json.dumps(
-              response, cls=ResponseJsonEncoder
-          )))
-        interaction['response']['sdk_response_segments'] = segments
-      f.write(
-          json.dumps(
-              replay_session_dict, indent=2, cls=RequestJsonEncoder
-          )
-      )
+      f.write(self.replay_session.model_dump_json(exclude_unset=True, indent=2))
     self.replay_session = None
 
   def _record_interaction(
       self,
       http_request: HttpRequest,
-      http_response: Union[HttpResponse, errors.APIError],
+      http_response: Union[HttpResponse, errors.APIError, bytes],
   ):
     if not self._should_update_replay():
       return
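
Note: the hand-rolled RequestJsonEncoder/ResponseJsonEncoder pair is replaced by pydantic's own serializer. A minimal sketch of the new call, assuming a plain pydantic v2 model (the SDK's `_common.BaseModel` may configure serialization further; `Rec` is a hypothetical stand-in):

    import pydantic

    class Rec(pydantic.BaseModel):
      status_code: int = 200
      note: str | None = None

    # exclude_unset=True drops fields that were never assigned, keeping
    # replay files limited to what was actually recorded.
    print(Rec(status_code=404).model_dump_json(exclude_unset=True, indent=2))
    # {
    #   "status_code": 404
    # }
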
@@ -293,6 +287,9 @@ class ReplayApiClient(ApiClient):
     response = ReplayResponse(
         headers=dict(http_response.headers),
         body_segments=list(http_response.segments()),
+        byte_segments=[
+            seg[:100] + b'...' for seg in http_response.byte_segments()
+        ],
         status_code=http_response.status_code,
         sdk_response_segments=[],
     )
@@ -326,11 +323,7 @@ class ReplayApiClient(ApiClient):
     # so that the comparison is fair.
     _redact_request_body(request_data_copy)
 
-    # Need to call dumps() and loads() to convert dict bytes values to strings.
-    # Because the expected_request_body dict never contains bytes values.
-    actual_request_body = [
-        json.loads(json.dumps(request_data_copy, cls=RequestJsonEncoder))
-    ]
+    actual_request_body = [request_data_copy]
     expected_request_body = interaction.request.body_segments
     assert actual_request_body == expected_request_body, (
         'Request body mismatch:\n'
@@ -353,6 +346,7 @@ class ReplayApiClient(ApiClient):
             json.dumps(segment)
             for segment in interaction.response.body_segments
         ],
+        byte_stream=interaction.response.byte_segments,
     )
 
   def _verify_response(self, response_model: BaseModel):
@@ -371,15 +365,10 @@ class ReplayApiClient(ApiClient):
     if isinstance(response_model, list):
       response_model = response_model[0]
     print('response_model: ', response_model.model_dump(exclude_none=True))
-    actual = json.dumps(
-        response_model.model_dump(exclude_none=True),
-        cls=ResponseJsonEncoder,
-        sort_keys=True,
-    )
-    expected = json.dumps(
-        interaction.response.sdk_response_segments[self._sdk_response_index],
-        sort_keys=True,
-    )
+    actual = response_model.model_dump(exclude_none=True, mode='json')
+    expected = interaction.response.sdk_response_segments[
+        self._sdk_response_index
+    ]
     assert (
         actual == expected
     ), f'SDK response mismatch:\nActual: {actual}\nExpected: {expected}'
@@ -413,10 +402,21 @@ class ReplayApiClient(ApiClient):
     else:
       return self._build_response_from_replay(http_request)
 
-  def upload_file(self, file_path: str, upload_url: str, upload_size: int):
-    request = HttpRequest(
-        method='POST', url='', data={'file_path': file_path}, headers={}
-    )
+  def upload_file(self, file_path: Union[str, io.IOBase], upload_url: str, upload_size: int):
+    if isinstance(file_path, io.IOBase):
+      offset = file_path.tell()
+      content = file_path.read()
+      file_path.seek(offset, os.SEEK_SET)
+      request = HttpRequest(
+          method='POST',
+          url='',
+          data={'bytes': base64.b64encode(content).decode('utf-8')},
+          headers={}
+      )
+    else:
+      request = HttpRequest(
+          method='POST', url='', data={'file_path': file_path}, headers={}
+      )
     if self._should_call_api():
       try:
         result = super().upload_file(file_path, upload_url, upload_size)
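
Note: `upload_file` now also accepts an open stream. The recording path snapshots the stream's remaining bytes without disturbing the caller's read position; the tell/read/seek pattern in isolation (stdlib only, illustrative):

    import base64, io, os

    stream = io.BytesIO(b'\x00\x01binary payload')
    offset = stream.tell()            # remember the caller's position
    content = stream.read()           # consume the rest for the recording
    stream.seek(offset, os.SEEK_SET)  # restore it so the real upload still works
    assert stream.read() == content
    encoded = base64.b64encode(content).decode('utf-8')
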
@@ -431,42 +431,19 @@ class ReplayApiClient(ApiClient):
     else:
       return self._build_response_from_replay(request).text
 
-
-class ResponseJsonEncoder(json.JSONEncoder):
-  """The replay test json encoder for response.
-
-  We need RequestJsonEncoder and ResponseJsonEncoder because:
-  1. In production, we only need RequestJsonEncoder to help json module
-  to convert non-stringable and stringable types to json string. Especially
-  for bytes type, the value of bytes field is encoded to base64 string so it
-  is always stringable and the RequestJsonEncoder doesn't have to deal with
-  utf-8 JSON broken issue.
-  2. In replay test, we also need ResponseJsonEncoder to help json module
-  convert non-stringable and stringable types to json string. But response
-  object returned from SDK method is different from the request api_client
-  sent to server. For the bytes type, there is no base64 string in response
-  anymore, because SDK handles it internally. So bytes type in Response is
-  non-stringable. The ResponseJsonEncoder uses different encoding
-  strategy than the RequestJsonEncoder to deal with utf-8 JSON broken issue.
-  """
-  def default(self, o):
-    if isinstance(o, bytes):
-      # Use base64.b64encode() to encode bytes to string so that the media bytes
-      # fields are serializable.
-      # o.decode(encoding='utf-8', errors='replace') doesn't work because it
-      # uses a fixed error string `\ufffd` for all non-utf-8 characters,
-      # which cannot be converted back to original bytes. And other languages
-      # only have the original bytes to compare with.
-      # Since we use base64.b64encoding() in replay test, a change that breaks
-      # native bytes can be captured by
-      # test_compute_tokens.py::test_token_bytes_deserialization.
-      return base64.b64encode(o).decode(encoding='utf-8')
-    elif isinstance(o, datetime.datetime):
-      # dt.isoformat() prints "2024-11-15T23:27:45.624657+00:00"
-      # but replay files want "2024-11-15T23:27:45.624657Z"
-      if o.isoformat().endswith('+00:00'):
-        return o.isoformat().replace('+00:00', 'Z')
-      else:
-        return o.isoformat()
+  def _download_file_request(self, request):
+    self._initialize_replay_session_if_not_loaded()
+    if self._should_call_api():
+      try:
+        result = super()._download_file_request(request)
+      except HTTPError as e:
+        result = HttpResponse(
+            e.response.headers, [json.dumps({'reason': e.response.reason})]
+        )
+        result.status_code = e.response.status_code
+        raise e
+      self._record_interaction(request, result)
+      return result
     else:
-      return super().default(o)
+      return self._build_response_from_replay(request)
+
@@ -21,10 +21,12 @@ import inspect
 import io
 import re
 import time
-from typing import Any, Optional, Union
+import typing
+from typing import Any, GenericAlias, Optional, Union
 
 import PIL.Image
 import PIL.PngImagePlugin
+import pydantic
 
 from . import _api_client
 from . import types
@@ -35,7 +37,7 @@ def _resource_name(
     resource_name: str,
     *,
     collection_identifier: str,
-    collection_hirearchy_depth: int = 2,
+    collection_hierarchy_depth: int = 2,
 ):
   # pylint: disable=line-too-long
   """Prepends resource name with project, location, collection_identifier if needed.
@@ -48,13 +50,13 @@ def _resource_name(
   Args:
     client: The API client.
     resource_name: The user input resource name to be completed.
-    collection_identifier: The collection identifier to be prepended.
-      See collection identifiers in https://google.aip.dev/122.
-    collection_hirearchy_depth: The collection hierarchy depth.
-      Only set this field when the resource has nested collections.
-      For example, `users/vhugo1802/events/birthday-dinner-226`, the
-      collection_identifier is `users` and collection_hirearchy_depth is 4.
-      See nested collections in https://google.aip.dev/122.
+    collection_identifier: The collection identifier to be prepended. See
+      collection identifiers in https://google.aip.dev/122.
+    collection_hierarchy_depth: The collection hierarchy depth. Only set this
+      field when the resource has nested collections. For example,
+      `users/vhugo1802/events/birthday-dinner-226`, the collection_identifier is
+      `users` and collection_hierarchy_depth is 4. See nested collections in
+      https://google.aip.dev/122.
 
   Example:
 
@@ -62,7 +64,8 @@ def _resource_name(
     client.vertexai = True
     client.project = 'bar'
     client.location = 'us-west1'
-    _resource_name(client, 'cachedContents/123', collection_identifier='cachedContents')
+    _resource_name(client, 'cachedContents/123',
+      collection_identifier='cachedContents')
     returns: 'projects/bar/locations/us-west1/cachedContents/123'
 
   Example:
@@ -72,7 +75,8 @@ def _resource_name(
     client.vertexai = True
     client.project = 'bar'
     client.location = 'us-west1'
-    _resource_name(client, resource_name, collection_identifier='cachedContents')
+    _resource_name(client, resource_name,
+      collection_identifier='cachedContents')
     returns: 'projects/foo/locations/us-central1/cachedContents/123'
 
   Example:
@@ -80,7 +84,8 @@ def _resource_name(
     resource_name = '123'
     # resource_name = 'cachedContents/123'
     client.vertexai = False
-    _resource_name(client, resource_name, collection_identifier='cachedContents')
+    _resource_name(client, resource_name,
+      collection_identifier='cachedContents')
     returns 'cachedContents/123'
 
   Example:
@@ -88,7 +93,8 @@ def _resource_name(
     resource_prefix = 'cachedContents'
     client.vertexai = False
     # client.vertexai = True
-    _resource_name(client, resource_name, collection_identifier='cachedContents')
+    _resource_name(client, resource_name,
+      collection_identifier='cachedContents')
     returns: 'some/wrong/cachedContents/resource/name/123'
 
   Returns:
@@ -99,7 +105,7 @@ def _resource_name(
       # Check if prepending the collection identifier won't violate the
       # collection hierarchy depth.
       and f'{collection_identifier}/{resource_name}'.count('/') + 1
-      == collection_hirearchy_depth
+      == collection_hierarchy_depth
   )
   if client.vertexai:
     if resource_name.startswith('projects/'):
@@ -142,6 +148,35 @@ def t_model(client: _api_client.ApiClient, model: str):
   else:
     return f'models/{model}'
 
+
+def t_models_url(api_client: _api_client.ApiClient, base_models: bool) -> str:
+  if api_client.vertexai:
+    if base_models:
+      return 'publishers/google/models'
+    else:
+      return 'models'
+  else:
+    if base_models:
+      return 'models'
+    else:
+      return 'tunedModels'
+
+
+def t_extract_models(
+    api_client: _api_client.ApiClient, response: dict
+) -> list[types.Model]:
+  if not response:
+    return []
+  elif response.get('models') is not None:
+    return response.get('models')
+  elif response.get('tunedModels') is not None:
+    return response.get('tunedModels')
+  elif response.get('publisherModels') is not None:
+    return response.get('publisherModels')
+  else:
+    raise ValueError('Cannot determine the models type.')
+
+
 def t_caches_model(api_client: _api_client.ApiClient, model: str):
   model = t_model(api_client, model)
   if not model:
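
Note: together the two new helpers resolve the model-listing endpoint. `t_models_url` reads only `client.vertexai`, so its four outcomes can be tabulated with a stub object (a sketch; `SimpleNamespace` stands in for the real ApiClient and `t_models_url` is assumed in scope):

    from types import SimpleNamespace

    for vertexai in (True, False):
      client = SimpleNamespace(vertexai=vertexai)
      for base_models in (True, False):
        print(vertexai, base_models, '->', t_models_url(client, base_models))
    # True True -> publishers/google/models
    # True False -> models
    # False True -> models
    # False False -> tunedModels
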
@@ -180,6 +215,10 @@ def t_part(client: _api_client.ApiClient, part: PartType) -> types.Part:
     return types.Part(text=part)
   if isinstance(part, PIL.Image.Image):
     return types.Part(inline_data=pil_to_blob(part))
+  if isinstance(part, types.File):
+    if not part.uri or not part.mime_type:
+      raise ValueError('file uri and mime_type are required.')
+    return types.Part.from_uri(part.uri, part.mime_type)
   else:
     return part
 
@@ -258,32 +297,135 @@ def t_contents(
   return [t_content(client, contents)]
 
 
-def process_schema(data: dict):
+def process_schema(
+    data: dict[str, Any], client: Optional[_api_client.ApiClient] = None
+):
   if isinstance(data, dict):
     # Iterate over a copy of keys to allow deletion
     for key in list(data.keys()):
-      if key == 'title':
+      # Only delete 'title' for the Gemini API
+      if client and not client.vertexai and key == 'title':
         del data[key]
-      elif key == 'type':
-        data[key] = data[key].upper()
       else:
-        process_schema(data[key])
+        process_schema(data[key], client)
   elif isinstance(data, list):
     for item in data:
-      process_schema(item)
+      process_schema(item, client)
 
   return data
 
 
+def _build_schema(fname: str, fields_dict: dict[str, Any]) -> dict[str, Any]:
+  parameters = pydantic.create_model(fname, **fields_dict).model_json_schema()
+  defs = parameters.pop('$defs', {})
+
+  for _, value in defs.items():
+    unpack_defs(value, defs)
+
+  unpack_defs(parameters, defs)
+  return parameters['properties']['dummy']
+
+
+def unpack_defs(schema: dict[str, Any], defs: dict[str, Any]):
+  """Unpacks the $defs values in the schema generated by pydantic so they can be understood by the API.
+
+  Example of a schema before and after unpacking:
+  Before:
+
+  `schema`
+
+  {'properties': {
+      'dummy': {
+          'items': {
+              '$ref': '#/$defs/CountryInfo'
+          },
+          'title': 'Dummy',
+          'type': 'array'
+      }
+  },
+  'required': ['dummy'],
+  'title': 'dummy',
+  'type': 'object'}
+
+  `defs`
+
+  {'CountryInfo': {'properties': {'continent': {'title': 'Continent', 'type':
+  'string'}, 'gdp': {'title': 'Gdp', 'type': 'integer'}}, 'required':
+  ['continent', 'gdp'], 'title': 'CountryInfo', 'type': 'object'}}
+
+  After:
+
+  `schema`
+  {'properties': {
+      'continent': {'title': 'Continent', 'type': 'string'},
+      'gdp': {'title': 'Gdp', 'type': 'integer'}
+  },
+  'required': ['continent', 'gdp'],
+  'title': 'CountryInfo',
+  'type': 'object'
+  }
+  """
+  properties = schema.get('properties', None)
+  if properties is None:
+    return
+
+  for name, value in properties.items():
+    ref_key = value.get('$ref', None)
+    if ref_key is not None:
+      ref = defs[ref_key.split('defs/')[-1]]
+      unpack_defs(ref, defs)
+      properties[name] = ref
+      continue
+
+    anyof = value.get('anyOf', None)
+    if anyof is not None:
+      for i, atype in enumerate(anyof):
+        ref_key = atype.get('$ref', None)
+        if ref_key is not None:
+          ref = defs[ref_key.split('defs/')[-1]]
+          unpack_defs(ref, defs)
+          anyof[i] = ref
+      continue
+
+    items = value.get('items', None)
+    if items is not None:
+      ref_key = items.get('$ref', None)
+      if ref_key is not None:
+        ref = defs[ref_key.split('defs/')[-1]]
+        unpack_defs(ref, defs)
+        value['items'] = ref
+      continue
+
+
 def t_schema(
-    _: _api_client.ApiClient, origin: Union[types.SchemaDict, Any]
+    client: _api_client.ApiClient, origin: Union[types.SchemaUnionDict, Any]
 ) -> Optional[types.Schema]:
   if not origin:
     return None
   if isinstance(origin, dict):
-    return origin
-  schema = process_schema(origin.model_json_schema())
-  return types.Schema.model_validate(schema)
+    return process_schema(origin, client)
+  if isinstance(origin, types.Schema):
+    if dict(origin) == dict(types.Schema()):
+      # response_schema value was coerced to an empty Schema instance because it did not adhere to the Schema field annotation
+      raise ValueError(f'Unsupported schema type.')
+    schema = process_schema(origin.model_dump(exclude_unset=True), client)
+    return types.Schema.model_validate(schema)
+  if isinstance(origin, GenericAlias):
+    if origin.__origin__ is list:
+      if isinstance(origin.__args__[0], typing.types.UnionType):
+        raise ValueError(f'Unsupported schema type: GenericAlias {origin}')
+      if issubclass(origin.__args__[0], pydantic.BaseModel):
+        # Handle cases where response schema is `list[pydantic.BaseModel]`
+        list_schema = _build_schema(
+            'dummy', {'dummy': (origin, pydantic.Field())}
+        )
+        list_schema = process_schema(list_schema, client)
+        return types.Schema.model_validate(list_schema)
+    raise ValueError(f'Unsupported schema type: GenericAlias {origin}')
+  if issubclass(origin, pydantic.BaseModel):
+    schema = process_schema(origin.model_json_schema(), client)
+    return types.Schema.model_validate(schema)
+  raise ValueError(f'Unsupported schema type: {origin}')
 
 
 def t_speech_config(
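
Note: `_build_schema` leans on pydantic by wrapping the annotation in a disposable single-field model named 'dummy', then inlining the `$defs` emitted by `model_json_schema()`. The "Before" shape from the `unpack_defs` docstring can be reproduced with plain pydantic v2 (a sketch):

    import pydantic

    class CountryInfo(pydantic.BaseModel):
      continent: str
      gdp: int

    params = pydantic.create_model(
        'dummy', dummy=(list[CountryInfo], pydantic.Field())
    ).model_json_schema()
    # params['properties']['dummy']['items'] == {'$ref': '#/$defs/CountryInfo'}
    # unpack_defs() replaces that $ref with the full inline CountryInfo schema.
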
@@ -319,10 +461,10 @@ def t_speech_config(
 def t_tool(client: _api_client.ApiClient, origin) -> types.Tool:
   if not origin:
     return None
-  if inspect.isfunction(origin):
+  if inspect.isfunction(origin) or inspect.ismethod(origin):
     return types.Tool(
         function_declarations=[
-            types.FunctionDeclaration.from_function(client, origin)
+            types.FunctionDeclaration.from_callable(client, origin)
         ]
     )
   else:
@@ -432,10 +574,25 @@ def t_resolve_operation(api_client: _api_client.ApiClient, struct: dict):
   return struct
 
 
-def t_file_name(api_client: _api_client.ApiClient, name: str):
-  # Remove the files/ prefx since it's added to the url path.
-  if name.startswith('files/'):
-    return name.split('files/')[1]
+def t_file_name(
+    api_client: _api_client.ApiClient, name: Union[str, types.File]
+):
+  # Remove the files/ prefix since it's added to the url path.
+  if isinstance(name, types.File):
+    name = name.name
+
+  if name is None:
+    raise ValueError('File name is required.')
+
+  if name.startswith('https://'):
+    suffix = name.split('files/')[1]
+    match = re.match('[a-z0-9]+', suffix)
+    if match is None:
+      raise ValueError(f'Could not extract file name from URI: {name}')
+    name = match.group(0)
+  elif name.startswith('files/'):
+    name = name.split('files/')[1]
+
   return name
 
 
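
Note: `t_file_name` now also accepts a `types.File` (its `.name` is used) or a full download URL, in addition to the bare `files/...` form. The string normalization mirrored standalone (the sample URL is illustrative):

    import re

    def normalize(name: str) -> str:
      if name.startswith('https://'):
        suffix = name.split('files/')[1]
        name = re.match('[a-z0-9]+', suffix).group(0)
      elif name.startswith('files/'):
        name = name.split('files/')[1]
      return name

    assert normalize('files/abc123') == 'abc123'
    assert normalize('https://generativelanguage.googleapis.com/v1beta/files/abc123') == 'abc123'
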
@@ -452,3 +609,13 @@ def t_tuning_job_status(
     return 'JOB_STATE_FAILED'
   else:
     return status
+
+
+# Some fields don't accept url safe base64 encoding.
+# We shouldn't use this transformer if the backend adhere to Cloud Type
+# format https://cloud.google.com/docs/discovery/type-format.
+# TODO(b/389133914,b/390320301): Remove the hack after backend fix the issue.
+def t_bytes(api_client: _api_client.ApiClient, data: bytes) -> str:
+  if not isinstance(data, bytes):
+    return data
+  return base64.b64encode(data).decode('ascii')
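
Note: `t_bytes` deliberately emits standard base64 (the `+`/`/` alphabet) rather than the URL-safe variant that some backend fields reject. The difference on a small payload:

    import base64

    data = b'\xfa\xfb\xfc'
    base64.b64encode(data).decode('ascii')          # '+vv8' (standard alphabet)
    base64.urlsafe_b64encode(data).decode('ascii')  # '-vv8' (URL-safe alphabet)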