nucliadb-utils 6.9.1.post5229__py3-none-any.whl → 6.10.0.post5732__py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registry.

Potentially problematic release: this version of nucliadb-utils might be problematic.

Files changed (45)
  1. nucliadb_utils/asyncio_utils.py +3 -3
  2. nucliadb_utils/audit/audit.py +41 -31
  3. nucliadb_utils/audit/basic.py +22 -23
  4. nucliadb_utils/audit/stream.py +31 -31
  5. nucliadb_utils/authentication.py +8 -10
  6. nucliadb_utils/cache/nats.py +10 -12
  7. nucliadb_utils/cache/pubsub.py +5 -4
  8. nucliadb_utils/cache/settings.py +2 -3
  9. nucliadb_utils/const.py +1 -1
  10. nucliadb_utils/debug.py +2 -2
  11. nucliadb_utils/encryption/settings.py +1 -2
  12. nucliadb_utils/fastapi/openapi.py +1 -2
  13. nucliadb_utils/fastapi/versioning.py +10 -6
  14. nucliadb_utils/featureflagging.py +10 -4
  15. nucliadb_utils/grpc.py +3 -3
  16. nucliadb_utils/helpers.py +1 -1
  17. nucliadb_utils/nats.py +15 -16
  18. nucliadb_utils/nuclia_usage/utils/kb_usage_report.py +4 -5
  19. nucliadb_utils/run.py +1 -1
  20. nucliadb_utils/settings.py +40 -41
  21. nucliadb_utils/signals.py +3 -3
  22. nucliadb_utils/storages/azure.py +34 -21
  23. nucliadb_utils/storages/gcs.py +22 -21
  24. nucliadb_utils/storages/local.py +8 -8
  25. nucliadb_utils/storages/nuclia.py +1 -2
  26. nucliadb_utils/storages/object_store.py +6 -6
  27. nucliadb_utils/storages/s3.py +23 -23
  28. nucliadb_utils/storages/settings.py +7 -8
  29. nucliadb_utils/storages/storage.py +29 -45
  30. nucliadb_utils/storages/utils.py +2 -3
  31. nucliadb_utils/store.py +2 -2
  32. nucliadb_utils/tests/asyncbenchmark.py +8 -10
  33. nucliadb_utils/tests/azure.py +2 -1
  34. nucliadb_utils/tests/fixtures.py +3 -2
  35. nucliadb_utils/tests/gcs.py +3 -2
  36. nucliadb_utils/tests/local.py +2 -1
  37. nucliadb_utils/tests/nats.py +1 -1
  38. nucliadb_utils/tests/s3.py +2 -1
  39. nucliadb_utils/transaction.py +16 -18
  40. nucliadb_utils/utilities.py +22 -24
  41. {nucliadb_utils-6.9.1.post5229.dist-info → nucliadb_utils-6.10.0.post5732.dist-info}/METADATA +6 -6
  42. nucliadb_utils-6.10.0.post5732.dist-info/RECORD +59 -0
  43. nucliadb_utils-6.9.1.post5229.dist-info/RECORD +0 -59
  44. {nucliadb_utils-6.9.1.post5229.dist-info → nucliadb_utils-6.10.0.post5732.dist-info}/WHEEL +0 -0
  45. {nucliadb_utils-6.9.1.post5229.dist-info → nucliadb_utils-6.10.0.post5732.dist-info}/top_level.txt +0 -0
nucliadb_utils/storages/gcs.py CHANGED
@@ -23,10 +23,11 @@ import asyncio
  import base64
  import json
  import socket
+ from collections.abc import AsyncGenerator, AsyncIterator
  from concurrent.futures import ThreadPoolExecutor
  from copy import deepcopy
  from datetime import datetime
- from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, cast
+ from typing import Any, cast
  from urllib.parse import quote_plus

  import aiohttp
@@ -153,7 +154,7 @@ class GCSStorageField(StorageField):
  assert data["resource"]["name"] == destination_uri

  @storage_ops_observer.wrap({"type": "iter_data"})
- async def iter_data(self, range: Optional[Range] = None) -> AsyncGenerator[bytes, None]:
+ async def iter_data(self, range: Range | None = None) -> AsyncGenerator[bytes]:
  attempt = 1
  while True:
  try:
@@ -170,13 +171,13 @@ class GCSStorageField(StorageField):
  raise
  wait_time = 2 ** (attempt - 1)
  logger.warning(
- f"Error downloading from GCP. Retrying ({attempt} of {MAX_TRIES}) after {wait_time} seconds. Error: {ex}" # noqa
+ f"Error downloading from GCP. Retrying ({attempt} of {MAX_TRIES}) after {wait_time} seconds. Error: {ex}"
  )
  await asyncio.sleep(wait_time)
  attempt += 1

  @storage_ops_observer.wrap({"type": "inner_iter_data"})
- async def _inner_iter_data(self, range: Optional[Range] = None):
+ async def _inner_iter_data(self, range: Range | None = None):
  """
  Iterate through object data.
  """
@@ -322,7 +323,7 @@ class GCSStorageField(StorageField):
  async with self.storage.session.put(
  self.field.resumable_uri, headers=headers, data=data
  ) as call:
- text = await call.text() # noqa
+ text = await call.text()
  if call.status not in [200, 201, 308]:
  if call.status == 410:
  raise ResumableUploadGone(text)
@@ -377,7 +378,7 @@ class GCSStorageField(StorageField):
  max_tries=MAX_TRIES,
  )
  @storage_ops_observer.wrap({"type": "exists"})
- async def exists(self) -> Optional[ObjectMetadata]:
+ async def exists(self) -> ObjectMetadata | None:
  """
  Existence can be checked either with a CloudFile data in the field attribute
  or own StorageField key and bucket. Field takes precendece
@@ -425,23 +426,23 @@ class GCSStorageField(StorageField):

  class GCSStorage(Storage):
  field_klass = GCSStorageField
- _session: Optional[aiohttp.ClientSession] = None
+ _session: aiohttp.ClientSession | None = None
  _credentials = None
  _json_credentials = None
  chunk_size = CHUNK_SIZE

  def __init__(
  self,
- account_credentials: Optional[str] = None,
- bucket: Optional[str] = None,
- location: Optional[str] = None,
- project: Optional[str] = None,
- executor: Optional[ThreadPoolExecutor] = None,
- deadletter_bucket: Optional[str] = None,
- indexing_bucket: Optional[str] = None,
- labels: Optional[Dict[str, str]] = None,
+ account_credentials: str | None = None,
+ bucket: str | None = None,
+ location: str | None = None,
+ project: str | None = None,
+ executor: ThreadPoolExecutor | None = None,
+ deadletter_bucket: str | None = None,
+ indexing_bucket: str | None = None,
+ labels: dict[str, str] | None = None,
  url: str = "https://www.googleapis.com",
- scopes: Optional[List[str]] = None,
+ scopes: list[str] | None = None,
  anonymous: bool = False,
  ):
  if anonymous:
@@ -533,7 +534,7 @@ class GCSStorage(Storage):
  @storage_ops_observer.wrap({"type": "delete"})
  async def delete_upload(self, uri: str, bucket_name: str):
  if uri:
- url = "{}/{}/o/{}".format(self.object_base_url, bucket_name, quote_plus(uri))
+ url = f"{self.object_base_url}/{bucket_name}/o/{quote_plus(uri)}"
  headers = await self.get_access_headers()
  async with self.session.delete(url, headers=headers) as resp:
  if resp.status in (200, 204, 404):
@@ -569,7 +570,7 @@ class GCSStorage(Storage):
  max_tries=MAX_TRIES,
  )
  @storage_ops_observer.wrap({"type": "create_bucket"})
- async def create_bucket(self, bucket_name: str, kbid: Optional[str] = None):
+ async def create_bucket(self, bucket_name: str, kbid: str | None = None):
  if await self.check_exists(bucket_name=bucket_name):
  return

@@ -671,9 +672,9 @@ class GCSStorage(Storage):
  return deleted, conflict

  async def iterate_objects(
- self, bucket: str, prefix: str, start: Optional[str] = None
- ) -> AsyncGenerator[ObjectInfo, None]:
- url = "{}/{}/o".format(self.object_base_url, bucket)
+ self, bucket: str, prefix: str, start: str | None = None
+ ) -> AsyncGenerator[ObjectInfo]:
+ url = f"{self.object_base_url}/{bucket}/o"
  headers = await self.get_access_headers()
  params = {"prefix": prefix}
  if start:
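Most of the churn in this file (and in the files below) is a typing cleanup rather than a behaviour change: `Optional[X]` becomes `X | None`, `Dict`/`List`/`Type` become the built-in generics, and `AsyncGenerator`/`AsyncIterator` now come from `collections.abc`, with the send type dropped from many return annotations. A minimal before/after sketch of the pattern (the function and its body are illustrative, not package code; the single-argument `AsyncGenerator[bytes]` form assumes a Python/typeshed version where the send type defaults to `None`):

```python
from collections.abc import AsyncGenerator

# Before: from typing import AsyncGenerator, Optional
#     async def iter_data(self, range: Optional[Range] = None) -> AsyncGenerator[bytes, None]: ...
# After, as in this release:
#     async def iter_data(self, range: Range | None = None) -> AsyncGenerator[bytes]: ...


async def chunked(data: bytes, chunk_size: int | None = None) -> AsyncGenerator[bytes]:
    # Illustrative generator: yield `data` in chunks of `chunk_size` bytes,
    # or as a single chunk when no size is given.
    size = chunk_size or len(data) or 1
    for offset in range(0, len(data), size):
        yield data[offset : offset + size]
```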
nucliadb_utils/storages/local.py CHANGED
@@ -23,8 +23,8 @@ import glob
  import json
  import os
  import shutil
+ from collections.abc import AsyncGenerator, AsyncIterator
  from datetime import datetime
- from typing import AsyncGenerator, AsyncIterator, Optional

  import aiofiles

@@ -38,7 +38,7 @@ class LocalStorageField(StorageField):
  storage: LocalStorage
  _handler = None

- def metadata_key(self, uri: Optional[str] = None):
+ def metadata_key(self, uri: str | None = None):
  if uri is None and self.field is not None:
  return f"{self.field.uri}.metadata"
  elif uri is None and self.key is not None:
@@ -73,7 +73,7 @@ class LocalStorageField(StorageField):
  destination_path = f"{destination_bucket_path}/{destination_uri}"
  shutil.copy(origin_path, destination_path)

- async def iter_data(self, range: Optional[Range] = None) -> AsyncGenerator[bytes, None]:
+ async def iter_data(self, range: Range | None = None) -> AsyncGenerator[bytes]:
  range = range or Range()
  key = self.field.uri if self.field else self.key
  if self.field is None:
@@ -191,7 +191,7 @@ class LocalStorageField(StorageField):
  self.field.ClearField("offset")
  self.field.ClearField("upload_uri")

- async def exists(self) -> Optional[ObjectMetadata]:
+ async def exists(self) -> ObjectMetadata | None:
  file_path = self.storage.get_file_path(self.bucket, self.key)
  metadata_path = self.metadata_key(file_path)
  if os.path.exists(metadata_path):
@@ -218,7 +218,7 @@ class LocalStorage(Storage):
  field_klass = LocalStorageField
  chunk_size = CHUNK_SIZE

- def __init__(self, local_testing_files: str, indexing_bucket: Optional[str] = None):
+ def __init__(self, local_testing_files: str, indexing_bucket: str | None = None):
  self.local_testing_files = local_testing_files.rstrip("/")
  self.bucket_format = "ndb_{kbid}"
  self.source = CloudFile.LOCAL
@@ -281,8 +281,8 @@ class LocalStorage(Storage):
  return deleted

  async def iterate_objects(
- self, bucket: str, prefix: str, start: Optional[str] = None
- ) -> AsyncGenerator[ObjectInfo, None]:
+ self, bucket: str, prefix: str, start: str | None = None
+ ) -> AsyncGenerator[ObjectInfo]:
  bucket_path = self.get_bucket_path(bucket)
  pathname = f"{self.get_file_path(bucket, prefix)}**/*"
  for key in sorted(glob.glob(pathname, recursive=True)):
@@ -296,7 +296,7 @@ class LocalStorage(Storage):
  continue
  yield ObjectInfo(name=name)

- async def download(self, bucket: str, key: str, range: Optional[Range] = None):
+ async def download(self, bucket: str, key: str, range: Range | None = None):
  key_path = self.get_file_path(bucket, key)
  if not os.path.exists(key_path):
  return
nucliadb_utils/storages/nuclia.py CHANGED
@@ -17,7 +17,6 @@
  # You should have received a copy of the GNU Affero General Public License
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  #
- from typing import Optional

  import aiohttp

@@ -32,7 +31,7 @@ class NucliaStorage:
  self,
  nuclia_public_url: str,
  nuclia_zone: str,
- service_account: Optional[str] = None,
+ service_account: str | None = None,
  ):
  self.service_account = service_account
  self.nuclia_public_url = nuclia_public_url.format(zone=nuclia_zone)
nucliadb_utils/storages/object_store.py CHANGED
@@ -19,7 +19,7 @@
  #

  import abc
- from typing import AsyncGenerator, AsyncIterator, Optional, Union
+ from collections.abc import AsyncGenerator, AsyncIterator

  from nucliadb_utils.storages.utils import ObjectInfo, ObjectMetadata, Range

@@ -37,7 +37,7 @@ class ObjectStore(abc.ABC, metaclass=abc.ABCMeta):
  async def finalize(self) -> None: ...

  @abc.abstractmethod
- async def bucket_create(self, bucket: str, labels: Optional[dict[str, str]] = None) -> bool:
+ async def bucket_create(self, bucket: str, labels: dict[str, str] | None = None) -> bool:
  """
  Create a new bucket in the object storage. Labels the bucket with the given labels if provided.
  Returns True if the bucket was created, False if it already existed.
@@ -93,7 +93,7 @@ class ObjectStore(abc.ABC, metaclass=abc.ABCMeta):
  self,
  bucket: str,
  key: str,
- data: Union[bytes, AsyncGenerator[bytes, None]],
+ data: bytes | AsyncGenerator[bytes, None],
  metadata: ObjectMetadata,
  ) -> None: ...

@@ -114,14 +114,14 @@ class ObjectStore(abc.ABC, metaclass=abc.ABCMeta):

  @abc.abstractmethod
  async def download_stream(
- self, bucket: str, key: str, range: Optional[Range] = None
+ self, bucket: str, key: str, range: Range | None = None
  ) -> AsyncGenerator[bytes, None]:
  raise NotImplementedError()
  yield b""

  @abc.abstractmethod
  async def iterate(
- self, bucket: str, prefix: str, start: Optional[str] = None
+ self, bucket: str, prefix: str, start: str | None = None
  ) -> AsyncGenerator[ObjectInfo, None]:
  raise NotImplementedError()
  yield ObjectInfo(name="")
@@ -132,7 +132,7 @@ class ObjectStore(abc.ABC, metaclass=abc.ABCMeta):
  @abc.abstractmethod
  async def upload_multipart_start(
  self, bucket: str, key: str, metadata: ObjectMetadata
- ) -> Optional[str]:
+ ) -> str | None:
  """
  Start a multipart upload. May return the url for the resumable upload.
  """
nucliadb_utils/storages/s3.py CHANGED
@@ -20,9 +20,9 @@
  from __future__ import annotations

  import base64
+ from collections.abc import AsyncGenerator, AsyncIterator
  from contextlib import AsyncExitStack
  from datetime import datetime
- from typing import AsyncGenerator, AsyncIterator, Optional

  import aiobotocore # type: ignore
  import aiohttp
@@ -55,12 +55,12 @@ RETRIABLE_EXCEPTIONS = (
  POLICY_DELETE = {
  "Rules": [
  {
- "Expiration": {"Days": 1},
+ "Expiration": {"Days": 7},
  "ID": "FullDelete",
  "Filter": {"Prefix": ""},
  "Status": "Enabled",
- "NoncurrentVersionExpiration": {"NoncurrentDays": 1},
- "AbortIncompleteMultipartUpload": {"DaysAfterInitiation": 1},
+ "NoncurrentVersionExpiration": {"NoncurrentDays": 7},
+ "AbortIncompleteMultipartUpload": {"DaysAfterInitiation": 7},
  },
  {
  "Expiration": {"ExpiredObjectDeleteMarker": True},
@@ -86,7 +86,7 @@ class S3StorageField(StorageField):
  self,
  uri,
  bucket,
- range: Optional[Range] = None,
+ range: Range | None = None,
  ):
  range = range or Range()
  if range.any():
@@ -103,7 +103,7 @@ class S3StorageField(StorageField):
  raise

  @s3_ops_observer.wrap({"type": "iter_data"})
- async def iter_data(self, range: Optional[Range] = None) -> AsyncGenerator[bytes, None]:
+ async def iter_data(self, range: Range | None = None) -> AsyncGenerator[bytes]:
  # Suports field and key based iter
  uri = self.field.uri if self.field else self.key
  if self.field is None:
@@ -179,7 +179,7 @@ class S3StorageField(StorageField):
  Bucket=bucket_name,
  Key=upload_id,
  Metadata={
- "FILENAME": cf.filename,
+ "base64_filename": base64.b64encode(cf.filename.encode()).decode(),
  "SIZE": str(cf.size),
  "CONTENT_TYPE": cf.content_type,
  },
@@ -277,7 +277,7 @@ class S3StorageField(StorageField):
  )

  @s3_ops_observer.wrap({"type": "exists"})
- async def exists(self) -> Optional[ObjectMetadata]:
+ async def exists(self) -> ObjectMetadata | None:
  """
  Existence can be checked either with a CloudFile data in the field attribute
  or own StorageField key and bucket. Field takes precendece
@@ -348,18 +348,18 @@ class S3Storage(Storage):

  def __init__(
  self,
- aws_client_id: Optional[str] = None,
- aws_client_secret: Optional[str] = None,
- deadletter_bucket: Optional[str] = None,
- indexing_bucket: Optional[str] = None,
- endpoint_url: Optional[str] = None,
+ aws_client_id: str | None = None,
+ aws_client_secret: str | None = None,
+ deadletter_bucket: str | None = None,
+ indexing_bucket: str | None = None,
+ endpoint_url: str | None = None,
  verify_ssl: bool = True,
  use_ssl: bool = True,
- region_name: Optional[str] = None,
- kms_key_id: Optional[str] = None,
+ region_name: str | None = None,
+ kms_key_id: str | None = None,
  max_pool_connections: int = 30,
- bucket: Optional[str] = None,
- bucket_tags: Optional[dict[str, str]] = None,
+ bucket: str | None = None,
+ bucket_tags: dict[str, str] | None = None,
  ):
  self.source = CloudFile.S3
  self.deadletter_bucket = deadletter_bucket
@@ -394,7 +394,7 @@ class S3Storage(Storage):
  self._session = get_session()
  return self._session

- async def initialize(self: "S3Storage") -> None:
+ async def initialize(self: S3Storage) -> None:
  session = AioSession()
  self._s3aioclient: AioBaseClient = await self._exit_stack.enter_async_context(
  session.create_client("s3", **self.opts)
@@ -425,8 +425,8 @@ class S3Storage(Storage):
  raise AttributeError("No valid uri")

  async def iterate_objects(
- self, bucket: str, prefix: str = "/", start: Optional[str] = None
- ) -> AsyncGenerator[ObjectInfo, None]:
+ self, bucket: str, prefix: str = "/", start: str | None = None
+ ) -> AsyncGenerator[ObjectInfo]:
  paginator = self._s3aioclient.get_paginator("list_objects")
  async for result in paginator.paginate(
  Bucket=bucket, Prefix=prefix, PaginationConfig={"StartingToken": start}
@@ -531,9 +531,9 @@ async def bucket_exists(client: AioSession, bucket_name: str) -> bool:
  async def create_bucket(
  client: AioSession,
  bucket_name: str,
- bucket_tags: Optional[dict[str, str]] = None,
- region_name: Optional[str] = None,
- kms_key_id: Optional[str] = None,
+ bucket_tags: dict[str, str] | None = None,
+ region_name: str | None = None,
+ kms_key_id: str | None = None,
  ):
  bucket_creation_options = {}
  if region_name is not None:
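Besides the typing cleanup, one behavioural change stands out in this file: multipart uploads now store the original filename in object metadata base64-encoded under `base64_filename` instead of raw under `FILENAME`. S3 user metadata travels as HTTP headers, so non-ASCII filenames need an ASCII-safe encoding. A round-trip sketch (the decode helper is hypothetical; the diff only shows the write path):

```python
import base64


def encode_filename_metadata(filename: str) -> str:
    # ASCII-safe value suitable for an S3 user-metadata header.
    return base64.b64encode(filename.encode("utf-8")).decode("ascii")


def decode_filename_metadata(value: str) -> str:
    # Hypothetical inverse for readers of the metadata.
    return base64.b64decode(value.encode("ascii")).decode("utf-8")


assert decode_filename_metadata(encode_filename_metadata("informe_año.pdf")) == "informe_año.pdf"
```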
nucliadb_utils/storages/settings.py CHANGED
@@ -18,23 +18,22 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.

  import os
- from typing import Dict, Optional

  from pydantic_settings import BaseSettings


  class Settings(BaseSettings):
- gcs_deadletter_bucket: Optional[str] = None
- gcs_indexing_bucket: Optional[str] = None
+ gcs_deadletter_bucket: str | None = None
+ gcs_indexing_bucket: str | None = None

  gcs_threads: int = 3
- gcs_labels: Dict[str, str] = {}
+ gcs_labels: dict[str, str] = {}

- s3_deadletter_bucket: Optional[str] = None
- s3_indexing_bucket: Optional[str] = None
+ s3_deadletter_bucket: str | None = None
+ s3_indexing_bucket: str | None = None

- azure_deadletter_bucket: Optional[str] = None
- azure_indexing_bucket: Optional[str] = None
+ azure_deadletter_bucket: str | None = None
+ azure_indexing_bucket: str | None = None

  local_testing_files: str = os.path.dirname(__file__)
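Only the annotations changed here; these remain plain `pydantic_settings.BaseSettings` fields populated from environment variables. A sketch of the standard pydantic-settings mapping (the class below mirrors a subset of `Settings` and is not the shipped definition; dict fields are parsed from JSON env values):

```python
import os

from pydantic_settings import BaseSettings


class StorageSettings(BaseSettings):
    # Illustrative subset of the storage settings above.
    gcs_indexing_bucket: str | None = None
    gcs_labels: dict[str, str] = {}


os.environ["GCS_INDEXING_BUCKET"] = "indexing-bucket"
os.environ["GCS_LABELS"] = '{"team": "nucliadb"}'

settings = StorageSettings()
assert settings.gcs_indexing_bucket == "indexing-bucket"
assert settings.gcs_labels == {"team": "nucliadb"}
```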
nucliadb_utils/storages/storage.py CHANGED
@@ -24,15 +24,10 @@ import asyncio
  import base64
  import hashlib
  import uuid
+ from collections.abc import AsyncGenerator, AsyncIterator
  from io import BytesIO
  from typing import (
  Any,
- AsyncGenerator,
- AsyncIterator,
- List,
- Optional,
- Type,
- Union,
  cast,
  )

@@ -65,14 +60,14 @@ class StorageField(abc.ABC, metaclass=abc.ABCMeta):
  storage: Storage
  bucket: str
  key: str
- field: Optional[CloudFile] = None
+ field: CloudFile | None = None

  def __init__(
  self,
  storage: Storage,
  bucket: str,
  fullkey: str,
- field: Optional[CloudFile] = None,
+ field: CloudFile | None = None,
  ):
  self.storage = storage
  self.bucket = bucket
@@ -83,7 +78,7 @@ class StorageField(abc.ABC, metaclass=abc.ABCMeta):
  async def upload(self, iterator: AsyncIterator, origin: CloudFile) -> CloudFile: ...

  @abc.abstractmethod
- async def iter_data(self, range: Optional[Range] = None) -> AsyncGenerator[bytes, None]:
+ async def iter_data(self, range: Range | None = None) -> AsyncGenerator[bytes]:
  raise NotImplementedError()
  yield b""

@@ -95,7 +90,7 @@ class StorageField(abc.ABC, metaclass=abc.ABCMeta):
  return deleted

  @abc.abstractmethod
- async def exists(self) -> Optional[ObjectMetadata]: ...
+ async def exists(self) -> ObjectMetadata | None: ...

  @abc.abstractmethod
  async def copy(
@@ -130,10 +125,9 @@ class StorageField(abc.ABC, metaclass=abc.ABCMeta):

  class Storage(abc.ABC, metaclass=abc.ABCMeta):
  source: int
- field_klass: Type
- deadletter_bucket: Optional[str] = None
- indexing_bucket: Optional[str] = None
- cached_buckets: List[str] = []
+ field_klass: type
+ deadletter_bucket: str | None = None
+ indexing_bucket: str | None = None
  chunk_size = CHUNK_SIZE

  async def delete_resource(self, kbid: str, uuid: str):
@@ -166,7 +160,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  await self.upload_object(self.deadletter_bucket, key, message.SerializeToString())

  def get_indexing_storage_key(
- self, *, kb: str, logical_shard: str, resource_uid: str, txid: Union[int, str]
+ self, *, kb: str, logical_shard: str, resource_uid: str, txid: int | str
  ):
  return INDEXING_KEY.format(kb=kb, shard=logical_shard, resource=resource_uid, txid=txid)

@@ -174,7 +168,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  self,
  message: BrainResource,
  txid: int,
- partition: Optional[str],
+ partition: str | None,
  kb: str,
  logical_shard: str,
  ) -> str:
@@ -197,7 +191,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  self,
  message: BrainResource,
  reindex_id: str,
- partition: Optional[str],
+ partition: str | None,
  kb: str,
  logical_shard: str,
  ) -> str:
@@ -328,7 +322,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  kbid: str,
  uuid: str,
  field: str,
- old_field: Optional[CloudFile] = None,
+ old_field: CloudFile | None = None,
  ) -> StorageField:
  # Its a file field value
  bucket = self.get_bucket_name(kbid)
@@ -360,7 +354,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  payload: bytes,
  filename: str,
  content_type: str,
- md5: Optional[str] = None,
+ md5: str | None = None,
  ):
  decoded_payload = base64.b64decode(payload)
  cf = CloudFile()
@@ -370,7 +364,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  cf.source = self.source # type: ignore

  if md5 is None:
- md5hash = hashlib.md5(decoded_payload).digest()
+ md5hash = hashlib.md5(decoded_payload, usedforsecurity=False).digest()
  cf.md5 = md5hash.decode()
  else:
  cf.md5 = md5
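The `usedforsecurity=False` flag (available since Python 3.9) declares that MD5 is used here as a content checksum rather than a security primitive, which keeps the call working on FIPS-restricted OpenSSL builds where a bare `hashlib.md5()` can be rejected. A minimal sketch of the flag (the helper name and `hexdigest()` are illustrative; the shipped code keeps `digest()`):

```python
import hashlib


def content_checksum(payload: bytes) -> str:
    # Checksum only, not a security primitive: permitted under FIPS policies.
    return hashlib.md5(payload, usedforsecurity=False).hexdigest()


assert content_checksum(b"hello") == "5d41402abc4b2a76b9719d911017c592"
```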
@@ -436,20 +430,19 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  self,
  bucket: str,
  key: str,
- range: Optional[Range] = None,
- ):
+ range: Range | None = None,
+ ) -> AsyncGenerator[bytes]:
  destination: StorageField = self.field_klass(storage=self, bucket=bucket, fullkey=key)
  try:
  async for data in destination.iter_data(range=range):
  yield data
  except KeyError:
- yield None
+ pass

  async def downloadbytes(self, bucket: str, key: str) -> BytesIO:
  result = BytesIO()
  async for data in self.download(bucket, key):
- if data is not None:
- result.write(data)
+ result.write(data)

  result.seek(0)
  return result
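`Storage.download` now carries an explicit `AsyncGenerator[bytes]` return type and simply ends the stream when the key is missing, instead of yielding a single `None`; callers such as `downloadbytes` therefore drop the `if data is not None` guard. A minimal consumption sketch, assuming an initialized `Storage` instance:

```python
from io import BytesIO

from nucliadb_utils.storages.storage import Storage


async def read_object(storage: Storage, bucket: str, key: str) -> BytesIO:
    # A missing object yields nothing, so the buffer is simply left empty.
    buffer = BytesIO()
    async for chunk in storage.download(bucket, key):
        buffer.write(chunk)
    buffer.seek(0)
    return buffer
```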
@@ -461,29 +454,24 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  result.seek(0)
  return result

- async def downloadbytescf_iterator(
- self, cf: CloudFile
- ) -> AsyncGenerator[bytes, None]: # pragma: no cover
+ async def downloadbytescf_iterator(self, cf: CloudFile) -> AsyncGenerator[bytes]: # pragma: no cover
  # this is covered by other tests
  if cf.source == self.source:
  async for data in self.download(cf.bucket_name, cf.uri):
- if data is not None:
- yield data
+ yield data
  elif cf.source == CloudFile.FLAPS:
  flaps_storage = await get_nuclia_storage()
  async for data in flaps_storage.download(cf):
- if data is not None:
- yield data
+ yield data
  elif cf.source == CloudFile.LOCAL:
  local_storage = get_local_storage()
  async for data in local_storage.download(cf.bucket_name, cf.uri):
- if data is not None:
- yield data
+ yield data

  async def upload_pb(self, sf: StorageField, payload: Any):
  await self.upload_object(sf.bucket, sf.key, payload.SerializeToString())

- async def download_pb(self, sf: StorageField, PBKlass: Type):
+ async def download_pb(self, sf: StorageField, PBKlass: type):
  payload = await self.downloadbytes(sf.bucket, sf.key)

  if payload.getbuffer().nbytes == 0:
@@ -517,8 +505,8 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):

  @abc.abstractmethod
  async def iterate_objects(
- self, bucket: str, prefix: str, start: Optional[str] = None
- ) -> AsyncGenerator[ObjectInfo, None]:
+ self, bucket: str, prefix: str, start: str | None = None
+ ) -> AsyncGenerator[ObjectInfo]:
  raise NotImplementedError()
  yield ObjectInfo(name="")

@@ -577,9 +565,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  ...


- async def iter_and_add_size(
- stream: AsyncGenerator[bytes, None], cf: CloudFile
- ) -> AsyncGenerator[bytes, None]:
+ async def iter_and_add_size(stream: AsyncGenerator[bytes], cf: CloudFile) -> AsyncGenerator[bytes]:
  # This is needed because some storage types like GCS or S3 require
  # the size of the file at least at the request done for the last chunk.
  total_size = 0
@@ -590,9 +576,7 @@ async def iter_and_add_size(
  yield chunk


- async def iter_in_chunk_size(
- iterator: AsyncGenerator[bytes, None], chunk_size: int
- ) -> AsyncGenerator[bytes, None]:
+ async def iter_in_chunk_size(iterator: AsyncGenerator[bytes], chunk_size: int) -> AsyncGenerator[bytes]:
  # This is needed to make sure bytes uploaded to the blob storage complies with a particular chunk size.
  buffer = b""
  async for chunk in iterator:
@@ -606,8 +590,8 @@ async def iter_in_chunk_size(


  async def iterate_storage_compatible(
- iterator: AsyncGenerator[bytes, None], storage: Storage, cf: CloudFile
- ) -> AsyncGenerator[bytes, None]:
+ iterator: AsyncGenerator[bytes], storage: Storage, cf: CloudFile
+ ) -> AsyncGenerator[bytes]:
  """
  Makes sure to add the size to the cloudfile and split the data in
  chunks that are compatible with the storage type of choice
nucliadb_utils/storages/utils.py CHANGED
@@ -19,7 +19,6 @@
  #

  from dataclasses import dataclass
- from typing import Optional

  from pydantic import BaseModel

@@ -41,8 +40,8 @@ class Range:
  The start and end values are 0-based.
  """

- start: Optional[int] = None
- end: Optional[int] = None
+ start: int | None = None
+ end: int | None = None

  def any(self) -> bool:
  return self.start is not None or self.end is not None
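`Range` is a small dataclass for 0-based, inclusive byte ranges; only its annotations change in this release. A usage sketch (the header helper is hypothetical and not part of the package):

```python
from nucliadb_utils.storages.utils import Range


def to_http_range_header(r: Range) -> str | None:
    # Render the 0-based, inclusive range as an HTTP `Range` header value.
    if not r.any():
        return None
    start = r.start if r.start is not None else 0
    end = "" if r.end is None else str(r.end)
    return f"bytes={start}-{end}"


assert to_http_range_header(Range(start=0, end=1023)) == "bytes=0-1023"
assert to_http_range_header(Range()) is None
```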
nucliadb_utils/store.py CHANGED
@@ -17,6 +17,6 @@
  # You should have received a copy of the GNU Affero General Public License
  # along with this program. If not, see <http://www.gnu.org/licenses/>.

- from typing import Any, Dict
+ from typing import Any

- MAIN: Dict[str, Any] = {}
+ MAIN: dict[str, Any] = {}