nucliadb-utils 6.9.1.post5229__py3-none-any.whl → 6.10.0.post5732__py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registry.

Potentially problematic release: this version of nucliadb-utils might be problematic.

Files changed (45)
  1. nucliadb_utils/asyncio_utils.py +3 -3
  2. nucliadb_utils/audit/audit.py +41 -31
  3. nucliadb_utils/audit/basic.py +22 -23
  4. nucliadb_utils/audit/stream.py +31 -31
  5. nucliadb_utils/authentication.py +8 -10
  6. nucliadb_utils/cache/nats.py +10 -12
  7. nucliadb_utils/cache/pubsub.py +5 -4
  8. nucliadb_utils/cache/settings.py +2 -3
  9. nucliadb_utils/const.py +1 -1
  10. nucliadb_utils/debug.py +2 -2
  11. nucliadb_utils/encryption/settings.py +1 -2
  12. nucliadb_utils/fastapi/openapi.py +1 -2
  13. nucliadb_utils/fastapi/versioning.py +10 -6
  14. nucliadb_utils/featureflagging.py +10 -4
  15. nucliadb_utils/grpc.py +3 -3
  16. nucliadb_utils/helpers.py +1 -1
  17. nucliadb_utils/nats.py +15 -16
  18. nucliadb_utils/nuclia_usage/utils/kb_usage_report.py +4 -5
  19. nucliadb_utils/run.py +1 -1
  20. nucliadb_utils/settings.py +40 -41
  21. nucliadb_utils/signals.py +3 -3
  22. nucliadb_utils/storages/azure.py +34 -21
  23. nucliadb_utils/storages/gcs.py +22 -21
  24. nucliadb_utils/storages/local.py +8 -8
  25. nucliadb_utils/storages/nuclia.py +1 -2
  26. nucliadb_utils/storages/object_store.py +6 -6
  27. nucliadb_utils/storages/s3.py +23 -23
  28. nucliadb_utils/storages/settings.py +7 -8
  29. nucliadb_utils/storages/storage.py +29 -45
  30. nucliadb_utils/storages/utils.py +2 -3
  31. nucliadb_utils/store.py +2 -2
  32. nucliadb_utils/tests/asyncbenchmark.py +8 -10
  33. nucliadb_utils/tests/azure.py +2 -1
  34. nucliadb_utils/tests/fixtures.py +3 -2
  35. nucliadb_utils/tests/gcs.py +3 -2
  36. nucliadb_utils/tests/local.py +2 -1
  37. nucliadb_utils/tests/nats.py +1 -1
  38. nucliadb_utils/tests/s3.py +2 -1
  39. nucliadb_utils/transaction.py +16 -18
  40. nucliadb_utils/utilities.py +22 -24
  41. {nucliadb_utils-6.9.1.post5229.dist-info → nucliadb_utils-6.10.0.post5732.dist-info}/METADATA +6 -6
  42. nucliadb_utils-6.10.0.post5732.dist-info/RECORD +59 -0
  43. nucliadb_utils-6.9.1.post5229.dist-info/RECORD +0 -59
  44. {nucliadb_utils-6.9.1.post5229.dist-info → nucliadb_utils-6.10.0.post5732.dist-info}/WHEEL +0 -0
  45. {nucliadb_utils-6.9.1.post5229.dist-info → nucliadb_utils-6.10.0.post5732.dist-info}/top_level.txt +0 -0
nucliadb_utils/storages/gcs.py CHANGED
@@ -23,10 +23,11 @@ import asyncio
  import base64
  import json
  import socket
+ from collections.abc import AsyncGenerator, AsyncIterator
  from concurrent.futures import ThreadPoolExecutor
  from copy import deepcopy
  from datetime import datetime
- from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, cast
+ from typing import Any, cast
  from urllib.parse import quote_plus

  import aiohttp
@@ -153,7 +154,7 @@ class GCSStorageField(StorageField):
  assert data["resource"]["name"] == destination_uri

  @storage_ops_observer.wrap({"type": "iter_data"})
- async def iter_data(self, range: Optional[Range] = None) -> AsyncGenerator[bytes, None]:
+ async def iter_data(self, range: Range | None = None) -> AsyncGenerator[bytes]:
  attempt = 1
  while True:
  try:
@@ -170,13 +171,13 @@ class GCSStorageField(StorageField):
  raise
  wait_time = 2 ** (attempt - 1)
  logger.warning(
- f"Error downloading from GCP. Retrying ({attempt} of {MAX_TRIES}) after {wait_time} seconds. Error: {ex}" # noqa
+ f"Error downloading from GCP. Retrying ({attempt} of {MAX_TRIES}) after {wait_time} seconds. Error: {ex}"
  )
  await asyncio.sleep(wait_time)
  attempt += 1

  @storage_ops_observer.wrap({"type": "inner_iter_data"})
- async def _inner_iter_data(self, range: Optional[Range] = None):
+ async def _inner_iter_data(self, range: Range | None = None):
  """
  Iterate through object data.
  """
@@ -322,7 +323,7 @@ class GCSStorageField(StorageField):
  async with self.storage.session.put(
  self.field.resumable_uri, headers=headers, data=data
  ) as call:
- text = await call.text() # noqa
+ text = await call.text()
  if call.status not in [200, 201, 308]:
  if call.status == 410:
  raise ResumableUploadGone(text)
@@ -377,7 +378,7 @@ class GCSStorageField(StorageField):
  max_tries=MAX_TRIES,
  )
  @storage_ops_observer.wrap({"type": "exists"})
- async def exists(self) -> Optional[ObjectMetadata]:
+ async def exists(self) -> ObjectMetadata | None:
  """
  Existence can be checked either with a CloudFile data in the field attribute
  or own StorageField key and bucket. Field takes precendece
@@ -425,23 +426,23 @@ class GCSStorageField(StorageField):

  class GCSStorage(Storage):
  field_klass = GCSStorageField
- _session: Optional[aiohttp.ClientSession] = None
+ _session: aiohttp.ClientSession | None = None
  _credentials = None
  _json_credentials = None
  chunk_size = CHUNK_SIZE

  def __init__(
  self,
- account_credentials: Optional[str] = None,
- bucket: Optional[str] = None,
- location: Optional[str] = None,
- project: Optional[str] = None,
- executor: Optional[ThreadPoolExecutor] = None,
- deadletter_bucket: Optional[str] = None,
- indexing_bucket: Optional[str] = None,
- labels: Optional[Dict[str, str]] = None,
+ account_credentials: str | None = None,
+ bucket: str | None = None,
+ location: str | None = None,
+ project: str | None = None,
+ executor: ThreadPoolExecutor | None = None,
+ deadletter_bucket: str | None = None,
+ indexing_bucket: str | None = None,
+ labels: dict[str, str] | None = None,
  url: str = "https://www.googleapis.com",
- scopes: Optional[List[str]] = None,
+ scopes: list[str] | None = None,
  anonymous: bool = False,
  ):
  if anonymous:
@@ -533,7 +534,7 @@ class GCSStorage(Storage):
  @storage_ops_observer.wrap({"type": "delete"})
  async def delete_upload(self, uri: str, bucket_name: str):
  if uri:
- url = "{}/{}/o/{}".format(self.object_base_url, bucket_name, quote_plus(uri))
+ url = f"{self.object_base_url}/{bucket_name}/o/{quote_plus(uri)}"
  headers = await self.get_access_headers()
  async with self.session.delete(url, headers=headers) as resp:
  if resp.status in (200, 204, 404):
@@ -569,7 +570,7 @@ class GCSStorage(Storage):
  max_tries=MAX_TRIES,
  )
  @storage_ops_observer.wrap({"type": "create_bucket"})
- async def create_bucket(self, bucket_name: str, kbid: Optional[str] = None):
+ async def create_bucket(self, bucket_name: str, kbid: str | None = None):
  if await self.check_exists(bucket_name=bucket_name):
  return

@@ -671,9 +672,9 @@ class GCSStorage(Storage):
  return deleted, conflict

  async def iterate_objects(
- self, bucket: str, prefix: str, start: Optional[str] = None
- ) -> AsyncGenerator[ObjectInfo, None]:
- url = "{}/{}/o".format(self.object_base_url, bucket)
+ self, bucket: str, prefix: str, start: str | None = None
+ ) -> AsyncGenerator[ObjectInfo]:
+ url = f"{self.object_base_url}/{bucket}/o"
  headers = await self.get_access_headers()
  params = {"prefix": prefix}
  if start:
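Most of the churn in this file (and in the files below) is a typing cleanup rather than a behaviour change: `Optional[X]` becomes `X | None`, `Dict`/`List`/`Type` become the built-in generics, and `AsyncGenerator`/`AsyncIterator` now come from `collections.abc`, with the send type dropped from many return annotations. A minimal before/after sketch of the pattern (the function and its body are illustrative, not package code; the single-argument `AsyncGenerator[bytes]` form assumes a Python/typeshed version where the send type defaults to `None`):

```python
from collections.abc import AsyncGenerator

# Before: from typing import AsyncGenerator, Optional
#     async def iter_data(self, range: Optional[Range] = None) -> AsyncGenerator[bytes, None]: ...
# After, as in this release:
#     async def iter_data(self, range: Range | None = None) -> AsyncGenerator[bytes]: ...


async def chunked(data: bytes, chunk_size: int | None = None) -> AsyncGenerator[bytes]:
    # Illustrative generator: yield `data` in chunks of `chunk_size` bytes,
    # or as a single chunk when no size is given.
    size = chunk_size or len(data) or 1
    for offset in range(0, len(data), size):
        yield data[offset : offset + size]
```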
nucliadb_utils/storages/local.py CHANGED
@@ -23,8 +23,8 @@ import glob
  import json
  import os
  import shutil
+ from collections.abc import AsyncGenerator, AsyncIterator
  from datetime import datetime
- from typing import AsyncGenerator, AsyncIterator, Optional

  import aiofiles

@@ -38,7 +38,7 @@ class LocalStorageField(StorageField):
  storage: LocalStorage
  _handler = None

- def metadata_key(self, uri: Optional[str] = None):
+ def metadata_key(self, uri: str | None = None):
  if uri is None and self.field is not None:
  return f"{self.field.uri}.metadata"
  elif uri is None and self.key is not None:
@@ -73,7 +73,7 @@ class LocalStorageField(StorageField):
  destination_path = f"{destination_bucket_path}/{destination_uri}"
  shutil.copy(origin_path, destination_path)

- async def iter_data(self, range: Optional[Range] = None) -> AsyncGenerator[bytes, None]:
+ async def iter_data(self, range: Range | None = None) -> AsyncGenerator[bytes]:
  range = range or Range()
  key = self.field.uri if self.field else self.key
  if self.field is None:
@@ -191,7 +191,7 @@ class LocalStorageField(StorageField):
  self.field.ClearField("offset")
  self.field.ClearField("upload_uri")

- async def exists(self) -> Optional[ObjectMetadata]:
+ async def exists(self) -> ObjectMetadata | None:
  file_path = self.storage.get_file_path(self.bucket, self.key)
  metadata_path = self.metadata_key(file_path)
  if os.path.exists(metadata_path):
@@ -218,7 +218,7 @@ class LocalStorage(Storage):
  field_klass = LocalStorageField
  chunk_size = CHUNK_SIZE

- def __init__(self, local_testing_files: str, indexing_bucket: Optional[str] = None):
+ def __init__(self, local_testing_files: str, indexing_bucket: str | None = None):
  self.local_testing_files = local_testing_files.rstrip("/")
  self.bucket_format = "ndb_{kbid}"
  self.source = CloudFile.LOCAL
@@ -281,8 +281,8 @@ class LocalStorage(Storage):
  return deleted

  async def iterate_objects(
- self, bucket: str, prefix: str, start: Optional[str] = None
- ) -> AsyncGenerator[ObjectInfo, None]:
+ self, bucket: str, prefix: str, start: str | None = None
+ ) -> AsyncGenerator[ObjectInfo]:
  bucket_path = self.get_bucket_path(bucket)
  pathname = f"{self.get_file_path(bucket, prefix)}**/*"
  for key in sorted(glob.glob(pathname, recursive=True)):
@@ -296,7 +296,7 @@ class LocalStorage(Storage):
  continue
  yield ObjectInfo(name=name)

- async def download(self, bucket: str, key: str, range: Optional[Range] = None):
+ async def download(self, bucket: str, key: str, range: Range | None = None):
  key_path = self.get_file_path(bucket, key)
  if not os.path.exists(key_path):
  return
nucliadb_utils/storages/nuclia.py CHANGED
@@ -17,7 +17,6 @@
  # You should have received a copy of the GNU Affero General Public License
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  #
- from typing import Optional

  import aiohttp

@@ -32,7 +31,7 @@ class NucliaStorage:
  self,
  nuclia_public_url: str,
  nuclia_zone: str,
- service_account: Optional[str] = None,
+ service_account: str | None = None,
  ):
  self.service_account = service_account
  self.nuclia_public_url = nuclia_public_url.format(zone=nuclia_zone)
nucliadb_utils/storages/object_store.py CHANGED
@@ -19,7 +19,7 @@
  #

  import abc
- from typing import AsyncGenerator, AsyncIterator, Optional, Union
+ from collections.abc import AsyncGenerator, AsyncIterator

  from nucliadb_utils.storages.utils import ObjectInfo, ObjectMetadata, Range

@@ -37,7 +37,7 @@ class ObjectStore(abc.ABC, metaclass=abc.ABCMeta):
  async def finalize(self) -> None: ...

  @abc.abstractmethod
- async def bucket_create(self, bucket: str, labels: Optional[dict[str, str]] = None) -> bool:
+ async def bucket_create(self, bucket: str, labels: dict[str, str] | None = None) -> bool:
  """
  Create a new bucket in the object storage. Labels the bucket with the given labels if provided.
  Returns True if the bucket was created, False if it already existed.
@@ -93,7 +93,7 @@ class ObjectStore(abc.ABC, metaclass=abc.ABCMeta):
  self,
  bucket: str,
  key: str,
- data: Union[bytes, AsyncGenerator[bytes, None]],
+ data: bytes | AsyncGenerator[bytes, None],
  metadata: ObjectMetadata,
  ) -> None: ...

@@ -114,14 +114,14 @@ class ObjectStore(abc.ABC, metaclass=abc.ABCMeta):

  @abc.abstractmethod
  async def download_stream(
- self, bucket: str, key: str, range: Optional[Range] = None
+ self, bucket: str, key: str, range: Range | None = None
  ) -> AsyncGenerator[bytes, None]:
  raise NotImplementedError()
  yield b""

  @abc.abstractmethod
  async def iterate(
- self, bucket: str, prefix: str, start: Optional[str] = None
+ self, bucket: str, prefix: str, start: str | None = None
  ) -> AsyncGenerator[ObjectInfo, None]:
  raise NotImplementedError()
  yield ObjectInfo(name="")
@@ -132,7 +132,7 @@ class ObjectStore(abc.ABC, metaclass=abc.ABCMeta):
  @abc.abstractmethod
  async def upload_multipart_start(
  self, bucket: str, key: str, metadata: ObjectMetadata
- ) -> Optional[str]:
+ ) -> str | None:
  """
  Start a multipart upload. May return the url for the resumable upload.
  """
nucliadb_utils/storages/s3.py CHANGED
@@ -20,9 +20,9 @@
  from __future__ import annotations

  import base64
+ from collections.abc import AsyncGenerator, AsyncIterator
  from contextlib import AsyncExitStack
  from datetime import datetime
- from typing import AsyncGenerator, AsyncIterator, Optional

  import aiobotocore # type: ignore
  import aiohttp
@@ -55,12 +55,12 @@ RETRIABLE_EXCEPTIONS = (
  POLICY_DELETE = {
  "Rules": [
  {
- "Expiration": {"Days": 1},
+ "Expiration": {"Days": 7},
  "ID": "FullDelete",
  "Filter": {"Prefix": ""},
  "Status": "Enabled",
- "NoncurrentVersionExpiration": {"NoncurrentDays": 1},
- "AbortIncompleteMultipartUpload": {"DaysAfterInitiation": 1},
+ "NoncurrentVersionExpiration": {"NoncurrentDays": 7},
+ "AbortIncompleteMultipartUpload": {"DaysAfterInitiation": 7},
  },
  {
  "Expiration": {"ExpiredObjectDeleteMarker": True},
@@ -86,7 +86,7 @@ class S3StorageField(StorageField):
  self,
  uri,
  bucket,
- range: Optional[Range] = None,
+ range: Range | None = None,
  ):
  range = range or Range()
  if range.any():
@@ -103,7 +103,7 @@ class S3StorageField(StorageField):
  raise

  @s3_ops_observer.wrap({"type": "iter_data"})
- async def iter_data(self, range: Optional[Range] = None) -> AsyncGenerator[bytes, None]:
+ async def iter_data(self, range: Range | None = None) -> AsyncGenerator[bytes]:
  # Suports field and key based iter
  uri = self.field.uri if self.field else self.key
  if self.field is None:
@@ -179,7 +179,7 @@ class S3StorageField(StorageField):
  Bucket=bucket_name,
  Key=upload_id,
  Metadata={
- "FILENAME": cf.filename,
+ "base64_filename": base64.b64encode(cf.filename.encode()).decode(),
  "SIZE": str(cf.size),
  "CONTENT_TYPE": cf.content_type,
  },
@@ -277,7 +277,7 @@ class S3StorageField(StorageField):
  )

  @s3_ops_observer.wrap({"type": "exists"})
- async def exists(self) -> Optional[ObjectMetadata]:
+ async def exists(self) -> ObjectMetadata | None:
  """
  Existence can be checked either with a CloudFile data in the field attribute
  or own StorageField key and bucket. Field takes precendece
@@ -348,18 +348,18 @@ class S3Storage(Storage):

  def __init__(
  self,
- aws_client_id: Optional[str] = None,
- aws_client_secret: Optional[str] = None,
- deadletter_bucket: Optional[str] = None,
- indexing_bucket: Optional[str] = None,
- endpoint_url: Optional[str] = None,
+ aws_client_id: str | None = None,
+ aws_client_secret: str | None = None,
+ deadletter_bucket: str | None = None,
+ indexing_bucket: str | None = None,
+ endpoint_url: str | None = None,
  verify_ssl: bool = True,
  use_ssl: bool = True,
- region_name: Optional[str] = None,
- kms_key_id: Optional[str] = None,
+ region_name: str | None = None,
+ kms_key_id: str | None = None,
  max_pool_connections: int = 30,
- bucket: Optional[str] = None,
- bucket_tags: Optional[dict[str, str]] = None,
+ bucket: str | None = None,
+ bucket_tags: dict[str, str] | None = None,
  ):
  self.source = CloudFile.S3
  self.deadletter_bucket = deadletter_bucket
@@ -394,7 +394,7 @@ class S3Storage(Storage):
  self._session = get_session()
  return self._session

- async def initialize(self: "S3Storage") -> None:
+ async def initialize(self: S3Storage) -> None:
  session = AioSession()
  self._s3aioclient: AioBaseClient = await self._exit_stack.enter_async_context(
  session.create_client("s3", **self.opts)
@@ -425,8 +425,8 @@ class S3Storage(Storage):
  raise AttributeError("No valid uri")

  async def iterate_objects(
- self, bucket: str, prefix: str = "/", start: Optional[str] = None
- ) -> AsyncGenerator[ObjectInfo, None]:
+ self, bucket: str, prefix: str = "/", start: str | None = None
+ ) -> AsyncGenerator[ObjectInfo]:
  paginator = self._s3aioclient.get_paginator("list_objects")
  async for result in paginator.paginate(
  Bucket=bucket, Prefix=prefix, PaginationConfig={"StartingToken": start}
@@ -531,9 +531,9 @@ async def bucket_exists(client: AioSession, bucket_name: str) -> bool:
  async def create_bucket(
  client: AioSession,
  bucket_name: str,
- bucket_tags: Optional[dict[str, str]] = None,
- region_name: Optional[str] = None,
- kms_key_id: Optional[str] = None,
+ bucket_tags: dict[str, str] | None = None,
+ region_name: str | None = None,
+ kms_key_id: str | None = None,
  ):
  bucket_creation_options = {}
  if region_name is not None:
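Besides the typing cleanup, one behavioural change stands out in this file: multipart uploads now store the original filename in object metadata base64-encoded under `base64_filename` instead of raw under `FILENAME`. S3 user metadata travels as HTTP headers, so non-ASCII filenames need an ASCII-safe encoding. A round-trip sketch (the decode helper is hypothetical; the diff only shows the write path):

```python
import base64


def encode_filename_metadata(filename: str) -> str:
    # ASCII-safe value suitable for an S3 user-metadata header.
    return base64.b64encode(filename.encode("utf-8")).decode("ascii")


def decode_filename_metadata(value: str) -> str:
    # Hypothetical inverse for readers of the metadata.
    return base64.b64decode(value.encode("ascii")).decode("utf-8")


assert decode_filename_metadata(encode_filename_metadata("informe_año.pdf")) == "informe_año.pdf"
```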
nucliadb_utils/storages/settings.py CHANGED
@@ -18,23 +18,22 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.

  import os
- from typing import Dict, Optional

  from pydantic_settings import BaseSettings


  class Settings(BaseSettings):
- gcs_deadletter_bucket: Optional[str] = None
- gcs_indexing_bucket: Optional[str] = None
+ gcs_deadletter_bucket: str | None = None
+ gcs_indexing_bucket: str | None = None

  gcs_threads: int = 3
- gcs_labels: Dict[str, str] = {}
+ gcs_labels: dict[str, str] = {}

- s3_deadletter_bucket: Optional[str] = None
- s3_indexing_bucket: Optional[str] = None
+ s3_deadletter_bucket: str | None = None
+ s3_indexing_bucket: str | None = None

- azure_deadletter_bucket: Optional[str] = None
- azure_indexing_bucket: Optional[str] = None
+ azure_deadletter_bucket: str | None = None
+ azure_indexing_bucket: str | None = None

  local_testing_files: str = os.path.dirname(__file__)
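Only the annotations changed here; these remain plain `pydantic_settings.BaseSettings` fields populated from environment variables. A sketch of the standard pydantic-settings mapping (the class below mirrors a subset of `Settings` and is not the shipped definition; dict fields are parsed from JSON env values):

```python
import os

from pydantic_settings import BaseSettings


class StorageSettings(BaseSettings):
    # Illustrative subset of the storage settings above.
    gcs_indexing_bucket: str | None = None
    gcs_labels: dict[str, str] = {}


os.environ["GCS_INDEXING_BUCKET"] = "indexing-bucket"
os.environ["GCS_LABELS"] = '{"team": "nucliadb"}'

settings = StorageSettings()
assert settings.gcs_indexing_bucket == "indexing-bucket"
assert settings.gcs_labels == {"team": "nucliadb"}
```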
nucliadb_utils/storages/storage.py CHANGED
@@ -24,15 +24,10 @@ import asyncio
  import base64
  import hashlib
  import uuid
+ from collections.abc import AsyncGenerator, AsyncIterator
  from io import BytesIO
  from typing import (
  Any,
- AsyncGenerator,
- AsyncIterator,
- List,
- Optional,
- Type,
- Union,
  cast,
  )

@@ -65,14 +60,14 @@ class StorageField(abc.ABC, metaclass=abc.ABCMeta):
  storage: Storage
  bucket: str
  key: str
- field: Optional[CloudFile] = None
+ field: CloudFile | None = None

  def __init__(
  self,
  storage: Storage,
  bucket: str,
  fullkey: str,
- field: Optional[CloudFile] = None,
+ field: CloudFile | None = None,
  ):
  self.storage = storage
  self.bucket = bucket
@@ -83,7 +78,7 @@ class StorageField(abc.ABC, metaclass=abc.ABCMeta):
  async def upload(self, iterator: AsyncIterator, origin: CloudFile) -> CloudFile: ...

  @abc.abstractmethod
- async def iter_data(self, range: Optional[Range] = None) -> AsyncGenerator[bytes, None]:
+ async def iter_data(self, range: Range | None = None) -> AsyncGenerator[bytes]:
  raise NotImplementedError()
  yield b""

@@ -95,7 +90,7 @@ class StorageField(abc.ABC, metaclass=abc.ABCMeta):
  return deleted

  @abc.abstractmethod
- async def exists(self) -> Optional[ObjectMetadata]: ...
+ async def exists(self) -> ObjectMetadata | None: ...

  @abc.abstractmethod
  async def copy(
@@ -130,10 +125,9 @@ class StorageField(abc.ABC, metaclass=abc.ABCMeta):

  class Storage(abc.ABC, metaclass=abc.ABCMeta):
  source: int
- field_klass: Type
- deadletter_bucket: Optional[str] = None
- indexing_bucket: Optional[str] = None
- cached_buckets: List[str] = []
+ field_klass: type
+ deadletter_bucket: str | None = None
+ indexing_bucket: str | None = None
  chunk_size = CHUNK_SIZE

  async def delete_resource(self, kbid: str, uuid: str):
@@ -166,7 +160,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  await self.upload_object(self.deadletter_bucket, key, message.SerializeToString())

  def get_indexing_storage_key(
- self, *, kb: str, logical_shard: str, resource_uid: str, txid: Union[int, str]
+ self, *, kb: str, logical_shard: str, resource_uid: str, txid: int | str
  ):
  return INDEXING_KEY.format(kb=kb, shard=logical_shard, resource=resource_uid, txid=txid)

@@ -174,7 +168,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  self,
  message: BrainResource,
  txid: int,
- partition: Optional[str],
+ partition: str | None,
  kb: str,
  logical_shard: str,
  ) -> str:
@@ -197,7 +191,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  self,
  message: BrainResource,
  reindex_id: str,
- partition: Optional[str],
+ partition: str | None,
  kb: str,
  logical_shard: str,
  ) -> str:
@@ -328,7 +322,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  kbid: str,
  uuid: str,
  field: str,
- old_field: Optional[CloudFile] = None,
+ old_field: CloudFile | None = None,
  ) -> StorageField:
  # Its a file field value
  bucket = self.get_bucket_name(kbid)
@@ -360,7 +354,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  payload: bytes,
  filename: str,
  content_type: str,
- md5: Optional[str] = None,
+ md5: str | None = None,
  ):
  decoded_payload = base64.b64decode(payload)
  cf = CloudFile()
@@ -370,7 +364,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  cf.source = self.source # type: ignore

  if md5 is None:
- md5hash = hashlib.md5(decoded_payload).digest()
+ md5hash = hashlib.md5(decoded_payload, usedforsecurity=False).digest()
  cf.md5 = md5hash.decode()
  else:
  cf.md5 = md5
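The `usedforsecurity=False` flag (available since Python 3.9) declares that MD5 is used here as a content checksum rather than a security primitive, which keeps the call working on FIPS-restricted OpenSSL builds where a bare `hashlib.md5()` can be rejected. A minimal sketch of the flag (the helper name and `hexdigest()` are illustrative; the shipped code keeps `digest()`):

```python
import hashlib


def content_checksum(payload: bytes) -> str:
    # Checksum only, not a security primitive: permitted under FIPS policies.
    return hashlib.md5(payload, usedforsecurity=False).hexdigest()


assert content_checksum(b"hello") == "5d41402abc4b2a76b9719d911017c592"
```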
@@ -436,20 +430,19 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  self,
  bucket: str,
  key: str,
- range: Optional[Range] = None,
- ):
+ range: Range | None = None,
+ ) -> AsyncGenerator[bytes]:
  destination: StorageField = self.field_klass(storage=self, bucket=bucket, fullkey=key)
  try:
  async for data in destination.iter_data(range=range):
  yield data
  except KeyError:
- yield None
+ pass

  async def downloadbytes(self, bucket: str, key: str) -> BytesIO:
  result = BytesIO()
  async for data in self.download(bucket, key):
- if data is not None:
- result.write(data)
+ result.write(data)

  result.seek(0)
  return result
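`Storage.download` now carries an explicit `AsyncGenerator[bytes]` return type and simply ends the stream when the key is missing, instead of yielding a single `None`; callers such as `downloadbytes` therefore drop the `if data is not None` guard. A minimal consumption sketch, assuming an initialized `Storage` instance:

```python
from io import BytesIO

from nucliadb_utils.storages.storage import Storage


async def read_object(storage: Storage, bucket: str, key: str) -> BytesIO:
    # A missing object yields nothing, so the buffer is simply left empty.
    buffer = BytesIO()
    async for chunk in storage.download(bucket, key):
        buffer.write(chunk)
    buffer.seek(0)
    return buffer
```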
@@ -461,29 +454,24 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  result.seek(0)
  return result

- async def downloadbytescf_iterator(
- self, cf: CloudFile
- ) -> AsyncGenerator[bytes, None]: # pragma: no cover
+ async def downloadbytescf_iterator(self, cf: CloudFile) -> AsyncGenerator[bytes]: # pragma: no cover
  # this is covered by other tests
  if cf.source == self.source:
  async for data in self.download(cf.bucket_name, cf.uri):
- if data is not None:
- yield data
+ yield data
  elif cf.source == CloudFile.FLAPS:
  flaps_storage = await get_nuclia_storage()
  async for data in flaps_storage.download(cf):
- if data is not None:
- yield data
+ yield data
  elif cf.source == CloudFile.LOCAL:
  local_storage = get_local_storage()
  async for data in local_storage.download(cf.bucket_name, cf.uri):
- if data is not None:
- yield data
+ yield data

  async def upload_pb(self, sf: StorageField, payload: Any):
  await self.upload_object(sf.bucket, sf.key, payload.SerializeToString())

- async def download_pb(self, sf: StorageField, PBKlass: Type):
+ async def download_pb(self, sf: StorageField, PBKlass: type):
  payload = await self.downloadbytes(sf.bucket, sf.key)

  if payload.getbuffer().nbytes == 0:
@@ -517,8 +505,8 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):

  @abc.abstractmethod
  async def iterate_objects(
- self, bucket: str, prefix: str, start: Optional[str] = None
- ) -> AsyncGenerator[ObjectInfo, None]:
+ self, bucket: str, prefix: str, start: str | None = None
+ ) -> AsyncGenerator[ObjectInfo]:
  raise NotImplementedError()
  yield ObjectInfo(name="")

@@ -577,9 +565,7 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
  ...


- async def iter_and_add_size(
- stream: AsyncGenerator[bytes, None], cf: CloudFile
- ) -> AsyncGenerator[bytes, None]:
+ async def iter_and_add_size(stream: AsyncGenerator[bytes], cf: CloudFile) -> AsyncGenerator[bytes]:
  # This is needed because some storage types like GCS or S3 require
  # the size of the file at least at the request done for the last chunk.
  total_size = 0
@@ -590,9 +576,7 @@ async def iter_and_add_size(
  yield chunk


- async def iter_in_chunk_size(
- iterator: AsyncGenerator[bytes, None], chunk_size: int
- ) -> AsyncGenerator[bytes, None]:
+ async def iter_in_chunk_size(iterator: AsyncGenerator[bytes], chunk_size: int) -> AsyncGenerator[bytes]:
  # This is needed to make sure bytes uploaded to the blob storage complies with a particular chunk size.
  buffer = b""
  async for chunk in iterator:
@@ -606,8 +590,8 @@ async def iter_in_chunk_size(


  async def iterate_storage_compatible(
- iterator: AsyncGenerator[bytes, None], storage: Storage, cf: CloudFile
- ) -> AsyncGenerator[bytes, None]:
+ iterator: AsyncGenerator[bytes], storage: Storage, cf: CloudFile
+ ) -> AsyncGenerator[bytes]:
  """
  Makes sure to add the size to the cloudfile and split the data in
  chunks that are compatible with the storage type of choice
nucliadb_utils/storages/utils.py CHANGED
@@ -19,7 +19,6 @@
  #

  from dataclasses import dataclass
- from typing import Optional

  from pydantic import BaseModel

@@ -41,8 +40,8 @@ class Range:
  The start and end values are 0-based.
  """

- start: Optional[int] = None
- end: Optional[int] = None
+ start: int | None = None
+ end: int | None = None

  def any(self) -> bool:
  return self.start is not None or self.end is not None
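`Range` is a small dataclass for 0-based, inclusive byte ranges; only its annotations change in this release. A usage sketch (the header helper is hypothetical and not part of the package):

```python
from nucliadb_utils.storages.utils import Range


def to_http_range_header(r: Range) -> str | None:
    # Render the 0-based, inclusive range as an HTTP `Range` header value.
    if not r.any():
        return None
    start = r.start if r.start is not None else 0
    end = "" if r.end is None else str(r.end)
    return f"bytes={start}-{end}"


assert to_http_range_header(Range(start=0, end=1023)) == "bytes=0-1023"
assert to_http_range_header(Range()) is None
```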
nucliadb_utils/store.py CHANGED
@@ -17,6 +17,6 @@
  # You should have received a copy of the GNU Affero General Public License
  # along with this program. If not, see <http://www.gnu.org/licenses/>.

- from typing import Any, Dict
+ from typing import Any

- MAIN: Dict[str, Any] = {}
+ MAIN: dict[str, Any] = {}