nucliadb-utils 6.3.1.post3526__py3-none-any.whl → 6.3.1.post3531__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb-utils might be problematic. Click here for more details.

@@ -211,8 +211,10 @@ class AzureStorage(Storage):
211
211
  bucket_name = self.get_bucket_name(kbid)
212
212
  return await self.object_store.bucket_delete(bucket_name)
213
213
 
214
- async def iterate_objects(self, bucket: str, prefix: str) -> AsyncGenerator[ObjectInfo, None]:
215
- async for obj in self.object_store.iterate(bucket, prefix):
214
+ async def iterate_objects(
215
+ self, bucket: str, prefix: str, start: Optional[str] = None
216
+ ) -> AsyncGenerator[ObjectInfo, None]:
217
+ async for obj in self.object_store.iterate(bucket, prefix, start):
216
218
  yield obj
217
219
 
218
220
  async def insert_object(self, bucket_name: str, key: str, data: bytes) -> None:
@@ -373,9 +375,13 @@ class AzureObjectStore(ObjectStore):
373
375
  async for chunk in downloader.chunks():
374
376
  yield chunk
375
377
 
376
- async def iterate(self, bucket: str, prefix: str) -> AsyncGenerator[ObjectInfo, None]:
378
+ async def iterate(
379
+ self, bucket: str, prefix: str, start: Optional[str] = None
380
+ ) -> AsyncGenerator[ObjectInfo, None]:
377
381
  container_client = self.service_client.get_container_client(bucket)
378
382
  async for blob in container_client.list_blobs(name_starts_with=prefix):
383
+ if start and blob.name <= start:
384
+ continue
379
385
  yield ObjectInfo(name=blob.name)
380
386
 
381
387
  async def get_metadata(self, bucket: str, key: str) -> ObjectMetadata:
@@ -695,20 +695,29 @@ class GCSStorage(Storage):
695
695
  errors.capture_message(msg, "error", scope)
696
696
  return deleted, conflict
697
697
 
698
- async def iterate_objects(self, bucket: str, prefix: str) -> AsyncGenerator[ObjectInfo, None]:
698
+ async def iterate_objects(
699
+ self, bucket: str, prefix: str, start: Optional[str] = None
700
+ ) -> AsyncGenerator[ObjectInfo, None]:
699
701
  if self.session is None:
700
702
  raise AttributeError()
701
703
  url = "{}/{}/o".format(self.object_base_url, bucket)
702
704
  headers = await self.get_access_headers()
705
+ params = {"prefix": prefix}
706
+ if start:
707
+ params["startOffset"] = start
703
708
  async with self.session.get(
704
709
  url,
705
710
  headers=headers,
706
- params={"prefix": prefix},
711
+ params=params,
707
712
  ) as resp:
708
713
  assert resp.status == 200
709
714
  data = await resp.json()
710
715
  if "items" in data:
711
716
  for item in data["items"]:
717
+ if start is not None and item["name"] == start:
718
+ # Skip the start item to be compatible with all
719
+ # storage implementations
720
+ continue
712
721
  yield ObjectInfo(name=item["name"])
713
722
 
714
723
  page_token = data.get("nextPageToken")
@@ -241,14 +241,17 @@ class LocalStorage(Storage):
241
241
 
242
242
  async def create_kb(self, kbid: str):
243
243
  bucket = self.get_bucket_name(kbid)
244
- path = self.get_bucket_path(bucket)
245
244
  try:
246
- os.makedirs(path, exist_ok=True)
245
+ await self.create_bucket(bucket)
247
246
  created = True
248
247
  except FileExistsError:
249
248
  created = False
250
249
  return created
251
250
 
251
+ async def create_bucket(self, bucket_name: str):
252
+ path = self.get_bucket_path(bucket_name)
253
+ os.makedirs(path, exist_ok=True)
254
+
252
255
  async def delete_kb(self, kbid: str) -> tuple[bool, bool]:
253
256
  bucket = self.get_bucket_name(kbid)
254
257
  path = self.get_bucket_path(bucket)
@@ -277,20 +280,27 @@ class LocalStorage(Storage):
277
280
  deleted = False
278
281
  return deleted
279
282
 
280
- async def iterate_objects(self, bucket: str, prefix: str) -> AsyncGenerator[ObjectInfo, None]:
281
- pathname = f"{self.get_file_path(bucket, prefix)}*"
282
- for key in glob.glob(pathname):
283
+ async def iterate_objects(
284
+ self, bucket: str, prefix: str, start: Optional[str] = None
285
+ ) -> AsyncGenerator[ObjectInfo, None]:
286
+ bucket_path = self.get_bucket_path(bucket)
287
+ pathname = f"{self.get_file_path(bucket, prefix)}**/*"
288
+ for key in sorted(glob.glob(pathname, recursive=True)):
289
+ if not os.path.isfile(key):
290
+ continue
283
291
  if key.endswith(".metadata"):
284
292
  # Skip metadata files -- they are internal to the local-storage implementation.
285
293
  continue
286
- name = key.split("/")[-1]
294
+ name = key.split(bucket_path)[-1].lstrip("/")
295
+ if start is not None and name <= start:
296
+ continue
287
297
  yield ObjectInfo(name=name)
288
298
 
289
- async def download(self, bucket_name: str, key: str, range: Optional[Range] = None):
290
- key_path = self.get_file_path(bucket_name, key)
299
+ async def download(self, bucket: str, key: str, range: Optional[Range] = None):
300
+ key_path = self.get_file_path(bucket, key)
291
301
  if not os.path.exists(key_path):
292
302
  return
293
- async for chunk in super().download(bucket_name, key, range=range):
303
+ async for chunk in super().download(bucket, key, range=range):
294
304
  yield chunk
295
305
 
296
306
  async def insert_object(self, bucket: str, key: str, data: bytes) -> None:
@@ -120,7 +120,9 @@ class ObjectStore(abc.ABC, metaclass=abc.ABCMeta):
120
120
  yield b""
121
121
 
122
122
  @abc.abstractmethod
123
- async def iterate(self, bucket: str, prefix: str) -> AsyncGenerator[ObjectInfo, None]:
123
+ async def iterate(
124
+ self, bucket: str, prefix: str, start: Optional[str] = None
125
+ ) -> AsyncGenerator[ObjectInfo, None]:
124
126
  raise NotImplementedError()
125
127
  yield ObjectInfo(name="")
126
128
 
@@ -422,10 +422,14 @@ class S3Storage(Storage):
422
422
  else:
423
423
  raise AttributeError("No valid uri")
424
424
 
425
- async def iterate_objects(self, bucket: str, prefix: str = "/") -> AsyncGenerator[ObjectInfo, None]:
425
+ async def iterate_objects(
426
+ self, bucket: str, prefix: str = "/", start: Optional[str] = None
427
+ ) -> AsyncGenerator[ObjectInfo, None]:
426
428
  paginator = self._s3aioclient.get_paginator("list_objects")
427
- async for result in paginator.paginate(Bucket=bucket, Prefix=prefix):
428
- for item in result.get("Contents", []):
429
+ async for result in paginator.paginate(
430
+ Bucket=bucket, Prefix=prefix, PaginationConfig={"StartingToken": start}
431
+ ):
432
+ for item in result.get("Contents") or []:
429
433
  yield ObjectInfo(name=item["Key"])
430
434
 
431
435
  async def create_kb(self, kbid: str):
@@ -502,7 +502,9 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
502
502
  async def finalize(self) -> None: ...
503
503
 
504
504
  @abc.abstractmethod
505
- async def iterate_objects(self, bucket: str, prefix: str) -> AsyncGenerator[ObjectInfo, None]:
505
+ async def iterate_objects(
506
+ self, bucket: str, prefix: str, start: Optional[str] = None
507
+ ) -> AsyncGenerator[ObjectInfo, None]:
506
508
  raise NotImplementedError()
507
509
  yield ObjectInfo(name="")
508
510
 
@@ -553,6 +555,13 @@ class Storage(abc.ABC, metaclass=abc.ABCMeta):
553
555
  else:
554
556
  await self.insert_object(bucket, key, data)
555
557
 
558
+ @abc.abstractmethod
559
+ async def create_bucket(self, bucket_name: str) -> None:
560
+ """
561
+ Create a new bucket in the storage.
562
+ """
563
+ ...
564
+
556
565
 
557
566
  async def iter_and_add_size(
558
567
  stream: AsyncGenerator[bytes, None], cf: CloudFile
@@ -51,6 +51,10 @@ class WaitFor:
51
51
  self.seq = seq
52
52
 
53
53
 
54
+ class TransactionError(Exception):
55
+ pass
56
+
57
+
54
58
  class TransactionCommitTimeoutError(Exception):
55
59
  pass
56
60
 
@@ -80,7 +84,7 @@ class LocalTransactionUtility:
80
84
 
81
85
  resp = await ingest.ProcessMessage(iterator(writer)) # type: ignore
82
86
  if resp.status != OpStatusWriter.Status.OK:
83
- logger.error(f"Local transaction failed processing {writer}")
87
+ raise TransactionError(f"Local transaction failed processing {writer}")
84
88
  return 0
85
89
 
86
90
  async def finalize(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb_utils
3
- Version: 6.3.1.post3526
3
+ Version: 6.3.1.post3531
4
4
  Summary: NucliaDB util library
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -27,8 +27,8 @@ Requires-Dist: nats-py[nkeys]>=2.6.0
27
27
  Requires-Dist: PyNaCl
28
28
  Requires-Dist: pyjwt>=2.4.0
29
29
  Requires-Dist: mrflagly>=0.2.9
30
- Requires-Dist: nucliadb-protos>=6.3.1.post3526
31
- Requires-Dist: nucliadb-telemetry>=6.3.1.post3526
30
+ Requires-Dist: nucliadb-protos>=6.3.1.post3531
31
+ Requires-Dist: nucliadb-telemetry>=6.3.1.post3531
32
32
  Provides-Extra: cache
33
33
  Requires-Dist: redis>=4.3.4; extra == "cache"
34
34
  Requires-Dist: orjson>=3.6.7; extra == "cache"
@@ -14,7 +14,7 @@ nucliadb_utils/run.py,sha256=Es0_Bu5Yc-LWczvwL6gzWqSwC85RjDCk-0oFQAJi9g4,1827
14
14
  nucliadb_utils/settings.py,sha256=RnGhEUvwv6faNqALiqDCivvzNOyyXVBflYh_37uNkow,8193
15
15
  nucliadb_utils/signals.py,sha256=lo_Mk12NIX5Au--3H3WObvDOXq_OMurql2qiC2TnAao,2676
16
16
  nucliadb_utils/store.py,sha256=kQ35HemE0v4_Qg6xVqNIJi8vSFAYQtwI3rDtMsNy62Y,890
17
- nucliadb_utils/transaction.py,sha256=YYnTpxCDs56lo0tS6ErABjk9WjDuieUc4f7r63Q_OP8,7864
17
+ nucliadb_utils/transaction.py,sha256=z_VeiTIta48rosS2SXMqx86XaavprSMRWf6s6zWIeEs,7920
18
18
  nucliadb_utils/utilities.py,sha256=Vc4zLpDf-FQh9bs0ZyAfGAjzlbbMTMWf3VWt2Ao5V3k,15379
19
19
  nucliadb_utils/aiopynecone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
20
20
  nucliadb_utils/aiopynecone/client.py,sha256=MPyHnDXwhukJr7U3CJh7BpsekfSuOkyM4g5b9LLtzc8,22941
@@ -39,15 +39,15 @@ nucliadb_utils/nuclia_usage/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn
39
39
  nucliadb_utils/nuclia_usage/utils/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
40
40
  nucliadb_utils/nuclia_usage/utils/kb_usage_report.py,sha256=6lLuxCCPQVn3dOuZNL5ThPjl2yws-1TJ_7duhQSWkPU,3934
41
41
  nucliadb_utils/storages/__init__.py,sha256=5Qc8AUWiJv9_JbGCBpAn88AIJhwDlm0OPQpg2ZdRL4U,872
42
- nucliadb_utils/storages/azure.py,sha256=FK4c_v9AUAwagScm_F1uDmJeQQq7P4jZswiD2trwb4A,16394
42
+ nucliadb_utils/storages/azure.py,sha256=b5WzTjwsyrFxkuD5GzOUSBw6Szm0OkNt8t2NGoMiTJI,16557
43
43
  nucliadb_utils/storages/exceptions.py,sha256=GOPKH-F3dPTfHEkwGNfVkSfF70eWJJXjI83yccw9WpA,2501
44
- nucliadb_utils/storages/gcs.py,sha256=5QODikIj26tfjz4KqhNgezPqqmqrG1yvUbarKAQ6sS0,28899
45
- nucliadb_utils/storages/local.py,sha256=JxlWNtu49JJ04dq6o7bBAqbpbeYpVyvvBM5jq1sGJ-4,11003
44
+ nucliadb_utils/storages/gcs.py,sha256=KR267zRvBBjT0ksxcfCCz0CmqPD8W-Kj3_lWZXWFg0Q,29249
45
+ nucliadb_utils/storages/local.py,sha256=USEYXavJfTK5bU71iLozVM9XjDdtPNte3uWJBuiWIOo,11371
46
46
  nucliadb_utils/storages/nuclia.py,sha256=vEv94xAT7QM2g80S25QyrOw2pzvP2BAX-ADgZLtuCVc,2097
47
- nucliadb_utils/storages/object_store.py,sha256=HtKjIKhErSBvuqx1SuCOnL0SkiHqgfyekNMP8o2piZU,4492
48
- nucliadb_utils/storages/s3.py,sha256=isw4v9L4tlwLklNotdF-l7mU4coshPzO5wj_tbuSNOc,20916
47
+ nucliadb_utils/storages/object_store.py,sha256=2PueRP5Q3XOuWgKhj6B9Kp2fyBql5np0T400YRUbqn4,4535
48
+ nucliadb_utils/storages/s3.py,sha256=4-cs_uFC4ZoLHLTZqvGi8v3nQcbrPBFJZuIeWltkFHI,21026
49
49
  nucliadb_utils/storages/settings.py,sha256=ugCPy1zxBOmA2KosT-4tsjpvP002kg5iQyi42yCGCJA,1285
50
- nucliadb_utils/storages/storage.py,sha256=1LSaZKQ4xSoBP85OVATh7zBZdBQ369Wl1uYBymZPfPw,21135
50
+ nucliadb_utils/storages/storage.py,sha256=MGmwWmGuZFff5lOvTY6ZgKrYJUvJojISwAM3K4qzFKM,21344
51
51
  nucliadb_utils/storages/utils.py,sha256=8g2rIwJeYIumQLOB47Yw1rx3twlhRB_cJxer65QfZmk,1479
52
52
  nucliadb_utils/tests/__init__.py,sha256=Oo9CAE7B0eW5VHn8sHd6o30SQzOWUhktLPRXdlDOleA,1456
53
53
  nucliadb_utils/tests/asyncbenchmark.py,sha256=vrX_x9ifCXi18PfNShc23w9x_VUiB_Ph-2nuolh9z3Q,10707
@@ -57,7 +57,7 @@ nucliadb_utils/tests/gcs.py,sha256=MBMzn_UHU5SU6iILuCsB5zU4umhNcaCw_MKrxZhwvOc,4
57
57
  nucliadb_utils/tests/local.py,sha256=cxIfPrKuqs5Ef0nbrVYQQAH2mwc4E0iD9bC2sWegS-c,1934
58
58
  nucliadb_utils/tests/nats.py,sha256=RWHjwqq5esuO7OFbP24yYX1cXnpPLcWJwDUdmwCpH28,1897
59
59
  nucliadb_utils/tests/s3.py,sha256=DACUh3HvgH3BchKFZ9R7RFUzsrg3v9A-cxTcXx4nmvA,3734
60
- nucliadb_utils-6.3.1.post3526.dist-info/METADATA,sha256=aZS3f5jsY6xY837mfcDJxkqUsUksTbzTzpld0e5IRdg,2209
61
- nucliadb_utils-6.3.1.post3526.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
62
- nucliadb_utils-6.3.1.post3526.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
63
- nucliadb_utils-6.3.1.post3526.dist-info/RECORD,,
60
+ nucliadb_utils-6.3.1.post3531.dist-info/METADATA,sha256=42hW8sy26Suv1ZQdpxtvzqRARgqMWm-pfADzOY9-Bvk,2209
61
+ nucliadb_utils-6.3.1.post3531.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
62
+ nucliadb_utils-6.3.1.post3531.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
63
+ nucliadb_utils-6.3.1.post3531.dist-info/RECORD,,