nucliadb-utils 4.0.0.post544__py3-none-any.whl → 4.0.0.post546__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
nucliadb_utils/storages/gcs.py

@@ -26,7 +26,7 @@ import socket
 from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from datetime import datetime
-from typing import Any, AsyncIterator, Dict, List, Optional
+from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional
 from urllib.parse import quote_plus

 import aiohttp
@@ -221,7 +221,7 @@ class GCSStorageField(StorageField):
                    break

    @storage_ops_observer.wrap({"type": "read_range"})
-    async def read_range(self, start: int, end: int) -> AsyncIterator[bytes]:
+    async def read_range(self, start: int, end: int) -> AsyncGenerator[bytes, None]:
        """
        Iterate through ranges of data
        """
nucliadb_utils/storages/local.py

@@ -24,7 +24,7 @@ import json
 import os
 import shutil
 from datetime import datetime
-from typing import Any, AsyncIterator, Dict, Optional
+from typing import Any, AsyncGenerator, AsyncIterator, Dict, Optional

 import aiofiles
 from nucliadb_protos.resources_pb2 import CloudFile
@@ -87,7 +87,7 @@ class LocalStorageField(StorageField):
                    break
                yield data

-    async def read_range(self, start: int, end: int) -> AsyncIterator[bytes]:
+    async def read_range(self, start: int, end: int) -> AsyncGenerator[bytes, None]:
        """
        Iterate through ranges of data
        """
nucliadb_utils/storages/pg.py

@@ -22,7 +22,7 @@ from __future__ import annotations
 import asyncio
 import logging
 import uuid
-from typing import Any, AsyncIterator, Optional, TypedDict
+from typing import Any, AsyncGenerator, AsyncIterator, Optional, TypedDict

 import asyncpg
 from nucliadb_protos.resources_pb2 import CloudFile
@@ -427,7 +427,7 @@ class PostgresStorageField(StorageField):
            async for chunk in dl.iterate_chunks(bucket, key):
                yield chunk["data"]

-    async def read_range(self, start: int, end: int) -> AsyncIterator[bytes]:
+    async def read_range(self, start: int, end: int) -> AsyncGenerator[bytes, None]:
        """
        Iterate through ranges of data
        """
nucliadb_utils/storages/s3.py

@@ -21,12 +21,13 @@ from __future__ import annotations

 from contextlib import AsyncExitStack
 from datetime import datetime
-from typing import Any, AsyncIterator, Optional
+from typing import Any, AsyncGenerator, AsyncIterator, Optional

 import aiobotocore  # type: ignore
 import aiohttp
 import backoff  # type: ignore
 import botocore  # type: ignore
+from aiobotocore.client import AioBaseClient  # type: ignore
 from aiobotocore.session import AioSession, get_session  # type: ignore
 from nucliadb_protos.resources_pb2 import CloudFile

@@ -111,7 +112,7 @@ class S3StorageField(StorageField):
            yield data
            data = await stream.read(CHUNK_SIZE)

-    async def read_range(self, start: int, end: int) -> AsyncIterator[bytes]:
+    async def read_range(self, start: int, end: int) -> AsyncGenerator[bytes, None]:
        """
        Iterate through ranges of data
        """
@@ -319,6 +320,18 @@ class S3StorageField(StorageField):
            Key=destination_uri,
        )

+    async def move(
+        self,
+        origin_uri: str,
+        destination_uri: str,
+        origin_bucket_name: str,
+        destination_bucket_name: str,
+    ):
+        await self.copy(
+            origin_uri, destination_uri, origin_bucket_name, destination_bucket_name
+        )
+        await self.storage.delete_upload(origin_uri, origin_bucket_name)
+
    async def upload(self, iterator: AsyncIterator, origin: CloudFile) -> CloudFile:
        self.field = await self.start(origin)
        await self.append(origin, iterator)
@@ -384,7 +397,7 @@ class S3Storage(Storage):

    async def initialize(self):
        session = AioSession()
-        self._s3aioclient = await self._exit_stack.enter_async_context(
+        self._s3aioclient: AioBaseClient = await self._exit_stack.enter_async_context(
            session.create_client("s3", **self.opts)
        )
        for bucket in (self.deadletter_bucket, self.indexing_bucket):
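
The new S3StorageField.move composes the operation from the two primitives the backend already has: copy the object to the destination key, then delete the original, since S3 offers no server-side rename. Roughly the same copy-then-delete pattern expressed directly against the S3 API, as an illustration only (bucket and key names are placeholders, not values used by nucliadb):

    from aiobotocore.session import get_session

    async def s3_move(origin_bucket: str, origin_key: str,
                      dest_bucket: str, dest_key: str) -> None:
        # Copy first, then delete: there is no atomic rename in S3, so a failure
        # between the two calls leaves the object duplicated rather than lost.
        session = get_session()
        async with session.create_client("s3") as client:
            await client.copy_object(
                Bucket=dest_bucket,
                Key=dest_key,
                CopySource={"Bucket": origin_bucket, "Key": origin_key},
            )
            await client.delete_object(Bucket=origin_bucket, Key=origin_key)

Run it with asyncio.run(s3_move(...)) against credentials configured in the environment.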
nucliadb_utils/storages/storage.py

@@ -60,7 +60,7 @@ INDEXING_KEY = "index/{kb}/{shard}/{resource}/{txid}"
 MESSAGE_KEY = "message/{kbid}/{rid}/{mid}"


-class StorageField:
+class StorageField(abc.ABC, metaclass=abc.ABCMeta):
    storage: Storage
    bucket: str
    key: str
@@ -78,18 +78,18 @@ class StorageField:
        self.key = fullkey
        self.field = field

-    async def upload(self, iterator: AsyncIterator, origin: CloudFile) -> CloudFile:
-        raise NotImplementedError()
+    @abc.abstractmethod
+    async def upload(self, iterator: AsyncIterator, origin: CloudFile) -> CloudFile: ...

-    async def iter_data(self, headers=None):
+    @abc.abstractmethod
+    async def iter_data(self, headers=None) -> AsyncGenerator[bytes, None]:  # type: ignore
        raise NotImplementedError()
+        yield b""

-    async def read_range(self, start: int, end: int) -> AsyncIterator[bytes]:
-        """
-        Iterate through ranges of data
-        """
+    @abc.abstractmethod
+    async def read_range(self, start: int, end: int) -> AsyncGenerator[bytes, None]:
        raise NotImplementedError()
-        yield b""  # pragma: no cover
+        yield b""

    async def delete(self) -> bool:
        deleted = False
@@ -98,38 +98,38 @@ class StorageField:
            deleted = True
        return deleted

-    async def exists(self) -> Optional[Dict[str, str]]:
-        raise NotImplementedError
+    @abc.abstractmethod
+    async def exists(self) -> Optional[Dict[str, str]]: ...

+    @abc.abstractmethod
    async def copy(
        self,
        origin_uri: str,
        destination_uri: str,
        origin_bucket_name: str,
        destination_bucket_name: str,
-    ):
-        raise NotImplementedError()
+    ): ...

+    @abc.abstractmethod
    async def move(
        self,
        origin_uri: str,
        destination_uri: str,
        origin_bucket_name: str,
        destination_bucket_name: str,
-    ):
-        raise NotImplementedError()
+    ): ...

-    async def start(self, cf: CloudFile) -> CloudFile:
-        raise NotImplementedError()
+    @abc.abstractmethod
+    async def start(self, cf: CloudFile) -> CloudFile: ...

-    async def append(self, cf: CloudFile, iterable: AsyncIterator) -> int:
-        raise NotImplementedError()
+    @abc.abstractmethod
+    async def append(self, cf: CloudFile, iterable: AsyncIterator) -> int: ...

-    async def finish(self):
-        raise NotImplementedError()
+    @abc.abstractmethod
+    async def finish(self): ...


-class Storage:
+class Storage(abc.ABC, metaclass=abc.ABCMeta):
    source: int
    field_klass: Type
    deadletter_bucket: Optional[str] = None
@@ -498,40 +498,39 @@ class Storage:
        pb.ParseFromString(payload.read())
        return pb

-    async def delete_upload(self, uri: str, bucket_name: str):
-        raise NotImplementedError()
+    @abc.abstractmethod
+    async def delete_upload(self, uri: str, bucket_name: str): ...

-    def get_bucket_name(self, kbid: str):
-        raise NotImplementedError()
+    @abc.abstractmethod
+    def get_bucket_name(self, kbid: str) -> str: ...

-    async def initialize(self):
-        raise NotImplementedError()
+    @abc.abstractmethod
+    async def initialize(self) -> None: ...

-    async def finalize(self):
-        raise NotImplementedError()
+    @abc.abstractmethod
+    async def finalize(self) -> None: ...

    @abc.abstractmethod
-    def iterate_bucket(self, bucket: str, prefix: str) -> AsyncIterator[Any]:
-        raise NotImplementedError()
+    def iterate_bucket(self, bucket: str, prefix: str) -> AsyncIterator[Any]: ...

-    async def copy(self, file: CloudFile, destination: StorageField):
+    async def copy(self, file: CloudFile, destination: StorageField) -> None:
        await destination.copy(
            file.uri, destination.key, file.bucket_name, destination.bucket
        )

-    async def move(self, file: CloudFile, destination: StorageField):
+    async def move(self, file: CloudFile, destination: StorageField) -> None:
        await destination.move(
            file.uri, destination.key, file.bucket_name, destination.bucket
        )

-    async def create_kb(self, kbid: str) -> bool:
-        raise NotImplementedError()
+    @abc.abstractmethod
+    async def create_kb(self, kbid: str) -> bool: ...

-    async def delete_kb(self, kbid: str) -> Tuple[bool, bool]:
-        raise NotImplementedError()
+    @abc.abstractmethod
+    async def delete_kb(self, kbid: str) -> Tuple[bool, bool]: ...

-    async def schedule_delete_kb(self, kbid: str) -> bool:
-        raise NotImplementedError()
+    @abc.abstractmethod
+    async def schedule_delete_kb(self, kbid: str) -> bool: ...

    async def set_stream_message(self, kbid: str, rid: str, data: bytes) -> str:
        key = MESSAGE_KEY.format(kbid=kbid, rid=rid, mid=uuid.uuid4())
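
Turning StorageField and Storage into abstract base classes moves the failure from call time to construction time: a backend that forgets to implement one of the abstract methods can no longer be instantiated at all, instead of raising NotImplementedError only when the missing method is hit. (Inheriting from abc.ABC already sets ABCMeta as the metaclass, so the explicit metaclass=abc.ABCMeta is redundant but harmless; the yield b"" after raise NotImplementedError() keeps iter_data and read_range async generator functions so their shape matches the concrete backends.) A toy illustration, not the nucliadb classes:

    import abc

    class Backend(abc.ABC):
        @abc.abstractmethod
        async def initialize(self) -> None: ...

    class Incomplete(Backend):
        pass

    class Complete(Backend):
        async def initialize(self) -> None:
            return None

    Complete()      # fine: every abstract method is implemented
    # Incomplete()  # TypeError: Can't instantiate abstract class Incomplete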
nucliadb_utils/tests/unit/storages/test_storage.py

@@ -26,6 +26,7 @@ from nucliadb_protos.noderesources_pb2 import ResourceID
 from nucliadb_protos.nodewriter_pb2 import IndexMessage
 from nucliadb_protos.resources_pb2 import CloudFile

+from nucliadb_utils.storages.local import LocalStorageField
 from nucliadb_utils.storages.storage import (
    Storage,
    StorageField,
@@ -45,7 +46,7 @@ class TestStorageField:

    @pytest.fixture
    def storage_field(self, storage, field):
-        yield StorageField(storage, "bucket", "fullkey", field)
+        yield LocalStorageField(storage, "bucket", "fullkey", field)

    @pytest.mark.asyncio
    async def test_delete(self, storage_field: StorageField, storage):
@@ -73,6 +74,24 @@ class StorageTest(Storage):
        br = BrainResource(labels=["label"])
        yield br.SerializeToString()

+    async def create_kb(self, kbid):
+        return True
+
+    async def delete_kb(self, kbid):
+        return True
+
+    async def delete_upload(self, uri, bucket):
+        return True
+
+    async def initialize(self) -> None:
+        pass
+
+    async def finalize(self) -> None:
+        pass
+
+    async def schedule_delete_kb(self, kbid: str) -> bool:
+        return True
+

 class TestStorage:
    @pytest.fixture
@@ -136,7 +155,7 @@ class TestStorage:
    async def test_download_pb(self, storage: StorageTest):
        assert isinstance(
            await storage.download_pb(
-                StorageField(storage, "bucket", "fullkey"), BrainResource
+                LocalStorageField(storage, "bucket", "fullkey"), BrainResource
            ),
            BrainResource,
        )
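
Because StorageField is now abstract, the fixtures switch to the concrete LocalStorageField, and the StorageTest double has to give every newly abstract Storage method a trivial body before it can be instantiated. A hypothetical miniature of that fixture pattern (class and fixture names are illustrative; pytest-asyncio is assumed, as in the existing tests):

    import abc

    import pytest

    class Field(abc.ABC):
        @abc.abstractmethod
        async def exists(self) -> bool: ...

    class LocalField(Field):
        async def exists(self) -> bool:
            return False

    @pytest.fixture
    def storage_field():
        # Yield a concrete subclass; Field() itself would raise TypeError.
        yield LocalField()

    @pytest.mark.asyncio
    async def test_exists(storage_field: Field):
        assert await storage_field.exists() is False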
nucliadb_utils-4.0.0.post546.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nucliadb-utils
-Version: 4.0.0.post544
+Version: 4.0.0.post546
 Home-page: https://nuclia.com
 License: BSD
 Classifier: Development Status :: 4 - Beta
@@ -22,8 +22,8 @@ Requires-Dist: PyNaCl
 Requires-Dist: pyjwt >=2.4.0
 Requires-Dist: memorylru >=1.1.2
 Requires-Dist: mrflagly
-Requires-Dist: nucliadb-protos >=4.0.0.post544
-Requires-Dist: nucliadb-telemetry >=4.0.0.post544
+Requires-Dist: nucliadb-protos >=4.0.0.post546
+Requires-Dist: nucliadb-telemetry >=4.0.0.post546
 Provides-Extra: cache
 Requires-Dist: redis >=4.3.4 ; extra == 'cache'
 Requires-Dist: orjson >=3.6.7 ; extra == 'cache'
nucliadb_utils-4.0.0.post546.dist-info/RECORD

@@ -38,13 +38,13 @@ nucliadb_utils/nuclia_usage/utils/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZ
 nucliadb_utils/nuclia_usage/utils/kb_usage_report.py,sha256=E1eUSFXBVNzQP9Q2rWj9y3koCO5S7iKwckny_AoLKuk,3870
 nucliadb_utils/storages/__init__.py,sha256=5Qc8AUWiJv9_JbGCBpAn88AIJhwDlm0OPQpg2ZdRL4U,872
 nucliadb_utils/storages/exceptions.py,sha256=n6aBOyurWMo8mXd1XY6Psgno4VfXJ9TRbxCy67c08-g,2417
-nucliadb_utils/storages/gcs.py,sha256=-M-abojobXGwZaS1kuFXRBxwashsAaTXMStjx3QpX4U,27076
-nucliadb_utils/storages/local.py,sha256=SZrBkxnGg8DoMHlT4GaBP5LqMwi6QAe-d_eLU2MJ0RA,10074
+nucliadb_utils/storages/gcs.py,sha256=3A4eqJe6PoF3oE7c9JSCpRtbcv6dtg6Pd28u4cXKwyE,27099
+nucliadb_utils/storages/local.py,sha256=jgUn3AwaQWn5VBKAvikd97sLAS3jx9PaUE7pwq1iJrk,10097
 nucliadb_utils/storages/nuclia.py,sha256=UfvRu92eqG1v-PE-UWH2x8KEJFqDqATMmUGFmEuqSSs,2097
-nucliadb_utils/storages/pg.py,sha256=vxx8FQIToA_C6CO1uu9W080yZl0SgUVtTEWd6lQhPJc,18610
-nucliadb_utils/storages/s3.py,sha256=EYfn2CjoVL0tPFUqMaHgJYHjrSQaIUzLeNYoRa1qp-A,18809
+nucliadb_utils/storages/pg.py,sha256=yFk6AVgZHPgQq6NwLN_qN7fwD05WgCU5XE7gsFt-B0Q,18633
+nucliadb_utils/storages/s3.py,sha256=8IZoDlTeICZtU1Z0eouaxvafSR6y4GqCtLjFUBxTd1E,19269
 nucliadb_utils/storages/settings.py,sha256=ugCPy1zxBOmA2KosT-4tsjpvP002kg5iQyi42yCGCJA,1285
-nucliadb_utils/storages/storage.py,sha256=igV4CNjv5L09PTY4kqUsGhHzFvDUa_FbfLHxrfMSJDA,20324
+nucliadb_utils/storages/storage.py,sha256=lrXa6eWY7HMCUgrEz9-jTk7PBPwP1Nor_zi3oZMiVZ4,20322
 nucliadb_utils/tests/__init__.py,sha256=Oo9CAE7B0eW5VHn8sHd6o30SQzOWUhktLPRXdlDOleA,1456
 nucliadb_utils/tests/asyncbenchmark.py,sha256=rN_NNDk4ras0qgFp0QlRyAi9ZU9xITdzxl2s5CigzBo,10698
 nucliadb_utils/tests/conftest.py,sha256=gPYVuVhj_e6Aeanb91wvUerwuxZgaS7d3luIBRQFIU0,1876
@@ -68,9 +68,9 @@ nucliadb_utils/tests/unit/storages/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS
 nucliadb_utils/tests/unit/storages/test_aws.py,sha256=GCsB_jwCUNV3Ogt8TZZEmNKAHvOlR0HGU7blrFbtJqs,1924
 nucliadb_utils/tests/unit/storages/test_gcs.py,sha256=2XzJwgNpfjVGjtE-QdZhu3ayuT1EMEXINdM-_SatPCY,3554
 nucliadb_utils/tests/unit/storages/test_pg.py,sha256=sJfUttMSzq8W1XYolAUcMxl_R5HcEzb5fpCklPeMJiY,17000
-nucliadb_utils/tests/unit/storages/test_storage.py,sha256=CwX4wO21og1pdw9IqnK4sFxBfKY3sPhQTd56AS7-Me8,6666
-nucliadb_utils-4.0.0.post544.dist-info/METADATA,sha256=KIbQdGrR0WuExS2FVIhJkm0NuViFu_T-wbBetxN0gok,1979
-nucliadb_utils-4.0.0.post544.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-nucliadb_utils-4.0.0.post544.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
-nucliadb_utils-4.0.0.post544.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-nucliadb_utils-4.0.0.post544.dist-info/RECORD,,
+nucliadb_utils/tests/unit/storages/test_storage.py,sha256=54nUtElPwT3GQPTT4638F2awBYRl8nD6me9wWijhtbA,7107
+nucliadb_utils-4.0.0.post546.dist-info/METADATA,sha256=pgruTbhfCt2YIDI8al3iPI6RjTHnTHVcmmm0ZLDDR6c,1979
+nucliadb_utils-4.0.0.post546.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+nucliadb_utils-4.0.0.post546.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
+nucliadb_utils-4.0.0.post546.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+nucliadb_utils-4.0.0.post546.dist-info/RECORD,,