nucliadb-utils 4.0.0.post544__py3-none-any.whl → 4.0.0.post546__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb_utils/storages/gcs.py +2 -2
- nucliadb_utils/storages/local.py +2 -2
- nucliadb_utils/storages/pg.py +2 -2
- nucliadb_utils/storages/s3.py +16 -3
- nucliadb_utils/storages/storage.py +39 -40
- nucliadb_utils/tests/unit/storages/test_storage.py +21 -2
- {nucliadb_utils-4.0.0.post544.dist-info → nucliadb_utils-4.0.0.post546.dist-info}/METADATA +3 -3
- {nucliadb_utils-4.0.0.post544.dist-info → nucliadb_utils-4.0.0.post546.dist-info}/RECORD +11 -11
- {nucliadb_utils-4.0.0.post544.dist-info → nucliadb_utils-4.0.0.post546.dist-info}/WHEEL +0 -0
- {nucliadb_utils-4.0.0.post544.dist-info → nucliadb_utils-4.0.0.post546.dist-info}/top_level.txt +0 -0
- {nucliadb_utils-4.0.0.post544.dist-info → nucliadb_utils-4.0.0.post546.dist-info}/zip-safe +0 -0
nucliadb_utils/storages/gcs.py
CHANGED
@@ -26,7 +26,7 @@ import socket
 from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from datetime import datetime
-from typing import Any, AsyncIterator, Dict, List, Optional
+from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional
 from urllib.parse import quote_plus

 import aiohttp
@@ -221,7 +221,7 @@ class GCSStorageField(StorageField):
                 break

     @storage_ops_observer.wrap({"type": "read_range"})
-    async def read_range(self, start: int, end: int) ->
+    async def read_range(self, start: int, end: int) -> AsyncGenerator[bytes, None]:
        """
        Iterate through ranges of data
        """
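Across the GCS, local, PostgreSQL and S3 backends this release tightens the annotation of read_range to AsyncGenerator[bytes, None]. A minimal sketch of consuming it; the field object and the byte range are illustrative, not values from the package:

```python
# Sketch: collect the first kilobyte of an object by iterating the async
# generator returned by read_range. `field` is assumed to be an already
# constructed storage field (for example a GCSStorageField instance).
async def first_kilobyte(field) -> bytes:
    buf = b""
    async for chunk in field.read_range(start=0, end=1024):
        buf += chunk
    return buf
```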
nucliadb_utils/storages/local.py
CHANGED
@@ -24,7 +24,7 @@ import json
 import os
 import shutil
 from datetime import datetime
-from typing import Any, AsyncIterator, Dict, Optional
+from typing import Any, AsyncGenerator, AsyncIterator, Dict, Optional

 import aiofiles
 from nucliadb_protos.resources_pb2 import CloudFile
@@ -87,7 +87,7 @@ class LocalStorageField(StorageField):
                 break
            yield data

-    async def read_range(self, start: int, end: int) ->
+    async def read_range(self, start: int, end: int) -> AsyncGenerator[bytes, None]:
        """
        Iterate through ranges of data
        """
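For the file-backed field the annotation just makes the async-generator shape explicit. The following is a minimal sketch of that shape, not the package's implementation; the helper name, chunk size and use of aiofiles (which local.py already imports) are illustrative:

```python
from typing import AsyncGenerator

import aiofiles

CHUNK_SIZE = 64 * 1024  # illustrative chunk size


async def read_file_range(path: str, start: int, end: int) -> AsyncGenerator[bytes, None]:
    # Yield the bytes in [start, end) from a local file in bounded chunks.
    async with aiofiles.open(path, mode="rb") as f:
        await f.seek(start)
        remaining = end - start
        while remaining > 0:
            chunk = await f.read(min(CHUNK_SIZE, remaining))
            if not chunk:
                break
            remaining -= len(chunk)
            yield chunk
```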
nucliadb_utils/storages/pg.py
CHANGED
@@ -22,7 +22,7 @@ from __future__ import annotations
 import asyncio
 import logging
 import uuid
-from typing import Any, AsyncIterator, Optional, TypedDict
+from typing import Any, AsyncGenerator, AsyncIterator, Optional, TypedDict

 import asyncpg
 from nucliadb_protos.resources_pb2 import CloudFile
@@ -427,7 +427,7 @@ class PostgresStorageField(StorageField):
        async for chunk in dl.iterate_chunks(bucket, key):
            yield chunk["data"]

-    async def read_range(self, start: int, end: int) ->
+    async def read_range(self, start: int, end: int) -> AsyncGenerator[bytes, None]:
        """
        Iterate through ranges of data
        """
nucliadb_utils/storages/s3.py
CHANGED
@@ -21,12 +21,13 @@ from __future__ import annotations

 from contextlib import AsyncExitStack
 from datetime import datetime
-from typing import Any, AsyncIterator, Optional
+from typing import Any, AsyncGenerator, AsyncIterator, Optional

 import aiobotocore  # type: ignore
 import aiohttp
 import backoff  # type: ignore
 import botocore  # type: ignore
+from aiobotocore.client import AioBaseClient  # type: ignore
 from aiobotocore.session import AioSession, get_session  # type: ignore
 from nucliadb_protos.resources_pb2 import CloudFile

@@ -111,7 +112,7 @@ class S3StorageField(StorageField):
            yield data
            data = await stream.read(CHUNK_SIZE)

-    async def read_range(self, start: int, end: int) ->
+    async def read_range(self, start: int, end: int) -> AsyncGenerator[bytes, None]:
        """
        Iterate through ranges of data
        """
@@ -319,6 +320,18 @@ class S3StorageField(StorageField):
            Key=destination_uri,
        )

+    async def move(
+        self,
+        origin_uri: str,
+        destination_uri: str,
+        origin_bucket_name: str,
+        destination_bucket_name: str,
+    ):
+        await self.copy(
+            origin_uri, destination_uri, origin_bucket_name, destination_bucket_name
+        )
+        await self.storage.delete_upload(origin_uri, origin_bucket_name)
+
     async def upload(self, iterator: AsyncIterator, origin: CloudFile) -> CloudFile:
        self.field = await self.start(origin)
        await self.append(origin, iterator)
@@ -384,7 +397,7 @@ class S3Storage(Storage):

     async def initialize(self):
        session = AioSession()
-        self._s3aioclient = await self._exit_stack.enter_async_context(
+        self._s3aioclient: AioBaseClient = await self._exit_stack.enter_async_context(
            session.create_client("s3", **self.opts)
        )
        for bucket in (self.deadletter_bucket, self.indexing_bucket):
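The new S3StorageField.move is a copy to the destination bucket followed by delete_upload on the origin. A usage sketch; storage_field is assumed to be an S3StorageField and every bucket and key name below is hypothetical:

```python
async def promote_upload(storage_field) -> None:
    # Copy the object to its final location, then delete the original via
    # storage.delete_upload() -- which is what move() does internally.
    await storage_field.move(
        origin_uri="uploads/tmp-object",        # hypothetical key
        destination_uri="kb/resource/file",     # hypothetical key
        origin_bucket_name="nucliadb-uploads",  # hypothetical bucket
        destination_bucket_name="nucliadb-kb",  # hypothetical bucket
    )
```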
nucliadb_utils/storages/storage.py
CHANGED
@@ -60,7 +60,7 @@ INDEXING_KEY = "index/{kb}/{shard}/{resource}/{txid}"
 MESSAGE_KEY = "message/{kbid}/{rid}/{mid}"


-class StorageField:
+class StorageField(abc.ABC, metaclass=abc.ABCMeta):
     storage: Storage
     bucket: str
     key: str
@@ -78,18 +78,18 @@ class StorageField:
        self.key = fullkey
        self.field = field

-
-
+    @abc.abstractmethod
+    async def upload(self, iterator: AsyncIterator, origin: CloudFile) -> CloudFile: ...

-
+    @abc.abstractmethod
+    async def iter_data(self, headers=None) -> AsyncGenerator[bytes, None]:  # type: ignore
        raise NotImplementedError()
+        yield b""

-
-
-        Iterate through ranges of data
-        """
+    @abc.abstractmethod
+    async def read_range(self, start: int, end: int) -> AsyncGenerator[bytes, None]:
        raise NotImplementedError()
-        yield b""
+        yield b""

     async def delete(self) -> bool:
        deleted = False
@@ -98,38 +98,38 @@ class StorageField:
            deleted = True
        return deleted

-
-
+    @abc.abstractmethod
+    async def exists(self) -> Optional[Dict[str, str]]: ...

+    @abc.abstractmethod
     async def copy(
        self,
        origin_uri: str,
        destination_uri: str,
        origin_bucket_name: str,
        destination_bucket_name: str,
-    ):
-        raise NotImplementedError()
+    ): ...

+    @abc.abstractmethod
     async def move(
        self,
        origin_uri: str,
        destination_uri: str,
        origin_bucket_name: str,
        destination_bucket_name: str,
-    ):
-        raise NotImplementedError()
+    ): ...

-
-
+    @abc.abstractmethod
+    async def start(self, cf: CloudFile) -> CloudFile: ...

-
-
+    @abc.abstractmethod
+    async def append(self, cf: CloudFile, iterable: AsyncIterator) -> int: ...

-
-
+    @abc.abstractmethod
+    async def finish(self): ...


-class Storage:
+class Storage(abc.ABC, metaclass=abc.ABCMeta):
     source: int
     field_klass: Type
     deadletter_bucket: Optional[str] = None
@@ -498,40 +498,39 @@ class Storage:
        pb.ParseFromString(payload.read())
        return pb

-
-
+    @abc.abstractmethod
+    async def delete_upload(self, uri: str, bucket_name: str): ...

-
-
+    @abc.abstractmethod
+    def get_bucket_name(self, kbid: str) -> str: ...

-
-
+    @abc.abstractmethod
+    async def initialize(self) -> None: ...

-
-
+    @abc.abstractmethod
+    async def finalize(self) -> None: ...

     @abc.abstractmethod
-    def iterate_bucket(self, bucket: str, prefix: str) -> AsyncIterator[Any]:
-        raise NotImplementedError()
+    def iterate_bucket(self, bucket: str, prefix: str) -> AsyncIterator[Any]: ...

-    async def copy(self, file: CloudFile, destination: StorageField):
+    async def copy(self, file: CloudFile, destination: StorageField) -> None:
        await destination.copy(
            file.uri, destination.key, file.bucket_name, destination.bucket
        )

-    async def move(self, file: CloudFile, destination: StorageField):
+    async def move(self, file: CloudFile, destination: StorageField) -> None:
        await destination.move(
            file.uri, destination.key, file.bucket_name, destination.bucket
        )

-
-
+    @abc.abstractmethod
+    async def create_kb(self, kbid: str) -> bool: ...

-
-
+    @abc.abstractmethod
+    async def delete_kb(self, kbid: str) -> Tuple[bool, bool]: ...

-
-
+    @abc.abstractmethod
+    async def schedule_delete_kb(self, kbid: str) -> bool: ...

     async def set_stream_message(self, kbid: str, rid: str, data: bytes) -> str:
        key = MESSAGE_KEY.format(kbid=kbid, rid=rid, mid=uuid.uuid4())
nucliadb_utils/tests/unit/storages/test_storage.py
CHANGED
@@ -26,6 +26,7 @@ from nucliadb_protos.noderesources_pb2 import ResourceID
 from nucliadb_protos.nodewriter_pb2 import IndexMessage
 from nucliadb_protos.resources_pb2 import CloudFile

+from nucliadb_utils.storages.local import LocalStorageField
 from nucliadb_utils.storages.storage import (
     Storage,
     StorageField,
@@ -45,7 +46,7 @@ class TestStorageField:

     @pytest.fixture
     def storage_field(self, storage, field):
-        yield
+        yield LocalStorageField(storage, "bucket", "fullkey", field)

     @pytest.mark.asyncio
     async def test_delete(self, storage_field: StorageField, storage):
@@ -73,6 +74,24 @@ class StorageTest(Storage):
        br = BrainResource(labels=["label"])
        yield br.SerializeToString()

+    async def create_kb(self, kbid):
+        return True
+
+    async def delete_kb(self, kbid):
+        return True
+
+    async def delete_upload(self, uri, bucket):
+        return True
+
+    async def initialize(self) -> None:
+        pass
+
+    async def finalize(self) -> None:
+        pass
+
+    async def schedule_delete_kb(self, kbid: str) -> bool:
+        return True
+

 class TestStorage:
     @pytest.fixture
@@ -136,7 +155,7 @@ class TestStorage:
     async def test_download_pb(self, storage: StorageTest):
        assert isinstance(
            await storage.download_pb(
-
+                LocalStorageField(storage, "bucket", "fullkey"), BrainResource
            ),
            BrainResource,
        )
{nucliadb_utils-4.0.0.post544.dist-info → nucliadb_utils-4.0.0.post546.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nucliadb-utils
-Version: 4.0.0.
+Version: 4.0.0.post546
 Home-page: https://nuclia.com
 License: BSD
 Classifier: Development Status :: 4 - Beta
@@ -22,8 +22,8 @@ Requires-Dist: PyNaCl
 Requires-Dist: pyjwt >=2.4.0
 Requires-Dist: memorylru >=1.1.2
 Requires-Dist: mrflagly
-Requires-Dist: nucliadb-protos >=4.0.0.
-Requires-Dist: nucliadb-telemetry >=4.0.0.
+Requires-Dist: nucliadb-protos >=4.0.0.post546
+Requires-Dist: nucliadb-telemetry >=4.0.0.post546
 Provides-Extra: cache
 Requires-Dist: redis >=4.3.4 ; extra == 'cache'
 Requires-Dist: orjson >=3.6.7 ; extra == 'cache'
{nucliadb_utils-4.0.0.post544.dist-info → nucliadb_utils-4.0.0.post546.dist-info}/RECORD
RENAMED
@@ -38,13 +38,13 @@ nucliadb_utils/nuclia_usage/utils/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZ
 nucliadb_utils/nuclia_usage/utils/kb_usage_report.py,sha256=E1eUSFXBVNzQP9Q2rWj9y3koCO5S7iKwckny_AoLKuk,3870
 nucliadb_utils/storages/__init__.py,sha256=5Qc8AUWiJv9_JbGCBpAn88AIJhwDlm0OPQpg2ZdRL4U,872
 nucliadb_utils/storages/exceptions.py,sha256=n6aBOyurWMo8mXd1XY6Psgno4VfXJ9TRbxCy67c08-g,2417
-nucliadb_utils/storages/gcs.py,sha256
-nucliadb_utils/storages/local.py,sha256=
+nucliadb_utils/storages/gcs.py,sha256=3A4eqJe6PoF3oE7c9JSCpRtbcv6dtg6Pd28u4cXKwyE,27099
+nucliadb_utils/storages/local.py,sha256=jgUn3AwaQWn5VBKAvikd97sLAS3jx9PaUE7pwq1iJrk,10097
 nucliadb_utils/storages/nuclia.py,sha256=UfvRu92eqG1v-PE-UWH2x8KEJFqDqATMmUGFmEuqSSs,2097
-nucliadb_utils/storages/pg.py,sha256=
-nucliadb_utils/storages/s3.py,sha256=
+nucliadb_utils/storages/pg.py,sha256=yFk6AVgZHPgQq6NwLN_qN7fwD05WgCU5XE7gsFt-B0Q,18633
+nucliadb_utils/storages/s3.py,sha256=8IZoDlTeICZtU1Z0eouaxvafSR6y4GqCtLjFUBxTd1E,19269
 nucliadb_utils/storages/settings.py,sha256=ugCPy1zxBOmA2KosT-4tsjpvP002kg5iQyi42yCGCJA,1285
-nucliadb_utils/storages/storage.py,sha256=
+nucliadb_utils/storages/storage.py,sha256=lrXa6eWY7HMCUgrEz9-jTk7PBPwP1Nor_zi3oZMiVZ4,20322
 nucliadb_utils/tests/__init__.py,sha256=Oo9CAE7B0eW5VHn8sHd6o30SQzOWUhktLPRXdlDOleA,1456
 nucliadb_utils/tests/asyncbenchmark.py,sha256=rN_NNDk4ras0qgFp0QlRyAi9ZU9xITdzxl2s5CigzBo,10698
 nucliadb_utils/tests/conftest.py,sha256=gPYVuVhj_e6Aeanb91wvUerwuxZgaS7d3luIBRQFIU0,1876
@@ -68,9 +68,9 @@ nucliadb_utils/tests/unit/storages/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS
 nucliadb_utils/tests/unit/storages/test_aws.py,sha256=GCsB_jwCUNV3Ogt8TZZEmNKAHvOlR0HGU7blrFbtJqs,1924
 nucliadb_utils/tests/unit/storages/test_gcs.py,sha256=2XzJwgNpfjVGjtE-QdZhu3ayuT1EMEXINdM-_SatPCY,3554
 nucliadb_utils/tests/unit/storages/test_pg.py,sha256=sJfUttMSzq8W1XYolAUcMxl_R5HcEzb5fpCklPeMJiY,17000
-nucliadb_utils/tests/unit/storages/test_storage.py,sha256=
-nucliadb_utils-4.0.0.
-nucliadb_utils-4.0.0.
-nucliadb_utils-4.0.0.
-nucliadb_utils-4.0.0.
-nucliadb_utils-4.0.0.
+nucliadb_utils/tests/unit/storages/test_storage.py,sha256=54nUtElPwT3GQPTT4638F2awBYRl8nD6me9wWijhtbA,7107
+nucliadb_utils-4.0.0.post546.dist-info/METADATA,sha256=pgruTbhfCt2YIDI8al3iPI6RjTHnTHVcmmm0ZLDDR6c,1979
+nucliadb_utils-4.0.0.post546.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+nucliadb_utils-4.0.0.post546.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
+nucliadb_utils-4.0.0.post546.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+nucliadb_utils-4.0.0.post546.dist-info/RECORD,,
{nucliadb_utils-4.0.0.post544.dist-info → nucliadb_utils-4.0.0.post546.dist-info}/WHEEL
RENAMED
File without changes
{nucliadb_utils-4.0.0.post544.dist-info → nucliadb_utils-4.0.0.post546.dist-info}/top_level.txt
RENAMED
File without changes
{nucliadb_utils-4.0.0.post544.dist-info → nucliadb_utils-4.0.0.post546.dist-info}/zip-safe
RENAMED
File without changes