nucliadb-utils 6.0.0.post2298__py3-none-any.whl → 6.0.0.post2310__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb_utils/storages/s3.py +24 -2
- {nucliadb_utils-6.0.0.post2298.dist-info → nucliadb_utils-6.0.0.post2310.dist-info}/METADATA +3 -3
- {nucliadb_utils-6.0.0.post2298.dist-info → nucliadb_utils-6.0.0.post2310.dist-info}/RECORD +6 -6
- {nucliadb_utils-6.0.0.post2298.dist-info → nucliadb_utils-6.0.0.post2310.dist-info}/WHEEL +0 -0
- {nucliadb_utils-6.0.0.post2298.dist-info → nucliadb_utils-6.0.0.post2310.dist-info}/top_level.txt +0 -0
- {nucliadb_utils-6.0.0.post2298.dist-info → nucliadb_utils-6.0.0.post2310.dist-info}/zip-safe +0 -0
nucliadb_utils/storages/s3.py
CHANGED
@@ -19,6 +19,7 @@
|
|
19
19
|
#
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
|
+
import base64
|
22
23
|
from contextlib import AsyncExitStack
|
23
24
|
from datetime import datetime
|
24
25
|
from typing import AsyncGenerator, AsyncIterator, Optional
|
@@ -31,12 +32,15 @@ from aiobotocore.client import AioBaseClient # type: ignore
|
|
31
32
|
from aiobotocore.session import AioSession, get_session # type: ignore
|
32
33
|
|
33
34
|
from nucliadb_protos.resources_pb2 import CloudFile
|
34
|
-
from nucliadb_telemetry import errors
|
35
|
+
from nucliadb_telemetry import errors, metrics
|
35
36
|
from nucliadb_utils import logger
|
36
37
|
from nucliadb_utils.storages.exceptions import UnparsableResponse
|
37
38
|
from nucliadb_utils.storages.storage import Storage, StorageField
|
38
39
|
from nucliadb_utils.storages.utils import ObjectInfo, ObjectMetadata, Range
|
39
40
|
|
41
|
+
s3_ops_observer = metrics.Observer("s3_ops", labels={"type": ""})
|
42
|
+
|
43
|
+
|
40
44
|
MB = 1024 * 1024
|
41
45
|
MIN_UPLOAD_SIZE = 5 * MB
|
42
46
|
CHUNK_SIZE = MIN_UPLOAD_SIZE
|
@@ -77,6 +81,7 @@ class S3StorageField(StorageField):
|
|
77
81
|
jitter=backoff.random_jitter,
|
78
82
|
max_tries=MAX_TRIES,
|
79
83
|
)
|
84
|
+
@s3_ops_observer.wrap({"type": "download"})
|
80
85
|
async def _download(
|
81
86
|
self,
|
82
87
|
uri,
|
@@ -97,6 +102,7 @@ class S3StorageField(StorageField):
|
|
97
102
|
else:
|
98
103
|
raise
|
99
104
|
|
105
|
+
@s3_ops_observer.wrap({"type": "iter_data"})
|
100
106
|
async def iter_data(self, range: Optional[Range] = None) -> AsyncGenerator[bytes, None]:
|
101
107
|
# Supports field and key based iter
|
102
108
|
uri = self.field.uri if self.field else self.key
|
@@ -113,6 +119,7 @@ class S3StorageField(StorageField):
|
|
113
119
|
yield data
|
114
120
|
data = await stream.read(CHUNK_SIZE)
|
115
121
|
|
122
|
+
@s3_ops_observer.wrap({"type": "abort_multipart"})
|
116
123
|
async def _abort_multipart(self):
|
117
124
|
try:
|
118
125
|
mpu = self.field.resumable_uri
|
@@ -124,6 +131,7 @@ class S3StorageField(StorageField):
|
|
124
131
|
except Exception:
|
125
132
|
logger.warning("Could not abort multipart upload", exc_info=True)
|
126
133
|
|
134
|
+
@s3_ops_observer.wrap({"type": "start_upload"})
|
127
135
|
async def start(self, cf: CloudFile) -> CloudFile:
|
128
136
|
if self.field is not None and self.field.upload_uri != "":
|
129
137
|
# Field already has a file being uploaded, cancel
|
@@ -165,6 +173,7 @@ class S3StorageField(StorageField):
|
|
165
173
|
jitter=backoff.random_jitter,
|
166
174
|
max_tries=MAX_TRIES,
|
167
175
|
)
|
176
|
+
@s3_ops_observer.wrap({"type": "create_multipart"})
|
168
177
|
async def _create_multipart(self, bucket_name: str, upload_id: str, cf: CloudFile):
|
169
178
|
return await self.storage._s3aioclient.create_multipart_upload(
|
170
179
|
Bucket=bucket_name,
|
@@ -176,6 +185,7 @@ class S3StorageField(StorageField):
|
|
176
185
|
},
|
177
186
|
)
|
178
187
|
|
188
|
+
@s3_ops_observer.wrap({"type": "append_data"})
|
179
189
|
async def append(self, cf: CloudFile, iterable: AsyncIterator) -> int:
|
180
190
|
size = 0
|
181
191
|
if self.field is None:
|
@@ -203,6 +213,7 @@ class S3StorageField(StorageField):
|
|
203
213
|
jitter=backoff.random_jitter,
|
204
214
|
max_tries=MAX_TRIES,
|
205
215
|
)
|
216
|
+
@s3_ops_observer.wrap({"type": "upload_part"})
|
206
217
|
async def _upload_part(self, cf: CloudFile, data: bytes):
|
207
218
|
if self.field is None:
|
208
219
|
raise AttributeError("No field configured")
|
@@ -214,6 +225,7 @@ class S3StorageField(StorageField):
|
|
214
225
|
Body=data,
|
215
226
|
)
|
216
227
|
|
228
|
+
@s3_ops_observer.wrap({"type": "finish_upload"})
|
217
229
|
async def finish(self):
|
218
230
|
if self.field is None:
|
219
231
|
raise AttributeError("No field configured")
|
@@ -244,6 +256,7 @@ class S3StorageField(StorageField):
|
|
244
256
|
jitter=backoff.random_jitter,
|
245
257
|
max_tries=MAX_TRIES,
|
246
258
|
)
|
259
|
+
@s3_ops_observer.wrap({"type": "complete_multipart"})
|
247
260
|
async def _complete_multipart_upload(self):
|
248
261
|
# if blocks is 0, it means the file is of zero length so we need to
|
249
262
|
# trick it to finish a multiple part with no data.
|
@@ -263,6 +276,7 @@ class S3StorageField(StorageField):
|
|
263
276
|
MultipartUpload=part_info,
|
264
277
|
)
|
265
278
|
|
279
|
+
@s3_ops_observer.wrap({"type": "exists"})
|
266
280
|
async def exists(self) -> Optional[ObjectMetadata]:
|
267
281
|
"""
|
268
282
|
Existence can be checked either with a CloudFile data in the field attribute
|
@@ -291,6 +305,7 @@ class S3StorageField(StorageField):
|
|
291
305
|
return None
|
292
306
|
raise
|
293
307
|
|
308
|
+
@s3_ops_observer.wrap({"type": "copy"})
|
294
309
|
async def copy(
|
295
310
|
self,
|
296
311
|
origin_uri: str,
|
@@ -304,6 +319,7 @@ class S3StorageField(StorageField):
|
|
304
319
|
Key=destination_uri,
|
305
320
|
)
|
306
321
|
|
322
|
+
@s3_ops_observer.wrap({"type": "move"})
|
307
323
|
async def move(
|
308
324
|
self,
|
309
325
|
origin_uri: str,
|
@@ -395,6 +411,7 @@ class S3Storage(Storage):
|
|
395
411
|
async def finalize(self):
|
396
412
|
await self._exit_stack.__aexit__(None, None, None)
|
397
413
|
|
414
|
+
@s3_ops_observer.wrap({"type": "delete"})
|
398
415
|
async def delete_upload(self, uri: str, bucket_name: str):
|
399
416
|
if uri:
|
400
417
|
try:
|
@@ -547,5 +564,10 @@ def parse_object_metadata(obj: dict, key: str) -> ObjectMetadata:
|
|
547
564
|
# Content type
|
548
565
|
content_type = custom_metadata.get("content_type") or obj.get("ContentType") or ""
|
549
566
|
# Filename
|
550
|
-
|
567
|
+
base64_filename = custom_metadata.get("base64_filename")
|
568
|
+
if base64_filename:
|
569
|
+
filename = base64.b64decode(base64_filename).decode()
|
570
|
+
else:
|
571
|
+
filename = custom_metadata.get("filename") or key.split("/")[-1]
|
572
|
+
|
551
573
|
return ObjectMetadata(size=size, content_type=content_type, filename=filename)
|
{nucliadb_utils-6.0.0.post2298.dist-info → nucliadb_utils-6.0.0.post2310.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nucliadb_utils
|
3
|
-
Version: 6.0.0.post2298
|
3
|
+
Version: 6.0.0.post2310
|
4
4
|
Home-page: https://nuclia.com
|
5
5
|
License: BSD
|
6
6
|
Classifier: Development Status :: 4 - Beta
|
@@ -24,8 +24,8 @@ Requires-Dist: PyNaCl
|
|
24
24
|
Requires-Dist: pyjwt>=2.4.0
|
25
25
|
Requires-Dist: memorylru>=1.1.2
|
26
26
|
Requires-Dist: mrflagly>=0.2.9
|
27
|
-
Requires-Dist: nucliadb-protos>=6.0.0.post2298
|
28
|
-
Requires-Dist: nucliadb-telemetry>=6.0.0.post2298
|
27
|
+
Requires-Dist: nucliadb-protos>=6.0.0.post2310
|
28
|
+
Requires-Dist: nucliadb-telemetry>=6.0.0.post2310
|
29
29
|
Provides-Extra: cache
|
30
30
|
Requires-Dist: redis>=4.3.4; extra == "cache"
|
31
31
|
Requires-Dist: orjson>=3.6.7; extra == "cache"
|
@@ -46,7 +46,7 @@ nucliadb_utils/storages/gcs.py,sha256=MUPPTmPo5AfCWPykNvlIO35-C_8r94yWJgBxcDsRIg
|
|
46
46
|
nucliadb_utils/storages/local.py,sha256=kxofHdNy4QBtdM797AeksNjIUP2kw9vIC0X7AHABJBI,10374
|
47
47
|
nucliadb_utils/storages/nuclia.py,sha256=vEv94xAT7QM2g80S25QyrOw2pzvP2BAX-ADgZLtuCVc,2097
|
48
48
|
nucliadb_utils/storages/object_store.py,sha256=Tw10GmpYfM5TMqJ3Tk9pLQ9wLMBk1-snL_m6uasiZDQ,4257
|
49
|
-
nucliadb_utils/storages/s3.py,sha256=
|
49
|
+
nucliadb_utils/storages/s3.py,sha256=pbuukqpce_kqkmI_3eUTo390KbM5rmI7h8wsYAXtTAo,20377
|
50
50
|
nucliadb_utils/storages/settings.py,sha256=ugCPy1zxBOmA2KosT-4tsjpvP002kg5iQyi42yCGCJA,1285
|
51
51
|
nucliadb_utils/storages/storage.py,sha256=onwQJ4at-XewEG7dxcWdOqobfCw4w0PyPC7olvFJgjI,20295
|
52
52
|
nucliadb_utils/storages/utils.py,sha256=8g2rIwJeYIumQLOB47Yw1rx3twlhRB_cJxer65QfZmk,1479
|
@@ -59,8 +59,8 @@ nucliadb_utils/tests/indexing.py,sha256=YW2QhkhO9Q_8A4kKWJaWSvXvyQ_AiAwY1VylcfVQ
|
|
59
59
|
nucliadb_utils/tests/local.py,sha256=fXIBasrvdaFJM-sw2wk1_oiFzBcm9O10iCyC-OiXwY8,1914
|
60
60
|
nucliadb_utils/tests/nats.py,sha256=xqpww4jZjTKY9oPGlJdDJG67L3FIBQsa9qDHxILR8r8,7687
|
61
61
|
nucliadb_utils/tests/s3.py,sha256=pl-RJFjA4MH6iXkqhsh5g8gDuEhrYu1nPZ-laxlrMlE,3704
|
62
|
-
nucliadb_utils-6.0.0.
|
63
|
-
nucliadb_utils-6.0.0.
|
64
|
-
nucliadb_utils-6.0.0.
|
65
|
-
nucliadb_utils-6.0.0.
|
66
|
-
nucliadb_utils-6.0.0.
|
62
|
+
nucliadb_utils-6.0.0.post2310.dist-info/METADATA,sha256=lCSBxvLdTxa4utHPT4xX9D4SIuaa-EMor-KerNyMaNU,2055
|
63
|
+
nucliadb_utils-6.0.0.post2310.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
64
|
+
nucliadb_utils-6.0.0.post2310.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
|
65
|
+
nucliadb_utils-6.0.0.post2310.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
66
|
+
nucliadb_utils-6.0.0.post2310.dist-info/RECORD,,
|
File without changes
|
{nucliadb_utils-6.0.0.post2298.dist-info → nucliadb_utils-6.0.0.post2310.dist-info}/top_level.txt
RENAMED
File without changes
|
{nucliadb_utils-6.0.0.post2298.dist-info → nucliadb_utils-6.0.0.post2310.dist-info}/zip-safe
RENAMED
File without changes
|