karton-core 5.7.0__py3-none-any.whl → 5.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karton/core/__version__.py +1 -1
- karton/core/asyncio/__init__.py +21 -0
- karton/core/asyncio/backend.py +370 -0
- karton/core/asyncio/base.py +133 -0
- karton/core/asyncio/karton.py +359 -0
- karton/core/asyncio/logger.py +57 -0
- karton/core/asyncio/resource.py +384 -0
- karton/core/backend.py +150 -109
- karton/core/base.py +119 -93
- karton/core/config.py +5 -0
- karton/core/karton.py +13 -12
- karton/core/logger.py +33 -15
- karton/core/resource.py +32 -30
- karton/core/task.py +24 -2
- {karton_core-5.7.0.dist-info → karton_core-5.8.0.dist-info}/METADATA +3 -2
- karton_core-5.8.0.dist-info/RECORD +33 -0
- karton_core-5.7.0.dist-info/RECORD +0 -27
- /karton_core-5.7.0-nspkg.pth → /karton_core-5.8.0-nspkg.pth +0 -0
- {karton_core-5.7.0.dist-info → karton_core-5.8.0.dist-info}/LICENSE +0 -0
- {karton_core-5.7.0.dist-info → karton_core-5.8.0.dist-info}/WHEEL +0 -0
- {karton_core-5.7.0.dist-info → karton_core-5.8.0.dist-info}/entry_points.txt +0 -0
- {karton_core-5.7.0.dist-info → karton_core-5.8.0.dist-info}/namespace_packages.txt +0 -0
- {karton_core-5.7.0.dist-info → karton_core-5.8.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,384 @@
|
|
1
|
+
import contextlib
|
2
|
+
import os
|
3
|
+
import shutil
|
4
|
+
import tempfile
|
5
|
+
import zipfile
|
6
|
+
from io import BytesIO
|
7
|
+
from typing import IO, TYPE_CHECKING, Any, AsyncIterator, Dict, List, Optional, Union
|
8
|
+
|
9
|
+
from karton.core.resource import LocalResourceBase, ResourceBase
|
10
|
+
|
11
|
+
if TYPE_CHECKING:
|
12
|
+
from .backend import KartonAsyncBackend
|
13
|
+
|
14
|
+
|
15
|
+
class LocalResource(LocalResourceBase):
    """
    Local resource holding arbitrary binary data, e.g. the contents of a file.

    A local resource gets uploaded to the object hub (S3) while the task
    carrying it is being dispatched.

    .. code-block:: python

        # Creating resource from bytes
        sample = Resource("original_name.exe", content=b"X5O!P%@AP[4\\
        PZX54(P^)7CC)7}$EICAR-STANDARD-ANT...")

        # Creating resource from path
        sample = Resource("original_name.exe", path="sample/original_name.exe")

    :param name: Name of the resource (e.g. name of file)
    :param content: Resource content
    :param path: Path of file with resource content
    :param bucket: Alternative S3 bucket for resource
    :param metadata: Resource metadata
    :param uid: Alternative S3 resource id
    :param sha256: Resource sha256 hash
    :param fd: Seekable file descriptor
    :param _flags: Resource flags
    :param _close_fd: Close file descriptor after upload (default: False)
    """

    def __init__(
        self,
        name: str,
        content: Optional[Union[str, bytes]] = None,
        path: Optional[str] = None,
        bucket: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        uid: Optional[str] = None,
        sha256: Optional[str] = None,
        fd: Optional[IO[bytes]] = None,
        _flags: Optional[List[str]] = None,
        _close_fd: bool = False,
    ) -> None:
        # Pure pass-through: every argument is forwarded unchanged to the base.
        super().__init__(
            name=name,
            content=content,
            path=path,
            fd=fd,
            bucket=bucket,
            uid=uid,
            sha256=sha256,
            metadata=metadata,
            _flags=_flags,
            _close_fd=_close_fd,
        )

    async def _upload(self, backend: "KartonAsyncBackend") -> None:
        """Send the resource data to object storage using ``backend``.

        The data source is picked in this order: in-memory content, then the
        file descriptor, then the file path. Nothing is uploaded when none of
        them is set.

        :param backend: Async backend used to perform the upload
        :raises RuntimeError: If the bucket is not set, or if the file
            descriptor is not positioned at its first byte

        :meta private:
        """
        # Note: the resource is deliberately never turned into a remote one.
        # The same local resource may be dispatched with several tasks, and
        # the uploaded object can be deleted between those dispatches.
        target_bucket = self.bucket
        if target_bucket is None:
            raise RuntimeError(
                "Resource object can't be uploaded because its bucket is not set"
            )

        if self._content:
            # In-memory contents take precedence over fd and path
            await backend.upload_object(target_bucket, self.uid, self._content)
            return

        if self.fd:
            position = self.fd.tell()
            if position != 0:
                raise RuntimeError(
                    "Resource object can't be uploaded: "
                    "file descriptor must point at first byte "
                    f"(fd.tell = {position})"
                )
            await backend.upload_object(target_bucket, self.uid, self.fd)
            # A descriptor owned by the resource is closed once it is uploaded
            if self._close_fd:
                self.fd.close()
            return

        if self._path:
            # Stream the file found under the given path
            await backend.upload_object_from_file(
                target_bucket, self.uid, self._path
            )

    async def upload(self, backend: "KartonAsyncBackend") -> None:
        """Upload the resource, refusing to do so when it carries no data.

        :param backend: Async backend used to perform the upload
        :raises RuntimeError: If neither content, path nor fd is set

        :meta private:
        """
        if not (self._content or self._path or self.fd):
            raise RuntimeError("Can't upload resource without content")
        await self._upload(backend)
|
114
|
+
|
115
|
+
|
116
|
+
# Public alias: ``Resource`` refers to the very same class as ``LocalResource``.
Resource = LocalResource
|
117
|
+
|
118
|
+
|
119
|
+
class RemoteResource(ResourceBase):
    """
    Keeps reference to remote resource object shared between subsystems
    via object storage (S3)

    Should never be instantiated directly by subsystem, but can be directly passed to
    outgoing payload.

    :param name: Name of the resource (e.g. name of file)
    :param bucket: Alternative S3 bucket for resource
    :param metadata: Resource metadata
    :param uid: Alternative S3 resource id
    :param size: Resource size
    :param backend: ``KartonAsyncBackend`` to bind to this resource
    :param sha256: Resource sha256 hash
    :param _flags: Resource flags
    """

    def __init__(
        self,
        name: str,
        bucket: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        uid: Optional[str] = None,
        size: Optional[int] = None,
        backend: Optional["KartonAsyncBackend"] = None,
        sha256: Optional[str] = None,
        _flags: Optional[List[str]] = None,
    ) -> None:
        super().__init__(
            name,
            bucket=bucket,
            metadata=metadata,
            sha256=sha256,
            _uid=uid,
            _size=size,
            _flags=_flags,
        )
        self.backend = backend

    def loaded(self) -> bool:
        """
        Checks whether resource is loaded into memory

        :return: Flag indicating if the resource is loaded or not
        """
        return self._content is not None

    @property
    def content(self) -> bytes:
        """
        Resource content.

        This property never downloads anything on its own: the content must
        be fetched explicitly with ``await resource.download()`` beforehand.

        :return: Content bytes
        :raises RuntimeError: If the resource has not been downloaded yet
        """
        if self._content is None:
            raise RuntimeError(
                "Resource object needs to be explicitly downloaded first"
            )
        return self._content

    @classmethod
    def from_dict(
        cls, dict: Dict[str, Any], backend: Optional["KartonAsyncBackend"]
    ) -> "RemoteResource":
        """
        Internal deserialization method for remote resources

        The passed dictionary (including its nested ``metadata``) is left
        unmodified.

        :param dict: Serialized information about resource
        :param backend: KartonAsyncBackend object
        :return: Deserialized :py:meth:`RemoteResource` object

        :meta private:
        """
        # Backwards compatibility: a top-level "sha256" key is folded into
        # metadata. A new mapping is built so the caller's dict is not mutated.
        metadata = dict.get("metadata", {})
        if "sha256" in dict:
            metadata = {**metadata, "sha256": dict["sha256"]}

        return cls(
            name=dict["name"],
            metadata=metadata,
            bucket=dict["bucket"],
            uid=dict["uid"],
            size=dict.get("size"),  # Backwards compatibility (2.x.x)
            backend=backend,
            _flags=dict.get("flags"),  # Backwards compatibility (3.x.x)
        )

    def unload(self) -> None:
        """
        Unloads resource object from memory
        """
        self._content = None

    async def download(self) -> bytes:
        """
        Downloads remote resource content from object hub into memory.

        .. code-block:: python

            sample = self.current_task.get_resource("sample")

            # Ensure that resource will be downloaded before it will be
            # passed to processing method
            await sample.download()

            self.process_sample(sample)

        :return: Downloaded content bytes
        :raises RuntimeError: If the resource is not bound to a backend or
            its bucket is not set
        """
        if self.backend is None:
            raise RuntimeError(
                (
                    "Resource object can't be downloaded because it's not bound to "
                    "the backend"
                )
            )
        if self.bucket is None:
            raise RuntimeError(
                "Resource object can't be downloaded because its bucket is not set"
            )

        self._content = await self.backend.download_object(self.bucket, self.uid)
        return self._content

    async def download_to_file(self, path: str) -> None:
        """
        Downloads remote resource into file.

        .. code-block:: python

            sample = self.current_task.get_resource("sample")

            await sample.download_to_file("sample/sample.exe")

            with open("sample/sample.exe", "rb") as f:
                contents = f.read()

        :param path: Path to download the resource into
        :raises RuntimeError: If the resource is not bound to a backend or
            its bucket is not set
        """
        if self.backend is None:
            raise RuntimeError(
                (
                    "Resource object can't be downloaded because it's not bound to "
                    "the backend"
                )
            )
        if self.bucket is None:
            raise RuntimeError(
                "Resource object can't be downloaded because its bucket is not set"
            )

        await self.backend.download_object_to_file(self.bucket, self.uid, path)

    @contextlib.asynccontextmanager
    async def download_temporary_file(
        self, suffix: Optional[str] = None
    ) -> AsyncIterator[IO[bytes]]:
        """
        Downloads remote resource into named temporary file.

        .. code-block:: python

            sample = self.current_task.get_resource("sample")

            async with sample.download_temporary_file() as f:
                contents = f.read()
                path = f.name

            # Temporary file is deleted after exiting the "async with" scope

        :param suffix: Optional suffix for the temporary file name
        :return: Async context manager with the temporary file
        """
        # The temporary file is only used to reserve a unique path: it is
        # closed right away, the download is written under that path and the
        # result is reopened for reading. The file is removed manually in
        # "finally" because it was created with delete=False.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        tmp.close()
        try:
            await self.download_to_file(tmp.name)
            with open(tmp.name, "rb") as f:
                yield f
        finally:
            os.remove(tmp.name)

    @contextlib.asynccontextmanager
    async def zip_file(self) -> AsyncIterator[zipfile.ZipFile]:
        """
        If resource contains a Zip file, downloads it to the temporary file
        and wraps it with ZipFile object.

        .. code-block:: python

            dumps = self.current_task.get_resource("dumps")

            async with dumps.zip_file() as zipf:
                print("Fetched dumps: ", zipf.namelist())

        By default: method downloads zip into temporary file, which is deleted after
        leaving the context. If you want to load zip into memory,
        call :py:meth:`RemoteResource.download` first.

        If you want to pre-download Zip under specified path and open it using
        zipfile module, you need to do this manually:

        .. code-block:: python

            dumps = self.current_task.get_resource("dumps")

            # Download zip file
            zip_path = "./dumps.zip"
            await dumps.download_to_file(zip_path)

            zipf = zipfile.ZipFile(zip_path)

        :return: Async context manager with zipfile
        """
        if self._content:
            # Content already in memory: no download needed
            yield zipfile.ZipFile(BytesIO(self._content))
        else:
            async with self.download_temporary_file() as f:
                yield zipfile.ZipFile(f)

    async def extract_to_directory(self, path: str) -> None:
        """
        If resource contains a Zip file, extracts files contained in Zip into
        provided path.

        By default: method downloads zip into temporary file, which is deleted
        after extraction. If you want to load zip into memory, call
        :py:meth:`RemoteResource.download` first.

        :param path: Directory path where the resource should be unpacked
        """
        async with self.zip_file() as zf:
            zf.extractall(path)

    @contextlib.asynccontextmanager
    async def extract_temporary(self) -> AsyncIterator[str]:
        """
        If resource contains a Zip file, extracts files contained in Zip
        to the temporary directory.

        Returns path of directory with extracted files. Directory is recursively
        deleted after leaving the context.

        .. code-block:: python

            dumps = self.current_task.get_resource("dumps")

            async with dumps.extract_temporary() as dumps_path:
                print("Fetched dumps:", os.listdir(dumps_path))

        By default: method downloads zip into temporary file, which is deleted
        after extraction. If you want to load zip into memory, call
        :py:meth:`RemoteResource.download` first.

        :return: Async context manager with the temporary directory
        """
        tmpdir = tempfile.mkdtemp()
        try:
            await self.extract_to_directory(tmpdir)
            # An async context manager generator must yield exactly once;
            # a second yield makes __aexit__ raise
            # RuntimeError("generator didn't stop").
            yield tmpdir
        finally:
            shutil.rmtree(tmpdir)
|