PyS3Uploader 0.4.0a1__py3-none-any.whl → 0.4.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pys3uploader/metadata.py +1 -0
- pys3uploader/timer.py +20 -17
- pys3uploader/uploader.py +32 -19
- pys3uploader/utils.py +2 -1
- pys3uploader/version.py +1 -1
- {pys3uploader-0.4.0a1.dist-info → pys3uploader-0.4.0b1.dist-info}/METADATA +3 -1
- pys3uploader-0.4.0b1.dist-info/RECORD +15 -0
- pys3uploader-0.4.0a1.dist-info/RECORD +0 -15
- {pys3uploader-0.4.0a1.dist-info → pys3uploader-0.4.0b1.dist-info}/LICENSE +0 -0
- {pys3uploader-0.4.0a1.dist-info → pys3uploader-0.4.0b1.dist-info}/WHEEL +0 -0
- {pys3uploader-0.4.0a1.dist-info → pys3uploader-0.4.0b1.dist-info}/top_level.txt +0 -0
pys3uploader/metadata.py
CHANGED
pys3uploader/timer.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
import logging
|
|
2
|
+
import threading
|
|
2
3
|
from typing import Any, Callable, Dict, Tuple
|
|
3
4
|
|
|
4
5
|
|
|
@@ -15,6 +16,7 @@ class RepeatedTimer:
|
|
|
15
16
|
function: Callable,
|
|
16
17
|
args: Tuple = None,
|
|
17
18
|
kwargs: Dict[str, Any] = None,
|
|
19
|
+
logger: logging.Logger = None,
|
|
18
20
|
):
|
|
19
21
|
"""Repeats the ``Timer`` object from threading.
|
|
20
22
|
|
|
@@ -23,32 +25,33 @@ class RepeatedTimer:
|
|
|
23
25
|
function: Function to trigger with intervals.
|
|
24
26
|
args: Arguments for the function.
|
|
25
27
|
kwargs: Keyword arguments for the function.
|
|
28
|
+
logger: Logger instance.
|
|
26
29
|
"""
|
|
27
|
-
self._timer = None
|
|
28
30
|
self.interval = interval
|
|
29
31
|
self.function = function
|
|
30
32
|
self.args = args or ()
|
|
31
33
|
self.kwargs = kwargs or {}
|
|
32
|
-
self.
|
|
34
|
+
self.logger = logger or logging.getLogger(__name__)
|
|
35
|
+
self.thread = None
|
|
36
|
+
self._stop_event = threading.Event()
|
|
33
37
|
|
|
34
38
|
def _run(self):
|
|
35
39
|
"""Triggers the target function."""
|
|
36
|
-
self.
|
|
37
|
-
|
|
38
|
-
|
|
40
|
+
while not self._stop_event.wait(self.interval):
|
|
41
|
+
try:
|
|
42
|
+
self.function(*self.args, **self.kwargs)
|
|
43
|
+
except Exception as error:
|
|
44
|
+
self.logger.error("Error in RepeatedTimer function [%s]: %s", self.function.__name__, error)
|
|
39
45
|
|
|
40
46
|
def start(self):
|
|
41
47
|
"""Trigger target function if timer isn't running already."""
|
|
42
|
-
if
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
48
|
+
if self.thread and self.thread.is_alive():
|
|
49
|
+
return
|
|
50
|
+
self.thread = threading.Thread(target=self._run, daemon=True)
|
|
51
|
+
self.thread.start()
|
|
46
52
|
|
|
47
|
-
def stop(self):
|
|
53
|
+
def stop(self, timeout: int = 3):
|
|
48
54
|
"""Stop the timer and cancel all futures."""
|
|
49
|
-
self.
|
|
50
|
-
self.
|
|
51
|
-
|
|
52
|
-
def cancel(self):
|
|
53
|
-
"""Initiate cancellation."""
|
|
54
|
-
self.stop()
|
|
55
|
+
self._stop_event.set()
|
|
56
|
+
if self.thread:
|
|
57
|
+
self.thread.join(timeout=timeout)
|
pys3uploader/uploader.py
CHANGED
|
@@ -2,8 +2,8 @@ import json
|
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
4
|
import time
|
|
5
|
-
from datetime import datetime, UTC
|
|
6
5
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
7
|
from typing import Dict, Iterable, NoReturn
|
|
8
8
|
|
|
9
9
|
import boto3.resources.factory
|
|
@@ -13,10 +13,10 @@ from botocore.config import Config
|
|
|
13
13
|
from botocore.exceptions import ClientError
|
|
14
14
|
|
|
15
15
|
from pys3uploader.exceptions import BucketNotFound
|
|
16
|
-
from pys3uploader.metadata import Metadata
|
|
17
|
-
from pys3uploader.timer import RepeatedTimer
|
|
18
16
|
from pys3uploader.logger import LogHandler, LogLevel, setup_logger
|
|
17
|
+
from pys3uploader.metadata import Metadata
|
|
19
18
|
from pys3uploader.progress import ProgressPercentage
|
|
19
|
+
from pys3uploader.timer import RepeatedTimer
|
|
20
20
|
from pys3uploader.utils import (
|
|
21
21
|
RETRY_CONFIG,
|
|
22
22
|
UploadResults,
|
|
@@ -45,6 +45,8 @@ class Uploader:
|
|
|
45
45
|
overwrite: bool = False,
|
|
46
46
|
file_exclusion: Iterable[str] = None,
|
|
47
47
|
folder_exclusion: Iterable[str] = None,
|
|
48
|
+
metadata_upload_interval: int = None,
|
|
49
|
+
metadata_filename: str = None,
|
|
48
50
|
region_name: str = None,
|
|
49
51
|
profile_name: str = None,
|
|
50
52
|
aws_access_key_id: str = None,
|
|
@@ -66,6 +68,8 @@ class Uploader:
|
|
|
66
68
|
overwrite: Boolean flag to overwrite files in S3.
|
|
67
69
|
file_exclusion: Sequence of files to exclude during upload.
|
|
68
70
|
folder_exclusion: Sequence of directories to exclude during upload.
|
|
71
|
+
metadata_upload_interval: Interval in seconds to upload metadata file.
|
|
72
|
+
metadata_filename: Metadata filename to upload periodically.
|
|
69
73
|
region_name: Name of the AWS region.
|
|
70
74
|
profile_name: AWS profile name.
|
|
71
75
|
aws_access_key_id: AWS access key ID.
|
|
@@ -145,10 +149,13 @@ class Uploader:
|
|
|
145
149
|
self.upload_files: Dict[str, str] = {}
|
|
146
150
|
self.file_size_map: Dict[str, int] = {}
|
|
147
151
|
|
|
152
|
+
self.metadata_filename = metadata_filename or getenv("METADATA_FILENAME", default="METADATA.json")
|
|
148
153
|
self.timer = RepeatedTimer(
|
|
149
154
|
function=self.metadata_uploader,
|
|
150
|
-
interval=int(getenv("METADATA_UPLOAD_INTERVAL", 300))
|
|
155
|
+
interval=metadata_upload_interval or int(getenv("METADATA_UPLOAD_INTERVAL", default="300")),
|
|
156
|
+
logger=self.logger,
|
|
151
157
|
)
|
|
158
|
+
self.alive_bar_kwargs = dict(title="Progress", bar="smooth", spinner=None, enrich_print=False)
|
|
152
159
|
|
|
153
160
|
def init(self) -> None | NoReturn:
|
|
154
161
|
"""Instantiates the bucket instance.
|
|
@@ -175,7 +182,7 @@ class Uploader:
|
|
|
175
182
|
self.upload_dir = os.path.abspath(self.upload_dir)
|
|
176
183
|
self.load_bucket_state()
|
|
177
184
|
|
|
178
|
-
def load_bucket_state(self):
|
|
185
|
+
def load_bucket_state(self) -> None:
|
|
179
186
|
"""Loads the bucket's current state."""
|
|
180
187
|
# noinspection PyUnresolvedReferences
|
|
181
188
|
self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
|
|
@@ -191,10 +198,11 @@ class Uploader:
|
|
|
191
198
|
def exit(self) -> None:
|
|
192
199
|
"""Exits after printing results, and run time."""
|
|
193
200
|
success = len(self.results.success)
|
|
201
|
+
skipped = len(self.results.skipped)
|
|
194
202
|
failed = len(self.results.failed)
|
|
195
203
|
total = success + failed
|
|
196
204
|
self.logger.info(
|
|
197
|
-
"Total number of uploads: %d, success: %d, failed: %d", total, success, failed
|
|
205
|
+
"Total number of uploads: %d, skipped: %d, success: %d, failed: %d", total, skipped, success, failed
|
|
198
206
|
)
|
|
199
207
|
# Stop the timer and upload the final state as metadata file
|
|
200
208
|
self.timer.stop()
|
|
@@ -253,6 +261,7 @@ class Uploader:
|
|
|
253
261
|
object_size,
|
|
254
262
|
size_converter(object_size),
|
|
255
263
|
)
|
|
264
|
+
self.results.skipped.append(filepath)
|
|
256
265
|
return False
|
|
257
266
|
self.logger.info(
|
|
258
267
|
"S3 object %s exists, but size mismatch. Local: [%d bytes / %s], S3: [%d bytes / %s]",
|
|
@@ -321,7 +330,9 @@ class Uploader:
|
|
|
321
330
|
|
|
322
331
|
def run(self) -> None:
|
|
323
332
|
"""Initiates object upload in a traditional loop."""
|
|
333
|
+
# Verify and initiate bucket state
|
|
324
334
|
self.init()
|
|
335
|
+
# Verify and initiate local state
|
|
325
336
|
self.load_local_state()
|
|
326
337
|
self.size_it()
|
|
327
338
|
self.timer.start()
|
|
@@ -333,7 +344,7 @@ class Uploader:
|
|
|
333
344
|
self.upload_dir,
|
|
334
345
|
self.bucket_name,
|
|
335
346
|
)
|
|
336
|
-
with alive_bar(total_files,
|
|
347
|
+
with alive_bar(total_files, **self.alive_bar_kwargs) as overall_bar:
|
|
337
348
|
for filepath, objectpath in self.upload_files.items():
|
|
338
349
|
progress_callback = ProgressPercentage(
|
|
339
350
|
filename=os.path.basename(filepath), size=self.filesize(filepath), bar=overall_bar
|
|
@@ -371,16 +382,18 @@ class Uploader:
|
|
|
371
382
|
self.bucket_name,
|
|
372
383
|
max_workers,
|
|
373
384
|
)
|
|
374
|
-
with alive_bar(total_files,
|
|
385
|
+
with alive_bar(total_files, **self.alive_bar_kwargs) as overall_bar:
|
|
375
386
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
376
|
-
futures =
|
|
387
|
+
futures = {}
|
|
377
388
|
for filepath, objectpath in self.upload_files.items():
|
|
378
389
|
progress_callback = ProgressPercentage(
|
|
379
390
|
filename=os.path.basename(filepath), size=self.filesize(filepath), bar=overall_bar
|
|
380
391
|
)
|
|
381
|
-
|
|
392
|
+
future = executor.submit(self._uploader, filepath, objectpath, progress_callback)
|
|
393
|
+
futures[future] = filepath
|
|
382
394
|
|
|
383
395
|
for future in as_completed(futures):
|
|
396
|
+
filepath = futures[future]
|
|
384
397
|
try:
|
|
385
398
|
future.result()
|
|
386
399
|
self.results.success.append(filepath)
|
|
@@ -392,29 +405,29 @@ class Uploader:
|
|
|
392
405
|
|
|
393
406
|
def metadata_uploader(self) -> None:
|
|
394
407
|
"""Metadata uploader."""
|
|
395
|
-
filename = objectpath = getenv("METADATA_FILENAME", "METADATA.json")
|
|
396
408
|
self.load_bucket_state()
|
|
397
|
-
|
|
398
|
-
|
|
409
|
+
success = list(set(self.results.success + self.results.skipped))
|
|
410
|
+
objects_uploaded = len(success)
|
|
411
|
+
size_uploaded = sum(self.filesize(file) for file in success)
|
|
399
412
|
|
|
400
|
-
pending_files = self.upload_files.keys() -
|
|
413
|
+
pending_files = set(self.upload_files.keys()) - set(success)
|
|
401
414
|
objects_pending = len(pending_files)
|
|
402
|
-
size_pending = sum(
|
|
415
|
+
size_pending = sum(self.filesize(file) for file in pending_files)
|
|
403
416
|
|
|
404
417
|
metadata = Metadata(
|
|
405
418
|
timestamp=datetime.now(tz=UTC).strftime("%A %B %d, %Y %H:%M:%S"),
|
|
406
419
|
objects_uploaded=objects_uploaded,
|
|
407
420
|
objects_pending=objects_pending,
|
|
408
421
|
size_uploaded=size_converter(size_uploaded),
|
|
409
|
-
size_pending=size_converter(size_pending)
|
|
422
|
+
size_pending=size_converter(size_pending),
|
|
410
423
|
)
|
|
411
|
-
self.logger.debug("\n" + json.dumps(metadata, indent=2) + "\n")
|
|
424
|
+
self.logger.debug("\n" + json.dumps(metadata.__dict__, indent=2) + "\n")
|
|
412
425
|
self.logger.debug("Uploading metadata to S3")
|
|
413
|
-
filepath = os.path.join(os.getcwd(),
|
|
426
|
+
filepath = os.path.join(os.getcwd(), self.metadata_filename)
|
|
414
427
|
with open(filepath, "w") as file:
|
|
415
428
|
json.dump(metadata.__dict__, file, indent=2)
|
|
416
429
|
file.flush()
|
|
417
|
-
self.bucket.upload_file(filepath,
|
|
430
|
+
self.bucket.upload_file(filepath, self.metadata_filename)
|
|
418
431
|
|
|
419
432
|
def get_bucket_structure(self) -> str:
|
|
420
433
|
"""Gets all the objects in an S3 bucket and forms it into a hierarchical folder like representation.
|
pys3uploader/utils.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import math
|
|
2
2
|
import os
|
|
3
|
-
from typing import Dict,
|
|
3
|
+
from typing import Dict, List, Set
|
|
4
4
|
|
|
5
5
|
from botocore.config import Config
|
|
6
6
|
|
|
@@ -25,6 +25,7 @@ class UploadResults(dict):
|
|
|
25
25
|
|
|
26
26
|
success: List[str] = []
|
|
27
27
|
failed: List[str] = []
|
|
28
|
+
skipped: List[str] = []
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
def getenv(*args, default: str = None) -> str:
|
pys3uploader/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
version = "0.4.
|
|
1
|
+
version = "0.4.0b1"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: PyS3Uploader
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.0b1
|
|
4
4
|
Summary: Python module to upload objects to an S3 bucket.
|
|
5
5
|
Author-email: Vignesh Rao <svignesh1793@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -157,6 +157,8 @@ if __name__ == '__main__':
|
|
|
157
157
|
- **overwrite** - Boolean flag to overwrite files present in S3. Defaults to ``False``
|
|
158
158
|
- **file_exclusion** - Sequence of files to exclude during upload. Defaults to ``None``
|
|
159
159
|
- **folder_exclusion** - Sequence of directories to exclude during upload. Defaults to ``None``
|
|
160
|
+
- **metadata_upload_interval** - Interval in seconds to upload metadata file. Defaults to ``300``
|
|
161
|
+
- **metadata_filename** - Metadata filename to upload periodically. Defaults to ``METADATA.json``
|
|
160
162
|
- **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
|
|
161
163
|
- **log_handler** - Choose between `stdout` vs `file` logging. Defaults to `pys3uploader.LogHandler.stdout`
|
|
162
164
|
- **log_level** - Choose the logging level. Defaults to `pys3uploader.LogLevel.debug`
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
pys3uploader/__init__.py,sha256=EqMScWbJNV4UWeMg4fMko2KB18xL2CO3a3o_od0H0Lc,124
|
|
2
|
+
pys3uploader/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
|
|
3
|
+
pys3uploader/logger.py,sha256=z9JEnyf4nHIakey0bAaCgEN7oXOYJYOpskZyM_4s-D4,2678
|
|
4
|
+
pys3uploader/metadata.py,sha256=4pn8Y9dVZLFXxq5Cocm20t1mfDkV5xJXY6YEekQ8ssQ,231
|
|
5
|
+
pys3uploader/progress.py,sha256=IladNMXLBhkPpxOntpANTam_hC9OWosmNDmdbweDNYM,1195
|
|
6
|
+
pys3uploader/timer.py,sha256=8tvlQaO1Z7TfFVBhl5qrDq9wsMiOR6P0J2bkVwfM96c,1816
|
|
7
|
+
pys3uploader/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
|
|
8
|
+
pys3uploader/uploader.py,sha256=Bt2agHNEPAQU4_gCWr9UkQnFmsvN1XA5mv6C_A9CP8o,19354
|
|
9
|
+
pys3uploader/utils.py,sha256=8eIM5ZhS7-bYT5Dq4FNbmojQ16vvoAPF1ihCCTZezGE,5783
|
|
10
|
+
pys3uploader/version.py,sha256=LbK2ZtjMqQY-aqhvblfIlZsvOYPfoOZ1bAnW5Cno4DY,20
|
|
11
|
+
pys3uploader-0.4.0b1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
|
|
12
|
+
pys3uploader-0.4.0b1.dist-info/METADATA,sha256=-LG36ZcYpnRfCzAFwkRygT78hwCteQAHRL2CmEUGGkk,9155
|
|
13
|
+
pys3uploader-0.4.0b1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
14
|
+
pys3uploader-0.4.0b1.dist-info/top_level.txt,sha256=lVIFMMoUx7dj_myetBmOUQTJiOzz5VyDqchnQElmrWw,13
|
|
15
|
+
pys3uploader-0.4.0b1.dist-info/RECORD,,
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
pys3uploader/__init__.py,sha256=EqMScWbJNV4UWeMg4fMko2KB18xL2CO3a3o_od0H0Lc,124
|
|
2
|
-
pys3uploader/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
|
|
3
|
-
pys3uploader/logger.py,sha256=z9JEnyf4nHIakey0bAaCgEN7oXOYJYOpskZyM_4s-D4,2678
|
|
4
|
-
pys3uploader/metadata.py,sha256=tOOoLh2vISfH-GfH3yBcA_xtEjRwomaw7sCLEaDRK-8,230
|
|
5
|
-
pys3uploader/progress.py,sha256=IladNMXLBhkPpxOntpANTam_hC9OWosmNDmdbweDNYM,1195
|
|
6
|
-
pys3uploader/timer.py,sha256=qN2XNrGEyP3stsK3McvhE3VvIiUFh7mv4rbp5WDeyVU,1498
|
|
7
|
-
pys3uploader/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
|
|
8
|
-
pys3uploader/uploader.py,sha256=h5DYQA2yv0fQ2SSyAnAl8SsgJUajmN_o1PdMSqMbACM,18588
|
|
9
|
-
pys3uploader/utils.py,sha256=_2RYKUTyrQzwkxo7fSiLb5ASrpjcNpb3kZHqy_wByRk,5755
|
|
10
|
-
pys3uploader/version.py,sha256=VAwBBgd_skAqJS9UL1T_xDXryTqN5m58fbTTEXcKxgM,20
|
|
11
|
-
pys3uploader-0.4.0a1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
|
|
12
|
-
pys3uploader-0.4.0a1.dist-info/METADATA,sha256=FdJdNSesnP1xHfb4il5HBw1pxsPn7ToAYkQ_T3PrIb0,8959
|
|
13
|
-
pys3uploader-0.4.0a1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
14
|
-
pys3uploader-0.4.0a1.dist-info/top_level.txt,sha256=lVIFMMoUx7dj_myetBmOUQTJiOzz5VyDqchnQElmrWw,13
|
|
15
|
-
pys3uploader-0.4.0a1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|