PyS3Uploader 0.2.4a1__py3-none-any.whl → 0.4.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PyS3Uploader might be problematic. Click here for more details.
- pys3uploader/__init__.py +2 -0
- pys3uploader/logger.py +104 -0
- pys3uploader/metadata.py +11 -0
- pys3uploader/progress.py +39 -0
- pys3uploader/timer.py +54 -0
- {s3 → pys3uploader}/uploader.py +171 -58
- {s3 → pys3uploader}/utils.py +51 -4
- pys3uploader/version.py +1 -0
- {pys3uploader-0.2.4a1.dist-info → pys3uploader-0.4.0a1.dist-info}/METADATA +46 -7
- pys3uploader-0.4.0a1.dist-info/RECORD +15 -0
- pys3uploader-0.4.0a1.dist-info/top_level.txt +1 -0
- pys3uploader-0.2.4a1.dist-info/RECORD +0 -11
- pys3uploader-0.2.4a1.dist-info/top_level.txt +0 -1
- s3/__init__.py +0 -3
- s3/logger.py +0 -45
- {s3 → pys3uploader}/exceptions.py +0 -0
- {s3 → pys3uploader}/tree.py +0 -0
- {pys3uploader-0.2.4a1.dist-info → pys3uploader-0.4.0a1.dist-info}/LICENSE +0 -0
- {pys3uploader-0.2.4a1.dist-info → pys3uploader-0.4.0a1.dist-info}/WHEEL +0 -0
pys3uploader/__init__.py
ADDED
pys3uploader/logger.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Loads a default logger with StreamHandler set to DEBUG mode.
|
|
2
|
+
|
|
3
|
+
>>> logging.Logger
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from enum import IntEnum, StrEnum
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class LogHandler(StrEnum):
|
|
14
|
+
"""Logging handlers to choose from when default logger is used.
|
|
15
|
+
|
|
16
|
+
>>> LogHandler
|
|
17
|
+
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
file = "file"
|
|
21
|
+
stdout = "stdout"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class LogLevel(IntEnum):
    """Severity thresholds that can be requested for the package's default logger.

    Members carry the numeric constants of the ``logging`` module and can also be
    looked up by name, case-insensitively (``LogLevel("INFO")``).

    >>> LogLevel

    """

    debug = logging.DEBUG
    info = logging.INFO
    warning = logging.WARNING
    error = logging.ERROR

    @classmethod
    def _missing_(cls, value):
        """Resolve a member from its (case-insensitive) name when value lookup fails."""
        if not isinstance(value, str):
            return None
        # ``__members__`` maps member names to members; an unknown name yields ``None``,
        # which makes the enum machinery raise ``ValueError``.
        return cls.__members__.get(value.lower())
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def stream_handler() -> logging.StreamHandler:
    """Build a ``StreamHandler`` that uses the package's default log format.

    Returns:
        logging.StreamHandler:
        A new handler with the formatter from ``default_format`` attached.
    """
    console = logging.StreamHandler()
    console.setFormatter(fmt=default_format())
    return console
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def file_handler() -> logging.FileHandler:
    """Creates a ``FileHandler`` and assigns a default format to it.

    The log file is created inside a ``logs`` directory (relative to the current
    working directory) and is named after the current date and time.

    Returns:
        logging.FileHandler:
        Returns an instance of the ``FileHandler`` object.
    """
    os.makedirs("logs", exist_ok=True)
    # ``:`` is not a valid filename character on Windows, so the time part uses ``-``.
    filename = os.path.join("logs", datetime.now().strftime("PyS3Uploader_%d-%m-%Y_%H-%M.log"))
    handler = logging.FileHandler(filename, mode="a")
    handler.setFormatter(fmt=default_format())
    return handler
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def default_format() -> logging.Formatter:
    """Build the ``Formatter`` used by the package's log handlers.

    Returns:
        logging.Formatter:
        Formatter with the package's message layout and timestamp layout.
    """
    message_layout = "%(asctime)s - %(levelname)s - [%(module)s:%(lineno)d] - %(funcName)s - %(message)s"
    timestamp_layout = "%b-%d-%Y %I:%M:%S %p"
    return logging.Formatter(fmt=message_layout, datefmt=timestamp_layout)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def setup_logger(handler: LogHandler, level: LogLevel) -> logging.Logger:
    """Creates a default logger with the requested handler and level.

    Args:
        handler: Logging handler to use.
        level: Logging level to use.

    Returns:
        logging.Logger:
        Returns an instance of the ``Logger`` object.
    """
    logger = logging.getLogger(__name__)
    if handler == LogHandler.file:
        handler_type, factory = logging.FileHandler, file_handler
    elif handler == LogHandler.stdout:
        handler_type, factory = logging.StreamHandler, stream_handler
    else:
        handler_type = factory = None

    # ``getLogger`` hands back the same logger object on every call, so attaching a new
    # handler each time would emit every record once per call. The exact type is compared
    # (not ``isinstance``) because ``FileHandler`` is a subclass of ``StreamHandler``.
    if factory and not any(type(existing) is handler_type for existing in logger.handlers):
        logger.addHandler(hdlr=factory())

    logger.setLevel(level)
    return logger
|
pys3uploader/metadata.py
ADDED
pys3uploader/progress.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
|
|
3
|
+
from alive_progress import alive_bar
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ProgressPercentage:
    """Tracks progress of a file upload to S3 and updates the alive_bar.

    >>> ProgressPercentage

    """

    def __init__(self, filename: str, size: int, bar: "alive_bar"):
        """Initializes the progress tracker.

        Args:
            filename: Name of the file being uploaded.
            size: Total size of the file in bytes.
            bar: Bar handle whose ``text`` method is called to display progress.
        """
        self._filename = filename
        self._size = size
        self._seen_so_far = 0
        # Serialises updates, since the callback may be invoked from several threads.
        self._lock = threading.Lock()
        self._bar = bar

    def __call__(self, bytes_amount: int) -> None:
        """Callback method to update progress.

        Args:
            bytes_amount: Number of bytes transferred in the last chunk.
        """
        with self._lock:
            self._seen_so_far += bytes_amount
            # A size of 0 (empty file, or a size that could not be read) leaves nothing
            # to transfer: report it as complete instead of dividing by zero.
            percent = (self._seen_so_far / self._size) * 100 if self._size else 100.0
            bar_len = 20
            filled = int(bar_len * percent / 100)
            bar_str = "█" * filled + "." * (bar_len - filled)
            self._bar.text(f" || {self._filename} [{bar_str}] {percent:.0f}%")
|
pys3uploader/timer.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from threading import Timer
|
|
2
|
+
from typing import Any, Callable, Dict, Tuple
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class RepeatedTimer:
|
|
6
|
+
"""Instantiates RepeatedTimer object to kick off the threading.Timer object with custom intervals.
|
|
7
|
+
|
|
8
|
+
>>> RepeatedTimer
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
def __init__(
|
|
13
|
+
self,
|
|
14
|
+
interval: int,
|
|
15
|
+
function: Callable,
|
|
16
|
+
args: Tuple = None,
|
|
17
|
+
kwargs: Dict[str, Any] = None,
|
|
18
|
+
):
|
|
19
|
+
"""Repeats the ``Timer`` object from threading.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
interval: Interval in seconds.
|
|
23
|
+
function: Function to trigger with intervals.
|
|
24
|
+
args: Arguments for the function.
|
|
25
|
+
kwargs: Keyword arguments for the function.
|
|
26
|
+
"""
|
|
27
|
+
self._timer = None
|
|
28
|
+
self.interval = interval
|
|
29
|
+
self.function = function
|
|
30
|
+
self.args = args or ()
|
|
31
|
+
self.kwargs = kwargs or {}
|
|
32
|
+
self.is_running = False
|
|
33
|
+
|
|
34
|
+
def _run(self):
|
|
35
|
+
"""Triggers the target function."""
|
|
36
|
+
self.is_running = False
|
|
37
|
+
self.start()
|
|
38
|
+
self.function(*self.args, **self.kwargs)
|
|
39
|
+
|
|
40
|
+
def start(self):
|
|
41
|
+
"""Trigger target function if timer isn't running already."""
|
|
42
|
+
if not self.is_running:
|
|
43
|
+
self._timer = Timer(self.interval, self._run)
|
|
44
|
+
self._timer.start()
|
|
45
|
+
self.is_running = True
|
|
46
|
+
|
|
47
|
+
def stop(self):
|
|
48
|
+
"""Stop the timer and cancel all futures."""
|
|
49
|
+
self._timer.cancel()
|
|
50
|
+
self.is_running = False
|
|
51
|
+
|
|
52
|
+
def cancel(self):
|
|
53
|
+
"""Initiate cancellation."""
|
|
54
|
+
self.stop()
|
{s3 → pys3uploader}/uploader.py
RENAMED
|
@@ -1,23 +1,29 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import logging
|
|
2
3
|
import os
|
|
3
4
|
import time
|
|
5
|
+
from datetime import datetime, UTC
|
|
4
6
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
|
-
from typing import Dict, Iterable
|
|
7
|
+
from typing import Dict, Iterable, NoReturn
|
|
6
8
|
|
|
7
9
|
import boto3.resources.factory
|
|
8
10
|
import dotenv
|
|
11
|
+
from alive_progress import alive_bar
|
|
9
12
|
from botocore.config import Config
|
|
10
13
|
from botocore.exceptions import ClientError
|
|
11
|
-
from tqdm import tqdm
|
|
12
14
|
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from
|
|
15
|
+
from pys3uploader.exceptions import BucketNotFound
|
|
16
|
+
from pys3uploader.metadata import Metadata
|
|
17
|
+
from pys3uploader.timer import RepeatedTimer
|
|
18
|
+
from pys3uploader.logger import LogHandler, LogLevel, setup_logger
|
|
19
|
+
from pys3uploader.progress import ProgressPercentage
|
|
20
|
+
from pys3uploader.utils import (
|
|
16
21
|
RETRY_CONFIG,
|
|
17
22
|
UploadResults,
|
|
18
23
|
convert_seconds,
|
|
19
24
|
convert_to_folder_structure,
|
|
20
25
|
getenv,
|
|
26
|
+
size_converter,
|
|
21
27
|
urljoin,
|
|
22
28
|
)
|
|
23
29
|
|
|
@@ -45,6 +51,8 @@ class Uploader:
|
|
|
45
51
|
aws_secret_access_key: str = None,
|
|
46
52
|
retry_config: Config = RETRY_CONFIG,
|
|
47
53
|
logger: logging.Logger = None,
|
|
54
|
+
log_handler: LogHandler = LogHandler.stdout,
|
|
55
|
+
log_level: LogLevel = LogLevel.debug,
|
|
48
56
|
env_file: str = None,
|
|
49
57
|
):
|
|
50
58
|
"""Initiates all the necessary args and creates a boto3 session with retry logic.
|
|
@@ -63,6 +71,8 @@ class Uploader:
|
|
|
63
71
|
aws_access_key_id: AWS access key ID.
|
|
64
72
|
aws_secret_access_key: AWS secret access key.
|
|
65
73
|
logger: Bring your own logger.
|
|
74
|
+
log_handler: Default log handler, can be ``file`` or ``stdout``.
|
|
75
|
+
log_level: Default log level, can be ``debug``, ``info``, ``warning`` or ``error``.
|
|
66
76
|
env_file: Dotenv file (.env) filepath to load environment variables.
|
|
67
77
|
|
|
68
78
|
See Also:
|
|
@@ -85,7 +95,7 @@ class Uploader:
|
|
|
85
95
|
If a filepath is provided, PyS3Uploader loads it directly or searches the root directory for the file.
|
|
86
96
|
If no filepath is provided, PyS3Uploader searches the current directory for a .env file.
|
|
87
97
|
"""
|
|
88
|
-
self.logger = logger or
|
|
98
|
+
self.logger = logger or setup_logger(handler=LogHandler(log_handler), level=LogLevel(log_level))
|
|
89
99
|
self.env_file = env_file or getenv("ENV_FILE", default=".env")
|
|
90
100
|
|
|
91
101
|
# Check for env_file in current working directory
|
|
@@ -132,7 +142,15 @@ class Uploader:
|
|
|
132
142
|
self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = []
|
|
133
143
|
self.object_size_map: Dict[str, int] = {}
|
|
134
144
|
|
|
135
|
-
|
|
145
|
+
self.upload_files: Dict[str, str] = {}
|
|
146
|
+
self.file_size_map: Dict[str, int] = {}
|
|
147
|
+
|
|
148
|
+
self.timer = RepeatedTimer(
    function=self.metadata_uploader,
    # ``getenv`` treats every positional argument as an environment variable name,
    # so the fallback interval has to be passed through the ``default`` keyword.
    interval=int(getenv("METADATA_UPLOAD_INTERVAL", default=300)),
)
|
|
152
|
+
|
|
153
|
+
def init(self) -> None | NoReturn:
|
|
136
154
|
"""Instantiates the bucket instance.
|
|
137
155
|
|
|
138
156
|
Raises:
|
|
@@ -150,27 +168,68 @@ class Uploader:
|
|
|
150
168
|
assert os.path.exists(self.upload_dir)
|
|
151
169
|
except AssertionError:
|
|
152
170
|
raise ValueError(f"\n\n\tPath not found: {self.upload_dir}")
|
|
153
|
-
buckets = [bucket.name for bucket in self.s3.buckets.all()]
|
|
154
171
|
if not self.bucket_name:
|
|
155
|
-
raise ValueError(
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
raise BucketNotFound(f"\n\n\t{self.bucket_name} was not found in {_alias} account.\n\tAvailable: {buckets}")
|
|
172
|
+
raise ValueError("\n\n\tCannot proceed without a bucket name.")
|
|
173
|
+
if (buckets := [bucket.name for bucket in self.s3.buckets.all()]) and self.bucket_name not in buckets:
|
|
174
|
+
raise BucketNotFound(f"\n\n\t{self.bucket_name} was not found.\n\tAvailable: {buckets}")
|
|
159
175
|
self.upload_dir = os.path.abspath(self.upload_dir)
|
|
176
|
+
self.load_bucket_state()
|
|
177
|
+
|
|
178
|
+
def load_bucket_state(self):
    """Refresh the cached view of the bucket and the objects it currently holds.

    Sets ``self.bucket``, ``self.bucket_objects`` and ``self.object_size_map``
    (object key mapped to object size).
    """
    # noinspection PyUnresolvedReferences
    self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
    # noinspection PyUnresolvedReferences
    self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = list(self.bucket.objects.all())
    self.object_size_map = {summary.key: summary.size for summary in self.bucket_objects}
|
|
165
185
|
|
|
186
|
+
def load_local_state(self):
    """Collect the local files queued for upload, along with their sizes.

    Sets ``self.upload_files`` (result of ``_get_files``) and ``self.file_size_map``
    (file path mapped to the size reported by ``filesize``).
    """
    queued = self._get_files()
    self.upload_files = queued
    self.file_size_map = {path: self.filesize(path) for path in queued}
|
|
190
|
+
|
|
166
191
|
def exit(self) -> None:
    """Log the upload summary, publish the final metadata and log the run time."""
    succeeded = len(self.results.success)
    errored = len(self.results.failed)
    self.logger.info(
        "Total number of uploads: %d, success: %d, failed: %d", succeeded + errored, succeeded, errored
    )
    # Stop the timer and upload the final state as metadata file
    self.timer.stop()
    self.metadata_uploader()
    elapsed = time.time() - self.start
    self.logger.info("Run time: %s", convert_seconds(elapsed))
|
|
173
203
|
|
|
204
|
+
def filesize(self, filepath: str) -> int:
    """Look up the size of a local file, treating unreadable files as empty.

    Args:
        filepath: Full path of the file.

    Returns:
        int:
        Size of the file in bytes, or ``0`` when the size cannot be read (the error is logged).
    """
    try:
        size_in_bytes = os.path.getsize(filepath)
    except OSError as error:  # ``PermissionError`` is a subclass of ``OSError``
        self.logger.error(error)
        size_in_bytes = 0
    return size_in_bytes
|
|
219
|
+
|
|
220
|
+
def size_it(self) -> None:
    """Log the file count and combined size for the bucket and for the local queue."""
    s3_bytes = sum(self.object_size_map.values())
    local_bytes = sum(self.file_size_map.values())
    self.logger.info(
        "Files in S3: [#%d]: %s (%d bytes)", len(self.object_size_map), size_converter(s3_bytes), s3_bytes
    )
    self.logger.info(
        "Files local: [#%d]: %s (%d bytes)", len(self.upload_files), size_converter(local_bytes), local_bytes
    )
|
|
232
|
+
|
|
174
233
|
def _proceed_to_upload(self, filepath: str, objectpath: str) -> bool:
|
|
175
234
|
"""Compares file size if the object already exists in S3.
|
|
176
235
|
|
|
@@ -184,32 +243,43 @@ class Uploader:
|
|
|
184
243
|
"""
|
|
185
244
|
if self.overwrite:
|
|
186
245
|
return True
|
|
187
|
-
|
|
188
|
-
file_size = os.path.getsize(filepath)
|
|
189
|
-
except (OSError, PermissionError) as error:
|
|
190
|
-
self.logger.error(error)
|
|
191
|
-
file_size = 0
|
|
246
|
+
file_size = self.filesize(filepath)
|
|
192
247
|
# Indicates that the object path already exists in S3
|
|
193
248
|
if object_size := self.object_size_map.get(objectpath):
|
|
194
249
|
if object_size == file_size:
|
|
195
|
-
self.logger.info(
|
|
250
|
+
self.logger.info(
|
|
251
|
+
"S3 object %s exists, and size [%d bytes / %s] matches, skipping..",
|
|
252
|
+
objectpath,
|
|
253
|
+
object_size,
|
|
254
|
+
size_converter(object_size),
|
|
255
|
+
)
|
|
196
256
|
return False
|
|
197
257
|
self.logger.info(
|
|
198
|
-
"S3 object %s exists, but size mismatch. Local: [%d], S3: [%d]",
|
|
258
|
+
"S3 object %s exists, but size mismatch. Local: [%d bytes / %s], S3: [%d bytes / %s]",
|
|
259
|
+
objectpath,
|
|
260
|
+
file_size,
|
|
261
|
+
object_size,
|
|
262
|
+
size_converter(object_size),
|
|
199
263
|
)
|
|
200
264
|
else:
|
|
201
|
-
self.logger.debug(
|
|
265
|
+
self.logger.debug(
|
|
266
|
+
"S3 object '%s' of size [%d bytes / %s] doesn't exist, uploading..",
|
|
267
|
+
objectpath,
|
|
268
|
+
file_size,
|
|
269
|
+
size_converter(file_size),
|
|
270
|
+
)
|
|
202
271
|
return True
|
|
203
272
|
|
|
204
|
-
def _uploader(self, filepath: str, objectpath: str) -> None:
|
|
273
|
+
def _uploader(self, filepath: str, objectpath: str, callback: ProgressPercentage) -> None:
    """Upload one local file to the bucket unless the size check says to skip it.

    Args:
        filepath: Filepath to upload.
        objectpath: Object path ref in S3.
        callback: ProgressPercentage callback to track upload progress.
    """
    if not self._proceed_to_upload(filepath, objectpath):
        return
    self.bucket.upload_file(filepath, objectpath, Callback=callback)
|
|
213
283
|
|
|
214
284
|
def _get_files(self) -> Dict[str, str]:
|
|
215
285
|
"""Get a mapping for all the file path and object paths in upload directory.
|
|
@@ -252,19 +322,32 @@ class Uploader:
|
|
|
252
322
|
def run(self) -> None:
    """Upload the queued files one after another, then report the results."""
    self.init()
    self.load_local_state()
    self.size_it()
    self.timer.start()
    total_files = len(self.upload_files)

    self.logger.info(
        "%d files from '%s' will be uploaded to '%s' sequentially",
        total_files,
        self.upload_dir,
        self.bucket_name,
    )
    with alive_bar(total_files, title="Progress", bar="smooth", spinner="dots") as overall_bar:
        for filepath, objectpath in self.upload_files.items():
            tracker = ProgressPercentage(
                filename=os.path.basename(filepath), size=self.filesize(filepath), bar=overall_bar
            )
            # NOTE(review): only ``ClientError`` is recorded as a failure; confirm whether
            # ``upload_file`` can raise other exception types that should be handled too.
            try:
                self._uploader(filepath, objectpath, tracker)
            except ClientError as error:
                self.logger.error("Upload failed: %s", error)
                self.results.failed.append(filepath)
            except KeyboardInterrupt:
                self.logger.warning("Upload interrupted by user")
                break
            else:
                self.results.success.append(filepath)
            overall_bar()  # increment overall progress bar
    self.exit()
|
|
269
352
|
|
|
270
353
|
def run_in_parallel(self, max_workers: int = 5) -> None:
    """Uploads the queued files concurrently using a thread pool, then reports the results.

    Args:
        max_workers: Number of maximum threads to use.
    """
    # Verify and initiate bucket state
    self.init()
    # Verify and initiate local state
    self.load_local_state()
    self.size_it()
    self.timer.start()
    total_files = len(self.upload_files)

    self.logger.info(
        "%d files from '%s' will be uploaded to '%s' with maximum concurrency of: %d",
        total_files,
        self.upload_dir,
        self.bucket_name,
        max_workers,
    )
    with alive_bar(total_files, title="Progress", bar="smooth", spinner="dots") as overall_bar:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Remember which file each future belongs to: futures complete in arbitrary
            # order, so the submit loop's variable cannot be used to attribute results.
            futures = {}
            for filepath, objectpath in self.upload_files.items():
                progress_callback = ProgressPercentage(
                    filename=os.path.basename(filepath), size=self.filesize(filepath), bar=overall_bar
                )
                future = executor.submit(self._uploader, filepath, objectpath, callback=progress_callback)
                futures[future] = filepath

            for future in as_completed(futures):
                completed_file = futures[future]
                try:
                    future.result()
                    self.results.success.append(completed_file)
                except ClientError as error:
                    self.logger.error("Upload failed: %s", error)
                    self.results.failed.append(completed_file)
                overall_bar()  # Increment overall bar after each upload finishes
    self.exit()
|
|
305
392
|
|
|
393
|
+
def metadata_uploader(self) -> None:
    """Summarises the upload progress and stores it in the bucket as a JSON object.

    The bucket state is reloaded, the number and size of uploaded and pending files
    are computed, the summary is written to a file in the current working directory
    and that file is uploaded to the bucket under the same name.
    """
    # ``getenv`` treats every positional argument as an environment variable name,
    # so the fallback filename has to be passed through the ``default`` keyword.
    filename = objectpath = getenv("METADATA_FILENAME", default="METADATA.json")
    self.load_bucket_state()
    objects_uploaded = len(self.results.success)
    size_uploaded = sum(self.filesize(file) for file in self.results.success)

    # Everything queued that has not been recorded as a success counts as pending.
    pending_files = self.upload_files.keys() - self.results.success
    objects_pending = len(pending_files)
    size_pending = sum(self.filesize(file) for file in pending_files)

    metadata = Metadata(
        timestamp=datetime.now(tz=UTC).strftime("%A %B %d, %Y %H:%M:%S"),
        objects_uploaded=objects_uploaded,
        objects_pending=objects_pending,
        size_uploaded=size_converter(size_uploaded),
        size_pending=size_converter(size_pending),
    )
    # NOTE(review): ``Metadata`` is defined outside this view; confirm it is JSON
    # serialisable as-is, since the file below is written from ``metadata.__dict__``.
    self.logger.debug("\n%s\n", json.dumps(metadata, indent=2))
    self.logger.debug("Uploading metadata to S3")
    filepath = os.path.join(os.getcwd(), filename)
    with open(filepath, "w") as file:
        json.dump(metadata.__dict__, file, indent=2)
        file.flush()
    self.bucket.upload_file(filepath, objectpath)
|
|
418
|
+
|
|
306
419
|
def get_bucket_structure(self) -> str:
|
|
307
420
|
"""Gets all the objects in an S3 bucket and forms it into a hierarchical folder like representation.
|
|
308
421
|
|
{s3 → pys3uploader}/utils.py
RENAMED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
import math
|
|
1
2
|
import os
|
|
2
|
-
from typing import Dict, Set
|
|
3
|
+
from typing import Dict, Set, List
|
|
3
4
|
|
|
4
5
|
from botocore.config import Config
|
|
5
6
|
|
|
@@ -22,12 +23,21 @@ class UploadResults(dict):
|
|
|
22
23
|
|
|
23
24
|
"""
|
|
24
25
|
|
|
25
|
-
success:
|
|
26
|
-
failed:
|
|
26
|
+
success: List[str] = []
|
|
27
|
+
failed: List[str] = []
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
def getenv(*args, default: str = None) -> str:
|
|
30
|
-
"""Returns the key-ed environment variable or the default value.
|
|
31
|
+
"""Returns the key-ed environment variable or the default value.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
args: Environment variable keys to search for.
|
|
35
|
+
default: Default value to return if no environment variable is found.
|
|
36
|
+
|
|
37
|
+
Returns:
|
|
38
|
+
str:
|
|
39
|
+
Environment variable value or the default value.
|
|
40
|
+
"""
|
|
31
41
|
for key in args:
|
|
32
42
|
if value := os.environ.get(key.upper()) or os.environ.get(key.lower()):
|
|
33
43
|
return value
|
|
@@ -37,6 +47,9 @@ def getenv(*args, default: str = None) -> str:
|
|
|
37
47
|
def urljoin(*args) -> str:
|
|
38
48
|
"""Joins given arguments into a url. Trailing but not leading slashes are stripped for each argument.
|
|
39
49
|
|
|
50
|
+
Args:
|
|
51
|
+
args: Parts of the url to join.
|
|
52
|
+
|
|
40
53
|
Returns:
|
|
41
54
|
str:
|
|
42
55
|
Joined url.
|
|
@@ -67,6 +80,10 @@ def convert_to_folder_structure(sequence: Set[str]) -> str:
|
|
|
67
80
|
Args:
|
|
68
81
|
structure: Structure of folder objects as key-value pairs.
|
|
69
82
|
indent: Required indentation for the ASCII.
|
|
83
|
+
|
|
84
|
+
Returns:
|
|
85
|
+
str:
|
|
86
|
+
String representation of the folder structure.
|
|
70
87
|
"""
|
|
71
88
|
result = ""
|
|
72
89
|
for i, (key, value) in enumerate(structure.items()):
|
|
@@ -145,3 +162,33 @@ def convert_seconds(seconds: int | float, n_elem: int = 2) -> str:
|
|
|
145
162
|
|
|
146
163
|
list_ = time_parts[:n_elem]
|
|
147
164
|
return ", and ".join([", ".join(list_[:-1]), list_[-1]] if len(list_) > 2 else list_)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def format_nos(input_: float) -> int | float:
|
|
168
|
+
"""Removes ``.0`` float values.
|
|
169
|
+
|
|
170
|
+
Args:
|
|
171
|
+
input_: Strings or integers with ``.0`` at the end.
|
|
172
|
+
|
|
173
|
+
Returns:
|
|
174
|
+
int | float:
|
|
175
|
+
Int if found, else returns the received float value.
|
|
176
|
+
"""
|
|
177
|
+
return int(input_) if isinstance(input_, float) and input_.is_integer() else input_
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def size_converter(byte_size: int | float) -> str:
|
|
181
|
+
"""Gets the current memory consumed and converts it to human friendly format.
|
|
182
|
+
|
|
183
|
+
Args:
|
|
184
|
+
byte_size: Receives byte size as argument.
|
|
185
|
+
|
|
186
|
+
Returns:
|
|
187
|
+
str:
|
|
188
|
+
Converted understandable size.
|
|
189
|
+
"""
|
|
190
|
+
if not byte_size:
|
|
191
|
+
return "0 B"
|
|
192
|
+
size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
|
|
193
|
+
index = int(math.floor(math.log(byte_size, 1024)))
|
|
194
|
+
return f"{format_nos(round(byte_size / pow(1024, index), 2))} {size_name[index]}"
|
pys3uploader/version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
version = "0.4.0a1"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: PyS3Uploader
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0a1
|
|
4
4
|
Summary: Python module to upload objects to an S3 bucket.
|
|
5
5
|
Author-email: Vignesh Rao <svignesh1793@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -29,7 +29,7 @@ Project-URL: Homepage, https://github.com/thevickypedia/PyS3Uploader
|
|
|
29
29
|
Project-URL: Docs, https://thevickypedia.github.io/PyS3Uploader/
|
|
30
30
|
Project-URL: Source, https://github.com/thevickypedia/PyS3Uploader
|
|
31
31
|
Project-URL: Bug Tracker, https://github.com/thevickypedia/PyS3Uploader/issues
|
|
32
|
-
Keywords:
|
|
32
|
+
Keywords: pys3uploader
|
|
33
33
|
Classifier: Development Status :: 1 - Planning
|
|
34
34
|
Classifier: Intended Audience :: Information Technology
|
|
35
35
|
Classifier: Operating System :: OS Independent
|
|
@@ -39,9 +39,9 @@ Classifier: Topic :: Internet :: File Transfer Protocol (FTP)
|
|
|
39
39
|
Requires-Python: >=3.11
|
|
40
40
|
Description-Content-Type: text/markdown
|
|
41
41
|
License-File: LICENSE
|
|
42
|
+
Requires-Dist: alive-progress==3.3.*
|
|
42
43
|
Requires-Dist: boto3==1.40.*
|
|
43
44
|
Requires-Dist: python-dotenv==1.1.*
|
|
44
|
-
Requires-Dist: tqdm==4.67.*
|
|
45
45
|
Provides-Extra: dev
|
|
46
46
|
Requires-Dist: sphinx==5.1.1; extra == "dev"
|
|
47
47
|
Requires-Dist: pre-commit; extra == "dev"
|
|
@@ -76,6 +76,43 @@ Requires-Dist: recommonmark; extra == "dev"
|
|
|
76
76
|
# PyS3Uploader
|
|
77
77
|
Python module to upload an entire directory to an S3 bucket.
|
|
78
78
|
|
|
79
|
+
<details>
|
|
80
|
+
<summary><strong>Bucket Policy Required</strong></summary>
|
|
81
|
+
|
|
82
|
+
```json
|
|
83
|
+
{
|
|
84
|
+
"Version": "2012-10-17",
|
|
85
|
+
"Statement": [
|
|
86
|
+
{
|
|
87
|
+
"Sid": "ListBucketsForExistenceCheck",
|
|
88
|
+
"Effect": "Allow",
|
|
89
|
+
"Action": "s3:ListAllMyBuckets",
|
|
90
|
+
"Resource": "*"
|
|
91
|
+
},
|
|
92
|
+
{
|
|
93
|
+
"Sid": "ListAndUploadToSpecificBucket",
|
|
94
|
+
"Effect": "Allow",
|
|
95
|
+
"Action": [
|
|
96
|
+
"s3:ListBucket",
|
|
97
|
+
"s3:ListBucketMultipartUploads"
|
|
98
|
+
],
|
|
99
|
+
"Resource": "arn:aws:s3:::bucketname"
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
"Sid": "UploadObjectsToBucket",
|
|
103
|
+
"Effect": "Allow",
|
|
104
|
+
"Action": [
|
|
105
|
+
"s3:PutObject",
|
|
106
|
+
"s3:AbortMultipartUpload",
|
|
107
|
+
"s3:ListMultipartUploadParts"
|
|
108
|
+
],
|
|
109
|
+
"Resource": "arn:aws:s3:::bucketname/*"
|
|
110
|
+
}
|
|
111
|
+
]
|
|
112
|
+
}
|
|
113
|
+
```
|
|
114
|
+
</details>
|
|
115
|
+
|
|
79
116
|
### Installation
|
|
80
117
|
```shell
|
|
81
118
|
pip install PyS3Uploader
|
|
@@ -85,10 +122,10 @@ pip install PyS3Uploader
|
|
|
85
122
|
|
|
86
123
|
##### Upload objects in parallel
|
|
87
124
|
```python
|
|
88
|
-
import
|
|
125
|
+
import pys3uploader
|
|
89
126
|
|
|
90
127
|
if __name__ == '__main__':
|
|
91
|
-
wrapper =
|
|
128
|
+
wrapper = pys3uploader.Uploader(
|
|
92
129
|
bucket_name="BUCKET_NAME",
|
|
93
130
|
upload_dir="FULL_PATH_TO_UPLOAD",
|
|
94
131
|
exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
|
|
@@ -98,10 +135,10 @@ if __name__ == '__main__':
|
|
|
98
135
|
|
|
99
136
|
##### Upload objects in sequence
|
|
100
137
|
```python
|
|
101
|
-
import
|
|
138
|
+
import pys3uploader
|
|
102
139
|
|
|
103
140
|
if __name__ == '__main__':
|
|
104
|
-
wrapper =
|
|
141
|
+
wrapper = pys3uploader.Uploader(
|
|
105
142
|
bucket_name="BUCKET_NAME",
|
|
106
143
|
upload_dir="FULL_PATH_TO_UPLOAD",
|
|
107
144
|
exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
|
|
@@ -121,6 +158,8 @@ if __name__ == '__main__':
|
|
|
121
158
|
- **file_exclusion** - Sequence of files to exclude during upload. Defaults to ``None``
|
|
122
159
|
- **folder_exclusion** - Sequence of directories to exclude during upload. Defaults to ``None``
|
|
123
160
|
- **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
|
|
161
|
+
- **log_handler** - Choose between `stdout` and `file` logging. Defaults to `pys3uploader.LogHandler.stdout`
|
|
162
|
+
- **log_level** - Choose the logging level. Defaults to `pys3uploader.LogLevel.debug`
|
|
124
163
|
- **env_file** - Path to a `.env` file for loading environment variables. Defaults to scanning the current directory.
|
|
125
164
|
<br><br>
|
|
126
165
|
- **region_name** - AWS region name. Defaults to the env var `AWS_DEFAULT_REGION`
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
pys3uploader/__init__.py,sha256=EqMScWbJNV4UWeMg4fMko2KB18xL2CO3a3o_od0H0Lc,124
|
|
2
|
+
pys3uploader/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
|
|
3
|
+
pys3uploader/logger.py,sha256=z9JEnyf4nHIakey0bAaCgEN7oXOYJYOpskZyM_4s-D4,2678
|
|
4
|
+
pys3uploader/metadata.py,sha256=tOOoLh2vISfH-GfH3yBcA_xtEjRwomaw7sCLEaDRK-8,230
|
|
5
|
+
pys3uploader/progress.py,sha256=IladNMXLBhkPpxOntpANTam_hC9OWosmNDmdbweDNYM,1195
|
|
6
|
+
pys3uploader/timer.py,sha256=qN2XNrGEyP3stsK3McvhE3VvIiUFh7mv4rbp5WDeyVU,1498
|
|
7
|
+
pys3uploader/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
|
|
8
|
+
pys3uploader/uploader.py,sha256=h5DYQA2yv0fQ2SSyAnAl8SsgJUajmN_o1PdMSqMbACM,18588
|
|
9
|
+
pys3uploader/utils.py,sha256=_2RYKUTyrQzwkxo7fSiLb5ASrpjcNpb3kZHqy_wByRk,5755
|
|
10
|
+
pys3uploader/version.py,sha256=VAwBBgd_skAqJS9UL1T_xDXryTqN5m58fbTTEXcKxgM,20
|
|
11
|
+
pys3uploader-0.4.0a1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
|
|
12
|
+
pys3uploader-0.4.0a1.dist-info/METADATA,sha256=FdJdNSesnP1xHfb4il5HBw1pxsPn7ToAYkQ_T3PrIb0,8959
|
|
13
|
+
pys3uploader-0.4.0a1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
14
|
+
pys3uploader-0.4.0a1.dist-info/top_level.txt,sha256=lVIFMMoUx7dj_myetBmOUQTJiOzz5VyDqchnQElmrWw,13
|
|
15
|
+
pys3uploader-0.4.0a1.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pys3uploader
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
s3/__init__.py,sha256=wolI_hcB1EXMCV8-uFu5Gri6ZEn_a3GZ9OgKSEBQUxA,68
|
|
2
|
-
s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
|
|
3
|
-
s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
|
|
4
|
-
s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
|
|
5
|
-
s3/uploader.py,sha256=cXH4lEyLeboKvG7TAtfyYy85BZ5y-rlROHfGqvlv0tc,13880
|
|
6
|
-
s3/utils.py,sha256=NbF28CYviK_St5qd1EOumMVyus9BvQON7clUFeR_SEQ,4473
|
|
7
|
-
pys3uploader-0.2.4a1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
|
|
8
|
-
pys3uploader-0.2.4a1.dist-info/METADATA,sha256=p038fM45k9uX7A2QZkpKSypumgJsYNU13crP1Tn56qQ,7797
|
|
9
|
-
pys3uploader-0.2.4a1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
10
|
-
pys3uploader-0.2.4a1.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
|
|
11
|
-
pys3uploader-0.2.4a1.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
s3
|
s3/__init__.py
DELETED
s3/logger.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
"""Loads a default logger with StreamHandler set to DEBUG mode.
|
|
2
|
-
|
|
3
|
-
>>> logging.Logger
|
|
4
|
-
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import logging
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def default_handler() -> logging.StreamHandler:
|
|
11
|
-
"""Creates a ``StreamHandler`` and assigns a default format to it.
|
|
12
|
-
|
|
13
|
-
Returns:
|
|
14
|
-
logging.StreamHandler:
|
|
15
|
-
Returns an instance of the ``StreamHandler`` object.
|
|
16
|
-
"""
|
|
17
|
-
handler = logging.StreamHandler()
|
|
18
|
-
handler.setFormatter(fmt=default_format())
|
|
19
|
-
return handler
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def default_format() -> logging.Formatter:
|
|
23
|
-
"""Creates a logging ``Formatter`` with a custom message and datetime format.
|
|
24
|
-
|
|
25
|
-
Returns:
|
|
26
|
-
logging.Formatter:
|
|
27
|
-
Returns an instance of the ``Formatter`` object.
|
|
28
|
-
"""
|
|
29
|
-
return logging.Formatter(
|
|
30
|
-
fmt="%(asctime)s - %(levelname)s - [%(module)s:%(lineno)d] - %(funcName)s - %(message)s",
|
|
31
|
-
datefmt="%b-%d-%Y %I:%M:%S %p",
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def default_logger() -> logging.Logger:
|
|
36
|
-
"""Creates a default logger with debug mode enabled.
|
|
37
|
-
|
|
38
|
-
Returns:
|
|
39
|
-
logging.Logger:
|
|
40
|
-
Returns an instance of the ``Logger`` object.
|
|
41
|
-
"""
|
|
42
|
-
logger = logging.getLogger(__name__)
|
|
43
|
-
logger.addHandler(hdlr=default_handler())
|
|
44
|
-
logger.setLevel(level=logging.DEBUG)
|
|
45
|
-
return logger
|
|
File without changes
|
{s3 → pys3uploader}/tree.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|