PyS3Uploader 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PyS3Uploader has been flagged as potentially problematic; see the registry's advisory page for more details.
- {pys3uploader-0.1.2.dist-info → pys3uploader-0.2.1.dist-info}/METADATA +3 -1
- pys3uploader-0.2.1.dist-info/RECORD +11 -0
- s3/__init__.py +1 -1
- s3/uploader.py +77 -21
- s3/utils.py +14 -0
- pys3uploader-0.1.2.dist-info/RECORD +0 -11
- {pys3uploader-0.1.2.dist-info → pys3uploader-0.2.1.dist-info}/LICENSE +0 -0
- {pys3uploader-0.1.2.dist-info → pys3uploader-0.2.1.dist-info}/WHEEL +0 -0
- {pys3uploader-0.1.2.dist-info → pys3uploader-0.2.1.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: PyS3Uploader
|
|
3
|
-
Version: 0.1
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Python module to upload objects to an S3 bucket.
|
|
5
5
|
Author-email: Vignesh Rao <svignesh1793@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -115,6 +115,8 @@ if __name__ == '__main__':
|
|
|
115
115
|
#### Optional kwargs
|
|
116
116
|
- **s3_prefix** - S3 object prefix for each file. Defaults to ``None``
|
|
117
117
|
- **exclude_path** - Path in ``upload_dir`` that has to be excluded in object keys. Defaults to `None`
|
|
118
|
+
- **skip_dot_files** - Boolean flag to skip dot files. Defaults to ``True``
|
|
119
|
+
- **overwrite** - Boolean flag to overwrite files present in S3. Defaults to ``False``
|
|
118
120
|
- **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
|
|
119
121
|
<br><br>
|
|
120
122
|
- **region_name** - AWS region name. Defaults to the env var `AWS_DEFAULT_REGION`
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
s3/__init__.py,sha256=IqcPR9iWMw0GDBEmKvLzW7P-AhInTkwRklkvYgiT1Xc,66
|
|
2
|
+
s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
|
|
3
|
+
s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
|
|
4
|
+
s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
|
|
5
|
+
s3/uploader.py,sha256=v6TGdm8EcAldoIw3GOAwkuoUzi9b9IRj8b94hl1Pkyw,11454
|
|
6
|
+
s3/utils.py,sha256=dd1OeLbswLzFVyjYiXixkJlFsoGWRtRCOHha6wLG5zQ,2485
|
|
7
|
+
pys3uploader-0.2.1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
|
|
8
|
+
pys3uploader-0.2.1.dist-info/METADATA,sha256=NA7x6YqpWEKvn3XEYzVug7XTl1vPcwiphUzWtMvlzHE,7449
|
|
9
|
+
pys3uploader-0.2.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
10
|
+
pys3uploader-0.2.1.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
|
|
11
|
+
pys3uploader-0.2.1.dist-info/RECORD,,
|
s3/__init__.py
CHANGED
s3/uploader.py
CHANGED
|
@@ -11,7 +11,13 @@ from tqdm import tqdm
|
|
|
11
11
|
|
|
12
12
|
from s3.exceptions import BucketNotFound
|
|
13
13
|
from s3.logger import default_logger
|
|
14
|
-
from s3.utils import
|
|
14
|
+
from s3.utils import (
|
|
15
|
+
RETRY_CONFIG,
|
|
16
|
+
UploadResults,
|
|
17
|
+
convert_to_folder_structure,
|
|
18
|
+
getenv,
|
|
19
|
+
urljoin,
|
|
20
|
+
)
|
|
15
21
|
|
|
16
22
|
|
|
17
23
|
class Uploader:
|
|
@@ -21,18 +27,19 @@ class Uploader:
|
|
|
21
27
|
|
|
22
28
|
"""
|
|
23
29
|
|
|
24
|
-
RETRY_CONFIG: Config = Config(retries={"max_attempts": 10, "mode": "standard"})
|
|
25
|
-
|
|
26
30
|
def __init__(
|
|
27
31
|
self,
|
|
28
32
|
bucket_name: str,
|
|
29
33
|
upload_dir: str,
|
|
30
34
|
s3_prefix: str = None,
|
|
31
35
|
exclude_path: str = None,
|
|
36
|
+
skip_dot_files: bool = True,
|
|
37
|
+
overwrite: bool = False,
|
|
32
38
|
region_name: str = None,
|
|
33
39
|
profile_name: str = None,
|
|
34
40
|
aws_access_key_id: str = None,
|
|
35
41
|
aws_secret_access_key: str = None,
|
|
42
|
+
retry_config: Config = RETRY_CONFIG,
|
|
36
43
|
logger: logging.Logger = None,
|
|
37
44
|
):
|
|
38
45
|
"""Initiates all the necessary args and creates a boto3 session with retry logic.
|
|
@@ -42,6 +49,8 @@ class Uploader:
|
|
|
42
49
|
upload_dir: Full path of the directory to be uploaded.
|
|
43
50
|
s3_prefix: Particular bucket prefix within which the upload should happen.
|
|
44
51
|
exclude_path: Full directory path to exclude from S3 object prefix.
|
|
52
|
+
skip_dot_files: Boolean flag to skip dot files.
|
|
53
|
+
overwrite: Boolean flag to overwrite files in S3.
|
|
45
54
|
region_name: Name of the AWS region.
|
|
46
55
|
profile_name: AWS profile name.
|
|
47
56
|
aws_access_key_id: AWS access key ID.
|
|
@@ -49,18 +58,18 @@ class Uploader:
|
|
|
49
58
|
logger: Bring your own logger.
|
|
50
59
|
|
|
51
60
|
See Also:
|
|
61
|
+
s3_prefix:
|
|
62
|
+
If provided, ``s3_prefix`` will always be attached to each object.
|
|
63
|
+
|
|
64
|
+
If ``s3_prefix`` is set to: ``2025``, then the file path
|
|
65
|
+
``/home/ubuntu/Desktop/S3Upload/sub/photo.jpg`` will be uploaded as ``2025/S3Upload/sub/photo.jpg``
|
|
66
|
+
|
|
52
67
|
exclude_path:
|
|
53
68
|
When upload directory is "/home/ubuntu/Desktop/S3Upload", each file will naturally have the full prefix.
|
|
54
69
|
However, this behavior can be avoided by specifying the ``exclude_path`` parameter.
|
|
55
70
|
|
|
56
71
|
If exclude_path is set to: ``/home/ubuntu/Desktop``, then the file path
|
|
57
72
|
``/home/ubuntu/Desktop/S3Upload/sub-dir/photo.jpg`` will be uploaded as ``S3Upload/sub-dir/photo.jpg``
|
|
58
|
-
|
|
59
|
-
s3_prefix:
|
|
60
|
-
If provided, ``s3_prefix`` will always be attached to each object.
|
|
61
|
-
|
|
62
|
-
If ``s3_prefix`` is set to: ``2025``, then the file path
|
|
63
|
-
``/home/ubuntu/Desktop/S3Upload/sub/photo.jpg`` will be uploaded as ``2025/S3Upload/sub/photo.jpg``
|
|
64
73
|
"""
|
|
65
74
|
self.session = boto3.Session(
|
|
66
75
|
profile_name=profile_name or getenv("PROFILE_NAME"),
|
|
@@ -68,17 +77,26 @@ class Uploader:
|
|
|
68
77
|
aws_access_key_id=aws_access_key_id or getenv("AWS_ACCESS_KEY_ID"),
|
|
69
78
|
aws_secret_access_key=aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY"),
|
|
70
79
|
)
|
|
71
|
-
self.s3 = self.session.resource(service_name="s3", config=
|
|
80
|
+
self.s3 = self.session.resource(service_name="s3", config=retry_config)
|
|
81
|
+
|
|
72
82
|
self.logger = logger or default_logger()
|
|
83
|
+
|
|
84
|
+
self.bucket_name = bucket_name
|
|
73
85
|
self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
|
|
74
86
|
self.s3_prefix = s3_prefix
|
|
75
87
|
self.exclude_path = exclude_path
|
|
76
|
-
self.
|
|
77
|
-
|
|
78
|
-
|
|
88
|
+
self.skip_dot_files = skip_dot_files
|
|
89
|
+
self.overwrite = overwrite
|
|
90
|
+
|
|
79
91
|
self.results = UploadResults()
|
|
80
92
|
self.start = time.time()
|
|
81
93
|
|
|
94
|
+
# noinspection PyUnresolvedReferences
|
|
95
|
+
self.bucket: boto3.resources.factory.s3.Bucket = None
|
|
96
|
+
# noinspection PyUnresolvedReferences
|
|
97
|
+
self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = []
|
|
98
|
+
self.object_size_map: Dict[str, int] = {}
|
|
99
|
+
|
|
82
100
|
def init(self) -> None:
|
|
83
101
|
"""Instantiates the bucket instance.
|
|
84
102
|
|
|
@@ -106,6 +124,9 @@ class Uploader:
|
|
|
106
124
|
self.upload_dir = os.path.abspath(self.upload_dir)
|
|
107
125
|
# noinspection PyUnresolvedReferences
|
|
108
126
|
self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
|
|
127
|
+
# noinspection PyUnresolvedReferences
|
|
128
|
+
self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = [obj for obj in self.bucket.objects.all()]
|
|
129
|
+
self.object_size_map = {obj.key: obj.size for obj in self.bucket_objects}
|
|
109
130
|
|
|
110
131
|
def exit(self) -> None:
|
|
111
132
|
"""Exits after printing results, and run time."""
|
|
@@ -115,25 +136,57 @@ class Uploader:
|
|
|
115
136
|
)
|
|
116
137
|
self.logger.info("Run Time: %.2fs", time.time() - self.start)
|
|
117
138
|
|
|
118
|
-
def
|
|
139
|
+
def _proceed_to_upload(self, filepath: str, objectpath: str) -> bool:
|
|
140
|
+
"""Compares file size if the object already exists in S3.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
filepath: Source filepath.
|
|
144
|
+
objectpath: S3 object path.
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
bool:
|
|
148
|
+
Returns a boolean flag to indicate upload flag.
|
|
149
|
+
"""
|
|
150
|
+
if self.overwrite:
|
|
151
|
+
return True
|
|
152
|
+
# Indicates that the object path already exists in S3
|
|
153
|
+
if object_size := self.object_size_map.get(objectpath):
|
|
154
|
+
try:
|
|
155
|
+
file_size = os.path.getsize(filepath)
|
|
156
|
+
except (OSError, PermissionError) as error:
|
|
157
|
+
self.logger.error(error)
|
|
158
|
+
return True
|
|
159
|
+
if object_size == file_size:
|
|
160
|
+
self.logger.info("S3 object %s exists, and size [%d] matches, skipping..", objectpath, object_size)
|
|
161
|
+
return False
|
|
162
|
+
self.logger.info(
|
|
163
|
+
"S3 object %s exists, but size mismatch. Local: [%d], S3: [%d]", objectpath, file_size, object_size
|
|
164
|
+
)
|
|
165
|
+
return True
|
|
166
|
+
|
|
167
|
+
def _uploader(self, filepath: str, objectpath: str) -> None:
|
|
119
168
|
"""Uploads the filepath to the specified S3 bucket.
|
|
120
169
|
|
|
121
170
|
Args:
|
|
122
|
-
objectpath: Object path ref in S3.
|
|
123
171
|
filepath: Filepath to upload.
|
|
172
|
+
objectpath: Object path ref in S3.
|
|
124
173
|
"""
|
|
125
|
-
self.
|
|
174
|
+
if self._proceed_to_upload(filepath, objectpath):
|
|
175
|
+
self.bucket.upload_file(filepath, objectpath)
|
|
126
176
|
|
|
127
177
|
def _get_files(self) -> Dict[str, str]:
|
|
128
178
|
"""Get a mapping for all the file path and object paths in upload directory.
|
|
129
179
|
|
|
130
180
|
Returns:
|
|
131
181
|
Dict[str, str]:
|
|
132
|
-
Returns a
|
|
182
|
+
Returns a key-value pair of filepath and objectpath.
|
|
133
183
|
"""
|
|
134
184
|
files_to_upload = {}
|
|
135
185
|
for __path, __directory, __files in os.walk(self.upload_dir):
|
|
136
186
|
for file_ in __files:
|
|
187
|
+
if self.skip_dot_files and file_.startswith("."):
|
|
188
|
+
self.logger.info("Skipping dot file: %s", file_)
|
|
189
|
+
continue
|
|
137
190
|
file_path = os.path.join(__path, file_)
|
|
138
191
|
if self.exclude_path:
|
|
139
192
|
relative_path = file_path.replace(self.exclude_path, "")
|
|
@@ -149,7 +202,7 @@ class Uploader:
|
|
|
149
202
|
url_parts.extend(relative_path.split(os.sep))
|
|
150
203
|
# Remove falsy values using filter - "None", "bool", "len" or "lambda item: item"
|
|
151
204
|
object_path = urljoin(*filter(None, url_parts))
|
|
152
|
-
files_to_upload[
|
|
205
|
+
files_to_upload[file_path] = object_path
|
|
153
206
|
return files_to_upload
|
|
154
207
|
|
|
155
208
|
def run(self) -> None:
|
|
@@ -163,7 +216,7 @@ class Uploader:
|
|
|
163
216
|
keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
|
|
164
217
|
):
|
|
165
218
|
try:
|
|
166
|
-
self._uploader(
|
|
219
|
+
self._uploader(filepath=filepath, objectpath=objectpath)
|
|
167
220
|
self.results.success += 1
|
|
168
221
|
except ClientError as error:
|
|
169
222
|
self.logger.error(error)
|
|
@@ -187,7 +240,10 @@ class Uploader:
|
|
|
187
240
|
max_workers,
|
|
188
241
|
)
|
|
189
242
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
190
|
-
futures = [
|
|
243
|
+
futures = [
|
|
244
|
+
executor.submit(self._uploader, **dict(filepath=filepath, objectpath=objectpath))
|
|
245
|
+
for filepath, objectpath in keys.items()
|
|
246
|
+
]
|
|
191
247
|
for future in tqdm(
|
|
192
248
|
iterable=as_completed(futures),
|
|
193
249
|
total=len(futures),
|
|
@@ -212,7 +268,7 @@ class Uploader:
|
|
|
212
268
|
"""
|
|
213
269
|
self.init()
|
|
214
270
|
# Using list and set will yield the same results but using set we can isolate directories from files
|
|
215
|
-
return convert_to_folder_structure(set(
|
|
271
|
+
return convert_to_folder_structure(set(obj.key for obj in self.bucket_objects))
|
|
216
272
|
|
|
217
273
|
def print_bucket_structure(self) -> None:
|
|
218
274
|
"""Prints all the objects in an S3 bucket with a folder like representation."""
|
s3/utils.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from typing import Dict, Set
|
|
3
3
|
|
|
4
|
+
from botocore.config import Config
|
|
5
|
+
|
|
4
6
|
|
|
5
7
|
class UploadResults(dict):
|
|
6
8
|
"""Object to store results of S3 upload.
|
|
@@ -13,6 +15,18 @@ class UploadResults(dict):
|
|
|
13
15
|
failed: int = 0
|
|
14
16
|
|
|
15
17
|
|
|
18
|
+
RETRY_CONFIG: Config = Config(
|
|
19
|
+
retries={
|
|
20
|
+
"max_attempts": 10,
|
|
21
|
+
"mode": "adaptive", # Adaptive retry mode with jitter
|
|
22
|
+
"total_max_attempts": 20, # Max retries across all requests
|
|
23
|
+
},
|
|
24
|
+
# Adding custom timeouts here:
|
|
25
|
+
connect_timeout=5, # 5 seconds for establishing a connection
|
|
26
|
+
read_timeout=30, # 30 seconds to wait for a response from the server
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
16
30
|
def getenv(*args, default: str = None) -> str:
|
|
17
31
|
"""Returns the key-ed environment variable or the default value."""
|
|
18
32
|
for key in args:
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
s3/__init__.py,sha256=qSltnC7r3AjwiYWzsD9JUs8SzeBEV16nrHldiWlrxtY,66
|
|
2
|
-
s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
|
|
3
|
-
s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
|
|
4
|
-
s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
|
|
5
|
-
s3/uploader.py,sha256=kkv7d2EaMH3OsoIJgTx7yRUd00s0n9PbRbjj6Rm7qdA,9355
|
|
6
|
-
s3/utils.py,sha256=0kcG0aE2olHhC8thaUEwx2J8tOI2-2TGCk6E6U-PiKw,2058
|
|
7
|
-
pys3uploader-0.1.2.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
|
|
8
|
-
pys3uploader-0.1.2.dist-info/METADATA,sha256=GtQq-ZDiZEMpl2CEs4VJw4AQ8tf5rzcfgjDu68oHX6c,7286
|
|
9
|
-
pys3uploader-0.1.2.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
10
|
-
pys3uploader-0.1.2.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
|
|
11
|
-
pys3uploader-0.1.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|