PyS3Uploader 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PyS3Uploader might be problematic. Click here for more details.
- {pys3uploader-0.1.1.dist-info → pys3uploader-0.2.0.dist-info}/METADATA +2 -2
- pys3uploader-0.2.0.dist-info/RECORD +11 -0
- s3/__init__.py +1 -1
- s3/uploader.py +75 -13
- s3/utils.py +40 -0
- pys3uploader-0.1.1.dist-info/RECORD +0 -11
- {pys3uploader-0.1.1.dist-info → pys3uploader-0.2.0.dist-info}/LICENSE +0 -0
- {pys3uploader-0.1.1.dist-info → pys3uploader-0.2.0.dist-info}/WHEEL +0 -0
- {pys3uploader-0.1.1.dist-info → pys3uploader-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: PyS3Uploader
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Python module to upload objects to an S3 bucket.
|
|
5
5
|
Author-email: Vignesh Rao <svignesh1793@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -63,7 +63,7 @@ Requires-Dist: recommonmark; extra == "dev"
|
|
|
63
63
|
|
|
64
64
|
**Activity**
|
|
65
65
|
|
|
66
|
-
[][repo]
|
|
67
67
|
[][repo]
|
|
68
68
|
[][repo]
|
|
69
69
|
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
s3/__init__.py,sha256=yLvvl4-uTLZwhdhCMQpWq5juX_zFuYAfKSf4aB0WjZw,66
|
|
2
|
+
s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
|
|
3
|
+
s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
|
|
4
|
+
s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
|
|
5
|
+
s3/uploader.py,sha256=IAlFrEjfBuexrfmBPGN9OZAfHjQuwcGRzWi2es0r_fU,11154
|
|
6
|
+
s3/utils.py,sha256=0kcG0aE2olHhC8thaUEwx2J8tOI2-2TGCk6E6U-PiKw,2058
|
|
7
|
+
pys3uploader-0.2.0.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
|
|
8
|
+
pys3uploader-0.2.0.dist-info/METADATA,sha256=IXSmHXJJndlnd_6MHlpZrcVILPni8VUbVNJYQEjMIR8,7286
|
|
9
|
+
pys3uploader-0.2.0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
10
|
+
pys3uploader-0.2.0.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
|
|
11
|
+
pys3uploader-0.2.0.dist-info/RECORD,,
|
s3/__init__.py
CHANGED
s3/uploader.py
CHANGED
|
@@ -11,7 +11,7 @@ from tqdm import tqdm
|
|
|
11
11
|
|
|
12
12
|
from s3.exceptions import BucketNotFound
|
|
13
13
|
from s3.logger import default_logger
|
|
14
|
-
from s3.utils import UploadResults, getenv, urljoin
|
|
14
|
+
from s3.utils import UploadResults, convert_to_folder_structure, getenv, urljoin
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class Uploader:
|
|
@@ -29,6 +29,7 @@ class Uploader:
|
|
|
29
29
|
upload_dir: str,
|
|
30
30
|
s3_prefix: str = None,
|
|
31
31
|
exclude_path: str = None,
|
|
32
|
+
overwrite: bool = False,
|
|
32
33
|
region_name: str = None,
|
|
33
34
|
profile_name: str = None,
|
|
34
35
|
aws_access_key_id: str = None,
|
|
@@ -42,6 +43,7 @@ class Uploader:
|
|
|
42
43
|
upload_dir: Full path of the directory to be uploaded.
|
|
43
44
|
s3_prefix: Particular bucket prefix within which the upload should happen.
|
|
44
45
|
exclude_path: Full directory path to exclude from S3 object prefix.
|
|
46
|
+
overwrite: Boolean flag to overwrite files in S3.
|
|
45
47
|
region_name: Name of the AWS region.
|
|
46
48
|
profile_name: AWS profile name.
|
|
47
49
|
aws_access_key_id: AWS access key ID.
|
|
@@ -69,16 +71,24 @@ class Uploader:
|
|
|
69
71
|
aws_secret_access_key=aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY"),
|
|
70
72
|
)
|
|
71
73
|
self.s3 = self.session.resource(service_name="s3", config=self.RETRY_CONFIG)
|
|
74
|
+
|
|
72
75
|
self.logger = logger or default_logger()
|
|
76
|
+
|
|
77
|
+
self.bucket_name = bucket_name
|
|
73
78
|
self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
|
|
74
79
|
self.s3_prefix = s3_prefix
|
|
75
80
|
self.exclude_path = exclude_path
|
|
76
|
-
self.
|
|
77
|
-
|
|
78
|
-
self.bucket: boto3.resources.factory.s3.Bucket = None
|
|
81
|
+
self.overwrite = overwrite
|
|
82
|
+
|
|
79
83
|
self.results = UploadResults()
|
|
80
84
|
self.start = time.time()
|
|
81
85
|
|
|
86
|
+
# noinspection PyUnresolvedReferences
|
|
87
|
+
self.bucket: boto3.resources.factory.s3.Bucket = None
|
|
88
|
+
# noinspection PyUnresolvedReferences
|
|
89
|
+
self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = []
|
|
90
|
+
self.object_size_map: Dict[str, int] = {}
|
|
91
|
+
|
|
82
92
|
def init(self) -> None:
|
|
83
93
|
"""Instantiates the bucket instance.
|
|
84
94
|
|
|
@@ -106,6 +116,9 @@ class Uploader:
|
|
|
106
116
|
self.upload_dir = os.path.abspath(self.upload_dir)
|
|
107
117
|
# noinspection PyUnresolvedReferences
|
|
108
118
|
self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
|
|
119
|
+
# noinspection PyUnresolvedReferences
|
|
120
|
+
self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = [obj for obj in self.bucket.objects.all()]
|
|
121
|
+
self.object_size_map = {obj.key: obj.size for obj in self.bucket_objects}
|
|
109
122
|
|
|
110
123
|
def exit(self) -> None:
|
|
111
124
|
"""Exits after printing results, and run time."""
|
|
@@ -115,28 +128,59 @@ class Uploader:
|
|
|
115
128
|
)
|
|
116
129
|
self.logger.info("Run Time: %.2fs", time.time() - self.start)
|
|
117
130
|
|
|
118
|
-
def
|
|
131
|
+
def _proceed_to_upload(self, filepath: str, objectpath: str) -> bool:
|
|
132
|
+
"""Compares file size if the object already exists in S3.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
filepath: Source filepath.
|
|
136
|
+
objectpath: S3 object path.
|
|
137
|
+
|
|
138
|
+
Returns:
|
|
139
|
+
bool:
|
|
140
|
+
Returns a boolean flag to indicate upload flag.
|
|
141
|
+
"""
|
|
142
|
+
if self.overwrite:
|
|
143
|
+
return True
|
|
144
|
+
# Indicates that the object path already exists in S3
|
|
145
|
+
if object_size := self.object_size_map.get(objectpath):
|
|
146
|
+
try:
|
|
147
|
+
file_size = os.path.getsize(filepath)
|
|
148
|
+
except (OSError, PermissionError) as error:
|
|
149
|
+
self.logger.error(error)
|
|
150
|
+
return True
|
|
151
|
+
if object_size == file_size:
|
|
152
|
+
self.logger.info("S3 object %s exists, and size [%d] matches, skipping..", objectpath, object_size)
|
|
153
|
+
return False
|
|
154
|
+
self.logger.info(
|
|
155
|
+
"S3 object %s exists, but size mismatch. Local: [%d], S3: [%d]", objectpath, file_size, object_size
|
|
156
|
+
)
|
|
157
|
+
return True
|
|
158
|
+
|
|
159
|
+
def _uploader(self, filepath: str, objectpath: str) -> None:
|
|
119
160
|
"""Uploads the filepath to the specified S3 bucket.
|
|
120
161
|
|
|
121
162
|
Args:
|
|
122
|
-
objectpath: Object path ref in S3.
|
|
123
163
|
filepath: Filepath to upload.
|
|
164
|
+
objectpath: Object path ref in S3.
|
|
124
165
|
"""
|
|
125
|
-
self.
|
|
166
|
+
if self._proceed_to_upload(filepath, objectpath):
|
|
167
|
+
self.bucket.upload_file(filepath, objectpath)
|
|
126
168
|
|
|
127
169
|
def _get_files(self) -> Dict[str, str]:
|
|
128
170
|
"""Get a mapping for all the file path and object paths in upload directory.
|
|
129
171
|
|
|
130
172
|
Returns:
|
|
131
173
|
Dict[str, str]:
|
|
132
|
-
Returns a
|
|
174
|
+
Returns a key-value pair of filepath and objectpath.
|
|
133
175
|
"""
|
|
134
176
|
files_to_upload = {}
|
|
135
177
|
for __path, __directory, __files in os.walk(self.upload_dir):
|
|
136
178
|
for file_ in __files:
|
|
137
179
|
file_path = os.path.join(__path, file_)
|
|
138
180
|
if self.exclude_path:
|
|
139
|
-
|
|
181
|
+
relative_path = file_path.replace(self.exclude_path, "")
|
|
182
|
+
else:
|
|
183
|
+
relative_path = file_path
|
|
140
184
|
# Lists in python are ordered, so s3 prefix will get loaded first when provided
|
|
141
185
|
url_parts = []
|
|
142
186
|
if self.s3_prefix:
|
|
@@ -144,10 +188,10 @@ class Uploader:
|
|
|
144
188
|
self.s3_prefix.split(os.sep) if os.sep in self.s3_prefix else self.s3_prefix.split("/")
|
|
145
189
|
)
|
|
146
190
|
# Add rest of the file path to parts before normalizing as an S3 object URL
|
|
147
|
-
url_parts.extend(
|
|
191
|
+
url_parts.extend(relative_path.split(os.sep))
|
|
148
192
|
# Remove falsy values using filter - "None", "bool", "len" or "lambda item: item"
|
|
149
193
|
object_path = urljoin(*filter(None, url_parts))
|
|
150
|
-
files_to_upload[
|
|
194
|
+
files_to_upload[file_path] = object_path
|
|
151
195
|
return files_to_upload
|
|
152
196
|
|
|
153
197
|
def run(self) -> None:
|
|
@@ -161,7 +205,7 @@ class Uploader:
|
|
|
161
205
|
keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
|
|
162
206
|
):
|
|
163
207
|
try:
|
|
164
|
-
self._uploader(
|
|
208
|
+
self._uploader(filepath=filepath, objectpath=objectpath)
|
|
165
209
|
self.results.success += 1
|
|
166
210
|
except ClientError as error:
|
|
167
211
|
self.logger.error(error)
|
|
@@ -185,7 +229,10 @@ class Uploader:
|
|
|
185
229
|
max_workers,
|
|
186
230
|
)
|
|
187
231
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
188
|
-
futures = [
|
|
232
|
+
futures = [
|
|
233
|
+
executor.submit(self._uploader, **dict(filepath=filepath, objectpath=objectpath))
|
|
234
|
+
for filepath, objectpath in keys.items()
|
|
235
|
+
]
|
|
189
236
|
for future in tqdm(
|
|
190
237
|
iterable=as_completed(futures),
|
|
191
238
|
total=len(futures),
|
|
@@ -200,3 +247,18 @@ class Uploader:
|
|
|
200
247
|
self.logger.error(f"Upload failed: {error}")
|
|
201
248
|
self.results.failed += 1
|
|
202
249
|
self.exit()
|
|
250
|
+
|
|
251
|
+
def get_bucket_structure(self) -> str:
|
|
252
|
+
"""Gets all the objects in an S3 bucket and forms it into a hierarchical folder like representation.
|
|
253
|
+
|
|
254
|
+
Returns:
|
|
255
|
+
str:
|
|
256
|
+
Returns a hierarchical folder like representation of the chosen bucket.
|
|
257
|
+
"""
|
|
258
|
+
self.init()
|
|
259
|
+
# Using list and set will yield the same results but using set we can isolate directories from files
|
|
260
|
+
return convert_to_folder_structure(set(obj.key for obj in self.bucket_objects))
|
|
261
|
+
|
|
262
|
+
def print_bucket_structure(self) -> None:
|
|
263
|
+
"""Prints all the objects in an S3 bucket with a folder like representation."""
|
|
264
|
+
print(self.get_bucket_structure())
|
s3/utils.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from typing import Dict, Set
|
|
2
3
|
|
|
3
4
|
|
|
4
5
|
class UploadResults(dict):
|
|
@@ -28,3 +29,42 @@ def urljoin(*args) -> str:
|
|
|
28
29
|
Joined url.
|
|
29
30
|
"""
|
|
30
31
|
return "/".join(map(lambda x: str(x).rstrip("/").lstrip("/"), args))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def convert_to_folder_structure(sequence: Set[str]) -> str:
|
|
35
|
+
"""Convert objects in a s3 buckets into a folder like representation.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
sequence: Takes either a mutable or immutable sequence as an argument.
|
|
39
|
+
|
|
40
|
+
Returns:
|
|
41
|
+
str:
|
|
42
|
+
String representation of the architecture.
|
|
43
|
+
"""
|
|
44
|
+
folder_structure = {}
|
|
45
|
+
for item in sequence:
|
|
46
|
+
parts = item.split("/")
|
|
47
|
+
current_level = folder_structure
|
|
48
|
+
for part in parts:
|
|
49
|
+
current_level = current_level.setdefault(part, {})
|
|
50
|
+
|
|
51
|
+
def generate_folder_structure(structure: Dict[str, dict], indent: str = "") -> str:
|
|
52
|
+
"""Generates the folder like structure.
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
structure: Structure of folder objects as key-value pairs.
|
|
56
|
+
indent: Required indentation for the ASCII.
|
|
57
|
+
"""
|
|
58
|
+
result = ""
|
|
59
|
+
for i, (key, value) in enumerate(structure.items()):
|
|
60
|
+
if i == len(structure) - 1:
|
|
61
|
+
result += indent + "└── " + key + "\n"
|
|
62
|
+
sub_indent = indent + " "
|
|
63
|
+
else:
|
|
64
|
+
result += indent + "├── " + key + "\n"
|
|
65
|
+
sub_indent = indent + "│ "
|
|
66
|
+
if value:
|
|
67
|
+
result += generate_folder_structure(value, sub_indent)
|
|
68
|
+
return result
|
|
69
|
+
|
|
70
|
+
return generate_folder_structure(folder_structure)
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
s3/__init__.py,sha256=XgYHKbn7gc5_nzydIKmKVjigeMtOBLqRHKHb8GJi5M4,66
|
|
2
|
-
s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
|
|
3
|
-
s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
|
|
4
|
-
s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
|
|
5
|
-
s3/uploader.py,sha256=tQaelL7grZSWFydZOekQgVz4Fipm0PHzbt2J17ddYHs,8563
|
|
6
|
-
s3/utils.py,sha256=pKVT2GbDGQKpFaHOmVrCfiQhvgr1vuSsITt_0gHguAA,687
|
|
7
|
-
pys3uploader-0.1.1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
|
|
8
|
-
pys3uploader-0.1.1.dist-info/METADATA,sha256=sW_fsQxpoZ8f8ivI0Vb4oUXt1RSlFuHJDmpP9h_CXVU,7286
|
|
9
|
-
pys3uploader-0.1.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
10
|
-
pys3uploader-0.1.1.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
|
|
11
|
-
pys3uploader-0.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|