PyS3Uploader 0.1.2-py3-none-any.whl → 0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PyS3Uploader might be problematic.
- {pys3uploader-0.1.2.dist-info → pys3uploader-0.2.0.dist-info}/METADATA +1 -1
- pys3uploader-0.2.0.dist-info/RECORD +11 -0
- s3/__init__.py +1 -1
- s3/uploader.py +56 -11
- pys3uploader-0.1.2.dist-info/RECORD +0 -11
- {pys3uploader-0.1.2.dist-info → pys3uploader-0.2.0.dist-info}/LICENSE +0 -0
- {pys3uploader-0.1.2.dist-info → pys3uploader-0.2.0.dist-info}/WHEEL +0 -0
- {pys3uploader-0.1.2.dist-info → pys3uploader-0.2.0.dist-info}/top_level.txt +0 -0
pys3uploader-0.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
+s3/__init__.py,sha256=yLvvl4-uTLZwhdhCMQpWq5juX_zFuYAfKSf4aB0WjZw,66
+s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
+s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
+s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
+s3/uploader.py,sha256=IAlFrEjfBuexrfmBPGN9OZAfHjQuwcGRzWi2es0r_fU,11154
+s3/utils.py,sha256=0kcG0aE2olHhC8thaUEwx2J8tOI2-2TGCk6E6U-PiKw,2058
+pys3uploader-0.2.0.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
+pys3uploader-0.2.0.dist-info/METADATA,sha256=IXSmHXJJndlnd_6MHlpZrcVILPni8VUbVNJYQEjMIR8,7286
+pys3uploader-0.2.0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
+pys3uploader-0.2.0.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
+pys3uploader-0.2.0.dist-info/RECORD,,
s3/__init__.py CHANGED
s3/uploader.py CHANGED
@@ -29,6 +29,7 @@ class Uploader:
         upload_dir: str,
         s3_prefix: str = None,
         exclude_path: str = None,
+        overwrite: bool = False,
         region_name: str = None,
         profile_name: str = None,
         aws_access_key_id: str = None,
@@ -42,6 +43,7 @@ class Uploader:
             upload_dir: Full path of the directory to be uploaded.
             s3_prefix: Particular bucket prefix within which the upload should happen.
             exclude_path: Full directory path to exclude from S3 object prefix.
+            overwrite: Boolean flag to overwrite files in S3.
             region_name: Name of the AWS region.
             profile_name: AWS profile name.
             aws_access_key_id: AWS access key ID.
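The new `overwrite` flag defaults to `False`, so existing objects are re-uploaded only when they differ (see `_proceed_to_upload` below). A minimal usage sketch, assuming the wheel is installed and `Uploader` is importable from `s3.uploader` as shown in this diff; the bucket name and directory are hypothetical:

```python
from s3.uploader import Uploader

# Hypothetical bucket/directory; credentials come from the usual AWS env vars or profile.
uploader = Uploader(
    bucket_name="example-bucket",
    upload_dir="/tmp/upload_source",
    overwrite=False,  # new in 0.2.0: False skips objects whose size already matches
)
uploader.run()
```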
@@ -69,16 +71,24 @@ class Uploader:
             aws_secret_access_key=aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY"),
         )
         self.s3 = self.session.resource(service_name="s3", config=self.RETRY_CONFIG)
+
         self.logger = logger or default_logger()
+
+        self.bucket_name = bucket_name
         self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
         self.s3_prefix = s3_prefix
         self.exclude_path = exclude_path
-        self.
-
-        self.bucket: boto3.resources.factory.s3.Bucket = None
+        self.overwrite = overwrite
+
         self.results = UploadResults()
         self.start = time.time()

+        # noinspection PyUnresolvedReferences
+        self.bucket: boto3.resources.factory.s3.Bucket = None
+        # noinspection PyUnresolvedReferences
+        self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = []
+        self.object_size_map: Dict[str, int] = {}
+
     def init(self) -> None:
         """Instantiates the bucket instance.

@@ -106,6 +116,9 @@ class Uploader:
         self.upload_dir = os.path.abspath(self.upload_dir)
         # noinspection PyUnresolvedReferences
         self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
+        # noinspection PyUnresolvedReferences
+        self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = [obj for obj in self.bucket.objects.all()]
+        self.object_size_map = {obj.key: obj.size for obj in self.bucket_objects}

     def exit(self) -> None:
         """Exits after printing results, and run time."""
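`init()` now lists the bucket once and caches a key-to-size map, so later size checks are dictionary lookups rather than per-file S3 calls. A standalone boto3 sketch of the same idea (hypothetical bucket name, credentials assumed to be configured):

```python
import boto3

s3 = boto3.resource("s3")
bucket = s3.Bucket("example-bucket")  # hypothetical bucket name

# One listing pass up front; afterwards every size check is an O(1) dict lookup.
object_size_map = {obj.key: obj.size for obj in bucket.objects.all()}
print(object_size_map.get("some/prefix/report.txt"))  # None if the key is not in the bucket
```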
@@ -115,21 +128,50 @@ class Uploader:
         )
         self.logger.info("Run Time: %.2fs", time.time() - self.start)

-    def
+    def _proceed_to_upload(self, filepath: str, objectpath: str) -> bool:
+        """Compares file size if the object already exists in S3.
+
+        Args:
+            filepath: Source filepath.
+            objectpath: S3 object path.
+
+        Returns:
+            bool:
+            Returns a boolean flag to indicate upload flag.
+        """
+        if self.overwrite:
+            return True
+        # Indicates that the object path already exists in S3
+        if object_size := self.object_size_map.get(objectpath):
+            try:
+                file_size = os.path.getsize(filepath)
+            except (OSError, PermissionError) as error:
+                self.logger.error(error)
+                return True
+            if object_size == file_size:
+                self.logger.info("S3 object %s exists, and size [%d] matches, skipping..", objectpath, object_size)
+                return False
+            self.logger.info(
+                "S3 object %s exists, but size mismatch. Local: [%d], S3: [%d]", objectpath, file_size, object_size
+            )
+        return True
+
+    def _uploader(self, filepath: str, objectpath: str) -> None:
         """Uploads the filepath to the specified S3 bucket.

         Args:
-            objectpath: Object path ref in S3.
             filepath: Filepath to upload.
+            objectpath: Object path ref in S3.
         """
-        self.
+        if self._proceed_to_upload(filepath, objectpath):
+            self.bucket.upload_file(filepath, objectpath)

     def _get_files(self) -> Dict[str, str]:
         """Get a mapping for all the file path and object paths in upload directory.

         Returns:
             Dict[str, str]:
-            Returns a
+            Returns a key-value pair of filepath and objectpath.
         """
         files_to_upload = {}
         for __path, __directory, __files in os.walk(self.upload_dir):
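The skip rule introduced above reduces to a size comparison against that cached map. A self-contained sketch of the same decision, with a hypothetical file and key (not part of the package):

```python
import os
import tempfile

def should_upload(filepath: str, objectpath: str, object_size_map: dict, overwrite: bool = False) -> bool:
    """Upload unless S3 already holds an object with the same key and size (sketch of the new rule)."""
    if overwrite:
        return True
    object_size = object_size_map.get(objectpath)
    if not object_size:
        return True  # object not in the bucket yet
    try:
        return os.path.getsize(filepath) != object_size
    except OSError:
        return True  # local file unreadable; let the upload attempt surface the error

# 5-byte local file that already "exists" in S3 with the same size -> skip (prints False).
with tempfile.NamedTemporaryFile(delete=False) as handle:
    handle.write(b"hello")
print(should_upload(handle.name, "prefix/hello.txt", {"prefix/hello.txt": 5}))
```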
@@ -149,7 +191,7 @@ class Uploader:
                 url_parts.extend(relative_path.split(os.sep))
                 # Remove falsy values using filter - "None", "bool", "len" or "lambda item: item"
                 object_path = urljoin(*filter(None, url_parts))
-                files_to_upload[
+                files_to_upload[file_path] = object_path
         return files_to_upload

     def run(self) -> None:
@@ -163,7 +205,7 @@ class Uploader:
             keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
         ):
             try:
-                self._uploader(
+                self._uploader(filepath=filepath, objectpath=objectpath)
                 self.results.success += 1
             except ClientError as error:
                 self.logger.error(error)
@@ -187,7 +229,10 @@ class Uploader:
             max_workers,
         )
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
-            futures = [
+            futures = [
+                executor.submit(self._uploader, **dict(filepath=filepath, objectpath=objectpath))
+                for filepath, objectpath in keys.items()
+            ]
             for future in tqdm(
                 iterable=as_completed(futures),
                 total=len(futures),
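The parallel path now builds the futures list with a comprehension and drains it with `as_completed`. A self-contained sketch of that submit/collect pattern (the `upload_one` worker and the `keys` mapping are placeholders, not package code):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

def upload_one(filepath: str, objectpath: str) -> None:
    """Placeholder for the per-file upload call."""
    print(f"uploading {filepath} -> {objectpath}")

keys = {"/tmp/a.txt": "prefix/a.txt", "/tmp/b.txt": "prefix/b.txt"}  # hypothetical mapping
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = [
        executor.submit(upload_one, filepath=filepath, objectpath=objectpath)
        for filepath, objectpath in keys.items()
    ]
    for future in tqdm(as_completed(futures), total=len(futures), unit="file"):
        future.result()  # re-raises any exception from the worker thread
```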
@@ -212,7 +257,7 @@ class Uploader:
         """
         self.init()
         # Using list and set will yield the same results but using set we can isolate directories from files
-        return convert_to_folder_structure(set(
+        return convert_to_folder_structure(set(obj.key for obj in self.bucket_objects))

     def print_bucket_structure(self) -> None:
         """Prints all the objects in an S3 bucket with a folder like representation."""
pys3uploader-0.1.2.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
-s3/__init__.py,sha256=qSltnC7r3AjwiYWzsD9JUs8SzeBEV16nrHldiWlrxtY,66
-s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
-s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
-s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
-s3/uploader.py,sha256=kkv7d2EaMH3OsoIJgTx7yRUd00s0n9PbRbjj6Rm7qdA,9355
-s3/utils.py,sha256=0kcG0aE2olHhC8thaUEwx2J8tOI2-2TGCk6E6U-PiKw,2058
-pys3uploader-0.1.2.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
-pys3uploader-0.1.2.dist-info/METADATA,sha256=GtQq-ZDiZEMpl2CEs4VJw4AQ8tf5rzcfgjDu68oHX6c,7286
-pys3uploader-0.1.2.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
-pys3uploader-0.1.2.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
-pys3uploader-0.1.2.dist-info/RECORD,,
{pys3uploader-0.1.2.dist-info → pys3uploader-0.2.0.dist-info}/LICENSE: File without changes
{pys3uploader-0.1.2.dist-info → pys3uploader-0.2.0.dist-info}/WHEEL: File without changes
{pys3uploader-0.1.2.dist-info → pys3uploader-0.2.0.dist-info}/top_level.txt: File without changes