PyS3Uploader 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pys3uploader-0.1.0.dist-info → pys3uploader-0.1.2.dist-info}/METADATA +6 -5
- pys3uploader-0.1.2.dist-info/RECORD +11 -0
- s3/__init__.py +1 -1
- s3/uploader.py +62 -18
- s3/utils.py +42 -17
- pys3uploader-0.1.0.dist-info/RECORD +0 -11
- {pys3uploader-0.1.0.dist-info → pys3uploader-0.1.2.dist-info}/LICENSE +0 -0
- {pys3uploader-0.1.0.dist-info → pys3uploader-0.1.2.dist-info}/WHEEL +0 -0
- {pys3uploader-0.1.0.dist-info → pys3uploader-0.1.2.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: PyS3Uploader
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Python module to upload objects to an S3 bucket.
|
|
5
5
|
Author-email: Vignesh Rao <svignesh1793@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -63,7 +63,7 @@ Requires-Dist: recommonmark; extra == "dev"
|
|
|
63
63
|
|
|
64
64
|
**Activity**
|
|
65
65
|
|
|
66
|
-
[][repo]
|
|
67
67
|
[][repo]
|
|
68
68
|
[][repo]
|
|
69
69
|
|
|
@@ -90,7 +90,7 @@ if __name__ == '__main__':
|
|
|
90
90
|
wrapper = s3.Uploader(
|
|
91
91
|
bucket_name="BUCKET_NAME",
|
|
92
92
|
upload_dir="FULL_PATH_TO_UPLOAD",
|
|
93
|
-
|
|
93
|
+
exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
|
|
94
94
|
)
|
|
95
95
|
wrapper.run_in_parallel()
|
|
96
96
|
```
|
|
@@ -103,7 +103,7 @@ if __name__ == '__main__':
|
|
|
103
103
|
wrapper = s3.Uploader(
|
|
104
104
|
bucket_name="BUCKET_NAME",
|
|
105
105
|
upload_dir="FULL_PATH_TO_UPLOAD",
|
|
106
|
-
|
|
106
|
+
exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
|
|
107
107
|
)
|
|
108
108
|
wrapper.run()
|
|
109
109
|
```
|
|
@@ -113,7 +113,8 @@ if __name__ == '__main__':
|
|
|
113
113
|
- **upload_dir** - Directory to upload.
|
|
114
114
|
|
|
115
115
|
#### Optional kwargs
|
|
116
|
-
- **
|
|
116
|
+
- **s3_prefix** - S3 object prefix for each file. Defaults to `None`
|
|
117
|
+
- **exclude_path** - Path in `upload_dir` that has to be excluded from object keys. Defaults to `None`
|
|
117
118
|
- **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
|
|
118
119
|
<br><br>
|
|
119
120
|
- **region_name** - AWS region name. Defaults to the env var `AWS_DEFAULT_REGION`
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
s3/__init__.py,sha256=qSltnC7r3AjwiYWzsD9JUs8SzeBEV16nrHldiWlrxtY,66
|
|
2
|
+
s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
|
|
3
|
+
s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
|
|
4
|
+
s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
|
|
5
|
+
s3/uploader.py,sha256=kkv7d2EaMH3OsoIJgTx7yRUd00s0n9PbRbjj6Rm7qdA,9355
|
|
6
|
+
s3/utils.py,sha256=0kcG0aE2olHhC8thaUEwx2J8tOI2-2TGCk6E6U-PiKw,2058
|
|
7
|
+
pys3uploader-0.1.2.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
|
|
8
|
+
pys3uploader-0.1.2.dist-info/METADATA,sha256=GtQq-ZDiZEMpl2CEs4VJw4AQ8tf5rzcfgjDu68oHX6c,7286
|
|
9
|
+
pys3uploader-0.1.2.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
10
|
+
pys3uploader-0.1.2.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
|
|
11
|
+
pys3uploader-0.1.2.dist-info/RECORD,,
|
s3/__init__.py
CHANGED
s3/uploader.py
CHANGED
|
@@ -11,7 +11,7 @@ from tqdm import tqdm
|
|
|
11
11
|
|
|
12
12
|
from s3.exceptions import BucketNotFound
|
|
13
13
|
from s3.logger import default_logger
|
|
14
|
-
from s3.utils import UploadResults,
|
|
14
|
+
from s3.utils import UploadResults, convert_to_folder_structure, getenv, urljoin
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class Uploader:
|
|
@@ -27,7 +27,8 @@ class Uploader:
|
|
|
27
27
|
self,
|
|
28
28
|
bucket_name: str,
|
|
29
29
|
upload_dir: str,
|
|
30
|
-
|
|
30
|
+
s3_prefix: str = None,
|
|
31
|
+
exclude_path: str = None,
|
|
31
32
|
region_name: str = None,
|
|
32
33
|
profile_name: str = None,
|
|
33
34
|
aws_access_key_id: str = None,
|
|
@@ -38,13 +39,28 @@ class Uploader:
|
|
|
38
39
|
|
|
39
40
|
Args:
|
|
40
41
|
bucket_name: Name of the bucket.
|
|
41
|
-
upload_dir:
|
|
42
|
-
|
|
42
|
+
upload_dir: Full path of the directory to be uploaded.
|
|
43
|
+
s3_prefix: Particular bucket prefix within which the upload should happen.
|
|
44
|
+
exclude_path: Full directory path to exclude from S3 object prefix.
|
|
43
45
|
region_name: Name of the AWS region.
|
|
44
46
|
profile_name: AWS profile name.
|
|
45
47
|
aws_access_key_id: AWS access key ID.
|
|
46
48
|
aws_secret_access_key: AWS secret access key.
|
|
47
49
|
logger: Bring your own logger.
|
|
50
|
+
|
|
51
|
+
See Also:
|
|
52
|
+
exclude_path:
|
|
53
|
+
When upload directory is "/home/ubuntu/Desktop/S3Upload", each file will naturally have the full prefix.
|
|
54
|
+
However, this behavior can be avoided by specifying the ``exclude_path`` parameter.
|
|
55
|
+
|
|
56
|
+
If exclude_path is set to: ``/home/ubuntu/Desktop``, then the file path
|
|
57
|
+
``/home/ubuntu/Desktop/S3Upload/sub-dir/photo.jpg`` will be uploaded as ``S3Upload/sub-dir/photo.jpg``
|
|
58
|
+
|
|
59
|
+
s3_prefix:
|
|
60
|
+
If provided, ``s3_prefix`` will always be attached to each object.
|
|
61
|
+
|
|
62
|
+
If ``s3_prefix`` is set to: ``2025``, then the file path
|
|
63
|
+
``/home/ubuntu/Desktop/S3Upload/sub/photo.jpg`` will be uploaded as ``2025/S3Upload/sub/photo.jpg``
|
|
48
64
|
"""
|
|
49
65
|
self.session = boto3.Session(
|
|
50
66
|
profile_name=profile_name or getenv("PROFILE_NAME"),
|
|
@@ -54,8 +70,9 @@ class Uploader:
|
|
|
54
70
|
)
|
|
55
71
|
self.s3 = self.session.resource(service_name="s3", config=self.RETRY_CONFIG)
|
|
56
72
|
self.logger = logger or default_logger()
|
|
57
|
-
self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "
|
|
58
|
-
self.
|
|
73
|
+
self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
|
|
74
|
+
self.s3_prefix = s3_prefix
|
|
75
|
+
self.exclude_path = exclude_path
|
|
59
76
|
self.bucket_name = bucket_name
|
|
60
77
|
# noinspection PyUnresolvedReferences
|
|
61
78
|
self.bucket: boto3.resources.factory.s3.Bucket = None
|
|
@@ -70,9 +87,9 @@ class Uploader:
|
|
|
70
87
|
BucketNotFound: If bucket name was not found.
|
|
71
88
|
"""
|
|
72
89
|
self.start = time.time()
|
|
73
|
-
if self.
|
|
90
|
+
if self.exclude_path and self.exclude_path not in self.upload_dir:
|
|
74
91
|
raise ValueError(
|
|
75
|
-
f"\n\n\
|
|
92
|
+
f"\n\n\tStart folder {self.exclude_path!r} is not a part of upload directory {self.upload_dir!r}"
|
|
76
93
|
)
|
|
77
94
|
if not self.upload_dir:
|
|
78
95
|
raise ValueError("\n\n\tCannot proceed without an upload directory.")
|
|
@@ -87,7 +104,6 @@ class Uploader:
|
|
|
87
104
|
if self.bucket_name not in buckets:
|
|
88
105
|
raise BucketNotFound(f"\n\n\t{self.bucket_name} was not found in {_alias} account.\n\tAvailable: {buckets}")
|
|
89
106
|
self.upload_dir = os.path.abspath(self.upload_dir)
|
|
90
|
-
self.logger.info("Bucket objects from '%s' will be uploaded to '%s'", self.upload_dir, self.bucket_name)
|
|
91
107
|
# noinspection PyUnresolvedReferences
|
|
92
108
|
self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
|
|
93
109
|
|
|
@@ -119,14 +135,20 @@ class Uploader:
|
|
|
119
135
|
for __path, __directory, __files in os.walk(self.upload_dir):
|
|
120
136
|
for file_ in __files:
|
|
121
137
|
file_path = os.path.join(__path, file_)
|
|
122
|
-
if self.
|
|
123
|
-
|
|
124
|
-
object_path = get_object_path(file_path, self.prefix_dir)
|
|
125
|
-
except ValueError as error:
|
|
126
|
-
self.logger.error(error)
|
|
127
|
-
continue
|
|
138
|
+
if self.exclude_path:
|
|
139
|
+
relative_path = file_path.replace(self.exclude_path, "")
|
|
128
140
|
else:
|
|
129
|
-
|
|
141
|
+
relative_path = file_path
|
|
142
|
+
# Lists in python are ordered, so s3 prefix will get loaded first when provided
|
|
143
|
+
url_parts = []
|
|
144
|
+
if self.s3_prefix:
|
|
145
|
+
url_parts.extend(
|
|
146
|
+
self.s3_prefix.split(os.sep) if os.sep in self.s3_prefix else self.s3_prefix.split("/")
|
|
147
|
+
)
|
|
148
|
+
# Add rest of the file path to parts before normalizing as an S3 object URL
|
|
149
|
+
url_parts.extend(relative_path.split(os.sep))
|
|
150
|
+
# Remove falsy values using filter - "None", "bool", "len" or "lambda item: item"
|
|
151
|
+
object_path = urljoin(*filter(None, url_parts))
|
|
130
152
|
files_to_upload[object_path] = file_path
|
|
131
153
|
return files_to_upload
|
|
132
154
|
|
|
@@ -135,6 +157,7 @@ class Uploader:
|
|
|
135
157
|
self.init()
|
|
136
158
|
keys = self._get_files()
|
|
137
159
|
self.logger.debug(keys)
|
|
160
|
+
self.logger.info("%d files from '%s' will be uploaded to '%s'", len(keys), self.upload_dir, self.bucket_name)
|
|
138
161
|
self.logger.info("Initiating upload process.")
|
|
139
162
|
for objectpath, filepath in tqdm(
|
|
140
163
|
keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
|
|
@@ -154,9 +177,15 @@ class Uploader:
|
|
|
154
177
|
max_workers: Number of maximum threads to use.
|
|
155
178
|
"""
|
|
156
179
|
self.init()
|
|
157
|
-
self.logger.info(f"Number of threads: {max_workers}")
|
|
158
180
|
keys = self._get_files()
|
|
159
|
-
self.logger.
|
|
181
|
+
self.logger.debug(keys)
|
|
182
|
+
self.logger.info(
|
|
183
|
+
"%d files from '%s' will be uploaded to '%s' with maximum concurrency of: %d",
|
|
184
|
+
len(keys),
|
|
185
|
+
self.upload_dir,
|
|
186
|
+
self.bucket_name,
|
|
187
|
+
max_workers,
|
|
188
|
+
)
|
|
160
189
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
161
190
|
futures = [executor.submit(self._uploader, *kv) for kv in keys.items()]
|
|
162
191
|
for future in tqdm(
|
|
@@ -173,3 +202,18 @@ class Uploader:
|
|
|
173
202
|
self.logger.error(f"Upload failed: {error}")
|
|
174
203
|
self.results.failed += 1
|
|
175
204
|
self.exit()
|
|
205
|
+
|
|
206
|
+
def get_bucket_structure(self) -> str:
|
|
207
|
+
"""Gets all the objects in an S3 bucket and forms them into a hierarchical, folder-like representation.
|
|
208
|
+
|
|
209
|
+
Returns:
|
|
210
|
+
str:
|
|
211
|
+
Returns a hierarchical folder like representation of the chosen bucket.
|
|
212
|
+
"""
|
|
213
|
+
self.init()
|
|
214
|
+
# Using list and set will yield the same results but using set we can isolate directories from files
|
|
215
|
+
return convert_to_folder_structure(set([obj.key for obj in self.bucket.objects.all()]))
|
|
216
|
+
|
|
217
|
+
def print_bucket_structure(self) -> None:
|
|
218
|
+
"""Prints all the objects in an S3 bucket in a folder-like representation."""
|
|
219
|
+
print(self.get_bucket_structure())
|
s3/utils.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from typing import Dict, Set
|
|
2
3
|
|
|
3
4
|
|
|
4
5
|
class UploadResults(dict):
|
|
@@ -20,26 +21,50 @@ def getenv(*args, default: str = None) -> str:
|
|
|
20
21
|
return default
|
|
21
22
|
|
|
22
23
|
|
|
23
|
-
def
|
|
24
|
-
"""
|
|
24
|
+
def urljoin(*args) -> str:
|
|
25
|
+
"""Joins given arguments into a url. Both leading and trailing slashes are stripped from each argument.
|
|
26
|
+
|
|
27
|
+
Returns:
|
|
28
|
+
str:
|
|
29
|
+
Joined url.
|
|
30
|
+
"""
|
|
31
|
+
return "/".join(map(lambda x: str(x).rstrip("/").lstrip("/"), args))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def convert_to_folder_structure(sequence: Set[str]) -> str:
|
|
35
|
+
"""Convert objects in an S3 bucket into a folder-like representation.
|
|
25
36
|
|
|
26
37
|
Args:
|
|
27
|
-
|
|
28
|
-
start_folder_name: Folder name to begin object path.
|
|
38
|
+
sequence: Set of S3 object keys; each key is split on ``/`` to build the nested folder levels.
|
|
29
39
|
|
|
30
40
|
Returns:
|
|
31
41
|
str:
|
|
32
|
-
|
|
42
|
+
String representation of the architecture.
|
|
33
43
|
"""
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
44
|
+
folder_structure = {}
|
|
45
|
+
for item in sequence:
|
|
46
|
+
parts = item.split("/")
|
|
47
|
+
current_level = folder_structure
|
|
48
|
+
for part in parts:
|
|
49
|
+
current_level = current_level.setdefault(part, {})
|
|
50
|
+
|
|
51
|
+
def generate_folder_structure(structure: Dict[str, dict], indent: str = "") -> str:
|
|
52
|
+
"""Generates the folder like structure.
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
structure: Structure of folder objects as key-value pairs.
|
|
56
|
+
indent: Indentation prefix prepended to each line of the rendered tree at the current depth.
|
|
57
|
+
"""
|
|
58
|
+
result = ""
|
|
59
|
+
for i, (key, value) in enumerate(structure.items()):
|
|
60
|
+
if i == len(structure) - 1:
|
|
61
|
+
result += indent + "└── " + key + "\n"
|
|
62
|
+
sub_indent = indent + " "
|
|
63
|
+
else:
|
|
64
|
+
result += indent + "├── " + key + "\n"
|
|
65
|
+
sub_indent = indent + "│ "
|
|
66
|
+
if value:
|
|
67
|
+
result += generate_folder_structure(value, sub_indent)
|
|
68
|
+
return result
|
|
69
|
+
|
|
70
|
+
return generate_folder_structure(folder_structure)
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
s3/__init__.py,sha256=zSLbLrsnVD-tRtiiTcT3JDWzmpnSC9mP6uHMXt2cyEc,66
|
|
2
|
-
s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
|
|
3
|
-
s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
|
|
4
|
-
s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
|
|
5
|
-
s3/uploader.py,sha256=Z2EvtUlR5jlL1xbeQWj4XLBfhTn4yWPm9E8WhPcz6Qk,7056
|
|
6
|
-
s3/utils.py,sha256=swkdwkfn43e8I3dGL9HAGZ-dba3fIeorihVAjTE07wc,1291
|
|
7
|
-
pys3uploader-0.1.0.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
|
|
8
|
-
pys3uploader-0.1.0.dist-info/METADATA,sha256=gkAfOF-hEXYfW9p0PZiJhcrpLjhNorK7LFZNtU_ybrE,7188
|
|
9
|
-
pys3uploader-0.1.0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
10
|
-
pys3uploader-0.1.0.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
|
|
11
|
-
pys3uploader-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|