PyS3Uploader 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PyS3Uploader might be problematic. Click here for more details.
- {pys3uploader-0.1.0.dist-info → pys3uploader-0.1.1.dist-info}/METADATA +5 -4
- pys3uploader-0.1.1.dist-info/RECORD +11 -0
- s3/__init__.py +1 -1
- s3/uploader.py +46 -19
- s3/utils.py +4 -19
- pys3uploader-0.1.0.dist-info/RECORD +0 -11
- {pys3uploader-0.1.0.dist-info → pys3uploader-0.1.1.dist-info}/LICENSE +0 -0
- {pys3uploader-0.1.0.dist-info → pys3uploader-0.1.1.dist-info}/WHEEL +0 -0
- {pys3uploader-0.1.0.dist-info → pys3uploader-0.1.1.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: PyS3Uploader
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: Python module to upload objects to an S3 bucket.
|
|
5
5
|
Author-email: Vignesh Rao <svignesh1793@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -90,7 +90,7 @@ if __name__ == '__main__':
|
|
|
90
90
|
wrapper = s3.Uploader(
|
|
91
91
|
bucket_name="BUCKET_NAME",
|
|
92
92
|
upload_dir="FULL_PATH_TO_UPLOAD",
|
|
93
|
-
|
|
93
|
+
exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
|
|
94
94
|
)
|
|
95
95
|
wrapper.run_in_parallel()
|
|
96
96
|
```
|
|
@@ -103,7 +103,7 @@ if __name__ == '__main__':
|
|
|
103
103
|
wrapper = s3.Uploader(
|
|
104
104
|
bucket_name="BUCKET_NAME",
|
|
105
105
|
upload_dir="FULL_PATH_TO_UPLOAD",
|
|
106
|
-
|
|
106
|
+
exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
|
|
107
107
|
)
|
|
108
108
|
wrapper.run()
|
|
109
109
|
```
|
|
@@ -113,7 +113,8 @@ if __name__ == '__main__':
|
|
|
113
113
|
- **upload_dir** - Directory to upload.
|
|
114
114
|
|
|
115
115
|
#### Optional kwargs
|
|
116
|
-
- **
|
|
116
|
+
- **s3_prefix** - S3 object prefix for each file. Defaults to ``None``
|
|
117
|
+
- **exclude_path** - Path in ``upload_dir`` that has to be excluded from object keys. Defaults to ``None``
|
|
117
118
|
- **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
|
|
118
119
|
<br><br>
|
|
119
120
|
- **region_name** - AWS region name. Defaults to the env var `AWS_DEFAULT_REGION`
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
s3/__init__.py,sha256=XgYHKbn7gc5_nzydIKmKVjigeMtOBLqRHKHb8GJi5M4,66
|
|
2
|
+
s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
|
|
3
|
+
s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
|
|
4
|
+
s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
|
|
5
|
+
s3/uploader.py,sha256=tQaelL7grZSWFydZOekQgVz4Fipm0PHzbt2J17ddYHs,8563
|
|
6
|
+
s3/utils.py,sha256=pKVT2GbDGQKpFaHOmVrCfiQhvgr1vuSsITt_0gHguAA,687
|
|
7
|
+
pys3uploader-0.1.1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
|
|
8
|
+
pys3uploader-0.1.1.dist-info/METADATA,sha256=sW_fsQxpoZ8f8ivI0Vb4oUXt1RSlFuHJDmpP9h_CXVU,7286
|
|
9
|
+
pys3uploader-0.1.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
10
|
+
pys3uploader-0.1.1.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
|
|
11
|
+
pys3uploader-0.1.1.dist-info/RECORD,,
|
s3/__init__.py
CHANGED
s3/uploader.py
CHANGED
|
@@ -11,7 +11,7 @@ from tqdm import tqdm
|
|
|
11
11
|
|
|
12
12
|
from s3.exceptions import BucketNotFound
|
|
13
13
|
from s3.logger import default_logger
|
|
14
|
-
from s3.utils import UploadResults,
|
|
14
|
+
from s3.utils import UploadResults, getenv, urljoin
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class Uploader:
|
|
@@ -27,7 +27,8 @@ class Uploader:
|
|
|
27
27
|
self,
|
|
28
28
|
bucket_name: str,
|
|
29
29
|
upload_dir: str,
|
|
30
|
-
|
|
30
|
+
s3_prefix: str = None,
|
|
31
|
+
exclude_path: str = None,
|
|
31
32
|
region_name: str = None,
|
|
32
33
|
profile_name: str = None,
|
|
33
34
|
aws_access_key_id: str = None,
|
|
@@ -38,13 +39,28 @@ class Uploader:
|
|
|
38
39
|
|
|
39
40
|
Args:
|
|
40
41
|
bucket_name: Name of the bucket.
|
|
41
|
-
upload_dir:
|
|
42
|
-
|
|
42
|
+
upload_dir: Full path of the directory to be uploaded.
|
|
43
|
+
s3_prefix: Particular bucket prefix within which the upload should happen.
|
|
44
|
+
exclude_path: Full directory path to exclude from S3 object prefix.
|
|
43
45
|
region_name: Name of the AWS region.
|
|
44
46
|
profile_name: AWS profile name.
|
|
45
47
|
aws_access_key_id: AWS access key ID.
|
|
46
48
|
aws_secret_access_key: AWS secret access key.
|
|
47
49
|
logger: Bring your own logger.
|
|
50
|
+
|
|
51
|
+
See Also:
|
|
52
|
+
exclude_path:
|
|
53
|
+
When upload directory is "/home/ubuntu/Desktop/S3Upload", each file will naturally have the full prefix.
|
|
54
|
+
However, this behavior can be avoided by specifying the ``exclude_path`` parameter.
|
|
55
|
+
|
|
56
|
+
If exclude_path is set to: ``/home/ubuntu/Desktop``, then the file path
|
|
57
|
+
``/home/ubuntu/Desktop/S3Upload/sub-dir/photo.jpg`` will be uploaded as ``S3Upload/sub-dir/photo.jpg``
|
|
58
|
+
|
|
59
|
+
s3_prefix:
|
|
60
|
+
If provided, ``s3_prefix`` will always be attached to each object.
|
|
61
|
+
|
|
62
|
+
If ``s3_prefix`` is set to: ``2025``, then the file path
|
|
63
|
+
``/home/ubuntu/Desktop/S3Upload/sub/photo.jpg`` will be uploaded as ``2025/S3Upload/sub/photo.jpg``
|
|
48
64
|
"""
|
|
49
65
|
self.session = boto3.Session(
|
|
50
66
|
profile_name=profile_name or getenv("PROFILE_NAME"),
|
|
@@ -54,8 +70,9 @@ class Uploader:
|
|
|
54
70
|
)
|
|
55
71
|
self.s3 = self.session.resource(service_name="s3", config=self.RETRY_CONFIG)
|
|
56
72
|
self.logger = logger or default_logger()
|
|
57
|
-
self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "
|
|
58
|
-
self.
|
|
73
|
+
self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
|
|
74
|
+
self.s3_prefix = s3_prefix
|
|
75
|
+
self.exclude_path = exclude_path
|
|
59
76
|
self.bucket_name = bucket_name
|
|
60
77
|
# noinspection PyUnresolvedReferences
|
|
61
78
|
self.bucket: boto3.resources.factory.s3.Bucket = None
|
|
@@ -70,9 +87,9 @@ class Uploader:
|
|
|
70
87
|
BucketNotFound: If bucket name was not found.
|
|
71
88
|
"""
|
|
72
89
|
self.start = time.time()
|
|
73
|
-
if self.
|
|
90
|
+
if self.exclude_path and self.exclude_path not in self.upload_dir:
|
|
74
91
|
raise ValueError(
|
|
75
|
-
f"\n\n\
|
|
92
|
+
f"\n\n\tStart folder {self.exclude_path!r} is not a part of upload directory {self.upload_dir!r}"
|
|
76
93
|
)
|
|
77
94
|
if not self.upload_dir:
|
|
78
95
|
raise ValueError("\n\n\tCannot proceed without an upload directory.")
|
|
@@ -87,7 +104,6 @@ class Uploader:
|
|
|
87
104
|
if self.bucket_name not in buckets:
|
|
88
105
|
raise BucketNotFound(f"\n\n\t{self.bucket_name} was not found in {_alias} account.\n\tAvailable: {buckets}")
|
|
89
106
|
self.upload_dir = os.path.abspath(self.upload_dir)
|
|
90
|
-
self.logger.info("Bucket objects from '%s' will be uploaded to '%s'", self.upload_dir, self.bucket_name)
|
|
91
107
|
# noinspection PyUnresolvedReferences
|
|
92
108
|
self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
|
|
93
109
|
|
|
@@ -119,14 +135,18 @@ class Uploader:
|
|
|
119
135
|
for __path, __directory, __files in os.walk(self.upload_dir):
|
|
120
136
|
for file_ in __files:
|
|
121
137
|
file_path = os.path.join(__path, file_)
|
|
122
|
-
if self.
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
138
|
+
if self.exclude_path:
|
|
139
|
+
file_path = file_path.replace(self.exclude_path, "")
|
|
140
|
+
# Lists in python are ordered, so s3 prefix will get loaded first when provided
|
|
141
|
+
url_parts = []
|
|
142
|
+
if self.s3_prefix:
|
|
143
|
+
url_parts.extend(
|
|
144
|
+
self.s3_prefix.split(os.sep) if os.sep in self.s3_prefix else self.s3_prefix.split("/")
|
|
145
|
+
)
|
|
146
|
+
# Add rest of the file path to parts before normalizing as an S3 object URL
|
|
147
|
+
url_parts.extend(file_path.split(os.sep))
|
|
148
|
+
# Remove falsy values using filter - "None", "bool", "len" or "lambda item: item"
|
|
149
|
+
object_path = urljoin(*filter(None, url_parts))
|
|
130
150
|
files_to_upload[object_path] = file_path
|
|
131
151
|
return files_to_upload
|
|
132
152
|
|
|
@@ -135,6 +155,7 @@ class Uploader:
|
|
|
135
155
|
self.init()
|
|
136
156
|
keys = self._get_files()
|
|
137
157
|
self.logger.debug(keys)
|
|
158
|
+
self.logger.info("%d files from '%s' will be uploaded to '%s'", len(keys), self.upload_dir, self.bucket_name)
|
|
138
159
|
self.logger.info("Initiating upload process.")
|
|
139
160
|
for objectpath, filepath in tqdm(
|
|
140
161
|
keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
|
|
@@ -154,9 +175,15 @@ class Uploader:
|
|
|
154
175
|
max_workers: Number of maximum threads to use.
|
|
155
176
|
"""
|
|
156
177
|
self.init()
|
|
157
|
-
self.logger.info(f"Number of threads: {max_workers}")
|
|
158
178
|
keys = self._get_files()
|
|
159
|
-
self.logger.
|
|
179
|
+
self.logger.debug(keys)
|
|
180
|
+
self.logger.info(
|
|
181
|
+
"%d files from '%s' will be uploaded to '%s' with maximum concurrency of: %d",
|
|
182
|
+
len(keys),
|
|
183
|
+
self.upload_dir,
|
|
184
|
+
self.bucket_name,
|
|
185
|
+
max_workers,
|
|
186
|
+
)
|
|
160
187
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
161
188
|
futures = [executor.submit(self._uploader, *kv) for kv in keys.items()]
|
|
162
189
|
for future in tqdm(
|
s3/utils.py
CHANGED
|
@@ -20,26 +20,11 @@ def getenv(*args, default: str = None) -> str:
|
|
|
20
20
|
return default
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
def
|
|
24
|
-
"""
|
|
25
|
-
|
|
26
|
-
Args:
|
|
27
|
-
filepath: Absolute file path to upload.
|
|
28
|
-
start_folder_name: Folder name to begin object path.
|
|
23
|
+
def urljoin(*args) -> str:
|
|
24
|
+
"""Joins given arguments into a url. Both leading and trailing slashes are stripped from each argument.
|
|
29
25
|
|
|
30
26
|
Returns:
|
|
31
27
|
str:
|
|
32
|
-
|
|
28
|
+
Joined url.
|
|
33
29
|
"""
|
|
34
|
-
|
|
35
|
-
parts = filepath.split(os.sep)
|
|
36
|
-
try:
|
|
37
|
-
# Find index of the folder to start from
|
|
38
|
-
start_index = parts.index(start_folder_name)
|
|
39
|
-
except ValueError:
|
|
40
|
-
# Folder not found in path, fallback to full path or raise error
|
|
41
|
-
raise ValueError(f"Folder '{start_folder_name}' not found in path '{filepath}'")
|
|
42
|
-
# Reconstruct path from start_folder_name onwards
|
|
43
|
-
relative_parts = parts[start_index:]
|
|
44
|
-
# Join with os.sep for system-appropriate separators
|
|
45
|
-
return os.sep.join(relative_parts)
|
|
30
|
+
return "/".join(map(lambda x: str(x).rstrip("/").lstrip("/"), args))
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
s3/__init__.py,sha256=zSLbLrsnVD-tRtiiTcT3JDWzmpnSC9mP6uHMXt2cyEc,66
|
|
2
|
-
s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
|
|
3
|
-
s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
|
|
4
|
-
s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
|
|
5
|
-
s3/uploader.py,sha256=Z2EvtUlR5jlL1xbeQWj4XLBfhTn4yWPm9E8WhPcz6Qk,7056
|
|
6
|
-
s3/utils.py,sha256=swkdwkfn43e8I3dGL9HAGZ-dba3fIeorihVAjTE07wc,1291
|
|
7
|
-
pys3uploader-0.1.0.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
|
|
8
|
-
pys3uploader-0.1.0.dist-info/METADATA,sha256=gkAfOF-hEXYfW9p0PZiJhcrpLjhNorK7LFZNtU_ybrE,7188
|
|
9
|
-
pys3uploader-0.1.0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
10
|
-
pys3uploader-0.1.0.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
|
|
11
|
-
pys3uploader-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|