PyS3Uploader 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pys3uploader-0.2.1.dist-info → pys3uploader-0.2.3.dist-info}/METADATA +8 -4
- pys3uploader-0.2.3.dist-info/RECORD +11 -0
- s3/__init__.py +1 -1
- s3/uploader.py +59 -17
- s3/utils.py +75 -12
- pys3uploader-0.2.1.dist-info/RECORD +0 -11
- {pys3uploader-0.2.1.dist-info → pys3uploader-0.2.3.dist-info}/LICENSE +0 -0
- {pys3uploader-0.2.1.dist-info → pys3uploader-0.2.3.dist-info}/WHEEL +0 -0
- {pys3uploader-0.2.1.dist-info → pys3uploader-0.2.3.dist-info}/top_level.txt +0 -0
{pys3uploader-0.2.1.dist-info → pys3uploader-0.2.3.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: PyS3Uploader
-Version: 0.2.1
+Version: 0.2.3
 Summary: Python module to upload objects to an S3 bucket.
 Author-email: Vignesh Rao <svignesh1793@gmail.com>
 License: MIT License
@@ -40,6 +40,7 @@ Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: boto3==1.40.*
+Requires-Dist: python-dotenv==1.1.*
 Requires-Dist: tqdm==4.67.*
 Provides-Extra: dev
 Requires-Dist: sphinx==5.1.1; extra == "dev"
@@ -90,7 +91,7 @@ if __name__ == '__main__':
     wrapper = s3.Uploader(
         bucket_name="BUCKET_NAME",
         upload_dir="FULL_PATH_TO_UPLOAD",
-
+        exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
     )
     wrapper.run_in_parallel()
 ```
@@ -103,7 +104,7 @@ if __name__ == '__main__':
     wrapper = s3.Uploader(
         bucket_name="BUCKET_NAME",
         upload_dir="FULL_PATH_TO_UPLOAD",
-
+        exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
     )
     wrapper.run()
 ```
@@ -114,10 +115,13 @@ if __name__ == '__main__':
 
 #### Optional kwargs
 - **s3_prefix** - S3 object prefix for each file. Defaults to ``None``
-- **
+- **exclude_prefix** - Path in ``upload_dir`` that has to be excluded in object keys. Defaults to `None`
 - **skip_dot_files** - Boolean flag to skip dot files. Defaults to ``True``
 - **overwrite** - Boolean flag to overwrite files present in S3. Defaults to ``False``
+- **file_exclusion** - Sequence of files to exclude during upload. Defaults to ``None``
+- **folder_exclusion** - Sequence of directories to exclude during upload. Defaults to ``None``
 - **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
+- **env_file** - Path to a `.env` file for loading environment variables. Defaults to scanning the current directory.
 <br><br>
 - **region_name** - AWS region name. Defaults to the env var `AWS_DEFAULT_REGION`
 - **profile_name** - AWS profile name. Defaults to the env var `PROFILE_NAME`
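Taken together, the Optional kwargs above describe the new 0.2.3 surface. A minimal sketch (not taken from the package README) of how they might be combined; the bucket name, paths, and exclusion values are placeholders:

```python
import s3

wrapper = s3.Uploader(
    bucket_name="my-backup-bucket",              # placeholder bucket
    upload_dir="/home/ubuntu/Desktop/S3Upload",  # placeholder directory
    exclude_prefix="/home/ubuntu/Desktop",       # stripped from every object key
    file_exclusion=["Thumbs.db"],                # skip these file names
    folder_exclusion=["__pycache__"],            # skip these directory names
    env_file="/home/ubuntu/.env",                # hypothetical dotenv file holding AWS credentials
)
wrapper.run_in_parallel()
```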
pys3uploader-0.2.3.dist-info/RECORD
ADDED

@@ -0,0 +1,11 @@
+s3/__init__.py,sha256=aZ2woJ8TD2tgqXi0ElG-wWwJWoQLIdqTdm50FLaxL8w,66
+s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
+s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
+s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
+s3/uploader.py,sha256=KxrWbIInXxXQszP_uJLf_dBI5rUNjNnhco3gr9Vdrto,13767
+s3/utils.py,sha256=NbF28CYviK_St5qd1EOumMVyus9BvQON7clUFeR_SEQ,4473
+pys3uploader-0.2.3.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
+pys3uploader-0.2.3.dist-info/METADATA,sha256=ae2lA8b7dsGWZSMfB8w4joDiAlaE6Wk1f3p1Fxywkc4,7795
+pys3uploader-0.2.3.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
+pys3uploader-0.2.3.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
+pys3uploader-0.2.3.dist-info/RECORD,,
s3/__init__.py
CHANGED
s3/uploader.py
CHANGED
@@ -2,9 +2,10 @@ import logging
 import os
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Dict
+from typing import Dict, Iterable
 
 import boto3.resources.factory
+import dotenv
 from botocore.config import Config
 from botocore.exceptions import ClientError
 from tqdm import tqdm
@@ -14,6 +15,7 @@ from s3.logger import default_logger
 from s3.utils import (
     RETRY_CONFIG,
     UploadResults,
+    convert_seconds,
     convert_to_folder_structure,
     getenv,
     urljoin,
@@ -32,15 +34,18 @@ class Uploader:
         bucket_name: str,
         upload_dir: str,
         s3_prefix: str = None,
-
+        exclude_prefix: str = None,
         skip_dot_files: bool = True,
         overwrite: bool = False,
+        file_exclusion: Iterable[str] = None,
+        folder_exclusion: Iterable[str] = None,
         region_name: str = None,
         profile_name: str = None,
         aws_access_key_id: str = None,
         aws_secret_access_key: str = None,
         retry_config: Config = RETRY_CONFIG,
         logger: logging.Logger = None,
+        env_file: str = None,
     ):
         """Initiates all the necessary args and creates a boto3 session with retry logic.
 
@@ -48,14 +53,17 @@
             bucket_name: Name of the bucket.
             upload_dir: Full path of the directory to be uploaded.
             s3_prefix: Particular bucket prefix within which the upload should happen.
-
+            exclude_prefix: Full directory path to exclude from S3 object prefix.
             skip_dot_files: Boolean flag to skip dot files.
             overwrite: Boolean flag to overwrite files in S3.
+            file_exclusion: Sequence of files to exclude during upload.
+            folder_exclusion: Sequence of directories to exclude during upload.
             region_name: Name of the AWS region.
             profile_name: AWS profile name.
             aws_access_key_id: AWS access key ID.
             aws_secret_access_key: AWS secret access key.
             logger: Bring your own logger.
+            env_file: Dotenv file (.env) filepath to load environment variables.
 
         See Also:
             s3_prefix:
@@ -64,29 +72,56 @@
                 If ``s3_prefix`` is set to: ``2025``, then the file path
                 ``/home/ubuntu/Desktop/S3Upload/sub/photo.jpg`` will be uploaded as ``2025/S3Upload/sub/photo.jpg``
 
-
+            exclude_prefix:
                 When upload directory is "/home/ubuntu/Desktop/S3Upload", each file will naturally have the full prefix.
-                However, this behavior can be avoided by specifying the ``
+                However, this behavior can be avoided by specifying the ``exclude_prefix`` parameter.
 
-                If
+                If exclude_prefix is set to: ``/home/ubuntu/Desktop``, then the file path
                 ``/home/ubuntu/Desktop/S3Upload/sub-dir/photo.jpg`` will be uploaded as ``S3Upload/sub-dir/photo.jpg``
+
+            env_file:
+                Environment variables can be loaded from a .env file.
+                The filepath can be set as ``env_file`` during object instantiation or as an environment variable.
+                If a filepath is provided, PyS3Uploader loads it directly or searches the root directory for the file.
+                If no filepath is provided, PyS3Uploader searches the current directory for a .env file.
         """
+        self.logger = logger or default_logger()
+        self.env_file = env_file or getenv("ENV_FILE", default=".env")
+
+        # Check for env_file in current working directory
+        if os.path.isfile(self.env_file):
+            self.logger.debug("Loading env file: %s", self.env_file)
+            dotenv.load_dotenv(dotenv_path=self.env_file, override=True)
+        # Find the env_file from root
+        elif env_file := dotenv.find_dotenv(self.env_file, raise_error_if_not_found=False):
+            self.logger.debug("Loading env file: %s", env_file)
+            dotenv.load_dotenv(dotenv_path=env_file, override=True)
+        else:
+            # Scan current working directory for any .env files
+            for file in os.listdir():
+                if file.endswith(".env"):
+                    self.logger.debug("Loading env file: %s", file)
+                    dotenv.load_dotenv(dotenv_path=file, override=True)
+                    break
+            else:
+                self.logger.debug("No .env files found to load")
+
         self.session = boto3.Session(
-            profile_name=profile_name or getenv("PROFILE_NAME"),
+            profile_name=profile_name or getenv("PROFILE_NAME", "AWS_PROFILE_NAME"),
             region_name=region_name or getenv("AWS_DEFAULT_REGION"),
             aws_access_key_id=aws_access_key_id or getenv("AWS_ACCESS_KEY_ID"),
             aws_secret_access_key=aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY"),
         )
         self.s3 = self.session.resource(service_name="s3", config=retry_config)
 
-        self.logger = logger or default_logger()
-
         self.bucket_name = bucket_name
-        self.upload_dir = upload_dir
+        self.upload_dir = upload_dir
         self.s3_prefix = s3_prefix
-        self.
+        self.exclude_prefix = exclude_prefix
         self.skip_dot_files = skip_dot_files
         self.overwrite = overwrite
+        self.file_exclusion = file_exclusion or []
+        self.folder_exclusion = folder_exclusion or []
 
         self.results = UploadResults()
         self.start = time.time()
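The constructor above resolves credentials in two layers: an optional .env file (the explicit env_file argument, the ENV_FILE variable, or a scan of the working directory) followed by per-field environment-variable fallbacks. A hedged sketch of leaning on those fallbacks instead of passing keys explicitly; the paths and bucket name are placeholders:

```python
import os
import s3

# Point the uploader at a dotenv file via the ENV_FILE variable instead of the
# env_file kwarg; that file would define AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
# and AWS_DEFAULT_REGION (hypothetical path below).
os.environ["ENV_FILE"] = "/home/ubuntu/secrets/aws.env"

wrapper = s3.Uploader(
    bucket_name="my-backup-bucket",              # placeholder
    upload_dir="/home/ubuntu/Desktop/S3Upload",  # placeholder
)
wrapper.run()
```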
@@ -105,9 +140,9 @@
             BucketNotFound: If bucket name was not found.
         """
         self.start = time.time()
-        if self.
+        if self.exclude_prefix and self.exclude_prefix not in self.upload_dir:
             raise ValueError(
-                f"\n\n\tStart folder {self.
+                f"\n\n\tStart folder {self.exclude_prefix!r} is not a part of upload directory {self.upload_dir!r}"
             )
         if not self.upload_dir:
             raise ValueError("\n\n\tCannot proceed without an upload directory.")
@@ -134,7 +169,7 @@
         self.logger.info(
             "Total number of uploads: %d, success: %d, failed: %d", total, self.results.success, self.results.failed
         )
-        self.logger.info("Run
+        self.logger.info("Run time: %s", convert_seconds(time.time() - self.start))
 
     def _proceed_to_upload(self, filepath: str, objectpath: str) -> bool:
         """Compares file size if the object already exists in S3.
@@ -183,13 +218,20 @@
         """
         files_to_upload = {}
         for __path, __directory, __files in os.walk(self.upload_dir):
+            scan_dir = os.path.split(__path)[-1]
+            if scan_dir in self.folder_exclusion:
+                self.logger.info("Skipping '%s' honoring folder exclusion", scan_dir)
+                continue
             for file_ in __files:
+                if file_ in self.file_exclusion:
+                    self.logger.info("Skipping '%s' honoring file exclusion", file_)
+                    continue
                 if self.skip_dot_files and file_.startswith("."):
                     self.logger.info("Skipping dot file: %s", file_)
                     continue
                 file_path = os.path.join(__path, file_)
-                if self.
-                    relative_path = file_path.replace(self.
+                if self.exclude_prefix:
+                    relative_path = file_path.replace(self.exclude_prefix, "")
                 else:
                     relative_path = file_path
                 # Lists in python are ordered, so s3 prefix will get loaded first when provided
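For clarity, the walk filter above matches folder_exclusion against the name of the directory being scanned (os.path.split(__path)[-1]) rather than against full paths, and it does not prune os.walk, so children of a skipped directory are still visited. A standalone sketch (not part of the package) that mirrors that behavior:

```python
import os
from typing import Iterable, Iterator

def filtered_files(
    upload_dir: str,
    folder_exclusion: Iterable[str] = (),
    file_exclusion: Iterable[str] = (),
    skip_dot_files: bool = True,
) -> Iterator[str]:
    """Yield the file paths the 0.2.3 filter would keep (illustrative only)."""
    for path, _dirs, files in os.walk(upload_dir):
        if os.path.split(path)[-1] in folder_exclusion:
            continue  # skip files directly inside this directory; sub-directories are still walked
        for name in files:
            if name in file_exclusion:
                continue
            if skip_dot_files and name.startswith("."):
                continue
            yield os.path.join(path, name)

# '__pycache__' directories and 'Thumbs.db' files are skipped wherever they appear.
for f in filtered_files("/home/ubuntu/Desktop/S3Upload", ["__pycache__"], ["Thumbs.db"]):
    print(f)
```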
@@ -212,7 +254,7 @@
         self.logger.debug(keys)
         self.logger.info("%d files from '%s' will be uploaded to '%s'", len(keys), self.upload_dir, self.bucket_name)
         self.logger.info("Initiating upload process.")
-        for
+        for filepath, objectpath in tqdm(
             keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
         ):
             try:
s3/utils.py
CHANGED
@@ -3,18 +3,6 @@ from typing import Dict, Set
 
 from botocore.config import Config
 
-
-class UploadResults(dict):
-    """Object to store results of S3 upload.
-
-    >>> UploadResults
-
-    """
-
-    success: int = 0
-    failed: int = 0
-
-
 RETRY_CONFIG: Config = Config(
     retries={
         "max_attempts": 10,
@@ -27,6 +15,17 @@ RETRY_CONFIG: Config = Config(
 )
 
 
+class UploadResults(dict):
+    """Object to store results of S3 upload.
+
+    >>> UploadResults
+
+    """
+
+    success: int = 0
+    failed: int = 0
+
+
 def getenv(*args, default: str = None) -> str:
     """Returns the key-ed environment variable or the default value."""
     for key in args:
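UploadResults itself is unchanged; it has only moved below RETRY_CONFIG. It remains a plain dict subclass carrying two counter attributes, so a minimal illustration (not from the package docs) of how the counters behave:

```python
from s3.utils import UploadResults

results = UploadResults()
results.success += 1   # reads the class-level default (0), then stores 1 on the instance
print(results.success, results.failed)  # 1 0
```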
@@ -82,3 +81,67 @@ def convert_to_folder_structure(sequence: Set[str]) -> str:
         return result
 
     return generate_folder_structure(folder_structure)
+
+
+def convert_seconds(seconds: int | float, n_elem: int = 2) -> str:
+    """Calculate years, months, days, hours, minutes, seconds, and milliseconds from given input.
+
+    Args:
+        seconds: Number of seconds to convert (supports float values).
+        n_elem: Number of elements required from the converted list.
+
+    Returns:
+        str:
+        Returns a humanized string notion of the number of seconds.
+    """
+    if not seconds:
+        return "0s"
+    elif seconds < 1:
+        return f"{seconds * 1000:.0f}ms"
+
+    seconds_in_year = 365 * 24 * 3600
+    seconds_in_month = 30 * 24 * 3600
+
+    years = seconds // seconds_in_year
+    seconds %= seconds_in_year
+
+    months = seconds // seconds_in_month
+    seconds %= seconds_in_month
+
+    days = seconds // (24 * 3600)
+    seconds %= 24 * 3600
+
+    hours = seconds // 3600
+    seconds %= 3600
+
+    minutes = seconds // 60
+    seconds %= 60
+
+    milliseconds = round((seconds % 1) * 1000)
+    seconds = int(seconds)  # Convert remaining seconds to int for display
+
+    time_parts = []
+
+    if years > 0:
+        time_parts.append(f"{int(years)} year{'s' if years > 1 else ''}")
+    if months > 0:
+        time_parts.append(f"{int(months)} month{'s' if months > 1 else ''}")
+    if days > 0:
+        time_parts.append(f"{int(days)} day{'s' if days > 1 else ''}")
+    if hours > 0:
+        time_parts.append(f"{int(hours)} hour{'s' if hours > 1 else ''}")
+    if minutes > 0:
+        time_parts.append(f"{int(minutes)} minute{'s' if minutes > 1 else ''}")
+    if seconds > 0 or milliseconds > 0:
+        if seconds > 0 and milliseconds > 0:
+            time_parts.append(f"{seconds + milliseconds / 1000:.1f}s")
+        elif seconds > 0:
+            time_parts.append(f"{seconds}s")
+        else:
+            time_parts.append(f"{milliseconds}ms")
+
+    if len(time_parts) == 1:
+        return time_parts[0]
+
+    list_ = time_parts[:n_elem]
+    return ", and ".join([", ".join(list_[:-1]), list_[-1]] if len(list_) > 2 else list_)
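Based on the implementation above, a few illustrative calls and the outputs they should produce (n_elem caps how many of the largest units are reported):

```python
from s3.utils import convert_seconds

print(convert_seconds(0.25))    # '250ms'
print(convert_seconds(75))      # '1 minute, and 15s'
print(convert_seconds(90061))   # '1 day, and 1 hour'  (n_elem=2 keeps the two largest parts)
```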
pys3uploader-0.2.1.dist-info/RECORD
DELETED

@@ -1,11 +0,0 @@
-s3/__init__.py,sha256=IqcPR9iWMw0GDBEmKvLzW7P-AhInTkwRklkvYgiT1Xc,66
-s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
-s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
-s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
-s3/uploader.py,sha256=v6TGdm8EcAldoIw3GOAwkuoUzi9b9IRj8b94hl1Pkyw,11454
-s3/utils.py,sha256=dd1OeLbswLzFVyjYiXixkJlFsoGWRtRCOHha6wLG5zQ,2485
-pys3uploader-0.2.1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
-pys3uploader-0.2.1.dist-info/METADATA,sha256=NA7x6YqpWEKvn3XEYzVug7XTl1vPcwiphUzWtMvlzHE,7449
-pys3uploader-0.2.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
-pys3uploader-0.2.1.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
-pys3uploader-0.2.1.dist-info/RECORD,,
{pys3uploader-0.2.1.dist-info → pys3uploader-0.2.3.dist-info}/LICENSE
File without changes

{pys3uploader-0.2.1.dist-info → pys3uploader-0.2.3.dist-info}/WHEEL
File without changes

{pys3uploader-0.2.1.dist-info → pys3uploader-0.2.3.dist-info}/top_level.txt
File without changes