PyS3Uploader 0.2.0-py3-none-any.whl → 0.4.0a1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PyS3Uploader might be problematic.
- pys3uploader/__init__.py +2 -0
- pys3uploader/logger.py +104 -0
- pys3uploader/metadata.py +11 -0
- pys3uploader/progress.py +39 -0
- pys3uploader/timer.py +54 -0
- pys3uploader/uploader.py +432 -0
- pys3uploader/utils.py +194 -0
- pys3uploader/version.py +1 -0
- {pys3uploader-0.2.0.dist-info → pys3uploader-0.4.0a1.dist-info}/METADATA +55 -10
- pys3uploader-0.4.0a1.dist-info/RECORD +15 -0
- pys3uploader-0.4.0a1.dist-info/top_level.txt +1 -0
- pys3uploader-0.2.0.dist-info/RECORD +0 -11
- pys3uploader-0.2.0.dist-info/top_level.txt +0 -1
- s3/__init__.py +0 -3
- s3/logger.py +0 -45
- s3/uploader.py +0 -264
- s3/utils.py +0 -70
- {s3 → pys3uploader}/exceptions.py +0 -0
- {s3 → pys3uploader}/tree.py +0 -0
- {pys3uploader-0.2.0.dist-info → pys3uploader-0.4.0a1.dist-info}/LICENSE +0 -0
- {pys3uploader-0.2.0.dist-info → pys3uploader-0.4.0a1.dist-info}/WHEEL +0 -0
pys3uploader/utils.py
ADDED
@@ -0,0 +1,194 @@

```python
import math
import os
from typing import Dict, Set, List

from botocore.config import Config

RETRY_CONFIG: Config = Config(
    retries={
        "max_attempts": 10,
        "mode": "adaptive",  # Adaptive retry mode with jitter
        "total_max_attempts": 20,  # Max retries across all requests
    },
    # Adding custom timeouts here:
    connect_timeout=5,  # 5 seconds for establishing a connection
    read_timeout=30,  # 30 seconds to wait for a response from the server
)


class UploadResults(dict):
    """Object to store results of S3 upload.

    >>> UploadResults

    """

    success: List[str] = []
    failed: List[str] = []


def getenv(*args, default: str = None) -> str:
    """Returns the key-ed environment variable or the default value.

    Args:
        args: Environment variable keys to search for.
        default: Default value to return if no environment variable is found.

    Returns:
        str:
        Environment variable value or the default value.
    """
    for key in args:
        if value := os.environ.get(key.upper()) or os.environ.get(key.lower()):
            return value
    return default


def urljoin(*args) -> str:
    """Joins given arguments into a url. Trailing but not leading slashes are stripped for each argument.

    Args:
        args: Parts of the url to join.

    Returns:
        str:
        Joined url.
    """
    return "/".join(map(lambda x: str(x).rstrip("/").lstrip("/"), args))


def convert_to_folder_structure(sequence: Set[str]) -> str:
    """Convert objects in a s3 buckets into a folder like representation.

    Args:
        sequence: Takes either a mutable or immutable sequence as an argument.

    Returns:
        str:
        String representation of the architecture.
    """
    folder_structure = {}
    for item in sequence:
        parts = item.split("/")
        current_level = folder_structure
        for part in parts:
            current_level = current_level.setdefault(part, {})

    def generate_folder_structure(structure: Dict[str, dict], indent: str = "") -> str:
        """Generates the folder like structure.

        Args:
            structure: Structure of folder objects as key-value pairs.
            indent: Required indentation for the ASCII.

        Returns:
            str:
            String representation of the folder structure.
        """
        result = ""
        for i, (key, value) in enumerate(structure.items()):
            if i == len(structure) - 1:
                result += indent + "└── " + key + "\n"
                sub_indent = indent + "    "
            else:
                result += indent + "├── " + key + "\n"
                sub_indent = indent + "│   "
            if value:
                result += generate_folder_structure(value, sub_indent)
        return result

    return generate_folder_structure(folder_structure)


def convert_seconds(seconds: int | float, n_elem: int = 2) -> str:
    """Calculate years, months, days, hours, minutes, seconds, and milliseconds from given input.

    Args:
        seconds: Number of seconds to convert (supports float values).
        n_elem: Number of elements required from the converted list.

    Returns:
        str:
        Returns a humanized string notion of the number of seconds.
    """
    if not seconds:
        return "0s"
    elif seconds < 1:
        return f"{seconds * 1000:.0f}ms"

    seconds_in_year = 365 * 24 * 3600
    seconds_in_month = 30 * 24 * 3600

    years = seconds // seconds_in_year
    seconds %= seconds_in_year

    months = seconds // seconds_in_month
    seconds %= seconds_in_month

    days = seconds // (24 * 3600)
    seconds %= 24 * 3600

    hours = seconds // 3600
    seconds %= 3600

    minutes = seconds // 60
    seconds %= 60

    milliseconds = round((seconds % 1) * 1000)
    seconds = int(seconds)  # Convert remaining seconds to int for display

    time_parts = []

    if years > 0:
        time_parts.append(f"{int(years)} year{'s' if years > 1 else ''}")
    if months > 0:
        time_parts.append(f"{int(months)} month{'s' if months > 1 else ''}")
    if days > 0:
        time_parts.append(f"{int(days)} day{'s' if days > 1 else ''}")
    if hours > 0:
        time_parts.append(f"{int(hours)} hour{'s' if hours > 1 else ''}")
    if minutes > 0:
        time_parts.append(f"{int(minutes)} minute{'s' if minutes > 1 else ''}")
    if seconds > 0 or milliseconds > 0:
        if seconds > 0 and milliseconds > 0:
            time_parts.append(f"{seconds + milliseconds / 1000:.1f}s")
        elif seconds > 0:
            time_parts.append(f"{seconds}s")
        else:
            time_parts.append(f"{milliseconds}ms")

    if len(time_parts) == 1:
        return time_parts[0]

    list_ = time_parts[:n_elem]
    return ", and ".join([", ".join(list_[:-1]), list_[-1]] if len(list_) > 2 else list_)


def format_nos(input_: float) -> int | float:
    """Removes ``.0`` float values.

    Args:
        input_: Strings or integers with ``.0`` at the end.

    Returns:
        int | float:
        Int if found, else returns the received float value.
    """
    return int(input_) if isinstance(input_, float) and input_.is_integer() else input_


def size_converter(byte_size: int | float) -> str:
    """Gets the current memory consumed and converts it to human friendly format.

    Args:
        byte_size: Receives byte size as argument.

    Returns:
        str:
        Converted understandable size.
    """
    if not byte_size:
        return "0 B"
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    index = int(math.floor(math.log(byte_size, 1024)))
    return f"{format_nos(round(byte_size / pow(1024, index), 2))} {size_name[index]}"
```
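The new `RETRY_CONFIG` and the helpers above are easy to exercise. Below is a minimal sketch (not part of the diff) assuming the 0.4.0a1 wheel is installed and AWS credentials are already configured; the expected outputs in the comments follow directly from the code above.

```python
# Usage sketch only; assumes `pip install PyS3Uploader==0.4.0a1` and
# pre-configured AWS credentials. Not shipped with the package.
import boto3

from pys3uploader.utils import RETRY_CONFIG, convert_seconds, size_converter, urljoin

# RETRY_CONFIG is a botocore Config, so it plugs into a session/resource directly:
s3 = boto3.Session().resource(service_name="s3", config=RETRY_CONFIG)

print(urljoin("2025/", "/S3Upload", "photo.jpg"))  # -> '2025/S3Upload/photo.jpg'
print(convert_seconds(3725.5))                     # -> '1 hour, and 2 minutes'
print(size_converter(1536))                        # -> '1.5 KB'
```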
pys3uploader/version.py
ADDED
@@ -0,0 +1 @@

```python
version = "0.4.0a1"
```
{pys3uploader-0.2.0.dist-info → pys3uploader-0.4.0a1.dist-info}/METADATA
MODIFIED

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: PyS3Uploader
-Version: 0.2.0
+Version: 0.4.0a1
 Summary: Python module to upload objects to an S3 bucket.
 Author-email: Vignesh Rao <svignesh1793@gmail.com>
 License: MIT License
@@ -29,7 +29,7 @@ Project-URL: Homepage, https://github.com/thevickypedia/PyS3Uploader
 Project-URL: Docs, https://thevickypedia.github.io/PyS3Uploader/
 Project-URL: Source, https://github.com/thevickypedia/PyS3Uploader
 Project-URL: Bug Tracker, https://github.com/thevickypedia/PyS3Uploader/issues
-Keywords:
+Keywords: pys3uploader
 Classifier: Development Status :: 1 - Planning
 Classifier: Intended Audience :: Information Technology
 Classifier: Operating System :: OS Independent
@@ -39,8 +39,9 @@ Classifier: Topic :: Internet :: File Transfer Protocol (FTP)
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: alive-progress==3.3.*
 Requires-Dist: boto3==1.40.*
-Requires-Dist:
+Requires-Dist: python-dotenv==1.1.*
 Provides-Extra: dev
 Requires-Dist: sphinx==5.1.1; extra == "dev"
 Requires-Dist: pre-commit; extra == "dev"
@@ -75,6 +76,43 @@ Requires-Dist: recommonmark; extra == "dev"
 # PyS3Uploader
 Python module to upload an entire directory to an S3 bucket.
 
+<details>
+<summary><strong>Bucket Policy Required</strong></summary>
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "ListBucketsForExistenceCheck",
+      "Effect": "Allow",
+      "Action": "s3:ListAllMyBuckets",
+      "Resource": "*"
+    },
+    {
+      "Sid": "ListAndUploadToSpecificBucket",
+      "Effect": "Allow",
+      "Action": [
+        "s3:ListBucket",
+        "s3:ListBucketMultipartUploads"
+      ],
+      "Resource": "arn:aws:s3:::bucketname"
+    },
+    {
+      "Sid": "UploadObjectsToBucket",
+      "Effect": "Allow",
+      "Action": [
+        "s3:PutObject",
+        "s3:AbortMultipartUpload",
+        "s3:ListMultipartUploadParts"
+      ],
+      "Resource": "arn:aws:s3:::bucketname/*"
+    }
+  ]
+}
+```
+</details>
+
 ### Installation
 ```shell
 pip install PyS3Uploader
@@ -84,26 +122,26 @@ pip install PyS3Uploader
 
 ##### Upload objects in parallel
 ```python
-import s3
+import pys3uploader
 
 if __name__ == '__main__':
-    wrapper = s3.Uploader(
+    wrapper = pys3uploader.Uploader(
         bucket_name="BUCKET_NAME",
         upload_dir="FULL_PATH_TO_UPLOAD",
-
+        exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
     )
     wrapper.run_in_parallel()
 ```
 
 ##### Upload objects in sequence
 ```python
-import s3
+import pys3uploader
 
 if __name__ == '__main__':
-    wrapper = s3.Uploader(
+    wrapper = pys3uploader.Uploader(
         bucket_name="BUCKET_NAME",
         upload_dir="FULL_PATH_TO_UPLOAD",
-
+        exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
    )
     wrapper.run()
 ```
@@ -114,8 +152,15 @@ if __name__ == '__main__':
 
 #### Optional kwargs
 - **s3_prefix** - S3 object prefix for each file. Defaults to ``None``
-- **
+- **exclude_prefix** - Path in ``upload_dir`` that has to be excluded in object keys. Defaults to `None`
+- **skip_dot_files** - Boolean flag to skip dot files. Defaults to ``True``
+- **overwrite** - Boolean flag to overwrite files present in S3. Defaults to ``False``
+- **file_exclusion** - Sequence of files to exclude during upload. Defaults to ``None``
+- **folder_exclusion** - Sequence of directories to exclude during upload. Defaults to ``None``
 - **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
+- **log_handler** - Choose between `stdout` vs `file` logging. Defaults to `pys3uploader.LogHandler.stdout`
+- **log_level** - Choose the logging level. Defaults to `pys3uploader.LogLevel.debug`
+- **env_file** - Path to a `.env` file for loading environment variables. Defaults to scanning the current directory.
 <br><br>
 - **region_name** - AWS region name. Defaults to the env var `AWS_DEFAULT_REGION`
 - **profile_name** - AWS profile name. Defaults to the env var `PROFILE_NAME`
````
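Taken together, the new README kwargs suggest calls like the hypothetical one below; every value is a placeholder, and only the parameters documented in the list above are assumed to exist.

```python
# Hypothetical invocation based solely on the kwargs documented above;
# bucket name and paths are placeholders.
import pys3uploader

wrapper = pys3uploader.Uploader(
    bucket_name="BUCKET_NAME",
    upload_dir="/home/ubuntu/Desktop/S3Upload",
    s3_prefix="2025",                          # optional object-key prefix
    exclude_prefix="/home/ubuntu/Desktop",     # stripped from object keys
    skip_dot_files=True,                       # documented default
    overwrite=False,                           # documented default
    log_level=pys3uploader.LogLevel.debug,     # documented default
)
wrapper.run_in_parallel()
```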
pys3uploader-0.4.0a1.dist-info/RECORD
ADDED
@@ -0,0 +1,15 @@

```
pys3uploader/__init__.py,sha256=EqMScWbJNV4UWeMg4fMko2KB18xL2CO3a3o_od0H0Lc,124
pys3uploader/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
pys3uploader/logger.py,sha256=z9JEnyf4nHIakey0bAaCgEN7oXOYJYOpskZyM_4s-D4,2678
pys3uploader/metadata.py,sha256=tOOoLh2vISfH-GfH3yBcA_xtEjRwomaw7sCLEaDRK-8,230
pys3uploader/progress.py,sha256=IladNMXLBhkPpxOntpANTam_hC9OWosmNDmdbweDNYM,1195
pys3uploader/timer.py,sha256=qN2XNrGEyP3stsK3McvhE3VvIiUFh7mv4rbp5WDeyVU,1498
pys3uploader/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
pys3uploader/uploader.py,sha256=h5DYQA2yv0fQ2SSyAnAl8SsgJUajmN_o1PdMSqMbACM,18588
pys3uploader/utils.py,sha256=_2RYKUTyrQzwkxo7fSiLb5ASrpjcNpb3kZHqy_wByRk,5755
pys3uploader/version.py,sha256=VAwBBgd_skAqJS9UL1T_xDXryTqN5m58fbTTEXcKxgM,20
pys3uploader-0.4.0a1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
pys3uploader-0.4.0a1.dist-info/METADATA,sha256=FdJdNSesnP1xHfb4il5HBw1pxsPn7ToAYkQ_T3PrIb0,8959
pys3uploader-0.4.0a1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
pys3uploader-0.4.0a1.dist-info/top_level.txt,sha256=lVIFMMoUx7dj_myetBmOUQTJiOzz5VyDqchnQElmrWw,13
pys3uploader-0.4.0a1.dist-info/RECORD,,
```
pys3uploader-0.4.0a1.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@

```
pys3uploader
```
pys3uploader-0.2.0.dist-info/RECORD
DELETED
@@ -1,11 +0,0 @@

```
s3/__init__.py,sha256=yLvvl4-uTLZwhdhCMQpWq5juX_zFuYAfKSf4aB0WjZw,66
s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
s3/uploader.py,sha256=IAlFrEjfBuexrfmBPGN9OZAfHjQuwcGRzWi2es0r_fU,11154
s3/utils.py,sha256=0kcG0aE2olHhC8thaUEwx2J8tOI2-2TGCk6E6U-PiKw,2058
pys3uploader-0.2.0.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
pys3uploader-0.2.0.dist-info/METADATA,sha256=IXSmHXJJndlnd_6MHlpZrcVILPni8VUbVNJYQEjMIR8,7286
pys3uploader-0.2.0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
pys3uploader-0.2.0.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
pys3uploader-0.2.0.dist-info/RECORD,,
```
pys3uploader-0.2.0.dist-info/top_level.txt
DELETED
@@ -1 +0,0 @@

```
s3
```
s3/__init__.py
DELETED
s3/logger.py
DELETED
@@ -1,45 +0,0 @@

```python
"""Loads a default logger with StreamHandler set to DEBUG mode.

>>> logging.Logger

"""

import logging


def default_handler() -> logging.StreamHandler:
    """Creates a ``StreamHandler`` and assigns a default format to it.

    Returns:
        logging.StreamHandler:
        Returns an instance of the ``StreamHandler`` object.
    """
    handler = logging.StreamHandler()
    handler.setFormatter(fmt=default_format())
    return handler


def default_format() -> logging.Formatter:
    """Creates a logging ``Formatter`` with a custom message and datetime format.

    Returns:
        logging.Formatter:
        Returns an instance of the ``Formatter`` object.
    """
    return logging.Formatter(
        fmt="%(asctime)s - %(levelname)s - [%(module)s:%(lineno)d] - %(funcName)s - %(message)s",
        datefmt="%b-%d-%Y %I:%M:%S %p",
    )


def default_logger() -> logging.Logger:
    """Creates a default logger with debug mode enabled.

    Returns:
        logging.Logger:
        Returns an instance of the ``Logger`` object.
    """
    logger = logging.getLogger(__name__)
    logger.addHandler(hdlr=default_handler())
    logger.setLevel(level=logging.DEBUG)
    return logger
```
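For reference, the deleted module was consumed roughly as sketched below, against the old 0.2.0 layout; `s3.logger` no longer exists in 0.4.0a1 (its replacement lives in pys3uploader/logger.py, per the file list above).

```python
# Sketch against the removed 0.2.0 layout; `s3.logger` is gone in 0.4.0a1.
from s3.logger import default_logger

logger = default_logger()  # StreamHandler + custom formatter, level DEBUG
logger.debug("upload started")
```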
s3/uploader.py
DELETED
@@ -1,264 +0,0 @@

```python
import logging
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict

import boto3.resources.factory
from botocore.config import Config
from botocore.exceptions import ClientError
from tqdm import tqdm

from s3.exceptions import BucketNotFound
from s3.logger import default_logger
from s3.utils import UploadResults, convert_to_folder_structure, getenv, urljoin


class Uploader:
    """Initiates Uploader object to upload entire directory to S3.

    >>> Uploader

    """

    RETRY_CONFIG: Config = Config(retries={"max_attempts": 10, "mode": "standard"})

    def __init__(
        self,
        bucket_name: str,
        upload_dir: str,
        s3_prefix: str = None,
        exclude_path: str = None,
        overwrite: bool = False,
        region_name: str = None,
        profile_name: str = None,
        aws_access_key_id: str = None,
        aws_secret_access_key: str = None,
        logger: logging.Logger = None,
    ):
        """Initiates all the necessary args and creates a boto3 session with retry logic.

        Args:
            bucket_name: Name of the bucket.
            upload_dir: Full path of the directory to be uploaded.
            s3_prefix: Particular bucket prefix within which the upload should happen.
            exclude_path: Full directory path to exclude from S3 object prefix.
            overwrite: Boolean flag to overwrite files in S3.
            region_name: Name of the AWS region.
            profile_name: AWS profile name.
            aws_access_key_id: AWS access key ID.
            aws_secret_access_key: AWS secret access key.
            logger: Bring your own logger.

        See Also:
            exclude_path:
                When upload directory is "/home/ubuntu/Desktop/S3Upload", each file will naturally have the full prefix.
                However, this behavior can be avoided by specifying the ``exclude_path`` parameter.

                If exclude_path is set to: ``/home/ubuntu/Desktop``, then the file path
                ``/home/ubuntu/Desktop/S3Upload/sub-dir/photo.jpg`` will be uploaded as ``S3Upload/sub-dir/photo.jpg``

            s3_prefix:
                If provided, ``s3_prefix`` will always be attached to each object.

                If ``s3_prefix`` is set to: ``2025``, then the file path
                ``/home/ubuntu/Desktop/S3Upload/sub/photo.jpg`` will be uploaded as ``2025/S3Upload/sub/photo.jpg``
        """
        self.session = boto3.Session(
            profile_name=profile_name or getenv("PROFILE_NAME"),
            region_name=region_name or getenv("AWS_DEFAULT_REGION"),
            aws_access_key_id=aws_access_key_id or getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY"),
        )
        self.s3 = self.session.resource(service_name="s3", config=self.RETRY_CONFIG)

        self.logger = logger or default_logger()

        self.bucket_name = bucket_name
        self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
        self.s3_prefix = s3_prefix
        self.exclude_path = exclude_path
        self.overwrite = overwrite

        self.results = UploadResults()
        self.start = time.time()

        # noinspection PyUnresolvedReferences
        self.bucket: boto3.resources.factory.s3.Bucket = None
        # noinspection PyUnresolvedReferences
        self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = []
        self.object_size_map: Dict[str, int] = {}

    def init(self) -> None:
        """Instantiates the bucket instance.

        Raises:
            ValueError: If no bucket name was passed.
            BucketNotFound: If bucket name was not found.
        """
        self.start = time.time()
        if self.exclude_path and self.exclude_path not in self.upload_dir:
            raise ValueError(
                f"\n\n\tStart folder {self.exclude_path!r} is not a part of upload directory {self.upload_dir!r}"
            )
        if not self.upload_dir:
            raise ValueError("\n\n\tCannot proceed without an upload directory.")
        try:
            assert os.path.exists(self.upload_dir)
        except AssertionError:
            raise ValueError(f"\n\n\tPath not found: {self.upload_dir}")
        buckets = [bucket.name for bucket in self.s3.buckets.all()]
        if not self.bucket_name:
            raise ValueError(f"\n\n\tCannot proceed without a bucket name.\n\tAvailable: {buckets}")
        _account_id, _alias = self.session.resource(service_name="iam").CurrentUser().arn.split("/")
        if self.bucket_name not in buckets:
            raise BucketNotFound(f"\n\n\t{self.bucket_name} was not found in {_alias} account.\n\tAvailable: {buckets}")
        self.upload_dir = os.path.abspath(self.upload_dir)
        # noinspection PyUnresolvedReferences
        self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
        # noinspection PyUnresolvedReferences
        self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = [obj for obj in self.bucket.objects.all()]
        self.object_size_map = {obj.key: obj.size for obj in self.bucket_objects}

    def exit(self) -> None:
        """Exits after printing results, and run time."""
        total = self.results.success + self.results.failed
        self.logger.info(
            "Total number of uploads: %d, success: %d, failed: %d", total, self.results.success, self.results.failed
        )
        self.logger.info("Run Time: %.2fs", time.time() - self.start)

    def _proceed_to_upload(self, filepath: str, objectpath: str) -> bool:
        """Compares file size if the object already exists in S3.

        Args:
            filepath: Source filepath.
            objectpath: S3 object path.

        Returns:
            bool:
            Returns a boolean flag to indicate upload flag.
        """
        if self.overwrite:
            return True
        # Indicates that the object path already exists in S3
        if object_size := self.object_size_map.get(objectpath):
            try:
                file_size = os.path.getsize(filepath)
            except (OSError, PermissionError) as error:
                self.logger.error(error)
                return True
            if object_size == file_size:
                self.logger.info("S3 object %s exists, and size [%d] matches, skipping..", objectpath, object_size)
                return False
            self.logger.info(
                "S3 object %s exists, but size mismatch. Local: [%d], S3: [%d]", objectpath, file_size, object_size
            )
        return True

    def _uploader(self, filepath: str, objectpath: str) -> None:
        """Uploads the filepath to the specified S3 bucket.

        Args:
            filepath: Filepath to upload.
            objectpath: Object path ref in S3.
        """
        if self._proceed_to_upload(filepath, objectpath):
            self.bucket.upload_file(filepath, objectpath)

    def _get_files(self) -> Dict[str, str]:
        """Get a mapping for all the file path and object paths in upload directory.

        Returns:
            Dict[str, str]:
            Returns a key-value pair of filepath and objectpath.
        """
        files_to_upload = {}
        for __path, __directory, __files in os.walk(self.upload_dir):
            for file_ in __files:
                file_path = os.path.join(__path, file_)
                if self.exclude_path:
                    relative_path = file_path.replace(self.exclude_path, "")
                else:
                    relative_path = file_path
                # Lists in python are ordered, so s3 prefix will get loaded first when provided
                url_parts = []
                if self.s3_prefix:
                    url_parts.extend(
                        self.s3_prefix.split(os.sep) if os.sep in self.s3_prefix else self.s3_prefix.split("/")
                    )
                # Add rest of the file path to parts before normalizing as an S3 object URL
                url_parts.extend(relative_path.split(os.sep))
                # Remove falsy values using filter - "None", "bool", "len" or "lambda item: item"
                object_path = urljoin(*filter(None, url_parts))
                files_to_upload[file_path] = object_path
        return files_to_upload

    def run(self) -> None:
        """Initiates object upload in a traditional loop."""
        self.init()
        keys = self._get_files()
        self.logger.debug(keys)
        self.logger.info("%d files from '%s' will be uploaded to '%s'", len(keys), self.upload_dir, self.bucket_name)
        self.logger.info("Initiating upload process.")
        for objectpath, filepath in tqdm(
            keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
        ):
            try:
                self._uploader(filepath=filepath, objectpath=objectpath)
                self.results.success += 1
            except ClientError as error:
                self.logger.error(error)
                self.results.failed += 1
        self.exit()

    def run_in_parallel(self, max_workers: int = 5) -> None:
        """Initiates upload in multi-threading.

        Args:
            max_workers: Number of maximum threads to use.
        """
        self.init()
        keys = self._get_files()
        self.logger.debug(keys)
        self.logger.info(
            "%d files from '%s' will be uploaded to '%s' with maximum concurrency of: %d",
            len(keys),
            self.upload_dir,
            self.bucket_name,
            max_workers,
        )
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(self._uploader, **dict(filepath=filepath, objectpath=objectpath))
                for filepath, objectpath in keys.items()
            ]
            for future in tqdm(
                iterable=as_completed(futures),
                total=len(futures),
                desc=f"Uploading files to {self.bucket_name}",
                unit="files",
                leave=True,
            ):
                try:
                    future.result()
                    self.results.success += 1
                except ClientError as error:
                    self.logger.error(f"Upload failed: {error}")
                    self.results.failed += 1
        self.exit()

    def get_bucket_structure(self) -> str:
        """Gets all the objects in an S3 bucket and forms it into a hierarchical folder like representation.

        Returns:
            str:
            Returns a hierarchical folder like representation of the chosen bucket.
        """
        self.init()
        # Using list and set will yield the same results but using set we can isolate directories from files
        return convert_to_folder_structure(set(obj.key for obj in self.bucket_objects))

    def print_bucket_structure(self) -> None:
        """Prints all the objects in an S3 bucket with a folder like representation."""
        print(self.get_bucket_structure())
```