PyS3Uploader 0.2.0__py3-none-any.whl → 0.4.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


pys3uploader/utils.py ADDED
@@ -0,0 +1,194 @@
+ import math
+ import os
+ from typing import Dict, Set, List
+
+ from botocore.config import Config
+
+ RETRY_CONFIG: Config = Config(
+     retries={
+         "max_attempts": 10,
+         "mode": "adaptive",  # Adaptive retry mode with jitter
+         "total_max_attempts": 20,  # Max retries across all requests
+     },
+     # Adding custom timeouts here:
+     connect_timeout=5,  # 5 seconds for establishing a connection
+     read_timeout=30,  # 30 seconds to wait for a response from the server
+ )
+
+
+ class UploadResults(dict):
+     """Object to store results of S3 upload.
+
+     >>> UploadResults
+
+     """
+
+     success: List[str] = []
+     failed: List[str] = []
+
+
+ def getenv(*args, default: str = None) -> str:
+     """Returns the key-ed environment variable or the default value.
+
+     Args:
+         args: Environment variable keys to search for.
+         default: Default value to return if no environment variable is found.
+
+     Returns:
+         str:
+             Environment variable value or the default value.
+     """
+     for key in args:
+         if value := os.environ.get(key.upper()) or os.environ.get(key.lower()):
+             return value
+     return default
+
+
+ def urljoin(*args) -> str:
+     """Joins given arguments into a url. Trailing but not leading slashes are stripped for each argument.
+
+     Args:
+         args: Parts of the url to join.
+
+     Returns:
+         str:
+             Joined url.
+     """
+     return "/".join(map(lambda x: str(x).rstrip("/").lstrip("/"), args))
+
+
+ def convert_to_folder_structure(sequence: Set[str]) -> str:
+     """Convert objects in a s3 buckets into a folder like representation.
+
+     Args:
+         sequence: Takes either a mutable or immutable sequence as an argument.
+
+     Returns:
+         str:
+             String representation of the architecture.
+     """
+     folder_structure = {}
+     for item in sequence:
+         parts = item.split("/")
+         current_level = folder_structure
+         for part in parts:
+             current_level = current_level.setdefault(part, {})
+
+     def generate_folder_structure(structure: Dict[str, dict], indent: str = "") -> str:
+         """Generates the folder like structure.
+
+         Args:
+             structure: Structure of folder objects as key-value pairs.
+             indent: Required indentation for the ASCII.
+
+         Returns:
+             str:
+                 String representation of the folder structure.
+         """
+         result = ""
+         for i, (key, value) in enumerate(structure.items()):
+             if i == len(structure) - 1:
+                 result += indent + "└── " + key + "\n"
+                 sub_indent = indent + " "
+             else:
+                 result += indent + "├── " + key + "\n"
+                 sub_indent = indent + "│ "
+             if value:
+                 result += generate_folder_structure(value, sub_indent)
+         return result
+
+     return generate_folder_structure(folder_structure)
+
+
+ def convert_seconds(seconds: int | float, n_elem: int = 2) -> str:
+     """Calculate years, months, days, hours, minutes, seconds, and milliseconds from given input.
+
+     Args:
+         seconds: Number of seconds to convert (supports float values).
+         n_elem: Number of elements required from the converted list.
+
+     Returns:
+         str:
+             Returns a humanized string notion of the number of seconds.
+     """
+     if not seconds:
+         return "0s"
+     elif seconds < 1:
+         return f"{seconds * 1000:.0f}ms"
+
+     seconds_in_year = 365 * 24 * 3600
+     seconds_in_month = 30 * 24 * 3600
+
+     years = seconds // seconds_in_year
+     seconds %= seconds_in_year
+
+     months = seconds // seconds_in_month
+     seconds %= seconds_in_month
+
+     days = seconds // (24 * 3600)
+     seconds %= 24 * 3600
+
+     hours = seconds // 3600
+     seconds %= 3600
+
+     minutes = seconds // 60
+     seconds %= 60
+
+     milliseconds = round((seconds % 1) * 1000)
+     seconds = int(seconds)  # Convert remaining seconds to int for display
+
+     time_parts = []
+
+     if years > 0:
+         time_parts.append(f"{int(years)} year{'s' if years > 1 else ''}")
+     if months > 0:
+         time_parts.append(f"{int(months)} month{'s' if months > 1 else ''}")
+     if days > 0:
+         time_parts.append(f"{int(days)} day{'s' if days > 1 else ''}")
+     if hours > 0:
+         time_parts.append(f"{int(hours)} hour{'s' if hours > 1 else ''}")
+     if minutes > 0:
+         time_parts.append(f"{int(minutes)} minute{'s' if minutes > 1 else ''}")
+     if seconds > 0 or milliseconds > 0:
+         if seconds > 0 and milliseconds > 0:
+             time_parts.append(f"{seconds + milliseconds / 1000:.1f}s")
+         elif seconds > 0:
+             time_parts.append(f"{seconds}s")
+         else:
+             time_parts.append(f"{milliseconds}ms")
+
+     if len(time_parts) == 1:
+         return time_parts[0]
+
+     list_ = time_parts[:n_elem]
+     return ", and ".join([", ".join(list_[:-1]), list_[-1]] if len(list_) > 2 else list_)
+
+
+ def format_nos(input_: float) -> int | float:
+     """Removes ``.0`` float values.
+
+     Args:
+         input_: Strings or integers with ``.0`` at the end.
+
+     Returns:
+         int | float:
+             Int if found, else returns the received float value.
+     """
+     return int(input_) if isinstance(input_, float) and input_.is_integer() else input_
+
+
+ def size_converter(byte_size: int | float) -> str:
+     """Gets the current memory consumed and converts it to human friendly format.
+
+     Args:
+         byte_size: Receives byte size as argument.
+
+     Returns:
+         str:
+             Converted understandable size.
+     """
+     if not byte_size:
+         return "0 B"
+     size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
+     index = int(math.floor(math.log(byte_size, 1024)))
+     return f"{format_nos(round(byte_size / pow(1024, index), 2))} {size_name[index]}"
pys3uploader/version.py ADDED
@@ -0,0 +1 @@
+ version = "0.4.0a1"
pys3uploader-0.4.0a1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: PyS3Uploader
- Version: 0.2.0
+ Version: 0.4.0a1
  Summary: Python module to upload objects to an S3 bucket.
  Author-email: Vignesh Rao <svignesh1793@gmail.com>
  License: MIT License
@@ -29,7 +29,7 @@ Project-URL: Homepage, https://github.com/thevickypedia/PyS3Uploader
  Project-URL: Docs, https://thevickypedia.github.io/PyS3Uploader/
  Project-URL: Source, https://github.com/thevickypedia/PyS3Uploader
  Project-URL: Bug Tracker, https://github.com/thevickypedia/PyS3Uploader/issues
- Keywords: s3
+ Keywords: pys3uploader
  Classifier: Development Status :: 1 - Planning
  Classifier: Intended Audience :: Information Technology
  Classifier: Operating System :: OS Independent
@@ -39,8 +39,9 @@ Classifier: Topic :: Internet :: File Transfer Protocol (FTP)
  Requires-Python: >=3.11
  Description-Content-Type: text/markdown
  License-File: LICENSE
+ Requires-Dist: alive-progress==3.3.*
  Requires-Dist: boto3==1.40.*
- Requires-Dist: tqdm==4.67.*
+ Requires-Dist: python-dotenv==1.1.*
  Provides-Extra: dev
  Requires-Dist: sphinx==5.1.1; extra == "dev"
  Requires-Dist: pre-commit; extra == "dev"
@@ -75,6 +76,43 @@ Requires-Dist: recommonmark; extra == "dev"
  # PyS3Uploader
  Python module to upload an entire directory to an S3 bucket.

+ <details>
+ <summary><strong>Bucket Policy Required</strong></summary>
+
+ ```json
+ {
+     "Version": "2012-10-17",
+     "Statement": [
+         {
+             "Sid": "ListBucketsForExistenceCheck",
+             "Effect": "Allow",
+             "Action": "s3:ListAllMyBuckets",
+             "Resource": "*"
+         },
+         {
+             "Sid": "ListAndUploadToSpecificBucket",
+             "Effect": "Allow",
+             "Action": [
+                 "s3:ListBucket",
+                 "s3:ListBucketMultipartUploads"
+             ],
+             "Resource": "arn:aws:s3:::bucketname"
+         },
+         {
+             "Sid": "UploadObjectsToBucket",
+             "Effect": "Allow",
+             "Action": [
+                 "s3:PutObject",
+                 "s3:AbortMultipartUpload",
+                 "s3:ListMultipartUploadParts"
+             ],
+             "Resource": "arn:aws:s3:::bucketname/*"
+         }
+     ]
+ }
+ ```
+ </details>
+
  ### Installation
  ```shell
  pip install PyS3Uploader
@@ -84,26 +122,26 @@ pip install PyS3Uploader

  ##### Upload objects in parallel
  ```python
- import s3
+ import pys3uploader

  if __name__ == '__main__':
-     wrapper = s3.Uploader(
+     wrapper = pys3uploader.Uploader(
          bucket_name="BUCKET_NAME",
          upload_dir="FULL_PATH_TO_UPLOAD",
-         exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
+         exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
      )
      wrapper.run_in_parallel()
  ```

  ##### Upload objects in sequence
  ```python
- import s3
+ import pys3uploader

  if __name__ == '__main__':
-     wrapper = s3.Uploader(
+     wrapper = pys3uploader.Uploader(
          bucket_name="BUCKET_NAME",
          upload_dir="FULL_PATH_TO_UPLOAD",
-         exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
+         exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
      )
      wrapper.run()
  ```
@@ -114,8 +152,15 @@ if __name__ == '__main__':

  #### Optional kwargs
  - **s3_prefix** - S3 object prefix for each file. Defaults to ``None``
- - **exclude_path** - Path in ``upload_dir`` that has to be excluded in object keys. Defaults to `None`
+ - **exclude_prefix** - Path in ``upload_dir`` that has to be excluded in object keys. Defaults to `None`
+ - **skip_dot_files** - Boolean flag to skip dot files. Defaults to ``True``
+ - **overwrite** - Boolean flag to overwrite files present in S3. Defaults to ``False``
+ - **file_exclusion** - Sequence of files to exclude during upload. Defaults to ``None``
+ - **folder_exclusion** - Sequence of directories to exclude during upload. Defaults to ``None``
  - **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
+ - **log_handler** - Choose between `stdout` vs `file` logging. Defaults to `pys3uploader.LogHandler.stdout`
+ - **log_level** - Choose the logging level. Defaults to `pys3uploader.LogLevel.debug`
+ - **env_file** – Path to a `.env` file for loading environment variables. Defaults to scanning the current directory.
  <br><br>
  - **region_name** - AWS region name. Defaults to the env var `AWS_DEFAULT_REGION`
  - **profile_name** - AWS profile name. Defaults to the env var `PROFILE_NAME`
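Pulling the documented options together, a hedged sketch of a fuller invocation. Only the parameter names come from the README excerpt above; the values, the `LogHandler.file` member, and the exclusion lists are assumptions made for illustration:

```python
import pys3uploader

if __name__ == '__main__':
    wrapper = pys3uploader.Uploader(
        bucket_name="BUCKET_NAME",
        upload_dir="FULL_PATH_TO_UPLOAD",
        s3_prefix="2025",                        # prepended to every object key
        exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE",
        skip_dot_files=True,                     # documented default: drop dot files
        overwrite=False,                         # documented default: keep existing S3 objects
        file_exclusion=["thumbs.db"],            # illustrative file names to skip
        folder_exclusion=["__pycache__"],        # illustrative directories to skip
        log_handler=pys3uploader.LogHandler.file,  # assumed 'file' member, per the stdout/file choice
        log_level=pys3uploader.LogLevel.debug,
        env_file=".env",                         # dotenv file for AWS credentials
    )
    wrapper.run_in_parallel()
```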
pys3uploader-0.4.0a1.dist-info/RECORD ADDED
@@ -0,0 +1,15 @@
+ pys3uploader/__init__.py,sha256=EqMScWbJNV4UWeMg4fMko2KB18xL2CO3a3o_od0H0Lc,124
+ pys3uploader/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
+ pys3uploader/logger.py,sha256=z9JEnyf4nHIakey0bAaCgEN7oXOYJYOpskZyM_4s-D4,2678
+ pys3uploader/metadata.py,sha256=tOOoLh2vISfH-GfH3yBcA_xtEjRwomaw7sCLEaDRK-8,230
+ pys3uploader/progress.py,sha256=IladNMXLBhkPpxOntpANTam_hC9OWosmNDmdbweDNYM,1195
+ pys3uploader/timer.py,sha256=qN2XNrGEyP3stsK3McvhE3VvIiUFh7mv4rbp5WDeyVU,1498
+ pys3uploader/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
+ pys3uploader/uploader.py,sha256=h5DYQA2yv0fQ2SSyAnAl8SsgJUajmN_o1PdMSqMbACM,18588
+ pys3uploader/utils.py,sha256=_2RYKUTyrQzwkxo7fSiLb5ASrpjcNpb3kZHqy_wByRk,5755
+ pys3uploader/version.py,sha256=VAwBBgd_skAqJS9UL1T_xDXryTqN5m58fbTTEXcKxgM,20
+ pys3uploader-0.4.0a1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
+ pys3uploader-0.4.0a1.dist-info/METADATA,sha256=FdJdNSesnP1xHfb4il5HBw1pxsPn7ToAYkQ_T3PrIb0,8959
+ pys3uploader-0.4.0a1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
+ pys3uploader-0.4.0a1.dist-info/top_level.txt,sha256=lVIFMMoUx7dj_myetBmOUQTJiOzz5VyDqchnQElmrWw,13
+ pys3uploader-0.4.0a1.dist-info/RECORD,,
pys3uploader-0.4.0a1.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ pys3uploader
pys3uploader-0.2.0.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
- s3/__init__.py,sha256=yLvvl4-uTLZwhdhCMQpWq5juX_zFuYAfKSf4aB0WjZw,66
- s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
- s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
- s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
- s3/uploader.py,sha256=IAlFrEjfBuexrfmBPGN9OZAfHjQuwcGRzWi2es0r_fU,11154
- s3/utils.py,sha256=0kcG0aE2olHhC8thaUEwx2J8tOI2-2TGCk6E6U-PiKw,2058
- pys3uploader-0.2.0.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
- pys3uploader-0.2.0.dist-info/METADATA,sha256=IXSmHXJJndlnd_6MHlpZrcVILPni8VUbVNJYQEjMIR8,7286
- pys3uploader-0.2.0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
- pys3uploader-0.2.0.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
- pys3uploader-0.2.0.dist-info/RECORD,,
pys3uploader-0.2.0.dist-info/top_level.txt DELETED
@@ -1 +0,0 @@
- s3
s3/__init__.py DELETED
@@ -1,3 +0,0 @@
- from s3.uploader import Uploader  # noqa: F401
-
- version = "0.2.0"
s3/logger.py DELETED
@@ -1,45 +0,0 @@
- """Loads a default logger with StreamHandler set to DEBUG mode.
-
- >>> logging.Logger
-
- """
-
- import logging
-
-
- def default_handler() -> logging.StreamHandler:
-     """Creates a ``StreamHandler`` and assigns a default format to it.
-
-     Returns:
-         logging.StreamHandler:
-             Returns an instance of the ``StreamHandler`` object.
-     """
-     handler = logging.StreamHandler()
-     handler.setFormatter(fmt=default_format())
-     return handler
-
-
- def default_format() -> logging.Formatter:
-     """Creates a logging ``Formatter`` with a custom message and datetime format.
-
-     Returns:
-         logging.Formatter:
-             Returns an instance of the ``Formatter`` object.
-     """
-     return logging.Formatter(
-         fmt="%(asctime)s - %(levelname)s - [%(module)s:%(lineno)d] - %(funcName)s - %(message)s",
-         datefmt="%b-%d-%Y %I:%M:%S %p",
-     )
-
-
- def default_logger() -> logging.Logger:
-     """Creates a default logger with debug mode enabled.
-
-     Returns:
-         logging.Logger:
-             Returns an instance of the ``Logger`` object.
-     """
-     logger = logging.getLogger(__name__)
-     logger.addHandler(hdlr=default_handler())
-     logger.setLevel(level=logging.DEBUG)
-     return logger
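The removed module above is a thin wrapper around the standard library. If you relied on it, an equivalent logger can still be built by hand and passed in through the documented `logger` kwarg; a sketch in which the format string mirrors the deleted code and everything else is plain `logging` (the logger name and kwarg values are placeholders):

```python
import logging

import pys3uploader

# Recreate the old default logger: StreamHandler, same format string, DEBUG level
handler = logging.StreamHandler()
handler.setFormatter(
    logging.Formatter(
        fmt="%(asctime)s - %(levelname)s - [%(module)s:%(lineno)d] - %(funcName)s - %(message)s",
        datefmt="%b-%d-%Y %I:%M:%S %p",
    )
)
logger = logging.getLogger("s3-upload")
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)

uploader = pys3uploader.Uploader(
    bucket_name="BUCKET_NAME",
    upload_dir="FULL_PATH_TO_UPLOAD",
    logger=logger,  # documented "bring your own logger" kwarg
)
```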
s3/uploader.py DELETED
@@ -1,264 +0,0 @@
- import logging
- import os
- import time
- from concurrent.futures import ThreadPoolExecutor, as_completed
- from typing import Dict
-
- import boto3.resources.factory
- from botocore.config import Config
- from botocore.exceptions import ClientError
- from tqdm import tqdm
-
- from s3.exceptions import BucketNotFound
- from s3.logger import default_logger
- from s3.utils import UploadResults, convert_to_folder_structure, getenv, urljoin
-
-
- class Uploader:
-     """Initiates Uploader object to upload entire directory to S3.
-
-     >>> Uploader
-
-     """
-
-     RETRY_CONFIG: Config = Config(retries={"max_attempts": 10, "mode": "standard"})
-
-     def __init__(
-         self,
-         bucket_name: str,
-         upload_dir: str,
-         s3_prefix: str = None,
-         exclude_path: str = None,
-         overwrite: bool = False,
-         region_name: str = None,
-         profile_name: str = None,
-         aws_access_key_id: str = None,
-         aws_secret_access_key: str = None,
-         logger: logging.Logger = None,
-     ):
-         """Initiates all the necessary args and creates a boto3 session with retry logic.
-
-         Args:
-             bucket_name: Name of the bucket.
-             upload_dir: Full path of the directory to be uploaded.
-             s3_prefix: Particular bucket prefix within which the upload should happen.
-             exclude_path: Full directory path to exclude from S3 object prefix.
-             overwrite: Boolean flag to overwrite files in S3.
-             region_name: Name of the AWS region.
-             profile_name: AWS profile name.
-             aws_access_key_id: AWS access key ID.
-             aws_secret_access_key: AWS secret access key.
-             logger: Bring your own logger.
-
-         See Also:
-             exclude_path:
-                 When upload directory is "/home/ubuntu/Desktop/S3Upload", each file will naturally have the full prefix.
-                 However, this behavior can be avoided by specifying the ``exclude_path`` parameter.
-
-                 If exclude_path is set to: ``/home/ubuntu/Desktop``, then the file path
-                 ``/home/ubuntu/Desktop/S3Upload/sub-dir/photo.jpg`` will be uploaded as ``S3Upload/sub-dir/photo.jpg``
-
-             s3_prefix:
-                 If provided, ``s3_prefix`` will always be attached to each object.
-
-                 If ``s3_prefix`` is set to: ``2025``, then the file path
-                 ``/home/ubuntu/Desktop/S3Upload/sub/photo.jpg`` will be uploaded as ``2025/S3Upload/sub/photo.jpg``
-         """
-         self.session = boto3.Session(
-             profile_name=profile_name or getenv("PROFILE_NAME"),
-             region_name=region_name or getenv("AWS_DEFAULT_REGION"),
-             aws_access_key_id=aws_access_key_id or getenv("AWS_ACCESS_KEY_ID"),
-             aws_secret_access_key=aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY"),
-         )
-         self.s3 = self.session.resource(service_name="s3", config=self.RETRY_CONFIG)
-
-         self.logger = logger or default_logger()
-
-         self.bucket_name = bucket_name
-         self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
-         self.s3_prefix = s3_prefix
-         self.exclude_path = exclude_path
-         self.overwrite = overwrite
-
-         self.results = UploadResults()
-         self.start = time.time()
-
-         # noinspection PyUnresolvedReferences
-         self.bucket: boto3.resources.factory.s3.Bucket = None
-         # noinspection PyUnresolvedReferences
-         self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = []
-         self.object_size_map: Dict[str, int] = {}
-
-     def init(self) -> None:
-         """Instantiates the bucket instance.
-
-         Raises:
-             ValueError: If no bucket name was passed.
-             BucketNotFound: If bucket name was not found.
-         """
-         self.start = time.time()
-         if self.exclude_path and self.exclude_path not in self.upload_dir:
-             raise ValueError(
-                 f"\n\n\tStart folder {self.exclude_path!r} is not a part of upload directory {self.upload_dir!r}"
-             )
-         if not self.upload_dir:
-             raise ValueError("\n\n\tCannot proceed without an upload directory.")
-         try:
-             assert os.path.exists(self.upload_dir)
-         except AssertionError:
-             raise ValueError(f"\n\n\tPath not found: {self.upload_dir}")
-         buckets = [bucket.name for bucket in self.s3.buckets.all()]
-         if not self.bucket_name:
-             raise ValueError(f"\n\n\tCannot proceed without a bucket name.\n\tAvailable: {buckets}")
-         _account_id, _alias = self.session.resource(service_name="iam").CurrentUser().arn.split("/")
-         if self.bucket_name not in buckets:
-             raise BucketNotFound(f"\n\n\t{self.bucket_name} was not found in {_alias} account.\n\tAvailable: {buckets}")
-         self.upload_dir = os.path.abspath(self.upload_dir)
-         # noinspection PyUnresolvedReferences
-         self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
-         # noinspection PyUnresolvedReferences
-         self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = [obj for obj in self.bucket.objects.all()]
-         self.object_size_map = {obj.key: obj.size for obj in self.bucket_objects}
-
-     def exit(self) -> None:
-         """Exits after printing results, and run time."""
-         total = self.results.success + self.results.failed
-         self.logger.info(
-             "Total number of uploads: %d, success: %d, failed: %d", total, self.results.success, self.results.failed
-         )
-         self.logger.info("Run Time: %.2fs", time.time() - self.start)
-
-     def _proceed_to_upload(self, filepath: str, objectpath: str) -> bool:
-         """Compares file size if the object already exists in S3.
-
-         Args:
-             filepath: Source filepath.
-             objectpath: S3 object path.
-
-         Returns:
-             bool:
-                 Returns a boolean flag to indicate upload flag.
-         """
-         if self.overwrite:
-             return True
-         # Indicates that the object path already exists in S3
-         if object_size := self.object_size_map.get(objectpath):
-             try:
-                 file_size = os.path.getsize(filepath)
-             except (OSError, PermissionError) as error:
-                 self.logger.error(error)
-                 return True
-             if object_size == file_size:
-                 self.logger.info("S3 object %s exists, and size [%d] matches, skipping..", objectpath, object_size)
-                 return False
-             self.logger.info(
-                 "S3 object %s exists, but size mismatch. Local: [%d], S3: [%d]", objectpath, file_size, object_size
-             )
-         return True
-
-     def _uploader(self, filepath: str, objectpath: str) -> None:
-         """Uploads the filepath to the specified S3 bucket.
-
-         Args:
-             filepath: Filepath to upload.
-             objectpath: Object path ref in S3.
-         """
-         if self._proceed_to_upload(filepath, objectpath):
-             self.bucket.upload_file(filepath, objectpath)
-
-     def _get_files(self) -> Dict[str, str]:
-         """Get a mapping for all the file path and object paths in upload directory.
-
-         Returns:
-             Dict[str, str]:
-                 Returns a key-value pair of filepath and objectpath.
-         """
-         files_to_upload = {}
-         for __path, __directory, __files in os.walk(self.upload_dir):
-             for file_ in __files:
-                 file_path = os.path.join(__path, file_)
-                 if self.exclude_path:
-                     relative_path = file_path.replace(self.exclude_path, "")
-                 else:
-                     relative_path = file_path
-                 # Lists in python are ordered, so s3 prefix will get loaded first when provided
-                 url_parts = []
-                 if self.s3_prefix:
-                     url_parts.extend(
-                         self.s3_prefix.split(os.sep) if os.sep in self.s3_prefix else self.s3_prefix.split("/")
-                     )
-                 # Add rest of the file path to parts before normalizing as an S3 object URL
-                 url_parts.extend(relative_path.split(os.sep))
-                 # Remove falsy values using filter - "None", "bool", "len" or "lambda item: item"
-                 object_path = urljoin(*filter(None, url_parts))
-                 files_to_upload[file_path] = object_path
-         return files_to_upload
-
-     def run(self) -> None:
-         """Initiates object upload in a traditional loop."""
-         self.init()
-         keys = self._get_files()
-         self.logger.debug(keys)
-         self.logger.info("%d files from '%s' will be uploaded to '%s'", len(keys), self.upload_dir, self.bucket_name)
-         self.logger.info("Initiating upload process.")
-         for objectpath, filepath in tqdm(
-             keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
-         ):
-             try:
-                 self._uploader(filepath=filepath, objectpath=objectpath)
-                 self.results.success += 1
-             except ClientError as error:
-                 self.logger.error(error)
-                 self.results.failed += 1
-         self.exit()
-
-     def run_in_parallel(self, max_workers: int = 5) -> None:
-         """Initiates upload in multi-threading.
-
-         Args:
-             max_workers: Number of maximum threads to use.
-         """
-         self.init()
-         keys = self._get_files()
-         self.logger.debug(keys)
-         self.logger.info(
-             "%d files from '%s' will be uploaded to '%s' with maximum concurrency of: %d",
-             len(keys),
-             self.upload_dir,
-             self.bucket_name,
-             max_workers,
-         )
-         with ThreadPoolExecutor(max_workers=max_workers) as executor:
-             futures = [
-                 executor.submit(self._uploader, **dict(filepath=filepath, objectpath=objectpath))
-                 for filepath, objectpath in keys.items()
-             ]
-             for future in tqdm(
-                 iterable=as_completed(futures),
-                 total=len(futures),
-                 desc=f"Uploading files to {self.bucket_name}",
-                 unit="files",
-                 leave=True,
-             ):
-                 try:
-                     future.result()
-                     self.results.success += 1
-                 except ClientError as error:
-                     self.logger.error(f"Upload failed: {error}")
-                     self.results.failed += 1
-         self.exit()
-
-     def get_bucket_structure(self) -> str:
-         """Gets all the objects in an S3 bucket and forms it into a hierarchical folder like representation.
-
-         Returns:
-             str:
-                 Returns a hierarchical folder like representation of the chosen bucket.
-         """
-         self.init()
-         # Using list and set will yield the same results but using set we can isolate directories from files
-         return convert_to_folder_structure(set(obj.key for obj in self.bucket_objects))
-
-     def print_bucket_structure(self) -> None:
-         """Prints all the objects in an S3 bucket with a folder like representation."""
-         print(self.get_bucket_structure())
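For reference, the object-key mapping described in the `__init__` docstring above (and implemented in `_get_files`) boils down to a few string operations. A standalone sketch with hypothetical POSIX paths, reusing the `urljoin` helper that now lives in pys3uploader/utils.py:

```python
import os

from pys3uploader.utils import urljoin  # same join helper the uploader uses

# Hypothetical inputs matching the docstring example above
file_path = "/home/ubuntu/Desktop/S3Upload/sub-dir/photo.jpg"
exclude_path = "/home/ubuntu/Desktop"
s3_prefix = "2025"

relative_path = file_path.replace(exclude_path, "")   # "/S3Upload/sub-dir/photo.jpg"
url_parts = s3_prefix.split("/") + relative_path.split(os.sep)
object_key = urljoin(*filter(None, url_parts))        # drop empty parts, join with "/"
print(object_key)  # 2025/S3Upload/sub-dir/photo.jpg
```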