PyS3Uploader 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: PyS3Uploader
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Python module to upload objects to an S3 bucket.
5
5
  Author-email: Vignesh Rao <svignesh1793@gmail.com>
6
6
  License: MIT License
@@ -40,6 +40,7 @@ Requires-Python: >=3.11
40
40
  Description-Content-Type: text/markdown
41
41
  License-File: LICENSE
42
42
  Requires-Dist: boto3==1.40.*
43
+ Requires-Dist: python-dotenv==1.1.*
43
44
  Requires-Dist: tqdm==4.67.*
44
45
  Provides-Extra: dev
45
46
  Requires-Dist: sphinx==5.1.1; extra == "dev"
@@ -90,7 +91,7 @@ if __name__ == '__main__':
90
91
  wrapper = s3.Uploader(
91
92
  bucket_name="BUCKET_NAME",
92
93
  upload_dir="FULL_PATH_TO_UPLOAD",
93
- exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
94
+ exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
94
95
  )
95
96
  wrapper.run_in_parallel()
96
97
  ```
@@ -103,7 +104,7 @@ if __name__ == '__main__':
103
104
  wrapper = s3.Uploader(
104
105
  bucket_name="BUCKET_NAME",
105
106
  upload_dir="FULL_PATH_TO_UPLOAD",
106
- exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
107
+ exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
107
108
  )
108
109
  wrapper.run()
109
110
  ```
@@ -114,10 +115,13 @@ if __name__ == '__main__':
114
115
 
115
116
  #### Optional kwargs
116
117
  - **s3_prefix** - S3 object prefix for each file. Defaults to ``None``
117
- - **exclude_path** - Path in ``upload_dir`` that has to be excluded in object keys. Defaults to `None`
118
+ - **exclude_prefix** - Leading part of the ``upload_dir`` path to strip from object keys. Defaults to ``None``
118
119
  - **skip_dot_files** - Boolean flag to skip dot files. Defaults to ``True``
119
120
  - **overwrite** - Boolean flag to overwrite files present in S3. Defaults to ``False``
121
+ - **file_exclusion** - Sequence of file names to exclude during upload. Defaults to ``None``
122
+ - **folder_exclusion** - Sequence of directory names to exclude during upload. Defaults to ``None``
120
123
  - **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
124
+ - **env_file** - Path to a `.env` file for loading environment variables. Defaults to the env var `ENV_FILE`, then `.env`; if neither is found, the current directory is scanned for a file ending in `.env`.
121
125
  <br><br>
122
126
  - **region_name** - AWS region name. Defaults to the env var `AWS_DEFAULT_REGION`
123
127
  - **profile_name** - AWS profile name. Defaults to the env var `PROFILE_NAME`
@@ -0,0 +1,11 @@
1
+ s3/__init__.py,sha256=aZ2woJ8TD2tgqXi0ElG-wWwJWoQLIdqTdm50FLaxL8w,66
2
+ s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
+ s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
4
+ s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
5
+ s3/uploader.py,sha256=KxrWbIInXxXQszP_uJLf_dBI5rUNjNnhco3gr9Vdrto,13767
6
+ s3/utils.py,sha256=NbF28CYviK_St5qd1EOumMVyus9BvQON7clUFeR_SEQ,4473
7
+ pys3uploader-0.2.3.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
8
+ pys3uploader-0.2.3.dist-info/METADATA,sha256=ae2lA8b7dsGWZSMfB8w4joDiAlaE6Wk1f3p1Fxywkc4,7795
9
+ pys3uploader-0.2.3.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
10
+ pys3uploader-0.2.3.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
11
+ pys3uploader-0.2.3.dist-info/RECORD,,
s3/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  from s3.uploader import Uploader # noqa: F401
2
2
 
3
- version = "0.2.1"
3
+ version = "0.2.3"
s3/uploader.py CHANGED
@@ -2,9 +2,10 @@ import logging
2
2
  import os
3
3
  import time
4
4
  from concurrent.futures import ThreadPoolExecutor, as_completed
5
- from typing import Dict
5
+ from typing import Dict, Iterable
6
6
 
7
7
  import boto3.resources.factory
8
+ import dotenv
8
9
  from botocore.config import Config
9
10
  from botocore.exceptions import ClientError
10
11
  from tqdm import tqdm
@@ -14,6 +15,7 @@ from s3.logger import default_logger
14
15
  from s3.utils import (
15
16
  RETRY_CONFIG,
16
17
  UploadResults,
18
+ convert_seconds,
17
19
  convert_to_folder_structure,
18
20
  getenv,
19
21
  urljoin,
@@ -32,15 +34,18 @@ class Uploader:
32
34
  bucket_name: str,
33
35
  upload_dir: str,
34
36
  s3_prefix: str = None,
35
- exclude_path: str = None,
37
+ exclude_prefix: str = None,
36
38
  skip_dot_files: bool = True,
37
39
  overwrite: bool = False,
40
+ file_exclusion: Iterable[str] = None,
41
+ folder_exclusion: Iterable[str] = None,
38
42
  region_name: str = None,
39
43
  profile_name: str = None,
40
44
  aws_access_key_id: str = None,
41
45
  aws_secret_access_key: str = None,
42
46
  retry_config: Config = RETRY_CONFIG,
43
47
  logger: logging.Logger = None,
48
+ env_file: str = None,
44
49
  ):
45
50
  """Initiates all the necessary args and creates a boto3 session with retry logic.
46
51
 
@@ -48,14 +53,17 @@ class Uploader:
48
53
  bucket_name: Name of the bucket.
49
54
  upload_dir: Full path of the directory to be uploaded.
50
55
  s3_prefix: Particular bucket prefix within which the upload should happen.
51
- exclude_path: Full directory path to exclude from S3 object prefix.
56
+ exclude_prefix: Full directory path to exclude from S3 object prefix.
52
57
  skip_dot_files: Boolean flag to skip dot files.
53
58
  overwrite: Boolean flag to overwrite files in S3.
59
+ file_exclusion: Sequence of files to exclude during upload.
60
+ folder_exclusion: Sequence of directories to exclude during upload.
54
61
  region_name: Name of the AWS region.
55
62
  profile_name: AWS profile name.
56
63
  aws_access_key_id: AWS access key ID.
57
64
  aws_secret_access_key: AWS secret access key.
58
65
  logger: Bring your own logger.
66
+ env_file: Dotenv file (.env) filepath to load environment variables.
59
67
 
60
68
  See Also:
61
69
  s3_prefix:
@@ -64,29 +72,56 @@ class Uploader:
64
72
  If ``s3_prefix`` is set to: ``2025``, then the file path
65
73
  ``/home/ubuntu/Desktop/S3Upload/sub/photo.jpg`` will be uploaded as ``2025/S3Upload/sub/photo.jpg``
66
74
 
67
- exclude_path:
75
+ exclude_prefix:
68
76
  When upload directory is "/home/ubuntu/Desktop/S3Upload", each file will naturally have the full prefix.
69
- However, this behavior can be avoided by specifying the ``exclude_path`` parameter.
77
+ However, this behavior can be avoided by specifying the ``exclude_prefix`` parameter.
70
78
 
71
- If exclude_path is set to: ``/home/ubuntu/Desktop``, then the file path
79
+ If exclude_prefix is set to: ``/home/ubuntu/Desktop``, then the file path
72
80
  ``/home/ubuntu/Desktop/S3Upload/sub-dir/photo.jpg`` will be uploaded as ``S3Upload/sub-dir/photo.jpg``
81
+
82
+ env_file:
83
+ Environment variables can be loaded from a .env file.
84
+ The filepath can be set as ``env_file`` during object instantiation or as an environment variable.
85
+ If a filepath is provided, PyS3Uploader loads it directly or searches the root directory for the file.
86
+ If no filepath is provided, PyS3Uploader searches the current directory for a .env file.
73
87
  """
88
+ self.logger = logger or default_logger()
89
+ self.env_file = env_file or getenv("ENV_FILE", default=".env")
90
+
91
+ # Check for env_file in current working directory
92
+ if os.path.isfile(self.env_file):
93
+ self.logger.debug("Loading env file: %s", self.env_file)
94
+ dotenv.load_dotenv(dotenv_path=self.env_file, override=True)
95
+ # Find the env_file from root
96
+ elif env_file := dotenv.find_dotenv(self.env_file, raise_error_if_not_found=False):
97
+ self.logger.debug("Loading env file: %s", env_file)
98
+ dotenv.load_dotenv(dotenv_path=env_file, override=True)
99
+ else:
100
+ # Scan current working directory for any .env files
101
+ for file in os.listdir():
102
+ if file.endswith(".env"):
103
+ self.logger.debug("Loading env file: %s", file)
104
+ dotenv.load_dotenv(dotenv_path=file, override=True)
105
+ break
106
+ else:
107
+ self.logger.debug("No .env files found to load")
108
+
74
109
  self.session = boto3.Session(
75
- profile_name=profile_name or getenv("PROFILE_NAME"),
110
+ profile_name=profile_name or getenv("PROFILE_NAME", "AWS_PROFILE_NAME"),
76
111
  region_name=region_name or getenv("AWS_DEFAULT_REGION"),
77
112
  aws_access_key_id=aws_access_key_id or getenv("AWS_ACCESS_KEY_ID"),
78
113
  aws_secret_access_key=aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY"),
79
114
  )
80
115
  self.s3 = self.session.resource(service_name="s3", config=retry_config)
81
116
 
82
- self.logger = logger or default_logger()
83
-
84
117
  self.bucket_name = bucket_name
85
- self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
118
+ self.upload_dir = upload_dir
86
119
  self.s3_prefix = s3_prefix
87
- self.exclude_path = exclude_path
120
+ self.exclude_prefix = exclude_prefix
88
121
  self.skip_dot_files = skip_dot_files
89
122
  self.overwrite = overwrite
123
+ self.file_exclusion = file_exclusion or []
124
+ self.folder_exclusion = folder_exclusion or []
90
125
 
91
126
  self.results = UploadResults()
92
127
  self.start = time.time()
@@ -105,9 +140,9 @@ class Uploader:
105
140
  BucketNotFound: If bucket name was not found.
106
141
  """
107
142
  self.start = time.time()
108
- if self.exclude_path and self.exclude_path not in self.upload_dir:
143
+ if self.exclude_prefix and self.exclude_prefix not in self.upload_dir:
109
144
  raise ValueError(
110
- f"\n\n\tStart folder {self.exclude_path!r} is not a part of upload directory {self.upload_dir!r}"
145
+ f"\n\n\tStart folder {self.exclude_prefix!r} is not a part of upload directory {self.upload_dir!r}"
111
146
  )
112
147
  if not self.upload_dir:
113
148
  raise ValueError("\n\n\tCannot proceed without an upload directory.")
@@ -134,7 +169,7 @@ class Uploader:
134
169
  self.logger.info(
135
170
  "Total number of uploads: %d, success: %d, failed: %d", total, self.results.success, self.results.failed
136
171
  )
137
- self.logger.info("Run Time: %.2fs", time.time() - self.start)
172
+ self.logger.info("Run time: %s", convert_seconds(time.time() - self.start))
138
173
 
139
174
  def _proceed_to_upload(self, filepath: str, objectpath: str) -> bool:
140
175
  """Compares file size if the object already exists in S3.
@@ -183,13 +218,20 @@ class Uploader:
183
218
  """
184
219
  files_to_upload = {}
185
220
  for __path, __directory, __files in os.walk(self.upload_dir):
221
+ scan_dir = os.path.split(__path)[-1]
222
+ if scan_dir in self.folder_exclusion:
223
+ self.logger.info("Skipping '%s' honoring folder exclusion", scan_dir)
224
+ continue
186
225
  for file_ in __files:
226
+ if file_ in self.file_exclusion:
227
+ self.logger.info("Skipping '%s' honoring file exclusion", file_)
228
+ continue
187
229
  if self.skip_dot_files and file_.startswith("."):
188
230
  self.logger.info("Skipping dot file: %s", file_)
189
231
  continue
190
232
  file_path = os.path.join(__path, file_)
191
- if self.exclude_path:
192
- relative_path = file_path.replace(self.exclude_path, "")
233
+ if self.exclude_prefix:
234
+ relative_path = file_path.replace(self.exclude_prefix, "")
193
235
  else:
194
236
  relative_path = file_path
195
237
  # Lists in python are ordered, so s3 prefix will get loaded first when provided
@@ -212,7 +254,7 @@ class Uploader:
212
254
  self.logger.debug(keys)
213
255
  self.logger.info("%d files from '%s' will be uploaded to '%s'", len(keys), self.upload_dir, self.bucket_name)
214
256
  self.logger.info("Initiating upload process.")
215
- for objectpath, filepath in tqdm(
257
+ for filepath, objectpath in tqdm(
216
258
  keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
217
259
  ):
218
260
  try:
s3/utils.py CHANGED
@@ -3,18 +3,6 @@ from typing import Dict, Set
3
3
 
4
4
  from botocore.config import Config
5
5
 
6
-
7
- class UploadResults(dict):
8
- """Object to store results of S3 upload.
9
-
10
- >>> UploadResults
11
-
12
- """
13
-
14
- success: int = 0
15
- failed: int = 0
16
-
17
-
18
6
  RETRY_CONFIG: Config = Config(
19
7
  retries={
20
8
  "max_attempts": 10,
@@ -27,6 +15,17 @@ RETRY_CONFIG: Config = Config(
27
15
  )
28
16
 
29
17
 
18
+ class UploadResults(dict):
19
+ """Object to store results of S3 upload.
20
+
21
+ >>> UploadResults
22
+
23
+ """
24
+
25
+ success: int = 0
26
+ failed: int = 0
27
+
28
+
30
29
  def getenv(*args, default: str = None) -> str:
31
30
  """Returns the key-ed environment variable or the default value."""
32
31
  for key in args:
@@ -82,3 +81,67 @@ def convert_to_folder_structure(sequence: Set[str]) -> str:
82
81
  return result
83
82
 
84
83
  return generate_folder_structure(folder_structure)
84
+
85
+
86
+ def convert_seconds(seconds: int | float, n_elem: int = 2) -> str:
87
+ """Calculate years, months, days, hours, minutes, seconds, and milliseconds from given input.
88
+
89
+ Args:
90
+ seconds: Number of seconds to convert (supports float values).
91
+ n_elem: Number of elements required from the converted list.
92
+
93
+ Returns:
94
+ str:
95
+ Returns a humanized string notion of the number of seconds.
96
+ """
97
+ if not seconds:
98
+ return "0s"
99
+ elif seconds < 1:
100
+ return f"{seconds * 1000:.0f}ms"
101
+
102
+ seconds_in_year = 365 * 24 * 3600
103
+ seconds_in_month = 30 * 24 * 3600
104
+
105
+ years = seconds // seconds_in_year
106
+ seconds %= seconds_in_year
107
+
108
+ months = seconds // seconds_in_month
109
+ seconds %= seconds_in_month
110
+
111
+ days = seconds // (24 * 3600)
112
+ seconds %= 24 * 3600
113
+
114
+ hours = seconds // 3600
115
+ seconds %= 3600
116
+
117
+ minutes = seconds // 60
118
+ seconds %= 60
119
+
120
+ milliseconds = round((seconds % 1) * 1000)
121
+ seconds = int(seconds) # Convert remaining seconds to int for display
122
+
123
+ time_parts = []
124
+
125
+ if years > 0:
126
+ time_parts.append(f"{int(years)} year{'s' if years > 1 else ''}")
127
+ if months > 0:
128
+ time_parts.append(f"{int(months)} month{'s' if months > 1 else ''}")
129
+ if days > 0:
130
+ time_parts.append(f"{int(days)} day{'s' if days > 1 else ''}")
131
+ if hours > 0:
132
+ time_parts.append(f"{int(hours)} hour{'s' if hours > 1 else ''}")
133
+ if minutes > 0:
134
+ time_parts.append(f"{int(minutes)} minute{'s' if minutes > 1 else ''}")
135
+ if seconds > 0 or milliseconds > 0:
136
+ if seconds > 0 and milliseconds > 0:
137
+ time_parts.append(f"{seconds + milliseconds / 1000:.1f}s")
138
+ elif seconds > 0:
139
+ time_parts.append(f"{seconds}s")
140
+ else:
141
+ time_parts.append(f"{milliseconds}ms")
142
+
143
+ if len(time_parts) == 1:
144
+ return time_parts[0]
145
+
146
+ list_ = time_parts[:n_elem]
147
+ return ", and ".join([", ".join(list_[:-1]), list_[-1]] if len(list_) > 2 else list_)
@@ -1,11 +0,0 @@
1
- s3/__init__.py,sha256=IqcPR9iWMw0GDBEmKvLzW7P-AhInTkwRklkvYgiT1Xc,66
2
- s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
- s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
4
- s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
5
- s3/uploader.py,sha256=v6TGdm8EcAldoIw3GOAwkuoUzi9b9IRj8b94hl1Pkyw,11454
6
- s3/utils.py,sha256=dd1OeLbswLzFVyjYiXixkJlFsoGWRtRCOHha6wLG5zQ,2485
7
- pys3uploader-0.2.1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
8
- pys3uploader-0.2.1.dist-info/METADATA,sha256=NA7x6YqpWEKvn3XEYzVug7XTl1vPcwiphUzWtMvlzHE,7449
9
- pys3uploader-0.2.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
10
- pys3uploader-0.2.1.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
11
- pys3uploader-0.2.1.dist-info/RECORD,,