PyS3Uploader 0.2.2-py3-none-any.whl → 0.2.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of PyS3Uploader has been flagged as potentially problematic.

pys3uploader-0.2.2.dist-info/METADATA → pys3uploader-0.2.4.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: PyS3Uploader
-Version: 0.2.2
+Version: 0.2.4
 Summary: Python module to upload objects to an S3 bucket.
 Author-email: Vignesh Rao <svignesh1793@gmail.com>
 License: MIT License
@@ -40,6 +40,7 @@ Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: boto3==1.40.*
+Requires-Dist: python-dotenv==1.1.*
 Requires-Dist: tqdm==4.67.*
 Provides-Extra: dev
 Requires-Dist: sphinx==5.1.1; extra == "dev"
@@ -90,7 +91,7 @@ if __name__ == '__main__':
     wrapper = s3.Uploader(
         bucket_name="BUCKET_NAME",
         upload_dir="FULL_PATH_TO_UPLOAD",
-        exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
+        exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
     )
     wrapper.run_in_parallel()
 ```
@@ -103,7 +104,7 @@ if __name__ == '__main__':
     wrapper = s3.Uploader(
         bucket_name="BUCKET_NAME",
         upload_dir="FULL_PATH_TO_UPLOAD",
-        exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
+        exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
     )
     wrapper.run()
 ```
@@ -114,10 +115,15 @@ if __name__ == '__main__':
 
 #### Optional kwargs
 - **s3_prefix** - S3 object prefix for each file. Defaults to ``None``
-- **exclude_path** - Path in ``upload_dir`` that has to be excluded in object keys. Defaults to `None`
+- **exclude_prefix** - Path in ``upload_dir`` that has to be excluded in object keys. Defaults to `None`
 - **skip_dot_files** - Boolean flag to skip dot files. Defaults to ``True``
 - **overwrite** - Boolean flag to overwrite files present in S3. Defaults to ``False``
+- **file_exclusion** - Sequence of files to exclude during upload. Defaults to ``None``
+- **folder_exclusion** - Sequence of directories to exclude during upload. Defaults to ``None``
 - **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
+- **log_handler** - Choose between `stdout` vs `file` logging. Defaults to `s3.LogHandler.stdout`
+- **log_level** - Choose the logging level. Defaults to `s3.LogLevel.debug`
+- **env_file** - Path to a `.env` file for loading environment variables. Defaults to scanning the current directory.
 <br><br>
 - **region_name** - AWS region name. Defaults to the env var `AWS_DEFAULT_REGION`
 - **profile_name** - AWS profile name. Defaults to the env var `PROFILE_NAME`
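
Taken together, the README changes above rename ``exclude_path`` to ``exclude_prefix`` and document five new kwargs. Below is a minimal sketch of a 0.2.4 call site exercising them; the bucket name and paths are placeholders, and the enum import path is assumed from the `from s3.logger import LogHandler, LogLevel, setup_logger` line in `s3/uploader.py` further down in this diff:

```python
import s3
from s3.logger import LogHandler, LogLevel

if __name__ == '__main__':
    wrapper = s3.Uploader(
        bucket_name="BUCKET_NAME",                   # placeholder
        upload_dir="/home/ubuntu/Desktop/S3Upload",  # placeholder
        exclude_prefix="/home/ubuntu/Desktop",       # renamed from exclude_path in 0.2.2
        file_exclusion=[".DS_Store"],                # new in 0.2.4: file names to skip
        folder_exclusion=["__pycache__"],            # new in 0.2.4: directory names to skip
        log_handler=LogHandler.file,                 # log under ./logs/ instead of stdout
        log_level=LogLevel.info,
        env_file=".env",                             # load AWS credentials from a dotenv file
    )
    wrapper.run()
```
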
pys3uploader-0.2.4.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
+s3/__init__.py,sha256=YsBU1Xy4sLbm_8jU5kKP8QP3ayKBmnJDaF3NCNaDOsk,66
+s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
+s3/logger.py,sha256=igwMubdTQ_GrMkwie5DAIvmxIcgj6a9UA_EGFrwFYiQ,2571
+s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
+s3/uploader.py,sha256=S480dteogtnA3xWVkHb2aRr5gtL1WnRIukw3_aAX1z0,14228
+s3/utils.py,sha256=NbF28CYviK_St5qd1EOumMVyus9BvQON7clUFeR_SEQ,4473
+pys3uploader-0.2.4.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
+pys3uploader-0.2.4.dist-info/METADATA,sha256=Dd-gyQ387bU7NcVC7Y0aqAeyXUzxlW1nsMWbMvfRDLg,7969
+pys3uploader-0.2.4.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
+pys3uploader-0.2.4.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
+pys3uploader-0.2.4.dist-info/RECORD,,
s3/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from s3.uploader import Uploader  # noqa: F401
 
-version = "0.2.2"
+version = "0.2.4"
s3/logger.py CHANGED
@@ -5,9 +5,46 @@
 """
 
 import logging
+import os
+from datetime import datetime
+from enum import IntEnum, StrEnum
 
 
-def default_handler() -> logging.StreamHandler:
+class LogHandler(StrEnum):
+    """Logging handlers to choose from when default logger is used.
+
+    >>> LogHandler
+
+    """
+
+    file = "file"
+    stdout = "stdout"
+
+
+class LogLevel(IntEnum):
+    """Logging levels to choose from when default logger is used.
+
+    >>> LogLevel
+
+    """
+
+    debug = logging.DEBUG
+    info = logging.INFO
+    warning = logging.WARNING
+    error = logging.ERROR
+
+    @classmethod
+    def _missing_(cls, value):
+        """Allow constructing from string names."""
+        if isinstance(value, str):
+            value = value.lower()
+            for member in cls:
+                if member.name == value:
+                    return member
+        return None
+
+
+def stream_handler() -> logging.StreamHandler:
     """Creates a ``StreamHandler`` and assigns a default format to it.
 
     Returns:
@@ -19,6 +56,20 @@ def default_handler() -> logging.StreamHandler:
     return handler
 
 
+def file_handler() -> logging.FileHandler:
+    """Creates a ``FileHandler`` and assigns a default format to it.
+
+    Returns:
+        logging.FileHandler:
+        Returns an instance of the ``FileHandler`` object.
+    """
+    os.makedirs("logs", exist_ok=True)
+    filename = os.path.join("logs", datetime.now().strftime("PyS3Uploader_%d-%m-%Y_%H:%M.log"))
+    handler = logging.FileHandler(filename, mode="a")
+    handler.setFormatter(fmt=default_format())
+    return handler
+
+
 def default_format() -> logging.Formatter:
     """Creates a logging ``Formatter`` with a custom message and datetime format.
 
@@ -32,7 +83,7 @@ def default_format() -> logging.Formatter:
     )
 
 
-def default_logger() -> logging.Logger:
+def setup_logger(handler: LogHandler, level: LogLevel):
     """Creates a default logger with debug mode enabled.
 
     Returns:
@@ -40,6 +91,10 @@ def default_logger() -> logging.Logger:
         Returns an instance of the ``Logger`` object.
     """
     logger = logging.getLogger(__name__)
-    logger.addHandler(hdlr=default_handler())
-    logger.setLevel(level=logging.DEBUG)
+    if handler == LogHandler.file:
+        logger.addHandler(hdlr=file_handler())
+    elif handler == LogHandler.stdout:
+        logger.addHandler(hdlr=stream_handler())
+
+    logger.setLevel(level)
    return logger
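
The `_missing_` hook in the new `LogLevel` enum is what lets callers pass plain strings for `log_level`: `Enum.__call__` falls back to `_missing_` when no member's value matches the argument. A standalone sketch of that lookup behavior, with the enum mirrored here purely for illustration rather than imported from the package:

```python
import logging
from enum import IntEnum


class LogLevel(IntEnum):
    """Mirror of the package's enum, reproduced only to illustrate the lookup."""

    debug = logging.DEBUG
    info = logging.INFO
    warning = logging.WARNING
    error = logging.ERROR

    @classmethod
    def _missing_(cls, value):
        # Enum.__call__ lands here when no member's *value* matches;
        # lower-casing the input lets LogLevel("INFO") resolve by *name*.
        if isinstance(value, str):
            value = value.lower()
            for member in cls:
                if member.name == value:
                    return member
        return None


assert LogLevel(logging.INFO) is LogLevel.info  # ordinary value lookup
assert LogLevel("WARNING") is LogLevel.warning  # resolved via _missing_
```
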
s3/uploader.py CHANGED
@@ -2,15 +2,16 @@ import logging
 import os
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Dict
+from typing import Dict, Iterable
 
 import boto3.resources.factory
+import dotenv
 from botocore.config import Config
 from botocore.exceptions import ClientError
 from tqdm import tqdm
 
 from s3.exceptions import BucketNotFound
-from s3.logger import default_logger
+from s3.logger import LogHandler, LogLevel, setup_logger
 from s3.utils import (
     RETRY_CONFIG,
     UploadResults,
@@ -33,15 +34,20 @@ class Uploader:
         bucket_name: str,
         upload_dir: str,
         s3_prefix: str = None,
-        exclude_path: str = None,
+        exclude_prefix: str = None,
         skip_dot_files: bool = True,
         overwrite: bool = False,
+        file_exclusion: Iterable[str] = None,
+        folder_exclusion: Iterable[str] = None,
        region_name: str = None,
         profile_name: str = None,
         aws_access_key_id: str = None,
         aws_secret_access_key: str = None,
         retry_config: Config = RETRY_CONFIG,
         logger: logging.Logger = None,
+        log_handler: LogHandler = LogHandler.stdout,
+        log_level: LogLevel = LogLevel.debug,
+        env_file: str = None,
     ):
         """Initiates all the necessary args and creates a boto3 session with retry logic.
 
@@ -49,14 +55,19 @@
             bucket_name: Name of the bucket.
             upload_dir: Full path of the directory to be uploaded.
             s3_prefix: Particular bucket prefix within which the upload should happen.
-            exclude_path: Full directory path to exclude from S3 object prefix.
+            exclude_prefix: Full directory path to exclude from S3 object prefix.
             skip_dot_files: Boolean flag to skip dot files.
             overwrite: Boolean flag to overwrite files in S3.
+            file_exclusion: Sequence of files to exclude during upload.
+            folder_exclusion: Sequence of directories to exclude during upload.
             region_name: Name of the AWS region.
             profile_name: AWS profile name.
             aws_access_key_id: AWS access key ID.
             aws_secret_access_key: AWS secret access key.
             logger: Bring your own logger.
+            log_handler: Default log handler, can be ``file`` or ``stdout``.
+            log_level: Default log level, can be ``debug``, ``info``, ``warning`` or ``error``.
+            env_file: Dotenv file (.env) filepath to load environment variables.
 
         See Also:
             s3_prefix:
@@ -65,29 +76,56 @@
                 If ``s3_prefix`` is set to: ``2025``, then the file path
                 ``/home/ubuntu/Desktop/S3Upload/sub/photo.jpg`` will be uploaded as ``2025/S3Upload/sub/photo.jpg``
 
-            exclude_path:
+            exclude_prefix:
                 When upload directory is "/home/ubuntu/Desktop/S3Upload", each file will naturally have the full prefix.
-                However, this behavior can be avoided by specifying the ``exclude_path`` parameter.
+                However, this behavior can be avoided by specifying the ``exclude_prefix`` parameter.
 
-                If exclude_path is set to: ``/home/ubuntu/Desktop``, then the file path
+                If exclude_prefix is set to: ``/home/ubuntu/Desktop``, then the file path
                 ``/home/ubuntu/Desktop/S3Upload/sub-dir/photo.jpg`` will be uploaded as ``S3Upload/sub-dir/photo.jpg``
+
+            env_file:
+                Environment variables can be loaded from a .env file.
+                The filepath can be set as ``env_file`` during object instantiation or as an environment variable.
+                If a filepath is provided, PyS3Uploader loads it directly or searches the root directory for the file.
+                If no filepath is provided, PyS3Uploader searches the current directory for a .env file.
         """
+        self.logger = logger or setup_logger(handler=LogHandler(log_handler), level=LogLevel(log_level))
+        self.env_file = env_file or getenv("ENV_FILE", default=".env")
+
+        # Check for env_file in current working directory
+        if os.path.isfile(self.env_file):
+            self.logger.debug("Loading env file: %s", self.env_file)
+            dotenv.load_dotenv(dotenv_path=self.env_file, override=True)
+        # Find the env_file from root
+        elif env_file := dotenv.find_dotenv(self.env_file, raise_error_if_not_found=False):
+            self.logger.debug("Loading env file: %s", env_file)
+            dotenv.load_dotenv(dotenv_path=env_file, override=True)
+        else:
+            # Scan current working directory for any .env files
+            for file in os.listdir():
+                if file.endswith(".env"):
+                    self.logger.debug("Loading env file: %s", file)
+                    dotenv.load_dotenv(dotenv_path=file, override=True)
+                    break
+            else:
+                self.logger.debug("No .env files found to load")
+
         self.session = boto3.Session(
-            profile_name=profile_name or getenv("PROFILE_NAME"),
+            profile_name=profile_name or getenv("PROFILE_NAME", "AWS_PROFILE_NAME"),
             region_name=region_name or getenv("AWS_DEFAULT_REGION"),
             aws_access_key_id=aws_access_key_id or getenv("AWS_ACCESS_KEY_ID"),
             aws_secret_access_key=aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY"),
         )
         self.s3 = self.session.resource(service_name="s3", config=retry_config)
 
-        self.logger = logger or default_logger()
-
         self.bucket_name = bucket_name
-        self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
+        self.upload_dir = upload_dir
         self.s3_prefix = s3_prefix
-        self.exclude_path = exclude_path
+        self.exclude_prefix = exclude_prefix
         self.skip_dot_files = skip_dot_files
         self.overwrite = overwrite
+        self.file_exclusion = file_exclusion or []
+        self.folder_exclusion = folder_exclusion or []
 
         self.results = UploadResults()
         self.start = time.time()
@@ -106,9 +144,9 @@
             BucketNotFound: If bucket name was not found.
         """
         self.start = time.time()
-        if self.exclude_path and self.exclude_path not in self.upload_dir:
+        if self.exclude_prefix and self.exclude_prefix not in self.upload_dir:
             raise ValueError(
-                f"\n\n\tStart folder {self.exclude_path!r} is not a part of upload directory {self.upload_dir!r}"
+                f"\n\n\tStart folder {self.exclude_prefix!r} is not a part of upload directory {self.upload_dir!r}"
             )
         if not self.upload_dir:
             raise ValueError("\n\n\tCannot proceed without an upload directory.")
@@ -150,19 +188,21 @@
         """
         if self.overwrite:
             return True
+        try:
+            file_size = os.path.getsize(filepath)
+        except (OSError, PermissionError) as error:
+            self.logger.error(error)
+            file_size = 0
         # Indicates that the object path already exists in S3
         if object_size := self.object_size_map.get(objectpath):
-            try:
-                file_size = os.path.getsize(filepath)
-            except (OSError, PermissionError) as error:
-                self.logger.error(error)
-                return True
             if object_size == file_size:
                 self.logger.info("S3 object %s exists, and size [%d] matches, skipping..", objectpath, object_size)
                 return False
             self.logger.info(
                 "S3 object %s exists, but size mismatch. Local: [%d], S3: [%d]", objectpath, file_size, object_size
             )
+        else:
+            self.logger.debug("S3 object '%s' of size [%d bytes] doesn't exist, uploading..", objectpath, file_size)
         return True
 
     def _uploader(self, filepath: str, objectpath: str) -> None:
@@ -184,13 +224,20 @@
         """
         files_to_upload = {}
         for __path, __directory, __files in os.walk(self.upload_dir):
+            scan_dir = os.path.split(__path)[-1]
+            if scan_dir in self.folder_exclusion:
+                self.logger.info("Skipping '%s' honoring folder exclusion", scan_dir)
+                continue
             for file_ in __files:
+                if file_ in self.file_exclusion:
+                    self.logger.info("Skipping '%s' honoring file exclusion", file_)
+                    continue
                 if self.skip_dot_files and file_.startswith("."):
                     self.logger.info("Skipping dot file: %s", file_)
                     continue
                 file_path = os.path.join(__path, file_)
-                if self.exclude_path:
-                    relative_path = file_path.replace(self.exclude_path, "")
+                if self.exclude_prefix:
+                    relative_path = file_path.replace(self.exclude_prefix, "")
                 else:
                     relative_path = file_path
                 # Lists in python are ordered, so s3 prefix will get loaded first when provided
@@ -213,7 +260,7 @@
         self.logger.debug(keys)
         self.logger.info("%d files from '%s' will be uploaded to '%s'", len(keys), self.upload_dir, self.bucket_name)
         self.logger.info("Initiating upload process.")
-        for objectpath, filepath in tqdm(
+        for filepath, objectpath in tqdm(
             keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
         ):
             try:
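
The dotenv block added to `__init__` above resolves the env file in three steps: an explicit (or `ENV_FILE`-derived) path in the working directory, then python-dotenv's directory search via `find_dotenv`, then any `*.env` file in the current directory. A minimal standalone sketch of the same order, using python-dotenv directly; the function name and return convention are illustrative, not part of the package:

```python
import os

import dotenv


def load_env(env_file: str | None = None) -> str | None:
    """Hypothetical helper mirroring the 0.2.4 lookup order; returns the path loaded."""
    candidate = env_file or os.environ.get("ENV_FILE", ".env")
    # 1. Explicit path (or ENV_FILE / default ".env") relative to the working directory
    if os.path.isfile(candidate):
        dotenv.load_dotenv(dotenv_path=candidate, override=True)
        return candidate
    # 2. Let python-dotenv walk up parent directories looking for the file
    if found := dotenv.find_dotenv(candidate, raise_error_if_not_found=False):
        dotenv.load_dotenv(dotenv_path=found, override=True)
        return found
    # 3. Fall back to the first *.env file in the current directory, if any
    for file in os.listdir():
        if file.endswith(".env"):
            dotenv.load_dotenv(dotenv_path=file, override=True)
            return file
    return None
```

Note that `override=True` means values from the file replace any variables already present in the process environment.
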
pys3uploader-0.2.2.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
-s3/__init__.py,sha256=L12aFBb0plj8WISe0_He1vvQ55aOKd8VCyMlj_2LqrQ,66
-s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
-s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
-s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
-s3/uploader.py,sha256=I2An6Ix0rFMlvDLtLaDQ6F-YrN70IDCNFgh9E32cXHA,11489
-s3/utils.py,sha256=NbF28CYviK_St5qd1EOumMVyus9BvQON7clUFeR_SEQ,4473
-pys3uploader-0.2.2.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
-pys3uploader-0.2.2.dist-info/METADATA,sha256=fIlyxO6dFHYH3uhFg4yZa7RtGGBKqaOynAHlrbuffoY,7449
-pys3uploader-0.2.2.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
-pys3uploader-0.2.2.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
-pys3uploader-0.2.2.dist-info/RECORD,,