PyS3Uploader 0.2.3__py3-none-any.whl → 0.4.0b2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
pys3uploader/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from pys3uploader.logger import LogHandler, LogLevel # noqa: F401
2
+ from pys3uploader.uploader import Uploader # noqa: F401
pys3uploader/logger.py ADDED
@@ -0,0 +1,104 @@
1
+ """Loads a default logger with StreamHandler set to DEBUG mode.
2
+
3
+ >>> logging.Logger
4
+
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ from datetime import datetime
10
+ from enum import IntEnum, StrEnum
11
+
12
+
13
+ class LogHandler(StrEnum):
14
+ """Logging handlers to choose from when default logger is used.
15
+
16
+ >>> LogHandler
17
+
18
+ """
19
+
20
+ file = "file"
21
+ stdout = "stdout"
22
+
23
+
24
+ class LogLevel(IntEnum):
25
+ """Logging levels to choose from when default logger is used.
26
+
27
+ >>> LogLevel
28
+
29
+ """
30
+
31
+ debug = logging.DEBUG
32
+ info = logging.INFO
33
+ warning = logging.WARNING
34
+ error = logging.ERROR
35
+
36
+ @classmethod
37
+ def _missing_(cls, value):
38
+ """Allow constructing from string names."""
39
+ if isinstance(value, str):
40
+ value = value.lower()
41
+ for member in cls:
42
+ if member.name == value:
43
+ return member
44
+ return None
45
+
46
+
47
+ def stream_handler() -> logging.StreamHandler:
48
+ """Creates a ``StreamHandler`` and assigns a default format to it.
49
+
50
+ Returns:
51
+ logging.StreamHandler:
52
+ Returns an instance of the ``StreamHandler`` object.
53
+ """
54
+ handler = logging.StreamHandler()
55
+ handler.setFormatter(fmt=default_format())
56
+ return handler
57
+
58
+
59
+ def file_handler() -> logging.FileHandler:
60
+ """Creates a ``StreamHandler`` and assigns a default format to it.
61
+
62
+ Returns:
63
+ logging.FileHandler:
64
+ Returns an instance of the ``FileHandler`` object.
65
+ """
66
+ os.makedirs("logs", exist_ok=True)
67
+ filename = os.path.join("logs", datetime.now().strftime("PyS3Uploader_%d-%m-%Y_%H:%M.log"))
68
+ handler = logging.FileHandler(filename, mode="a")
69
+ handler.setFormatter(fmt=default_format())
70
+ return handler
71
+
72
+
73
+ def default_format() -> logging.Formatter:
74
+ """Creates a logging ``Formatter`` with a custom message and datetime format.
75
+
76
+ Returns:
77
+ logging.Formatter:
78
+ Returns an instance of the ``Formatter`` object.
79
+ """
80
+ return logging.Formatter(
81
+ fmt="%(asctime)s - %(levelname)s - [%(module)s:%(lineno)d] - %(funcName)s - %(message)s",
82
+ datefmt="%b-%d-%Y %I:%M:%S %p",
83
+ )
84
+
85
+
86
+ def setup_logger(handler: LogHandler, level: LogLevel) -> logging.Logger:
87
+ """Creates a default logger with debug mode enabled.
88
+
89
+ Args:
90
+ handler: Logging handler to use.
91
+ level: Logging level to use.
92
+
93
+ Returns:
94
+ logging.Logger:
95
+ Returns an instance of the ``Logger`` object.
96
+ """
97
+ logger = logging.getLogger(__name__)
98
+ if handler == LogHandler.file:
99
+ logger.addHandler(hdlr=file_handler())
100
+ elif handler == LogHandler.stdout:
101
+ logger.addHandler(hdlr=stream_handler())
102
+
103
+ logger.setLevel(level)
104
+ return logger
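The new `pys3uploader/logger.py` replaces the old `default_logger()` with a configurable `setup_logger(handler, level)`: `LogHandler` selects file or stdout output and `LogLevel._missing_` allows the level to be given by name. A minimal sketch of how these helpers fit together (purely illustrative; the log message is a placeholder):

```python
import logging

from pys3uploader.logger import LogHandler, LogLevel, setup_logger

# LogLevel is an IntEnum, so members compare equal to the stdlib constants,
# and _missing_ allows construction from the member name as a string.
assert LogLevel("INFO") is LogLevel.info
assert LogLevel.info == logging.INFO

# LogHandler.file writes to ./logs/PyS3Uploader_<timestamp>.log; stdout streams to the console.
logger = setup_logger(handler=LogHandler.stdout, level=LogLevel.debug)
logger.debug("configured %s handler at %s level", LogHandler.stdout.value, LogLevel.debug.name)
```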
pys3uploader/metadata.py ADDED
@@ -0,0 +1,12 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
+ @dataclass
5
+ class Metadata(dict):
6
+ """Dataclass for metadata information."""
7
+
8
+ timestamp: str
9
+ objects_uploaded: int
10
+ objects_pending: int
11
+ size_uploaded: str
12
+ size_pending: str
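`Metadata` is the snapshot that `metadata_uploader` (further down in `uploader.py`) serialises via `__dict__` and pushes to the bucket as `METADATA.json`. A small sketch of that round trip, with illustrative field values only:

```python
import json

from pys3uploader.metadata import Metadata

# Field values below are placeholders; the uploader derives them from its live upload state.
snapshot = Metadata(
    timestamp="Monday January 01, 2025 00:00:00",
    objects_uploaded=10,
    objects_pending=2,
    size_uploaded="1.2 GB",
    size_pending="250 MB",
)
print(json.dumps(snapshot.__dict__, indent=2))
```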
pys3uploader/progress.py ADDED
@@ -0,0 +1,39 @@
1
+ import threading
2
+
3
+ from alive_progress import alive_bar
4
+
5
+
6
+ class ProgressPercentage:
7
+ """Tracks progress of a file upload to S3 and updates the alive_bar.
8
+
9
+ >>> ProgressPercentage
10
+
11
+ """
12
+
13
+ def __init__(self, filename: str, size: int, bar: alive_bar):
14
+ """Initializes the progress tracker.
15
+
16
+ Args:
17
+ filename: Name of the file being uploaded.
18
+ size: Total size of the file in bytes.
19
+ bar: alive_bar instance to update progress.
20
+ """
21
+ self._filename = filename
22
+ self._size = size
23
+ self._seen_so_far = 0
24
+ self._lock = threading.Lock()
25
+ self._bar = bar
26
+
27
+ def __call__(self, bytes_amount: int) -> None:
28
+ """Callback method to update progress.
29
+
30
+ Args:
31
+ bytes_amount: Number of bytes transferred in the last chunk.
32
+ """
33
+ with self._lock:
34
+ self._seen_so_far += bytes_amount
35
+ percent = (self._seen_so_far / self._size) * 100
36
+ bar_len = 20
37
+ filled = int(bar_len * percent / 100)
38
+ bar_str = "█" * filled + "." * (bar_len - filled)
39
+ self._bar.text(f" || {self._filename} [{bar_str}] {percent:.0f}%")
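`ProgressPercentage` is handed to boto3 as the `Callback` argument of `upload_file`, which invokes it with the byte count of every chunk transferred; the callback then renders a per-file bar into the shared `alive_bar` text slot. A standalone sketch, assuming a local file `example.bin` and a bucket named `my-bucket`:

```python
import os

import boto3
from alive_progress import alive_bar

from pys3uploader.progress import ProgressPercentage

filepath = "example.bin"   # assumed local file
bucket = "my-bucket"       # assumed bucket name

s3 = boto3.resource("s3")
with alive_bar(1, title="Progress") as overall_bar:
    callback = ProgressPercentage(
        filename=os.path.basename(filepath),
        size=os.path.getsize(filepath),
        bar=overall_bar,
    )
    # boto3 calls the callback repeatedly with bytes_amount as the transfer progresses
    s3.Bucket(bucket).upload_file(filepath, os.path.basename(filepath), Callback=callback)
    overall_bar()  # tick the overall bar once the file finishes
```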
pys3uploader/timer.py ADDED
@@ -0,0 +1,57 @@
1
+ import logging
2
+ import threading
3
+ from typing import Any, Callable, Dict, Tuple
4
+
5
+
6
+ class RepeatedTimer:
7
+ """Instantiates RepeatedTimer object to kick off the threading.Timer object with custom intervals.
8
+
9
+ >>> RepeatedTimer
10
+
11
+ """
12
+
13
+ def __init__(
14
+ self,
15
+ interval: int,
16
+ function: Callable,
17
+ args: Tuple = None,
18
+ kwargs: Dict[str, Any] = None,
19
+ logger: logging.Logger = None,
20
+ ):
21
+ """Repeats the ``Timer`` object from threading.
22
+
23
+ Args:
24
+ interval: Interval in seconds.
25
+ function: Function to trigger with intervals.
26
+ args: Arguments for the function.
27
+ kwargs: Keyword arguments for the function.
28
+ logger: Logger instance.
29
+ """
30
+ self.interval = interval
31
+ self.function = function
32
+ self.args = args or ()
33
+ self.kwargs = kwargs or {}
34
+ self.logger = logger or logging.getLogger(__name__)
35
+ self.thread = None
36
+ self._stop_event = threading.Event()
37
+
38
+ def _run(self):
39
+ """Triggers the target function."""
40
+ while not self._stop_event.wait(self.interval):
41
+ try:
42
+ self.function(*self.args, **self.kwargs)
43
+ except Exception as error:
44
+ self.logger.error("Error in RepeatedTimer function [%s]: %s", self.function.__name__, error)
45
+
46
+ def start(self):
47
+ """Trigger target function if timer isn't running already."""
48
+ if self.thread and self.thread.is_alive():
49
+ return
50
+ self.thread = threading.Thread(target=self._run, daemon=True)
51
+ self.thread.start()
52
+
53
+ def stop(self, timeout: int = 3):
54
+ """Stop the timer and cancel all futures."""
55
+ self._stop_event.set()
56
+ if self.thread:
57
+ self.thread.join(timeout=timeout)
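`RepeatedTimer` is what drives the periodic metadata upload: it runs the target function on a daemon thread every `interval` seconds until `stop()` sets the event and joins the thread. A small sketch with a throwaway task standing in for `Uploader.metadata_uploader`:

```python
import logging
import time

from pys3uploader.timer import RepeatedTimer

logging.basicConfig(level=logging.INFO)


def heartbeat(name: str) -> None:
    """Placeholder task; the uploader wires metadata_uploader here instead."""
    logging.info("heartbeat from %s", name)


timer = RepeatedTimer(interval=2, function=heartbeat, kwargs={"name": "demo"})
timer.start()
time.sleep(7)  # heartbeat fires roughly every 2 seconds in the background
timer.stop()   # sets the stop event and joins the worker thread (3 second timeout)
```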
s3/uploader.py → pys3uploader/uploader.py RENAMED
@@ -1,23 +1,29 @@
1
+ import json
1
2
  import logging
2
3
  import os
3
4
  import time
4
5
  from concurrent.futures import ThreadPoolExecutor, as_completed
5
- from typing import Dict, Iterable
6
+ from datetime import UTC, datetime
7
+ from typing import Dict, Iterable, NoReturn
6
8
 
7
9
  import boto3.resources.factory
8
10
  import dotenv
11
+ from alive_progress import alive_bar
9
12
  from botocore.config import Config
10
13
  from botocore.exceptions import ClientError
11
- from tqdm import tqdm
12
14
 
13
- from s3.exceptions import BucketNotFound
14
- from s3.logger import default_logger
15
- from s3.utils import (
15
+ from pys3uploader.exceptions import BucketNotFound
16
+ from pys3uploader.logger import LogHandler, LogLevel, setup_logger
17
+ from pys3uploader.metadata import Metadata
18
+ from pys3uploader.progress import ProgressPercentage
19
+ from pys3uploader.timer import RepeatedTimer
20
+ from pys3uploader.utils import (
16
21
  RETRY_CONFIG,
17
22
  UploadResults,
18
23
  convert_seconds,
19
24
  convert_to_folder_structure,
20
25
  getenv,
26
+ size_converter,
21
27
  urljoin,
22
28
  )
23
29
 
@@ -39,12 +45,16 @@ class Uploader:
39
45
  overwrite: bool = False,
40
46
  file_exclusion: Iterable[str] = None,
41
47
  folder_exclusion: Iterable[str] = None,
48
+ metadata_upload_interval: int = None,
49
+ metadata_filename: str = None,
42
50
  region_name: str = None,
43
51
  profile_name: str = None,
44
52
  aws_access_key_id: str = None,
45
53
  aws_secret_access_key: str = None,
46
54
  retry_config: Config = RETRY_CONFIG,
47
55
  logger: logging.Logger = None,
56
+ log_handler: LogHandler = LogHandler.stdout,
57
+ log_level: LogLevel = LogLevel.debug,
48
58
  env_file: str = None,
49
59
  ):
50
60
  """Initiates all the necessary args and creates a boto3 session with retry logic.
@@ -58,11 +68,15 @@ class Uploader:
58
68
  overwrite: Boolean flag to overwrite files in S3.
59
69
  file_exclusion: Sequence of files to exclude during upload.
60
70
  folder_exclusion: Sequence of directories to exclude during upload.
71
+ metadata_upload_interval: Interval in seconds to upload metadata file.
72
+ metadata_filename: Metadata filename to upload periodically.
61
73
  region_name: Name of the AWS region.
62
74
  profile_name: AWS profile name.
63
75
  aws_access_key_id: AWS access key ID.
64
76
  aws_secret_access_key: AWS secret access key.
65
77
  logger: Bring your own logger.
78
+ log_handler: Default log handler, can be ``file`` or ``stdout``.
79
+ log_level: Default log level, can be ``debug``, ``info``, ``warning`` or ``error``.
66
80
  env_file: Dotenv file (.env) filepath to load environment variables.
67
81
 
68
82
  See Also:
@@ -85,7 +99,7 @@ class Uploader:
85
99
  If a filepath is provided, PyS3Uploader loads it directly or searches the root directory for the file.
86
100
  If no filepath is provided, PyS3Uploader searches the current directory for a .env file.
87
101
  """
88
- self.logger = logger or default_logger()
102
+ self.logger = logger or setup_logger(handler=LogHandler(log_handler), level=LogLevel(log_level))
89
103
  self.env_file = env_file or getenv("ENV_FILE", default=".env")
90
104
 
91
105
  # Check for env_file in current working directory
@@ -132,7 +146,18 @@ class Uploader:
132
146
  self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = []
133
147
  self.object_size_map: Dict[str, int] = {}
134
148
 
135
- def init(self) -> None:
149
+ self.upload_files: Dict[str, str] = {}
150
+ self.file_size_map: Dict[str, int] = {}
151
+
152
+ self.metadata_filename = metadata_filename or getenv("METADATA_FILENAME", default="METADATA.json")
153
+ self.timer = RepeatedTimer(
154
+ function=self.metadata_uploader,
155
+ interval=metadata_upload_interval or int(getenv("METADATA_UPLOAD_INTERVAL", default="300")),
156
+ logger=self.logger,
157
+ )
158
+ self.alive_bar_kwargs = dict(title="Progress", bar="smooth", spinner=None, enrich_print=False)
159
+
160
+ def init(self) -> None | NoReturn:
136
161
  """Instantiates the bucket instance.
137
162
 
138
163
  Raises:
@@ -150,27 +175,69 @@ class Uploader:
150
175
  assert os.path.exists(self.upload_dir)
151
176
  except AssertionError:
152
177
  raise ValueError(f"\n\n\tPath not found: {self.upload_dir}")
153
- buckets = [bucket.name for bucket in self.s3.buckets.all()]
154
178
  if not self.bucket_name:
155
- raise ValueError(f"\n\n\tCannot proceed without a bucket name.\n\tAvailable: {buckets}")
156
- _account_id, _alias = self.session.resource(service_name="iam").CurrentUser().arn.split("/")
157
- if self.bucket_name not in buckets:
158
- raise BucketNotFound(f"\n\n\t{self.bucket_name} was not found in {_alias} account.\n\tAvailable: {buckets}")
179
+ raise ValueError("\n\n\tCannot proceed without a bucket name.")
180
+ if (buckets := [bucket.name for bucket in self.s3.buckets.all()]) and self.bucket_name not in buckets:
181
+ raise BucketNotFound(f"\n\n\t{self.bucket_name} was not found.\n\tAvailable: {buckets}")
159
182
  self.upload_dir = os.path.abspath(self.upload_dir)
183
+ self.load_bucket_state()
184
+
185
+ def load_bucket_state(self) -> None:
186
+ """Loads the bucket's current state."""
160
187
  # noinspection PyUnresolvedReferences
161
188
  self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
162
189
  # noinspection PyUnresolvedReferences
163
190
  self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = [obj for obj in self.bucket.objects.all()]
164
191
  self.object_size_map = {obj.key: obj.size for obj in self.bucket_objects}
165
192
 
193
+ def load_local_state(self):
194
+ """Loads the local file queue."""
195
+ self.upload_files = self._get_files()
196
+ self.file_size_map = {file: self.filesize(file) for file in self.upload_files}
197
+
166
198
  def exit(self) -> None:
167
199
  """Exits after printing results, and run time."""
168
- total = self.results.success + self.results.failed
200
+ success = len(self.results.success)
201
+ skipped = len(self.results.skipped)
202
+ failed = len(self.results.failed)
203
+ total = success + failed
169
204
  self.logger.info(
170
- "Total number of uploads: %d, success: %d, failed: %d", total, self.results.success, self.results.failed
205
+ "Total number of uploads: %d, skipped: %d, success: %d, failed: %d", total, skipped, success, failed
171
206
  )
207
+ # Stop the timer and upload the final state as metadata file
208
+ self.timer.stop()
209
+ self.metadata_uploader()
172
210
  self.logger.info("Run time: %s", convert_seconds(time.time() - self.start))
173
211
 
212
+ def filesize(self, filepath: str) -> int:
213
+ """Gets the file size of a given filepath.
214
+
215
+ Args:
216
+ filepath: Full path of the file.
217
+
218
+ Returns:
219
+ int:
220
+ Returns the file size in bytes.
221
+ """
222
+ try:
223
+ return os.path.getsize(filepath)
224
+ except (OSError, PermissionError) as error:
225
+ self.logger.error(error)
226
+ return 0
227
+
228
+ def size_it(self) -> None:
229
+ """Calculates and logs the total size of files in S3 and local."""
230
+ files_in_s3 = len(self.object_size_map)
231
+ files_local = len(self.upload_files)
232
+
233
+ total_size_s3 = sum(self.object_size_map.values())
234
+ total_size_local = sum(self.file_size_map.values())
235
+
236
+ self.logger.info("Files in S3: [#%d]: %s (%d bytes)", files_in_s3, size_converter(total_size_s3), total_size_s3)
237
+ self.logger.info(
238
+ "Files local: [#%d]: %s (%d bytes)", files_local, size_converter(total_size_local), total_size_local
239
+ )
240
+
174
241
  def _proceed_to_upload(self, filepath: str, objectpath: str) -> bool:
175
242
  """Compares file size if the object already exists in S3.
176
243
 
@@ -184,30 +251,44 @@ class Uploader:
184
251
  """
185
252
  if self.overwrite:
186
253
  return True
254
+ file_size = self.filesize(filepath)
187
255
  # Indicates that the object path already exists in S3
188
256
  if object_size := self.object_size_map.get(objectpath):
189
- try:
190
- file_size = os.path.getsize(filepath)
191
- except (OSError, PermissionError) as error:
192
- self.logger.error(error)
193
- return True
194
257
  if object_size == file_size:
195
- self.logger.info("S3 object %s exists, and size [%d] matches, skipping..", objectpath, object_size)
258
+ self.logger.info(
259
+ "S3 object %s exists, and size [%d bytes / %s] matches, skipping..",
260
+ objectpath,
261
+ object_size,
262
+ size_converter(object_size),
263
+ )
264
+ self.results.skipped.append(filepath)
196
265
  return False
197
266
  self.logger.info(
198
- "S3 object %s exists, but size mismatch. Local: [%d], S3: [%d]", objectpath, file_size, object_size
267
+ "S3 object %s exists, but size mismatch. Local: [%d bytes / %s], S3: [%d bytes / %s]",
268
+ objectpath,
269
+ file_size,
270
+ object_size,
271
+ size_converter(object_size),
272
+ )
273
+ else:
274
+ self.logger.debug(
275
+ "S3 object '%s' of size [%d bytes / %s] doesn't exist, uploading..",
276
+ objectpath,
277
+ file_size,
278
+ size_converter(file_size),
199
279
  )
200
280
  return True
201
281
 
202
- def _uploader(self, filepath: str, objectpath: str) -> None:
282
+ def _uploader(self, filepath: str, objectpath: str, callback: ProgressPercentage) -> None:
203
283
  """Uploads the filepath to the specified S3 bucket.
204
284
 
205
285
  Args:
206
286
  filepath: Filepath to upload.
207
287
  objectpath: Object path ref in S3.
288
+ callback: ProgressPercentage callback to track upload progress.
208
289
  """
209
290
  if self._proceed_to_upload(filepath, objectpath):
210
- self.bucket.upload_file(filepath, objectpath)
291
+ self.bucket.upload_file(filepath, objectpath, Callback=callback)
211
292
 
212
293
  def _get_files(self) -> Dict[str, str]:
213
294
  """Get a mapping for all the file path and object paths in upload directory.
@@ -247,22 +328,51 @@ class Uploader:
247
328
  files_to_upload[file_path] = object_path
248
329
  return files_to_upload
249
330
 
331
+ def _preflight(self) -> int:
332
+ """Preflight checks and tasks before upload.
333
+
334
+ Returns:
335
+ int:
336
+ Returns the total number of files to be uploaded.
337
+ """
338
+ # Verify and initiate bucket state
339
+ self.init()
340
+ # Verify and initiate local state
341
+ self.load_local_state()
342
+ # Make sure there are files to upload
343
+ assert self.upload_files, "\n\n\tNo files found to upload.\n"
344
+ # Log size details
345
+ self.size_it()
346
+ # Start metadata upload timer
347
+ self.timer.start()
348
+ # Return total files to upload
349
+ return len(self.upload_files)
350
+
250
351
  def run(self) -> None:
251
352
  """Initiates object upload in a traditional loop."""
252
- self.init()
253
- keys = self._get_files()
254
- self.logger.debug(keys)
255
- self.logger.info("%d files from '%s' will be uploaded to '%s'", len(keys), self.upload_dir, self.bucket_name)
256
- self.logger.info("Initiating upload process.")
257
- for filepath, objectpath in tqdm(
258
- keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
259
- ):
260
- try:
261
- self._uploader(filepath=filepath, objectpath=objectpath)
262
- self.results.success += 1
263
- except ClientError as error:
264
- self.logger.error(error)
265
- self.results.failed += 1
353
+ total_files = self._preflight()
354
+
355
+ self.logger.info(
356
+ "%d files from '%s' will be uploaded to '%s' sequentially",
357
+ total_files,
358
+ self.upload_dir,
359
+ self.bucket_name,
360
+ )
361
+ with alive_bar(total_files, **self.alive_bar_kwargs) as overall_bar:
362
+ for filepath, objectpath in self.upload_files.items():
363
+ progress_callback = ProgressPercentage(
364
+ filename=os.path.basename(filepath), size=self.filesize(filepath), bar=overall_bar
365
+ )
366
+ try:
367
+ self._uploader(filepath, objectpath, progress_callback)
368
+ self.results.success.append(filepath)
369
+ except ClientError as error:
370
+ self.logger.error("Upload failed: %s", error)
371
+ self.results.failed.append(filepath)
372
+ except KeyboardInterrupt:
373
+ self.logger.warning("Upload interrupted by user")
374
+ break
375
+ overall_bar() # increment overall progress bar
266
376
  self.exit()
267
377
 
268
378
  def run_in_parallel(self, max_workers: int = 5) -> None:
@@ -271,36 +381,62 @@ class Uploader:
271
381
  Args:
272
382
  max_workers: Number of maximum threads to use.
273
383
  """
274
- self.init()
275
- keys = self._get_files()
276
- self.logger.debug(keys)
384
+ total_files = self._preflight()
385
+
277
386
  self.logger.info(
278
387
  "%d files from '%s' will be uploaded to '%s' with maximum concurrency of: %d",
279
- len(keys),
388
+ total_files,
280
389
  self.upload_dir,
281
390
  self.bucket_name,
282
391
  max_workers,
283
392
  )
284
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
285
- futures = [
286
- executor.submit(self._uploader, **dict(filepath=filepath, objectpath=objectpath))
287
- for filepath, objectpath in keys.items()
288
- ]
289
- for future in tqdm(
290
- iterable=as_completed(futures),
291
- total=len(futures),
292
- desc=f"Uploading files to {self.bucket_name}",
293
- unit="files",
294
- leave=True,
295
- ):
296
- try:
297
- future.result()
298
- self.results.success += 1
299
- except ClientError as error:
300
- self.logger.error(f"Upload failed: {error}")
301
- self.results.failed += 1
393
+ with alive_bar(total_files, **self.alive_bar_kwargs) as overall_bar:
394
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
395
+ futures = {}
396
+ for filepath, objectpath in self.upload_files.items():
397
+ progress_callback = ProgressPercentage(
398
+ filename=os.path.basename(filepath), size=self.filesize(filepath), bar=overall_bar
399
+ )
400
+ future = executor.submit(self._uploader, filepath, objectpath, progress_callback)
401
+ futures[future] = filepath
402
+
403
+ for future in as_completed(futures):
404
+ filepath = futures[future]
405
+ try:
406
+ future.result()
407
+ self.results.success.append(filepath)
408
+ except ClientError as error:
409
+ self.logger.error("Upload failed: %s", error)
410
+ self.results.failed.append(filepath)
411
+ overall_bar() # Increment overall bar after each upload finishes
302
412
  self.exit()
303
413
 
414
+ def metadata_uploader(self) -> None:
415
+ """Metadata uploader."""
416
+ self.load_bucket_state()
417
+ success = list(set(self.results.success + self.results.skipped))
418
+ objects_uploaded = len(success)
419
+ size_uploaded = sum(self.filesize(file) for file in success)
420
+
421
+ pending_files = set(self.upload_files.keys()) - set(success)
422
+ objects_pending = len(pending_files)
423
+ size_pending = sum(self.filesize(file) for file in pending_files)
424
+
425
+ metadata = Metadata(
426
+ timestamp=datetime.now(tz=UTC).strftime("%A %B %d, %Y %H:%M:%S"),
427
+ objects_uploaded=objects_uploaded,
428
+ objects_pending=objects_pending,
429
+ size_uploaded=size_converter(size_uploaded),
430
+ size_pending=size_converter(size_pending),
431
+ )
432
+ self.logger.debug("\n" + json.dumps(metadata.__dict__, indent=2) + "\n")
433
+ self.logger.debug("Uploading metadata to S3")
434
+ filepath = os.path.join(os.getcwd(), self.metadata_filename)
435
+ with open(filepath, "w") as file:
436
+ json.dump(metadata.__dict__, file, indent=2)
437
+ file.flush()
438
+ self.bucket.upload_file(filepath, self.metadata_filename)
439
+
304
440
  def get_bucket_structure(self) -> str:
305
441
  """Gets all the objects in an S3 bucket and forms it into a hierarchical folder like representation.
306
442
 
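Taken together, the rewritten uploader now runs a preflight (`init()`, `load_local_state()`, `size_it()`), skips any object whose S3 size already matches the local file when `overwrite` is false, and keeps pushing a `METADATA.json` progress snapshot on a timer until `exit()` uploads the final state. A hedged end-to-end sketch; the bucket name and directory are placeholders:

```python
import pys3uploader

uploader = pys3uploader.Uploader(
    bucket_name="my-bucket",        # placeholder bucket
    upload_dir="/data/backup",      # placeholder local directory
    overwrite=False,                # size-matching objects are skipped and counted separately
    metadata_upload_interval=120,   # push METADATA.json every 2 minutes during the run
    metadata_filename="METADATA.json",
    log_handler=pys3uploader.LogHandler.file,
    log_level=pys3uploader.LogLevel.info,
)
uploader.run_in_parallel(max_workers=10)  # or uploader.run() for sequential uploads
```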
s3/utils.py → pys3uploader/utils.py RENAMED
@@ -1,5 +1,6 @@
1
+ import math
1
2
  import os
2
- from typing import Dict, Set
3
+ from typing import Dict, List, Set
3
4
 
4
5
  from botocore.config import Config
5
6
 
@@ -22,12 +23,22 @@ class UploadResults(dict):
22
23
 
23
24
  """
24
25
 
25
- success: int = 0
26
- failed: int = 0
26
+ success: List[str] = []
27
+ failed: List[str] = []
28
+ skipped: List[str] = []
27
29
 
28
30
 
29
31
  def getenv(*args, default: str = None) -> str:
30
- """Returns the key-ed environment variable or the default value."""
32
+ """Returns the key-ed environment variable or the default value.
33
+
34
+ Args:
35
+ args: Environment variable keys to search for.
36
+ default: Default value to return if no environment variable is found.
37
+
38
+ Returns:
39
+ str:
40
+ Environment variable value or the default value.
41
+ """
31
42
  for key in args:
32
43
  if value := os.environ.get(key.upper()) or os.environ.get(key.lower()):
33
44
  return value
@@ -37,6 +48,9 @@ def getenv(*args, default: str = None) -> str:
37
48
  def urljoin(*args) -> str:
38
49
  """Joins given arguments into a url. Trailing but not leading slashes are stripped for each argument.
39
50
 
51
+ Args:
52
+ args: Parts of the url to join.
53
+
40
54
  Returns:
41
55
  str:
42
56
  Joined url.
@@ -67,6 +81,10 @@ def convert_to_folder_structure(sequence: Set[str]) -> str:
67
81
  Args:
68
82
  structure: Structure of folder objects as key-value pairs.
69
83
  indent: Required indentation for the ASCII.
84
+
85
+ Returns:
86
+ str:
87
+ String representation of the folder structure.
70
88
  """
71
89
  result = ""
72
90
  for i, (key, value) in enumerate(structure.items()):
@@ -145,3 +163,33 @@ def convert_seconds(seconds: int | float, n_elem: int = 2) -> str:
145
163
 
146
164
  list_ = time_parts[:n_elem]
147
165
  return ", and ".join([", ".join(list_[:-1]), list_[-1]] if len(list_) > 2 else list_)
166
+
167
+
168
+ def format_nos(input_: float) -> int | float:
169
+ """Removes ``.0`` float values.
170
+
171
+ Args:
172
+ input_: Float value that may carry a redundant ``.0``.
173
+
174
+ Returns:
175
+ int | float:
176
+ Int if found, else returns the received float value.
177
+ """
178
+ return int(input_) if isinstance(input_, float) and input_.is_integer() else input_
179
+
180
+
181
+ def size_converter(byte_size: int | float) -> str:
182
+ """Gets the current memory consumed and converts it to human friendly format.
183
+
184
+ Args:
185
+ byte_size: Receives byte size as argument.
186
+
187
+ Returns:
188
+ str:
189
+ Converted understandable size.
190
+ """
191
+ if not byte_size:
192
+ return "0 B"
193
+ size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
194
+ index = int(math.floor(math.log(byte_size, 1024)))
195
+ return f"{format_nos(round(byte_size / pow(1024, index), 2))} {size_name[index]}"
pys3uploader/version.py ADDED
@@ -0,0 +1 @@
1
+ version = "0.4.0b2"
pys3uploader-0.2.3.dist-info/METADATA → pys3uploader-0.4.0b2.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: PyS3Uploader
3
- Version: 0.2.3
3
+ Version: 0.4.0b2
4
4
  Summary: Python module to upload objects to an S3 bucket.
5
5
  Author-email: Vignesh Rao <svignesh1793@gmail.com>
6
6
  License: MIT License
@@ -29,7 +29,7 @@ Project-URL: Homepage, https://github.com/thevickypedia/PyS3Uploader
29
29
  Project-URL: Docs, https://thevickypedia.github.io/PyS3Uploader/
30
30
  Project-URL: Source, https://github.com/thevickypedia/PyS3Uploader
31
31
  Project-URL: Bug Tracker, https://github.com/thevickypedia/PyS3Uploader/issues
32
- Keywords: s3
32
+ Keywords: pys3uploader
33
33
  Classifier: Development Status :: 1 - Planning
34
34
  Classifier: Intended Audience :: Information Technology
35
35
  Classifier: Operating System :: OS Independent
@@ -39,9 +39,9 @@ Classifier: Topic :: Internet :: File Transfer Protocol (FTP)
39
39
  Requires-Python: >=3.11
40
40
  Description-Content-Type: text/markdown
41
41
  License-File: LICENSE
42
+ Requires-Dist: alive-progress==3.3.*
42
43
  Requires-Dist: boto3==1.40.*
43
44
  Requires-Dist: python-dotenv==1.1.*
44
- Requires-Dist: tqdm==4.67.*
45
45
  Provides-Extra: dev
46
46
  Requires-Dist: sphinx==5.1.1; extra == "dev"
47
47
  Requires-Dist: pre-commit; extra == "dev"
@@ -76,6 +76,43 @@ Requires-Dist: recommonmark; extra == "dev"
76
76
  # PyS3Uploader
77
77
  Python module to upload an entire directory to an S3 bucket.
78
78
 
79
+ <details>
80
+ <summary><strong>Bucket Policy Required</strong></summary>
81
+
82
+ ```json
83
+ {
84
+ "Version": "2012-10-17",
85
+ "Statement": [
86
+ {
87
+ "Sid": "ListBucketsForExistenceCheck",
88
+ "Effect": "Allow",
89
+ "Action": "s3:ListAllMyBuckets",
90
+ "Resource": "*"
91
+ },
92
+ {
93
+ "Sid": "ListAndUploadToSpecificBucket",
94
+ "Effect": "Allow",
95
+ "Action": [
96
+ "s3:ListBucket",
97
+ "s3:ListBucketMultipartUploads"
98
+ ],
99
+ "Resource": "arn:aws:s3:::bucketname"
100
+ },
101
+ {
102
+ "Sid": "UploadObjectsToBucket",
103
+ "Effect": "Allow",
104
+ "Action": [
105
+ "s3:PutObject",
106
+ "s3:AbortMultipartUpload",
107
+ "s3:ListMultipartUploadParts"
108
+ ],
109
+ "Resource": "arn:aws:s3:::bucketname/*"
110
+ }
111
+ ]
112
+ }
113
+ ```
114
+ </details>
115
+
79
116
  ### Installation
80
117
  ```shell
81
118
  pip install PyS3Uploader
@@ -85,10 +122,10 @@ pip install PyS3Uploader
85
122
 
86
123
  ##### Upload objects in parallel
87
124
  ```python
88
- import s3
125
+ import pys3uploader
89
126
 
90
127
  if __name__ == '__main__':
91
- wrapper = s3.Uploader(
128
+ wrapper = pys3uploader.Uploader(
92
129
  bucket_name="BUCKET_NAME",
93
130
  upload_dir="FULL_PATH_TO_UPLOAD",
94
131
  exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
@@ -98,10 +135,10 @@ if __name__ == '__main__':
98
135
 
99
136
  ##### Upload objects in sequence
100
137
  ```python
101
- import s3
138
+ import pys3uploader
102
139
 
103
140
  if __name__ == '__main__':
104
- wrapper = s3.Uploader(
141
+ wrapper = pys3uploader.Uploader(
105
142
  bucket_name="BUCKET_NAME",
106
143
  upload_dir="FULL_PATH_TO_UPLOAD",
107
144
  exclude_prefix="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
@@ -120,7 +157,11 @@ if __name__ == '__main__':
120
157
  - **overwrite** - Boolean flag to overwrite files present in S3. Defaults to ``False``
121
158
  - **file_exclusion** - Sequence of files to exclude during upload. Defaults to ``None``
122
159
  - **folder_exclusion** - Sequence of directories to exclude during upload. Defaults to ``None``
160
+ - **metadata_upload_interval** - Interval in seconds to upload metadata file. Defaults to ``300``
161
+ - **metadata_filename** - Metadata filename to upload periodically. Defaults to ``METADATA.json``
123
162
  - **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
163
+ - **log_handler** - Choose between `stdout` vs `file` logging. Defaults to `pys3uploader.LogHandler.stdout`
164
+ - **log_level** - Choose the logging level. Defaults to `pys3uploader.LogLevel.debug`
124
165
  - **env_file** – Path to a `.env` file for loading environment variables. Defaults to scanning the current directory.
125
166
  <br><br>
126
167
  - **region_name** - AWS region name. Defaults to the env var `AWS_DEFAULT_REGION`
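`log_handler` and `log_level` accept either the enum members or their plain string names, since the constructor coerces them through `LogHandler(...)` and `LogLevel(...)`. A hedged sketch reusing the placeholder values from the examples above:

```python
import pys3uploader

if __name__ == '__main__':
    wrapper = pys3uploader.Uploader(
        bucket_name="BUCKET_NAME",
        upload_dir="FULL_PATH_TO_UPLOAD",
        log_handler="file",        # equivalent to pys3uploader.LogHandler.file
        log_level="warning",       # equivalent to pys3uploader.LogLevel.warning
        metadata_upload_interval=600,
    )
    wrapper.run()
```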
pys3uploader-0.4.0b2.dist-info/RECORD ADDED
@@ -0,0 +1,15 @@
1
+ pys3uploader/__init__.py,sha256=EqMScWbJNV4UWeMg4fMko2KB18xL2CO3a3o_od0H0Lc,124
2
+ pys3uploader/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
+ pys3uploader/logger.py,sha256=z9JEnyf4nHIakey0bAaCgEN7oXOYJYOpskZyM_4s-D4,2678
4
+ pys3uploader/metadata.py,sha256=4pn8Y9dVZLFXxq5Cocm20t1mfDkV5xJXY6YEekQ8ssQ,231
5
+ pys3uploader/progress.py,sha256=IladNMXLBhkPpxOntpANTam_hC9OWosmNDmdbweDNYM,1195
6
+ pys3uploader/timer.py,sha256=8tvlQaO1Z7TfFVBhl5qrDq9wsMiOR6P0J2bkVwfM96c,1816
7
+ pys3uploader/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
8
+ pys3uploader/uploader.py,sha256=iaLsYaqyI4SeRmuQhQWEQpEzaA_U3szloOegj_axp-0,19611
9
+ pys3uploader/utils.py,sha256=8eIM5ZhS7-bYT5Dq4FNbmojQ16vvoAPF1ihCCTZezGE,5783
10
+ pys3uploader/version.py,sha256=FqI7luRfbr03_FdtC3SRipnHn79CZY6AjZYAM2RmRi0,20
11
+ pys3uploader-0.4.0b2.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
12
+ pys3uploader-0.4.0b2.dist-info/METADATA,sha256=yVVlf8rbYL48rOXj6J5mt290xlz0xjhA3f7jO_tY1-Q,9155
13
+ pys3uploader-0.4.0b2.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
14
+ pys3uploader-0.4.0b2.dist-info/top_level.txt,sha256=lVIFMMoUx7dj_myetBmOUQTJiOzz5VyDqchnQElmrWw,13
15
+ pys3uploader-0.4.0b2.dist-info/RECORD,,
pys3uploader-0.4.0b2.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
1
+ pys3uploader
pys3uploader-0.2.3.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
1
- s3/__init__.py,sha256=aZ2woJ8TD2tgqXi0ElG-wWwJWoQLIdqTdm50FLaxL8w,66
2
- s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
- s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
4
- s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
5
- s3/uploader.py,sha256=KxrWbIInXxXQszP_uJLf_dBI5rUNjNnhco3gr9Vdrto,13767
6
- s3/utils.py,sha256=NbF28CYviK_St5qd1EOumMVyus9BvQON7clUFeR_SEQ,4473
7
- pys3uploader-0.2.3.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
8
- pys3uploader-0.2.3.dist-info/METADATA,sha256=ae2lA8b7dsGWZSMfB8w4joDiAlaE6Wk1f3p1Fxywkc4,7795
9
- pys3uploader-0.2.3.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
10
- pys3uploader-0.2.3.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
11
- pys3uploader-0.2.3.dist-info/RECORD,,
pys3uploader-0.2.3.dist-info/top_level.txt DELETED
@@ -1 +0,0 @@
1
- s3
s3/__init__.py DELETED
@@ -1,3 +0,0 @@
1
- from s3.uploader import Uploader # noqa: F401
2
-
3
- version = "0.2.3"
s3/logger.py DELETED
@@ -1,45 +0,0 @@
1
- """Loads a default logger with StreamHandler set to DEBUG mode.
2
-
3
- >>> logging.Logger
4
-
5
- """
6
-
7
- import logging
8
-
9
-
10
- def default_handler() -> logging.StreamHandler:
11
- """Creates a ``StreamHandler`` and assigns a default format to it.
12
-
13
- Returns:
14
- logging.StreamHandler:
15
- Returns an instance of the ``StreamHandler`` object.
16
- """
17
- handler = logging.StreamHandler()
18
- handler.setFormatter(fmt=default_format())
19
- return handler
20
-
21
-
22
- def default_format() -> logging.Formatter:
23
- """Creates a logging ``Formatter`` with a custom message and datetime format.
24
-
25
- Returns:
26
- logging.Formatter:
27
- Returns an instance of the ``Formatter`` object.
28
- """
29
- return logging.Formatter(
30
- fmt="%(asctime)s - %(levelname)s - [%(module)s:%(lineno)d] - %(funcName)s - %(message)s",
31
- datefmt="%b-%d-%Y %I:%M:%S %p",
32
- )
33
-
34
-
35
- def default_logger() -> logging.Logger:
36
- """Creates a default logger with debug mode enabled.
37
-
38
- Returns:
39
- logging.Logger:
40
- Returns an instance of the ``Logger`` object.
41
- """
42
- logger = logging.getLogger(__name__)
43
- logger.addHandler(hdlr=default_handler())
44
- logger.setLevel(level=logging.DEBUG)
45
- return logger
File without changes
File without changes