PyS3Uploader 0.4.0a1__py3-none-any.whl → 0.4.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of PyS3Uploader has been flagged as potentially problematic; consult the package registry's advisory page for more details.

pys3uploader/metadata.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from dataclasses import dataclass
2
2
 
3
+
3
4
  @dataclass
4
5
  class Metadata(dict):
5
6
  """Dataclass for metadata information."""
pys3uploader/timer.py CHANGED
@@ -1,4 +1,5 @@
1
- from threading import Timer
1
+ import logging
2
+ import threading
2
3
  from typing import Any, Callable, Dict, Tuple
3
4
 
4
5
 
@@ -15,6 +16,7 @@ class RepeatedTimer:
15
16
  function: Callable,
16
17
  args: Tuple = None,
17
18
  kwargs: Dict[str, Any] = None,
19
+ logger: logging.Logger = None,
18
20
  ):
19
21
  """Repeats the ``Timer`` object from threading.
20
22
 
@@ -23,32 +25,33 @@ class RepeatedTimer:
23
25
  function: Function to trigger with intervals.
24
26
  args: Arguments for the function.
25
27
  kwargs: Keyword arguments for the function.
28
+ logger: Logger instance.
26
29
  """
27
- self._timer = None
28
30
  self.interval = interval
29
31
  self.function = function
30
32
  self.args = args or ()
31
33
  self.kwargs = kwargs or {}
32
- self.is_running = False
34
+ self.logger = logger or logging.getLogger(__name__)
35
+ self.thread = None
36
+ self._stop_event = threading.Event()
33
37
 
34
38
  def _run(self):
35
39
  """Triggers the target function."""
36
- self.is_running = False
37
- self.start()
38
- self.function(*self.args, **self.kwargs)
40
+ while not self._stop_event.wait(self.interval):
41
+ try:
42
+ self.function(*self.args, **self.kwargs)
43
+ except Exception as error:
44
+ self.logger.error("Error in RepeatedTimer function [%s]: %s", self.function.__name__, error)
39
45
 
40
46
  def start(self):
41
47
  """Trigger target function if timer isn't running already."""
42
- if not self.is_running:
43
- self._timer = Timer(self.interval, self._run)
44
- self._timer.start()
45
- self.is_running = True
48
+ if self.thread and self.thread.is_alive():
49
+ return
50
+ self.thread = threading.Thread(target=self._run, daemon=True)
51
+ self.thread.start()
46
52
 
47
- def stop(self):
53
+ def stop(self, timeout: int = 3):
48
54
  """Stop the timer and cancel all futures."""
49
- self._timer.cancel()
50
- self.is_running = False
51
-
52
- def cancel(self):
53
- """Initiate cancellation."""
54
- self.stop()
55
+ self._stop_event.set()
56
+ if self.thread:
57
+ self.thread.join(timeout=timeout)
pys3uploader/uploader.py CHANGED
@@ -2,8 +2,8 @@ import json
2
2
  import logging
3
3
  import os
4
4
  import time
5
- from datetime import datetime, UTC
6
5
  from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ from datetime import UTC, datetime
7
7
  from typing import Dict, Iterable, NoReturn
8
8
 
9
9
  import boto3.resources.factory
@@ -13,10 +13,10 @@ from botocore.config import Config
13
13
  from botocore.exceptions import ClientError
14
14
 
15
15
  from pys3uploader.exceptions import BucketNotFound
16
- from pys3uploader.metadata import Metadata
17
- from pys3uploader.timer import RepeatedTimer
18
16
  from pys3uploader.logger import LogHandler, LogLevel, setup_logger
17
+ from pys3uploader.metadata import Metadata
19
18
  from pys3uploader.progress import ProgressPercentage
19
+ from pys3uploader.timer import RepeatedTimer
20
20
  from pys3uploader.utils import (
21
21
  RETRY_CONFIG,
22
22
  UploadResults,
@@ -45,6 +45,8 @@ class Uploader:
45
45
  overwrite: bool = False,
46
46
  file_exclusion: Iterable[str] = None,
47
47
  folder_exclusion: Iterable[str] = None,
48
+ metadata_upload_interval: int = None,
49
+ metadata_filename: str = None,
48
50
  region_name: str = None,
49
51
  profile_name: str = None,
50
52
  aws_access_key_id: str = None,
@@ -66,10 +68,13 @@ class Uploader:
66
68
  overwrite: Boolean flag to overwrite files in S3.
67
69
  file_exclusion: Sequence of files to exclude during upload.
68
70
  folder_exclusion: Sequence of directories to exclude during upload.
71
+ metadata_upload_interval: Interval in seconds to upload metadata file.
72
+ metadata_filename: Metadata filename to upload periodically.
69
73
  region_name: Name of the AWS region.
70
74
  profile_name: AWS profile name.
71
75
  aws_access_key_id: AWS access key ID.
72
76
  aws_secret_access_key: AWS secret access key.
77
+ retry_config: Boto3 retry configuration.
73
78
  logger: Bring your own logger.
74
79
  log_handler: Default log handler, can be ``file`` or ``stdout``.
75
80
  log_level: Default log level, can be ``debug``, ``info``, ``warning`` or ``error``.
@@ -145,10 +150,13 @@ class Uploader:
145
150
  self.upload_files: Dict[str, str] = {}
146
151
  self.file_size_map: Dict[str, int] = {}
147
152
 
153
+ self.metadata_filename = metadata_filename or getenv("METADATA_FILENAME", default="METADATA.json")
148
154
  self.timer = RepeatedTimer(
149
155
  function=self.metadata_uploader,
150
- interval=int(getenv("METADATA_UPLOAD_INTERVAL", 300))
156
+ interval=metadata_upload_interval or int(getenv("METADATA_UPLOAD_INTERVAL", default="300")),
157
+ logger=self.logger,
151
158
  )
159
+ self.alive_bar_kwargs = dict(title="Progress", bar="smooth", spinner=None, enrich_print=False)
152
160
 
153
161
  def init(self) -> None | NoReturn:
154
162
  """Instantiates the bucket instance.
@@ -175,7 +183,7 @@ class Uploader:
175
183
  self.upload_dir = os.path.abspath(self.upload_dir)
176
184
  self.load_bucket_state()
177
185
 
178
- def load_bucket_state(self):
186
+ def load_bucket_state(self) -> None:
179
187
  """Loads the bucket's current state."""
180
188
  # noinspection PyUnresolvedReferences
181
189
  self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
@@ -191,10 +199,11 @@ class Uploader:
191
199
  def exit(self) -> None:
192
200
  """Exits after printing results, and run time."""
193
201
  success = len(self.results.success)
202
+ skipped = len(self.results.skipped)
194
203
  failed = len(self.results.failed)
195
204
  total = success + failed
196
205
  self.logger.info(
197
- "Total number of uploads: %d, success: %d, failed: %d", total, success, failed
206
+ "Total number of uploads: %d, skipped: %d, success: %d, failed: %d", total, skipped, success, failed
198
207
  )
199
208
  # Stop the timer and upload the final state as metadata file
200
209
  self.timer.stop()
@@ -253,11 +262,13 @@ class Uploader:
253
262
  object_size,
254
263
  size_converter(object_size),
255
264
  )
265
+ self.results.skipped.append(filepath)
256
266
  return False
257
267
  self.logger.info(
258
268
  "S3 object %s exists, but size mismatch. Local: [%d bytes / %s], S3: [%d bytes / %s]",
259
269
  objectpath,
260
270
  file_size,
271
+ size_converter(file_size),
261
272
  object_size,
262
273
  size_converter(object_size),
263
274
  )
@@ -319,13 +330,29 @@ class Uploader:
319
330
  files_to_upload[file_path] = object_path
320
331
  return files_to_upload
321
332
 
322
- def run(self) -> None:
323
- """Initiates object upload in a traditional loop."""
333
+ def _preflight(self) -> int:
334
+ """Preflight checks and tasks before upload.
335
+
336
+ Returns:
337
+ int:
338
+ Returns the total number of files to be uploaded.
339
+ """
340
+ # Verify and initiate bucket state
324
341
  self.init()
342
+ # Verify and initiate local state
325
343
  self.load_local_state()
344
+ # Make sure there are files to upload
345
+ assert self.upload_files, "\n\n\tNo files found to upload.\n"
346
+ # Log size details
326
347
  self.size_it()
348
+ # Start metadata upload timer
327
349
  self.timer.start()
328
- total_files = len(self.upload_files)
350
+ # Return total files to upload
351
+ return len(self.upload_files)
352
+
353
+ def run(self) -> None:
354
+ """Initiates object upload in a traditional loop."""
355
+ total_files = self._preflight()
329
356
 
330
357
  self.logger.info(
331
358
  "%d files from '%s' will be uploaded to '%s' sequentially",
@@ -333,7 +360,7 @@ class Uploader:
333
360
  self.upload_dir,
334
361
  self.bucket_name,
335
362
  )
336
- with alive_bar(total_files, title="Progress", bar="smooth", spinner="dots") as overall_bar:
363
+ with alive_bar(total_files, **self.alive_bar_kwargs) as overall_bar:
337
364
  for filepath, objectpath in self.upload_files.items():
338
365
  progress_callback = ProgressPercentage(
339
366
  filename=os.path.basename(filepath), size=self.filesize(filepath), bar=overall_bar
@@ -356,13 +383,7 @@ class Uploader:
356
383
  Args:
357
384
  max_workers: Number of maximum threads to use.
358
385
  """
359
- # Verify and initiate bucket state
360
- self.init()
361
- # Verify and initiate local state
362
- self.load_local_state()
363
- self.size_it()
364
- self.timer.start()
365
- total_files = len(self.upload_files)
386
+ total_files = self._preflight()
366
387
 
367
388
  self.logger.info(
368
389
  "%d files from '%s' will be uploaded to '%s' with maximum concurrency of: %d",
@@ -371,16 +392,18 @@ class Uploader:
371
392
  self.bucket_name,
372
393
  max_workers,
373
394
  )
374
- with alive_bar(total_files, title="Progress", bar="smooth", spinner="dots") as overall_bar:
395
+ with alive_bar(total_files, **self.alive_bar_kwargs) as overall_bar:
375
396
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
376
- futures = []
397
+ futures = {}
377
398
  for filepath, objectpath in self.upload_files.items():
378
399
  progress_callback = ProgressPercentage(
379
400
  filename=os.path.basename(filepath), size=self.filesize(filepath), bar=overall_bar
380
401
  )
381
- futures.append(executor.submit(self._uploader, filepath, objectpath, callback=progress_callback))
402
+ future = executor.submit(self._uploader, filepath, objectpath, progress_callback)
403
+ futures[future] = filepath
382
404
 
383
405
  for future in as_completed(futures):
406
+ filepath = futures[future]
384
407
  try:
385
408
  future.result()
386
409
  self.results.success.append(filepath)
@@ -392,29 +415,29 @@ class Uploader:
392
415
 
393
416
  def metadata_uploader(self) -> None:
394
417
  """Metadata uploader."""
395
- filename = objectpath = getenv("METADATA_FILENAME", "METADATA.json")
396
418
  self.load_bucket_state()
397
- objects_uploaded = len(self.results.success)
398
- size_uploaded = sum([self.filesize(file) for file in self.results.success])
419
+ success = list(set(self.results.success + self.results.skipped))
420
+ objects_uploaded = len(success)
421
+ size_uploaded = sum(self.filesize(file) for file in success)
399
422
 
400
- pending_files = self.upload_files.keys() - self.results.success
423
+ pending_files = set(self.upload_files.keys()) - set(success)
401
424
  objects_pending = len(pending_files)
402
- size_pending = sum([self.filesize(file) for file in pending_files])
425
+ size_pending = sum(self.filesize(file) for file in pending_files)
403
426
 
404
427
  metadata = Metadata(
405
428
  timestamp=datetime.now(tz=UTC).strftime("%A %B %d, %Y %H:%M:%S"),
406
429
  objects_uploaded=objects_uploaded,
407
430
  objects_pending=objects_pending,
408
431
  size_uploaded=size_converter(size_uploaded),
409
- size_pending=size_converter(size_pending)
432
+ size_pending=size_converter(size_pending),
410
433
  )
411
- self.logger.debug("\n" + json.dumps(metadata, indent=2) + "\n")
434
+ self.logger.debug("\n" + json.dumps(metadata.__dict__, indent=2) + "\n")
412
435
  self.logger.debug("Uploading metadata to S3")
413
- filepath = os.path.join(os.getcwd(), filename)
436
+ filepath = os.path.join(os.getcwd(), self.metadata_filename)
414
437
  with open(filepath, "w") as file:
415
438
  json.dump(metadata.__dict__, file, indent=2)
416
439
  file.flush()
417
- self.bucket.upload_file(filepath, objectpath)
440
+ self.bucket.upload_file(filepath, self.metadata_filename)
418
441
 
419
442
  def get_bucket_structure(self) -> str:
420
443
  """Gets all the objects in an S3 bucket and forms it into a hierarchical folder like representation.
pys3uploader/utils.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import math
2
2
  import os
3
- from typing import Dict, Set, List
3
+ from typing import Dict, List, Set
4
4
 
5
5
  from botocore.config import Config
6
6
 
@@ -25,6 +25,7 @@ class UploadResults(dict):
25
25
 
26
26
  success: List[str] = []
27
27
  failed: List[str] = []
28
+ skipped: List[str] = []
28
29
 
29
30
 
30
31
  def getenv(*args, default: str = None) -> str:
pys3uploader/version.py CHANGED
@@ -1 +1 @@
1
- version = "0.4.0a1"
1
+ version = "0.4.1a0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: PyS3Uploader
3
- Version: 0.4.0a1
3
+ Version: 0.4.1a0
4
4
  Summary: Python module to upload objects to an S3 bucket.
5
5
  Author-email: Vignesh Rao <svignesh1793@gmail.com>
6
6
  License: MIT License
@@ -157,6 +157,8 @@ if __name__ == '__main__':
157
157
  - **overwrite** - Boolean flag to overwrite files present in S3. Defaults to ``False``
158
158
  - **file_exclusion** - Sequence of files to exclude during upload. Defaults to ``None``
159
159
  - **folder_exclusion** - Sequence of directories to exclude during upload. Defaults to ``None``
160
+ - **metadata_upload_interval** - Interval in seconds to upload metadata file. Defaults to ``300``
161
+ - **metadata_filename** - Metadata filename to upload periodically. Defaults to ``METADATA.json``
160
162
  - **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
161
163
  - **log_handler** - Choose between `stdout` vs `file` logging. Defaults to `pys3uploader.LogHandler.stdout`
162
164
  - **log_level** - Choose the logging level. Defaults to `pys3uploader.LogLevel.debug`
@@ -0,0 +1,15 @@
1
+ pys3uploader/__init__.py,sha256=EqMScWbJNV4UWeMg4fMko2KB18xL2CO3a3o_od0H0Lc,124
2
+ pys3uploader/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
+ pys3uploader/logger.py,sha256=z9JEnyf4nHIakey0bAaCgEN7oXOYJYOpskZyM_4s-D4,2678
4
+ pys3uploader/metadata.py,sha256=4pn8Y9dVZLFXxq5Cocm20t1mfDkV5xJXY6YEekQ8ssQ,231
5
+ pys3uploader/progress.py,sha256=IladNMXLBhkPpxOntpANTam_hC9OWosmNDmdbweDNYM,1195
6
+ pys3uploader/timer.py,sha256=8tvlQaO1Z7TfFVBhl5qrDq9wsMiOR6P0J2bkVwfM96c,1816
7
+ pys3uploader/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
8
+ pys3uploader/uploader.py,sha256=sKUcC5eFL_NPYoYi3FWUWRCwkjoW2OXlcozNP9OIrzM,19707
9
+ pys3uploader/utils.py,sha256=8eIM5ZhS7-bYT5Dq4FNbmojQ16vvoAPF1ihCCTZezGE,5783
10
+ pys3uploader/version.py,sha256=t9lIxi0skPjuG6f_lIZ9Hx9gXs5mbdbnIuC5KLIMc-Y,20
11
+ pys3uploader-0.4.1a0.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
12
+ pys3uploader-0.4.1a0.dist-info/METADATA,sha256=m4P0H49F97neV8LFFUOfASBHEmHGip3Ck-Yj4kvdbkU,9155
13
+ pys3uploader-0.4.1a0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
14
+ pys3uploader-0.4.1a0.dist-info/top_level.txt,sha256=lVIFMMoUx7dj_myetBmOUQTJiOzz5VyDqchnQElmrWw,13
15
+ pys3uploader-0.4.1a0.dist-info/RECORD,,
@@ -1,15 +0,0 @@
1
- pys3uploader/__init__.py,sha256=EqMScWbJNV4UWeMg4fMko2KB18xL2CO3a3o_od0H0Lc,124
2
- pys3uploader/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
- pys3uploader/logger.py,sha256=z9JEnyf4nHIakey0bAaCgEN7oXOYJYOpskZyM_4s-D4,2678
4
- pys3uploader/metadata.py,sha256=tOOoLh2vISfH-GfH3yBcA_xtEjRwomaw7sCLEaDRK-8,230
5
- pys3uploader/progress.py,sha256=IladNMXLBhkPpxOntpANTam_hC9OWosmNDmdbweDNYM,1195
6
- pys3uploader/timer.py,sha256=qN2XNrGEyP3stsK3McvhE3VvIiUFh7mv4rbp5WDeyVU,1498
7
- pys3uploader/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
8
- pys3uploader/uploader.py,sha256=h5DYQA2yv0fQ2SSyAnAl8SsgJUajmN_o1PdMSqMbACM,18588
9
- pys3uploader/utils.py,sha256=_2RYKUTyrQzwkxo7fSiLb5ASrpjcNpb3kZHqy_wByRk,5755
10
- pys3uploader/version.py,sha256=VAwBBgd_skAqJS9UL1T_xDXryTqN5m58fbTTEXcKxgM,20
11
- pys3uploader-0.4.0a1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
12
- pys3uploader-0.4.0a1.dist-info/METADATA,sha256=FdJdNSesnP1xHfb4il5HBw1pxsPn7ToAYkQ_T3PrIb0,8959
13
- pys3uploader-0.4.0a1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
14
- pys3uploader-0.4.0a1.dist-info/top_level.txt,sha256=lVIFMMoUx7dj_myetBmOUQTJiOzz5VyDqchnQElmrWw,13
15
- pys3uploader-0.4.0a1.dist-info/RECORD,,