PyS3Uploader 0.4.0a1__py3-none-any.whl → 0.4.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pys3uploader/metadata.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from dataclasses import dataclass
2
2
 
3
+
3
4
  @dataclass
4
5
  class Metadata(dict):
5
6
  """Dataclass for metadata information."""
pys3uploader/timer.py CHANGED
@@ -1,4 +1,5 @@
1
- from threading import Timer
1
+ import logging
2
+ import threading
2
3
  from typing import Any, Callable, Dict, Tuple
3
4
 
4
5
 
@@ -15,6 +16,7 @@ class RepeatedTimer:
15
16
  function: Callable,
16
17
  args: Tuple = None,
17
18
  kwargs: Dict[str, Any] = None,
19
+ logger: logging.Logger = None,
18
20
  ):
19
21
  """Repeats the ``Timer`` object from threading.
20
22
 
@@ -23,32 +25,33 @@ class RepeatedTimer:
23
25
  function: Function to trigger with intervals.
24
26
  args: Arguments for the function.
25
27
  kwargs: Keyword arguments for the function.
28
+ logger: Logger instance.
26
29
  """
27
- self._timer = None
28
30
  self.interval = interval
29
31
  self.function = function
30
32
  self.args = args or ()
31
33
  self.kwargs = kwargs or {}
32
- self.is_running = False
34
+ self.logger = logger or logging.getLogger(__name__)
35
+ self.thread = None
36
+ self._stop_event = threading.Event()
33
37
 
34
38
  def _run(self):
35
39
  """Triggers the target function."""
36
- self.is_running = False
37
- self.start()
38
- self.function(*self.args, **self.kwargs)
40
+ while not self._stop_event.wait(self.interval):
41
+ try:
42
+ self.function(*self.args, **self.kwargs)
43
+ except Exception as error:
44
+ self.logger.error("Error in RepeatedTimer function [%s]: %s", self.function.__name__, error)
39
45
 
40
46
  def start(self):
41
47
  """Trigger target function if timer isn't running already."""
42
- if not self.is_running:
43
- self._timer = Timer(self.interval, self._run)
44
- self._timer.start()
45
- self.is_running = True
48
+ if self.thread and self.thread.is_alive():
49
+ return
50
+ self.thread = threading.Thread(target=self._run, daemon=True)
51
+ self.thread.start()
46
52
 
47
- def stop(self):
53
+ def stop(self, timeout: int = 3):
48
54
  """Stop the timer and cancel all futures."""
49
- self._timer.cancel()
50
- self.is_running = False
51
-
52
- def cancel(self):
53
- """Initiate cancellation."""
54
- self.stop()
55
+ self._stop_event.set()
56
+ if self.thread:
57
+ self.thread.join(timeout=timeout)
pys3uploader/uploader.py CHANGED
@@ -2,8 +2,8 @@ import json
2
2
  import logging
3
3
  import os
4
4
  import time
5
- from datetime import datetime, UTC
6
5
  from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ from datetime import UTC, datetime
7
7
  from typing import Dict, Iterable, NoReturn
8
8
 
9
9
  import boto3.resources.factory
@@ -13,10 +13,10 @@ from botocore.config import Config
13
13
  from botocore.exceptions import ClientError
14
14
 
15
15
  from pys3uploader.exceptions import BucketNotFound
16
- from pys3uploader.metadata import Metadata
17
- from pys3uploader.timer import RepeatedTimer
18
16
  from pys3uploader.logger import LogHandler, LogLevel, setup_logger
17
+ from pys3uploader.metadata import Metadata
19
18
  from pys3uploader.progress import ProgressPercentage
19
+ from pys3uploader.timer import RepeatedTimer
20
20
  from pys3uploader.utils import (
21
21
  RETRY_CONFIG,
22
22
  UploadResults,
@@ -45,6 +45,8 @@ class Uploader:
45
45
  overwrite: bool = False,
46
46
  file_exclusion: Iterable[str] = None,
47
47
  folder_exclusion: Iterable[str] = None,
48
+ metadata_upload_interval: int = None,
49
+ metadata_filename: str = None,
48
50
  region_name: str = None,
49
51
  profile_name: str = None,
50
52
  aws_access_key_id: str = None,
@@ -66,6 +68,8 @@ class Uploader:
66
68
  overwrite: Boolean flag to overwrite files in S3.
67
69
  file_exclusion: Sequence of files to exclude during upload.
68
70
  folder_exclusion: Sequence of directories to exclude during upload.
71
+ metadata_upload_interval: Interval in seconds to upload metadata file.
72
+ metadata_filename: Metadata filename to upload periodically.
69
73
  region_name: Name of the AWS region.
70
74
  profile_name: AWS profile name.
71
75
  aws_access_key_id: AWS access key ID.
@@ -145,10 +149,13 @@ class Uploader:
145
149
  self.upload_files: Dict[str, str] = {}
146
150
  self.file_size_map: Dict[str, int] = {}
147
151
 
152
+ self.metadata_filename = metadata_filename or getenv("METADATA_FILENAME", default="METADATA.json")
148
153
  self.timer = RepeatedTimer(
149
154
  function=self.metadata_uploader,
150
- interval=int(getenv("METADATA_UPLOAD_INTERVAL", 300))
155
+ interval=metadata_upload_interval or int(getenv("METADATA_UPLOAD_INTERVAL", default="300")),
156
+ logger=self.logger,
151
157
  )
158
+ self.alive_bar_kwargs = dict(title="Progress", bar="smooth", spinner=None, enrich_print=False)
152
159
 
153
160
  def init(self) -> None | NoReturn:
154
161
  """Instantiates the bucket instance.
@@ -175,7 +182,7 @@ class Uploader:
175
182
  self.upload_dir = os.path.abspath(self.upload_dir)
176
183
  self.load_bucket_state()
177
184
 
178
- def load_bucket_state(self):
185
+ def load_bucket_state(self) -> None:
179
186
  """Loads the bucket's current state."""
180
187
  # noinspection PyUnresolvedReferences
181
188
  self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
@@ -191,10 +198,11 @@ class Uploader:
191
198
  def exit(self) -> None:
192
199
  """Exits after printing results, and run time."""
193
200
  success = len(self.results.success)
201
+ skipped = len(self.results.skipped)
194
202
  failed = len(self.results.failed)
195
203
  total = success + failed
196
204
  self.logger.info(
197
- "Total number of uploads: %d, success: %d, failed: %d", total, success, failed
205
+ "Total number of uploads: %d, skipped: %d, success: %d, failed: %d", total, skipped, success, failed
198
206
  )
199
207
  # Stop the timer and upload the final state as metadata file
200
208
  self.timer.stop()
@@ -253,6 +261,7 @@ class Uploader:
253
261
  object_size,
254
262
  size_converter(object_size),
255
263
  )
264
+ self.results.skipped.append(filepath)
256
265
  return False
257
266
  self.logger.info(
258
267
  "S3 object %s exists, but size mismatch. Local: [%d bytes / %s], S3: [%d bytes / %s]",
@@ -321,7 +330,9 @@ class Uploader:
321
330
 
322
331
  def run(self) -> None:
323
332
  """Initiates object upload in a traditional loop."""
333
+ # Verify and initiate bucket state
324
334
  self.init()
335
+ # Verify and initiate local state
325
336
  self.load_local_state()
326
337
  self.size_it()
327
338
  self.timer.start()
@@ -333,7 +344,7 @@ class Uploader:
333
344
  self.upload_dir,
334
345
  self.bucket_name,
335
346
  )
336
- with alive_bar(total_files, title="Progress", bar="smooth", spinner="dots") as overall_bar:
347
+ with alive_bar(total_files, **self.alive_bar_kwargs) as overall_bar:
337
348
  for filepath, objectpath in self.upload_files.items():
338
349
  progress_callback = ProgressPercentage(
339
350
  filename=os.path.basename(filepath), size=self.filesize(filepath), bar=overall_bar
@@ -371,16 +382,18 @@ class Uploader:
371
382
  self.bucket_name,
372
383
  max_workers,
373
384
  )
374
- with alive_bar(total_files, title="Progress", bar="smooth", spinner="dots") as overall_bar:
385
+ with alive_bar(total_files, **self.alive_bar_kwargs) as overall_bar:
375
386
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
376
- futures = []
387
+ futures = {}
377
388
  for filepath, objectpath in self.upload_files.items():
378
389
  progress_callback = ProgressPercentage(
379
390
  filename=os.path.basename(filepath), size=self.filesize(filepath), bar=overall_bar
380
391
  )
381
- futures.append(executor.submit(self._uploader, filepath, objectpath, callback=progress_callback))
392
+ future = executor.submit(self._uploader, filepath, objectpath, progress_callback)
393
+ futures[future] = filepath
382
394
 
383
395
  for future in as_completed(futures):
396
+ filepath = futures[future]
384
397
  try:
385
398
  future.result()
386
399
  self.results.success.append(filepath)
@@ -392,29 +405,29 @@ class Uploader:
392
405
 
393
406
  def metadata_uploader(self) -> None:
394
407
  """Metadata uploader."""
395
- filename = objectpath = getenv("METADATA_FILENAME", "METADATA.json")
396
408
  self.load_bucket_state()
397
- objects_uploaded = len(self.results.success)
398
- size_uploaded = sum([self.filesize(file) for file in self.results.success])
409
+ success = list(set(self.results.success + self.results.skipped))
410
+ objects_uploaded = len(success)
411
+ size_uploaded = sum(self.filesize(file) for file in success)
399
412
 
400
- pending_files = self.upload_files.keys() - self.results.success
413
+ pending_files = set(self.upload_files.keys()) - set(success)
401
414
  objects_pending = len(pending_files)
402
- size_pending = sum([self.filesize(file) for file in pending_files])
415
+ size_pending = sum(self.filesize(file) for file in pending_files)
403
416
 
404
417
  metadata = Metadata(
405
418
  timestamp=datetime.now(tz=UTC).strftime("%A %B %d, %Y %H:%M:%S"),
406
419
  objects_uploaded=objects_uploaded,
407
420
  objects_pending=objects_pending,
408
421
  size_uploaded=size_converter(size_uploaded),
409
- size_pending=size_converter(size_pending)
422
+ size_pending=size_converter(size_pending),
410
423
  )
411
- self.logger.debug("\n" + json.dumps(metadata, indent=2) + "\n")
424
+ self.logger.debug("\n" + json.dumps(metadata.__dict__, indent=2) + "\n")
412
425
  self.logger.debug("Uploading metadata to S3")
413
- filepath = os.path.join(os.getcwd(), filename)
426
+ filepath = os.path.join(os.getcwd(), self.metadata_filename)
414
427
  with open(filepath, "w") as file:
415
428
  json.dump(metadata.__dict__, file, indent=2)
416
429
  file.flush()
417
- self.bucket.upload_file(filepath, objectpath)
430
+ self.bucket.upload_file(filepath, self.metadata_filename)
418
431
 
419
432
  def get_bucket_structure(self) -> str:
420
433
  """Gets all the objects in an S3 bucket and forms it into a hierarchical folder like representation.
pys3uploader/utils.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import math
2
2
  import os
3
- from typing import Dict, Set, List
3
+ from typing import Dict, List, Set
4
4
 
5
5
  from botocore.config import Config
6
6
 
@@ -25,6 +25,7 @@ class UploadResults(dict):
25
25
 
26
26
  success: List[str] = []
27
27
  failed: List[str] = []
28
+ skipped: List[str] = []
28
29
 
29
30
 
30
31
  def getenv(*args, default: str = None) -> str:
pys3uploader/version.py CHANGED
@@ -1 +1 @@
1
- version = "0.4.0a1"
1
+ version = "0.4.0b1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: PyS3Uploader
3
- Version: 0.4.0a1
3
+ Version: 0.4.0b1
4
4
  Summary: Python module to upload objects to an S3 bucket.
5
5
  Author-email: Vignesh Rao <svignesh1793@gmail.com>
6
6
  License: MIT License
@@ -157,6 +157,8 @@ if __name__ == '__main__':
157
157
  - **overwrite** - Boolean flag to overwrite files present in S3. Defaults to ``False``
158
158
  - **file_exclusion** - Sequence of files to exclude during upload. Defaults to ``None``
159
159
  - **folder_exclusion** - Sequence of directories to exclude during upload. Defaults to ``None``
160
+ - **metadata_upload_interval** - Interval in seconds to upload metadata file. Defaults to ``300``
161
+ - **metadata_filename** - Metadata filename to upload periodically. Defaults to ``METADATA.json``
160
162
  - **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
161
163
  - **log_handler** - Choose between `stdout` vs `file` logging. Defaults to `pys3uploader.LogHandler.stdout`
162
164
  - **log_level** - Choose the logging level. Defaults to `pys3uploader.LogLevel.debug`
@@ -0,0 +1,15 @@
1
+ pys3uploader/__init__.py,sha256=EqMScWbJNV4UWeMg4fMko2KB18xL2CO3a3o_od0H0Lc,124
2
+ pys3uploader/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
+ pys3uploader/logger.py,sha256=z9JEnyf4nHIakey0bAaCgEN7oXOYJYOpskZyM_4s-D4,2678
4
+ pys3uploader/metadata.py,sha256=4pn8Y9dVZLFXxq5Cocm20t1mfDkV5xJXY6YEekQ8ssQ,231
5
+ pys3uploader/progress.py,sha256=IladNMXLBhkPpxOntpANTam_hC9OWosmNDmdbweDNYM,1195
6
+ pys3uploader/timer.py,sha256=8tvlQaO1Z7TfFVBhl5qrDq9wsMiOR6P0J2bkVwfM96c,1816
7
+ pys3uploader/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
8
+ pys3uploader/uploader.py,sha256=Bt2agHNEPAQU4_gCWr9UkQnFmsvN1XA5mv6C_A9CP8o,19354
9
+ pys3uploader/utils.py,sha256=8eIM5ZhS7-bYT5Dq4FNbmojQ16vvoAPF1ihCCTZezGE,5783
10
+ pys3uploader/version.py,sha256=LbK2ZtjMqQY-aqhvblfIlZsvOYPfoOZ1bAnW5Cno4DY,20
11
+ pys3uploader-0.4.0b1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
12
+ pys3uploader-0.4.0b1.dist-info/METADATA,sha256=-LG36ZcYpnRfCzAFwkRygT78hwCteQAHRL2CmEUGGkk,9155
13
+ pys3uploader-0.4.0b1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
14
+ pys3uploader-0.4.0b1.dist-info/top_level.txt,sha256=lVIFMMoUx7dj_myetBmOUQTJiOzz5VyDqchnQElmrWw,13
15
+ pys3uploader-0.4.0b1.dist-info/RECORD,,
@@ -1,15 +0,0 @@
1
- pys3uploader/__init__.py,sha256=EqMScWbJNV4UWeMg4fMko2KB18xL2CO3a3o_od0H0Lc,124
2
- pys3uploader/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
- pys3uploader/logger.py,sha256=z9JEnyf4nHIakey0bAaCgEN7oXOYJYOpskZyM_4s-D4,2678
4
- pys3uploader/metadata.py,sha256=tOOoLh2vISfH-GfH3yBcA_xtEjRwomaw7sCLEaDRK-8,230
5
- pys3uploader/progress.py,sha256=IladNMXLBhkPpxOntpANTam_hC9OWosmNDmdbweDNYM,1195
6
- pys3uploader/timer.py,sha256=qN2XNrGEyP3stsK3McvhE3VvIiUFh7mv4rbp5WDeyVU,1498
7
- pys3uploader/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
8
- pys3uploader/uploader.py,sha256=h5DYQA2yv0fQ2SSyAnAl8SsgJUajmN_o1PdMSqMbACM,18588
9
- pys3uploader/utils.py,sha256=_2RYKUTyrQzwkxo7fSiLb5ASrpjcNpb3kZHqy_wByRk,5755
10
- pys3uploader/version.py,sha256=VAwBBgd_skAqJS9UL1T_xDXryTqN5m58fbTTEXcKxgM,20
11
- pys3uploader-0.4.0a1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
12
- pys3uploader-0.4.0a1.dist-info/METADATA,sha256=FdJdNSesnP1xHfb4il5HBw1pxsPn7ToAYkQ_T3PrIb0,8959
13
- pys3uploader-0.4.0a1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
14
- pys3uploader-0.4.0a1.dist-info/top_level.txt,sha256=lVIFMMoUx7dj_myetBmOUQTJiOzz5VyDqchnQElmrWw,13
15
- pys3uploader-0.4.0a1.dist-info/RECORD,,