PyS3Uploader 0.2.0__py3-none-any.whl → 0.4.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of PyS3Uploader might be problematic.

pys3uploader/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from pys3uploader.logger import LogHandler, LogLevel  # noqa: F401
+ from pys3uploader.uploader import Uploader  # noqa: F401
pys3uploader/logger.py ADDED
@@ -0,0 +1,104 @@
+ """Loads a default logger with a configurable handler (stdout or file) and log level.
+
+ >>> logging.Logger
+
+ """
+
+ import logging
+ import os
+ from datetime import datetime
+ from enum import IntEnum, StrEnum
+
+
+ class LogHandler(StrEnum):
+     """Logging handlers to choose from when the default logger is used.
+
+     >>> LogHandler
+
+     """
+
+     file = "file"
+     stdout = "stdout"
+
+
+ class LogLevel(IntEnum):
+     """Logging levels to choose from when the default logger is used.
+
+     >>> LogLevel
+
+     """
+
+     debug = logging.DEBUG
+     info = logging.INFO
+     warning = logging.WARNING
+     error = logging.ERROR
+
+     @classmethod
+     def _missing_(cls, value):
+         """Allows constructing members from (case-insensitive) string names."""
+         if isinstance(value, str):
+             value = value.lower()
+         for member in cls:
+             if member.name == value:
+                 return member
+         return None
+
+
+ def stream_handler() -> logging.StreamHandler:
+     """Creates a ``StreamHandler`` and assigns a default format to it.
+
+     Returns:
+         logging.StreamHandler:
+         Returns an instance of the ``StreamHandler`` object.
+     """
+     handler = logging.StreamHandler()
+     handler.setFormatter(fmt=default_format())
+     return handler
+
+
+ def file_handler() -> logging.FileHandler:
+     """Creates a ``FileHandler`` and assigns a default format to it.
+
+     Returns:
+         logging.FileHandler:
+         Returns an instance of the ``FileHandler`` object.
+     """
+     os.makedirs("logs", exist_ok=True)
+     filename = os.path.join("logs", datetime.now().strftime("PyS3Uploader_%d-%m-%Y_%H:%M.log"))
+     handler = logging.FileHandler(filename, mode="a")
+     handler.setFormatter(fmt=default_format())
+     return handler
+
+
+ def default_format() -> logging.Formatter:
+     """Creates a logging ``Formatter`` with a custom message and datetime format.
+
+     Returns:
+         logging.Formatter:
+         Returns an instance of the ``Formatter`` object.
+     """
+     return logging.Formatter(
+         fmt="%(asctime)s - %(levelname)s - [%(module)s:%(lineno)d] - %(funcName)s - %(message)s",
+         datefmt="%b-%d-%Y %I:%M:%S %p",
+     )
+
+
+ def setup_logger(handler: LogHandler, level: LogLevel) -> logging.Logger:
+     """Creates a default logger with the requested handler and log level.
+
+     Args:
+         handler: Logging handler to use.
+         level: Logging level to use.
+
+     Returns:
+         logging.Logger:
+         Returns an instance of the ``Logger`` object.
+     """
+     logger = logging.getLogger(__name__)
+     if handler == LogHandler.file:
+         logger.addHandler(hdlr=file_handler())
+     elif handler == LogHandler.stdout:
+         logger.addHandler(hdlr=stream_handler())
+
+     logger.setLevel(level)
+     return logger
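
For context, a minimal usage sketch of the logging module added above, using only names defined in this diff:

    from pys3uploader.logger import LogHandler, LogLevel, setup_logger

    # Stream DEBUG-and-above records to stdout using the package's default format
    logger = setup_logger(handler=LogHandler.stdout, level=LogLevel.debug)
    logger.info("logger ready")

    # LogLevel._missing_ also permits construction from strings: LogLevel("INFO") == LogLevel.info
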
pys3uploader/metadata.py ADDED
@@ -0,0 +1,11 @@
+ from dataclasses import dataclass
+
+ @dataclass
+ class Metadata:
+     """Dataclass for metadata information."""
+
+     timestamp: str
+     objects_uploaded: int
+     objects_pending: int
+     size_uploaded: str
+     size_pending: str
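
Since ``Metadata`` is a plain dataclass, the uploader module below serializes it through ``__dict__``. A tiny sketch with placeholder values:

    import json

    from pys3uploader.metadata import Metadata

    meta = Metadata(
        timestamp="Wednesday January 01, 2025 00:00:00",  # placeholder value
        objects_uploaded=1,
        objects_pending=2,
        size_uploaded="1.0 MB",
        size_pending="2.0 MB",
    )
    print(json.dumps(meta.__dict__, indent=2))  # serializes the five fields as a flat JSON object
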
pys3uploader/progress.py ADDED
@@ -0,0 +1,39 @@
+ import threading
+
+ from alive_progress import alive_bar
+
+
+ class ProgressPercentage:
+     """Tracks progress of a file upload to S3 and updates the alive_bar.
+
+     >>> ProgressPercentage
+
+     """
+
+     def __init__(self, filename: str, size: int, bar: alive_bar):
+         """Initializes the progress tracker.
+
+         Args:
+             filename: Name of the file being uploaded.
+             size: Total size of the file in bytes.
+             bar: alive_bar instance to update progress.
+         """
+         self._filename = filename
+         self._size = size
+         self._seen_so_far = 0
+         self._lock = threading.Lock()
+         self._bar = bar
+
+     def __call__(self, bytes_amount: int) -> None:
+         """Callback method to update progress.
+
+         Args:
+             bytes_amount: Number of bytes transferred in the last chunk.
+         """
+         with self._lock:
+             self._seen_so_far += bytes_amount
+             percent = (self._seen_so_far / self._size) * 100 if self._size else 100.0  # guard zero-byte files
+             bar_len = 20
+             filled = int(bar_len * percent / 100)
+             bar_str = "█" * filled + "." * (bar_len - filled)
+             self._bar.text(f" || {self._filename} [{bar_str}] {percent:.0f}%")
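
For reference, a minimal sketch of wiring this callback into a plain boto3 upload outside the Uploader class; the bucket and file names are placeholders, and boto3 invokes ``Callback`` with the number of bytes transferred per chunk:

    import os

    import boto3
    from alive_progress import alive_bar

    from pys3uploader.progress import ProgressPercentage

    filepath = "photo.jpg"  # placeholder local file
    s3 = boto3.resource("s3")
    with alive_bar(1, title="Progress") as bar:
        callback = ProgressPercentage(
            filename=os.path.basename(filepath),
            size=os.path.getsize(filepath),
            bar=bar,
        )
        s3.Bucket("my-bucket").upload_file(filepath, "photo.jpg", Callback=callback)
        bar()  # mark the single file as done
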
pys3uploader/timer.py ADDED
@@ -0,0 +1,54 @@
+ from threading import Timer
+ from typing import Any, Callable, Dict, Tuple
+
+
+ class RepeatedTimer:
+     """Instantiates RepeatedTimer object to kick off the threading.Timer object with custom intervals.
+
+     >>> RepeatedTimer
+
+     """
+
+     def __init__(
+         self,
+         interval: int,
+         function: Callable,
+         args: Tuple = None,
+         kwargs: Dict[str, Any] = None,
+     ):
+         """Repeats the ``Timer`` object from threading.
+
+         Args:
+             interval: Interval in seconds.
+             function: Function to trigger with intervals.
+             args: Arguments for the function.
+             kwargs: Keyword arguments for the function.
+         """
+         self._timer = None
+         self.interval = interval
+         self.function = function
+         self.args = args or ()
+         self.kwargs = kwargs or {}
+         self.is_running = False
+
+     def _run(self):
+         """Reschedules the timer, then triggers the target function."""
+         self.is_running = False
+         self.start()
+         self.function(*self.args, **self.kwargs)
+
+     def start(self):
+         """Schedules the target function if the timer isn't running already."""
+         if not self.is_running:
+             self._timer = Timer(self.interval, self._run)
+             self._timer.start()
+             self.is_running = True
+
+     def stop(self):
+         """Stops the timer and cancels any pending run."""
+         self._timer.cancel()
+         self.is_running = False
+
+     def cancel(self):
+         """Initiates cancellation."""
+         self.stop()
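
A short usage sketch of ``RepeatedTimer``. Note the design choice in this recipe: ``start()`` only schedules the timer and ``_run()`` reschedules before invoking the function, so the first call fires one interval after ``start()``:

    import time

    from pys3uploader.timer import RepeatedTimer

    def heartbeat() -> None:
        print("tick")

    timer = RepeatedTimer(interval=2, function=heartbeat)
    timer.start()  # first "tick" fires ~2 seconds later
    time.sleep(7)  # roughly three ticks happen in the background
    timer.stop()   # cancels the pending run
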
pys3uploader/uploader.py ADDED
@@ -0,0 +1,432 @@
+ import json
+ import logging
+ import os
+ import time
+ from datetime import datetime, UTC
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from typing import Dict, Iterable, NoReturn
+
+ import boto3.resources.factory
+ import dotenv
+ from alive_progress import alive_bar
+ from botocore.config import Config
+ from botocore.exceptions import ClientError
+
+ from pys3uploader.exceptions import BucketNotFound
+ from pys3uploader.metadata import Metadata
+ from pys3uploader.timer import RepeatedTimer
+ from pys3uploader.logger import LogHandler, LogLevel, setup_logger
+ from pys3uploader.progress import ProgressPercentage
+ from pys3uploader.utils import (
+     RETRY_CONFIG,
+     UploadResults,
+     convert_seconds,
+     convert_to_folder_structure,
+     getenv,
+     size_converter,
+     urljoin,
+ )
+
+
+ class Uploader:
+     """Initiates Uploader object to upload entire directory to S3.
+
+     >>> Uploader
+
+     """
+
+     def __init__(
+         self,
+         bucket_name: str,
+         upload_dir: str,
+         s3_prefix: str = None,
+         exclude_prefix: str = None,
+         skip_dot_files: bool = True,
+         overwrite: bool = False,
+         file_exclusion: Iterable[str] = None,
+         folder_exclusion: Iterable[str] = None,
+         region_name: str = None,
+         profile_name: str = None,
+         aws_access_key_id: str = None,
+         aws_secret_access_key: str = None,
+         retry_config: Config = RETRY_CONFIG,
+         logger: logging.Logger = None,
+         log_handler: LogHandler = LogHandler.stdout,
+         log_level: LogLevel = LogLevel.debug,
+         env_file: str = None,
+     ):
+         """Initializes all the necessary attributes and creates a boto3 session with retry logic.
+
+         Args:
+             bucket_name: Name of the bucket.
+             upload_dir: Full path of the directory to be uploaded.
+             s3_prefix: Particular bucket prefix within which the upload should happen.
+             exclude_prefix: Full directory path to exclude from the S3 object prefix.
+             skip_dot_files: Boolean flag to skip dot files.
+             overwrite: Boolean flag to overwrite files in S3.
+             file_exclusion: Sequence of files to exclude during upload.
+             folder_exclusion: Sequence of directories to exclude during upload.
+             region_name: Name of the AWS region.
+             profile_name: AWS profile name.
+             aws_access_key_id: AWS access key ID.
+             aws_secret_access_key: AWS secret access key.
+             logger: Bring your own logger.
+             log_handler: Default log handler, can be ``file`` or ``stdout``.
+             log_level: Default log level, can be ``debug``, ``info``, ``warning`` or ``error``.
+             env_file: Dotenv file (.env) filepath to load environment variables.
+
+         See Also:
+             s3_prefix:
+                 If provided, ``s3_prefix`` will always be attached to each object.
+
+                 If ``s3_prefix`` is set to ``2025``, then the file path
+                 ``/home/ubuntu/Desktop/S3Upload/sub/photo.jpg`` will be uploaded as ``2025/S3Upload/sub/photo.jpg``
+
+             exclude_prefix:
+                 When the upload directory is ``/home/ubuntu/Desktop/S3Upload``, each file naturally carries the full prefix.
+                 This behavior can be avoided by specifying the ``exclude_prefix`` parameter.
+
+                 If ``exclude_prefix`` is set to ``/home/ubuntu/Desktop``, then the file path
+                 ``/home/ubuntu/Desktop/S3Upload/sub-dir/photo.jpg`` will be uploaded as ``S3Upload/sub-dir/photo.jpg``
+
+             env_file:
+                 Environment variables can be loaded from a .env file.
+                 The filepath can be set as ``env_file`` during object instantiation or as an environment variable.
+                 If a filepath is provided, PyS3Uploader loads it directly or searches the root directory for the file.
+                 If no filepath is provided, PyS3Uploader searches the current directory for a .env file.
+         """
+         self.logger = logger or setup_logger(handler=LogHandler(log_handler), level=LogLevel(log_level))
+         self.env_file = env_file or getenv("ENV_FILE", default=".env")
+
+         # Check for env_file in current working directory
+         if os.path.isfile(self.env_file):
+             self.logger.debug("Loading env file: %s", self.env_file)
+             dotenv.load_dotenv(dotenv_path=self.env_file, override=True)
+         # Find the env_file from root
+         elif env_file := dotenv.find_dotenv(self.env_file, raise_error_if_not_found=False):
+             self.logger.debug("Loading env file: %s", env_file)
+             dotenv.load_dotenv(dotenv_path=env_file, override=True)
+         else:
+             # Scan current working directory for any .env files
+             for file in os.listdir():
+                 if file.endswith(".env"):
+                     self.logger.debug("Loading env file: %s", file)
+                     dotenv.load_dotenv(dotenv_path=file, override=True)
+                     break
+             else:
+                 self.logger.debug("No .env files found to load")
+
+         self.session = boto3.Session(
+             profile_name=profile_name or getenv("PROFILE_NAME", "AWS_PROFILE_NAME"),
+             region_name=region_name or getenv("AWS_DEFAULT_REGION"),
+             aws_access_key_id=aws_access_key_id or getenv("AWS_ACCESS_KEY_ID"),
+             aws_secret_access_key=aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY"),
+         )
+         self.s3 = self.session.resource(service_name="s3", config=retry_config)
+
+         self.bucket_name = bucket_name
+         self.upload_dir = upload_dir
+         self.s3_prefix = s3_prefix
+         self.exclude_prefix = exclude_prefix
+         self.skip_dot_files = skip_dot_files
+         self.overwrite = overwrite
+         self.file_exclusion = file_exclusion or []
+         self.folder_exclusion = folder_exclusion or []
+
+         self.results = UploadResults()
+         self.start = time.time()
+
+         # noinspection PyUnresolvedReferences
+         self.bucket: boto3.resources.factory.s3.Bucket = None
+         # noinspection PyUnresolvedReferences
+         self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = []
+         self.object_size_map: Dict[str, int] = {}
+
+         self.upload_files: Dict[str, str] = {}
+         self.file_size_map: Dict[str, int] = {}
+
+         self.timer = RepeatedTimer(
+             function=self.metadata_uploader,
+             interval=int(getenv("METADATA_UPLOAD_INTERVAL", 300))
+         )
+
+     def init(self) -> None | NoReturn:
+         """Instantiates the bucket instance.
+
+         Raises:
+             ValueError: If no bucket name or upload directory was passed.
+             BucketNotFound: If the bucket was not found in S3.
+         """
+         self.start = time.time()
+         if not self.upload_dir:
+             raise ValueError("\n\n\tCannot proceed without an upload directory.")
+         if self.exclude_prefix and self.exclude_prefix not in self.upload_dir:
+             raise ValueError(
+                 f"\n\n\tStart folder {self.exclude_prefix!r} is not a part of upload directory {self.upload_dir!r}"
+             )
+         if not os.path.exists(self.upload_dir):
+             raise ValueError(f"\n\n\tPath not found: {self.upload_dir}")
+         if not self.bucket_name:
+             raise ValueError("\n\n\tCannot proceed without a bucket name.")
+         if (buckets := [bucket.name for bucket in self.s3.buckets.all()]) and self.bucket_name not in buckets:
+             raise BucketNotFound(f"\n\n\t{self.bucket_name} was not found.\n\tAvailable: {buckets}")
+         self.upload_dir = os.path.abspath(self.upload_dir)
+         self.load_bucket_state()
+
+     def load_bucket_state(self):
+         """Loads the bucket's current state."""
+         # noinspection PyUnresolvedReferences
+         self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
+         # noinspection PyUnresolvedReferences
+         self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = [obj for obj in self.bucket.objects.all()]
+         self.object_size_map = {obj.key: obj.size for obj in self.bucket_objects}
+
+     def load_local_state(self):
+         """Loads the local file queue."""
+         self.upload_files = self._get_files()
+         self.file_size_map = {file: self.filesize(file) for file in self.upload_files}
+
+     def exit(self) -> None:
+         """Exits after logging results and run time."""
+         success = len(self.results.success)
+         failed = len(self.results.failed)
+         total = success + failed
+         self.logger.info(
+             "Total number of uploads: %d, success: %d, failed: %d", total, success, failed
+         )
+         # Stop the timer and upload the final state as metadata file
+         self.timer.stop()
+         self.metadata_uploader()
+         self.logger.info("Run time: %s", convert_seconds(time.time() - self.start))
+
+     def filesize(self, filepath: str) -> int:
+         """Gets the file size of a given filepath.
+
+         Args:
+             filepath: Full path of the file.
+
+         Returns:
+             int:
+             Returns the file size in bytes.
+         """
+         try:
+             return os.path.getsize(filepath)
+         except (OSError, PermissionError) as error:
+             self.logger.error(error)
+             return 0
+
+     def size_it(self) -> None:
+         """Calculates and logs the total size of files in S3 and local.
+         """
+         files_in_s3 = len(self.object_size_map)
+         files_local = len(self.upload_files)
+
+         total_size_s3 = sum(self.object_size_map.values())
+         total_size_local = sum(self.file_size_map.values())
+
+         self.logger.info("Files in S3: [#%d]: %s (%d bytes)", files_in_s3, size_converter(total_size_s3), total_size_s3)
+         self.logger.info(
+             "Files local: [#%d]: %s (%d bytes)", files_local, size_converter(total_size_local), total_size_local
+         )
+
+     def _proceed_to_upload(self, filepath: str, objectpath: str) -> bool:
+         """Compares file size if the object already exists in S3.
+
+         Args:
+             filepath: Source filepath.
+             objectpath: S3 object path.
+
+         Returns:
+             bool:
+             Returns a boolean flag indicating whether the file should be uploaded.
+         """
+         if self.overwrite:
+             return True
+         file_size = self.filesize(filepath)
+         # Indicates that the object path already exists in S3
+         if object_size := self.object_size_map.get(objectpath):
+             if object_size == file_size:
+                 self.logger.info(
+                     "S3 object %s exists, and size [%d bytes / %s] matches, skipping..",
+                     objectpath,
+                     object_size,
+                     size_converter(object_size),
+                 )
+                 return False
+             self.logger.info(
+                 "S3 object %s exists, but size mismatch. Local: [%d bytes / %s], S3: [%d bytes / %s]",
+                 objectpath,
+                 file_size,
+                 size_converter(file_size),
+                 object_size,
+                 size_converter(object_size),
+             )
+         else:
+             self.logger.debug(
+                 "S3 object '%s' of size [%d bytes / %s] doesn't exist, uploading..",
+                 objectpath,
+                 file_size,
+                 size_converter(file_size),
+             )
+         return True
+
+     def _uploader(self, filepath: str, objectpath: str, callback: ProgressPercentage) -> None:
+         """Uploads the filepath to the specified S3 bucket.
+
+         Args:
+             filepath: Filepath to upload.
+             objectpath: Object path ref in S3.
+             callback: ProgressPercentage callback to track upload progress.
+         """
+         if self._proceed_to_upload(filepath, objectpath):
+             self.bucket.upload_file(filepath, objectpath, Callback=callback)
+
+     def _get_files(self) -> Dict[str, str]:
+         """Gets a mapping of all file paths to object paths in the upload directory.
+
+         Returns:
+             Dict[str, str]:
+             Returns a key-value pair of filepath and objectpath.
+         """
+         files_to_upload = {}
+         for __path, __directory, __files in os.walk(self.upload_dir):
+             scan_dir = os.path.split(__path)[-1]
+             if scan_dir in self.folder_exclusion:
+                 self.logger.info("Skipping '%s' honoring folder exclusion", scan_dir)
+                 continue
+             for file_ in __files:
+                 if file_ in self.file_exclusion:
+                     self.logger.info("Skipping '%s' honoring file exclusion", file_)
+                     continue
+                 if self.skip_dot_files and file_.startswith("."):
+                     self.logger.info("Skipping dot file: %s", file_)
+                     continue
+                 file_path = os.path.join(__path, file_)
+                 if self.exclude_prefix:
+                     relative_path = file_path.replace(self.exclude_prefix, "")
+                 else:
+                     relative_path = file_path
+                 # Lists preserve insertion order, so the S3 prefix (when provided) is added first
+                 url_parts = []
+                 if self.s3_prefix:
+                     url_parts.extend(
+                         self.s3_prefix.split(os.sep) if os.sep in self.s3_prefix else self.s3_prefix.split("/")
+                     )
+                 # Add the rest of the file path to the parts before normalizing into an S3 object URL
+                 url_parts.extend(relative_path.split(os.sep))
+                 # Remove falsy values with filter(None, ...) to drop empty path segments
+                 object_path = urljoin(*filter(None, url_parts))
+                 files_to_upload[file_path] = object_path
+         return files_to_upload
+
+     def run(self) -> None:
+         """Initiates object upload in a traditional loop."""
+         self.init()
+         self.load_local_state()
+         self.size_it()
+         self.timer.start()
+         total_files = len(self.upload_files)
+
+         self.logger.info(
+             "%d files from '%s' will be uploaded to '%s' sequentially",
+             total_files,
+             self.upload_dir,
+             self.bucket_name,
+         )
+         with alive_bar(total_files, title="Progress", bar="smooth", spinner="dots") as overall_bar:
+             for filepath, objectpath in self.upload_files.items():
+                 progress_callback = ProgressPercentage(
+                     filename=os.path.basename(filepath), size=self.filesize(filepath), bar=overall_bar
+                 )
+                 try:
+                     self._uploader(filepath, objectpath, progress_callback)
+                     self.results.success.append(filepath)
+                 except ClientError as error:
+                     self.logger.error("Upload failed: %s", error)
+                     self.results.failed.append(filepath)
+                 except KeyboardInterrupt:
+                     self.logger.warning("Upload interrupted by user")
+                     break
+                 overall_bar()  # increment overall progress bar
+         self.exit()
+
+     def run_in_parallel(self, max_workers: int = 5) -> None:
+         """Initiates upload in multi-threading.
+
+         Args:
+             max_workers: Number of maximum threads to use.
+         """
+         # Verify and initiate bucket state
+         self.init()
+         # Verify and initiate local state
+         self.load_local_state()
+         self.size_it()
+         self.timer.start()
+         total_files = len(self.upload_files)
+
+         self.logger.info(
+             "%d files from '%s' will be uploaded to '%s' with maximum concurrency of: %d",
+             total_files,
+             self.upload_dir,
+             self.bucket_name,
+             max_workers,
+         )
+         with alive_bar(total_files, title="Progress", bar="smooth", spinner="dots") as overall_bar:
+             with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                 # Map each future to its filepath so results are attributed to the right file
+                 futures = {}
+                 for filepath, objectpath in self.upload_files.items():
+                     progress_callback = ProgressPercentage(
+                         filename=os.path.basename(filepath), size=self.filesize(filepath), bar=overall_bar
+                     )
+                     futures[executor.submit(self._uploader, filepath, objectpath, callback=progress_callback)] = filepath
+
+                 for future in as_completed(futures):
+                     filepath = futures[future]
+                     try:
+                         future.result()
+                         self.results.success.append(filepath)
+                     except ClientError as error:
+                         self.logger.error("Upload failed: %s", error)
+                         self.results.failed.append(filepath)
+                     overall_bar()  # Increment overall bar after each upload finishes
+         self.exit()
+
+     def metadata_uploader(self) -> None:
+         """Uploads the current upload state to S3 as a metadata JSON file."""
+         filename = objectpath = getenv("METADATA_FILENAME", "METADATA.json")
+         self.load_bucket_state()
+         objects_uploaded = len(self.results.success)
+         size_uploaded = sum([self.filesize(file) for file in self.results.success])
+
+         pending_files = self.upload_files.keys() - self.results.success
+         objects_pending = len(pending_files)
+         size_pending = sum([self.filesize(file) for file in pending_files])
+
+         metadata = Metadata(
+             timestamp=datetime.now(tz=UTC).strftime("%A %B %d, %Y %H:%M:%S"),
+             objects_uploaded=objects_uploaded,
+             objects_pending=objects_pending,
+             size_uploaded=size_converter(size_uploaded),
+             size_pending=size_converter(size_pending),
+         )
+         self.logger.debug("\n" + json.dumps(metadata.__dict__, indent=2) + "\n")
+         self.logger.debug("Uploading metadata to S3")
+         filepath = os.path.join(os.getcwd(), filename)
+         with open(filepath, "w") as file:
+             json.dump(metadata.__dict__, file, indent=2)
+             file.flush()
+         self.bucket.upload_file(filepath, objectpath)
+
+     def get_bucket_structure(self) -> str:
+         """Gets all the objects in an S3 bucket and forms them into a hierarchical folder-like representation.
+
+         Returns:
+             str:
+             Returns a hierarchical folder-like representation of the chosen bucket.
+         """
+         self.init()
+         # Using a set (rather than a list) yields the same keys but isolates directories from files
+         return convert_to_folder_structure(set(obj.key for obj in self.bucket_objects))
+
+     def print_bucket_structure(self) -> None:
+         """Prints all the objects in an S3 bucket with a folder-like representation."""
+         print(self.get_bucket_structure())
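
Putting it all together, a minimal end-to-end sketch of the new API; the bucket name and paths are placeholders, and credentials resolve from explicit arguments, environment variables, or a .env file as described in the docstring above:

    from pys3uploader import LogHandler, LogLevel, Uploader

    uploader = Uploader(
        bucket_name="my-bucket",                    # placeholder bucket
        upload_dir="/home/ubuntu/Desktop/S3Upload",
        exclude_prefix="/home/ubuntu/Desktop",      # objects land under S3Upload/...
        log_handler=LogHandler.stdout,
        log_level=LogLevel.info,
    )
    uploader.run()  # sequential upload; use uploader.run_in_parallel(max_workers=10) for threads
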