PyS3Uploader-0.1.0-py3-none-any.whl → PyS3Uploader-0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: PyS3Uploader
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: Python module to upload objects to an S3 bucket.
5
5
  Author-email: Vignesh Rao <svignesh1793@gmail.com>
6
6
  License: MIT License
@@ -63,7 +63,7 @@ Requires-Dist: recommonmark; extra == "dev"
63
63
 
64
64
  **Activity**
65
65
 
66
- [![GitHub Repo created](https://img.shields.io/date/1618966420)][repo]
66
+ [![GitHub Repo created](https://img.shields.io/date/1760313686)][repo]
67
67
  [![GitHub commit activity](https://img.shields.io/github/commit-activity/y/thevickypedia/PyS3Uploader)][repo]
68
68
  [![GitHub last commit](https://img.shields.io/github/last-commit/thevickypedia/PyS3Uploader)][repo]
69
69
 
@@ -90,7 +90,7 @@ if __name__ == '__main__':
90
90
  wrapper = s3.Uploader(
91
91
  bucket_name="BUCKET_NAME",
92
92
  upload_dir="FULL_PATH_TO_UPLOAD",
93
- prefix_dir="START_DIRECTORY_IN_S3"
93
+ exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
94
94
  )
95
95
  wrapper.run_in_parallel()
96
96
  ```
@@ -103,7 +103,7 @@ if __name__ == '__main__':
103
103
  wrapper = s3.Uploader(
104
104
  bucket_name="BUCKET_NAME",
105
105
  upload_dir="FULL_PATH_TO_UPLOAD",
106
- prefix_dir="START_DIRECTORY_IN_S3"
106
+ exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
107
107
  )
108
108
  wrapper.run()
109
109
  ```
@@ -113,7 +113,8 @@ if __name__ == '__main__':
113
113
  - **upload_dir** - Directory to upload.
114
114
 
115
115
  #### Optional kwargs
116
- - **prefix_dir** - Start directory from ``upload_dir`` to use as root in S3. Defaults to `None`
116
+ - **s3_prefix** - S3 object prefix for each file. Defaults to ``None``
117
+ - **exclude_path** - Path in ``upload_dir`` that has to be excluded in object keys. Defaults to `None`
117
118
  - **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
118
119
  <br><br>
119
120
  - **region_name** - AWS region name. Defaults to the env var `AWS_DEFAULT_REGION`
@@ -0,0 +1,11 @@
1
+ s3/__init__.py,sha256=qSltnC7r3AjwiYWzsD9JUs8SzeBEV16nrHldiWlrxtY,66
2
+ s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
+ s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
4
+ s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
5
+ s3/uploader.py,sha256=kkv7d2EaMH3OsoIJgTx7yRUd00s0n9PbRbjj6Rm7qdA,9355
6
+ s3/utils.py,sha256=0kcG0aE2olHhC8thaUEwx2J8tOI2-2TGCk6E6U-PiKw,2058
7
+ pys3uploader-0.1.2.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
8
+ pys3uploader-0.1.2.dist-info/METADATA,sha256=GtQq-ZDiZEMpl2CEs4VJw4AQ8tf5rzcfgjDu68oHX6c,7286
9
+ pys3uploader-0.1.2.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
10
+ pys3uploader-0.1.2.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
11
+ pys3uploader-0.1.2.dist-info/RECORD,,
s3/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  from s3.uploader import Uploader # noqa: F401
2
2
 
3
- version = "0.1.0"
3
+ version = "0.1.2"
s3/uploader.py CHANGED
@@ -11,7 +11,7 @@ from tqdm import tqdm
11
11
 
12
12
  from s3.exceptions import BucketNotFound
13
13
  from s3.logger import default_logger
14
- from s3.utils import UploadResults, get_object_path, getenv
14
+ from s3.utils import UploadResults, convert_to_folder_structure, getenv, urljoin
15
15
 
16
16
 
17
17
  class Uploader:
@@ -27,7 +27,8 @@ class Uploader:
27
27
  self,
28
28
  bucket_name: str,
29
29
  upload_dir: str,
30
- prefix_dir: str = None,
30
+ s3_prefix: str = None,
31
+ exclude_path: str = None,
31
32
  region_name: str = None,
32
33
  profile_name: str = None,
33
34
  aws_access_key_id: str = None,
@@ -38,13 +39,28 @@ class Uploader:
38
39
 
39
40
  Args:
40
41
  bucket_name: Name of the bucket.
41
- upload_dir: Name of the directory to be uploaded.
42
- prefix_dir: Start folder name from upload_dir.
42
+ upload_dir: Full path of the directory to be uploaded.
43
+ s3_prefix: Particular bucket prefix within which the upload should happen.
44
+ exclude_path: Full directory path to exclude from S3 object prefix.
43
45
  region_name: Name of the AWS region.
44
46
  profile_name: AWS profile name.
45
47
  aws_access_key_id: AWS access key ID.
46
48
  aws_secret_access_key: AWS secret access key.
47
49
  logger: Bring your own logger.
50
+
51
+ See Also:
52
+ exclude_path:
53
+ When upload directory is "/home/ubuntu/Desktop/S3Upload", each file will naturally have the full prefix.
54
+ However, this behavior can be avoided by specifying the ``exclude_path`` parameter.
55
+
56
+ If exclude_path is set to: ``/home/ubuntu/Desktop``, then the file path
57
+ ``/home/ubuntu/Desktop/S3Upload/sub-dir/photo.jpg`` will be uploaded as ``S3Upload/sub-dir/photo.jpg``
58
+
59
+ s3_prefix:
60
+ If provided, ``s3_prefix`` will always be attached to each object.
61
+
62
+ If ``s3_prefix`` is set to: ``2025``, then the file path
63
+ ``/home/ubuntu/Desktop/S3Upload/sub/photo.jpg`` will be uploaded as ``2025/S3Upload/sub/photo.jpg``
48
64
  """
49
65
  self.session = boto3.Session(
50
66
  profile_name=profile_name or getenv("PROFILE_NAME"),
@@ -54,8 +70,9 @@ class Uploader:
54
70
  )
55
71
  self.s3 = self.session.resource(service_name="s3", config=self.RETRY_CONFIG)
56
72
  self.logger = logger or default_logger()
57
- self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "SOURCE")
58
- self.prefix_dir = prefix_dir
73
+ self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
74
+ self.s3_prefix = s3_prefix
75
+ self.exclude_path = exclude_path
59
76
  self.bucket_name = bucket_name
60
77
  # noinspection PyUnresolvedReferences
61
78
  self.bucket: boto3.resources.factory.s3.Bucket = None
@@ -70,9 +87,9 @@ class Uploader:
70
87
  BucketNotFound: If bucket name was not found.
71
88
  """
72
89
  self.start = time.time()
73
- if self.prefix_dir and self.prefix_dir not in self.upload_dir.split(os.sep):
90
+ if self.exclude_path and self.exclude_path not in self.upload_dir:
74
91
  raise ValueError(
75
- f"\n\n\tPrefix folder name {self.prefix_dir!r} is not a part of upload directory {self.upload_dir!r}"
92
+ f"\n\n\tStart folder {self.exclude_path!r} is not a part of upload directory {self.upload_dir!r}"
76
93
  )
77
94
  if not self.upload_dir:
78
95
  raise ValueError("\n\n\tCannot proceed without an upload directory.")
@@ -87,7 +104,6 @@ class Uploader:
87
104
  if self.bucket_name not in buckets:
88
105
  raise BucketNotFound(f"\n\n\t{self.bucket_name} was not found in {_alias} account.\n\tAvailable: {buckets}")
89
106
  self.upload_dir = os.path.abspath(self.upload_dir)
90
- self.logger.info("Bucket objects from '%s' will be uploaded to '%s'", self.upload_dir, self.bucket_name)
91
107
  # noinspection PyUnresolvedReferences
92
108
  self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
93
109
 
@@ -119,14 +135,20 @@ class Uploader:
119
135
  for __path, __directory, __files in os.walk(self.upload_dir):
120
136
  for file_ in __files:
121
137
  file_path = os.path.join(__path, file_)
122
- if self.prefix_dir:
123
- try:
124
- object_path = get_object_path(file_path, self.prefix_dir)
125
- except ValueError as error:
126
- self.logger.error(error)
127
- continue
138
+ if self.exclude_path:
139
+ relative_path = file_path.replace(self.exclude_path, "")
128
140
  else:
129
- object_path = self.prefix_dir
141
+ relative_path = file_path
142
+ # Lists in python are ordered, so s3 prefix will get loaded first when provided
143
+ url_parts = []
144
+ if self.s3_prefix:
145
+ url_parts.extend(
146
+ self.s3_prefix.split(os.sep) if os.sep in self.s3_prefix else self.s3_prefix.split("/")
147
+ )
148
+ # Add rest of the file path to parts before normalizing as an S3 object URL
149
+ url_parts.extend(relative_path.split(os.sep))
150
+ # Remove falsy values using filter - "None", "bool", "len" or "lambda item: item"
151
+ object_path = urljoin(*filter(None, url_parts))
130
152
  files_to_upload[object_path] = file_path
131
153
  return files_to_upload
132
154
 
@@ -135,6 +157,7 @@ class Uploader:
135
157
  self.init()
136
158
  keys = self._get_files()
137
159
  self.logger.debug(keys)
160
+ self.logger.info("%d files from '%s' will be uploaded to '%s'", len(keys), self.upload_dir, self.bucket_name)
138
161
  self.logger.info("Initiating upload process.")
139
162
  for objectpath, filepath in tqdm(
140
163
  keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
@@ -154,9 +177,15 @@ class Uploader:
154
177
  max_workers: Number of maximum threads to use.
155
178
  """
156
179
  self.init()
157
- self.logger.info(f"Number of threads: {max_workers}")
158
180
  keys = self._get_files()
159
- self.logger.info("Initiating upload process.")
181
+ self.logger.debug(keys)
182
+ self.logger.info(
183
+ "%d files from '%s' will be uploaded to '%s' with maximum concurrency of: %d",
184
+ len(keys),
185
+ self.upload_dir,
186
+ self.bucket_name,
187
+ max_workers,
188
+ )
160
189
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
161
190
  futures = [executor.submit(self._uploader, *kv) for kv in keys.items()]
162
191
  for future in tqdm(
@@ -173,3 +202,18 @@ class Uploader:
173
202
  self.logger.error(f"Upload failed: {error}")
174
203
  self.results.failed += 1
175
204
  self.exit()
205
+
206
+ def get_bucket_structure(self) -> str:
207
+ """Gets all the objects in an S3 bucket and forms it into a hierarchical folder like representation.
208
+
209
+ Returns:
210
+ str:
211
+ Returns a hierarchical folder like representation of the chosen bucket.
212
+ """
213
+ self.init()
214
+ # Using list and set will yield the same results but using set we can isolate directories from files
215
+ return convert_to_folder_structure(set([obj.key for obj in self.bucket.objects.all()]))
216
+
217
+ def print_bucket_structure(self) -> None:
218
+ """Prints all the objects in an S3 bucket with a folder like representation."""
219
+ print(self.get_bucket_structure())
s3/utils.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import os
2
+ from typing import Dict, Set
2
3
 
3
4
 
4
5
  class UploadResults(dict):
@@ -20,26 +21,50 @@ def getenv(*args, default: str = None) -> str:
20
21
  return default
21
22
 
22
23
 
23
- def get_object_path(filepath: str, start_folder_name: str):
24
- """Construct object path without absolute path's pretext.
24
+ def urljoin(*args) -> str:
25
+ """Joins given arguments into a url. Trailing but not leading slashes are stripped for each argument.
26
+
27
+ Returns:
28
+ str:
29
+ Joined url.
30
+ """
31
+ return "/".join(map(lambda x: str(x).rstrip("/").lstrip("/"), args))
32
+
33
+
34
+ def convert_to_folder_structure(sequence: Set[str]) -> str:
35
+ """Convert objects in a s3 buckets into a folder like representation.
25
36
 
26
37
  Args:
27
- filepath: Absolute file path to upload.
28
- start_folder_name: Folder name to begin object path.
38
+ sequence: Takes either a mutable or immutable sequence as an argument.
29
39
 
30
40
  Returns:
31
41
  str:
32
- Returns the object name.
42
+ String representation of the architecture.
33
43
  """
34
- # Split file_path into parts
35
- parts = filepath.split(os.sep)
36
- try:
37
- # Find index of the folder to start from
38
- start_index = parts.index(start_folder_name)
39
- except ValueError:
40
- # Folder not found in path, fallback to full path or raise error
41
- raise ValueError(f"Folder '{start_folder_name}' not found in path '{filepath}'")
42
- # Reconstruct path from start_folder_name onwards
43
- relative_parts = parts[start_index:]
44
- # Join with os.sep for system-appropriate separators
45
- return os.sep.join(relative_parts)
44
+ folder_structure = {}
45
+ for item in sequence:
46
+ parts = item.split("/")
47
+ current_level = folder_structure
48
+ for part in parts:
49
+ current_level = current_level.setdefault(part, {})
50
+
51
+ def generate_folder_structure(structure: Dict[str, dict], indent: str = "") -> str:
52
+ """Generates the folder like structure.
53
+
54
+ Args:
55
+ structure: Structure of folder objects as key-value pairs.
56
+ indent: Required indentation for the ASCII.
57
+ """
58
+ result = ""
59
+ for i, (key, value) in enumerate(structure.items()):
60
+ if i == len(structure) - 1:
61
+ result += indent + "└── " + key + "\n"
62
+ sub_indent = indent + " "
63
+ else:
64
+ result += indent + "├── " + key + "\n"
65
+ sub_indent = indent + "│ "
66
+ if value:
67
+ result += generate_folder_structure(value, sub_indent)
68
+ return result
69
+
70
+ return generate_folder_structure(folder_structure)
@@ -1,11 +0,0 @@
1
- s3/__init__.py,sha256=zSLbLrsnVD-tRtiiTcT3JDWzmpnSC9mP6uHMXt2cyEc,66
2
- s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
- s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
4
- s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
5
- s3/uploader.py,sha256=Z2EvtUlR5jlL1xbeQWj4XLBfhTn4yWPm9E8WhPcz6Qk,7056
6
- s3/utils.py,sha256=swkdwkfn43e8I3dGL9HAGZ-dba3fIeorihVAjTE07wc,1291
7
- pys3uploader-0.1.0.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
8
- pys3uploader-0.1.0.dist-info/METADATA,sha256=gkAfOF-hEXYfW9p0PZiJhcrpLjhNorK7LFZNtU_ybrE,7188
9
- pys3uploader-0.1.0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
10
- pys3uploader-0.1.0.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
11
- pys3uploader-0.1.0.dist-info/RECORD,,