PyS3Uploader 0.1.1-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



pys3uploader-0.2.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: PyS3Uploader
- Version: 0.1.1
+ Version: 0.2.0
  Summary: Python module to upload objects to an S3 bucket.
  Author-email: Vignesh Rao <svignesh1793@gmail.com>
  License: MIT License
@@ -63,7 +63,7 @@ Requires-Dist: recommonmark; extra == "dev"

  **Activity**

- [![GitHub Repo created](https://img.shields.io/date/1618966420)][repo]
+ [![GitHub Repo created](https://img.shields.io/date/1760313686)][repo]
  [![GitHub commit activity](https://img.shields.io/github/commit-activity/y/thevickypedia/PyS3Uploader)][repo]
  [![GitHub last commit](https://img.shields.io/github/last-commit/thevickypedia/PyS3Uploader)][repo]

pys3uploader-0.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
+ s3/__init__.py,sha256=yLvvl4-uTLZwhdhCMQpWq5juX_zFuYAfKSf4aB0WjZw,66
+ s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
+ s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
+ s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
+ s3/uploader.py,sha256=IAlFrEjfBuexrfmBPGN9OZAfHjQuwcGRzWi2es0r_fU,11154
+ s3/utils.py,sha256=0kcG0aE2olHhC8thaUEwx2J8tOI2-2TGCk6E6U-PiKw,2058
+ pys3uploader-0.2.0.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
+ pys3uploader-0.2.0.dist-info/METADATA,sha256=IXSmHXJJndlnd_6MHlpZrcVILPni8VUbVNJYQEjMIR8,7286
+ pys3uploader-0.2.0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
+ pys3uploader-0.2.0.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
+ pys3uploader-0.2.0.dist-info/RECORD,,
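
Each RECORD row pins a file to `path,sha256=<digest>,size`, where the digest is an unpadded urlsafe-base64 SHA-256 (the PEP 376/427 convention). A small sketch of how such a row could be re-checked against an extracted wheel; the helper name is illustrative, not part of the package:

```python
import base64
import hashlib


def record_hash(path: str) -> str:
    """Hash a file the way wheel RECORD files do: sha256, urlsafe base64, no padding."""
    with open(path, "rb") as fh:
        digest = hashlib.sha256(fh.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()


# For an extracted 0.2.0 wheel, record_hash("s3/__init__.py") should match the
# "sha256=yLvvl4-..." value in the RECORD row above.
```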
s3/__init__.py CHANGED
@@ -1,3 +1,3 @@
  from s3.uploader import Uploader  # noqa: F401

- version = "0.1.1"
+ version = "0.2.0"
s3/uploader.py CHANGED
@@ -11,7 +11,7 @@ from tqdm import tqdm

  from s3.exceptions import BucketNotFound
  from s3.logger import default_logger
- from s3.utils import UploadResults, getenv, urljoin
+ from s3.utils import UploadResults, convert_to_folder_structure, getenv, urljoin


  class Uploader:
@@ -29,6 +29,7 @@
  upload_dir: str,
  s3_prefix: str = None,
  exclude_path: str = None,
+ overwrite: bool = False,
  region_name: str = None,
  profile_name: str = None,
  aws_access_key_id: str = None,
@@ -42,6 +43,7 @@
  upload_dir: Full path of the directory to be uploaded.
  s3_prefix: Particular bucket prefix within which the upload should happen.
  exclude_path: Full directory path to exclude from S3 object prefix.
+ overwrite: Boolean flag to overwrite files in S3.
  region_name: Name of the AWS region.
  profile_name: AWS profile name.
  aws_access_key_id: AWS access key ID.
@@ -69,16 +71,24 @@
  aws_secret_access_key=aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY"),
  )
  self.s3 = self.session.resource(service_name="s3", config=self.RETRY_CONFIG)
+
  self.logger = logger or default_logger()
+
+ self.bucket_name = bucket_name
  self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
  self.s3_prefix = s3_prefix
  self.exclude_path = exclude_path
- self.bucket_name = bucket_name
- # noinspection PyUnresolvedReferences
- self.bucket: boto3.resources.factory.s3.Bucket = None
+ self.overwrite = overwrite
+
  self.results = UploadResults()
  self.start = time.time()

+ # noinspection PyUnresolvedReferences
+ self.bucket: boto3.resources.factory.s3.Bucket = None
+ # noinspection PyUnresolvedReferences
+ self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = []
+ self.object_size_map: Dict[str, int] = {}
+
  def init(self) -> None:
  """Instantiates the bucket instance.

@@ -106,6 +116,9 @@
  self.upload_dir = os.path.abspath(self.upload_dir)
  # noinspection PyUnresolvedReferences
  self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
+ # noinspection PyUnresolvedReferences
+ self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = [obj for obj in self.bucket.objects.all()]
+ self.object_size_map = {obj.key: obj.size for obj in self.bucket_objects}

  def exit(self) -> None:
  """Exits after printing results, and run time."""
@@ -115,28 +128,59 @@
  )
  self.logger.info("Run Time: %.2fs", time.time() - self.start)

- def _uploader(self, objectpath: str, filepath: str) -> None:
+ def _proceed_to_upload(self, filepath: str, objectpath: str) -> bool:
+ """Compares file size if the object already exists in S3.
+
+ Args:
+ filepath: Source filepath.
+ objectpath: S3 object path.
+
+ Returns:
+ bool:
+ Returns a boolean flag to indicate upload flag.
+ """
+ if self.overwrite:
+ return True
+ # Indicates that the object path already exists in S3
+ if object_size := self.object_size_map.get(objectpath):
+ try:
+ file_size = os.path.getsize(filepath)
+ except (OSError, PermissionError) as error:
+ self.logger.error(error)
+ return True
+ if object_size == file_size:
+ self.logger.info("S3 object %s exists, and size [%d] matches, skipping..", objectpath, object_size)
+ return False
+ self.logger.info(
+ "S3 object %s exists, but size mismatch. Local: [%d], S3: [%d]", objectpath, file_size, object_size
+ )
+ return True
+
+ def _uploader(self, filepath: str, objectpath: str) -> None:
  """Uploads the filepath to the specified S3 bucket.

  Args:
- objectpath: Object path ref in S3.
  filepath: Filepath to upload.
+ objectpath: Object path ref in S3.
  """
- self.bucket.upload_file(filepath, objectpath)
+ if self._proceed_to_upload(filepath, objectpath):
+ self.bucket.upload_file(filepath, objectpath)

  def _get_files(self) -> Dict[str, str]:
  """Get a mapping for all the file path and object paths in upload directory.

  Returns:
  Dict[str, str]:
- Returns a dictionary object path and filepath.
+ Returns a key-value pair of filepath and objectpath.
  """
  files_to_upload = {}
  for __path, __directory, __files in os.walk(self.upload_dir):
  for file_ in __files:
  file_path = os.path.join(__path, file_)
  if self.exclude_path:
- file_path = file_path.replace(self.exclude_path, "")
+ relative_path = file_path.replace(self.exclude_path, "")
+ else:
+ relative_path = file_path
  # Lists in python are ordered, so s3 prefix will get loaded first when provided
  url_parts = []
  if self.s3_prefix:
@@ -144,10 +188,10 @@
  self.s3_prefix.split(os.sep) if os.sep in self.s3_prefix else self.s3_prefix.split("/")
  )
  # Add rest of the file path to parts before normalizing as an S3 object URL
- url_parts.extend(file_path.split(os.sep))
+ url_parts.extend(relative_path.split(os.sep))
  # Remove falsy values using filter - "None", "bool", "len" or "lambda item: item"
  object_path = urljoin(*filter(None, url_parts))
- files_to_upload[object_path] = file_path
+ files_to_upload[file_path] = object_path
  return files_to_upload

  def run(self) -> None:
@@ -161,7 +205,7 @@
  keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
  ):
  try:
- self._uploader(objectpath=objectpath, filepath=filepath)
+ self._uploader(filepath=filepath, objectpath=objectpath)
  self.results.success += 1
  except ClientError as error:
  self.logger.error(error)
@@ -185,7 +229,10 @@
  max_workers,
  )
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
- futures = [executor.submit(self._uploader, *kv) for kv in keys.items()]
+ futures = [
+ executor.submit(self._uploader, **dict(filepath=filepath, objectpath=objectpath))
+ for filepath, objectpath in keys.items()
+ ]
  for future in tqdm(
  iterable=as_completed(futures),
  total=len(futures),
@@ -200,3 +247,18 @@
  self.logger.error(f"Upload failed: {error}")
  self.results.failed += 1
  self.exit()
+
+ def get_bucket_structure(self) -> str:
+ """Gets all the objects in an S3 bucket and forms it into a hierarchical folder like representation.
+
+ Returns:
+ str:
+ Returns a hierarchical folder like representation of the chosen bucket.
+ """
+ self.init()
+ # Using list and set will yield the same results but using set we can isolate directories from files
+ return convert_to_folder_structure(set(obj.key for obj in self.bucket_objects))
+
+ def print_bucket_structure(self) -> None:
+ """Prints all the objects in an S3 bucket with a folder like representation."""
+ print(self.get_bucket_structure())
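
Net effect of the uploader.py changes: re-runs become incremental. With `overwrite=False` (the default), `_proceed_to_upload` consults the `object_size_map` built in `init()` and skips any object whose size already matches the local file, falling back to an upload on size mismatch or when the local file can't be read. A minimal usage sketch, assuming a placeholder bucket and directory and credentials resolved from the environment or an AWS profile:

```python
# Minimal usage sketch for the 0.2.0 behavior shown in the diff above;
# bucket and directory names are placeholders.
from s3 import Uploader

uploader = Uploader(
    bucket_name="example-bucket",   # hypothetical bucket
    upload_dir="/path/to/reports",  # hypothetical local directory
    overwrite=False,                # new in 0.2.0: re-runs skip size-matched objects
)
uploader.run()

# Also new in 0.2.0: render the bucket's keys as an ASCII tree.
uploader.print_bucket_structure()
```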
s3/utils.py CHANGED
@@ -1,4 +1,5 @@
  import os
+ from typing import Dict, Set


  class UploadResults(dict):
@@ -28,3 +29,42 @@ def urljoin(*args) -> str:
  Joined url.
  """
  return "/".join(map(lambda x: str(x).rstrip("/").lstrip("/"), args))
+
+
+ def convert_to_folder_structure(sequence: Set[str]) -> str:
+ """Convert objects in a s3 buckets into a folder like representation.
+
+ Args:
+ sequence: Takes either a mutable or immutable sequence as an argument.
+
+ Returns:
+ str:
+ String representation of the architecture.
+ """
+ folder_structure = {}
+ for item in sequence:
+ parts = item.split("/")
+ current_level = folder_structure
+ for part in parts:
+ current_level = current_level.setdefault(part, {})
+
+ def generate_folder_structure(structure: Dict[str, dict], indent: str = "") -> str:
+ """Generates the folder like structure.
+
+ Args:
+ structure: Structure of folder objects as key-value pairs.
+ indent: Required indentation for the ASCII.
+ """
+ result = ""
+ for i, (key, value) in enumerate(structure.items()):
+ if i == len(structure) - 1:
+ result += indent + "└── " + key + "\n"
+ sub_indent = indent + "    "
+ else:
+ result += indent + "├── " + key + "\n"
+ sub_indent = indent + "│   "
+ if value:
+ result += generate_folder_structure(value, sub_indent)
+ return result
+
+ return generate_folder_structure(folder_structure)
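
Since `convert_to_folder_structure` has no S3 dependency, its rendering can be previewed directly. A short sketch with made-up object keys; output order may vary because the input is a set:

```python
from s3.utils import convert_to_folder_structure

# Made-up object keys for illustration only.
keys = {
    "logs/2024/app.log",
    "logs/2024/error.log",
    "reports/summary.pdf",
}
print(convert_to_folder_structure(keys))
# One possible rendering:
# ├── logs
# │   └── 2024
# │       ├── app.log
# │       └── error.log
# └── reports
#     └── summary.pdf
```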
@@ -1,11 +0,0 @@
1
- s3/__init__.py,sha256=XgYHKbn7gc5_nzydIKmKVjigeMtOBLqRHKHb8GJi5M4,66
2
- s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
- s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
4
- s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
5
- s3/uploader.py,sha256=tQaelL7grZSWFydZOekQgVz4Fipm0PHzbt2J17ddYHs,8563
6
- s3/utils.py,sha256=pKVT2GbDGQKpFaHOmVrCfiQhvgr1vuSsITt_0gHguAA,687
7
- pys3uploader-0.1.1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
8
- pys3uploader-0.1.1.dist-info/METADATA,sha256=sW_fsQxpoZ8f8ivI0Vb4oUXt1RSlFuHJDmpP9h_CXVU,7286
9
- pys3uploader-0.1.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
10
- pys3uploader-0.1.1.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
11
- pys3uploader-0.1.1.dist-info/RECORD,,