PyS3Uploader 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of PyS3Uploader might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: PyS3Uploader
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: Python module to upload objects to an S3 bucket.
5
5
  Author-email: Vignesh Rao <svignesh1793@gmail.com>
6
6
  License: MIT License
@@ -0,0 +1,11 @@
1
+ s3/__init__.py,sha256=yLvvl4-uTLZwhdhCMQpWq5juX_zFuYAfKSf4aB0WjZw,66
2
+ s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
+ s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
4
+ s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
5
+ s3/uploader.py,sha256=IAlFrEjfBuexrfmBPGN9OZAfHjQuwcGRzWi2es0r_fU,11154
6
+ s3/utils.py,sha256=0kcG0aE2olHhC8thaUEwx2J8tOI2-2TGCk6E6U-PiKw,2058
7
+ pys3uploader-0.2.0.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
8
+ pys3uploader-0.2.0.dist-info/METADATA,sha256=IXSmHXJJndlnd_6MHlpZrcVILPni8VUbVNJYQEjMIR8,7286
9
+ pys3uploader-0.2.0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
10
+ pys3uploader-0.2.0.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
11
+ pys3uploader-0.2.0.dist-info/RECORD,,
s3/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  from s3.uploader import Uploader # noqa: F401
2
2
 
3
- version = "0.1.2"
3
+ version = "0.2.0"
s3/uploader.py CHANGED
@@ -29,6 +29,7 @@ class Uploader:
29
29
  upload_dir: str,
30
30
  s3_prefix: str = None,
31
31
  exclude_path: str = None,
32
+ overwrite: bool = False,
32
33
  region_name: str = None,
33
34
  profile_name: str = None,
34
35
  aws_access_key_id: str = None,
@@ -42,6 +43,7 @@ class Uploader:
42
43
  upload_dir: Full path of the directory to be uploaded.
43
44
  s3_prefix: Particular bucket prefix within which the upload should happen.
44
45
  exclude_path: Full directory path to exclude from S3 object prefix.
46
+ overwrite: Boolean flag to overwrite files in S3.
45
47
  region_name: Name of the AWS region.
46
48
  profile_name: AWS profile name.
47
49
  aws_access_key_id: AWS access key ID.
@@ -69,16 +71,24 @@ class Uploader:
69
71
  aws_secret_access_key=aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY"),
70
72
  )
71
73
  self.s3 = self.session.resource(service_name="s3", config=self.RETRY_CONFIG)
74
+
72
75
  self.logger = logger or default_logger()
76
+
77
+ self.bucket_name = bucket_name
73
78
  self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
74
79
  self.s3_prefix = s3_prefix
75
80
  self.exclude_path = exclude_path
76
- self.bucket_name = bucket_name
77
- # noinspection PyUnresolvedReferences
78
- self.bucket: boto3.resources.factory.s3.Bucket = None
81
+ self.overwrite = overwrite
82
+
79
83
  self.results = UploadResults()
80
84
  self.start = time.time()
81
85
 
86
+ # noinspection PyUnresolvedReferences
87
+ self.bucket: boto3.resources.factory.s3.Bucket = None
88
+ # noinspection PyUnresolvedReferences
89
+ self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = []
90
+ self.object_size_map: Dict[str, int] = {}
91
+
82
92
  def init(self) -> None:
83
93
  """Instantiates the bucket instance.
84
94
 
@@ -106,6 +116,9 @@ class Uploader:
106
116
  self.upload_dir = os.path.abspath(self.upload_dir)
107
117
  # noinspection PyUnresolvedReferences
108
118
  self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
119
+ # noinspection PyUnresolvedReferences
120
+ self.bucket_objects: boto3.resources.factory.s3.ObjectSummary = [obj for obj in self.bucket.objects.all()]
121
+ self.object_size_map = {obj.key: obj.size for obj in self.bucket_objects}
109
122
 
110
123
  def exit(self) -> None:
111
124
  """Exits after printing results, and run time."""
@@ -115,21 +128,50 @@ class Uploader:
115
128
  )
116
129
  self.logger.info("Run Time: %.2fs", time.time() - self.start)
117
130
 
118
- def _uploader(self, objectpath: str, filepath: str) -> None:
131
+ def _proceed_to_upload(self, filepath: str, objectpath: str) -> bool:
132
+ """Compares file size if the object already exists in S3.
133
+
134
+ Args:
135
+ filepath: Source filepath.
136
+ objectpath: S3 object path.
137
+
138
+ Returns:
139
+ bool:
140
+ Returns a boolean flag to indicate upload flag.
141
+ """
142
+ if self.overwrite:
143
+ return True
144
+ # Indicates that the object path already exists in S3
145
+ if object_size := self.object_size_map.get(objectpath):
146
+ try:
147
+ file_size = os.path.getsize(filepath)
148
+ except (OSError, PermissionError) as error:
149
+ self.logger.error(error)
150
+ return True
151
+ if object_size == file_size:
152
+ self.logger.info("S3 object %s exists, and size [%d] matches, skipping..", objectpath, object_size)
153
+ return False
154
+ self.logger.info(
155
+ "S3 object %s exists, but size mismatch. Local: [%d], S3: [%d]", objectpath, file_size, object_size
156
+ )
157
+ return True
158
+
159
+ def _uploader(self, filepath: str, objectpath: str) -> None:
119
160
  """Uploads the filepath to the specified S3 bucket.
120
161
 
121
162
  Args:
122
- objectpath: Object path ref in S3.
123
163
  filepath: Filepath to upload.
164
+ objectpath: Object path ref in S3.
124
165
  """
125
- self.bucket.upload_file(filepath, objectpath)
166
+ if self._proceed_to_upload(filepath, objectpath):
167
+ self.bucket.upload_file(filepath, objectpath)
126
168
 
127
169
  def _get_files(self) -> Dict[str, str]:
128
170
  """Get a mapping for all the file path and object paths in upload directory.
129
171
 
130
172
  Returns:
131
173
  Dict[str, str]:
132
- Returns a dictionary object path and filepath.
174
+ Returns a key-value pair of filepath and objectpath.
133
175
  """
134
176
  files_to_upload = {}
135
177
  for __path, __directory, __files in os.walk(self.upload_dir):
@@ -149,7 +191,7 @@ class Uploader:
149
191
  url_parts.extend(relative_path.split(os.sep))
150
192
  # Remove falsy values using filter - "None", "bool", "len" or "lambda item: item"
151
193
  object_path = urljoin(*filter(None, url_parts))
152
- files_to_upload[object_path] = file_path
194
+ files_to_upload[file_path] = object_path
153
195
  return files_to_upload
154
196
 
155
197
  def run(self) -> None:
@@ -163,7 +205,7 @@ class Uploader:
163
205
  keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
164
206
  ):
165
207
  try:
166
- self._uploader(objectpath=objectpath, filepath=filepath)
208
+ self._uploader(filepath=filepath, objectpath=objectpath)
167
209
  self.results.success += 1
168
210
  except ClientError as error:
169
211
  self.logger.error(error)
@@ -187,7 +229,10 @@ class Uploader:
187
229
  max_workers,
188
230
  )
189
231
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
190
- futures = [executor.submit(self._uploader, *kv) for kv in keys.items()]
232
+ futures = [
233
+ executor.submit(self._uploader, **dict(filepath=filepath, objectpath=objectpath))
234
+ for filepath, objectpath in keys.items()
235
+ ]
191
236
  for future in tqdm(
192
237
  iterable=as_completed(futures),
193
238
  total=len(futures),
@@ -212,7 +257,7 @@ class Uploader:
212
257
  """
213
258
  self.init()
214
259
  # Using list and set will yield the same results but using set we can isolate directories from files
215
- return convert_to_folder_structure(set([obj.key for obj in self.bucket.objects.all()]))
260
+ return convert_to_folder_structure(set(obj.key for obj in self.bucket_objects))
216
261
 
217
262
  def print_bucket_structure(self) -> None:
218
263
  """Prints all the objects in an S3 bucket with a folder like representation."""
@@ -1,11 +0,0 @@
1
- s3/__init__.py,sha256=qSltnC7r3AjwiYWzsD9JUs8SzeBEV16nrHldiWlrxtY,66
2
- s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
- s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
4
- s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
5
- s3/uploader.py,sha256=kkv7d2EaMH3OsoIJgTx7yRUd00s0n9PbRbjj6Rm7qdA,9355
6
- s3/utils.py,sha256=0kcG0aE2olHhC8thaUEwx2J8tOI2-2TGCk6E6U-PiKw,2058
7
- pys3uploader-0.1.2.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
8
- pys3uploader-0.1.2.dist-info/METADATA,sha256=GtQq-ZDiZEMpl2CEs4VJw4AQ8tf5rzcfgjDu68oHX6c,7286
9
- pys3uploader-0.1.2.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
10
- pys3uploader-0.1.2.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
11
- pys3uploader-0.1.2.dist-info/RECORD,,