boto3-assist 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,606 @@
1
+ """
2
+ Geek Cafe, LLC
3
+ Maintainers: Eric Wilson
4
+ MIT License. See Project Root for the license information.
5
+ """
6
+
7
+ import os
8
+ import tempfile
9
+ import time
10
+ import io
11
+ from typing import Any, Dict, Optional, List
12
+
13
+ from aws_lambda_powertools import Logger
14
+ from botocore.exceptions import ClientError
15
+
16
+ from boto3_assist.errors.custom_exceptions import InvalidHttpMethod
17
+ from boto3_assist.s3.s3_connection import S3Connection
18
+ from boto3_assist.utilities.datetime_utility import DatetimeUtility
19
+ from boto3_assist.utilities.file_operations import FileOperations
20
+ from boto3_assist.utilities.http_utility import HttpUtility
21
+
22
+
23
+ logger = Logger(child=True)
24
+
25
+
26
+ class S3Object:
27
+ """S3 Object Actions"""
28
+
29
+ def __init__(self, connection: S3Connection):
30
+ self.connection = connection or S3Connection()
31
+
32
+ def delete(self, *, bucket_name: str, key: str) -> Dict[str, Any]:
33
+ """
34
+ Deletes an object key
35
+
36
+ Args:
37
+ bucket_name (str): The AWS Bucket Name
38
+ key (str): The Object Key
39
+ """
40
+ s3 = self.connection.client
41
+ # see if the object exists
42
+ try:
43
+ response = s3.head_object(Bucket=bucket_name, Key=key)
44
+ response = s3.delete_object(Bucket=bucket_name, Key=key)
45
+ except s3.exceptions.NoSuchKey:
46
+ response = {"ResponseMetadata": {"HTTPStatusCode": 404}}
47
+ except s3.exceptions.ClientError as e:
48
+ if e.response.get("Error", {}).get("Code") == "404":
49
+ response = {"ResponseMetadata": {"HTTPStatusCode": 404}}
50
+ else:
51
+ raise e
52
+
53
+ return dict(response)
54
+
55
+ def delete_all_versions(
56
+ self, *, bucket_name: str, key: str, include_deleted: bool = False
57
+ ) -> List[str]:
58
+ """
59
+ Deletes an object key and all the versions for that object key
60
+
61
+ Args:
62
+ bucket_name (str): The AWS Bucket Name
63
+ key (str): The Object Kuye
64
+ include_deleted (bool, optional): Should deleted files be removed as well.
65
+ If True it will look for the object keys with the deleted marker and remove it.
66
+ Defaults to False.
67
+ """
68
+ s3 = self.connection.client
69
+ paginator = s3.get_paginator("list_object_versions")
70
+ files: List[str] = []
71
+
72
+ for page in paginator.paginate(Bucket=bucket_name, Prefix=key):
73
+ # Delete object versions
74
+ if "Versions" in page:
75
+ for version in page["Versions"]:
76
+ s3.delete_object(
77
+ Bucket=bucket_name,
78
+ Key=version["Key"],
79
+ VersionId=version["VersionId"],
80
+ )
81
+
82
+ files.append(f"{version['Key']} - {version['VersionId']}")
83
+
84
+ if include_deleted:
85
+ # delete a previous files that may have just been a soft delete.
86
+ if "DeleteMarkers" in page:
87
+ for marker in page["DeleteMarkers"]:
88
+ s3.delete_object(
89
+ Bucket=bucket_name,
90
+ Key=marker["Key"],
91
+ VersionId=marker["VersionId"],
92
+ )
93
+
94
+ files.append(
95
+ f"{marker['Key']}:{marker['VersionId']}:delete-marker"
96
+ )
97
+ else:
98
+ response = self.delete(bucket_name=bucket_name, key=key)
99
+ if response["ResponseMetadata"]["HTTPStatusCode"] == 404:
100
+ return files
101
+
102
+ files.append(key)
103
+
104
+ return files
105
+
106
+ def generate_presigned_url(
107
+ self,
108
+ *,
109
+ bucket_name: str,
110
+ key_path: str,
111
+ file_name: str,
112
+ meta_data: Optional[dict] = None,
113
+ expiration: int = 3600,
114
+ method_type: str = "POST",
115
+ user_id: Optional[str] = None,
116
+ ) -> Dict[str, Any]:
117
+ """
118
+ Create a signed URL for uploading a file to S3.
119
+ :param bucket_name: The name of the S3 bucket.
120
+ :param user_id: The user ID of the user uploading the file.
121
+ :param file_name: The file name of the file being uploaded.
122
+ :param aws_profile: The name of the AWS profile to use.
123
+ :param aws_region: The name of the AWS region to use.
124
+ :param expiration: The number of seconds the URL is valid for.
125
+ :return: The signed URL.
126
+ """
127
+ start = DatetimeUtility.get_utc_now()
128
+ logger.debug(
129
+ f"Creating signed URL for bucket {bucket_name} for user {user_id} and file {file_name} at {start} UTC"
130
+ )
131
+
132
+ file_extension = FileOperations.get_file_extension(file_name)
133
+
134
+ local_meta = {
135
+ "user_id": f"{user_id}",
136
+ "file_name": f"{file_name}",
137
+ "extension": f"{file_extension}",
138
+ "method": "pre-signed-upload",
139
+ }
140
+
141
+ if not meta_data:
142
+ meta_data = local_meta
143
+ else:
144
+ meta_data.update(local_meta)
145
+
146
+ key = key_path
147
+ method_type = method_type.upper()
148
+
149
+ signed_url: str | Dict[str, Any]
150
+ if method_type == "PUT":
151
+ signed_url = self.connection.client.generate_presigned_url(
152
+ "put_object",
153
+ Params={
154
+ "Bucket": f"{bucket_name}",
155
+ "Key": f"{key}",
156
+ # NOTE: if you include the ContentType or Metadata then its required in the when they upload the file
157
+ # Otherwise you will get a `SignatureDoesNotMatch` error
158
+ # for now I'm commenting it out.
159
+ #'ContentType': 'application/octet-stream',
160
+ #'ACL': 'private',
161
+ # "Metadata": meta_data,
162
+ },
163
+ ExpiresIn=expiration, # URL is valid for x seconds
164
+ )
165
+ elif method_type == "POST":
166
+ signed_url = self.connection.client.generate_presigned_post(
167
+ bucket_name,
168
+ key,
169
+ ExpiresIn=expiration, # URL is valid for x seconds
170
+ )
171
+ elif method_type == "GET":
172
+ signed_url = self.connection.client.generate_presigned_url(
173
+ "get_object",
174
+ Params={
175
+ "Bucket": f"{bucket_name}",
176
+ "Key": f"{key}",
177
+ },
178
+ ExpiresIn=expiration, # URL is valid for x seconds
179
+ )
180
+ else:
181
+ raise InvalidHttpMethod(
182
+ f'Unknown method type was referenced. valid types are "PUT", "POST", "GET" , "{method_type}" as used '
183
+ )
184
+
185
+ end = DatetimeUtility.get_utc_now()
186
+ logger.debug(f"Signed URL created in {end-start}")
187
+
188
+ response = {
189
+ "signed_url": signed_url,
190
+ "key": key,
191
+ "meta_data": meta_data,
192
+ }
193
+
194
+ return response
195
+
196
+ def upload_file_obj(self, *, bucket: str, key: str, file_obj: bytes | str) -> str:
197
+ """
198
+ Uploads a file object to s3. Returns the full s3 path s3://<bucket>/<key>
199
+ """
200
+
201
+ if key.startswith("/"):
202
+ # remove the first slash
203
+ key = key[1:]
204
+
205
+ logger.debug(
206
+ {
207
+ "metric_filter": "upload_file_to_s3",
208
+ "bucket": bucket,
209
+ "key": key,
210
+ }
211
+ )
212
+ try:
213
+ # convert if necessary
214
+ file_obj: bytes = (
215
+ file_obj.encode("utf-8") if isinstance(file_obj, str) else file_obj
216
+ )
217
+ self.connection.client.upload_fileobj(
218
+ Fileobj=io.BytesIO(file_obj), Bucket=bucket, Key=key
219
+ )
220
+
221
+ except ClientError as ce:
222
+ error = {
223
+ "metric_filter": "upload_file_to_s3_failure",
224
+ "s3 upload": "failure",
225
+ "bucket": bucket,
226
+ "key": key,
227
+ }
228
+ logger.error(error)
229
+ raise RuntimeError(error) from ce
230
+
231
+ return f"s3://{bucket}/{key}"
232
+
233
+ def upload_file(
234
+ self,
235
+ *,
236
+ bucket: str,
237
+ key: str,
238
+ local_file_path: str,
239
+ throw_error_on_failure: bool = False,
240
+ ) -> str | None:
241
+ """
242
+ Uploads a file to s3. Returns the full s3 path s3://<bucket>/<key>
243
+ """
244
+
245
+ if key.startswith("/"):
246
+ # remove the first slash
247
+ key = key[1:]
248
+
249
+ # build the path
250
+ s3_path = f"s3://{bucket}/{key}"
251
+
252
+ logger.debug(
253
+ {
254
+ "metric_filter": "upload_file_to_s3",
255
+ "bucket": bucket,
256
+ "key": key,
257
+ "local_file_path": local_file_path,
258
+ }
259
+ )
260
+ try:
261
+ self.connection.client.upload_file(local_file_path, bucket, key)
262
+
263
+ except ClientError as ce:
264
+ error = {
265
+ "metric_filter": "upload_file_to_s3_failure",
266
+ "s3 upload": "failure",
267
+ "bucket": bucket,
268
+ "key": key,
269
+ "local_file_path": local_file_path,
270
+ }
271
+ logger.error(error)
272
+
273
+ if throw_error_on_failure:
274
+ raise RuntimeError(error) from ce
275
+
276
+ return None
277
+
278
+ return s3_path
279
+
280
+ def download_file(
281
+ self,
282
+ *,
283
+ bucket: str,
284
+ key: str,
285
+ local_directory: str | None = None,
286
+ local_file_path: str | None = None,
287
+ retry_attempts: int = 3,
288
+ retry_sleep: int = 5,
289
+ ) -> str:
290
+ """Download a file from s3"""
291
+ exception: Exception | None = None
292
+
293
+ if retry_attempts == 0:
294
+ retry_attempts = 1
295
+
296
+ for i in range(retry_attempts):
297
+ exception = None
298
+ try:
299
+ path = self.download_file_no_retries(
300
+ bucket=bucket,
301
+ key=key,
302
+ local_directory=local_directory,
303
+ local_file_path=local_file_path,
304
+ )
305
+ if path and os.path.exists(path):
306
+ return path
307
+
308
+ except Exception as e: # pylint: disable=w0718
309
+ logger.warning(
310
+ {
311
+ "action": "download_file",
312
+ "result": "failure",
313
+ "exception": str(e),
314
+ "attempt": i + 1,
315
+ "retry_attempts": retry_attempts,
316
+ }
317
+ )
318
+
319
+ exception = e
320
+
321
+ # sleep for a bit
322
+ attempt = i + 1
323
+ time.sleep(attempt * retry_sleep)
324
+
325
+ if exception:
326
+ logger.exception(
327
+ {
328
+ "action": "download_file",
329
+ "result": "failure",
330
+ "exception": str(exception),
331
+ "retry_attempts": retry_attempts,
332
+ }
333
+ )
334
+
335
+ raise exception from exception
336
+
337
+ raise RuntimeError("Unable to download file")
338
+
339
+ def download_file_no_retries(
340
+ self,
341
+ bucket: str,
342
+ key: str,
343
+ local_directory: str | None = None,
344
+ local_file_path: str | None = None,
345
+ ) -> str:
346
+ """
347
+ Downloads a file from s3
348
+
349
+ Args:
350
+ bucket (str): s3 bucket
351
+ key (str): the s3 object key
352
+ local_directory (str, optional): Local directory to download to. Defaults to None.
353
+ If None, we'll use a local tmp directory.
354
+
355
+ Raises:
356
+ e:
357
+
358
+ Returns:
359
+ str: Path to the downloaded file.
360
+ """
361
+
362
+ decoded_object_key: str
363
+ try:
364
+ logger.debug(
365
+ {
366
+ "action": "downloading file",
367
+ "bucket": bucket,
368
+ "key": key,
369
+ "local_directory": local_directory,
370
+ }
371
+ )
372
+ return self.__download_file(bucket, key, local_directory, local_file_path)
373
+ except FileNotFoundError:
374
+ logger.warning(
375
+ {
376
+ "metric_filter": "download_file_error",
377
+ "error": "FileNotFoundError",
378
+ "message": "attempting to find it decoded",
379
+ "bucket": bucket,
380
+ "key": key,
381
+ }
382
+ )
383
+
384
+ # attempt to decode the key
385
+ decoded_object_key = HttpUtility.decode_url(key)
386
+
387
+ logger.error(
388
+ {
389
+ "metric_filter": "download_file_error",
390
+ "error": "FileNotFoundError",
391
+ "message": "attempting to find it decoded",
392
+ "bucket": bucket,
393
+ "key": key,
394
+ "decoded_object_key": decoded_object_key,
395
+ }
396
+ )
397
+
398
+ return self.__download_file(bucket, decoded_object_key, local_directory)
399
+
400
+ except Exception as e:
401
+ logger.error(
402
+ {
403
+ "metric_filter": "download_file_error",
404
+ "error": str(e),
405
+ "bucket": bucket,
406
+ "decoded_object_key": decoded_object_key,
407
+ }
408
+ )
409
+ raise e
410
+
411
+ def stream_file(self, bucket_name: str, key: str) -> Dict[str, Any]:
412
+ """
413
+ Gets a file from s3 and returns the response.
414
+ The "Body" is a streaming body object. You can read it like a file.
415
+ For example:
416
+
417
+ with response["Body"] as f:
418
+ data = f.read()
419
+ print(data)
420
+
421
+ """
422
+
423
+ logger.debug(
424
+ {
425
+ "source": "download_file",
426
+ "action": "downloading a file from s3",
427
+ "bucket": bucket_name,
428
+ "key": key,
429
+ }
430
+ )
431
+
432
+ response: Dict[str, Any] = {}
433
+ error = None
434
+
435
+ try:
436
+ response = dict(
437
+ self.connection.client.get_object(Bucket=bucket_name, Key=key)
438
+ )
439
+
440
+ logger.debug(
441
+ {"metric_filter": "s3_download_response", "response": str(response)}
442
+ )
443
+
444
+ except Exception as e: # pylint: disable=W0718
445
+ error = str(e)
446
+ logger.error({"metric_filter": "s3_download_error", "error": str(e)})
447
+ raise RuntimeError(
448
+ {
449
+ "metric_filter": "s3_download_error",
450
+ "error": str(e),
451
+ "bucket": bucket_name,
452
+ "key": key,
453
+ }
454
+ ) from e
455
+
456
+ finally:
457
+ logger.debug(
458
+ {
459
+ "source": "download_file",
460
+ "action": "downloading a file from s3",
461
+ "bucket": bucket_name,
462
+ "key": key,
463
+ "response": response,
464
+ "errors": error,
465
+ }
466
+ )
467
+
468
+ return response
469
+
470
+ def __download_file(
471
+ self,
472
+ bucket: str,
473
+ key: str,
474
+ local_directory: str | None = None,
475
+ local_file_path: str | None = None,
476
+ ):
477
+ if local_directory and local_file_path:
478
+ raise ValueError(
479
+ "Only one of local_directory or local_file_path can be provided"
480
+ )
481
+
482
+ if local_directory and not os.path.exists(local_directory):
483
+ FileOperations.makedirs(local_directory)
484
+
485
+ if local_file_path and not os.path.exists(os.path.dirname(local_file_path)):
486
+ FileOperations.makedirs(os.path.dirname(local_file_path))
487
+
488
+ file_name = self.__get_file_name_from_path(key)
489
+ if local_directory is None and local_file_path is None:
490
+ local_path = self.get_local_path_for_file(file_name)
491
+ elif local_directory:
492
+ local_path = os.path.join(local_directory, file_name)
493
+ else:
494
+ local_path = local_file_path
495
+
496
+ logger.debug(
497
+ {
498
+ "source": "download_file",
499
+ "action": "downloading a file from s3",
500
+ "bucket": bucket,
501
+ "key": key,
502
+ "file_name": file_name,
503
+ "local_path": local_path,
504
+ }
505
+ )
506
+
507
+ error: str | None = None
508
+ try:
509
+ self.connection.client.download_file(bucket, key, local_path)
510
+
511
+ except Exception as e: # pylint: disable=W0718
512
+ error = str(e)
513
+ logger.error({"metric_filter": "s3_download_error", "error": str(e)})
514
+
515
+ file_exist = os.path.exists(local_path)
516
+
517
+ logger.debug(
518
+ {
519
+ "source": "download_file",
520
+ "action": "downloading a file from s3",
521
+ "bucket": bucket,
522
+ "key": key,
523
+ "file_name": file_name,
524
+ "local_path": local_path,
525
+ "file_downloaded": file_exist,
526
+ "errors": error,
527
+ }
528
+ )
529
+
530
+ if not file_exist:
531
+ raise FileNotFoundError("File Failed to download (does not exist) from S3.")
532
+
533
+ return local_path
534
+
535
+ def __get_file_name_from_path(self, path: str) -> str:
536
+ """
537
+ Get a file name from the path
538
+
539
+ Args:
540
+ path (str): a file path
541
+
542
+ Returns:
543
+ str: the file name
544
+ """
545
+ return path.rsplit("/")[-1]
546
+
547
+ def get_local_path_for_file(self, file_name: str):
548
+ """
549
+ Get a local temp location for a file.
550
+ This is designed to work with lambda functions.
551
+ The /tmp directory is the only writeable location for lambda functions.
552
+ """
553
+ temp_dir = self.get_temp_directory()
554
+ # use /tmp it's the only writeable location for lambda
555
+ local_path = os.path.join(temp_dir, file_name)
556
+ return local_path
557
+
558
+ def get_temp_directory(self):
559
+ """
560
+ Determines the appropriate temporary directory based on the environment.
561
+ If running in AWS Lambda, returns '/tmp'.
562
+ Otherwise, returns the system's standard temp directory.
563
+ """
564
+ if "AWS_LAMBDA_FUNCTION_NAME" in os.environ:
565
+ # In AWS Lambda environment
566
+ return "/tmp"
567
+ else:
568
+ # Not in AWS Lambda, use the system's default temp directory
569
+ return tempfile.gettempdir()
570
+
571
+ def encode(
572
+ self, text: str, encoding: str = "utf-8", errors: str = "strict"
573
+ ) -> bytes:
574
+ """
575
+ Encodes a string for s3
576
+ """
577
+ return text.encode(encoding=encoding, errors=errors)
578
+
579
+ def decode(
580
+ self, file_obj: bytes, encoding: str = "utf-8", errors: str = "strict"
581
+ ) -> str:
582
+ """
583
+ Decodes bytes to a string
584
+ """
585
+ return file_obj.decode(encoding=encoding, errors=errors)
586
+
587
+ def list_versions(self, bucket: str, prefix: str = "") -> List[str]:
588
+ """
589
+ List all versions of objects in an S3 bucket with a given prefix.
590
+
591
+ Args:
592
+ bucket (str): The name of the S3 bucket.
593
+ prefix (str, optional): The prefix to filter objects by. Defaults to "".
594
+
595
+ Returns:
596
+ list: A list of dictionaries containing information about each object version.
597
+ """
598
+ versions = []
599
+ paginator = self.connection.client.get_paginator("list_object_versions")
600
+ page_iterator = paginator.paginate(Bucket=bucket, Prefix=prefix)
601
+
602
+ for page in page_iterator:
603
+ if "Versions" in page:
604
+ versions.extend(page["Versions"])
605
+
606
+ return versions