my-aws-helpers 4.3.0__tar.gz → 6.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/PKG-INFO +1 -1
  2. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/bedrock.py +125 -2
  3. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/cognito.py +17 -0
  4. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/dynamo.py +1 -1
  5. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/s3.py +115 -4
  6. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers.egg-info/PKG-INFO +1 -1
  7. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers.egg-info/requires.txt +3 -3
  8. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/setup.py +2 -2
  9. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/MANIFEST.in +0 -0
  10. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/README.md +0 -0
  11. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/api.py +0 -0
  12. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/auth.py +0 -0
  13. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/errors.py +0 -0
  14. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/event.py +0 -0
  15. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/logging.py +0 -0
  16. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/prompts/__init__.py +0 -0
  17. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/prompts/markdown_system_prompt.txt +0 -0
  18. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/prompts/transactions_headers_prompt.txt +0 -0
  19. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/prompts/transactions_headers_prompt_v2.txt +0 -0
  20. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/prompts/transactions_prompt.txt +0 -0
  21. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers/sfn.py +0 -0
  22. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers.egg-info/SOURCES.txt +0 -0
  23. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
  24. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers.egg-info/top_level.txt +0 -0
  25. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/my_aws_helpers.egg-info/zip-safe +0 -0
  26. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/setup.cfg +0 -0
  27. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/tests/test_cognito.py +0 -0
  28. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.4}/tests/test_event.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my_aws_helpers
3
- Version: 4.3.0
3
+ Version: 6.0.4
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -5,11 +5,26 @@ import json
5
5
  import time
6
6
  import os
7
7
  import io
8
+ import re
9
+ from copy import copy
8
10
  from typing import Optional, List, Dict
9
11
  from enum import Enum
10
12
  import pymupdf
11
13
  import concurrent.futures
12
14
  from dataclasses import dataclass
15
+ from my_aws_helpers.s3 import S3Serialiser, BaseS3Object, BaseS3Queries, S3, S3Location
16
+
17
+ from my_aws_helpers.logging import select_powertools_logger
18
+
19
+
20
+ logger = select_powertools_logger("aws-helpers-bedrock")
21
+
22
+
23
+ class ImageType(str, Enum):
24
+ gif = "gif"
25
+ jpeg = "jpeg"
26
+ png = "png"
27
+ webp = "webp"
13
28
 
14
29
 
15
30
  class PromptType(str, Enum):
@@ -35,7 +50,7 @@ class TokenUsage:
35
50
 
36
51
 
37
52
  @dataclass
38
- class OCRResult:
53
+ class OCRResult(BaseS3Object):
39
54
  content: List[Dict[str, str]]
40
55
  token_usage: TokenUsage
41
56
  page_number: int
@@ -48,11 +63,69 @@ class OCRResult:
48
63
  page_number=data.get("page_number", 0),
49
64
  )
50
65
 
66
+ @classmethod
67
+ def from_s3_representation(cls, obj: dict) -> OCRResult:
68
+ obj["token_usage"] = (TokenUsage.from_dict(obj.get("token_usage", {})),)
69
+ return cls(**obj)
70
+
71
+ def to_s3_representation(self) -> dict:
72
+ obj = copy(vars(self))
73
+ obj["token_usage"] = S3Serialiser.object_serialiser(
74
+ obj=vars(obj["token_usage"])
75
+ )
76
+ return S3Serialiser.object_serialiser(obj=obj)
77
+
78
+ def get_save_location(self, bucket_name: str) -> S3Location:
79
+ pass
80
+
81
+
82
+ class OCRResultQueries(BaseS3Queries):
83
+ def __init__(self, s3_client: S3, bucket_name: str):
84
+ super().__init__(s3_client=s3_client, bucket_name=bucket_name)
85
+
86
+ def save_ocr_result_to_s3(
87
+ self, ocr_result: OCRResult, save_location: S3Location
88
+ ) -> Optional[S3Location]:
89
+ try:
90
+ obj = ocr_result.to_s3_representation()
91
+ return self.s3_client.save_dict_to_s3(
92
+ content=obj,
93
+ s3_location=save_location,
94
+ )
95
+ except Exception as e:
96
+ logger.exception(f"Failed to save ocr result to s3 due to {e}")
97
+ return None
98
+
99
+ def _concurrent_s3_read(
100
+ self, locations: List[S3Location], max_workers: int = 10
101
+ ) -> List[OCRResult]:
102
+ results: List[OCRResult] = list()
103
+ futures = list()
104
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
105
+ for loc in locations:
106
+ future = executor.submit(
107
+ self.s3_client.read_dict_from_s3,
108
+ s3_location=loc,
109
+ )
110
+ futures.append(future)
111
+ for f in futures:
112
+ results.append(f.result())
113
+ results = [r for r in results if r is not None]
114
+ return results
115
+
116
+ def get_ocr_results_by_key_prefix(self, prefix: str) -> List[OCRResult]:
117
+ locations = self.s3_client.list_objects_by_prefix(
118
+ bucket_name=self.bucket_name, prefix=prefix
119
+ )
120
+ objects = self._concurrent_s3_read(locations=locations)
121
+ ocr_results = [OCRResult.from_s3_representation(obj=obj) for obj in objects]
122
+ return ocr_results
123
+
51
124
 
52
125
  class Bedrock:
53
126
  def __init__(
54
127
  self,
55
- model_id: str = "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
128
+ model_id: str = "apac.anthropic.claude-3-5-sonnet-20241022-v2:0", # anthropic.claude-sonnet-4-20250514-v1:0
56
129
  logger=None,
57
130
  sleep_time: float = 1.0,
58
131
  ):
@@ -92,6 +165,19 @@ class Bedrock:
92
165
  print(e)
93
166
  return None
94
167
 
168
+ @staticmethod
169
+ def extract_json_from_markdown(text: str):
170
+ """
171
+ Extracts the JSON object from a string that may be wrapped in ```json ... ``` code block
172
+ """
173
+ # Match a {...} block anywhere in the text
174
+ match = re.search(r"\{.*\}", text, re.DOTALL)
175
+ if match:
176
+ json_str = match.group(0)
177
+ return json.loads(json_str)
178
+ else:
179
+ raise ValueError("No JSON object found in the text")
180
+
95
181
  def _get_prompt(self, prompt_type: str) -> Optional[str]:
96
182
  if prompt_type not in list(PromptType):
97
183
  raise Exception(f"Error: Invalid prompt type")
@@ -237,3 +323,40 @@ class Bedrock:
237
323
  except Exception as e:
238
324
  self.logger.exception(e)
239
325
  return []
326
+
327
+ def _get_image_block(self, image: bytes, image_content_type: ImageType) -> dict:
328
+ return {
329
+ "image": {
330
+ "format": image_content_type,
331
+ "source": {
332
+ "bytes": image,
333
+ },
334
+ }
335
+ }
336
+
337
+ def image_analysis(
338
+ self, images: List[bytes], prompt: str, image_content_type: ImageType
339
+ ) -> OCRResult:
340
+
341
+ system_prompt = [{"text": prompt}]
342
+ message = [
343
+ {
344
+ "role": "user",
345
+ "content": [
346
+ self._get_image_block(
347
+ image=image, image_content_type=image_content_type
348
+ )
349
+ for image in images
350
+ ],
351
+ }
352
+ ]
353
+ response = self.client.converse(
354
+ modelId=self.model_id, messages=message, system=system_prompt
355
+ )
356
+
357
+ result = {}
358
+ result["content"] = Bedrock.extract_json_from_markdown(
359
+ text=response["output"]["message"]["content"][0]["text"]
360
+ )
361
+ result["token_usage"] = response["usage"]
362
+ return OCRResult.from_dict(data=result)
@@ -69,6 +69,23 @@ class Cognito:
69
69
  logger.exception(f"Failed to sign up due to {e}")
70
70
  return None
71
71
 
72
+ def confirm_sign_up(
73
+ self,
74
+ username: str,
75
+ client_id: str,
76
+ confirmation_code: str,
77
+ ) -> dict:
78
+ try:
79
+ response = self.client.confirm_sign_up(
80
+ ClientId=client_id, Username=username, ConfirmationCode=confirmation_code,
81
+ )
82
+ return response
83
+ except Exception as e:
84
+ logger.exception(
85
+ f"Failed to confirm sign up username {username} due to {e}"
86
+ )
87
+ return None
88
+
72
89
  def admin_confirm_sign_up(
73
90
  self,
74
91
  username: str,
@@ -113,7 +113,7 @@ class Dynamo:
113
113
  return self.table.get_item(Item=item)
114
114
 
115
115
  def delete_item(self, item: dict):
116
- return self.table.delete_item(Item=item)
116
+ return self.table.delete_item(Key=item)
117
117
 
118
118
  def batch_put(self, items: List[dict]) -> None:
119
119
  with self.table.batch_writer() as batch:
@@ -1,7 +1,10 @@
1
+ from __future__ import annotations
1
2
  import boto3
2
3
  import io
3
4
  import json
4
- from typing import Optional, Any, Dict
5
+ from concurrent.futures import Future, ThreadPoolExecutor
6
+ from abc import ABC, abstractmethod
7
+ from typing import Optional, Any, Dict, List
5
8
  from datetime import datetime, date
6
9
  from copy import copy
7
10
  import os
@@ -31,16 +34,19 @@ class S3Serialiser:
31
34
  def _serialise(obj: Any):
32
35
  if isinstance(obj, datetime) or isinstance(obj, date):
33
36
  return obj.isoformat()
37
+ if isinstance(obj, S3Location):
38
+ return obj.location
34
39
  return obj
35
40
 
36
41
  @staticmethod
37
- def object_serialiser(obj: Any):
42
+ def object_serialiser(obj: Dict):
38
43
  if isinstance(obj, list):
39
44
  return [S3Serialiser.object_serialiser(obj=obj) for obj in obj]
40
45
  if isinstance(obj, dict):
41
46
  return {k: S3Serialiser.object_serialiser(v) for k, v in obj.items()}
42
47
  return S3Serialiser._serialise(obj=obj)
43
48
 
49
+
44
50
  class S3Location:
45
51
  bucket: str
46
52
  file_name: str
@@ -94,10 +100,63 @@ class S3:
94
100
  Body=json.dumps(object), Bucket=bucket_name, Key=file_name
95
101
  )
96
102
 
103
+ def list_objects_by_prefix(self, bucket_name: str, prefix: str) -> List[S3Location]:
104
+ """
105
+ list objects by prefix gets 1000 items at a time, if theres more, I want em
106
+ """
107
+ objects = list()
108
+ try:
109
+ continuation_token = None
110
+ while True:
111
+ if continuation_token:
112
+ response = self.client.list_objects_v2(
113
+ Bucket=bucket_name,
114
+ Prefix=prefix,
115
+ ContinuationToken=continuation_token,
116
+ )
117
+ else:
118
+ response = self.client.list_objects_v2(
119
+ Bucket=bucket_name, Prefix=prefix
120
+ )
121
+
122
+ # Append current batch
123
+ objects.extend(response.get("Contents", []))
124
+
125
+ # Check if more results exist
126
+ if response.get("IsTruncated"): # True if more pages available
127
+ continuation_token = response["NextContinuationToken"]
128
+ else:
129
+ break
130
+ locations = [
131
+ S3Location(bucket=bucket_name, file_name=c["Key"]) for c in objects
132
+ ]
133
+ return locations
134
+ except Exception as e:
135
+ logger.exception(
136
+ f"Failed to get objects from s3: {bucket_name}/{prefix} due to {e}"
137
+ )
138
+ return []
139
+
97
140
  def get_object(self, bucket_name: str, file_name: str):
98
141
  response = self.client.get_object(Bucket=bucket_name, Key=file_name)
99
142
  return self._streaming_body_to_dict(response["Body"])
100
143
 
144
+ def put_presigned_url(
145
+ self,
146
+ s3_location: S3Location,
147
+ expires_in: int = 3600,
148
+ content_type: str = ContentType.pdf_content.value
149
+ ) -> str:
150
+ return self.client.generate_presigned_url(
151
+ "put_object",
152
+ Params={
153
+ "Bucket": s3_location.bucket,
154
+ "Key": s3_location.file_name,
155
+ "ContentType": content_type,
156
+ },
157
+ ExpiresIn=expires_in,
158
+ )
159
+
101
160
  def get_presigned_url(
102
161
  self,
103
162
  bucket_name: str,
@@ -247,7 +306,59 @@ class S3:
247
306
  )
248
307
  except Exception as e:
249
308
  logger.exception(f"Failed to save jpeg to s3 due to {e}")
250
- return None
309
+ return None
251
310
 
252
311
  def read_dict_from_s3(self, s3_location: S3Location) -> dict:
253
- return json.loads(self.read_binary_from_s3(s3_location=s3_location).decode("utf-8"))
312
+ return json.loads(
313
+ self.read_binary_from_s3(s3_location=s3_location).decode("utf-8")
314
+ )
315
+
316
+
317
+ class BaseS3Object(ABC):
318
+ def to_s3_representation(self) -> dict:
319
+ obj = copy(vars(self))
320
+ return S3Serialiser.object_serialiser(obj=obj)
321
+
322
+ @classmethod
323
+ def from_s3_representation(cls, obj: dict) -> BaseS3Object:
324
+ return cls(**obj)
325
+
326
+ @abstractmethod
327
+ def get_save_location(self, bucket_name: str) -> S3Location:
328
+ pass
329
+
330
+
331
+ class BaseS3Queries:
332
+ s3_client: S3
333
+ bucket_name: str
334
+
335
+ def __init__(self, s3_client: S3, bucket_name: str):
336
+ self.s3_client = s3_client
337
+ self.bucket_name = bucket_name
338
+
339
+ def _concurrent_s3_dict_read(
340
+ self, locations: List[S3Location], max_workers: int = 10
341
+ ) -> List[BaseS3Object]:
342
+ results: List[BaseS3Object] = list()
343
+ futures: List[Future] = list()
344
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
345
+ for location in locations:
346
+ future = executor.submit(
347
+ self.s3_client.read_dict_from_s3,
348
+ s3_location=location,
349
+ )
350
+ futures.append(future)
351
+ for f in futures:
352
+ results.append(f.result())
353
+ results = [r for r in results if r is not None]
354
+ return results
355
+
356
+ def save_s3_object_to_s3(self, object: BaseS3Object) -> Optional[S3Location]:
357
+ try:
358
+ obj = object.to_s3_representation()
359
+ return self.s3_client.save_dict_to_s3(
360
+ content=obj, s3_location=object.get_save_location()
361
+ )
362
+ except Exception as e:
363
+ logger.exception(f"Failed to save s3 object to s3 due to {e}")
364
+ return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my-aws-helpers
3
- Version: 4.3.0
3
+ Version: 6.0.4
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -1,15 +1,15 @@
1
- boto3==1.34.36
1
+ boto3
2
2
  python-jose==3.5.0
3
3
 
4
4
  [all]
5
5
  PyMuPDF==1.26.0
6
6
  black<26.0.0,<=25.1.0
7
- boto3==1.34.36
7
+ boto3
8
8
  coverage==7.3.2
9
9
  pytest==7.4.3
10
10
  python-jose==3.5.0
11
11
 
12
12
  [bedrock]
13
13
  PyMuPDF==1.26.0
14
- boto3==1.34.36
14
+ boto3
15
15
  python-jose==3.5.0
@@ -3,10 +3,10 @@ from setuptools import find_namespace_packages, setup
3
3
 
4
4
  base_path = os.path.abspath(os.path.dirname(__file__))
5
5
 
6
- version = "4.3.0"
6
+ version = "6.0.4"
7
7
 
8
8
  core = [
9
- "boto3==1.34.36",
9
+ "boto3",
10
10
  "python-jose==3.5.0",
11
11
  ]
12
12
  dev = [
File without changes
File without changes