my-aws-helpers 3.0.0.dev7__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of my-aws-helpers might be problematic. See the registry's release page for more details.

Files changed (27)
  1. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/PKG-INFO +1 -1
  2. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/bedrock.py +23 -20
  3. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/prompts/transactions_headers_prompt_v2.txt +5 -1
  4. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/PKG-INFO +1 -1
  5. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/setup.py +1 -1
  6. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/MANIFEST.in +0 -0
  7. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/README.md +0 -0
  8. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/api.py +0 -0
  9. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/auth.py +0 -0
  10. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/cognito.py +0 -0
  11. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/dynamo.py +0 -0
  12. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/errors.py +0 -0
  13. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/event.py +0 -0
  14. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/logging.py +0 -0
  15. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/prompts/__init__.py +0 -0
  16. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/prompts/markdown_system_prompt.txt +0 -0
  17. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/prompts/transactions_headers_prompt.txt +0 -0
  18. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/prompts/transactions_prompt.txt +0 -0
  19. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/s3.py +0 -0
  20. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers/sfn.py +0 -0
  21. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/SOURCES.txt +0 -0
  22. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
  23. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/requires.txt +0 -0
  24. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/top_level.txt +0 -0
  25. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/zip-safe +0 -0
  26. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/setup.cfg +0 -0
  27. {my_aws_helpers-3.0.0.dev7 → my_aws_helpers-3.1.0}/tests/test_event.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my_aws_helpers
3
- Version: 3.0.0.dev7
3
+ Version: 3.1.0
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -51,8 +51,9 @@ class OCRResult:
51
51
  class Bedrock:
52
52
  def __init__(
53
53
  self,
54
- model_id: str = "anthropic.claude-3-5-sonnet-20241022-v2:0",
54
+ model_id: str = "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
55
55
  logger = None,
56
+ sleep_time: float = 1.0,
56
57
  ):
57
58
 
58
59
  self.session = Bedrock._set_session_params()
@@ -60,7 +61,7 @@ class Bedrock:
60
61
  region_name = "ap-southeast-2"
61
62
  if self.session is None:
62
63
  self.session = boto3.Session(region_name = region_name)
63
-
64
+ self.sleep_time = sleep_time
64
65
 
65
66
  custom_config = Config(
66
67
  retries={
@@ -133,6 +134,7 @@ class Bedrock:
133
134
  except Exception as e:
134
135
  self.logger.exception(f"Error during conversation due to {e}")
135
136
  if i >= len(retries) - 1: raise Exception(e)
137
+ time.sleep(self.sleep_time)
136
138
  continue
137
139
 
138
140
  result = {}
@@ -144,24 +146,19 @@ class Bedrock:
144
146
  def _parallel_ocr(
145
147
  self,
146
148
  image_bytes_list: List[bytes],
147
- prompt_type: str,
149
+ prompt: str,
148
150
  max_workers: int = 10,
149
151
  ):
150
- results = list()
151
- prompt = self._get_prompt(prompt_type=prompt_type)
152
+ execution_futures = []
152
153
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
153
- self.logger.info("Some log")
154
- execution_futures = {
155
- executor.submit(
156
- self._ocr,
157
- prompt = prompt,
158
- image_bytes = img,
159
- ): img for img in image_bytes_list
160
- }
161
- for future in concurrent.futures.as_completed(execution_futures):
162
- result = future.result()
163
- if result:
164
- results.append(result)
154
+ for i, img in enumerate(image_bytes_list):
155
+ self.logger.info(f"Starting OCR for page: {i}")
156
+ time.sleep(self.sleep_time) # Stagger start time
157
+ future = executor.submit(self._ocr, prompt=prompt, image_bytes=img)
158
+ execution_futures.append(future)
159
+
160
+ # Wait for all tasks and collect results in order of submission
161
+ results = [future.result() for future in execution_futures if future.result() is not None]
165
162
  return results
166
163
 
167
164
  def get_ocr_result(
@@ -183,6 +180,7 @@ class Bedrock:
183
180
  except Exception as e:
184
181
  self.logger.error(f"Could not get pix map for page {i}")
185
182
  continue
183
+ skip_page_zero = False
186
184
  header_ocr_result = None
187
185
  if len(image_bytes_list) > 1:
188
186
  headers_prompt = self._get_prompt(prompt_type=PromptType.transaction_headers.value)
@@ -193,6 +191,7 @@ class Bedrock:
193
191
  self.logger.info(f"No ocr result returned when getting headers {PromptType.transaction_headers.value}")
194
192
  headers = header_ocr_result.content.get("headers")
195
193
  if (len(headers) < 1) or (headers is None):
194
+ skip_page_zero = True
196
195
  continue
197
196
  else:
198
197
  break
@@ -204,9 +203,13 @@ class Bedrock:
204
203
  self.logger.info("Got Prompt")
205
204
  results = list()
206
205
 
207
- for i, image_bytes in enumerate(image_bytes_list):
208
- self.logger.info(f"Starting OCR for page: {i}")
209
- results.append(self._ocr(image_bytes=image_bytes, prompt=transactions_prompt))
206
+ if skip_page_zero:
207
+ image_bytes_list = image_bytes_list[1:] #page zero often has account summary info
208
+ results = self._parallel_ocr(image_bytes_list=image_bytes_list, prompt=transactions_prompt)
209
+
210
+ # for i, image_bytes in enumerate(image_bytes_list):
211
+ # self.logger.info(f"Starting OCR for page: {i}")
212
+ # results.append(self._ocr(image_bytes=image_bytes, prompt=transactions_prompt))
210
213
  return results
211
214
  except Exception as e:
212
215
  self.logger.exception(e)
@@ -13,7 +13,11 @@ Return a JSON object in the following structure:
13
13
  }
14
14
 
15
15
  ## Rules
16
-
16
+ - DO NOT USE DETAILS sections, DO NOT use Account Summary, Do NOT use Account Details, DO NOT use payment details
17
+ - ONLY Use tables that contain transaction to acquire headers, if you cannot find a table that contains a list of transactions, return:
18
+ {
19
+ "headers": []
20
+ }
17
21
  - Only return headers **if there is a table with transaction-like data** (e.g. purchases, sales, account activity).
18
22
  - There will likely be MORE THAN 1 transaction in this table name, if that is not the case, return:
19
23
  {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my-aws-helpers
3
- Version: 3.0.0.dev7
3
+ Version: 3.1.0
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -3,7 +3,7 @@ from setuptools import find_namespace_packages, setup
3
3
 
4
4
  base_path = os.path.abspath(os.path.dirname(__file__))
5
5
 
6
- version = "3.0.0.dev7"
6
+ version = "3.1.0"
7
7
 
8
8
  setup(
9
9
  name="my_aws_helpers",