my-aws-helpers 3.0.0.dev3__tar.gz → 3.0.0.dev5__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of my-aws-helpers might be problematic. Click here for more details.

Files changed (27) hide show
  1. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/PKG-INFO +1 -1
  2. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/bedrock.py +14 -6
  3. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/prompts/transactions_headers_prompt.txt +2 -8
  4. my_aws_helpers-3.0.0.dev5/my_aws_helpers/prompts/transactions_headers_prompt_v2.txt +44 -0
  5. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/prompts/transactions_prompt.txt +1 -0
  6. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers.egg-info/PKG-INFO +1 -1
  7. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers.egg-info/SOURCES.txt +1 -0
  8. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/setup.py +1 -1
  9. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/MANIFEST.in +0 -0
  10. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/README.md +0 -0
  11. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/api.py +0 -0
  12. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/auth.py +0 -0
  13. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/cognito.py +0 -0
  14. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/dynamo.py +0 -0
  15. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/errors.py +0 -0
  16. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/event.py +0 -0
  17. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/logging.py +0 -0
  18. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/prompts/__init__.py +0 -0
  19. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/prompts/markdown_system_prompt.txt +0 -0
  20. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/s3.py +0 -0
  21. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers/sfn.py +0 -0
  22. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
  23. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers.egg-info/requires.txt +0 -0
  24. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers.egg-info/top_level.txt +0 -0
  25. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/my_aws_helpers.egg-info/zip-safe +0 -0
  26. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/setup.cfg +0 -0
  27. {my_aws_helpers-3.0.0.dev3 → my_aws_helpers-3.0.0.dev5}/tests/test_event.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my_aws_helpers
3
- Version: 3.0.0.dev3
3
+ Version: 3.0.0.dev5
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -12,7 +12,7 @@ import concurrent.futures
12
12
  from dataclasses import dataclass
13
13
 
14
14
  class PromptType(str, Enum):
15
- transaction_headers = "transactions_headers_prompt.txt"
15
+ transaction_headers = "transactions_headers_prompt_v2.txt"
16
16
  transactions = "transactions_prompt.txt"
17
17
  # json = "json_system_prompt.txt"
18
18
  markdown = "markdown_system_prompt.txt"
@@ -183,15 +183,23 @@ class Bedrock:
183
183
  except Exception as e:
184
184
  self.logger.error(f"Could not get pix map for page {i}")
185
185
  continue
186
+ header_ocr_result = None
186
187
  if len(image_bytes_list) > 1:
187
188
  headers_prompt = self._get_prompt(prompt_type=PromptType.transaction_headers.value)
188
- ocr_result = self._ocr(prompt=headers_prompt, image_bytes=image_bytes_list[0])
189
- if ocr_result is None:
190
- self.logger.info(f"No ocr result returned when getting headers {PromptType.transaction_headers.value}")
191
- headers = ocr_result.content
189
+ for i in range(2):
190
+ # try to get headers from the first or second page
191
+ header_ocr_result = self._ocr(prompt=headers_prompt, image_bytes=image_bytes_list[i])
192
+ if header_ocr_result is None:
193
+ self.logger.info(f"No ocr result returned when getting headers {PromptType.transaction_headers.value}")
194
+ headers = header_ocr_result.content.get("headers")
195
+ if (len(headers) < 1) or (headers is None):
196
+ continue
197
+ else:
198
+ break
192
199
 
193
200
  transactions_prompt = self._get_prompt(prompt_type=prompt_type)
194
- transactions_prompt = transactions_prompt.replace("#### TABLE HEADERS ####", json.dumps(headers))
201
+ if header_ocr_result:
202
+ transactions_prompt = transactions_prompt.replace("#### TABLE HEADERS ####", json.dumps(header_ocr_result.content))
195
203
 
196
204
  self.logger.info("Got Prompt")
197
205
  results = list()
@@ -1,19 +1,13 @@
1
1
  You are an intelligent document and image interpreter.
2
2
 
3
- Your task is to analyze the provided image and extract transaction table headers.
3
+ Your task is to analyze the provided image and extract the headers of a table that contains transactions.
4
4
 
5
5
  ## Output Requirements
6
6
 
7
7
  - Return **only valid JSON**.
8
8
  - If the image contains one or more **tables**, use only the transactions table and represent its headers as a JSON object.
9
- - Use **the table headers as keys**.
10
- - if the table headers are not present use:
11
- #### TABLE HEADERS ####
12
- - if i have just told you to use "TABLE HEADERS", ignore that instruction
13
- - if there are multiple sections, only include the row data in transactions
14
- - Every row should have the consistent headers
9
+ - **If there is no table containing transactions, return an empty array for the headers**
15
10
  - **Do not fabricate** values not visible in the image
16
- if a description contains 2 lines of text, only include the most important text and the text should reside in 1 key in json response, do not split it
17
11
  - Do not return anything except the json content
18
12
 
19
13
  ## Example
@@ -0,0 +1,44 @@
1
+ You are an intelligent document and image interpreter.
2
+
3
+ Your task is to extract the headers of a table **only if it represents financial transactions**, such as those with dates, descriptions, amounts, or balances.
4
+
5
+ ## Output Format (JSON only)
6
+
7
+ Return a JSON object in the following structure:
8
+
9
+ {
10
+ "headers": [
11
+ { "header_name": "column number" }
12
+ ]
13
+ }
14
+
15
+ ## Rules
16
+
17
+ - Only return headers **if there is a table with transaction-like data** (e.g. purchases, sales, account activity).
18
+ - If no such table exists, return:
19
+ {
20
+ "headers": []
21
+ }
22
+ - The output must be strictly valid JSON. Do not include markdown, comments, or extra text.
23
+ - Do not fabricate or infer column names. Only return headers that are clearly **visible** in the image.
24
+ - Do not guess. If unsure or incomplete, treat it as missing and return an empty array.
25
+
26
+ ## Examples
27
+
28
+ ✅ If the image contains a table with rows like:
29
+ | Date | Description | Amount |
30
+ Return:
31
+ {
32
+ "headers": [
33
+ { "date": "column 1" },
34
+ { "description": "column 2" },
35
+ { "amount": "column 3" }
36
+ ]
37
+ }
38
+
39
+ ❌ If no table with transaction data is present, return:
40
+ {
41
+ "headers": []
42
+ }
43
+
44
+ Do not return anything else.
@@ -9,6 +9,7 @@ Your task is to analyze the provided image and extract all meaningful data as st
9
9
  - Use **the table headers as keys**.
10
10
  - if the table headers are not present use:
11
11
  #### TABLE HEADERS ####
12
+ - if i have just told you to use "TABLE HEADERS", ignore that instruction
12
13
  - if there are multiple sections, only include the row data in transactions
13
14
  - Every row should have the consistent headers
14
15
  - If content is unclear, use: `"[Unclear]"`.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my-aws-helpers
3
- Version: 3.0.0.dev3
3
+ Version: 3.0.0.dev5
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -21,5 +21,6 @@ my_aws_helpers.egg-info/zip-safe
21
21
  my_aws_helpers/prompts/__init__.py
22
22
  my_aws_helpers/prompts/markdown_system_prompt.txt
23
23
  my_aws_helpers/prompts/transactions_headers_prompt.txt
24
+ my_aws_helpers/prompts/transactions_headers_prompt_v2.txt
24
25
  my_aws_helpers/prompts/transactions_prompt.txt
25
26
  tests/test_event.py
@@ -3,7 +3,7 @@ from setuptools import find_namespace_packages, setup
3
3
 
4
4
  base_path = os.path.abspath(os.path.dirname(__file__))
5
5
 
6
- version = "3.0.0.dev3"
6
+ version = "3.0.0.dev5"
7
7
 
8
8
  setup(
9
9
  name="my_aws_helpers",