my-aws-helpers 2.6.4__tar.gz → 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of my-aws-helpers might be problematic. Click here for more details.

Files changed (27) hide show
  1. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/PKG-INFO +1 -1
  2. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers/bedrock.py +24 -4
  3. my_aws_helpers-3.0.0/my_aws_helpers/prompts/transactions_headers_prompt.txt +32 -0
  4. my_aws_helpers-3.0.0/my_aws_helpers/prompts/transactions_headers_prompt_v2.txt +55 -0
  5. my_aws_helpers-2.6.4/my_aws_helpers/prompts/json_system_prompt.txt → my_aws_helpers-3.0.0/my_aws_helpers/prompts/transactions_prompt.txt +4 -1
  6. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers.egg-info/PKG-INFO +1 -1
  7. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers.egg-info/SOURCES.txt +3 -1
  8. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/setup.py +1 -1
  9. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/MANIFEST.in +0 -0
  10. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/README.md +0 -0
  11. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers/api.py +0 -0
  12. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers/auth.py +0 -0
  13. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers/cognito.py +0 -0
  14. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers/dynamo.py +0 -0
  15. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers/errors.py +0 -0
  16. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers/event.py +0 -0
  17. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers/logging.py +0 -0
  18. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers/prompts/__init__.py +0 -0
  19. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers/prompts/markdown_system_prompt.txt +0 -0
  20. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers/s3.py +0 -0
  21. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers/sfn.py +0 -0
  22. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
  23. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers.egg-info/requires.txt +0 -0
  24. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers.egg-info/top_level.txt +0 -0
  25. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/my_aws_helpers.egg-info/zip-safe +0 -0
  26. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/setup.cfg +0 -0
  27. {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0}/tests/test_event.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my_aws_helpers
3
- Version: 2.6.4
3
+ Version: 3.0.0
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -12,7 +12,9 @@ import concurrent.futures
12
12
  from dataclasses import dataclass
13
13
 
14
14
  class PromptType(str, Enum):
15
- json = "json_system_prompt.txt"
15
+ transaction_headers = "transactions_headers_prompt_v2.txt"
16
+ transactions = "transactions_prompt.txt"
17
+ # json = "json_system_prompt.txt"
16
18
  markdown = "markdown_system_prompt.txt"
17
19
 
18
20
 
@@ -49,7 +51,7 @@ class OCRResult:
49
51
  class Bedrock:
50
52
  def __init__(
51
53
  self,
52
- model_id: str = "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
54
+ model_id: str = "anthropic.claude-3-5-sonnet-20241022-v2:0",
53
55
  logger = None,
54
56
  ):
55
57
 
@@ -181,12 +183,30 @@ class Bedrock:
181
183
  except Exception as e:
182
184
  self.logger.error(f"Could not get pix map for page {i}")
183
185
  continue
184
- prompt = self._get_prompt(prompt_type=prompt_type)
186
+ header_ocr_result = None
187
+ if len(image_bytes_list) > 1:
188
+ headers_prompt = self._get_prompt(prompt_type=PromptType.transaction_headers.value)
189
+ for i in range(2):
190
+ # try to get headers from the first or second page
191
+ header_ocr_result = self._ocr(prompt=headers_prompt, image_bytes=image_bytes_list[i])
192
+ if header_ocr_result is None:
193
+ self.logger.info(f"No ocr result returned when getting headers {PromptType.transaction_headers.value}")
194
+ headers = header_ocr_result.content.get("headers")
195
+ if (len(headers) < 1) or (headers is None):
196
+ continue
197
+ else:
198
+ break
199
+
200
+ transactions_prompt = self._get_prompt(prompt_type=prompt_type)
201
+ if header_ocr_result:
202
+ transactions_prompt = transactions_prompt.replace("#### TABLE HEADERS ####", json.dumps(header_ocr_result.content))
203
+
185
204
  self.logger.info("Got Prompt")
186
205
  results = list()
206
+
187
207
  for i, image_bytes in enumerate(image_bytes_list):
188
208
  self.logger.info(f"Starting OCR for page: {i}")
189
- results.append(self._ocr(image_bytes=image_bytes, prompt=prompt))
209
+ results.append(self._ocr(image_bytes=image_bytes, prompt=transactions_prompt))
190
210
  return results
191
211
  except Exception as e:
192
212
  self.logger.exception(e)
@@ -0,0 +1,32 @@
1
+ You are an intelligent document and image interpreter.
2
+
3
+ Your task is to analyze the provided image and extract the headers of a table that contains transactions.
4
+
5
+ ## Output Requirements
6
+
7
+ - Return **only valid JSON**.
8
+ - If the image contains one or more **tables**, use only the transactions table and represent its headers as a JSON object.
9
+ - **If there is no table containing transactions, return an empty array for the headers**
10
+ - **Do not fabricate** values not visible in the image
11
+ - Do not return anything except the json content
12
+
13
+ ## Example
14
+
15
+ If the image contains this table:
16
+
17
+ | Date | | Price |
18
+ | ------ |----------|-------|
19
+ | June 6 | desc 1 | $2.00 |
20
+ | June 5 | misc 2 | $1.70 |
21
+ | | item x | $1.50 |
22
+
23
+ Return:
24
+
25
+ ```json
26
+ {
27
+ "headers": [
28
+ { "date": "column 1"},
29
+ { "description": "column 2"},
30
+ { "price": "column 3"}
31
+ ]
32
+ }
@@ -0,0 +1,55 @@
1
+ You are an intelligent document and image interpreter.
2
+
3
+ Your task is to extract the headers of a table **only if it represents financial transactions**, such as those with dates, descriptions, amounts, or balances.
4
+
5
+ ## Output Format (JSON only)
6
+
7
+ Return a JSON object in the following structure:
8
+
9
+ {
10
+ "headers": [
11
+ { "header_name": "column number" }
12
+ ]
13
+ }
14
+
15
+ ## Rules
16
+ - DO NOT USE DETAILS sections, DO NOT use Account Summary, Do NOT use Account Details, DO NOT use payment details
17
+ - ONLY use tables that contain transactions to acquire headers; if you cannot find a table that contains a list of transactions, return:
18
+ {
19
+ "headers": []
20
+ }
21
+ - Only return headers **if there is a table with transaction-like data** (e.g. purchases, sales, account activity).
22
+ - There will likely be MORE THAN 1 transaction in this table; if that is not the case, return:
23
+ {
24
+ "headers": []
25
+ }
26
+ - If no such table exists, return:
27
+ {
28
+ "headers": []
29
+ }
30
+ - The output must be strictly valid JSON. Do not include markdown, comments, or extra text.
31
+ - Do not fabricate or infer column names. Only return headers that are clearly **visible** in the image.
32
+ - Do not guess. If unsure or incomplete, treat it as missing and return an empty array.
33
+
34
+ ## Examples
35
+
36
+ ✅ If the image contains a table with column headers like:
37
+ | Date | Description | Amount |
38
+ Return:
39
+ {
40
+ "headers": [
41
+ { "date": "column 1" },
42
+ { "description": "column 2" },
43
+ { "amount": "column 3" }
44
+ ]
45
+ }
46
+
47
+ ❌ If no table with transaction data is present, return:
48
+ {
49
+ "headers": []
50
+ }
51
+
52
+ Do not return anything else, if you are not sure, return:
53
+ {
54
+ "headers": []
55
+ }
@@ -7,10 +7,13 @@ Your task is to analyze the provided image and extract all meaningful data as st
7
7
  - Return **only valid JSON**.
8
8
  - If the image contains one or more **tables**, represent each row as a JSON object.
9
9
  - Use **the table headers as keys**.
10
+ - if the table headers are not present use:
11
+ #### TABLE HEADERS ####
12
+ - if i have just told you to use "TABLE HEADERS", ignore that instruction
10
13
  - if there are multiple sections, only include the row data in transactions
11
14
  - Every row should have the consistent headers
12
15
  - If content is unclear, use: `"[Unclear]"`.
13
- - **Do not fabricate** values not visible in the image.
16
+ - **Do not fabricate** values not visible in the image. If the image is empty, return an empty array for the transactions
14
17
  - if a description contains 2 lines of text, only include the most important text, and keep that text in 1 key in the json response; do not split it
15
18
  - Do not return anything except the json content
16
19
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my-aws-helpers
3
- Version: 2.6.4
3
+ Version: 3.0.0
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -19,6 +19,8 @@ my_aws_helpers.egg-info/requires.txt
19
19
  my_aws_helpers.egg-info/top_level.txt
20
20
  my_aws_helpers.egg-info/zip-safe
21
21
  my_aws_helpers/prompts/__init__.py
22
- my_aws_helpers/prompts/json_system_prompt.txt
23
22
  my_aws_helpers/prompts/markdown_system_prompt.txt
23
+ my_aws_helpers/prompts/transactions_headers_prompt.txt
24
+ my_aws_helpers/prompts/transactions_headers_prompt_v2.txt
25
+ my_aws_helpers/prompts/transactions_prompt.txt
24
26
  tests/test_event.py
@@ -3,7 +3,7 @@ from setuptools import find_namespace_packages, setup
3
3
 
4
4
  base_path = os.path.abspath(os.path.dirname(__file__))
5
5
 
6
- version = "2.6.4"
6
+ version = "3.0.0"
7
7
 
8
8
  setup(
9
9
  name="my_aws_helpers",
File without changes
File without changes