my-aws-helpers 2.6.5__tar.gz → 3.0.0.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of my-aws-helpers might be problematic. Click here for more details.

Files changed (26)
  1. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/PKG-INFO +1 -1
  2. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers/bedrock.py +15 -3
  3. my_aws_helpers-3.0.0.dev2/my_aws_helpers/prompts/transactions_headers_prompt.txt +38 -0
  4. my_aws_helpers-2.6.5/my_aws_helpers/prompts/json_system_prompt.txt → my_aws_helpers-3.0.0.dev2/my_aws_helpers/prompts/transactions_prompt.txt +3 -1
  5. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers.egg-info/PKG-INFO +1 -1
  6. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers.egg-info/SOURCES.txt +2 -1
  7. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/setup.py +1 -1
  8. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/MANIFEST.in +0 -0
  9. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/README.md +0 -0
  10. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers/api.py +0 -0
  11. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers/auth.py +0 -0
  12. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers/cognito.py +0 -0
  13. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers/dynamo.py +0 -0
  14. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers/errors.py +0 -0
  15. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers/event.py +0 -0
  16. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers/logging.py +0 -0
  17. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers/prompts/__init__.py +0 -0
  18. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers/prompts/markdown_system_prompt.txt +0 -0
  19. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers/s3.py +0 -0
  20. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers/sfn.py +0 -0
  21. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
  22. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers.egg-info/requires.txt +0 -0
  23. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers.egg-info/top_level.txt +0 -0
  24. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/my_aws_helpers.egg-info/zip-safe +0 -0
  25. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/setup.cfg +0 -0
  26. {my_aws_helpers-2.6.5 → my_aws_helpers-3.0.0.dev2}/tests/test_event.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my_aws_helpers
3
- Version: 2.6.5
3
+ Version: 3.0.0.dev2
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -12,7 +12,9 @@ import concurrent.futures
12
12
  from dataclasses import dataclass
13
13
 
14
14
  class PromptType(str, Enum):
15
- json = "json_system_prompt.txt"
15
+ transaction_headers = "transactions_headers_prompt.txt"
16
+ transactions = "transactions_prompt.txt"
17
+ # json = "json_system_prompt.txt"
16
18
  markdown = "markdown_system_prompt.txt"
17
19
 
18
20
 
@@ -181,12 +183,22 @@ class Bedrock:
181
183
  except Exception as e:
182
184
  self.logger.error(f"Could not get pix map for page {i}")
183
185
  continue
184
- prompt = self._get_prompt(prompt_type=prompt_type)
186
+ if len(image_bytes_list) > 1:
187
+ headers_prompt = self._get_prompt(prompt_type=PromptType.transaction_headers.value)
188
+ ocr_result = self._ocr(prompt=headers_prompt, image_bytes=image_bytes_list[0])
189
+ if ocr_result is None:
190
+ self.logger.info(f"No ocr result returned when getting headers {PromptType.transaction_headers.value}")
191
+ headers = ocr_result.content
192
+
193
+ transactions_prompt = self._get_prompt(prompt_type=prompt_type)
194
+ transactions_prompt.replace("#### TABLE HEADERS ####", json.dumps(headers))
195
+
185
196
  self.logger.info("Got Prompt")
186
197
  results = list()
198
+
187
199
  for i, image_bytes in enumerate(image_bytes_list):
188
200
  self.logger.info(f"Starting OCR for page: {i}")
189
- results.append(self._ocr(image_bytes=image_bytes, prompt=prompt))
201
+ results.append(self._ocr(image_bytes=image_bytes, prompt=transactions_prompt))
190
202
  return results
191
203
  except Exception as e:
192
204
  self.logger.exception(e)
@@ -0,0 +1,38 @@
1
+ You are an intelligent document and image interpreter.
2
+
3
+ Your task is to analyze the provided image and extract transaction table headers.
4
+
5
+ ## Output Requirements
6
+
7
+ - Return **only valid JSON**.
8
+ - If the image contains one or more **tables**, use only the transactions table represent the headers as a JSON object.
9
+ - Use **the table headers as keys**.
10
+ - if the table headers are not present use:
11
+ #### TABLE HEADERS ####
12
+ - if i have just told you to use "TABLE HEADERS", ignore that instruction
13
+ - if there are multiple sections, only include the row data in transactions
14
+ - Every row should have the consistent headers
15
+ - **Do not fabricate** values not visible in the image
16
+ - if a descripion contains 2 lines of text, only include the most important text and the text should reside in 1 key in json response, do not split it
17
+ - Do not return anything except the json content
18
+
19
+ ## Example
20
+
21
+ If the image contains this table:
22
+
23
+ | Date | | Price |
24
+ | ------ |----------|-------|
25
+ | June 6 | desc 1 | $2.00 |
26
+ | June 5 | misc 2 | $1.70 |
27
+ | | item x | $1.50 |
28
+
29
+ Return:
30
+
31
+ ```json
32
+ {
33
+ "headers": [
34
+ { "date": "column 1"},
35
+ { "description": "column 2"},
36
+ { "price": "column 3"},
37
+ ]
38
+ }
@@ -7,10 +7,12 @@ Your task is to analyze the provided image and extract all meaningful data as st
7
7
  - Return **only valid JSON**.
8
8
  - If the image contains one or more **tables**, represent each row as a JSON object.
9
9
  - Use **the table headers as keys**.
10
+ - if the table headers are not present use:
11
+ #### TABLE HEADERS ####
10
12
  - if there are multiple sections, only include the row data in transactions
11
13
  - Every row should have the consistent headers
12
14
  - If content is unclear, use: `"[Unclear]"`.
13
- - **Do not fabricate** values not visible in the image.
15
+ - **Do not fabricate** values not visible in the image, if the image is empty, return an empty array for the transactions
14
16
  - if a descripion contains 2 lines of text, only include the most important text and the text should reside in 1 key in json response, do not split it
15
17
  - Do not return anything except the json content
16
18
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my-aws-helpers
3
- Version: 2.6.5
3
+ Version: 3.0.0.dev2
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -19,6 +19,7 @@ my_aws_helpers.egg-info/requires.txt
19
19
  my_aws_helpers.egg-info/top_level.txt
20
20
  my_aws_helpers.egg-info/zip-safe
21
21
  my_aws_helpers/prompts/__init__.py
22
- my_aws_helpers/prompts/json_system_prompt.txt
23
22
  my_aws_helpers/prompts/markdown_system_prompt.txt
23
+ my_aws_helpers/prompts/transactions_headers_prompt.txt
24
+ my_aws_helpers/prompts/transactions_prompt.txt
24
25
  tests/test_event.py
@@ -3,7 +3,7 @@ from setuptools import find_namespace_packages, setup
3
3
 
4
4
  base_path = os.path.abspath(os.path.dirname(__file__))
5
5
 
6
- version = "2.6.5"
6
+ version = "3.0.0.dev2"
7
7
 
8
8
  setup(
9
9
  name="my_aws_helpers",