my-aws-helpers 2.6.4__tar.gz → 3.0.0.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of my-aws-helpers might be problematic. Click here for more details.
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/PKG-INFO +1 -1
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/bedrock.py +16 -4
- my_aws_helpers-3.0.0.dev1/my_aws_helpers/prompts/transactions_headers_prompt.txt +37 -0
- my_aws_helpers-2.6.4/my_aws_helpers/prompts/json_system_prompt.txt → my_aws_helpers-3.0.0.dev1/my_aws_helpers/prompts/transactions_prompt.txt +3 -1
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers.egg-info/PKG-INFO +1 -1
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers.egg-info/SOURCES.txt +2 -1
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/setup.py +1 -1
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/MANIFEST.in +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/README.md +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/api.py +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/auth.py +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/cognito.py +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/dynamo.py +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/errors.py +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/event.py +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/logging.py +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/prompts/__init__.py +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/prompts/markdown_system_prompt.txt +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/s3.py +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/sfn.py +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers.egg-info/requires.txt +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers.egg-info/top_level.txt +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers.egg-info/zip-safe +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/setup.cfg +0 -0
- {my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/tests/test_event.py +0 -0
|
@@ -12,7 +12,9 @@ import concurrent.futures
|
|
|
12
12
|
from dataclasses import dataclass
|
|
13
13
|
|
|
14
14
|
class PromptType(str, Enum):
|
|
15
|
-
|
|
15
|
+
transaction_headers = "transactions_headers_prompt.txt"
|
|
16
|
+
transactions = "transactions_prompt.txt"
|
|
17
|
+
# json = "json_system_prompt.txt"
|
|
16
18
|
markdown = "markdown_system_prompt.txt"
|
|
17
19
|
|
|
18
20
|
|
|
@@ -49,7 +51,7 @@ class OCRResult:
|
|
|
49
51
|
class Bedrock:
|
|
50
52
|
def __init__(
|
|
51
53
|
self,
|
|
52
|
-
model_id: str = "
|
|
54
|
+
model_id: str = "anthropic.claude-3-5-sonnet-20241022-v2:0",
|
|
53
55
|
logger = None,
|
|
54
56
|
):
|
|
55
57
|
|
|
@@ -181,12 +183,22 @@ class Bedrock:
|
|
|
181
183
|
except Exception as e:
|
|
182
184
|
self.logger.error(f"Could not get pix map for page {i}")
|
|
183
185
|
continue
|
|
184
|
-
|
|
186
|
+
if len(image_bytes_list) > 1:
|
|
187
|
+
headers_prompt = self._get_prompt(prompt_type=PromptType.transaction_headers.value)
|
|
188
|
+
ocr_result = self._ocr(prompt=headers_prompt, image_bytes=image_bytes_list[0])
|
|
189
|
+
if ocr_result is None:
|
|
190
|
+
self.logger.info(f"No ocr result returned when getting headers {PromptType.transaction_headers.value}")
|
|
191
|
+
headers = ocr_result.content
|
|
192
|
+
|
|
193
|
+
transactions_prompt = self._get_prompt(prompt_type=prompt_type)
|
|
194
|
+
transactions_prompt.replace("#### TABLE HEADERS ####", headers)
|
|
195
|
+
|
|
185
196
|
self.logger.info("Got Prompt")
|
|
186
197
|
results = list()
|
|
198
|
+
|
|
187
199
|
for i, image_bytes in enumerate(image_bytes_list):
|
|
188
200
|
self.logger.info(f"Starting OCR for page: {i}")
|
|
189
|
-
results.append(self._ocr(image_bytes=image_bytes, prompt=
|
|
201
|
+
results.append(self._ocr(image_bytes=image_bytes, prompt=transactions_prompt))
|
|
190
202
|
return results
|
|
191
203
|
except Exception as e:
|
|
192
204
|
self.logger.exception(e)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
You are an intelligent document and image interpreter.
|
|
2
|
+
|
|
3
|
+
Your task is to analyze the provided image and extract transaction table headers.
|
|
4
|
+
|
|
5
|
+
## Output Requirements
|
|
6
|
+
|
|
7
|
+
- Return **only valid JSON**.
|
|
8
|
+
- If the image contains one or more **tables**, use only the transactions table and represent its headers as a JSON object.
|
|
9
|
+
- Use **the table headers as keys**.
|
|
10
|
+
- if the table headers are not present use:
|
|
11
|
+
#### TABLE HEADERS ####
|
|
12
|
+
- if there are multiple sections, only include the row data in transactions
|
|
13
|
+
- Every row should have consistent headers
|
|
14
|
+
- **Do not fabricate** values not visible in the image
|
|
15
|
+
- if a description contains 2 lines of text, only include the most important text and the text should reside in 1 key in json response, do not split it
|
|
16
|
+
- Do not return anything except the json content
|
|
17
|
+
|
|
18
|
+
## Example
|
|
19
|
+
|
|
20
|
+
If the image contains this table:
|
|
21
|
+
|
|
22
|
+
| Date | | Price |
|
|
23
|
+
| ------ |----------|-------|
|
|
24
|
+
| June 6 | desc 1 | $2.00 |
|
|
25
|
+
| June 5 | misc 2 | $1.70 |
|
|
26
|
+
| | item x | $1.50 |
|
|
27
|
+
|
|
28
|
+
Return:
|
|
29
|
+
|
|
30
|
+
```json
|
|
31
|
+
{
|
|
32
|
+
"headers": [
|
|
33
|
+
{ "date": "column 1"},
|
|
34
|
+
{ "description": "column 2"},
|
|
35
|
+
{ "price": "column 3"}
|
|
36
|
+
]
|
|
37
|
+
}
|
|
@@ -7,10 +7,12 @@ Your task is to analyze the provided image and extract all meaningful data as st
|
|
|
7
7
|
- Return **only valid JSON**.
|
|
8
8
|
- If the image contains one or more **tables**, represent each row as a JSON object.
|
|
9
9
|
- Use **the table headers as keys**.
|
|
10
|
+
- if the table headers are not present use:
|
|
11
|
+
#### TABLE HEADERS ####
|
|
10
12
|
- if there are multiple sections, only include the row data in transactions
|
|
11
13
|
- Every row should have the consistent headers
|
|
12
14
|
- If content is unclear, use: `"[Unclear]"`.
|
|
13
|
-
- **Do not fabricate** values not visible in the image
|
|
15
|
+
- **Do not fabricate** values not visible in the image. If the image is empty, return an empty array for the transactions
|
|
14
16
|
- if a description contains 2 lines of text, only include the most important text and the text should reside in 1 key in json response, do not split it
|
|
15
17
|
- Do not return anything except the json content
|
|
16
18
|
|
|
@@ -19,6 +19,7 @@ my_aws_helpers.egg-info/requires.txt
|
|
|
19
19
|
my_aws_helpers.egg-info/top_level.txt
|
|
20
20
|
my_aws_helpers.egg-info/zip-safe
|
|
21
21
|
my_aws_helpers/prompts/__init__.py
|
|
22
|
-
my_aws_helpers/prompts/json_system_prompt.txt
|
|
23
22
|
my_aws_helpers/prompts/markdown_system_prompt.txt
|
|
23
|
+
my_aws_helpers/prompts/transactions_headers_prompt.txt
|
|
24
|
+
my_aws_helpers/prompts/transactions_prompt.txt
|
|
24
25
|
tests/test_event.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers/prompts/markdown_system_prompt.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{my_aws_helpers-2.6.4 → my_aws_helpers-3.0.0.dev1}/my_aws_helpers.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|