my-aws-helpers 3.0.0.dev8__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of my-aws-helpers might be problematic. Click here for more details.
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/PKG-INFO +1 -1
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/bedrock.py +23 -20
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/PKG-INFO +1 -1
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/setup.py +1 -1
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/MANIFEST.in +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/README.md +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/api.py +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/auth.py +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/cognito.py +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/dynamo.py +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/errors.py +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/event.py +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/logging.py +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/prompts/__init__.py +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/prompts/markdown_system_prompt.txt +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/prompts/transactions_headers_prompt.txt +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/prompts/transactions_headers_prompt_v2.txt +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/prompts/transactions_prompt.txt +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/s3.py +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/sfn.py +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/SOURCES.txt +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/requires.txt +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/top_level.txt +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/zip-safe +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/setup.cfg +0 -0
- {my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/tests/test_event.py +0 -0
|
@@ -51,8 +51,9 @@ class OCRResult:
|
|
|
51
51
|
class Bedrock:
|
|
52
52
|
def __init__(
|
|
53
53
|
self,
|
|
54
|
-
model_id: str = "anthropic.claude-3-5-sonnet-20241022-v2:0",
|
|
54
|
+
model_id: str = "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
|
|
55
55
|
logger = None,
|
|
56
|
+
sleep_time: float = 1.0,
|
|
56
57
|
):
|
|
57
58
|
|
|
58
59
|
self.session = Bedrock._set_session_params()
|
|
@@ -60,7 +61,7 @@ class Bedrock:
|
|
|
60
61
|
region_name = "ap-southeast-2"
|
|
61
62
|
if self.session is None:
|
|
62
63
|
self.session = boto3.Session(region_name = region_name)
|
|
63
|
-
|
|
64
|
+
self.sleep_time = sleep_time
|
|
64
65
|
|
|
65
66
|
custom_config = Config(
|
|
66
67
|
retries={
|
|
@@ -133,6 +134,7 @@ class Bedrock:
|
|
|
133
134
|
except Exception as e:
|
|
134
135
|
self.logger.exception(f"Error during conversation due to {e}")
|
|
135
136
|
if i >= len(retries) - 1: raise Exception(e)
|
|
137
|
+
time.sleep(self.sleep_time)
|
|
136
138
|
continue
|
|
137
139
|
|
|
138
140
|
result = {}
|
|
@@ -144,24 +146,19 @@ class Bedrock:
|
|
|
144
146
|
def _parallel_ocr(
|
|
145
147
|
self,
|
|
146
148
|
image_bytes_list: List[bytes],
|
|
147
|
-
|
|
149
|
+
prompt: str,
|
|
148
150
|
max_workers: int = 10,
|
|
149
151
|
):
|
|
150
|
-
|
|
151
|
-
prompt = self._get_prompt(prompt_type=prompt_type)
|
|
152
|
+
execution_futures = []
|
|
152
153
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
for future in concurrent.futures.as_completed(execution_futures):
|
|
162
|
-
result = future.result()
|
|
163
|
-
if result:
|
|
164
|
-
results.append(result)
|
|
154
|
+
for i, img in enumerate(image_bytes_list):
|
|
155
|
+
self.logger.info(f"Starting OCR for page: {i}")
|
|
156
|
+
time.sleep(self.sleep_time) # Stagger start time
|
|
157
|
+
future = executor.submit(self._ocr, prompt=prompt, image_bytes=img)
|
|
158
|
+
execution_futures.append(future)
|
|
159
|
+
|
|
160
|
+
# Wait for all tasks and collect results in order of submission
|
|
161
|
+
results = [future.result() for future in execution_futures if future.result() is not None]
|
|
165
162
|
return results
|
|
166
163
|
|
|
167
164
|
def get_ocr_result(
|
|
@@ -183,6 +180,7 @@ class Bedrock:
|
|
|
183
180
|
except Exception as e:
|
|
184
181
|
self.logger.error(f"Could not get pix map for page {i}")
|
|
185
182
|
continue
|
|
183
|
+
skip_page_zero = False
|
|
186
184
|
header_ocr_result = None
|
|
187
185
|
if len(image_bytes_list) > 1:
|
|
188
186
|
headers_prompt = self._get_prompt(prompt_type=PromptType.transaction_headers.value)
|
|
@@ -193,6 +191,7 @@ class Bedrock:
|
|
|
193
191
|
self.logger.info(f"No ocr result returned when getting headers {PromptType.transaction_headers.value}")
|
|
194
192
|
headers = header_ocr_result.content.get("headers")
|
|
195
193
|
if (len(headers) < 1) or (headers is None):
|
|
194
|
+
skip_page_zero = True
|
|
196
195
|
continue
|
|
197
196
|
else:
|
|
198
197
|
break
|
|
@@ -204,9 +203,13 @@ class Bedrock:
|
|
|
204
203
|
self.logger.info("Got Prompt")
|
|
205
204
|
results = list()
|
|
206
205
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
206
|
+
if skip_page_zero:
|
|
207
|
+
image_bytes_list = image_bytes_list[1:] #page zero often has account summary info
|
|
208
|
+
results = self._parallel_ocr(image_bytes_list=image_bytes_list, prompt=transactions_prompt)
|
|
209
|
+
|
|
210
|
+
# for i, image_bytes in enumerate(image_bytes_list):
|
|
211
|
+
# self.logger.info(f"Starting OCR for page: {i}")
|
|
212
|
+
# results.append(self._ocr(image_bytes=image_bytes, prompt=transactions_prompt))
|
|
210
213
|
return results
|
|
211
214
|
except Exception as e:
|
|
212
215
|
self.logger.exception(e)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/prompts/markdown_system_prompt.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers/prompts/transactions_prompt.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{my_aws_helpers-3.0.0.dev8 → my_aws_helpers-3.1.0}/my_aws_helpers.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|