my-aws-helpers 3.1.0.dev2__tar.gz → 3.1.0.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/PKG-INFO +1 -1
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/bedrock.py +11 -13
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers.egg-info/PKG-INFO +1 -1
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/setup.py +1 -1
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/MANIFEST.in +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/README.md +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/api.py +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/auth.py +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/cognito.py +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/dynamo.py +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/errors.py +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/event.py +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/logging.py +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/prompts/__init__.py +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/prompts/markdown_system_prompt.txt +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/prompts/transactions_headers_prompt.txt +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/prompts/transactions_headers_prompt_v2.txt +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/prompts/transactions_prompt.txt +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/s3.py +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers/sfn.py +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers.egg-info/SOURCES.txt +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers.egg-info/requires.txt +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers.egg-info/top_level.txt +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers.egg-info/zip-safe +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/setup.cfg +0 -0
- {my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/tests/test_event.py +0 -0
|
@@ -149,22 +149,16 @@ class Bedrock:
|
|
|
149
149
|
prompt: str,
|
|
150
150
|
max_workers: int = 10,
|
|
151
151
|
):
|
|
152
|
-
|
|
152
|
+
execution_futures = []
|
|
153
153
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
154
154
|
for i, img in enumerate(image_bytes_list):
|
|
155
155
|
self.logger.info(f"Starting OCR for page: {i}")
|
|
156
|
-
time.sleep(self.sleep_time)
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
)
|
|
163
|
-
}
|
|
164
|
-
for future in concurrent.futures.as_completed(execution_futures):
|
|
165
|
-
result = future.result()
|
|
166
|
-
if result:
|
|
167
|
-
results.append(result)
|
|
156
|
+
time.sleep(self.sleep_time) # Stagger start time
|
|
157
|
+
future = executor.submit(self._ocr, prompt=prompt, image_bytes=img)
|
|
158
|
+
execution_futures.append(future)
|
|
159
|
+
|
|
160
|
+
# Wait for all tasks and collect results in order of submission
|
|
161
|
+
results = [future.result() for future in execution_futures if future.result() is not None]
|
|
168
162
|
return results
|
|
169
163
|
|
|
170
164
|
def get_ocr_result(
|
|
@@ -186,6 +180,7 @@ class Bedrock:
|
|
|
186
180
|
except Exception as e:
|
|
187
181
|
self.logger.error(f"Could not get pix map for page {i}")
|
|
188
182
|
continue
|
|
183
|
+
skip_page_zero = False
|
|
189
184
|
header_ocr_result = None
|
|
190
185
|
if len(image_bytes_list) > 1:
|
|
191
186
|
headers_prompt = self._get_prompt(prompt_type=PromptType.transaction_headers.value)
|
|
@@ -196,6 +191,7 @@ class Bedrock:
|
|
|
196
191
|
self.logger.info(f"No ocr result returned when getting headers {PromptType.transaction_headers.value}")
|
|
197
192
|
headers = header_ocr_result.content.get("headers")
|
|
198
193
|
if (len(headers) < 1) or (headers is None):
|
|
194
|
+
skip_page_zero = True
|
|
199
195
|
continue
|
|
200
196
|
else:
|
|
201
197
|
break
|
|
@@ -207,6 +203,8 @@ class Bedrock:
|
|
|
207
203
|
self.logger.info("Got Prompt")
|
|
208
204
|
results = list()
|
|
209
205
|
|
|
206
|
+
if skip_page_zero:
|
|
207
|
+
image_bytes_list = image_bytes_list[1:] #page zero often has account summary info
|
|
210
208
|
results = self._parallel_ocr(image_bytes_list=image_bytes_list, prompt=transactions_prompt)
|
|
211
209
|
|
|
212
210
|
# for i, image_bytes in enumerate(image_bytes_list):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers.egg-info/requires.txt
RENAMED
|
File without changes
|
{my_aws_helpers-3.1.0.dev2 → my_aws_helpers-3.1.0.dev4}/my_aws_helpers.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|