my-aws-helpers 2.6.3__tar.gz → 2.6.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of my-aws-helpers might be problematic. Click here for more details.
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/PKG-INFO +1 -1
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/bedrock.py +27 -32
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers.egg-info/PKG-INFO +1 -1
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/setup.py +1 -1
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/MANIFEST.in +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/README.md +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/api.py +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/auth.py +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/cognito.py +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/dynamo.py +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/errors.py +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/event.py +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/logging.py +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/prompts/__init__.py +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/prompts/json_system_prompt.txt +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/prompts/markdown_system_prompt.txt +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/s3.py +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/sfn.py +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers.egg-info/SOURCES.txt +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers.egg-info/requires.txt +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers.egg-info/top_level.txt +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers.egg-info/zip-safe +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/setup.cfg +0 -0
- {my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/tests/test_event.py +0 -0
|
@@ -10,12 +10,6 @@ from enum import Enum
|
|
|
10
10
|
import pymupdf
|
|
11
11
|
import concurrent.futures
|
|
12
12
|
from dataclasses import dataclass
|
|
13
|
-
from my_aws_helpers.logging import select_powertools_logger
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
logger = select_powertools_logger('bedrock-boy')
|
|
17
|
-
|
|
18
|
-
logger.info("Got logger")
|
|
19
13
|
|
|
20
14
|
class PromptType(str, Enum):
|
|
21
15
|
json = "json_system_prompt.txt"
|
|
@@ -55,10 +49,12 @@ class OCRResult:
|
|
|
55
49
|
class Bedrock:
|
|
56
50
|
def __init__(
|
|
57
51
|
self,
|
|
58
|
-
model_id: str = "
|
|
52
|
+
model_id: str = "anthropic.claude-3-5-sonnet-20241022-v2:0",
|
|
53
|
+
logger = None,
|
|
59
54
|
):
|
|
60
55
|
|
|
61
56
|
self.session = Bedrock._set_session_params()
|
|
57
|
+
self.logger = logger
|
|
62
58
|
region_name = "ap-southeast-2"
|
|
63
59
|
if self.session is None:
|
|
64
60
|
self.session = boto3.Session(region_name = region_name)
|
|
@@ -87,7 +83,7 @@ class Bedrock:
|
|
|
87
83
|
region_name=region_name
|
|
88
84
|
)
|
|
89
85
|
except Exception as e:
|
|
90
|
-
|
|
86
|
+
print(e)
|
|
91
87
|
return None
|
|
92
88
|
|
|
93
89
|
def _get_prompt(self, prompt_type: str) -> Optional[str]:
|
|
@@ -100,7 +96,7 @@ class Bedrock:
|
|
|
100
96
|
prompt = f.read()
|
|
101
97
|
return prompt
|
|
102
98
|
except Exception as e:
|
|
103
|
-
logger.exception(f"Failed to get {prompt_type} prompt due to {e}")
|
|
99
|
+
self.logger.exception(f"Failed to get {prompt_type} prompt due to {e}")
|
|
104
100
|
return None
|
|
105
101
|
|
|
106
102
|
def _ocr(
|
|
@@ -127,13 +123,13 @@ class Bedrock:
|
|
|
127
123
|
]
|
|
128
124
|
retries = 3
|
|
129
125
|
for i in range(retries):
|
|
130
|
-
logger.info(f"Attempt number {i} for {self.model_id} converse")
|
|
126
|
+
self.logger.info(f"Attempt number {i} for {self.model_id} converse")
|
|
131
127
|
try:
|
|
132
128
|
response = self.client.converse(modelId = self.model_id, messages = message, system = system_prompt)
|
|
133
129
|
if response['ResponseMetadata']['HTTPStatusCode'] == 200:
|
|
134
130
|
break
|
|
135
131
|
except Exception as e:
|
|
136
|
-
logger.exception(f"Error during conversation due to {e}")
|
|
132
|
+
self.logger.exception(f"Error during conversation due to {e}")
|
|
137
133
|
if i >= len(retries) - 1: raise Exception(e)
|
|
138
134
|
continue
|
|
139
135
|
|
|
@@ -152,7 +148,7 @@ class Bedrock:
|
|
|
152
148
|
results = list()
|
|
153
149
|
prompt = self._get_prompt(prompt_type=prompt_type)
|
|
154
150
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
155
|
-
logger.info("Some log")
|
|
151
|
+
self.logger.info("Some log")
|
|
156
152
|
execution_futures = {
|
|
157
153
|
executor.submit(
|
|
158
154
|
self._ocr,
|
|
@@ -172,28 +168,27 @@ class Bedrock:
|
|
|
172
168
|
prompt_type: str,
|
|
173
169
|
zoom: int = 7,
|
|
174
170
|
) -> List[OCRResult]:
|
|
175
|
-
logger.info("Getting OCR Results")
|
|
176
171
|
try:
|
|
172
|
+
self.logger.info("Getting OCR Results")
|
|
177
173
|
document = pymupdf.open(stream=pdf_bytes, filetype="pdf")
|
|
174
|
+
pages: List[pymupdf.Page] = [p for p in document]
|
|
175
|
+
|
|
176
|
+
image_bytes_list: List[bytes] = list()
|
|
177
|
+
for i, p in enumerate(pages):
|
|
178
|
+
try:
|
|
179
|
+
image_bytes: bytes = p.get_pixmap(matrix = pymupdf.Matrix(zoom, zoom)).tobytes("png")
|
|
180
|
+
image_bytes_list.append(image_bytes)
|
|
181
|
+
except Exception as e:
|
|
182
|
+
self.logger.error(f"Could not get pix map for page {i}")
|
|
183
|
+
continue
|
|
184
|
+
prompt = self._get_prompt(prompt_type=prompt_type)
|
|
185
|
+
self.logger.info("Got Prompt")
|
|
186
|
+
results = list()
|
|
187
|
+
for i, image_bytes in enumerate(image_bytes_list):
|
|
188
|
+
self.logger.info(f"Starting OCR for page: {i}")
|
|
189
|
+
results.append(self._ocr(image_bytes=image_bytes, prompt=prompt))
|
|
190
|
+
return results
|
|
178
191
|
except Exception as e:
|
|
179
|
-
logger.exception(
|
|
192
|
+
self.logger.exception(e)
|
|
180
193
|
return []
|
|
181
|
-
|
|
182
|
-
pages: List[pymupdf.Page] = [p for p in document]
|
|
183
|
-
|
|
184
|
-
image_bytes_list: List[bytes] = list()
|
|
185
|
-
for i, p in enumerate(pages):
|
|
186
|
-
try:
|
|
187
|
-
image_bytes: bytes = p.get_pixmap(matrix = pymupdf.Matrix(zoom, zoom)).tobytes("png")
|
|
188
|
-
image_bytes_list.append(image_bytes)
|
|
189
|
-
except Exception as e:
|
|
190
|
-
logger.error(f"Could not get pix map for page {i}")
|
|
191
|
-
continue
|
|
192
|
-
prompt = self._get_prompt(prompt_type=prompt_type)
|
|
193
|
-
logger.info("Got Prompt")
|
|
194
|
-
results = list()
|
|
195
|
-
for i, image_bytes in enumerate(image_bytes_list):
|
|
196
|
-
logger.info(f"Starting OCR for page: {i}")
|
|
197
|
-
results.append(self._ocr(image_bytes=image_bytes, prompt=prompt))
|
|
198
|
-
return results
|
|
199
194
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{my_aws_helpers-2.6.3 → my_aws_helpers-2.6.5}/my_aws_helpers/prompts/markdown_system_prompt.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|