my-aws-helpers 2.6.0__tar.gz → 2.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of my-aws-helpers might be problematic. Click here for more details.

Files changed (22) hide show
  1. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/PKG-INFO +1 -1
  2. my_aws_helpers-2.6.2/my_aws_helpers/bedrock.py +199 -0
  3. my_aws_helpers-2.6.2/my_aws_helpers/prompts/__init__.py +0 -0
  4. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers.egg-info/PKG-INFO +1 -1
  5. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers.egg-info/SOURCES.txt +2 -0
  6. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/setup.py +3 -2
  7. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/README.md +0 -0
  8. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers/api.py +0 -0
  9. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers/auth.py +0 -0
  10. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers/cognito.py +0 -0
  11. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers/dynamo.py +0 -0
  12. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers/errors.py +0 -0
  13. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers/event.py +0 -0
  14. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers/logging.py +0 -0
  15. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers/s3.py +0 -0
  16. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers/sfn.py +0 -0
  17. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
  18. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers.egg-info/requires.txt +0 -0
  19. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers.egg-info/top_level.txt +0 -0
  20. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/my_aws_helpers.egg-info/zip-safe +0 -0
  21. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/setup.cfg +0 -0
  22. {my_aws_helpers-2.6.0 → my_aws_helpers-2.6.2}/tests/test_event.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my_aws_helpers
3
- Version: 2.6.0
3
+ Version: 2.6.2
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -0,0 +1,199 @@
1
+ from __future__ import annotations
2
+ import boto3
3
+ from botocore.config import Config
4
+ import json
5
+ import time
6
+ import os
7
+ import io
8
+ from typing import Optional, List, Dict
9
+ from enum import Enum
10
+ import pymupdf
11
+ import concurrent.futures
12
+ from dataclasses import dataclass
13
+ from my_aws_helpers.logging import select_powertools_logger
14
+
15
+
16
# Module-wide logger shared by every class in this file; the name
# "bedrock-boy" is the service name handed to the project's logger factory.
logger = select_powertools_logger("bedrock-boy")

# Emitted once at import time, as soon as the logger exists.
logger.info("Got logger")
19
+
20
class PromptType(str, Enum):
    """File names of the system prompts that can be requested.

    Each value is a file name; ``Bedrock._get_prompt`` looks it up inside the
    ``prompts`` directory that sits next to this module. Because the enum also
    derives from ``str``, a member compares equal to its plain file-name
    string.
    """

    # System prompt stored in json_system_prompt.txt
    json = "json_system_prompt.txt"
    # System prompt stored in markdown_system_prompt.txt
    markdown = "markdown_system_prompt.txt"
23
+
24
+
25
@dataclass
class TokenUsage:
    """Token counts reported for a single model invocation."""

    input_tokens: int
    output_tokens: int
    total_tokens: int

    @classmethod
    def from_dict(cls, data: Dict[str, int]) -> TokenUsage:
        """Build a ``TokenUsage`` from a camelCase usage mapping.

        Args:
            data: Mapping that may contain ``inputTokens``, ``outputTokens``
                and ``totalTokens``. Any key that is absent counts as 0.

        Returns:
            A new ``TokenUsage`` populated from ``data``.
        """
        prompt_side = data.get('inputTokens', 0)
        completion_side = data.get('outputTokens', 0)
        combined = data.get('totalTokens', 0)
        return cls(prompt_side, completion_side, combined)
38
+
39
+
40
@dataclass
class OCRResult:
    """Outcome of running OCR on one page image."""

    content: List[Dict[str, str]]
    token_usage: TokenUsage
    page_number: int

    @classmethod
    def from_dict(cls, data: Dict) -> OCRResult:
        """Build an ``OCRResult`` from a plain dictionary.

        Args:
            data: Mapping with the optional keys ``content``, ``token_usage``
                (itself a mapping understood by ``TokenUsage.from_dict``) and
                ``page_number``.

        Returns:
            A new ``OCRResult``. Missing keys fall back to an empty list,
            an all-zero ``TokenUsage`` and page 0 respectively.
        """
        usage = TokenUsage.from_dict(data.get("token_usage", {}))
        extracted = data.get("content", [])
        page = data.get("page_number", 0)
        return cls(content=extracted, token_usage=usage, page_number=page)
53
+
54
+
55
class Bedrock:
    """Small wrapper around the Bedrock Runtime ``converse`` API for PDF OCR.

    A PDF is rasterised page by page with PyMuPDF, each page image is sent to
    the configured model together with a system prompt loaded from the
    ``prompts`` directory next to this module, and the model's reply is parsed
    into an ``OCRResult``.
    """

    def __init__(
        self,
        model_id: str = "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
        region_name: str = "ap-southeast-2",
    ):
        """Create the boto3 session and the ``bedrock-runtime`` client.

        Args:
            model_id: Model / inference-profile id passed to ``converse``.
            region_name: Region used for the ``bedrock-runtime`` client, and
                for the fallback session when no credentials are found in the
                environment. Defaults to the previously hard-coded region.
        """
        self.session = Bedrock._set_session_params()
        if self.session is None:
            # No explicit credentials in the environment: let boto3 resolve
            # credentials through its default chain.
            self.session = boto3.Session(region_name=region_name)

        custom_config = Config(
            retries={
                # NOTE(review): botocore documents ``max_attempts`` as the
                # number of *retries after* the initial request, so 2 here
                # would mean up to 3 calls in total; ``total_max_attempts``
                # is the setting that includes the initial call. Confirm
                # which one was intended.
                "max_attempts": 2,
                "mode": "standard",  # or 'adaptive'
            }
        )
        # The client region is always ``region_name``, even when the session
        # was built from AWS_DEFAULT_REGION.
        self.client = self.session.client(
            "bedrock-runtime", region_name=region_name, config=custom_config
        )
        self.model_id = model_id

    @staticmethod
    def _set_session_params():
        """Build a session from credentials found in environment variables.

        Reads ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY``,
        ``AWS_SESSION_TOKEN`` and ``AWS_DEFAULT_REGION``.

        Returns:
            A ``boto3.Session`` built from those values, or ``None`` when any
            of them is missing or the session cannot be created (the failure
            is logged, not raised).
        """
        try:
            aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
            aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
            aws_session_token = os.environ["AWS_SESSION_TOKEN"]
            region_name = os.environ["AWS_DEFAULT_REGION"]
            return boto3.Session(
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_secret_access_key,
                aws_session_token=aws_session_token,
                region_name=region_name,
            )
        except Exception as e:
            # Best effort: the caller falls back to a default session.
            logger.exception(e)
            return None

    def _get_prompt(self, prompt_type: str) -> Optional[str]:
        """Load the text of a system prompt.

        Args:
            prompt_type: One of the ``PromptType`` values (a prompt file
                name).

        Returns:
            The prompt text, or ``None`` when the file cannot be read (the
            failure is logged).

        Raises:
            Exception: If ``prompt_type`` is not a ``PromptType`` value.
        """
        if prompt_type not in list(PromptType):
            raise Exception("Error: Invalid prompt type")

        path = os.path.join(os.path.dirname(__file__), "prompts", prompt_type)
        try:
            # Explicit encoding so the result does not depend on the
            # platform's default locale (assumes the prompt files are UTF-8).
            with open(path, "r", encoding="utf-8") as f:
                return f.read()
        except Exception as e:
            logger.exception(f"Failed to get {prompt_type} prompt due to {e}")
            return None

    def _ocr(
        self,
        prompt: str,
        image_bytes: bytes,
        page_number: Optional[int] = 0,
    ) -> Optional[OCRResult]:
        """Send one PNG page image to the model and parse its reply.

        Args:
            prompt: System prompt text.
            image_bytes: PNG-encoded page image.
            page_number: Page index recorded on the returned result.

        Returns:
            The parsed ``OCRResult`` for the page.

        Raises:
            ValueError: If ``prompt`` is empty or ``None``.
            Exception: The last error raised by ``converse`` once all
                attempts are used up, or a generic ``Exception`` when no
                attempt returned HTTP 200. JSON decoding errors from the
                model's reply also propagate.
        """
        if not prompt:
            # _get_prompt returns None when the prompt file is unreadable;
            # fail early with a clear message instead of sending None.
            raise ValueError("Error: A system prompt is required for OCR")

        system_prompt = [{"text": prompt}]
        message = [
            {
                "role": "user",
                "content": [
                    {
                        "image": {
                            "format": "png",
                            "source": {
                                "bytes": image_bytes,
                            },
                        }
                    }
                ],
            }
        ]
        retries = 3
        response = None
        for i in range(retries):
            logger.info(f"Attempt number {i} for {self.model_id} converse")
            try:
                response = self.client.converse(
                    modelId=self.model_id,
                    messages=message,
                    system=system_prompt,
                )
                if response['ResponseMetadata']['HTTPStatusCode'] == 200:
                    break
            except Exception as e:
                logger.exception(f"Error during conversation due to {e}")
                # ``retries`` is an int: compare against it directly. The
                # previous ``len(retries)`` raised TypeError here and hid
                # the real error.
                if i >= retries - 1:
                    raise
                continue
        else:
            # Every attempt completed without a 200 response; do not try to
            # parse an unsuccessful reply.
            raise Exception(
                f"Error: {self.model_id} converse did not succeed after {retries} attempts"
            )

        # NOTE(review): the reply text is always decoded as JSON, which
        # presumably only fits the JSON prompt type -- confirm how the
        # markdown prompt is meant to be handled.
        result = {
            "content": json.loads(response["output"]["message"]["content"][0]["text"]),
            "token_usage": response["usage"],
            "page_number": page_number,
        }
        return OCRResult.from_dict(data=result)

    def _parallel_ocr(
        self,
        image_bytes_list: List[bytes],
        prompt_type: str,
        max_workers: int = 10,
    ):
        """Run ``_ocr`` over several page images using a thread pool.

        Args:
            image_bytes_list: PNG-encoded page images; the position in the
                list is used as the page number.
            prompt_type: One of the ``PromptType`` values.
            max_workers: Size of the thread pool.

        Returns:
            List of ``OCRResult`` ordered by page number.

        Raises:
            Exception: Whatever ``_get_prompt`` or any ``_ocr`` call raises.
        """
        results = []
        prompt = self._get_prompt(prompt_type=prompt_type)
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            logger.info("Some log")
            execution_futures = {
                executor.submit(
                    self._ocr,
                    prompt=prompt,
                    image_bytes=img,
                    page_number=page_number,
                ): page_number
                for page_number, img in enumerate(image_bytes_list)
            }
            for future in concurrent.futures.as_completed(execution_futures):
                result = future.result()
                if result:
                    results.append(result)
        # as_completed yields in completion order; restore page order.
        results.sort(key=lambda r: r.page_number)
        return results

    def get_ocr_result(
        self,
        pdf_bytes: io.BytesIO,
        prompt_type: str,
        zoom: int = 7,
    ) -> List[OCRResult]:
        """OCR every page of a PDF, one page at a time.

        Args:
            pdf_bytes: The PDF content, passed to PyMuPDF as a stream.
            prompt_type: One of the ``PromptType`` values.
            zoom: Scale factor applied on both axes when rasterising a page.

        Returns:
            One ``OCRResult`` per page that could be rasterised, in page
            order and carrying the page's zero-based index within the PDF.
            An empty list when the PDF cannot be opened.

        Raises:
            Exception: Whatever ``_get_prompt`` or ``_ocr`` raises.
        """
        logger.info("Getting OCR Results")
        try:
            document = pymupdf.open(stream=pdf_bytes, filetype="pdf")
        except Exception as e:
            logger.exception(f"Failed to open pdf due to {e}")
            return []

        # (page index, PNG bytes) pairs, so a page that fails to rasterise
        # does not shift the numbering of the pages after it.
        page_images = []
        try:
            for i, p in enumerate(document):
                try:
                    image_bytes = p.get_pixmap(
                        matrix=pymupdf.Matrix(zoom, zoom)
                    ).tobytes("png")
                except Exception:
                    logger.exception(f"Could not get pix map for page {i}")
                    continue
                page_images.append((i, image_bytes))
        finally:
            # All images are plain bytes by now; release the document.
            document.close()

        prompt = self._get_prompt(prompt_type=prompt_type)
        logger.info("Got Prompt")
        results = []
        for page_number, image_bytes in page_images:
            logger.info(f"Starting OCR for page: {page_number}")
            results.append(
                self._ocr(
                    image_bytes=image_bytes,
                    prompt=prompt,
                    page_number=page_number,
                )
            )
        return results
199
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my-aws-helpers
3
- Version: 2.6.0
3
+ Version: 2.6.2
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -3,6 +3,7 @@ setup.cfg
3
3
  setup.py
4
4
  my_aws_helpers/api.py
5
5
  my_aws_helpers/auth.py
6
+ my_aws_helpers/bedrock.py
6
7
  my_aws_helpers/cognito.py
7
8
  my_aws_helpers/dynamo.py
8
9
  my_aws_helpers/errors.py
@@ -16,4 +17,5 @@ my_aws_helpers.egg-info/dependency_links.txt
16
17
  my_aws_helpers.egg-info/requires.txt
17
18
  my_aws_helpers.egg-info/top_level.txt
18
19
  my_aws_helpers.egg-info/zip-safe
20
+ my_aws_helpers/prompts/__init__.py
19
21
  tests/test_event.py
@@ -3,7 +3,7 @@ from setuptools import find_namespace_packages, setup
3
3
 
4
4
  base_path = os.path.abspath(os.path.dirname(__file__))
5
5
 
6
- version = "2.6.0"
6
+ version = "2.6.2"
7
7
 
8
8
  setup(
9
9
  name="my_aws_helpers",
@@ -23,5 +23,6 @@ setup(
23
23
  zip_safe = True,
24
24
  install_requires = [
25
25
  "boto3==1.34.36"
26
- ]
26
+ ],
27
+ include_package_data=True,
27
28
  )
File without changes
File without changes