my-aws-helpers 2.6.2__tar.gz → 2.6.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of my-aws-helpers might be problematic. Click here for more details.

Files changed (25) hide show
  1. my_aws_helpers-2.6.4/MANIFEST.in +1 -0
  2. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/PKG-INFO +1 -1
  3. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers/bedrock.py +26 -31
  4. my_aws_helpers-2.6.4/my_aws_helpers/prompts/json_system_prompt.txt +36 -0
  5. my_aws_helpers-2.6.4/my_aws_helpers/prompts/markdown_system_prompt.txt +35 -0
  6. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers.egg-info/PKG-INFO +1 -1
  7. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers.egg-info/SOURCES.txt +3 -0
  8. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/setup.py +1 -1
  9. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/README.md +0 -0
  10. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers/api.py +0 -0
  11. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers/auth.py +0 -0
  12. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers/cognito.py +0 -0
  13. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers/dynamo.py +0 -0
  14. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers/errors.py +0 -0
  15. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers/event.py +0 -0
  16. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers/logging.py +0 -0
  17. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers/prompts/__init__.py +0 -0
  18. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers/s3.py +0 -0
  19. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers/sfn.py +0 -0
  20. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
  21. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers.egg-info/requires.txt +0 -0
  22. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers.egg-info/top_level.txt +0 -0
  23. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/my_aws_helpers.egg-info/zip-safe +0 -0
  24. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/setup.cfg +0 -0
  25. {my_aws_helpers-2.6.2 → my_aws_helpers-2.6.4}/tests/test_event.py +0 -0
@@ -0,0 +1 @@
1
+ recursive-include my_aws_helpers *.txt
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my_aws_helpers
3
- Version: 2.6.2
3
+ Version: 2.6.4
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -10,12 +10,6 @@ from enum import Enum
10
10
  import pymupdf
11
11
  import concurrent.futures
12
12
  from dataclasses import dataclass
13
- from my_aws_helpers.logging import select_powertools_logger
14
-
15
-
16
- logger = select_powertools_logger('bedrock-boy')
17
-
18
- logger.info("Got logger")
19
13
 
20
14
  class PromptType(str, Enum):
21
15
  json = "json_system_prompt.txt"
@@ -56,9 +50,11 @@ class Bedrock:
56
50
  def __init__(
57
51
  self,
58
52
  model_id: str = "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
53
+ logger = None,
59
54
  ):
60
55
 
61
56
  self.session = Bedrock._set_session_params()
57
+ self.logger = logger
62
58
  region_name = "ap-southeast-2"
63
59
  if self.session is None:
64
60
  self.session = boto3.Session(region_name = region_name)
@@ -87,7 +83,7 @@ class Bedrock:
87
83
  region_name=region_name
88
84
  )
89
85
  except Exception as e:
90
- logger.exception(e)
86
+ print(e)
91
87
  return None
92
88
 
93
89
  def _get_prompt(self, prompt_type: str) -> Optional[str]:
@@ -100,7 +96,7 @@ class Bedrock:
100
96
  prompt = f.read()
101
97
  return prompt
102
98
  except Exception as e:
103
- logger.exception(f"Failed to get {prompt_type} prompt due to {e}")
99
+ self.logger.exception(f"Failed to get {prompt_type} prompt due to {e}")
104
100
  return None
105
101
 
106
102
  def _ocr(
@@ -127,13 +123,13 @@ class Bedrock:
127
123
  ]
128
124
  retries = 3
129
125
  for i in range(retries):
130
- logger.info(f"Attempt number {i} for {self.model_id} converse")
126
+ self.logger.info(f"Attempt number {i} for {self.model_id} converse")
131
127
  try:
132
128
  response = self.client.converse(modelId = self.model_id, messages = message, system = system_prompt)
133
129
  if response['ResponseMetadata']['HTTPStatusCode'] == 200:
134
130
  break
135
131
  except Exception as e:
136
- logger.exception(f"Error during conversation due to {e}")
132
+ self.logger.exception(f"Error during conversation due to {e}")
137
133
  if i >= len(retries) - 1: raise Exception(e)
138
134
  continue
139
135
 
@@ -152,7 +148,7 @@ class Bedrock:
152
148
  results = list()
153
149
  prompt = self._get_prompt(prompt_type=prompt_type)
154
150
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
155
- logger.info("Some log")
151
+ self.logger.info("Some log")
156
152
  execution_futures = {
157
153
  executor.submit(
158
154
  self._ocr,
@@ -172,28 +168,27 @@ class Bedrock:
172
168
  prompt_type: str,
173
169
  zoom: int = 7,
174
170
  ) -> List[OCRResult]:
175
- logger.info("Getting OCR Results")
176
171
  try:
172
+ self.logger.info("Getting OCR Results")
177
173
  document = pymupdf.open(stream=pdf_bytes, filetype="pdf")
174
+ pages: List[pymupdf.Page] = [p for p in document]
175
+
176
+ image_bytes_list: List[bytes] = list()
177
+ for i, p in enumerate(pages):
178
+ try:
179
+ image_bytes: bytes = p.get_pixmap(matrix = pymupdf.Matrix(zoom, zoom)).tobytes("png")
180
+ image_bytes_list.append(image_bytes)
181
+ except Exception as e:
182
+ self.logger.error(f"Could not get pix map for page {i}")
183
+ continue
184
+ prompt = self._get_prompt(prompt_type=prompt_type)
185
+ self.logger.info("Got Prompt")
186
+ results = list()
187
+ for i, image_bytes in enumerate(image_bytes_list):
188
+ self.logger.info(f"Starting OCR for page: {i}")
189
+ results.append(self._ocr(image_bytes=image_bytes, prompt=prompt))
190
+ return results
178
191
  except Exception as e:
179
- logger.exception(f"Failed to open pdf due to {e}")
192
+ self.logger.exception(e)
180
193
  return []
181
-
182
- pages: List[pymupdf.Page] = [p for p in document]
183
-
184
- image_bytes_list: List[bytes] = list()
185
- for i, p in enumerate(pages):
186
- try:
187
- image_bytes: bytes = p.get_pixmap(matrix = pymupdf.Matrix(zoom, zoom)).tobytes("png")
188
- image_bytes_list.append(image_bytes)
189
- except Exception as e:
190
- logger.error(f"Could not get pix map for page {i}")
191
- continue
192
- prompt = self._get_prompt(prompt_type=prompt_type)
193
- logger.info("Got Prompt")
194
- results = list()
195
- for i, image_bytes in enumerate(image_bytes_list):
196
- logger.info(f"Starting OCR for page: {i}")
197
- results.append(self._ocr(image_bytes=image_bytes, prompt=prompt))
198
- return results
199
194
 
@@ -0,0 +1,36 @@
1
+ You are an intelligent document and image interpreter.
2
+
3
+ Your task is to analyze the provided image and extract all meaningful data as structured **JSON**.
4
+
5
+ ## Output Requirements
6
+
7
+ - Return **only valid JSON**.
8
+ - If the image contains one or more **tables**, represent each row as a JSON object.
9
+ - Use **the table headers as keys**.
10
+ - if there are multiple sections, only include the row data in transactions
11
+ - Every row should use the same, consistent set of headers as keys
12
+ - If content is unclear, use: `"[Unclear]"`.
13
+ - **Do not fabricate** values not visible in the image.
14
+ - If a description contains 2 lines of text, only include the most important text; the text should reside in a single key in the JSON response, do not split it
15
+ - Do not return anything except the json content
16
+
17
+ ## Example
18
+
19
+ If the image contains this table:
20
+
21
+ | Date | | Price |
22
+ | ------ |----------|-------|
23
+ | June 6 | desc 1 | $2.00 |
24
+ | June 5 | desc 2 | $1.70 |
25
+ | | item x | $1.50 |
26
+
27
+ Return:
28
+
29
+ ```json
30
+ {
31
+ "transactions": [
32
+ { "date": "June 6", "description": "desc 1", "price": "$2.00" },
33
+ { "date": "June 5", "description": "desc 2", "price": "$1.70" },
34
+ { "date": "June 5", "description": "item x", "price": "$1.50" }
35
+ ]
36
+ }
@@ -0,0 +1,35 @@
1
+ You are an intelligent document and visual layout interpreter.
2
+
3
+ Your task is to:
4
+ 1. Analyze the provided image of a bank statement containing a list of transactions.
5
+ 2. Convert the contents into **well-formatted Markdown**, preserving:
6
+ - Headings
7
+ - Lists
8
+ - Tables
9
+ - Emphasis (bold, italic)
10
+ - Line breaks and whitespace if needed
11
+
12
+ ## Output Format
13
+
14
+ Please return only valid Markdown, and structure it clearly.
15
+
16
+ Use this format:
17
+
18
+ ```markdown
19
+ # [Main Title of Document or Topic]
20
+
21
+ ## Section 1 Title
22
+
23
+ - Bullet point 1
24
+ - Bullet point 2
25
+
26
+ ### Table
27
+
28
+ | Column A | Column B |
29
+ |----------|----------|
30
+ | Row 1A | Row 1B |
31
+ | Row 2A | Row 2B |
32
+
33
+ 3. The table data should be copied exactly as it appears, with no exceptions
34
+
35
+ 4. Do not return anything except the markdown content
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my-aws-helpers
3
- Version: 2.6.2
3
+ Version: 2.6.4
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -1,3 +1,4 @@
1
+ MANIFEST.in
1
2
  README.md
2
3
  setup.cfg
3
4
  setup.py
@@ -18,4 +19,6 @@ my_aws_helpers.egg-info/requires.txt
18
19
  my_aws_helpers.egg-info/top_level.txt
19
20
  my_aws_helpers.egg-info/zip-safe
20
21
  my_aws_helpers/prompts/__init__.py
22
+ my_aws_helpers/prompts/json_system_prompt.txt
23
+ my_aws_helpers/prompts/markdown_system_prompt.txt
21
24
  tests/test_event.py
@@ -3,7 +3,7 @@ from setuptools import find_namespace_packages, setup
3
3
 
4
4
  base_path = os.path.abspath(os.path.dirname(__file__))
5
5
 
6
- version = "2.6.2"
6
+ version = "2.6.4"
7
7
 
8
8
  setup(
9
9
  name="my_aws_helpers",
File without changes
File without changes