my-aws-helpers 4.3.0__tar.gz → 6.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/PKG-INFO +1 -1
  2. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/bedrock.py +125 -2
  3. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/cognito.py +17 -0
  4. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/dynamo.py +1 -1
  5. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/s3.py +135 -4
  6. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers.egg-info/PKG-INFO +1 -1
  7. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers.egg-info/requires.txt +3 -3
  8. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/setup.py +2 -2
  9. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/MANIFEST.in +0 -0
  10. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/README.md +0 -0
  11. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/api.py +0 -0
  12. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/auth.py +0 -0
  13. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/errors.py +0 -0
  14. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/event.py +0 -0
  15. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/logging.py +0 -0
  16. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/prompts/__init__.py +0 -0
  17. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/prompts/markdown_system_prompt.txt +0 -0
  18. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/prompts/transactions_headers_prompt.txt +0 -0
  19. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/prompts/transactions_headers_prompt_v2.txt +0 -0
  20. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/prompts/transactions_prompt.txt +0 -0
  21. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers/sfn.py +0 -0
  22. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers.egg-info/SOURCES.txt +0 -0
  23. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
  24. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers.egg-info/top_level.txt +0 -0
  25. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/my_aws_helpers.egg-info/zip-safe +0 -0
  26. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/setup.cfg +0 -0
  27. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/tests/test_cognito.py +0 -0
  28. {my_aws_helpers-4.3.0 → my_aws_helpers-6.0.5}/tests/test_event.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my_aws_helpers
3
- Version: 4.3.0
3
+ Version: 6.0.5
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -5,11 +5,26 @@ import json
5
5
  import time
6
6
  import os
7
7
  import io
8
+ import re
9
+ from copy import copy
8
10
  from typing import Optional, List, Dict
9
11
  from enum import Enum
10
12
  import pymupdf
11
13
  import concurrent.futures
12
14
  from dataclasses import dataclass
15
+ from my_aws_helpers.s3 import S3Serialiser, BaseS3Object, BaseS3Queries, S3, S3Location
16
+
17
+ from my_aws_helpers.logging import select_powertools_logger
18
+
19
+
20
+ logger = select_powertools_logger("aws-helpers-bedrock")
21
+
22
+
23
+ class ImageType(str, Enum):
24
+ gif = "gif"
25
+ jpeg = "jpeg"
26
+ png = "png"
27
+ webp = "webp"
13
28
 
14
29
 
15
30
  class PromptType(str, Enum):
@@ -35,7 +50,7 @@ class TokenUsage:
35
50
 
36
51
 
37
52
  @dataclass
38
- class OCRResult:
53
+ class OCRResult(BaseS3Object):
39
54
  content: List[Dict[str, str]]
40
55
  token_usage: TokenUsage
41
56
  page_number: int
@@ -48,11 +63,69 @@ class OCRResult:
48
63
  page_number=data.get("page_number", 0),
49
64
  )
50
65
 
66
+ @classmethod
67
+ def from_s3_representation(cls, obj: dict) -> OCRResult:
68
+ obj["token_usage"] = (TokenUsage.from_dict(obj.get("token_usage", {})),)
69
+ return cls(**obj)
70
+
71
+ def to_s3_representation(self) -> dict:
72
+ obj = copy(vars(self))
73
+ obj["token_usage"] = S3Serialiser.object_serialiser(
74
+ obj=vars(obj["token_usage"])
75
+ )
76
+ return S3Serialiser.object_serialiser(obj=obj)
77
+
78
+ def get_save_location(self, bucket_name: str) -> S3Location:
79
+ pass
80
+
81
+
82
+ class OCRResultQueries(BaseS3Queries):
83
+ def __init__(self, s3_client: S3, bucket_name: str):
84
+ super().__init__(s3_client=s3_client, bucket_name=bucket_name)
85
+
86
+ def save_ocr_result_to_s3(
87
+ self, ocr_result: OCRResult, save_location: S3Location
88
+ ) -> Optional[S3Location]:
89
+ try:
90
+ obj = ocr_result.to_s3_representation()
91
+ return self.s3_client.save_dict_to_s3(
92
+ content=obj,
93
+ s3_location=save_location,
94
+ )
95
+ except Exception as e:
96
+ logger.exception(f"Failed to save ocr result to s3 due to {e}")
97
+ return None
98
+
99
+ def _concurrent_s3_read(
100
+ self, locations: List[S3Location], max_workers: int = 10
101
+ ) -> List[OCRResult]:
102
+ results: List[OCRResult] = list()
103
+ futures = list()
104
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
105
+ for loc in locations:
106
+ future = executor.submit(
107
+ self.s3_client.read_dict_from_s3,
108
+ s3_location=loc,
109
+ )
110
+ futures.append(future)
111
+ for f in futures:
112
+ results.append(f.result())
113
+ results = [r for r in results if r is not None]
114
+ return results
115
+
116
+ def get_ocr_results_by_key_prefix(self, prefix: str) -> List[OCRResult]:
117
+ locations = self.s3_client.list_objects_by_prefix(
118
+ bucket_name=self.bucket_name, prefix=prefix
119
+ )
120
+ objects = self._concurrent_s3_read(locations=locations)
121
+ ocr_results = [OCRResult.from_s3_representation(obj=obj) for obj in objects]
122
+ return ocr_results
123
+
51
124
 
52
125
  class Bedrock:
53
126
  def __init__(
54
127
  self,
55
- model_id: str = "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
128
+ model_id: str = "apac.anthropic.claude-3-5-sonnet-20241022-v2:0", # anthropic.claude-sonnet-4-20250514-v1:0
56
129
  logger=None,
57
130
  sleep_time: float = 1.0,
58
131
  ):
@@ -92,6 +165,19 @@ class Bedrock:
92
165
  print(e)
93
166
  return None
94
167
 
168
+ @staticmethod
169
+ def extract_json_from_markdown(text: str):
170
+ """
171
+ Extracts the JSON object from a string that may be wrapped in ```json ... ``` code block
172
+ """
173
+ # Match a {...} block anywhere in the text
174
+ match = re.search(r"\{.*\}", text, re.DOTALL)
175
+ if match:
176
+ json_str = match.group(0)
177
+ return json.loads(json_str)
178
+ else:
179
+ raise ValueError("No JSON object found in the text")
180
+
95
181
  def _get_prompt(self, prompt_type: str) -> Optional[str]:
96
182
  if prompt_type not in list(PromptType):
97
183
  raise Exception(f"Error: Invalid prompt type")
@@ -237,3 +323,40 @@ class Bedrock:
237
323
  except Exception as e:
238
324
  self.logger.exception(e)
239
325
  return []
326
+
327
+ def _get_image_block(self, image: bytes, image_content_type: ImageType) -> dict:
328
+ return {
329
+ "image": {
330
+ "format": image_content_type,
331
+ "source": {
332
+ "bytes": image,
333
+ },
334
+ }
335
+ }
336
+
337
+ def image_analysis(
338
+ self, images: List[bytes], prompt: str, image_content_type: ImageType
339
+ ) -> OCRResult:
340
+
341
+ system_prompt = [{"text": prompt}]
342
+ message = [
343
+ {
344
+ "role": "user",
345
+ "content": [
346
+ self._get_image_block(
347
+ image=image, image_content_type=image_content_type
348
+ )
349
+ for image in images
350
+ ],
351
+ }
352
+ ]
353
+ response = self.client.converse(
354
+ modelId=self.model_id, messages=message, system=system_prompt
355
+ )
356
+
357
+ result = {}
358
+ result["content"] = Bedrock.extract_json_from_markdown(
359
+ text=response["output"]["message"]["content"][0]["text"]
360
+ )
361
+ result["token_usage"] = response["usage"]
362
+ return OCRResult.from_dict(data=result)
@@ -69,6 +69,23 @@ class Cognito:
69
69
  logger.exception(f"Failed to sign up due to {e}")
70
70
  return None
71
71
 
72
+ def confirm_sign_up(
73
+ self,
74
+ username: str,
75
+ client_id: str,
76
+ confirmation_code: str,
77
+ ) -> dict:
78
+ try:
79
+ response = self.client.confirm_sign_up(
80
+ ClientId=client_id, Username=username, ConfirmationCode=confirmation_code,
81
+ )
82
+ return response
83
+ except Exception as e:
84
+ logger.exception(
85
+ f"Failed to confirm sign up username {username} due to {e}"
86
+ )
87
+ return None
88
+
72
89
  def admin_confirm_sign_up(
73
90
  self,
74
91
  username: str,
@@ -113,7 +113,7 @@ class Dynamo:
113
113
  return self.table.get_item(Item=item)
114
114
 
115
115
  def delete_item(self, item: dict):
116
- return self.table.delete_item(Item=item)
116
+ return self.table.delete_item(Key=item)
117
117
 
118
118
  def batch_put(self, items: List[dict]) -> None:
119
119
  with self.table.batch_writer() as batch:
@@ -1,7 +1,10 @@
1
+ from __future__ import annotations
1
2
  import boto3
2
3
  import io
3
4
  import json
4
- from typing import Optional, Any, Dict
5
+ from concurrent.futures import Future, ThreadPoolExecutor
6
+ from abc import ABC, abstractmethod
7
+ from typing import Optional, Any, Dict, List
5
8
  from datetime import datetime, date
6
9
  from copy import copy
7
10
  import os
@@ -19,6 +22,26 @@ class ContentType(str, Enum):
19
22
  json_content = "application/json"
20
23
  pdf_content = "application/pdf"
21
24
  jpeg_content = "image/jpeg"
25
+ png_content = "image/png"
26
+
27
+
28
+ @staticmethod
29
+ def get_content_type_from_file_name(file_name: str) -> Optional[ContentType]:
30
+ file_extension = file_name.split('.')[-1]
31
+ if file_extension == "xml":
32
+ return ContentType.xml_content
33
+ if file_extension == "txt":
34
+ return ContentType.plain_text
35
+ if file_extension == "json":
36
+ return ContentType.json_content
37
+ if file_extension == "pdf":
38
+ return ContentType.pdf_content
39
+ if file_extension == "jpg":
40
+ return ContentType.jpeg_content
41
+ if file_extension == "png":
42
+ return ContentType.png_content
43
+ return None
44
+
22
45
 
23
46
 
24
47
  class ContentEncoding(str, Enum):
@@ -31,16 +54,19 @@ class S3Serialiser:
31
54
  def _serialise(obj: Any):
32
55
  if isinstance(obj, datetime) or isinstance(obj, date):
33
56
  return obj.isoformat()
57
+ if isinstance(obj, S3Location):
58
+ return obj.location
34
59
  return obj
35
60
 
36
61
  @staticmethod
37
- def object_serialiser(obj: Any):
62
+ def object_serialiser(obj: Dict):
38
63
  if isinstance(obj, list):
39
64
  return [S3Serialiser.object_serialiser(obj=obj) for obj in obj]
40
65
  if isinstance(obj, dict):
41
66
  return {k: S3Serialiser.object_serialiser(v) for k, v in obj.items()}
42
67
  return S3Serialiser._serialise(obj=obj)
43
68
 
69
+
44
70
  class S3Location:
45
71
  bucket: str
46
72
  file_name: str
@@ -94,10 +120,63 @@ class S3:
94
120
  Body=json.dumps(object), Bucket=bucket_name, Key=file_name
95
121
  )
96
122
 
123
+ def list_objects_by_prefix(self, bucket_name: str, prefix: str) -> List[S3Location]:
124
+ """
125
+ list objects by prefix gets 1000 items at a time, if theres more, I want em
126
+ """
127
+ objects = list()
128
+ try:
129
+ continuation_token = None
130
+ while True:
131
+ if continuation_token:
132
+ response = self.client.list_objects_v2(
133
+ Bucket=bucket_name,
134
+ Prefix=prefix,
135
+ ContinuationToken=continuation_token,
136
+ )
137
+ else:
138
+ response = self.client.list_objects_v2(
139
+ Bucket=bucket_name, Prefix=prefix
140
+ )
141
+
142
+ # Append current batch
143
+ objects.extend(response.get("Contents", []))
144
+
145
+ # Check if more results exist
146
+ if response.get("IsTruncated"): # True if more pages available
147
+ continuation_token = response["NextContinuationToken"]
148
+ else:
149
+ break
150
+ locations = [
151
+ S3Location(bucket=bucket_name, file_name=c["Key"]) for c in objects
152
+ ]
153
+ return locations
154
+ except Exception as e:
155
+ logger.exception(
156
+ f"Failed to get objects from s3: {bucket_name}/{prefix} due to {e}"
157
+ )
158
+ return []
159
+
97
160
  def get_object(self, bucket_name: str, file_name: str):
98
161
  response = self.client.get_object(Bucket=bucket_name, Key=file_name)
99
162
  return self._streaming_body_to_dict(response["Body"])
100
163
 
164
+ def put_presigned_url(
165
+ self,
166
+ s3_location: S3Location,
167
+ expires_in: int = 3600,
168
+ content_type: str = ContentType.pdf_content.value
169
+ ) -> str:
170
+ return self.client.generate_presigned_url(
171
+ "put_object",
172
+ Params={
173
+ "Bucket": s3_location.bucket,
174
+ "Key": s3_location.file_name,
175
+ "ContentType": content_type,
176
+ },
177
+ ExpiresIn=expires_in,
178
+ )
179
+
101
180
  def get_presigned_url(
102
181
  self,
103
182
  bucket_name: str,
@@ -247,7 +326,59 @@ class S3:
247
326
  )
248
327
  except Exception as e:
249
328
  logger.exception(f"Failed to save jpeg to s3 due to {e}")
250
- return None
329
+ return None
251
330
 
252
331
  def read_dict_from_s3(self, s3_location: S3Location) -> dict:
253
- return json.loads(self.read_binary_from_s3(s3_location=s3_location).decode("utf-8"))
332
+ return json.loads(
333
+ self.read_binary_from_s3(s3_location=s3_location).decode("utf-8")
334
+ )
335
+
336
+
337
+ class BaseS3Object(ABC):
338
+ def to_s3_representation(self) -> dict:
339
+ obj = copy(vars(self))
340
+ return S3Serialiser.object_serialiser(obj=obj)
341
+
342
+ @classmethod
343
+ def from_s3_representation(cls, obj: dict) -> BaseS3Object:
344
+ return cls(**obj)
345
+
346
+ @abstractmethod
347
+ def get_save_location(self, bucket_name: str) -> S3Location:
348
+ pass
349
+
350
+
351
+ class BaseS3Queries:
352
+ s3_client: S3
353
+ bucket_name: str
354
+
355
+ def __init__(self, s3_client: S3, bucket_name: str):
356
+ self.s3_client = s3_client
357
+ self.bucket_name = bucket_name
358
+
359
+ def _concurrent_s3_dict_read(
360
+ self, locations: List[S3Location], max_workers: int = 10
361
+ ) -> List[BaseS3Object]:
362
+ results: List[BaseS3Object] = list()
363
+ futures: List[Future] = list()
364
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
365
+ for location in locations:
366
+ future = executor.submit(
367
+ self.s3_client.read_dict_from_s3,
368
+ s3_location=location,
369
+ )
370
+ futures.append(future)
371
+ for f in futures:
372
+ results.append(f.result())
373
+ results = [r for r in results if r is not None]
374
+ return results
375
+
376
+ def save_s3_object_to_s3(self, object: BaseS3Object) -> Optional[S3Location]:
377
+ try:
378
+ obj = object.to_s3_representation()
379
+ return self.s3_client.save_dict_to_s3(
380
+ content=obj, s3_location=object.get_save_location()
381
+ )
382
+ except Exception as e:
383
+ logger.exception(f"Failed to save s3 object to s3 due to {e}")
384
+ return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: my-aws-helpers
3
- Version: 4.3.0
3
+ Version: 6.0.5
4
4
  Summary: AWS Helpers
5
5
  Home-page: https://github.com/JarrodMccarthy/aws_helpers.git
6
6
  Author: Jarrod McCarthy
@@ -1,15 +1,15 @@
1
- boto3==1.34.36
1
+ boto3
2
2
  python-jose==3.5.0
3
3
 
4
4
  [all]
5
5
  PyMuPDF==1.26.0
6
6
  black<26.0.0,<=25.1.0
7
- boto3==1.34.36
7
+ boto3
8
8
  coverage==7.3.2
9
9
  pytest==7.4.3
10
10
  python-jose==3.5.0
11
11
 
12
12
  [bedrock]
13
13
  PyMuPDF==1.26.0
14
- boto3==1.34.36
14
+ boto3
15
15
  python-jose==3.5.0
@@ -3,10 +3,10 @@ from setuptools import find_namespace_packages, setup
3
3
 
4
4
  base_path = os.path.abspath(os.path.dirname(__file__))
5
5
 
6
- version = "4.3.0"
6
+ version = "6.0.5"
7
7
 
8
8
  core = [
9
- "boto3==1.34.36",
9
+ "boto3",
10
10
  "python-jose==3.5.0",
11
11
  ]
12
12
  dev = [
File without changes
File without changes