my-aws-helpers 5.1.0__tar.gz → 5.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of my-aws-helpers might be problematic. Click here for more details.
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/PKG-INFO +1 -1
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/bedrock.py +62 -2
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers.egg-info/PKG-INFO +1 -1
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/setup.py +1 -1
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/MANIFEST.in +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/README.md +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/api.py +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/auth.py +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/cognito.py +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/dynamo.py +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/errors.py +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/event.py +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/logging.py +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/prompts/__init__.py +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/prompts/markdown_system_prompt.txt +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/prompts/transactions_headers_prompt.txt +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/prompts/transactions_headers_prompt_v2.txt +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/prompts/transactions_prompt.txt +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/s3.py +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/sfn.py +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers.egg-info/SOURCES.txt +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers.egg-info/dependency_links.txt +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers.egg-info/requires.txt +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers.egg-info/top_level.txt +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers.egg-info/zip-safe +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/setup.cfg +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/tests/test_cognito.py +0 -0
- {my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/tests/test_event.py +0 -0
|
@@ -5,12 +5,18 @@ import json
|
|
|
5
5
|
import time
|
|
6
6
|
import os
|
|
7
7
|
import io
|
|
8
|
-
import
|
|
8
|
+
from copy import copy
|
|
9
9
|
from typing import Optional, List, Dict
|
|
10
10
|
from enum import Enum
|
|
11
11
|
import pymupdf
|
|
12
12
|
import concurrent.futures
|
|
13
13
|
from dataclasses import dataclass
|
|
14
|
+
from my_aws_helpers.s3 import S3Serialiser, BaseS3Object, BaseS3Queries, S3, S3Location
|
|
15
|
+
|
|
16
|
+
from my_aws_helpers.logging import select_powertools_logger
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Module-level logger, obtained from select_powertools_logger under the name "aws-helpers-bedrock".
logger = select_powertools_logger("aws-helpers-bedrock")
|
|
14
20
|
|
|
15
21
|
|
|
16
22
|
class ImageType(str, Enum):
|
|
@@ -43,7 +49,7 @@ class TokenUsage:
|
|
|
43
49
|
|
|
44
50
|
|
|
45
51
|
@dataclass
|
|
46
|
-
class OCRResult:
|
|
52
|
+
class OCRResult(BaseS3Object):
|
|
47
53
|
content: List[Dict[str, str]]
|
|
48
54
|
token_usage: TokenUsage
|
|
49
55
|
page_number: int
|
|
@@ -56,6 +62,60 @@ class OCRResult:
|
|
|
56
62
|
page_number=data.get("page_number", 0),
|
|
57
63
|
)
|
|
58
64
|
|
|
65
|
+
@classmethod
def from_s3_representation(cls, obj: dict) -> "OCRResult":
    """Build an instance from a dict previously read back from S3.

    The return annotation is a string (forward reference) because the
    class name is not yet bound while its own body is being executed;
    an unquoted annotation would raise ``NameError`` at import time
    unless postponed annotation evaluation is enabled for the module.

    Args:
        obj: Mapping whose keys are passed verbatim as keyword
            arguments to the class constructor.

    Returns:
        A new instance created with ``cls(**obj)``.

    Raises:
        TypeError: If ``obj`` contains keys the constructor does not
            accept, or lacks required ones.
    """
    # NOTE(review): nested values (e.g. ``token_usage``) are passed through
    # exactly as stored; confirm whether they need converting back into
    # their dataclass types before use.
    return cls(**obj)
|
|
68
|
+
|
|
69
|
+
def to_s3_representation(self) -> dict:
    """Return this object's attributes in the form written to S3.

    A shallow copy of the instance attribute dict is handed to
    ``S3Serialiser.object_serialiser`` so the serialiser never receives
    the live ``__dict__`` of this instance.
    """
    return S3Serialiser.object_serialiser(obj=copy(vars(self)))
|
|
72
|
+
|
|
73
|
+
def get_save_location(self, bucket_name: str) -> S3Location:
    """Placeholder for computing where this result is stored.

    Not implemented yet: ``bucket_name`` is ignored and the method
    returns ``None`` despite the declared return type.
    """
    return None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class OCRResultQueries(BaseS3Queries):
    """Save and load ``OCRResult`` objects through an S3 client."""

    def __init__(self, s3_client: S3, bucket_name: str):
        """Pass the S3 client and bucket name on to ``BaseS3Queries``."""
        super().__init__(s3_client=s3_client, bucket_name=bucket_name)

    def save_ocr_result_to_s3(
        self, ocr_result: OCRResult, save_location: S3Location
    ) -> Optional[S3Location]:
        """Serialise ``ocr_result`` and write it to ``save_location``.

        Args:
            ocr_result: The result to persist.
            save_location: Destination handed to the S3 client.

        Returns:
            Whatever ``s3_client.save_dict_to_s3`` returns, or ``None`` if
            serialising or saving raised; the failure is logged, not
            re-raised (best-effort save).
        """
        try:
            obj = ocr_result.to_s3_representation()
            return self.s3_client.save_dict_to_s3(
                content=obj,
                s3_location=save_location,
            )
        except Exception as e:
            logger.exception(f"Failed to save ocr result to s3 due to {e}")
            return None

    def _concurrent_s3_read(
        self, locations: List[S3Location], max_workers: int = 10
    ) -> List[dict]:
        """Read every location through a thread pool and drop ``None`` reads.

        Args:
            locations: Locations to read with ``s3_client.read_dict_from_s3``.
            max_workers: Upper bound on concurrent reader threads.

        Returns:
            The non-``None`` read results, in the same order as ``locations``.
            These are the raw objects returned by the client, not
            ``OCRResult`` instances; the caller converts them.

        Raises:
            Exception: Any exception raised by an individual read propagates
                when its result is collected.
        """
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Submit everything first so the reads overlap, then collect in
            # submission order to keep results aligned with ``locations``.
            futures = [
                executor.submit(self.s3_client.read_dict_from_s3, s3_location=loc)
                for loc in locations
            ]
            results = [f.result() for f in futures]
        return [r for r in results if r is not None]

    def get_listing_by_key_prefix(self, prefix: str) -> List[OCRResult]:
        """Load every ``OCRResult`` stored under ``prefix`` in the bucket.

        Args:
            prefix: Key prefix passed to ``s3_client.list_objects_by_prefix``.

        Returns:
            One ``OCRResult`` per object that was read successfully.
        """
        locations = self.s3_client.list_objects_by_prefix(
            bucket_name=self.bucket_name, prefix=prefix
        )
        # The helper's parameter is ``locations``; the previous keyword
        # ``listing_locations`` raised TypeError on every call.
        objects = self._concurrent_s3_read(locations=locations)
        return [OCRResult.from_s3_representation(obj=obj) for obj in objects]
|
|
118
|
+
|
|
59
119
|
|
|
60
120
|
class Bedrock:
|
|
61
121
|
def __init__(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/prompts/markdown_system_prompt.txt
RENAMED
|
File without changes
|
{my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/prompts/transactions_headers_prompt.txt
RENAMED
|
File without changes
|
|
File without changes
|
{my_aws_helpers-5.1.0 → my_aws_helpers-5.3.0}/my_aws_helpers/prompts/transactions_prompt.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|