documente_shared 0.1.33.tar.gz → 0.1.34.tar.gz
This diff shows the contents of two package versions as published to a supported public registry. It is provided for informational purposes only and reflects the changes between those published versions.
Potentially problematic release.
This version of documente_shared might be problematic.
- {documente_shared-0.1.33 → documente_shared-0.1.34}/PKG-INFO +2 -1
- {documente_shared-0.1.33 → documente_shared-0.1.34}/README.md +4 -4
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/application/digest.py +7 -7
- documente_shared-0.1.34/documente_shared/application/exceptions.py +23 -0
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/application/time_utils.py +9 -9
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/base_enum.py +53 -53
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/entities/document_process.py +199 -199
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/entities/document_process_metadata.py +64 -64
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/enums.py +22 -22
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/repositories.py +18 -18
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/infrastructure/dynamo_repositories.py +23 -23
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/infrastructure/dynamo_table.py +75 -75
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/infrastructure/s3_bucket.py +57 -57
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/infrastructure/sqs_queue.py +47 -47
- {documente_shared-0.1.33 → documente_shared-0.1.34}/pyproject.toml +19 -18
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/__init__.py +0 -0
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/application/__init__.py +0 -0
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/__init__.py +0 -0
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/entities/__init__.py +0 -0
- {documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/infrastructure/__init__.py +0 -0
{documente_shared-0.1.33 → documente_shared-0.1.34}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: documente_shared
-Version: 0.1.33
+Version: 0.1.34
 Summary: Shared utilities for Documente AI projects
 License: MIT
 Author: Tech
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: boto3 (>=1.34.102,<2.0.0)
 Requires-Dist: botocore (>=1.34.102,<2.0.0)
+Requires-Dist: sentry-sdk (>=1.0.0,<2.0.0)
 Description-Content-Type: text/markdown

{documente_shared-0.1.33 → documente_shared-0.1.34}/README.md RENAMED
@@ -1,4 +1,4 @@ (content identical on both sides; shown once)

# Documente Shared

Utilidades para proyectos Documente AI

{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/application/digest.py RENAMED
@@ -1,7 +1,7 @@ (content identical on both sides; shown once)
import hashlib


def get_file_digest(file_bytes: bytes) -> str:
    sha256_hash = hashlib.sha256()
    sha256_hash.update(file_bytes)
    return sha256_hash.hexdigest()

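For reference, a minimal usage sketch of the digest helper; the byte string below is illustrative:

from documente_shared.application.digest import get_file_digest

# SHA-256 over the raw file bytes, returned as a 64-character hex string
digest = get_file_digest(b'example file contents')
print(digest)
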
documente_shared-0.1.34/documente_shared/application/exceptions.py
@@ -0,0 +1,23 @@
+import sentry_sdk
+from functools import wraps
+from typing import Callable, Any, TypeVar
+
+F = TypeVar("F", bound=Callable[..., Any])
+
+def initialize_sentry(dsn: str, environment: str = "dev") -> None:
+    if not sentry_sdk.Hub.current.client:
+        sentry_sdk.init(
+            dsn=dsn,
+            environment=environment,
+        )
+
+def track_exceptions(func: F) -> F:
+    @wraps(func)
+    def wrapper(*args: Any, **kwargs: Any) -> Any:
+        try:
+            return func(*args, **kwargs)
+        except Exception as e:
+            sentry_sdk.capture_exception(e)
+            sentry_sdk.flush()
+            raise
+    return wrapper  # type: ignore
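A minimal usage sketch of the new module; the DSN and the decorated function are placeholders. Note that initialize_sentry only calls sentry_sdk.init() when no client is already configured:

from documente_shared.application.exceptions import initialize_sentry, track_exceptions

initialize_sentry(dsn='https://examplekey@o0.ingest.sentry.io/0', environment='prod')  # placeholder DSN

@track_exceptions
def process_document(payload: dict) -> None:
    # any exception raised here is captured, flushed to Sentry, then re-raised
    raise ValueError('boom')
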
{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/application/time_utils.py RENAMED
@@ -1,10 +1,10 @@ (content identical on both sides; shown once)
from datetime import datetime
from typing import Union


def get_datetime_from_data(input_datetime: Union[datetime, str]):
    if isinstance(input_datetime, datetime):
        return input_datetime
    elif isinstance(input_datetime, str):
        return datetime.fromisoformat(input_datetime)
    return None

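A usage sketch: the helper accepts either a datetime or an ISO-8601 string and falls through to None for anything else:

from datetime import datetime
from documente_shared.application.time_utils import get_datetime_from_data

get_datetime_from_data('2024-05-01T12:30:00')  # -> datetime(2024, 5, 1, 12, 30)
get_datetime_from_data(datetime.now())         # returned unchanged
get_datetime_from_data(None)                   # -> None
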
{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/base_enum.py RENAMED
@@ -1,54 +1,54 @@ (content identical on both sides; shown once)
from enum import Enum
from typing import Union, Optional


class BaseEnum(Enum):
    """Provides the common functionalties to multiple model choices."""

    @classmethod
    def get_members(cls):
        return [tag for tag in cls if type(tag.value) in [int, str, float]]

    @classmethod
    def choices(cls):
        """Generate choice options for models."""
        return [
            (option.value, option.value)
            for option in cls
            if type(option.value) in [int, str, float]
        ]

    @classmethod
    def values(cls):
        """Returns values from choices."""
        return [option.value for option in cls]

    def __str__(self):  # noqa: D105
        return str(self.value)

    def __repr__(self):
        return self.__str__()

    def __hash__(self):
        return hash(self.value)

    @classmethod
    def as_list(cls):
        """Returns properties as a list."""
        return [
            value
            for key, value in cls.__dict__.items()
            if isinstance(value, str) and not key.startswith('__')
        ]

    @classmethod
    def from_value(
        cls,
        value: Union[str, int],
    ) -> Optional['BaseEnum']:
        for tag in cls:
            if isinstance(tag.value, str) and str(tag.value).upper() == str(value).upper():
                return tag
            elif not isinstance(tag.value, str) and tag.value == value:
                return tag
        return None

{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/entities/document_process.py RENAMED
@@ -1,199 +1,199 @@ (content identical on both sides; shown once)
from dataclasses import dataclass
from datetime import datetime
from decimal import Decimal
from typing import Optional, List

from documente_shared.application.time_utils import get_datetime_from_data
from documente_shared.domain.entities.document_process_metadata import DocumentProcessMetadata
from documente_shared.domain.enums import (
    DocumentProcessStatus,
    DocumentProcessSubCategory,
    DocumentProcessCategory,
)

def remove_slash_from_path(path: str) -> str:
    if path and path.startswith('/'):
        return path[1:]
    return path

@dataclass
class DocumentProcess(object):
    digest: str
    status: DocumentProcessStatus
    file_path: Optional[str] = None
    file_bytes: Optional[bytes] = None
    category: Optional[DocumentProcessCategory] = None
    sub_category: Optional[DocumentProcessSubCategory] = None
    processed_csv_path: Optional[str] = None
    processed_csv_bytes: Optional[bytes] = None
    processed_xlsx_path: Optional[str] = None
    processed_xlsx_bytes: Optional[bytes] = None
    processed_metadata_path: Optional[str] = None
    processing_time: Optional[Decimal] = None
    uploaded_at: Optional[datetime] = None
    enqueued_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    failed_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    metadata_items: Optional[List[DocumentProcessMetadata]] = None

    def __post_init__(self):
        self.metadata_items = self.metadata_items or []

    @property
    def is_pending(self) -> bool:
        return self.status == DocumentProcessStatus.PENDING

    @property
    def is_enqueued(self) -> bool:
        return self.status == DocumentProcessStatus.ENQUEUED

    @property
    def is_processing(self) -> bool:
        return self.status == DocumentProcessStatus.PROCESSING

    @property
    def is_completed(self) -> bool:
        return self.status == DocumentProcessStatus.COMPLETED

    @property
    def is_failed(self) -> bool:
        return self.status == DocumentProcessStatus.FAILED

    @property
    def is_valid(self) -> bool:
        return all([
            self.digest,
            self.status,
            self.file_path,
        ])

    def enqueue(self):
        self.status = DocumentProcessStatus.ENQUEUED
        self.enqueued_at = datetime.now()

    def processing(self):
        self.status = DocumentProcessStatus.PROCESSING
        self.started_at = datetime.now()

    def failed(self):
        self.status = DocumentProcessStatus.FAILED
        self.failed_at = datetime.now()

    def completed(self):
        self.status = DocumentProcessStatus.COMPLETED
        self.completed_at = datetime.now()

    def deleted(self):
        self.status = DocumentProcessStatus.DELETED

    @property
    def file_key(self) -> str:
        return remove_slash_from_path(self.file_path)

    @property
    def processed_csv_key(self) -> str:
        return remove_slash_from_path(self.processed_csv_path)

    @property
    def processed_xlsx_key(self) -> str:
        return remove_slash_from_path(self.processed_xlsx_path)

    @property
    def processed_metadata_key(self) -> str:
        return remove_slash_from_path(self.processed_metadata_path)

    @property
    def extended_filename(self) -> str:
        return self.file_path.split('/')[-1]

    @property
    def filename(self) -> str:
        filename_with_extension = self.extended_filename
        return filename_with_extension.split('.')[0]

    @property
    def to_dict(self) -> dict:
        return {
            'digest': self.digest,
            'status': str(self.status),
            'file_path': self.file_path,
            'category': (
                str(self.category)
                if self.category else None
            ),
            'sub_category': (
                str(self.sub_category)
                if self.sub_category else None
            ),
            'processed_csv_path': self.processed_csv_path,
            'processed_xlsx_path': self.processed_xlsx_path,
            'processed_metadata_path': self.processed_metadata_path,
            'processing_time': (
                str(self.processing_time.quantize(Decimal('0.00001')))
                if self.processing_time else None
            ),
            'uploaded_at': self.uploaded_at.isoformat() if self.uploaded_at else None,
            'enqueued_at': self.enqueued_at.isoformat() if self.enqueued_at else None,
            'started_at': self.started_at.isoformat() if self.started_at else None,
            'failed_at': self.failed_at.isoformat() if self.failed_at else None,
            'completed_at': self.completed_at.isoformat() if self.completed_at else None,
            'metadata_items': [metadata.to_dict for metadata in self.metadata_items],
        }

    @property
    def to_simple_dict(self) -> dict:
        simple_dict = self.to_dict.copy()
        simple_dict.pop('metadata_items')
        return simple_dict

    def overload(
        self,
        new_instance: 'DocumentProcess',
        properties: List[str] = None,
    ):
        instance_properties = properties or [
            'status', 'metadata', 'file_path', 'file_bytes', 'category', 'sub_category',
            'processed_csv_path', 'processed_csv_bytes', 'processed_xlsx_path', 'processed_metadata_path',
            'processed_xlsx_bytes', 'processing_time', 'uploaded_at',
            'enqueued_at', 'started_at', 'failed_at', 'completed_at',
        ]
        for _property in instance_properties:
            property_value = getattr(new_instance, _property)
            if not hasattr(self, _property):
                continue
            setattr(self, _property, property_value)
        return self

    @classmethod
    def from_dict(cls, data: dict) -> 'DocumentProcess':
        return cls(
            digest=data.get('digest'),
            status=DocumentProcessStatus.from_value(data.get('status')),
            file_path=data.get('file_path'),
            category=(
                DocumentProcessCategory.from_value(data.get('category'))
                if data.get('category') else None
            ),
            sub_category=(
                DocumentProcessSubCategory.from_value(data.get('sub_category'))
                if data.get('sub_category') else None
            ),
            processed_csv_path=data.get('processed_csv_path'),
            processed_xlsx_path=data.get('processed_xlsx_path'),
            processed_metadata_path=data.get('processed_metadata_path'),
            processing_time=(
                Decimal(data.get('processing_time'))
                if data.get('processing_time') else None
            ),
            uploaded_at=get_datetime_from_data(input_datetime=data.get('uploaded_at')),
            enqueued_at=get_datetime_from_data(input_datetime=data.get('enqueued_at')),
            started_at=get_datetime_from_data(input_datetime=data.get('started_at')),
            failed_at=get_datetime_from_data(input_datetime=data.get('failed_at')),
            completed_at=get_datetime_from_data(input_datetime=data.get('completed_at')),
            metadata_items=[
                DocumentProcessMetadata.from_dict(metadata)
                for metadata in data.get('metadata_items', [])
            ],
        )

{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/entities/document_process_metadata.py RENAMED
@@ -1,65 +1,65 @@ (content identical on both sides; shown once)
from dataclasses import dataclass
from datetime import datetime
from typing import Optional


@dataclass
class DocumentProcessMetadata(object):
    publication_date: Optional[datetime] = None
    num_circular: Optional[str] = None
    asfi_identifier: Optional[str] = None
    contains_tables: Optional[bool] = None
    text_content: Optional[str] = None
    case_name: Optional[str] = None
    starting_office: Optional[str] = None
    output_json: Optional[dict] = None
    processing_time: Optional[float] = None
    llm_model: Optional[str] = None
    num_pages: Optional[float] = None
    num_tokens: Optional[float] = None
    citcular_type: Optional[str] = None


    @property
    def to_dict(self):
        return {
            'publication_date': (
                self.publication_date.isoformat()
                if self.publication_date
                else None
            ),
            'num_circular': self.num_circular,
            'asfi_identifier': self.asfi_identifier,
            'contains_tables': self.contains_tables,
            'text_content': self.text_content,
            'case_name': self.case_name,
            'starting_office': self.starting_office,
            'output_json': self.output_json,
            'processing_time': self.processing_time,
            'llm_model': self.llm_model,
            'num_pages': self.num_pages,
            'num_tokens': self.num_tokens,
            'citcular_type': self.citcular_type
        }

    @classmethod
    def from_dict(cls, data: dict):
        return cls(
            publication_date=(
                datetime.fromisoformat(data.get('publication_date'))
                if data.get('publication_date')
                else None
            ),
            num_circular=data.get('num_circular'),
            asfi_identifier=data.get('asfi_identifier'),
            contains_tables=data.get('contains_tables'),
            text_content=data.get('text_content'),
            case_name=data.get('case_name'),
            starting_office=data.get('starting_office'),
            output_json=data.get('output_json'),
            processing_time=data.get('processing_time'),
            llm_model=data.get('llm_model'),
            num_pages=data.get('num_pages'),
            num_tokens=data.get('num_tokens'),
            citcular_type=data.get('citcular_type')
        )

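Taken together, a sketch of how these entities round-trip through dicts; all field values below are illustrative:

from documente_shared.domain.entities.document_process import DocumentProcess

process = DocumentProcess.from_dict({
    'digest': 'abc123',
    'status': 'PENDING',
    'file_path': '/inbox/circular_123.pdf',
    'metadata_items': [{'num_circular': '123/2024'}],
})
assert process.is_pending
assert process.file_key == 'inbox/circular_123.pdf'  # leading slash stripped
process.enqueue()                  # status -> ENQUEUED, enqueued_at -> now
record = process.to_simple_dict    # the to_dict payload minus 'metadata_items'
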
{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/enums.py RENAMED
@@ -1,22 +1,22 @@ (content identical on both sides; shown once)
from documente_shared.domain.base_enum import BaseEnum


class DocumentProcessStatus(BaseEnum):
    PENDING = 'PENDING'
    ENQUEUED = 'ENQUEUED'
    PROCESSING = 'PROCESSING'
    COMPLETED = 'COMPLETED'
    FAILED = 'FAILED'
    DELETED = 'DELETED'
    CANCELLED = 'CANCELLED'


class DocumentProcessCategory(BaseEnum):
    CIRCULAR = 'CIRCULAR'


class DocumentProcessSubCategory(BaseEnum):
    CC_COMBINADA = 'CC_COMBINADA'
    CC_NORMATIVA = 'CC_NORMATIVA'

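BaseEnum.from_value matches string values case-insensitively, so lookups like these succeed:

from documente_shared.domain.enums import DocumentProcessStatus, DocumentProcessCategory

DocumentProcessStatus.from_value('pending')     # -> DocumentProcessStatus.PENDING
DocumentProcessStatus.from_value('FAILED')      # -> DocumentProcessStatus.FAILED
DocumentProcessCategory.from_value('circular')  # -> DocumentProcessCategory.CIRCULAR
DocumentProcessStatus.from_value('UNKNOWN')     # -> None
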
{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/repositories.py RENAMED
@@ -1,19 +1,19 @@ (content identical on both sides; shown once)
from abc import ABC, abstractmethod
from typing import Optional

from documente_shared.domain.entities.document_process import DocumentProcess


class DocumentProcessRepository(ABC):

    @abstractmethod
    def find(self, digest: str) -> Optional[DocumentProcess]:
        raise NotImplementedError

    @abstractmethod
    def persist(self, instance: DocumentProcess) -> DocumentProcess:
        raise NotImplementedError

    @abstractmethod
    def remove(self, instance: DocumentProcess):
        raise NotImplementedError

{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/infrastructure/dynamo_repositories.py RENAMED
@@ -1,24 +1,24 @@ (content identical on both sides; shown once)
from typing import Optional

from documente_shared.domain.entities.document_process import DocumentProcess
from documente_shared.domain.repositories import DocumentProcessRepository
from documente_shared.infrastructure.dynamo_table import DynamoDBTable



class DynamoDocumentProcessRepository(
    DynamoDBTable,
    DocumentProcessRepository,
):
    def find(self, digest: str) -> Optional[DocumentProcess]:
        item = self.get(key={'digest': digest})
        if item:
            return DocumentProcess.from_dict(item)
        return None

    def persist(self, instance: DocumentProcess) -> DocumentProcess:
        self.put(instance.to_simple_dict)
        return instance

    def remove(self, instance: DocumentProcess):
        self.delete(key={'digest': instance.digest})

{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/infrastructure/dynamo_table.py RENAMED
@@ -1,75 +1,75 @@ (content identical on both sides; shown once)
from dataclasses import dataclass

import boto3
from boto3.dynamodb.conditions import Key


RETURN_VALUES = 'UPDATED_NEW'

@dataclass
class DynamoDBTable(object):
    table_name: str

    def __post_init__(self):
        self._table = boto3.resource('dynamodb').Table(self.table_name)

    def get(self, key: dict):
        return self._table.get_item(Key=key).get('Item')

    def get_all(self):
        return self._table.scan().get('Items')

    def upsert(self, key, attributes):
        return self.put({**key, **attributes})


    def filter_by(self, attribute, target_value):
        return self._table(
            FilterExpression=Key(attribute).eq(target_value),
        ).get('Items')

    def put(self, attributes: dict, condition: dict = None):
        extra_args = {}
        if condition:
            extra_args['ConditionExpression'] = condition
        return self._table.put_item(Item=attributes, **extra_args)


    def update(self, key: str, attributes: dict):
        return self._table.update_item(
            Key=key,
            UpdateExpression=self._update_expression(attributes),
            ExpressionAttributeNames=self._expression_attribute_names(attributes),
            ExpressionAttributeValues=self._expression_attribute_values(attributes),
            ReturnValues=RETURN_VALUES,
        )

    def delete(self, key: dict):
        return self._table.delete_item(Key=key)

    def count(self) -> int:
        return self._table.item_count


    @classmethod
    def _update_expression(cls, attributes):
        return 'SET {param}'.format(
            param=','.join(
                '#{key}=:{key}'.format(
                    key=key,
                )
                for key in attributes
            ),
        )

    @classmethod
    def _expression_attribute_names(cls, attributes):
        return {
            '#{key}'.format(key=key): key for key in attributes
        }

    @classmethod
    def _expression_attribute_values(cls, attributes):
        return {
            ':{key}'.format(key=key): attr for key, attr in attributes.items()
        }

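A usage sketch of the Dynamo-backed repository; the table name is hypothetical, and AWS credentials plus an existing DynamoDB table keyed on 'digest' are assumed. One caveat visible above: filter_by calls self._table(...) on a boto3 Table resource, which is not callable and would raise a TypeError; self._table.scan(...) was presumably intended.

from documente_shared.infrastructure.dynamo_repositories import DynamoDocumentProcessRepository

repo = DynamoDocumentProcessRepository(table_name='document-processes')  # hypothetical table name
process = repo.find('abc123')      # None if the digest is not stored
if process:
    process.completed()
    repo.persist(process)          # writes to_simple_dict via put_item
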
{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/infrastructure/s3_bucket.py RENAMED
@@ -1,57 +1,57 @@ (content identical on both sides; shown once)
import boto3

from dataclasses import dataclass
from typing import Optional

from documente_shared.domain.entities.document_process import remove_slash_from_path


def remove_none_values(data: dict) -> dict:  # noqa: WPS110
    return {key: value for key, value in data.items() if value is not None}  # noqa: WPS110


@dataclass
class S3Bucket(object):
    bucket_name: str

    def __post_init__(self):
        self._resource = boto3.resource('s3')

    def get(self, file_key: str) -> Optional[dict]:
        try:
            return self._resource.Object(self.bucket_name, file_key).get()
        except self._resource.meta.client.exceptions.NoSuchKey:
            return None

    def get_bytes(self, file_key: str) -> Optional[bytes]:
        cleaned_file_key = remove_slash_from_path(file_key)
        file_context = self.get(cleaned_file_key)
        if not file_context:
            return None
        return (
            file_context['Body'].read()
            if 'Body' in file_context
            else None
        )

    def upload(self, file_key: str, file_content, content_type: Optional[str] = None):
        cleaned_file_key = remove_slash_from_path(file_key)
        optional_params = {'ContentType': content_type}
        return self._resource.Object(self.bucket_name, cleaned_file_key).put(
            Body=file_content,
            **remove_none_values(optional_params),
        )

    def delete(self, file_key: str):
        cleaned_file_key = remove_slash_from_path(file_key)
        return self._resource.Object(self.bucket_name, cleaned_file_key).delete()

    def get_url(self, file_key: str):
        cleaned_file_key = remove_slash_from_path(file_key)
        return 'https://{bucket_url}.s3.amazonaws.com/{file_key}'.format(
            bucket_url=self.bucket_name,
            file_key=cleaned_file_key,
        )

    def read(self, file_key: str) -> bytes:
        return self.get(file_key)['Body'].read()

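A usage sketch for the bucket wrapper; the bucket and key names are placeholders and AWS credentials are assumed:

from documente_shared.infrastructure.s3_bucket import S3Bucket

bucket = S3Bucket(bucket_name='documente-files')  # hypothetical bucket
bucket.upload('/inbox/report.pdf', b'...', content_type='application/pdf')
data = bucket.get_bytes('inbox/report.pdf')       # leading slash is stripped either way
url = bucket.get_url('inbox/report.pdf')          # https://documente-files.s3.amazonaws.com/inbox/report.pdf
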
{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/infrastructure/sqs_queue.py RENAMED
@@ -1,48 +1,48 @@ (content identical on both sides; shown once)
import json
import boto3

from dataclasses import dataclass


@dataclass
class SQSQueue(object):
    queue_url: str
    visibility_timeout: int = 60 * 10
    waiting_timeout: int = 20

    def __post_init__(self):
        self._client = boto3.client('sqs')

    def send_message(
        self,
        payload: dict,
        message_attributes: dict = None,
        delay_seconds: dict = None,
        message_group_id: dict = None,
        message_deduplication_id: dict = None,
    ):
        message_params = {
            'QueueUrl': self.queue_url,
            'MessageBody': json.dumps(payload),
            'MessageAttributes': message_attributes,
            'DelaySeconds': delay_seconds,
            'MessageGroupId': message_group_id,
            'MessageDeduplicationId': message_deduplication_id,
        }
        clean_params = {key: value for key, value in message_params.items() if value}
        return self._client.send_message(**clean_params)

    def delete_message(self, receipt_handle: str):
        return self._client.delete_message(
            QueueUrl=self.queue_url,
            ReceiptHandle=receipt_handle
        )

    def fetch_messages(self, num_messages: int = 1) -> list[dict]:
        response = self._client.receive_message(
            QueueUrl=self.queue_url,
            MaxNumberOfMessages=num_messages,
            VisibilityTimeout=self.visibility_timeout,
            WaitTimeSeconds=self.waiting_timeout,
        )
        return response.get('Messages', [])

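A usage sketch for the queue wrapper; the queue URL is a placeholder and AWS credentials are assumed. The FIFO-only parameters (message_group_id, message_deduplication_id) are dropped from the request when not provided:

from documente_shared.infrastructure.sqs_queue import SQSQueue

queue = SQSQueue(queue_url='https://sqs.us-east-1.amazonaws.com/123456789012/documente')  # hypothetical
queue.send_message(payload={'digest': 'abc123'})
for message in queue.fetch_messages(num_messages=5):
    # process the message body, then remove it from the queue
    queue.delete_message(receipt_handle=message['ReceiptHandle'])
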
{documente_shared-0.1.33 → documente_shared-0.1.34}/pyproject.toml
@@ -1,18 +1,19 @@
 [tool.poetry]
 name = "documente_shared"
-version = "0.1.33"
+version = "0.1.34"
 description = "Shared utilities for Documente AI projects"
 authors = ["Tech <tech@llamitai.com>"]
 license = "MIT"
 readme = "README.md"
 
 [tool.poetry.dependencies]
 python = ">=3.10,<3.13"
 boto3 = "^1.34.102"
 botocore = "^1.34.102"
-
-
-
-
-
-
+sentry-sdk = "^1.0.0"
+
+[tool.poetry.dev-dependencies]
+pytest = "^6.0"
+[tool.poetry.group.package.dependencies]
+ipdb = "^0.13.13"
+

{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/__init__.py RENAMED
File without changes
{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/application/__init__.py RENAMED
File without changes
{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/__init__.py RENAMED
File without changes
{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/domain/entities/__init__.py RENAMED
File without changes
{documente_shared-0.1.33 → documente_shared-0.1.34}/documente_shared/infrastructure/__init__.py RENAMED
File without changes