documente_shared 0.1.71__py3-none-any.whl → 0.1.72b0__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of documente_shared might be problematic.
- documente_shared/__init__.py +0 -0
- documente_shared/application/__init__.py +0 -0
- documente_shared/application/digest.py +7 -7
- documente_shared/application/exceptions.py +23 -23
- documente_shared/application/files.py +22 -22
- documente_shared/application/time_utils.py +13 -13
- documente_shared/application/timezone.py +7 -7
- documente_shared/domain/__init__.py +0 -0
- documente_shared/domain/base_enum.py +53 -53
- documente_shared/domain/constants.py +2 -2
- documente_shared/domain/entities/__init__.py +0 -0
- documente_shared/domain/entities/document.py +348 -348
- documente_shared/domain/entities/document_metadata.py +63 -63
- documente_shared/domain/entities/in_memory_result.py +51 -51
- documente_shared/domain/entities/processing_case.py +144 -139
- documente_shared/domain/entities/processing_case_item.py +216 -210
- documente_shared/domain/entities/processing_event.py +49 -49
- documente_shared/domain/enums/__init__.py +0 -0
- documente_shared/domain/enums/common.py +95 -95
- documente_shared/domain/enums/document.py +71 -71
- documente_shared/domain/enums/processing_case.py +54 -54
- documente_shared/domain/repositories/__init__.py +0 -0
- documente_shared/domain/repositories/document.py +24 -24
- documente_shared/domain/repositories/processing_case.py +24 -24
- documente_shared/domain/repositories/processing_case_item.py +29 -29
- documente_shared/infrastructure/__init__.py +0 -0
- documente_shared/infrastructure/documente_client.py +21 -0
- documente_shared/infrastructure/dynamo_table.py +75 -75
- documente_shared/infrastructure/repositories/__init__.py +0 -0
- documente_shared/infrastructure/repositories/dynamo_document.py +43 -43
- documente_shared/infrastructure/repositories/dynamo_processing_case.py +43 -43
- documente_shared/infrastructure/repositories/dynamo_processing_case_item.py +53 -53
- documente_shared/infrastructure/repositories/http_document_processing.py +41 -0
- documente_shared/infrastructure/repositories/http_processing_case.py +41 -0
- documente_shared/infrastructure/repositories/http_processing_case_item.py +53 -0
- documente_shared/infrastructure/s3_bucket.py +57 -57
- documente_shared/infrastructure/sqs_queue.py +47 -47
- {documente_shared-0.1.71.dist-info → documente_shared-0.1.72b0.dist-info}/METADATA +2 -1
- documente_shared-0.1.72b0.dist-info/RECORD +40 -0
- documente_shared-0.1.71.dist-info/RECORD +0 -36
- {documente_shared-0.1.71.dist-info → documente_shared-0.1.72b0.dist-info}/WHEEL +0 -0
@@ -1,348 +1,348 @@

documente_shared/domain/entities/document.py: the hunk removes and re-adds all 348 lines, and the removed and re-added content is identical, so the file is reproduced once below.

import json
from dataclasses import dataclass
from datetime import datetime, tzinfo
from decimal import Decimal
from typing import Optional, List

from documente_shared.application.files import remove_slash_from_path, get_filename_from_path
from documente_shared.application.time_utils import get_datetime_from_data
from documente_shared.domain.constants import la_paz_tz
from documente_shared.domain.entities.document_metadata import DocumentProcessingMetadata
from documente_shared.domain.enums.document import (
    DocumentProcessingStatus,
    DocumentProcessingCategory,
    DocumentProcessingSubCategory,
    DocumentProcessingSource,
)


@dataclass
class DocumentProcessing(object):
    digest: str
    status: DocumentProcessingStatus
    file_path: Optional[str] = None
    file_bytes: Optional[bytes] = None
    category: Optional[DocumentProcessingCategory] = None
    sub_category: Optional[DocumentProcessingSubCategory] = None
    uploaded_from: Optional[DocumentProcessingSource] = None
    processed_csv_path: Optional[str] = None
    processed_csv_bytes: Optional[bytes] = None
    processed_xlsx_path: Optional[str] = None
    processed_xlsx_bytes: Optional[bytes] = None
    processed_json_path: Optional[str] = None
    processed_json_bytes: Optional[bytes] = None
    processed_metadata_path: Optional[str] = None
    processing_time: Optional[Decimal] = None
    processing_accuracy: Optional[Decimal] = None
    issued_at: Optional[datetime] = None
    uploaded_at: Optional[datetime] = None
    enqueued_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    failed_at: Optional[datetime] = None
    failed_reason: Optional[str] = None
    feedback: Optional[list | dict] = None
    completed_at: Optional[datetime] = None
    metadata: Optional[dict] = None
    metadata_items: Optional[List[DocumentProcessingMetadata]] = None

    def __post_init__(self):
        self.metadata_items = self.metadata_items or []

    @property
    def is_pending(self) -> bool:
        return self.status == DocumentProcessingStatus.PENDING

    @property
    def is_enqueued(self) -> bool:
        return self.status == DocumentProcessingStatus.ENQUEUED

    @property
    def is_processing(self) -> bool:
        return self.status == DocumentProcessingStatus.PROCESSING

    @property
    def is_completed(self) -> bool:
        return self.status == DocumentProcessingStatus.COMPLETED

    @property
    def is_incomplete(self) -> bool:
        return self.status == DocumentProcessingStatus.INCOMPLETE

    @property
    def is_failed(self) -> bool:
        return self.status == DocumentProcessingStatus.FAILED

    @property
    def is_inreview(self) -> bool:
        return self.status == DocumentProcessingStatus.IN_REVIEW

    @property
    def is_valid(self) -> bool:
        return all([
            self.digest,
            self.status,
            self.file_path,
        ])

    @property
    def is_finished(self) -> bool:
        return self.status in [
            DocumentProcessingStatus.COMPLETED,
            DocumentProcessingStatus.FAILED,
        ]

    @property
    def file_key(self) -> str:
        return remove_slash_from_path(self.file_path)

    @property
    def processed_csv_key(self) -> str:
        return remove_slash_from_path(self.processed_csv_path)

    @property
    def processed_xlsx_key(self) -> str:
        return remove_slash_from_path(self.processed_xlsx_path)

    @property
    def processed_json_key(self) -> str:
        return remove_slash_from_path(self.processed_json_path)

    @property
    def processed_csv_filename(self) -> str:
        return get_filename_from_path(self.processed_csv_path)

    @property
    def processed_xlsx_filename(self) -> str:
        return get_filename_from_path(self.processed_xlsx_path)

    @property
    def processed_json_filename(self) -> str:
        return get_filename_from_path(self.processed_json_path)

    @property
    def processed_metadata_key(self) -> str:
        return remove_slash_from_path(self.processed_metadata_path)

    @property
    def extended_filename(self) -> str:
        return self.file_path.split('/')[-1]

    @property
    def filename(self) -> str:
        filename_with_extension = self.extended_filename
        return filename_with_extension.split('.')[0]

    @property
    def metadata_items_bytes(self) -> bytes:
        metadata_items = [
            metadata_item.to_dict
            for metadata_item in self.metadata_items
        ]
        return json.dumps(metadata_items).encode('utf-8')

    @property
    def has_original_file(self) -> bool:
        return bool(self.file_path) and self.file_bytes

    @property
    def has_processed_csv(self) -> bool:
        return bool(self.processed_csv_path) and self.processed_csv_bytes

    @property
    def has_processed_xlsx(self) -> bool:
        return bool(self.processed_xlsx_path) and self.processed_xlsx_bytes

    @property
    def has_processed_json(self) -> bool:
        return bool(self.processed_json_path) and self.processed_json_bytes

    @property
    def has_processed_metadata(self) -> bool:
        return bool(self.processed_metadata_path) and self.metadata_items

    def pending(self, timezone: tzinfo = la_paz_tz):
        self.status = DocumentProcessingStatus.PENDING
        self.started_at = None
        self.uploaded_at = datetime.now(tz=timezone)

    def enqueue(self, timezone: tzinfo = la_paz_tz):
        self.status = DocumentProcessingStatus.ENQUEUED
        self.enqueued_at = datetime.now(tz=timezone)

    def processing(self, timezone: tzinfo = la_paz_tz):
        self.status = DocumentProcessingStatus.PROCESSING
        self.started_at = datetime.now(tz=timezone)

    def failed(
        self,
        error_message: Optional[str] = None,
        timezone: tzinfo = la_paz_tz,
    ):
        self.failed_reason = error_message
        self.status = DocumentProcessingStatus.FAILED
        self.failed_at = datetime.now(tz=timezone)

    def completed(self, timezone: tzinfo = la_paz_tz):
        self.status = DocumentProcessingStatus.COMPLETED
        self.completed_at = datetime.now(tz=timezone)
        self.failed_reason = None

    def incomplete(self, timezone: tzinfo = la_paz_tz):
        self.status = DocumentProcessingStatus.INCOMPLETE
        self.completed_at = datetime.now(tz=timezone)

    def deleted(self):
        self.status = DocumentProcessingStatus.DELETED

    def in_review(self):
        self.status = DocumentProcessingStatus.IN_REVIEW

    def __eq__(self, other: 'DocumentProcessing') -> bool:
        if not other:
            return False

        return (
            self.digest == other.digest
            and self.status == other.status
            and self.file_path == other.file_path
            and self.issued_at == other.issued_at
            and self.uploaded_at == other.uploaded_at
            and self.enqueued_at == other.enqueued_at
            and self.started_at == other.started_at
            and self.failed_at == other.failed_at
            and self.completed_at == other.completed_at
        )

    @property
    def to_dict(self) -> dict:
        return {
            'digest': self.digest,
            'status': str(self.status),
            'file_path': self.file_path,
            'category': (
                str(self.category)
                if self.category else None
            ),
            'sub_category': (
                str(self.sub_category)
                if self.sub_category else None
            ),
            'uploaded_from': (
                str(self.uploaded_from)
                if self.uploaded_from else None
            ),
            'processed_csv_path': self.processed_csv_path,
            'processed_xlsx_path': self.processed_xlsx_path,
            'processed_json_path': self.processed_json_path,
            'processed_metadata_path': self.processed_metadata_path,
            'processing_time': (
                str(self.processing_time.quantize(Decimal('0.00001')))
                if self.processing_time else None
            ),
            'processing_accuracy': (
                str(self.processing_accuracy.quantize(Decimal('0.00001')))
                if self.processing_accuracy else None
            ),
            'issued_at': self.issued_at.isoformat() if self.issued_at else None,
            'uploaded_at': self.uploaded_at.isoformat() if self.uploaded_at else None,
            'enqueued_at': self.enqueued_at.isoformat() if self.enqueued_at else None,
            'started_at': self.started_at.isoformat() if self.started_at else None,
            'failed_at': self.failed_at.isoformat() if self.failed_at else None,
            'failed_reason': self.failed_reason,
            'feedback': self.feedback,
            'metadata': self.metadata,
            'completed_at': self.completed_at.isoformat() if self.completed_at else None,
            'metadata_items': [metadata.to_dict for metadata in self.metadata_items],
        }

    @property
    def to_simple_dict(self) -> dict:
        simple_dict = self.to_dict.copy()
        simple_dict.pop('metadata_items')
        return simple_dict

    def overload(
        self,
        new_instance: 'DocumentProcessing',
        properties: List[str] = None,
    ):
        instance_properties = properties or [
            'status',
            'metadata',
            'file_path',
            'file_bytes',
            'category',
            'sub_category',
            'uploaded_from',
            'processed_csv_path',
            'processed_csv_bytes',
            'processed_xlsx_path',
            'processed_xlsx_bytes',
            'processed_json_path',
            'processed_json_bytes',
            'processed_metadata_path',
            'processed_metadata_bytes',
            'processing_time',
            'processing_accuracy',
            'issued_at',
            'uploaded_at',
            'enqueued_at',
            'started_at',
            'failed_at',
            'failed_reason',
            'feedback',
            'metadata',
            'completed_at',
        ]
        for _property in instance_properties:
            property_value = getattr(new_instance, _property)
            if not hasattr(self, _property):
                continue
            setattr(self, _property, property_value)
        return self

    @classmethod
    def from_dict(cls, data: dict) -> 'DocumentProcessing':
        return cls(
            digest=data.get('digest'),
            status=DocumentProcessingStatus.from_value(data.get('status')),
            file_path=data.get('file_path'),
            category=(
                DocumentProcessingCategory.from_value(data.get('category'))
                if data.get('category') else None
            ),
            sub_category=(
                DocumentProcessingSubCategory.from_value(data.get('sub_category'))
                if data.get('sub_category') else None
            ),
            uploaded_from=(
                DocumentProcessingSource.from_value(data.get('uploaded_from'))
                if data.get('uploaded_from') else None
            ),
            processed_csv_path=data.get('processed_csv_path'),
            processed_xlsx_path=data.get('processed_xlsx_path'),
            processed_json_path=data.get('processed_json_path'),
            processed_metadata_path=data.get('processed_metadata_path'),
            processing_time=(
                Decimal(data.get('processing_time'))
                if data.get('processing_time') else None
            ),
            processing_accuracy=(
                Decimal(data.get('processing_accuracy'))
                if data.get('processing_accuracy') else None
            ),
            issued_at=get_datetime_from_data(input_datetime=data.get('issued_at')),
            uploaded_at=get_datetime_from_data(input_datetime=data.get('uploaded_at')),
            enqueued_at=get_datetime_from_data(input_datetime=data.get('enqueued_at')),
            started_at=get_datetime_from_data(input_datetime=data.get('started_at')),
            failed_at=get_datetime_from_data(input_datetime=data.get('failed_at')),
            failed_reason=data.get('failed_reason'),
            feedback=data.get('feedback'),
            metadata=data.get('metadata', {}),
            completed_at=get_datetime_from_data(input_datetime=data.get('completed_at')),
            metadata_items=[
                DocumentProcessingMetadata.from_dict(metadata)
                for metadata in data.get('metadata_items', [])
            ],
        )
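For orientation, below is a minimal usage sketch of the DocumentProcessing entity shown in the hunk above. It assumes the documente_shared package is installed and importable as laid out in the file list; the digest and file path values are hypothetical, chosen only to illustrate the lifecycle methods and the to_dict/from_dict round trip.

# Minimal sketch; assumes documente_shared is installed. Digest and path are hypothetical.
from documente_shared.domain.entities.document import DocumentProcessing
from documente_shared.domain.enums.document import DocumentProcessingStatus

# Build an entity in its initial state and walk it through the lifecycle
# methods defined on the dataclass above.
doc = DocumentProcessing(
    digest='abc123',                      # hypothetical content digest
    status=DocumentProcessingStatus.PENDING,
    file_path='uploads/invoice-001.pdf',  # hypothetical storage key
)
doc.enqueue()     # status -> ENQUEUED, enqueued_at stamped (la_paz_tz by default)
doc.processing()  # status -> PROCESSING, started_at stamped
doc.completed()   # status -> COMPLETED, completed_at stamped, failed_reason cleared

# to_dict is a property, not a method, so there are no call parentheses;
# from_dict rebuilds an instance from the serialized form.
payload = doc.to_dict
restored = DocumentProcessing.from_dict(payload)
assert restored.is_completed  # __eq__ would further compare digest, file_path and timestamps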