documente_shared 0.1.72-py3-none-any.whl → 0.1.72b0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of documente_shared might be problematic.

Files changed (41)
  1. documente_shared/__init__.py +0 -0
  2. documente_shared/application/__init__.py +0 -0
  3. documente_shared/application/digest.py +7 -7
  4. documente_shared/application/exceptions.py +23 -23
  5. documente_shared/application/files.py +22 -22
  6. documente_shared/application/time_utils.py +13 -13
  7. documente_shared/application/timezone.py +7 -7
  8. documente_shared/domain/__init__.py +0 -0
  9. documente_shared/domain/base_enum.py +53 -53
  10. documente_shared/domain/constants.py +2 -2
  11. documente_shared/domain/entities/__init__.py +0 -0
  12. documente_shared/domain/entities/document.py +348 -348
  13. documente_shared/domain/entities/document_metadata.py +63 -63
  14. documente_shared/domain/entities/in_memory_result.py +51 -51
  15. documente_shared/domain/entities/processing_case.py +144 -144
  16. documente_shared/domain/entities/processing_case_item.py +216 -216
  17. documente_shared/domain/entities/processing_event.py +49 -49
  18. documente_shared/domain/enums/__init__.py +0 -0
  19. documente_shared/domain/enums/common.py +95 -95
  20. documente_shared/domain/enums/document.py +71 -71
  21. documente_shared/domain/enums/processing_case.py +54 -54
  22. documente_shared/domain/repositories/__init__.py +0 -0
  23. documente_shared/domain/repositories/document.py +24 -24
  24. documente_shared/domain/repositories/processing_case.py +24 -24
  25. documente_shared/domain/repositories/processing_case_item.py +29 -29
  26. documente_shared/infrastructure/__init__.py +0 -0
  27. documente_shared/infrastructure/documente_client.py +21 -0
  28. documente_shared/infrastructure/dynamo_table.py +75 -75
  29. documente_shared/infrastructure/repositories/__init__.py +0 -0
  30. documente_shared/infrastructure/repositories/dynamo_document.py +43 -43
  31. documente_shared/infrastructure/repositories/dynamo_processing_case.py +43 -43
  32. documente_shared/infrastructure/repositories/dynamo_processing_case_item.py +53 -53
  33. documente_shared/infrastructure/repositories/http_document_processing.py +41 -0
  34. documente_shared/infrastructure/repositories/http_processing_case.py +41 -0
  35. documente_shared/infrastructure/repositories/http_processing_case_item.py +53 -0
  36. documente_shared/infrastructure/s3_bucket.py +57 -57
  37. documente_shared/infrastructure/sqs_queue.py +47 -47
  38. {documente_shared-0.1.72.dist-info → documente_shared-0.1.72b0.dist-info}/METADATA +2 -1
  39. documente_shared-0.1.72b0.dist-info/RECORD +40 -0
  40. documente_shared-0.1.72.dist-info/RECORD +0 -36
  41. {documente_shared-0.1.72.dist-info → documente_shared-0.1.72b0.dist-info}/WHEEL +0 -0
documente_shared/domain/entities/document.py
@@ -1,348 +1,348 @@
- import json
- from dataclasses import dataclass
- from datetime import datetime, tzinfo
- from decimal import Decimal
- from typing import Optional, List
-
- from documente_shared.application.files import remove_slash_from_path, get_filename_from_path
- from documente_shared.application.time_utils import get_datetime_from_data
- from documente_shared.domain.constants import la_paz_tz
- from documente_shared.domain.entities.document_metadata import DocumentProcessingMetadata
- from documente_shared.domain.enums.document import (
-     DocumentProcessingStatus,
-     DocumentProcessingCategory,
-     DocumentProcessingSubCategory,
-     DocumentProcessingSource,
- )
-
-
- @dataclass
- class DocumentProcessing(object):
-     digest: str
-     status: DocumentProcessingStatus
-     file_path: Optional[str] = None
-     file_bytes: Optional[bytes] = None
-     category: Optional[DocumentProcessingCategory] = None
-     sub_category: Optional[DocumentProcessingSubCategory] = None
-     uploaded_from: Optional[DocumentProcessingSource] = None
-     processed_csv_path: Optional[str] = None
-     processed_csv_bytes: Optional[bytes] = None
-     processed_xlsx_path: Optional[str] = None
-     processed_xlsx_bytes: Optional[bytes] = None
-     processed_json_path: Optional[str] = None
-     processed_json_bytes: Optional[bytes] = None
-     processed_metadata_path: Optional[str] = None
-     processing_time: Optional[Decimal] = None
-     processing_accuracy: Optional[Decimal] = None
-     issued_at: Optional[datetime] = None
-     uploaded_at: Optional[datetime] = None
-     enqueued_at: Optional[datetime] = None
-     started_at: Optional[datetime] = None
-     failed_at: Optional[datetime] = None
-     failed_reason: Optional[str] = None
-     feedback: Optional[list | dict] = None
-     completed_at: Optional[datetime] = None
-     metadata: Optional[dict] = None
-     metadata_items: Optional[List[DocumentProcessingMetadata]] = None
-
-     def __post_init__(self):
-         self.metadata_items = self.metadata_items or []
-
-     @property
-     def is_pending(self) -> bool:
-         return self.status == DocumentProcessingStatus.PENDING
-
-     @property
-     def is_enqueued(self) -> bool:
-         return self.status == DocumentProcessingStatus.ENQUEUED
-
-     @property
-     def is_processing(self) -> bool:
-         return self.status == DocumentProcessingStatus.PROCESSING
-
-     @property
-     def is_completed(self) -> bool:
-         return self.status == DocumentProcessingStatus.COMPLETED
-
-     @property
-     def is_incomplete(self) -> bool:
-         return self.status == DocumentProcessingStatus.INCOMPLETE
-
-     @property
-     def is_failed(self) -> bool:
-         return self.status == DocumentProcessingStatus.FAILED
-
-     @property
-     def is_inreview(self) -> bool:
-         return self.status == DocumentProcessingStatus.IN_REVIEW
-
-     @property
-     def is_valid(self) -> bool:
-         return all([
-             self.digest,
-             self.status,
-             self.file_path,
-         ])
-
-     @property
-     def is_finished(self) -> bool:
-         return self.status in [
-             DocumentProcessingStatus.COMPLETED,
-             DocumentProcessingStatus.FAILED,
-         ]
-
-     @property
-     def file_key(self) -> str:
-         return remove_slash_from_path(self.file_path)
-
-     @property
-     def processed_csv_key(self) -> str:
-         return remove_slash_from_path(self.processed_csv_path)
-
-     @property
-     def processed_xlsx_key(self) -> str:
-         return remove_slash_from_path(self.processed_xlsx_path)
-
-     @property
-     def processed_json_key(self) -> str:
-         return remove_slash_from_path(self.processed_json_path)
-
-     @property
-     def processed_csv_filename(self) -> str:
-         return get_filename_from_path(self.processed_csv_path)
-
-     @property
-     def processed_xlsx_filename(self) -> str:
-         return get_filename_from_path(self.processed_xlsx_path)
-
-     @property
-     def processed_json_filename(self) -> str:
-         return get_filename_from_path(self.processed_json_path)
-
-     @property
-     def processed_metadata_key(self) -> str:
-         return remove_slash_from_path(self.processed_metadata_path)
-
-     @property
-     def extended_filename(self) -> str:
-         return self.file_path.split('/')[-1]
-
-     @property
-     def filename(self) -> str:
-         filename_with_extension = self.extended_filename
-         return filename_with_extension.split('.')[0]
-
-     @property
-     def metadata_items_bytes(self) -> bytes:
-         metadata_items = [
-             metadata_item.to_dict
-             for metadata_item in self.metadata_items
-         ]
-         return json.dumps(metadata_items).encode('utf-8')
-
-     @property
-     def has_original_file(self) -> bool:
-         return bool(self.file_path) and self.file_bytes
-
-     @property
-     def has_processed_csv(self) -> bool:
-         return bool(self.processed_csv_path) and self.processed_csv_bytes
-
-     @property
-     def has_processed_xlsx(self) -> bool:
-         return bool(self.processed_xlsx_path) and self.processed_xlsx_bytes
-
-     @property
-     def has_processed_json(self) -> bool:
-         return bool(self.processed_json_path) and self.processed_json_bytes
-
-     @property
-     def has_processed_metadata(self) -> bool:
-         return bool(self.processed_metadata_path) and self.metadata_items
-
-     def pending(self, timezone: tzinfo = la_paz_tz):
-         self.status = DocumentProcessingStatus.PENDING
-         self.started_at = None
-         self.uploaded_at = datetime.now(tz=timezone)
-
-     def enqueue(self, timezone: tzinfo = la_paz_tz):
-         self.status = DocumentProcessingStatus.ENQUEUED
-         self.enqueued_at = datetime.now(tz=timezone)
-
-     def processing(self, timezone: tzinfo = la_paz_tz):
-         self.status = DocumentProcessingStatus.PROCESSING
-         self.started_at = datetime.now(tz=timezone)
-
-     def failed(
-         self,
-         error_message: Optional[str] = None,
-         timezone: tzinfo = la_paz_tz,
-     ):
-         self.failed_reason = error_message
-         self.status = DocumentProcessingStatus.FAILED
-         self.failed_at = datetime.now(tz=timezone)
-
-     def completed(self, timezone: tzinfo = la_paz_tz):
-         self.status = DocumentProcessingStatus.COMPLETED
-         self.completed_at = datetime.now(tz=timezone)
-         self.failed_reason = None
-
-     def incomplete(self, timezone: tzinfo = la_paz_tz):
-         self.status = DocumentProcessingStatus.INCOMPLETE
-         self.completed_at = datetime.now(tz=timezone)
-
-     def deleted(self):
-         self.status = DocumentProcessingStatus.DELETED
-
-     def in_review(self):
-         self.status = DocumentProcessingStatus.IN_REVIEW
-
-     def __eq__(self, other: 'DocumentProcessing') -> bool:
-         if not other:
-             return False
-
-         return (
-             self.digest == other.digest
-             and self.status == other.status
-             and self.file_path == other.file_path
-             and self.issued_at == other.issued_at
-             and self.uploaded_at == other.uploaded_at
-             and self.enqueued_at == other.enqueued_at
-             and self.started_at == other.started_at
-             and self.failed_at == other.failed_at
-             and self.completed_at == other.completed_at
-         )
-
-     @property
-     def to_dict(self) -> dict:
-         return {
-             'digest': self.digest,
-             'status': str(self.status),
-             'file_path': self.file_path,
-             'category': (
-                 str(self.category)
-                 if self.category else None
-             ),
-             'sub_category': (
-                 str(self.sub_category)
-                 if self.sub_category else None
-             ),
-             'uploaded_from': (
-                 str(self.uploaded_from)
-                 if self.uploaded_from else None
-             ),
-             'processed_csv_path': self.processed_csv_path,
-             'processed_xlsx_path': self.processed_xlsx_path,
-             'processed_json_path': self.processed_json_path,
-             'processed_metadata_path': self.processed_metadata_path,
-             'processing_time': (
-                 str(self.processing_time.quantize(Decimal('0.00001')))
-                 if self.processing_time else None
-             ),
-             'processing_accuracy': (
-                 str(self.processing_accuracy.quantize(Decimal('0.00001')))
-                 if self.processing_accuracy else None
-             ),
-             'issued_at': self.issued_at.isoformat() if self.issued_at else None,
-             'uploaded_at': self.uploaded_at.isoformat() if self.uploaded_at else None,
-             'enqueued_at': self.enqueued_at.isoformat() if self.enqueued_at else None,
-             'started_at': self.started_at.isoformat() if self.started_at else None,
-             'failed_at': self.failed_at.isoformat() if self.failed_at else None,
-             'failed_reason': self.failed_reason,
-             'feedback': self.feedback,
-             'metadata': self.metadata,
-             'completed_at': self.completed_at.isoformat() if self.completed_at else None,
-             'metadata_items': [metadata.to_dict for metadata in self.metadata_items],
-         }
-
-     @property
-     def to_simple_dict(self) -> dict:
-         simple_dict = self.to_dict.copy()
-         simple_dict.pop('metadata_items')
-         return simple_dict
-
-     def overload(
-         self,
-         new_instance: 'DocumentProcessing',
-         properties: List[str] = None,
-     ):
-         instance_properties = properties or [
-             'status',
-             'metadata',
-             'file_path',
-             'file_bytes',
-             'category',
-             'sub_category',
-             'uploaded_from',
-             'processed_csv_path',
-             'processed_csv_bytes',
-             'processed_xlsx_path',
-             'processed_xlsx_bytes',
-             'processed_json_path',
-             'processed_json_bytes',
-             'processed_metadata_path',
-             'processed_metadata_bytes',
-             'processing_time',
-             'processing_accuracy',
-             'issued_at',
-             'uploaded_at',
-             'enqueued_at',
-             'started_at',
-             'failed_at',
-             'failed_reason',
-             'feedback',
-             'metadata',
-             'completed_at',
-         ]
-         for _property in instance_properties:
-             property_value = getattr(new_instance, _property)
-             if not hasattr(self, _property):
-                 continue
-             setattr(self, _property, property_value)
-         return self
-
-     @classmethod
-     def from_dict(cls, data: dict) -> 'DocumentProcessing':
-         return cls(
-             digest=data.get('digest'),
-             status=DocumentProcessingStatus.from_value(data.get('status')),
-             file_path=data.get('file_path'),
-             category=(
-                 DocumentProcessingCategory.from_value(data.get('category'))
-                 if data.get('category') else None
-             ),
-             sub_category=(
-                 DocumentProcessingSubCategory.from_value(data.get('sub_category'))
-                 if data.get('sub_category') else None
-             ),
-             uploaded_from=(
-                 DocumentProcessingSource.from_value(data.get('uploaded_from'))
-                 if data.get('uploaded_from') else None
-             ),
-             processed_csv_path=data.get('processed_csv_path'),
-             processed_xlsx_path=data.get('processed_xlsx_path'),
-             processed_json_path=data.get('processed_json_path'),
-             processed_metadata_path=data.get('processed_metadata_path'),
-             processing_time=(
-                 Decimal(data.get('processing_time'))
-                 if data.get('processing_time') else None
-             ),
-             processing_accuracy=(
-                 Decimal(data.get('processing_accuracy'))
-                 if data.get('processing_accuracy') else None
-             ),
-             issued_at=get_datetime_from_data(input_datetime=data.get('issued_at')),
-             uploaded_at=get_datetime_from_data(input_datetime=data.get('uploaded_at')),
-             enqueued_at=get_datetime_from_data(input_datetime=data.get('enqueued_at')),
-             started_at=get_datetime_from_data(input_datetime=data.get('started_at')),
-             failed_at=get_datetime_from_data(input_datetime=data.get('failed_at')),
-             failed_reason=data.get('failed_reason'),
-             feedback=data.get('feedback'),
-             metadata=data.get('metadata', {}),
-             completed_at=get_datetime_from_data(input_datetime=data.get('completed_at')),
-             metadata_items=[
-                 DocumentProcessingMetadata.from_dict(metadata)
-                 for metadata in data.get('metadata_items', [])
-             ],
-         )
-
+ import json
+ from dataclasses import dataclass
+ from datetime import datetime, tzinfo
+ from decimal import Decimal
+ from typing import Optional, List
+
+ from documente_shared.application.files import remove_slash_from_path, get_filename_from_path
+ from documente_shared.application.time_utils import get_datetime_from_data
+ from documente_shared.domain.constants import la_paz_tz
+ from documente_shared.domain.entities.document_metadata import DocumentProcessingMetadata
+ from documente_shared.domain.enums.document import (
+     DocumentProcessingStatus,
+     DocumentProcessingCategory,
+     DocumentProcessingSubCategory,
+     DocumentProcessingSource,
+ )
+
+
+ @dataclass
+ class DocumentProcessing(object):
+     digest: str
+     status: DocumentProcessingStatus
+     file_path: Optional[str] = None
+     file_bytes: Optional[bytes] = None
+     category: Optional[DocumentProcessingCategory] = None
+     sub_category: Optional[DocumentProcessingSubCategory] = None
+     uploaded_from: Optional[DocumentProcessingSource] = None
+     processed_csv_path: Optional[str] = None
+     processed_csv_bytes: Optional[bytes] = None
+     processed_xlsx_path: Optional[str] = None
+     processed_xlsx_bytes: Optional[bytes] = None
+     processed_json_path: Optional[str] = None
+     processed_json_bytes: Optional[bytes] = None
+     processed_metadata_path: Optional[str] = None
+     processing_time: Optional[Decimal] = None
+     processing_accuracy: Optional[Decimal] = None
+     issued_at: Optional[datetime] = None
+     uploaded_at: Optional[datetime] = None
+     enqueued_at: Optional[datetime] = None
+     started_at: Optional[datetime] = None
+     failed_at: Optional[datetime] = None
+     failed_reason: Optional[str] = None
+     feedback: Optional[list | dict] = None
+     completed_at: Optional[datetime] = None
+     metadata: Optional[dict] = None
+     metadata_items: Optional[List[DocumentProcessingMetadata]] = None
+
+     def __post_init__(self):
+         self.metadata_items = self.metadata_items or []
+
+     @property
+     def is_pending(self) -> bool:
+         return self.status == DocumentProcessingStatus.PENDING
+
+     @property
+     def is_enqueued(self) -> bool:
+         return self.status == DocumentProcessingStatus.ENQUEUED
+
+     @property
+     def is_processing(self) -> bool:
+         return self.status == DocumentProcessingStatus.PROCESSING
+
+     @property
+     def is_completed(self) -> bool:
+         return self.status == DocumentProcessingStatus.COMPLETED
+
+     @property
+     def is_incomplete(self) -> bool:
+         return self.status == DocumentProcessingStatus.INCOMPLETE
+
+     @property
+     def is_failed(self) -> bool:
+         return self.status == DocumentProcessingStatus.FAILED
+
+     @property
+     def is_inreview(self) -> bool:
+         return self.status == DocumentProcessingStatus.IN_REVIEW
+
+     @property
+     def is_valid(self) -> bool:
+         return all([
+             self.digest,
+             self.status,
+             self.file_path,
+         ])
+
+     @property
+     def is_finished(self) -> bool:
+         return self.status in [
+             DocumentProcessingStatus.COMPLETED,
+             DocumentProcessingStatus.FAILED,
+         ]
+
+     @property
+     def file_key(self) -> str:
+         return remove_slash_from_path(self.file_path)
+
+     @property
+     def processed_csv_key(self) -> str:
+         return remove_slash_from_path(self.processed_csv_path)
+
+     @property
+     def processed_xlsx_key(self) -> str:
+         return remove_slash_from_path(self.processed_xlsx_path)
+
+     @property
+     def processed_json_key(self) -> str:
+         return remove_slash_from_path(self.processed_json_path)
+
+     @property
+     def processed_csv_filename(self) -> str:
+         return get_filename_from_path(self.processed_csv_path)
+
+     @property
+     def processed_xlsx_filename(self) -> str:
+         return get_filename_from_path(self.processed_xlsx_path)
+
+     @property
+     def processed_json_filename(self) -> str:
+         return get_filename_from_path(self.processed_json_path)
+
+     @property
+     def processed_metadata_key(self) -> str:
+         return remove_slash_from_path(self.processed_metadata_path)
+
+     @property
+     def extended_filename(self) -> str:
+         return self.file_path.split('/')[-1]
+
+     @property
+     def filename(self) -> str:
+         filename_with_extension = self.extended_filename
+         return filename_with_extension.split('.')[0]
+
+     @property
+     def metadata_items_bytes(self) -> bytes:
+         metadata_items = [
+             metadata_item.to_dict
+             for metadata_item in self.metadata_items
+         ]
+         return json.dumps(metadata_items).encode('utf-8')
+
+     @property
+     def has_original_file(self) -> bool:
+         return bool(self.file_path) and self.file_bytes
+
+     @property
+     def has_processed_csv(self) -> bool:
+         return bool(self.processed_csv_path) and self.processed_csv_bytes
+
+     @property
+     def has_processed_xlsx(self) -> bool:
+         return bool(self.processed_xlsx_path) and self.processed_xlsx_bytes
+
+     @property
+     def has_processed_json(self) -> bool:
+         return bool(self.processed_json_path) and self.processed_json_bytes
+
+     @property
+     def has_processed_metadata(self) -> bool:
+         return bool(self.processed_metadata_path) and self.metadata_items
+
+     def pending(self, timezone: tzinfo = la_paz_tz):
+         self.status = DocumentProcessingStatus.PENDING
+         self.started_at = None
+         self.uploaded_at = datetime.now(tz=timezone)
+
+     def enqueue(self, timezone: tzinfo = la_paz_tz):
+         self.status = DocumentProcessingStatus.ENQUEUED
+         self.enqueued_at = datetime.now(tz=timezone)
+
+     def processing(self, timezone: tzinfo = la_paz_tz):
+         self.status = DocumentProcessingStatus.PROCESSING
+         self.started_at = datetime.now(tz=timezone)
+
+     def failed(
+         self,
+         error_message: Optional[str] = None,
+         timezone: tzinfo = la_paz_tz,
+     ):
+         self.failed_reason = error_message
+         self.status = DocumentProcessingStatus.FAILED
+         self.failed_at = datetime.now(tz=timezone)
+
+     def completed(self, timezone: tzinfo = la_paz_tz):
+         self.status = DocumentProcessingStatus.COMPLETED
+         self.completed_at = datetime.now(tz=timezone)
+         self.failed_reason = None
+
+     def incomplete(self, timezone: tzinfo = la_paz_tz):
+         self.status = DocumentProcessingStatus.INCOMPLETE
+         self.completed_at = datetime.now(tz=timezone)
+
+     def deleted(self):
+         self.status = DocumentProcessingStatus.DELETED
+
+     def in_review(self):
+         self.status = DocumentProcessingStatus.IN_REVIEW
+
+     def __eq__(self, other: 'DocumentProcessing') -> bool:
+         if not other:
+             return False
+
+         return (
+             self.digest == other.digest
+             and self.status == other.status
+             and self.file_path == other.file_path
+             and self.issued_at == other.issued_at
+             and self.uploaded_at == other.uploaded_at
+             and self.enqueued_at == other.enqueued_at
+             and self.started_at == other.started_at
+             and self.failed_at == other.failed_at
+             and self.completed_at == other.completed_at
+         )
+
+     @property
+     def to_dict(self) -> dict:
+         return {
+             'digest': self.digest,
+             'status': str(self.status),
+             'file_path': self.file_path,
+             'category': (
+                 str(self.category)
+                 if self.category else None
+             ),
+             'sub_category': (
+                 str(self.sub_category)
+                 if self.sub_category else None
+             ),
+             'uploaded_from': (
+                 str(self.uploaded_from)
+                 if self.uploaded_from else None
+             ),
+             'processed_csv_path': self.processed_csv_path,
+             'processed_xlsx_path': self.processed_xlsx_path,
+             'processed_json_path': self.processed_json_path,
+             'processed_metadata_path': self.processed_metadata_path,
+             'processing_time': (
+                 str(self.processing_time.quantize(Decimal('0.00001')))
+                 if self.processing_time else None
+             ),
+             'processing_accuracy': (
+                 str(self.processing_accuracy.quantize(Decimal('0.00001')))
+                 if self.processing_accuracy else None
+             ),
+             'issued_at': self.issued_at.isoformat() if self.issued_at else None,
+             'uploaded_at': self.uploaded_at.isoformat() if self.uploaded_at else None,
+             'enqueued_at': self.enqueued_at.isoformat() if self.enqueued_at else None,
+             'started_at': self.started_at.isoformat() if self.started_at else None,
+             'failed_at': self.failed_at.isoformat() if self.failed_at else None,
+             'failed_reason': self.failed_reason,
+             'feedback': self.feedback,
+             'metadata': self.metadata,
+             'completed_at': self.completed_at.isoformat() if self.completed_at else None,
+             'metadata_items': [metadata.to_dict for metadata in self.metadata_items],
+         }
+
+     @property
+     def to_simple_dict(self) -> dict:
+         simple_dict = self.to_dict.copy()
+         simple_dict.pop('metadata_items')
+         return simple_dict
+
+     def overload(
+         self,
+         new_instance: 'DocumentProcessing',
+         properties: List[str] = None,
+     ):
+         instance_properties = properties or [
+             'status',
+             'metadata',
+             'file_path',
+             'file_bytes',
+             'category',
+             'sub_category',
+             'uploaded_from',
+             'processed_csv_path',
+             'processed_csv_bytes',
+             'processed_xlsx_path',
+             'processed_xlsx_bytes',
+             'processed_json_path',
+             'processed_json_bytes',
+             'processed_metadata_path',
+             'processed_metadata_bytes',
+             'processing_time',
+             'processing_accuracy',
+             'issued_at',
+             'uploaded_at',
+             'enqueued_at',
+             'started_at',
+             'failed_at',
+             'failed_reason',
+             'feedback',
+             'metadata',
+             'completed_at',
+         ]
+         for _property in instance_properties:
+             property_value = getattr(new_instance, _property)
+             if not hasattr(self, _property):
+                 continue
+             setattr(self, _property, property_value)
+         return self
+
+     @classmethod
+     def from_dict(cls, data: dict) -> 'DocumentProcessing':
+         return cls(
+             digest=data.get('digest'),
+             status=DocumentProcessingStatus.from_value(data.get('status')),
+             file_path=data.get('file_path'),
+             category=(
+                 DocumentProcessingCategory.from_value(data.get('category'))
+                 if data.get('category') else None
+             ),
+             sub_category=(
+                 DocumentProcessingSubCategory.from_value(data.get('sub_category'))
+                 if data.get('sub_category') else None
+             ),
+             uploaded_from=(
+                 DocumentProcessingSource.from_value(data.get('uploaded_from'))
+                 if data.get('uploaded_from') else None
+             ),
+             processed_csv_path=data.get('processed_csv_path'),
+             processed_xlsx_path=data.get('processed_xlsx_path'),
+             processed_json_path=data.get('processed_json_path'),
+             processed_metadata_path=data.get('processed_metadata_path'),
+             processing_time=(
+                 Decimal(data.get('processing_time'))
+                 if data.get('processing_time') else None
+             ),
+             processing_accuracy=(
+                 Decimal(data.get('processing_accuracy'))
+                 if data.get('processing_accuracy') else None
+             ),
+             issued_at=get_datetime_from_data(input_datetime=data.get('issued_at')),
+             uploaded_at=get_datetime_from_data(input_datetime=data.get('uploaded_at')),
+             enqueued_at=get_datetime_from_data(input_datetime=data.get('enqueued_at')),
+             started_at=get_datetime_from_data(input_datetime=data.get('started_at')),
+             failed_at=get_datetime_from_data(input_datetime=data.get('failed_at')),
+             failed_reason=data.get('failed_reason'),
+             feedback=data.get('feedback'),
+             metadata=data.get('metadata', {}),
+             completed_at=get_datetime_from_data(input_datetime=data.get('completed_at')),
+             metadata_items=[
+                 DocumentProcessingMetadata.from_dict(metadata)
+                 for metadata in data.get('metadata_items', [])
+             ],
+         )
+
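
For reference, a minimal usage sketch of the DocumentProcessing entity shown in the diff above. The import paths follow the module layout listed under "Files changed"; the digest and file path values are placeholders, and the round trip assumes DocumentProcessingStatus.from_value accepts the serialized status string, which is not verified here.

    from documente_shared.domain.entities.document import DocumentProcessing
    from documente_shared.domain.enums.document import DocumentProcessingStatus

    # Placeholder digest and file path; digest, status and file_path are the
    # three fields is_valid checks for.
    doc = DocumentProcessing(
        digest='abc123',
        status=DocumentProcessingStatus.PENDING,
        file_path='uploads/invoice.pdf',
    )

    doc.enqueue()            # sets status to ENQUEUED and stamps enqueued_at (La Paz tz by default)
    assert doc.is_enqueued

    payload = doc.to_dict    # plain-dict form of the entity
    # Assumes from_value() understands str(status) from the serialized payload.
    restored = DocumentProcessing.from_dict(payload)
    assert restored.digest == doc.digest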