documente_shared 0.1.52-py3-none-any.whl → 0.1.53-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

Potentially problematic release: this version of documente_shared might be problematic.

@@ -1,268 +1,278 @@
- from dataclasses import dataclass
- from datetime import datetime, tzinfo
- from decimal import Decimal
- from typing import Optional, List
-
- from documente_shared.application.time_utils import get_datetime_from_data
- from documente_shared.domain.constants import la_paz_tz
- from documente_shared.domain.entities.document_metadata import DocumentProcessingMetadata
- from documente_shared.domain.enums import (
-     DocumentProcessingStatus,
-     DocumentProcessingSubCategory,
-     DocumentProcessingCategory,
- )
-
- def remove_slash_from_path(path: str) -> str:
-     if path and path.startswith('/'):
-         return path[1:]
-     return path
-
- @dataclass
- class DocumentProcessing(object):
-     digest: str
-     status: DocumentProcessingStatus
-     file_path: Optional[str] = None
-     file_bytes: Optional[bytes] = None
-     category: Optional[DocumentProcessingCategory] = None
-     sub_category: Optional[DocumentProcessingSubCategory] = None
-     processed_csv_path: Optional[str] = None
-     processed_csv_bytes: Optional[bytes] = None
-     processed_xlsx_path: Optional[str] = None
-     processed_xlsx_bytes: Optional[bytes] = None
-     processed_json_path: Optional[str] = None
-     processed_json_bytes: Optional[bytes] = None
-     processed_metadata_path: Optional[str] = None
-     processing_time: Optional[Decimal] = None
-     issued_at: Optional[datetime] = None
-     uploaded_at: Optional[datetime] = None
-     enqueued_at: Optional[datetime] = None
-     started_at: Optional[datetime] = None
-     failed_at: Optional[datetime] = None
-     failed_reason: Optional[str] = None
-     completed_at: Optional[datetime] = None
-     metadata_items: Optional[List[DocumentProcessingMetadata]] = None
-
-     def __post_init__(self):
-         self.metadata_items = self.metadata_items or []
-
-     @property
-     def is_pending(self) -> bool:
-         return self.status == DocumentProcessingStatus.PENDING
-
-     @property
-     def is_enqueued(self) -> bool:
-         return self.status == DocumentProcessingStatus.ENQUEUED
-
-     @property
-     def is_processing(self) -> bool:
-         return self.status == DocumentProcessingStatus.PROCESSING
-
-     @property
-     def is_completed(self) -> bool:
-         return self.status == DocumentProcessingStatus.COMPLETED
-
-     @property
-     def is_failed(self) -> bool:
-         return self.status == DocumentProcessingStatus.FAILED
-
-     @property
-     def is_inreview(self) -> bool:
-         return self.status == DocumentProcessingStatus.IN_REVIEW
-
-     @property
-     def is_valid(self) -> bool:
-         return all([
-             self.digest,
-             self.status,
-             self.file_path,
-         ])
-
-     @property
-     def is_finished(self) -> bool:
-         return self.status in [
-             DocumentProcessingStatus.COMPLETED,
-             DocumentProcessingStatus.FAILED,
-         ]
-
-     def enqueue(self, timezone: tzinfo = la_paz_tz):
-         self.status = DocumentProcessingStatus.ENQUEUED
-         self.enqueued_at = datetime.now(tz=timezone)
-
-     def processing(self, timezone: tzinfo = la_paz_tz):
-         self.status = DocumentProcessingStatus.PROCESSING
-         self.started_at = datetime.now(tz=timezone)
-
-     def failed(
-         self,
-         error_message: Optional[str] = None,
-         timezone: tzinfo = la_paz_tz,
-     ):
-         self.failed_reason = error_message
-         self.status = DocumentProcessingStatus.FAILED
-         self.failed_at = datetime.now(tz=timezone)
-
-     def completed(self, timezone: tzinfo = la_paz_tz):
-         self.status = DocumentProcessingStatus.COMPLETED
-         self.completed_at = datetime.now(tz=timezone)
-
-     def deleted(self):
-         self.status = DocumentProcessingStatus.DELETED
-
-     def in_review(self):
-         self.status = DocumentProcessingStatus.IN_REVIEW
-
-     @property
-     def file_key(self) -> str:
-         return remove_slash_from_path(self.file_path)
-
-     @property
-     def processed_csv_key(self) -> str:
-         return remove_slash_from_path(self.processed_csv_path)
-
-     @property
-     def processed_xlsx_key(self) -> str:
-         return remove_slash_from_path(self.processed_xlsx_path)
-
-     @property
-     def processed_json_key(self) -> str:
-         return remove_slash_from_path(self.processed_json_path)
-
-     @property
-     def processed_metadata_key(self) -> str:
-         return remove_slash_from_path(self.processed_metadata_path)
-
-     @property
-     def extended_filename(self) -> str:
-         return self.file_path.split('/')[-1]
-
-     @property
-     def filename(self) -> str:
-         filename_with_extension = self.extended_filename
-         return filename_with_extension.split('.')[0]
-
-     def __eq__(self, other: 'DocumentProcessing') -> bool:
-         if not other:
-             return False
-
-         return (
-             self.digest == other.digest
-             and self.status == other.status
-             and self.file_path == other.file_path
-             and self.issued_at == other.issued_at
-             and self.uploaded_at == other.uploaded_at
-             and self.enqueued_at == other.enqueued_at
-             and self.started_at == other.started_at
-             and self.failed_at == other.failed_at
-             and self.completed_at == other.completed_at
-         )
-
-
-     @property
-     def to_dict(self) -> dict:
-         return {
-             'digest': self.digest,
-             'status': str(self.status),
-             'file_path': self.file_path,
-             'category': (
-                 str(self.category)
-                 if self.category else None
-             ),
-             'sub_category': (
-                 str(self.sub_category)
-                 if self.sub_category else None
-             ),
-             'processed_csv_path': self.processed_csv_path,
-             'processed_xlsx_path': self.processed_xlsx_path,
-             'processed_json_path': self.processed_json_path,
-             'processed_metadata_path': self.processed_metadata_path,
-             'processing_time': (
-                 str(self.processing_time.quantize(Decimal('0.00001')))
-                 if self.processing_time else None
-             ),
-             'issued_at': self.issued_at.isoformat() if self.issued_at else None,
-             'uploaded_at': self.uploaded_at.isoformat() if self.uploaded_at else None,
-             'enqueued_at': self.enqueued_at.isoformat() if self.enqueued_at else None,
-             'started_at': self.started_at.isoformat() if self.started_at else None,
-             'failed_at': self.failed_at.isoformat() if self.failed_at else None,
-             'failed_reason': self.failed_reason,
-             'completed_at': self.completed_at.isoformat() if self.completed_at else None,
-             'metadata_items': [metadata.to_dict for metadata in self.metadata_items],
-         }
-
-     @property
-     def to_simple_dict(self) -> dict:
-         simple_dict = self.to_dict.copy()
-         simple_dict.pop('metadata_items')
-         return simple_dict
-
-     def overload(
-         self,
-         new_instance: 'DocumentProcessing',
-         properties: List[str] = None,
-     ):
-         instance_properties = properties or [
-             'status',
-             'metadata',
-             'file_path',
-             'file_bytes',
-             'category',
-             'sub_category',
-             'processed_csv_path',
-             'processed_csv_bytes',
-             'processed_xlsx_path',
-             'processed_xlsx_bytes',
-             'processed_json_path',
-             'processed_json_bytes',
-             'processed_metadata_path',
-             'processed_metadata_bytes',
-             'processing_time',
-             'issued_at',
-             'uploaded_at',
-             'enqueued_at',
-             'started_at',
-             'failed_at',
-             'failed_reason',
-             'completed_at',
-         ]
-         for _property in instance_properties:
-             property_value = getattr(new_instance, _property)
-             if not hasattr(self, _property):
-                 continue
-             setattr(self, _property, property_value)
-         return self
-
-     @classmethod
-     def from_dict(cls, data: dict) -> 'DocumentProcessing':
-         return cls(
-             digest=data.get('digest'),
-             status=DocumentProcessingStatus.from_value(data.get('status')),
-             file_path=data.get('file_path'),
-             category=(
-                 DocumentProcessingCategory.from_value(data.get('category'))
-                 if data.get('category') else None
-             ),
-             sub_category=(
-                 DocumentProcessingSubCategory.from_value(data.get('sub_category'))
-                 if data.get('sub_category') else None
-             ),
-             processed_csv_path=data.get('processed_csv_path'),
-             processed_xlsx_path=data.get('processed_xlsx_path'),
-             processed_json_path=data.get('processed_json_path'),
-             processed_metadata_path=data.get('processed_metadata_path'),
-             processing_time=(
-                 Decimal(data.get('processing_time'))
-                 if data.get('processing_time') else None
-             ),
-             issued_at=get_datetime_from_data(input_datetime=data.get('issued_at')),
-             uploaded_at=get_datetime_from_data(input_datetime=data.get('uploaded_at')),
-             enqueued_at=get_datetime_from_data(input_datetime=data.get('enqueued_at')),
-             started_at=get_datetime_from_data(input_datetime=data.get('started_at')),
-             failed_at=get_datetime_from_data(input_datetime=data.get('failed_at')),
-             failed_reason=data.get('failed_reason'),
-             completed_at=get_datetime_from_data(input_datetime=data.get('completed_at')),
-             metadata_items=[
-                 DocumentProcessingMetadata.from_dict(metadata)
-                 for metadata in data.get('metadata_items', [])
-             ],
-         )
-
+ from dataclasses import dataclass
+ from datetime import datetime, tzinfo
+ from decimal import Decimal
+ from typing import Optional, List
+
+ from documente_shared.application.time_utils import get_datetime_from_data
+ from documente_shared.domain.constants import la_paz_tz
+ from documente_shared.domain.entities.document_metadata import DocumentProcessingMetadata
+ from documente_shared.domain.enums import (
+     DocumentProcessingStatus,
+     DocumentProcessingSubCategory,
+     DocumentProcessingCategory,
+ )
+
+ def remove_slash_from_path(path: str) -> str:
+     if path and path.startswith('/'):
+         return path[1:]
+     return path
+
+ @dataclass
+ class DocumentProcessing(object):
+     digest: str
+     status: DocumentProcessingStatus
+     file_path: Optional[str] = None
+     file_bytes: Optional[bytes] = None
+     category: Optional[DocumentProcessingCategory] = None
+     sub_category: Optional[DocumentProcessingSubCategory] = None
+     processed_csv_path: Optional[str] = None
+     processed_csv_bytes: Optional[bytes] = None
+     processed_xlsx_path: Optional[str] = None
+     processed_xlsx_bytes: Optional[bytes] = None
+     processed_json_path: Optional[str] = None
+     processed_json_bytes: Optional[bytes] = None
+     processed_metadata_path: Optional[str] = None
+     processing_time: Optional[Decimal] = None
+     processing_accuracy: Optional[Decimal] = None
+     issued_at: Optional[datetime] = None
+     uploaded_at: Optional[datetime] = None
+     enqueued_at: Optional[datetime] = None
+     started_at: Optional[datetime] = None
+     failed_at: Optional[datetime] = None
+     failed_reason: Optional[str] = None
+     completed_at: Optional[datetime] = None
+     metadata_items: Optional[List[DocumentProcessingMetadata]] = None
+
+     def __post_init__(self):
+         self.metadata_items = self.metadata_items or []
+
+     @property
+     def is_pending(self) -> bool:
+         return self.status == DocumentProcessingStatus.PENDING
+
+     @property
+     def is_enqueued(self) -> bool:
+         return self.status == DocumentProcessingStatus.ENQUEUED
+
+     @property
+     def is_processing(self) -> bool:
+         return self.status == DocumentProcessingStatus.PROCESSING
+
+     @property
+     def is_completed(self) -> bool:
+         return self.status == DocumentProcessingStatus.COMPLETED
+
+     @property
+     def is_failed(self) -> bool:
+         return self.status == DocumentProcessingStatus.FAILED
+
+     @property
+     def is_inreview(self) -> bool:
+         return self.status == DocumentProcessingStatus.IN_REVIEW
+
+     @property
+     def is_valid(self) -> bool:
+         return all([
+             self.digest,
+             self.status,
+             self.file_path,
+         ])
+
+     @property
+     def is_finished(self) -> bool:
+         return self.status in [
+             DocumentProcessingStatus.COMPLETED,
+             DocumentProcessingStatus.FAILED,
+         ]
+
+     def enqueue(self, timezone: tzinfo = la_paz_tz):
+         self.status = DocumentProcessingStatus.ENQUEUED
+         self.enqueued_at = datetime.now(tz=timezone)
+
+     def processing(self, timezone: tzinfo = la_paz_tz):
+         self.status = DocumentProcessingStatus.PROCESSING
+         self.started_at = datetime.now(tz=timezone)
+
+     def failed(
+         self,
+         error_message: Optional[str] = None,
+         timezone: tzinfo = la_paz_tz,
+     ):
+         self.failed_reason = error_message
+         self.status = DocumentProcessingStatus.FAILED
+         self.failed_at = datetime.now(tz=timezone)
+
+     def completed(self, timezone: tzinfo = la_paz_tz):
+         self.status = DocumentProcessingStatus.COMPLETED
+         self.completed_at = datetime.now(tz=timezone)
+
+     def deleted(self):
+         self.status = DocumentProcessingStatus.DELETED
+
+     def in_review(self):
+         self.status = DocumentProcessingStatus.IN_REVIEW
+
+     @property
+     def file_key(self) -> str:
+         return remove_slash_from_path(self.file_path)
+
+     @property
+     def processed_csv_key(self) -> str:
+         return remove_slash_from_path(self.processed_csv_path)
+
+     @property
+     def processed_xlsx_key(self) -> str:
+         return remove_slash_from_path(self.processed_xlsx_path)
+
+     @property
+     def processed_json_key(self) -> str:
+         return remove_slash_from_path(self.processed_json_path)
+
+     @property
+     def processed_metadata_key(self) -> str:
+         return remove_slash_from_path(self.processed_metadata_path)
+
+     @property
+     def extended_filename(self) -> str:
+         return self.file_path.split('/')[-1]
+
+     @property
+     def filename(self) -> str:
+         filename_with_extension = self.extended_filename
+         return filename_with_extension.split('.')[0]
+
+     def __eq__(self, other: 'DocumentProcessing') -> bool:
+         if not other:
+             return False
+
+         return (
+             self.digest == other.digest
+             and self.status == other.status
+             and self.file_path == other.file_path
+             and self.issued_at == other.issued_at
+             and self.uploaded_at == other.uploaded_at
+             and self.enqueued_at == other.enqueued_at
+             and self.started_at == other.started_at
+             and self.failed_at == other.failed_at
+             and self.completed_at == other.completed_at
+         )
+
+
+     @property
+     def to_dict(self) -> dict:
+         return {
+             'digest': self.digest,
+             'status': str(self.status),
+             'file_path': self.file_path,
+             'category': (
+                 str(self.category)
+                 if self.category else None
+             ),
+             'sub_category': (
+                 str(self.sub_category)
+                 if self.sub_category else None
+             ),
+             'processed_csv_path': self.processed_csv_path,
+             'processed_xlsx_path': self.processed_xlsx_path,
+             'processed_json_path': self.processed_json_path,
+             'processed_metadata_path': self.processed_metadata_path,
+             'processing_time': (
+                 str(self.processing_time.quantize(Decimal('0.00001')))
+                 if self.processing_time else None
+             ),
+             'processing_accuracy': (
+                 str(self.processing_accuracy.quantize(Decimal('0.00001')))
+                 if self.processing_accuracy else None
+             ),
+             'issued_at': self.issued_at.isoformat() if self.issued_at else None,
+             'uploaded_at': self.uploaded_at.isoformat() if self.uploaded_at else None,
+             'enqueued_at': self.enqueued_at.isoformat() if self.enqueued_at else None,
+             'started_at': self.started_at.isoformat() if self.started_at else None,
+             'failed_at': self.failed_at.isoformat() if self.failed_at else None,
+             'failed_reason': self.failed_reason,
+             'completed_at': self.completed_at.isoformat() if self.completed_at else None,
+             'metadata_items': [metadata.to_dict for metadata in self.metadata_items],
+         }
+
+     @property
+     def to_simple_dict(self) -> dict:
+         simple_dict = self.to_dict.copy()
+         simple_dict.pop('metadata_items')
+         return simple_dict
+
+     def overload(
+         self,
+         new_instance: 'DocumentProcessing',
+         properties: List[str] = None,
+     ):
+         instance_properties = properties or [
+             'status',
+             'metadata',
+             'file_path',
+             'file_bytes',
+             'category',
+             'sub_category',
+             'processed_csv_path',
+             'processed_csv_bytes',
+             'processed_xlsx_path',
+             'processed_xlsx_bytes',
+             'processed_json_path',
+             'processed_json_bytes',
+             'processed_metadata_path',
+             'processed_metadata_bytes',
+             'processing_time',
+             'processing_accuracy',
+             'issued_at',
+             'uploaded_at',
+             'enqueued_at',
+             'started_at',
+             'failed_at',
+             'failed_reason',
+             'completed_at',
+         ]
+         for _property in instance_properties:
+             property_value = getattr(new_instance, _property)
+             if not hasattr(self, _property):
+                 continue
+             setattr(self, _property, property_value)
+         return self
+
+     @classmethod
+     def from_dict(cls, data: dict) -> 'DocumentProcessing':
+         return cls(
+             digest=data.get('digest'),
+             status=DocumentProcessingStatus.from_value(data.get('status')),
+             file_path=data.get('file_path'),
+             category=(
+                 DocumentProcessingCategory.from_value(data.get('category'))
+                 if data.get('category') else None
+             ),
+             sub_category=(
+                 DocumentProcessingSubCategory.from_value(data.get('sub_category'))
+                 if data.get('sub_category') else None
+             ),
+             processed_csv_path=data.get('processed_csv_path'),
+             processed_xlsx_path=data.get('processed_xlsx_path'),
+             processed_json_path=data.get('processed_json_path'),
+             processed_metadata_path=data.get('processed_metadata_path'),
+             processing_time=(
+                 Decimal(data.get('processing_time'))
+                 if data.get('processing_time') else None
+             ),
+             processing_accuracy=(
+                 Decimal(data.get('processing_accuracy'))
+                 if data.get('processing_accuracy') else None
+             ),
+             issued_at=get_datetime_from_data(input_datetime=data.get('issued_at')),
+             uploaded_at=get_datetime_from_data(input_datetime=data.get('uploaded_at')),
+             enqueued_at=get_datetime_from_data(input_datetime=data.get('enqueued_at')),
+             started_at=get_datetime_from_data(input_datetime=data.get('started_at')),
+             failed_at=get_datetime_from_data(input_datetime=data.get('failed_at')),
+             failed_reason=data.get('failed_reason'),
+             completed_at=get_datetime_from_data(input_datetime=data.get('completed_at')),
+             metadata_items=[
+                 DocumentProcessingMetadata.from_dict(metadata)
+                 for metadata in data.get('metadata_items', [])
+             ],
+         )
+
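
The only functional change from 0.1.52 to 0.1.53 is the new optional processing_accuracy field, which mirrors processing_time: it is stored as a Decimal, quantized to five decimal places and rendered as a string by to_dict, parsed from the incoming value in from_dict, and included in overload's default property list. Below is a minimal usage sketch, not taken from the package itself; the import path for DocumentProcessing is a placeholder (the diff does not name the defining module), and an explicit property list is passed to overload because the default list contains names such as 'metadata' and 'processed_metadata_bytes' that are not dataclass fields.

from decimal import Decimal

# Placeholder import path: the diff does not show which module defines DocumentProcessing.
from documente_shared.domain.entities.document_processing import DocumentProcessing
from documente_shared.domain.enums import DocumentProcessingStatus

doc = DocumentProcessing(
    digest='abc123',
    status=DocumentProcessingStatus.PENDING,
    file_path='/uploads/invoice.pdf',
    processing_accuracy=Decimal('0.987654321'),
)

# to_dict quantizes the accuracy to five decimal places and serializes it as a string,
# exactly as it already does for processing_time.
payload = doc.to_dict
assert payload['processing_accuracy'] == '0.98765'

# overload now copies 'processing_accuracy'; an explicit property list is used here
# because getattr runs before the hasattr guard, and the default list includes names
# that are not attributes of the dataclass.
revised = DocumentProcessing(
    digest='abc123',
    status=DocumentProcessingStatus.COMPLETED,
    processing_accuracy=Decimal('0.99123'),
)
doc.overload(revised, properties=['status', 'processing_accuracy'])
assert doc.processing_accuracy == Decimal('0.99123')

from_dict performs the inverse conversion, building Decimal(data.get('processing_accuracy')) when the value is present and leaving the field as None otherwise.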