documente_shared 0.1.72__py3-none-any.whl → 0.1.72b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of documente_shared might be problematic; see the registry's advisory page for more details.

Files changed (41)
  1. documente_shared/__init__.py +0 -0
  2. documente_shared/application/__init__.py +0 -0
  3. documente_shared/application/digest.py +7 -7
  4. documente_shared/application/exceptions.py +23 -23
  5. documente_shared/application/files.py +22 -22
  6. documente_shared/application/time_utils.py +13 -13
  7. documente_shared/application/timezone.py +7 -7
  8. documente_shared/domain/__init__.py +0 -0
  9. documente_shared/domain/base_enum.py +53 -53
  10. documente_shared/domain/constants.py +2 -2
  11. documente_shared/domain/entities/__init__.py +0 -0
  12. documente_shared/domain/entities/document.py +348 -348
  13. documente_shared/domain/entities/document_metadata.py +63 -63
  14. documente_shared/domain/entities/in_memory_result.py +51 -51
  15. documente_shared/domain/entities/processing_case.py +144 -144
  16. documente_shared/domain/entities/processing_case_item.py +216 -216
  17. documente_shared/domain/entities/processing_event.py +49 -49
  18. documente_shared/domain/enums/__init__.py +0 -0
  19. documente_shared/domain/enums/common.py +95 -95
  20. documente_shared/domain/enums/document.py +71 -71
  21. documente_shared/domain/enums/processing_case.py +54 -54
  22. documente_shared/domain/repositories/__init__.py +0 -0
  23. documente_shared/domain/repositories/document.py +24 -24
  24. documente_shared/domain/repositories/processing_case.py +24 -24
  25. documente_shared/domain/repositories/processing_case_item.py +29 -29
  26. documente_shared/infrastructure/__init__.py +0 -0
  27. documente_shared/infrastructure/documente_client.py +21 -0
  28. documente_shared/infrastructure/dynamo_table.py +75 -75
  29. documente_shared/infrastructure/repositories/__init__.py +0 -0
  30. documente_shared/infrastructure/repositories/dynamo_document.py +43 -43
  31. documente_shared/infrastructure/repositories/dynamo_processing_case.py +43 -43
  32. documente_shared/infrastructure/repositories/dynamo_processing_case_item.py +53 -53
  33. documente_shared/infrastructure/repositories/http_document_processing.py +41 -0
  34. documente_shared/infrastructure/repositories/http_processing_case.py +41 -0
  35. documente_shared/infrastructure/repositories/http_processing_case_item.py +53 -0
  36. documente_shared/infrastructure/s3_bucket.py +57 -57
  37. documente_shared/infrastructure/sqs_queue.py +47 -47
  38. {documente_shared-0.1.72.dist-info → documente_shared-0.1.72b0.dist-info}/METADATA +2 -1
  39. documente_shared-0.1.72b0.dist-info/RECORD +40 -0
  40. documente_shared-0.1.72.dist-info/RECORD +0 -36
  41. {documente_shared-0.1.72.dist-info → documente_shared-0.1.72b0.dist-info}/WHEEL +0 -0
from dataclasses import dataclass
from datetime import datetime, tzinfo
from decimal import Decimal
from typing import Optional, List

from documente_shared.application.time_utils import get_datetime_from_data
from documente_shared.domain.constants import la_paz_tz
from documente_shared.domain.entities.in_memory_result import InMemoryDocument
from documente_shared.domain.enums.common import ProcessingStatus, ProcessingSource
from documente_shared.domain.enums.processing_case import ProcessingDocumentType


@dataclass
class ProcessingCaseItem(object):
    """One document inside a processing case.

    Tracks the item's lifecycle status, the source document, the
    generated artifacts (CSV / XLSX / JSON), timing/confidence figures,
    and free-form feedback/metadata.  Round-trips through ``to_dict`` /
    ``from_dict``.
    """
    uuid: str
    case_id: str
    digest: str
    status: ProcessingStatus
    document_type: ProcessingDocumentType
    document: InMemoryDocument
    uploaded_from: Optional[ProcessingSource] = None
    processed_csv: Optional[InMemoryDocument] = None
    processed_xlsx: Optional[InMemoryDocument] = None
    processed_json: Optional[InMemoryDocument] = None
    processing_time: Optional[Decimal] = None
    processing_confidence: Optional[Decimal] = None
    uploaded_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    failed_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    feedback: Optional[list | dict] = None
    metadata: Optional[dict] = None

    def __post_init__(self):
        # Normalize the optional containers so downstream code never
        # sees None (a falsy value is replaced as well, as before).
        self.feedback = self.feedback if self.feedback else []
        self.metadata = self.metadata if self.metadata else {}

    @property
    def is_procesable(self) -> bool:
        """Whether this item is ready to be picked up for processing."""
        awaiting_work = self.status.is_pending or self.status.is_enqueued
        return (
            awaiting_work
            and self.digest
            and self.document
            and self.document.is_procesable
        )

    @property
    def is_finished(self) -> bool:
        """True once the item reached a terminal status."""
        return self.status in [
            ProcessingStatus.COMPLETED,
            ProcessingStatus.FAILED,
        ]

    def pending(self, timezone: tzinfo = la_paz_tz):
        """Reset the item to PENDING and clear its start time."""
        self.status = ProcessingStatus.PENDING
        self.started_at = None

    def processing(self, timezone: tzinfo = la_paz_tz):
        """Mark the item as PROCESSING, stamping the start time."""
        self.status = ProcessingStatus.PROCESSING
        self.started_at = datetime.now(tz=timezone)

    def failed(
        self,
        error_message: Optional[str] = None,
        timezone: tzinfo = la_paz_tz,
    ):
        """Mark the item as FAILED, stamping the failure time.

        NOTE(review): ``error_message`` is accepted but not stored
        anywhere — presumably for interface symmetry; confirm with callers.
        """
        self.status = ProcessingStatus.FAILED
        self.failed_at = datetime.now(tz=timezone)

    def completed(self, timezone: tzinfo = la_paz_tz):
        """Mark the item as COMPLETED, stamping the completion time."""
        self.status = ProcessingStatus.COMPLETED
        self.completed_at = datetime.now(tz=timezone)

    def incomplete(self, timezone: tzinfo = la_paz_tz):
        """Mark the item as INCOMPLETE (also stamps completed_at)."""
        self.status = ProcessingStatus.INCOMPLETE
        self.completed_at = datetime.now(tz=timezone)

    def deleted(self):
        """Mark the item as DELETED."""
        self.status = ProcessingStatus.DELETED

    def in_review(self):
        """Mark the item as IN_REVIEW."""
        self.status = ProcessingStatus.IN_REVIEW

    def __eq__(self, other: 'ProcessingCaseItem') -> bool:
        # Value equality over the identifying and lifecycle fields;
        # feedback/metadata are deliberately excluded.
        if not other:
            return False

        mine = (
            self.uuid,
            self.digest,
            self.status,
            self.document_type,
            self.document,
            self.processing_time,
            self.processing_confidence,
            self.uploaded_at,
            self.started_at,
            self.failed_at,
            self.completed_at,
        )
        theirs = (
            other.uuid,
            other.digest,
            other.status,
            other.document_type,
            other.document,
            other.processing_time,
            other.processing_confidence,
            other.uploaded_at,
            other.started_at,
            other.failed_at,
            other.completed_at,
        )
        return mine == theirs

    @property
    def to_dict(self) -> dict:
        """Serialize to a plain dict (enums as str, Decimals quantized
        to 3 places, datetimes in ISO-8601, missing values as None)."""
        def doc_dict(item: Optional[InMemoryDocument]):
            return item.to_dict if item else None

        def quantized(number: Optional[Decimal]):
            return str(number.quantize(Decimal('0.001'))) if number else None

        def iso(moment: Optional[datetime]):
            return moment.isoformat() if moment else None

        return {
            'uuid': self.uuid,
            'case_id': self.case_id,
            'digest': self.digest,
            'status': str(self.status),
            'document': self.document.to_dict,
            'document_type': str(self.document_type),
            'uploaded_from': str(self.uploaded_from) if self.uploaded_from else None,
            'processed_csv': doc_dict(self.processed_csv),
            'processed_xlsx': doc_dict(self.processed_xlsx),
            'processed_json': doc_dict(self.processed_json),
            'processing_time': quantized(self.processing_time),
            'processing_confidence': quantized(self.processing_confidence),
            'uploaded_at': iso(self.uploaded_at),
            'started_at': iso(self.started_at),
            'failed_at': iso(self.failed_at),
            'feedback': self.feedback,
            'metadata': self.metadata,
            'completed_at': iso(self.completed_at),
        }

    @property
    def to_simple_dict(self) -> dict:
        """Shallow copy of ``to_dict`` (kept for interface compatibility)."""
        return dict(self.to_dict)

    def overload(
        self,
        new_instance: 'ProcessingCaseItem',
        properties: List[str] = None,
    ):
        """Copy the named attributes from ``new_instance`` onto self.

        With no explicit ``properties``, every mutable field except the
        identifiers (uuid, case_id, digest) is copied.  Returns self.
        """
        attrs = properties if properties else [
            'status',
            'document_type',
            'document',
            'uploaded_from',
            'processed_csv',
            'processed_xlsx',
            'processed_json',
            'processing_time',
            'processing_confidence',
            'uploaded_at',
            'started_at',
            'failed_at',
            'completed_at',
            'feedback',
            'metadata',
        ]
        for attr in attrs:
            value = getattr(new_instance, attr)
            # Skip names this instance does not have (defensive, mirrors
            # the copy semantics for caller-supplied property lists).
            if hasattr(self, attr):
                setattr(self, attr, value)
        return self

    @classmethod
    def from_dict(cls, data: dict) -> 'ProcessingCaseItem':
        """Inverse of ``to_dict``: rebuild an item from a plain dict.

        Falsy/absent optional fields deserialize to None.
        """
        def optional_doc(key: str):
            raw = data.get(key)
            return InMemoryDocument.from_dict(raw) if raw else None

        def optional_decimal(key: str):
            raw = data.get(key)
            return Decimal(raw) if raw else None

        raw_source = data.get('uploaded_from')
        return cls(
            uuid=data.get('uuid'),
            case_id=data.get('case_id'),
            digest=data.get('digest'),
            status=ProcessingStatus.from_value(data.get('status')),
            document=InMemoryDocument.from_dict(data.get('document')),
            document_type=ProcessingDocumentType.from_value(data.get('document_type')),
            uploaded_from=(
                ProcessingSource.from_value(raw_source) if raw_source else None
            ),
            processed_csv=optional_doc('processed_csv'),
            processed_xlsx=optional_doc('processed_xlsx'),
            processed_json=optional_doc('processed_json'),
            processing_time=optional_decimal('processing_time'),
            processing_confidence=optional_decimal('processing_confidence'),
            uploaded_at=get_datetime_from_data(input_datetime=data.get('uploaded_at')),
            started_at=get_datetime_from_data(input_datetime=data.get('started_at')),
            failed_at=get_datetime_from_data(input_datetime=data.get('failed_at')),
            feedback=data.get('feedback'),
            metadata=data.get('metadata', {}),
            completed_at=get_datetime_from_data(input_datetime=data.get('completed_at')),
        )
from dataclasses import dataclass
from datetime import datetime
from typing import Optional

from documente_shared.application.time_utils import get_datetime_from_data
from documente_shared.domain.entities.document import DocumentProcessing
from documente_shared.domain.entities.processing_case import ProcessingCase
from documente_shared.domain.enums.common import ProcessingType


@dataclass
class ProcessingEvent(object):
    """Envelope pairing a processing type with the entity it refers to.

    ``instance`` holds either a DocumentProcessing or a ProcessingCase
    (selected by ``processing_type`` during deserialization), or None
    when the type matches neither.
    """
    processing_type: ProcessingType
    instance: DocumentProcessing | ProcessingCase | None
    timestamp: Optional[datetime] = None

    def __eq__(self, other: 'ProcessingEvent') -> bool:
        # Timestamp is intentionally excluded from equality.
        if not other:
            return False

        same_type = self.processing_type == other.processing_type
        return same_type and self.instance == other.instance

    @property
    def to_dict(self) -> dict:
        """Serialize the event (enum as str, timestamp as ISO-8601)."""
        serialized_timestamp = (
            self.timestamp.isoformat() if self.timestamp else None
        )
        return {
            'processing_type': str(self.processing_type),
            'instance': self.instance.to_dict,
            'timestamp': serialized_timestamp,
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'ProcessingEvent':
        """Rebuild an event, dispatching ``instance`` on the type flags."""
        processing_type = ProcessingType.from_value(data.get('processing_type'))
        raw_instance = data.get('instance')

        if processing_type.is_document:
            instance = DocumentProcessing.from_dict(raw_instance)
        elif processing_type.is_processing_case:
            instance = ProcessingCase.from_dict(raw_instance)
        else:
            instance = None

        return cls(
            processing_type=processing_type,
            instance=instance,
            timestamp=get_datetime_from_data(input_datetime=data.get('timestamp')),
        )
File without changes