documente_shared 0.1.26__py3-none-any.whl → 0.1.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of documente_shared might be problematic. Click here for more details.
- documente_shared/domain/entities/__init__.py +0 -0
- documente_shared/domain/{entities.py → entities/document_process.py} +11 -1
- documente_shared/domain/entities/document_process_metadata.py +65 -0
- {documente_shared-0.1.26.dist-info → documente_shared-0.1.28.dist-info}/METADATA +1 -1
- {documente_shared-0.1.26.dist-info → documente_shared-0.1.28.dist-info}/RECORD +6 -4
- {documente_shared-0.1.26.dist-info → documente_shared-0.1.28.dist-info}/WHEEL +0 -0
|
File without changes
|
|
@@ -4,6 +4,7 @@ from decimal import Decimal
|
|
|
4
4
|
from typing import Optional, List
|
|
5
5
|
|
|
6
6
|
from documente_shared.application.time_utils import get_datetime_from_data
|
|
7
|
+
from documente_shared.domain.entities.document_process_metadata import DocumentProcessMetadata
|
|
7
8
|
from documente_shared.domain.enums import (
|
|
8
9
|
DocumentProcessStatus,
|
|
9
10
|
DocumentProcessSubCategory,
|
|
@@ -33,6 +34,10 @@ class DocumentProcess(object):
|
|
|
33
34
|
started_at: Optional[datetime] = None
|
|
34
35
|
failed_at: Optional[datetime] = None
|
|
35
36
|
completed_at: Optional[datetime] = None
|
|
37
|
+
metadata_items: Optional[List[DocumentProcessMetadata]] = None
|
|
38
|
+
|
|
39
|
+
def __post_init__(self):
|
|
40
|
+
self.metadata_items = self.metadata_items or []
|
|
36
41
|
|
|
37
42
|
@property
|
|
38
43
|
def is_pending(self) -> bool:
|
|
@@ -127,6 +132,7 @@ class DocumentProcess(object):
|
|
|
127
132
|
'started_at': self.started_at.isoformat() if self.started_at else None,
|
|
128
133
|
'failed_at': self.failed_at.isoformat() if self.failed_at else None,
|
|
129
134
|
'completed_at': self.completed_at.isoformat() if self.completed_at else None,
|
|
135
|
+
'metadata_items': [metadata.to_dict for metadata in self.metadata_items],
|
|
130
136
|
}
|
|
131
137
|
|
|
132
138
|
def overload(
|
|
@@ -135,7 +141,7 @@ class DocumentProcess(object):
|
|
|
135
141
|
properties: List[str] = None,
|
|
136
142
|
):
|
|
137
143
|
instance_properties = properties or [
|
|
138
|
-
'status', 'file_path', 'file_bytes', 'category', 'sub_category',
|
|
144
|
+
'status', 'metadata', 'file_path', 'file_bytes', 'category', 'sub_category',
|
|
139
145
|
'processed_csv_path', 'processed_csv_bytes', 'processed_xlsx_path',
|
|
140
146
|
'processed_xlsx_bytes', 'processing_time', 'uploaded_at',
|
|
141
147
|
'enqueued_at', 'started_at', 'failed_at', 'completed_at',
|
|
@@ -172,4 +178,8 @@ class DocumentProcess(object):
|
|
|
172
178
|
started_at=get_datetime_from_data(input_datetime=data.get('started_at')),
|
|
173
179
|
failed_at=get_datetime_from_data(input_datetime=data.get('failed_at')),
|
|
174
180
|
completed_at=get_datetime_from_data(input_datetime=data.get('completed_at')),
|
|
181
|
+
metadata_items=[
|
|
182
|
+
DocumentProcessMetadata.from_dict(metadata)
|
|
183
|
+
for metadata in data.get('metadata_items', [])
|
|
184
|
+
],
|
|
175
185
|
)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class DocumentProcessMetadata(object):
    """Optional metadata fields attached to a document process.

    Every field defaults to ``None``, so an instance can be built from a
    partial payload. ``to_dict`` and ``from_dict`` convert between an
    instance and a plain dictionary that uses the field names as keys.
    """

    publication_date: Optional[datetime] = None
    num_circular: Optional[str] = None
    asfi_identifier: Optional[str] = None
    contains_tables: Optional[bool] = None
    text_content: Optional[str] = None
    case_name: Optional[str] = None
    starting_office: Optional[str] = None
    output_json: Optional[dict] = None
    processing_time: Optional[float] = None
    llm_model: Optional[str] = None
    num_pages: Optional[float] = None
    num_tokens: Optional[float] = None
    # NOTE(review): 'citcular_type' looks like a misspelling of
    # 'circular_type'. The spelling is kept because it is both the public
    # attribute name and the serialized dictionary key.
    citcular_type: Optional[str] = None

    @property
    def to_dict(self) -> dict:
        """Return the metadata as a plain dictionary.

        This is a property, so it is read as ``instance.to_dict`` without
        parentheses. ``publication_date`` is emitted as an ISO 8601 string
        (or ``None`` when unset); all other fields are copied as-is.
        """
        publication_date = (
            self.publication_date.isoformat()
            if self.publication_date
            else None
        )
        return {
            'publication_date': publication_date,
            'num_circular': self.num_circular,
            'asfi_identifier': self.asfi_identifier,
            'contains_tables': self.contains_tables,
            'text_content': self.text_content,
            'case_name': self.case_name,
            'starting_office': self.starting_office,
            'output_json': self.output_json,
            'processing_time': self.processing_time,
            'llm_model': self.llm_model,
            'num_pages': self.num_pages,
            'num_tokens': self.num_tokens,
            'citcular_type': self.citcular_type,
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'DocumentProcessMetadata':
        """Build an instance from a dictionary keyed by field name.

        Missing keys become ``None``. ``publication_date`` may be an ISO 8601
        string (parsed with ``datetime.fromisoformat``), an existing
        ``datetime`` (used unchanged), or a falsy value (stored as ``None``).

        Raises:
            ValueError: if ``publication_date`` is a string that
                ``datetime.fromisoformat`` cannot parse.
        """
        raw_date = data.get('publication_date')
        if isinstance(raw_date, datetime):
            # Already parsed: fromisoformat() would raise TypeError on it.
            publication_date = raw_date
        elif raw_date:
            publication_date = datetime.fromisoformat(raw_date)
        else:
            publication_date = None

        return cls(
            publication_date=publication_date,
            num_circular=data.get('num_circular'),
            asfi_identifier=data.get('asfi_identifier'),
            contains_tables=data.get('contains_tables'),
            text_content=data.get('text_content'),
            case_name=data.get('case_name'),
            starting_office=data.get('starting_office'),
            output_json=data.get('output_json'),
            processing_time=data.get('processing_time'),
            llm_model=data.get('llm_model'),
            num_pages=data.get('num_pages'),
            num_tokens=data.get('num_tokens'),
            citcular_type=data.get('citcular_type'),
        )
|
|
@@ -4,7 +4,9 @@ documente_shared/application/digest.py,sha256=Um6E8WfFri2_lly4RFWydJyvSfPZGFcOX-
|
|
|
4
4
|
documente_shared/application/time_utils.py,sha256=XDH27cKgoTFO8ad1JgrxKaeT7sZ1fduuJqLkvHUjy-Q,309
|
|
5
5
|
documente_shared/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
documente_shared/domain/base_enum.py,sha256=DojAfn-zQdtjtImeHUpBzE6TBTm07XrbMOdW3h8RVd8,1449
|
|
7
|
-
documente_shared/domain/entities.py,sha256=
|
|
7
|
+
documente_shared/domain/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
documente_shared/domain/entities/document_process.py,sha256=ioWAyXvz-ORgkaZZzPsBibSOM6Kpo_ScIhQInNjHf7M,6735
|
|
9
|
+
documente_shared/domain/entities/document_process_metadata.py,sha256=rBHkDkoKwrxReKtXIScU1vrCO_6bg2LwWhrtXMcQ8TA,2351
|
|
8
10
|
documente_shared/domain/enums.py,sha256=s3bFDkpplWvJWNpUwPOkC9a3OYUYsx8uTusN_FrzNQk,463
|
|
9
11
|
documente_shared/domain/repositories.py,sha256=EekvB2BE4AnT5myEhU3DfwHfF9MkNJZRbqRVGFxCEWM,509
|
|
10
12
|
documente_shared/infrastructure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -12,6 +14,6 @@ documente_shared/infrastructure/dynamo_repositories.py,sha256=WULK8_N7PjA397vXTZ
|
|
|
12
14
|
documente_shared/infrastructure/dynamo_table.py,sha256=Lod8vwUCGX65SNH_8hvlpG6rRUYb4jfZqoFHD1YloAA,2096
|
|
13
15
|
documente_shared/infrastructure/s3_bucket.py,sha256=RwcEyYPUUXQ0fFR-Zr1KfkFGGpO6qAIfZ70CWHu1V8M,1923
|
|
14
16
|
documente_shared/infrastructure/sqs_queue.py,sha256=PSiTAnjXvQ-W-9mzLpH2UjbQJTvYkMiaxNaMecF-cR4,1505
|
|
15
|
-
documente_shared-0.1.
|
|
16
|
-
documente_shared-0.1.
|
|
17
|
-
documente_shared-0.1.
|
|
17
|
+
documente_shared-0.1.28.dist-info/METADATA,sha256=2Q2xK6OmOJzCJjFWg-sJ_Cyoz0a_ZcLoq0RKptXQ4q8,640
|
|
18
|
+
documente_shared-0.1.28.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
19
|
+
documente_shared-0.1.28.dist-info/RECORD,,
|
|
File without changes
|