documente_shared 0.1.71__py3-none-any.whl → 0.1.72b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of documente_shared has been flagged as potentially problematic.

Files changed (41)
  1. documente_shared/__init__.py +0 -0
  2. documente_shared/application/__init__.py +0 -0
  3. documente_shared/application/digest.py +7 -7
  4. documente_shared/application/exceptions.py +23 -23
  5. documente_shared/application/files.py +22 -22
  6. documente_shared/application/time_utils.py +13 -13
  7. documente_shared/application/timezone.py +7 -7
  8. documente_shared/domain/__init__.py +0 -0
  9. documente_shared/domain/base_enum.py +53 -53
  10. documente_shared/domain/constants.py +2 -2
  11. documente_shared/domain/entities/__init__.py +0 -0
  12. documente_shared/domain/entities/document.py +348 -348
  13. documente_shared/domain/entities/document_metadata.py +63 -63
  14. documente_shared/domain/entities/in_memory_result.py +51 -51
  15. documente_shared/domain/entities/processing_case.py +144 -139
  16. documente_shared/domain/entities/processing_case_item.py +216 -210
  17. documente_shared/domain/entities/processing_event.py +49 -49
  18. documente_shared/domain/enums/__init__.py +0 -0
  19. documente_shared/domain/enums/common.py +95 -95
  20. documente_shared/domain/enums/document.py +71 -71
  21. documente_shared/domain/enums/processing_case.py +54 -54
  22. documente_shared/domain/repositories/__init__.py +0 -0
  23. documente_shared/domain/repositories/document.py +24 -24
  24. documente_shared/domain/repositories/processing_case.py +24 -24
  25. documente_shared/domain/repositories/processing_case_item.py +29 -29
  26. documente_shared/infrastructure/__init__.py +0 -0
  27. documente_shared/infrastructure/documente_client.py +21 -0
  28. documente_shared/infrastructure/dynamo_table.py +75 -75
  29. documente_shared/infrastructure/repositories/__init__.py +0 -0
  30. documente_shared/infrastructure/repositories/dynamo_document.py +43 -43
  31. documente_shared/infrastructure/repositories/dynamo_processing_case.py +43 -43
  32. documente_shared/infrastructure/repositories/dynamo_processing_case_item.py +53 -53
  33. documente_shared/infrastructure/repositories/http_document_processing.py +41 -0
  34. documente_shared/infrastructure/repositories/http_processing_case.py +41 -0
  35. documente_shared/infrastructure/repositories/http_processing_case_item.py +53 -0
  36. documente_shared/infrastructure/s3_bucket.py +57 -57
  37. documente_shared/infrastructure/sqs_queue.py +47 -47
  38. {documente_shared-0.1.71.dist-info → documente_shared-0.1.72b0.dist-info}/METADATA +2 -1
  39. documente_shared-0.1.72b0.dist-info/RECORD +40 -0
  40. documente_shared-0.1.71.dist-info/RECORD +0 -36
  41. {documente_shared-0.1.71.dist-info → documente_shared-0.1.72b0.dist-info}/WHEEL +0 -0
@@ -1,64 +1,64 @@
1
- from dataclasses import dataclass
2
- from datetime import datetime
3
- from typing import Optional
4
-
5
-
6
- @dataclass
7
- class DocumentProcessingMetadata(object):
8
- publication_date: Optional[datetime] = None
9
- num_circular: Optional[str] = None
10
- asfi_identifier: Optional[str] = None
11
- contains_tables: Optional[bool] = None
12
- text_content: Optional[str] = None
13
- case_name: Optional[str] = None
14
- starting_office: Optional[str] = None
15
- output_json: Optional[dict] = None
16
- processing_time: Optional[float] = None
17
- llm_model: Optional[str] = None
18
- num_pages: Optional[float] = None
19
- num_tokens: Optional[float] = None
20
- citcular_type: Optional[str] = None
21
-
22
- @property
23
- def to_dict(self):
24
- return {
25
- 'publication_date': (
26
- self.publication_date.isoformat()
27
- if self.publication_date
28
- else None
29
- ),
30
- 'num_circular': self.num_circular,
31
- 'asfi_identifier': self.asfi_identifier,
32
- 'contains_tables': self.contains_tables,
33
- 'text_content': self.text_content,
34
- 'case_name': self.case_name,
35
- 'starting_office': self.starting_office,
36
- 'output_json': self.output_json,
37
- 'processing_time': self.processing_time,
38
- 'llm_model': self.llm_model,
39
- 'num_pages': self.num_pages,
40
- 'num_tokens': self.num_tokens,
41
- 'citcular_type': self.citcular_type
42
- }
43
-
44
- @classmethod
45
- def from_dict(cls, data: dict):
46
- return cls(
47
- publication_date=(
48
- datetime.fromisoformat(data.get('publication_date'))
49
- if data.get('publication_date')
50
- else None
51
- ),
52
- num_circular=data.get('num_circular'),
53
- asfi_identifier=data.get('asfi_identifier'),
54
- contains_tables=data.get('contains_tables'),
55
- text_content=data.get('text_content'),
56
- case_name=data.get('case_name'),
57
- starting_office=data.get('starting_office'),
58
- output_json=data.get('output_json'),
59
- processing_time=data.get('processing_time'),
60
- llm_model=data.get('llm_model'),
61
- num_pages=data.get('num_pages'),
62
- num_tokens=data.get('num_tokens'),
63
- citcular_type=data.get('citcular_type')
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Optional
4
+
5
+
6
@dataclass
class DocumentProcessingMetadata(object):
    """Optional metadata fields attached to a processed document.

    Every field defaults to ``None``. ``to_dict`` and ``from_dict`` convert
    the instance to and from a plain dictionary, with ``publication_date``
    serialised as an ISO-8601 string.
    """

    publication_date: Optional[datetime] = None
    num_circular: Optional[str] = None
    asfi_identifier: Optional[str] = None
    contains_tables: Optional[bool] = None
    text_content: Optional[str] = None
    case_name: Optional[str] = None
    starting_office: Optional[str] = None
    output_json: Optional[dict] = None
    processing_time: Optional[float] = None
    llm_model: Optional[str] = None
    num_pages: Optional[float] = None
    num_tokens: Optional[float] = None
    # NOTE(review): presumably a misspelling of "circular_type"; the name is
    # kept because it is both the public attribute and the serialised key.
    citcular_type: Optional[str] = None

    @property
    def to_dict(self) -> dict:
        """Return the metadata as a plain dictionary.

        ``publication_date`` is emitted through ``isoformat()`` when set and
        as ``None`` otherwise; every other field is copied unchanged.
        """
        return {
            'publication_date': (
                self.publication_date.isoformat()
                if self.publication_date
                else None
            ),
            'num_circular': self.num_circular,
            'asfi_identifier': self.asfi_identifier,
            'contains_tables': self.contains_tables,
            'text_content': self.text_content,
            'case_name': self.case_name,
            'starting_office': self.starting_office,
            'output_json': self.output_json,
            'processing_time': self.processing_time,
            'llm_model': self.llm_model,
            'num_pages': self.num_pages,
            'num_tokens': self.num_tokens,
            'citcular_type': self.citcular_type
        }

    @classmethod
    def from_dict(cls, data: dict):
        """Build an instance from a dictionary shaped like ``to_dict`` output.

        Missing keys become ``None``. ``publication_date`` may be an
        ISO-8601 string (parsed with ``datetime.fromisoformat``), an already
        constructed ``datetime`` (used as-is), or a falsy value (``None``).
        """
        raw_date = data.get('publication_date')
        if isinstance(raw_date, datetime):
            # Already parsed; fromisoformat() would raise TypeError on it.
            publication_date = raw_date
        elif raw_date:
            publication_date = datetime.fromisoformat(raw_date)
        else:
            publication_date = None

        return cls(
            publication_date=publication_date,
            num_circular=data.get('num_circular'),
            asfi_identifier=data.get('asfi_identifier'),
            contains_tables=data.get('contains_tables'),
            text_content=data.get('text_content'),
            case_name=data.get('case_name'),
            starting_office=data.get('starting_office'),
            output_json=data.get('output_json'),
            processing_time=data.get('processing_time'),
            llm_model=data.get('llm_model'),
            num_pages=data.get('num_pages'),
            num_tokens=data.get('num_tokens'),
            citcular_type=data.get('citcular_type')
        )
@@ -1,51 +1,51 @@
1
- from dataclasses import dataclass
2
- from typing import Optional
3
-
4
- from documente_shared.application.files import (
5
- remove_slash_from_path,
6
- get_filename_from_path,
7
- )
8
-
9
-
10
- @dataclass
11
- class InMemoryDocument(object):
12
- file_path: Optional[str] = None
13
- file_bytes: Optional[bytes] = None
14
-
15
- @property
16
- def is_valid(self) -> bool:
17
- return bool(self.file_path) and self.file_bytes
18
-
19
- @property
20
- def has_content(self) -> bool:
21
- return bool(self.file_bytes)
22
-
23
- @property
24
- def file_key(self) -> Optional[str]:
25
- if not self.file_path:
26
- return None
27
- return remove_slash_from_path(self.file_path)
28
-
29
- @property
30
- def file_name(self) -> Optional[str]:
31
- if not self.file_path:
32
- return None
33
- return get_filename_from_path(self.file_path)
34
-
35
- @property
36
- def is_procesable(self) -> bool:
37
- return self.is_valid and self.has_content
38
-
39
- @property
40
- def to_dict(self) -> dict:
41
- return {
42
- 'file_path': self.file_path,
43
- 'file_bytes': self.file_bytes,
44
- }
45
-
46
- @classmethod
47
- def from_dict(cls, data: dict):
48
- return cls(
49
- file_path=data.get('file_path'),
50
- file_bytes=data.get('file_bytes'),
51
- )
1
+ from dataclasses import dataclass
2
+ from typing import Optional
3
+
4
+ from documente_shared.application.files import (
5
+ remove_slash_from_path,
6
+ get_filename_from_path,
7
+ )
8
+
9
+
10
@dataclass
class InMemoryDocument(object):
    """A document held in memory as a path plus its raw bytes.

    Both fields are optional and default to ``None``.
    """

    file_path: Optional[str] = None
    file_bytes: Optional[bytes] = None

    @property
    def is_valid(self) -> bool:
        """Return ``True`` when both a path and non-empty bytes are present."""
        # Coerce both operands: a bare ``and`` would hand back the bytes
        # object (or None / b'') instead of the annotated bool.
        return bool(self.file_path) and bool(self.file_bytes)

    @property
    def has_content(self) -> bool:
        """Return ``True`` when ``file_bytes`` is non-empty."""
        return bool(self.file_bytes)

    @property
    def file_key(self) -> Optional[str]:
        """Return ``file_path`` passed through ``remove_slash_from_path``.

        Returns ``None`` when no path is set.
        """
        if not self.file_path:
            return None
        return remove_slash_from_path(self.file_path)

    @property
    def file_name(self) -> Optional[str]:
        """Return ``file_path`` passed through ``get_filename_from_path``.

        Returns ``None`` when no path is set.
        """
        if not self.file_path:
            return None
        return get_filename_from_path(self.file_path)

    @property
    def is_procesable(self) -> bool:
        """Return ``True`` when the document is valid and has content."""
        return self.is_valid and self.has_content

    @property
    def to_dict(self) -> dict:
        """Return the two fields as a plain dictionary."""
        return {
            'file_path': self.file_path,
            'file_bytes': self.file_bytes,
        }

    @classmethod
    def from_dict(cls, data: dict):
        """Build an instance from a dictionary; missing keys become ``None``."""
        return cls(
            file_path=data.get('file_path'),
            file_bytes=data.get('file_bytes'),
        )
@@ -1,139 +1,144 @@
1
- from dataclasses import dataclass
2
- from datetime import datetime, tzinfo
3
- from typing import Optional, List
4
-
5
- from documente_shared.application.time_utils import get_datetime_from_data
6
- from documente_shared.domain.constants import la_paz_tz
7
- from documente_shared.domain.entities.processing_case_item import ProcessingCaseItem
8
- from documente_shared.domain.enums.common import ProcessingStatus
9
- from documente_shared.domain.enums.processing_case import ProcessingCaseCategory
10
-
11
-
12
- @dataclass
13
- class ProcessingCase(object):
14
- case_id: str
15
- status: ProcessingStatus
16
- category: Optional[ProcessingCaseCategory] = None
17
- enqueued_at: Optional[datetime] = None
18
- started_at: Optional[datetime] = None
19
- failed_at: Optional[datetime] = None
20
- feedback: Optional[list | dict] = None
21
- completed_at: Optional[datetime] = None
22
- metadata: Optional[dict] = None
23
- items: Optional[List[ProcessingCaseItem]] = None
24
-
25
- def __post_init__(self):
26
- self.items = self.items or []
27
-
28
- @property
29
- def is_procesable(self) -> bool:
30
- return self.items and len(self.items) > 0
31
-
32
- def pending(self, timezone: tzinfo = la_paz_tz):
33
- self.status = ProcessingStatus.PENDING
34
- self.started_at = None
35
-
36
- def enqueue(self, timezone: tzinfo = la_paz_tz):
37
- self.status = ProcessingStatus.ENQUEUED
38
- self.enqueued_at = datetime.now(tz=timezone)
39
-
40
- def processing(self, timezone: tzinfo = la_paz_tz):
41
- self.status = ProcessingStatus.PROCESSING
42
- self.started_at = datetime.now(tz=timezone)
43
-
44
- def failed(
45
- self,
46
- error_message: Optional[str] = None,
47
- timezone: tzinfo = la_paz_tz,
48
- ):
49
- self.status = ProcessingStatus.FAILED
50
- self.failed_at = datetime.now(tz=timezone)
51
-
52
- def completed(self, timezone: tzinfo = la_paz_tz):
53
- self.status = ProcessingStatus.COMPLETED
54
- self.completed_at = datetime.now(tz=timezone)
55
-
56
- def deleted(self):
57
- self.status = ProcessingStatus.DELETED
58
-
59
- def __eq__(self, other: 'ProcessingCase') -> bool:
60
- if not other:
61
- return False
62
-
63
- return (
64
- self.case_id == other.case_id
65
- and self.status == other.status
66
- and self.category == other.category
67
- and self.enqueued_at == other.enqueued_at
68
- and self.started_at == other.started_at
69
- and self.failed_at == other.failed_at
70
- and self.feedback == other.feedback
71
- and self.completed_at == other.completed_at
72
- and self.metadata == other.metadata
73
- )
74
-
75
- @property
76
- def to_dict(self) -> dict:
77
- return {
78
- 'case_id': self.case_id,
79
- 'status': str(self.status),
80
- 'category': (
81
- str(self.category)
82
- if self.category else None
83
- ),
84
- 'enqueued_at': self.enqueued_at.isoformat() if self.enqueued_at else None,
85
- 'started_at': self.started_at.isoformat() if self.started_at else None,
86
- 'failed_at': self.failed_at.isoformat() if self.failed_at else None,
87
- 'feedback': self.feedback,
88
- 'completed_at': self.completed_at.isoformat() if self.completed_at else None,
89
- 'metadata': self.metadata,
90
- 'items': [item.to_dict for item in self.items],
91
- }
92
-
93
- @property
94
- def to_persist_dict(self) -> dict:
95
- return self.to_dict
96
-
97
- def overload(
98
- self,
99
- new_instance: 'ProcessingCase',
100
- properties: List[str] = None,
101
- ):
102
- instance_properties = properties or [
103
- 'status',
104
- 'category',
105
- 'enqueued_at',
106
- 'started_at',
107
- 'failed_at',
108
- 'feedback',
109
- 'completed_at',
110
- 'metadata',
111
- 'items',
112
- ]
113
- for _property in instance_properties:
114
- property_value = getattr(new_instance, _property)
115
- if not hasattr(self, _property):
116
- continue
117
- setattr(self, _property, property_value)
118
- return self
119
-
120
- @classmethod
121
- def from_dict(cls, data: dict) -> 'ProcessingCase':
122
- return cls(
123
- case_id=data.get('case_id'),
124
- status=ProcessingStatus.from_value(data.get('status')),
125
- category=(
126
- ProcessingCaseCategory.from_value(data.get('category'))
127
- if data.get('category') else None
128
- ),
129
- enqueued_at=get_datetime_from_data(input_datetime=data.get('enqueued_at')),
130
- started_at=get_datetime_from_data(input_datetime=data.get('started_at')),
131
- failed_at=get_datetime_from_data(input_datetime=data.get('failed_at')),
132
- feedback=data.get('feedback'),
133
- metadata=data.get('metadata', {}),
134
- completed_at=get_datetime_from_data(input_datetime=data.get('completed_at')),
135
- items=[
136
- ProcessingCaseItem.from_dict(item_dict)
137
- for item_dict in data.get('items', [])
138
- ],
139
- )
1
+ from dataclasses import dataclass
2
+ from datetime import datetime, tzinfo
3
+ from typing import Optional, List
4
+
5
+ from documente_shared.application.time_utils import get_datetime_from_data
6
+ from documente_shared.domain.constants import la_paz_tz
7
+ from documente_shared.domain.entities.processing_case_item import ProcessingCaseItem
8
+ from documente_shared.domain.enums.common import ProcessingStatus
9
+ from documente_shared.domain.enums.processing_case import ProcessingCaseCategory
10
+
11
+
12
@dataclass
class ProcessingCase(object):
    """A processing case: identifier, label, status, timestamps and items.

    ``uuid``, ``label`` and ``status`` are required; every other field is
    optional. ``items`` is normalised to a list in ``__post_init__``.
    """

    uuid: str
    label: str
    status: ProcessingStatus
    category: Optional[ProcessingCaseCategory] = None
    enqueued_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    failed_at: Optional[datetime] = None
    feedback: Optional[list | dict] = None
    completed_at: Optional[datetime] = None
    metadata: Optional[dict] = None
    items: Optional[List[ProcessingCaseItem]] = None

    def __post_init__(self):
        # Replace a missing (or empty) ``items`` with a fresh list so the
        # rest of the class can iterate it unconditionally.
        self.items = self.items or []

    @property
    def is_procesable(self) -> bool:
        """Return ``True`` when the case has at least one item."""
        # bool() keeps the annotated return type; ``self.items and ...``
        # would return the empty list itself for a case without items.
        return bool(self.items)

    def pending(self, timezone: tzinfo = la_paz_tz):
        """Set the status to PENDING and clear ``started_at``.

        ``timezone`` is accepted for signature parity with the other
        transitions but is not used here.
        """
        self.status = ProcessingStatus.PENDING
        self.started_at = None

    def enqueue(self, timezone: tzinfo = la_paz_tz):
        """Set the status to ENQUEUED and stamp ``enqueued_at`` with now."""
        self.status = ProcessingStatus.ENQUEUED
        self.enqueued_at = datetime.now(tz=timezone)

    def processing(self, timezone: tzinfo = la_paz_tz):
        """Set the status to PROCESSING and stamp ``started_at`` with now."""
        self.status = ProcessingStatus.PROCESSING
        self.started_at = datetime.now(tz=timezone)

    def failed(
        self,
        error_message: Optional[str] = None,
        timezone: tzinfo = la_paz_tz,
    ):
        """Set the status to FAILED and stamp ``failed_at`` with now.

        NOTE(review): ``error_message`` is accepted but not stored anywhere
        on the case — confirm whether it should be recorded.
        """
        self.status = ProcessingStatus.FAILED
        self.failed_at = datetime.now(tz=timezone)

    def completed(self, timezone: tzinfo = la_paz_tz):
        """Set the status to COMPLETED and stamp ``completed_at`` with now."""
        self.status = ProcessingStatus.COMPLETED
        self.completed_at = datetime.now(tz=timezone)

    def deleted(self):
        """Set the status to DELETED."""
        self.status = ProcessingStatus.DELETED

    def __eq__(self, other: 'ProcessingCase') -> bool:
        """Compare two cases field by field, ignoring ``items``.

        Objects that are not a ``ProcessingCase`` (including ``None``) never
        compare equal.
        """
        if not isinstance(other, ProcessingCase):
            # Let Python fall back to its default handling (which yields
            # False) instead of raising AttributeError on foreign objects.
            return NotImplemented

        return (
            self.uuid == other.uuid
            and self.label == other.label
            and self.status == other.status
            and self.category == other.category
            and self.enqueued_at == other.enqueued_at
            and self.started_at == other.started_at
            and self.failed_at == other.failed_at
            and self.feedback == other.feedback
            and self.completed_at == other.completed_at
            and self.metadata == other.metadata
        )

    @property
    def to_dict(self) -> dict:
        """Return the case as a plain dictionary.

        Status and category are converted with ``str()``, datetimes with
        ``isoformat()``, and each item through its own ``to_dict``.
        """
        return {
            'uuid': self.uuid,
            'label': self.label,
            'status': str(self.status),
            'category': (
                str(self.category)
                if self.category else None
            ),
            'enqueued_at': self.enqueued_at.isoformat() if self.enqueued_at else None,
            'started_at': self.started_at.isoformat() if self.started_at else None,
            'failed_at': self.failed_at.isoformat() if self.failed_at else None,
            'feedback': self.feedback,
            'completed_at': self.completed_at.isoformat() if self.completed_at else None,
            'metadata': self.metadata,
            'items': [item.to_dict for item in self.items],
        }

    @property
    def to_persist_dict(self) -> dict:
        """Return the dictionary used for persistence (same as ``to_dict``)."""
        return self.to_dict

    def overload(
        self,
        new_instance: 'ProcessingCase',
        properties: Optional[List[str]] = None,
    ):
        """Copy attribute values from ``new_instance`` onto this case.

        Args:
            new_instance: The case to read values from.
            properties: Attribute names to copy. When omitted (or empty),
                every field except ``uuid`` is copied.

        Returns:
            This instance, after the copy.
        """
        instance_properties = properties or [
            'label',
            'status',
            'category',
            'enqueued_at',
            'started_at',
            'failed_at',
            'feedback',
            'completed_at',
            'metadata',
            'items',
        ]
        for _property in instance_properties:
            # Check before reading: names this case does not have are
            # skipped rather than raising AttributeError on the getattr.
            if not hasattr(self, _property):
                continue
            setattr(self, _property, getattr(new_instance, _property))
        return self

    @classmethod
    def from_dict(cls, data: dict) -> 'ProcessingCase':
        """Build a case from a dictionary shaped like ``to_dict`` output.

        Timestamps go through ``get_datetime_from_data``; status and
        category through their enum ``from_value``; each entry of ``items``
        through ``ProcessingCaseItem.from_dict``.
        """
        return cls(
            uuid=data.get('uuid'),
            label=data.get('label'),
            status=ProcessingStatus.from_value(data.get('status')),
            category=(
                ProcessingCaseCategory.from_value(data.get('category'))
                if data.get('category') else None
            ),
            enqueued_at=get_datetime_from_data(input_datetime=data.get('enqueued_at')),
            started_at=get_datetime_from_data(input_datetime=data.get('started_at')),
            failed_at=get_datetime_from_data(input_datetime=data.get('failed_at')),
            feedback=data.get('feedback'),
            metadata=data.get('metadata', {}),
            completed_at=get_datetime_from_data(input_datetime=data.get('completed_at')),
            items=[
                ProcessingCaseItem.from_dict(item_dict)
                # ``or []`` also covers an explicit ``'items': None``.
                for item_dict in (data.get('items') or [])
            ],
        )