documente_shared 0.1.72b0__py3-none-any.whl → 0.1.73__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of documente_shared might be problematic. Click here for more details.

Files changed (41) hide show
  1. documente_shared/__init__.py +0 -0
  2. documente_shared/application/__init__.py +0 -0
  3. documente_shared/application/digest.py +7 -7
  4. documente_shared/application/exceptions.py +23 -23
  5. documente_shared/application/files.py +22 -22
  6. documente_shared/application/time_utils.py +13 -13
  7. documente_shared/application/timezone.py +7 -7
  8. documente_shared/domain/__init__.py +0 -0
  9. documente_shared/domain/base_enum.py +53 -53
  10. documente_shared/domain/constants.py +8 -3
  11. documente_shared/domain/entities/__init__.py +0 -0
  12. documente_shared/domain/entities/document.py +348 -348
  13. documente_shared/domain/entities/document_metadata.py +63 -63
  14. documente_shared/domain/entities/in_memory_result.py +51 -51
  15. documente_shared/domain/entities/processing_case.py +145 -144
  16. documente_shared/domain/entities/processing_case_item.py +216 -216
  17. documente_shared/domain/entities/processing_event.py +49 -49
  18. documente_shared/domain/enums/__init__.py +0 -0
  19. documente_shared/domain/enums/common.py +95 -95
  20. documente_shared/domain/enums/document.py +71 -71
  21. documente_shared/domain/enums/processing_case.py +55 -54
  22. documente_shared/domain/repositories/__init__.py +0 -0
  23. documente_shared/domain/repositories/document.py +24 -24
  24. documente_shared/domain/repositories/processing_case.py +24 -24
  25. documente_shared/domain/repositories/processing_case_item.py +29 -29
  26. documente_shared/infrastructure/__init__.py +0 -0
  27. documente_shared/infrastructure/documente_client.py +20 -20
  28. documente_shared/infrastructure/dynamo_table.py +75 -75
  29. documente_shared/infrastructure/repositories/__init__.py +0 -0
  30. documente_shared/infrastructure/repositories/dynamo_document.py +43 -43
  31. documente_shared/infrastructure/repositories/dynamo_processing_case.py +43 -43
  32. documente_shared/infrastructure/repositories/dynamo_processing_case_item.py +53 -53
  33. documente_shared/infrastructure/repositories/http_processing_case.py +40 -40
  34. documente_shared/infrastructure/repositories/http_processing_case_item.py +52 -52
  35. documente_shared/infrastructure/s3_bucket.py +57 -57
  36. documente_shared/infrastructure/sqs_queue.py +47 -47
  37. {documente_shared-0.1.72b0.dist-info → documente_shared-0.1.73.dist-info}/METADATA +1 -1
  38. documente_shared-0.1.73.dist-info/RECORD +39 -0
  39. documente_shared/infrastructure/repositories/http_document_processing.py +0 -41
  40. documente_shared-0.1.72b0.dist-info/RECORD +0 -40
  41. {documente_shared-0.1.72b0.dist-info → documente_shared-0.1.73.dist-info}/WHEEL +0 -0
@@ -1,64 +1,64 @@
1
- from dataclasses import dataclass
2
- from datetime import datetime
3
- from typing import Optional
4
-
5
-
6
- @dataclass
7
- class DocumentProcessingMetadata(object):
8
- publication_date: Optional[datetime] = None
9
- num_circular: Optional[str] = None
10
- asfi_identifier: Optional[str] = None
11
- contains_tables: Optional[bool] = None
12
- text_content: Optional[str] = None
13
- case_name: Optional[str] = None
14
- starting_office: Optional[str] = None
15
- output_json: Optional[dict] = None
16
- processing_time: Optional[float] = None
17
- llm_model: Optional[str] = None
18
- num_pages: Optional[float] = None
19
- num_tokens: Optional[float] = None
20
- citcular_type: Optional[str] = None
21
-
22
- @property
23
- def to_dict(self):
24
- return {
25
- 'publication_date': (
26
- self.publication_date.isoformat()
27
- if self.publication_date
28
- else None
29
- ),
30
- 'num_circular': self.num_circular,
31
- 'asfi_identifier': self.asfi_identifier,
32
- 'contains_tables': self.contains_tables,
33
- 'text_content': self.text_content,
34
- 'case_name': self.case_name,
35
- 'starting_office': self.starting_office,
36
- 'output_json': self.output_json,
37
- 'processing_time': self.processing_time,
38
- 'llm_model': self.llm_model,
39
- 'num_pages': self.num_pages,
40
- 'num_tokens': self.num_tokens,
41
- 'citcular_type': self.citcular_type
42
- }
43
-
44
- @classmethod
45
- def from_dict(cls, data: dict):
46
- return cls(
47
- publication_date=(
48
- datetime.fromisoformat(data.get('publication_date'))
49
- if data.get('publication_date')
50
- else None
51
- ),
52
- num_circular=data.get('num_circular'),
53
- asfi_identifier=data.get('asfi_identifier'),
54
- contains_tables=data.get('contains_tables'),
55
- text_content=data.get('text_content'),
56
- case_name=data.get('case_name'),
57
- starting_office=data.get('starting_office'),
58
- output_json=data.get('output_json'),
59
- processing_time=data.get('processing_time'),
60
- llm_model=data.get('llm_model'),
61
- num_pages=data.get('num_pages'),
62
- num_tokens=data.get('num_tokens'),
63
- citcular_type=data.get('citcular_type')
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Optional
4
+
5
+
6
@dataclass
class DocumentProcessingMetadata(object):
    """Optional metadata describing the outcome of processing a document.

    Every field defaults to ``None``. The record can be converted to and
    from a plain ``dict`` through ``to_dict`` and ``from_dict``.
    """

    publication_date: Optional[datetime] = None
    num_circular: Optional[str] = None
    asfi_identifier: Optional[str] = None
    contains_tables: Optional[bool] = None
    text_content: Optional[str] = None
    case_name: Optional[str] = None
    starting_office: Optional[str] = None
    output_json: Optional[dict] = None
    processing_time: Optional[float] = None
    llm_model: Optional[str] = None
    num_pages: Optional[float] = None
    num_tokens: Optional[float] = None
    # Spelling kept as-is: the same name is used as the serialized dict key.
    citcular_type: Optional[str] = None

    # Fields copied verbatim between the instance and its dict form, in
    # serialization order. Left un-annotated on purpose so the dataclass
    # machinery does not turn it into a field.
    _PASSTHROUGH_KEYS = (
        'num_circular',
        'asfi_identifier',
        'contains_tables',
        'text_content',
        'case_name',
        'starting_office',
        'output_json',
        'processing_time',
        'llm_model',
        'num_pages',
        'num_tokens',
        'citcular_type',
    )

    @property
    def to_dict(self):
        """Return the record as a dict.

        ``publication_date`` is rendered with ``isoformat()`` (``None`` when
        unset); every other field is copied unchanged.
        """
        published = self.publication_date
        payload = {
            'publication_date': published.isoformat() if published else None,
        }
        for key in self._PASSTHROUGH_KEYS:
            payload[key] = getattr(self, key)
        return payload

    @classmethod
    def from_dict(cls, data: dict):
        """Build a record from ``data``.

        Missing keys become ``None``. A truthy ``publication_date`` value is
        parsed with ``datetime.fromisoformat``.
        """
        raw_date = data.get('publication_date')
        published = datetime.fromisoformat(raw_date) if raw_date else None
        plain_values = {key: data.get(key) for key in cls._PASSTHROUGH_KEYS}
        return cls(publication_date=published, **plain_values)
@@ -1,51 +1,51 @@
1
- from dataclasses import dataclass
2
- from typing import Optional
3
-
4
- from documente_shared.application.files import (
5
- remove_slash_from_path,
6
- get_filename_from_path,
7
- )
8
-
9
-
10
- @dataclass
11
- class InMemoryDocument(object):
12
- file_path: Optional[str] = None
13
- file_bytes: Optional[bytes] = None
14
-
15
- @property
16
- def is_valid(self) -> bool:
17
- return bool(self.file_path) and self.file_bytes
18
-
19
- @property
20
- def has_content(self) -> bool:
21
- return bool(self.file_bytes)
22
-
23
- @property
24
- def file_key(self) -> Optional[str]:
25
- if not self.file_path:
26
- return None
27
- return remove_slash_from_path(self.file_path)
28
-
29
- @property
30
- def file_name(self) -> Optional[str]:
31
- if not self.file_path:
32
- return None
33
- return get_filename_from_path(self.file_path)
34
-
35
- @property
36
- def is_procesable(self) -> bool:
37
- return self.is_valid and self.has_content
38
-
39
- @property
40
- def to_dict(self) -> dict:
41
- return {
42
- 'file_path': self.file_path,
43
- 'file_bytes': self.file_bytes,
44
- }
45
-
46
- @classmethod
47
- def from_dict(cls, data: dict):
48
- return cls(
49
- file_path=data.get('file_path'),
50
- file_bytes=data.get('file_bytes'),
51
- )
1
+ from dataclasses import dataclass
2
+ from typing import Optional
3
+
4
+ from documente_shared.application.files import (
5
+ remove_slash_from_path,
6
+ get_filename_from_path,
7
+ )
8
+
9
+
10
@dataclass
class InMemoryDocument(object):
    """A document held in memory as a path plus its raw bytes.

    Both fields are optional and default to ``None``.
    """

    file_path: Optional[str] = None
    file_bytes: Optional[bytes] = None

    @property
    def is_valid(self) -> bool:
        """``True`` only when both a path and non-empty bytes are present."""
        # Coerce both operands: ``bool(path) and self.file_bytes`` would hand
        # back the bytes object (or None) instead of the annotated bool.
        return bool(self.file_path) and bool(self.file_bytes)

    @property
    def has_content(self) -> bool:
        """``True`` when ``file_bytes`` is non-empty."""
        return bool(self.file_bytes)

    @property
    def file_key(self) -> Optional[str]:
        """``file_path`` passed through ``remove_slash_from_path``.

        Returns ``None`` when no path is set.
        """
        if not self.file_path:
            return None
        return remove_slash_from_path(self.file_path)

    @property
    def file_name(self) -> Optional[str]:
        """``file_path`` passed through ``get_filename_from_path``.

        Returns ``None`` when no path is set.
        """
        if not self.file_path:
            return None
        return get_filename_from_path(self.file_path)

    @property
    def is_procesable(self) -> bool:
        """``True`` when the document is valid and has content."""
        return self.is_valid and self.has_content

    @property
    def to_dict(self) -> dict:
        """Return the two fields as a plain dict (bytes are not encoded)."""
        return {
            'file_path': self.file_path,
            'file_bytes': self.file_bytes,
        }

    @classmethod
    def from_dict(cls, data: dict):
        """Build an instance from ``data``; missing keys become ``None``."""
        return cls(
            file_path=data.get('file_path'),
            file_bytes=data.get('file_bytes'),
        )
@@ -1,144 +1,145 @@
1
- from dataclasses import dataclass
2
- from datetime import datetime, tzinfo
3
- from typing import Optional, List
4
-
5
- from documente_shared.application.time_utils import get_datetime_from_data
6
- from documente_shared.domain.constants import la_paz_tz
7
- from documente_shared.domain.entities.processing_case_item import ProcessingCaseItem
8
- from documente_shared.domain.enums.common import ProcessingStatus
9
- from documente_shared.domain.enums.processing_case import ProcessingCaseCategory
10
-
11
-
12
- @dataclass
13
- class ProcessingCase(object):
14
- uuid: str
15
- label: str
16
- status: ProcessingStatus
17
- category: Optional[ProcessingCaseCategory] = None
18
- enqueued_at: Optional[datetime] = None
19
- started_at: Optional[datetime] = None
20
- failed_at: Optional[datetime] = None
21
- feedback: Optional[list | dict] = None
22
- completed_at: Optional[datetime] = None
23
- metadata: Optional[dict] = None
24
- items: Optional[List[ProcessingCaseItem]] = None
25
-
26
- def __post_init__(self):
27
- self.items = self.items or []
28
-
29
- @property
30
- def is_procesable(self) -> bool:
31
- return self.items and len(self.items) > 0
32
-
33
- def pending(self, timezone: tzinfo = la_paz_tz):
34
- self.status = ProcessingStatus.PENDING
35
- self.started_at = None
36
-
37
- def enqueue(self, timezone: tzinfo = la_paz_tz):
38
- self.status = ProcessingStatus.ENQUEUED
39
- self.enqueued_at = datetime.now(tz=timezone)
40
-
41
- def processing(self, timezone: tzinfo = la_paz_tz):
42
- self.status = ProcessingStatus.PROCESSING
43
- self.started_at = datetime.now(tz=timezone)
44
-
45
- def failed(
46
- self,
47
- error_message: Optional[str] = None,
48
- timezone: tzinfo = la_paz_tz,
49
- ):
50
- self.status = ProcessingStatus.FAILED
51
- self.failed_at = datetime.now(tz=timezone)
52
-
53
- def completed(self, timezone: tzinfo = la_paz_tz):
54
- self.status = ProcessingStatus.COMPLETED
55
- self.completed_at = datetime.now(tz=timezone)
56
-
57
- def deleted(self):
58
- self.status = ProcessingStatus.DELETED
59
-
60
- def __eq__(self, other: 'ProcessingCase') -> bool:
61
- if not other:
62
- return False
63
-
64
- return (
65
- self.uuid == other.uuid
66
- and self.label == other.label
67
- and self.status == other.status
68
- and self.category == other.category
69
- and self.enqueued_at == other.enqueued_at
70
- and self.started_at == other.started_at
71
- and self.failed_at == other.failed_at
72
- and self.feedback == other.feedback
73
- and self.completed_at == other.completed_at
74
- and self.metadata == other.metadata
75
- )
76
-
77
- @property
78
- def to_dict(self) -> dict:
79
- return {
80
- 'uuid': self.uuid,
81
- 'label': self.label,
82
- 'status': str(self.status),
83
- 'category': (
84
- str(self.category)
85
- if self.category else None
86
- ),
87
- 'enqueued_at': self.enqueued_at.isoformat() if self.enqueued_at else None,
88
- 'started_at': self.started_at.isoformat() if self.started_at else None,
89
- 'failed_at': self.failed_at.isoformat() if self.failed_at else None,
90
- 'feedback': self.feedback,
91
- 'completed_at': self.completed_at.isoformat() if self.completed_at else None,
92
- 'metadata': self.metadata,
93
- 'items': [item.to_dict for item in self.items],
94
- }
95
-
96
- @property
97
- def to_persist_dict(self) -> dict:
98
- return self.to_dict
99
-
100
- def overload(
101
- self,
102
- new_instance: 'ProcessingCase',
103
- properties: List[str] = None,
104
- ):
105
- instance_properties = properties or [
106
- 'label',
107
- 'status',
108
- 'category',
109
- 'enqueued_at',
110
- 'started_at',
111
- 'failed_at',
112
- 'feedback',
113
- 'completed_at',
114
- 'metadata',
115
- 'items',
116
- ]
117
- for _property in instance_properties:
118
- property_value = getattr(new_instance, _property)
119
- if not hasattr(self, _property):
120
- continue
121
- setattr(self, _property, property_value)
122
- return self
123
-
124
- @classmethod
125
- def from_dict(cls, data: dict) -> 'ProcessingCase':
126
- return cls(
127
- uuid=data.get('uuid'),
128
- label=data.get('label'),
129
- status=ProcessingStatus.from_value(data.get('status')),
130
- category=(
131
- ProcessingCaseCategory.from_value(data.get('category'))
132
- if data.get('category') else None
133
- ),
134
- enqueued_at=get_datetime_from_data(input_datetime=data.get('enqueued_at')),
135
- started_at=get_datetime_from_data(input_datetime=data.get('started_at')),
136
- failed_at=get_datetime_from_data(input_datetime=data.get('failed_at')),
137
- feedback=data.get('feedback'),
138
- metadata=data.get('metadata', {}),
139
- completed_at=get_datetime_from_data(input_datetime=data.get('completed_at')),
140
- items=[
141
- ProcessingCaseItem.from_dict(item_dict)
142
- for item_dict in data.get('items', [])
143
- ],
144
- )
1
+ from dataclasses import dataclass
2
+ from datetime import datetime, tzinfo
3
+ from typing import Optional, List
4
+
5
+ from documente_shared.application.time_utils import get_datetime_from_data
6
+ from documente_shared.domain.constants import la_paz_tz
7
+ from documente_shared.domain.entities.processing_case_item import ProcessingCaseItem
8
+ from documente_shared.domain.enums.common import ProcessingStatus
9
+ from documente_shared.domain.enums.processing_case import ProcessingCaseType
10
+
11
+
12
+ @dataclass
13
+ class ProcessingCase(object):
14
+ uuid: str
15
+ name: str
16
+ tenant_slug: str
17
+ status: ProcessingStatus
18
+ case_type: Optional[ProcessingCaseType] = None
19
+ enqueued_at: Optional[datetime] = None
20
+ started_at: Optional[datetime] = None
21
+ failed_at: Optional[datetime] = None
22
+ feedback: Optional[list | dict] = None
23
+ completed_at: Optional[datetime] = None
24
+ metadata: Optional[dict] = None
25
+ items: Optional[List[ProcessingCaseItem]] = None
26
+
27
+ def __post_init__(self):
28
+ self.items = self.items or []
29
+
30
+ @property
31
+ def is_procesable(self) -> bool:
32
+ return self.items and len(self.items) > 0
33
+
34
+ def pending(self, timezone: tzinfo = la_paz_tz):
35
+ self.status = ProcessingStatus.PENDING
36
+ self.started_at = None
37
+
38
+ def enqueue(self, timezone: tzinfo = la_paz_tz):
39
+ self.status = ProcessingStatus.ENQUEUED
40
+ self.enqueued_at = datetime.now(tz=timezone)
41
+
42
+ def processing(self, timezone: tzinfo = la_paz_tz):
43
+ self.status = ProcessingStatus.PROCESSING
44
+ self.started_at = datetime.now(tz=timezone)
45
+
46
+ def failed(
47
+ self,
48
+ error_message: Optional[str] = None,
49
+ timezone: tzinfo = la_paz_tz,
50
+ ):
51
+ self.status = ProcessingStatus.FAILED
52
+ self.failed_at = datetime.now(tz=timezone)
53
+
54
+ def completed(self, timezone: tzinfo = la_paz_tz):
55
+ self.status = ProcessingStatus.COMPLETED
56
+ self.completed_at = datetime.now(tz=timezone)
57
+
58
+ def deleted(self):
59
+ self.status = ProcessingStatus.DELETED
60
+
61
+ def __eq__(self, other: 'ProcessingCase') -> bool:
62
+ if not other:
63
+ return False
64
+
65
+ return (
66
+ self.uuid == other.uuid
67
+ and self.name == other.name
68
+ and self.status == other.status
69
+ and self.case_type == other.case_type
70
+ and self.enqueued_at == other.enqueued_at
71
+ and self.started_at == other.started_at
72
+ and self.failed_at == other.failed_at
73
+ and self.feedback == other.feedback
74
+ and self.completed_at == other.completed_at
75
+ and self.metadata == other.metadata
76
+ )
77
+
78
+ @property
79
+ def to_dict(self) -> dict:
80
+ return {
81
+ 'uuid': self.uuid,
82
+ 'label': self.name,
83
+ 'status': str(self.status),
84
+ 'category': (
85
+ str(self.case_type)
86
+ if self.case_type else None
87
+ ),
88
+ 'enqueued_at': self.enqueued_at.isoformat() if self.enqueued_at else None,
89
+ 'started_at': self.started_at.isoformat() if self.started_at else None,
90
+ 'failed_at': self.failed_at.isoformat() if self.failed_at else None,
91
+ 'feedback': self.feedback,
92
+ 'completed_at': self.completed_at.isoformat() if self.completed_at else None,
93
+ 'metadata': self.metadata,
94
+ 'items': [item.to_dict for item in self.items],
95
+ }
96
+
97
+ @property
98
+ def to_persist_dict(self) -> dict:
99
+ return self.to_dict
100
+
101
+ def overload(
102
+ self,
103
+ new_instance: 'ProcessingCase',
104
+ properties: List[str] = None,
105
+ ):
106
+ instance_properties = properties or [
107
+ 'label',
108
+ 'status',
109
+ 'category',
110
+ 'enqueued_at',
111
+ 'started_at',
112
+ 'failed_at',
113
+ 'feedback',
114
+ 'completed_at',
115
+ 'metadata',
116
+ 'items',
117
+ ]
118
+ for _property in instance_properties:
119
+ property_value = getattr(new_instance, _property)
120
+ if not hasattr(self, _property):
121
+ continue
122
+ setattr(self, _property, property_value)
123
+ return self
124
+
125
+ @classmethod
126
+ def from_dict(cls, data: dict) -> 'ProcessingCase':
127
+ return cls(
128
+ uuid=data.get('uuid'),
129
+ name=data.get('label'),
130
+ status=ProcessingStatus.from_value(data.get('status')),
131
+ case_type=(
132
+ ProcessingCaseType.from_value(data.get('category'))
133
+ if data.get('category') else None
134
+ ),
135
+ enqueued_at=get_datetime_from_data(input_datetime=data.get('enqueued_at')),
136
+ started_at=get_datetime_from_data(input_datetime=data.get('started_at')),
137
+ failed_at=get_datetime_from_data(input_datetime=data.get('failed_at')),
138
+ feedback=data.get('feedback'),
139
+ metadata=data.get('metadata', {}),
140
+ completed_at=get_datetime_from_data(input_datetime=data.get('completed_at')),
141
+ items=[
142
+ ProcessingCaseItem.from_dict(item_dict)
143
+ for item_dict in data.get('items', [])
144
+ ],
145
+ )