documente_shared 0.1.51__py3-none-any.whl → 0.1.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of documente_shared might be problematic. Click here for more details.

@@ -1,65 +1,65 @@
1
- from dataclasses import dataclass
2
- from datetime import datetime
3
- from typing import Optional
4
-
5
-
6
- @dataclass
7
- class DocumentProcessingMetadata(object):
8
- publication_date: Optional[datetime] = None
9
- num_circular: Optional[str] = None
10
- asfi_identifier: Optional[str] = None
11
- contains_tables: Optional[bool] = None
12
- text_content: Optional[str] = None
13
- case_name: Optional[str] = None
14
- starting_office: Optional[str] = None
15
- output_json: Optional[dict] = None
16
- processing_time: Optional[float] = None
17
- llm_model: Optional[str] = None
18
- num_pages: Optional[float] = None
19
- num_tokens: Optional[float] = None
20
- citcular_type: Optional[str] = None
21
-
22
-
23
- @property
24
- def to_dict(self):
25
- return {
26
- 'publication_date': (
27
- self.publication_date.isoformat()
28
- if self.publication_date
29
- else None
30
- ),
31
- 'num_circular': self.num_circular,
32
- 'asfi_identifier': self.asfi_identifier,
33
- 'contains_tables': self.contains_tables,
34
- 'text_content': self.text_content,
35
- 'case_name': self.case_name,
36
- 'starting_office': self.starting_office,
37
- 'output_json': self.output_json,
38
- 'processing_time': self.processing_time,
39
- 'llm_model': self.llm_model,
40
- 'num_pages': self.num_pages,
41
- 'num_tokens': self.num_tokens,
42
- 'citcular_type': self.citcular_type
43
- }
44
-
45
- @classmethod
46
- def from_dict(cls, data: dict):
47
- return cls(
48
- publication_date=(
49
- datetime.fromisoformat(data.get('publication_date'))
50
- if data.get('publication_date')
51
- else None
52
- ),
53
- num_circular=data.get('num_circular'),
54
- asfi_identifier=data.get('asfi_identifier'),
55
- contains_tables=data.get('contains_tables'),
56
- text_content=data.get('text_content'),
57
- case_name=data.get('case_name'),
58
- starting_office=data.get('starting_office'),
59
- output_json=data.get('output_json'),
60
- processing_time=data.get('processing_time'),
61
- llm_model=data.get('llm_model'),
62
- num_pages=data.get('num_pages'),
63
- num_tokens=data.get('num_tokens'),
64
- citcular_type=data.get('citcular_type')
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Optional
4
+
5
+
6
+ @dataclass
7
+ class DocumentProcessingMetadata(object):
8
+ publication_date: Optional[datetime] = None
9
+ num_circular: Optional[str] = None
10
+ asfi_identifier: Optional[str] = None
11
+ contains_tables: Optional[bool] = None
12
+ text_content: Optional[str] = None
13
+ case_name: Optional[str] = None
14
+ starting_office: Optional[str] = None
15
+ output_json: Optional[dict] = None
16
+ processing_time: Optional[float] = None
17
+ llm_model: Optional[str] = None
18
+ num_pages: Optional[float] = None
19
+ num_tokens: Optional[float] = None
20
+ citcular_type: Optional[str] = None
21
+
22
+
23
+ @property
24
+ def to_dict(self):
25
+ return {
26
+ 'publication_date': (
27
+ self.publication_date.isoformat()
28
+ if self.publication_date
29
+ else None
30
+ ),
31
+ 'num_circular': self.num_circular,
32
+ 'asfi_identifier': self.asfi_identifier,
33
+ 'contains_tables': self.contains_tables,
34
+ 'text_content': self.text_content,
35
+ 'case_name': self.case_name,
36
+ 'starting_office': self.starting_office,
37
+ 'output_json': self.output_json,
38
+ 'processing_time': self.processing_time,
39
+ 'llm_model': self.llm_model,
40
+ 'num_pages': self.num_pages,
41
+ 'num_tokens': self.num_tokens,
42
+ 'citcular_type': self.citcular_type
43
+ }
44
+
45
+ @classmethod
46
+ def from_dict(cls, data: dict):
47
+ return cls(
48
+ publication_date=(
49
+ datetime.fromisoformat(data.get('publication_date'))
50
+ if data.get('publication_date')
51
+ else None
52
+ ),
53
+ num_circular=data.get('num_circular'),
54
+ asfi_identifier=data.get('asfi_identifier'),
55
+ contains_tables=data.get('contains_tables'),
56
+ text_content=data.get('text_content'),
57
+ case_name=data.get('case_name'),
58
+ starting_office=data.get('starting_office'),
59
+ output_json=data.get('output_json'),
60
+ processing_time=data.get('processing_time'),
61
+ llm_model=data.get('llm_model'),
62
+ num_pages=data.get('num_pages'),
63
+ num_tokens=data.get('num_tokens'),
64
+ citcular_type=data.get('citcular_type')
65
65
  )
@@ -1,36 +1,36 @@
1
- from documente_shared.domain.base_enum import BaseEnum
2
-
3
-
4
- class DocumentProcessingStatus(BaseEnum):
5
- PENDING = 'PENDING'
6
- ENQUEUED = 'ENQUEUED'
7
- PROCESSING = 'PROCESSING'
8
- COMPLETED = 'COMPLETED'
9
- FAILED = 'FAILED'
10
- DELETED = 'DELETED'
11
- CANCELLED = 'CANCELLED'
12
-
13
-
14
- class DocumentProcessingCategory(BaseEnum):
15
- CIRCULAR = 'CIRCULAR'
16
- DESGRAVAMEN = 'DESGRAVAMEN'
17
-
18
- @property
19
- def is_circular(self):
20
- return self == DocumentProcessingCategory.CIRCULAR
21
-
22
- @property
23
- def is_desgravamen(self):
24
- return self == DocumentProcessingCategory.DESGRAVAMEN
25
-
26
-
27
- class DocumentProcessingSubCategory(BaseEnum):
28
- # Circulares
29
- CC_COMBINADA = 'CC_COMBINADA'
30
- CC_NORMATIVA = 'CC_NORMATIVA'
31
- CC_INFORMATIVA = 'CC_INFORMATIVA'
32
- CC_RETENCION_SUSPENSION_REMISION = 'CC_RETENCION_SUSPENSION_REMISION'
33
-
34
- # Desgravamenes
35
- DS_CREDISEGURO = 'DS_CREDISEGURO'
36
-
1
+ from documente_shared.domain.base_enum import BaseEnum
2
+
3
+
4
+ class DocumentProcessingStatus(BaseEnum):
5
+ PENDING = 'PENDING'
6
+ ENQUEUED = 'ENQUEUED'
7
+ PROCESSING = 'PROCESSING'
8
+ COMPLETED = 'COMPLETED'
9
+ FAILED = 'FAILED'
10
+ DELETED = 'DELETED'
11
+ CANCELLED = 'CANCELLED'
12
+ IN_REVIEW = 'IN_REVIEW'
13
+
14
+ class DocumentProcessingCategory(BaseEnum):
15
+ CIRCULAR = 'CIRCULAR'
16
+ DESGRAVAMEN = 'DESGRAVAMEN'
17
+
18
+ @property
19
+ def is_circular(self):
20
+ return self == DocumentProcessingCategory.CIRCULAR
21
+
22
+ @property
23
+ def is_desgravamen(self):
24
+ return self == DocumentProcessingCategory.DESGRAVAMEN
25
+
26
+
27
+ class DocumentProcessingSubCategory(BaseEnum):
28
+ # Circulares
29
+ CC_COMBINADA = 'CC_COMBINADA'
30
+ CC_NORMATIVA = 'CC_NORMATIVA'
31
+ CC_INFORMATIVA = 'CC_INFORMATIVA'
32
+ CC_RETENCION_SUSPENSION_REMISION = 'CC_RETENCION_SUSPENSION_REMISION'
33
+
34
+ # Desgravamenes
35
+ DS_CREDISEGURO = 'DS_CREDISEGURO'
36
+
@@ -1,25 +1,25 @@
1
- from abc import ABC, abstractmethod
2
- from typing import Optional, List
3
-
4
- from documente_shared.domain.entities.document import DocumentProcessing
5
- from documente_shared.domain.enums import DocumentProcessingStatus
6
-
7
-
8
- class DocumentProcessingRepository(ABC):
9
-
10
- @abstractmethod
11
- def find(self, digest: str) ->Optional[DocumentProcessing]:
12
- raise NotImplementedError
13
-
14
- @abstractmethod
15
- def persist(self, instance: DocumentProcessing) -> DocumentProcessing:
16
- raise NotImplementedError
17
-
18
- @abstractmethod
19
- def remove(self, instance: DocumentProcessing):
20
- raise NotImplementedError
21
-
22
-
23
- @abstractmethod
24
- def filter(self, statuses: List[DocumentProcessingStatus]) -> List[DocumentProcessing]:
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional, List
3
+
4
+ from documente_shared.domain.entities.document import DocumentProcessing
5
+ from documente_shared.domain.enums import DocumentProcessingStatus
6
+
7
+
8
+ class DocumentProcessingRepository(ABC):
9
+
10
+ @abstractmethod
11
+ def find(self, digest: str) ->Optional[DocumentProcessing]:
12
+ raise NotImplementedError
13
+
14
+ @abstractmethod
15
+ def persist(self, instance: DocumentProcessing) -> DocumentProcessing:
16
+ raise NotImplementedError
17
+
18
+ @abstractmethod
19
+ def remove(self, instance: DocumentProcessing):
20
+ raise NotImplementedError
21
+
22
+
23
+ @abstractmethod
24
+ def filter(self, statuses: List[DocumentProcessingStatus]) -> List[DocumentProcessing]:
25
25
  raise NotImplementedError
File without changes
@@ -1,43 +1,43 @@
1
- from typing import Optional, List
2
-
3
- from boto3.dynamodb.conditions import Key
4
-
5
- from documente_shared.domain.entities.document import DocumentProcessing
6
- from documente_shared.domain.enums import DocumentProcessingStatus, DocumentProcessingCategory
7
- from documente_shared.domain.repositories import DocumentProcessingRepository
8
- from documente_shared.infrastructure.dynamo_table import DynamoDBTable
9
-
10
-
11
-
12
- class DynamoDocumentProcessingRepository(
13
- DynamoDBTable,
14
- DocumentProcessingRepository,
15
- ):
16
- def find(self, digest: str) -> Optional[DocumentProcessing]:
17
- item = self.get(key={'digest': digest})
18
- if item:
19
- return DocumentProcessing.from_dict(item)
20
- return None
21
-
22
- def persist(self, instance: DocumentProcessing) -> DocumentProcessing:
23
- self.put(instance.to_simple_dict)
24
- return instance
25
-
26
- def remove(self, instance: DocumentProcessing):
27
- self.delete(key={'digest': instance.digest})
28
-
29
- def filter(self, statuses: List[DocumentProcessingStatus]) -> List[DocumentProcessing]:
30
- items = []
31
-
32
- for status in statuses:
33
- response = self._table.query(
34
- IndexName='status',
35
- KeyConditionExpression=Key('status').eq(status.value),
36
- )
37
- status_items = response.get('Items', [])
38
- items.extend(status_items)
39
-
40
- return [
41
- DocumentProcessing.from_dict(item)
42
- for item in items
43
- ]
1
+ from typing import Optional, List
2
+
3
+ from boto3.dynamodb.conditions import Key
4
+
5
+ from documente_shared.domain.entities.document import DocumentProcessing
6
+ from documente_shared.domain.enums import DocumentProcessingStatus, DocumentProcessingCategory
7
+ from documente_shared.domain.repositories import DocumentProcessingRepository
8
+ from documente_shared.infrastructure.dynamo_table import DynamoDBTable
9
+
10
+
11
+
12
+ class DynamoDocumentProcessingRepository(
13
+ DynamoDBTable,
14
+ DocumentProcessingRepository,
15
+ ):
16
+ def find(self, digest: str) -> Optional[DocumentProcessing]:
17
+ item = self.get(key={'digest': digest})
18
+ if item:
19
+ return DocumentProcessing.from_dict(item)
20
+ return None
21
+
22
+ def persist(self, instance: DocumentProcessing) -> DocumentProcessing:
23
+ self.put(instance.to_simple_dict)
24
+ return instance
25
+
26
+ def remove(self, instance: DocumentProcessing):
27
+ self.delete(key={'digest': instance.digest})
28
+
29
+ def filter(self, statuses: List[DocumentProcessingStatus]) -> List[DocumentProcessing]:
30
+ items = []
31
+
32
+ for status in statuses:
33
+ response = self._table.query(
34
+ IndexName='status',
35
+ KeyConditionExpression=Key('status').eq(status.value),
36
+ )
37
+ status_items = response.get('Items', [])
38
+ items.extend(status_items)
39
+
40
+ return [
41
+ DocumentProcessing.from_dict(item)
42
+ for item in items
43
+ ]
@@ -1,75 +1,75 @@
1
- from dataclasses import dataclass
2
-
3
- import boto3
4
- from boto3.dynamodb.conditions import Key
5
-
6
-
7
- RETURN_VALUES = 'UPDATED_NEW'
8
-
9
- @dataclass
10
- class DynamoDBTable(object):
11
- table_name: str
12
-
13
- def __post_init__(self):
14
- self._table = boto3.resource('dynamodb').Table(self.table_name)
15
-
16
- def get(self, key: dict):
17
- return self._table.get_item(Key=key).get('Item')
18
-
19
- def get_all(self):
20
- return self._table.scan().get('Items')
21
-
22
- def upsert(self, key, attributes):
23
- return self.put({**key, **attributes})
24
-
25
-
26
- def filter_by(self, attribute: str, target_value: str):
27
- return self._table.query(
28
- FilterExpression=Key(attribute).eq(target_value),
29
- ).get('Items')
30
-
31
- def put(self, attributes: dict, condition: dict = None):
32
- extra_args = {}
33
- if condition:
34
- extra_args['ConditionExpression'] = condition
35
- return self._table.put_item(Item=attributes, **extra_args)
36
-
37
-
38
- def update(self, key: str, attributes: dict):
39
- return self._table.update_item(
40
- Key=key,
41
- UpdateExpression=self._update_expression(attributes),
42
- ExpressionAttributeNames=self._expression_attribute_names(attributes),
43
- ExpressionAttributeValues=self._expression_attribute_values(attributes),
44
- ReturnValues=RETURN_VALUES,
45
- )
46
-
47
- def delete(self, key: dict):
48
- return self._table.delete_item(Key=key)
49
-
50
- def count(self) -> int:
51
- return self._table.item_count
52
-
53
-
54
- @classmethod
55
- def _update_expression(cls, attributes):
56
- return 'SET {param}'.format(
57
- param=','.join(
58
- '#{key}=:{key}'.format(
59
- key=key,
60
- )
61
- for key in attributes
62
- ),
63
- )
64
-
65
- @classmethod
66
- def _expression_attribute_names(cls, attributes):
67
- return {
68
- '#{key}'.format(key=key): key for key in attributes
69
- }
70
-
71
- @classmethod
72
- def _expression_attribute_values(cls, attributes):
73
- return {
74
- ':{key}'.format(key=key): attr for key, attr in attributes.items()
75
- }
1
+ from dataclasses import dataclass
2
+
3
+ import boto3
4
+ from boto3.dynamodb.conditions import Key
5
+
6
+
7
+ RETURN_VALUES = 'UPDATED_NEW'
8
+
9
+ @dataclass
10
+ class DynamoDBTable(object):
11
+ table_name: str
12
+
13
+ def __post_init__(self):
14
+ self._table = boto3.resource('dynamodb').Table(self.table_name)
15
+
16
+ def get(self, key: dict):
17
+ return self._table.get_item(Key=key).get('Item')
18
+
19
+ def get_all(self):
20
+ return self._table.scan().get('Items')
21
+
22
+ def upsert(self, key, attributes):
23
+ return self.put({**key, **attributes})
24
+
25
+
26
+ def filter_by(self, attribute: str, target_value: str):
27
+ return self._table.query(
28
+ FilterExpression=Key(attribute).eq(target_value),
29
+ ).get('Items')
30
+
31
+ def put(self, attributes: dict, condition: dict = None):
32
+ extra_args = {}
33
+ if condition:
34
+ extra_args['ConditionExpression'] = condition
35
+ return self._table.put_item(Item=attributes, **extra_args)
36
+
37
+
38
+ def update(self, key: str, attributes: dict):
39
+ return self._table.update_item(
40
+ Key=key,
41
+ UpdateExpression=self._update_expression(attributes),
42
+ ExpressionAttributeNames=self._expression_attribute_names(attributes),
43
+ ExpressionAttributeValues=self._expression_attribute_values(attributes),
44
+ ReturnValues=RETURN_VALUES,
45
+ )
46
+
47
+ def delete(self, key: dict):
48
+ return self._table.delete_item(Key=key)
49
+
50
+ def count(self) -> int:
51
+ return self._table.item_count
52
+
53
+
54
+ @classmethod
55
+ def _update_expression(cls, attributes):
56
+ return 'SET {param}'.format(
57
+ param=','.join(
58
+ '#{key}=:{key}'.format(
59
+ key=key,
60
+ )
61
+ for key in attributes
62
+ ),
63
+ )
64
+
65
+ @classmethod
66
+ def _expression_attribute_names(cls, attributes):
67
+ return {
68
+ '#{key}'.format(key=key): key for key in attributes
69
+ }
70
+
71
+ @classmethod
72
+ def _expression_attribute_values(cls, attributes):
73
+ return {
74
+ ':{key}'.format(key=key): attr for key, attr in attributes.items()
75
+ }
@@ -1,57 +1,57 @@
1
- import boto3
2
-
3
- from dataclasses import dataclass
4
- from typing import Optional
5
-
6
- from documente_shared.domain.entities.document import remove_slash_from_path
7
-
8
-
9
- def remove_none_values(data: dict) -> dict: # noqa: WPS110
10
- return {key: value for key, value in data.items() if value is not None} # noqa: WPS110
11
-
12
-
13
- @dataclass
14
- class S3Bucket(object):
15
- bucket_name: str
16
-
17
- def __post_init__(self):
18
- self._resource = boto3.resource('s3')
19
-
20
- def get(self, file_key: str) -> Optional[dict]:
21
- try:
22
- return self._resource.Object(self.bucket_name, file_key).get()
23
- except self._resource.meta.client.exceptions.NoSuchKey:
24
- return None
25
-
26
- def get_bytes(self, file_key: str) -> Optional[bytes]:
27
- cleaned_file_key = remove_slash_from_path(file_key)
28
- file_context = self.get(cleaned_file_key)
29
- if not file_context:
30
- return None
31
- return (
32
- file_context['Body'].read()
33
- if 'Body' in file_context
34
- else None
35
- )
36
-
37
- def upload(self, file_key: str, file_content, content_type: Optional[str] = None):
38
- cleaned_file_key = remove_slash_from_path(file_key)
39
- optional_params = {'ContentType': content_type}
40
- return self._resource.Object(self.bucket_name, cleaned_file_key).put(
41
- Body=file_content,
42
- **remove_none_values(optional_params),
43
- )
44
-
45
- def delete(self, file_key: str):
46
- cleaned_file_key = remove_slash_from_path(file_key)
47
- return self._resource.Object(self.bucket_name, cleaned_file_key).delete()
48
-
49
- def get_url(self, file_key: str):
50
- cleaned_file_key = remove_slash_from_path(file_key)
51
- return 'https://{bucket_url}.s3.amazonaws.com/{file_key}'.format(
52
- bucket_url=self.bucket_name,
53
- file_key=cleaned_file_key,
54
- )
55
-
56
- def read(self, file_key: str) -> bytes:
57
- return self.get(file_key)['Body'].read()
1
+ import boto3
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Optional
5
+
6
+ from documente_shared.domain.entities.document import remove_slash_from_path
7
+
8
+
9
+ def remove_none_values(data: dict) -> dict: # noqa: WPS110
10
+ return {key: value for key, value in data.items() if value is not None} # noqa: WPS110
11
+
12
+
13
+ @dataclass
14
+ class S3Bucket(object):
15
+ bucket_name: str
16
+
17
+ def __post_init__(self):
18
+ self._resource = boto3.resource('s3')
19
+
20
+ def get(self, file_key: str) -> Optional[dict]:
21
+ try:
22
+ return self._resource.Object(self.bucket_name, file_key).get()
23
+ except self._resource.meta.client.exceptions.NoSuchKey:
24
+ return None
25
+
26
+ def get_bytes(self, file_key: str) -> Optional[bytes]:
27
+ cleaned_file_key = remove_slash_from_path(file_key)
28
+ file_context = self.get(cleaned_file_key)
29
+ if not file_context:
30
+ return None
31
+ return (
32
+ file_context['Body'].read()
33
+ if 'Body' in file_context
34
+ else None
35
+ )
36
+
37
+ def upload(self, file_key: str, file_content, content_type: Optional[str] = None):
38
+ cleaned_file_key = remove_slash_from_path(file_key)
39
+ optional_params = {'ContentType': content_type}
40
+ return self._resource.Object(self.bucket_name, cleaned_file_key).put(
41
+ Body=file_content,
42
+ **remove_none_values(optional_params),
43
+ )
44
+
45
+ def delete(self, file_key: str):
46
+ cleaned_file_key = remove_slash_from_path(file_key)
47
+ return self._resource.Object(self.bucket_name, cleaned_file_key).delete()
48
+
49
+ def get_url(self, file_key: str):
50
+ cleaned_file_key = remove_slash_from_path(file_key)
51
+ return 'https://{bucket_url}.s3.amazonaws.com/{file_key}'.format(
52
+ bucket_url=self.bucket_name,
53
+ file_key=cleaned_file_key,
54
+ )
55
+
56
+ def read(self, file_key: str) -> bytes:
57
+ return self.get(file_key)['Body'].read()