documente_shared 0.1.39__py3-none-any.whl → 0.1.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of documente_shared might be problematic. Click here for more details.

@@ -1,65 +1,65 @@
1
- from dataclasses import dataclass
2
- from datetime import datetime
3
- from typing import Optional
4
-
5
-
6
- @dataclass
7
- class DocumentProcessMetadata(object):
8
- publication_date: Optional[datetime] = None
9
- num_circular: Optional[str] = None
10
- asfi_identifier: Optional[str] = None
11
- contains_tables: Optional[bool] = None
12
- text_content: Optional[str] = None
13
- case_name: Optional[str] = None
14
- starting_office: Optional[str] = None
15
- output_json: Optional[dict] = None
16
- processing_time: Optional[float] = None
17
- llm_model: Optional[str] = None
18
- num_pages: Optional[float] = None
19
- num_tokens: Optional[float] = None
20
- citcular_type: Optional[str] = None
21
-
22
-
23
- @property
24
- def to_dict(self):
25
- return {
26
- 'publication_date': (
27
- self.publication_date.isoformat()
28
- if self.publication_date
29
- else None
30
- ),
31
- 'num_circular': self.num_circular,
32
- 'asfi_identifier': self.asfi_identifier,
33
- 'contains_tables': self.contains_tables,
34
- 'text_content': self.text_content,
35
- 'case_name': self.case_name,
36
- 'starting_office': self.starting_office,
37
- 'output_json': self.output_json,
38
- 'processing_time': self.processing_time,
39
- 'llm_model': self.llm_model,
40
- 'num_pages': self.num_pages,
41
- 'num_tokens': self.num_tokens,
42
- 'citcular_type': self.citcular_type
43
- }
44
-
45
- @classmethod
46
- def from_dict(cls, data: dict):
47
- return cls(
48
- publication_date=(
49
- datetime.fromisoformat(data.get('publication_date'))
50
- if data.get('publication_date')
51
- else None
52
- ),
53
- num_circular=data.get('num_circular'),
54
- asfi_identifier=data.get('asfi_identifier'),
55
- contains_tables=data.get('contains_tables'),
56
- text_content=data.get('text_content'),
57
- case_name=data.get('case_name'),
58
- starting_office=data.get('starting_office'),
59
- output_json=data.get('output_json'),
60
- processing_time=data.get('processing_time'),
61
- llm_model=data.get('llm_model'),
62
- num_pages=data.get('num_pages'),
63
- num_tokens=data.get('num_tokens'),
64
- citcular_type=data.get('citcular_type')
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Optional
4
+
5
+
6
+ @dataclass
7
+ class DocumentProcessMetadata(object):
8
+ publication_date: Optional[datetime] = None
9
+ num_circular: Optional[str] = None
10
+ asfi_identifier: Optional[str] = None
11
+ contains_tables: Optional[bool] = None
12
+ text_content: Optional[str] = None
13
+ case_name: Optional[str] = None
14
+ starting_office: Optional[str] = None
15
+ output_json: Optional[dict] = None
16
+ processing_time: Optional[float] = None
17
+ llm_model: Optional[str] = None
18
+ num_pages: Optional[float] = None
19
+ num_tokens: Optional[float] = None
20
+ citcular_type: Optional[str] = None
21
+
22
+
23
+ @property
24
+ def to_dict(self):
25
+ return {
26
+ 'publication_date': (
27
+ self.publication_date.isoformat()
28
+ if self.publication_date
29
+ else None
30
+ ),
31
+ 'num_circular': self.num_circular,
32
+ 'asfi_identifier': self.asfi_identifier,
33
+ 'contains_tables': self.contains_tables,
34
+ 'text_content': self.text_content,
35
+ 'case_name': self.case_name,
36
+ 'starting_office': self.starting_office,
37
+ 'output_json': self.output_json,
38
+ 'processing_time': self.processing_time,
39
+ 'llm_model': self.llm_model,
40
+ 'num_pages': self.num_pages,
41
+ 'num_tokens': self.num_tokens,
42
+ 'citcular_type': self.citcular_type
43
+ }
44
+
45
+ @classmethod
46
+ def from_dict(cls, data: dict):
47
+ return cls(
48
+ publication_date=(
49
+ datetime.fromisoformat(data.get('publication_date'))
50
+ if data.get('publication_date')
51
+ else None
52
+ ),
53
+ num_circular=data.get('num_circular'),
54
+ asfi_identifier=data.get('asfi_identifier'),
55
+ contains_tables=data.get('contains_tables'),
56
+ text_content=data.get('text_content'),
57
+ case_name=data.get('case_name'),
58
+ starting_office=data.get('starting_office'),
59
+ output_json=data.get('output_json'),
60
+ processing_time=data.get('processing_time'),
61
+ llm_model=data.get('llm_model'),
62
+ num_pages=data.get('num_pages'),
63
+ num_tokens=data.get('num_tokens'),
64
+ citcular_type=data.get('citcular_type')
65
65
  )
@@ -1,22 +1,22 @@
1
- from documente_shared.domain.base_enum import BaseEnum
2
-
3
-
4
- class DocumentProcessStatus(BaseEnum):
5
- PENDING = 'PENDING'
6
- ENQUEUED = 'ENQUEUED'
7
- PROCESSING = 'PROCESSING'
8
- COMPLETED = 'COMPLETED'
9
- FAILED = 'FAILED'
10
- DELETED = 'DELETED'
11
- CANCELLED = 'CANCELLED'
12
-
13
-
14
- class DocumentProcessCategory(BaseEnum):
15
- CIRCULAR = 'CIRCULAR'
16
-
17
-
18
- class DocumentProcessSubCategory(BaseEnum):
19
- CC_COMBINADA = 'CC_COMBINADA'
20
- CC_NORMATIVA = 'CC_NORMATIVA'
21
-
22
-
1
+ from documente_shared.domain.base_enum import BaseEnum
2
+
3
+
4
+ class DocumentProcessStatus(BaseEnum):
5
+ PENDING = 'PENDING'
6
+ ENQUEUED = 'ENQUEUED'
7
+ PROCESSING = 'PROCESSING'
8
+ COMPLETED = 'COMPLETED'
9
+ FAILED = 'FAILED'
10
+ DELETED = 'DELETED'
11
+ CANCELLED = 'CANCELLED'
12
+
13
+
14
+ class DocumentProcessCategory(BaseEnum):
15
+ CIRCULAR = 'CIRCULAR'
16
+
17
+
18
+ class DocumentProcessSubCategory(BaseEnum):
19
+ CC_COMBINADA = 'CC_COMBINADA'
20
+ CC_NORMATIVA = 'CC_NORMATIVA'
21
+
22
+
@@ -1,25 +1,25 @@
1
- from abc import ABC, abstractmethod
2
- from typing import Optional, List
3
-
4
- from documente_shared.domain.entities.document_process import DocumentProcess
5
- from documente_shared.domain.enums import DocumentProcessStatus
6
-
7
-
8
- class DocumentProcessRepository(ABC):
9
-
10
- @abstractmethod
11
- def find(self, digest: str) ->Optional[DocumentProcess]:
12
- raise NotImplementedError
13
-
14
- @abstractmethod
15
- def persist(self, instance: DocumentProcess) -> DocumentProcess:
16
- raise NotImplementedError
17
-
18
- @abstractmethod
19
- def remove(self, instance: DocumentProcess):
20
- raise NotImplementedError
21
-
22
-
23
- @abstractmethod
24
- def filter(self, statuses: List[DocumentProcessStatus]) -> List[DocumentProcess]:
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional, List
3
+
4
+ from documente_shared.domain.entities.document_process import DocumentProcess
5
+ from documente_shared.domain.enums import DocumentProcessStatus
6
+
7
+
8
+ class DocumentProcessRepository(ABC):
9
+
10
+ @abstractmethod
11
+ def find(self, digest: str) ->Optional[DocumentProcess]:
12
+ raise NotImplementedError
13
+
14
+ @abstractmethod
15
+ def persist(self, instance: DocumentProcess) -> DocumentProcess:
16
+ raise NotImplementedError
17
+
18
+ @abstractmethod
19
+ def remove(self, instance: DocumentProcess):
20
+ raise NotImplementedError
21
+
22
+
23
+ @abstractmethod
24
+ def filter(self, statuses: List[DocumentProcessStatus]) -> List[DocumentProcess]:
25
25
  raise NotImplementedError
@@ -1,43 +1,43 @@
1
- from typing import Optional, List
2
-
3
- from boto3.dynamodb.conditions import Key
4
-
5
- from documente_shared.domain.entities.document_process import DocumentProcess
6
- from documente_shared.domain.enums import DocumentProcessStatus
7
- from documente_shared.domain.repositories import DocumentProcessRepository
8
- from documente_shared.infrastructure.dynamo_table import DynamoDBTable
9
-
10
-
11
-
12
- class DynamoDocumentProcessRepository(
13
- DynamoDBTable,
14
- DocumentProcessRepository,
15
- ):
16
- def find(self, digest: str) -> Optional[DocumentProcess]:
17
- item = self.get(key={'digest': digest})
18
- if item:
19
- return DocumentProcess.from_dict(item)
20
- return None
21
-
22
- def persist(self, instance: DocumentProcess) -> DocumentProcess:
23
- self.put(instance.to_simple_dict)
24
- return instance
25
-
26
- def remove(self, instance: DocumentProcess):
27
- self.delete(key={'digest': instance.digest})
28
-
29
- def filter(self, statuses: List[DocumentProcessStatus]) -> List[DocumentProcess]:
30
- items = []
31
-
32
- for status in statuses:
33
- response = self._table.query(
34
- IndexName='status',
35
- KeyConditionExpression=Key('status').eq(status.value),
36
- )
37
- status_items = response.get('Items', [])
38
- items.extend(status_items)
39
-
40
- return [
41
- DocumentProcess.from_dict(item)
42
- for item in items
43
- ]
1
+ from typing import Optional, List
2
+
3
+ from boto3.dynamodb.conditions import Key
4
+
5
+ from documente_shared.domain.entities.document_process import DocumentProcess
6
+ from documente_shared.domain.enums import DocumentProcessStatus
7
+ from documente_shared.domain.repositories import DocumentProcessRepository
8
+ from documente_shared.infrastructure.dynamo_table import DynamoDBTable
9
+
10
+
11
+
12
+ class DynamoDocumentProcessRepository(
13
+ DynamoDBTable,
14
+ DocumentProcessRepository,
15
+ ):
16
+ def find(self, digest: str) -> Optional[DocumentProcess]:
17
+ item = self.get(key={'digest': digest})
18
+ if item:
19
+ return DocumentProcess.from_dict(item)
20
+ return None
21
+
22
+ def persist(self, instance: DocumentProcess) -> DocumentProcess:
23
+ self.put(instance.to_simple_dict)
24
+ return instance
25
+
26
+ def remove(self, instance: DocumentProcess):
27
+ self.delete(key={'digest': instance.digest})
28
+
29
+ def filter(self, statuses: List[DocumentProcessStatus]) -> List[DocumentProcess]:
30
+ items = []
31
+
32
+ for status in statuses:
33
+ response = self._table.query(
34
+ IndexName='status',
35
+ KeyConditionExpression=Key('status').eq(status.value),
36
+ )
37
+ status_items = response.get('Items', [])
38
+ items.extend(status_items)
39
+
40
+ return [
41
+ DocumentProcess.from_dict(item)
42
+ for item in items
43
+ ]
@@ -1,75 +1,75 @@
1
- from dataclasses import dataclass
2
-
3
- import boto3
4
- from boto3.dynamodb.conditions import Key
5
-
6
-
7
- RETURN_VALUES = 'UPDATED_NEW'
8
-
9
- @dataclass
10
- class DynamoDBTable(object):
11
- table_name: str
12
-
13
- def __post_init__(self):
14
- self._table = boto3.resource('dynamodb').Table(self.table_name)
15
-
16
- def get(self, key: dict):
17
- return self._table.get_item(Key=key).get('Item')
18
-
19
- def get_all(self):
20
- return self._table.scan().get('Items')
21
-
22
- def upsert(self, key, attributes):
23
- return self.put({**key, **attributes})
24
-
25
-
26
- def filter_by(self, attribute: str, target_value: str):
27
- return self._table.query(
28
- FilterExpression=Key(attribute).eq(target_value),
29
- ).get('Items')
30
-
31
- def put(self, attributes: dict, condition: dict = None):
32
- extra_args = {}
33
- if condition:
34
- extra_args['ConditionExpression'] = condition
35
- return self._table.put_item(Item=attributes, **extra_args)
36
-
37
-
38
- def update(self, key: str, attributes: dict):
39
- return self._table.update_item(
40
- Key=key,
41
- UpdateExpression=self._update_expression(attributes),
42
- ExpressionAttributeNames=self._expression_attribute_names(attributes),
43
- ExpressionAttributeValues=self._expression_attribute_values(attributes),
44
- ReturnValues=RETURN_VALUES,
45
- )
46
-
47
- def delete(self, key: dict):
48
- return self._table.delete_item(Key=key)
49
-
50
- def count(self) -> int:
51
- return self._table.item_count
52
-
53
-
54
- @classmethod
55
- def _update_expression(cls, attributes):
56
- return 'SET {param}'.format(
57
- param=','.join(
58
- '#{key}=:{key}'.format(
59
- key=key,
60
- )
61
- for key in attributes
62
- ),
63
- )
64
-
65
- @classmethod
66
- def _expression_attribute_names(cls, attributes):
67
- return {
68
- '#{key}'.format(key=key): key for key in attributes
69
- }
70
-
71
- @classmethod
72
- def _expression_attribute_values(cls, attributes):
73
- return {
74
- ':{key}'.format(key=key): attr for key, attr in attributes.items()
75
- }
1
+ from dataclasses import dataclass
2
+
3
+ import boto3
4
+ from boto3.dynamodb.conditions import Key
5
+
6
+
7
+ RETURN_VALUES = 'UPDATED_NEW'
8
+
9
+ @dataclass
10
+ class DynamoDBTable(object):
11
+ table_name: str
12
+
13
+ def __post_init__(self):
14
+ self._table = boto3.resource('dynamodb').Table(self.table_name)
15
+
16
+ def get(self, key: dict):
17
+ return self._table.get_item(Key=key).get('Item')
18
+
19
+ def get_all(self):
20
+ return self._table.scan().get('Items')
21
+
22
+ def upsert(self, key, attributes):
23
+ return self.put({**key, **attributes})
24
+
25
+
26
+ def filter_by(self, attribute: str, target_value: str):
27
+ return self._table.query(
28
+ FilterExpression=Key(attribute).eq(target_value),
29
+ ).get('Items')
30
+
31
+ def put(self, attributes: dict, condition: dict = None):
32
+ extra_args = {}
33
+ if condition:
34
+ extra_args['ConditionExpression'] = condition
35
+ return self._table.put_item(Item=attributes, **extra_args)
36
+
37
+
38
+ def update(self, key: str, attributes: dict):
39
+ return self._table.update_item(
40
+ Key=key,
41
+ UpdateExpression=self._update_expression(attributes),
42
+ ExpressionAttributeNames=self._expression_attribute_names(attributes),
43
+ ExpressionAttributeValues=self._expression_attribute_values(attributes),
44
+ ReturnValues=RETURN_VALUES,
45
+ )
46
+
47
+ def delete(self, key: dict):
48
+ return self._table.delete_item(Key=key)
49
+
50
+ def count(self) -> int:
51
+ return self._table.item_count
52
+
53
+
54
+ @classmethod
55
+ def _update_expression(cls, attributes):
56
+ return 'SET {param}'.format(
57
+ param=','.join(
58
+ '#{key}=:{key}'.format(
59
+ key=key,
60
+ )
61
+ for key in attributes
62
+ ),
63
+ )
64
+
65
+ @classmethod
66
+ def _expression_attribute_names(cls, attributes):
67
+ return {
68
+ '#{key}'.format(key=key): key for key in attributes
69
+ }
70
+
71
+ @classmethod
72
+ def _expression_attribute_values(cls, attributes):
73
+ return {
74
+ ':{key}'.format(key=key): attr for key, attr in attributes.items()
75
+ }
@@ -1,57 +1,57 @@
1
- import boto3
2
-
3
- from dataclasses import dataclass
4
- from typing import Optional
5
-
6
- from documente_shared.domain.entities.document_process import remove_slash_from_path
7
-
8
-
9
- def remove_none_values(data: dict) -> dict: # noqa: WPS110
10
- return {key: value for key, value in data.items() if value is not None} # noqa: WPS110
11
-
12
-
13
- @dataclass
14
- class S3Bucket(object):
15
- bucket_name: str
16
-
17
- def __post_init__(self):
18
- self._resource = boto3.resource('s3')
19
-
20
- def get(self, file_key: str) -> Optional[dict]:
21
- try:
22
- return self._resource.Object(self.bucket_name, file_key).get()
23
- except self._resource.meta.client.exceptions.NoSuchKey:
24
- return None
25
-
26
- def get_bytes(self, file_key: str) -> Optional[bytes]:
27
- cleaned_file_key = remove_slash_from_path(file_key)
28
- file_context = self.get(cleaned_file_key)
29
- if not file_context:
30
- return None
31
- return (
32
- file_context['Body'].read()
33
- if 'Body' in file_context
34
- else None
35
- )
36
-
37
- def upload(self, file_key: str, file_content, content_type: Optional[str] = None):
38
- cleaned_file_key = remove_slash_from_path(file_key)
39
- optional_params = {'ContentType': content_type}
40
- return self._resource.Object(self.bucket_name, cleaned_file_key).put(
41
- Body=file_content,
42
- **remove_none_values(optional_params),
43
- )
44
-
45
- def delete(self, file_key: str):
46
- cleaned_file_key = remove_slash_from_path(file_key)
47
- return self._resource.Object(self.bucket_name, cleaned_file_key).delete()
48
-
49
- def get_url(self, file_key: str):
50
- cleaned_file_key = remove_slash_from_path(file_key)
51
- return 'https://{bucket_url}.s3.amazonaws.com/{file_key}'.format(
52
- bucket_url=self.bucket_name,
53
- file_key=cleaned_file_key,
54
- )
55
-
56
- def read(self, file_key: str) -> bytes:
57
- return self.get(file_key)['Body'].read()
1
+ import boto3
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Optional
5
+
6
+ from documente_shared.domain.entities.document_process import remove_slash_from_path
7
+
8
+
9
+ def remove_none_values(data: dict) -> dict: # noqa: WPS110
10
+ return {key: value for key, value in data.items() if value is not None} # noqa: WPS110
11
+
12
+
13
+ @dataclass
14
+ class S3Bucket(object):
15
+ bucket_name: str
16
+
17
+ def __post_init__(self):
18
+ self._resource = boto3.resource('s3')
19
+
20
+ def get(self, file_key: str) -> Optional[dict]:
21
+ try:
22
+ return self._resource.Object(self.bucket_name, file_key).get()
23
+ except self._resource.meta.client.exceptions.NoSuchKey:
24
+ return None
25
+
26
+ def get_bytes(self, file_key: str) -> Optional[bytes]:
27
+ cleaned_file_key = remove_slash_from_path(file_key)
28
+ file_context = self.get(cleaned_file_key)
29
+ if not file_context:
30
+ return None
31
+ return (
32
+ file_context['Body'].read()
33
+ if 'Body' in file_context
34
+ else None
35
+ )
36
+
37
+ def upload(self, file_key: str, file_content, content_type: Optional[str] = None):
38
+ cleaned_file_key = remove_slash_from_path(file_key)
39
+ optional_params = {'ContentType': content_type}
40
+ return self._resource.Object(self.bucket_name, cleaned_file_key).put(
41
+ Body=file_content,
42
+ **remove_none_values(optional_params),
43
+ )
44
+
45
+ def delete(self, file_key: str):
46
+ cleaned_file_key = remove_slash_from_path(file_key)
47
+ return self._resource.Object(self.bucket_name, cleaned_file_key).delete()
48
+
49
+ def get_url(self, file_key: str):
50
+ cleaned_file_key = remove_slash_from_path(file_key)
51
+ return 'https://{bucket_url}.s3.amazonaws.com/{file_key}'.format(
52
+ bucket_url=self.bucket_name,
53
+ file_key=cleaned_file_key,
54
+ )
55
+
56
+ def read(self, file_key: str) -> bytes:
57
+ return self.get(file_key)['Body'].read()