PyPI - documente_shared - Versions diffs - 0.1.39__py3-none-any.whl → 0.1.40__py3-none-any.whl - Mend

documente_shared 0.1.39__py3-none-any.whl → 0.1.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of documente_shared might be problematic. Click here for more details.

Files changed (16)

documente_shared/application/digest.py +7 -7
documente_shared/application/exceptions.py +23 -23
documente_shared/application/time_utils.py +9 -9
documente_shared/domain/base_enum.py +53 -53
documente_shared/domain/entities/document_process.py +226 -226
documente_shared/domain/entities/document_process_metadata.py +64 -64
documente_shared/domain/enums.py +22 -22
documente_shared/domain/repositories.py +24 -24
documente_shared/infrastructure/dynamo_repositories.py +43 -43
documente_shared/infrastructure/dynamo_table.py +75 -75
documente_shared/infrastructure/s3_bucket.py +57 -57
documente_shared/infrastructure/sqs_queue.py +47 -47
{documente_shared-0.1.39.dist-info → documente_shared-0.1.40.dist-info}/METADATA +1 -1
documente_shared-0.1.40.dist-info/RECORD +20 -0
documente_shared-0.1.39.dist-info/RECORD +0 -20
{documente_shared-0.1.39.dist-info → documente_shared-0.1.40.dist-info}/WHEEL +0 -0

documente_shared/domain/entities/document_process_metadata.py CHANGED Viewed

@@ -1,65 +1,65 @@
-from dataclasses import dataclass
-from datetime import datetime
-from typing import Optional
-@dataclass
-class DocumentProcessMetadata(object):
-    publication_date: Optional[datetime] = None
-    num_circular: Optional[str] = None
-    asfi_identifier: Optional[str] = None
-    contains_tables: Optional[bool] = None
-    text_content: Optional[str] = None
-    case_name: Optional[str] = None
-    starting_office: Optional[str] = None
-    output_json: Optional[dict] = None
-    processing_time: Optional[float] = None
-    llm_model: Optional[str] = None
-    num_pages: Optional[float] = None
-    num_tokens: Optional[float] = None
-    citcular_type: Optional[str] = None
-    @property
-    def to_dict(self):
-        return {
-            'publication_date': (
-                self.publication_date.isoformat()
-                if self.publication_date
-                else None
-            ),
-            'num_circular': self.num_circular,
-            'asfi_identifier': self.asfi_identifier,
-            'contains_tables': self.contains_tables,
-            'text_content': self.text_content,
-            'case_name': self.case_name,
-            'starting_office': self.starting_office,
-            'output_json': self.output_json,
-            'processing_time': self.processing_time,
-            'llm_model': self.llm_model,
-            'num_pages': self.num_pages,
-            'num_tokens': self.num_tokens,
-            'citcular_type': self.citcular_type
-        }
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(
-            publication_date=(
-                datetime.fromisoformat(data.get('publication_date'))
-                if data.get('publication_date')
-                else None
-            ),
-            num_circular=data.get('num_circular'),
-            asfi_identifier=data.get('asfi_identifier'),
-            contains_tables=data.get('contains_tables'),
-            text_content=data.get('text_content'),
-            case_name=data.get('case_name'),
-            starting_office=data.get('starting_office'),
-            output_json=data.get('output_json'),
-            processing_time=data.get('processing_time'),
-            llm_model=data.get('llm_model'),
-            num_pages=data.get('num_pages'),
-            num_tokens=data.get('num_tokens'),
-            citcular_type=data.get('citcular_type')
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional
+@dataclass
+class DocumentProcessMetadata(object):
+    publication_date: Optional[datetime] = None
+    num_circular: Optional[str] = None
+    asfi_identifier: Optional[str] = None
+    contains_tables: Optional[bool] = None
+    text_content: Optional[str] = None
+    case_name: Optional[str] = None
+    starting_office: Optional[str] = None
+    output_json: Optional[dict] = None
+    processing_time: Optional[float] = None
+    llm_model: Optional[str] = None
+    num_pages: Optional[float] = None
+    num_tokens: Optional[float] = None
+    citcular_type: Optional[str] = None
+    @property
+    def to_dict(self):
+        return {
+            'publication_date': (
+                self.publication_date.isoformat()
+                if self.publication_date
+                else None
+            ),
+            'num_circular': self.num_circular,
+            'asfi_identifier': self.asfi_identifier,
+            'contains_tables': self.contains_tables,
+            'text_content': self.text_content,
+            'case_name': self.case_name,
+            'starting_office': self.starting_office,
+            'output_json': self.output_json,
+            'processing_time': self.processing_time,
+            'llm_model': self.llm_model,
+            'num_pages': self.num_pages,
+            'num_tokens': self.num_tokens,
+            'citcular_type': self.citcular_type
+        }
+    @classmethod
+    def from_dict(cls, data: dict):
+        return cls(
+            publication_date=(
+                datetime.fromisoformat(data.get('publication_date'))
+                if data.get('publication_date')
+                else None
+            ),
+            num_circular=data.get('num_circular'),
+            asfi_identifier=data.get('asfi_identifier'),
+            contains_tables=data.get('contains_tables'),
+            text_content=data.get('text_content'),
+            case_name=data.get('case_name'),
+            starting_office=data.get('starting_office'),
+            output_json=data.get('output_json'),
+            processing_time=data.get('processing_time'),
+            llm_model=data.get('llm_model'),
+            num_pages=data.get('num_pages'),
+            num_tokens=data.get('num_tokens'),
+            citcular_type=data.get('citcular_type')
         )

documente_shared/domain/enums.py CHANGED Viewed

@@ -1,22 +1,22 @@
-from documente_shared.domain.base_enum import BaseEnum
-class DocumentProcessStatus(BaseEnum):
-    PENDING = 'PENDING'
-    ENQUEUED = 'ENQUEUED'
-    PROCESSING = 'PROCESSING'
-    COMPLETED = 'COMPLETED'
-    FAILED = 'FAILED'
-    DELETED = 'DELETED'
-    CANCELLED = 'CANCELLED'
-class DocumentProcessCategory(BaseEnum):
-    CIRCULAR = 'CIRCULAR'
-class DocumentProcessSubCategory(BaseEnum):
-    CC_COMBINADA = 'CC_COMBINADA'
-    CC_NORMATIVA = 'CC_NORMATIVA'
+from documente_shared.domain.base_enum import BaseEnum
+class DocumentProcessStatus(BaseEnum):
+    PENDING = 'PENDING'
+    ENQUEUED = 'ENQUEUED'
+    PROCESSING = 'PROCESSING'
+    COMPLETED = 'COMPLETED'
+    FAILED = 'FAILED'
+    DELETED = 'DELETED'
+    CANCELLED = 'CANCELLED'
+class DocumentProcessCategory(BaseEnum):
+    CIRCULAR = 'CIRCULAR'
+class DocumentProcessSubCategory(BaseEnum):
+    CC_COMBINADA = 'CC_COMBINADA'
+    CC_NORMATIVA = 'CC_NORMATIVA'

documente_shared/domain/repositories.py CHANGED Viewed

@@ -1,25 +1,25 @@
-from abc import ABC, abstractmethod
-from typing import Optional, List
-from documente_shared.domain.entities.document_process import DocumentProcess
-from documente_shared.domain.enums import DocumentProcessStatus
-class DocumentProcessRepository(ABC):
-    @abstractmethod
-    def find(self, digest: str) ->Optional[DocumentProcess]:
-        raise NotImplementedError
-    @abstractmethod
-    def persist(self, instance: DocumentProcess) -> DocumentProcess:
-        raise NotImplementedError
-    @abstractmethod
-    def remove(self, instance: DocumentProcess):
-        raise NotImplementedError
-    @abstractmethod
-    def filter(self, statuses: List[DocumentProcessStatus]) -> List[DocumentProcess]:
+from abc import ABC, abstractmethod
+from typing import Optional, List
+from documente_shared.domain.entities.document_process import DocumentProcess
+from documente_shared.domain.enums import DocumentProcessStatus
+class DocumentProcessRepository(ABC):
+    @abstractmethod
+    def find(self, digest: str) ->Optional[DocumentProcess]:
+        raise NotImplementedError
+    @abstractmethod
+    def persist(self, instance: DocumentProcess) -> DocumentProcess:
+        raise NotImplementedError
+    @abstractmethod
+    def remove(self, instance: DocumentProcess):
+        raise NotImplementedError
+    @abstractmethod
+    def filter(self, statuses: List[DocumentProcessStatus]) -> List[DocumentProcess]:
         raise NotImplementedError

documente_shared/infrastructure/dynamo_repositories.py CHANGED Viewed

@@ -1,43 +1,43 @@
-from typing import Optional, List
-from boto3.dynamodb.conditions import Key
-from documente_shared.domain.entities.document_process import DocumentProcess
-from documente_shared.domain.enums import DocumentProcessStatus
-from documente_shared.domain.repositories import DocumentProcessRepository
-from documente_shared.infrastructure.dynamo_table import DynamoDBTable
-class DynamoDocumentProcessRepository(
-    DynamoDBTable,
-    DocumentProcessRepository,
-):
-    def find(self, digest: str) -> Optional[DocumentProcess]:
-        item = self.get(key={'digest': digest})
-        if item:
-            return DocumentProcess.from_dict(item)
-        return None
-    def persist(self, instance: DocumentProcess) -> DocumentProcess:
-        self.put(instance.to_simple_dict)
-        return instance
-    def remove(self, instance: DocumentProcess):
-        self.delete(key={'digest': instance.digest})
-    def filter(self, statuses: List[DocumentProcessStatus]) -> List[DocumentProcess]:
-        items = []
-        for status in statuses:
-            response = self._table.query(
-                IndexName='status',
-                KeyConditionExpression=Key('status').eq(status.value),
-            )
-            status_items = response.get('Items', [])
-            items.extend(status_items)
-        return [
-            DocumentProcess.from_dict(item)
-            for item in items
-        ]
+from typing import Optional, List
+from boto3.dynamodb.conditions import Key
+from documente_shared.domain.entities.document_process import DocumentProcess
+from documente_shared.domain.enums import DocumentProcessStatus
+from documente_shared.domain.repositories import DocumentProcessRepository
+from documente_shared.infrastructure.dynamo_table import DynamoDBTable
+class DynamoDocumentProcessRepository(
+    DynamoDBTable,
+    DocumentProcessRepository,
+):
+    def find(self, digest: str) -> Optional[DocumentProcess]:
+        item = self.get(key={'digest': digest})
+        if item:
+            return DocumentProcess.from_dict(item)
+        return None
+    def persist(self, instance: DocumentProcess) -> DocumentProcess:
+        self.put(instance.to_simple_dict)
+        return instance
+    def remove(self, instance: DocumentProcess):
+        self.delete(key={'digest': instance.digest})
+    def filter(self, statuses: List[DocumentProcessStatus]) -> List[DocumentProcess]:
+        items = []
+        for status in statuses:
+            response = self._table.query(
+                IndexName='status',
+                KeyConditionExpression=Key('status').eq(status.value),
+            )
+            status_items = response.get('Items', [])
+            items.extend(status_items)
+        return [
+            DocumentProcess.from_dict(item)
+            for item in items
+        ]

documente_shared/infrastructure/dynamo_table.py CHANGED Viewed

@@ -1,75 +1,75 @@
-from dataclasses import dataclass
-import boto3
-from boto3.dynamodb.conditions import Key
-RETURN_VALUES = 'UPDATED_NEW'
-@dataclass
-class DynamoDBTable(object):
-    table_name: str
-    def __post_init__(self):
-        self._table = boto3.resource('dynamodb').Table(self.table_name)
-    def get(self, key: dict):
-        return self._table.get_item(Key=key).get('Item')
-    def get_all(self):
-        return self._table.scan().get('Items')
-    def upsert(self, key, attributes):
-        return self.put({**key, **attributes})
-    def filter_by(self, attribute: str, target_value: str):
-        return self._table.query(
-            FilterExpression=Key(attribute).eq(target_value),
-        ).get('Items')
-    def put(self, attributes: dict, condition: dict = None):
-        extra_args = {}
-        if condition:
-            extra_args['ConditionExpression'] = condition
-        return self._table.put_item(Item=attributes, **extra_args)
-    def update(self, key: str, attributes: dict):
-        return self._table.update_item(
-            Key=key,
-            UpdateExpression=self._update_expression(attributes),
-            ExpressionAttributeNames=self._expression_attribute_names(attributes),
-            ExpressionAttributeValues=self._expression_attribute_values(attributes),
-            ReturnValues=RETURN_VALUES,
-        )
-    def delete(self, key: dict):
-        return self._table.delete_item(Key=key)
-    def count(self) -> int:
-        return self._table.item_count
-    @classmethod
-    def _update_expression(cls, attributes):
-        return 'SET {param}'.format(
-            param=','.join(
-                '#{key}=:{key}'.format(
-                    key=key,
-                )
-                for key in attributes
-            ),
-        )
-    @classmethod
-    def _expression_attribute_names(cls, attributes):
-        return {
-            '#{key}'.format(key=key): key for key in attributes
-        }
-    @classmethod
-    def _expression_attribute_values(cls, attributes):
-        return {
-            ':{key}'.format(key=key): attr for key, attr in attributes.items()
-        }
+from dataclasses import dataclass
+import boto3
+from boto3.dynamodb.conditions import Key
+RETURN_VALUES = 'UPDATED_NEW'
+@dataclass
+class DynamoDBTable(object):
+    table_name: str
+    def __post_init__(self):
+        self._table = boto3.resource('dynamodb').Table(self.table_name)
+    def get(self, key: dict):
+        return self._table.get_item(Key=key).get('Item')
+    def get_all(self):
+        return self._table.scan().get('Items')
+    def upsert(self, key, attributes):
+        return self.put({**key, **attributes})
+    def filter_by(self, attribute: str, target_value: str):
+        return self._table.query(
+            FilterExpression=Key(attribute).eq(target_value),
+        ).get('Items')
+    def put(self, attributes: dict, condition: dict = None):
+        extra_args = {}
+        if condition:
+            extra_args['ConditionExpression'] = condition
+        return self._table.put_item(Item=attributes, **extra_args)
+    def update(self, key: str, attributes: dict):
+        return self._table.update_item(
+            Key=key,
+            UpdateExpression=self._update_expression(attributes),
+            ExpressionAttributeNames=self._expression_attribute_names(attributes),
+            ExpressionAttributeValues=self._expression_attribute_values(attributes),
+            ReturnValues=RETURN_VALUES,
+        )
+    def delete(self, key: dict):
+        return self._table.delete_item(Key=key)
+    def count(self) -> int:
+        return self._table.item_count
+    @classmethod
+    def _update_expression(cls, attributes):
+        return 'SET {param}'.format(
+            param=','.join(
+                '#{key}=:{key}'.format(
+                    key=key,
+                )
+                for key in attributes
+            ),
+        )
+    @classmethod
+    def _expression_attribute_names(cls, attributes):
+        return {
+            '#{key}'.format(key=key): key for key in attributes
+        }
+    @classmethod
+    def _expression_attribute_values(cls, attributes):
+        return {
+            ':{key}'.format(key=key): attr for key, attr in attributes.items()
+        }

documente_shared/infrastructure/s3_bucket.py CHANGED Viewed

@@ -1,57 +1,57 @@
-import boto3
-from dataclasses import dataclass
-from typing import Optional
-from documente_shared.domain.entities.document_process import remove_slash_from_path
-def remove_none_values(data: dict) -> dict:  # noqa: WPS110
-    return {key: value for key, value in data.items() if value is not None}  # noqa: WPS110
-@dataclass
-class S3Bucket(object):
-    bucket_name: str
-    def __post_init__(self):
-        self._resource = boto3.resource('s3')
-    def get(self, file_key: str) -> Optional[dict]:
-        try:
-            return self._resource.Object(self.bucket_name, file_key).get()
-        except self._resource.meta.client.exceptions.NoSuchKey:
-            return None
-    def get_bytes(self, file_key: str) -> Optional[bytes]:
-        cleaned_file_key = remove_slash_from_path(file_key)
-        file_context = self.get(cleaned_file_key)
-        if not file_context:
-            return None
-        return (
-            file_context['Body'].read()
-            if 'Body' in file_context
-            else None
-        )
-    def upload(self, file_key: str, file_content, content_type: Optional[str] = None):
-        cleaned_file_key = remove_slash_from_path(file_key)
-        optional_params = {'ContentType': content_type}
-        return self._resource.Object(self.bucket_name, cleaned_file_key).put(
-            Body=file_content,
-            **remove_none_values(optional_params),
-        )
-    def delete(self, file_key: str):
-        cleaned_file_key = remove_slash_from_path(file_key)
-        return self._resource.Object(self.bucket_name, cleaned_file_key).delete()
-    def get_url(self, file_key: str):
-        cleaned_file_key = remove_slash_from_path(file_key)
-        return 'https://{bucket_url}.s3.amazonaws.com/{file_key}'.format(
-            bucket_url=self.bucket_name,
-            file_key=cleaned_file_key,
-        )
-    def read(self, file_key: str) -> bytes:
-        return self.get(file_key)['Body'].read()
+import boto3
+from dataclasses import dataclass
+from typing import Optional
+from documente_shared.domain.entities.document_process import remove_slash_from_path
+def remove_none_values(data: dict) -> dict:  # noqa: WPS110
+    return {key: value for key, value in data.items() if value is not None}  # noqa: WPS110
+@dataclass
+class S3Bucket(object):
+    bucket_name: str
+    def __post_init__(self):
+        self._resource = boto3.resource('s3')
+    def get(self, file_key: str) -> Optional[dict]:
+        try:
+            return self._resource.Object(self.bucket_name, file_key).get()
+        except self._resource.meta.client.exceptions.NoSuchKey:
+            return None
+    def get_bytes(self, file_key: str) -> Optional[bytes]:
+        cleaned_file_key = remove_slash_from_path(file_key)
+        file_context = self.get(cleaned_file_key)
+        if not file_context:
+            return None
+        return (
+            file_context['Body'].read()
+            if 'Body' in file_context
+            else None
+        )
+    def upload(self, file_key: str, file_content, content_type: Optional[str] = None):
+        cleaned_file_key = remove_slash_from_path(file_key)
+        optional_params = {'ContentType': content_type}
+        return self._resource.Object(self.bucket_name, cleaned_file_key).put(
+            Body=file_content,
+            **remove_none_values(optional_params),
+        )
+    def delete(self, file_key: str):
+        cleaned_file_key = remove_slash_from_path(file_key)
+        return self._resource.Object(self.bucket_name, cleaned_file_key).delete()
+    def get_url(self, file_key: str):
+        cleaned_file_key = remove_slash_from_path(file_key)
+        return 'https://{bucket_url}.s3.amazonaws.com/{file_key}'.format(
+            bucket_url=self.bucket_name,
+            file_key=cleaned_file_key,
+        )
+    def read(self, file_key: str) -> bytes:
+        return self.get(file_key)['Body'].read()

documente_shared 0.1.39__py3-none-any.whl → 0.1.40__py3-none-any.whl

Potentially problematic release.

documente_shared 0.1.39__py3-none-any.whl → 0.1.40__py3-none-any.whl