PyPI - documente_shared - Versions diffs - 0.1.51__py3-none-any.whl → 0.1.52__py3-none-any.whl - Mend

documente_shared 0.1.51__py3-none-any.whl → 0.1.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of documente_shared might be problematic. Click here for more details.

Files changed (23)

documente_shared/__init__.py +0 -0
documente_shared/application/__init__.py +0 -0
documente_shared/application/digest.py +7 -7
documente_shared/application/exceptions.py +23 -23
documente_shared/application/time_utils.py +9 -9
documente_shared/application/timezone.py +7 -7
documente_shared/domain/__init__.py +0 -0
documente_shared/domain/base_enum.py +53 -53
documente_shared/domain/constants.py +2 -2
documente_shared/domain/entities/__init__.py +0 -0
documente_shared/domain/entities/document.py +268 -261
documente_shared/domain/entities/document_metadata.py +64 -64
documente_shared/domain/enums.py +36 -36
documente_shared/domain/repositories.py +24 -24
documente_shared/infrastructure/__init__.py +0 -0
documente_shared/infrastructure/dynamo_repositories.py +43 -43
documente_shared/infrastructure/dynamo_table.py +75 -75
documente_shared/infrastructure/s3_bucket.py +57 -57
documente_shared/infrastructure/sqs_queue.py +47 -47
{documente_shared-0.1.51.dist-info → documente_shared-0.1.52.dist-info}/METADATA +1 -1
documente_shared-0.1.52.dist-info/RECORD +22 -0
documente_shared-0.1.51.dist-info/RECORD +0 -22
{documente_shared-0.1.51.dist-info → documente_shared-0.1.52.dist-info}/WHEEL +0 -0

documente_shared/domain/entities/document_metadata.py CHANGED Viewed

@@ -1,65 +1,65 @@
-from dataclasses import dataclass
-from datetime import datetime
-from typing import Optional
-@dataclass
-class DocumentProcessingMetadata(object):
-    publication_date: Optional[datetime] = None
-    num_circular: Optional[str] = None
-    asfi_identifier: Optional[str] = None
-    contains_tables: Optional[bool] = None
-    text_content: Optional[str] = None
-    case_name: Optional[str] = None
-    starting_office: Optional[str] = None
-    output_json: Optional[dict] = None
-    processing_time: Optional[float] = None
-    llm_model: Optional[str] = None
-    num_pages: Optional[float] = None
-    num_tokens: Optional[float] = None
-    citcular_type: Optional[str] = None
-    @property
-    def to_dict(self):
-        return {
-            'publication_date': (
-                self.publication_date.isoformat()
-                if self.publication_date
-                else None
-            ),
-            'num_circular': self.num_circular,
-            'asfi_identifier': self.asfi_identifier,
-            'contains_tables': self.contains_tables,
-            'text_content': self.text_content,
-            'case_name': self.case_name,
-            'starting_office': self.starting_office,
-            'output_json': self.output_json,
-            'processing_time': self.processing_time,
-            'llm_model': self.llm_model,
-            'num_pages': self.num_pages,
-            'num_tokens': self.num_tokens,
-            'citcular_type': self.citcular_type
-        }
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(
-            publication_date=(
-                datetime.fromisoformat(data.get('publication_date'))
-                if data.get('publication_date')
-                else None
-            ),
-            num_circular=data.get('num_circular'),
-            asfi_identifier=data.get('asfi_identifier'),
-            contains_tables=data.get('contains_tables'),
-            text_content=data.get('text_content'),
-            case_name=data.get('case_name'),
-            starting_office=data.get('starting_office'),
-            output_json=data.get('output_json'),
-            processing_time=data.get('processing_time'),
-            llm_model=data.get('llm_model'),
-            num_pages=data.get('num_pages'),
-            num_tokens=data.get('num_tokens'),
-            citcular_type=data.get('citcular_type')
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional
+@dataclass
+class DocumentProcessingMetadata(object):
+    publication_date: Optional[datetime] = None
+    num_circular: Optional[str] = None
+    asfi_identifier: Optional[str] = None
+    contains_tables: Optional[bool] = None
+    text_content: Optional[str] = None
+    case_name: Optional[str] = None
+    starting_office: Optional[str] = None
+    output_json: Optional[dict] = None
+    processing_time: Optional[float] = None
+    llm_model: Optional[str] = None
+    num_pages: Optional[float] = None
+    num_tokens: Optional[float] = None
+    citcular_type: Optional[str] = None
+    @property
+    def to_dict(self):
+        return {
+            'publication_date': (
+                self.publication_date.isoformat()
+                if self.publication_date
+                else None
+            ),
+            'num_circular': self.num_circular,
+            'asfi_identifier': self.asfi_identifier,
+            'contains_tables': self.contains_tables,
+            'text_content': self.text_content,
+            'case_name': self.case_name,
+            'starting_office': self.starting_office,
+            'output_json': self.output_json,
+            'processing_time': self.processing_time,
+            'llm_model': self.llm_model,
+            'num_pages': self.num_pages,
+            'num_tokens': self.num_tokens,
+            'citcular_type': self.citcular_type
+        }
+    @classmethod
+    def from_dict(cls, data: dict):
+        return cls(
+            publication_date=(
+                datetime.fromisoformat(data.get('publication_date'))
+                if data.get('publication_date')
+                else None
+            ),
+            num_circular=data.get('num_circular'),
+            asfi_identifier=data.get('asfi_identifier'),
+            contains_tables=data.get('contains_tables'),
+            text_content=data.get('text_content'),
+            case_name=data.get('case_name'),
+            starting_office=data.get('starting_office'),
+            output_json=data.get('output_json'),
+            processing_time=data.get('processing_time'),
+            llm_model=data.get('llm_model'),
+            num_pages=data.get('num_pages'),
+            num_tokens=data.get('num_tokens'),
+            citcular_type=data.get('citcular_type')
         )

documente_shared/domain/enums.py CHANGED Viewed

@@ -1,36 +1,36 @@
-from documente_shared.domain.base_enum import BaseEnum
-class DocumentProcessingStatus(BaseEnum):
-    PENDING = 'PENDING'
-    ENQUEUED = 'ENQUEUED'
-    PROCESSING = 'PROCESSING'
-    COMPLETED = 'COMPLETED'
-    FAILED = 'FAILED'
-    DELETED = 'DELETED'
-    CANCELLED = 'CANCELLED'
-class DocumentProcessingCategory(BaseEnum):
-    CIRCULAR = 'CIRCULAR'
-    DESGRAVAMEN = 'DESGRAVAMEN'
-    @property
-    def is_circular(self):
-        return self == DocumentProcessingCategory.CIRCULAR
-    @property
-    def is_desgravamen(self):
-        return self == DocumentProcessingCategory.DESGRAVAMEN
-class DocumentProcessingSubCategory(BaseEnum):
-    # Circulares
-    CC_COMBINADA = 'CC_COMBINADA'
-    CC_NORMATIVA = 'CC_NORMATIVA'
-    CC_INFORMATIVA = 'CC_INFORMATIVA'
-    CC_RETENCION_SUSPENSION_REMISION = 'CC_RETENCION_SUSPENSION_REMISION'
-    # Desgravamenes
-    DS_CREDISEGURO = 'DS_CREDISEGURO'
+from documente_shared.domain.base_enum import BaseEnum
+class DocumentProcessingStatus(BaseEnum):
+    PENDING = 'PENDING'
+    ENQUEUED = 'ENQUEUED'
+    PROCESSING = 'PROCESSING'
+    COMPLETED = 'COMPLETED'
+    FAILED = 'FAILED'
+    DELETED = 'DELETED'
+    CANCELLED = 'CANCELLED'
+    IN_REVIEW = 'IN_REVIEW'
+class DocumentProcessingCategory(BaseEnum):
+    CIRCULAR = 'CIRCULAR'
+    DESGRAVAMEN = 'DESGRAVAMEN'
+    @property
+    def is_circular(self):
+        return self == DocumentProcessingCategory.CIRCULAR
+    @property
+    def is_desgravamen(self):
+        return self == DocumentProcessingCategory.DESGRAVAMEN
+class DocumentProcessingSubCategory(BaseEnum):
+    # Circulares
+    CC_COMBINADA = 'CC_COMBINADA'
+    CC_NORMATIVA = 'CC_NORMATIVA'
+    CC_INFORMATIVA = 'CC_INFORMATIVA'
+    CC_RETENCION_SUSPENSION_REMISION = 'CC_RETENCION_SUSPENSION_REMISION'
+    # Desgravamenes
+    DS_CREDISEGURO = 'DS_CREDISEGURO'

documente_shared/domain/repositories.py CHANGED Viewed

@@ -1,25 +1,25 @@
-from abc import ABC, abstractmethod
-from typing import Optional, List
-from documente_shared.domain.entities.document import DocumentProcessing
-from documente_shared.domain.enums import DocumentProcessingStatus
-class DocumentProcessingRepository(ABC):
-    @abstractmethod
-    def find(self, digest: str) ->Optional[DocumentProcessing]:
-        raise NotImplementedError
-    @abstractmethod
-    def persist(self, instance: DocumentProcessing) -> DocumentProcessing:
-        raise NotImplementedError
-    @abstractmethod
-    def remove(self, instance: DocumentProcessing):
-        raise NotImplementedError
-    @abstractmethod
-    def filter(self, statuses: List[DocumentProcessingStatus]) -> List[DocumentProcessing]:
+from abc import ABC, abstractmethod
+from typing import Optional, List
+from documente_shared.domain.entities.document import DocumentProcessing
+from documente_shared.domain.enums import DocumentProcessingStatus
+class DocumentProcessingRepository(ABC):
+    @abstractmethod
+    def find(self, digest: str) ->Optional[DocumentProcessing]:
+        raise NotImplementedError
+    @abstractmethod
+    def persist(self, instance: DocumentProcessing) -> DocumentProcessing:
+        raise NotImplementedError
+    @abstractmethod
+    def remove(self, instance: DocumentProcessing):
+        raise NotImplementedError
+    @abstractmethod
+    def filter(self, statuses: List[DocumentProcessingStatus]) -> List[DocumentProcessing]:
         raise NotImplementedError

documente_shared/infrastructure/__init__.py CHANGED Viewed

File without changes

documente_shared/infrastructure/dynamo_repositories.py CHANGED Viewed

@@ -1,43 +1,43 @@
-from typing import Optional, List
-from boto3.dynamodb.conditions import Key
-from documente_shared.domain.entities.document import DocumentProcessing
-from documente_shared.domain.enums import DocumentProcessingStatus, DocumentProcessingCategory
-from documente_shared.domain.repositories import DocumentProcessingRepository
-from documente_shared.infrastructure.dynamo_table import DynamoDBTable
-class DynamoDocumentProcessingRepository(
-    DynamoDBTable,
-    DocumentProcessingRepository,
-):
-    def find(self, digest: str) -> Optional[DocumentProcessing]:
-        item = self.get(key={'digest': digest})
-        if item:
-            return DocumentProcessing.from_dict(item)
-        return None
-    def persist(self, instance: DocumentProcessing) -> DocumentProcessing:
-        self.put(instance.to_simple_dict)
-        return instance
-    def remove(self, instance: DocumentProcessing):
-        self.delete(key={'digest': instance.digest})
-    def filter(self, statuses: List[DocumentProcessingStatus]) -> List[DocumentProcessing]:
-        items = []
-        for status in statuses:
-            response = self._table.query(
-                IndexName='status',
-                KeyConditionExpression=Key('status').eq(status.value),
-            )
-            status_items = response.get('Items', [])
-            items.extend(status_items)
-        return [
-            DocumentProcessing.from_dict(item)
-            for item in items
-        ]
+from typing import Optional, List
+from boto3.dynamodb.conditions import Key
+from documente_shared.domain.entities.document import DocumentProcessing
+from documente_shared.domain.enums import DocumentProcessingStatus, DocumentProcessingCategory
+from documente_shared.domain.repositories import DocumentProcessingRepository
+from documente_shared.infrastructure.dynamo_table import DynamoDBTable
+class DynamoDocumentProcessingRepository(
+    DynamoDBTable,
+    DocumentProcessingRepository,
+):
+    def find(self, digest: str) -> Optional[DocumentProcessing]:
+        item = self.get(key={'digest': digest})
+        if item:
+            return DocumentProcessing.from_dict(item)
+        return None
+    def persist(self, instance: DocumentProcessing) -> DocumentProcessing:
+        self.put(instance.to_simple_dict)
+        return instance
+    def remove(self, instance: DocumentProcessing):
+        self.delete(key={'digest': instance.digest})
+    def filter(self, statuses: List[DocumentProcessingStatus]) -> List[DocumentProcessing]:
+        items = []
+        for status in statuses:
+            response = self._table.query(
+                IndexName='status',
+                KeyConditionExpression=Key('status').eq(status.value),
+            )
+            status_items = response.get('Items', [])
+            items.extend(status_items)
+        return [
+            DocumentProcessing.from_dict(item)
+            for item in items
+        ]

documente_shared/infrastructure/dynamo_table.py CHANGED Viewed

@@ -1,75 +1,75 @@
-from dataclasses import dataclass
-import boto3
-from boto3.dynamodb.conditions import Key
-RETURN_VALUES = 'UPDATED_NEW'
-@dataclass
-class DynamoDBTable(object):
-    table_name: str
-    def __post_init__(self):
-        self._table = boto3.resource('dynamodb').Table(self.table_name)
-    def get(self, key: dict):
-        return self._table.get_item(Key=key).get('Item')
-    def get_all(self):
-        return self._table.scan().get('Items')
-    def upsert(self, key, attributes):
-        return self.put({**key, **attributes})
-    def filter_by(self, attribute: str, target_value: str):
-        return self._table.query(
-            FilterExpression=Key(attribute).eq(target_value),
-        ).get('Items')
-    def put(self, attributes: dict, condition: dict = None):
-        extra_args = {}
-        if condition:
-            extra_args['ConditionExpression'] = condition
-        return self._table.put_item(Item=attributes, **extra_args)
-    def update(self, key: str, attributes: dict):
-        return self._table.update_item(
-            Key=key,
-            UpdateExpression=self._update_expression(attributes),
-            ExpressionAttributeNames=self._expression_attribute_names(attributes),
-            ExpressionAttributeValues=self._expression_attribute_values(attributes),
-            ReturnValues=RETURN_VALUES,
-        )
-    def delete(self, key: dict):
-        return self._table.delete_item(Key=key)
-    def count(self) -> int:
-        return self._table.item_count
-    @classmethod
-    def _update_expression(cls, attributes):
-        return 'SET {param}'.format(
-            param=','.join(
-                '#{key}=:{key}'.format(
-                    key=key,
-                )
-                for key in attributes
-            ),
-        )
-    @classmethod
-    def _expression_attribute_names(cls, attributes):
-        return {
-            '#{key}'.format(key=key): key for key in attributes
-        }
-    @classmethod
-    def _expression_attribute_values(cls, attributes):
-        return {
-            ':{key}'.format(key=key): attr for key, attr in attributes.items()
-        }
+from dataclasses import dataclass
+import boto3
+from boto3.dynamodb.conditions import Key
+RETURN_VALUES = 'UPDATED_NEW'
+@dataclass
+class DynamoDBTable(object):
+    table_name: str
+    def __post_init__(self):
+        self._table = boto3.resource('dynamodb').Table(self.table_name)
+    def get(self, key: dict):
+        return self._table.get_item(Key=key).get('Item')
+    def get_all(self):
+        return self._table.scan().get('Items')
+    def upsert(self, key, attributes):
+        return self.put({**key, **attributes})
+    def filter_by(self, attribute: str, target_value: str):
+        return self._table.query(
+            FilterExpression=Key(attribute).eq(target_value),
+        ).get('Items')
+    def put(self, attributes: dict, condition: dict = None):
+        extra_args = {}
+        if condition:
+            extra_args['ConditionExpression'] = condition
+        return self._table.put_item(Item=attributes, **extra_args)
+    def update(self, key: str, attributes: dict):
+        return self._table.update_item(
+            Key=key,
+            UpdateExpression=self._update_expression(attributes),
+            ExpressionAttributeNames=self._expression_attribute_names(attributes),
+            ExpressionAttributeValues=self._expression_attribute_values(attributes),
+            ReturnValues=RETURN_VALUES,
+        )
+    def delete(self, key: dict):
+        return self._table.delete_item(Key=key)
+    def count(self) -> int:
+        return self._table.item_count
+    @classmethod
+    def _update_expression(cls, attributes):
+        return 'SET {param}'.format(
+            param=','.join(
+                '#{key}=:{key}'.format(
+                    key=key,
+                )
+                for key in attributes
+            ),
+        )
+    @classmethod
+    def _expression_attribute_names(cls, attributes):
+        return {
+            '#{key}'.format(key=key): key for key in attributes
+        }
+    @classmethod
+    def _expression_attribute_values(cls, attributes):
+        return {
+            ':{key}'.format(key=key): attr for key, attr in attributes.items()
+        }

documente_shared/infrastructure/s3_bucket.py CHANGED Viewed

@@ -1,57 +1,57 @@
-import boto3
-from dataclasses import dataclass
-from typing import Optional
-from documente_shared.domain.entities.document import remove_slash_from_path
-def remove_none_values(data: dict) -> dict:  # noqa: WPS110
-    return {key: value for key, value in data.items() if value is not None}  # noqa: WPS110
-@dataclass
-class S3Bucket(object):
-    bucket_name: str
-    def __post_init__(self):
-        self._resource = boto3.resource('s3')
-    def get(self, file_key: str) -> Optional[dict]:
-        try:
-            return self._resource.Object(self.bucket_name, file_key).get()
-        except self._resource.meta.client.exceptions.NoSuchKey:
-            return None
-    def get_bytes(self, file_key: str) -> Optional[bytes]:
-        cleaned_file_key = remove_slash_from_path(file_key)
-        file_context = self.get(cleaned_file_key)
-        if not file_context:
-            return None
-        return (
-            file_context['Body'].read()
-            if 'Body' in file_context
-            else None
-        )
-    def upload(self, file_key: str, file_content, content_type: Optional[str] = None):
-        cleaned_file_key = remove_slash_from_path(file_key)
-        optional_params = {'ContentType': content_type}
-        return self._resource.Object(self.bucket_name, cleaned_file_key).put(
-            Body=file_content,
-            **remove_none_values(optional_params),
-        )
-    def delete(self, file_key: str):
-        cleaned_file_key = remove_slash_from_path(file_key)
-        return self._resource.Object(self.bucket_name, cleaned_file_key).delete()
-    def get_url(self, file_key: str):
-        cleaned_file_key = remove_slash_from_path(file_key)
-        return 'https://{bucket_url}.s3.amazonaws.com/{file_key}'.format(
-            bucket_url=self.bucket_name,
-            file_key=cleaned_file_key,
-        )
-    def read(self, file_key: str) -> bytes:
-        return self.get(file_key)['Body'].read()
+import boto3
+from dataclasses import dataclass
+from typing import Optional
+from documente_shared.domain.entities.document import remove_slash_from_path
+def remove_none_values(data: dict) -> dict:  # noqa: WPS110
+    return {key: value for key, value in data.items() if value is not None}  # noqa: WPS110
+@dataclass
+class S3Bucket(object):
+    bucket_name: str
+    def __post_init__(self):
+        self._resource = boto3.resource('s3')
+    def get(self, file_key: str) -> Optional[dict]:
+        try:
+            return self._resource.Object(self.bucket_name, file_key).get()
+        except self._resource.meta.client.exceptions.NoSuchKey:
+            return None
+    def get_bytes(self, file_key: str) -> Optional[bytes]:
+        cleaned_file_key = remove_slash_from_path(file_key)
+        file_context = self.get(cleaned_file_key)
+        if not file_context:
+            return None
+        return (
+            file_context['Body'].read()
+            if 'Body' in file_context
+            else None
+        )
+    def upload(self, file_key: str, file_content, content_type: Optional[str] = None):
+        cleaned_file_key = remove_slash_from_path(file_key)
+        optional_params = {'ContentType': content_type}
+        return self._resource.Object(self.bucket_name, cleaned_file_key).put(
+            Body=file_content,
+            **remove_none_values(optional_params),
+        )
+    def delete(self, file_key: str):
+        cleaned_file_key = remove_slash_from_path(file_key)
+        return self._resource.Object(self.bucket_name, cleaned_file_key).delete()
+    def get_url(self, file_key: str):
+        cleaned_file_key = remove_slash_from_path(file_key)
+        return 'https://{bucket_url}.s3.amazonaws.com/{file_key}'.format(
+            bucket_url=self.bucket_name,
+            file_key=cleaned_file_key,
+        )
+    def read(self, file_key: str) -> bytes:
+        return self.get(file_key)['Body'].read()

documente_shared 0.1.51__py3-none-any.whl → 0.1.52__py3-none-any.whl

Potentially problematic release.

documente_shared 0.1.51__py3-none-any.whl → 0.1.52__py3-none-any.whl