documente_shared 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of documente_shared might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: documente_shared
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Shared utilities for Documente AI projects
5
5
  License: MIT
6
6
  Author: Tech
@@ -1,5 +1,6 @@
1
1
  from dataclasses import dataclass
2
2
  from datetime import datetime
3
+ from decimal import Decimal
3
4
  from typing import Optional
4
5
 
5
6
  from documente_shared.domain.enums import DocumentProcessStatus
@@ -12,7 +13,7 @@ class DocumentProcess(object):
12
13
  file_path: str
13
14
  processed_csv_path: str
14
15
  processed_xlsx_path: str
15
- processing_time: Optional[float] = None
16
+ processing_time: Optional[Decimal] = None
16
17
  enqueued_at: Optional[datetime] = None
17
18
  started_at: Optional[datetime] = None
18
19
  failed_at: Optional[datetime] = None
@@ -72,7 +73,10 @@ class DocumentProcess(object):
72
73
  'file_path': self.file_path,
73
74
  'processed_csv_path': self.processed_csv_path,
74
75
  'processed_xlsx_path': self.processed_xlsx_path,
75
- 'processing_time': self.processing_time,
76
+ 'processing_time': (
77
+ str(self.processing_time)
78
+ if self.processing_time else None
79
+ ),
76
80
  'enqueued_at': self.enqueued_at.isoformat() if self.enqueued_at else None,
77
81
  'started_at': self.started_at.isoformat() if self.started_at else None,
78
82
  'failed_at': self.failed_at.isoformat() if self.failed_at else None,
@@ -88,7 +92,7 @@ class DocumentProcess(object):
88
92
  processed_csv_path=data.get('processed_csv_path'),
89
93
  processed_xlsx_path=data.get('processed_xlsx_path'),
90
94
  processing_time=(
91
- data.get('processing_time')
95
+ Decimal(data.get('processing_time'))
92
96
  if data.get('processing_time') else None
93
97
  ),
94
98
  enqueued_at=(
@@ -10,5 +10,5 @@ class DocumentProcessRepository(ABC):
10
10
  raise NotImplementedError
11
11
 
12
12
  @abstractmethod
13
- def delete(self, instance: DocumentProcess):
13
+ def remove(self, instance: DocumentProcess):
14
14
  raise NotImplementedError
@@ -0,0 +1,16 @@
1
+ from documente_shared.domain.entities import DocumentProcess
2
+ from documente_shared.domain.repositories import DocumentProcessRepository
3
+ from documente_shared.infrastructure.dynamo_table import DynamoDBTable
4
+
5
+
6
class DynamoDocumentProcessRepository(DynamoDBTable, DocumentProcessRepository):
    """DocumentProcessRepository implementation stored in a DynamoDB table.

    The storage calls used here (``put`` and ``delete``) are inherited from
    ``DynamoDBTable``.
    """

    def persist(self, instance: DocumentProcess) -> DocumentProcess:
        """Write ``instance`` to the table and return the same object."""
        # NOTE(review): ``to_dict`` is read without being called, so it is
        # presumably a property on DocumentProcess -- confirm.
        item = instance.to_dict
        self.put(item)
        return instance

    def remove(self, instance: DocumentProcess):
        """Delete the item whose ``digest`` key equals ``instance.digest``."""
        item_key = {'digest': instance.digest}
        self.delete(key=item_key)
@@ -0,0 +1,75 @@
1
+ from dataclasses import dataclass
2
+
3
+ import boto3
4
+ from boto3.dynamodb.conditions import Key
5
+
6
+
7
+ RETURN_VALUES = 'UPDATED_NEW'
8
+
9
@dataclass
class DynamoDBTable(object):
    """Small convenience wrapper around a boto3 DynamoDB ``Table`` resource."""

    # Name of the DynamoDB table every operation targets.
    table_name: str

    def __post_init__(self):
        """Bind the boto3 ``Table`` resource named by ``table_name``."""
        self._table = boto3.resource('dynamodb').Table(self.table_name)

    def get(self, key: dict):
        """Return the item stored under ``key``, or ``None`` when absent.

        ``key`` is forwarded as the ``Key=`` argument of ``get_item``, i.e. a
        mapping of key attribute names to values.
        """
        return self._table.get_item(Key=key).get('Item')

    def get_all(self):
        """Return every item in the table as a list (all scan pages)."""
        return self._scan_items()

    def upsert(self, key, attributes):
        """Store one item built by merging ``key`` and ``attributes``.

        On a name clash the value from ``attributes`` wins.
        """
        return self.put({**key, **attributes})

    def filter_by(self, attribute, target_value):
        """Return the items whose ``attribute`` equals ``target_value``.

        The table is scanned with a filter expression; results from every
        page are collected into one list.
        """
        # The table resource itself is not callable; the filter has to be
        # handed to ``scan``.
        # NOTE(review): boto3 documents ``Attr`` for filter expressions; ``Key``
        # is kept because it is what this module imports -- confirm.
        return self._scan_items(
            FilterExpression=Key(attribute).eq(target_value),
        )

    def put(self, attributes: dict, condition=None):
        """Write ``attributes`` as an item.

        When ``condition`` is truthy it is forwarded as
        ``ConditionExpression``; otherwise the write is unconditional.
        Returns the ``put_item`` response.
        """
        extra_args = {}
        if condition:
            extra_args['ConditionExpression'] = condition
        return self._table.put_item(Item=attributes, **extra_args)

    def update(self, key: dict, attributes: dict):
        """Set every entry of ``attributes`` on the item stored under ``key``.

        Attribute names and values are passed through expression placeholders
        (``#name`` / ``:name``). Returns the ``update_item`` response, which
        is requested with ``ReturnValues=RETURN_VALUES``.
        """
        return self._table.update_item(
            Key=key,
            UpdateExpression=self._update_expression(attributes),
            ExpressionAttributeNames=self._expression_attribute_names(attributes),
            ExpressionAttributeValues=self._expression_attribute_values(attributes),
            ReturnValues=RETURN_VALUES,
        )

    def delete(self, key: dict):
        """Delete the item stored under ``key`` and return the response."""
        return self._table.delete_item(Key=key)

    def count(self) -> int:
        """Return the table's ``item_count`` attribute.

        NOTE(review): boto3 documents ``item_count`` as a periodically
        refreshed approximation -- confirm before relying on an exact value.
        """
        return self._table.item_count

    def _scan_items(self, **scan_kwargs):
        """Scan the table and collect the items of every result page."""
        items = []
        while True:
            page = self._table.scan(**scan_kwargs)
            items.extend(page.get('Items', []))
            last_key = page.get('LastEvaluatedKey')
            if not last_key:
                return items
            # A ``LastEvaluatedKey`` means the scan stopped early; resume
            # from it to fetch the next page.
            scan_kwargs['ExclusiveStartKey'] = last_key

    @classmethod
    def _update_expression(cls, attributes):
        """Build ``SET #a=:a,#b=:b`` for the names in ``attributes``."""
        return 'SET {param}'.format(
            param=','.join(
                '#{key}=:{key}'.format(key=key)
                for key in attributes
            ),
        )

    @classmethod
    def _expression_attribute_names(cls, attributes):
        """Map each ``#name`` placeholder to its attribute name."""
        return {
            '#{key}'.format(key=key): key for key in attributes
        }

    @classmethod
    def _expression_attribute_values(cls, attributes):
        """Map each ``:name`` placeholder to its attribute value."""
        return {
            ':{key}'.format(key=key): attr for key, attr in attributes.items()
        }
@@ -0,0 +1,51 @@
1
+ import boto3
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Optional
5
+
6
+
7
def remove_none_values(data: dict) -> dict:  # noqa: WPS110
    """Return a new dict holding the entries of ``data`` whose value is not ``None``."""
    cleaned = {}
    for key, value in data.items():  # noqa: WPS110
        if value is None:
            continue
        cleaned[key] = value
    return cleaned
9
+
10
+
11
@dataclass
class S3Bucket(object):
    """Convenience wrapper for reading and writing objects in one S3 bucket."""

    # Name of the bucket every operation targets.
    bucket_name: str

    def __post_init__(self):
        """Create the boto3 S3 resource used by all operations."""
        self._resource = boto3.resource('s3')

    def get(self, file_name: str) -> Optional[dict]:
        """Return the ``Object.get()`` response for ``file_name``.

        Returns ``None`` when the client reports ``NoSuchKey``.
        """
        try:
            return self._resource.Object(self.bucket_name, file_name).get()
        except self._resource.meta.client.exceptions.NoSuchKey:
            return None

    def get_bytes(self, file_name: str) -> Optional[bytes]:
        """Return the content read from the object's ``Body``.

        Returns ``None`` when the object is missing or the response carries
        no ``Body`` entry.
        """
        file_context = self.get(file_name)
        if not file_context:
            return None
        return (
            file_context['Body'].read()
            if 'Body' in file_context
            else None
        )

    def upload(self, file_name, file_content, content_type: Optional[str] = None):
        """Store ``file_content`` under ``file_name``.

        ``ContentType`` is only sent when ``content_type`` is not ``None``.
        Returns the ``put`` response.
        """
        optional_params = {'ContentType': content_type}
        return self._resource.Object(self.bucket_name, file_name).put(
            Body=file_content,
            **remove_none_values(optional_params),
        )

    def delete(self, file_name):
        """Delete the object stored under ``file_name``."""
        return self._resource.Object(self.bucket_name, file_name).delete()

    def get_url(self, file_name):
        """Return the ``https://<bucket>.s3.amazonaws.com/<file_name>`` URL.

        ``file_name`` is inserted as given; no URL-encoding is applied.
        """
        return 'https://{bucket_url}.s3.amazonaws.com/{file_name}'.format(
            bucket_url=self.bucket_name,
            file_name=file_name,
        )

    def read(self, file_name):
        """Return the content read from the object's ``Body``.

        Raises:
            FileNotFoundError: when ``get`` finds no object for ``file_name``.
        """
        file_context = self.get(file_name)
        if file_context is None:
            # Without this guard a missing key surfaced as an opaque
            # "'NoneType' object is not subscriptable" TypeError.
            raise FileNotFoundError(
                'No object {file_name!r} in bucket {bucket!r}'.format(
                    file_name=file_name,
                    bucket=self.bucket_name,
                ),
            )
        return file_context['Body'].read()
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "documente_shared"
3
- version = "0.1.2"
3
+ version = "0.1.4"
4
4
  description = "Shared utilities for Documente AI projects"
5
5
  authors = ["Tech <tech@llamitai.com>"]
6
6
  license = "MIT"
@@ -12,4 +12,7 @@ boto3 = "^1.34.102"
12
12
  botocore = "^1.34.102"
13
13
 
14
14
  [tool.poetry.dev-dependencies]
15
- pytest = "^6.0"
15
+ pytest = "^6.0"
16
+ [tool.poetry.group.package.dependencies]
17
+ ipdb = "^0.13.13"
18
+
@@ -1,10 +0,0 @@
1
- from documente_shared.domain.entities import DocumentProcess
2
- from documente_shared.domain.repositories import DocumentProcessRepository
3
-
4
-
5
- class DynamoDocumentProcessRepository(DocumentProcessRepository):
6
- def persist(self, instance: DocumentProcess) -> DocumentProcess:
7
- pass
8
-
9
- def delete(self, instance: DocumentProcess):
10
- pass