rara-tools 0.0.13__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rara-tools might be problematic. Click here for more details.
- rara_tools/constants/digitizer.py +12 -0
- rara_tools/constants/general.py +1 -0
- rara_tools/exceptions.py +4 -0
- rara_tools/s3.py +53 -4
- {rara_tools-0.0.13.dist-info → rara_tools-0.1.0.dist-info}/METADATA +3 -2
- rara_tools-0.1.0.dist-info/RECORD +16 -0
- {rara_tools-0.0.13.dist-info → rara_tools-0.1.0.dist-info}/WHEEL +1 -1
- rara_tools-0.0.13.dist-info/RECORD +0 -16
- {rara_tools-0.0.13.dist-info → rara_tools-0.1.0.dist-info/licenses}/LICENSE.md +0 -0
- {rara_tools-0.0.13.dist-info → rara_tools-0.1.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,12 @@
|
|
|
1
|
+
COMPONENT_KEY = "digitizer"
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ModelTypes:
|
|
5
|
+
IMAGE_PROCESSOR = "image_processor"
|
|
6
|
+
|
|
7
|
+
|
|
1
8
|
class StatusKeys:
|
|
9
|
+
DOWNLOAD_MODELS = "digitizer_download_models"
|
|
2
10
|
CLEAN_UP = "digitizer_clean_up"
|
|
3
11
|
ELASTICSEARCH_UPLOAD = "digitizer_elasticsearch_upload"
|
|
4
12
|
UPLOAD = "s3_upload"
|
|
@@ -11,3 +19,7 @@ class Queue:
|
|
|
11
19
|
DOWNLOAD = "download"
|
|
12
20
|
FINISH = "finish"
|
|
13
21
|
OCR = "ocr"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Tasks:
|
|
25
|
+
MODEL_UPDATE = "component_model_update"
|
rara_tools/constants/general.py
CHANGED
rara_tools/exceptions.py
CHANGED
|
@@ -7,6 +7,10 @@ class S3InitException(Exception):
|
|
|
7
7
|
class S3ConnectionException(Exception):
|
|
8
8
|
"""Raised S3 Bucket/Connection Error."""
|
|
9
9
|
|
|
10
|
+
class S3DownloadException(Exception):
|
|
11
|
+
"""Raised S3 Download Error."""
|
|
12
|
+
|
|
13
|
+
|
|
10
14
|
class ElasticsearchException(Exception):
|
|
11
15
|
"""Raised Elasticsearch Error."""
|
|
12
16
|
|
rara_tools/s3.py
CHANGED
|
@@ -1,11 +1,20 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import os
|
|
3
|
+
import pathlib
|
|
4
|
+
import time
|
|
2
5
|
import uuid
|
|
3
6
|
from typing import Any, Generator, List, Optional
|
|
4
7
|
|
|
5
|
-
from minio import Minio
|
|
8
|
+
from minio import Minio, S3Error
|
|
6
9
|
|
|
7
|
-
from .exceptions import (
|
|
8
|
-
|
|
10
|
+
from .exceptions import (
|
|
11
|
+
S3ConnectionException,
|
|
12
|
+
S3InitException,
|
|
13
|
+
S3InputException,
|
|
14
|
+
S3DownloadException
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger("tools.s3")
|
|
9
18
|
|
|
10
19
|
|
|
11
20
|
class S3Files:
|
|
@@ -76,9 +85,49 @@ class S3Files:
|
|
|
76
85
|
list_of_objects = list(self.minio_client.list_objects(self.bucket, prefix=path, recursive=True))
|
|
77
86
|
for minio_object in list_of_objects:
|
|
78
87
|
full_path = os.path.join(download_dir, minio_object.object_name)
|
|
79
|
-
self.
|
|
88
|
+
self._download_file(minio_object.object_name, full_path)
|
|
80
89
|
yield full_path
|
|
81
90
|
|
|
91
|
+
def _download_file(self, path, download_dir=".", max_retries=3) -> str:
|
|
92
|
+
"""Download a single file with retry and resume support."""
|
|
93
|
+
attempts = 0
|
|
94
|
+
|
|
95
|
+
while attempts < max_retries:
|
|
96
|
+
try:
|
|
97
|
+
stat = self.minio_client.stat_object(self.bucket, path)
|
|
98
|
+
file_size = stat.size
|
|
99
|
+
temp_path = download_dir + ".part"
|
|
100
|
+
pathlib.Path(temp_path).parent.mkdir(parents=True, exist_ok=True)
|
|
101
|
+
|
|
102
|
+
# Check if a partial file exists
|
|
103
|
+
downloaded_size = os.path.getsize(temp_path) if os.path.exists(temp_path) else 0
|
|
104
|
+
|
|
105
|
+
if downloaded_size >= file_size:
|
|
106
|
+
os.rename(temp_path, download_dir) # Rename to final filename
|
|
107
|
+
logger.info(f"Completed: {path}")
|
|
108
|
+
return str(pathlib.Path(download_dir) / path)
|
|
109
|
+
|
|
110
|
+
logger.info(f"Downloading {path} ({downloaded_size}/{file_size} bytes)...")
|
|
111
|
+
|
|
112
|
+
# Open file in append mode to resume download
|
|
113
|
+
with open(temp_path, "ab") as f:
|
|
114
|
+
response = self.minio_client.get_object(self.bucket, path, offset=downloaded_size)
|
|
115
|
+
for data in response.stream(32 * 1024): # 32KB chunks
|
|
116
|
+
f.write(data)
|
|
117
|
+
response.close()
|
|
118
|
+
response.release_conn()
|
|
119
|
+
|
|
120
|
+
os.rename(temp_path, download_dir) # Rename temp to final
|
|
121
|
+
logger.info(f"Downloaded: {path}")
|
|
122
|
+
return str(pathlib.Path(download_dir) / path)
|
|
123
|
+
|
|
124
|
+
except S3Error as e:
|
|
125
|
+
logger.info(f"Error downloading {path}, attempt {attempts + 1}: {e}")
|
|
126
|
+
attempts += 1
|
|
127
|
+
time.sleep(2 ** attempts) # Exponential backoff
|
|
128
|
+
|
|
129
|
+
raise S3DownloadException(f"Failed to download {path} after {max_retries} attempts.")
|
|
130
|
+
|
|
82
131
|
def upload(self, path: str, prefix: Optional[str] = "") -> str:
|
|
83
132
|
"""Uploads file or folder to S3 bucket.
|
|
84
133
|
:param: path str: Path to the file to upload in local file system.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: rara-tools
|
|
3
|
-
Version: 0.0.13
|
|
3
|
+
Version: 0.1.0
|
|
4
4
|
Summary: Tools to support Kata's work.
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: Programming Language :: Python :: 3.10
|
|
@@ -18,6 +18,7 @@ Requires-Dist: iso639-lang
|
|
|
18
18
|
Provides-Extra: testing
|
|
19
19
|
Requires-Dist: pytest>=8.0; extra == "testing"
|
|
20
20
|
Requires-Dist: pytest-order; extra == "testing"
|
|
21
|
+
Dynamic: license-file
|
|
21
22
|
|
|
22
23
|
# RaRa Tools
|
|
23
24
|
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
rara_tools/converters.py,sha256=O769zNjde1VCfEUF2VU_49IAbm8NT-cG-VR0uPxixtE,2687
|
|
2
|
+
rara_tools/decorators.py,sha256=MjOyvZ5nTkwxwx2JLFEGpKKBysvecFw6EN6UDrSvZLU,2187
|
|
3
|
+
rara_tools/digar_schema_converter.py,sha256=k95U2iRlEA3sh772-v6snhHW6fju6qSTMnvWJ6DpzZk,14254
|
|
4
|
+
rara_tools/elastic.py,sha256=MgPHxZ3UbSTIL8_sT9gU5V4PLKJjo3aQ8CGyhXjRz6M,13065
|
|
5
|
+
rara_tools/exceptions.py,sha256=YQyaueUbXeTkJYFDEuN6iWTXMI3eCv5l7PxGp87vg5I,550
|
|
6
|
+
rara_tools/s3.py,sha256=9ziDXsLjBtFAvsjTPxFddhfvkpA8773rzPJqO7y1N5Q,6415
|
|
7
|
+
rara_tools/task_reporter.py,sha256=WCcZts9dAUokPc4vbrG3-lNAFLnWaMgE3b3iaUB7mr8,3256
|
|
8
|
+
rara_tools/utils.py,sha256=9vSbmuWYU5ydr4lXBKlUKa0xzDccFsaJv4T-XwgUfuY,2578
|
|
9
|
+
rara_tools/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
rara_tools/constants/digitizer.py,sha256=MND0dUQySBAOVWzuUBxQGZWv_Ckdz2jCp25F2_oHGi8,496
|
|
11
|
+
rara_tools/constants/general.py,sha256=aVUQTMss89atAkTDZKJXNdnsBHPX-RSrlBOtt-wdPGU,195
|
|
12
|
+
rara_tools-0.1.0.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
|
|
13
|
+
rara_tools-0.1.0.dist-info/METADATA,sha256=TM89xbTBJiyz3ZNXbYzLwYi84ChYLDN9Lyo2XZ-posE,3916
|
|
14
|
+
rara_tools-0.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
15
|
+
rara_tools-0.1.0.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
|
|
16
|
+
rara_tools-0.1.0.dist-info/RECORD,,
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
rara_tools/converters.py,sha256=O769zNjde1VCfEUF2VU_49IAbm8NT-cG-VR0uPxixtE,2687
|
|
2
|
-
rara_tools/decorators.py,sha256=MjOyvZ5nTkwxwx2JLFEGpKKBysvecFw6EN6UDrSvZLU,2187
|
|
3
|
-
rara_tools/digar_schema_converter.py,sha256=k95U2iRlEA3sh772-v6snhHW6fju6qSTMnvWJ6DpzZk,14254
|
|
4
|
-
rara_tools/elastic.py,sha256=MgPHxZ3UbSTIL8_sT9gU5V4PLKJjo3aQ8CGyhXjRz6M,13065
|
|
5
|
-
rara_tools/exceptions.py,sha256=BwNh4qWxau_ylr9RqZoYwd1KnExI6oWWWDno3jkh8q4,474
|
|
6
|
-
rara_tools/s3.py,sha256=uNDu2HzMYHAWh33RcHeyPFK7gdQfQPxsdfohyIKezEY,4467
|
|
7
|
-
rara_tools/task_reporter.py,sha256=WCcZts9dAUokPc4vbrG3-lNAFLnWaMgE3b3iaUB7mr8,3256
|
|
8
|
-
rara_tools/utils.py,sha256=9vSbmuWYU5ydr4lXBKlUKa0xzDccFsaJv4T-XwgUfuY,2578
|
|
9
|
-
rara_tools/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
rara_tools/constants/digitizer.py,sha256=gJ3jOMwuZfKcLqgOAxTyB266VYsskLabJiMUiSz3xX4,297
|
|
11
|
-
rara_tools/constants/general.py,sha256=E9Jaw-YxocS_tOZw9QBoxO3e9KK5EMbLoM0R7D4Iflw,171
|
|
12
|
-
rara_tools-0.0.13.dist-info/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
|
|
13
|
-
rara_tools-0.0.13.dist-info/METADATA,sha256=0Aipkuodi_CzCTUMkVqKOI__n5mN2r8hEGJ49-MjpMo,3895
|
|
14
|
-
rara_tools-0.0.13.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
|
|
15
|
-
rara_tools-0.0.13.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
|
|
16
|
-
rara_tools-0.0.13.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|