arkindex-base-worker 0.3.6rc5__tar.gz → 0.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/PKG-INFO +14 -13
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_base_worker.egg-info/PKG-INFO +14 -13
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_base_worker.egg-info/SOURCES.txt +7 -5
- arkindex_base_worker-0.3.7/arkindex_base_worker.egg-info/requires.txt +19 -0
- arkindex_base_worker-0.3.7/arkindex_base_worker.egg-info/top_level.txt +6 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/cache.py +14 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/image.py +29 -19
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/models.py +14 -2
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/utils.py +17 -3
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/__init__.py +122 -125
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/base.py +24 -24
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/classification.py +18 -25
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/dataset.py +24 -18
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/element.py +45 -6
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/entity.py +35 -4
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/metadata.py +21 -11
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/training.py +13 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/transcription.py +45 -5
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/version.py +22 -0
- arkindex_base_worker-0.3.7/hooks/pre_gen_project.py +3 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/pyproject.toml +35 -8
- arkindex_base_worker-0.3.7/setup.cfg +4 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/conftest.py +14 -6
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_base_worker.py +0 -6
- arkindex_base_worker-0.3.7/tests/test_dataset_worker.py +728 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_classifications.py +365 -539
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_cli.py +1 -1
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_dataset.py +97 -116
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_elements.py +227 -61
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_entities.py +22 -2
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_metadata.py +53 -27
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_training.py +35 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_transcriptions.py +149 -16
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_worker.py +19 -6
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_image.py +37 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_utils.py +23 -1
- arkindex_base_worker-0.3.7/worker-demo/tests/__init__.py +0 -0
- arkindex_base_worker-0.3.7/worker-demo/tests/conftest.py +32 -0
- arkindex_base_worker-0.3.7/worker-demo/tests/test_worker.py +12 -0
- arkindex_base_worker-0.3.7/worker-demo/worker_demo/__init__.py +6 -0
- arkindex_base_worker-0.3.7/worker-demo/worker_demo/worker.py +19 -0
- arkindex-base-worker-0.3.6rc5/arkindex_base_worker.egg-info/requires.txt +0 -17
- arkindex-base-worker-0.3.6rc5/arkindex_base_worker.egg-info/top_level.txt +0 -2
- arkindex-base-worker-0.3.6rc5/docs-requirements.txt +0 -7
- arkindex-base-worker-0.3.6rc5/requirements.txt +0 -8
- arkindex-base-worker-0.3.6rc5/setup.cfg +0 -8
- arkindex-base-worker-0.3.6rc5/setup.py +0 -4
- arkindex-base-worker-0.3.6rc5/tests/test_dataset_worker.py +0 -846
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/LICENSE +0 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/README.md +0 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/__init__.py +0 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_worker/worker/task.py +0 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/__init__.py +0 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_cache.py +0 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_element.py +0 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/__init__.py +0 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_elements_worker/test_task.py +0 -0
- {arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/tests/test_merge.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.3.6rc5
|
|
3
|
+
Version: 0.3.7
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -41,22 +41,23 @@ Classifier: Topic :: Text Processing :: Linguistic
|
|
|
41
41
|
Requires-Python: >=3.10
|
|
42
42
|
Description-Content-Type: text/markdown
|
|
43
43
|
License-File: LICENSE
|
|
44
|
-
Requires-Dist:
|
|
45
|
-
Requires-Dist:
|
|
46
|
-
Requires-Dist:
|
|
47
|
-
Requires-Dist:
|
|
48
|
-
Requires-Dist:
|
|
49
|
-
Requires-Dist:
|
|
50
|
-
Requires-Dist: tenacity==8.2.3
|
|
44
|
+
Requires-Dist: peewee==3.17.1
|
|
45
|
+
Requires-Dist: Pillow==10.3.0
|
|
46
|
+
Requires-Dist: pymdown-extensions==10.7.1
|
|
47
|
+
Requires-Dist: python-gnupg==0.5.2
|
|
48
|
+
Requires-Dist: shapely==2.0.3
|
|
49
|
+
Requires-Dist: teklia-toolbox==0.1.4
|
|
51
50
|
Requires-Dist: zstandard==0.22.0
|
|
52
51
|
Provides-Extra: docs
|
|
53
|
-
Requires-Dist: black==
|
|
52
|
+
Requires-Dist: black==24.4.0; extra == "docs"
|
|
54
53
|
Requires-Dist: doc8==1.1.1; extra == "docs"
|
|
55
|
-
Requires-Dist: mkdocs==
|
|
56
|
-
Requires-Dist:
|
|
57
|
-
Requires-Dist: mkdocstrings==0.23.0; extra == "docs"
|
|
58
|
-
Requires-Dist: mkdocstrings-python==1.7.3; extra == "docs"
|
|
54
|
+
Requires-Dist: mkdocs-material==9.5.17; extra == "docs"
|
|
55
|
+
Requires-Dist: mkdocstrings-python==1.9.2; extra == "docs"
|
|
59
56
|
Requires-Dist: recommonmark==0.7.1; extra == "docs"
|
|
57
|
+
Provides-Extra: tests
|
|
58
|
+
Requires-Dist: pytest==8.1.1; extra == "tests"
|
|
59
|
+
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
60
|
+
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
60
61
|
|
|
61
62
|
# Arkindex base Worker
|
|
62
63
|
|
{arkindex-base-worker-0.3.6rc5 → arkindex_base_worker-0.3.7}/arkindex_base_worker.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.3.6rc5
|
|
3
|
+
Version: 0.3.7
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -41,22 +41,23 @@ Classifier: Topic :: Text Processing :: Linguistic
|
|
|
41
41
|
Requires-Python: >=3.10
|
|
42
42
|
Description-Content-Type: text/markdown
|
|
43
43
|
License-File: LICENSE
|
|
44
|
-
Requires-Dist:
|
|
45
|
-
Requires-Dist:
|
|
46
|
-
Requires-Dist:
|
|
47
|
-
Requires-Dist:
|
|
48
|
-
Requires-Dist:
|
|
49
|
-
Requires-Dist:
|
|
50
|
-
Requires-Dist: tenacity==8.2.3
|
|
44
|
+
Requires-Dist: peewee==3.17.1
|
|
45
|
+
Requires-Dist: Pillow==10.3.0
|
|
46
|
+
Requires-Dist: pymdown-extensions==10.7.1
|
|
47
|
+
Requires-Dist: python-gnupg==0.5.2
|
|
48
|
+
Requires-Dist: shapely==2.0.3
|
|
49
|
+
Requires-Dist: teklia-toolbox==0.1.4
|
|
51
50
|
Requires-Dist: zstandard==0.22.0
|
|
52
51
|
Provides-Extra: docs
|
|
53
|
-
Requires-Dist: black==
|
|
52
|
+
Requires-Dist: black==24.4.0; extra == "docs"
|
|
54
53
|
Requires-Dist: doc8==1.1.1; extra == "docs"
|
|
55
|
-
Requires-Dist: mkdocs==
|
|
56
|
-
Requires-Dist:
|
|
57
|
-
Requires-Dist: mkdocstrings==0.23.0; extra == "docs"
|
|
58
|
-
Requires-Dist: mkdocstrings-python==1.7.3; extra == "docs"
|
|
54
|
+
Requires-Dist: mkdocs-material==9.5.17; extra == "docs"
|
|
55
|
+
Requires-Dist: mkdocstrings-python==1.9.2; extra == "docs"
|
|
59
56
|
Requires-Dist: recommonmark==0.7.1; extra == "docs"
|
|
57
|
+
Provides-Extra: tests
|
|
58
|
+
Requires-Dist: pytest==8.1.1; extra == "tests"
|
|
59
|
+
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
60
|
+
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
60
61
|
|
|
61
62
|
# Arkindex base Worker
|
|
62
63
|
|
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
LICENSE
|
|
2
2
|
README.md
|
|
3
|
-
docs-requirements.txt
|
|
4
3
|
pyproject.toml
|
|
5
|
-
requirements.txt
|
|
6
|
-
setup.cfg
|
|
7
|
-
setup.py
|
|
8
4
|
arkindex_base_worker.egg-info/PKG-INFO
|
|
9
5
|
arkindex_base_worker.egg-info/SOURCES.txt
|
|
10
6
|
arkindex_base_worker.egg-info/dependency_links.txt
|
|
@@ -26,6 +22,7 @@ arkindex_worker/worker/task.py
|
|
|
26
22
|
arkindex_worker/worker/training.py
|
|
27
23
|
arkindex_worker/worker/transcription.py
|
|
28
24
|
arkindex_worker/worker/version.py
|
|
25
|
+
hooks/pre_gen_project.py
|
|
29
26
|
tests/__init__.py
|
|
30
27
|
tests/conftest.py
|
|
31
28
|
tests/test_base_worker.py
|
|
@@ -45,4 +42,9 @@ tests/test_elements_worker/test_metadata.py
|
|
|
45
42
|
tests/test_elements_worker/test_task.py
|
|
46
43
|
tests/test_elements_worker/test_training.py
|
|
47
44
|
tests/test_elements_worker/test_transcriptions.py
|
|
48
|
-
tests/test_elements_worker/test_worker.py
|
|
45
|
+
tests/test_elements_worker/test_worker.py
|
|
46
|
+
worker-demo/tests/__init__.py
|
|
47
|
+
worker-demo/tests/conftest.py
|
|
48
|
+
worker-demo/tests/test_worker.py
|
|
49
|
+
worker-demo/worker_demo/__init__.py
|
|
50
|
+
worker-demo/worker_demo/worker.py
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
peewee==3.17.1
|
|
2
|
+
Pillow==10.3.0
|
|
3
|
+
pymdown-extensions==10.7.1
|
|
4
|
+
python-gnupg==0.5.2
|
|
5
|
+
shapely==2.0.3
|
|
6
|
+
teklia-toolbox==0.1.4
|
|
7
|
+
zstandard==0.22.0
|
|
8
|
+
|
|
9
|
+
[docs]
|
|
10
|
+
black==24.4.0
|
|
11
|
+
doc8==1.1.1
|
|
12
|
+
mkdocs-material==9.5.17
|
|
13
|
+
mkdocstrings-python==1.9.2
|
|
14
|
+
recommonmark==0.7.1
|
|
15
|
+
|
|
16
|
+
[tests]
|
|
17
|
+
pytest==8.1.1
|
|
18
|
+
pytest-mock==3.14.0
|
|
19
|
+
pytest-responses==0.5.1
|
|
@@ -374,3 +374,17 @@ def merge_parents_cache(paths: list, current_database: Path):
|
|
|
374
374
|
for statement in statements:
|
|
375
375
|
cursor.execute(statement)
|
|
376
376
|
connection.commit()
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def unsupported_cache(func):
|
|
380
|
+
def wrapper(self, *args, **kwargs):
|
|
381
|
+
results = func(self, *args, **kwargs)
|
|
382
|
+
|
|
383
|
+
if not (self.is_read_only or self.use_cache):
|
|
384
|
+
logger.warning(
|
|
385
|
+
f"This API helper `{func.__name__}` did not update the cache database"
|
|
386
|
+
)
|
|
387
|
+
|
|
388
|
+
return results
|
|
389
|
+
|
|
390
|
+
return wrapper
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Helper methods to download and open IIIF images, and manage polygons.
|
|
3
3
|
"""
|
|
4
|
+
|
|
4
5
|
import re
|
|
5
6
|
from collections import namedtuple
|
|
6
7
|
from io import BytesIO
|
|
@@ -20,6 +21,7 @@ from tenacity import (
|
|
|
20
21
|
)
|
|
21
22
|
|
|
22
23
|
from arkindex_worker import logger
|
|
24
|
+
from teklia_toolbox.requests import should_verify_cert
|
|
23
25
|
|
|
24
26
|
# Avoid circular imports error when type checking
|
|
25
27
|
if TYPE_CHECKING:
|
|
@@ -114,32 +116,38 @@ def download_image(url: str) -> Image:
|
|
|
114
116
|
)
|
|
115
117
|
else:
|
|
116
118
|
raise e
|
|
117
|
-
except requests.exceptions.SSLError:
|
|
118
|
-
logger.warning(
|
|
119
|
-
"An SSLError occurred during image download, retrying with a weaker and unsafe SSL configuration"
|
|
120
|
-
)
|
|
121
|
-
|
|
122
|
-
# Saving current ciphers
|
|
123
|
-
previous_ciphers = requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS
|
|
124
|
-
|
|
125
|
-
# Downgrading ciphers to download the image
|
|
126
|
-
requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = "ALL:@SECLEVEL=1"
|
|
127
|
-
resp = _retried_request(url)
|
|
128
|
-
|
|
129
|
-
# Restoring previous ciphers
|
|
130
|
-
requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = previous_ciphers
|
|
131
119
|
|
|
132
120
|
# Preprocess the image and prepare it for classification
|
|
133
121
|
image = Image.open(BytesIO(resp.content))
|
|
134
122
|
logger.info(
|
|
135
|
-
"Downloaded image {} - size={}x{} in {}".format(
|
|
136
|
-
url, image.size[0], image.size[1], resp.elapsed
|
|
137
|
-
)
|
|
123
|
+
f"Downloaded image {url} - size={image.size[0]}x{image.size[1]} in {resp.elapsed}"
|
|
138
124
|
)
|
|
139
125
|
|
|
140
126
|
return image
|
|
141
127
|
|
|
142
128
|
|
|
129
|
+
def upload_image(image: Image, url: str) -> requests.Response:
|
|
130
|
+
"""
|
|
131
|
+
Upload a Pillow image to a URL.
|
|
132
|
+
|
|
133
|
+
:param image: Pillow image to upload.
|
|
134
|
+
:param url: Destination URL.
|
|
135
|
+
:returns: The upload response.
|
|
136
|
+
"""
|
|
137
|
+
assert url.startswith("http"), "Destination URL for the image must be HTTP(S)"
|
|
138
|
+
|
|
139
|
+
# Retrieve a binarized version of the image
|
|
140
|
+
image_bytes = BytesIO()
|
|
141
|
+
image.save(image_bytes, format="jpeg")
|
|
142
|
+
image_bytes.seek(0)
|
|
143
|
+
|
|
144
|
+
# Upload the image
|
|
145
|
+
resp = _retried_request(url, method=requests.put, data=image_bytes)
|
|
146
|
+
logger.info(f"Uploaded image to {url} in {resp.elapsed}")
|
|
147
|
+
|
|
148
|
+
return resp
|
|
149
|
+
|
|
150
|
+
|
|
143
151
|
def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
|
|
144
152
|
"""
|
|
145
153
|
Compute the rectangle bounding box of a polygon.
|
|
@@ -167,8 +175,10 @@ def _retry_log(retry_state, *args, **kwargs):
|
|
|
167
175
|
before_sleep=_retry_log,
|
|
168
176
|
reraise=True,
|
|
169
177
|
)
|
|
170
|
-
def _retried_request(url):
|
|
171
|
-
resp =
|
|
178
|
+
def _retried_request(url, *args, method=requests.get, **kwargs):
|
|
179
|
+
resp = method(
|
|
180
|
+
url, *args, timeout=DOWNLOAD_TIMEOUT, verify=should_verify_cert(url), **kwargs
|
|
181
|
+
)
|
|
172
182
|
resp.raise_for_status()
|
|
173
183
|
return resp
|
|
174
184
|
|
|
@@ -20,6 +20,8 @@ class MagicDict(dict):
|
|
|
20
20
|
Automagically convert lists and dicts to MagicDicts and lists of MagicDicts
|
|
21
21
|
Allows for nested access: foo.bar.baz
|
|
22
22
|
"""
|
|
23
|
+
if isinstance(item, Dataset):
|
|
24
|
+
return item
|
|
23
25
|
if isinstance(item, list):
|
|
24
26
|
return list(map(self._magify, item))
|
|
25
27
|
if isinstance(item, dict):
|
|
@@ -75,10 +77,10 @@ class Element(MagicDict):
|
|
|
75
77
|
|
|
76
78
|
def image_url(self, size: str = "full") -> str | None:
|
|
77
79
|
"""
|
|
78
|
-
Build
|
|
80
|
+
Build a URL to access the image.
|
|
79
81
|
When possible, will return the S3 URL for images, so an ML worker can bypass IIIF servers.
|
|
80
82
|
:param size: Subresolution of the image, following the syntax of the IIIF resize parameter.
|
|
81
|
-
:returns:
|
|
83
|
+
:returns: A URL to the image, or None if the element does not have an image.
|
|
82
84
|
"""
|
|
83
85
|
if not self.get("zone"):
|
|
84
86
|
return
|
|
@@ -272,6 +274,16 @@ class Dataset(ArkindexModel):
|
|
|
272
274
|
return f"{self.id}.tar.zst"
|
|
273
275
|
|
|
274
276
|
|
|
277
|
+
class Set(MagicDict):
|
|
278
|
+
"""
|
|
279
|
+
Describes an Arkindex dataset set.
|
|
280
|
+
"""
|
|
281
|
+
|
|
282
|
+
def __str__(self):
|
|
283
|
+
# Not using ArkindexModel.__str__ as we do not retrieve the Set ID
|
|
284
|
+
return f"{self.__class__.__name__} ({self.name}) from {self.dataset}"
|
|
285
|
+
|
|
286
|
+
|
|
275
287
|
class Artifact(ArkindexModel):
|
|
276
288
|
"""
|
|
277
289
|
Describes an Arkindex artifact.
|
|
@@ -10,6 +10,19 @@ import zstandard as zstd
|
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
13
|
+
MANUAL_SOURCE = "manual"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def parse_source_id(value: str) -> bool | str | None:
|
|
17
|
+
"""
|
|
18
|
+
Parse a UUID argument (Worker Version, Worker Run, ...) to use it directly in the API.
|
|
19
|
+
Arkindex API filters generally expect `False` to filter manual sources.
|
|
20
|
+
"""
|
|
21
|
+
if value == MANUAL_SOURCE:
|
|
22
|
+
return False
|
|
23
|
+
return value or None
|
|
24
|
+
|
|
25
|
+
|
|
13
26
|
CHUNK_SIZE = 1024
|
|
14
27
|
"""Chunk Size used for ZSTD compression"""
|
|
15
28
|
|
|
@@ -31,9 +44,10 @@ def decompress_zst_archive(compressed_archive: Path) -> tuple[int, Path]:
|
|
|
31
44
|
|
|
32
45
|
logger.debug(f"Uncompressing file to {archive_path}")
|
|
33
46
|
try:
|
|
34
|
-
with
|
|
35
|
-
"
|
|
36
|
-
|
|
47
|
+
with (
|
|
48
|
+
compressed_archive.open("rb") as compressed,
|
|
49
|
+
archive_path.open("wb") as decompressed,
|
|
50
|
+
):
|
|
37
51
|
dctx.copy_stream(compressed, decompressed)
|
|
38
52
|
logger.debug(f"Successfully uncompressed archive {compressed_archive}")
|
|
39
53
|
except zstandard.ZstdError as e:
|