arkindex-base-worker 0.4.0__tar.gz → 0.4.0a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/PKG-INFO +10 -12
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_base_worker.egg-info/PKG-INFO +10 -12
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_base_worker.egg-info/SOURCES.txt +4 -14
- arkindex_base_worker-0.4.0a1/arkindex_base_worker.egg-info/requires.txt +16 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/cache.py +1 -1
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/image.py +1 -120
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/utils.py +0 -82
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/__init__.py +161 -46
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/base.py +11 -36
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/classification.py +18 -34
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/corpus.py +4 -21
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/dataset.py +1 -71
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/element.py +91 -352
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/entity.py +11 -11
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/metadata.py +9 -19
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/task.py +4 -5
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/training.py +18 -21
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/transcription.py +68 -89
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/version.py +1 -3
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/pyproject.toml +11 -12
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/__init__.py +1 -1
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/conftest.py +45 -33
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_base_worker.py +3 -204
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_dataset_worker.py +4 -7
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_classification.py → arkindex_base_worker-0.4.0a1/tests/test_elements_worker/test_classifications.py +61 -194
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_corpus.py +1 -32
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_dataset.py +1 -1
- arkindex_base_worker-0.4.0a1/tests/test_elements_worker/test_elements.py +2734 -0
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_entity_create.py → arkindex_base_worker-0.4.0a1/tests/test_elements_worker/test_entities.py +160 -26
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_image.py +1 -2
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_metadata.py +99 -224
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_task.py +1 -1
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_training.py +43 -17
- arkindex_base_worker-0.4.0a1/tests/test_elements_worker/test_transcriptions.py +2102 -0
- arkindex_base_worker-0.4.0a1/tests/test_elements_worker/test_worker.py +514 -0
- arkindex_base_worker-0.4.0a1/tests/test_image.py +584 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_merge.py +2 -1
- arkindex_base_worker-0.4.0a1/tests/test_utils.py +57 -0
- arkindex_base_worker-0.4.0/arkindex_base_worker.egg-info/requires.txt +0 -17
- arkindex_base_worker-0.4.0/arkindex_worker/worker/process.py +0 -92
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_element.py +0 -427
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_element_create_multiple.py +0 -715
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_element_create_single.py +0 -528
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_element_list_children.py +0 -969
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_element_list_parents.py +0 -530
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_entity_list_and_check.py +0 -160
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_process.py +0 -89
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_transcription_create.py +0 -873
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_transcription_create_with_elements.py +0 -951
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_transcription_list.py +0 -450
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_version.py +0 -60
- arkindex_base_worker-0.4.0/tests/test_elements_worker/test_worker.py +0 -797
- arkindex_base_worker-0.4.0/tests/test_image.py +0 -809
- arkindex_base_worker-0.4.0/tests/test_utils.py +0 -120
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/LICENSE +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/README.md +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_base_worker.egg-info/top_level.txt +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/__init__.py +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/models.py +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/image.py +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/hooks/pre_gen_project.py +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/setup.cfg +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_cache.py +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_element.py +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/__init__.py +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_cli.py +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/worker-demo/tests/__init__.py +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/worker-demo/tests/conftest.py +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/worker-demo/tests/test_worker.py +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/worker-demo/worker_demo/__init__.py +0 -0
- {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/worker-demo/worker_demo/worker.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.0a1
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -37,23 +37,21 @@ Classifier: License :: OSI Approved :: MIT License
|
|
|
37
37
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
38
38
|
Classifier: Programming Language :: Python :: 3.10
|
|
39
39
|
Classifier: Programming Language :: Python :: 3.11
|
|
40
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
41
40
|
Requires-Python: >=3.10
|
|
42
41
|
Description-Content-Type: text/markdown
|
|
43
42
|
License-File: LICENSE
|
|
44
|
-
Requires-Dist: humanize==4.11.0
|
|
45
43
|
Requires-Dist: peewee~=3.17
|
|
46
|
-
Requires-Dist: Pillow==
|
|
47
|
-
Requires-Dist: python-gnupg==0.5.
|
|
48
|
-
Requires-Dist: shapely==2.0.
|
|
49
|
-
Requires-Dist: teklia-toolbox==0.1.
|
|
50
|
-
Requires-Dist: zstandard==0.
|
|
44
|
+
Requires-Dist: Pillow==10.3.0
|
|
45
|
+
Requires-Dist: python-gnupg==0.5.2
|
|
46
|
+
Requires-Dist: shapely==2.0.3
|
|
47
|
+
Requires-Dist: teklia-toolbox==0.1.5
|
|
48
|
+
Requires-Dist: zstandard==0.22.0
|
|
51
49
|
Provides-Extra: docs
|
|
52
|
-
Requires-Dist: black==24.
|
|
53
|
-
Requires-Dist: mkdocs-material==9.5.
|
|
54
|
-
Requires-Dist: mkdocstrings-python==1.
|
|
50
|
+
Requires-Dist: black==24.4.0; extra == "docs"
|
|
51
|
+
Requires-Dist: mkdocs-material==9.5.17; extra == "docs"
|
|
52
|
+
Requires-Dist: mkdocstrings-python==1.9.2; extra == "docs"
|
|
55
53
|
Provides-Extra: tests
|
|
56
|
-
Requires-Dist: pytest==8.
|
|
54
|
+
Requires-Dist: pytest==8.1.1; extra == "tests"
|
|
57
55
|
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
58
56
|
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
59
57
|
|
{arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_base_worker.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.0a1
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -37,23 +37,21 @@ Classifier: License :: OSI Approved :: MIT License
|
|
|
37
37
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
38
38
|
Classifier: Programming Language :: Python :: 3.10
|
|
39
39
|
Classifier: Programming Language :: Python :: 3.11
|
|
40
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
41
40
|
Requires-Python: >=3.10
|
|
42
41
|
Description-Content-Type: text/markdown
|
|
43
42
|
License-File: LICENSE
|
|
44
|
-
Requires-Dist: humanize==4.11.0
|
|
45
43
|
Requires-Dist: peewee~=3.17
|
|
46
|
-
Requires-Dist: Pillow==
|
|
47
|
-
Requires-Dist: python-gnupg==0.5.
|
|
48
|
-
Requires-Dist: shapely==2.0.
|
|
49
|
-
Requires-Dist: teklia-toolbox==0.1.
|
|
50
|
-
Requires-Dist: zstandard==0.
|
|
44
|
+
Requires-Dist: Pillow==10.3.0
|
|
45
|
+
Requires-Dist: python-gnupg==0.5.2
|
|
46
|
+
Requires-Dist: shapely==2.0.3
|
|
47
|
+
Requires-Dist: teklia-toolbox==0.1.5
|
|
48
|
+
Requires-Dist: zstandard==0.22.0
|
|
51
49
|
Provides-Extra: docs
|
|
52
|
-
Requires-Dist: black==24.
|
|
53
|
-
Requires-Dist: mkdocs-material==9.5.
|
|
54
|
-
Requires-Dist: mkdocstrings-python==1.
|
|
50
|
+
Requires-Dist: black==24.4.0; extra == "docs"
|
|
51
|
+
Requires-Dist: mkdocs-material==9.5.17; extra == "docs"
|
|
52
|
+
Requires-Dist: mkdocstrings-python==1.9.2; extra == "docs"
|
|
55
53
|
Provides-Extra: tests
|
|
56
|
-
Requires-Dist: pytest==8.
|
|
54
|
+
Requires-Dist: pytest==8.1.1; extra == "tests"
|
|
57
55
|
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
58
56
|
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
59
57
|
|
|
@@ -20,7 +20,6 @@ arkindex_worker/worker/element.py
|
|
|
20
20
|
arkindex_worker/worker/entity.py
|
|
21
21
|
arkindex_worker/worker/image.py
|
|
22
22
|
arkindex_worker/worker/metadata.py
|
|
23
|
-
arkindex_worker/worker/process.py
|
|
24
23
|
arkindex_worker/worker/task.py
|
|
25
24
|
arkindex_worker/worker/training.py
|
|
26
25
|
arkindex_worker/worker/transcription.py
|
|
@@ -36,26 +35,17 @@ tests/test_image.py
|
|
|
36
35
|
tests/test_merge.py
|
|
37
36
|
tests/test_utils.py
|
|
38
37
|
tests/test_elements_worker/__init__.py
|
|
39
|
-
tests/test_elements_worker/test_classification.py
|
|
38
|
+
tests/test_elements_worker/test_classifications.py
|
|
40
39
|
tests/test_elements_worker/test_cli.py
|
|
41
40
|
tests/test_elements_worker/test_corpus.py
|
|
42
41
|
tests/test_elements_worker/test_dataset.py
|
|
43
|
-
tests/test_elements_worker/test_element.py
|
|
44
|
-
tests/test_elements_worker/test_element_create_multiple.py
|
|
45
|
-
tests/test_elements_worker/test_element_create_single.py
|
|
46
|
-
tests/test_elements_worker/test_element_list_children.py
|
|
47
|
-
tests/test_elements_worker/test_element_list_parents.py
|
|
48
|
-
tests/test_elements_worker/test_entity_create.py
|
|
49
|
-
tests/test_elements_worker/test_entity_list_and_check.py
|
|
42
|
+
tests/test_elements_worker/test_elements.py
|
|
43
|
+
tests/test_elements_worker/test_entities.py
|
|
50
44
|
tests/test_elements_worker/test_image.py
|
|
51
45
|
tests/test_elements_worker/test_metadata.py
|
|
52
|
-
tests/test_elements_worker/test_process.py
|
|
53
46
|
tests/test_elements_worker/test_task.py
|
|
54
47
|
tests/test_elements_worker/test_training.py
|
|
55
|
-
tests/test_elements_worker/test_transcription_create.py
|
|
56
|
-
tests/test_elements_worker/test_transcription_create_with_elements.py
|
|
57
|
-
tests/test_elements_worker/test_transcription_list.py
|
|
58
|
-
tests/test_elements_worker/test_version.py
|
|
48
|
+
tests/test_elements_worker/test_transcriptions.py
|
|
59
49
|
tests/test_elements_worker/test_worker.py
|
|
60
50
|
worker-demo/tests/__init__.py
|
|
61
51
|
worker-demo/tests/conftest.py
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
peewee~=3.17
|
|
2
|
+
Pillow==10.3.0
|
|
3
|
+
python-gnupg==0.5.2
|
|
4
|
+
shapely==2.0.3
|
|
5
|
+
teklia-toolbox==0.1.5
|
|
6
|
+
zstandard==0.22.0
|
|
7
|
+
|
|
8
|
+
[docs]
|
|
9
|
+
black==24.4.0
|
|
10
|
+
mkdocs-material==9.5.17
|
|
11
|
+
mkdocstrings-python==1.9.2
|
|
12
|
+
|
|
13
|
+
[tests]
|
|
14
|
+
pytest==8.1.1
|
|
15
|
+
pytest-mock==3.14.0
|
|
16
|
+
pytest-responses==0.5.1
|
|
@@ -380,7 +380,7 @@ def unsupported_cache(func):
|
|
|
380
380
|
def wrapper(self, *args, **kwargs):
|
|
381
381
|
results = func(self, *args, **kwargs)
|
|
382
382
|
|
|
383
|
-
if self.use_cache:
|
|
383
|
+
if not (self.is_read_only or self.use_cache):
|
|
384
384
|
logger.warning(
|
|
385
385
|
f"This API helper `{func.__name__}` did not update the cache database"
|
|
386
386
|
)
|
|
@@ -2,18 +2,13 @@
|
|
|
2
2
|
Helper methods to download and open IIIF images, and manage polygons.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
import functools
|
|
6
|
-
import os
|
|
7
5
|
import re
|
|
8
|
-
import tempfile
|
|
9
6
|
from collections import namedtuple
|
|
10
|
-
from collections.abc import Generator, Iterator
|
|
11
7
|
from io import BytesIO
|
|
12
8
|
from math import ceil
|
|
13
9
|
from pathlib import Path
|
|
14
10
|
from typing import TYPE_CHECKING
|
|
15
11
|
|
|
16
|
-
import humanize
|
|
17
12
|
import requests
|
|
18
13
|
from PIL import Image
|
|
19
14
|
from shapely.affinity import rotate, scale, translate
|
|
@@ -26,7 +21,6 @@ from tenacity import (
|
|
|
26
21
|
)
|
|
27
22
|
|
|
28
23
|
from arkindex_worker import logger
|
|
29
|
-
from arkindex_worker.utils import pluralize
|
|
30
24
|
from teklia_toolbox.requests import should_verify_cert
|
|
31
25
|
|
|
32
26
|
# Avoid circular imports error when type checking
|
|
@@ -45,57 +39,8 @@ IIIF_URL = re.compile(r"\w+:\/{2}.+\/.+\/.+\/.+\/(?P<size>.+)\/!?\d+\/\w+\.\w+")
|
|
|
45
39
|
IIIF_FULL = "full"
|
|
46
40
|
# Maximum size available
|
|
47
41
|
IIIF_MAX = "max"
|
|
48
|
-
# Ratio to resize image
|
|
49
|
-
IMAGE_RATIO = [1, 0.9, 0.85, 0.80, 0.75, 0.70, 0.60, 0.50, 0.40, 0.30]
|
|
50
42
|
|
|
51
43
|
|
|
52
|
-
def update_pillow_image_size_limit(func):
|
|
53
|
-
"""
|
|
54
|
-
Update Pillow Image size limit
|
|
55
|
-
"""
|
|
56
|
-
|
|
57
|
-
@functools.wraps(func)
|
|
58
|
-
def wrapper(
|
|
59
|
-
*args,
|
|
60
|
-
max_image_pixels: str | int | None = os.getenv("ARKINDEX_MAX_IMAGE_PIXELS"),
|
|
61
|
-
**kwargs,
|
|
62
|
-
):
|
|
63
|
-
"""
|
|
64
|
-
Wrapper to update Pillow Image size limit and restore it at the end of the function.
|
|
65
|
-
|
|
66
|
-
:param *args: Positional arguments passed to the function.
|
|
67
|
-
:param max_image_pixels: Pillow Image size limit to use.
|
|
68
|
-
:param **kwargs: Keyword arguments passed to the function.
|
|
69
|
-
"""
|
|
70
|
-
MAX_IMAGE_PIXELS = Image.MAX_IMAGE_PIXELS
|
|
71
|
-
|
|
72
|
-
# Override Pillow Image size limit
|
|
73
|
-
if max_image_pixels is not None:
|
|
74
|
-
max_image_pixels = int(max_image_pixels)
|
|
75
|
-
# Override Pillow limit for detecting decompression bombs, disabled if set to 0
|
|
76
|
-
if max_image_pixels == 0:
|
|
77
|
-
logger.warning(
|
|
78
|
-
"Pillow Image size limit is completely disabled, make sure you trust the image source."
|
|
79
|
-
)
|
|
80
|
-
Image.MAX_IMAGE_PIXELS = None
|
|
81
|
-
else:
|
|
82
|
-
Image.MAX_IMAGE_PIXELS = max_image_pixels
|
|
83
|
-
|
|
84
|
-
try:
|
|
85
|
-
results = func(*args, **kwargs)
|
|
86
|
-
except:
|
|
87
|
-
# Restore initial Pillow Image size limit
|
|
88
|
-
Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS
|
|
89
|
-
raise
|
|
90
|
-
|
|
91
|
-
# Restore initial Pillow Image size limit
|
|
92
|
-
Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS
|
|
93
|
-
return results
|
|
94
|
-
|
|
95
|
-
return wrapper
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
@update_pillow_image_size_limit
|
|
99
44
|
def open_image(
|
|
100
45
|
path: str,
|
|
101
46
|
mode: str | None = "RGB",
|
|
@@ -203,70 +148,6 @@ def upload_image(image: Image, url: str) -> requests.Response:
|
|
|
203
148
|
return resp
|
|
204
149
|
|
|
205
150
|
|
|
206
|
-
def resized_images(
|
|
207
|
-
*args,
|
|
208
|
-
element: "Element",
|
|
209
|
-
max_pixels: int | None = None,
|
|
210
|
-
max_bytes: int | None = None,
|
|
211
|
-
**kwargs,
|
|
212
|
-
) -> Iterator[Generator[tempfile.NamedTemporaryFile, None, None]]:
|
|
213
|
-
"""
|
|
214
|
-
Build resized images according to the pixel and byte limits.
|
|
215
|
-
|
|
216
|
-
:param *args: Positional arguments passed to [arkindex_worker.models.Element.open_image_tempfile][].
|
|
217
|
-
:param element: Element whose image needs to be resized.
|
|
218
|
-
:param max_pixels: Maximum pixel size of the resized images.
|
|
219
|
-
:param max_bytes: Maximum byte size of the resized images.
|
|
220
|
-
:param **kwargs: Keyword arguments passed to [arkindex_worker.models.Element.open_image_tempfile][].
|
|
221
|
-
:returns: An iterator of the temporary file of the resized image.
|
|
222
|
-
"""
|
|
223
|
-
_, _, element_width, element_height = polygon_bounding_box(element.polygon)
|
|
224
|
-
|
|
225
|
-
logger.info(f"This element's image sizes are ({element_width} x {element_height}).")
|
|
226
|
-
if max_pixels and max(element_width, element_height) > max_pixels:
|
|
227
|
-
logger.warning(
|
|
228
|
-
f"Maximum image input size supported is ({max_pixels} x {max_pixels})."
|
|
229
|
-
)
|
|
230
|
-
logger.warning("The image will be resized.")
|
|
231
|
-
|
|
232
|
-
element_pixel, param = (
|
|
233
|
-
(element_width, "max_width")
|
|
234
|
-
if element_width > element_height
|
|
235
|
-
else (element_height, "max_height")
|
|
236
|
-
)
|
|
237
|
-
|
|
238
|
-
for resized_pixel in sorted(
|
|
239
|
-
set(
|
|
240
|
-
min(round(ratio * element_pixel), max_pixels or element_pixel)
|
|
241
|
-
for ratio in IMAGE_RATIO
|
|
242
|
-
),
|
|
243
|
-
reverse=True,
|
|
244
|
-
):
|
|
245
|
-
with element.open_image_tempfile(
|
|
246
|
-
*args, **{**kwargs, param: resized_pixel}
|
|
247
|
-
) as image:
|
|
248
|
-
pillow_image = Image.open(image)
|
|
249
|
-
if (
|
|
250
|
-
pillow_image.width != element_width
|
|
251
|
-
or pillow_image.height != element_height
|
|
252
|
-
):
|
|
253
|
-
logger.warning(
|
|
254
|
-
f"The image was resized to ({pillow_image.width} x {pillow_image.height})."
|
|
255
|
-
)
|
|
256
|
-
|
|
257
|
-
# The image is still too large
|
|
258
|
-
image_size = Path(image.name).stat().st_size
|
|
259
|
-
if max_bytes and image_size > max_bytes:
|
|
260
|
-
logger.warning(f"The image size is {humanize.naturalsize(image_size)}.")
|
|
261
|
-
logger.warning(
|
|
262
|
-
f"Maximum image input size supported is {humanize.naturalsize(max_bytes)}."
|
|
263
|
-
)
|
|
264
|
-
logger.warning("The image will be resized.")
|
|
265
|
-
continue
|
|
266
|
-
|
|
267
|
-
yield image
|
|
268
|
-
|
|
269
|
-
|
|
270
151
|
def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
|
|
271
152
|
"""
|
|
272
153
|
Compute the rectangle bounding box of a polygon.
|
|
@@ -283,7 +164,7 @@ def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
|
|
|
283
164
|
def _retry_log(retry_state, *args, **kwargs):
|
|
284
165
|
logger.warning(
|
|
285
166
|
f"Request to {retry_state.args[0]} failed ({repr(retry_state.outcome.exception())}), "
|
|
286
|
-
f
|
|
167
|
+
f"retrying in {retry_state.idle_for} seconds"
|
|
287
168
|
)
|
|
288
169
|
|
|
289
170
|
|
|
@@ -1,41 +1,14 @@
|
|
|
1
1
|
import hashlib
|
|
2
|
-
import inspect
|
|
3
2
|
import logging
|
|
4
3
|
import os
|
|
5
4
|
import tarfile
|
|
6
5
|
import tempfile
|
|
7
|
-
from collections.abc import Callable, Generator
|
|
8
|
-
from itertools import islice
|
|
9
6
|
from pathlib import Path
|
|
10
|
-
from typing import Any
|
|
11
7
|
|
|
12
8
|
import zstandard as zstd
|
|
13
9
|
|
|
14
10
|
logger = logging.getLogger(__name__)
|
|
15
11
|
|
|
16
|
-
|
|
17
|
-
def pluralize(singular: str, count: int) -> str:
|
|
18
|
-
"""Pluralize a noun, if necessary, using simplified rules of English pluralization and a list of exceptions.
|
|
19
|
-
|
|
20
|
-
:param str singular: A singular noun describing an object
|
|
21
|
-
:param int count: The object count, to determine whether to pluralize or not
|
|
22
|
-
:return str: The noun in its singular or plural form
|
|
23
|
-
"""
|
|
24
|
-
if count == 1:
|
|
25
|
-
return singular
|
|
26
|
-
|
|
27
|
-
some_exceptions = {
|
|
28
|
-
"child": "children",
|
|
29
|
-
"class": "classes",
|
|
30
|
-
"entity": "entities",
|
|
31
|
-
"metadata": "metadata",
|
|
32
|
-
}
|
|
33
|
-
if singular in some_exceptions:
|
|
34
|
-
return some_exceptions[singular]
|
|
35
|
-
|
|
36
|
-
return singular + "s"
|
|
37
|
-
|
|
38
|
-
|
|
39
12
|
MANUAL_SOURCE = "manual"
|
|
40
13
|
|
|
41
14
|
|
|
@@ -223,58 +196,3 @@ def create_tar_zst_archive(
|
|
|
223
196
|
close_delete_file(tar_fd, tar_archive)
|
|
224
197
|
|
|
225
198
|
return zst_fd, zst_archive, zst_hash, tar_hash
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
DEFAULT_BATCH_SIZE = 50
|
|
229
|
-
"""Batch size used for bulk publication to Arkindex"""
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
def batch_publication(func: Callable) -> Callable:
|
|
233
|
-
"""
|
|
234
|
-
Decorator for functions that should raise an error when the value passed through the ``batch_size`` parameter is **not** a strictly positive integer.
|
|
235
|
-
|
|
236
|
-
:param func: The function to wrap with the ``batch_size`` check
|
|
237
|
-
:return: The function passing the ``batch_size`` check
|
|
238
|
-
"""
|
|
239
|
-
signature = inspect.signature(func)
|
|
240
|
-
|
|
241
|
-
def wrapper(self, *args, **kwargs):
|
|
242
|
-
bound_func = signature.bind(self, *args, **kwargs)
|
|
243
|
-
bound_func.apply_defaults()
|
|
244
|
-
batch_size = bound_func.arguments.get("batch_size")
|
|
245
|
-
assert (
|
|
246
|
-
batch_size is not None and isinstance(batch_size, int) and batch_size > 0
|
|
247
|
-
), "batch_size shouldn't be null and should be a strictly positive integer"
|
|
248
|
-
|
|
249
|
-
return func(self, *args, **kwargs)
|
|
250
|
-
|
|
251
|
-
wrapper.__name__ = func.__name__
|
|
252
|
-
return wrapper
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
def make_batches(
|
|
256
|
-
objects: list, singular_name: str, batch_size: int
|
|
257
|
-
) -> Generator[list[Any]]:
|
|
258
|
-
"""Split an object list in successive batches of maximum size ``batch_size``.
|
|
259
|
-
|
|
260
|
-
:param objects: The object list to divide in batches of ``batch_size`` size
|
|
261
|
-
:param singular_name: The singular form of the noun associated with the object list
|
|
262
|
-
:param batch_size: The maximum size of each batch to split the object list
|
|
263
|
-
:return: A generator of successive batches containing ``batch_size`` items from ``objects``
|
|
264
|
-
"""
|
|
265
|
-
count = len(objects)
|
|
266
|
-
logger.info(
|
|
267
|
-
f"Creating batches of size {batch_size} to process {count} {pluralize(singular_name, count)}"
|
|
268
|
-
)
|
|
269
|
-
|
|
270
|
-
index = 1
|
|
271
|
-
iterator = iter(objects)
|
|
272
|
-
while batch := list(islice(iterator, batch_size)):
|
|
273
|
-
count = len(batch)
|
|
274
|
-
logger.info(
|
|
275
|
-
f"Processing batch {index} containing {count} {pluralize(singular_name, count)}..."
|
|
276
|
-
)
|
|
277
|
-
|
|
278
|
-
yield batch
|
|
279
|
-
|
|
280
|
-
index += 1
|