arkindex-base-worker 0.4.0rc5__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/PKG-INFO +10 -13
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/README.md +1 -1
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_base_worker.egg-info/PKG-INFO +10 -13
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_base_worker.egg-info/SOURCES.txt +4 -4
- arkindex_base_worker-0.5.0/arkindex_base_worker.egg-info/requires.txt +12 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_base_worker.egg-info/top_level.txt +1 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/__init__.py +3 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/cache.py +6 -25
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/image.py +105 -66
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/utils.py +2 -1
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/__init__.py +22 -32
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/base.py +16 -9
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/classification.py +36 -34
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/corpus.py +3 -3
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/dataset.py +9 -9
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/element.py +261 -231
- arkindex_base_worker-0.5.0/arkindex_worker/worker/entity.py +336 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/image.py +3 -3
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/metadata.py +27 -38
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/process.py +24 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/task.py +9 -9
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/training.py +15 -11
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/transcription.py +77 -71
- arkindex_base_worker-0.5.0/examples/standalone/python/worker.py +171 -0
- arkindex_base_worker-0.5.0/examples/tooled/python/worker.py +50 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/pyproject.toml +8 -13
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/conftest.py +22 -36
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_base_worker.py +1 -1
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_cache.py +1 -2
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_dataset_worker.py +1 -1
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_element.py +200 -26
- arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_entity_create.py → arkindex_base_worker-0.5.0/tests/test_elements_worker/test_entity.py +220 -227
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_metadata.py +0 -47
- arkindex_base_worker-0.5.0/tests/test_elements_worker/test_process.py +89 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_training.py +8 -8
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_worker.py +61 -14
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_image.py +244 -126
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_merge.py +0 -7
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_utils.py +37 -0
- arkindex_base_worker-0.4.0rc5/arkindex_base_worker.egg-info/requires.txt +0 -17
- arkindex_base_worker-0.4.0rc5/arkindex_worker/worker/entity.py +0 -405
- arkindex_base_worker-0.4.0rc5/arkindex_worker/worker/version.py +0 -58
- arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_entity_list_and_check.py +0 -160
- arkindex_base_worker-0.4.0rc5/tests/test_elements_worker/test_version.py +0 -60
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/LICENSE +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_worker/models.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/hooks/pre_gen_project.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/setup.cfg +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/__init__.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_element.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/__init__.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_classification.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_cli.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_corpus.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_dataset.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_element_create_multiple.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_element_create_single.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_element_list_children.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_element_list_parents.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_image.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_task.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_transcription_create.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_transcription_create_with_elements.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_transcription_list.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/worker-demo/tests/__init__.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/worker-demo/tests/conftest.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/worker-demo/tests/test_worker.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/worker-demo/worker_demo/__init__.py +0 -0
- {arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/worker-demo/worker_demo/worker.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -41,21 +41,18 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
41
41
|
Requires-Python: >=3.10
|
|
42
42
|
Description-Content-Type: text/markdown
|
|
43
43
|
License-File: LICENSE
|
|
44
|
-
Requires-Dist: humanize==4.
|
|
44
|
+
Requires-Dist: humanize==4.12.3
|
|
45
45
|
Requires-Dist: peewee~=3.17
|
|
46
|
-
Requires-Dist: Pillow==
|
|
47
|
-
Requires-Dist: python-gnupg==0.5.
|
|
46
|
+
Requires-Dist: Pillow==11.3.0
|
|
47
|
+
Requires-Dist: python-gnupg==0.5.4
|
|
48
48
|
Requires-Dist: shapely==2.0.6
|
|
49
|
-
Requires-Dist: teklia-toolbox==0.1.
|
|
50
|
-
Requires-Dist: zstandard==0.
|
|
51
|
-
Provides-Extra: docs
|
|
52
|
-
Requires-Dist: black==24.4.2; extra == "docs"
|
|
53
|
-
Requires-Dist: mkdocs-material==9.5.33; extra == "docs"
|
|
54
|
-
Requires-Dist: mkdocstrings-python==1.11.1; extra == "docs"
|
|
49
|
+
Requires-Dist: teklia-toolbox==0.1.11
|
|
50
|
+
Requires-Dist: zstandard==0.23.0
|
|
55
51
|
Provides-Extra: tests
|
|
56
|
-
Requires-Dist: pytest==8.3.
|
|
52
|
+
Requires-Dist: pytest==8.3.5; extra == "tests"
|
|
57
53
|
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
58
54
|
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
55
|
+
Dynamic: license-file
|
|
59
56
|
|
|
60
57
|
# Arkindex base Worker
|
|
61
58
|
|
|
@@ -65,7 +62,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
|
|
|
65
62
|
|
|
66
63
|
## Documentation
|
|
67
64
|
|
|
68
|
-
The [documentation](https://workers.arkindex.org/) is made with [
|
|
65
|
+
The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
|
|
69
66
|
|
|
70
67
|
## Create a new worker using our template
|
|
71
68
|
|
|
@@ -6,7 +6,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
|
|
|
6
6
|
|
|
7
7
|
## Documentation
|
|
8
8
|
|
|
9
|
-
The [documentation](https://workers.arkindex.org/) is made with [
|
|
9
|
+
The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
|
|
10
10
|
|
|
11
11
|
## Create a new worker using our template
|
|
12
12
|
|
{arkindex_base_worker-0.4.0rc5 → arkindex_base_worker-0.5.0}/arkindex_base_worker.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -41,21 +41,18 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
41
41
|
Requires-Python: >=3.10
|
|
42
42
|
Description-Content-Type: text/markdown
|
|
43
43
|
License-File: LICENSE
|
|
44
|
-
Requires-Dist: humanize==4.
|
|
44
|
+
Requires-Dist: humanize==4.12.3
|
|
45
45
|
Requires-Dist: peewee~=3.17
|
|
46
|
-
Requires-Dist: Pillow==
|
|
47
|
-
Requires-Dist: python-gnupg==0.5.
|
|
46
|
+
Requires-Dist: Pillow==11.3.0
|
|
47
|
+
Requires-Dist: python-gnupg==0.5.4
|
|
48
48
|
Requires-Dist: shapely==2.0.6
|
|
49
|
-
Requires-Dist: teklia-toolbox==0.1.
|
|
50
|
-
Requires-Dist: zstandard==0.
|
|
51
|
-
Provides-Extra: docs
|
|
52
|
-
Requires-Dist: black==24.4.2; extra == "docs"
|
|
53
|
-
Requires-Dist: mkdocs-material==9.5.33; extra == "docs"
|
|
54
|
-
Requires-Dist: mkdocstrings-python==1.11.1; extra == "docs"
|
|
49
|
+
Requires-Dist: teklia-toolbox==0.1.11
|
|
50
|
+
Requires-Dist: zstandard==0.23.0
|
|
55
51
|
Provides-Extra: tests
|
|
56
|
-
Requires-Dist: pytest==8.3.
|
|
52
|
+
Requires-Dist: pytest==8.3.5; extra == "tests"
|
|
57
53
|
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
58
54
|
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
55
|
+
Dynamic: license-file
|
|
59
56
|
|
|
60
57
|
# Arkindex base Worker
|
|
61
58
|
|
|
@@ -65,7 +62,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
|
|
|
65
62
|
|
|
66
63
|
## Documentation
|
|
67
64
|
|
|
68
|
-
The [documentation](https://workers.arkindex.org/) is made with [
|
|
65
|
+
The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
|
|
69
66
|
|
|
70
67
|
## Create a new worker using our template
|
|
71
68
|
|
|
@@ -24,7 +24,8 @@ arkindex_worker/worker/process.py
|
|
|
24
24
|
arkindex_worker/worker/task.py
|
|
25
25
|
arkindex_worker/worker/training.py
|
|
26
26
|
arkindex_worker/worker/transcription.py
|
|
27
|
-
|
|
27
|
+
examples/standalone/python/worker.py
|
|
28
|
+
examples/tooled/python/worker.py
|
|
28
29
|
hooks/pre_gen_project.py
|
|
29
30
|
tests/__init__.py
|
|
30
31
|
tests/conftest.py
|
|
@@ -45,16 +46,15 @@ tests/test_elements_worker/test_element_create_multiple.py
|
|
|
45
46
|
tests/test_elements_worker/test_element_create_single.py
|
|
46
47
|
tests/test_elements_worker/test_element_list_children.py
|
|
47
48
|
tests/test_elements_worker/test_element_list_parents.py
|
|
48
|
-
tests/test_elements_worker/
|
|
49
|
-
tests/test_elements_worker/test_entity_list_and_check.py
|
|
49
|
+
tests/test_elements_worker/test_entity.py
|
|
50
50
|
tests/test_elements_worker/test_image.py
|
|
51
51
|
tests/test_elements_worker/test_metadata.py
|
|
52
|
+
tests/test_elements_worker/test_process.py
|
|
52
53
|
tests/test_elements_worker/test_task.py
|
|
53
54
|
tests/test_elements_worker/test_training.py
|
|
54
55
|
tests/test_elements_worker/test_transcription_create.py
|
|
55
56
|
tests/test_elements_worker/test_transcription_create_with_elements.py
|
|
56
57
|
tests/test_elements_worker/test_transcription_list.py
|
|
57
|
-
tests/test_elements_worker/test_version.py
|
|
58
58
|
tests/test_elements_worker/test_worker.py
|
|
59
59
|
worker-demo/tests/__init__.py
|
|
60
60
|
worker-demo/tests/conftest.py
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import importlib.metadata
|
|
1
2
|
import logging
|
|
2
3
|
|
|
3
4
|
logging.basicConfig(
|
|
@@ -5,3 +6,5 @@ logging.basicConfig(
|
|
|
5
6
|
format="%(asctime)s %(levelname)s/%(name)s: %(message)s",
|
|
6
7
|
)
|
|
7
8
|
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
VERSION = importlib.metadata.version("arkindex-base-worker")
|
|
@@ -206,23 +206,6 @@ class CachedClassification(Model):
|
|
|
206
206
|
table_name = "classifications"
|
|
207
207
|
|
|
208
208
|
|
|
209
|
-
class CachedEntity(Model):
|
|
210
|
-
"""
|
|
211
|
-
Cache entity table
|
|
212
|
-
"""
|
|
213
|
-
|
|
214
|
-
id = UUIDField(primary_key=True)
|
|
215
|
-
type = CharField(max_length=50)
|
|
216
|
-
name = TextField()
|
|
217
|
-
validated = BooleanField(default=False)
|
|
218
|
-
metas = JSONField(null=True)
|
|
219
|
-
worker_run_id = UUIDField(null=True)
|
|
220
|
-
|
|
221
|
-
class Meta:
|
|
222
|
-
database = db
|
|
223
|
-
table_name = "entities"
|
|
224
|
-
|
|
225
|
-
|
|
226
209
|
class CachedTranscriptionEntity(Model):
|
|
227
210
|
"""
|
|
228
211
|
Cache transcription entity table
|
|
@@ -231,14 +214,14 @@ class CachedTranscriptionEntity(Model):
|
|
|
231
214
|
transcription = ForeignKeyField(
|
|
232
215
|
CachedTranscription, backref="transcription_entities"
|
|
233
216
|
)
|
|
234
|
-
|
|
217
|
+
type = CharField(max_length=50)
|
|
235
218
|
offset = IntegerField(constraints=[Check("offset >= 0")])
|
|
236
219
|
length = IntegerField(constraints=[Check("length > 0")])
|
|
237
220
|
worker_run_id = UUIDField(null=True)
|
|
238
221
|
confidence = FloatField(null=True)
|
|
239
222
|
|
|
240
223
|
class Meta:
|
|
241
|
-
primary_key = CompositeKey("transcription", "
|
|
224
|
+
primary_key = CompositeKey("transcription", "type")
|
|
242
225
|
database = db
|
|
243
226
|
table_name = "transcription_entities"
|
|
244
227
|
|
|
@@ -272,12 +255,11 @@ MODELS = [
|
|
|
272
255
|
CachedElement,
|
|
273
256
|
CachedTranscription,
|
|
274
257
|
CachedClassification,
|
|
275
|
-
CachedEntity,
|
|
276
258
|
CachedTranscriptionEntity,
|
|
277
259
|
CachedDataset,
|
|
278
260
|
CachedDatasetElement,
|
|
279
261
|
]
|
|
280
|
-
SQL_VERSION =
|
|
262
|
+
SQL_VERSION = 4
|
|
281
263
|
|
|
282
264
|
|
|
283
265
|
def init_cache_db(path: Path):
|
|
@@ -327,9 +309,9 @@ def check_version(cache_path: str | Path):
|
|
|
327
309
|
except OperationalError:
|
|
328
310
|
version = None
|
|
329
311
|
|
|
330
|
-
assert (
|
|
331
|
-
version
|
|
332
|
-
)
|
|
312
|
+
assert version == SQL_VERSION, (
|
|
313
|
+
f"The SQLite database {cache_path} does not have the correct cache version, it should be {SQL_VERSION}"
|
|
314
|
+
)
|
|
333
315
|
|
|
334
316
|
|
|
335
317
|
def merge_parents_cache(paths: list, current_database: Path):
|
|
@@ -365,7 +347,6 @@ def merge_parents_cache(paths: list, current_database: Path):
|
|
|
365
347
|
f"REPLACE INTO elements SELECT * FROM source_{idx}.elements;",
|
|
366
348
|
f"REPLACE INTO transcriptions SELECT * FROM source_{idx}.transcriptions;",
|
|
367
349
|
f"REPLACE INTO classifications SELECT * FROM source_{idx}.classifications;",
|
|
368
|
-
f"REPLACE INTO entities SELECT * FROM source_{idx}.entities;",
|
|
369
350
|
f"REPLACE INTO transcription_entities SELECT * FROM source_{idx}.transcription_entities;",
|
|
370
351
|
f"REPLACE INTO datasets SELECT * FROM source_{idx}.datasets;",
|
|
371
352
|
f"REPLACE INTO dataset_elements SELECT * FROM source_{idx}.dataset_elements;",
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Helper methods to download and open IIIF images, and manage polygons.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
import base64
|
|
5
6
|
import functools
|
|
6
7
|
import os
|
|
7
8
|
import re
|
|
@@ -14,6 +15,7 @@ from pathlib import Path
|
|
|
14
15
|
from typing import TYPE_CHECKING
|
|
15
16
|
|
|
16
17
|
import humanize
|
|
18
|
+
import numpy as np
|
|
17
19
|
import requests
|
|
18
20
|
from PIL import Image
|
|
19
21
|
from shapely.affinity import rotate, scale, translate
|
|
@@ -22,10 +24,11 @@ from tenacity import (
|
|
|
22
24
|
retry,
|
|
23
25
|
retry_if_exception_type,
|
|
24
26
|
stop_after_attempt,
|
|
25
|
-
|
|
27
|
+
wait_chain,
|
|
28
|
+
wait_fixed,
|
|
26
29
|
)
|
|
27
30
|
|
|
28
|
-
from arkindex_worker import logger
|
|
31
|
+
from arkindex_worker import VERSION, logger
|
|
29
32
|
from arkindex_worker.utils import pluralize
|
|
30
33
|
from teklia_toolbox.requests import should_verify_cert
|
|
31
34
|
|
|
@@ -39,14 +42,16 @@ DOWNLOAD_TIMEOUT = (30, 60)
|
|
|
39
42
|
|
|
40
43
|
BoundingBox = namedtuple("BoundingBox", ["x", "y", "width", "height"])
|
|
41
44
|
|
|
45
|
+
# Specific User-Agent to bypass potential server limitations
|
|
46
|
+
IIIF_USER_AGENT = f"Teklia/Workers {VERSION}"
|
|
42
47
|
# To parse IIIF Urls
|
|
43
48
|
IIIF_URL = re.compile(r"\w+:\/{2}.+\/.+\/.+\/.+\/(?P<size>.+)\/!?\d+\/\w+\.\w+")
|
|
44
49
|
# Full size of the region
|
|
45
50
|
IIIF_FULL = "full"
|
|
46
51
|
# Maximum size available
|
|
47
52
|
IIIF_MAX = "max"
|
|
48
|
-
#
|
|
49
|
-
|
|
53
|
+
# Ratios to resize images: 1.0, 0.95, [...], 0.1, 0.05
|
|
54
|
+
IMAGE_RATIOS = np.arange(1, 0, -0.05).round(2).tolist()
|
|
50
55
|
|
|
51
56
|
|
|
52
57
|
def update_pillow_image_size_limit(func):
|
|
@@ -206,44 +211,81 @@ def upload_image(image: Image, url: str) -> requests.Response:
|
|
|
206
211
|
def resized_images(
|
|
207
212
|
*args,
|
|
208
213
|
element: "Element",
|
|
209
|
-
|
|
214
|
+
max_pixels_short: int | None = None,
|
|
215
|
+
max_pixels_long: int | None = None,
|
|
210
216
|
max_bytes: int | None = None,
|
|
217
|
+
use_base64: bool = False,
|
|
211
218
|
**kwargs,
|
|
212
|
-
) -> Iterator[Generator[tempfile.
|
|
219
|
+
) -> Iterator[Generator[tempfile._TemporaryFileWrapper | str]]:
|
|
213
220
|
"""
|
|
214
|
-
Build resized images according to
|
|
221
|
+
Build resized images according to pixel and byte limits.
|
|
215
222
|
|
|
216
223
|
:param *args: Positional arguments passed to [arkindex_worker.models.Element.open_image_tempfile][].
|
|
217
224
|
:param element: Element whose image needs to be resized.
|
|
218
|
-
:param
|
|
225
|
+
:param max_pixels_short: Maximum pixel size of the resized images' short side.
|
|
226
|
+
:param max_pixels_long: Maximum pixel size of the resized images' long side.
|
|
219
227
|
:param max_bytes: Maximum byte size of the resized images.
|
|
228
|
+
:param use_base64: Whether or not to encode resized images in base64 before calculating their size.
|
|
220
229
|
:param **kwargs: Keyword arguments passed to [arkindex_worker.models.Element.open_image_tempfile][].
|
|
221
|
-
:returns: An iterator of
|
|
230
|
+
:returns: An iterator of temporary files for resized images OR an iterator of base64-encoded strings if `use_base64` is set.
|
|
222
231
|
"""
|
|
223
232
|
_, _, element_width, element_height = polygon_bounding_box(element.polygon)
|
|
233
|
+
logger.info(
|
|
234
|
+
f"This element's image dimensions are ({element_width} x {element_height})."
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
portrait_format = element_width <= element_height
|
|
238
|
+
max_pixels_width, max_pixels_height = (
|
|
239
|
+
(max_pixels_short, max_pixels_long)
|
|
240
|
+
if portrait_format
|
|
241
|
+
else (max_pixels_long, max_pixels_short)
|
|
242
|
+
)
|
|
224
243
|
|
|
225
|
-
|
|
226
|
-
if
|
|
244
|
+
# The image dimension is already within the pixel limitation, no need to resize the image
|
|
245
|
+
if max_pixels_width and max_pixels_width >= element_width:
|
|
246
|
+
max_pixels_width = None
|
|
247
|
+
if max_pixels_height and max_pixels_height >= element_height:
|
|
248
|
+
max_pixels_height = None
|
|
249
|
+
|
|
250
|
+
if (max_pixels_width and element_width > max_pixels_width) or (
|
|
251
|
+
max_pixels_height and element_height > max_pixels_height
|
|
252
|
+
):
|
|
227
253
|
logger.warning(
|
|
228
|
-
f"Maximum image
|
|
254
|
+
f"Maximum image dimensions supported are ({max_pixels_width or element_width} x {max_pixels_height or element_height})."
|
|
229
255
|
)
|
|
230
256
|
logger.warning("The image will be resized.")
|
|
231
257
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
258
|
+
# No limitations provided, we keep the image initial dimensions
|
|
259
|
+
if max_pixels_width is None and max_pixels_height is None:
|
|
260
|
+
open_image_param, max_value = (
|
|
261
|
+
("max_height", element_height)
|
|
262
|
+
if portrait_format
|
|
263
|
+
else ("max_width", element_width)
|
|
264
|
+
)
|
|
265
|
+
# A limitation is only given for the height, we resize it
|
|
266
|
+
elif max_pixels_width is None:
|
|
267
|
+
open_image_param, max_value = ("max_height", max_pixels_height)
|
|
268
|
+
# A limitation is only given for the width, we resize it
|
|
269
|
+
elif max_pixels_height is None:
|
|
270
|
+
open_image_param, max_value = ("max_width", max_pixels_width)
|
|
271
|
+
# Limitations are provided for both sides:
|
|
272
|
+
# - we resize only the one with the biggest scale factor
|
|
273
|
+
# - the remaining one will automatically fall within the other limitation
|
|
274
|
+
else:
|
|
275
|
+
width_rescaling_factor = element_width / max_pixels_width
|
|
276
|
+
height_rescaling_factor = element_height / max_pixels_height
|
|
277
|
+
open_image_param, max_value = (
|
|
278
|
+
("max_height", max_pixels_height)
|
|
279
|
+
if height_rescaling_factor > width_rescaling_factor
|
|
280
|
+
else ("max_width", max_pixels_width)
|
|
281
|
+
)
|
|
237
282
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
),
|
|
243
|
-
reverse=True,
|
|
244
|
-
):
|
|
283
|
+
resized_pixels = set(
|
|
284
|
+
min(round(ratio * max_value), max_value) for ratio in IMAGE_RATIOS
|
|
285
|
+
)
|
|
286
|
+
for resized_pixel in sorted(resized_pixels, reverse=True):
|
|
245
287
|
with element.open_image_tempfile(
|
|
246
|
-
*args, **{**kwargs,
|
|
288
|
+
*args, **{**kwargs, open_image_param: resized_pixel}
|
|
247
289
|
) as image:
|
|
248
290
|
pillow_image = Image.open(image)
|
|
249
291
|
if (
|
|
@@ -254,8 +296,12 @@ def resized_images(
|
|
|
254
296
|
f"The image was resized to ({pillow_image.width} x {pillow_image.height})."
|
|
255
297
|
)
|
|
256
298
|
|
|
257
|
-
# The image is still too large
|
|
258
299
|
image_size = Path(image.name).stat().st_size
|
|
300
|
+
if use_base64:
|
|
301
|
+
image = base64.b64encode(Path(image.name).read_bytes()).decode("utf-8")
|
|
302
|
+
image_size = len(image)
|
|
303
|
+
|
|
304
|
+
# The image is still too heavy
|
|
259
305
|
if max_bytes and image_size > max_bytes:
|
|
260
306
|
logger.warning(f"The image size is {humanize.naturalsize(image_size)}.")
|
|
261
307
|
logger.warning(
|
|
@@ -283,20 +329,26 @@ def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
|
|
|
283
329
|
def _retry_log(retry_state, *args, **kwargs):
|
|
284
330
|
logger.warning(
|
|
285
331
|
f"Request to {retry_state.args[0]} failed ({repr(retry_state.outcome.exception())}), "
|
|
286
|
-
f
|
|
332
|
+
f"retrying in {retry_state.idle_for} {pluralize('second', retry_state.idle_for)}"
|
|
287
333
|
)
|
|
288
334
|
|
|
289
335
|
|
|
290
336
|
@retry(
|
|
291
337
|
stop=stop_after_attempt(3),
|
|
292
|
-
|
|
338
|
+
# In the event of `requests.RequestException` errors, the call will be retried after 5 seconds, 10 seconds and finally 90 seconds before failing.
|
|
339
|
+
wait=wait_chain(wait_fixed(5), wait_fixed(10), wait_fixed(90)),
|
|
293
340
|
retry=retry_if_exception_type(requests.RequestException),
|
|
294
341
|
before_sleep=_retry_log,
|
|
295
342
|
reraise=True,
|
|
296
343
|
)
|
|
297
344
|
def _retried_request(url, *args, method=requests.get, **kwargs):
|
|
298
345
|
resp = method(
|
|
299
|
-
url,
|
|
346
|
+
url,
|
|
347
|
+
*args,
|
|
348
|
+
headers={"User-Agent": IIIF_USER_AGENT},
|
|
349
|
+
timeout=DOWNLOAD_TIMEOUT,
|
|
350
|
+
verify=should_verify_cert(url),
|
|
351
|
+
**kwargs,
|
|
300
352
|
)
|
|
301
353
|
resp.raise_for_status()
|
|
302
354
|
return resp
|
|
@@ -316,9 +368,9 @@ def download_tiles(url: str) -> Image:
|
|
|
316
368
|
|
|
317
369
|
image_width, image_height = info.get("width"), info.get("height")
|
|
318
370
|
assert image_width and image_height, "Missing image dimensions in info.json"
|
|
319
|
-
assert info.get(
|
|
320
|
-
"tiles"
|
|
321
|
-
)
|
|
371
|
+
assert info.get("tiles"), (
|
|
372
|
+
"Image cannot be retrieved at full size and tiles are not supported"
|
|
373
|
+
)
|
|
322
374
|
|
|
323
375
|
# Take the biggest available tile size
|
|
324
376
|
tile = sorted(info["tiles"], key=lambda tile: tile.get("width", 0), reverse=True)[0]
|
|
@@ -385,22 +437,20 @@ def trim_polygon(
|
|
|
385
437
|
:param image_width: Width of the image.
|
|
386
438
|
:param image_height: Height of the image.
|
|
387
439
|
:returns: A polygon trimmed to the image's bounds.
|
|
388
|
-
Some points may appear as missing, as the trimming can deduplicate points.
|
|
389
|
-
The first and last point are always equal, to reproduce the behavior
|
|
390
|
-
of the Arkindex backend.
|
|
391
440
|
:raises AssertionError: When argument types are invalid or when the trimmed polygon
|
|
392
441
|
is entirely outside of the image's bounds.
|
|
393
442
|
"""
|
|
394
443
|
|
|
395
|
-
assert isinstance(
|
|
396
|
-
|
|
397
|
-
)
|
|
398
|
-
assert
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
444
|
+
assert isinstance(polygon, list | tuple), (
|
|
445
|
+
"Polygon must be a valid list or tuple of points."
|
|
446
|
+
)
|
|
447
|
+
assert len(polygon) >= 3, "Polygon should have at least three points."
|
|
448
|
+
assert all(isinstance(point, list | tuple) for point in polygon), (
|
|
449
|
+
"Polygon points must be tuples or lists."
|
|
450
|
+
)
|
|
451
|
+
assert all(len(point) == 2 for point in polygon), (
|
|
452
|
+
"Polygon points must be tuples or lists of 2 elements."
|
|
453
|
+
)
|
|
404
454
|
assert all(
|
|
405
455
|
isinstance(point[0], int) and isinstance(point[1], int) for point in polygon
|
|
406
456
|
), "Polygon point coordinates must be integers."
|
|
@@ -408,7 +458,7 @@ def trim_polygon(
|
|
|
408
458
|
point[0] <= image_width and point[1] <= image_height for point in polygon
|
|
409
459
|
), "This polygon is entirely outside the image's bounds."
|
|
410
460
|
|
|
411
|
-
|
|
461
|
+
return [
|
|
412
462
|
[
|
|
413
463
|
min(image_width, max(0, x)),
|
|
414
464
|
min(image_height, max(0, y)),
|
|
@@ -416,17 +466,6 @@ def trim_polygon(
|
|
|
416
466
|
for x, y in polygon
|
|
417
467
|
]
|
|
418
468
|
|
|
419
|
-
updated_polygon = []
|
|
420
|
-
for point in trimmed_polygon:
|
|
421
|
-
if point not in updated_polygon:
|
|
422
|
-
updated_polygon.append(point)
|
|
423
|
-
|
|
424
|
-
# Add back the matching last point, if it was present in the original polygon
|
|
425
|
-
if polygon[-1] == polygon[0]:
|
|
426
|
-
updated_polygon.append(updated_polygon[0])
|
|
427
|
-
|
|
428
|
-
return updated_polygon
|
|
429
|
-
|
|
430
469
|
|
|
431
470
|
def revert_orientation(
|
|
432
471
|
element: "Element | CachedElement",
|
|
@@ -451,22 +490,22 @@ def revert_orientation(
|
|
|
451
490
|
from arkindex_worker.cache import CachedElement
|
|
452
491
|
from arkindex_worker.models import Element
|
|
453
492
|
|
|
454
|
-
assert element and isinstance(
|
|
455
|
-
element
|
|
456
|
-
)
|
|
457
|
-
assert polygon and isinstance(
|
|
458
|
-
polygon
|
|
459
|
-
)
|
|
460
|
-
assert isinstance(reverse, bool), "
|
|
493
|
+
assert element and isinstance(element, Element | CachedElement), (
|
|
494
|
+
"element shouldn't be null and should be an Element or CachedElement"
|
|
495
|
+
)
|
|
496
|
+
assert polygon and isinstance(polygon, list), (
|
|
497
|
+
"polygon shouldn't be null and should be a list"
|
|
498
|
+
)
|
|
499
|
+
assert isinstance(reverse, bool), "reverse should be a bool"
|
|
461
500
|
# Rotating with Pillow can cause it to move the image around, as the image cannot have negative coordinates
|
|
462
501
|
# and must be a rectangle. This means the origin point of any coordinates from an image is invalid, and the
|
|
463
502
|
# center of the bounding box of the rotated image is different from the center of the element's bounding box.
|
|
464
503
|
# To properly undo the mirroring and rotation implicitly applied by open_image, we first need to find the center
|
|
465
504
|
# of the rotated bounding box.
|
|
466
505
|
if isinstance(element, Element):
|
|
467
|
-
assert (
|
|
468
|
-
element
|
|
469
|
-
)
|
|
506
|
+
assert element.zone and element.zone.polygon, (
|
|
507
|
+
"element should have a zone and a polygon"
|
|
508
|
+
)
|
|
470
509
|
parent_ring = LinearRing(element.zone.polygon)
|
|
471
510
|
elif isinstance(element, CachedElement):
|
|
472
511
|
assert element.polygon, "cached element should have a polygon"
|
|
@@ -243,11 +243,12 @@ def batch_publication(func: Callable) -> Callable:
|
|
|
243
243
|
bound_func.apply_defaults()
|
|
244
244
|
batch_size = bound_func.arguments.get("batch_size")
|
|
245
245
|
assert (
|
|
246
|
-
batch_size and isinstance(batch_size, int) and batch_size > 0
|
|
246
|
+
batch_size is not None and isinstance(batch_size, int) and batch_size > 0
|
|
247
247
|
), "batch_size shouldn't be null and should be a strictly positive integer"
|
|
248
248
|
|
|
249
249
|
return func(self, *args, **kwargs)
|
|
250
250
|
|
|
251
|
+
wrapper.__name__ = func.__name__
|
|
251
252
|
return wrapper
|
|
252
253
|
|
|
253
254
|
|