arkindex-base-worker 0.4.0rc6__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/PKG-INFO +9 -12
  2. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/README.md +1 -1
  3. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_base_worker.egg-info/PKG-INFO +9 -12
  4. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_base_worker.egg-info/SOURCES.txt +3 -4
  5. arkindex_base_worker-0.5.0/arkindex_base_worker.egg-info/requires.txt +12 -0
  6. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_base_worker.egg-info/top_level.txt +1 -0
  7. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/__init__.py +3 -0
  8. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/cache.py +6 -25
  9. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/image.py +105 -66
  10. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/utils.py +2 -1
  11. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/__init__.py +17 -31
  12. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/base.py +16 -9
  13. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/classification.py +36 -34
  14. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/corpus.py +3 -3
  15. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/dataset.py +9 -9
  16. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/element.py +261 -231
  17. arkindex_base_worker-0.5.0/arkindex_worker/worker/entity.py +336 -0
  18. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/image.py +3 -3
  19. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/metadata.py +27 -38
  20. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/task.py +9 -9
  21. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/training.py +15 -11
  22. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/transcription.py +77 -71
  23. arkindex_base_worker-0.5.0/examples/standalone/python/worker.py +171 -0
  24. arkindex_base_worker-0.5.0/examples/tooled/python/worker.py +50 -0
  25. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/pyproject.toml +7 -12
  26. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/conftest.py +22 -36
  27. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_base_worker.py +1 -1
  28. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_cache.py +1 -2
  29. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_dataset_worker.py +1 -1
  30. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_element.py +200 -26
  31. arkindex_base_worker-0.4.0rc6/tests/test_elements_worker/test_entity_create.py → arkindex_base_worker-0.5.0/tests/test_elements_worker/test_entity.py +220 -227
  32. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_metadata.py +0 -47
  33. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_training.py +8 -8
  34. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_worker.py +15 -14
  35. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_image.py +244 -126
  36. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_merge.py +0 -7
  37. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_utils.py +37 -0
  38. arkindex_base_worker-0.4.0rc6/arkindex_base_worker.egg-info/requires.txt +0 -17
  39. arkindex_base_worker-0.4.0rc6/arkindex_worker/worker/entity.py +0 -405
  40. arkindex_base_worker-0.4.0rc6/arkindex_worker/worker/version.py +0 -58
  41. arkindex_base_worker-0.4.0rc6/tests/test_elements_worker/test_entity_list_and_check.py +0 -160
  42. arkindex_base_worker-0.4.0rc6/tests/test_elements_worker/test_version.py +0 -60
  43. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/LICENSE +0 -0
  44. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
  45. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/models.py +0 -0
  46. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/arkindex_worker/worker/process.py +0 -0
  47. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/hooks/pre_gen_project.py +0 -0
  48. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/setup.cfg +0 -0
  49. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/__init__.py +0 -0
  50. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_element.py +0 -0
  51. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/__init__.py +0 -0
  52. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_classification.py +0 -0
  53. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_cli.py +0 -0
  54. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_corpus.py +0 -0
  55. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_dataset.py +0 -0
  56. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_element_create_multiple.py +0 -0
  57. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_element_create_single.py +0 -0
  58. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_element_list_children.py +0 -0
  59. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_element_list_parents.py +0 -0
  60. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_image.py +0 -0
  61. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_process.py +0 -0
  62. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_task.py +0 -0
  63. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_transcription_create.py +0 -0
  64. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_transcription_create_with_elements.py +0 -0
  65. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/tests/test_elements_worker/test_transcription_list.py +0 -0
  66. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/worker-demo/tests/__init__.py +0 -0
  67. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/worker-demo/tests/conftest.py +0 -0
  68. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/worker-demo/tests/test_worker.py +0 -0
  69. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/worker-demo/worker_demo/__init__.py +0 -0
  70. {arkindex_base_worker-0.4.0rc6 → arkindex_base_worker-0.5.0}/worker-demo/worker_demo/worker.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0rc6
3
+ Version: 0.5.0
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -41,21 +41,18 @@ Classifier: Programming Language :: Python :: 3.12
41
41
  Requires-Python: >=3.10
42
42
  Description-Content-Type: text/markdown
43
43
  License-File: LICENSE
44
- Requires-Dist: humanize==4.11.0
44
+ Requires-Dist: humanize==4.12.3
45
45
  Requires-Dist: peewee~=3.17
46
- Requires-Dist: Pillow==11.0.0
47
- Requires-Dist: python-gnupg==0.5.3
46
+ Requires-Dist: Pillow==11.3.0
47
+ Requires-Dist: python-gnupg==0.5.4
48
48
  Requires-Dist: shapely==2.0.6
49
- Requires-Dist: teklia-toolbox==0.1.7b1
49
+ Requires-Dist: teklia-toolbox==0.1.11
50
50
  Requires-Dist: zstandard==0.23.0
51
- Provides-Extra: docs
52
- Requires-Dist: black==24.10.0; extra == "docs"
53
- Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
54
- Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
55
51
  Provides-Extra: tests
56
- Requires-Dist: pytest==8.3.4; extra == "tests"
52
+ Requires-Dist: pytest==8.3.5; extra == "tests"
57
53
  Requires-Dist: pytest-mock==3.14.0; extra == "tests"
58
54
  Requires-Dist: pytest-responses==0.5.1; extra == "tests"
55
+ Dynamic: license-file
59
56
 
60
57
  # Arkindex base Worker
61
58
 
@@ -65,7 +62,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
65
62
 
66
63
  ## Documentation
67
64
 
68
- The [documentation](https://workers.arkindex.org/) is made with [Material for MkDocs](https://github.com/squidfunk/mkdocs-material) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
65
+ The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
69
66
 
70
67
  ## Create a new worker using our template
71
68
 
@@ -6,7 +6,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
6
6
 
7
7
  ## Documentation
8
8
 
9
- The [documentation](https://workers.arkindex.org/) is made with [Material for MkDocs](https://github.com/squidfunk/mkdocs-material) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
9
+ The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
10
10
 
11
11
  ## Create a new worker using our template
12
12
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0rc6
3
+ Version: 0.5.0
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -41,21 +41,18 @@ Classifier: Programming Language :: Python :: 3.12
41
41
  Requires-Python: >=3.10
42
42
  Description-Content-Type: text/markdown
43
43
  License-File: LICENSE
44
- Requires-Dist: humanize==4.11.0
44
+ Requires-Dist: humanize==4.12.3
45
45
  Requires-Dist: peewee~=3.17
46
- Requires-Dist: Pillow==11.0.0
47
- Requires-Dist: python-gnupg==0.5.3
46
+ Requires-Dist: Pillow==11.3.0
47
+ Requires-Dist: python-gnupg==0.5.4
48
48
  Requires-Dist: shapely==2.0.6
49
- Requires-Dist: teklia-toolbox==0.1.7b1
49
+ Requires-Dist: teklia-toolbox==0.1.11
50
50
  Requires-Dist: zstandard==0.23.0
51
- Provides-Extra: docs
52
- Requires-Dist: black==24.10.0; extra == "docs"
53
- Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
54
- Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
55
51
  Provides-Extra: tests
56
- Requires-Dist: pytest==8.3.4; extra == "tests"
52
+ Requires-Dist: pytest==8.3.5; extra == "tests"
57
53
  Requires-Dist: pytest-mock==3.14.0; extra == "tests"
58
54
  Requires-Dist: pytest-responses==0.5.1; extra == "tests"
55
+ Dynamic: license-file
59
56
 
60
57
  # Arkindex base Worker
61
58
 
@@ -65,7 +62,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
65
62
 
66
63
  ## Documentation
67
64
 
68
- The [documentation](https://workers.arkindex.org/) is made with [Material for MkDocs](https://github.com/squidfunk/mkdocs-material) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
65
+ The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
69
66
 
70
67
  ## Create a new worker using our template
71
68
 
@@ -24,7 +24,8 @@ arkindex_worker/worker/process.py
24
24
  arkindex_worker/worker/task.py
25
25
  arkindex_worker/worker/training.py
26
26
  arkindex_worker/worker/transcription.py
27
- arkindex_worker/worker/version.py
27
+ examples/standalone/python/worker.py
28
+ examples/tooled/python/worker.py
28
29
  hooks/pre_gen_project.py
29
30
  tests/__init__.py
30
31
  tests/conftest.py
@@ -45,8 +46,7 @@ tests/test_elements_worker/test_element_create_multiple.py
45
46
  tests/test_elements_worker/test_element_create_single.py
46
47
  tests/test_elements_worker/test_element_list_children.py
47
48
  tests/test_elements_worker/test_element_list_parents.py
48
- tests/test_elements_worker/test_entity_create.py
49
- tests/test_elements_worker/test_entity_list_and_check.py
49
+ tests/test_elements_worker/test_entity.py
50
50
  tests/test_elements_worker/test_image.py
51
51
  tests/test_elements_worker/test_metadata.py
52
52
  tests/test_elements_worker/test_process.py
@@ -55,7 +55,6 @@ tests/test_elements_worker/test_training.py
55
55
  tests/test_elements_worker/test_transcription_create.py
56
56
  tests/test_elements_worker/test_transcription_create_with_elements.py
57
57
  tests/test_elements_worker/test_transcription_list.py
58
- tests/test_elements_worker/test_version.py
59
58
  tests/test_elements_worker/test_worker.py
60
59
  worker-demo/tests/__init__.py
61
60
  worker-demo/tests/conftest.py
@@ -0,0 +1,12 @@
1
+ humanize==4.12.3
2
+ peewee~=3.17
3
+ Pillow==11.3.0
4
+ python-gnupg==0.5.4
5
+ shapely==2.0.6
6
+ teklia-toolbox==0.1.11
7
+ zstandard==0.23.0
8
+
9
+ [tests]
10
+ pytest==8.3.5
11
+ pytest-mock==3.14.0
12
+ pytest-responses==0.5.1
@@ -1,6 +1,7 @@
1
1
  arkindex_worker
2
2
  dist
3
3
  docs
4
+ examples
4
5
  hooks
5
6
  tests
6
7
  worker-demo
@@ -1,3 +1,4 @@
1
+ import importlib.metadata
1
2
  import logging
2
3
 
3
4
  logging.basicConfig(
@@ -5,3 +6,5 @@ logging.basicConfig(
5
6
  format="%(asctime)s %(levelname)s/%(name)s: %(message)s",
6
7
  )
7
8
  logger = logging.getLogger(__name__)
9
+
10
+ VERSION = importlib.metadata.version("arkindex-base-worker")
@@ -206,23 +206,6 @@ class CachedClassification(Model):
206
206
  table_name = "classifications"
207
207
 
208
208
 
209
- class CachedEntity(Model):
210
- """
211
- Cache entity table
212
- """
213
-
214
- id = UUIDField(primary_key=True)
215
- type = CharField(max_length=50)
216
- name = TextField()
217
- validated = BooleanField(default=False)
218
- metas = JSONField(null=True)
219
- worker_run_id = UUIDField(null=True)
220
-
221
- class Meta:
222
- database = db
223
- table_name = "entities"
224
-
225
-
226
209
  class CachedTranscriptionEntity(Model):
227
210
  """
228
211
  Cache transcription entity table
@@ -231,14 +214,14 @@ class CachedTranscriptionEntity(Model):
231
214
  transcription = ForeignKeyField(
232
215
  CachedTranscription, backref="transcription_entities"
233
216
  )
234
- entity = ForeignKeyField(CachedEntity, backref="transcription_entities")
217
+ type = CharField(max_length=50)
235
218
  offset = IntegerField(constraints=[Check("offset >= 0")])
236
219
  length = IntegerField(constraints=[Check("length > 0")])
237
220
  worker_run_id = UUIDField(null=True)
238
221
  confidence = FloatField(null=True)
239
222
 
240
223
  class Meta:
241
- primary_key = CompositeKey("transcription", "entity")
224
+ primary_key = CompositeKey("transcription", "type")
242
225
  database = db
243
226
  table_name = "transcription_entities"
244
227
 
@@ -272,12 +255,11 @@ MODELS = [
272
255
  CachedElement,
273
256
  CachedTranscription,
274
257
  CachedClassification,
275
- CachedEntity,
276
258
  CachedTranscriptionEntity,
277
259
  CachedDataset,
278
260
  CachedDatasetElement,
279
261
  ]
280
- SQL_VERSION = 3
262
+ SQL_VERSION = 4
281
263
 
282
264
 
283
265
  def init_cache_db(path: Path):
@@ -327,9 +309,9 @@ def check_version(cache_path: str | Path):
327
309
  except OperationalError:
328
310
  version = None
329
311
 
330
- assert (
331
- version == SQL_VERSION
332
- ), f"The SQLite database {cache_path} does not have the correct cache version, it should be {SQL_VERSION}"
312
+ assert version == SQL_VERSION, (
313
+ f"The SQLite database {cache_path} does not have the correct cache version, it should be {SQL_VERSION}"
314
+ )
333
315
 
334
316
 
335
317
  def merge_parents_cache(paths: list, current_database: Path):
@@ -365,7 +347,6 @@ def merge_parents_cache(paths: list, current_database: Path):
365
347
  f"REPLACE INTO elements SELECT * FROM source_{idx}.elements;",
366
348
  f"REPLACE INTO transcriptions SELECT * FROM source_{idx}.transcriptions;",
367
349
  f"REPLACE INTO classifications SELECT * FROM source_{idx}.classifications;",
368
- f"REPLACE INTO entities SELECT * FROM source_{idx}.entities;",
369
350
  f"REPLACE INTO transcription_entities SELECT * FROM source_{idx}.transcription_entities;",
370
351
  f"REPLACE INTO datasets SELECT * FROM source_{idx}.datasets;",
371
352
  f"REPLACE INTO dataset_elements SELECT * FROM source_{idx}.dataset_elements;",
@@ -2,6 +2,7 @@
2
2
  Helper methods to download and open IIIF images, and manage polygons.
3
3
  """
4
4
 
5
+ import base64
5
6
  import functools
6
7
  import os
7
8
  import re
@@ -14,6 +15,7 @@ from pathlib import Path
14
15
  from typing import TYPE_CHECKING
15
16
 
16
17
  import humanize
18
+ import numpy as np
17
19
  import requests
18
20
  from PIL import Image
19
21
  from shapely.affinity import rotate, scale, translate
@@ -22,10 +24,11 @@ from tenacity import (
22
24
  retry,
23
25
  retry_if_exception_type,
24
26
  stop_after_attempt,
25
- wait_exponential,
27
+ wait_chain,
28
+ wait_fixed,
26
29
  )
27
30
 
28
- from arkindex_worker import logger
31
+ from arkindex_worker import VERSION, logger
29
32
  from arkindex_worker.utils import pluralize
30
33
  from teklia_toolbox.requests import should_verify_cert
31
34
 
@@ -39,14 +42,16 @@ DOWNLOAD_TIMEOUT = (30, 60)
39
42
 
40
43
  BoundingBox = namedtuple("BoundingBox", ["x", "y", "width", "height"])
41
44
 
45
+ # Specific User-Agent to bypass potential server limitations
46
+ IIIF_USER_AGENT = f"Teklia/Workers {VERSION}"
42
47
  # To parse IIIF Urls
43
48
  IIIF_URL = re.compile(r"\w+:\/{2}.+\/.+\/.+\/.+\/(?P<size>.+)\/!?\d+\/\w+\.\w+")
44
49
  # Full size of the region
45
50
  IIIF_FULL = "full"
46
51
  # Maximum size available
47
52
  IIIF_MAX = "max"
48
- # Ratio to resize image
49
- IMAGE_RATIO = [1, 0.9, 0.85, 0.80, 0.75, 0.70, 0.60, 0.50, 0.40, 0.30]
53
+ # Ratios to resize images: 1.0, 0.95, [...], 0.1, 0.05
54
+ IMAGE_RATIOS = np.arange(1, 0, -0.05).round(2).tolist()
50
55
 
51
56
 
52
57
  def update_pillow_image_size_limit(func):
@@ -206,44 +211,81 @@ def upload_image(image: Image, url: str) -> requests.Response:
206
211
  def resized_images(
207
212
  *args,
208
213
  element: "Element",
209
- max_pixels: int | None = None,
214
+ max_pixels_short: int | None = None,
215
+ max_pixels_long: int | None = None,
210
216
  max_bytes: int | None = None,
217
+ use_base64: bool = False,
211
218
  **kwargs,
212
- ) -> Iterator[Generator[tempfile.NamedTemporaryFile, None, None]]:
219
+ ) -> Iterator[Generator[tempfile._TemporaryFileWrapper | str]]:
213
220
  """
214
- Build resized images according to the pixel and byte limits.
221
+ Build resized images according to pixel and byte limits.
215
222
 
216
223
  :param *args: Positional arguments passed to [arkindex_worker.models.Element.open_image_tempfile][].
217
224
  :param element: Element whose image needs to be resized.
218
- :param max_pixels: Maximum pixel size of the resized images.
225
+ :param max_pixels_short: Maximum pixel size of the resized images' short side.
226
+ :param max_pixels_long: Maximum pixel size of the resized images' long side.
219
227
  :param max_bytes: Maximum byte size of the resized images.
228
+ :param use_base64: Whether or not to encode resized images in base64 before calculating their size.
220
229
  :param **kwargs: Keyword arguments passed to [arkindex_worker.models.Element.open_image_tempfile][].
221
- :returns: An iterator of the temporary file of the resized image.
230
+ :returns: An iterator of temporary files for resized images OR an iterator of base64-encoded strings if `use_base64` is set.
222
231
  """
223
232
  _, _, element_width, element_height = polygon_bounding_box(element.polygon)
233
+ logger.info(
234
+ f"This element's image dimensions are ({element_width} x {element_height})."
235
+ )
236
+
237
+ portrait_format = element_width <= element_height
238
+ max_pixels_width, max_pixels_height = (
239
+ (max_pixels_short, max_pixels_long)
240
+ if portrait_format
241
+ else (max_pixels_long, max_pixels_short)
242
+ )
224
243
 
225
- logger.info(f"This element's image sizes are ({element_width} x {element_height}).")
226
- if max_pixels and max(element_width, element_height) > max_pixels:
244
+ # The image dimension is already within the pixel limitation, no need to resize the image
245
+ if max_pixels_width and max_pixels_width >= element_width:
246
+ max_pixels_width = None
247
+ if max_pixels_height and max_pixels_height >= element_height:
248
+ max_pixels_height = None
249
+
250
+ if (max_pixels_width and element_width > max_pixels_width) or (
251
+ max_pixels_height and element_height > max_pixels_height
252
+ ):
227
253
  logger.warning(
228
- f"Maximum image input size supported is ({max_pixels} x {max_pixels})."
254
+ f"Maximum image dimensions supported are ({max_pixels_width or element_width} x {max_pixels_height or element_height})."
229
255
  )
230
256
  logger.warning("The image will be resized.")
231
257
 
232
- element_pixel, param = (
233
- (element_width, "max_width")
234
- if element_width > element_height
235
- else (element_height, "max_height")
236
- )
258
+ # No limitations provided, we keep the image initial dimensions
259
+ if max_pixels_width is None and max_pixels_height is None:
260
+ open_image_param, max_value = (
261
+ ("max_height", element_height)
262
+ if portrait_format
263
+ else ("max_width", element_width)
264
+ )
265
+ # A limitation is only given for the height, we resize it
266
+ elif max_pixels_width is None:
267
+ open_image_param, max_value = ("max_height", max_pixels_height)
268
+ # A limitation is only given for the width, we resize it
269
+ elif max_pixels_height is None:
270
+ open_image_param, max_value = ("max_width", max_pixels_width)
271
+ # Limitations are provided for both sides:
272
+ # - we resize only the one with the biggest scale factor
273
+ # - the remaining one will automatically fall within the other limitation
274
+ else:
275
+ width_rescaling_factor = element_width / max_pixels_width
276
+ height_rescaling_factor = element_height / max_pixels_height
277
+ open_image_param, max_value = (
278
+ ("max_height", max_pixels_height)
279
+ if height_rescaling_factor > width_rescaling_factor
280
+ else ("max_width", max_pixels_width)
281
+ )
237
282
 
238
- for resized_pixel in sorted(
239
- set(
240
- min(round(ratio * element_pixel), max_pixels or element_pixel)
241
- for ratio in IMAGE_RATIO
242
- ),
243
- reverse=True,
244
- ):
283
+ resized_pixels = set(
284
+ min(round(ratio * max_value), max_value) for ratio in IMAGE_RATIOS
285
+ )
286
+ for resized_pixel in sorted(resized_pixels, reverse=True):
245
287
  with element.open_image_tempfile(
246
- *args, **{**kwargs, param: resized_pixel}
288
+ *args, **{**kwargs, open_image_param: resized_pixel}
247
289
  ) as image:
248
290
  pillow_image = Image.open(image)
249
291
  if (
@@ -254,8 +296,12 @@ def resized_images(
254
296
  f"The image was resized to ({pillow_image.width} x {pillow_image.height})."
255
297
  )
256
298
 
257
- # The image is still too large
258
299
  image_size = Path(image.name).stat().st_size
300
+ if use_base64:
301
+ image = base64.b64encode(Path(image.name).read_bytes()).decode("utf-8")
302
+ image_size = len(image)
303
+
304
+ # The image is still too heavy
259
305
  if max_bytes and image_size > max_bytes:
260
306
  logger.warning(f"The image size is {humanize.naturalsize(image_size)}.")
261
307
  logger.warning(
@@ -283,20 +329,26 @@ def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
283
329
  def _retry_log(retry_state, *args, **kwargs):
284
330
  logger.warning(
285
331
  f"Request to {retry_state.args[0]} failed ({repr(retry_state.outcome.exception())}), "
286
- f'retrying in {retry_state.idle_for} {pluralize("second", retry_state.idle_for)}'
332
+ f"retrying in {retry_state.idle_for} {pluralize('second', retry_state.idle_for)}"
287
333
  )
288
334
 
289
335
 
290
336
  @retry(
291
337
  stop=stop_after_attempt(3),
292
- wait=wait_exponential(multiplier=2),
338
+ # In the event of `requests.RequestException` errors, the call will be retried after 5 seconds, 10 seconds and finally 90 seconds before failing.
339
+ wait=wait_chain(wait_fixed(5), wait_fixed(10), wait_fixed(90)),
293
340
  retry=retry_if_exception_type(requests.RequestException),
294
341
  before_sleep=_retry_log,
295
342
  reraise=True,
296
343
  )
297
344
  def _retried_request(url, *args, method=requests.get, **kwargs):
298
345
  resp = method(
299
- url, *args, timeout=DOWNLOAD_TIMEOUT, verify=should_verify_cert(url), **kwargs
346
+ url,
347
+ *args,
348
+ headers={"User-Agent": IIIF_USER_AGENT},
349
+ timeout=DOWNLOAD_TIMEOUT,
350
+ verify=should_verify_cert(url),
351
+ **kwargs,
300
352
  )
301
353
  resp.raise_for_status()
302
354
  return resp
@@ -316,9 +368,9 @@ def download_tiles(url: str) -> Image:
316
368
 
317
369
  image_width, image_height = info.get("width"), info.get("height")
318
370
  assert image_width and image_height, "Missing image dimensions in info.json"
319
- assert info.get(
320
- "tiles"
321
- ), "Image cannot be retrieved at full size and tiles are not supported"
371
+ assert info.get("tiles"), (
372
+ "Image cannot be retrieved at full size and tiles are not supported"
373
+ )
322
374
 
323
375
  # Take the biggest available tile size
324
376
  tile = sorted(info["tiles"], key=lambda tile: tile.get("width", 0), reverse=True)[0]
@@ -385,22 +437,20 @@ def trim_polygon(
385
437
  :param image_width: Width of the image.
386
438
  :param image_height: Height of the image.
387
439
  :returns: A polygon trimmed to the image's bounds.
388
- Some points may appear as missing, as the trimming can deduplicate points.
389
- The first and last point are always equal, to reproduce the behavior
390
- of the Arkindex backend.
391
440
  :raises AssertionError: When argument types are invalid or when the trimmed polygon
392
441
  is entirely outside of the image's bounds.
393
442
  """
394
443
 
395
- assert isinstance(
396
- polygon, list | tuple
397
- ), "Input polygon must be a valid list or tuple of points."
398
- assert all(
399
- isinstance(point, list | tuple) for point in polygon
400
- ), "Polygon points must be tuples or lists."
401
- assert all(
402
- len(point) == 2 for point in polygon
403
- ), "Polygon points must be tuples or lists of 2 elements."
444
+ assert isinstance(polygon, list | tuple), (
445
+ "Polygon must be a valid list or tuple of points."
446
+ )
447
+ assert len(polygon) >= 3, "Polygon should have at least three points."
448
+ assert all(isinstance(point, list | tuple) for point in polygon), (
449
+ "Polygon points must be tuples or lists."
450
+ )
451
+ assert all(len(point) == 2 for point in polygon), (
452
+ "Polygon points must be tuples or lists of 2 elements."
453
+ )
404
454
  assert all(
405
455
  isinstance(point[0], int) and isinstance(point[1], int) for point in polygon
406
456
  ), "Polygon point coordinates must be integers."
@@ -408,7 +458,7 @@ def trim_polygon(
408
458
  point[0] <= image_width and point[1] <= image_height for point in polygon
409
459
  ), "This polygon is entirely outside the image's bounds."
410
460
 
411
- trimmed_polygon = [
461
+ return [
412
462
  [
413
463
  min(image_width, max(0, x)),
414
464
  min(image_height, max(0, y)),
@@ -416,17 +466,6 @@ def trim_polygon(
416
466
  for x, y in polygon
417
467
  ]
418
468
 
419
- updated_polygon = []
420
- for point in trimmed_polygon:
421
- if point not in updated_polygon:
422
- updated_polygon.append(point)
423
-
424
- # Add back the matching last point, if it was present in the original polygon
425
- if polygon[-1] == polygon[0]:
426
- updated_polygon.append(updated_polygon[0])
427
-
428
- return updated_polygon
429
-
430
469
 
431
470
  def revert_orientation(
432
471
  element: "Element | CachedElement",
@@ -451,22 +490,22 @@ def revert_orientation(
451
490
  from arkindex_worker.cache import CachedElement
452
491
  from arkindex_worker.models import Element
453
492
 
454
- assert element and isinstance(
455
- element, Element | CachedElement
456
- ), "element shouldn't be null and should be an Element or CachedElement"
457
- assert polygon and isinstance(
458
- polygon, list
459
- ), "polygon shouldn't be null and should be a list"
460
- assert isinstance(reverse, bool), "Reverse should be a bool"
493
+ assert element and isinstance(element, Element | CachedElement), (
494
+ "element shouldn't be null and should be an Element or CachedElement"
495
+ )
496
+ assert polygon and isinstance(polygon, list), (
497
+ "polygon shouldn't be null and should be a list"
498
+ )
499
+ assert isinstance(reverse, bool), "reverse should be a bool"
461
500
  # Rotating with Pillow can cause it to move the image around, as the image cannot have negative coordinates
462
501
  # and must be a rectangle. This means the origin point of any coordinates from an image is invalid, and the
463
502
  # center of the bounding box of the rotated image is different from the center of the element's bounding box.
464
503
  # To properly undo the mirroring and rotation implicitly applied by open_image, we first need to find the center
465
504
  # of the rotated bounding box.
466
505
  if isinstance(element, Element):
467
- assert (
468
- element.zone and element.zone.polygon
469
- ), "element should have a zone and a polygon"
506
+ assert element.zone and element.zone.polygon, (
507
+ "element should have a zone and a polygon"
508
+ )
470
509
  parent_ring = LinearRing(element.zone.polygon)
471
510
  elif isinstance(element, CachedElement):
472
511
  assert element.polygon, "cached element should have a polygon"
@@ -243,11 +243,12 @@ def batch_publication(func: Callable) -> Callable:
243
243
  bound_func.apply_defaults()
244
244
  batch_size = bound_func.arguments.get("batch_size")
245
245
  assert (
246
- batch_size and isinstance(batch_size, int) and batch_size > 0
246
+ batch_size is not None and isinstance(batch_size, int) and batch_size > 0
247
247
  ), "batch_size shouldn't be null and should be a strictly positive integer"
248
248
 
249
249
  return func(self, *args, **kwargs)
250
250
 
251
+ wrapper.__name__ = func.__name__
251
252
  return wrapper
252
253
 
253
254