arkindex-base-worker 0.4.0__tar.gz → 0.4.0a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/PKG-INFO +10 -12
  2. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_base_worker.egg-info/PKG-INFO +10 -12
  3. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_base_worker.egg-info/SOURCES.txt +4 -14
  4. arkindex_base_worker-0.4.0a1/arkindex_base_worker.egg-info/requires.txt +16 -0
  5. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/cache.py +1 -1
  6. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/image.py +1 -120
  7. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/utils.py +0 -82
  8. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/__init__.py +161 -46
  9. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/base.py +11 -36
  10. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/classification.py +18 -34
  11. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/corpus.py +4 -21
  12. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/dataset.py +1 -71
  13. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/element.py +91 -352
  14. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/entity.py +11 -11
  15. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/metadata.py +9 -19
  16. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/task.py +4 -5
  17. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/training.py +18 -21
  18. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/transcription.py +68 -89
  19. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/version.py +1 -3
  20. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/pyproject.toml +11 -12
  21. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/__init__.py +1 -1
  22. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/conftest.py +45 -33
  23. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_base_worker.py +3 -204
  24. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_dataset_worker.py +4 -7
  25. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_classification.py → arkindex_base_worker-0.4.0a1/tests/test_elements_worker/test_classifications.py +61 -194
  26. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_corpus.py +1 -32
  27. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_dataset.py +1 -1
  28. arkindex_base_worker-0.4.0a1/tests/test_elements_worker/test_elements.py +2734 -0
  29. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_entity_create.py → arkindex_base_worker-0.4.0a1/tests/test_elements_worker/test_entities.py +160 -26
  30. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_image.py +1 -2
  31. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_metadata.py +99 -224
  32. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_task.py +1 -1
  33. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_training.py +43 -17
  34. arkindex_base_worker-0.4.0a1/tests/test_elements_worker/test_transcriptions.py +2102 -0
  35. arkindex_base_worker-0.4.0a1/tests/test_elements_worker/test_worker.py +514 -0
  36. arkindex_base_worker-0.4.0a1/tests/test_image.py +584 -0
  37. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_merge.py +2 -1
  38. arkindex_base_worker-0.4.0a1/tests/test_utils.py +57 -0
  39. arkindex_base_worker-0.4.0/arkindex_base_worker.egg-info/requires.txt +0 -17
  40. arkindex_base_worker-0.4.0/arkindex_worker/worker/process.py +0 -92
  41. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_element.py +0 -427
  42. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_element_create_multiple.py +0 -715
  43. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_element_create_single.py +0 -528
  44. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_element_list_children.py +0 -969
  45. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_element_list_parents.py +0 -530
  46. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_entity_list_and_check.py +0 -160
  47. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_process.py +0 -89
  48. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_transcription_create.py +0 -873
  49. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_transcription_create_with_elements.py +0 -951
  50. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_transcription_list.py +0 -450
  51. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_version.py +0 -60
  52. arkindex_base_worker-0.4.0/tests/test_elements_worker/test_worker.py +0 -797
  53. arkindex_base_worker-0.4.0/tests/test_image.py +0 -809
  54. arkindex_base_worker-0.4.0/tests/test_utils.py +0 -120
  55. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/LICENSE +0 -0
  56. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/README.md +0 -0
  57. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
  58. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_base_worker.egg-info/top_level.txt +0 -0
  59. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/__init__.py +0 -0
  60. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/models.py +0 -0
  61. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/arkindex_worker/worker/image.py +0 -0
  62. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/hooks/pre_gen_project.py +0 -0
  63. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/setup.cfg +0 -0
  64. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_cache.py +0 -0
  65. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_element.py +0 -0
  66. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/__init__.py +0 -0
  67. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/tests/test_elements_worker/test_cli.py +0 -0
  68. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/worker-demo/tests/__init__.py +0 -0
  69. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/worker-demo/tests/conftest.py +0 -0
  70. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/worker-demo/tests/test_worker.py +0 -0
  71. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/worker-demo/worker_demo/__init__.py +0 -0
  72. {arkindex_base_worker-0.4.0 → arkindex_base_worker-0.4.0a1}/worker-demo/worker_demo/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0
3
+ Version: 0.4.0a1
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -37,23 +37,21 @@ Classifier: License :: OSI Approved :: MIT License
37
37
  Classifier: Programming Language :: Python :: 3 :: Only
38
38
  Classifier: Programming Language :: Python :: 3.10
39
39
  Classifier: Programming Language :: Python :: 3.11
40
- Classifier: Programming Language :: Python :: 3.12
41
40
  Requires-Python: >=3.10
42
41
  Description-Content-Type: text/markdown
43
42
  License-File: LICENSE
44
- Requires-Dist: humanize==4.11.0
45
43
  Requires-Dist: peewee~=3.17
46
- Requires-Dist: Pillow==11.0.0
47
- Requires-Dist: python-gnupg==0.5.3
48
- Requires-Dist: shapely==2.0.6
49
- Requires-Dist: teklia-toolbox==0.1.7
50
- Requires-Dist: zstandard==0.23.0
44
+ Requires-Dist: Pillow==10.3.0
45
+ Requires-Dist: python-gnupg==0.5.2
46
+ Requires-Dist: shapely==2.0.3
47
+ Requires-Dist: teklia-toolbox==0.1.5
48
+ Requires-Dist: zstandard==0.22.0
51
49
  Provides-Extra: docs
52
- Requires-Dist: black==24.10.0; extra == "docs"
53
- Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
54
- Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
50
+ Requires-Dist: black==24.4.0; extra == "docs"
51
+ Requires-Dist: mkdocs-material==9.5.17; extra == "docs"
52
+ Requires-Dist: mkdocstrings-python==1.9.2; extra == "docs"
55
53
  Provides-Extra: tests
56
- Requires-Dist: pytest==8.3.4; extra == "tests"
54
+ Requires-Dist: pytest==8.1.1; extra == "tests"
57
55
  Requires-Dist: pytest-mock==3.14.0; extra == "tests"
58
56
  Requires-Dist: pytest-responses==0.5.1; extra == "tests"
59
57
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0
3
+ Version: 0.4.0a1
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -37,23 +37,21 @@ Classifier: License :: OSI Approved :: MIT License
37
37
  Classifier: Programming Language :: Python :: 3 :: Only
38
38
  Classifier: Programming Language :: Python :: 3.10
39
39
  Classifier: Programming Language :: Python :: 3.11
40
- Classifier: Programming Language :: Python :: 3.12
41
40
  Requires-Python: >=3.10
42
41
  Description-Content-Type: text/markdown
43
42
  License-File: LICENSE
44
- Requires-Dist: humanize==4.11.0
45
43
  Requires-Dist: peewee~=3.17
46
- Requires-Dist: Pillow==11.0.0
47
- Requires-Dist: python-gnupg==0.5.3
48
- Requires-Dist: shapely==2.0.6
49
- Requires-Dist: teklia-toolbox==0.1.7
50
- Requires-Dist: zstandard==0.23.0
44
+ Requires-Dist: Pillow==10.3.0
45
+ Requires-Dist: python-gnupg==0.5.2
46
+ Requires-Dist: shapely==2.0.3
47
+ Requires-Dist: teklia-toolbox==0.1.5
48
+ Requires-Dist: zstandard==0.22.0
51
49
  Provides-Extra: docs
52
- Requires-Dist: black==24.10.0; extra == "docs"
53
- Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
54
- Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
50
+ Requires-Dist: black==24.4.0; extra == "docs"
51
+ Requires-Dist: mkdocs-material==9.5.17; extra == "docs"
52
+ Requires-Dist: mkdocstrings-python==1.9.2; extra == "docs"
55
53
  Provides-Extra: tests
56
- Requires-Dist: pytest==8.3.4; extra == "tests"
54
+ Requires-Dist: pytest==8.1.1; extra == "tests"
57
55
  Requires-Dist: pytest-mock==3.14.0; extra == "tests"
58
56
  Requires-Dist: pytest-responses==0.5.1; extra == "tests"
59
57
 
@@ -20,7 +20,6 @@ arkindex_worker/worker/element.py
20
20
  arkindex_worker/worker/entity.py
21
21
  arkindex_worker/worker/image.py
22
22
  arkindex_worker/worker/metadata.py
23
- arkindex_worker/worker/process.py
24
23
  arkindex_worker/worker/task.py
25
24
  arkindex_worker/worker/training.py
26
25
  arkindex_worker/worker/transcription.py
@@ -36,26 +35,17 @@ tests/test_image.py
36
35
  tests/test_merge.py
37
36
  tests/test_utils.py
38
37
  tests/test_elements_worker/__init__.py
39
- tests/test_elements_worker/test_classification.py
38
+ tests/test_elements_worker/test_classifications.py
40
39
  tests/test_elements_worker/test_cli.py
41
40
  tests/test_elements_worker/test_corpus.py
42
41
  tests/test_elements_worker/test_dataset.py
43
- tests/test_elements_worker/test_element.py
44
- tests/test_elements_worker/test_element_create_multiple.py
45
- tests/test_elements_worker/test_element_create_single.py
46
- tests/test_elements_worker/test_element_list_children.py
47
- tests/test_elements_worker/test_element_list_parents.py
48
- tests/test_elements_worker/test_entity_create.py
49
- tests/test_elements_worker/test_entity_list_and_check.py
42
+ tests/test_elements_worker/test_elements.py
43
+ tests/test_elements_worker/test_entities.py
50
44
  tests/test_elements_worker/test_image.py
51
45
  tests/test_elements_worker/test_metadata.py
52
- tests/test_elements_worker/test_process.py
53
46
  tests/test_elements_worker/test_task.py
54
47
  tests/test_elements_worker/test_training.py
55
- tests/test_elements_worker/test_transcription_create.py
56
- tests/test_elements_worker/test_transcription_create_with_elements.py
57
- tests/test_elements_worker/test_transcription_list.py
58
- tests/test_elements_worker/test_version.py
48
+ tests/test_elements_worker/test_transcriptions.py
59
49
  tests/test_elements_worker/test_worker.py
60
50
  worker-demo/tests/__init__.py
61
51
  worker-demo/tests/conftest.py
@@ -0,0 +1,16 @@
1
+ peewee~=3.17
2
+ Pillow==10.3.0
3
+ python-gnupg==0.5.2
4
+ shapely==2.0.3
5
+ teklia-toolbox==0.1.5
6
+ zstandard==0.22.0
7
+
8
+ [docs]
9
+ black==24.4.0
10
+ mkdocs-material==9.5.17
11
+ mkdocstrings-python==1.9.2
12
+
13
+ [tests]
14
+ pytest==8.1.1
15
+ pytest-mock==3.14.0
16
+ pytest-responses==0.5.1
@@ -380,7 +380,7 @@ def unsupported_cache(func):
380
380
  def wrapper(self, *args, **kwargs):
381
381
  results = func(self, *args, **kwargs)
382
382
 
383
- if self.use_cache:
383
+ if not (self.is_read_only or self.use_cache):
384
384
  logger.warning(
385
385
  f"This API helper `{func.__name__}` did not update the cache database"
386
386
  )
@@ -2,18 +2,13 @@
2
2
  Helper methods to download and open IIIF images, and manage polygons.
3
3
  """
4
4
 
5
- import functools
6
- import os
7
5
  import re
8
- import tempfile
9
6
  from collections import namedtuple
10
- from collections.abc import Generator, Iterator
11
7
  from io import BytesIO
12
8
  from math import ceil
13
9
  from pathlib import Path
14
10
  from typing import TYPE_CHECKING
15
11
 
16
- import humanize
17
12
  import requests
18
13
  from PIL import Image
19
14
  from shapely.affinity import rotate, scale, translate
@@ -26,7 +21,6 @@ from tenacity import (
26
21
  )
27
22
 
28
23
  from arkindex_worker import logger
29
- from arkindex_worker.utils import pluralize
30
24
  from teklia_toolbox.requests import should_verify_cert
31
25
 
32
26
  # Avoid circular imports error when type checking
@@ -45,57 +39,8 @@ IIIF_URL = re.compile(r"\w+:\/{2}.+\/.+\/.+\/.+\/(?P<size>.+)\/!?\d+\/\w+\.\w+")
45
39
  IIIF_FULL = "full"
46
40
  # Maximum size available
47
41
  IIIF_MAX = "max"
48
- # Ratio to resize image
49
- IMAGE_RATIO = [1, 0.9, 0.85, 0.80, 0.75, 0.70, 0.60, 0.50, 0.40, 0.30]
50
42
 
51
43
 
52
- def update_pillow_image_size_limit(func):
53
- """
54
- Update Pillow Image size limit
55
- """
56
-
57
- @functools.wraps(func)
58
- def wrapper(
59
- *args,
60
- max_image_pixels: str | int | None = os.getenv("ARKINDEX_MAX_IMAGE_PIXELS"),
61
- **kwargs,
62
- ):
63
- """
64
- Wrapper to update Pillow Image size limit and restore it at the end of the function.
65
-
66
- :param *args: Positional arguments passed to the function.
67
- :param max_image_pixels: Pillow Image size limit to use.
68
- :param **kwargs: Keyword arguments passed to the function.
69
- """
70
- MAX_IMAGE_PIXELS = Image.MAX_IMAGE_PIXELS
71
-
72
- # Override Pillow Image size limit
73
- if max_image_pixels is not None:
74
- max_image_pixels = int(max_image_pixels)
75
- # Override Pillow limit for detecting decompression bombs, disabled if set to 0
76
- if max_image_pixels == 0:
77
- logger.warning(
78
- "Pillow Image size limit is completely disabled, make sure you trust the image source."
79
- )
80
- Image.MAX_IMAGE_PIXELS = None
81
- else:
82
- Image.MAX_IMAGE_PIXELS = max_image_pixels
83
-
84
- try:
85
- results = func(*args, **kwargs)
86
- except:
87
- # Restore initial Pillow Image size limit
88
- Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS
89
- raise
90
-
91
- # Restore initial Pillow Image size limit
92
- Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS
93
- return results
94
-
95
- return wrapper
96
-
97
-
98
- @update_pillow_image_size_limit
99
44
  def open_image(
100
45
  path: str,
101
46
  mode: str | None = "RGB",
@@ -203,70 +148,6 @@ def upload_image(image: Image, url: str) -> requests.Response:
203
148
  return resp
204
149
 
205
150
 
206
- def resized_images(
207
- *args,
208
- element: "Element",
209
- max_pixels: int | None = None,
210
- max_bytes: int | None = None,
211
- **kwargs,
212
- ) -> Iterator[Generator[tempfile.NamedTemporaryFile, None, None]]:
213
- """
214
- Build resized images according to the pixel and byte limits.
215
-
216
- :param *args: Positional arguments passed to [arkindex_worker.models.Element.open_image_tempfile][].
217
- :param element: Element whose image needs to be resized.
218
- :param max_pixels: Maximum pixel size of the resized images.
219
- :param max_bytes: Maximum byte size of the resized images.
220
- :param **kwargs: Keyword arguments passed to [arkindex_worker.models.Element.open_image_tempfile][].
221
- :returns: An iterator of the temporary file of the resized image.
222
- """
223
- _, _, element_width, element_height = polygon_bounding_box(element.polygon)
224
-
225
- logger.info(f"This element's image sizes are ({element_width} x {element_height}).")
226
- if max_pixels and max(element_width, element_height) > max_pixels:
227
- logger.warning(
228
- f"Maximum image input size supported is ({max_pixels} x {max_pixels})."
229
- )
230
- logger.warning("The image will be resized.")
231
-
232
- element_pixel, param = (
233
- (element_width, "max_width")
234
- if element_width > element_height
235
- else (element_height, "max_height")
236
- )
237
-
238
- for resized_pixel in sorted(
239
- set(
240
- min(round(ratio * element_pixel), max_pixels or element_pixel)
241
- for ratio in IMAGE_RATIO
242
- ),
243
- reverse=True,
244
- ):
245
- with element.open_image_tempfile(
246
- *args, **{**kwargs, param: resized_pixel}
247
- ) as image:
248
- pillow_image = Image.open(image)
249
- if (
250
- pillow_image.width != element_width
251
- or pillow_image.height != element_height
252
- ):
253
- logger.warning(
254
- f"The image was resized to ({pillow_image.width} x {pillow_image.height})."
255
- )
256
-
257
- # The image is still too large
258
- image_size = Path(image.name).stat().st_size
259
- if max_bytes and image_size > max_bytes:
260
- logger.warning(f"The image size is {humanize.naturalsize(image_size)}.")
261
- logger.warning(
262
- f"Maximum image input size supported is {humanize.naturalsize(max_bytes)}."
263
- )
264
- logger.warning("The image will be resized.")
265
- continue
266
-
267
- yield image
268
-
269
-
270
151
  def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
271
152
  """
272
153
  Compute the rectangle bounding box of a polygon.
@@ -283,7 +164,7 @@ def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
283
164
  def _retry_log(retry_state, *args, **kwargs):
284
165
  logger.warning(
285
166
  f"Request to {retry_state.args[0]} failed ({repr(retry_state.outcome.exception())}), "
286
- f'retrying in {retry_state.idle_for} {pluralize("second", retry_state.idle_for)}'
167
+ f"retrying in {retry_state.idle_for} seconds"
287
168
  )
288
169
 
289
170
 
@@ -1,41 +1,14 @@
1
1
  import hashlib
2
- import inspect
3
2
  import logging
4
3
  import os
5
4
  import tarfile
6
5
  import tempfile
7
- from collections.abc import Callable, Generator
8
- from itertools import islice
9
6
  from pathlib import Path
10
- from typing import Any
11
7
 
12
8
  import zstandard as zstd
13
9
 
14
10
  logger = logging.getLogger(__name__)
15
11
 
16
-
17
- def pluralize(singular: str, count: int) -> str:
18
- """Pluralize a noun, if necessary, using simplified rules of English pluralization and a list of exceptions.
19
-
20
- :param str singular: A singular noun describing an object
21
- :param int count: The object count, to determine whether to pluralize or not
22
- :return str: The noun in its singular or plural form
23
- """
24
- if count == 1:
25
- return singular
26
-
27
- some_exceptions = {
28
- "child": "children",
29
- "class": "classes",
30
- "entity": "entities",
31
- "metadata": "metadata",
32
- }
33
- if singular in some_exceptions:
34
- return some_exceptions[singular]
35
-
36
- return singular + "s"
37
-
38
-
39
12
  MANUAL_SOURCE = "manual"
40
13
 
41
14
 
@@ -223,58 +196,3 @@ def create_tar_zst_archive(
223
196
  close_delete_file(tar_fd, tar_archive)
224
197
 
225
198
  return zst_fd, zst_archive, zst_hash, tar_hash
226
-
227
-
228
- DEFAULT_BATCH_SIZE = 50
229
- """Batch size used for bulk publication to Arkindex"""
230
-
231
-
232
- def batch_publication(func: Callable) -> Callable:
233
- """
234
- Decorator for functions that should raise an error when the value passed through the ``batch_size`` parameter is **not** a strictly positive integer.
235
-
236
- :param func: The function to wrap with the ``batch_size`` check
237
- :return: The function passing the ``batch_size`` check
238
- """
239
- signature = inspect.signature(func)
240
-
241
- def wrapper(self, *args, **kwargs):
242
- bound_func = signature.bind(self, *args, **kwargs)
243
- bound_func.apply_defaults()
244
- batch_size = bound_func.arguments.get("batch_size")
245
- assert (
246
- batch_size is not None and isinstance(batch_size, int) and batch_size > 0
247
- ), "batch_size shouldn't be null and should be a strictly positive integer"
248
-
249
- return func(self, *args, **kwargs)
250
-
251
- wrapper.__name__ = func.__name__
252
- return wrapper
253
-
254
-
255
- def make_batches(
256
- objects: list, singular_name: str, batch_size: int
257
- ) -> Generator[list[Any]]:
258
- """Split an object list in successive batches of maximum size ``batch_size``.
259
-
260
- :param objects: The object list to divide in batches of ``batch_size`` size
261
- :param singular_name: The singular form of the noun associated with the object list
262
- :param batch_size: The maximum size of each batch to split the object list
263
- :return: A generator of successive batches containing ``batch_size`` items from ``objects``
264
- """
265
- count = len(objects)
266
- logger.info(
267
- f"Creating batches of size {batch_size} to process {count} {pluralize(singular_name, count)}"
268
- )
269
-
270
- index = 1
271
- iterator = iter(objects)
272
- while batch := list(islice(iterator, batch_size)):
273
- count = len(batch)
274
- logger.info(
275
- f"Processing batch {index} containing {count} {pluralize(singular_name, count)}..."
276
- )
277
-
278
- yield batch
279
-
280
- index += 1