arkindex-base-worker 0.4.0b3__tar.gz → 0.4.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/PKG-INFO +3 -2
  2. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_base_worker.egg-info/PKG-INFO +3 -2
  3. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_base_worker.egg-info/requires.txt +2 -1
  4. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/image.py +118 -0
  5. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/__init__.py +4 -50
  6. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/base.py +24 -1
  7. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/element.py +243 -75
  8. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/transcription.py +50 -50
  9. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/pyproject.toml +3 -2
  10. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/conftest.py +2 -21
  11. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_base_worker.py +203 -2
  12. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/test_elements.py +443 -16
  13. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/test_worker.py +0 -200
  14. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_image.py +248 -6
  15. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_merge.py +0 -1
  16. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_utils.py +2 -4
  17. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/LICENSE +0 -0
  18. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/README.md +0 -0
  19. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_base_worker.egg-info/SOURCES.txt +0 -0
  20. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
  21. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_base_worker.egg-info/top_level.txt +0 -0
  22. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/__init__.py +0 -0
  23. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/cache.py +0 -0
  24. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/models.py +0 -0
  25. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/utils.py +0 -0
  26. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/classification.py +0 -0
  27. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/corpus.py +0 -0
  28. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/dataset.py +0 -0
  29. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/entity.py +0 -0
  30. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/image.py +0 -0
  31. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/metadata.py +0 -0
  32. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/task.py +0 -0
  33. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/training.py +0 -0
  34. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/arkindex_worker/worker/version.py +0 -0
  35. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/hooks/pre_gen_project.py +0 -0
  36. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/setup.cfg +0 -0
  37. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/__init__.py +0 -0
  38. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_cache.py +0 -0
  39. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_dataset_worker.py +0 -0
  40. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_element.py +0 -0
  41. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/__init__.py +0 -0
  42. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/test_classifications.py +0 -0
  43. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/test_cli.py +0 -0
  44. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/test_corpus.py +0 -0
  45. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/test_dataset.py +0 -0
  46. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/test_entities.py +0 -0
  47. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/test_image.py +0 -0
  48. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/test_metadata.py +0 -0
  49. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/test_task.py +0 -0
  50. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/test_training.py +0 -0
  51. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/tests/test_elements_worker/test_transcriptions.py +0 -0
  52. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/worker-demo/tests/__init__.py +0 -0
  53. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/worker-demo/tests/conftest.py +0 -0
  54. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/worker-demo/tests/test_worker.py +0 -0
  55. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/worker-demo/worker_demo/__init__.py +0 -0
  56. {arkindex_base_worker-0.4.0b3 → arkindex_base_worker-0.4.0rc1}/worker-demo/worker_demo/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0b3
3
+ Version: 0.4.0rc1
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -40,6 +40,7 @@ Classifier: Programming Language :: Python :: 3.11
40
40
  Requires-Python: >=3.10
41
41
  Description-Content-Type: text/markdown
42
42
  License-File: LICENSE
43
+ Requires-Dist: humanize==4.9.0
43
44
  Requires-Dist: peewee~=3.17
44
45
  Requires-Dist: Pillow==10.4.0
45
46
  Requires-Dist: python-gnupg==0.5.2
@@ -49,7 +50,7 @@ Requires-Dist: zstandard==0.22.0
49
50
  Provides-Extra: docs
50
51
  Requires-Dist: black==24.4.2; extra == "docs"
51
52
  Requires-Dist: mkdocs-material==9.5.31; extra == "docs"
52
- Requires-Dist: mkdocstrings-python==1.10.7; extra == "docs"
53
+ Requires-Dist: mkdocstrings-python==1.10.8; extra == "docs"
53
54
  Provides-Extra: tests
54
55
  Requires-Dist: pytest==8.3.2; extra == "tests"
55
56
  Requires-Dist: pytest-mock==3.14.0; extra == "tests"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0b3
3
+ Version: 0.4.0rc1
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -40,6 +40,7 @@ Classifier: Programming Language :: Python :: 3.11
40
40
  Requires-Python: >=3.10
41
41
  Description-Content-Type: text/markdown
42
42
  License-File: LICENSE
43
+ Requires-Dist: humanize==4.9.0
43
44
  Requires-Dist: peewee~=3.17
44
45
  Requires-Dist: Pillow==10.4.0
45
46
  Requires-Dist: python-gnupg==0.5.2
@@ -49,7 +50,7 @@ Requires-Dist: zstandard==0.22.0
49
50
  Provides-Extra: docs
50
51
  Requires-Dist: black==24.4.2; extra == "docs"
51
52
  Requires-Dist: mkdocs-material==9.5.31; extra == "docs"
52
- Requires-Dist: mkdocstrings-python==1.10.7; extra == "docs"
53
+ Requires-Dist: mkdocstrings-python==1.10.8; extra == "docs"
53
54
  Provides-Extra: tests
54
55
  Requires-Dist: pytest==8.3.2; extra == "tests"
55
56
  Requires-Dist: pytest-mock==3.14.0; extra == "tests"
@@ -1,3 +1,4 @@
1
+ humanize==4.9.0
1
2
  peewee~=3.17
2
3
  Pillow==10.4.0
3
4
  python-gnupg==0.5.2
@@ -8,7 +9,7 @@ zstandard==0.22.0
8
9
  [docs]
9
10
  black==24.4.2
10
11
  mkdocs-material==9.5.31
11
- mkdocstrings-python==1.10.7
12
+ mkdocstrings-python==1.10.8
12
13
 
13
14
  [tests]
14
15
  pytest==8.3.2
@@ -2,13 +2,18 @@
2
2
  Helper methods to download and open IIIF images, and manage polygons.
3
3
  """
4
4
 
5
+ import functools
6
+ import os
5
7
  import re
8
+ import tempfile
6
9
  from collections import namedtuple
10
+ from collections.abc import Generator, Iterator
7
11
  from io import BytesIO
8
12
  from math import ceil
9
13
  from pathlib import Path
10
14
  from typing import TYPE_CHECKING
11
15
 
16
+ import humanize
12
17
  import requests
13
18
  from PIL import Image
14
19
  from shapely.affinity import rotate, scale, translate
@@ -40,8 +45,57 @@ IIIF_URL = re.compile(r"\w+:\/{2}.+\/.+\/.+\/.+\/(?P<size>.+)\/!?\d+\/\w+\.\w+")
40
45
  IIIF_FULL = "full"
41
46
  # Maximum size available
42
47
  IIIF_MAX = "max"
48
+ # Ratio to resize image
49
+ IMAGE_RATIO = [1, 0.9, 0.85, 0.80, 0.75, 0.70, 0.60, 0.50, 0.40, 0.30]
43
50
 
44
51
 
52
+ def update_pillow_image_size_limit(func):
53
+ """
54
+ Update Pillow Image size limit
55
+ """
56
+
57
+ @functools.wraps(func)
58
+ def wrapper(
59
+ *args,
60
+ max_image_pixels: str | int | None = os.getenv("ARKINDEX_MAX_IMAGE_PIXELS"),
61
+ **kwargs,
62
+ ):
63
+ """
64
+ Wrapper to update Pillow Image size limit and restore it at the end of the function.
65
+
66
+ :param *args: Positional arguments passed to the function.
67
+ :param max_image_pixels: Pillow Image size limit to use.
68
+ :param **kwargs: Keyword arguments passed to the function.
69
+ """
70
+ MAX_IMAGE_PIXELS = Image.MAX_IMAGE_PIXELS
71
+
72
+ # Override Pillow Image size limit
73
+ if max_image_pixels is not None:
74
+ max_image_pixels = int(max_image_pixels)
75
+ # Override Pillow limit for detecting decompression bombs, disabled if set to 0
76
+ if max_image_pixels == 0:
77
+ logger.warning(
78
+ "Pillow Image size limit is completely disabled, make sure you trust the image source."
79
+ )
80
+ Image.MAX_IMAGE_PIXELS = None
81
+ else:
82
+ Image.MAX_IMAGE_PIXELS = max_image_pixels
83
+
84
+ try:
85
+ results = func(*args, **kwargs)
86
+ except:
87
+ # Restore initial Pillow Image size limit
88
+ Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS
89
+ raise
90
+
91
+ # Restore initial Pillow Image size limit
92
+ Image.MAX_IMAGE_PIXELS = MAX_IMAGE_PIXELS
93
+ return results
94
+
95
+ return wrapper
96
+
97
+
98
+ @update_pillow_image_size_limit
45
99
  def open_image(
46
100
  path: str,
47
101
  mode: str | None = "RGB",
@@ -149,6 +203,70 @@ def upload_image(image: Image, url: str) -> requests.Response:
149
203
  return resp
150
204
 
151
205
 
206
+ def resized_images(
207
+ *args,
208
+ element: "Element",
209
+ max_pixels: int | None = None,
210
+ max_bytes: int | None = None,
211
+ **kwargs,
212
+ ) -> Iterator[Generator[tempfile.NamedTemporaryFile, None, None]]:
213
+ """
214
+ Build resized images according to the pixel and byte limits.
215
+
216
+ :param *args: Positional arguments passed to [arkindex_worker.models.Element.open_image_tempfile][].
217
+ :param element: Element whose image needs to be resized.
218
+ :param max_pixels: Maximum pixel size of the resized images.
219
+ :param max_bytes: Maximum byte size of the resized images.
220
+ :param **kwargs: Keyword arguments passed to [arkindex_worker.models.Element.open_image_tempfile][].
221
+ :returns: An iterator of the temporary file of the resized image.
222
+ """
223
+ _, _, element_width, element_height = polygon_bounding_box(element.polygon)
224
+
225
+ logger.info(f"This element's image sizes are ({element_width} x {element_height}).")
226
+ if max_pixels and max(element_width, element_height) > max_pixels:
227
+ logger.warning(
228
+ f"Maximum image input size supported is ({max_pixels} x {max_pixels})."
229
+ )
230
+ logger.warning("The image will be resized.")
231
+
232
+ element_pixel, param = (
233
+ (element_width, "max_width")
234
+ if element_width > element_height
235
+ else (element_height, "max_height")
236
+ )
237
+
238
+ for resized_pixel in sorted(
239
+ set(
240
+ min(round(ratio * element_pixel), max_pixels or element_pixel)
241
+ for ratio in IMAGE_RATIO
242
+ ),
243
+ reverse=True,
244
+ ):
245
+ with element.open_image_tempfile(
246
+ *args, **{**kwargs, param: resized_pixel}
247
+ ) as image:
248
+ pillow_image = Image.open(image)
249
+ if (
250
+ pillow_image.width != element_width
251
+ or pillow_image.height != element_height
252
+ ):
253
+ logger.warning(
254
+ f"The image was resized to ({pillow_image.width} x {pillow_image.height})."
255
+ )
256
+
257
+ # The image is still too large
258
+ image_size = Path(image.name).stat().st_size
259
+ if max_bytes and image_size > max_bytes:
260
+ logger.warning(f"The image size is {humanize.naturalsize(image_size)}.")
261
+ logger.warning(
262
+ f"Maximum image input size supported is {humanize.naturalsize(max_bytes)}."
263
+ )
264
+ logger.warning("The image will be resized.")
265
+ continue
266
+
267
+ yield image
268
+
269
+
152
270
  def polygon_bounding_box(polygon: list[list[int | float]]) -> BoundingBox:
153
271
  """
154
272
  Compute the rectangle bounding box of a polygon.
@@ -111,7 +111,7 @@ class ElementsWorker(
111
111
  help="One or more Arkindex element ID",
112
112
  )
113
113
 
114
- def list_elements(self) -> Iterable[CachedElement] | list[str]:
114
+ def get_elements(self) -> Iterable[CachedElement] | list[str]:
115
115
  """
116
116
  List the elements to be processed, either from the CLI arguments or
117
117
  the cache database when enabled.
@@ -173,33 +173,10 @@ class ElementsWorker(
173
173
  ), "Worker must be configured to access its process activity state"
174
174
  return self.process_information.get("activity_state") == "ready"
175
175
 
176
- def configure(self):
177
- """
178
- Setup the worker using CLI arguments and environment variables.
179
- """
180
- # CLI args are stored on the instance so that implementations can access them
181
- self.args = self.parser.parse_args()
182
-
183
- if self.is_read_only:
184
- super().configure_for_developers()
185
- else:
186
- super().configure()
187
- super().configure_cache()
188
-
189
- # Retrieve the model configuration
190
- if self.model_configuration:
191
- self.config.update(self.model_configuration)
192
- logger.info("Model version configuration retrieved")
193
-
194
- # Retrieve the user configuration
195
- if self.user_configuration:
196
- self.config.update(self.user_configuration)
197
- logger.info("User configuration retrieved")
198
-
199
176
  def run(self):
200
177
  """
201
178
  Implements an Arkindex worker that goes through each element returned by
202
- [list_elements][arkindex_worker.worker.ElementsWorker.list_elements].
179
+ [get_elements][arkindex_worker.worker.ElementsWorker.get_elements].
203
180
  It calls [process_element][arkindex_worker.worker.ElementsWorker.process_element],
204
181
  catching exceptions, and handles saving WorkerActivity updates when enabled.
205
182
  """
@@ -207,7 +184,7 @@ class ElementsWorker(
207
184
 
208
185
  # List all elements either from JSON file
209
186
  # or direct list of elements on CLI
210
- elements = self.list_elements()
187
+ elements = self.get_elements()
211
188
  if not elements:
212
189
  logger.warning("No elements to process, stopping.")
213
190
  sys.exit(1)
@@ -224,7 +201,7 @@ class ElementsWorker(
224
201
  element = None
225
202
  try:
226
203
  if self.use_cache:
227
- # Just use the result of list_elements as the element
204
+ # Just use the result of get_elements as the element
228
205
  element = item
229
206
  else:
230
207
  # Load element using the Arkindex API
@@ -397,29 +374,6 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
397
374
  default=[],
398
375
  )
399
376
 
400
- def configure(self):
401
- """
402
- Setup the worker using CLI arguments and environment variables.
403
- """
404
- # CLI args are stored on the instance so that implementations can access them
405
- self.args = self.parser.parse_args()
406
-
407
- if self.is_read_only:
408
- super().configure_for_developers()
409
- else:
410
- super().configure()
411
- super().configure_cache()
412
-
413
- # Retrieve the model configuration
414
- if self.model_configuration:
415
- self.config.update(self.model_configuration)
416
- logger.info("Model version configuration retrieved")
417
-
418
- # Retrieve the user configuration
419
- if self.user_configuration:
420
- self.config.update(self.user_configuration)
421
- logger.info("User configuration retrieved")
422
-
423
377
  def cleanup_downloaded_artifact(self) -> None:
424
378
  """
425
379
  Cleanup the downloaded dataset artifact if any
@@ -219,7 +219,7 @@ class BaseWorker:
219
219
  # Load all required secrets
220
220
  self.secrets = {name: self.load_secret(Path(name)) for name in required_secrets}
221
221
 
222
- def configure(self):
222
+ def configure_worker_run(self):
223
223
  """
224
224
  Setup the necessary configuration needed using CLI args and environment variables.
225
225
  This is the method called when running a worker on Arkindex.
@@ -320,6 +320,29 @@ class BaseWorker:
320
320
  else:
321
321
  logger.debug("Cache is disabled")
322
322
 
323
+ def configure(self):
324
+ """
325
+ Setup the worker using CLI arguments and environment variables.
326
+ """
327
+ # CLI args are stored on the instance so that implementations can access them
328
+ self.args = self.parser.parse_args()
329
+
330
+ if self.is_read_only:
331
+ self.configure_for_developers()
332
+ else:
333
+ self.configure_worker_run()
334
+ self.configure_cache()
335
+
336
+ # Retrieve the model configuration
337
+ if self.model_configuration:
338
+ self.config.update(self.model_configuration)
339
+ logger.info("Model version configuration retrieved")
340
+
341
+ # Retrieve the user configuration
342
+ if self.user_configuration:
343
+ self.config.update(self.user_configuration)
344
+ logger.info("User configuration retrieved")
345
+
323
346
  def load_secret(self, name: Path):
324
347
  """
325
348
  Load a Ponos secret by name.
@@ -483,6 +483,178 @@ class ElementMixin:
483
483
 
484
484
  return updated_element
485
485
 
486
+ def list_elements(
487
+ self,
488
+ folder: bool | None = None,
489
+ name: str | None = None,
490
+ top_level: bool | None = None,
491
+ transcription_worker_version: str | bool | None = None,
492
+ transcription_worker_run: str | bool | None = None,
493
+ type: str | None = None,
494
+ with_classes: bool | None = None,
495
+ with_corpus: bool | None = None,
496
+ with_metadata: bool | None = None,
497
+ with_has_children: bool | None = None,
498
+ with_zone: bool | None = None,
499
+ worker_version: str | bool | None = None,
500
+ worker_run: str | bool | None = None,
501
+ ) -> Iterable[dict] | Iterable[CachedElement]:
502
+ """
503
+ List element in a corpus.
504
+
505
+ Warns:
506
+ ----
507
+ The following parameters are **deprecated**:
508
+
509
+ - `transcription_worker_version` in favor of `transcription_worker_run`
510
+ - `worker_version` in favor of `worker_run`
511
+
512
+ :param folder: Restrict to or exclude elements with folder types.
513
+ This parameter is not supported when caching is enabled.
514
+ :param name: Restrict to elements whose name contain a substring (case-insensitive).
515
+ This parameter is not supported when caching is enabled.
516
+ :param top_level: Restrict to or exclude folder elements without parent elements (top-level elements).
517
+ This parameter is not supported when caching is enabled.
518
+ :param transcription_worker_version: **Deprecated** Restrict to elements that have a transcription created by a worker version with this UUID. Set to False to look for elements that have a manual transcription.
519
+ This parameter is not supported when caching is enabled.
520
+ :param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID. Set to False to look for elements that have a manual transcription.
521
+ This parameter is not supported when caching is enabled.
522
+ :param type: Restrict to elements with a specific type slug
523
+ This parameter is not supported when caching is enabled.
524
+ :param with_classes: Include each element's classifications in the response.
525
+ This parameter is not supported when caching is enabled.
526
+ :param with_corpus: Include each element's corpus in the response.
527
+ This parameter is not supported when caching is enabled.
528
+ :param with_has_children: Include the ``has_children`` attribute in the response,
529
+ indicating if this element has child elements of its own.
530
+ This parameter is not supported when caching is enabled.
531
+ :param with_metadata: Include each element's metadata in the response.
532
+ This parameter is not supported when caching is enabled.
533
+ :param with_zone: Include the ``zone`` attribute in the response,
534
+ holding the element's image and polygon.
535
+ This parameter is not supported when caching is enabled.
536
+ :param worker_version: **Deprecated** Restrict to elements created by a worker version with this UUID.
537
+ :param worker_run: Restrict to elements created by a worker run with this UUID.
538
+ :return: An iterable of dicts from the ``ListElementChildren`` API endpoint,
539
+ or an iterable of [CachedElement][arkindex_worker.cache.CachedElement] when caching is enabled.
540
+ """
541
+ query_params = {}
542
+ if folder is not None:
543
+ assert isinstance(folder, bool), "folder should be of type bool"
544
+ query_params["folder"] = folder
545
+ if name:
546
+ assert isinstance(name, str), "name should be of type str"
547
+ query_params["name"] = name
548
+ if top_level is not None:
549
+ assert isinstance(top_level, bool), "top_level should be of type bool"
550
+ query_params["top_level"] = top_level
551
+ if transcription_worker_version is not None:
552
+ warn(
553
+ "`transcription_worker_version` usage is deprecated. Consider using `transcription_worker_run` instead.",
554
+ DeprecationWarning,
555
+ stacklevel=1,
556
+ )
557
+ assert isinstance(
558
+ transcription_worker_version, str | bool
559
+ ), "transcription_worker_version should be of type str or bool"
560
+ if isinstance(transcription_worker_version, bool):
561
+ assert (
562
+ transcription_worker_version is False
563
+ ), "if of type bool, transcription_worker_version can only be set to False"
564
+ query_params["transcription_worker_version"] = transcription_worker_version
565
+ if transcription_worker_run is not None:
566
+ assert isinstance(
567
+ transcription_worker_run, str | bool
568
+ ), "transcription_worker_run should be of type str or bool"
569
+ if isinstance(transcription_worker_run, bool):
570
+ assert (
571
+ transcription_worker_run is False
572
+ ), "if of type bool, transcription_worker_run can only be set to False"
573
+ query_params["transcription_worker_run"] = transcription_worker_run
574
+ if type:
575
+ assert isinstance(type, str), "type should be of type str"
576
+ query_params["type"] = type
577
+ if with_classes is not None:
578
+ assert isinstance(with_classes, bool), "with_classes should be of type bool"
579
+ query_params["with_classes"] = with_classes
580
+ if with_corpus is not None:
581
+ assert isinstance(with_corpus, bool), "with_corpus should be of type bool"
582
+ query_params["with_corpus"] = with_corpus
583
+ if with_has_children is not None:
584
+ assert isinstance(
585
+ with_has_children, bool
586
+ ), "with_has_children should be of type bool"
587
+ query_params["with_has_children"] = with_has_children
588
+ if with_metadata is not None:
589
+ assert isinstance(
590
+ with_metadata, bool
591
+ ), "with_metadata should be of type bool"
592
+ query_params["with_metadata"] = with_metadata
593
+ if with_zone is not None:
594
+ assert isinstance(with_zone, bool), "with_zone should be of type bool"
595
+ query_params["with_zone"] = with_zone
596
+ if worker_version is not None:
597
+ warn(
598
+ "`worker_version` usage is deprecated. Consider using `worker_run` instead.",
599
+ DeprecationWarning,
600
+ stacklevel=1,
601
+ )
602
+ assert isinstance(
603
+ worker_version, str | bool
604
+ ), "worker_version should be of type str or bool"
605
+ if isinstance(worker_version, bool):
606
+ assert (
607
+ worker_version is False
608
+ ), "if of type bool, worker_version can only be set to False"
609
+ query_params["worker_version"] = worker_version
610
+ if worker_run is not None:
611
+ assert isinstance(
612
+ worker_run, str | bool
613
+ ), "worker_run should be of type str or bool"
614
+ if isinstance(worker_run, bool):
615
+ assert (
616
+ worker_run is False
617
+ ), "if of type bool, worker_run can only be set to False"
618
+ query_params["worker_run"] = worker_run
619
+
620
+ if not self.use_cache:
621
+ return self.api_client.paginate(
622
+ "ListElements", corpus=self.corpus_id, **query_params
623
+ )
624
+
625
+ # Checking that we only received query_params handled by the cache
626
+ assert (
627
+ set(query_params.keys())
628
+ <= {
629
+ "type",
630
+ "worker_version",
631
+ "worker_run",
632
+ }
633
+ ), "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
634
+
635
+ query = CachedElement.select()
636
+ if type:
637
+ query = query.where(CachedElement.type == type)
638
+ if worker_version is not None:
639
+ # If worker_version=False, filter by manual worker_version e.g. None
640
+ worker_version_id = worker_version or None
641
+ if worker_version_id:
642
+ query = query.where(
643
+ CachedElement.worker_version_id == worker_version_id
644
+ )
645
+ else:
646
+ query = query.where(CachedElement.worker_version_id.is_null())
647
+
648
+ if worker_run is not None:
649
+ # If worker_run=False, filter by manual worker_run e.g. None
650
+ worker_run_id = worker_run or None
651
+ if worker_run_id:
652
+ query = query.where(CachedElement.worker_run_id == worker_run_id)
653
+ else:
654
+ query = query.where(CachedElement.worker_run_id.is_null())
655
+
656
+ return query
657
+
486
658
  def list_element_children(
487
659
  self,
488
660
  element: Element | CachedElement,
@@ -622,45 +794,43 @@ class ElementMixin:
622
794
  ), "if of type bool, worker_run can only be set to False"
623
795
  query_params["worker_run"] = worker_run
624
796
 
625
- if self.use_cache:
626
- # Checking that we only received query_params handled by the cache
627
- assert (
628
- set(query_params.keys())
629
- <= {
630
- "type",
631
- "worker_version",
632
- "worker_run",
633
- }
634
- ), "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
635
-
636
- query = CachedElement.select().where(CachedElement.parent_id == element.id)
637
- if type:
638
- query = query.where(CachedElement.type == type)
639
- if worker_version is not None:
640
- # If worker_version=False, filter by manual worker_version e.g. None
641
- worker_version_id = worker_version or None
642
- if worker_version_id:
643
- query = query.where(
644
- CachedElement.worker_version_id == worker_version_id
645
- )
646
- else:
647
- query = query.where(CachedElement.worker_version_id.is_null())
648
-
649
- if worker_run is not None:
650
- # If worker_run=False, filter by manual worker_run e.g. None
651
- worker_run_id = worker_run or None
652
- if worker_run_id:
653
- query = query.where(CachedElement.worker_run_id == worker_run_id)
654
- else:
655
- query = query.where(CachedElement.worker_run_id.is_null())
656
-
657
- return query
658
- else:
659
- children = self.api_client.paginate(
797
+ if not self.use_cache:
798
+ return self.api_client.paginate(
660
799
  "ListElementChildren", id=element.id, **query_params
661
800
  )
662
801
 
663
- return children
802
+ # Checking that we only received query_params handled by the cache
803
+ assert (
804
+ set(query_params.keys())
805
+ <= {
806
+ "type",
807
+ "worker_version",
808
+ "worker_run",
809
+ }
810
+ ), "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
811
+
812
+ query = CachedElement.select().where(CachedElement.parent_id == element.id)
813
+ if type:
814
+ query = query.where(CachedElement.type == type)
815
+ if worker_version is not None:
816
+ # If worker_version=False, filter by manual worker_version e.g. None
817
+ worker_version_id = worker_version or None
818
+ if worker_version_id:
819
+ query = query.where(
820
+ CachedElement.worker_version_id == worker_version_id
821
+ )
822
+ else:
823
+ query = query.where(CachedElement.worker_version_id.is_null())
824
+
825
+ if worker_run is not None:
826
+ # If worker_run=False, filter by manual worker_run e.g. None
827
+ worker_run_id = worker_run or None
828
+ if worker_run_id:
829
+ query = query.where(CachedElement.worker_run_id == worker_run_id)
830
+ else:
831
+ query = query.where(CachedElement.worker_run_id.is_null())
832
+
833
+ return query
664
834
 
665
835
  def list_element_parents(
666
836
  self,
@@ -801,45 +971,43 @@ class ElementMixin:
801
971
  ), "if of type bool, worker_run can only be set to False"
802
972
  query_params["worker_run"] = worker_run
803
973
 
804
- if self.use_cache:
805
- # Checking that we only received query_params handled by the cache
806
- assert (
807
- set(query_params.keys())
808
- <= {
809
- "type",
810
- "worker_version",
811
- "worker_run",
812
- }
813
- ), "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
814
-
815
- parent_ids = CachedElement.select(CachedElement.parent_id).where(
816
- CachedElement.id == element.id
817
- )
818
- query = CachedElement.select().where(CachedElement.id.in_(parent_ids))
819
- if type:
820
- query = query.where(CachedElement.type == type)
821
- if worker_version is not None:
822
- # If worker_version=False, filter by manual worker_version e.g. None
823
- worker_version_id = worker_version or None
824
- if worker_version_id:
825
- query = query.where(
826
- CachedElement.worker_version_id == worker_version_id
827
- )
828
- else:
829
- query = query.where(CachedElement.worker_version_id.is_null())
830
-
831
- if worker_run is not None:
832
- # If worker_run=False, filter by manual worker_run e.g. None
833
- worker_run_id = worker_run or None
834
- if worker_run_id:
835
- query = query.where(CachedElement.worker_run_id == worker_run_id)
836
- else:
837
- query = query.where(CachedElement.worker_run_id.is_null())
838
-
839
- return query
840
- else:
841
- parents = self.api_client.paginate(
974
+ if not self.use_cache:
975
+ return self.api_client.paginate(
842
976
  "ListElementParents", id=element.id, **query_params
843
977
  )
844
978
 
845
- return parents
979
+ # Checking that we only received query_params handled by the cache
980
+ assert (
981
+ set(query_params.keys())
982
+ <= {
983
+ "type",
984
+ "worker_version",
985
+ "worker_run",
986
+ }
987
+ ), "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
988
+
989
+ parent_ids = CachedElement.select(CachedElement.parent_id).where(
990
+ CachedElement.id == element.id
991
+ )
992
+ query = CachedElement.select().where(CachedElement.id.in_(parent_ids))
993
+ if type:
994
+ query = query.where(CachedElement.type == type)
995
+ if worker_version is not None:
996
+ # If worker_version=False, filter by manual worker_version e.g. None
997
+ worker_version_id = worker_version or None
998
+ if worker_version_id:
999
+ query = query.where(
1000
+ CachedElement.worker_version_id == worker_version_id
1001
+ )
1002
+ else:
1003
+ query = query.where(CachedElement.worker_version_id.is_null())
1004
+
1005
+ if worker_run is not None:
1006
+ # If worker_run=False, filter by manual worker_run e.g. None
1007
+ worker_run_id = worker_run or None
1008
+ if worker_run_id:
1009
+ query = query.where(CachedElement.worker_run_id == worker_run_id)
1010
+ else:
1011
+ query = query.where(CachedElement.worker_run_id.is_null())
1012
+
1013
+ return query