arkindex-base-worker 0.5.0a3__tar.gz → 0.5.0b1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/PKG-INFO +4 -7
  2. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/README.md +1 -1
  3. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_base_worker.egg-info/PKG-INFO +4 -7
  4. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_base_worker.egg-info/SOURCES.txt +2 -2
  5. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_base_worker.egg-info/requires.txt +0 -5
  6. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_base_worker.egg-info/top_level.txt +1 -0
  7. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/image.py +4 -17
  8. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/__init__.py +0 -14
  9. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/base.py +7 -0
  10. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/classification.py +3 -3
  11. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/element.py +71 -45
  12. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/entity.py +71 -30
  13. arkindex_base_worker-0.5.0b1/examples/standalone/python/worker.py +171 -0
  14. arkindex_base_worker-0.5.0b1/examples/tooled/python/worker.py +50 -0
  15. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/pyproject.toml +1 -6
  16. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_element.py +200 -26
  17. arkindex_base_worker-0.5.0b1/tests/test_elements_worker/test_entity_list_and_check.py +293 -0
  18. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_image.py +10 -2
  19. arkindex_base_worker-0.5.0a3/arkindex_worker/worker/version.py +0 -58
  20. arkindex_base_worker-0.5.0a3/tests/test_elements_worker/test_entity_list_and_check.py +0 -160
  21. arkindex_base_worker-0.5.0a3/tests/test_elements_worker/test_version.py +0 -60
  22. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/LICENSE +0 -0
  23. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
  24. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/__init__.py +0 -0
  25. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/cache.py +0 -0
  26. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/models.py +0 -0
  27. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/utils.py +0 -0
  28. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/corpus.py +0 -0
  29. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/dataset.py +0 -0
  30. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/image.py +0 -0
  31. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/metadata.py +0 -0
  32. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/process.py +0 -0
  33. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/task.py +0 -0
  34. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/training.py +0 -0
  35. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/transcription.py +0 -0
  36. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/hooks/pre_gen_project.py +0 -0
  37. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/setup.cfg +0 -0
  38. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/__init__.py +0 -0
  39. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/conftest.py +0 -0
  40. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_base_worker.py +0 -0
  41. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_cache.py +0 -0
  42. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_dataset_worker.py +0 -0
  43. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_element.py +0 -0
  44. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/__init__.py +0 -0
  45. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_classification.py +0 -0
  46. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_cli.py +0 -0
  47. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_corpus.py +0 -0
  48. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_dataset.py +0 -0
  49. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_element_create_multiple.py +0 -0
  50. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_element_create_single.py +0 -0
  51. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_element_list_children.py +0 -0
  52. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_element_list_parents.py +0 -0
  53. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_entity_create.py +0 -0
  54. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_image.py +0 -0
  55. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_metadata.py +0 -0
  56. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_process.py +0 -0
  57. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_task.py +0 -0
  58. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_training.py +0 -0
  59. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_transcription_create.py +0 -0
  60. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_transcription_create_with_elements.py +0 -0
  61. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_transcription_list.py +0 -0
  62. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_worker.py +0 -0
  63. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_merge.py +0 -0
  64. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/tests/test_utils.py +0 -0
  65. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/worker-demo/tests/__init__.py +0 -0
  66. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/worker-demo/tests/conftest.py +0 -0
  67. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/worker-demo/tests/test_worker.py +0 -0
  68. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/worker-demo/worker_demo/__init__.py +0 -0
  69. {arkindex_base_worker-0.5.0a3 → arkindex_base_worker-0.5.0b1}/worker-demo/worker_demo/worker.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: arkindex-base-worker
3
- Version: 0.5.0a3
3
+ Version: 0.5.0b1
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -48,14 +48,11 @@ Requires-Dist: python-gnupg==0.5.3
48
48
  Requires-Dist: shapely==2.0.6
49
49
  Requires-Dist: teklia-toolbox==0.1.8
50
50
  Requires-Dist: zstandard==0.23.0
51
- Provides-Extra: docs
52
- Requires-Dist: black==24.10.0; extra == "docs"
53
- Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
54
- Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
55
51
  Provides-Extra: tests
56
52
  Requires-Dist: pytest==8.3.4; extra == "tests"
57
53
  Requires-Dist: pytest-mock==3.14.0; extra == "tests"
58
54
  Requires-Dist: pytest-responses==0.5.1; extra == "tests"
55
+ Dynamic: license-file
59
56
 
60
57
  # Arkindex base Worker
61
58
 
@@ -65,7 +62,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
65
62
 
66
63
  ## Documentation
67
64
 
68
- The [documentation](https://workers.arkindex.org/) is made with [Material for MkDocs](https://github.com/squidfunk/mkdocs-material) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
65
+ The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
69
66
 
70
67
  ## Create a new worker using our template
71
68
 
@@ -6,7 +6,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
6
6
 
7
7
  ## Documentation
8
8
 
9
- The [documentation](https://workers.arkindex.org/) is made with [Material for MkDocs](https://github.com/squidfunk/mkdocs-material) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
9
+ The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
10
10
 
11
11
  ## Create a new worker using our template
12
12
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: arkindex-base-worker
3
- Version: 0.5.0a3
3
+ Version: 0.5.0b1
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -48,14 +48,11 @@ Requires-Dist: python-gnupg==0.5.3
48
48
  Requires-Dist: shapely==2.0.6
49
49
  Requires-Dist: teklia-toolbox==0.1.8
50
50
  Requires-Dist: zstandard==0.23.0
51
- Provides-Extra: docs
52
- Requires-Dist: black==24.10.0; extra == "docs"
53
- Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
54
- Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
55
51
  Provides-Extra: tests
56
52
  Requires-Dist: pytest==8.3.4; extra == "tests"
57
53
  Requires-Dist: pytest-mock==3.14.0; extra == "tests"
58
54
  Requires-Dist: pytest-responses==0.5.1; extra == "tests"
55
+ Dynamic: license-file
59
56
 
60
57
  # Arkindex base Worker
61
58
 
@@ -65,7 +62,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
65
62
 
66
63
  ## Documentation
67
64
 
68
- The [documentation](https://workers.arkindex.org/) is made with [Material for MkDocs](https://github.com/squidfunk/mkdocs-material) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
65
+ The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
69
66
 
70
67
  ## Create a new worker using our template
71
68
 
@@ -24,7 +24,8 @@ arkindex_worker/worker/process.py
24
24
  arkindex_worker/worker/task.py
25
25
  arkindex_worker/worker/training.py
26
26
  arkindex_worker/worker/transcription.py
27
- arkindex_worker/worker/version.py
27
+ examples/standalone/python/worker.py
28
+ examples/tooled/python/worker.py
28
29
  hooks/pre_gen_project.py
29
30
  tests/__init__.py
30
31
  tests/conftest.py
@@ -55,7 +56,6 @@ tests/test_elements_worker/test_training.py
55
56
  tests/test_elements_worker/test_transcription_create.py
56
57
  tests/test_elements_worker/test_transcription_create_with_elements.py
57
58
  tests/test_elements_worker/test_transcription_list.py
58
- tests/test_elements_worker/test_version.py
59
59
  tests/test_elements_worker/test_worker.py
60
60
  worker-demo/tests/__init__.py
61
61
  worker-demo/tests/conftest.py
@@ -6,11 +6,6 @@ shapely==2.0.6
6
6
  teklia-toolbox==0.1.8
7
7
  zstandard==0.23.0
8
8
 
9
- [docs]
10
- black==24.10.0
11
- mkdocs-material==9.5.48
12
- mkdocstrings-python==1.12.2
13
-
14
9
  [tests]
15
10
  pytest==8.3.4
16
11
  pytest-mock==3.14.0
@@ -1,6 +1,7 @@
1
1
  arkindex_worker
2
2
  dist
3
3
  docs
4
+ examples
4
5
  hooks
5
6
  tests
6
7
  worker-demo
@@ -435,16 +435,14 @@ def trim_polygon(
435
435
  :param image_width: Width of the image.
436
436
  :param image_height: Height of the image.
437
437
  :returns: A polygon trimmed to the image's bounds.
438
- Some points may appear as missing, as the trimming can deduplicate points.
439
- The first and last point are always equal, to reproduce the behavior
440
- of the Arkindex backend.
441
438
  :raises AssertionError: When argument types are invalid or when the trimmed polygon
442
439
  is entirely outside of the image's bounds.
443
440
  """
444
441
 
445
442
  assert isinstance(polygon, list | tuple), (
446
- "Input polygon must be a valid list or tuple of points."
443
+ "Polygon must be a valid list or tuple of points."
447
444
  )
445
+ assert len(polygon) >= 3, "Polygon should have at least three points."
448
446
  assert all(isinstance(point, list | tuple) for point in polygon), (
449
447
  "Polygon points must be tuples or lists."
450
448
  )
@@ -458,7 +456,7 @@ def trim_polygon(
458
456
  point[0] <= image_width and point[1] <= image_height for point in polygon
459
457
  ), "This polygon is entirely outside the image's bounds."
460
458
 
461
- trimmed_polygon = [
459
+ return [
462
460
  [
463
461
  min(image_width, max(0, x)),
464
462
  min(image_height, max(0, y)),
@@ -466,17 +464,6 @@ def trim_polygon(
466
464
  for x, y in polygon
467
465
  ]
468
466
 
469
- updated_polygon = []
470
- for point in trimmed_polygon:
471
- if point not in updated_polygon:
472
- updated_polygon.append(point)
473
-
474
- # Add back the matching last point, if it was present in the original polygon
475
- if polygon[-1] == polygon[0]:
476
- updated_polygon.append(updated_polygon[0])
477
-
478
- return updated_polygon
479
-
480
467
 
481
468
  def revert_orientation(
482
469
  element: "Element | CachedElement",
@@ -507,7 +494,7 @@ def revert_orientation(
507
494
  assert polygon and isinstance(polygon, list), (
508
495
  "polygon shouldn't be null and should be a list"
509
496
  )
510
- assert isinstance(reverse, bool), "Reverse should be a bool"
497
+ assert isinstance(reverse, bool), "reverse should be a bool"
511
498
  # Rotating with Pillow can cause it to move the image around, as the image cannot have negative coordinates
512
499
  # and must be a rectangle. This means the origin point of any coordinates from an image is invalid, and the
513
500
  # center of the bounding box of the rotated image is different from the center of the element's bounding box.
@@ -30,7 +30,6 @@ from arkindex_worker.worker.metadata import MetaDataMixin, MetaType # noqa: F40
30
30
  from arkindex_worker.worker.process import ActivityState, ProcessMixin, ProcessMode
31
31
  from arkindex_worker.worker.task import TaskMixin
32
32
  from arkindex_worker.worker.transcription import TranscriptionMixin
33
- from arkindex_worker.worker.version import WorkerVersionMixin
34
33
 
35
34
 
36
35
  class ElementsWorker(
@@ -40,7 +39,6 @@ class ElementsWorker(
40
39
  ClassificationMixin,
41
40
  CorpusMixin,
42
41
  TranscriptionMixin,
43
- WorkerVersionMixin,
44
42
  EntityMixin,
45
43
  MetaDataMixin,
46
44
  ImageMixin,
@@ -62,18 +60,6 @@ class ElementsWorker(
62
60
  """
63
61
  super().__init__(description, support_cache)
64
62
 
65
- self.classes = {}
66
-
67
- self.entity_types = {}
68
- """Known and available entity types in processed corpus
69
- """
70
-
71
- self.corpus_types = {}
72
- """Known and available element types in processed corpus
73
- """
74
-
75
- self._worker_version_cache = {}
76
-
77
63
  def get_elements(self) -> Iterable[CachedElement] | list[str] | list[Element]:
78
64
  """
79
65
  List the elements to be processed, either from the CLI arguments or
@@ -146,6 +146,13 @@ class BaseWorker:
146
146
  # Define API Client
147
147
  self.setup_api_client()
148
148
 
149
+ # Known and available classes in processed corpus
150
+ self.classes = {}
151
+ # Known and available entity types in processed corpus
152
+ self.entity_types = {}
153
+ # Known and available element types in processed corpus
154
+ self.corpus_types = {}
155
+
149
156
  @property
150
157
  def corpus_id(self) -> str:
151
158
  """
@@ -49,7 +49,7 @@ class ClassificationMixin:
49
49
  "CreateMLClass", id=self.corpus_id, body={"name": ml_class}
50
50
  )
51
51
  ml_class_id = self.classes[ml_class] = response["id"]
52
- logger.debug(f"Created ML class {response['id']}")
52
+ logger.debug(f"Created a new ML class {response['id']}")
53
53
  except ErrorResponse as e:
54
54
  # Only reload for 400 errors
55
55
  if e.status_code != 400:
@@ -57,11 +57,11 @@ class ClassificationMixin:
57
57
 
58
58
  # Reload and make sure we have the class
59
59
  logger.info(
60
- f"Reloading corpus classes to see if {ml_class} already exists"
60
+ f"Unable to create the ML class `{ml_class}`. Refreshing corpus classes cache."
61
61
  )
62
62
  self.load_corpus_classes()
63
63
  assert ml_class in self.classes, (
64
- "Missing class {ml_class} even after reloading"
64
+ f"Missing ML class {ml_class} even after refreshing."
65
65
  )
66
66
  ml_class_id = self.classes[ml_class]
67
67
 
@@ -5,12 +5,12 @@ ElementsWorker methods for elements and element types.
5
5
  import os
6
6
  from collections.abc import Iterable
7
7
  from operator import attrgetter
8
- from typing import NamedTuple
9
8
  from uuid import UUID
10
9
  from warnings import warn
11
10
 
12
11
  from peewee import IntegrityError
13
12
 
13
+ from arkindex.exceptions import ErrorResponse
14
14
  from arkindex_worker import logger
15
15
  from arkindex_worker.cache import CachedElement, CachedImage, unsupported_cache
16
16
  from arkindex_worker.models import Element
@@ -22,19 +22,10 @@ from arkindex_worker.utils import (
22
22
  )
23
23
 
24
24
 
25
- class ElementType(NamedTuple):
25
+ class MissingElementType(Exception):
26
26
  """
27
- Arkindex Type of an element
28
- """
29
-
30
- name: str
31
- slug: str
32
- is_folder: bool
33
-
34
-
35
- class MissingTypeError(Exception):
36
- """
37
- A required element type was not found in a corpus.
27
+ Raised when the specified element type was not found in the corpus and
28
+ the worker cannot create it.
38
29
  """
39
30
 
40
31
 
@@ -71,57 +62,92 @@ class ElementMixin:
71
62
  )
72
63
 
73
64
  @unsupported_cache
74
- def create_required_types(self, element_types: list[ElementType]):
75
- """Creates given element types in the corpus.
65
+ def create_element_type(
66
+ self, slug: str, name: str, is_folder: bool = False
67
+ ) -> None:
68
+ """
69
+ Create an element type on the given corpus.
76
70
 
77
- :param element_types: The missing element types to create.
71
+ :param slug: Slug of the element type.
72
+ :param name: Name of the element type.
73
+ :param is_folder: Whether an element with this type can contain other elements or not.
78
74
  """
79
- for element_type in element_types:
80
- self.api_client.request(
75
+ assert slug and isinstance(slug, str), (
76
+ "slug shouldn't be null and should be of type str"
77
+ )
78
+ assert name and isinstance(name, str), (
79
+ "name shouldn't be null and should be of type str"
80
+ )
81
+ assert is_folder is not None and isinstance(is_folder, bool), (
82
+ "is_folder shouldn't be null and should be of type bool"
83
+ )
84
+
85
+ try:
86
+ element_type = self.api_client.request(
81
87
  "CreateElementType",
82
88
  body={
83
- "slug": element_type.slug,
84
- "display_name": element_type.name,
85
- "folder": element_type.is_folder,
89
+ "slug": slug,
90
+ "display_name": name,
91
+ "folder": is_folder,
86
92
  "corpus": self.corpus_id,
87
93
  },
88
94
  )
89
- logger.info(f"Created a new element type with slug {element_type.slug}")
95
+ self.corpus_types[slug] = element_type
96
+ logger.info(f"Created a new element type with slug `{slug}`.")
97
+ except ErrorResponse as e:
98
+ # Only reload for 400 errors
99
+ if e.status_code != 400:
100
+ raise
101
+
102
+ # Reload and make sure we have the element type now
103
+ logger.warning(
104
+ f"Unable to create the element type `{slug}`. Refreshing corpus element types cache."
105
+ )
106
+ self.list_corpus_types()
107
+ assert slug in self.corpus_types, (
108
+ f"Missing element type `{slug}` even after refreshing."
109
+ )
90
110
 
91
111
  def check_required_types(
92
- self, *type_slugs: str, create_missing: bool = False
93
- ) -> bool:
112
+ self, type_slugs: list[str], create_missing: bool = False
113
+ ) -> None:
94
114
  """
95
- Check that a corpus has a list of required element types,
96
- and raise an exception if any of them are missing.
115
+ Check that every element type needed is available in the corpus.
116
+ Missing ones may be created automatically if needed.
97
117
 
98
- :param *type_slugs: Type slugs to look for.
99
- :param create_missing: Whether missing types should be created.
100
- :returns: Whether all of the specified type slugs have been found.
101
- :raises MissingTypeError: If any of the specified type slugs were not found.
118
+ :param type_slugs: Element type slugs to search.
119
+ :param create_missing: Whether the missing types should be created. Defaults to False.
120
+ :raises MissingElementType: When an element type is missing and cannot be created.
102
121
  """
103
- assert len(type_slugs), "At least one element type slug is required."
104
- assert all(isinstance(slug, str) for slug in type_slugs), (
105
- "Element type slugs must be strings."
122
+ assert type_slugs and isinstance(type_slugs, list), (
123
+ "type_slugs shouldn't be null and should be of type list"
124
+ )
125
+
126
+ for index, slug in enumerate(type_slugs):
127
+ assert isinstance(slug, str), (
128
+ f"Element type at index {index} in type_slugs: Should be of type str"
129
+ )
130
+
131
+ assert create_missing is not None and isinstance(create_missing, bool), (
132
+ "create_missing shouldn't be null and should be of type bool"
106
133
  )
107
134
 
108
135
  if not self.corpus_types:
109
136
  self.list_corpus_types()
110
137
 
111
- missing_slugs = set(type_slugs) - set(self.corpus_types)
112
- if missing_slugs:
113
- if create_missing:
114
- self.create_required_types(
115
- element_types=[
116
- ElementType(slug, slug, False) for slug in missing_slugs
117
- ],
118
- )
119
- else:
120
- raise MissingTypeError(
121
- f"Element {pluralize('type', len(missing_slugs))} {', '.join(sorted(missing_slugs))} were not found in corpus ({self.corpus_id})."
138
+ for slug in type_slugs:
139
+ # Do nothing if the type already exists
140
+ if slug in self.corpus_types:
141
+ continue
142
+
143
+ # Do not create missing if not requested
144
+ if not create_missing:
145
+ raise MissingElementType(
146
+ f"Element type `{slug}` was not in the corpus."
122
147
  )
123
148
 
124
- return True
149
+ # Create the type if non-existent
150
+ self.create_element_type(slug=slug, name=slug)
125
151
 
126
152
  @unsupported_cache
127
153
  def create_sub_element(
@@ -8,6 +8,7 @@ from warnings import warn
8
8
 
9
9
  from peewee import IntegrityError
10
10
 
11
+ from arkindex.exceptions import ErrorResponse
11
12
  from arkindex_worker import logger
12
13
  from arkindex_worker.cache import (
13
14
  CachedEntity,
@@ -34,24 +35,85 @@ class MissingEntityType(Exception):
34
35
 
35
36
 
36
37
  class EntityMixin:
38
+ def list_corpus_entity_types(self):
39
+ """
40
+ Loads available entity types in corpus.
41
+ """
42
+ self.entity_types = {
43
+ entity_type["name"]: entity_type["id"]
44
+ for entity_type in self.api_client.paginate(
45
+ "ListCorpusEntityTypes", id=self.corpus_id
46
+ )
47
+ }
48
+ count = len(self.entity_types)
49
+ logger.info(
50
+ f"Loaded {count} entity {pluralize('type', count)} in corpus ({self.corpus_id})."
51
+ )
52
+
37
53
  @unsupported_cache
54
+ def create_entity_type(self, name: str) -> None:
55
+ """
56
+ Create an entity type on the given corpus.
57
+
58
+ :param name: Name of the entity type.
59
+ """
60
+ assert name and isinstance(name, str), (
61
+ "name shouldn't be null and should be of type str"
62
+ )
63
+
64
+ try:
65
+ entity_type = self.api_client.request(
66
+ "CreateEntityType",
67
+ body={
68
+ "name": name,
69
+ "corpus": self.corpus_id,
70
+ },
71
+ )
72
+ self.entity_types[name] = entity_type["id"]
73
+ logger.info(f"Created a new entity type with name `{name}`.")
74
+ except ErrorResponse as e:
75
+ # Only reload for 400 errors
76
+ if e.status_code != 400:
77
+ raise
78
+
79
+ # Reload and make sure we have the entity type now
80
+ logger.warning(
81
+ f"Unable to create the entity type `{name}`. Refreshing corpus entity types cache."
82
+ )
83
+ self.list_corpus_entity_types()
84
+ assert name in self.entity_types, (
85
+ f"Missing entity type `{name}` even after refreshing."
86
+ )
87
+
38
88
  def check_required_entity_types(
39
89
  self, entity_types: list[str], create_missing: bool = True
40
- ):
41
- """Checks that every entity type needed is available in the corpus.
90
+ ) -> None:
91
+ """
92
+ Check that every entity type needed is available in the corpus.
42
93
  Missing ones may be created automatically if needed.
43
94
 
44
95
  :param entity_types: Entity type names to search.
45
96
  :param create_missing: Whether the missing types should be created. Defaults to True.
46
- :raises MissingEntityType: When an entity type is missing and cannot create.
97
+ :raises MissingEntityType: When an entity type is missing and cannot be created.
47
98
  """
48
- # Retrieve entity_type ID
99
+ assert entity_types and isinstance(entity_types, list), (
100
+ "entity_types shouldn't be null and should be of type list"
101
+ )
102
+
103
+ for index, entity_type in enumerate(entity_types):
104
+ assert isinstance(entity_type, str), (
105
+ f"Entity type at index {index} in entity_types: Should be of type str"
106
+ )
107
+
108
+ assert create_missing is not None and isinstance(create_missing, bool), (
109
+ "create_missing shouldn't be null and should be of type bool"
110
+ )
111
+
49
112
  if not self.entity_types:
50
- # Load entity_types of corpus
51
113
  self.list_corpus_entity_types()
52
114
 
53
115
  for entity_type in entity_types:
54
- # Do nothing if type already exists
116
+ # Do nothing if the type already exists
55
117
  if entity_type in self.entity_types:
56
118
  continue
57
119
 
@@ -61,15 +123,8 @@ class EntityMixin:
61
123
  f"Entity type `{entity_type}` was not in the corpus."
62
124
  )
63
125
 
64
- # Create type if non-existent
65
- self.entity_types[entity_type] = self.api_client.request(
66
- "CreateEntityType",
67
- body={
68
- "name": entity_type,
69
- "corpus": self.corpus_id,
70
- },
71
- )["id"]
72
- logger.info(f"Created a new entity type with name `{entity_type}`.")
126
+ # Create the type if non-existent
127
+ self.create_entity_type(entity_type)
73
128
 
74
129
  def create_entity(
75
130
  self,
@@ -211,6 +266,7 @@ class EntityMixin:
211
266
  logger.warning(
212
267
  f"Couldn't save created transcription entity in local cache: {e}"
213
268
  )
269
+
214
270
  return transcription_ent
215
271
 
216
272
  @unsupported_cache
@@ -387,18 +443,3 @@ class EntityMixin:
387
443
  logger.info(
388
444
  f"Loaded {count} {pluralize('entity', count)} in corpus ({self.corpus_id})"
389
445
  )
390
-
391
- def list_corpus_entity_types(self):
392
- """
393
- Loads available entity types in corpus.
394
- """
395
- self.entity_types = {
396
- entity_type["name"]: entity_type["id"]
397
- for entity_type in self.api_client.paginate(
398
- "ListCorpusEntityTypes", id=self.corpus_id
399
- )
400
- }
401
- count = len(self.entity_types)
402
- logger.info(
403
- f"Loaded {count} entity {pluralize('type', count)} in corpus ({self.corpus_id})."
404
- )