arkindex-base-worker 0.5.0a2__tar.gz → 0.5.0b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/PKG-INFO +5 -8
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/README.md +1 -1
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_base_worker.egg-info/PKG-INFO +5 -8
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_base_worker.egg-info/SOURCES.txt +2 -2
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_base_worker.egg-info/requires.txt +1 -6
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_base_worker.egg-info/top_level.txt +1 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/image.py +4 -17
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/__init__.py +0 -14
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/base.py +7 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/classification.py +3 -3
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/element.py +71 -45
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/entity.py +71 -30
- arkindex_base_worker-0.5.0b1/examples/standalone/python/worker.py +171 -0
- arkindex_base_worker-0.5.0b1/examples/tooled/python/worker.py +50 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/pyproject.toml +2 -7
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_element.py +200 -26
- arkindex_base_worker-0.5.0b1/tests/test_elements_worker/test_entity_list_and_check.py +293 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_image.py +10 -2
- arkindex_base_worker-0.5.0a2/arkindex_worker/worker/version.py +0 -58
- arkindex_base_worker-0.5.0a2/tests/test_elements_worker/test_entity_list_and_check.py +0 -160
- arkindex_base_worker-0.5.0a2/tests/test_elements_worker/test_version.py +0 -60
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/LICENSE +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/__init__.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/cache.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/models.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/utils.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/corpus.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/dataset.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/image.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/metadata.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/process.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/task.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/training.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/transcription.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/hooks/pre_gen_project.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/setup.cfg +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/__init__.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/conftest.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_base_worker.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_cache.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_dataset_worker.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_element.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/__init__.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_classification.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_cli.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_corpus.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_dataset.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_element_create_multiple.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_element_create_single.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_element_list_children.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_element_list_parents.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_entity_create.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_image.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_metadata.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_process.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_task.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_training.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_transcription_create.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_transcription_create_with_elements.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_transcription_list.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_elements_worker/test_worker.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_merge.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/tests/test_utils.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/worker-demo/tests/__init__.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/worker-demo/tests/conftest.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/worker-demo/tests/test_worker.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/worker-demo/worker_demo/__init__.py +0 -0
- {arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/worker-demo/worker_demo/worker.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.5.0a2
|
|
3
|
+
Version: 0.5.0b1
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -46,16 +46,13 @@ Requires-Dist: peewee~=3.17
|
|
|
46
46
|
Requires-Dist: Pillow==11.0.0
|
|
47
47
|
Requires-Dist: python-gnupg==0.5.3
|
|
48
48
|
Requires-Dist: shapely==2.0.6
|
|
49
|
-
Requires-Dist: teklia-toolbox==0.1.
|
|
49
|
+
Requires-Dist: teklia-toolbox==0.1.8
|
|
50
50
|
Requires-Dist: zstandard==0.23.0
|
|
51
|
-
Provides-Extra: docs
|
|
52
|
-
Requires-Dist: black==24.10.0; extra == "docs"
|
|
53
|
-
Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
|
|
54
|
-
Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
|
|
55
51
|
Provides-Extra: tests
|
|
56
52
|
Requires-Dist: pytest==8.3.4; extra == "tests"
|
|
57
53
|
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
58
54
|
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
55
|
+
Dynamic: license-file
|
|
59
56
|
|
|
60
57
|
# Arkindex base Worker
|
|
61
58
|
|
|
@@ -65,7 +62,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
|
|
|
65
62
|
|
|
66
63
|
## Documentation
|
|
67
64
|
|
|
68
|
-
The [documentation](https://workers.arkindex.org/) is made with [
|
|
65
|
+
The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
|
|
69
66
|
|
|
70
67
|
## Create a new worker using our template
|
|
71
68
|
|
|
@@ -6,7 +6,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
|
|
|
6
6
|
|
|
7
7
|
## Documentation
|
|
8
8
|
|
|
9
|
-
The [documentation](https://workers.arkindex.org/) is made with [
|
|
9
|
+
The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
|
|
10
10
|
|
|
11
11
|
## Create a new worker using our template
|
|
12
12
|
|
{arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_base_worker.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.5.0a2
|
|
3
|
+
Version: 0.5.0b1
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -46,16 +46,13 @@ Requires-Dist: peewee~=3.17
|
|
|
46
46
|
Requires-Dist: Pillow==11.0.0
|
|
47
47
|
Requires-Dist: python-gnupg==0.5.3
|
|
48
48
|
Requires-Dist: shapely==2.0.6
|
|
49
|
-
Requires-Dist: teklia-toolbox==0.1.
|
|
49
|
+
Requires-Dist: teklia-toolbox==0.1.8
|
|
50
50
|
Requires-Dist: zstandard==0.23.0
|
|
51
|
-
Provides-Extra: docs
|
|
52
|
-
Requires-Dist: black==24.10.0; extra == "docs"
|
|
53
|
-
Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
|
|
54
|
-
Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
|
|
55
51
|
Provides-Extra: tests
|
|
56
52
|
Requires-Dist: pytest==8.3.4; extra == "tests"
|
|
57
53
|
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
58
54
|
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
55
|
+
Dynamic: license-file
|
|
59
56
|
|
|
60
57
|
# Arkindex base Worker
|
|
61
58
|
|
|
@@ -65,7 +62,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
|
|
|
65
62
|
|
|
66
63
|
## Documentation
|
|
67
64
|
|
|
68
|
-
The [documentation](https://workers.arkindex.org/) is made with [
|
|
65
|
+
The [documentation](https://workers.arkindex.org/) is made with [Antora](https://antora.org/) and is hosted by [GitLab Pages](https://docs.gitlab.com/ee/user/project/pages/).
|
|
69
66
|
|
|
70
67
|
## Create a new worker using our template
|
|
71
68
|
|
|
@@ -24,7 +24,8 @@ arkindex_worker/worker/process.py
|
|
|
24
24
|
arkindex_worker/worker/task.py
|
|
25
25
|
arkindex_worker/worker/training.py
|
|
26
26
|
arkindex_worker/worker/transcription.py
|
|
27
|
-
|
|
27
|
+
examples/standalone/python/worker.py
|
|
28
|
+
examples/tooled/python/worker.py
|
|
28
29
|
hooks/pre_gen_project.py
|
|
29
30
|
tests/__init__.py
|
|
30
31
|
tests/conftest.py
|
|
@@ -55,7 +56,6 @@ tests/test_elements_worker/test_training.py
|
|
|
55
56
|
tests/test_elements_worker/test_transcription_create.py
|
|
56
57
|
tests/test_elements_worker/test_transcription_create_with_elements.py
|
|
57
58
|
tests/test_elements_worker/test_transcription_list.py
|
|
58
|
-
tests/test_elements_worker/test_version.py
|
|
59
59
|
tests/test_elements_worker/test_worker.py
|
|
60
60
|
worker-demo/tests/__init__.py
|
|
61
61
|
worker-demo/tests/conftest.py
|
|
@@ -3,14 +3,9 @@ peewee~=3.17
|
|
|
3
3
|
Pillow==11.0.0
|
|
4
4
|
python-gnupg==0.5.3
|
|
5
5
|
shapely==2.0.6
|
|
6
|
-
teklia-toolbox==0.1.
|
|
6
|
+
teklia-toolbox==0.1.8
|
|
7
7
|
zstandard==0.23.0
|
|
8
8
|
|
|
9
|
-
[docs]
|
|
10
|
-
black==24.10.0
|
|
11
|
-
mkdocs-material==9.5.48
|
|
12
|
-
mkdocstrings-python==1.12.2
|
|
13
|
-
|
|
14
9
|
[tests]
|
|
15
10
|
pytest==8.3.4
|
|
16
11
|
pytest-mock==3.14.0
|
|
@@ -435,16 +435,14 @@ def trim_polygon(
|
|
|
435
435
|
:param image_width: Width of the image.
|
|
436
436
|
:param image_height: Height of the image.
|
|
437
437
|
:returns: A polygon trimmed to the image's bounds.
|
|
438
|
-
Some points may appear as missing, as the trimming can deduplicate points.
|
|
439
|
-
The first and last point are always equal, to reproduce the behavior
|
|
440
|
-
of the Arkindex backend.
|
|
441
438
|
:raises AssertionError: When argument types are invalid or when the trimmed polygon
|
|
442
439
|
is entirely outside of the image's bounds.
|
|
443
440
|
"""
|
|
444
441
|
|
|
445
442
|
assert isinstance(polygon, list | tuple), (
|
|
446
|
-
"
|
|
443
|
+
"Polygon must be a valid list or tuple of points."
|
|
447
444
|
)
|
|
445
|
+
assert len(polygon) >= 3, "Polygon should have at least three points."
|
|
448
446
|
assert all(isinstance(point, list | tuple) for point in polygon), (
|
|
449
447
|
"Polygon points must be tuples or lists."
|
|
450
448
|
)
|
|
@@ -458,7 +456,7 @@ def trim_polygon(
|
|
|
458
456
|
point[0] <= image_width and point[1] <= image_height for point in polygon
|
|
459
457
|
), "This polygon is entirely outside the image's bounds."
|
|
460
458
|
|
|
461
|
-
|
|
459
|
+
return [
|
|
462
460
|
[
|
|
463
461
|
min(image_width, max(0, x)),
|
|
464
462
|
min(image_height, max(0, y)),
|
|
@@ -466,17 +464,6 @@ def trim_polygon(
|
|
|
466
464
|
for x, y in polygon
|
|
467
465
|
]
|
|
468
466
|
|
|
469
|
-
updated_polygon = []
|
|
470
|
-
for point in trimmed_polygon:
|
|
471
|
-
if point not in updated_polygon:
|
|
472
|
-
updated_polygon.append(point)
|
|
473
|
-
|
|
474
|
-
# Add back the matching last point, if it was present in the original polygon
|
|
475
|
-
if polygon[-1] == polygon[0]:
|
|
476
|
-
updated_polygon.append(updated_polygon[0])
|
|
477
|
-
|
|
478
|
-
return updated_polygon
|
|
479
|
-
|
|
480
467
|
|
|
481
468
|
def revert_orientation(
|
|
482
469
|
element: "Element | CachedElement",
|
|
@@ -507,7 +494,7 @@ def revert_orientation(
|
|
|
507
494
|
assert polygon and isinstance(polygon, list), (
|
|
508
495
|
"polygon shouldn't be null and should be a list"
|
|
509
496
|
)
|
|
510
|
-
assert isinstance(reverse, bool), "
|
|
497
|
+
assert isinstance(reverse, bool), "reverse should be a bool"
|
|
511
498
|
# Rotating with Pillow can cause it to move the image around, as the image cannot have negative coordinates
|
|
512
499
|
# and must be a rectangle. This means the origin point of any coordinates from an image is invalid, and the
|
|
513
500
|
# center of the bounding box of the rotated image is different from the center of the element's bounding box.
|
{arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/__init__.py
RENAMED
|
@@ -30,7 +30,6 @@ from arkindex_worker.worker.metadata import MetaDataMixin, MetaType # noqa: F40
|
|
|
30
30
|
from arkindex_worker.worker.process import ActivityState, ProcessMixin, ProcessMode
|
|
31
31
|
from arkindex_worker.worker.task import TaskMixin
|
|
32
32
|
from arkindex_worker.worker.transcription import TranscriptionMixin
|
|
33
|
-
from arkindex_worker.worker.version import WorkerVersionMixin
|
|
34
33
|
|
|
35
34
|
|
|
36
35
|
class ElementsWorker(
|
|
@@ -40,7 +39,6 @@ class ElementsWorker(
|
|
|
40
39
|
ClassificationMixin,
|
|
41
40
|
CorpusMixin,
|
|
42
41
|
TranscriptionMixin,
|
|
43
|
-
WorkerVersionMixin,
|
|
44
42
|
EntityMixin,
|
|
45
43
|
MetaDataMixin,
|
|
46
44
|
ImageMixin,
|
|
@@ -62,18 +60,6 @@ class ElementsWorker(
|
|
|
62
60
|
"""
|
|
63
61
|
super().__init__(description, support_cache)
|
|
64
62
|
|
|
65
|
-
self.classes = {}
|
|
66
|
-
|
|
67
|
-
self.entity_types = {}
|
|
68
|
-
"""Known and available entity types in processed corpus
|
|
69
|
-
"""
|
|
70
|
-
|
|
71
|
-
self.corpus_types = {}
|
|
72
|
-
"""Known and available element types in processed corpus
|
|
73
|
-
"""
|
|
74
|
-
|
|
75
|
-
self._worker_version_cache = {}
|
|
76
|
-
|
|
77
63
|
def get_elements(self) -> Iterable[CachedElement] | list[str] | list[Element]:
|
|
78
64
|
"""
|
|
79
65
|
List the elements to be processed, either from the CLI arguments or
|
{arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/base.py
RENAMED
|
@@ -146,6 +146,13 @@ class BaseWorker:
|
|
|
146
146
|
# Define API Client
|
|
147
147
|
self.setup_api_client()
|
|
148
148
|
|
|
149
|
+
# Known and available classes in processed corpus
|
|
150
|
+
self.classes = {}
|
|
151
|
+
# Known and available entity types in processed corpus
|
|
152
|
+
self.entity_types = {}
|
|
153
|
+
# Known and available element types in processed corpus
|
|
154
|
+
self.corpus_types = {}
|
|
155
|
+
|
|
149
156
|
@property
|
|
150
157
|
def corpus_id(self) -> str:
|
|
151
158
|
"""
|
|
@@ -49,7 +49,7 @@ class ClassificationMixin:
|
|
|
49
49
|
"CreateMLClass", id=self.corpus_id, body={"name": ml_class}
|
|
50
50
|
)
|
|
51
51
|
ml_class_id = self.classes[ml_class] = response["id"]
|
|
52
|
-
logger.debug(f"Created ML class {response['id']}")
|
|
52
|
+
logger.debug(f"Created a new ML class {response['id']}")
|
|
53
53
|
except ErrorResponse as e:
|
|
54
54
|
# Only reload for 400 errors
|
|
55
55
|
if e.status_code != 400:
|
|
@@ -57,11 +57,11 @@ class ClassificationMixin:
|
|
|
57
57
|
|
|
58
58
|
# Reload and make sure we have the class
|
|
59
59
|
logger.info(
|
|
60
|
-
f"
|
|
60
|
+
f"Unable to create the ML class `{ml_class}`. Refreshing corpus classes cache."
|
|
61
61
|
)
|
|
62
62
|
self.load_corpus_classes()
|
|
63
63
|
assert ml_class in self.classes, (
|
|
64
|
-
"Missing class {ml_class} even after
|
|
64
|
+
f"Missing ML class {ml_class} even after refreshing."
|
|
65
65
|
)
|
|
66
66
|
ml_class_id = self.classes[ml_class]
|
|
67
67
|
|
{arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/element.py
RENAMED
|
@@ -5,12 +5,12 @@ ElementsWorker methods for elements and element types.
|
|
|
5
5
|
import os
|
|
6
6
|
from collections.abc import Iterable
|
|
7
7
|
from operator import attrgetter
|
|
8
|
-
from typing import NamedTuple
|
|
9
8
|
from uuid import UUID
|
|
10
9
|
from warnings import warn
|
|
11
10
|
|
|
12
11
|
from peewee import IntegrityError
|
|
13
12
|
|
|
13
|
+
from arkindex.exceptions import ErrorResponse
|
|
14
14
|
from arkindex_worker import logger
|
|
15
15
|
from arkindex_worker.cache import CachedElement, CachedImage, unsupported_cache
|
|
16
16
|
from arkindex_worker.models import Element
|
|
@@ -22,19 +22,10 @@ from arkindex_worker.utils import (
|
|
|
22
22
|
)
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
class
|
|
25
|
+
class MissingElementType(Exception):
|
|
26
26
|
"""
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
name: str
|
|
31
|
-
slug: str
|
|
32
|
-
is_folder: bool
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
class MissingTypeError(Exception):
|
|
36
|
-
"""
|
|
37
|
-
A required element type was not found in a corpus.
|
|
27
|
+
Raised when the specified element type was not found in the corpus and
|
|
28
|
+
the worker cannot create it.
|
|
38
29
|
"""
|
|
39
30
|
|
|
40
31
|
|
|
@@ -71,57 +62,92 @@ class ElementMixin:
|
|
|
71
62
|
)
|
|
72
63
|
|
|
73
64
|
@unsupported_cache
|
|
74
|
-
def
|
|
75
|
-
|
|
65
|
+
def create_element_type(
|
|
66
|
+
self, slug: str, name: str, is_folder: bool = False
|
|
67
|
+
) -> None:
|
|
68
|
+
"""
|
|
69
|
+
Create an element type on the given corpus.
|
|
76
70
|
|
|
77
|
-
:param
|
|
71
|
+
:param slug: Slug of the element type.
|
|
72
|
+
:param name: Name of the element type.
|
|
73
|
+
:param is_folder: Whether an element with this type can contain other elements or not.
|
|
78
74
|
"""
|
|
79
|
-
|
|
80
|
-
|
|
75
|
+
assert slug and isinstance(slug, str), (
|
|
76
|
+
"slug shouldn't be null and should be of type str"
|
|
77
|
+
)
|
|
78
|
+
assert name and isinstance(name, str), (
|
|
79
|
+
"name shouldn't be null and should be of type str"
|
|
80
|
+
)
|
|
81
|
+
assert is_folder is not None and isinstance(is_folder, bool), (
|
|
82
|
+
"is_folder shouldn't be null and should be of type bool"
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
try:
|
|
86
|
+
element_type = self.api_client.request(
|
|
81
87
|
"CreateElementType",
|
|
82
88
|
body={
|
|
83
|
-
"slug":
|
|
84
|
-
"display_name":
|
|
85
|
-
"folder":
|
|
89
|
+
"slug": slug,
|
|
90
|
+
"display_name": name,
|
|
91
|
+
"folder": is_folder,
|
|
86
92
|
"corpus": self.corpus_id,
|
|
87
93
|
},
|
|
88
94
|
)
|
|
89
|
-
|
|
95
|
+
self.corpus_types[slug] = element_type
|
|
96
|
+
logger.info(f"Created a new element type with slug `{slug}`.")
|
|
97
|
+
except ErrorResponse as e:
|
|
98
|
+
# Only reload for 400 errors
|
|
99
|
+
if e.status_code != 400:
|
|
100
|
+
raise
|
|
101
|
+
|
|
102
|
+
# Reload and make sure we have the element type now
|
|
103
|
+
logger.warning(
|
|
104
|
+
f"Unable to create the element type `{slug}`. Refreshing corpus element types cache."
|
|
105
|
+
)
|
|
106
|
+
self.list_corpus_types()
|
|
107
|
+
assert slug in self.corpus_types, (
|
|
108
|
+
f"Missing element type `{slug}` even after refreshing."
|
|
109
|
+
)
|
|
90
110
|
|
|
91
111
|
def check_required_types(
|
|
92
|
-
self,
|
|
93
|
-
) ->
|
|
112
|
+
self, type_slugs: list[str], create_missing: bool = False
|
|
113
|
+
) -> None:
|
|
94
114
|
"""
|
|
95
|
-
Check that
|
|
96
|
-
|
|
115
|
+
Check that every element type needed is available in the corpus.
|
|
116
|
+
Missing ones may be created automatically if needed.
|
|
97
117
|
|
|
98
|
-
:param
|
|
99
|
-
:param create_missing: Whether missing types should be created.
|
|
100
|
-
:
|
|
101
|
-
:raises MissingTypeError: If any of the specified type slugs were not found.
|
|
118
|
+
:param type_slugs: Element type slugs to search.
|
|
119
|
+
:param create_missing: Whether the missing types should be created. Defaults to False.
|
|
120
|
+
:raises MissingElementType: When an element type is missing and cannot be created.
|
|
102
121
|
"""
|
|
103
|
-
assert
|
|
104
|
-
|
|
105
|
-
|
|
122
|
+
assert type_slugs and isinstance(type_slugs, list), (
|
|
123
|
+
"type_slugs shouldn't be null and should be of type list"
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
for index, slug in enumerate(type_slugs):
|
|
127
|
+
assert isinstance(slug, str), (
|
|
128
|
+
f"Element type at index {index} in type_slugs: Should be of type str"
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
assert create_missing is not None and isinstance(create_missing, bool), (
|
|
132
|
+
"create_missing shouldn't be null and should be of type bool"
|
|
106
133
|
)
|
|
107
134
|
|
|
108
135
|
if not self.corpus_types:
|
|
109
136
|
self.list_corpus_types()
|
|
110
137
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
if
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
raise MissingTypeError(
|
|
121
|
-
f"Element {pluralize('type', len(missing_slugs))} {', '.join(sorted(missing_slugs))} were not found in corpus ({self.corpus_id})."
|
|
138
|
+
for slug in type_slugs:
|
|
139
|
+
# Do nothing if the type already exists
|
|
140
|
+
if slug in self.corpus_types:
|
|
141
|
+
continue
|
|
142
|
+
|
|
143
|
+
# Do not create missing if not requested
|
|
144
|
+
if not create_missing:
|
|
145
|
+
raise MissingElementType(
|
|
146
|
+
f"Element type `{slug}` was not in the corpus."
|
|
122
147
|
)
|
|
123
148
|
|
|
124
|
-
|
|
149
|
+
# Create the type if non-existent
|
|
150
|
+
self.create_element_type(slug=slug, name=slug)
|
|
125
151
|
|
|
126
152
|
@unsupported_cache
|
|
127
153
|
def create_sub_element(
|
{arkindex_base_worker-0.5.0a2 → arkindex_base_worker-0.5.0b1}/arkindex_worker/worker/entity.py
RENAMED
|
@@ -8,6 +8,7 @@ from warnings import warn
|
|
|
8
8
|
|
|
9
9
|
from peewee import IntegrityError
|
|
10
10
|
|
|
11
|
+
from arkindex.exceptions import ErrorResponse
|
|
11
12
|
from arkindex_worker import logger
|
|
12
13
|
from arkindex_worker.cache import (
|
|
13
14
|
CachedEntity,
|
|
@@ -34,24 +35,85 @@ class MissingEntityType(Exception):
|
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
class EntityMixin:
|
|
38
|
+
def list_corpus_entity_types(self):
|
|
39
|
+
"""
|
|
40
|
+
Loads available entity types in corpus.
|
|
41
|
+
"""
|
|
42
|
+
self.entity_types = {
|
|
43
|
+
entity_type["name"]: entity_type["id"]
|
|
44
|
+
for entity_type in self.api_client.paginate(
|
|
45
|
+
"ListCorpusEntityTypes", id=self.corpus_id
|
|
46
|
+
)
|
|
47
|
+
}
|
|
48
|
+
count = len(self.entity_types)
|
|
49
|
+
logger.info(
|
|
50
|
+
f"Loaded {count} entity {pluralize('type', count)} in corpus ({self.corpus_id})."
|
|
51
|
+
)
|
|
52
|
+
|
|
37
53
|
@unsupported_cache
|
|
54
|
+
def create_entity_type(self, name: str) -> None:
|
|
55
|
+
"""
|
|
56
|
+
Create an entity type on the given corpus.
|
|
57
|
+
|
|
58
|
+
:param name: Name of the entity type.
|
|
59
|
+
"""
|
|
60
|
+
assert name and isinstance(name, str), (
|
|
61
|
+
"name shouldn't be null and should be of type str"
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
try:
|
|
65
|
+
entity_type = self.api_client.request(
|
|
66
|
+
"CreateEntityType",
|
|
67
|
+
body={
|
|
68
|
+
"name": name,
|
|
69
|
+
"corpus": self.corpus_id,
|
|
70
|
+
},
|
|
71
|
+
)
|
|
72
|
+
self.entity_types[name] = entity_type["id"]
|
|
73
|
+
logger.info(f"Created a new entity type with name `{name}`.")
|
|
74
|
+
except ErrorResponse as e:
|
|
75
|
+
# Only reload for 400 errors
|
|
76
|
+
if e.status_code != 400:
|
|
77
|
+
raise
|
|
78
|
+
|
|
79
|
+
# Reload and make sure we have the entity type now
|
|
80
|
+
logger.warning(
|
|
81
|
+
f"Unable to create the entity type `{name}`. Refreshing corpus entity types cache."
|
|
82
|
+
)
|
|
83
|
+
self.list_corpus_entity_types()
|
|
84
|
+
assert name in self.entity_types, (
|
|
85
|
+
f"Missing entity type `{name}` even after refreshing."
|
|
86
|
+
)
|
|
87
|
+
|
|
38
88
|
def check_required_entity_types(
|
|
39
89
|
self, entity_types: list[str], create_missing: bool = True
|
|
40
|
-
):
|
|
41
|
-
"""
|
|
90
|
+
) -> None:
|
|
91
|
+
"""
|
|
92
|
+
Check that every entity type needed is available in the corpus.
|
|
42
93
|
Missing ones may be created automatically if needed.
|
|
43
94
|
|
|
44
95
|
:param entity_types: Entity type names to search.
|
|
45
96
|
:param create_missing: Whether the missing types should be created. Defaults to True.
|
|
46
|
-
:raises MissingEntityType: When an entity type is missing and cannot
|
|
97
|
+
:raises MissingEntityType: When an entity type is missing and cannot be created.
|
|
47
98
|
"""
|
|
48
|
-
|
|
99
|
+
assert entity_types and isinstance(entity_types, list), (
|
|
100
|
+
"entity_types shouldn't be null and should be of type list"
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
for index, entity_type in enumerate(entity_types):
|
|
104
|
+
assert isinstance(entity_type, str), (
|
|
105
|
+
f"Entity type at index {index} in entity_types: Should be of type str"
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
assert create_missing is not None and isinstance(create_missing, bool), (
|
|
109
|
+
"create_missing shouldn't be null and should be of type bool"
|
|
110
|
+
)
|
|
111
|
+
|
|
49
112
|
if not self.entity_types:
|
|
50
|
-
# Load entity_types of corpus
|
|
51
113
|
self.list_corpus_entity_types()
|
|
52
114
|
|
|
53
115
|
for entity_type in entity_types:
|
|
54
|
-
# Do nothing if type already exists
|
|
116
|
+
# Do nothing if the type already exists
|
|
55
117
|
if entity_type in self.entity_types:
|
|
56
118
|
continue
|
|
57
119
|
|
|
@@ -61,15 +123,8 @@ class EntityMixin:
|
|
|
61
123
|
f"Entity type `{entity_type}` was not in the corpus."
|
|
62
124
|
)
|
|
63
125
|
|
|
64
|
-
# Create type if non-existent
|
|
65
|
-
self.
|
|
66
|
-
"CreateEntityType",
|
|
67
|
-
body={
|
|
68
|
-
"name": entity_type,
|
|
69
|
-
"corpus": self.corpus_id,
|
|
70
|
-
},
|
|
71
|
-
)["id"]
|
|
72
|
-
logger.info(f"Created a new entity type with name `{entity_type}`.")
|
|
126
|
+
# Create the type if non-existent
|
|
127
|
+
self.create_entity_type(entity_type)
|
|
73
128
|
|
|
74
129
|
def create_entity(
|
|
75
130
|
self,
|
|
@@ -211,6 +266,7 @@ class EntityMixin:
|
|
|
211
266
|
logger.warning(
|
|
212
267
|
f"Couldn't save created transcription entity in local cache: {e}"
|
|
213
268
|
)
|
|
269
|
+
|
|
214
270
|
return transcription_ent
|
|
215
271
|
|
|
216
272
|
@unsupported_cache
|
|
@@ -387,18 +443,3 @@ class EntityMixin:
|
|
|
387
443
|
logger.info(
|
|
388
444
|
f"Loaded {count} {pluralize('entity', count)} in corpus ({self.corpus_id})"
|
|
389
445
|
)
|
|
390
|
-
|
|
391
|
-
def list_corpus_entity_types(self):
|
|
392
|
-
"""
|
|
393
|
-
Loads available entity types in corpus.
|
|
394
|
-
"""
|
|
395
|
-
self.entity_types = {
|
|
396
|
-
entity_type["name"]: entity_type["id"]
|
|
397
|
-
for entity_type in self.api_client.paginate(
|
|
398
|
-
"ListCorpusEntityTypes", id=self.corpus_id
|
|
399
|
-
)
|
|
400
|
-
}
|
|
401
|
-
count = len(self.entity_types)
|
|
402
|
-
logger.info(
|
|
403
|
-
f"Loaded {count} entity {pluralize('type', count)} in corpus ({self.corpus_id})."
|
|
404
|
-
)
|