arkindex-base-worker 0.4.0rc4__tar.gz → 0.4.0rc6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/PKG-INFO +9 -9
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_base_worker.egg-info/PKG-INFO +9 -9
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_base_worker.egg-info/SOURCES.txt +1 -0
- arkindex_base_worker-0.4.0rc6/arkindex_base_worker.egg-info/requires.txt +17 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/__init__.py +11 -3
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/entity.py +8 -19
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/process.py +29 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/pyproject.toml +9 -9
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_entity_create.py +42 -92
- arkindex_base_worker-0.4.0rc6/tests/test_elements_worker/test_process.py +89 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_worker.py +46 -0
- arkindex_base_worker-0.4.0rc4/arkindex_base_worker.egg-info/requires.txt +0 -17
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/LICENSE +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/README.md +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_base_worker.egg-info/top_level.txt +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/__init__.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/cache.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/image.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/models.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/utils.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/base.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/classification.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/corpus.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/dataset.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/element.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/image.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/metadata.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/task.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/training.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/transcription.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/version.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/hooks/pre_gen_project.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/setup.cfg +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/__init__.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/conftest.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_base_worker.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_cache.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_dataset_worker.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_element.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/__init__.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_classification.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_cli.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_corpus.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_dataset.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_element.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_element_create_multiple.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_element_create_single.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_element_list_children.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_element_list_parents.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_entity_list_and_check.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_image.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_metadata.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_task.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_training.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_transcription_create.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_transcription_create_with_elements.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_transcription_list.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_elements_worker/test_version.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_image.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_merge.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_utils.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/worker-demo/tests/__init__.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/worker-demo/tests/conftest.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/worker-demo/tests/test_worker.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/worker-demo/worker_demo/__init__.py +0 -0
- {arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/worker-demo/worker_demo/worker.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.4.0rc4
|
|
3
|
+
Version: 0.4.0rc6
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -41,19 +41,19 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
41
41
|
Requires-Python: >=3.10
|
|
42
42
|
Description-Content-Type: text/markdown
|
|
43
43
|
License-File: LICENSE
|
|
44
|
-
Requires-Dist: humanize==4.10.0
|
|
44
|
+
Requires-Dist: humanize==4.11.0
|
|
45
45
|
Requires-Dist: peewee~=3.17
|
|
46
|
-
Requires-Dist: Pillow==10.4.0
|
|
47
|
-
Requires-Dist: python-gnupg==0.5.2
|
|
46
|
+
Requires-Dist: Pillow==11.0.0
|
|
47
|
+
Requires-Dist: python-gnupg==0.5.3
|
|
48
48
|
Requires-Dist: shapely==2.0.6
|
|
49
49
|
Requires-Dist: teklia-toolbox==0.1.7b1
|
|
50
|
-
Requires-Dist: zstandard==0.22.0
|
|
50
|
+
Requires-Dist: zstandard==0.23.0
|
|
51
51
|
Provides-Extra: docs
|
|
52
|
-
Requires-Dist: black==24.4.2; extra == "docs"
|
|
53
|
-
Requires-Dist: mkdocs-material==9.5.33; extra == "docs"
|
|
54
|
-
Requires-Dist: mkdocstrings-python==1.11.1; extra == "docs"
|
|
52
|
+
Requires-Dist: black==24.10.0; extra == "docs"
|
|
53
|
+
Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
|
|
54
|
+
Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
|
|
55
55
|
Provides-Extra: tests
|
|
56
|
-
Requires-Dist: pytest==8.3.2; extra == "tests"
|
|
56
|
+
Requires-Dist: pytest==8.3.4; extra == "tests"
|
|
57
57
|
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
58
58
|
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
59
59
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.4.0rc4
|
|
3
|
+
Version: 0.4.0rc6
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -41,19 +41,19 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
41
41
|
Requires-Python: >=3.10
|
|
42
42
|
Description-Content-Type: text/markdown
|
|
43
43
|
License-File: LICENSE
|
|
44
|
-
Requires-Dist: humanize==4.10.0
|
|
44
|
+
Requires-Dist: humanize==4.11.0
|
|
45
45
|
Requires-Dist: peewee~=3.17
|
|
46
|
-
Requires-Dist: Pillow==10.4.0
|
|
47
|
-
Requires-Dist: python-gnupg==0.5.2
|
|
46
|
+
Requires-Dist: Pillow==11.0.0
|
|
47
|
+
Requires-Dist: python-gnupg==0.5.3
|
|
48
48
|
Requires-Dist: shapely==2.0.6
|
|
49
49
|
Requires-Dist: teklia-toolbox==0.1.7b1
|
|
50
|
-
Requires-Dist: zstandard==0.22.0
|
|
50
|
+
Requires-Dist: zstandard==0.23.0
|
|
51
51
|
Provides-Extra: docs
|
|
52
|
-
Requires-Dist: black==24.4.2; extra == "docs"
|
|
53
|
-
Requires-Dist: mkdocs-material==9.5.33; extra == "docs"
|
|
54
|
-
Requires-Dist: mkdocstrings-python==1.11.1; extra == "docs"
|
|
52
|
+
Requires-Dist: black==24.10.0; extra == "docs"
|
|
53
|
+
Requires-Dist: mkdocs-material==9.5.48; extra == "docs"
|
|
54
|
+
Requires-Dist: mkdocstrings-python==1.12.2; extra == "docs"
|
|
55
55
|
Provides-Extra: tests
|
|
56
|
-
Requires-Dist: pytest==8.3.2; extra == "tests"
|
|
56
|
+
Requires-Dist: pytest==8.3.4; extra == "tests"
|
|
57
57
|
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
58
58
|
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
59
59
|
|
|
@@ -49,6 +49,7 @@ tests/test_elements_worker/test_entity_create.py
|
|
|
49
49
|
tests/test_elements_worker/test_entity_list_and_check.py
|
|
50
50
|
tests/test_elements_worker/test_image.py
|
|
51
51
|
tests/test_elements_worker/test_metadata.py
|
|
52
|
+
tests/test_elements_worker/test_process.py
|
|
52
53
|
tests/test_elements_worker/test_task.py
|
|
53
54
|
tests/test_elements_worker/test_training.py
|
|
54
55
|
tests/test_elements_worker/test_transcription_create.py
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
humanize==4.11.0
|
|
2
|
+
peewee~=3.17
|
|
3
|
+
Pillow==11.0.0
|
|
4
|
+
python-gnupg==0.5.3
|
|
5
|
+
shapely==2.0.6
|
|
6
|
+
teklia-toolbox==0.1.7b1
|
|
7
|
+
zstandard==0.23.0
|
|
8
|
+
|
|
9
|
+
[docs]
|
|
10
|
+
black==24.10.0
|
|
11
|
+
mkdocs-material==9.5.48
|
|
12
|
+
mkdocstrings-python==1.12.2
|
|
13
|
+
|
|
14
|
+
[tests]
|
|
15
|
+
pytest==8.3.4
|
|
16
|
+
pytest-mock==3.14.0
|
|
17
|
+
pytest-responses==0.5.1
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/__init__.py
RENAMED
|
@@ -27,7 +27,7 @@ from arkindex_worker.worker.element import ElementMixin
|
|
|
27
27
|
from arkindex_worker.worker.entity import EntityMixin
|
|
28
28
|
from arkindex_worker.worker.image import ImageMixin
|
|
29
29
|
from arkindex_worker.worker.metadata import MetaDataMixin, MetaType # noqa: F401
|
|
30
|
-
from arkindex_worker.worker.process import ActivityState, ProcessMode
|
|
30
|
+
from arkindex_worker.worker.process import ActivityState, ProcessMixin, ProcessMode
|
|
31
31
|
from arkindex_worker.worker.task import TaskMixin
|
|
32
32
|
from arkindex_worker.worker.transcription import TranscriptionMixin
|
|
33
33
|
from arkindex_worker.worker.version import WorkerVersionMixin
|
|
@@ -44,6 +44,7 @@ class ElementsWorker(
|
|
|
44
44
|
EntityMixin,
|
|
45
45
|
MetaDataMixin,
|
|
46
46
|
ImageMixin,
|
|
47
|
+
ProcessMixin,
|
|
47
48
|
):
|
|
48
49
|
"""
|
|
49
50
|
Base class for ML workers that operate on Arkindex elements.
|
|
@@ -119,6 +120,9 @@ class ElementsWorker(
|
|
|
119
120
|
return list(
|
|
120
121
|
chain.from_iterable(map(self.list_set_elements, self.list_sets()))
|
|
121
122
|
)
|
|
123
|
+
elif self.process_mode == ProcessMode.Export:
|
|
124
|
+
# For export mode processes, use list_process_elements and return element IDs
|
|
125
|
+
return {item["id"] for item in self.list_process_elements()}
|
|
122
126
|
|
|
123
127
|
invalid_element_ids = list(filter(invalid_element_id, out))
|
|
124
128
|
assert (
|
|
@@ -133,8 +137,12 @@ class ElementsWorker(
|
|
|
133
137
|
Whether or not WorkerActivity support has been enabled on the DataImport
|
|
134
138
|
used to run this worker.
|
|
135
139
|
"""
|
|
136
|
-
if self.is_read_only or self.process_mode == ProcessMode.Dataset:
|
|
137
|
-
|
|
140
|
+
if self.is_read_only or self.process_mode in [
|
|
141
|
+
ProcessMode.Dataset,
|
|
142
|
+
ProcessMode.Export,
|
|
143
|
+
]:
|
|
144
|
+
# Worker activities are also disabled when running an ElementsWorker in a Dataset process
|
|
145
|
+
# and when running export processes.
|
|
138
146
|
return False
|
|
139
147
|
assert (
|
|
140
148
|
self.process_information
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/entity.py
RENAMED
|
@@ -16,9 +16,6 @@ from arkindex_worker.cache import (
|
|
|
16
16
|
)
|
|
17
17
|
from arkindex_worker.models import Element, Transcription
|
|
18
18
|
from arkindex_worker.utils import (
|
|
19
|
-
DEFAULT_BATCH_SIZE,
|
|
20
|
-
batch_publication,
|
|
21
|
-
make_batches,
|
|
22
19
|
pluralize,
|
|
23
20
|
)
|
|
24
21
|
|
|
@@ -219,12 +216,10 @@ class EntityMixin:
|
|
|
219
216
|
return transcription_ent
|
|
220
217
|
|
|
221
218
|
@unsupported_cache
|
|
222
|
-
@batch_publication
|
|
223
219
|
def create_transcription_entities(
|
|
224
220
|
self,
|
|
225
221
|
transcription: Transcription,
|
|
226
222
|
entities: list[Entity],
|
|
227
|
-
batch_size: int = DEFAULT_BATCH_SIZE,
|
|
228
223
|
) -> list[dict[str, str]]:
|
|
229
224
|
"""
|
|
230
225
|
Create multiple entities attached to a transcription in a single API request.
|
|
@@ -247,8 +242,6 @@ class EntityMixin:
|
|
|
247
242
|
confidence (float or None)
|
|
248
243
|
Optional confidence score, between 0.0 and 1.0.
|
|
249
244
|
|
|
250
|
-
:param batch_size: The size of each batch, which will be used to split the publication to avoid API errors.
|
|
251
|
-
|
|
252
245
|
:return: List of dicts, with each dict having a two keys, `transcription_entity_id` and `entity_id`, holding the UUID of each created object.
|
|
253
246
|
"""
|
|
254
247
|
assert transcription and isinstance(
|
|
@@ -300,18 +293,14 @@ class EntityMixin:
|
|
|
300
293
|
)
|
|
301
294
|
return
|
|
302
295
|
|
|
303
|
-
created_entities =
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
"
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
"entities": batch,
|
|
312
|
-
},
|
|
313
|
-
)["entities"]
|
|
314
|
-
]
|
|
296
|
+
created_entities = self.api_client.request(
|
|
297
|
+
"CreateTranscriptionEntities",
|
|
298
|
+
id=transcription.id,
|
|
299
|
+
body={
|
|
300
|
+
"worker_run_id": self.worker_run_id,
|
|
301
|
+
"entities": entities,
|
|
302
|
+
},
|
|
303
|
+
)["entities"]
|
|
315
304
|
|
|
316
305
|
return created_entities
|
|
317
306
|
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/process.py
RENAMED
|
@@ -1,5 +1,11 @@
|
|
|
1
|
+
from collections.abc import Iterator
|
|
1
2
|
from enum import Enum
|
|
2
3
|
|
|
4
|
+
from arkindex_worker.cache import unsupported_cache
|
|
5
|
+
|
|
6
|
+
# Increases the number of elements returned per page by the API
|
|
7
|
+
PROCESS_ELEMENTS_PAGE_SIZE = 500
|
|
8
|
+
|
|
3
9
|
|
|
4
10
|
class ActivityState(Enum):
|
|
5
11
|
"""
|
|
@@ -61,3 +67,26 @@ class ProcessMode(Enum):
|
|
|
61
67
|
"""
|
|
62
68
|
Dataset processes.
|
|
63
69
|
"""
|
|
70
|
+
|
|
71
|
+
Export = "export"
|
|
72
|
+
"""
|
|
73
|
+
Export processes.
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class ProcessMixin:
|
|
78
|
+
@unsupported_cache
|
|
79
|
+
def list_process_elements(self, with_image: bool = False) -> Iterator[dict]:
|
|
80
|
+
"""
|
|
81
|
+
List the elements of a process.
|
|
82
|
+
|
|
83
|
+
:param with_image: whether or not to include zone and image information in the elements response.
|
|
84
|
+
:returns: the process' elements.
|
|
85
|
+
"""
|
|
86
|
+
return self.api_client.paginate(
|
|
87
|
+
"ListProcessElements",
|
|
88
|
+
id=self.process_information["id"],
|
|
89
|
+
with_image=with_image,
|
|
90
|
+
allow_missing_data=True,
|
|
91
|
+
page_size=PROCESS_ELEMENTS_PAGE_SIZE,
|
|
92
|
+
)
|
|
@@ -4,17 +4,17 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "arkindex-base-worker"
|
|
7
|
-
version = "0.4.0rc4"
|
|
7
|
+
version = "0.4.0rc6"
|
|
8
8
|
description = "Base Worker to easily build Arkindex ML workflows"
|
|
9
9
|
license = { file = "LICENSE" }
|
|
10
10
|
dependencies = [
|
|
11
|
-
"humanize==4.10.0",
|
|
11
|
+
"humanize==4.11.0",
|
|
12
12
|
"peewee~=3.17",
|
|
13
|
-
"Pillow==10.4.0",
|
|
14
|
-
"python-gnupg==0.5.2",
|
|
13
|
+
"Pillow==11.0.0",
|
|
14
|
+
"python-gnupg==0.5.3",
|
|
15
15
|
"shapely==2.0.6",
|
|
16
16
|
"teklia-toolbox==0.1.7b1",
|
|
17
|
-
"zstandard==0.22.0",
|
|
17
|
+
"zstandard==0.23.0",
|
|
18
18
|
]
|
|
19
19
|
authors = [
|
|
20
20
|
{ name = "Teklia", email = "contact@teklia.com" },
|
|
@@ -44,12 +44,12 @@ Authors = "https://teklia.com"
|
|
|
44
44
|
|
|
45
45
|
[project.optional-dependencies]
|
|
46
46
|
docs = [
|
|
47
|
-
"black==24.4.2",
|
|
48
|
-
"mkdocs-material==9.5.33",
|
|
49
|
-
"mkdocstrings-python==1.11.1",
|
|
47
|
+
"black==24.10.0",
|
|
48
|
+
"mkdocs-material==9.5.48",
|
|
49
|
+
"mkdocstrings-python==1.12.2",
|
|
50
50
|
]
|
|
51
51
|
tests = [
|
|
52
|
-
"pytest==8.3.2",
|
|
52
|
+
"pytest==8.3.4",
|
|
53
53
|
"pytest-mock==3.14.0",
|
|
54
54
|
"pytest-responses==0.5.1",
|
|
55
55
|
]
|
|
@@ -13,7 +13,6 @@ from arkindex_worker.cache import (
|
|
|
13
13
|
CachedTranscriptionEntity,
|
|
14
14
|
)
|
|
15
15
|
from arkindex_worker.models import Transcription
|
|
16
|
-
from arkindex_worker.utils import DEFAULT_BATCH_SIZE
|
|
17
16
|
from arkindex_worker.worker.transcription import TextOrientation
|
|
18
17
|
from tests import CORPUS_ID
|
|
19
18
|
|
|
@@ -836,89 +835,50 @@ def test_create_transcription_entities_wrong_entity(
|
|
|
836
835
|
)
|
|
837
836
|
|
|
838
837
|
|
|
839
|
-
|
|
840
|
-
def test_create_transcription_entities(batch_size, responses, mock_elements_worker):
|
|
838
|
+
def test_create_transcription_entities(responses, mock_elements_worker):
|
|
841
839
|
transcription = Transcription(id="transcription-id")
|
|
842
840
|
|
|
843
841
|
# Call to Transcription entities creation in bulk
|
|
844
|
-
|
|
845
|
-
responses.
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
853
|
-
"entities": [
|
|
854
|
-
{
|
|
855
|
-
"name": "Teklia",
|
|
856
|
-
"type_id": "22222222-2222-2222-2222-222222222222",
|
|
857
|
-
"offset": 0,
|
|
858
|
-
"length": 6,
|
|
859
|
-
"confidence": 1.0,
|
|
860
|
-
},
|
|
861
|
-
{
|
|
862
|
-
"name": "Team Rocket",
|
|
863
|
-
"type_id": "22222222-2222-2222-2222-222222222222",
|
|
864
|
-
"offset": 7,
|
|
865
|
-
"length": 11,
|
|
866
|
-
"confidence": 1.0,
|
|
867
|
-
},
|
|
868
|
-
],
|
|
869
|
-
}
|
|
870
|
-
)
|
|
871
|
-
],
|
|
872
|
-
json={
|
|
873
|
-
"entities": [
|
|
874
|
-
{
|
|
875
|
-
"transcription_entity_id": "transc-entity-id",
|
|
876
|
-
"entity_id": "entity-id1",
|
|
877
|
-
},
|
|
878
|
-
{
|
|
879
|
-
"transcription_entity_id": "transc-entity-id",
|
|
880
|
-
"entity_id": "entity-id2",
|
|
881
|
-
},
|
|
882
|
-
]
|
|
883
|
-
},
|
|
884
|
-
)
|
|
885
|
-
else:
|
|
886
|
-
for idx, (name, offset, length) in enumerate(
|
|
887
|
-
[
|
|
888
|
-
("Teklia", 0, 6),
|
|
889
|
-
("Team Rocket", 7, 11),
|
|
890
|
-
],
|
|
891
|
-
start=1,
|
|
892
|
-
):
|
|
893
|
-
responses.add(
|
|
894
|
-
responses.POST,
|
|
895
|
-
"http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
|
|
896
|
-
status=201,
|
|
897
|
-
match=[
|
|
898
|
-
matchers.json_params_matcher(
|
|
899
|
-
{
|
|
900
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
901
|
-
"entities": [
|
|
902
|
-
{
|
|
903
|
-
"name": name,
|
|
904
|
-
"type_id": "22222222-2222-2222-2222-222222222222",
|
|
905
|
-
"offset": offset,
|
|
906
|
-
"length": length,
|
|
907
|
-
"confidence": 1.0,
|
|
908
|
-
}
|
|
909
|
-
],
|
|
910
|
-
}
|
|
911
|
-
)
|
|
912
|
-
],
|
|
913
|
-
json={
|
|
842
|
+
responses.add(
|
|
843
|
+
responses.POST,
|
|
844
|
+
"http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
|
|
845
|
+
status=201,
|
|
846
|
+
match=[
|
|
847
|
+
matchers.json_params_matcher(
|
|
848
|
+
{
|
|
849
|
+
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
914
850
|
"entities": [
|
|
915
851
|
{
|
|
916
|
-
"
|
|
917
|
-
"
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
852
|
+
"name": "Teklia",
|
|
853
|
+
"type_id": "22222222-2222-2222-2222-222222222222",
|
|
854
|
+
"offset": 0,
|
|
855
|
+
"length": 6,
|
|
856
|
+
"confidence": 1.0,
|
|
857
|
+
},
|
|
858
|
+
{
|
|
859
|
+
"name": "Team Rocket",
|
|
860
|
+
"type_id": "22222222-2222-2222-2222-222222222222",
|
|
861
|
+
"offset": 7,
|
|
862
|
+
"length": 11,
|
|
863
|
+
"confidence": 1.0,
|
|
864
|
+
},
|
|
865
|
+
],
|
|
866
|
+
}
|
|
921
867
|
)
|
|
868
|
+
],
|
|
869
|
+
json={
|
|
870
|
+
"entities": [
|
|
871
|
+
{
|
|
872
|
+
"transcription_entity_id": "transc-entity-id",
|
|
873
|
+
"entity_id": "entity-id1",
|
|
874
|
+
},
|
|
875
|
+
{
|
|
876
|
+
"transcription_entity_id": "transc-entity-id",
|
|
877
|
+
"entity_id": "entity-id2",
|
|
878
|
+
},
|
|
879
|
+
]
|
|
880
|
+
},
|
|
881
|
+
)
|
|
922
882
|
|
|
923
883
|
# Store entity type/slug correspondence on the worker
|
|
924
884
|
mock_elements_worker.entity_types = {
|
|
@@ -942,26 +902,16 @@ def test_create_transcription_entities(batch_size, responses, mock_elements_work
|
|
|
942
902
|
"confidence": 1.0,
|
|
943
903
|
},
|
|
944
904
|
],
|
|
945
|
-
batch_size=batch_size,
|
|
946
905
|
)
|
|
947
906
|
|
|
948
907
|
assert len(created_objects) == 2
|
|
949
908
|
|
|
950
|
-
|
|
909
|
+
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
910
|
+
assert [
|
|
911
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
912
|
+
] == BASE_API_CALLS + [
|
|
951
913
|
(
|
|
952
914
|
"POST",
|
|
953
915
|
"http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
|
|
954
916
|
)
|
|
955
917
|
]
|
|
956
|
-
if batch_size != DEFAULT_BATCH_SIZE:
|
|
957
|
-
bulk_api_calls.append(
|
|
958
|
-
(
|
|
959
|
-
"POST",
|
|
960
|
-
"http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
|
|
961
|
-
)
|
|
962
|
-
)
|
|
963
|
-
|
|
964
|
-
assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
|
|
965
|
-
assert [
|
|
966
|
-
(call.request.method, call.request.url) for call in responses.calls
|
|
967
|
-
] == BASE_API_CALLS + bulk_api_calls
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from tests import PROCESS_ID
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@pytest.mark.parametrize(
|
|
7
|
+
("with_image", "elements"),
|
|
8
|
+
[
|
|
9
|
+
(
|
|
10
|
+
False,
|
|
11
|
+
[
|
|
12
|
+
{
|
|
13
|
+
"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
14
|
+
"type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
15
|
+
"name": "element 1",
|
|
16
|
+
"confidence": 1,
|
|
17
|
+
"image_id": None,
|
|
18
|
+
"image_width": None,
|
|
19
|
+
"image_height": None,
|
|
20
|
+
"image_url": None,
|
|
21
|
+
"polygon": None,
|
|
22
|
+
"rotation_angle": 0,
|
|
23
|
+
"mirrored": False,
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
|
|
27
|
+
"type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
28
|
+
"name": "element 2",
|
|
29
|
+
"confidence": 1,
|
|
30
|
+
"image_id": None,
|
|
31
|
+
"image_width": None,
|
|
32
|
+
"image_height": None,
|
|
33
|
+
"image_url": None,
|
|
34
|
+
"polygon": None,
|
|
35
|
+
"rotation_angle": 0,
|
|
36
|
+
"mirrored": False,
|
|
37
|
+
},
|
|
38
|
+
],
|
|
39
|
+
),
|
|
40
|
+
(
|
|
41
|
+
True,
|
|
42
|
+
[
|
|
43
|
+
{
|
|
44
|
+
"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
45
|
+
"type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
46
|
+
"name": "element 1",
|
|
47
|
+
"confidence": 1,
|
|
48
|
+
"image_id": "aaa2aaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
49
|
+
"image_width": 76,
|
|
50
|
+
"image_height": 138,
|
|
51
|
+
"image_url": "http://somewhere.com/iiif/image.jpeg",
|
|
52
|
+
"polygon": [[0, 0], [0, 40], [20, 40], [20, 0]],
|
|
53
|
+
"rotation_angle": 0,
|
|
54
|
+
"mirrored": False,
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
|
|
58
|
+
"type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
59
|
+
"name": "element 2",
|
|
60
|
+
"confidence": 1,
|
|
61
|
+
"image_id": "aaa2aaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
62
|
+
"image_width": 138,
|
|
63
|
+
"image_height": 76,
|
|
64
|
+
"image_url": "http://somewhere.com/iiif/image.jpeg",
|
|
65
|
+
"polygon": [[0, 0], [0, 40], [20, 40], [20, 0]],
|
|
66
|
+
"rotation_angle": 0,
|
|
67
|
+
"mirrored": False,
|
|
68
|
+
},
|
|
69
|
+
],
|
|
70
|
+
),
|
|
71
|
+
],
|
|
72
|
+
)
|
|
73
|
+
def test_list_process_elements_with_image(
|
|
74
|
+
responses, mock_elements_worker, with_image, elements
|
|
75
|
+
):
|
|
76
|
+
responses.add(
|
|
77
|
+
responses.GET,
|
|
78
|
+
f"http://testserver/api/v1/process/{PROCESS_ID}/elements/?page_size=500&with_count=true&with_image={with_image}",
|
|
79
|
+
status=200,
|
|
80
|
+
json={
|
|
81
|
+
"count": 2,
|
|
82
|
+
"next": None,
|
|
83
|
+
"results": elements,
|
|
84
|
+
},
|
|
85
|
+
)
|
|
86
|
+
assert (
|
|
87
|
+
list(mock_elements_worker.list_process_elements(with_image=with_image))
|
|
88
|
+
== elements
|
|
89
|
+
)
|
|
@@ -16,6 +16,7 @@ from arkindex_worker.models import Element
|
|
|
16
16
|
from arkindex_worker.worker import ActivityState, ElementsWorker
|
|
17
17
|
from arkindex_worker.worker.dataset import DatasetState
|
|
18
18
|
from arkindex_worker.worker.process import ProcessMode
|
|
19
|
+
from tests import PROCESS_ID
|
|
19
20
|
|
|
20
21
|
from . import BASE_API_CALLS
|
|
21
22
|
|
|
@@ -523,6 +524,51 @@ def test_get_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
|
|
|
523
524
|
worker.get_elements()
|
|
524
525
|
|
|
525
526
|
|
|
527
|
+
def test_get_elements_export_process(mock_elements_worker, responses):
|
|
528
|
+
responses.add(
|
|
529
|
+
responses.GET,
|
|
530
|
+
f"http://testserver/api/v1/process/{PROCESS_ID}/elements/?page_size=500&with_count=true&with_image=False",
|
|
531
|
+
status=200,
|
|
532
|
+
json={
|
|
533
|
+
"count": 2,
|
|
534
|
+
"next": None,
|
|
535
|
+
"results": [
|
|
536
|
+
{
|
|
537
|
+
"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
538
|
+
"type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
539
|
+
"name": "element 1",
|
|
540
|
+
"confidence": 1,
|
|
541
|
+
"image_id": None,
|
|
542
|
+
"image_width": None,
|
|
543
|
+
"image_height": None,
|
|
544
|
+
"image_url": None,
|
|
545
|
+
"polygon": None,
|
|
546
|
+
"rotation_angle": 0,
|
|
547
|
+
"mirrored": False,
|
|
548
|
+
},
|
|
549
|
+
{
|
|
550
|
+
"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
|
|
551
|
+
"type_id": "baaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
552
|
+
"name": "element 2",
|
|
553
|
+
"confidence": 1,
|
|
554
|
+
"image_id": None,
|
|
555
|
+
"image_width": None,
|
|
556
|
+
"image_height": None,
|
|
557
|
+
"image_url": None,
|
|
558
|
+
"polygon": None,
|
|
559
|
+
"rotation_angle": 0,
|
|
560
|
+
"mirrored": False,
|
|
561
|
+
},
|
|
562
|
+
],
|
|
563
|
+
},
|
|
564
|
+
)
|
|
565
|
+
mock_elements_worker.process_information["mode"] = "export"
|
|
566
|
+
assert set(mock_elements_worker.get_elements()) == {
|
|
567
|
+
"aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
|
568
|
+
"aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0",
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
|
|
526
572
|
@pytest.mark.usefixtures("_mock_worker_run_api")
|
|
527
573
|
def test_activities_disabled(responses, monkeypatch):
|
|
528
574
|
"""Test worker process elements without updating activities when they are disabled for the process"""
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
humanize==4.10.0
|
|
2
|
-
peewee~=3.17
|
|
3
|
-
Pillow==10.4.0
|
|
4
|
-
python-gnupg==0.5.2
|
|
5
|
-
shapely==2.0.6
|
|
6
|
-
teklia-toolbox==0.1.7b1
|
|
7
|
-
zstandard==0.22.0
|
|
8
|
-
|
|
9
|
-
[docs]
|
|
10
|
-
black==24.4.2
|
|
11
|
-
mkdocs-material==9.5.33
|
|
12
|
-
mkdocstrings-python==1.11.1
|
|
13
|
-
|
|
14
|
-
[tests]
|
|
15
|
-
pytest==8.3.2
|
|
16
|
-
pytest-mock==3.14.0
|
|
17
|
-
pytest-responses==0.5.1
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/base.py
RENAMED
|
File without changes
|
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/corpus.py
RENAMED
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/dataset.py
RENAMED
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/element.py
RENAMED
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/image.py
RENAMED
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/metadata.py
RENAMED
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/task.py
RENAMED
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/training.py
RENAMED
|
File without changes
|
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/arkindex_worker/worker/version.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/tests/test_dataset_worker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/worker-demo/tests/__init__.py
RENAMED
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/worker-demo/tests/conftest.py
RENAMED
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/worker-demo/tests/test_worker.py
RENAMED
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/worker-demo/worker_demo/__init__.py
RENAMED
|
File without changes
|
{arkindex_base_worker-0.4.0rc4 → arkindex_base_worker-0.4.0rc6}/worker-demo/worker_demo/worker.py
RENAMED
|
File without changes
|