arkindex-base-worker 0.3.6rc5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.dist-info}/METADATA +14 -13
- arkindex_base_worker-0.3.7.dist-info/RECORD +47 -0
- {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.dist-info}/WHEEL +1 -1
- {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.dist-info}/top_level.txt +2 -0
- arkindex_worker/cache.py +14 -0
- arkindex_worker/image.py +29 -19
- arkindex_worker/models.py +14 -2
- arkindex_worker/utils.py +17 -3
- arkindex_worker/worker/__init__.py +122 -125
- arkindex_worker/worker/base.py +24 -24
- arkindex_worker/worker/classification.py +18 -25
- arkindex_worker/worker/dataset.py +24 -18
- arkindex_worker/worker/element.py +45 -6
- arkindex_worker/worker/entity.py +35 -4
- arkindex_worker/worker/metadata.py +21 -11
- arkindex_worker/worker/training.py +13 -0
- arkindex_worker/worker/transcription.py +45 -5
- arkindex_worker/worker/version.py +22 -0
- hooks/pre_gen_project.py +3 -0
- tests/conftest.py +14 -6
- tests/test_base_worker.py +0 -6
- tests/test_dataset_worker.py +291 -409
- tests/test_elements_worker/test_classifications.py +365 -539
- tests/test_elements_worker/test_cli.py +1 -1
- tests/test_elements_worker/test_dataset.py +97 -116
- tests/test_elements_worker/test_elements.py +227 -61
- tests/test_elements_worker/test_entities.py +22 -2
- tests/test_elements_worker/test_metadata.py +53 -27
- tests/test_elements_worker/test_training.py +35 -0
- tests/test_elements_worker/test_transcriptions.py +149 -16
- tests/test_elements_worker/test_worker.py +19 -6
- tests/test_image.py +37 -0
- tests/test_utils.py +23 -1
- worker-demo/tests/__init__.py +0 -0
- worker-demo/tests/conftest.py +32 -0
- worker-demo/tests/test_worker.py +12 -0
- worker-demo/worker_demo/__init__.py +6 -0
- worker-demo/worker_demo/worker.py +19 -0
- arkindex_base_worker-0.3.6rc5.dist-info/RECORD +0 -41
- {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.dist-info}/LICENSE +0 -0
arkindex_worker/worker/base.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
The base class for all Arkindex workers.
|
|
3
3
|
"""
|
|
4
|
+
|
|
4
5
|
import argparse
|
|
5
6
|
import json
|
|
6
7
|
import logging
|
|
@@ -20,7 +21,6 @@ from tenacity import (
|
|
|
20
21
|
wait_exponential,
|
|
21
22
|
)
|
|
22
23
|
|
|
23
|
-
from arkindex import ArkindexClient, options_from_env
|
|
24
24
|
from arkindex_worker import logger
|
|
25
25
|
from arkindex_worker.cache import (
|
|
26
26
|
check_version,
|
|
@@ -30,18 +30,7 @@ from arkindex_worker.cache import (
|
|
|
30
30
|
merge_parents_cache,
|
|
31
31
|
)
|
|
32
32
|
from arkindex_worker.utils import close_delete_file, extract_tar_zst_archive
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def _is_500_error(exc: Exception) -> bool:
|
|
36
|
-
"""
|
|
37
|
-
Check if an Arkindex API error has a HTTP 5xx error code.
|
|
38
|
-
Used to retry most API calls in [BaseWorker][arkindex_worker.worker.base.BaseWorker].
|
|
39
|
-
:param exc: Exception to check
|
|
40
|
-
"""
|
|
41
|
-
if not isinstance(exc, ErrorResponse):
|
|
42
|
-
return False
|
|
43
|
-
|
|
44
|
-
return 500 <= exc.status_code < 600
|
|
33
|
+
from teklia_toolbox.requests import _get_arkindex_client, _is_500_error
|
|
45
34
|
|
|
46
35
|
|
|
47
36
|
class ExtrasDirNotFoundError(Exception):
|
|
@@ -72,7 +61,7 @@ class BaseWorker:
|
|
|
72
61
|
self.parser.add_argument(
|
|
73
62
|
"-c",
|
|
74
63
|
"--config",
|
|
75
|
-
help="Alternative configuration file when running without a Worker
|
|
64
|
+
help="Alternative configuration file when running without a Worker Run ID",
|
|
76
65
|
type=open,
|
|
77
66
|
)
|
|
78
67
|
self.parser.add_argument(
|
|
@@ -94,7 +83,7 @@ class BaseWorker:
|
|
|
94
83
|
"--dev",
|
|
95
84
|
help=(
|
|
96
85
|
"Run worker in developer mode. "
|
|
97
|
-
"Worker will be in read-only state even if a
|
|
86
|
+
"Worker will be in read-only state even if a worker run is supplied. "
|
|
98
87
|
),
|
|
99
88
|
action="store_true",
|
|
100
89
|
default=False,
|
|
@@ -148,6 +137,13 @@ class BaseWorker:
|
|
|
148
137
|
# there is at least one available sqlite database either given or in the parent tasks
|
|
149
138
|
self.use_cache = False
|
|
150
139
|
|
|
140
|
+
# model_version_id will be updated in configure() using the worker_run's model version
|
|
141
|
+
# or in configure_for_developers() from the environment
|
|
142
|
+
self.model_version_id = None
|
|
143
|
+
# model_details will be updated in configure() using the worker_run's model version
|
|
144
|
+
# or in configure_for_developers() from the environment
|
|
145
|
+
self.model_details = {}
|
|
146
|
+
|
|
151
147
|
# task_parents will be updated in configure_cache() if the cache is supported,
|
|
152
148
|
# if the task ID is set and if no database is passed as argument
|
|
153
149
|
self.task_parents = []
|
|
@@ -176,12 +172,20 @@ class BaseWorker:
|
|
|
176
172
|
"""
|
|
177
173
|
return self.args.dev or self.worker_run_id is None
|
|
178
174
|
|
|
175
|
+
@property
|
|
176
|
+
def worker_version_id(self):
|
|
177
|
+
"""Deprecated property previously used to retrieve the current WorkerVersion ID.
|
|
178
|
+
|
|
179
|
+
:raises DeprecationWarning: Whenever `worker_version_id` is used.
|
|
180
|
+
"""
|
|
181
|
+
raise DeprecationWarning("`worker_version_id` usage is deprecated")
|
|
182
|
+
|
|
179
183
|
def setup_api_client(self):
|
|
180
184
|
"""
|
|
181
185
|
Create an ArkindexClient to make API requests towards Arkindex instances.
|
|
182
186
|
"""
|
|
183
187
|
# Build Arkindex API client from environment variables
|
|
184
|
-
self.api_client =
|
|
188
|
+
self.api_client = _get_arkindex_client()
|
|
185
189
|
logger.debug(f"Setup Arkindex API client on {self.api_client.document.url}")
|
|
186
190
|
|
|
187
191
|
def configure_for_developers(self):
|
|
@@ -243,25 +247,21 @@ class BaseWorker:
|
|
|
243
247
|
|
|
244
248
|
# Load worker version information
|
|
245
249
|
worker_version = worker_run["worker_version"]
|
|
246
|
-
|
|
247
|
-
# Store worker version id
|
|
248
|
-
self.worker_version_id = worker_version["id"]
|
|
249
|
-
|
|
250
250
|
self.worker_details = worker_version["worker"]
|
|
251
251
|
|
|
252
252
|
logger.info(f"Loaded {worker_run['summary']} from API")
|
|
253
253
|
|
|
254
254
|
# Load model version configuration when available
|
|
255
255
|
model_version = worker_run.get("model_version")
|
|
256
|
-
if model_version
|
|
256
|
+
if model_version:
|
|
257
257
|
logger.info("Loaded model version configuration from WorkerRun")
|
|
258
|
-
self.model_configuration.update(model_version
|
|
258
|
+
self.model_configuration.update(model_version["configuration"])
|
|
259
259
|
|
|
260
260
|
# Set model_version ID as worker attribute
|
|
261
|
-
self.model_version_id = model_version
|
|
261
|
+
self.model_version_id = model_version["id"]
|
|
262
262
|
|
|
263
263
|
# Set model details as worker attribute
|
|
264
|
-
self.model_details = model_version
|
|
264
|
+
self.model_details = model_version["model"]
|
|
265
265
|
|
|
266
266
|
# Retrieve initial configuration from API
|
|
267
267
|
self.config = worker_version["configuration"].get("configuration", {})
|
|
@@ -2,8 +2,6 @@
|
|
|
2
2
|
ElementsWorker methods for classifications and ML classes.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from uuid import UUID
|
|
6
|
-
|
|
7
5
|
from apistar.exceptions import ErrorResponse
|
|
8
6
|
from peewee import IntegrityError
|
|
9
7
|
|
|
@@ -154,13 +152,6 @@ class ClassificationMixin:
|
|
|
154
152
|
# Detect already existing classification
|
|
155
153
|
if e.status_code == 400 and "non_field_errors" in e.content:
|
|
156
154
|
if (
|
|
157
|
-
"The fields element, worker_version, ml_class must make a unique set."
|
|
158
|
-
in e.content["non_field_errors"]
|
|
159
|
-
):
|
|
160
|
-
logger.warning(
|
|
161
|
-
f"This worker version has already set {ml_class} on element {element.id}"
|
|
162
|
-
)
|
|
163
|
-
elif (
|
|
164
155
|
"The fields element, worker_run, ml_class must make a unique set."
|
|
165
156
|
in e.content["non_field_errors"]
|
|
166
157
|
):
|
|
@@ -185,10 +176,14 @@ class ClassificationMixin:
|
|
|
185
176
|
Create multiple classifications at once on the given element through the API.
|
|
186
177
|
|
|
187
178
|
:param element: The element to create classifications on.
|
|
188
|
-
:param classifications:
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
179
|
+
:param classifications: A list of dicts representing a classification each, with the following keys:
|
|
180
|
+
|
|
181
|
+
ml_class (str)
|
|
182
|
+
Required. Name of the MLClass to use.
|
|
183
|
+
confidence (float)
|
|
184
|
+
Required. Confidence score for the classification. Must be between 0 and 1.
|
|
185
|
+
high_confidence (bool)
|
|
186
|
+
Optional. Whether or not the classification is of high confidence.
|
|
192
187
|
|
|
193
188
|
:returns: List of created classifications, as returned in the ``classifications`` field by
|
|
194
189
|
the ``CreateClassifications`` API endpoint.
|
|
@@ -201,18 +196,10 @@ class ClassificationMixin:
|
|
|
201
196
|
), "classifications shouldn't be null and should be of type list"
|
|
202
197
|
|
|
203
198
|
for index, classification in enumerate(classifications):
|
|
204
|
-
|
|
199
|
+
ml_class = classification.get("ml_class")
|
|
205
200
|
assert (
|
|
206
|
-
|
|
207
|
-
), f"Classification at index {index} in classifications:
|
|
208
|
-
|
|
209
|
-
# Make sure it's a valid UUID
|
|
210
|
-
try:
|
|
211
|
-
UUID(ml_class_id)
|
|
212
|
-
except ValueError as e:
|
|
213
|
-
raise ValueError(
|
|
214
|
-
f"Classification at index {index} in classifications: ml_class_id is not a valid uuid."
|
|
215
|
-
) from e
|
|
201
|
+
ml_class and isinstance(ml_class, str)
|
|
202
|
+
), f"Classification at index {index} in classifications: ml_class shouldn't be null and should be of type str"
|
|
216
203
|
|
|
217
204
|
confidence = classification.get("confidence")
|
|
218
205
|
assert (
|
|
@@ -238,7 +225,13 @@ class ClassificationMixin:
|
|
|
238
225
|
body={
|
|
239
226
|
"parent": str(element.id),
|
|
240
227
|
"worker_run_id": self.worker_run_id,
|
|
241
|
-
"classifications":
|
|
228
|
+
"classifications": [
|
|
229
|
+
{
|
|
230
|
+
**classification,
|
|
231
|
+
"ml_class": self.get_ml_class_id(classification["ml_class"]),
|
|
232
|
+
}
|
|
233
|
+
for classification in classifications
|
|
234
|
+
],
|
|
242
235
|
},
|
|
243
236
|
)["classifications"]
|
|
244
237
|
|
|
@@ -6,7 +6,8 @@ from collections.abc import Iterator
|
|
|
6
6
|
from enum import Enum
|
|
7
7
|
|
|
8
8
|
from arkindex_worker import logger
|
|
9
|
-
from arkindex_worker.
|
|
9
|
+
from arkindex_worker.cache import unsupported_cache
|
|
10
|
+
from arkindex_worker.models import Dataset, Element, Set
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class DatasetState(Enum):
|
|
@@ -36,38 +37,43 @@ class DatasetState(Enum):
|
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
class DatasetMixin:
|
|
39
|
-
def
|
|
40
|
+
def list_process_sets(self) -> Iterator[Set]:
|
|
40
41
|
"""
|
|
41
|
-
List
|
|
42
|
+
List dataset sets associated to the worker's process. This helper is not available in developer mode.
|
|
42
43
|
|
|
43
|
-
:returns: An iterator of ``
|
|
44
|
+
:returns: An iterator of ``Set`` objects built from the ``ListProcessSets`` API endpoint.
|
|
44
45
|
"""
|
|
45
46
|
assert not self.is_read_only, "This helper is not available in read-only mode."
|
|
46
47
|
|
|
47
48
|
results = self.api_client.paginate(
|
|
48
|
-
"
|
|
49
|
+
"ListProcessSets", id=self.process_information["id"]
|
|
49
50
|
)
|
|
50
51
|
|
|
51
|
-
return map(
|
|
52
|
+
return map(
|
|
53
|
+
lambda result: Set(
|
|
54
|
+
name=result["set_name"], dataset=Dataset(**result["dataset"])
|
|
55
|
+
),
|
|
56
|
+
results,
|
|
57
|
+
)
|
|
52
58
|
|
|
53
|
-
def
|
|
59
|
+
def list_set_elements(self, dataset_set: Set) -> Iterator[Element]:
|
|
54
60
|
"""
|
|
55
|
-
List elements in a dataset.
|
|
61
|
+
List elements in a dataset set.
|
|
56
62
|
|
|
57
|
-
:param
|
|
58
|
-
:returns: An iterator of
|
|
63
|
+
:param dataset_set: Set to find elements in.
|
|
64
|
+
:returns: An iterator of Element built from the ``ListDatasetElements`` API endpoint.
|
|
59
65
|
"""
|
|
60
|
-
assert
|
|
61
|
-
|
|
62
|
-
), "
|
|
66
|
+
assert dataset_set and isinstance(
|
|
67
|
+
dataset_set, Set
|
|
68
|
+
), "dataset_set shouldn't be null and should be a Set"
|
|
63
69
|
|
|
64
|
-
results = self.api_client.paginate(
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
return (result["set"], Element(**result["element"]))
|
|
70
|
+
results = self.api_client.paginate(
|
|
71
|
+
"ListDatasetElements", id=dataset_set.dataset.id, set=dataset_set.name
|
|
72
|
+
)
|
|
68
73
|
|
|
69
|
-
return map(
|
|
74
|
+
return map(lambda result: Element(**result["element"]), results)
|
|
70
75
|
|
|
76
|
+
@unsupported_cache
|
|
71
77
|
def update_dataset_state(self, dataset: Dataset, state: DatasetState) -> Dataset:
|
|
72
78
|
"""
|
|
73
79
|
Partially updates a dataset state through the API.
|
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
"""
|
|
2
2
|
ElementsWorker methods for elements and element types.
|
|
3
3
|
"""
|
|
4
|
+
|
|
4
5
|
from collections.abc import Iterable
|
|
5
6
|
from typing import NamedTuple
|
|
6
7
|
from uuid import UUID
|
|
8
|
+
from warnings import warn
|
|
7
9
|
|
|
8
10
|
from peewee import IntegrityError
|
|
9
11
|
|
|
10
12
|
from arkindex_worker import logger
|
|
11
|
-
from arkindex_worker.cache import CachedElement, CachedImage
|
|
13
|
+
from arkindex_worker.cache import CachedElement, CachedImage, unsupported_cache
|
|
12
14
|
from arkindex_worker.models import Element
|
|
13
15
|
|
|
14
16
|
|
|
@@ -29,6 +31,7 @@ class MissingTypeError(Exception):
|
|
|
29
31
|
|
|
30
32
|
|
|
31
33
|
class ElementMixin:
|
|
34
|
+
@unsupported_cache
|
|
32
35
|
def create_required_types(self, element_types: list[ElementType]):
|
|
33
36
|
"""Creates given element types in the corpus.
|
|
34
37
|
|
|
@@ -81,6 +84,7 @@ class ElementMixin:
|
|
|
81
84
|
|
|
82
85
|
return True
|
|
83
86
|
|
|
87
|
+
@unsupported_cache
|
|
84
88
|
def create_sub_element(
|
|
85
89
|
self,
|
|
86
90
|
element: Element,
|
|
@@ -283,6 +287,7 @@ class ElementMixin:
|
|
|
283
287
|
|
|
284
288
|
return created_ids
|
|
285
289
|
|
|
290
|
+
@unsupported_cache
|
|
286
291
|
def create_element_parent(
|
|
287
292
|
self,
|
|
288
293
|
parent: Element,
|
|
@@ -422,6 +427,13 @@ class ElementMixin:
|
|
|
422
427
|
"""
|
|
423
428
|
List children of an element.
|
|
424
429
|
|
|
430
|
+
Warns:
|
|
431
|
+
----
|
|
432
|
+
The following parameters are **deprecated**:
|
|
433
|
+
|
|
434
|
+
- `transcription_worker_version` in favor of `transcription_worker_run`
|
|
435
|
+
- `worker_version` in favor of `worker_run`
|
|
436
|
+
|
|
425
437
|
:param element: Parent element to find children of.
|
|
426
438
|
:param folder: Restrict to or exclude elements with folder types.
|
|
427
439
|
This parameter is not supported when caching is enabled.
|
|
@@ -429,9 +441,9 @@ class ElementMixin:
|
|
|
429
441
|
This parameter is not supported when caching is enabled.
|
|
430
442
|
:param recursive: Look for elements recursively (grand-children, etc.)
|
|
431
443
|
This parameter is not supported when caching is enabled.
|
|
432
|
-
:param transcription_worker_version: Restrict to elements that have a transcription created by a worker version with this UUID.
|
|
444
|
+
:param transcription_worker_version: **Deprecated** Restrict to elements that have a transcription created by a worker version with this UUID. Set to False to look for elements that have a manual transcription.
|
|
433
445
|
This parameter is not supported when caching is enabled.
|
|
434
|
-
:param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID.
|
|
446
|
+
:param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID. Set to False to look for elements that have a manual transcription.
|
|
435
447
|
This parameter is not supported when caching is enabled.
|
|
436
448
|
:param type: Restrict to elements with a specific type slug
|
|
437
449
|
This parameter is not supported when caching is enabled.
|
|
@@ -447,7 +459,7 @@ class ElementMixin:
|
|
|
447
459
|
:param with_zone: Include the ``zone`` attribute in the response,
|
|
448
460
|
holding the element's image and polygon.
|
|
449
461
|
This parameter is not supported when caching is enabled.
|
|
450
|
-
:param worker_version: Restrict to elements created by a worker version with this UUID.
|
|
462
|
+
:param worker_version: **Deprecated** Restrict to elements created by a worker version with this UUID.
|
|
451
463
|
:param worker_run: Restrict to elements created by a worker run with this UUID.
|
|
452
464
|
:return: An iterable of dicts from the ``ListElementChildren`` API endpoint,
|
|
453
465
|
or an iterable of [CachedElement][arkindex_worker.cache.CachedElement] when caching is enabled.
|
|
@@ -466,6 +478,11 @@ class ElementMixin:
|
|
|
466
478
|
assert isinstance(recursive, bool), "recursive should be of type bool"
|
|
467
479
|
query_params["recursive"] = recursive
|
|
468
480
|
if transcription_worker_version is not None:
|
|
481
|
+
warn(
|
|
482
|
+
"`transcription_worker_version` usage is deprecated. Consider using `transcription_worker_run` instead.",
|
|
483
|
+
DeprecationWarning,
|
|
484
|
+
stacklevel=1,
|
|
485
|
+
)
|
|
469
486
|
assert isinstance(
|
|
470
487
|
transcription_worker_version, str | bool
|
|
471
488
|
), "transcription_worker_version should be of type str or bool"
|
|
@@ -506,6 +523,11 @@ class ElementMixin:
|
|
|
506
523
|
assert isinstance(with_zone, bool), "with_zone should be of type bool"
|
|
507
524
|
query_params["with_zone"] = with_zone
|
|
508
525
|
if worker_version is not None:
|
|
526
|
+
warn(
|
|
527
|
+
"`worker_version` usage is deprecated. Consider using `worker_run` instead.",
|
|
528
|
+
DeprecationWarning,
|
|
529
|
+
stacklevel=1,
|
|
530
|
+
)
|
|
509
531
|
assert isinstance(
|
|
510
532
|
worker_version, str | bool
|
|
511
533
|
), "worker_version should be of type str or bool"
|
|
@@ -584,6 +606,13 @@ class ElementMixin:
|
|
|
584
606
|
"""
|
|
585
607
|
List parents of an element.
|
|
586
608
|
|
|
609
|
+
Warns:
|
|
610
|
+
----
|
|
611
|
+
The following parameters are **deprecated**:
|
|
612
|
+
|
|
613
|
+
- `transcription_worker_version` in favor of `transcription_worker_run`
|
|
614
|
+
- `worker_version` in favor of `worker_run`
|
|
615
|
+
|
|
587
616
|
:param element: Child element to find parents of.
|
|
588
617
|
:param folder: Restrict to or exclude elements with folder types.
|
|
589
618
|
This parameter is not supported when caching is enabled.
|
|
@@ -591,7 +620,7 @@ class ElementMixin:
|
|
|
591
620
|
This parameter is not supported when caching is enabled.
|
|
592
621
|
:param recursive: Look for elements recursively (grand-children, etc.)
|
|
593
622
|
This parameter is not supported when caching is enabled.
|
|
594
|
-
:param transcription_worker_version: Restrict to elements that have a transcription created by a worker version with this UUID.
|
|
623
|
+
:param transcription_worker_version: **Deprecated** Restrict to elements that have a transcription created by a worker version with this UUID.
|
|
595
624
|
This parameter is not supported when caching is enabled.
|
|
596
625
|
:param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID.
|
|
597
626
|
This parameter is not supported when caching is enabled.
|
|
@@ -609,7 +638,7 @@ class ElementMixin:
|
|
|
609
638
|
:param with_zone: Include the ``zone`` attribute in the response,
|
|
610
639
|
holding the element's image and polygon.
|
|
611
640
|
This parameter is not supported when caching is enabled.
|
|
612
|
-
:param worker_version: Restrict to elements created by a worker version with this UUID.
|
|
641
|
+
:param worker_version: **Deprecated** Restrict to elements created by a worker version with this UUID.
|
|
613
642
|
:param worker_run: Restrict to elements created by a worker run with this UUID.
|
|
614
643
|
:return: An iterable of dicts from the ``ListElementParents`` API endpoint,
|
|
615
644
|
or an iterable of [CachedElement][arkindex_worker.cache.CachedElement] when caching is enabled.
|
|
@@ -628,6 +657,11 @@ class ElementMixin:
|
|
|
628
657
|
assert isinstance(recursive, bool), "recursive should be of type bool"
|
|
629
658
|
query_params["recursive"] = recursive
|
|
630
659
|
if transcription_worker_version is not None:
|
|
660
|
+
warn(
|
|
661
|
+
"`transcription_worker_version` usage is deprecated. Consider using `transcription_worker_run` instead.",
|
|
662
|
+
DeprecationWarning,
|
|
663
|
+
stacklevel=1,
|
|
664
|
+
)
|
|
631
665
|
assert isinstance(
|
|
632
666
|
transcription_worker_version, str | bool
|
|
633
667
|
), "transcription_worker_version should be of type str or bool"
|
|
@@ -668,6 +702,11 @@ class ElementMixin:
|
|
|
668
702
|
assert isinstance(with_zone, bool), "with_zone should be of type bool"
|
|
669
703
|
query_params["with_zone"] = with_zone
|
|
670
704
|
if worker_version is not None:
|
|
705
|
+
warn(
|
|
706
|
+
"`worker_version` usage is deprecated. Consider using `worker_run` instead.",
|
|
707
|
+
DeprecationWarning,
|
|
708
|
+
stacklevel=1,
|
|
709
|
+
)
|
|
671
710
|
assert isinstance(
|
|
672
711
|
worker_version, str | bool
|
|
673
712
|
), "worker_version should be of type str or bool"
|
arkindex_worker/worker/entity.py
CHANGED
|
@@ -4,11 +4,16 @@ ElementsWorker methods for entities.
|
|
|
4
4
|
|
|
5
5
|
from operator import itemgetter
|
|
6
6
|
from typing import TypedDict
|
|
7
|
+
from warnings import warn
|
|
7
8
|
|
|
8
9
|
from peewee import IntegrityError
|
|
9
10
|
|
|
10
11
|
from arkindex_worker import logger
|
|
11
|
-
from arkindex_worker.cache import
|
|
12
|
+
from arkindex_worker.cache import (
|
|
13
|
+
CachedEntity,
|
|
14
|
+
CachedTranscriptionEntity,
|
|
15
|
+
unsupported_cache,
|
|
16
|
+
)
|
|
12
17
|
from arkindex_worker.models import Element, Transcription
|
|
13
18
|
|
|
14
19
|
|
|
@@ -28,6 +33,7 @@ class MissingEntityType(Exception):
|
|
|
28
33
|
|
|
29
34
|
|
|
30
35
|
class EntityMixin:
|
|
36
|
+
@unsupported_cache
|
|
31
37
|
def check_required_entity_types(
|
|
32
38
|
self, entity_types: list[str], create_missing: bool = True
|
|
33
39
|
):
|
|
@@ -205,6 +211,7 @@ class EntityMixin:
|
|
|
205
211
|
)
|
|
206
212
|
return transcription_ent
|
|
207
213
|
|
|
214
|
+
@unsupported_cache
|
|
208
215
|
def create_transcription_entities(
|
|
209
216
|
self,
|
|
210
217
|
transcription: Transcription,
|
|
@@ -297,13 +304,21 @@ class EntityMixin:
|
|
|
297
304
|
self,
|
|
298
305
|
transcription: Transcription,
|
|
299
306
|
worker_version: str | bool | None = None,
|
|
307
|
+
worker_run: str | bool | None = None,
|
|
300
308
|
):
|
|
301
309
|
"""
|
|
302
310
|
List existing entities on a transcription
|
|
303
311
|
This method does not support cache
|
|
304
312
|
|
|
313
|
+
Warns:
|
|
314
|
+
----
|
|
315
|
+
The following parameters are **deprecated**:
|
|
316
|
+
|
|
317
|
+
- `worker_version` in favor of `worker_run`
|
|
318
|
+
|
|
305
319
|
:param transcription: The transcription to list entities on.
|
|
306
|
-
:param worker_version: Restrict to entities created by a worker version with this UUID. Set to False to look for manually created
|
|
320
|
+
:param worker_version: **Deprecated** Restrict to entities created by a worker version with this UUID. Set to False to look for manually created entities.
|
|
321
|
+
:param worker_run: Restrict to entities created by a worker run with this UUID. Set to False to look for manually created entities.
|
|
307
322
|
"""
|
|
308
323
|
query_params = {}
|
|
309
324
|
assert transcription and isinstance(
|
|
@@ -311,6 +326,11 @@ class EntityMixin:
|
|
|
311
326
|
), "transcription shouldn't be null and should be a Transcription"
|
|
312
327
|
|
|
313
328
|
if worker_version is not None:
|
|
329
|
+
warn(
|
|
330
|
+
"`worker_version` usage is deprecated. Consider using `worker_run` instead.",
|
|
331
|
+
DeprecationWarning,
|
|
332
|
+
stacklevel=1,
|
|
333
|
+
)
|
|
314
334
|
assert isinstance(
|
|
315
335
|
worker_version, str | bool
|
|
316
336
|
), "worker_version should be of type str or bool"
|
|
@@ -320,6 +340,15 @@ class EntityMixin:
|
|
|
320
340
|
worker_version is False
|
|
321
341
|
), "if of type bool, worker_version can only be set to False"
|
|
322
342
|
query_params["worker_version"] = worker_version
|
|
343
|
+
if worker_run is not None:
|
|
344
|
+
assert isinstance(
|
|
345
|
+
worker_run, str | bool
|
|
346
|
+
), "worker_run should be of type str or bool"
|
|
347
|
+
if isinstance(worker_run, bool):
|
|
348
|
+
assert (
|
|
349
|
+
worker_run is False
|
|
350
|
+
), "if of type bool, worker_run can only be set to False"
|
|
351
|
+
query_params["worker_run"] = worker_run
|
|
323
352
|
|
|
324
353
|
return self.api_client.paginate(
|
|
325
354
|
"ListTranscriptionEntities", id=transcription.id, **query_params
|
|
@@ -351,8 +380,9 @@ class EntityMixin:
|
|
|
351
380
|
"ListCorpusEntities", id=self.corpus_id, **query_params
|
|
352
381
|
)
|
|
353
382
|
}
|
|
383
|
+
count = len(self.entities)
|
|
354
384
|
logger.info(
|
|
355
|
-
f
|
|
385
|
+
f'Loaded {count} entit{"ies" if count > 1 else "y"} in corpus ({self.corpus_id})'
|
|
356
386
|
)
|
|
357
387
|
|
|
358
388
|
def list_corpus_entity_types(
|
|
@@ -367,6 +397,7 @@ class EntityMixin:
|
|
|
367
397
|
"ListCorpusEntityTypes", id=self.corpus_id
|
|
368
398
|
)
|
|
369
399
|
}
|
|
400
|
+
count = len(self.entity_types)
|
|
370
401
|
logger.info(
|
|
371
|
-
f
|
|
402
|
+
f'Loaded {count} entity type{"s"[:count>1]} in corpus ({self.corpus_id}).'
|
|
372
403
|
)
|
|
@@ -5,7 +5,7 @@ ElementsWorker methods for metadata.
|
|
|
5
5
|
from enum import Enum
|
|
6
6
|
|
|
7
7
|
from arkindex_worker import logger
|
|
8
|
-
from arkindex_worker.cache import CachedElement
|
|
8
|
+
from arkindex_worker.cache import CachedElement, unsupported_cache
|
|
9
9
|
from arkindex_worker.models import Element
|
|
10
10
|
|
|
11
11
|
|
|
@@ -50,12 +50,13 @@ class MetaType(Enum):
|
|
|
50
50
|
|
|
51
51
|
URL = "url"
|
|
52
52
|
"""
|
|
53
|
-
A metadata with a string value that should be interpreted as
|
|
53
|
+
A metadata with a string value that should be interpreted as a URL.
|
|
54
54
|
Only the ``http`` and ``https`` schemes are allowed.
|
|
55
55
|
"""
|
|
56
56
|
|
|
57
57
|
|
|
58
58
|
class MetaDataMixin:
|
|
59
|
+
@unsupported_cache
|
|
59
60
|
def create_metadata(
|
|
60
61
|
self,
|
|
61
62
|
element: Element | CachedElement,
|
|
@@ -106,17 +107,18 @@ class MetaDataMixin:
|
|
|
106
107
|
|
|
107
108
|
return metadata["id"]
|
|
108
109
|
|
|
109
|
-
|
|
110
|
+
@unsupported_cache
|
|
111
|
+
def create_metadata_bulk(
|
|
110
112
|
self,
|
|
111
113
|
element: Element | CachedElement,
|
|
112
|
-
|
|
114
|
+
metadata_list: list[dict[str, MetaType | str | int | float | None]],
|
|
113
115
|
) -> list[dict[str, str]]:
|
|
114
116
|
"""
|
|
115
117
|
Create multiple metadata on an existing element.
|
|
116
118
|
This method does not support cache.
|
|
117
119
|
|
|
118
120
|
:param element: The element to create multiple metadata on.
|
|
119
|
-
:param
|
|
121
|
+
:param metadata_list: The list of dict whose keys are the following:
|
|
120
122
|
- type: MetaType
|
|
121
123
|
- name: str
|
|
122
124
|
- value: str | int | float
|
|
@@ -126,13 +128,13 @@ class MetaDataMixin:
|
|
|
126
128
|
element, Element | CachedElement
|
|
127
129
|
), "element shouldn't be null and should be of type Element or CachedElement"
|
|
128
130
|
|
|
129
|
-
assert
|
|
130
|
-
|
|
131
|
-
), "
|
|
131
|
+
assert metadata_list and isinstance(
|
|
132
|
+
metadata_list, list
|
|
133
|
+
), "metadata_list shouldn't be null and should be of type list of dict"
|
|
132
134
|
|
|
133
135
|
# Make a copy to avoid modifying the metadata_list argument
|
|
134
136
|
metas = []
|
|
135
|
-
for index, metadata in enumerate(
|
|
137
|
+
for index, metadata in enumerate(metadata_list):
|
|
136
138
|
assert isinstance(
|
|
137
139
|
metadata, dict
|
|
138
140
|
), f"Element at index {index} in metadata_list: Should be of type dict"
|
|
@@ -178,16 +180,24 @@ class MetaDataMixin:
|
|
|
178
180
|
return created_metadata_list
|
|
179
181
|
|
|
180
182
|
def list_element_metadata(
|
|
181
|
-
self, element: Element | CachedElement
|
|
183
|
+
self, element: Element | CachedElement, load_parents: bool | None = None
|
|
182
184
|
) -> list[dict[str, str]]:
|
|
183
185
|
"""
|
|
184
186
|
List all metadata linked to an element.
|
|
185
187
|
This method does not support cache.
|
|
186
188
|
|
|
187
189
|
:param element: The element to list metadata on.
|
|
190
|
+
:param load_parents: Also include all metadata from the element's parents in the response.
|
|
188
191
|
"""
|
|
189
192
|
assert element and isinstance(
|
|
190
193
|
element, Element | CachedElement
|
|
191
194
|
), "element shouldn't be null and should be of type Element or CachedElement"
|
|
192
195
|
|
|
193
|
-
|
|
196
|
+
query_params = {}
|
|
197
|
+
if load_parents is not None:
|
|
198
|
+
assert isinstance(load_parents, bool), "load_parents should be of type bool"
|
|
199
|
+
query_params["load_parents"] = load_parents
|
|
200
|
+
|
|
201
|
+
return self.api_client.paginate(
|
|
202
|
+
"ListElementMetaData", id=element.id, **query_params
|
|
203
|
+
)
|
|
@@ -81,6 +81,10 @@ class TrainingMixin:
|
|
|
81
81
|
|
|
82
82
|
model_version = None
|
|
83
83
|
|
|
84
|
+
@property
|
|
85
|
+
def is_finetuning(self) -> bool:
|
|
86
|
+
return bool(self.model_version_id)
|
|
87
|
+
|
|
84
88
|
@skip_if_read_only
|
|
85
89
|
def publish_model_version(
|
|
86
90
|
self,
|
|
@@ -276,8 +280,17 @@ class TrainingMixin:
|
|
|
276
280
|
},
|
|
277
281
|
)
|
|
278
282
|
except ErrorResponse as e:
|
|
283
|
+
# Temporary fix while waiting for `ValidateModelVersion` refactoring as it can
|
|
284
|
+
# return errors even when the model version is properly validated
|
|
285
|
+
if e.status_code in [403, 500]:
|
|
286
|
+
logger.warning(
|
|
287
|
+
f'An error occurred while validating model version {self.model_version["id"]}, please check its status.'
|
|
288
|
+
)
|
|
289
|
+
return
|
|
290
|
+
|
|
279
291
|
if e.status_code != 409:
|
|
280
292
|
raise e
|
|
293
|
+
|
|
281
294
|
logger.warning(
|
|
282
295
|
f"An available model version exists with hash {hash}, using it instead of the pending version."
|
|
283
296
|
)
|