arkindex-base-worker 0.4.0__py3-none-any.whl → 0.4.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a2.dist-info}/METADATA +13 -15
- arkindex_base_worker-0.4.0a2.dist-info/RECORD +51 -0
- {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a2.dist-info}/WHEEL +1 -1
- arkindex_worker/cache.py +1 -1
- arkindex_worker/image.py +1 -120
- arkindex_worker/utils.py +0 -82
- arkindex_worker/worker/__init__.py +161 -46
- arkindex_worker/worker/base.py +11 -36
- arkindex_worker/worker/classification.py +18 -34
- arkindex_worker/worker/corpus.py +4 -21
- arkindex_worker/worker/dataset.py +1 -71
- arkindex_worker/worker/element.py +91 -352
- arkindex_worker/worker/entity.py +11 -11
- arkindex_worker/worker/metadata.py +9 -19
- arkindex_worker/worker/task.py +4 -5
- arkindex_worker/worker/training.py +6 -6
- arkindex_worker/worker/transcription.py +68 -89
- arkindex_worker/worker/version.py +1 -3
- tests/__init__.py +1 -1
- tests/conftest.py +45 -33
- tests/test_base_worker.py +3 -204
- tests/test_dataset_worker.py +4 -7
- tests/test_elements_worker/{test_classification.py → test_classifications.py} +61 -194
- tests/test_elements_worker/test_corpus.py +1 -32
- tests/test_elements_worker/test_dataset.py +1 -1
- tests/test_elements_worker/test_elements.py +2734 -0
- tests/test_elements_worker/{test_entity_create.py → test_entities.py} +160 -26
- tests/test_elements_worker/test_image.py +1 -2
- tests/test_elements_worker/test_metadata.py +99 -224
- tests/test_elements_worker/test_task.py +1 -1
- tests/test_elements_worker/test_training.py +2 -2
- tests/test_elements_worker/test_transcriptions.py +2102 -0
- tests/test_elements_worker/test_worker.py +280 -563
- tests/test_image.py +204 -429
- tests/test_merge.py +2 -1
- tests/test_utils.py +3 -66
- arkindex_base_worker-0.4.0.dist-info/RECORD +0 -61
- arkindex_worker/worker/process.py +0 -92
- tests/test_elements_worker/test_element.py +0 -427
- tests/test_elements_worker/test_element_create_multiple.py +0 -715
- tests/test_elements_worker/test_element_create_single.py +0 -528
- tests/test_elements_worker/test_element_list_children.py +0 -969
- tests/test_elements_worker/test_element_list_parents.py +0 -530
- tests/test_elements_worker/test_entity_list_and_check.py +0 -160
- tests/test_elements_worker/test_process.py +0 -89
- tests/test_elements_worker/test_transcription_create.py +0 -873
- tests/test_elements_worker/test_transcription_create_with_elements.py +0 -951
- tests/test_elements_worker/test_transcription_list.py +0 -450
- tests/test_elements_worker/test_version.py +0 -60
- {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a2.dist-info}/LICENSE +0 -0
- {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a2.dist-info}/top_level.txt +0 -0
--- arkindex_worker/worker/dataset.py (0.4.0)
+++ arkindex_worker/worker/dataset.py (0.4.0a2)
@@ -2,8 +2,6 @@
 BaseWorker methods for datasets.
 """

-import uuid
-from argparse import ArgumentTypeError
 from collections.abc import Iterator
 from enum import Enum

@@ -38,55 +36,7 @@ class DatasetState(Enum):
     """


-class MissingDatasetArchive(Exception):
-    """
-    Exception raised when the compressed archive associated to
-    a dataset isn't found in its task artifacts.
-    """
-
-
-def check_dataset_set(value: str) -> tuple[uuid.UUID, str]:
-    """The `--set` argument should have the following format:
-    <dataset_id>:<set_name>
-
-    Args:
-        value (str): Provided argument.
-
-    Raises:
-        ArgumentTypeError: When the value is invalid.
-
-    Returns:
-        tuple[uuid.UUID, str]: The ID of the dataset parsed as UUID and the name of the set.
-    """
-    values = value.split(":")
-    if len(values) != 2:
-        raise ArgumentTypeError(
-            f"'{value}' is not in the correct format `<dataset_id>:<set_name>`"
-        )
-
-    dataset_id, set_name = values
-    try:
-        dataset_id = uuid.UUID(dataset_id)
-        return (dataset_id, set_name)
-    except (TypeError, ValueError) as e:
-        raise ArgumentTypeError(f"'{dataset_id}' should be a valid UUID") from e
-
-
 class DatasetMixin:
-    def add_arguments(self) -> None:
-        """Define specific ``argparse`` arguments for the worker using this mixin"""
-        self.parser.add_argument(
-            "--set",
-            type=check_dataset_set,
-            nargs="+",
-            help="""
-            One or more Arkindex dataset sets, format is <dataset_uuid>:<set_name>
-            (e.g.: "12341234-1234-1234-1234-123412341234:train")
-            """,
-            default=[],
-        )
-        super().add_arguments()
-
     def list_process_sets(self) -> Iterator[Set]:
         """
         List dataset sets associated to the worker's process. This helper is not available in developer mode.
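The removed `--set` argument relied on the `check_dataset_set` value parser deleted above. The sketch below reproduces that parser verbatim and adds one illustrative call, to show which CLI input 0.4.0 accepts and 0.4.0a2 no longer does; the example UUID is taken from the removed help text.

```python
# Parser copied from the removed 0.4.0 code above; the final call is an
# illustrative example and not part of the package.
import uuid
from argparse import ArgumentTypeError


def check_dataset_set(value: str) -> tuple[uuid.UUID, str]:
    values = value.split(":")
    if len(values) != 2:
        raise ArgumentTypeError(
            f"'{value}' is not in the correct format `<dataset_id>:<set_name>`"
        )

    dataset_id, set_name = values
    try:
        dataset_id = uuid.UUID(dataset_id)
        return (dataset_id, set_name)
    except (TypeError, ValueError) as e:
        raise ArgumentTypeError(f"'{dataset_id}' should be a valid UUID") from e


print(check_dataset_set("12341234-1234-1234-1234-123412341234:train"))
# (UUID('12341234-1234-1234-1234-123412341234'), 'train')
```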
@@ -123,26 +73,6 @@ class DatasetMixin:

         return map(lambda result: Element(**result["element"]), results)

-    def list_sets(self) -> Iterator[Set]:
-        """
-        List the sets to be processed, either from the CLI arguments or using the
-        [list_process_sets][arkindex_worker.worker.dataset.DatasetMixin.list_process_sets] method.
-
-        :returns: An iterator of ``Set`` objects.
-        """
-        if not self.is_read_only:
-            yield from self.list_process_sets()
-
-        datasets: dict[uuid.UUID, Dataset] = {}
-        for dataset_id, set_name in self.args.set:
-            # Retrieving dataset information if not already cached
-            if dataset_id not in datasets:
-                datasets[dataset_id] = Dataset(
-                    **self.api_client.request("RetrieveDataset", id=dataset_id)
-                )
-
-            yield Set(name=set_name, dataset=datasets[dataset_id])
-
     @unsupported_cache
     def update_dataset_state(self, dataset: Dataset, state: DatasetState) -> Dataset:
         """
@@ -163,7 +93,7 @@ class DatasetMixin:
             logger.warning("Cannot update dataset as this worker is in read-only mode")
             return

-        updated_dataset = self.
+        updated_dataset = self.request(
             "PartialUpdateDataset",
             id=dataset.id,
             body={"state": state.value},
--- arkindex_worker/worker/element.py (0.4.0)
+++ arkindex_worker/worker/element.py (0.4.0a2)
@@ -2,9 +2,7 @@
 ElementsWorker methods for elements and element types.
 """

-import os
 from collections.abc import Iterable
-from operator import attrgetter
 from typing import NamedTuple
 from uuid import UUID
 from warnings import warn
@@ -14,12 +12,6 @@ from peewee import IntegrityError
 from arkindex_worker import logger
 from arkindex_worker.cache import CachedElement, CachedImage, unsupported_cache
 from arkindex_worker.models import Element
-from arkindex_worker.utils import (
-    DEFAULT_BATCH_SIZE,
-    batch_publication,
-    make_batches,
-    pluralize,
-)


 class ElementType(NamedTuple):
@@ -39,37 +31,6 @@ class MissingTypeError(Exception):


 class ElementMixin:
-    def add_arguments(self):
-        """Define specific ``argparse`` arguments for the worker using this mixin"""
-        self.parser.add_argument(
-            "--elements-list",
-            help="JSON elements list to use",
-            type=open,
-            default=os.environ.get("TASK_ELEMENTS"),
-        )
-        self.parser.add_argument(
-            "--element",
-            type=str,
-            nargs="+",
-            help="One or more Arkindex element ID",
-        )
-        super().add_arguments()
-
-    def list_corpus_types(self):
-        """
-        Loads available element types in corpus.
-        """
-        self.corpus_types = {
-            element_type["slug"]: element_type
-            for element_type in self.api_client.request(
-                "RetrieveCorpus", id=self.corpus_id
-            )["types"]
-        }
-        count = len(self.corpus_types)
-        logger.info(
-            f'Loaded {count} element {pluralize("type", count)} in corpus ({self.corpus_id}).'
-        )
-
     @unsupported_cache
     def create_required_types(self, element_types: list[ElementType]):
         """Creates given element types in the corpus.
@@ -77,7 +38,7 @@ class ElementMixin:
         :param element_types: The missing element types to create.
         """
         for element_type in element_types:
-            self.
+            self.request(
                 "CreateElementType",
                 body={
                     "slug": element_type.slug,
@@ -105,10 +66,10 @@ class ElementMixin:
             isinstance(slug, str) for slug in type_slugs
         ), "Element type slugs must be strings."

-
-
+        corpus = self.request("RetrieveCorpus", id=self.corpus_id)
+        available_slugs = {element_type["slug"] for element_type in corpus["types"]}
+        missing_slugs = set(type_slugs) - available_slugs

-        missing_slugs = set(type_slugs) - set(self.corpus_types)
         if missing_slugs:
             if create_missing:
                 self.create_required_types(
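On the 0.4.0a2 side, the missing slugs are computed from a fresh `RetrieveCorpus` response instead of the `self.corpus_types` mapping that 0.4.0 populates via `list_corpus_types`. A standalone sketch of that set arithmetic, using a fabricated corpus payload for illustration:

```python
# Standalone illustration of the slug check added on the 0.4.0a2 side.
# The corpus payload is fabricated; only its "types"/"slug" shape mirrors
# the RetrieveCorpus response used in the diff above.
corpus = {"types": [{"slug": "page"}, {"slug": "text_line"}]}
type_slugs = ["page", "word"]

available_slugs = {element_type["slug"] for element_type in corpus["types"]}
missing_slugs = set(type_slugs) - available_slugs
print(sorted(missing_slugs))  # ['word'] -> triggers creation or MissingTypeError
```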
@@ -118,7 +79,7 @@ class ElementMixin:
                 )
             else:
                 raise MissingTypeError(
-                    f'Element
+                    f'Element type(s) {", ".join(sorted(missing_slugs))} were not found in the {corpus["name"]} corpus ({corpus["id"]}).'
                 )

         return True
@@ -184,7 +145,7 @@ class ElementMixin:
             logger.warning("Cannot create element as this worker is in read-only mode")
             return

-        sub_element = self.
+        sub_element = self.request(
             "CreateElement",
             body={
                 "type": type,
@@ -200,12 +161,10 @@ class ElementMixin:

         return sub_element["id"] if slim_output else sub_element

-    @batch_publication
     def create_elements(
         self,
         parent: Element | CachedElement,
         elements: list[dict[str, str | list[list[int | float]] | float | None]],
-        batch_size: int = DEFAULT_BATCH_SIZE,
     ) -> list[dict[str, str]]:
         """
         Create child elements on the given element in a single API request.
@@ -226,8 +185,6 @@ class ElementMixin:
             confidence (float or None)
                 Optional confidence score, between 0.0 and 1.0.

-        :param batch_size: The size of each batch, which will be used to split the publication to avoid API errors.
-
         :return: List of dicts, with each dict having a single key, ``id``, holding the UUID of each created element.
         """
         if isinstance(parent, Element):
@@ -286,18 +243,14 @@ class ElementMixin:
             logger.warning("Cannot create elements as this worker is in read-only mode")
             return

-        created_ids =
-
-
-
-            "
-
-
-
-                "elements": batch,
-            },
-        )
-        ]
+        created_ids = self.request(
+            "CreateElements",
+            id=parent.id,
+            body={
+                "worker_run_id": self.worker_run_id,
+                "elements": elements,
+            },
+        )

         if self.use_cache:
             # Create the image as needed and handle both an Element and a CachedElement
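The 0.4.0 side wraps `create_elements` in `@batch_publication` and splits the payload with `make_batches`, while 0.4.0a2 sends a single `CreateElements` request. The sketch below is a rough, self-contained approximation of what such batching amounts to; it is not the library's `make_batches` implementation, whose call shape only appears in the removed `create_element_children` code further down.

```python
# Rough approximation of batched publication (not the arkindex_worker.utils
# implementation): split the element payloads into fixed-size chunks, one
# CreateElements call per chunk on the 0.4.0 side.
from collections.abc import Iterator


def make_batches(items: list, label: str, batch_size: int) -> Iterator[list]:
    # `label` mirrors the real helper's second argument ("element", "child", ...);
    # its exact role is not shown in this diff, so it is unused here.
    for start in range(0, len(items), batch_size):
        yield items[start : start + batch_size]


elements = [{"name": str(i), "type": "page"} for i in range(5)]
for batch in make_batches(elements, "element", batch_size=2):
    print(len(batch))  # 2, 2, 1 -> three requests instead of one
```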
@@ -358,58 +311,12 @@ class ElementMixin:
             logger.warning("Cannot link elements as this worker is in read-only mode")
             return

-        return self.
+        return self.request(
             "CreateElementParent",
             parent=parent.id,
             child=child.id,
         )

-    @unsupported_cache
-    @batch_publication
-    def create_element_children(
-        self,
-        parent: Element,
-        children: list[Element],
-        batch_size: int = DEFAULT_BATCH_SIZE,
-    ) -> list[str]:
-        """
-        Link multiple elements to a single parent through the API.
-
-        :param parent: Parent element.
-        :param children: A list of child elements.
-        :param batch_size: The size of each batch, which will be used to split the publication to avoid API errors.
-
-        :returns: A list containing the string UUID of each child linked to the parent.
-        """
-        assert parent and isinstance(
-            parent, Element
-        ), "parent shouldn't be null and should be of type Element"
-
-        assert children and isinstance(
-            children, list
-        ), "children shouldn't be null and should be of type list"
-
-        for index, child in enumerate(children):
-            assert isinstance(
-                child, Element
-            ), f"Child at index {index} in children: Should be of type Element"
-
-        if self.is_read_only:
-            logger.warning("Cannot link elements as this worker is in read-only mode")
-            return
-
-        return [
-            child_id
-            for batch in make_batches(children, "child", batch_size)
-            for child_id in self.api_client.request(
-                "CreateElementChildren",
-                id=parent.id,
-                body={
-                    "children": list(map(attrgetter("id"), batch)),
-                },
-            )["children"]
-        ]
-
     def partial_update_element(
         self, element: Element | CachedElement, **kwargs
     ) -> dict:
@@ -476,7 +383,7 @@ class ElementMixin:
             logger.warning("Cannot update element as this worker is in read-only mode")
             return

-        updated_element = self.
+        updated_element = self.request(
             "PartialUpdateElement",
             id=element.id,
             body=kwargs,
@@ -500,178 +407,6 @@ class ElementMixin:

         return updated_element

-    def list_elements(
-        self,
-        folder: bool | None = None,
-        name: str | None = None,
-        top_level: bool | None = None,
-        transcription_worker_version: str | bool | None = None,
-        transcription_worker_run: str | bool | None = None,
-        type: str | None = None,
-        with_classes: bool | None = None,
-        with_corpus: bool | None = None,
-        with_metadata: bool | None = None,
-        with_has_children: bool | None = None,
-        with_zone: bool | None = None,
-        worker_version: str | bool | None = None,
-        worker_run: str | bool | None = None,
-    ) -> Iterable[dict] | Iterable[CachedElement]:
-        """
-        List element in a corpus.
-
-        Warns:
-        ----
-        The following parameters are **deprecated**:
-
-        - `transcription_worker_version` in favor of `transcription_worker_run`
-        - `worker_version` in favor of `worker_run`
-
-        :param folder: Restrict to or exclude elements with folder types.
-            This parameter is not supported when caching is enabled.
-        :param name: Restrict to elements whose name contain a substring (case-insensitive).
-            This parameter is not supported when caching is enabled.
-        :param top_level: Restrict to or exclude folder elements without parent elements (top-level elements).
-            This parameter is not supported when caching is enabled.
-        :param transcription_worker_version: **Deprecated** Restrict to elements that have a transcription created by a worker version with this UUID. Set to False to look for elements that have a manual transcription.
-            This parameter is not supported when caching is enabled.
-        :param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID. Set to False to look for elements that have a manual transcription.
-            This parameter is not supported when caching is enabled.
-        :param type: Restrict to elements with a specific type slug
-            This parameter is not supported when caching is enabled.
-        :param with_classes: Include each element's classifications in the response.
-            This parameter is not supported when caching is enabled.
-        :param with_corpus: Include each element's corpus in the response.
-            This parameter is not supported when caching is enabled.
-        :param with_has_children: Include the ``has_children`` attribute in the response,
-            indicating if this element has child elements of its own.
-            This parameter is not supported when caching is enabled.
-        :param with_metadata: Include each element's metadata in the response.
-            This parameter is not supported when caching is enabled.
-        :param with_zone: Include the ``zone`` attribute in the response,
-            holding the element's image and polygon.
-            This parameter is not supported when caching is enabled.
-        :param worker_version: **Deprecated** Restrict to elements created by a worker version with this UUID.
-        :param worker_run: Restrict to elements created by a worker run with this UUID.
-        :return: An iterable of dicts from the ``ListElementChildren`` API endpoint,
-            or an iterable of [CachedElement][arkindex_worker.cache.CachedElement] when caching is enabled.
-        """
-        query_params = {}
-        if folder is not None:
-            assert isinstance(folder, bool), "folder should be of type bool"
-            query_params["folder"] = folder
-        if name:
-            assert isinstance(name, str), "name should be of type str"
-            query_params["name"] = name
-        if top_level is not None:
-            assert isinstance(top_level, bool), "top_level should be of type bool"
-            query_params["top_level"] = top_level
-        if transcription_worker_version is not None:
-            warn(
-                "`transcription_worker_version` usage is deprecated. Consider using `transcription_worker_run` instead.",
-                DeprecationWarning,
-                stacklevel=1,
-            )
-            assert isinstance(
-                transcription_worker_version, str | bool
-            ), "transcription_worker_version should be of type str or bool"
-            if isinstance(transcription_worker_version, bool):
-                assert (
-                    transcription_worker_version is False
-                ), "if of type bool, transcription_worker_version can only be set to False"
-            query_params["transcription_worker_version"] = transcription_worker_version
-        if transcription_worker_run is not None:
-            assert isinstance(
-                transcription_worker_run, str | bool
-            ), "transcription_worker_run should be of type str or bool"
-            if isinstance(transcription_worker_run, bool):
-                assert (
-                    transcription_worker_run is False
-                ), "if of type bool, transcription_worker_run can only be set to False"
-            query_params["transcription_worker_run"] = transcription_worker_run
-        if type:
-            assert isinstance(type, str), "type should be of type str"
-            query_params["type"] = type
-        if with_classes is not None:
-            assert isinstance(with_classes, bool), "with_classes should be of type bool"
-            query_params["with_classes"] = with_classes
-        if with_corpus is not None:
-            assert isinstance(with_corpus, bool), "with_corpus should be of type bool"
-            query_params["with_corpus"] = with_corpus
-        if with_has_children is not None:
-            assert isinstance(
-                with_has_children, bool
-            ), "with_has_children should be of type bool"
-            query_params["with_has_children"] = with_has_children
-        if with_metadata is not None:
-            assert isinstance(
-                with_metadata, bool
-            ), "with_metadata should be of type bool"
-            query_params["with_metadata"] = with_metadata
-        if with_zone is not None:
-            assert isinstance(with_zone, bool), "with_zone should be of type bool"
-            query_params["with_zone"] = with_zone
-        if worker_version is not None:
-            warn(
-                "`worker_version` usage is deprecated. Consider using `worker_run` instead.",
-                DeprecationWarning,
-                stacklevel=1,
-            )
-            assert isinstance(
-                worker_version, str | bool
-            ), "worker_version should be of type str or bool"
-            if isinstance(worker_version, bool):
-                assert (
-                    worker_version is False
-                ), "if of type bool, worker_version can only be set to False"
-            query_params["worker_version"] = worker_version
-        if worker_run is not None:
-            assert isinstance(
-                worker_run, str | bool
-            ), "worker_run should be of type str or bool"
-            if isinstance(worker_run, bool):
-                assert (
-                    worker_run is False
-                ), "if of type bool, worker_run can only be set to False"
-            query_params["worker_run"] = worker_run
-
-        if not self.use_cache:
-            return self.api_client.paginate(
-                "ListElements", corpus=self.corpus_id, **query_params
-            )
-
-        # Checking that we only received query_params handled by the cache
-        assert (
-            set(query_params.keys())
-            <= {
-                "type",
-                "worker_version",
-                "worker_run",
-            }
-        ), "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
-
-        query = CachedElement.select()
-        if type:
-            query = query.where(CachedElement.type == type)
-        if worker_version is not None:
-            # If worker_version=False, filter by manual worker_version e.g. None
-            worker_version_id = worker_version or None
-            if worker_version_id:
-                query = query.where(
-                    CachedElement.worker_version_id == worker_version_id
-                )
-            else:
-                query = query.where(CachedElement.worker_version_id.is_null())
-
-        if worker_run is not None:
-            # If worker_run=False, filter by manual worker_run e.g. None
-            worker_run_id = worker_run or None
-            if worker_run_id:
-                query = query.where(CachedElement.worker_run_id == worker_run_id)
-            else:
-                query = query.where(CachedElement.worker_run_id.is_null())
-
-        return query
-
     def list_element_children(
         self,
         element: Element | CachedElement,
@@ -811,43 +546,45 @@ class ElementMixin:
             ), "if of type bool, worker_run can only be set to False"
             query_params["worker_run"] = worker_run

-        if
-
+        if self.use_cache:
+            # Checking that we only received query_params handled by the cache
+            assert (
+                set(query_params.keys())
+                <= {
+                    "type",
+                    "worker_version",
+                    "worker_run",
+                }
+            ), "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
+
+            query = CachedElement.select().where(CachedElement.parent_id == element.id)
+            if type:
+                query = query.where(CachedElement.type == type)
+            if worker_version is not None:
+                # If worker_version=False, filter by manual worker_version e.g. None
+                worker_version_id = worker_version or None
+                if worker_version_id:
+                    query = query.where(
+                        CachedElement.worker_version_id == worker_version_id
+                    )
+                else:
+                    query = query.where(CachedElement.worker_version_id.is_null())
+
+            if worker_run is not None:
+                # If worker_run=False, filter by manual worker_run e.g. None
+                worker_run_id = worker_run or None
+                if worker_run_id:
+                    query = query.where(CachedElement.worker_run_id == worker_run_id)
+                else:
+                    query = query.where(CachedElement.worker_run_id.is_null())
+
+            return query
+        else:
+            children = self.api_client.paginate(
                 "ListElementChildren", id=element.id, **query_params
             )

-
-        assert (
-            set(query_params.keys())
-            <= {
-                "type",
-                "worker_version",
-                "worker_run",
-            }
-        ), "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
-
-        query = CachedElement.select().where(CachedElement.parent_id == element.id)
-        if type:
-            query = query.where(CachedElement.type == type)
-        if worker_version is not None:
-            # If worker_version=False, filter by manual worker_version e.g. None
-            worker_version_id = worker_version or None
-            if worker_version_id:
-                query = query.where(
-                    CachedElement.worker_version_id == worker_version_id
-                )
-            else:
-                query = query.where(CachedElement.worker_version_id.is_null())
-
-        if worker_run is not None:
-            # If worker_run=False, filter by manual worker_run e.g. None
-            worker_run_id = worker_run or None
-            if worker_run_id:
-                query = query.where(CachedElement.worker_run_id == worker_run_id)
-            else:
-                query = query.where(CachedElement.worker_run_id.is_null())
-
-        return query
+            return children

     def list_element_parents(
         self,
@@ -988,43 +725,45 @@ class ElementMixin:
             ), "if of type bool, worker_run can only be set to False"
             query_params["worker_run"] = worker_run

-        if
-
+        if self.use_cache:
+            # Checking that we only received query_params handled by the cache
+            assert (
+                set(query_params.keys())
+                <= {
+                    "type",
+                    "worker_version",
+                    "worker_run",
+                }
+            ), "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
+
+            parent_ids = CachedElement.select(CachedElement.parent_id).where(
+                CachedElement.id == element.id
+            )
+            query = CachedElement.select().where(CachedElement.id.in_(parent_ids))
+            if type:
+                query = query.where(CachedElement.type == type)
+            if worker_version is not None:
+                # If worker_version=False, filter by manual worker_version e.g. None
+                worker_version_id = worker_version or None
+                if worker_version_id:
+                    query = query.where(
+                        CachedElement.worker_version_id == worker_version_id
+                    )
+                else:
+                    query = query.where(CachedElement.worker_version_id.is_null())
+
+            if worker_run is not None:
+                # If worker_run=False, filter by manual worker_run e.g. None
+                worker_run_id = worker_run or None
+                if worker_run_id:
+                    query = query.where(CachedElement.worker_run_id == worker_run_id)
+                else:
+                    query = query.where(CachedElement.worker_run_id.is_null())
+
+            return query
+        else:
+            parents = self.api_client.paginate(
                 "ListElementParents", id=element.id, **query_params
             )

-
-        assert (
-            set(query_params.keys())
-            <= {
-                "type",
-                "worker_version",
-                "worker_run",
-            }
-        ), "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
-
-        parent_ids = CachedElement.select(CachedElement.parent_id).where(
-            CachedElement.id == element.id
-        )
-        query = CachedElement.select().where(CachedElement.id.in_(parent_ids))
-        if type:
-            query = query.where(CachedElement.type == type)
-        if worker_version is not None:
-            # If worker_version=False, filter by manual worker_version e.g. None
-            worker_version_id = worker_version or None
-            if worker_version_id:
-                query = query.where(
-                    CachedElement.worker_version_id == worker_version_id
-                )
-            else:
-                query = query.where(CachedElement.worker_version_id.is_null())
-
-        if worker_run is not None:
-            # If worker_run=False, filter by manual worker_run e.g. None
-            worker_run_id = worker_run or None
-            if worker_run_id:
-                query = query.where(CachedElement.worker_run_id == worker_run_id)
-            else:
-                query = query.where(CachedElement.worker_run_id.is_null())
-
-        return query
+            return parents