arkindex-base-worker 0.3.6rc5__py3-none-any.whl → 0.3.7.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.post1.dist-info}/METADATA +14 -16
  2. arkindex_base_worker-0.3.7.post1.dist-info/RECORD +47 -0
  3. {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.post1.dist-info}/WHEEL +1 -1
  4. {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.post1.dist-info}/top_level.txt +2 -0
  5. arkindex_worker/cache.py +14 -0
  6. arkindex_worker/image.py +29 -19
  7. arkindex_worker/models.py +14 -2
  8. arkindex_worker/utils.py +17 -3
  9. arkindex_worker/worker/__init__.py +122 -125
  10. arkindex_worker/worker/base.py +25 -45
  11. arkindex_worker/worker/classification.py +18 -25
  12. arkindex_worker/worker/dataset.py +24 -18
  13. arkindex_worker/worker/element.py +45 -6
  14. arkindex_worker/worker/entity.py +35 -4
  15. arkindex_worker/worker/metadata.py +21 -11
  16. arkindex_worker/worker/training.py +16 -0
  17. arkindex_worker/worker/transcription.py +45 -5
  18. arkindex_worker/worker/version.py +22 -0
  19. hooks/pre_gen_project.py +3 -0
  20. tests/conftest.py +15 -7
  21. tests/test_base_worker.py +0 -6
  22. tests/test_dataset_worker.py +292 -410
  23. tests/test_elements_worker/test_classifications.py +365 -539
  24. tests/test_elements_worker/test_cli.py +1 -1
  25. tests/test_elements_worker/test_dataset.py +97 -116
  26. tests/test_elements_worker/test_elements.py +227 -61
  27. tests/test_elements_worker/test_entities.py +22 -2
  28. tests/test_elements_worker/test_metadata.py +53 -27
  29. tests/test_elements_worker/test_training.py +35 -0
  30. tests/test_elements_worker/test_transcriptions.py +149 -16
  31. tests/test_elements_worker/test_worker.py +19 -6
  32. tests/test_image.py +37 -0
  33. tests/test_utils.py +23 -1
  34. worker-demo/tests/__init__.py +0 -0
  35. worker-demo/tests/conftest.py +32 -0
  36. worker-demo/tests/test_worker.py +12 -0
  37. worker-demo/worker_demo/__init__.py +6 -0
  38. worker-demo/worker_demo/worker.py +19 -0
  39. arkindex_base_worker-0.3.6rc5.dist-info/RECORD +0 -41
  40. {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.post1.dist-info}/LICENSE +0 -0
@@ -1,6 +1,7 @@
1
1
  """
2
2
  The base class for all Arkindex workers.
3
3
  """
4
+
4
5
  import argparse
5
6
  import json
6
7
  import logging
@@ -12,15 +13,8 @@ from tempfile import mkdtemp
12
13
  import gnupg
13
14
  import yaml
14
15
  from apistar.exceptions import ErrorResponse
15
- from tenacity import (
16
- before_sleep_log,
17
- retry,
18
- retry_if_exception,
19
- stop_after_attempt,
20
- wait_exponential,
21
- )
22
16
 
23
- from arkindex import ArkindexClient, options_from_env
17
+ from arkindex import options_from_env
24
18
  from arkindex_worker import logger
25
19
  from arkindex_worker.cache import (
26
20
  check_version,
@@ -30,18 +24,7 @@ from arkindex_worker.cache import (
30
24
  merge_parents_cache,
31
25
  )
32
26
  from arkindex_worker.utils import close_delete_file, extract_tar_zst_archive
33
-
34
-
35
- def _is_500_error(exc: Exception) -> bool:
36
- """
37
- Check if an Arkindex API error has a HTTP 5xx error code.
38
- Used to retry most API calls in [BaseWorker][arkindex_worker.worker.base.BaseWorker].
39
- :param exc: Exception to check
40
- """
41
- if not isinstance(exc, ErrorResponse):
42
- return False
43
-
44
- return 500 <= exc.status_code < 600
27
+ from teklia_toolbox.requests import get_arkindex_client
45
28
 
46
29
 
47
30
  class ExtrasDirNotFoundError(Exception):
@@ -72,7 +55,7 @@ class BaseWorker:
72
55
  self.parser.add_argument(
73
56
  "-c",
74
57
  "--config",
75
- help="Alternative configuration file when running without a Worker Version ID",
58
+ help="Alternative configuration file when running without a Worker Run ID",
76
59
  type=open,
77
60
  )
78
61
  self.parser.add_argument(
@@ -94,7 +77,7 @@ class BaseWorker:
94
77
  "--dev",
95
78
  help=(
96
79
  "Run worker in developer mode. "
97
- "Worker will be in read-only state even if a worker_version is supplied. "
80
+ "Worker will be in read-only state even if a worker run is supplied. "
98
81
  ),
99
82
  action="store_true",
100
83
  default=False,
@@ -148,6 +131,13 @@ class BaseWorker:
148
131
  # there is at least one available sqlite database either given or in the parent tasks
149
132
  self.use_cache = False
150
133
 
134
+ # model_version_id will be updated in configure() using the worker_run's model version
135
+ # or in configure_for_developers() from the environment
136
+ self.model_version_id = None
137
+ # model_details will be updated in configure() using the worker_run's model version
138
+ # or in configure_for_developers() from the environment
139
+ self.model_details = {}
140
+
151
141
  # task_parents will be updated in configure_cache() if the cache is supported,
152
142
  # if the task ID is set and if no database is passed as argument
153
143
  self.task_parents = []
@@ -176,12 +166,20 @@ class BaseWorker:
176
166
  """
177
167
  return self.args.dev or self.worker_run_id is None
178
168
 
169
+ @property
170
+ def worker_version_id(self):
171
+ """Deprecated property previously used to retrieve the current WorkerVersion ID.
172
+
173
+ :raises DeprecationWarning: Whenever `worker_version_id` is used.
174
+ """
175
+ raise DeprecationWarning("`worker_version_id` usage is deprecated")
176
+
179
177
  def setup_api_client(self):
180
178
  """
181
179
  Create an ArkindexClient to make API requests towards Arkindex instances.
182
180
  """
183
181
  # Build Arkindex API client from environment variables
184
- self.api_client = ArkindexClient(**options_from_env())
182
+ self.api_client = get_arkindex_client(**options_from_env())
185
183
  logger.debug(f"Setup Arkindex API client on {self.api_client.document.url}")
186
184
 
187
185
  def configure_for_developers(self):
@@ -243,25 +241,21 @@ class BaseWorker:
243
241
 
244
242
  # Load worker version information
245
243
  worker_version = worker_run["worker_version"]
246
-
247
- # Store worker version id
248
- self.worker_version_id = worker_version["id"]
249
-
250
244
  self.worker_details = worker_version["worker"]
251
245
 
252
246
  logger.info(f"Loaded {worker_run['summary']} from API")
253
247
 
254
248
  # Load model version configuration when available
255
249
  model_version = worker_run.get("model_version")
256
- if model_version and model_version.get("configuration"):
250
+ if model_version:
257
251
  logger.info("Loaded model version configuration from WorkerRun")
258
- self.model_configuration.update(model_version.get("configuration"))
252
+ self.model_configuration.update(model_version["configuration"])
259
253
 
260
254
  # Set model_version ID as worker attribute
261
- self.model_version_id = model_version.get("id")
255
+ self.model_version_id = model_version["id"]
262
256
 
263
257
  # Set model details as worker attribute
264
- self.model_details = model_version.get("model")
258
+ self.model_details = model_version["model"]
265
259
 
266
260
  # Retrieve initial configuration from API
267
261
  self.config = worker_version["configuration"].get("configuration", {})
@@ -477,23 +471,9 @@ class BaseWorker:
477
471
  # Clean up
478
472
  shutil.rmtree(base_extracted_path)
479
473
 
480
- @retry(
481
- retry=retry_if_exception(_is_500_error),
482
- wait=wait_exponential(multiplier=2, min=3),
483
- reraise=True,
484
- stop=stop_after_attempt(5),
485
- before_sleep=before_sleep_log(logger, logging.INFO),
486
- )
487
474
  def request(self, *args, **kwargs):
488
475
  """
489
476
  Wrapper around the ``ArkindexClient.request`` method.
490
-
491
- The API call will be retried up to 5 times in case of HTTP 5xx errors,
492
- with an exponential sleep time of 3, 4, 8 and 16 seconds between calls.
493
- If the 5th call still causes an HTTP 5xx error, the exception is re-raised
494
- and the caller should catch it.
495
-
496
- Log messages are displayed when an HTTP 5xx error occurs, before waiting for the next call.
497
477
  """
498
478
  return self.api_client.request(*args, **kwargs)
499
479
 
@@ -2,8 +2,6 @@
2
2
  ElementsWorker methods for classifications and ML classes.
3
3
  """
4
4
 
5
- from uuid import UUID
6
-
7
5
  from apistar.exceptions import ErrorResponse
8
6
  from peewee import IntegrityError
9
7
 
@@ -154,13 +152,6 @@ class ClassificationMixin:
154
152
  # Detect already existing classification
155
153
  if e.status_code == 400 and "non_field_errors" in e.content:
156
154
  if (
157
- "The fields element, worker_version, ml_class must make a unique set."
158
- in e.content["non_field_errors"]
159
- ):
160
- logger.warning(
161
- f"This worker version has already set {ml_class} on element {element.id}"
162
- )
163
- elif (
164
155
  "The fields element, worker_run, ml_class must make a unique set."
165
156
  in e.content["non_field_errors"]
166
157
  ):
@@ -185,10 +176,14 @@ class ClassificationMixin:
185
176
  Create multiple classifications at once on the given element through the API.
186
177
 
187
178
  :param element: The element to create classifications on.
188
- :param classifications: The classifications to create, a list of dicts. Each of them contains
189
- a **ml_class_id** (str), the ID of the MLClass for this classification;
190
- a **confidence** (float), the confidence score, between 0 and 1;
191
- a **high_confidence** (bool), the high confidence state of the classification.
179
+ :param classifications: A list of dicts representing a classification each, with the following keys:
180
+
181
+ ml_class (str)
182
+ Required. Name of the MLClass to use.
183
+ confidence (float)
184
+ Required. Confidence score for the classification. Must be between 0 and 1.
185
+ high_confidence (bool)
186
+ Optional. Whether or not the classification is of high confidence.
192
187
 
193
188
  :returns: List of created classifications, as returned in the ``classifications`` field by
194
189
  the ``CreateClassifications`` API endpoint.
@@ -201,18 +196,10 @@ class ClassificationMixin:
201
196
  ), "classifications shouldn't be null and should be of type list"
202
197
 
203
198
  for index, classification in enumerate(classifications):
204
- ml_class_id = classification.get("ml_class_id")
199
+ ml_class = classification.get("ml_class")
205
200
  assert (
206
- ml_class_id and isinstance(ml_class_id, str)
207
- ), f"Classification at index {index} in classifications: ml_class_id shouldn't be null and should be of type str"
208
-
209
- # Make sure it's a valid UUID
210
- try:
211
- UUID(ml_class_id)
212
- except ValueError as e:
213
- raise ValueError(
214
- f"Classification at index {index} in classifications: ml_class_id is not a valid uuid."
215
- ) from e
201
+ ml_class and isinstance(ml_class, str)
202
+ ), f"Classification at index {index} in classifications: ml_class shouldn't be null and should be of type str"
216
203
 
217
204
  confidence = classification.get("confidence")
218
205
  assert (
@@ -238,7 +225,13 @@ class ClassificationMixin:
238
225
  body={
239
226
  "parent": str(element.id),
240
227
  "worker_run_id": self.worker_run_id,
241
- "classifications": classifications,
228
+ "classifications": [
229
+ {
230
+ **classification,
231
+ "ml_class": self.get_ml_class_id(classification["ml_class"]),
232
+ }
233
+ for classification in classifications
234
+ ],
242
235
  },
243
236
  )["classifications"]
244
237
 
@@ -6,7 +6,8 @@ from collections.abc import Iterator
6
6
  from enum import Enum
7
7
 
8
8
  from arkindex_worker import logger
9
- from arkindex_worker.models import Dataset, Element
9
+ from arkindex_worker.cache import unsupported_cache
10
+ from arkindex_worker.models import Dataset, Element, Set
10
11
 
11
12
 
12
13
  class DatasetState(Enum):
@@ -36,38 +37,43 @@ class DatasetState(Enum):
36
37
 
37
38
 
38
39
  class DatasetMixin:
39
- def list_process_datasets(self) -> Iterator[Dataset]:
40
+ def list_process_sets(self) -> Iterator[Set]:
40
41
  """
41
- List datasets associated to the worker's process. This helper is not available in developer mode.
42
+ List dataset sets associated to the worker's process. This helper is not available in developer mode.
42
43
 
43
- :returns: An iterator of ``Dataset`` objects built from the ``ListProcessDatasets`` API endpoint.
44
+ :returns: An iterator of ``Set`` objects built from the ``ListProcessSets`` API endpoint.
44
45
  """
45
46
  assert not self.is_read_only, "This helper is not available in read-only mode."
46
47
 
47
48
  results = self.api_client.paginate(
48
- "ListProcessDatasets", id=self.process_information["id"]
49
+ "ListProcessSets", id=self.process_information["id"]
49
50
  )
50
51
 
51
- return map(Dataset, list(results))
52
+ return map(
53
+ lambda result: Set(
54
+ name=result["set_name"], dataset=Dataset(**result["dataset"])
55
+ ),
56
+ results,
57
+ )
52
58
 
53
- def list_dataset_elements(self, dataset: Dataset) -> Iterator[tuple[str, Element]]:
59
+ def list_set_elements(self, dataset_set: Set) -> Iterator[Element]:
54
60
  """
55
- List elements in a dataset.
61
+ List elements in a dataset set.
56
62
 
57
- :param dataset: Dataset to find elements in.
58
- :returns: An iterator of tuples built from the ``ListDatasetElements`` API endpoint.
63
+ :param dataset_set: Set to find elements in.
64
+ :returns: An iterator of Element built from the ``ListDatasetElements`` API endpoint.
59
65
  """
60
- assert dataset and isinstance(
61
- dataset, Dataset
62
- ), "dataset shouldn't be null and should be a Dataset"
66
+ assert dataset_set and isinstance(
67
+ dataset_set, Set
68
+ ), "dataset_set shouldn't be null and should be a Set"
63
69
 
64
- results = self.api_client.paginate("ListDatasetElements", id=dataset.id)
65
-
66
- def format_result(result):
67
- return (result["set"], Element(**result["element"]))
70
+ results = self.api_client.paginate(
71
+ "ListDatasetElements", id=dataset_set.dataset.id, set=dataset_set.name
72
+ )
68
73
 
69
- return map(format_result, list(results))
74
+ return map(lambda result: Element(**result["element"]), results)
70
75
 
76
+ @unsupported_cache
71
77
  def update_dataset_state(self, dataset: Dataset, state: DatasetState) -> Dataset:
72
78
  """
73
79
  Partially updates a dataset state through the API.
@@ -1,14 +1,16 @@
1
1
  """
2
2
  ElementsWorker methods for elements and element types.
3
3
  """
4
+
4
5
  from collections.abc import Iterable
5
6
  from typing import NamedTuple
6
7
  from uuid import UUID
8
+ from warnings import warn
7
9
 
8
10
  from peewee import IntegrityError
9
11
 
10
12
  from arkindex_worker import logger
11
- from arkindex_worker.cache import CachedElement, CachedImage
13
+ from arkindex_worker.cache import CachedElement, CachedImage, unsupported_cache
12
14
  from arkindex_worker.models import Element
13
15
 
14
16
 
@@ -29,6 +31,7 @@ class MissingTypeError(Exception):
29
31
 
30
32
 
31
33
  class ElementMixin:
34
+ @unsupported_cache
32
35
  def create_required_types(self, element_types: list[ElementType]):
33
36
  """Creates given element types in the corpus.
34
37
 
@@ -81,6 +84,7 @@ class ElementMixin:
81
84
 
82
85
  return True
83
86
 
87
+ @unsupported_cache
84
88
  def create_sub_element(
85
89
  self,
86
90
  element: Element,
@@ -283,6 +287,7 @@ class ElementMixin:
283
287
 
284
288
  return created_ids
285
289
 
290
+ @unsupported_cache
286
291
  def create_element_parent(
287
292
  self,
288
293
  parent: Element,
@@ -422,6 +427,13 @@ class ElementMixin:
422
427
  """
423
428
  List children of an element.
424
429
 
430
+ Warns:
431
+ ----
432
+ The following parameters are **deprecated**:
433
+
434
+ - `transcription_worker_version` in favor of `transcription_worker_run`
435
+ - `worker_version` in favor of `worker_run`
436
+
425
437
  :param element: Parent element to find children of.
426
438
  :param folder: Restrict to or exclude elements with folder types.
427
439
  This parameter is not supported when caching is enabled.
@@ -429,9 +441,9 @@ class ElementMixin:
429
441
  This parameter is not supported when caching is enabled.
430
442
  :param recursive: Look for elements recursively (grand-children, etc.)
431
443
  This parameter is not supported when caching is enabled.
432
- :param transcription_worker_version: Restrict to elements that have a transcription created by a worker version with this UUID.
444
+ :param transcription_worker_version: **Deprecated** Restrict to elements that have a transcription created by a worker version with this UUID. Set to False to look for elements that have a manual transcription.
433
445
  This parameter is not supported when caching is enabled.
434
- :param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID.
446
+ :param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID. Set to False to look for elements that have a manual transcription.
435
447
  This parameter is not supported when caching is enabled.
436
448
  :param type: Restrict to elements with a specific type slug
437
449
  This parameter is not supported when caching is enabled.
@@ -447,7 +459,7 @@ class ElementMixin:
447
459
  :param with_zone: Include the ``zone`` attribute in the response,
448
460
  holding the element's image and polygon.
449
461
  This parameter is not supported when caching is enabled.
450
- :param worker_version: Restrict to elements created by a worker version with this UUID.
462
+ :param worker_version: **Deprecated** Restrict to elements created by a worker version with this UUID.
451
463
  :param worker_run: Restrict to elements created by a worker run with this UUID.
452
464
  :return: An iterable of dicts from the ``ListElementChildren`` API endpoint,
453
465
  or an iterable of [CachedElement][arkindex_worker.cache.CachedElement] when caching is enabled.
@@ -466,6 +478,11 @@ class ElementMixin:
466
478
  assert isinstance(recursive, bool), "recursive should be of type bool"
467
479
  query_params["recursive"] = recursive
468
480
  if transcription_worker_version is not None:
481
+ warn(
482
+ "`transcription_worker_version` usage is deprecated. Consider using `transcription_worker_run` instead.",
483
+ DeprecationWarning,
484
+ stacklevel=1,
485
+ )
469
486
  assert isinstance(
470
487
  transcription_worker_version, str | bool
471
488
  ), "transcription_worker_version should be of type str or bool"
@@ -506,6 +523,11 @@ class ElementMixin:
506
523
  assert isinstance(with_zone, bool), "with_zone should be of type bool"
507
524
  query_params["with_zone"] = with_zone
508
525
  if worker_version is not None:
526
+ warn(
527
+ "`worker_version` usage is deprecated. Consider using `worker_run` instead.",
528
+ DeprecationWarning,
529
+ stacklevel=1,
530
+ )
509
531
  assert isinstance(
510
532
  worker_version, str | bool
511
533
  ), "worker_version should be of type str or bool"
@@ -584,6 +606,13 @@ class ElementMixin:
584
606
  """
585
607
  List parents of an element.
586
608
 
609
+ Warns:
610
+ ----
611
+ The following parameters are **deprecated**:
612
+
613
+ - `transcription_worker_version` in favor of `transcription_worker_run`
614
+ - `worker_version` in favor of `worker_run`
615
+
587
616
  :param element: Child element to find parents of.
588
617
  :param folder: Restrict to or exclude elements with folder types.
589
618
  This parameter is not supported when caching is enabled.
@@ -591,7 +620,7 @@ class ElementMixin:
591
620
  This parameter is not supported when caching is enabled.
592
621
  :param recursive: Look for elements recursively (grand-children, etc.)
593
622
  This parameter is not supported when caching is enabled.
594
- :param transcription_worker_version: Restrict to elements that have a transcription created by a worker version with this UUID.
623
+ :param transcription_worker_version: **Deprecated** Restrict to elements that have a transcription created by a worker version with this UUID.
595
624
  This parameter is not supported when caching is enabled.
596
625
  :param transcription_worker_run: Restrict to elements that have a transcription created by a worker run with this UUID.
597
626
  This parameter is not supported when caching is enabled.
@@ -609,7 +638,7 @@ class ElementMixin:
609
638
  :param with_zone: Include the ``zone`` attribute in the response,
610
639
  holding the element's image and polygon.
611
640
  This parameter is not supported when caching is enabled.
612
- :param worker_version: Restrict to elements created by a worker version with this UUID.
641
+ :param worker_version: **Deprecated** Restrict to elements created by a worker version with this UUID.
613
642
  :param worker_run: Restrict to elements created by a worker run with this UUID.
614
643
  :return: An iterable of dicts from the ``ListElementParents`` API endpoint,
615
644
  or an iterable of [CachedElement][arkindex_worker.cache.CachedElement] when caching is enabled.
@@ -628,6 +657,11 @@ class ElementMixin:
628
657
  assert isinstance(recursive, bool), "recursive should be of type bool"
629
658
  query_params["recursive"] = recursive
630
659
  if transcription_worker_version is not None:
660
+ warn(
661
+ "`transcription_worker_version` usage is deprecated. Consider using `transcription_worker_run` instead.",
662
+ DeprecationWarning,
663
+ stacklevel=1,
664
+ )
631
665
  assert isinstance(
632
666
  transcription_worker_version, str | bool
633
667
  ), "transcription_worker_version should be of type str or bool"
@@ -668,6 +702,11 @@ class ElementMixin:
668
702
  assert isinstance(with_zone, bool), "with_zone should be of type bool"
669
703
  query_params["with_zone"] = with_zone
670
704
  if worker_version is not None:
705
+ warn(
706
+ "`worker_version` usage is deprecated. Consider using `worker_run` instead.",
707
+ DeprecationWarning,
708
+ stacklevel=1,
709
+ )
671
710
  assert isinstance(
672
711
  worker_version, str | bool
673
712
  ), "worker_version should be of type str or bool"
@@ -4,11 +4,16 @@ ElementsWorker methods for entities.
4
4
 
5
5
  from operator import itemgetter
6
6
  from typing import TypedDict
7
+ from warnings import warn
7
8
 
8
9
  from peewee import IntegrityError
9
10
 
10
11
  from arkindex_worker import logger
11
- from arkindex_worker.cache import CachedEntity, CachedTranscriptionEntity
12
+ from arkindex_worker.cache import (
13
+ CachedEntity,
14
+ CachedTranscriptionEntity,
15
+ unsupported_cache,
16
+ )
12
17
  from arkindex_worker.models import Element, Transcription
13
18
 
14
19
 
@@ -28,6 +33,7 @@ class MissingEntityType(Exception):
28
33
 
29
34
 
30
35
  class EntityMixin:
36
+ @unsupported_cache
31
37
  def check_required_entity_types(
32
38
  self, entity_types: list[str], create_missing: bool = True
33
39
  ):
@@ -205,6 +211,7 @@ class EntityMixin:
205
211
  )
206
212
  return transcription_ent
207
213
 
214
+ @unsupported_cache
208
215
  def create_transcription_entities(
209
216
  self,
210
217
  transcription: Transcription,
@@ -297,13 +304,21 @@ class EntityMixin:
297
304
  self,
298
305
  transcription: Transcription,
299
306
  worker_version: str | bool | None = None,
307
+ worker_run: str | bool | None = None,
300
308
  ):
301
309
  """
302
310
  List existing entities on a transcription
303
311
  This method does not support cache
304
312
 
313
+ Warns:
314
+ ----
315
+ The following parameters are **deprecated**:
316
+
317
+ - `worker_version` in favor of `worker_run`
318
+
305
319
  :param transcription: The transcription to list entities on.
306
- :param worker_version: Restrict to entities created by a worker version with this UUID. Set to False to look for manually created transcriptions.
320
+ :param worker_version: **Deprecated** Restrict to entities created by a worker version with this UUID. Set to False to look for manually created entities.
321
+ :param worker_run: Restrict to entities created by a worker run with this UUID. Set to False to look for manually created entities.
307
322
  """
308
323
  query_params = {}
309
324
  assert transcription and isinstance(
@@ -311,6 +326,11 @@ class EntityMixin:
311
326
  ), "transcription shouldn't be null and should be a Transcription"
312
327
 
313
328
  if worker_version is not None:
329
+ warn(
330
+ "`worker_version` usage is deprecated. Consider using `worker_run` instead.",
331
+ DeprecationWarning,
332
+ stacklevel=1,
333
+ )
314
334
  assert isinstance(
315
335
  worker_version, str | bool
316
336
  ), "worker_version should be of type str or bool"
@@ -320,6 +340,15 @@ class EntityMixin:
320
340
  worker_version is False
321
341
  ), "if of type bool, worker_version can only be set to False"
322
342
  query_params["worker_version"] = worker_version
343
+ if worker_run is not None:
344
+ assert isinstance(
345
+ worker_run, str | bool
346
+ ), "worker_run should be of type str or bool"
347
+ if isinstance(worker_run, bool):
348
+ assert (
349
+ worker_run is False
350
+ ), "if of type bool, worker_run can only be set to False"
351
+ query_params["worker_run"] = worker_run
323
352
 
324
353
  return self.api_client.paginate(
325
354
  "ListTranscriptionEntities", id=transcription.id, **query_params
@@ -351,8 +380,9 @@ class EntityMixin:
351
380
  "ListCorpusEntities", id=self.corpus_id, **query_params
352
381
  )
353
382
  }
383
+ count = len(self.entities)
354
384
  logger.info(
355
- f"Loaded {len(self.entities)} entities in corpus ({self.corpus_id})"
385
+ f'Loaded {count} entit{"ies" if count > 1 else "y"} in corpus ({self.corpus_id})'
356
386
  )
357
387
 
358
388
  def list_corpus_entity_types(
@@ -367,6 +397,7 @@ class EntityMixin:
367
397
  "ListCorpusEntityTypes", id=self.corpus_id
368
398
  )
369
399
  }
400
+ count = len(self.entity_types)
370
401
  logger.info(
371
- f"Loaded {len(self.entity_types)} entity types in corpus ({self.corpus_id})."
402
+ f'Loaded {count} entity type{"s"[:count>1]} in corpus ({self.corpus_id}).'
372
403
  )
@@ -5,7 +5,7 @@ ElementsWorker methods for metadata.
5
5
  from enum import Enum
6
6
 
7
7
  from arkindex_worker import logger
8
- from arkindex_worker.cache import CachedElement
8
+ from arkindex_worker.cache import CachedElement, unsupported_cache
9
9
  from arkindex_worker.models import Element
10
10
 
11
11
 
@@ -50,12 +50,13 @@ class MetaType(Enum):
50
50
 
51
51
  URL = "url"
52
52
  """
53
- A metadata with a string value that should be interpreted as an URL.
53
+ A metadata with a string value that should be interpreted as a URL.
54
54
  Only the ``http`` and ``https`` schemes are allowed.
55
55
  """
56
56
 
57
57
 
58
58
  class MetaDataMixin:
59
+ @unsupported_cache
59
60
  def create_metadata(
60
61
  self,
61
62
  element: Element | CachedElement,
@@ -106,17 +107,18 @@ class MetaDataMixin:
106
107
 
107
108
  return metadata["id"]
108
109
 
109
- def create_metadatas(
110
+ @unsupported_cache
111
+ def create_metadata_bulk(
110
112
  self,
111
113
  element: Element | CachedElement,
112
- metadatas: list[dict[str, MetaType | str | int | float | None]],
114
+ metadata_list: list[dict[str, MetaType | str | int | float | None]],
113
115
  ) -> list[dict[str, str]]:
114
116
  """
115
117
  Create multiple metadata on an existing element.
116
118
  This method does not support cache.
117
119
 
118
120
  :param element: The element to create multiple metadata on.
119
- :param metadatas: The list of dict whose keys are the following:
121
+ :param metadata_list: The list of dict whose keys are the following:
120
122
  - type: MetaType
121
123
  - name: str
122
124
  - value: str | int | float
@@ -126,13 +128,13 @@ class MetaDataMixin:
126
128
  element, Element | CachedElement
127
129
  ), "element shouldn't be null and should be of type Element or CachedElement"
128
130
 
129
- assert metadatas and isinstance(
130
- metadatas, list
131
- ), "type shouldn't be null and should be of type list of Dict"
131
+ assert metadata_list and isinstance(
132
+ metadata_list, list
133
+ ), "metadata_list shouldn't be null and should be of type list of dict"
132
134
 
133
135
  # Make a copy to avoid modifying the metadata_list argument
134
136
  metas = []
135
- for index, metadata in enumerate(metadatas):
137
+ for index, metadata in enumerate(metadata_list):
136
138
  assert isinstance(
137
139
  metadata, dict
138
140
  ), f"Element at index {index} in metadata_list: Should be of type dict"
@@ -178,16 +180,24 @@ class MetaDataMixin:
178
180
  return created_metadata_list
179
181
 
180
182
  def list_element_metadata(
181
- self, element: Element | CachedElement
183
+ self, element: Element | CachedElement, load_parents: bool | None = None
182
184
  ) -> list[dict[str, str]]:
183
185
  """
184
186
  List all metadata linked to an element.
185
187
  This method does not support cache.
186
188
 
187
189
  :param element: The element to list metadata on.
190
+ :param load_parents: Also include all metadata from the element's parents in the response.
188
191
  """
189
192
  assert element and isinstance(
190
193
  element, Element | CachedElement
191
194
  ), "element shouldn't be null and should be of type Element or CachedElement"
192
195
 
193
- return self.api_client.paginate("ListElementMetaData", id=element.id)
196
+ query_params = {}
197
+ if load_parents is not None:
198
+ assert isinstance(load_parents, bool), "load_parents should be of type bool"
199
+ query_params["load_parents"] = load_parents
200
+
201
+ return self.api_client.paginate(
202
+ "ListElementMetaData", id=element.id, **query_params
203
+ )
@@ -81,6 +81,13 @@ class TrainingMixin:
81
81
 
82
82
  model_version = None
83
83
 
84
+ @property
85
+ def is_finetuning(self) -> bool:
86
+ """
87
+ Whether or not this worker is fine-tuning an existing model version.
88
+ """
89
+ return bool(self.model_version_id)
90
+
84
91
  @skip_if_read_only
85
92
  def publish_model_version(
86
93
  self,
@@ -276,8 +283,17 @@ class TrainingMixin:
276
283
  },
277
284
  )
278
285
  except ErrorResponse as e:
286
+ # Temporary fix while waiting for `ValidateModelVersion` refactoring as it can
287
+ # return errors even when the model version is properly validated
288
+ if e.status_code in [403, 500]:
289
+ logger.warning(
290
+ f'An error occurred while validating model version {self.model_version["id"]}, please check its status.'
291
+ )
292
+ return
293
+
279
294
  if e.status_code != 409:
280
295
  raise e
296
+
281
297
  logger.warning(
282
298
  f"An available model version exists with hash {hash}, using it instead of the pending version."
283
299
  )