arkindex-base-worker 0.4.0a2__tar.gz → 0.4.0b1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/PKG-INFO +7 -7
  2. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_base_worker.egg-info/PKG-INFO +7 -7
  3. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_base_worker.egg-info/requires.txt +6 -6
  4. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/__init__.py +21 -12
  5. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/base.py +3 -9
  6. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/classification.py +3 -3
  7. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/corpus.py +3 -1
  8. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/dataset.py +1 -1
  9. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/element.py +24 -9
  10. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/entity.py +6 -7
  11. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/metadata.py +2 -2
  12. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/task.py +4 -2
  13. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/training.py +5 -5
  14. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/transcription.py +3 -3
  15. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/version.py +3 -1
  16. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/pyproject.toml +7 -7
  17. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_base_worker.py +1 -1
  18. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_elements.py +29 -22
  19. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_merge.py +1 -1
  20. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/LICENSE +0 -0
  21. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/README.md +0 -0
  22. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_base_worker.egg-info/SOURCES.txt +0 -0
  23. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
  24. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_base_worker.egg-info/top_level.txt +0 -0
  25. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/__init__.py +0 -0
  26. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/cache.py +0 -0
  27. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/image.py +0 -0
  28. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/models.py +0 -0
  29. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/utils.py +0 -0
  30. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/image.py +0 -0
  31. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/hooks/pre_gen_project.py +0 -0
  32. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/setup.cfg +0 -0
  33. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/__init__.py +0 -0
  34. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/conftest.py +0 -0
  35. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_cache.py +0 -0
  36. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_dataset_worker.py +0 -0
  37. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_element.py +0 -0
  38. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/__init__.py +0 -0
  39. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_classifications.py +0 -0
  40. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_cli.py +0 -0
  41. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_corpus.py +0 -0
  42. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_dataset.py +0 -0
  43. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_entities.py +0 -0
  44. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_image.py +0 -0
  45. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_metadata.py +0 -0
  46. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_task.py +0 -0
  47. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_training.py +0 -0
  48. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_transcriptions.py +0 -0
  49. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_worker.py +0 -0
  50. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_image.py +0 -0
  51. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_utils.py +0 -0
  52. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/tests/__init__.py +0 -0
  53. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/tests/conftest.py +0 -0
  54. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/tests/test_worker.py +0 -0
  55. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/worker_demo/__init__.py +0 -0
  56. {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/worker_demo/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0a2
3
+ Version: 0.4.0b1
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -41,17 +41,17 @@ Requires-Python: >=3.10
41
41
  Description-Content-Type: text/markdown
42
42
  License-File: LICENSE
43
43
  Requires-Dist: peewee~=3.17
44
- Requires-Dist: Pillow==10.3.0
44
+ Requires-Dist: Pillow==10.4.0
45
45
  Requires-Dist: python-gnupg==0.5.2
46
- Requires-Dist: shapely==2.0.3
46
+ Requires-Dist: shapely==2.0.5
47
47
  Requires-Dist: teklia-toolbox==0.1.5
48
48
  Requires-Dist: zstandard==0.22.0
49
49
  Provides-Extra: docs
50
- Requires-Dist: black==24.4.0; extra == "docs"
51
- Requires-Dist: mkdocs-material==9.5.17; extra == "docs"
52
- Requires-Dist: mkdocstrings-python==1.9.2; extra == "docs"
50
+ Requires-Dist: black==24.4.2; extra == "docs"
51
+ Requires-Dist: mkdocs-material==9.5.31; extra == "docs"
52
+ Requires-Dist: mkdocstrings-python==1.10.7; extra == "docs"
53
53
  Provides-Extra: tests
54
- Requires-Dist: pytest==8.1.1; extra == "tests"
54
+ Requires-Dist: pytest==8.3.2; extra == "tests"
55
55
  Requires-Dist: pytest-mock==3.14.0; extra == "tests"
56
56
  Requires-Dist: pytest-responses==0.5.1; extra == "tests"
57
57
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0a2
3
+ Version: 0.4.0b1
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -41,17 +41,17 @@ Requires-Python: >=3.10
41
41
  Description-Content-Type: text/markdown
42
42
  License-File: LICENSE
43
43
  Requires-Dist: peewee~=3.17
44
- Requires-Dist: Pillow==10.3.0
44
+ Requires-Dist: Pillow==10.4.0
45
45
  Requires-Dist: python-gnupg==0.5.2
46
- Requires-Dist: shapely==2.0.3
46
+ Requires-Dist: shapely==2.0.5
47
47
  Requires-Dist: teklia-toolbox==0.1.5
48
48
  Requires-Dist: zstandard==0.22.0
49
49
  Provides-Extra: docs
50
- Requires-Dist: black==24.4.0; extra == "docs"
51
- Requires-Dist: mkdocs-material==9.5.17; extra == "docs"
52
- Requires-Dist: mkdocstrings-python==1.9.2; extra == "docs"
50
+ Requires-Dist: black==24.4.2; extra == "docs"
51
+ Requires-Dist: mkdocs-material==9.5.31; extra == "docs"
52
+ Requires-Dist: mkdocstrings-python==1.10.7; extra == "docs"
53
53
  Provides-Extra: tests
54
- Requires-Dist: pytest==8.1.1; extra == "tests"
54
+ Requires-Dist: pytest==8.3.2; extra == "tests"
55
55
  Requires-Dist: pytest-mock==3.14.0; extra == "tests"
56
56
  Requires-Dist: pytest-responses==0.5.1; extra == "tests"
57
57
 
@@ -1,16 +1,16 @@
1
1
  peewee~=3.17
2
- Pillow==10.3.0
2
+ Pillow==10.4.0
3
3
  python-gnupg==0.5.2
4
- shapely==2.0.3
4
+ shapely==2.0.5
5
5
  teklia-toolbox==0.1.5
6
6
  zstandard==0.22.0
7
7
 
8
8
  [docs]
9
- black==24.4.0
10
- mkdocs-material==9.5.17
11
- mkdocstrings-python==1.9.2
9
+ black==24.4.2
10
+ mkdocs-material==9.5.31
11
+ mkdocstrings-python==1.10.7
12
12
 
13
13
  [tests]
14
- pytest==8.1.1
14
+ pytest==8.3.2
15
15
  pytest-mock==3.14.0
16
16
  pytest-responses==0.5.1
@@ -83,7 +83,20 @@ class ElementsWorker(
83
83
  """
84
84
  super().__init__(description, support_cache)
85
85
 
86
- # Add mandatory argument to process elements
86
+ self.classes = {}
87
+
88
+ self.entity_types = {}
89
+ """Known and available entity types in processed corpus
90
+ """
91
+
92
+ self.corpus_types = {}
93
+ """Known and available element types in processed corpus
94
+ """
95
+
96
+ self._worker_version_cache = {}
97
+
98
+ def add_arguments(self):
99
+ """Define specific ``argparse`` arguments for this worker"""
87
100
  self.parser.add_argument(
88
101
  "--elements-list",
89
102
  help="JSON elements list to use",
@@ -97,14 +110,6 @@ class ElementsWorker(
97
110
  help="One or more Arkindex element ID",
98
111
  )
99
112
 
100
- self.classes = {}
101
-
102
- self.entity_types = {}
103
- """Known and available entity types in processed corpus
104
- """
105
-
106
- self._worker_version_cache = {}
107
-
108
113
  def list_elements(self) -> Iterable[CachedElement] | list[str]:
109
114
  """
110
115
  List the elements to be processed, either from the CLI arguments or
@@ -222,7 +227,9 @@ class ElementsWorker(
222
227
  element = item
223
228
  else:
224
229
  # Load element using the Arkindex API
225
- element = Element(**self.request("RetrieveElement", id=item))
230
+ element = Element(
231
+ **self.api_client.request("RetrieveElement", id=item)
232
+ )
226
233
 
227
234
  logger.info(f"Processing {element} ({i}/{count})")
228
235
 
@@ -301,7 +308,7 @@ class ElementsWorker(
301
308
  assert isinstance(state, ActivityState), "state should be an ActivityState"
302
309
 
303
310
  try:
304
- self.request(
311
+ self.api_client.request(
305
312
  "UpdateWorkerActivity",
306
313
  id=self.worker_run_id,
307
314
  body={
@@ -376,6 +383,8 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
376
383
  # Set as an instance variable as dataset workers might use it to easily extract its content
377
384
  self.downloaded_dataset_artifact: Path | None = None
378
385
 
386
+ def add_arguments(self):
387
+ """Define specific ``argparse`` arguments for this worker"""
379
388
  self.parser.add_argument(
380
389
  "--set",
381
390
  type=check_dataset_set,
@@ -472,7 +481,7 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
472
481
  # Retrieving dataset information is not already cached
473
482
  if dataset_id not in datasets:
474
483
  datasets[dataset_id] = Dataset(
475
- **self.request("RetrieveDataset", id=dataset_id)
484
+ **self.api_client.request("RetrieveDataset", id=dataset_id)
476
485
  )
477
486
 
478
487
  yield Set(name=set_name, dataset=datasets[dataset_id])
@@ -231,7 +231,7 @@ class BaseWorker:
231
231
  logger.debug("Debug output enabled")
232
232
 
233
233
  # Load worker run information
234
- worker_run = self.request("RetrieveWorkerRun", id=self.worker_run_id)
234
+ worker_run = self.api_client.request("RetrieveWorkerRun", id=self.worker_run_id)
235
235
 
236
236
  # Load process information
237
237
  self.process_information = worker_run["process"]
@@ -290,7 +290,7 @@ class BaseWorker:
290
290
  if self.support_cache and self.args.database is not None:
291
291
  self.use_cache = True
292
292
  elif self.support_cache and self.task_id:
293
- task = self.request("RetrieveTaskFromAgent", id=self.task_id)
293
+ task = self.api_client.request("RetrieveTask", id=self.task_id)
294
294
  self.task_parents = task["parents"]
295
295
  paths = self.find_parents_file_paths(Path("db.sqlite"))
296
296
  self.use_cache = len(paths) > 0
@@ -331,7 +331,7 @@ class BaseWorker:
331
331
 
332
332
  # Load from the backend
333
333
  try:
334
- resp = self.request("RetrieveSecret", name=str(name))
334
+ resp = self.api_client.request("RetrieveSecret", name=str(name))
335
335
  secret = resp["content"]
336
336
  logging.info(f"Loaded API secret {name}")
337
337
  except ErrorResponse as e:
@@ -471,12 +471,6 @@ class BaseWorker:
471
471
  # Clean up
472
472
  shutil.rmtree(base_extracted_path)
473
473
 
474
- def request(self, *args, **kwargs):
475
- """
476
- Wrapper around the ``ArkindexClient.request`` method.
477
- """
478
- return self.api_client.request(*args, **kwargs)
479
-
480
474
  def add_arguments(self):
481
475
  """Override this method to add ``argparse`` arguments to this worker"""
482
476
 
@@ -39,7 +39,7 @@ class ClassificationMixin:
39
39
  if ml_class_id is None:
40
40
  logger.info(f"Creating ML class {ml_class} on corpus {self.corpus_id}")
41
41
  try:
42
- response = self.request(
42
+ response = self.api_client.request(
43
43
  "CreateMLClass", id=self.corpus_id, body={"name": ml_class}
44
44
  )
45
45
  ml_class_id = self.classes[ml_class] = response["id"]
@@ -119,7 +119,7 @@ class ClassificationMixin:
119
119
  )
120
120
  return
121
121
  try:
122
- created = self.request(
122
+ created = self.api_client.request(
123
123
  "CreateClassification",
124
124
  body={
125
125
  "element": str(element.id),
@@ -220,7 +220,7 @@ class ClassificationMixin:
220
220
  )
221
221
  return
222
222
 
223
- created_cls = self.request(
223
+ created_cls = self.api_client.request(
224
224
  "CreateClassifications",
225
225
  body={
226
226
  "parent": str(element.id),
@@ -63,7 +63,9 @@ class CorpusMixin:
63
63
  # Download latest export
64
64
  export_id: str = exports[0]["id"]
65
65
  logger.info(f"Downloading export ({export_id})...")
66
- export: _TemporaryFileWrapper = self.request("DownloadExport", id=export_id)
66
+ export: _TemporaryFileWrapper = self.api_client.request(
67
+ "DownloadExport", id=export_id
68
+ )
67
69
  logger.info(f"Downloaded export ({export_id}) @ `{export.name}`")
68
70
 
69
71
  return export
@@ -93,7 +93,7 @@ class DatasetMixin:
93
93
  logger.warning("Cannot update dataset as this worker is in read-only mode")
94
94
  return
95
95
 
96
- updated_dataset = self.request(
96
+ updated_dataset = self.api_client.request(
97
97
  "PartialUpdateDataset",
98
98
  id=dataset.id,
99
99
  body={"state": state.value},
@@ -31,6 +31,21 @@ class MissingTypeError(Exception):
31
31
 
32
32
 
33
33
  class ElementMixin:
34
+ def list_corpus_types(self):
35
+ """
36
+ Loads available element types in corpus.
37
+ """
38
+ self.corpus_types = {
39
+ element_type["slug"]: element_type
40
+ for element_type in self.api_client.request(
41
+ "RetrieveCorpus", id=self.corpus_id
42
+ )["types"]
43
+ }
44
+ count = len(self.corpus_types)
45
+ logger.info(
46
+ f'Loaded {count} element type{"s"[:count>1]} in corpus ({self.corpus_id}).'
47
+ )
48
+
34
49
  @unsupported_cache
35
50
  def create_required_types(self, element_types: list[ElementType]):
36
51
  """Creates given element types in the corpus.
@@ -38,7 +53,7 @@ class ElementMixin:
38
53
  :param element_types: The missing element types to create.
39
54
  """
40
55
  for element_type in element_types:
41
- self.request(
56
+ self.api_client.request(
42
57
  "CreateElementType",
43
58
  body={
44
59
  "slug": element_type.slug,
@@ -66,10 +81,10 @@ class ElementMixin:
66
81
  isinstance(slug, str) for slug in type_slugs
67
82
  ), "Element type slugs must be strings."
68
83
 
69
- corpus = self.request("RetrieveCorpus", id=self.corpus_id)
70
- available_slugs = {element_type["slug"] for element_type in corpus["types"]}
71
- missing_slugs = set(type_slugs) - available_slugs
84
+ if not self.corpus_types:
85
+ self.list_corpus_types()
72
86
 
87
+ missing_slugs = set(type_slugs) - set(self.corpus_types)
73
88
  if missing_slugs:
74
89
  if create_missing:
75
90
  self.create_required_types(
@@ -79,7 +94,7 @@ class ElementMixin:
79
94
  )
80
95
  else:
81
96
  raise MissingTypeError(
82
- f'Element type(s) {", ".join(sorted(missing_slugs))} were not found in the {corpus["name"]} corpus ({corpus["id"]}).'
97
+ f'Element type(s) {", ".join(sorted(missing_slugs))} were not found in corpus ({self.corpus_id}).'
83
98
  )
84
99
 
85
100
  return True
@@ -145,7 +160,7 @@ class ElementMixin:
145
160
  logger.warning("Cannot create element as this worker is in read-only mode")
146
161
  return
147
162
 
148
- sub_element = self.request(
163
+ sub_element = self.api_client.request(
149
164
  "CreateElement",
150
165
  body={
151
166
  "type": type,
@@ -243,7 +258,7 @@ class ElementMixin:
243
258
  logger.warning("Cannot create elements as this worker is in read-only mode")
244
259
  return
245
260
 
246
- created_ids = self.request(
261
+ created_ids = self.api_client.request(
247
262
  "CreateElements",
248
263
  id=parent.id,
249
264
  body={
@@ -311,7 +326,7 @@ class ElementMixin:
311
326
  logger.warning("Cannot link elements as this worker is in read-only mode")
312
327
  return
313
328
 
314
- return self.request(
329
+ return self.api_client.request(
315
330
  "CreateElementParent",
316
331
  parent=parent.id,
317
332
  child=child.id,
@@ -383,7 +398,7 @@ class ElementMixin:
383
398
  logger.warning("Cannot update element as this worker is in read-only mode")
384
399
  return
385
400
 
386
- updated_element = self.request(
401
+ updated_element = self.api_client.request(
387
402
  "PartialUpdateElement",
388
403
  id=element.id,
389
404
  body=kwargs,
@@ -48,6 +48,7 @@ class EntityMixin:
48
48
  if not self.entity_types:
49
49
  # Load entity_types of corpus
50
50
  self.list_corpus_entity_types()
51
+
51
52
  for entity_type in entity_types:
52
53
  # Do nothing if type already exists
53
54
  if entity_type in self.entity_types:
@@ -60,7 +61,7 @@ class EntityMixin:
60
61
  )
61
62
 
62
63
  # Create type if non-existent
63
- self.entity_types[entity_type] = self.request(
64
+ self.entity_types[entity_type] = self.api_client.request(
64
65
  "CreateEntityType",
65
66
  body={
66
67
  "name": entity_type,
@@ -106,7 +107,7 @@ class EntityMixin:
106
107
  entity_type_id = self.entity_types.get(type)
107
108
  assert entity_type_id, f"Entity type `{type}` not found in the corpus."
108
109
 
109
- entity = self.request(
110
+ entity = self.api_client.request(
110
111
  "CreateEntity",
111
112
  body={
112
113
  "name": name,
@@ -188,7 +189,7 @@ class EntityMixin:
188
189
  if confidence is not None:
189
190
  body["confidence"] = confidence
190
191
 
191
- transcription_ent = self.request(
192
+ transcription_ent = self.api_client.request(
192
193
  "CreateTranscriptionEntity",
193
194
  id=transcription.id,
194
195
  body=body,
@@ -289,7 +290,7 @@ class EntityMixin:
289
290
  )
290
291
  return
291
292
 
292
- created_ids = self.request(
293
+ created_ids = self.api_client.request(
293
294
  "CreateTranscriptionEntities",
294
295
  id=transcription.id,
295
296
  body={
@@ -385,9 +386,7 @@ class EntityMixin:
385
386
  f'Loaded {count} entit{"ies" if count > 1 else "y"} in corpus ({self.corpus_id})'
386
387
  )
387
388
 
388
- def list_corpus_entity_types(
389
- self,
390
- ):
389
+ def list_corpus_entity_types(self):
391
390
  """
392
391
  Loads available entity types in corpus.
393
392
  """
@@ -93,7 +93,7 @@ class MetaDataMixin:
93
93
  logger.warning("Cannot create metadata as this worker is in read-only mode")
94
94
  return
95
95
 
96
- metadata = self.request(
96
+ metadata = self.api_client.request(
97
97
  "CreateMetaData",
98
98
  id=element.id,
99
99
  body={
@@ -168,7 +168,7 @@ class MetaDataMixin:
168
168
  logger.warning("Cannot create metadata as this worker is in read-only mode")
169
169
  return
170
170
 
171
- created_metadata_list = self.request(
171
+ created_metadata_list = self.api_client.request(
172
172
  "CreateMetaDataBulk",
173
173
  id=element.id,
174
174
  body={
@@ -22,7 +22,7 @@ class TaskMixin:
22
22
  task_id, uuid.UUID
23
23
  ), "task_id shouldn't be null and should be an UUID"
24
24
 
25
- results = self.request("ListArtifacts", id=task_id)
25
+ results = self.api_client.request("ListArtifacts", id=task_id)
26
26
 
27
27
  return map(Artifact, results)
28
28
 
@@ -43,4 +43,6 @@ class TaskMixin:
43
43
  artifact, Artifact
44
44
  ), "artifact shouldn't be null and should be an Artifact"
45
45
 
46
- return self.request("DownloadArtifact", id=task_id, path=artifact.path)
46
+ return self.api_client.request(
47
+ "DownloadArtifact", id=task_id, path=artifact.path
48
+ )
@@ -185,7 +185,7 @@ class TrainingMixin:
185
185
  assert not self.model_version, "A model version has already been created."
186
186
 
187
187
  configuration = configuration or {}
188
- self.model_version = self.request(
188
+ self.model_version = self.api_client.request(
189
189
  "CreateModelVersion",
190
190
  id=model_id,
191
191
  body=build_clean_payload(
@@ -217,7 +217,7 @@ class TrainingMixin:
217
217
  :param parent: ID of the parent model version
218
218
  """
219
219
  assert self.model_version, "No model version has been created yet."
220
- self.model_version = self.request(
220
+ self.model_version = self.api_client.request(
221
221
  "UpdateModelVersion",
222
222
  id=self.model_version["id"],
223
223
  body=build_clean_payload(
@@ -273,7 +273,7 @@ class TrainingMixin:
273
273
  """
274
274
  assert self.model_version, "You must create the model version and upload its archive before validating it."
275
275
  try:
276
- self.model_version = self.request(
276
+ self.model_version = self.api_client.request(
277
277
  "PartialUpdateModelVersion",
278
278
  id=self.model_version["id"],
279
279
  body={
@@ -294,7 +294,7 @@ class TrainingMixin:
294
294
  pending_version_id = self.model_version["id"]
295
295
  logger.warning("Removing the pending model version.")
296
296
  try:
297
- self.request("DestroyModelVersion", id=pending_version_id)
297
+ self.api_client.request("DestroyModelVersion", id=pending_version_id)
298
298
  except ErrorResponse as e:
299
299
  msg = getattr(e, "content", str(e))
300
300
  logger.error(
@@ -304,7 +304,7 @@ class TrainingMixin:
304
304
  logger.info("Retrieving the existing model version.")
305
305
  existing_version_id = model_version["id"].pop()
306
306
  try:
307
- self.model_version = self.request(
307
+ self.model_version = self.api_client.request(
308
308
  "RetrieveModelVersion", id=existing_version_id
309
309
  )
310
310
  except ErrorResponse as e:
@@ -77,7 +77,7 @@ class TranscriptionMixin:
77
77
  )
78
78
  return
79
79
 
80
- created = self.request(
80
+ created = self.api_client.request(
81
81
  "CreateTranscription",
82
82
  id=element.id,
83
83
  body={
@@ -171,7 +171,7 @@ class TranscriptionMixin:
171
171
  )
172
172
  return
173
173
 
174
- created_trs = self.request(
174
+ created_trs = self.api_client.request(
175
175
  "CreateTranscriptions",
176
176
  body={
177
177
  "worker_run_id": self.worker_run_id,
@@ -291,7 +291,7 @@ class TranscriptionMixin:
291
291
  )
292
292
  return
293
293
 
294
- annotations = self.request(
294
+ annotations = self.api_client.request(
295
295
  "CreateElementTranscriptions",
296
296
  id=element.id,
297
297
  body={
@@ -34,7 +34,9 @@ class WorkerVersionMixin:
34
34
  if worker_version_id in self._worker_version_cache:
35
35
  return self._worker_version_cache[worker_version_id]
36
36
 
37
- worker_version = self.request("RetrieveWorkerVersion", id=worker_version_id)
37
+ worker_version = self.api_client.request(
38
+ "RetrieveWorkerVersion", id=worker_version_id
39
+ )
38
40
  self._worker_version_cache[worker_version_id] = worker_version
39
41
 
40
42
  return worker_version
@@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "arkindex-base-worker"
7
- version = "0.4.0a2"
7
+ version = "0.4.0b1"
8
8
  description = "Base Worker to easily build Arkindex ML workflows"
9
9
  license = { file = "LICENSE" }
10
10
  dependencies = [
11
11
  "peewee~=3.17",
12
- "Pillow==10.3.0",
12
+ "Pillow==10.4.0",
13
13
  "python-gnupg==0.5.2",
14
- "shapely==2.0.3",
14
+ "shapely==2.0.5",
15
15
  "teklia-toolbox==0.1.5",
16
16
  "zstandard==0.22.0",
17
17
  ]
@@ -42,12 +42,12 @@ Authors = "https://teklia.com"
42
42
 
43
43
  [project.optional-dependencies]
44
44
  docs = [
45
- "black==24.4.0",
46
- "mkdocs-material==9.5.17",
47
- "mkdocstrings-python==1.9.2",
45
+ "black==24.4.2",
46
+ "mkdocs-material==9.5.31",
47
+ "mkdocstrings-python==1.10.7",
48
48
  ]
49
49
  tests = [
50
- "pytest==8.1.1",
50
+ "pytest==8.3.2",
51
51
  "pytest-mock==3.14.0",
52
52
  "pytest-responses==0.5.1",
53
53
  ]
@@ -658,7 +658,7 @@ def test_find_extras_directory_not_found(monkeypatch, extras_path, exists, error
658
658
  def test_find_parents_file_paths(responses, mock_base_worker_with_cache, tmp_path):
659
659
  responses.add(
660
660
  responses.GET,
661
- "http://testserver/api/v1/task/my_task/from-agent/",
661
+ "http://testserver/api/v1/task/my_task/",
662
662
  status=200,
663
663
  json={"parents": ["first", "second", "third"]},
664
664
  )
@@ -22,6 +22,24 @@ from tests import CORPUS_ID
22
22
  from . import BASE_API_CALLS
23
23
 
24
24
 
25
+ def test_list_corpus_types(responses, mock_elements_worker):
26
+ responses.add(
27
+ responses.GET,
28
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/",
29
+ json={
30
+ "id": CORPUS_ID,
31
+ "types": [{"slug": "folder"}, {"slug": "page"}],
32
+ },
33
+ )
34
+
35
+ mock_elements_worker.list_corpus_types()
36
+
37
+ assert mock_elements_worker.corpus_types == {
38
+ "folder": {"slug": "folder"},
39
+ "page": {"slug": "page"},
40
+ }
41
+
42
+
25
43
  def test_check_required_types_argument_types(mock_elements_worker):
26
44
  with pytest.raises(
27
45
  AssertionError, match="At least one element type slug is required."
@@ -32,17 +50,11 @@ def test_check_required_types_argument_types(mock_elements_worker):
32
50
  mock_elements_worker.check_required_types("lol", 42)
33
51
 
34
52
 
35
- def test_check_required_types(responses, mock_elements_worker):
36
- responses.add(
37
- responses.GET,
38
- f"http://testserver/api/v1/corpus/{CORPUS_ID}/",
39
- json={
40
- "id": CORPUS_ID,
41
- "name": "Some Corpus",
42
- "types": [{"slug": "folder"}, {"slug": "page"}],
43
- },
44
- )
45
- mock_elements_worker.setup_api_client()
53
+ def test_check_required_types(mock_elements_worker):
54
+ mock_elements_worker.corpus_types = {
55
+ "folder": {"slug": "folder"},
56
+ "page": {"slug": "page"},
57
+ }
46
58
 
47
59
  assert mock_elements_worker.check_required_types("page")
48
60
  assert mock_elements_worker.check_required_types("page", "folder")
@@ -50,22 +62,18 @@ def test_check_required_types(responses, mock_elements_worker):
50
62
  with pytest.raises(
51
63
  MissingTypeError,
52
64
  match=re.escape(
53
- "Element type(s) act, text_line were not found in the Some Corpus corpus (11111111-1111-1111-1111-111111111111)."
65
+ "Element type(s) act, text_line were not found in corpus (11111111-1111-1111-1111-111111111111)."
54
66
  ),
55
67
  ):
56
68
  assert mock_elements_worker.check_required_types("page", "text_line", "act")
57
69
 
58
70
 
59
71
  def test_create_missing_types(responses, mock_elements_worker):
60
- responses.add(
61
- responses.GET,
62
- f"http://testserver/api/v1/corpus/{CORPUS_ID}/",
63
- json={
64
- "id": CORPUS_ID,
65
- "name": "Some Corpus",
66
- "types": [{"slug": "folder"}, {"slug": "page"}],
67
- },
68
- )
72
+ mock_elements_worker.corpus_types = {
73
+ "folder": {"slug": "folder"},
74
+ "page": {"slug": "page"},
75
+ }
76
+
69
77
  responses.add(
70
78
  responses.POST,
71
79
  "http://testserver/api/v1/elements/type/",
@@ -94,7 +102,6 @@ def test_create_missing_types(responses, mock_elements_worker):
94
102
  )
95
103
  ],
96
104
  )
97
- mock_elements_worker.setup_api_client()
98
105
 
99
106
  assert mock_elements_worker.check_required_types(
100
107
  "page", "text_line", "act", create_missing=True
@@ -161,7 +161,7 @@ def test_merge_from_worker(
161
161
  """
162
162
  responses.add(
163
163
  responses.GET,
164
- "http://testserver/api/v1/task/my_task/from-agent/",
164
+ "http://testserver/api/v1/task/my_task/",
165
165
  status=200,
166
166
  json={"parents": ["first", "second"]},
167
167
  )