arkindex-base-worker 0.4.0rc1__py3-none-any.whl → 0.4.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. {arkindex_base_worker-0.4.0rc1.dist-info → arkindex_base_worker-0.4.0rc3.dist-info}/METADATA +5 -5
  2. arkindex_base_worker-0.4.0rc3.dist-info/RECORD +52 -0
  3. {arkindex_base_worker-0.4.0rc1.dist-info → arkindex_base_worker-0.4.0rc3.dist-info}/WHEEL +1 -1
  4. arkindex_worker/worker/__init__.py +24 -111
  5. arkindex_worker/worker/base.py +9 -1
  6. arkindex_worker/worker/classification.py +1 -1
  7. arkindex_worker/worker/corpus.py +21 -6
  8. arkindex_worker/worker/dataset.py +70 -0
  9. arkindex_worker/worker/element.py +17 -0
  10. arkindex_worker/worker/entity.py +1 -1
  11. arkindex_worker/worker/process.py +63 -0
  12. arkindex_worker/worker/task.py +1 -2
  13. arkindex_worker/worker/training.py +1 -1
  14. tests/__init__.py +1 -1
  15. tests/conftest.py +10 -3
  16. tests/test_dataset_worker.py +6 -3
  17. tests/test_elements_worker/test_classifications.py +1 -1
  18. tests/test_elements_worker/test_corpus.py +32 -1
  19. tests/test_elements_worker/test_dataset.py +1 -1
  20. tests/test_elements_worker/test_elements.py +270 -3
  21. tests/test_elements_worker/test_entities.py +1 -1
  22. tests/test_elements_worker/test_image.py +2 -1
  23. tests/test_elements_worker/test_metadata.py +1 -1
  24. tests/test_elements_worker/test_task.py +1 -1
  25. tests/test_elements_worker/test_transcriptions.py +1 -1
  26. tests/test_elements_worker/test_worker.py +1 -1
  27. arkindex_base_worker-0.4.0rc1.dist-info/RECORD +0 -51
  28. {arkindex_base_worker-0.4.0rc1.dist-info → arkindex_base_worker-0.4.0rc3.dist-info}/LICENSE +0 -0
  29. {arkindex_base_worker-0.4.0rc1.dist-info → arkindex_base_worker-0.4.0rc3.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arkindex-base-worker
3
- Version: 0.4.0rc1
3
+ Version: 0.4.0rc3
4
4
  Summary: Base Worker to easily build Arkindex ML workflows
5
5
  Author-email: Teklia <contact@teklia.com>
6
6
  Maintainer-email: Teklia <contact@teklia.com>
@@ -40,17 +40,17 @@ Classifier: Programming Language :: Python :: 3.11
40
40
  Requires-Python: >=3.10
41
41
  Description-Content-Type: text/markdown
42
42
  License-File: LICENSE
43
- Requires-Dist: humanize ==4.9.0
43
+ Requires-Dist: humanize ==4.10.0
44
44
  Requires-Dist: peewee ~=3.17
45
45
  Requires-Dist: Pillow ==10.4.0
46
46
  Requires-Dist: python-gnupg ==0.5.2
47
47
  Requires-Dist: shapely ==2.0.5
48
- Requires-Dist: teklia-toolbox ==0.1.5
48
+ Requires-Dist: teklia-toolbox ==0.1.7b1
49
49
  Requires-Dist: zstandard ==0.22.0
50
50
  Provides-Extra: docs
51
51
  Requires-Dist: black ==24.4.2 ; extra == 'docs'
52
- Requires-Dist: mkdocs-material ==9.5.31 ; extra == 'docs'
53
- Requires-Dist: mkdocstrings-python ==1.10.8 ; extra == 'docs'
52
+ Requires-Dist: mkdocs-material ==9.5.33 ; extra == 'docs'
53
+ Requires-Dist: mkdocstrings-python ==1.11.1 ; extra == 'docs'
54
54
  Provides-Extra: tests
55
55
  Requires-Dist: pytest ==8.3.2 ; extra == 'tests'
56
56
  Requires-Dist: pytest-mock ==3.14.0 ; extra == 'tests'
@@ -0,0 +1,52 @@
1
+ arkindex_worker/__init__.py,sha256=OlgCtTC9MaWeejviY0a3iQpALcRQGMVArFVVYwTF6I8,162
2
+ arkindex_worker/cache.py,sha256=FTlB0coXofn5zTNRTcVIvh709mcw4a1bPGqkwWjKs3w,11248
3
+ arkindex_worker/image.py,sha256=oEgVCrSHiGh3D5-UXfM6PvT17TttSxC0115irpvB3Dw,18581
4
+ arkindex_worker/models.py,sha256=bPQzGZNs5a6z6DEcygsa8T33VOqPlMUbwKzHqlKzwbw,9923
5
+ arkindex_worker/utils.py,sha256=q1EeLdC6ebYIH-C0LOAqw2cNpjCjVoP-Vbr-39mF4w0,9884
6
+ arkindex_worker/worker/__init__.py,sha256=tcqxrox9EpOjaN2EQgXumiABKpWHLsJiynPC2_sZuOQ,15880
7
+ arkindex_worker/worker/base.py,sha256=7Pmw-UQSxV-xkW8NO5cXsxJ8W8szzyppMaNjq_az81A,19844
8
+ arkindex_worker/worker/classification.py,sha256=zECSNzGCZFzoPoDVZN4kuGYRNLzMQLBaRt3q1jnBSaA,10952
9
+ arkindex_worker/worker/corpus.py,sha256=0TQFOwZ6Te-CZi6lgkZY1wzyJ5wO9LAmcVQtqHvZpPk,2291
10
+ arkindex_worker/worker/dataset.py,sha256=LwzKwNFX4FqfLxh29LSvJydPwRw3VHaB1wjuFhUshsE,5267
11
+ arkindex_worker/worker/element.py,sha256=Qvvq9kJnAHNATHW7zi96eIY1x-0MsR-T5rrSJg6e9Y4,45309
12
+ arkindex_worker/worker/entity.py,sha256=ThhP22xOYR5Z4P1VH_pOl_y_uDKZFeQVDqxO6aRkIhg,15227
13
+ arkindex_worker/worker/image.py,sha256=t_Az6IGnj0EZyvcA4XxfPikOUjn_pztgsyxTkFZhaXU,621
14
+ arkindex_worker/worker/metadata.py,sha256=VRajtd2kaBvar9GercX4knvR6l1WFYjoCdJWU9ccKgk,7291
15
+ arkindex_worker/worker/process.py,sha256=I1rBt3Y8bV4zcPr8N1E2NRZ0UClSTqhExsO9CPcP41E,1012
16
+ arkindex_worker/worker/task.py,sha256=r1j7_qbdNu2Z8H8HbGzO3P3qdx-2N1pBbUPFDca0rqg,1519
17
+ arkindex_worker/worker/training.py,sha256=H8FmCdzGcDW-WMMwcgvmZPlN5tPHwGo0BXn12qmzj8g,10875
18
+ arkindex_worker/worker/transcription.py,sha256=52RY9kYsiR1sz9FxOigyo12Ker3VDbQ4U42gK9DpR3g,21146
19
+ arkindex_worker/worker/version.py,sha256=JIT7OI3Mo7RPkNrjOB9hfqrsG-FYygz_zi4l8PbkuAo,1960
20
+ hooks/pre_gen_project.py,sha256=xQJERv3vv9VzIqcBHI281eeWLWREXUF4mMw7PvJHHXM,269
21
+ tests/__init__.py,sha256=DG--S6IpGl399rzSAjDdHL76CkOIeZIjajCcyUSDhOQ,241
22
+ tests/conftest.py,sha256=2ocZ2x-mZQrNe9zvWwhWk2_4ExdaBHIB74SvtDlExRE,21580
23
+ tests/test_base_worker.py,sha256=2EIYcd_3f9O0zB5WiGIQV0Cn9wndLvnEnSfcAE1qWWU,30607
24
+ tests/test_cache.py,sha256=ii0gyr0DrG7ChEs7pmT8hMdSguAOAcCze4bRMiFQxuk,10640
25
+ tests/test_dataset_worker.py,sha256=gApYz0LArHr1cNn079_fa_BQABF6RVQYuM1Tc4m3NsQ,22089
26
+ tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
27
+ tests/test_image.py,sha256=J3jqB5OhcdCpB6n0UnwivxrMlne8YjFLXhq1gBMANrs,26711
28
+ tests/test_merge.py,sha256=TuOeUS0UCz66DPOQFFhc4NQBxIjZL9f5czi4XnvGrr4,8270
29
+ tests/test_utils.py,sha256=_WJUPnt-pM_TQ0er4yjPZy-u_LePrHq1lxwk_teky7M,2544
30
+ tests/test_elements_worker/__init__.py,sha256=Fh4nkbbyJSMv_VtjQxnWrOqTnxXaaWI8S9WU0VrzCHs,179
31
+ tests/test_elements_worker/test_classifications.py,sha256=GtVyi9bg4YTd7nyw8u6IjQZYBwFMwoVZdrfSBc5UybU,27780
32
+ tests/test_elements_worker/test_cli.py,sha256=a23i1pUDbXi23MUtbWwGEcLLrmc_YlrbDgOG3h66wLM,2620
33
+ tests/test_elements_worker/test_corpus.py,sha256=OAbwgaQtHmcmPkcAl9Kuceun_BvMasnZvYj4_EdfugY,5483
34
+ tests/test_elements_worker/test_dataset.py,sha256=00IlOZv9YFlZ23rGXyR-HLbKLQxGelZ1Bf9lEZYA0IY,11412
35
+ tests/test_elements_worker/test_elements.py,sha256=l5YTfm0CzBTQyZvdOplhhza-gpPSz-8RVix1YUzAwhM,115497
36
+ tests/test_elements_worker/test_entities.py,sha256=nrCvkdJdjsyOrbD6R-H8NvxREZxciiR6CGIObXzeg50,36182
37
+ tests/test_elements_worker/test_image.py,sha256=BljMNKgec_9a5bzNzFpYZIvSbuvwsWDfdqLHVJaTa7M,2079
38
+ tests/test_elements_worker/test_metadata.py,sha256=Xfggy-vxw5DZ3hFKx3sB7OYb2d1tu1RiNK8fvKJIaBs,22294
39
+ tests/test_elements_worker/test_task.py,sha256=wTUWqN9UhfKmJn3IcFY75EW4I1ulRhisflmY1kmP47s,5574
40
+ tests/test_elements_worker/test_training.py,sha256=Qxi9EzGr_uKcn2Fh5aE6jNrq1K8QKLiOiSew4upASPs,8721
41
+ tests/test_elements_worker/test_transcriptions.py,sha256=iq-nR_st7Q9E_nD7knrKGY57g36J6nYSEzbPk9y-cxY,77061
42
+ tests/test_elements_worker/test_worker.py,sha256=VdprIWezB3dJdE8vNOrS71RQugqUysHlveOWTQate-8,10804
43
+ worker-demo/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
+ worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc,1002
45
+ worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
46
+ worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
47
+ worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
48
+ arkindex_base_worker-0.4.0rc3.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
49
+ arkindex_base_worker-0.4.0rc3.dist-info/METADATA,sha256=eDT7HxTvEz2yg4U_lbzkuigNWFu4JTqaLTnY0fqSCiM,3306
50
+ arkindex_base_worker-0.4.0rc3.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
51
+ arkindex_base_worker-0.4.0rc3.dist-info/top_level.txt,sha256=58NuslgxQC2vT4DiqZEgO4JqJRrYa2yeNI9QvkbfGQU,40
52
+ arkindex_base_worker-0.4.0rc3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (73.0.1)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -4,16 +4,13 @@ Base classes to implement Arkindex workers.
4
4
 
5
5
  import contextlib
6
6
  import json
7
- import os
8
7
  import sys
9
8
  import uuid
10
- from argparse import ArgumentTypeError
11
- from collections.abc import Iterable, Iterator
12
- from enum import Enum
9
+ from collections.abc import Iterable
10
+ from itertools import chain
13
11
  from pathlib import Path
14
12
 
15
- from apistar.exceptions import ErrorResponse
16
-
13
+ from arkindex.exceptions import ErrorResponse
17
14
  from arkindex_worker import logger
18
15
  from arkindex_worker.cache import CachedElement
19
16
  from arkindex_worker.models import Dataset, Element, Set
@@ -21,47 +18,27 @@ from arkindex_worker.utils import pluralize
21
18
  from arkindex_worker.worker.base import BaseWorker
22
19
  from arkindex_worker.worker.classification import ClassificationMixin
23
20
  from arkindex_worker.worker.corpus import CorpusMixin
24
- from arkindex_worker.worker.dataset import DatasetMixin, DatasetState
21
+ from arkindex_worker.worker.dataset import (
22
+ DatasetMixin,
23
+ DatasetState,
24
+ MissingDatasetArchive,
25
+ )
25
26
  from arkindex_worker.worker.element import ElementMixin
26
27
  from arkindex_worker.worker.entity import EntityMixin
27
28
  from arkindex_worker.worker.image import ImageMixin
28
29
  from arkindex_worker.worker.metadata import MetaDataMixin, MetaType # noqa: F401
30
+ from arkindex_worker.worker.process import ActivityState, ProcessMode
29
31
  from arkindex_worker.worker.task import TaskMixin
30
32
  from arkindex_worker.worker.transcription import TranscriptionMixin
31
33
  from arkindex_worker.worker.version import WorkerVersionMixin
32
34
 
33
35
 
34
- class ActivityState(Enum):
35
- """
36
- Processing state of an element.
37
- """
38
-
39
- Queued = "queued"
40
- """
41
- The element has not yet been processed by a worker.
42
- """
43
-
44
- Started = "started"
45
- """
46
- The element is being processed by a worker.
47
- """
48
-
49
- Processed = "processed"
50
- """
51
- The element has been successfully processed by a worker.
52
- """
53
-
54
- Error = "error"
55
- """
56
- An error occurred while processing this element.
57
- """
58
-
59
-
60
36
  class ElementsWorker(
37
+ ElementMixin,
38
+ DatasetMixin,
61
39
  BaseWorker,
62
40
  ClassificationMixin,
63
41
  CorpusMixin,
64
- ElementMixin,
65
42
  TranscriptionMixin,
66
43
  WorkerVersionMixin,
67
44
  EntityMixin,
@@ -96,22 +73,7 @@ class ElementsWorker(
96
73
 
97
74
  self._worker_version_cache = {}
98
75
 
99
- def add_arguments(self):
100
- """Define specific ``argparse`` arguments for this worker"""
101
- self.parser.add_argument(
102
- "--elements-list",
103
- help="JSON elements list to use",
104
- type=open,
105
- default=os.environ.get("TASK_ELEMENTS"),
106
- )
107
- self.parser.add_argument(
108
- "--element",
109
- type=str,
110
- nargs="+",
111
- help="One or more Arkindex element ID",
112
- )
113
-
114
- def get_elements(self) -> Iterable[CachedElement] | list[str]:
76
+ def get_elements(self) -> Iterable[CachedElement] | list[str] | list[Element]:
115
77
  """
116
78
  List the elements to be processed, either from the CLI arguments or
117
79
  the cache database when enabled.
@@ -143,15 +105,20 @@ class ElementsWorker(
143
105
  )
144
106
  if self.use_cache and cache_query.exists():
145
107
  return cache_query
146
- # Process elements from JSON file
147
108
  elif self.args.elements_list:
109
+ # Process elements from JSON file
148
110
  data = json.load(self.args.elements_list)
149
111
  assert isinstance(data, list), "Elements list must be a list"
150
112
  assert len(data), "No elements in elements list"
151
113
  out += list(filter(None, [element.get("id") for element in data]))
152
- # Add any extra element from CLI
153
114
  elif self.args.element:
115
+ # Add any extra element from CLI
154
116
  out += self.args.element
117
+ elif self.process_mode == ProcessMode.Dataset or self.args.set:
118
+ # Elements from datasets
119
+ return list(
120
+ chain.from_iterable(map(self.list_set_elements, self.list_sets()))
121
+ )
155
122
 
156
123
  invalid_element_ids = list(filter(invalid_element_id, out))
157
124
  assert (
@@ -166,7 +133,8 @@ class ElementsWorker(
166
133
  Whether or not WorkerActivity support has been enabled on the DataImport
167
134
  used to run this worker.
168
135
  """
169
- if self.is_read_only:
136
+ if self.is_read_only or self.process_mode == ProcessMode.Dataset:
137
+ # Worker activities are also disabled when running an ElementsWorker in a Dataset process.
170
138
  return False
171
139
  assert (
172
140
  self.process_information
@@ -200,7 +168,7 @@ class ElementsWorker(
200
168
  for i, item in enumerate(elements, start=1):
201
169
  element = None
202
170
  try:
203
- if self.use_cache:
171
+ if isinstance(item, CachedElement | Element):
204
172
  # Just use the result of get_elements as the element
205
173
  element = item
206
174
  else:
@@ -316,29 +284,7 @@ class ElementsWorker(
316
284
  return True
317
285
 
318
286
 
319
- def check_dataset_set(value: str) -> tuple[uuid.UUID, str]:
320
- values = value.split(":")
321
- if len(values) != 2:
322
- raise ArgumentTypeError(
323
- f"'{value}' is not in the correct format `<dataset_id>:<set_name>`"
324
- )
325
-
326
- dataset_id, set_name = values
327
- try:
328
- dataset_id = uuid.UUID(dataset_id)
329
- return (dataset_id, set_name)
330
- except (TypeError, ValueError) as e:
331
- raise ArgumentTypeError(f"'{dataset_id}' should be a valid UUID") from e
332
-
333
-
334
- class MissingDatasetArchive(Exception):
335
- """
336
- Exception raised when the compressed archive associated to
337
- a dataset isn't found in its task artifacts.
338
- """
339
-
340
-
341
- class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
287
+ class DatasetWorker(DatasetMixin, BaseWorker, TaskMixin):
342
288
  """
343
289
  Base class for ML workers that operate on Arkindex dataset sets.
344
290
 
@@ -361,19 +307,6 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
361
307
  # Set as an instance variable as dataset workers might use it to easily extract its content
362
308
  self.downloaded_dataset_artifact: Path | None = None
363
309
 
364
- def add_arguments(self):
365
- """Define specific ``argparse`` arguments for this worker"""
366
- self.parser.add_argument(
367
- "--set",
368
- type=check_dataset_set,
369
- nargs="+",
370
- help="""
371
- One or more Arkindex dataset sets, format is <dataset_uuid>:<set_name>
372
- (e.g.: "12341234-1234-1234-1234-123412341234:train")
373
- """,
374
- default=[],
375
- )
376
-
377
310
  def cleanup_downloaded_artifact(self) -> None:
378
311
  """
379
312
  Cleanup the downloaded dataset artifact if any
@@ -421,30 +354,10 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
421
354
  :param set: The set to process.
422
355
  """
423
356
 
424
- def list_sets(self) -> Iterator[Set]:
425
- """
426
- List the sets to be processed, either from the CLI arguments or using the
427
- [list_process_sets][arkindex_worker.worker.dataset.DatasetMixin.list_process_sets] method.
428
-
429
- :returns: An iterator of ``Set`` objects.
430
- """
431
- if not self.is_read_only:
432
- yield from self.list_process_sets()
433
-
434
- datasets: dict[uuid.UUID, Dataset] = {}
435
- for dataset_id, set_name in self.args.set:
436
- # Retrieving dataset information is not already cached
437
- if dataset_id not in datasets:
438
- datasets[dataset_id] = Dataset(
439
- **self.api_client.request("RetrieveDataset", id=dataset_id)
440
- )
441
-
442
- yield Set(name=set_name, dataset=datasets[dataset_id])
443
-
444
357
  def run(self):
445
358
  """
446
359
  Implements an Arkindex worker that goes through each dataset set returned by
447
- [list_sets][arkindex_worker.worker.DatasetWorker.list_sets].
360
+ [list_sets][arkindex_worker.worker.dataset.DatasetMixin.list_sets].
448
361
 
449
362
  It calls [process_set][arkindex_worker.worker.DatasetWorker.process_set],
450
363
  catching exceptions.
@@ -12,9 +12,9 @@ from tempfile import mkdtemp
12
12
 
13
13
  import gnupg
14
14
  import yaml
15
- from apistar.exceptions import ErrorResponse
16
15
 
17
16
  from arkindex import options_from_env
17
+ from arkindex.exceptions import ErrorResponse
18
18
  from arkindex_worker import logger
19
19
  from arkindex_worker.cache import (
20
20
  check_version,
@@ -24,6 +24,7 @@ from arkindex_worker.cache import (
24
24
  merge_parents_cache,
25
25
  )
26
26
  from arkindex_worker.utils import close_delete_file, extract_tar_zst_archive
27
+ from arkindex_worker.worker.process import ProcessMode
27
28
  from teklia_toolbox.requests import get_arkindex_client
28
29
 
29
30
 
@@ -156,6 +157,13 @@ class BaseWorker:
156
157
  raise Exception("Missing ARKINDEX_CORPUS_ID environment variable")
157
158
  return self._corpus_id
158
159
 
160
+ @property
161
+ def process_mode(self) -> ProcessMode | None:
162
+ """Mode of the process being run. Returns None when read-only."""
163
+ if self.is_read_only:
164
+ return
165
+ return ProcessMode(self.process_information["mode"])
166
+
159
167
  @property
160
168
  def is_read_only(self) -> bool:
161
169
  """
@@ -2,9 +2,9 @@
2
2
  ElementsWorker methods for classifications and ML classes.
3
3
  """
4
4
 
5
- from apistar.exceptions import ErrorResponse
6
5
  from peewee import IntegrityError
7
6
 
7
+ from arkindex.exceptions import ErrorResponse
8
8
  from arkindex_worker import logger
9
9
  from arkindex_worker.cache import CachedClassification, CachedElement
10
10
  from arkindex_worker.models import Element
@@ -5,6 +5,7 @@ BaseWorker methods for corpora.
5
5
  from enum import Enum
6
6
  from operator import itemgetter
7
7
  from tempfile import _TemporaryFileWrapper
8
+ from uuid import UUID
8
9
 
9
10
  from arkindex_worker import logger
10
11
 
@@ -36,6 +37,25 @@ class CorpusExportState(Enum):
36
37
 
37
38
 
38
39
  class CorpusMixin:
40
+ def download_export(self, export_id: str) -> _TemporaryFileWrapper:
41
+ """
42
+ Download an export.
43
+
44
+ :param export_id: UUID of the export to download
45
+ :returns: The downloaded export stored in a temporary file.
46
+ """
47
+ try:
48
+ UUID(export_id)
49
+ except ValueError as e:
50
+ raise ValueError("export_id is not a valid uuid.") from e
51
+
52
+ logger.info(f"Downloading export ({export_id})...")
53
+ export: _TemporaryFileWrapper = self.api_client.request(
54
+ "DownloadExport", id=export_id
55
+ )
56
+ logger.info(f"Downloaded export ({export_id}) @ `{export.name}`")
57
+ return export
58
+
39
59
  def download_latest_export(self) -> _TemporaryFileWrapper:
40
60
  """
41
61
  Download the latest export in `done` state of the current corpus.
@@ -62,10 +82,5 @@ class CorpusMixin:
62
82
 
63
83
  # Download latest export
64
84
  export_id: str = exports[0]["id"]
65
- logger.info(f"Downloading export ({export_id})...")
66
- export: _TemporaryFileWrapper = self.api_client.request(
67
- "DownloadExport", id=export_id
68
- )
69
- logger.info(f"Downloaded export ({export_id}) @ `{export.name}`")
70
85
 
71
- return export
86
+ return self.download_export(export_id)
@@ -2,6 +2,8 @@
2
2
  BaseWorker methods for datasets.
3
3
  """
4
4
 
5
+ import uuid
6
+ from argparse import ArgumentTypeError
5
7
  from collections.abc import Iterator
6
8
  from enum import Enum
7
9
 
@@ -36,7 +38,55 @@ class DatasetState(Enum):
36
38
  """
37
39
 
38
40
 
41
+ class MissingDatasetArchive(Exception):
42
+ """
43
+ Exception raised when the compressed archive associated to
44
+ a dataset isn't found in its task artifacts.
45
+ """
46
+
47
+
48
+ def check_dataset_set(value: str) -> tuple[uuid.UUID, str]:
49
+ """The `--set` argument should have the following format:
50
+ <dataset_id>:<set_name>
51
+
52
+ Args:
53
+ value (str): Provided argument.
54
+
55
+ Raises:
56
+ ArgumentTypeError: When the value is invalid.
57
+
58
+ Returns:
59
+ tuple[uuid.UUID, str]: The ID of the dataset parsed as UUID and the name of the set.
60
+ """
61
+ values = value.split(":")
62
+ if len(values) != 2:
63
+ raise ArgumentTypeError(
64
+ f"'{value}' is not in the correct format `<dataset_id>:<set_name>`"
65
+ )
66
+
67
+ dataset_id, set_name = values
68
+ try:
69
+ dataset_id = uuid.UUID(dataset_id)
70
+ return (dataset_id, set_name)
71
+ except (TypeError, ValueError) as e:
72
+ raise ArgumentTypeError(f"'{dataset_id}' should be a valid UUID") from e
73
+
74
+
39
75
  class DatasetMixin:
76
+ def add_arguments(self) -> None:
77
+ """Define specific ``argparse`` arguments for the worker using this mixin"""
78
+ self.parser.add_argument(
79
+ "--set",
80
+ type=check_dataset_set,
81
+ nargs="+",
82
+ help="""
83
+ One or more Arkindex dataset sets, format is <dataset_uuid>:<set_name>
84
+ (e.g.: "12341234-1234-1234-1234-123412341234:train")
85
+ """,
86
+ default=[],
87
+ )
88
+ super().add_arguments()
89
+
40
90
  def list_process_sets(self) -> Iterator[Set]:
41
91
  """
42
92
  List dataset sets associated to the worker's process. This helper is not available in developer mode.
@@ -73,6 +123,26 @@ class DatasetMixin:
73
123
 
74
124
  return map(lambda result: Element(**result["element"]), results)
75
125
 
126
+ def list_sets(self) -> Iterator[Set]:
127
+ """
128
+ List the sets to be processed, either from the CLI arguments or using the
129
+ [list_process_sets][arkindex_worker.worker.dataset.DatasetMixin.list_process_sets] method.
130
+
131
+ :returns: An iterator of ``Set`` objects.
132
+ """
133
+ if not self.is_read_only:
134
+ yield from self.list_process_sets()
135
+
136
+ datasets: dict[uuid.UUID, Dataset] = {}
137
+ for dataset_id, set_name in self.args.set:
138
+ # Retrieving dataset information if not already cached
139
+ if dataset_id not in datasets:
140
+ datasets[dataset_id] = Dataset(
141
+ **self.api_client.request("RetrieveDataset", id=dataset_id)
142
+ )
143
+
144
+ yield Set(name=set_name, dataset=datasets[dataset_id])
145
+
76
146
  @unsupported_cache
77
147
  def update_dataset_state(self, dataset: Dataset, state: DatasetState) -> Dataset:
78
148
  """
@@ -2,6 +2,7 @@
2
2
  ElementsWorker methods for elements and element types.
3
3
  """
4
4
 
5
+ import os
5
6
  from collections.abc import Iterable
6
7
  from operator import attrgetter
7
8
  from typing import NamedTuple
@@ -38,6 +39,22 @@ class MissingTypeError(Exception):
38
39
 
39
40
 
40
41
  class ElementMixin:
42
+ def add_arguments(self):
43
+ """Define specific ``argparse`` arguments for the worker using this mixin"""
44
+ self.parser.add_argument(
45
+ "--elements-list",
46
+ help="JSON elements list to use",
47
+ type=open,
48
+ default=os.environ.get("TASK_ELEMENTS"),
49
+ )
50
+ self.parser.add_argument(
51
+ "--element",
52
+ type=str,
53
+ nargs="+",
54
+ help="One or more Arkindex element ID",
55
+ )
56
+ super().add_arguments()
57
+
41
58
  def list_corpus_types(self):
42
59
  """
43
60
  Loads available element types in corpus.
@@ -302,7 +302,7 @@ class EntityMixin:
302
302
 
303
303
  created_entities = [
304
304
  created_entity
305
- for batch in make_batches(entities, "entities", batch_size)
305
+ for batch in make_batches(entities, "entity", batch_size)
306
306
  for created_entity in self.api_client.request(
307
307
  "CreateTranscriptionEntities",
308
308
  id=transcription.id,
@@ -0,0 +1,63 @@
1
+ from enum import Enum
2
+
3
+
4
+ class ActivityState(Enum):
5
+ """
6
+ Processing state of an element.
7
+ """
8
+
9
+ Queued = "queued"
10
+ """
11
+ The element has not yet been processed by a worker.
12
+ """
13
+
14
+ Started = "started"
15
+ """
16
+ The element is being processed by a worker.
17
+ """
18
+
19
+ Processed = "processed"
20
+ """
21
+ The element has been successfully processed by a worker.
22
+ """
23
+
24
+ Error = "error"
25
+ """
26
+ An error occurred while processing this element.
27
+ """
28
+
29
+
30
+ class ProcessMode(Enum):
31
+ """
32
+ Mode of the process of the worker.
33
+ """
34
+
35
+ Files = "files"
36
+ """
37
+ Processes of files (images, PDFs, IIIF, ...) imports.
38
+ """
39
+
40
+ Workers = "workers"
41
+ """
42
+ Processes of worker executions.
43
+ """
44
+
45
+ Template = "template"
46
+ """
47
+ Process templates.
48
+ """
49
+
50
+ S3 = "s3"
51
+ """
52
+ Processes of imports from an S3-compatible storage.
53
+ """
54
+
55
+ Local = "local"
56
+ """
57
+ Local processes.
58
+ """
59
+
60
+ Dataset = "dataset"
61
+ """
62
+ Dataset processes.
63
+ """
@@ -5,8 +5,7 @@ BaseWorker methods for tasks.
5
5
  import uuid
6
6
  from collections.abc import Iterator
7
7
 
8
- from apistar.compat import DownloadedFile
9
-
8
+ from arkindex.compat import DownloadedFile
10
9
  from arkindex_worker.models import Artifact
11
10
 
12
11
 
@@ -9,8 +9,8 @@ from typing import NewType
9
9
  from uuid import UUID
10
10
 
11
11
  import requests
12
- from apistar.exceptions import ErrorResponse
13
12
 
13
+ from arkindex.exceptions import ErrorResponse
14
14
  from arkindex_worker import logger
15
15
  from arkindex_worker.utils import close_delete_file, create_tar_zst_archive
16
16
 
tests/__init__.py CHANGED
@@ -5,4 +5,4 @@ FIXTURES_DIR = BASE_DIR / "data"
5
5
  SAMPLES_DIR = BASE_DIR / "samples"
6
6
 
7
7
  CORPUS_ID = "11111111-1111-1111-1111-111111111111"
8
- PROCESS_ID = "cafecafe-cafe-cafe-cafe-cafecafecafe"
8
+ PROCESS_ID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"
tests/conftest.py CHANGED
@@ -23,10 +23,15 @@ from arkindex_worker.cache import (
23
23
  init_cache_db,
24
24
  )
25
25
  from arkindex_worker.models import Artifact, Dataset, Set
26
- from arkindex_worker.worker import BaseWorker, DatasetWorker, ElementsWorker
26
+ from arkindex_worker.worker import (
27
+ BaseWorker,
28
+ DatasetWorker,
29
+ ElementsWorker,
30
+ ProcessMode,
31
+ )
27
32
  from arkindex_worker.worker.dataset import DatasetState
28
33
  from arkindex_worker.worker.transcription import TextOrientation
29
- from tests import CORPUS_ID, PROCESS_ID, SAMPLES_DIR
34
+ from tests import CORPUS_ID, SAMPLES_DIR
30
35
 
31
36
  __yaml_cache = {}
32
37
 
@@ -601,7 +606,9 @@ def mock_dataset_worker(monkeypatch, mocker, _mock_worker_run_api):
601
606
 
602
607
  dataset_worker = DatasetWorker()
603
608
  dataset_worker.configure()
604
- dataset_worker.process_information = {"id": PROCESS_ID}
609
+
610
+ # Update process mode
611
+ dataset_worker.process_information["mode"] = ProcessMode.Dataset
605
612
 
606
613
  assert not dataset_worker.is_read_only
607
614
 
@@ -3,11 +3,14 @@ import uuid
3
3
  from argparse import ArgumentTypeError
4
4
 
5
5
  import pytest
6
- from apistar.exceptions import ErrorResponse
7
6
 
7
+ from arkindex.exceptions import ErrorResponse
8
8
  from arkindex_worker.models import Dataset, Set
9
- from arkindex_worker.worker import MissingDatasetArchive, check_dataset_set
10
- from arkindex_worker.worker.dataset import DatasetState
9
+ from arkindex_worker.worker.dataset import (
10
+ DatasetState,
11
+ MissingDatasetArchive,
12
+ check_dataset_set,
13
+ )
11
14
  from tests import FIXTURES_DIR, PROCESS_ID
12
15
  from tests.test_elements_worker import BASE_API_CALLS
13
16
 
@@ -3,8 +3,8 @@ import re
3
3
  from uuid import UUID
4
4
 
5
5
  import pytest
6
- from apistar.exceptions import ErrorResponse
7
6
 
7
+ from arkindex.exceptions import ErrorResponse
8
8
  from arkindex_worker.cache import CachedClassification, CachedElement
9
9
  from arkindex_worker.models import Element
10
10
  from arkindex_worker.utils import DEFAULT_BATCH_SIZE
@@ -2,8 +2,8 @@ import re
2
2
  import uuid
3
3
 
4
4
  import pytest
5
- from apistar.exceptions import ErrorResponse
6
5
 
6
+ from arkindex.exceptions import ErrorResponse
7
7
  from arkindex_worker.worker.corpus import CorpusExportState
8
8
  from tests import CORPUS_ID
9
9
  from tests.test_elements_worker import BASE_API_CALLS
@@ -135,3 +135,34 @@ def test_download_latest_export(responses, mock_elements_worker):
135
135
  ("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/export/"),
136
136
  ("GET", f"http://testserver/api/v1/export/{export_id}/"),
137
137
  ]
138
+
139
+
140
+ def test_download_export_not_a_uuid(responses, mock_elements_worker):
141
+ with pytest.raises(ValueError, match="export_id is not a valid uuid."):
142
+ mock_elements_worker.download_export("mon export")
143
+
144
+
145
+ def test_download_export(responses, mock_elements_worker):
146
+ responses.add(
147
+ responses.GET,
148
+ "http://testserver/api/v1/export/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/",
149
+ status=302,
150
+ body=b"some SQLite export",
151
+ content_type="application/x-sqlite3",
152
+ stream=True,
153
+ )
154
+
155
+ export = mock_elements_worker.download_export(
156
+ "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"
157
+ )
158
+ assert export.name == "/tmp/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"
159
+
160
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
161
+ assert [
162
+ (call.request.method, call.request.url) for call in responses.calls
163
+ ] == BASE_API_CALLS + [
164
+ (
165
+ "GET",
166
+ "http://testserver/api/v1/export/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/",
167
+ ),
168
+ ]
@@ -2,8 +2,8 @@ import json
2
2
  import logging
3
3
 
4
4
  import pytest
5
- from apistar.exceptions import ErrorResponse
6
5
 
6
+ from arkindex.exceptions import ErrorResponse
7
7
  from arkindex_worker.models import Dataset, Element, Set
8
8
  from arkindex_worker.worker.dataset import DatasetState
9
9
  from tests import PROCESS_ID
@@ -4,9 +4,9 @@ from argparse import Namespace
4
4
  from uuid import UUID
5
5
 
6
6
  import pytest
7
- from apistar.exceptions import ErrorResponse
8
7
  from responses import matchers
9
8
 
9
+ from arkindex.exceptions import ErrorResponse
10
10
  from arkindex_worker.cache import (
11
11
  SQL_VERSION,
12
12
  CachedElement,
@@ -17,7 +17,9 @@ from arkindex_worker.cache import (
17
17
  from arkindex_worker.models import Element
18
18
  from arkindex_worker.utils import DEFAULT_BATCH_SIZE
19
19
  from arkindex_worker.worker import ElementsWorker
20
+ from arkindex_worker.worker.dataset import DatasetState
20
21
  from arkindex_worker.worker.element import MissingTypeError
22
+ from arkindex_worker.worker.process import ProcessMode
21
23
  from tests import CORPUS_ID
22
24
 
23
25
  from . import BASE_API_CALLS
@@ -208,10 +210,12 @@ def test_get_elements_element_arg_not_uuid(mocker, mock_elements_worker):
208
210
  "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
209
211
  return_value=Namespace(
210
212
  element=["volumeid", "pageid"],
213
+ config={},
211
214
  verbose=False,
212
215
  elements_list=None,
213
216
  database=None,
214
- dev=False,
217
+ dev=True,
218
+ set=[],
215
219
  ),
216
220
  )
217
221
 
@@ -232,10 +236,12 @@ def test_get_elements_element_arg(mocker, mock_elements_worker):
232
236
  "11111111-1111-1111-1111-111111111111",
233
237
  "22222222-2222-2222-2222-222222222222",
234
238
  ],
239
+ config={},
235
240
  verbose=False,
236
241
  elements_list=None,
237
242
  database=None,
238
- dev=False,
243
+ dev=True,
244
+ set=[],
239
245
  ),
240
246
  )
241
247
 
@@ -250,6 +256,264 @@ def test_get_elements_element_arg(mocker, mock_elements_worker):
250
256
  ]
251
257
 
252
258
 
259
+ def test_get_elements_dataset_set_arg(responses, mocker, mock_elements_worker):
260
+ mocker.patch(
261
+ "arkindex_worker.worker.base.argparse.ArgumentParser.parse_args",
262
+ return_value=Namespace(
263
+ element=[],
264
+ config={},
265
+ verbose=False,
266
+ elements_list=None,
267
+ database=None,
268
+ dev=True,
269
+ set=[(UUID("11111111-1111-1111-1111-111111111111"), "train")],
270
+ ),
271
+ )
272
+
273
+ # Mock RetrieveDataset call
274
+ responses.add(
275
+ responses.GET,
276
+ "http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/",
277
+ status=200,
278
+ json={
279
+ "id": "11111111-1111-1111-1111-111111111111",
280
+ "name": "My dataset",
281
+ "description": "A dataset about cats.",
282
+ "sets": ["train", "dev", "test"],
283
+ "state": DatasetState.Complete.value,
284
+ },
285
+ content_type="application/json",
286
+ )
287
+
288
+ # Mock ListSetElements call
289
+ element = {
290
+ "id": "22222222-2222-2222-2222-222222222222",
291
+ "type": "page",
292
+ "name": "1",
293
+ "corpus": {
294
+ "id": "11111111-1111-1111-1111-111111111111",
295
+ },
296
+ "thumbnail_url": "http://example.com",
297
+ "zone": {
298
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
299
+ "polygon": [[0, 0], [0, 0], [0, 0]],
300
+ "image": {
301
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
302
+ "path": "string",
303
+ "width": 0,
304
+ "height": 0,
305
+ "url": "http://example.com",
306
+ "s3_url": "string",
307
+ "status": "checked",
308
+ "server": {
309
+ "display_name": "string",
310
+ "url": "http://example.com",
311
+ "max_width": 2147483647,
312
+ "max_height": 2147483647,
313
+ },
314
+ },
315
+ "url": "http://example.com",
316
+ },
317
+ "rotation_angle": 0,
318
+ "mirrored": False,
319
+ "created": "2019-08-24T14:15:22Z",
320
+ "classes": [
321
+ {
322
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
323
+ "ml_class": {
324
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
325
+ "name": "string",
326
+ },
327
+ "state": "pending",
328
+ "confidence": 0,
329
+ "high_confidence": True,
330
+ "worker_run": {
331
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
332
+ "summary": "string",
333
+ },
334
+ }
335
+ ],
336
+ "metadata": [
337
+ {
338
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
339
+ "type": "text",
340
+ "name": "string",
341
+ "value": "string",
342
+ "dates": [{"type": "exact", "year": 0, "month": 1, "day": 1}],
343
+ }
344
+ ],
345
+ "transcriptions": [
346
+ {
347
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
348
+ "text": "string",
349
+ "confidence": 0,
350
+ "orientation": "horizontal-lr",
351
+ "worker_run": {
352
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
353
+ "summary": "string",
354
+ },
355
+ }
356
+ ],
357
+ "has_children": True,
358
+ "worker_run": {
359
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
360
+ "summary": "string",
361
+ },
362
+ "confidence": 1,
363
+ }
364
+ responses.add(
365
+ responses.GET,
366
+ "http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/elements/?set=train&with_count=true",
367
+ status=200,
368
+ json={
369
+ "next": None,
370
+ "previous": None,
371
+ "results": [
372
+ {
373
+ "set": "train",
374
+ "element": element,
375
+ }
376
+ ],
377
+ "count": 1,
378
+ },
379
+ content_type="application/json",
380
+ )
381
+
382
+ worker = ElementsWorker()
383
+ worker.configure()
384
+
385
+ elt_list = worker.get_elements()
386
+
387
+ assert elt_list == [
388
+ Element(**element),
389
+ ]
390
+
391
+
392
+ def test_get_elements_dataset_set_api(responses, mocker, mock_elements_worker):
393
+ # Mock ListProcessSets call
394
+ responses.add(
395
+ responses.GET,
396
+ "http://testserver/api/v1/process/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/sets/",
397
+ status=200,
398
+ json={
399
+ "next": None,
400
+ "previous": None,
401
+ "results": [
402
+ {
403
+ "id": "33333333-3333-3333-3333-333333333333",
404
+ "dataset": {"id": "11111111-1111-1111-1111-111111111111"},
405
+ "set_name": "train",
406
+ }
407
+ ],
408
+ "count": 1,
409
+ },
410
+ content_type="application/json",
411
+ )
412
+
413
+ # Mock ListSetElements call
414
+ element = {
415
+ "id": "22222222-2222-2222-2222-222222222222",
416
+ "type": "page",
417
+ "name": "1",
418
+ "corpus": {
419
+ "id": "11111111-1111-1111-1111-111111111111",
420
+ },
421
+ "thumbnail_url": "http://example.com",
422
+ "zone": {
423
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
424
+ "polygon": [[0, 0], [0, 0], [0, 0]],
425
+ "image": {
426
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
427
+ "path": "string",
428
+ "width": 0,
429
+ "height": 0,
430
+ "url": "http://example.com",
431
+ "s3_url": "string",
432
+ "status": "checked",
433
+ "server": {
434
+ "display_name": "string",
435
+ "url": "http://example.com",
436
+ "max_width": 2147483647,
437
+ "max_height": 2147483647,
438
+ },
439
+ },
440
+ "url": "http://example.com",
441
+ },
442
+ "rotation_angle": 0,
443
+ "mirrored": False,
444
+ "created": "2019-08-24T14:15:22Z",
445
+ "classes": [
446
+ {
447
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
448
+ "ml_class": {
449
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
450
+ "name": "string",
451
+ },
452
+ "state": "pending",
453
+ "confidence": 0,
454
+ "high_confidence": True,
455
+ "worker_run": {
456
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
457
+ "summary": "string",
458
+ },
459
+ }
460
+ ],
461
+ "metadata": [
462
+ {
463
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
464
+ "type": "text",
465
+ "name": "string",
466
+ "value": "string",
467
+ "dates": [{"type": "exact", "year": 0, "month": 1, "day": 1}],
468
+ }
469
+ ],
470
+ "transcriptions": [
471
+ {
472
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
473
+ "text": "string",
474
+ "confidence": 0,
475
+ "orientation": "horizontal-lr",
476
+ "worker_run": {
477
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
478
+ "summary": "string",
479
+ },
480
+ }
481
+ ],
482
+ "has_children": True,
483
+ "worker_run": {
484
+ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
485
+ "summary": "string",
486
+ },
487
+ "confidence": 1,
488
+ }
489
+ responses.add(
490
+ responses.GET,
491
+ "http://testserver/api/v1/datasets/11111111-1111-1111-1111-111111111111/elements/?set=train&with_count=true",
492
+ status=200,
493
+ json={
494
+ "next": None,
495
+ "previous": None,
496
+ "results": [
497
+ {
498
+ "set": "train",
499
+ "element": element,
500
+ }
501
+ ],
502
+ "count": 1,
503
+ },
504
+ content_type="application/json",
505
+ )
506
+
507
+ # Update ProcessMode to Dataset
508
+ mock_elements_worker.process_information["mode"] = ProcessMode.Dataset
509
+
510
+ elt_list = mock_elements_worker.get_elements()
511
+
512
+ assert elt_list == [
513
+ Element(**element),
514
+ ]
515
+
516
+
253
517
  def test_get_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
254
518
  elements_path = tmp_path / "elements.json"
255
519
  elements_path.write_text(
@@ -270,6 +534,7 @@ def test_get_elements_both_args_error(mocker, mock_elements_worker, tmp_path):
270
534
  elements_list=elements_path.open(),
271
535
  database=None,
272
536
  dev=False,
537
+ set=[],
273
538
  ),
274
539
  )
275
540
 
@@ -295,6 +560,7 @@ def test_database_arg(mocker, mock_elements_worker, tmp_path):
295
560
  elements_list=None,
296
561
  database=database_path,
297
562
  dev=False,
563
+ set=[],
298
564
  ),
299
565
  )
300
566
 
@@ -319,6 +585,7 @@ def test_database_arg_cache_missing_version_table(
319
585
  elements_list=None,
320
586
  database=database_path,
321
587
  dev=False,
588
+ set=[],
322
589
  ),
323
590
  )
324
591
 
@@ -3,9 +3,9 @@ import re
3
3
  from uuid import UUID
4
4
 
5
5
  import pytest
6
- from apistar.exceptions import ErrorResponse
7
6
  from responses import matchers
8
7
 
8
+ from arkindex.exceptions import ErrorResponse
9
9
  from arkindex_worker.cache import (
10
10
  CachedElement,
11
11
  CachedEntity,
@@ -1,7 +1,8 @@
1
1
  import json
2
2
 
3
3
  import pytest
4
- from apistar.exceptions import ErrorResponse
4
+
5
+ from arkindex.exceptions import ErrorResponse
5
6
 
6
7
  from . import BASE_API_CALLS
7
8
 
@@ -2,8 +2,8 @@ import json
2
2
  import re
3
3
 
4
4
  import pytest
5
- from apistar.exceptions import ErrorResponse
6
5
 
6
+ from arkindex.exceptions import ErrorResponse
7
7
  from arkindex.mock import MockApiClient
8
8
  from arkindex_worker.cache import CachedElement
9
9
  from arkindex_worker.models import Element
@@ -1,8 +1,8 @@
1
1
  import uuid
2
2
 
3
3
  import pytest
4
- from apistar.exceptions import ErrorResponse
5
4
 
5
+ from arkindex.exceptions import ErrorResponse
6
6
  from arkindex_worker.models import Artifact
7
7
  from tests import FIXTURES_DIR
8
8
  from tests.test_elements_worker import BASE_API_CALLS
@@ -3,9 +3,9 @@ import re
3
3
  from uuid import UUID
4
4
 
5
5
  import pytest
6
- from apistar.exceptions import ErrorResponse
7
6
  from playhouse.shortcuts import model_to_dict
8
7
 
8
+ from arkindex.exceptions import ErrorResponse
9
9
  from arkindex_worker.cache import CachedElement, CachedTranscription
10
10
  from arkindex_worker.models import Element
11
11
  from arkindex_worker.utils import DEFAULT_BATCH_SIZE
@@ -2,8 +2,8 @@ import json
2
2
  import sys
3
3
 
4
4
  import pytest
5
- from apistar.exceptions import ErrorResponse
6
5
 
6
+ from arkindex.exceptions import ErrorResponse
7
7
  from arkindex_worker.cache import CachedElement
8
8
  from arkindex_worker.worker import ActivityState, ElementsWorker
9
9
 
@@ -1,51 +0,0 @@
1
- arkindex_worker/__init__.py,sha256=OlgCtTC9MaWeejviY0a3iQpALcRQGMVArFVVYwTF6I8,162
2
- arkindex_worker/cache.py,sha256=FTlB0coXofn5zTNRTcVIvh709mcw4a1bPGqkwWjKs3w,11248
3
- arkindex_worker/image.py,sha256=oEgVCrSHiGh3D5-UXfM6PvT17TttSxC0115irpvB3Dw,18581
4
- arkindex_worker/models.py,sha256=bPQzGZNs5a6z6DEcygsa8T33VOqPlMUbwKzHqlKzwbw,9923
5
- arkindex_worker/utils.py,sha256=q1EeLdC6ebYIH-C0LOAqw2cNpjCjVoP-Vbr-39mF4w0,9884
6
- arkindex_worker/worker/__init__.py,sha256=Xzn20bD4THFcnDfPjZeE-uU41m_whs_3yA0WjZb9uqk,18195
7
- arkindex_worker/worker/base.py,sha256=wyEJB5_zcy4cTvqSXMhX8DLaWQVgvIKO77-uovcprq4,19539
8
- arkindex_worker/worker/classification.py,sha256=ECm1cnQPOj_9m-CoO0e182ElSySAUOoyddHrORbShhc,10951
9
- arkindex_worker/worker/corpus.py,sha256=s9bCxOszJMwRq1WWAmKjWq888mjDfbaJ18Wo7h-rNOw,1827
10
- arkindex_worker/worker/dataset.py,sha256=UXElhhARca9m7Himp-yxD5dAqWbdxDKWOUJUGgeCZXI,2934
11
- arkindex_worker/worker/element.py,sha256=1qTnz9Y4nbTSxn274-sRmM2stzT5wJrsbshxXHlBoPw,44789
12
- arkindex_worker/worker/entity.py,sha256=qGjQvOVXfP84rER0Dkui6q-rb9nTWerHVG0Z5voB8pU,15229
13
- arkindex_worker/worker/image.py,sha256=t_Az6IGnj0EZyvcA4XxfPikOUjn_pztgsyxTkFZhaXU,621
14
- arkindex_worker/worker/metadata.py,sha256=VRajtd2kaBvar9GercX4knvR6l1WFYjoCdJWU9ccKgk,7291
15
- arkindex_worker/worker/task.py,sha256=1O9zrWXxe3na3TOcoHX5Pxn1875v7EU08BSsCPnb62g,1519
16
- arkindex_worker/worker/training.py,sha256=qnBFEk11JOWWPLTbjF-lZ9iFBdTPpQzZAzQ9a03J1j4,10874
17
- arkindex_worker/worker/transcription.py,sha256=52RY9kYsiR1sz9FxOigyo12Ker3VDbQ4U42gK9DpR3g,21146
18
- arkindex_worker/worker/version.py,sha256=JIT7OI3Mo7RPkNrjOB9hfqrsG-FYygz_zi4l8PbkuAo,1960
19
- hooks/pre_gen_project.py,sha256=xQJERv3vv9VzIqcBHI281eeWLWREXUF4mMw7PvJHHXM,269
20
- tests/__init__.py,sha256=6aeTMHf4q_dKY4jIZWg1KT70VKaLvVlzCxh-Uu_cWiQ,241
21
- tests/conftest.py,sha256=KNBZ0xMC9xX2pKQXp_4XwVU07JGeTSFeM4rN2RpipfY,21522
22
- tests/test_base_worker.py,sha256=2EIYcd_3f9O0zB5WiGIQV0Cn9wndLvnEnSfcAE1qWWU,30607
23
- tests/test_cache.py,sha256=ii0gyr0DrG7ChEs7pmT8hMdSguAOAcCze4bRMiFQxuk,10640
24
- tests/test_dataset_worker.py,sha256=d9HG36qnO5HXu9vQ0UTBvdTSRR21FVq1FNoXM-vZbPk,22105
25
- tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
26
- tests/test_image.py,sha256=J3jqB5OhcdCpB6n0UnwivxrMlne8YjFLXhq1gBMANrs,26711
27
- tests/test_merge.py,sha256=TuOeUS0UCz66DPOQFFhc4NQBxIjZL9f5czi4XnvGrr4,8270
28
- tests/test_utils.py,sha256=_WJUPnt-pM_TQ0er4yjPZy-u_LePrHq1lxwk_teky7M,2544
29
- tests/test_elements_worker/__init__.py,sha256=Fh4nkbbyJSMv_VtjQxnWrOqTnxXaaWI8S9WU0VrzCHs,179
30
- tests/test_elements_worker/test_classifications.py,sha256=fXZ8cSzIWwZ6LHsY7tKsy9-Pp9fKyKUStIXS4ViBcek,27779
31
- tests/test_elements_worker/test_cli.py,sha256=a23i1pUDbXi23MUtbWwGEcLLrmc_YlrbDgOG3h66wLM,2620
32
- tests/test_elements_worker/test_corpus.py,sha256=c_LUHvkJIYgk_wXF06VQPNOoWfiZ06XpjOXrJ7MRiBc,4479
33
- tests/test_elements_worker/test_dataset.py,sha256=lSXqubhg1EEq2Y2goE8Y2RYaqIpM9Iejq6fGNW2BczU,11411
34
- tests/test_elements_worker/test_elements.py,sha256=PBVRIQB8yTCCa22A0VJKIsJSa4gvagDVZVtZT8mlZF0,107199
35
- tests/test_elements_worker/test_entities.py,sha256=oav2dtvWWavQe1l3Drbxw1Ta2ocUJEVxJfDQ_r6-rYQ,36181
36
- tests/test_elements_worker/test_image.py,sha256=_E3UGdDOwTo1MW5KMS81PrdeSPBPWinWYoQPNy2F9Ro,2077
37
- tests/test_elements_worker/test_metadata.py,sha256=cm2NNaXxBYmYMkPexSPVTAqb2skDTB4mliwQCLz8Y98,22293
38
- tests/test_elements_worker/test_task.py,sha256=7Sr3fbjdgWUXJUhJEiC9CwnbhQIQX3rCInmHMIrmA38,5573
39
- tests/test_elements_worker/test_training.py,sha256=Qxi9EzGr_uKcn2Fh5aE6jNrq1K8QKLiOiSew4upASPs,8721
40
- tests/test_elements_worker/test_transcriptions.py,sha256=FNY6E26iTKqe7LP9LO72By4oV4g9hBIZYTU9BAc_w7I,77060
41
- tests/test_elements_worker/test_worker.py,sha256=AuFDyqncIusT-rMMY4sEay9MqGvoNuSuZQq-5rHN02U,10803
42
- worker-demo/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
- worker-demo/tests/conftest.py,sha256=XzNMNeg6pmABUAH8jN6eZTlZSFGLYjS3-DTXjiRN6Yc,1002
44
- worker-demo/tests/test_worker.py,sha256=3DLd4NRK4bfyatG5P_PK4k9P9tJHx9XQq5_ryFEEFVg,304
45
- worker-demo/worker_demo/__init__.py,sha256=2BPomV8ZMNf3YXJgloatKeHQCE6QOkwmsHGkO6MkQuM,125
46
- worker-demo/worker_demo/worker.py,sha256=Rt-DjWa5iBP08k58NDZMfeyPuFbtNcbX6nc5jFX7GNo,440
47
- arkindex_base_worker-0.4.0rc1.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
48
- arkindex_base_worker-0.4.0rc1.dist-info/METADATA,sha256=22DYiI2CtAzJ9d0P21Y2ZlAoBFX_Ks-yRQMoYlMO5KM,3303
49
- arkindex_base_worker-0.4.0rc1.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
50
- arkindex_base_worker-0.4.0rc1.dist-info/top_level.txt,sha256=58NuslgxQC2vT4DiqZEgO4JqJRrYa2yeNI9QvkbfGQU,40
51
- arkindex_base_worker-0.4.0rc1.dist-info/RECORD,,