arkindex-base-worker 0.4.0a2__tar.gz → 0.4.0b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/PKG-INFO +7 -7
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_base_worker.egg-info/PKG-INFO +7 -7
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_base_worker.egg-info/requires.txt +6 -6
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/__init__.py +21 -12
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/base.py +3 -9
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/classification.py +3 -3
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/corpus.py +3 -1
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/dataset.py +1 -1
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/element.py +24 -9
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/entity.py +6 -7
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/metadata.py +2 -2
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/task.py +4 -2
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/training.py +5 -5
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/transcription.py +3 -3
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/version.py +3 -1
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/pyproject.toml +7 -7
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_base_worker.py +1 -1
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_elements.py +29 -22
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_merge.py +1 -1
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/LICENSE +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/README.md +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_base_worker.egg-info/SOURCES.txt +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_base_worker.egg-info/dependency_links.txt +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_base_worker.egg-info/top_level.txt +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/__init__.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/cache.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/image.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/models.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/utils.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/image.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/hooks/pre_gen_project.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/setup.cfg +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/__init__.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/conftest.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_cache.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_dataset_worker.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_element.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/__init__.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_classifications.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_cli.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_corpus.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_dataset.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_entities.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_image.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_metadata.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_task.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_training.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_transcriptions.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_worker.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_image.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_utils.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/tests/__init__.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/tests/conftest.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/tests/test_worker.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/worker_demo/__init__.py +0 -0
- {arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/worker_demo/worker.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.4.0a2
|
|
3
|
+
Version: 0.4.0b1
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -41,17 +41,17 @@ Requires-Python: >=3.10
|
|
|
41
41
|
Description-Content-Type: text/markdown
|
|
42
42
|
License-File: LICENSE
|
|
43
43
|
Requires-Dist: peewee~=3.17
|
|
44
|
-
Requires-Dist: Pillow==10.
|
|
44
|
+
Requires-Dist: Pillow==10.4.0
|
|
45
45
|
Requires-Dist: python-gnupg==0.5.2
|
|
46
|
-
Requires-Dist: shapely==2.0.
|
|
46
|
+
Requires-Dist: shapely==2.0.5
|
|
47
47
|
Requires-Dist: teklia-toolbox==0.1.5
|
|
48
48
|
Requires-Dist: zstandard==0.22.0
|
|
49
49
|
Provides-Extra: docs
|
|
50
|
-
Requires-Dist: black==24.4.
|
|
51
|
-
Requires-Dist: mkdocs-material==9.5.
|
|
52
|
-
Requires-Dist: mkdocstrings-python==1.
|
|
50
|
+
Requires-Dist: black==24.4.2; extra == "docs"
|
|
51
|
+
Requires-Dist: mkdocs-material==9.5.31; extra == "docs"
|
|
52
|
+
Requires-Dist: mkdocstrings-python==1.10.7; extra == "docs"
|
|
53
53
|
Provides-Extra: tests
|
|
54
|
-
Requires-Dist: pytest==8.
|
|
54
|
+
Requires-Dist: pytest==8.3.2; extra == "tests"
|
|
55
55
|
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
56
56
|
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
57
57
|
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_base_worker.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: arkindex-base-worker
|
|
3
|
-
Version: 0.4.0a2
|
|
3
|
+
Version: 0.4.0b1
|
|
4
4
|
Summary: Base Worker to easily build Arkindex ML workflows
|
|
5
5
|
Author-email: Teklia <contact@teklia.com>
|
|
6
6
|
Maintainer-email: Teklia <contact@teklia.com>
|
|
@@ -41,17 +41,17 @@ Requires-Python: >=3.10
|
|
|
41
41
|
Description-Content-Type: text/markdown
|
|
42
42
|
License-File: LICENSE
|
|
43
43
|
Requires-Dist: peewee~=3.17
|
|
44
|
-
Requires-Dist: Pillow==10.
|
|
44
|
+
Requires-Dist: Pillow==10.4.0
|
|
45
45
|
Requires-Dist: python-gnupg==0.5.2
|
|
46
|
-
Requires-Dist: shapely==2.0.
|
|
46
|
+
Requires-Dist: shapely==2.0.5
|
|
47
47
|
Requires-Dist: teklia-toolbox==0.1.5
|
|
48
48
|
Requires-Dist: zstandard==0.22.0
|
|
49
49
|
Provides-Extra: docs
|
|
50
|
-
Requires-Dist: black==24.4.
|
|
51
|
-
Requires-Dist: mkdocs-material==9.5.
|
|
52
|
-
Requires-Dist: mkdocstrings-python==1.
|
|
50
|
+
Requires-Dist: black==24.4.2; extra == "docs"
|
|
51
|
+
Requires-Dist: mkdocs-material==9.5.31; extra == "docs"
|
|
52
|
+
Requires-Dist: mkdocstrings-python==1.10.7; extra == "docs"
|
|
53
53
|
Provides-Extra: tests
|
|
54
|
-
Requires-Dist: pytest==8.
|
|
54
|
+
Requires-Dist: pytest==8.3.2; extra == "tests"
|
|
55
55
|
Requires-Dist: pytest-mock==3.14.0; extra == "tests"
|
|
56
56
|
Requires-Dist: pytest-responses==0.5.1; extra == "tests"
|
|
57
57
|
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
peewee~=3.17
|
|
2
|
-
Pillow==10.
|
|
2
|
+
Pillow==10.4.0
|
|
3
3
|
python-gnupg==0.5.2
|
|
4
|
-
shapely==2.0.
|
|
4
|
+
shapely==2.0.5
|
|
5
5
|
teklia-toolbox==0.1.5
|
|
6
6
|
zstandard==0.22.0
|
|
7
7
|
|
|
8
8
|
[docs]
|
|
9
|
-
black==24.4.
|
|
10
|
-
mkdocs-material==9.5.
|
|
11
|
-
mkdocstrings-python==1.
|
|
9
|
+
black==24.4.2
|
|
10
|
+
mkdocs-material==9.5.31
|
|
11
|
+
mkdocstrings-python==1.10.7
|
|
12
12
|
|
|
13
13
|
[tests]
|
|
14
|
-
pytest==8.
|
|
14
|
+
pytest==8.3.2
|
|
15
15
|
pytest-mock==3.14.0
|
|
16
16
|
pytest-responses==0.5.1
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/__init__.py
RENAMED
|
@@ -83,7 +83,20 @@ class ElementsWorker(
|
|
|
83
83
|
"""
|
|
84
84
|
super().__init__(description, support_cache)
|
|
85
85
|
|
|
86
|
-
|
|
86
|
+
self.classes = {}
|
|
87
|
+
|
|
88
|
+
self.entity_types = {}
|
|
89
|
+
"""Known and available entity types in processed corpus
|
|
90
|
+
"""
|
|
91
|
+
|
|
92
|
+
self.corpus_types = {}
|
|
93
|
+
"""Known and available element types in processed corpus
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
self._worker_version_cache = {}
|
|
97
|
+
|
|
98
|
+
def add_arguments(self):
|
|
99
|
+
"""Define specific ``argparse`` arguments for this worker"""
|
|
87
100
|
self.parser.add_argument(
|
|
88
101
|
"--elements-list",
|
|
89
102
|
help="JSON elements list to use",
|
|
@@ -97,14 +110,6 @@ class ElementsWorker(
|
|
|
97
110
|
help="One or more Arkindex element ID",
|
|
98
111
|
)
|
|
99
112
|
|
|
100
|
-
self.classes = {}
|
|
101
|
-
|
|
102
|
-
self.entity_types = {}
|
|
103
|
-
"""Known and available entity types in processed corpus
|
|
104
|
-
"""
|
|
105
|
-
|
|
106
|
-
self._worker_version_cache = {}
|
|
107
|
-
|
|
108
113
|
def list_elements(self) -> Iterable[CachedElement] | list[str]:
|
|
109
114
|
"""
|
|
110
115
|
List the elements to be processed, either from the CLI arguments or
|
|
@@ -222,7 +227,9 @@ class ElementsWorker(
|
|
|
222
227
|
element = item
|
|
223
228
|
else:
|
|
224
229
|
# Load element using the Arkindex API
|
|
225
|
-
element = Element(
|
|
230
|
+
element = Element(
|
|
231
|
+
**self.api_client.request("RetrieveElement", id=item)
|
|
232
|
+
)
|
|
226
233
|
|
|
227
234
|
logger.info(f"Processing {element} ({i}/{count})")
|
|
228
235
|
|
|
@@ -301,7 +308,7 @@ class ElementsWorker(
|
|
|
301
308
|
assert isinstance(state, ActivityState), "state should be an ActivityState"
|
|
302
309
|
|
|
303
310
|
try:
|
|
304
|
-
self.request(
|
|
311
|
+
self.api_client.request(
|
|
305
312
|
"UpdateWorkerActivity",
|
|
306
313
|
id=self.worker_run_id,
|
|
307
314
|
body={
|
|
@@ -376,6 +383,8 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
|
|
|
376
383
|
# Set as an instance variable as dataset workers might use it to easily extract its content
|
|
377
384
|
self.downloaded_dataset_artifact: Path | None = None
|
|
378
385
|
|
|
386
|
+
def add_arguments(self):
|
|
387
|
+
"""Define specific ``argparse`` arguments for this worker"""
|
|
379
388
|
self.parser.add_argument(
|
|
380
389
|
"--set",
|
|
381
390
|
type=check_dataset_set,
|
|
@@ -472,7 +481,7 @@ class DatasetWorker(BaseWorker, DatasetMixin, TaskMixin):
|
|
|
472
481
|
# Retrieving dataset information is not already cached
|
|
473
482
|
if dataset_id not in datasets:
|
|
474
483
|
datasets[dataset_id] = Dataset(
|
|
475
|
-
**self.request("RetrieveDataset", id=dataset_id)
|
|
484
|
+
**self.api_client.request("RetrieveDataset", id=dataset_id)
|
|
476
485
|
)
|
|
477
486
|
|
|
478
487
|
yield Set(name=set_name, dataset=datasets[dataset_id])
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/base.py
RENAMED
|
@@ -231,7 +231,7 @@ class BaseWorker:
|
|
|
231
231
|
logger.debug("Debug output enabled")
|
|
232
232
|
|
|
233
233
|
# Load worker run information
|
|
234
|
-
worker_run = self.request("RetrieveWorkerRun", id=self.worker_run_id)
|
|
234
|
+
worker_run = self.api_client.request("RetrieveWorkerRun", id=self.worker_run_id)
|
|
235
235
|
|
|
236
236
|
# Load process information
|
|
237
237
|
self.process_information = worker_run["process"]
|
|
@@ -290,7 +290,7 @@ class BaseWorker:
|
|
|
290
290
|
if self.support_cache and self.args.database is not None:
|
|
291
291
|
self.use_cache = True
|
|
292
292
|
elif self.support_cache and self.task_id:
|
|
293
|
-
task = self.request("
|
|
293
|
+
task = self.api_client.request("RetrieveTask", id=self.task_id)
|
|
294
294
|
self.task_parents = task["parents"]
|
|
295
295
|
paths = self.find_parents_file_paths(Path("db.sqlite"))
|
|
296
296
|
self.use_cache = len(paths) > 0
|
|
@@ -331,7 +331,7 @@ class BaseWorker:
|
|
|
331
331
|
|
|
332
332
|
# Load from the backend
|
|
333
333
|
try:
|
|
334
|
-
resp = self.request("RetrieveSecret", name=str(name))
|
|
334
|
+
resp = self.api_client.request("RetrieveSecret", name=str(name))
|
|
335
335
|
secret = resp["content"]
|
|
336
336
|
logging.info(f"Loaded API secret {name}")
|
|
337
337
|
except ErrorResponse as e:
|
|
@@ -471,12 +471,6 @@ class BaseWorker:
|
|
|
471
471
|
# Clean up
|
|
472
472
|
shutil.rmtree(base_extracted_path)
|
|
473
473
|
|
|
474
|
-
def request(self, *args, **kwargs):
|
|
475
|
-
"""
|
|
476
|
-
Wrapper around the ``ArkindexClient.request`` method.
|
|
477
|
-
"""
|
|
478
|
-
return self.api_client.request(*args, **kwargs)
|
|
479
|
-
|
|
480
474
|
def add_arguments(self):
|
|
481
475
|
"""Override this method to add ``argparse`` arguments to this worker"""
|
|
482
476
|
|
|
@@ -39,7 +39,7 @@ class ClassificationMixin:
|
|
|
39
39
|
if ml_class_id is None:
|
|
40
40
|
logger.info(f"Creating ML class {ml_class} on corpus {self.corpus_id}")
|
|
41
41
|
try:
|
|
42
|
-
response = self.request(
|
|
42
|
+
response = self.api_client.request(
|
|
43
43
|
"CreateMLClass", id=self.corpus_id, body={"name": ml_class}
|
|
44
44
|
)
|
|
45
45
|
ml_class_id = self.classes[ml_class] = response["id"]
|
|
@@ -119,7 +119,7 @@ class ClassificationMixin:
|
|
|
119
119
|
)
|
|
120
120
|
return
|
|
121
121
|
try:
|
|
122
|
-
created = self.request(
|
|
122
|
+
created = self.api_client.request(
|
|
123
123
|
"CreateClassification",
|
|
124
124
|
body={
|
|
125
125
|
"element": str(element.id),
|
|
@@ -220,7 +220,7 @@ class ClassificationMixin:
|
|
|
220
220
|
)
|
|
221
221
|
return
|
|
222
222
|
|
|
223
|
-
created_cls = self.request(
|
|
223
|
+
created_cls = self.api_client.request(
|
|
224
224
|
"CreateClassifications",
|
|
225
225
|
body={
|
|
226
226
|
"parent": str(element.id),
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/corpus.py
RENAMED
|
@@ -63,7 +63,9 @@ class CorpusMixin:
|
|
|
63
63
|
# Download latest export
|
|
64
64
|
export_id: str = exports[0]["id"]
|
|
65
65
|
logger.info(f"Downloading export ({export_id})...")
|
|
66
|
-
export: _TemporaryFileWrapper = self.request(
|
|
66
|
+
export: _TemporaryFileWrapper = self.api_client.request(
|
|
67
|
+
"DownloadExport", id=export_id
|
|
68
|
+
)
|
|
67
69
|
logger.info(f"Downloaded export ({export_id}) @ `{export.name}`")
|
|
68
70
|
|
|
69
71
|
return export
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/dataset.py
RENAMED
|
@@ -93,7 +93,7 @@ class DatasetMixin:
|
|
|
93
93
|
logger.warning("Cannot update dataset as this worker is in read-only mode")
|
|
94
94
|
return
|
|
95
95
|
|
|
96
|
-
updated_dataset = self.request(
|
|
96
|
+
updated_dataset = self.api_client.request(
|
|
97
97
|
"PartialUpdateDataset",
|
|
98
98
|
id=dataset.id,
|
|
99
99
|
body={"state": state.value},
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/element.py
RENAMED
|
@@ -31,6 +31,21 @@ class MissingTypeError(Exception):
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class ElementMixin:
|
|
34
|
+
def list_corpus_types(self):
|
|
35
|
+
"""
|
|
36
|
+
Loads available element types in corpus.
|
|
37
|
+
"""
|
|
38
|
+
self.corpus_types = {
|
|
39
|
+
element_type["slug"]: element_type
|
|
40
|
+
for element_type in self.api_client.request(
|
|
41
|
+
"RetrieveCorpus", id=self.corpus_id
|
|
42
|
+
)["types"]
|
|
43
|
+
}
|
|
44
|
+
count = len(self.corpus_types)
|
|
45
|
+
logger.info(
|
|
46
|
+
f'Loaded {count} element type{"s"[:count>1]} in corpus ({self.corpus_id}).'
|
|
47
|
+
)
|
|
48
|
+
|
|
34
49
|
@unsupported_cache
|
|
35
50
|
def create_required_types(self, element_types: list[ElementType]):
|
|
36
51
|
"""Creates given element types in the corpus.
|
|
@@ -38,7 +53,7 @@ class ElementMixin:
|
|
|
38
53
|
:param element_types: The missing element types to create.
|
|
39
54
|
"""
|
|
40
55
|
for element_type in element_types:
|
|
41
|
-
self.request(
|
|
56
|
+
self.api_client.request(
|
|
42
57
|
"CreateElementType",
|
|
43
58
|
body={
|
|
44
59
|
"slug": element_type.slug,
|
|
@@ -66,10 +81,10 @@ class ElementMixin:
|
|
|
66
81
|
isinstance(slug, str) for slug in type_slugs
|
|
67
82
|
), "Element type slugs must be strings."
|
|
68
83
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
missing_slugs = set(type_slugs) - available_slugs
|
|
84
|
+
if not self.corpus_types:
|
|
85
|
+
self.list_corpus_types()
|
|
72
86
|
|
|
87
|
+
missing_slugs = set(type_slugs) - set(self.corpus_types)
|
|
73
88
|
if missing_slugs:
|
|
74
89
|
if create_missing:
|
|
75
90
|
self.create_required_types(
|
|
@@ -79,7 +94,7 @@ class ElementMixin:
|
|
|
79
94
|
)
|
|
80
95
|
else:
|
|
81
96
|
raise MissingTypeError(
|
|
82
|
-
f'Element type(s) {", ".join(sorted(missing_slugs))} were not found in
|
|
97
|
+
f'Element type(s) {", ".join(sorted(missing_slugs))} were not found in corpus ({self.corpus_id}).'
|
|
83
98
|
)
|
|
84
99
|
|
|
85
100
|
return True
|
|
@@ -145,7 +160,7 @@ class ElementMixin:
|
|
|
145
160
|
logger.warning("Cannot create element as this worker is in read-only mode")
|
|
146
161
|
return
|
|
147
162
|
|
|
148
|
-
sub_element = self.request(
|
|
163
|
+
sub_element = self.api_client.request(
|
|
149
164
|
"CreateElement",
|
|
150
165
|
body={
|
|
151
166
|
"type": type,
|
|
@@ -243,7 +258,7 @@ class ElementMixin:
|
|
|
243
258
|
logger.warning("Cannot create elements as this worker is in read-only mode")
|
|
244
259
|
return
|
|
245
260
|
|
|
246
|
-
created_ids = self.request(
|
|
261
|
+
created_ids = self.api_client.request(
|
|
247
262
|
"CreateElements",
|
|
248
263
|
id=parent.id,
|
|
249
264
|
body={
|
|
@@ -311,7 +326,7 @@ class ElementMixin:
|
|
|
311
326
|
logger.warning("Cannot link elements as this worker is in read-only mode")
|
|
312
327
|
return
|
|
313
328
|
|
|
314
|
-
return self.request(
|
|
329
|
+
return self.api_client.request(
|
|
315
330
|
"CreateElementParent",
|
|
316
331
|
parent=parent.id,
|
|
317
332
|
child=child.id,
|
|
@@ -383,7 +398,7 @@ class ElementMixin:
|
|
|
383
398
|
logger.warning("Cannot update element as this worker is in read-only mode")
|
|
384
399
|
return
|
|
385
400
|
|
|
386
|
-
updated_element = self.request(
|
|
401
|
+
updated_element = self.api_client.request(
|
|
387
402
|
"PartialUpdateElement",
|
|
388
403
|
id=element.id,
|
|
389
404
|
body=kwargs,
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/entity.py
RENAMED
|
@@ -48,6 +48,7 @@ class EntityMixin:
|
|
|
48
48
|
if not self.entity_types:
|
|
49
49
|
# Load entity_types of corpus
|
|
50
50
|
self.list_corpus_entity_types()
|
|
51
|
+
|
|
51
52
|
for entity_type in entity_types:
|
|
52
53
|
# Do nothing if type already exists
|
|
53
54
|
if entity_type in self.entity_types:
|
|
@@ -60,7 +61,7 @@ class EntityMixin:
|
|
|
60
61
|
)
|
|
61
62
|
|
|
62
63
|
# Create type if non-existent
|
|
63
|
-
self.entity_types[entity_type] = self.request(
|
|
64
|
+
self.entity_types[entity_type] = self.api_client.request(
|
|
64
65
|
"CreateEntityType",
|
|
65
66
|
body={
|
|
66
67
|
"name": entity_type,
|
|
@@ -106,7 +107,7 @@ class EntityMixin:
|
|
|
106
107
|
entity_type_id = self.entity_types.get(type)
|
|
107
108
|
assert entity_type_id, f"Entity type `{type}` not found in the corpus."
|
|
108
109
|
|
|
109
|
-
entity = self.request(
|
|
110
|
+
entity = self.api_client.request(
|
|
110
111
|
"CreateEntity",
|
|
111
112
|
body={
|
|
112
113
|
"name": name,
|
|
@@ -188,7 +189,7 @@ class EntityMixin:
|
|
|
188
189
|
if confidence is not None:
|
|
189
190
|
body["confidence"] = confidence
|
|
190
191
|
|
|
191
|
-
transcription_ent = self.request(
|
|
192
|
+
transcription_ent = self.api_client.request(
|
|
192
193
|
"CreateTranscriptionEntity",
|
|
193
194
|
id=transcription.id,
|
|
194
195
|
body=body,
|
|
@@ -289,7 +290,7 @@ class EntityMixin:
|
|
|
289
290
|
)
|
|
290
291
|
return
|
|
291
292
|
|
|
292
|
-
created_ids = self.request(
|
|
293
|
+
created_ids = self.api_client.request(
|
|
293
294
|
"CreateTranscriptionEntities",
|
|
294
295
|
id=transcription.id,
|
|
295
296
|
body={
|
|
@@ -385,9 +386,7 @@ class EntityMixin:
|
|
|
385
386
|
f'Loaded {count} entit{"ies" if count > 1 else "y"} in corpus ({self.corpus_id})'
|
|
386
387
|
)
|
|
387
388
|
|
|
388
|
-
def list_corpus_entity_types(
|
|
389
|
-
self,
|
|
390
|
-
):
|
|
389
|
+
def list_corpus_entity_types(self):
|
|
391
390
|
"""
|
|
392
391
|
Loads available entity types in corpus.
|
|
393
392
|
"""
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/metadata.py
RENAMED
|
@@ -93,7 +93,7 @@ class MetaDataMixin:
|
|
|
93
93
|
logger.warning("Cannot create metadata as this worker is in read-only mode")
|
|
94
94
|
return
|
|
95
95
|
|
|
96
|
-
metadata = self.request(
|
|
96
|
+
metadata = self.api_client.request(
|
|
97
97
|
"CreateMetaData",
|
|
98
98
|
id=element.id,
|
|
99
99
|
body={
|
|
@@ -168,7 +168,7 @@ class MetaDataMixin:
|
|
|
168
168
|
logger.warning("Cannot create metadata as this worker is in read-only mode")
|
|
169
169
|
return
|
|
170
170
|
|
|
171
|
-
created_metadata_list = self.request(
|
|
171
|
+
created_metadata_list = self.api_client.request(
|
|
172
172
|
"CreateMetaDataBulk",
|
|
173
173
|
id=element.id,
|
|
174
174
|
body={
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/task.py
RENAMED
|
@@ -22,7 +22,7 @@ class TaskMixin:
|
|
|
22
22
|
task_id, uuid.UUID
|
|
23
23
|
), "task_id shouldn't be null and should be an UUID"
|
|
24
24
|
|
|
25
|
-
results = self.request("ListArtifacts", id=task_id)
|
|
25
|
+
results = self.api_client.request("ListArtifacts", id=task_id)
|
|
26
26
|
|
|
27
27
|
return map(Artifact, results)
|
|
28
28
|
|
|
@@ -43,4 +43,6 @@ class TaskMixin:
|
|
|
43
43
|
artifact, Artifact
|
|
44
44
|
), "artifact shouldn't be null and should be an Artifact"
|
|
45
45
|
|
|
46
|
-
return self.request(
|
|
46
|
+
return self.api_client.request(
|
|
47
|
+
"DownloadArtifact", id=task_id, path=artifact.path
|
|
48
|
+
)
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/training.py
RENAMED
|
@@ -185,7 +185,7 @@ class TrainingMixin:
|
|
|
185
185
|
assert not self.model_version, "A model version has already been created."
|
|
186
186
|
|
|
187
187
|
configuration = configuration or {}
|
|
188
|
-
self.model_version = self.request(
|
|
188
|
+
self.model_version = self.api_client.request(
|
|
189
189
|
"CreateModelVersion",
|
|
190
190
|
id=model_id,
|
|
191
191
|
body=build_clean_payload(
|
|
@@ -217,7 +217,7 @@ class TrainingMixin:
|
|
|
217
217
|
:param parent: ID of the parent model version
|
|
218
218
|
"""
|
|
219
219
|
assert self.model_version, "No model version has been created yet."
|
|
220
|
-
self.model_version = self.request(
|
|
220
|
+
self.model_version = self.api_client.request(
|
|
221
221
|
"UpdateModelVersion",
|
|
222
222
|
id=self.model_version["id"],
|
|
223
223
|
body=build_clean_payload(
|
|
@@ -273,7 +273,7 @@ class TrainingMixin:
|
|
|
273
273
|
"""
|
|
274
274
|
assert self.model_version, "You must create the model version and upload its archive before validating it."
|
|
275
275
|
try:
|
|
276
|
-
self.model_version = self.request(
|
|
276
|
+
self.model_version = self.api_client.request(
|
|
277
277
|
"PartialUpdateModelVersion",
|
|
278
278
|
id=self.model_version["id"],
|
|
279
279
|
body={
|
|
@@ -294,7 +294,7 @@ class TrainingMixin:
|
|
|
294
294
|
pending_version_id = self.model_version["id"]
|
|
295
295
|
logger.warning("Removing the pending model version.")
|
|
296
296
|
try:
|
|
297
|
-
self.request("DestroyModelVersion", id=pending_version_id)
|
|
297
|
+
self.api_client.request("DestroyModelVersion", id=pending_version_id)
|
|
298
298
|
except ErrorResponse as e:
|
|
299
299
|
msg = getattr(e, "content", str(e))
|
|
300
300
|
logger.error(
|
|
@@ -304,7 +304,7 @@ class TrainingMixin:
|
|
|
304
304
|
logger.info("Retrieving the existing model version.")
|
|
305
305
|
existing_version_id = model_version["id"].pop()
|
|
306
306
|
try:
|
|
307
|
-
self.model_version = self.request(
|
|
307
|
+
self.model_version = self.api_client.request(
|
|
308
308
|
"RetrieveModelVersion", id=existing_version_id
|
|
309
309
|
)
|
|
310
310
|
except ErrorResponse as e:
|
|
@@ -77,7 +77,7 @@ class TranscriptionMixin:
|
|
|
77
77
|
)
|
|
78
78
|
return
|
|
79
79
|
|
|
80
|
-
created = self.request(
|
|
80
|
+
created = self.api_client.request(
|
|
81
81
|
"CreateTranscription",
|
|
82
82
|
id=element.id,
|
|
83
83
|
body={
|
|
@@ -171,7 +171,7 @@ class TranscriptionMixin:
|
|
|
171
171
|
)
|
|
172
172
|
return
|
|
173
173
|
|
|
174
|
-
created_trs = self.request(
|
|
174
|
+
created_trs = self.api_client.request(
|
|
175
175
|
"CreateTranscriptions",
|
|
176
176
|
body={
|
|
177
177
|
"worker_run_id": self.worker_run_id,
|
|
@@ -291,7 +291,7 @@ class TranscriptionMixin:
|
|
|
291
291
|
)
|
|
292
292
|
return
|
|
293
293
|
|
|
294
|
-
annotations = self.request(
|
|
294
|
+
annotations = self.api_client.request(
|
|
295
295
|
"CreateElementTranscriptions",
|
|
296
296
|
id=element.id,
|
|
297
297
|
body={
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/version.py
RENAMED
|
@@ -34,7 +34,9 @@ class WorkerVersionMixin:
|
|
|
34
34
|
if worker_version_id in self._worker_version_cache:
|
|
35
35
|
return self._worker_version_cache[worker_version_id]
|
|
36
36
|
|
|
37
|
-
worker_version = self.request(
|
|
37
|
+
worker_version = self.api_client.request(
|
|
38
|
+
"RetrieveWorkerVersion", id=worker_version_id
|
|
39
|
+
)
|
|
38
40
|
self._worker_version_cache[worker_version_id] = worker_version
|
|
39
41
|
|
|
40
42
|
return worker_version
|
|
@@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "arkindex-base-worker"
|
|
7
|
-
version = "0.4.0a2"
|
|
7
|
+
version = "0.4.0b1"
|
|
8
8
|
description = "Base Worker to easily build Arkindex ML workflows"
|
|
9
9
|
license = { file = "LICENSE" }
|
|
10
10
|
dependencies = [
|
|
11
11
|
"peewee~=3.17",
|
|
12
|
-
"Pillow==10.
|
|
12
|
+
"Pillow==10.4.0",
|
|
13
13
|
"python-gnupg==0.5.2",
|
|
14
|
-
"shapely==2.0.
|
|
14
|
+
"shapely==2.0.5",
|
|
15
15
|
"teklia-toolbox==0.1.5",
|
|
16
16
|
"zstandard==0.22.0",
|
|
17
17
|
]
|
|
@@ -42,12 +42,12 @@ Authors = "https://teklia.com"
|
|
|
42
42
|
|
|
43
43
|
[project.optional-dependencies]
|
|
44
44
|
docs = [
|
|
45
|
-
"black==24.4.
|
|
46
|
-
"mkdocs-material==9.5.
|
|
47
|
-
"mkdocstrings-python==1.
|
|
45
|
+
"black==24.4.2",
|
|
46
|
+
"mkdocs-material==9.5.31",
|
|
47
|
+
"mkdocstrings-python==1.10.7",
|
|
48
48
|
]
|
|
49
49
|
tests = [
|
|
50
|
-
"pytest==8.
|
|
50
|
+
"pytest==8.3.2",
|
|
51
51
|
"pytest-mock==3.14.0",
|
|
52
52
|
"pytest-responses==0.5.1",
|
|
53
53
|
]
|
|
@@ -658,7 +658,7 @@ def test_find_extras_directory_not_found(monkeypatch, extras_path, exists, error
|
|
|
658
658
|
def test_find_parents_file_paths(responses, mock_base_worker_with_cache, tmp_path):
|
|
659
659
|
responses.add(
|
|
660
660
|
responses.GET,
|
|
661
|
-
"http://testserver/api/v1/task/my_task/
|
|
661
|
+
"http://testserver/api/v1/task/my_task/",
|
|
662
662
|
status=200,
|
|
663
663
|
json={"parents": ["first", "second", "third"]},
|
|
664
664
|
)
|
|
@@ -22,6 +22,24 @@ from tests import CORPUS_ID
|
|
|
22
22
|
from . import BASE_API_CALLS
|
|
23
23
|
|
|
24
24
|
|
|
25
|
+
def test_list_corpus_types(responses, mock_elements_worker):
|
|
26
|
+
responses.add(
|
|
27
|
+
responses.GET,
|
|
28
|
+
f"http://testserver/api/v1/corpus/{CORPUS_ID}/",
|
|
29
|
+
json={
|
|
30
|
+
"id": CORPUS_ID,
|
|
31
|
+
"types": [{"slug": "folder"}, {"slug": "page"}],
|
|
32
|
+
},
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
mock_elements_worker.list_corpus_types()
|
|
36
|
+
|
|
37
|
+
assert mock_elements_worker.corpus_types == {
|
|
38
|
+
"folder": {"slug": "folder"},
|
|
39
|
+
"page": {"slug": "page"},
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
|
|
25
43
|
def test_check_required_types_argument_types(mock_elements_worker):
|
|
26
44
|
with pytest.raises(
|
|
27
45
|
AssertionError, match="At least one element type slug is required."
|
|
@@ -32,17 +50,11 @@ def test_check_required_types_argument_types(mock_elements_worker):
|
|
|
32
50
|
mock_elements_worker.check_required_types("lol", 42)
|
|
33
51
|
|
|
34
52
|
|
|
35
|
-
def test_check_required_types(
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
"id": CORPUS_ID,
|
|
41
|
-
"name": "Some Corpus",
|
|
42
|
-
"types": [{"slug": "folder"}, {"slug": "page"}],
|
|
43
|
-
},
|
|
44
|
-
)
|
|
45
|
-
mock_elements_worker.setup_api_client()
|
|
53
|
+
def test_check_required_types(mock_elements_worker):
|
|
54
|
+
mock_elements_worker.corpus_types = {
|
|
55
|
+
"folder": {"slug": "folder"},
|
|
56
|
+
"page": {"slug": "page"},
|
|
57
|
+
}
|
|
46
58
|
|
|
47
59
|
assert mock_elements_worker.check_required_types("page")
|
|
48
60
|
assert mock_elements_worker.check_required_types("page", "folder")
|
|
@@ -50,22 +62,18 @@ def test_check_required_types(responses, mock_elements_worker):
|
|
|
50
62
|
with pytest.raises(
|
|
51
63
|
MissingTypeError,
|
|
52
64
|
match=re.escape(
|
|
53
|
-
"Element type(s) act, text_line were not found in
|
|
65
|
+
"Element type(s) act, text_line were not found in corpus (11111111-1111-1111-1111-111111111111)."
|
|
54
66
|
),
|
|
55
67
|
):
|
|
56
68
|
assert mock_elements_worker.check_required_types("page", "text_line", "act")
|
|
57
69
|
|
|
58
70
|
|
|
59
71
|
def test_create_missing_types(responses, mock_elements_worker):
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
"name": "Some Corpus",
|
|
66
|
-
"types": [{"slug": "folder"}, {"slug": "page"}],
|
|
67
|
-
},
|
|
68
|
-
)
|
|
72
|
+
mock_elements_worker.corpus_types = {
|
|
73
|
+
"folder": {"slug": "folder"},
|
|
74
|
+
"page": {"slug": "page"},
|
|
75
|
+
}
|
|
76
|
+
|
|
69
77
|
responses.add(
|
|
70
78
|
responses.POST,
|
|
71
79
|
"http://testserver/api/v1/elements/type/",
|
|
@@ -94,7 +102,6 @@ def test_create_missing_types(responses, mock_elements_worker):
|
|
|
94
102
|
)
|
|
95
103
|
],
|
|
96
104
|
)
|
|
97
|
-
mock_elements_worker.setup_api_client()
|
|
98
105
|
|
|
99
106
|
assert mock_elements_worker.check_required_types(
|
|
100
107
|
"page", "text_line", "act", create_missing=True
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/arkindex_worker/worker/image.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/tests/test_elements_worker/test_cli.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/tests/test_worker.py
RENAMED
|
File without changes
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/worker_demo/__init__.py
RENAMED
|
File without changes
|
{arkindex_base_worker-0.4.0a2 → arkindex_base_worker-0.4.0b1}/worker-demo/worker_demo/worker.py
RENAMED
|
File without changes
|