arkindex-base-worker 0.3.7rc4__py3-none-any.whl → 0.5.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/METADATA +18 -19
- arkindex_base_worker-0.5.0a1.dist-info/RECORD +61 -0
- {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/WHEEL +1 -1
- {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/top_level.txt +2 -0
- arkindex_worker/cache.py +1 -1
- arkindex_worker/image.py +167 -2
- arkindex_worker/models.py +18 -0
- arkindex_worker/utils.py +98 -4
- arkindex_worker/worker/__init__.py +117 -218
- arkindex_worker/worker/base.py +39 -46
- arkindex_worker/worker/classification.py +45 -29
- arkindex_worker/worker/corpus.py +86 -0
- arkindex_worker/worker/dataset.py +89 -26
- arkindex_worker/worker/element.py +352 -91
- arkindex_worker/worker/entity.py +13 -11
- arkindex_worker/worker/image.py +21 -0
- arkindex_worker/worker/metadata.py +26 -16
- arkindex_worker/worker/process.py +92 -0
- arkindex_worker/worker/task.py +5 -4
- arkindex_worker/worker/training.py +25 -10
- arkindex_worker/worker/transcription.py +89 -68
- arkindex_worker/worker/version.py +3 -1
- hooks/pre_gen_project.py +3 -0
- tests/__init__.py +8 -0
- tests/conftest.py +47 -58
- tests/test_base_worker.py +212 -12
- tests/test_dataset_worker.py +294 -437
- tests/test_elements_worker/{test_classifications.py → test_classification.py} +313 -200
- tests/test_elements_worker/test_cli.py +3 -11
- tests/test_elements_worker/test_corpus.py +168 -0
- tests/test_elements_worker/test_dataset.py +106 -157
- tests/test_elements_worker/test_element.py +427 -0
- tests/test_elements_worker/test_element_create_multiple.py +715 -0
- tests/test_elements_worker/test_element_create_single.py +528 -0
- tests/test_elements_worker/test_element_list_children.py +969 -0
- tests/test_elements_worker/test_element_list_parents.py +530 -0
- tests/test_elements_worker/{test_entities.py → test_entity_create.py} +37 -195
- tests/test_elements_worker/test_entity_list_and_check.py +160 -0
- tests/test_elements_worker/test_image.py +66 -0
- tests/test_elements_worker/test_metadata.py +252 -161
- tests/test_elements_worker/test_process.py +89 -0
- tests/test_elements_worker/test_task.py +8 -18
- tests/test_elements_worker/test_training.py +17 -8
- tests/test_elements_worker/test_transcription_create.py +873 -0
- tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
- tests/test_elements_worker/test_transcription_list.py +450 -0
- tests/test_elements_worker/test_version.py +60 -0
- tests/test_elements_worker/test_worker.py +578 -293
- tests/test_image.py +542 -209
- tests/test_merge.py +1 -2
- tests/test_utils.py +89 -4
- worker-demo/tests/__init__.py +0 -0
- worker-demo/tests/conftest.py +32 -0
- worker-demo/tests/test_worker.py +12 -0
- worker-demo/worker_demo/__init__.py +6 -0
- worker-demo/worker_demo/worker.py +19 -0
- arkindex_base_worker-0.3.7rc4.dist-info/RECORD +0 -41
- tests/test_elements_worker/test_elements.py +0 -2713
- tests/test_elements_worker/test_transcriptions.py +0 -2119
- {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/LICENSE +0 -0
tests/test_merge.py
CHANGED
|
@@ -161,7 +161,7 @@ def test_merge_from_worker(
|
|
|
161
161
|
"""
|
|
162
162
|
responses.add(
|
|
163
163
|
responses.GET,
|
|
164
|
-
"http://testserver/api/v1/task/my_task/
|
|
164
|
+
"http://testserver/api/v1/task/my_task/",
|
|
165
165
|
status=200,
|
|
166
166
|
json={"parents": ["first", "second"]},
|
|
167
167
|
)
|
|
@@ -181,7 +181,6 @@ def test_merge_from_worker(
|
|
|
181
181
|
(tmp_path / "my_task").mkdir()
|
|
182
182
|
mock_base_worker_with_cache.args = mock_base_worker_with_cache.parser.parse_args()
|
|
183
183
|
mock_base_worker_with_cache.configure()
|
|
184
|
-
mock_base_worker_with_cache.configure_cache()
|
|
185
184
|
# Store parent tasks IDs as attribute
|
|
186
185
|
assert mock_base_worker_with_cache.task_parents == ["first", "second"]
|
|
187
186
|
|
tests/test_utils.py
CHANGED
|
@@ -1,9 +1,34 @@
|
|
|
1
|
-
|
|
1
|
+
import logging
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import pytest
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
from arkindex_worker.cache import unsupported_cache
|
|
6
|
+
from arkindex_worker.utils import (
|
|
7
|
+
DEFAULT_BATCH_SIZE,
|
|
8
|
+
batch_publication,
|
|
9
|
+
close_delete_file,
|
|
10
|
+
extract_tar_zst_archive,
|
|
11
|
+
parse_source_id,
|
|
12
|
+
)
|
|
13
|
+
from tests import FIXTURES_DIR
|
|
14
|
+
|
|
15
|
+
ARCHIVE = FIXTURES_DIR / "archive.tar.zst"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@pytest.mark.parametrize(
|
|
19
|
+
("source_id", "expected"),
|
|
20
|
+
[
|
|
21
|
+
(None, None),
|
|
22
|
+
("", None),
|
|
23
|
+
(
|
|
24
|
+
"cafecafe-cafe-cafe-cafe-cafecafecafe",
|
|
25
|
+
"cafecafe-cafe-cafe-cafe-cafecafecafe",
|
|
26
|
+
),
|
|
27
|
+
("manual", False),
|
|
28
|
+
],
|
|
29
|
+
)
|
|
30
|
+
def test_parse_source_id(source_id, expected):
|
|
31
|
+
assert parse_source_id(source_id) == expected
|
|
7
32
|
|
|
8
33
|
|
|
9
34
|
def test_extract_tar_zst_archive(tmp_path):
|
|
@@ -33,3 +58,63 @@ def test_close_delete_file(tmp_path):
|
|
|
33
58
|
close_delete_file(archive_fd, archive_path)
|
|
34
59
|
|
|
35
60
|
assert not archive_path.exists()
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class TestMixin:
|
|
64
|
+
def __init__(self, use_cache: bool = False):
|
|
65
|
+
"""
|
|
66
|
+
Args:
|
|
67
|
+
use_cache (bool, optional): To mock BaseWorker.use_cache attribute. Defaults to False.
|
|
68
|
+
"""
|
|
69
|
+
self.use_cache = use_cache
|
|
70
|
+
|
|
71
|
+
@batch_publication
|
|
72
|
+
def custom_publication_in_batches(self, batch_size: int = DEFAULT_BATCH_SIZE):
|
|
73
|
+
return batch_size
|
|
74
|
+
|
|
75
|
+
@unsupported_cache
|
|
76
|
+
@batch_publication
|
|
77
|
+
def custom_publication_in_batches_without_cache(
|
|
78
|
+
self, batch_size: int = DEFAULT_BATCH_SIZE
|
|
79
|
+
):
|
|
80
|
+
return batch_size
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_batch_publication_decorator_no_parameter():
|
|
84
|
+
assert TestMixin().custom_publication_in_batches() == DEFAULT_BATCH_SIZE
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@pytest.mark.parametrize("wrong_batch_size", [None, "not an int", 0])
|
|
88
|
+
def test_batch_publication_decorator_wrong_parameter(wrong_batch_size):
|
|
89
|
+
with pytest.raises(
|
|
90
|
+
AssertionError,
|
|
91
|
+
match="batch_size shouldn't be null and should be a strictly positive integer",
|
|
92
|
+
):
|
|
93
|
+
TestMixin().custom_publication_in_batches(batch_size=wrong_batch_size)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@pytest.mark.parametrize("batch_size", [1, 10, DEFAULT_BATCH_SIZE])
|
|
97
|
+
def test_batch_publication_decorator_right_parameter(batch_size):
|
|
98
|
+
assert (
|
|
99
|
+
TestMixin().custom_publication_in_batches(batch_size=batch_size) == batch_size
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_batch_publication_decorator_alongside_unsupported_cache(caplog):
|
|
104
|
+
# Capture log messages
|
|
105
|
+
caplog.clear()
|
|
106
|
+
with caplog.at_level(logging.WARNING):
|
|
107
|
+
# Call the helper
|
|
108
|
+
assert (
|
|
109
|
+
TestMixin(use_cache=True).custom_publication_in_batches_without_cache()
|
|
110
|
+
== DEFAULT_BATCH_SIZE
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
# Check logs
|
|
114
|
+
assert caplog.record_tuples == [
|
|
115
|
+
(
|
|
116
|
+
"arkindex_worker",
|
|
117
|
+
logging.WARNING,
|
|
118
|
+
"This API helper `custom_publication_in_batches_without_cache` did not update the cache database",
|
|
119
|
+
),
|
|
120
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from arkindex.mock import MockApiClient
|
|
6
|
+
from arkindex_worker.worker.base import BaseWorker
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.fixture(autouse=True)
|
|
10
|
+
def _setup_environment(responses, monkeypatch) -> None:
|
|
11
|
+
"""Setup needed environment variables"""
|
|
12
|
+
|
|
13
|
+
# Allow accessing remote API schemas
|
|
14
|
+
# defaulting to the prod environment
|
|
15
|
+
schema_url = os.environ.get(
|
|
16
|
+
"ARKINDEX_API_SCHEMA_URL",
|
|
17
|
+
"https://demo.arkindex.org/api/v1/openapi/?format=openapi-json",
|
|
18
|
+
)
|
|
19
|
+
responses.add_passthru(schema_url)
|
|
20
|
+
|
|
21
|
+
# Set schema url in environment
|
|
22
|
+
os.environ["ARKINDEX_API_SCHEMA_URL"] = schema_url
|
|
23
|
+
# Setup a fake worker run ID
|
|
24
|
+
os.environ["ARKINDEX_WORKER_RUN_ID"] = "1234-demo"
|
|
25
|
+
# Setup a fake corpus ID
|
|
26
|
+
os.environ["ARKINDEX_CORPUS_ID"] = "1234-corpus-id"
|
|
27
|
+
|
|
28
|
+
# Setup a mock api client instead of using a real one
|
|
29
|
+
def mock_setup_api_client(self):
|
|
30
|
+
self.api_client = MockApiClient()
|
|
31
|
+
|
|
32
|
+
monkeypatch.setattr(BaseWorker, "setup_api_client", mock_setup_api_client)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_dummy():
|
|
5
|
+
assert True
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_import():
|
|
9
|
+
"""Import our newly created module, through importlib to avoid parsing issues"""
|
|
10
|
+
worker = importlib.import_module("worker_demo.worker")
|
|
11
|
+
assert hasattr(worker, "Demo")
|
|
12
|
+
assert hasattr(worker.Demo, "process_element")
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from logging import Logger, getLogger
|
|
2
|
+
|
|
3
|
+
from arkindex_worker.models import Element
|
|
4
|
+
from arkindex_worker.worker import ElementsWorker
|
|
5
|
+
|
|
6
|
+
logger: Logger = getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Demo(ElementsWorker):
|
|
10
|
+
def process_element(self, element: Element) -> None:
|
|
11
|
+
logger.info(f"Demo processing element ({element.id})")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main() -> None:
|
|
15
|
+
Demo(description="Demo ML worker for Arkindex").run()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
if __name__ == "__main__":
|
|
19
|
+
main()
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
arkindex_worker/__init__.py,sha256=OlgCtTC9MaWeejviY0a3iQpALcRQGMVArFVVYwTF6I8,162
|
|
2
|
-
arkindex_worker/cache.py,sha256=FTlB0coXofn5zTNRTcVIvh709mcw4a1bPGqkwWjKs3w,11248
|
|
3
|
-
arkindex_worker/image.py,sha256=9-k_Wojk-sLbgvBSi7tWiiDc9YApWauJpHGKRay_nmo,14166
|
|
4
|
-
arkindex_worker/models.py,sha256=HdKFw3qk4WIWC-DrHDkhsw0mHP3OILuCLFf7aTjruZU,9526
|
|
5
|
-
arkindex_worker/utils.py,sha256=VSO8c21nsSaUCkyJaFX8wOwDQ0tztLOBFtiGvqlT0zU,6900
|
|
6
|
-
arkindex_worker/worker/__init__.py,sha256=I8QmdAs659SalxNjtCu2K2ItdyUlXYm3mK_WhZdjgBs,19498
|
|
7
|
-
arkindex_worker/worker/base.py,sha256=7ii3rZai6IB0-eB0TJ6pg-IhxMmW4izoJAKJKczbyZ4,19934
|
|
8
|
-
arkindex_worker/worker/classification.py,sha256=0OiwxV9lb97Zs3kODm3hzyk0V7IxBTiW5SL6AYgRH1M,10351
|
|
9
|
-
arkindex_worker/worker/dataset.py,sha256=qzjaXJtfeNCP2acsHbqp5tjQk-KpLHwVzjDAExeAmVg,3228
|
|
10
|
-
arkindex_worker/worker/element.py,sha256=AWK3YJSHWy3j4ajntJloi_2X4zxsgXZ6c6dzphgq3OI,33848
|
|
11
|
-
arkindex_worker/worker/entity.py,sha256=YT2Ttdn-L5TRoDdhOI3Z4GE1vtkWl7tKZqbYrtxZ2Ug,14630
|
|
12
|
-
arkindex_worker/worker/metadata.py,sha256=SC6apVaOjFrmYw5b-njhqIlH-_r0ExbNpZeQZzlUjBE,6669
|
|
13
|
-
arkindex_worker/worker/task.py,sha256=cz3wJNPgogZv1lm_3lm7WScitQtYQtL6H6I7Xokq208,1475
|
|
14
|
-
arkindex_worker/worker/training.py,sha256=SOs3YKGikTr3rdWYp9H-jbtgRnZxQAoqtwB26ztx9j8,10235
|
|
15
|
-
arkindex_worker/worker/transcription.py,sha256=6R7ofcGnNqX4rjT0kRKIE-G9FHq2TJ1tfztNM5sTqYE,20464
|
|
16
|
-
arkindex_worker/worker/version.py,sha256=cs2pdlDxpKRO2Oldvcu54w-D_DQhf1cdeEt4tKX_QYs,1927
|
|
17
|
-
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
-
tests/conftest.py,sha256=wzKXRnS7OKQCNHrlDaQhMC8EXlsQTY_S4L9U_hXbjpM,22004
|
|
19
|
-
tests/test_base_worker.py,sha256=Uq6_MpLW23gmKFXkU-SyDUaA_4dlViLBGG4e3gpBBz0,24512
|
|
20
|
-
tests/test_cache.py,sha256=ii0gyr0DrG7ChEs7pmT8hMdSguAOAcCze4bRMiFQxuk,10640
|
|
21
|
-
tests/test_dataset_worker.py,sha256=Q-3gVu2FNa6mJVkUW-PUVgyUAvRkxSzLCJrPhwoJlxQ,28273
|
|
22
|
-
tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
|
|
23
|
-
tests/test_image.py,sha256=FZv8njLxh45sVgmY71UFHt0lv1cHr0cK4rrtPhQleX8,16262
|
|
24
|
-
tests/test_merge.py,sha256=Q4zCbtZbe0wBfqE56gvAD06c6pDuhqnjKaioFqIgAQw,8331
|
|
25
|
-
tests/test_utils.py,sha256=pFXegcBvIuy1tJDDSgQtCbC_tRaoLjd2055R5lu3hS0,1236
|
|
26
|
-
tests/test_elements_worker/__init__.py,sha256=Fh4nkbbyJSMv_VtjQxnWrOqTnxXaaWI8S9WU0VrzCHs,179
|
|
27
|
-
tests/test_elements_worker/test_classifications.py,sha256=PE88fsdra8QsWcKjSyao-pTHlaIWNxlbfF0CrLe9LBA,26517
|
|
28
|
-
tests/test_elements_worker/test_cli.py,sha256=BsFTswLti63WAZ2pf6ipiZKWJJyCQuSfuKnSlESuK8g,2878
|
|
29
|
-
tests/test_elements_worker/test_dataset.py,sha256=-kVll1NcMPWkIx8D7r-Z5neEGkFiZ9YQfC4eTMIfjg0,13475
|
|
30
|
-
tests/test_elements_worker/test_elements.py,sha256=6XKtgXSVQJnTSgTHWwEVsAtIwLBapjYjUYPUdjxcHsY,84971
|
|
31
|
-
tests/test_elements_worker/test_entities.py,sha256=yi1mXzvKvNwUNMzo0UZ56YOIJstYHcLyeepPJ8f10MQ,34557
|
|
32
|
-
tests/test_elements_worker/test_metadata.py,sha256=b9CNv4W31TRJqYauvX_pRIN2SvnybaLqF-FWoFwa2Vc,18672
|
|
33
|
-
tests/test_elements_worker/test_task.py,sha256=FCpxE9UpouKXgjGvWgNHEai_Hiy2d1YmqRG-_v2s27s,6312
|
|
34
|
-
tests/test_elements_worker/test_training.py,sha256=WeG-cDuJ-YhPgfKH47TtXBxyargtLuk7c8tsik2WnL8,8414
|
|
35
|
-
tests/test_elements_worker/test_transcriptions.py,sha256=WVJG26sZyY66fu-Eka9A1_WWIeNI2scogjypzURnp8A,73468
|
|
36
|
-
tests/test_elements_worker/test_worker.py,sha256=7-jGJVT3yMGpIyN96Uafz5eIUrO4ieNLgw0k1D8BhGc,17163
|
|
37
|
-
arkindex_base_worker-0.3.7rc4.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
|
|
38
|
-
arkindex_base_worker-0.3.7rc4.dist-info/METADATA,sha256=ilh4IdFYSXepgr0imEMH3ZbewlFJlbg97VKnvhKXMVQ,3411
|
|
39
|
-
arkindex_base_worker-0.3.7rc4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
40
|
-
arkindex_base_worker-0.3.7rc4.dist-info/top_level.txt,sha256=TtagLI8LSv7GE7nG8MQqDFAJ5bNDPJn7Z5vizOgrWkA,22
|
|
41
|
-
arkindex_base_worker-0.3.7rc4.dist-info/RECORD,,
|