arkindex-base-worker 0.4.0b3__py3-none-any.whl → 0.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.4.0b3.dist-info → arkindex_base_worker-0.4.0rc2.dist-info}/METADATA +4 -3
- {arkindex_base_worker-0.4.0b3.dist-info → arkindex_base_worker-0.4.0rc2.dist-info}/RECORD +21 -20
- {arkindex_base_worker-0.4.0b3.dist-info → arkindex_base_worker-0.4.0rc2.dist-info}/WHEEL +1 -1
- arkindex_worker/image.py +118 -0
- arkindex_worker/worker/__init__.py +26 -158
- arkindex_worker/worker/base.py +32 -1
- arkindex_worker/worker/dataset.py +70 -0
- arkindex_worker/worker/element.py +260 -75
- arkindex_worker/worker/process.py +63 -0
- arkindex_worker/worker/transcription.py +50 -50
- tests/__init__.py +1 -1
- tests/conftest.py +11 -23
- tests/test_base_worker.py +203 -2
- tests/test_dataset_worker.py +5 -2
- tests/test_elements_worker/test_elements.py +712 -18
- tests/test_elements_worker/test_worker.py +0 -200
- tests/test_image.py +248 -6
- tests/test_merge.py +0 -1
- tests/test_utils.py +2 -4
- {arkindex_base_worker-0.4.0b3.dist-info → arkindex_base_worker-0.4.0rc2.dist-info}/LICENSE +0 -0
- {arkindex_base_worker-0.4.0b3.dist-info → arkindex_base_worker-0.4.0rc2.dist-info}/top_level.txt +0 -0
tests/conftest.py
CHANGED
|
@@ -23,10 +23,15 @@ from arkindex_worker.cache import (
|
|
|
23
23
|
init_cache_db,
|
|
24
24
|
)
|
|
25
25
|
from arkindex_worker.models import Artifact, Dataset, Set
|
|
26
|
-
from arkindex_worker.worker import BaseWorker, DatasetWorker, ElementsWorker
|
|
26
|
+
from arkindex_worker.worker import (
|
|
27
|
+
BaseWorker,
|
|
28
|
+
DatasetWorker,
|
|
29
|
+
ElementsWorker,
|
|
30
|
+
ProcessMode,
|
|
31
|
+
)
|
|
27
32
|
from arkindex_worker.worker.dataset import DatasetState
|
|
28
33
|
from arkindex_worker.worker.transcription import TextOrientation
|
|
29
|
-
from tests import CORPUS_ID, FIXTURES_DIR, SAMPLES_DIR
|
|
34
|
+
from tests import CORPUS_ID, SAMPLES_DIR
|
|
30
35
|
|
|
31
36
|
__yaml_cache = {}
|
|
32
37
|
|
|
@@ -277,9 +282,7 @@ def mock_elements_worker_with_list(monkeypatch, responses, mock_elements_worker)
|
|
|
277
282
|
"""
|
|
278
283
|
Mock a worker instance to list and retrieve a single element
|
|
279
284
|
"""
|
|
280
|
-
monkeypatch.setattr(
|
|
281
|
-
mock_elements_worker, "list_elements", lambda: ["1234-deadbeef"]
|
|
282
|
-
)
|
|
285
|
+
monkeypatch.setattr(mock_elements_worker, "get_elements", lambda: ["1234-deadbeef"])
|
|
283
286
|
responses.add(
|
|
284
287
|
responses.GET,
|
|
285
288
|
"http://testserver/api/v1/element/1234-deadbeef/",
|
|
@@ -326,23 +329,6 @@ def mock_elements_worker_with_cache(monkeypatch, mock_cache_db, _mock_worker_run
|
|
|
326
329
|
return worker
|
|
327
330
|
|
|
328
331
|
|
|
329
|
-
@pytest.fixture()
|
|
330
|
-
def fake_page_element():
|
|
331
|
-
return json.loads((FIXTURES_DIR / "page_element.json").read_text())
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
@pytest.fixture()
|
|
335
|
-
def fake_ufcn_worker_version():
|
|
336
|
-
return json.loads(
|
|
337
|
-
(FIXTURES_DIR / "ufcn_line_historical_worker_version.json").read_text()
|
|
338
|
-
)
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
@pytest.fixture()
|
|
342
|
-
def fake_transcriptions_small():
|
|
343
|
-
return json.loads((FIXTURES_DIR / "line_transcriptions_small.json").read_text())
|
|
344
|
-
|
|
345
|
-
|
|
346
332
|
@pytest.fixture()
|
|
347
333
|
def model_file_dir():
|
|
348
334
|
return SAMPLES_DIR / "model_files"
|
|
@@ -620,7 +606,9 @@ def mock_dataset_worker(monkeypatch, mocker, _mock_worker_run_api):
|
|
|
620
606
|
|
|
621
607
|
dataset_worker = DatasetWorker()
|
|
622
608
|
dataset_worker.configure()
|
|
623
|
-
|
|
609
|
+
|
|
610
|
+
# Update process mode
|
|
611
|
+
dataset_worker.process_information["mode"] = ProcessMode.Dataset
|
|
624
612
|
|
|
625
613
|
assert not dataset_worker.is_read_only
|
|
626
614
|
|
tests/test_base_worker.py
CHANGED
|
@@ -206,6 +206,7 @@ def test_configure_worker_run(mocker, responses, caplog):
|
|
|
206
206
|
"Loaded Worker Fake worker @ 123412 from API",
|
|
207
207
|
),
|
|
208
208
|
("arkindex_worker", logging.INFO, "Loaded user configuration from WorkerRun"),
|
|
209
|
+
("arkindex_worker", logging.INFO, "User configuration retrieved"),
|
|
209
210
|
]
|
|
210
211
|
|
|
211
212
|
assert worker.user_configuration == {"a": "b"}
|
|
@@ -284,12 +285,21 @@ def test_configure_user_configuration_defaults(mocker, responses):
|
|
|
284
285
|
|
|
285
286
|
worker.configure()
|
|
286
287
|
|
|
287
|
-
assert worker.config == {"param_1": "/some/path/file.pth", "param_2": 12}
|
|
288
288
|
assert worker.user_configuration == {
|
|
289
289
|
"integer_parameter": 0,
|
|
290
290
|
"param_3": "Animula vagula blandula",
|
|
291
291
|
"param_5": True,
|
|
292
292
|
}
|
|
293
|
+
# All configurations are merged
|
|
294
|
+
assert worker.config == {
|
|
295
|
+
# Default config
|
|
296
|
+
"param_1": "/some/path/file.pth",
|
|
297
|
+
"param_2": 12,
|
|
298
|
+
# User config
|
|
299
|
+
"integer_parameter": 0,
|
|
300
|
+
"param_3": "Animula vagula blandula",
|
|
301
|
+
"param_5": True,
|
|
302
|
+
}
|
|
293
303
|
|
|
294
304
|
|
|
295
305
|
@pytest.mark.parametrize("debug", [True, False])
|
|
@@ -676,7 +686,6 @@ def test_find_parents_file_paths(responses, mock_base_worker_with_cache, tmp_pat
|
|
|
676
686
|
mock_base_worker_with_cache.args = mock_base_worker_with_cache.parser.parse_args()
|
|
677
687
|
|
|
678
688
|
mock_base_worker_with_cache.configure()
|
|
679
|
-
mock_base_worker_with_cache.configure_cache()
|
|
680
689
|
|
|
681
690
|
assert mock_base_worker_with_cache.find_parents_file_paths(filename) == [
|
|
682
691
|
tmp_path / "first" / filename,
|
|
@@ -753,3 +762,195 @@ def test_corpus_id_set_read_only_mode(
|
|
|
753
762
|
mock_elements_worker_read_only.configure()
|
|
754
763
|
|
|
755
764
|
assert mock_elements_worker_read_only.corpus_id == corpus_id
|
|
765
|
+
|
|
766
|
+
|
|
767
|
+
@pytest.mark.parametrize(
|
|
768
|
+
(
|
|
769
|
+
"wk_version_config",
|
|
770
|
+
"wk_version_user_config",
|
|
771
|
+
"frontend_user_config",
|
|
772
|
+
"model_config",
|
|
773
|
+
"expected_config",
|
|
774
|
+
),
|
|
775
|
+
[
|
|
776
|
+
({}, {}, {}, {}, {}),
|
|
777
|
+
# Keep parameters from worker version configuration
|
|
778
|
+
({"parameter": 0}, {}, {}, {}, {"parameter": 0}),
|
|
779
|
+
# Keep parameters from worker version configuration + user_config defaults
|
|
780
|
+
(
|
|
781
|
+
{"parameter": 0},
|
|
782
|
+
{
|
|
783
|
+
"parameter2": {
|
|
784
|
+
"type": "int",
|
|
785
|
+
"title": "Lambda",
|
|
786
|
+
"default": 0,
|
|
787
|
+
"required": False,
|
|
788
|
+
}
|
|
789
|
+
},
|
|
790
|
+
{},
|
|
791
|
+
{},
|
|
792
|
+
{"parameter": 0, "parameter2": 0},
|
|
793
|
+
),
|
|
794
|
+
# Keep parameters from worker version configuration + user_config no defaults
|
|
795
|
+
(
|
|
796
|
+
{"parameter": 0},
|
|
797
|
+
{
|
|
798
|
+
"parameter2": {
|
|
799
|
+
"type": "int",
|
|
800
|
+
"title": "Lambda",
|
|
801
|
+
"required": False,
|
|
802
|
+
}
|
|
803
|
+
},
|
|
804
|
+
{},
|
|
805
|
+
{},
|
|
806
|
+
{"parameter": 0},
|
|
807
|
+
),
|
|
808
|
+
# Keep parameters from worker version configuration but user_config defaults overrides
|
|
809
|
+
(
|
|
810
|
+
{"parameter": 0},
|
|
811
|
+
{
|
|
812
|
+
"parameter": {
|
|
813
|
+
"type": "int",
|
|
814
|
+
"title": "Lambda",
|
|
815
|
+
"default": 1,
|
|
816
|
+
"required": False,
|
|
817
|
+
}
|
|
818
|
+
},
|
|
819
|
+
{},
|
|
820
|
+
{},
|
|
821
|
+
{"parameter": 1},
|
|
822
|
+
),
|
|
823
|
+
# Keep parameters from worker version configuration + frontend config
|
|
824
|
+
(
|
|
825
|
+
{"parameter": 0},
|
|
826
|
+
{},
|
|
827
|
+
{"parameter2": 0},
|
|
828
|
+
{},
|
|
829
|
+
{"parameter": 0, "parameter2": 0},
|
|
830
|
+
),
|
|
831
|
+
# Keep parameters from worker version configuration + frontend config overrides
|
|
832
|
+
({"parameter": 0}, {}, {"parameter": 1}, {}, {"parameter": 1}),
|
|
833
|
+
# Keep parameters from worker version configuration + model config
|
|
834
|
+
(
|
|
835
|
+
{"parameter": 0},
|
|
836
|
+
{},
|
|
837
|
+
{},
|
|
838
|
+
{"parameter2": 0},
|
|
839
|
+
{"parameter": 0, "parameter2": 0},
|
|
840
|
+
),
|
|
841
|
+
# Keep parameters from worker version configuration + model config overrides
|
|
842
|
+
({"parameter": 0}, {}, {}, {"parameter": 1}, {"parameter": 1}),
|
|
843
|
+
# Keep parameters from worker version configuration + user_config default + model config overrides
|
|
844
|
+
(
|
|
845
|
+
{"parameter": 0},
|
|
846
|
+
{
|
|
847
|
+
"parameter": {
|
|
848
|
+
"type": "int",
|
|
849
|
+
"title": "Lambda",
|
|
850
|
+
"default": 1,
|
|
851
|
+
"required": False,
|
|
852
|
+
}
|
|
853
|
+
},
|
|
854
|
+
{},
|
|
855
|
+
{"parameter": 2},
|
|
856
|
+
{"parameter": 2},
|
|
857
|
+
),
|
|
858
|
+
# Keep parameters from worker version configuration + model config + frontend config overrides
|
|
859
|
+
({"parameter": 0}, {}, {"parameter": 2}, {"parameter": 1}, {"parameter": 2}),
|
|
860
|
+
# Keep parameters from worker version configuration + user_config default + model config + frontend config overrides all
|
|
861
|
+
(
|
|
862
|
+
{"parameter": 0},
|
|
863
|
+
{
|
|
864
|
+
"parameter": {
|
|
865
|
+
"type": "int",
|
|
866
|
+
"title": "Lambda",
|
|
867
|
+
"default": 1,
|
|
868
|
+
"required": False,
|
|
869
|
+
}
|
|
870
|
+
},
|
|
871
|
+
{"parameter": 3},
|
|
872
|
+
{"parameter": 2},
|
|
873
|
+
{"parameter": 3},
|
|
874
|
+
),
|
|
875
|
+
],
|
|
876
|
+
)
|
|
877
|
+
def test_worker_config_multiple_source(
|
|
878
|
+
monkeypatch,
|
|
879
|
+
responses,
|
|
880
|
+
wk_version_config,
|
|
881
|
+
wk_version_user_config,
|
|
882
|
+
frontend_user_config,
|
|
883
|
+
model_config,
|
|
884
|
+
expected_config,
|
|
885
|
+
):
|
|
886
|
+
# Compute WorkerRun info
|
|
887
|
+
payload = {
|
|
888
|
+
"id": "56785678-5678-5678-5678-567856785678",
|
|
889
|
+
"parents": [],
|
|
890
|
+
"worker_version": {
|
|
891
|
+
"id": "12341234-1234-1234-1234-123412341234",
|
|
892
|
+
"configuration": {
|
|
893
|
+
"docker": {"image": "python:3"},
|
|
894
|
+
"configuration": wk_version_config,
|
|
895
|
+
"secrets": [],
|
|
896
|
+
"user_configuration": wk_version_user_config,
|
|
897
|
+
},
|
|
898
|
+
"revision": {
|
|
899
|
+
"hash": "deadbeef1234",
|
|
900
|
+
"name": "some git revision",
|
|
901
|
+
},
|
|
902
|
+
"docker_image": "python:3",
|
|
903
|
+
"docker_image_name": "python:3",
|
|
904
|
+
"state": "created",
|
|
905
|
+
"worker": {
|
|
906
|
+
"id": "deadbeef-1234-5678-1234-worker",
|
|
907
|
+
"name": "Fake worker",
|
|
908
|
+
"slug": "fake_worker",
|
|
909
|
+
"type": "classifier",
|
|
910
|
+
},
|
|
911
|
+
},
|
|
912
|
+
"configuration": {
|
|
913
|
+
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
|
|
914
|
+
"name": "Configuration entered by user",
|
|
915
|
+
"configuration": frontend_user_config,
|
|
916
|
+
},
|
|
917
|
+
"model_version": {
|
|
918
|
+
"id": "12341234-1234-1234-1234-123412341234",
|
|
919
|
+
"name": "Model version 1337",
|
|
920
|
+
"configuration": model_config,
|
|
921
|
+
"model": {
|
|
922
|
+
"id": "hahahaha-haha-haha-haha-hahahahahaha",
|
|
923
|
+
"name": "My model",
|
|
924
|
+
},
|
|
925
|
+
},
|
|
926
|
+
"process": {
|
|
927
|
+
"name": None,
|
|
928
|
+
"id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff",
|
|
929
|
+
"state": "running",
|
|
930
|
+
"mode": "workers",
|
|
931
|
+
"corpus": CORPUS_ID,
|
|
932
|
+
"use_cache": False,
|
|
933
|
+
"activity_state": "ready",
|
|
934
|
+
"model_id": None,
|
|
935
|
+
"train_folder_id": None,
|
|
936
|
+
"validation_folder_id": None,
|
|
937
|
+
"test_folder_id": None,
|
|
938
|
+
},
|
|
939
|
+
"summary": "Worker Fake worker @ 123412",
|
|
940
|
+
}
|
|
941
|
+
|
|
942
|
+
responses.add(
|
|
943
|
+
responses.GET,
|
|
944
|
+
"http://testserver/api/v1/process/workers/56785678-5678-5678-5678-567856785678/",
|
|
945
|
+
status=200,
|
|
946
|
+
body=json.dumps(payload),
|
|
947
|
+
content_type="application/json",
|
|
948
|
+
)
|
|
949
|
+
|
|
950
|
+
# Create and configure a worker
|
|
951
|
+
monkeypatch.setattr(sys, "argv", ["worker"])
|
|
952
|
+
worker = BaseWorker()
|
|
953
|
+
worker.configure()
|
|
954
|
+
|
|
955
|
+
# Check final config
|
|
956
|
+
assert worker.config == expected_config
|
tests/test_dataset_worker.py
CHANGED
|
@@ -6,8 +6,11 @@ import pytest
|
|
|
6
6
|
from apistar.exceptions import ErrorResponse
|
|
7
7
|
|
|
8
8
|
from arkindex_worker.models import Dataset, Set
|
|
9
|
-
from arkindex_worker.worker import
|
|
10
|
-
|
|
9
|
+
from arkindex_worker.worker.dataset import (
|
|
10
|
+
DatasetState,
|
|
11
|
+
MissingDatasetArchive,
|
|
12
|
+
check_dataset_set,
|
|
13
|
+
)
|
|
11
14
|
from tests import FIXTURES_DIR, PROCESS_ID
|
|
12
15
|
from tests.test_elements_worker import BASE_API_CALLS
|
|
13
16
|
|