arkindex-base-worker 0.4.0__py3-none-any.whl → 0.4.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a2.dist-info}/METADATA +13 -15
- arkindex_base_worker-0.4.0a2.dist-info/RECORD +51 -0
- {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a2.dist-info}/WHEEL +1 -1
- arkindex_worker/cache.py +1 -1
- arkindex_worker/image.py +1 -120
- arkindex_worker/utils.py +0 -82
- arkindex_worker/worker/__init__.py +161 -46
- arkindex_worker/worker/base.py +11 -36
- arkindex_worker/worker/classification.py +18 -34
- arkindex_worker/worker/corpus.py +4 -21
- arkindex_worker/worker/dataset.py +1 -71
- arkindex_worker/worker/element.py +91 -352
- arkindex_worker/worker/entity.py +11 -11
- arkindex_worker/worker/metadata.py +9 -19
- arkindex_worker/worker/task.py +4 -5
- arkindex_worker/worker/training.py +6 -6
- arkindex_worker/worker/transcription.py +68 -89
- arkindex_worker/worker/version.py +1 -3
- tests/__init__.py +1 -1
- tests/conftest.py +45 -33
- tests/test_base_worker.py +3 -204
- tests/test_dataset_worker.py +4 -7
- tests/test_elements_worker/{test_classification.py → test_classifications.py} +61 -194
- tests/test_elements_worker/test_corpus.py +1 -32
- tests/test_elements_worker/test_dataset.py +1 -1
- tests/test_elements_worker/test_elements.py +2734 -0
- tests/test_elements_worker/{test_entity_create.py → test_entities.py} +160 -26
- tests/test_elements_worker/test_image.py +1 -2
- tests/test_elements_worker/test_metadata.py +99 -224
- tests/test_elements_worker/test_task.py +1 -1
- tests/test_elements_worker/test_training.py +2 -2
- tests/test_elements_worker/test_transcriptions.py +2102 -0
- tests/test_elements_worker/test_worker.py +280 -563
- tests/test_image.py +204 -429
- tests/test_merge.py +2 -1
- tests/test_utils.py +3 -66
- arkindex_base_worker-0.4.0.dist-info/RECORD +0 -61
- arkindex_worker/worker/process.py +0 -92
- tests/test_elements_worker/test_element.py +0 -427
- tests/test_elements_worker/test_element_create_multiple.py +0 -715
- tests/test_elements_worker/test_element_create_single.py +0 -528
- tests/test_elements_worker/test_element_list_children.py +0 -969
- tests/test_elements_worker/test_element_list_parents.py +0 -530
- tests/test_elements_worker/test_entity_list_and_check.py +0 -160
- tests/test_elements_worker/test_process.py +0 -89
- tests/test_elements_worker/test_transcription_create.py +0 -873
- tests/test_elements_worker/test_transcription_create_with_elements.py +0 -951
- tests/test_elements_worker/test_transcription_list.py +0 -450
- tests/test_elements_worker/test_version.py +0 -60
- {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a2.dist-info}/LICENSE +0 -0
- {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a2.dist-info}/top_level.txt +0 -0
|
@@ -3,11 +3,10 @@ import re
|
|
|
3
3
|
from uuid import UUID
|
|
4
4
|
|
|
5
5
|
import pytest
|
|
6
|
+
from apistar.exceptions import ErrorResponse
|
|
6
7
|
|
|
7
|
-
from arkindex.exceptions import ErrorResponse
|
|
8
8
|
from arkindex_worker.cache import CachedClassification, CachedElement
|
|
9
9
|
from arkindex_worker.models import Element
|
|
10
|
-
from arkindex_worker.utils import DEFAULT_BATCH_SIZE
|
|
11
10
|
from tests import CORPUS_ID
|
|
12
11
|
|
|
13
12
|
from . import BASE_API_CALLS
|
|
@@ -17,92 +16,6 @@ from . import BASE_API_CALLS
|
|
|
17
16
|
DELETE_PARAMETER = "DELETE_PARAMETER"
|
|
18
17
|
|
|
19
18
|
|
|
20
|
-
def test_load_corpus_classes_api_error(responses, mock_elements_worker):
|
|
21
|
-
responses.add(
|
|
22
|
-
responses.GET,
|
|
23
|
-
f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
|
|
24
|
-
status=418,
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
assert not mock_elements_worker.classes
|
|
28
|
-
with pytest.raises(
|
|
29
|
-
Exception, match="Stopping pagination as data will be incomplete"
|
|
30
|
-
):
|
|
31
|
-
mock_elements_worker.load_corpus_classes()
|
|
32
|
-
|
|
33
|
-
assert len(responses.calls) == len(BASE_API_CALLS) + 5
|
|
34
|
-
assert [
|
|
35
|
-
(call.request.method, call.request.url) for call in responses.calls
|
|
36
|
-
] == BASE_API_CALLS + [
|
|
37
|
-
# We do 5 retries
|
|
38
|
-
(
|
|
39
|
-
"GET",
|
|
40
|
-
f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
|
|
41
|
-
),
|
|
42
|
-
(
|
|
43
|
-
"GET",
|
|
44
|
-
f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
|
|
45
|
-
),
|
|
46
|
-
(
|
|
47
|
-
"GET",
|
|
48
|
-
f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
|
|
49
|
-
),
|
|
50
|
-
(
|
|
51
|
-
"GET",
|
|
52
|
-
f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
|
|
53
|
-
),
|
|
54
|
-
(
|
|
55
|
-
"GET",
|
|
56
|
-
f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
|
|
57
|
-
),
|
|
58
|
-
]
|
|
59
|
-
assert not mock_elements_worker.classes
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def test_load_corpus_classes(responses, mock_elements_worker):
|
|
63
|
-
responses.add(
|
|
64
|
-
responses.GET,
|
|
65
|
-
f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
|
|
66
|
-
status=200,
|
|
67
|
-
json={
|
|
68
|
-
"count": 3,
|
|
69
|
-
"next": None,
|
|
70
|
-
"results": [
|
|
71
|
-
{
|
|
72
|
-
"id": "0000",
|
|
73
|
-
"name": "good",
|
|
74
|
-
},
|
|
75
|
-
{
|
|
76
|
-
"id": "1111",
|
|
77
|
-
"name": "average",
|
|
78
|
-
},
|
|
79
|
-
{
|
|
80
|
-
"id": "2222",
|
|
81
|
-
"name": "bad",
|
|
82
|
-
},
|
|
83
|
-
],
|
|
84
|
-
},
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
assert not mock_elements_worker.classes
|
|
88
|
-
mock_elements_worker.load_corpus_classes()
|
|
89
|
-
|
|
90
|
-
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
91
|
-
assert [
|
|
92
|
-
(call.request.method, call.request.url) for call in responses.calls
|
|
93
|
-
] == BASE_API_CALLS + [
|
|
94
|
-
(
|
|
95
|
-
"GET",
|
|
96
|
-
f"http://testserver/api/v1/corpus/{CORPUS_ID}/classes/",
|
|
97
|
-
),
|
|
98
|
-
]
|
|
99
|
-
assert mock_elements_worker.classes == {
|
|
100
|
-
"good": "0000",
|
|
101
|
-
"average": "1111",
|
|
102
|
-
"bad": "2222",
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
|
|
106
19
|
def test_get_ml_class_id_load_classes(responses, mock_elements_worker):
|
|
107
20
|
responses.add(
|
|
108
21
|
responses.GET,
|
|
@@ -779,8 +692,7 @@ def test_create_classifications_create_ml_class(mock_elements_worker, responses)
|
|
|
779
692
|
}
|
|
780
693
|
|
|
781
694
|
|
|
782
|
-
|
|
783
|
-
def test_create_classifications(batch_size, responses, mock_elements_worker):
|
|
695
|
+
def test_create_classifications(responses, mock_elements_worker):
|
|
784
696
|
mock_elements_worker.classes = {"portrait": "0000", "landscape": "1111"}
|
|
785
697
|
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
786
698
|
responses.add(
|
|
@@ -804,98 +716,62 @@ def test_create_classifications(batch_size, responses, mock_elements_worker):
|
|
|
804
716
|
"high_confidence": False,
|
|
805
717
|
},
|
|
806
718
|
],
|
|
807
|
-
batch_size=batch_size,
|
|
808
719
|
)
|
|
809
720
|
|
|
810
|
-
|
|
811
|
-
if batch_size != DEFAULT_BATCH_SIZE:
|
|
812
|
-
bulk_api_calls.append(("POST", "http://testserver/api/v1/classification/bulk/"))
|
|
813
|
-
|
|
814
|
-
assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
|
|
721
|
+
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
815
722
|
assert [
|
|
816
723
|
(call.request.method, call.request.url) for call in responses.calls
|
|
817
|
-
] == BASE_API_CALLS +
|
|
724
|
+
] == BASE_API_CALLS + [
|
|
725
|
+
("POST", "http://testserver/api/v1/classification/bulk/"),
|
|
726
|
+
]
|
|
818
727
|
|
|
819
|
-
|
|
820
|
-
second_cl = {"confidence": 0.25, "high_confidence": False, "ml_class": "1111"}
|
|
821
|
-
empty_payload = {
|
|
728
|
+
assert json.loads(responses.calls[-1].request.body) == {
|
|
822
729
|
"parent": str(elt.id),
|
|
823
730
|
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
824
|
-
"classifications": [
|
|
731
|
+
"classifications": [
|
|
732
|
+
{
|
|
733
|
+
"confidence": 0.75,
|
|
734
|
+
"high_confidence": False,
|
|
735
|
+
"ml_class": "0000",
|
|
736
|
+
},
|
|
737
|
+
{
|
|
738
|
+
"confidence": 0.25,
|
|
739
|
+
"high_confidence": False,
|
|
740
|
+
"ml_class": "1111",
|
|
741
|
+
},
|
|
742
|
+
],
|
|
825
743
|
}
|
|
826
744
|
|
|
827
|
-
bodies = []
|
|
828
|
-
first_call_idx = None
|
|
829
|
-
if batch_size > 1:
|
|
830
|
-
first_call_idx = -1
|
|
831
|
-
bodies.append({**empty_payload, "classifications": [first_cl, second_cl]})
|
|
832
|
-
else:
|
|
833
|
-
first_call_idx = -2
|
|
834
|
-
bodies.append({**empty_payload, "classifications": [first_cl]})
|
|
835
|
-
bodies.append({**empty_payload, "classifications": [second_cl]})
|
|
836
|
-
|
|
837
|
-
assert [
|
|
838
|
-
json.loads(bulk_call.request.body)
|
|
839
|
-
for bulk_call in responses.calls[first_call_idx:]
|
|
840
|
-
] == bodies
|
|
841
|
-
|
|
842
745
|
|
|
843
|
-
|
|
844
|
-
def test_create_classifications_with_cache(
|
|
845
|
-
batch_size, responses, mock_elements_worker_with_cache
|
|
846
|
-
):
|
|
746
|
+
def test_create_classifications_with_cache(responses, mock_elements_worker_with_cache):
|
|
847
747
|
mock_elements_worker_with_cache.classes = {"portrait": "0000", "landscape": "1111"}
|
|
848
748
|
elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
|
|
849
749
|
|
|
850
|
-
|
|
851
|
-
responses.
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
"state": "pending",
|
|
872
|
-
},
|
|
873
|
-
],
|
|
874
|
-
},
|
|
875
|
-
)
|
|
876
|
-
else:
|
|
877
|
-
for cl_id, cl_class, cl_conf in [
|
|
878
|
-
("00000000-0000-0000-0000-000000000000", "0000", 0.75),
|
|
879
|
-
("11111111-1111-1111-1111-111111111111", "1111", 0.25),
|
|
880
|
-
]:
|
|
881
|
-
responses.add(
|
|
882
|
-
responses.POST,
|
|
883
|
-
"http://testserver/api/v1/classification/bulk/",
|
|
884
|
-
status=200,
|
|
885
|
-
json={
|
|
886
|
-
"parent": str(elt.id),
|
|
887
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
888
|
-
"classifications": [
|
|
889
|
-
{
|
|
890
|
-
"id": cl_id,
|
|
891
|
-
"ml_class": cl_class,
|
|
892
|
-
"confidence": cl_conf,
|
|
893
|
-
"high_confidence": False,
|
|
894
|
-
"state": "pending",
|
|
895
|
-
},
|
|
896
|
-
],
|
|
750
|
+
responses.add(
|
|
751
|
+
responses.POST,
|
|
752
|
+
"http://testserver/api/v1/classification/bulk/",
|
|
753
|
+
status=200,
|
|
754
|
+
json={
|
|
755
|
+
"parent": str(elt.id),
|
|
756
|
+
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
757
|
+
"classifications": [
|
|
758
|
+
{
|
|
759
|
+
"id": "00000000-0000-0000-0000-000000000000",
|
|
760
|
+
"ml_class": "0000",
|
|
761
|
+
"confidence": 0.75,
|
|
762
|
+
"high_confidence": False,
|
|
763
|
+
"state": "pending",
|
|
764
|
+
},
|
|
765
|
+
{
|
|
766
|
+
"id": "11111111-1111-1111-1111-111111111111",
|
|
767
|
+
"ml_class": "1111",
|
|
768
|
+
"confidence": 0.25,
|
|
769
|
+
"high_confidence": False,
|
|
770
|
+
"state": "pending",
|
|
897
771
|
},
|
|
898
|
-
|
|
772
|
+
],
|
|
773
|
+
},
|
|
774
|
+
)
|
|
899
775
|
|
|
900
776
|
mock_elements_worker_with_cache.create_classifications(
|
|
901
777
|
element=elt,
|
|
@@ -911,41 +787,32 @@ def test_create_classifications_with_cache(
|
|
|
911
787
|
"high_confidence": False,
|
|
912
788
|
},
|
|
913
789
|
],
|
|
914
|
-
batch_size=batch_size,
|
|
915
790
|
)
|
|
916
791
|
|
|
917
|
-
|
|
918
|
-
if batch_size != DEFAULT_BATCH_SIZE:
|
|
919
|
-
bulk_api_calls.append(("POST", "http://testserver/api/v1/classification/bulk/"))
|
|
920
|
-
|
|
921
|
-
assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
|
|
792
|
+
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
922
793
|
assert [
|
|
923
794
|
(call.request.method, call.request.url) for call in responses.calls
|
|
924
|
-
] == BASE_API_CALLS +
|
|
795
|
+
] == BASE_API_CALLS + [
|
|
796
|
+
("POST", "http://testserver/api/v1/classification/bulk/"),
|
|
797
|
+
]
|
|
925
798
|
|
|
926
|
-
|
|
927
|
-
second_cl = {"confidence": 0.25, "high_confidence": False, "ml_class": "1111"}
|
|
928
|
-
empty_payload = {
|
|
799
|
+
assert json.loads(responses.calls[-1].request.body) == {
|
|
929
800
|
"parent": str(elt.id),
|
|
930
801
|
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
931
|
-
"classifications": [
|
|
802
|
+
"classifications": [
|
|
803
|
+
{
|
|
804
|
+
"confidence": 0.75,
|
|
805
|
+
"high_confidence": False,
|
|
806
|
+
"ml_class": "0000",
|
|
807
|
+
},
|
|
808
|
+
{
|
|
809
|
+
"confidence": 0.25,
|
|
810
|
+
"high_confidence": False,
|
|
811
|
+
"ml_class": "1111",
|
|
812
|
+
},
|
|
813
|
+
],
|
|
932
814
|
}
|
|
933
815
|
|
|
934
|
-
bodies = []
|
|
935
|
-
first_call_idx = None
|
|
936
|
-
if batch_size > 1:
|
|
937
|
-
first_call_idx = -1
|
|
938
|
-
bodies.append({**empty_payload, "classifications": [first_cl, second_cl]})
|
|
939
|
-
else:
|
|
940
|
-
first_call_idx = -2
|
|
941
|
-
bodies.append({**empty_payload, "classifications": [first_cl]})
|
|
942
|
-
bodies.append({**empty_payload, "classifications": [second_cl]})
|
|
943
|
-
|
|
944
|
-
assert [
|
|
945
|
-
json.loads(bulk_call.request.body)
|
|
946
|
-
for bulk_call in responses.calls[first_call_idx:]
|
|
947
|
-
] == bodies
|
|
948
|
-
|
|
949
816
|
# Check that created classifications were properly stored in SQLite cache
|
|
950
817
|
assert list(CachedClassification.select()) == [
|
|
951
818
|
CachedClassification(
|
|
@@ -2,44 +2,13 @@ import re
|
|
|
2
2
|
import uuid
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
5
|
+
from apistar.exceptions import ErrorResponse
|
|
5
6
|
|
|
6
|
-
from arkindex.exceptions import ErrorResponse
|
|
7
7
|
from arkindex_worker.worker.corpus import CorpusExportState
|
|
8
8
|
from tests import CORPUS_ID
|
|
9
9
|
from tests.test_elements_worker import BASE_API_CALLS
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def test_download_export_not_a_uuid(responses, mock_elements_worker):
|
|
13
|
-
with pytest.raises(ValueError, match="export_id is not a valid uuid."):
|
|
14
|
-
mock_elements_worker.download_export("mon export")
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def test_download_export(responses, mock_elements_worker):
|
|
18
|
-
responses.add(
|
|
19
|
-
responses.GET,
|
|
20
|
-
"http://testserver/api/v1/export/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/",
|
|
21
|
-
status=302,
|
|
22
|
-
body=b"some SQLite export",
|
|
23
|
-
content_type="application/x-sqlite3",
|
|
24
|
-
stream=True,
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
export = mock_elements_worker.download_export(
|
|
28
|
-
"aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"
|
|
29
|
-
)
|
|
30
|
-
assert export.name == "/tmp/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff"
|
|
31
|
-
|
|
32
|
-
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
33
|
-
assert [
|
|
34
|
-
(call.request.method, call.request.url) for call in responses.calls
|
|
35
|
-
] == BASE_API_CALLS + [
|
|
36
|
-
(
|
|
37
|
-
"GET",
|
|
38
|
-
"http://testserver/api/v1/export/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeffff/",
|
|
39
|
-
),
|
|
40
|
-
]
|
|
41
|
-
|
|
42
|
-
|
|
43
12
|
def mock_list_exports_call(responses, export_id):
|
|
44
13
|
responses.add(
|
|
45
14
|
responses.GET,
|
|
@@ -2,8 +2,8 @@ import json
|
|
|
2
2
|
import logging
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
5
|
+
from apistar.exceptions import ErrorResponse
|
|
5
6
|
|
|
6
|
-
from arkindex.exceptions import ErrorResponse
|
|
7
7
|
from arkindex_worker.models import Dataset, Element, Set
|
|
8
8
|
from arkindex_worker.worker.dataset import DatasetState
|
|
9
9
|
from tests import PROCESS_ID
|