arkindex-base-worker 0.4.0rc3__py3-none-any.whl → 0.4.0rc5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {arkindex_base_worker-0.4.0rc3.dist-info → arkindex_base_worker-0.4.0rc5.dist-info}/METADATA +15 -14
  2. {arkindex_base_worker-0.4.0rc3.dist-info → arkindex_base_worker-0.4.0rc5.dist-info}/RECORD +24 -16
  3. {arkindex_base_worker-0.4.0rc3.dist-info → arkindex_base_worker-0.4.0rc5.dist-info}/WHEEL +1 -1
  4. arkindex_worker/cache.py +1 -1
  5. arkindex_worker/worker/__init__.py +6 -2
  6. arkindex_worker/worker/entity.py +8 -19
  7. arkindex_worker/worker/process.py +5 -0
  8. tests/test_elements_worker/{test_classifications.py → test_classification.py} +86 -0
  9. tests/test_elements_worker/test_corpus.py +31 -31
  10. tests/test_elements_worker/test_element.py +427 -0
  11. tests/test_elements_worker/test_element_create_multiple.py +715 -0
  12. tests/test_elements_worker/test_element_create_single.py +528 -0
  13. tests/test_elements_worker/test_element_list_children.py +969 -0
  14. tests/test_elements_worker/test_element_list_parents.py +530 -0
  15. tests/test_elements_worker/{test_entities.py → test_entity_create.py} +42 -245
  16. tests/test_elements_worker/test_entity_list_and_check.py +160 -0
  17. tests/test_elements_worker/test_transcription_create.py +873 -0
  18. tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
  19. tests/test_elements_worker/test_transcription_list.py +450 -0
  20. tests/test_elements_worker/test_version.py +60 -0
  21. tests/test_elements_worker/test_worker.py +525 -88
  22. tests/test_image.py +181 -198
  23. tests/test_elements_worker/test_elements.py +0 -3704
  24. tests/test_elements_worker/test_transcriptions.py +0 -2252
  25. {arkindex_base_worker-0.4.0rc3.dist-info → arkindex_base_worker-0.4.0rc5.dist-info}/LICENSE +0 -0
  26. {arkindex_base_worker-0.4.0rc3.dist-info → arkindex_base_worker-0.4.0rc5.dist-info}/top_level.txt +0 -0
@@ -13,8 +13,6 @@ from arkindex_worker.cache import (
13
13
  CachedTranscriptionEntity,
14
14
  )
15
15
  from arkindex_worker.models import Transcription
16
- from arkindex_worker.utils import DEFAULT_BATCH_SIZE
17
- from arkindex_worker.worker.entity import MissingEntityType
18
16
  from arkindex_worker.worker.transcription import TextOrientation
19
17
  from tests import CORPUS_ID
20
18
 
@@ -686,158 +684,6 @@ def test_create_transcription_entity_with_confidence_with_cache(
686
684
  ]
687
685
 
688
686
 
689
- def test_list_transcription_entities_deprecation(fake_dummy_worker):
690
- transcription = Transcription({"id": "fake_transcription_id"})
691
- worker_version = "worker_version_id"
692
- fake_dummy_worker.api_client.add_response(
693
- "ListTranscriptionEntities",
694
- id=transcription.id,
695
- worker_version=worker_version,
696
- response={"id": "entity_id"},
697
- )
698
- with pytest.deprecated_call(
699
- match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
700
- ):
701
- assert fake_dummy_worker.list_transcription_entities(
702
- transcription, worker_version=worker_version
703
- ) == {"id": "entity_id"}
704
-
705
- assert len(fake_dummy_worker.api_client.history) == 1
706
- assert len(fake_dummy_worker.api_client.responses) == 0
707
-
708
-
709
- def test_list_transcription_entities(fake_dummy_worker):
710
- transcription = Transcription({"id": "fake_transcription_id"})
711
- worker_run = "worker_run_id"
712
- fake_dummy_worker.api_client.add_response(
713
- "ListTranscriptionEntities",
714
- id=transcription.id,
715
- worker_run=worker_run,
716
- response={"id": "entity_id"},
717
- )
718
- assert fake_dummy_worker.list_transcription_entities(
719
- transcription, worker_run=worker_run
720
- ) == {"id": "entity_id"}
721
-
722
- assert len(fake_dummy_worker.api_client.history) == 1
723
- assert len(fake_dummy_worker.api_client.responses) == 0
724
-
725
-
726
- def test_list_corpus_entities(responses, mock_elements_worker):
727
- responses.add(
728
- responses.GET,
729
- f"http://testserver/api/v1/corpus/{CORPUS_ID}/entities/",
730
- json={
731
- "count": 1,
732
- "next": None,
733
- "results": [
734
- {
735
- "id": "fake_entity_id",
736
- }
737
- ],
738
- },
739
- )
740
-
741
- mock_elements_worker.list_corpus_entities()
742
-
743
- assert mock_elements_worker.entities == {
744
- "fake_entity_id": {
745
- "id": "fake_entity_id",
746
- }
747
- }
748
-
749
- assert len(responses.calls) == len(BASE_API_CALLS) + 1
750
- assert [
751
- (call.request.method, call.request.url) for call in responses.calls
752
- ] == BASE_API_CALLS + [
753
- (
754
- "GET",
755
- f"http://testserver/api/v1/corpus/{CORPUS_ID}/entities/",
756
- ),
757
- ]
758
-
759
-
760
- @pytest.mark.parametrize("wrong_name", [1234, 12.5])
761
- def test_list_corpus_entities_wrong_name(mock_elements_worker, wrong_name):
762
- with pytest.raises(AssertionError, match="name should be of type str"):
763
- mock_elements_worker.list_corpus_entities(name=wrong_name)
764
-
765
-
766
- @pytest.mark.parametrize("wrong_parent", [{"id": "element_id"}, 12.5, "blabla"])
767
- def test_list_corpus_entities_wrong_parent(mock_elements_worker, wrong_parent):
768
- with pytest.raises(AssertionError, match="parent should be of type Element"):
769
- mock_elements_worker.list_corpus_entities(parent=wrong_parent)
770
-
771
-
772
- def test_check_required_entity_types(responses, mock_elements_worker):
773
- # Set one entity type
774
- mock_elements_worker.entity_types = {"person": "person-entity-type-id"}
775
-
776
- checked_types = ["person", "new-entity"]
777
-
778
- # Call to create new entity type
779
- responses.add(
780
- responses.POST,
781
- "http://testserver/api/v1/entity/types/",
782
- status=200,
783
- match=[
784
- matchers.json_params_matcher(
785
- {
786
- "name": "new-entity",
787
- "corpus": CORPUS_ID,
788
- }
789
- )
790
- ],
791
- json={
792
- "id": "new-entity-id",
793
- "corpus": CORPUS_ID,
794
- "name": "new-entity",
795
- "color": "ffd1b3",
796
- },
797
- )
798
-
799
- mock_elements_worker.check_required_entity_types(
800
- entity_types=checked_types,
801
- )
802
-
803
- # Make sure the entity_types entry has been updated
804
- assert mock_elements_worker.entity_types == {
805
- "person": "person-entity-type-id",
806
- "new-entity": "new-entity-id",
807
- }
808
-
809
- assert len(responses.calls) == len(BASE_API_CALLS) + 1
810
- assert [
811
- (call.request.method, call.request.url) for call in responses.calls
812
- ] == BASE_API_CALLS + [
813
- (
814
- "POST",
815
- "http://testserver/api/v1/entity/types/",
816
- ),
817
- ]
818
-
819
-
820
- def test_check_required_entity_types_no_creation_allowed(
821
- responses, mock_elements_worker
822
- ):
823
- # Set one entity type
824
- mock_elements_worker.entity_types = {"person": "person-entity-type-id"}
825
-
826
- checked_types = ["person", "new-entity"]
827
-
828
- with pytest.raises(
829
- MissingEntityType, match="Entity type `new-entity` was not in the corpus."
830
- ):
831
- mock_elements_worker.check_required_entity_types(
832
- entity_types=checked_types, create_missing=False
833
- )
834
-
835
- assert len(responses.calls) == len(BASE_API_CALLS)
836
- assert [
837
- (call.request.method, call.request.url) for call in responses.calls
838
- ] == BASE_API_CALLS
839
-
840
-
841
687
  @pytest.mark.parametrize("transcription", [None, "not a transcription", 1])
842
688
  def test_create_transcription_entities_wrong_transcription(
843
689
  mock_elements_worker, transcription
@@ -989,89 +835,50 @@ def test_create_transcription_entities_wrong_entity(
989
835
  )
990
836
 
991
837
 
992
- @pytest.mark.parametrize("batch_size", [DEFAULT_BATCH_SIZE, 1])
993
- def test_create_transcription_entities(batch_size, responses, mock_elements_worker):
838
+ def test_create_transcription_entities(responses, mock_elements_worker):
994
839
  transcription = Transcription(id="transcription-id")
995
840
 
996
841
  # Call to Transcription entities creation in bulk
997
- if batch_size > 1:
998
- responses.add(
999
- responses.POST,
1000
- "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
1001
- status=201,
1002
- match=[
1003
- matchers.json_params_matcher(
1004
- {
1005
- "worker_run_id": "56785678-5678-5678-5678-567856785678",
1006
- "entities": [
1007
- {
1008
- "name": "Teklia",
1009
- "type_id": "22222222-2222-2222-2222-222222222222",
1010
- "offset": 0,
1011
- "length": 6,
1012
- "confidence": 1.0,
1013
- },
1014
- {
1015
- "name": "Team Rocket",
1016
- "type_id": "22222222-2222-2222-2222-222222222222",
1017
- "offset": 7,
1018
- "length": 11,
1019
- "confidence": 1.0,
1020
- },
1021
- ],
1022
- }
1023
- )
1024
- ],
1025
- json={
1026
- "entities": [
1027
- {
1028
- "transcription_entity_id": "transc-entity-id",
1029
- "entity_id": "entity-id1",
1030
- },
1031
- {
1032
- "transcription_entity_id": "transc-entity-id",
1033
- "entity_id": "entity-id2",
1034
- },
1035
- ]
1036
- },
1037
- )
1038
- else:
1039
- for idx, (name, offset, length) in enumerate(
1040
- [
1041
- ("Teklia", 0, 6),
1042
- ("Team Rocket", 7, 11),
1043
- ],
1044
- start=1,
1045
- ):
1046
- responses.add(
1047
- responses.POST,
1048
- "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
1049
- status=201,
1050
- match=[
1051
- matchers.json_params_matcher(
1052
- {
1053
- "worker_run_id": "56785678-5678-5678-5678-567856785678",
1054
- "entities": [
1055
- {
1056
- "name": name,
1057
- "type_id": "22222222-2222-2222-2222-222222222222",
1058
- "offset": offset,
1059
- "length": length,
1060
- "confidence": 1.0,
1061
- }
1062
- ],
1063
- }
1064
- )
1065
- ],
1066
- json={
842
+ responses.add(
843
+ responses.POST,
844
+ "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
845
+ status=201,
846
+ match=[
847
+ matchers.json_params_matcher(
848
+ {
849
+ "worker_run_id": "56785678-5678-5678-5678-567856785678",
1067
850
  "entities": [
1068
851
  {
1069
- "transcription_entity_id": "transc-entity-id",
1070
- "entity_id": f"entity-id{idx}",
1071
- }
1072
- ]
1073
- },
852
+ "name": "Teklia",
853
+ "type_id": "22222222-2222-2222-2222-222222222222",
854
+ "offset": 0,
855
+ "length": 6,
856
+ "confidence": 1.0,
857
+ },
858
+ {
859
+ "name": "Team Rocket",
860
+ "type_id": "22222222-2222-2222-2222-222222222222",
861
+ "offset": 7,
862
+ "length": 11,
863
+ "confidence": 1.0,
864
+ },
865
+ ],
866
+ }
1074
867
  )
868
+ ],
869
+ json={
870
+ "entities": [
871
+ {
872
+ "transcription_entity_id": "transc-entity-id",
873
+ "entity_id": "entity-id1",
874
+ },
875
+ {
876
+ "transcription_entity_id": "transc-entity-id",
877
+ "entity_id": "entity-id2",
878
+ },
879
+ ]
880
+ },
881
+ )
1075
882
 
1076
883
  # Store entity type/slug correspondence on the worker
1077
884
  mock_elements_worker.entity_types = {
@@ -1095,26 +902,16 @@ def test_create_transcription_entities(batch_size, responses, mock_elements_work
1095
902
  "confidence": 1.0,
1096
903
  },
1097
904
  ],
1098
- batch_size=batch_size,
1099
905
  )
1100
906
 
1101
907
  assert len(created_objects) == 2
1102
908
 
1103
- bulk_api_calls = [
909
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
910
+ assert [
911
+ (call.request.method, call.request.url) for call in responses.calls
912
+ ] == BASE_API_CALLS + [
1104
913
  (
1105
914
  "POST",
1106
915
  "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
1107
916
  )
1108
917
  ]
1109
- if batch_size != DEFAULT_BATCH_SIZE:
1110
- bulk_api_calls.append(
1111
- (
1112
- "POST",
1113
- "http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
1114
- )
1115
- )
1116
-
1117
- assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
1118
- assert [
1119
- (call.request.method, call.request.url) for call in responses.calls
1120
- ] == BASE_API_CALLS + bulk_api_calls
@@ -0,0 +1,160 @@
1
+ import pytest
2
+ from responses import matchers
3
+
4
+ from arkindex_worker.models import Transcription
5
+ from arkindex_worker.worker.entity import MissingEntityType
6
+ from tests import CORPUS_ID
7
+
8
+ from . import BASE_API_CALLS
9
+
10
+
11
+ def test_check_required_entity_types(responses, mock_elements_worker):
12
+ # Set one entity type
13
+ mock_elements_worker.entity_types = {"person": "person-entity-type-id"}
14
+
15
+ checked_types = ["person", "new-entity"]
16
+
17
+ # Call to create new entity type
18
+ responses.add(
19
+ responses.POST,
20
+ "http://testserver/api/v1/entity/types/",
21
+ status=200,
22
+ match=[
23
+ matchers.json_params_matcher(
24
+ {
25
+ "name": "new-entity",
26
+ "corpus": CORPUS_ID,
27
+ }
28
+ )
29
+ ],
30
+ json={
31
+ "id": "new-entity-id",
32
+ "corpus": CORPUS_ID,
33
+ "name": "new-entity",
34
+ "color": "ffd1b3",
35
+ },
36
+ )
37
+
38
+ mock_elements_worker.check_required_entity_types(
39
+ entity_types=checked_types,
40
+ )
41
+
42
+ # Make sure the entity_types entry has been updated
43
+ assert mock_elements_worker.entity_types == {
44
+ "person": "person-entity-type-id",
45
+ "new-entity": "new-entity-id",
46
+ }
47
+
48
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
49
+ assert [
50
+ (call.request.method, call.request.url) for call in responses.calls
51
+ ] == BASE_API_CALLS + [
52
+ (
53
+ "POST",
54
+ "http://testserver/api/v1/entity/types/",
55
+ ),
56
+ ]
57
+
58
+
59
+ def test_check_required_entity_types_no_creation_allowed(
60
+ responses, mock_elements_worker
61
+ ):
62
+ # Set one entity type
63
+ mock_elements_worker.entity_types = {"person": "person-entity-type-id"}
64
+
65
+ checked_types = ["person", "new-entity"]
66
+
67
+ with pytest.raises(
68
+ MissingEntityType, match="Entity type `new-entity` was not in the corpus."
69
+ ):
70
+ mock_elements_worker.check_required_entity_types(
71
+ entity_types=checked_types, create_missing=False
72
+ )
73
+
74
+ assert len(responses.calls) == len(BASE_API_CALLS)
75
+ assert [
76
+ (call.request.method, call.request.url) for call in responses.calls
77
+ ] == BASE_API_CALLS
78
+
79
+
80
+ def test_list_transcription_entities_deprecation(fake_dummy_worker):
81
+ transcription = Transcription({"id": "fake_transcription_id"})
82
+ worker_version = "worker_version_id"
83
+ fake_dummy_worker.api_client.add_response(
84
+ "ListTranscriptionEntities",
85
+ id=transcription.id,
86
+ worker_version=worker_version,
87
+ response={"id": "entity_id"},
88
+ )
89
+ with pytest.deprecated_call(
90
+ match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
91
+ ):
92
+ assert fake_dummy_worker.list_transcription_entities(
93
+ transcription, worker_version=worker_version
94
+ ) == {"id": "entity_id"}
95
+
96
+ assert len(fake_dummy_worker.api_client.history) == 1
97
+ assert len(fake_dummy_worker.api_client.responses) == 0
98
+
99
+
100
+ def test_list_transcription_entities(fake_dummy_worker):
101
+ transcription = Transcription({"id": "fake_transcription_id"})
102
+ worker_run = "worker_run_id"
103
+ fake_dummy_worker.api_client.add_response(
104
+ "ListTranscriptionEntities",
105
+ id=transcription.id,
106
+ worker_run=worker_run,
107
+ response={"id": "entity_id"},
108
+ )
109
+ assert fake_dummy_worker.list_transcription_entities(
110
+ transcription, worker_run=worker_run
111
+ ) == {"id": "entity_id"}
112
+
113
+ assert len(fake_dummy_worker.api_client.history) == 1
114
+ assert len(fake_dummy_worker.api_client.responses) == 0
115
+
116
+
117
+ def test_list_corpus_entities(responses, mock_elements_worker):
118
+ responses.add(
119
+ responses.GET,
120
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/entities/",
121
+ json={
122
+ "count": 1,
123
+ "next": None,
124
+ "results": [
125
+ {
126
+ "id": "fake_entity_id",
127
+ }
128
+ ],
129
+ },
130
+ )
131
+
132
+ mock_elements_worker.list_corpus_entities()
133
+
134
+ assert mock_elements_worker.entities == {
135
+ "fake_entity_id": {
136
+ "id": "fake_entity_id",
137
+ }
138
+ }
139
+
140
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
141
+ assert [
142
+ (call.request.method, call.request.url) for call in responses.calls
143
+ ] == BASE_API_CALLS + [
144
+ (
145
+ "GET",
146
+ f"http://testserver/api/v1/corpus/{CORPUS_ID}/entities/",
147
+ ),
148
+ ]
149
+
150
+
151
+ @pytest.mark.parametrize("wrong_name", [1234, 12.5])
152
+ def test_list_corpus_entities_wrong_name(mock_elements_worker, wrong_name):
153
+ with pytest.raises(AssertionError, match="name should be of type str"):
154
+ mock_elements_worker.list_corpus_entities(name=wrong_name)
155
+
156
+
157
+ @pytest.mark.parametrize("wrong_parent", [{"id": "element_id"}, 12.5, "blabla"])
158
+ def test_list_corpus_entities_wrong_parent(mock_elements_worker, wrong_parent):
159
+ with pytest.raises(AssertionError, match="parent should be of type Element"):
160
+ mock_elements_worker.list_corpus_entities(parent=wrong_parent)