arkindex-base-worker 0.5.0b2__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.5.0b2.dist-info → arkindex_base_worker-0.5.0rc1.dist-info}/METADATA +1 -1
- {arkindex_base_worker-0.5.0b2.dist-info → arkindex_base_worker-0.5.0rc1.dist-info}/RECORD +16 -17
- {arkindex_base_worker-0.5.0b2.dist-info → arkindex_base_worker-0.5.0rc1.dist-info}/WHEEL +1 -1
- arkindex_worker/cache.py +3 -22
- arkindex_worker/image.py +4 -2
- arkindex_worker/worker/base.py +3 -3
- arkindex_worker/worker/entity.py +17 -126
- arkindex_worker/worker/metadata.py +0 -11
- tests/conftest.py +0 -14
- tests/test_base_worker.py +1 -1
- tests/test_cache.py +1 -2
- tests/test_elements_worker/{test_entity_create.py → test_entity.py} +220 -227
- tests/test_elements_worker/test_metadata.py +0 -47
- tests/test_merge.py +0 -7
- tests/test_elements_worker/test_entity_list_and_check.py +0 -293
- {arkindex_base_worker-0.5.0b2.dist-info → arkindex_base_worker-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
- {arkindex_base_worker-0.5.0b2.dist-info → arkindex_base_worker-0.5.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -8,216 +8,216 @@ from responses import matchers
|
|
|
8
8
|
from arkindex.exceptions import ErrorResponse
|
|
9
9
|
from arkindex_worker.cache import (
|
|
10
10
|
CachedElement,
|
|
11
|
-
CachedEntity,
|
|
12
11
|
CachedTranscription,
|
|
13
12
|
CachedTranscriptionEntity,
|
|
14
13
|
)
|
|
15
14
|
from arkindex_worker.models import Transcription
|
|
15
|
+
from arkindex_worker.worker.entity import MissingEntityType
|
|
16
16
|
from arkindex_worker.worker.transcription import TextOrientation
|
|
17
17
|
from tests import CORPUS_ID
|
|
18
18
|
|
|
19
19
|
from . import BASE_API_CALLS
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
def
|
|
22
|
+
def test_create_entity_type_wrong_name(mock_elements_worker):
|
|
23
23
|
with pytest.raises(
|
|
24
24
|
AssertionError, match="name shouldn't be null and should be of type str"
|
|
25
25
|
):
|
|
26
|
-
mock_elements_worker.
|
|
27
|
-
name=None,
|
|
28
|
-
type="person",
|
|
29
|
-
)
|
|
26
|
+
mock_elements_worker.create_entity_type(name=None)
|
|
30
27
|
|
|
31
28
|
with pytest.raises(
|
|
32
29
|
AssertionError, match="name shouldn't be null and should be of type str"
|
|
33
30
|
):
|
|
34
|
-
mock_elements_worker.
|
|
35
|
-
name=1234,
|
|
36
|
-
type="person",
|
|
37
|
-
)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def test_create_entity_wrong_type(mock_elements_worker):
|
|
41
|
-
with pytest.raises(
|
|
42
|
-
AssertionError, match="type shouldn't be null and should be of type str"
|
|
43
|
-
):
|
|
44
|
-
mock_elements_worker.create_entity(
|
|
45
|
-
name="Bob Bob",
|
|
46
|
-
type=None,
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
with pytest.raises(
|
|
50
|
-
AssertionError, match="type shouldn't be null and should be of type str"
|
|
51
|
-
):
|
|
52
|
-
mock_elements_worker.create_entity(
|
|
53
|
-
name="Bob Bob",
|
|
54
|
-
type=1234,
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def test_create_entity_wrong_corpus(mock_elements_worker):
|
|
59
|
-
# Triggering an error on metas param, not giving corpus should work since
|
|
60
|
-
# ARKINDEX_CORPUS_ID environment variable is set on mock_elements_worker
|
|
61
|
-
with pytest.raises(AssertionError, match="metas should be of type dict"):
|
|
62
|
-
mock_elements_worker.create_entity(
|
|
63
|
-
name="Bob Bob",
|
|
64
|
-
type="person",
|
|
65
|
-
metas="wrong metas",
|
|
66
|
-
)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def test_create_entity_wrong_metas(mock_elements_worker):
|
|
70
|
-
with pytest.raises(AssertionError, match="metas should be of type dict"):
|
|
71
|
-
mock_elements_worker.create_entity(
|
|
72
|
-
name="Bob Bob",
|
|
73
|
-
type="person",
|
|
74
|
-
metas="wrong metas",
|
|
75
|
-
)
|
|
31
|
+
mock_elements_worker.create_entity_type(name=1234)
|
|
76
32
|
|
|
77
33
|
|
|
78
|
-
def
|
|
79
|
-
with pytest.raises(AssertionError, match="validated should be of type bool"):
|
|
80
|
-
mock_elements_worker.create_entity(
|
|
81
|
-
name="Bob Bob",
|
|
82
|
-
type="person",
|
|
83
|
-
validated="wrong validated",
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def test_create_entity_api_error(responses, mock_elements_worker):
|
|
88
|
-
# Set one entity type
|
|
89
|
-
mock_elements_worker.entity_types = {"person": "person-entity-type-id"}
|
|
34
|
+
def test_create_entity_type_api_error(responses, mock_elements_worker):
|
|
90
35
|
responses.add(
|
|
91
36
|
responses.POST,
|
|
92
|
-
"http://testserver/api/v1/entity/",
|
|
37
|
+
"http://testserver/api/v1/entity/types/",
|
|
93
38
|
status=418,
|
|
94
39
|
)
|
|
95
40
|
|
|
96
41
|
with pytest.raises(ErrorResponse):
|
|
97
|
-
mock_elements_worker.
|
|
98
|
-
name="Bob Bob",
|
|
99
|
-
type="person",
|
|
100
|
-
)
|
|
42
|
+
mock_elements_worker.create_entity_type(name="firstname")
|
|
101
43
|
|
|
102
44
|
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
103
45
|
assert [
|
|
104
46
|
(call.request.method, call.request.url) for call in responses.calls
|
|
105
|
-
] == BASE_API_CALLS + [("POST", "http://testserver/api/v1/entity/")]
|
|
47
|
+
] == BASE_API_CALLS + [("POST", "http://testserver/api/v1/entity/types/")]
|
|
106
48
|
|
|
107
49
|
|
|
108
|
-
def
|
|
109
|
-
|
|
110
|
-
mock_elements_worker.entity_types = {"person": "person-entity-type-id"}
|
|
50
|
+
def test_create_entity_type_already_exists(responses, mock_elements_worker):
|
|
51
|
+
assert mock_elements_worker.entity_types == {}
|
|
111
52
|
|
|
112
53
|
responses.add(
|
|
113
54
|
responses.POST,
|
|
114
|
-
"http://testserver/api/v1/entity/",
|
|
55
|
+
"http://testserver/api/v1/entity/types/",
|
|
56
|
+
status=400,
|
|
57
|
+
match=[
|
|
58
|
+
matchers.json_params_matcher({"name": "firstname", "corpus": CORPUS_ID})
|
|
59
|
+
],
|
|
60
|
+
)
|
|
61
|
+
responses.add(
|
|
62
|
+
responses.GET,
|
|
63
|
+
f"http://testserver/api/v1/corpus/{CORPUS_ID}/entity-types/",
|
|
115
64
|
status=200,
|
|
116
|
-
json={
|
|
65
|
+
json={
|
|
66
|
+
"count": 1,
|
|
67
|
+
"next": None,
|
|
68
|
+
"results": [
|
|
69
|
+
{"id": "lastname-id", "name": "lastname", "color": "ffd1b3"},
|
|
70
|
+
{"id": "firstname-id", "name": "firstname", "color": "ffd1b3"},
|
|
71
|
+
],
|
|
72
|
+
},
|
|
117
73
|
)
|
|
118
74
|
|
|
119
|
-
|
|
120
|
-
name="Bob Bob",
|
|
121
|
-
type="person",
|
|
122
|
-
)
|
|
75
|
+
mock_elements_worker.create_entity_type(name="firstname")
|
|
123
76
|
|
|
124
|
-
assert len(responses.calls) == len(BASE_API_CALLS) +
|
|
77
|
+
assert len(responses.calls) == len(BASE_API_CALLS) + 2
|
|
125
78
|
assert [
|
|
126
79
|
(call.request.method, call.request.url) for call in responses.calls
|
|
127
80
|
] == BASE_API_CALLS + [
|
|
128
|
-
("POST", "http://testserver/api/v1/entity/"),
|
|
81
|
+
("POST", "http://testserver/api/v1/entity/types/"),
|
|
82
|
+
("GET", f"http://testserver/api/v1/corpus/{CORPUS_ID}/entity-types/"),
|
|
129
83
|
]
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
"
|
|
134
|
-
"
|
|
135
|
-
"corpus": CORPUS_ID,
|
|
136
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
84
|
+
|
|
85
|
+
# Make sure the entity_types attribute has been updated
|
|
86
|
+
assert mock_elements_worker.entity_types == {
|
|
87
|
+
"lastname": "lastname-id",
|
|
88
|
+
"firstname": "firstname-id",
|
|
137
89
|
}
|
|
138
|
-
assert entity_id == "12345678-1234-1234-1234-123456789123"
|
|
139
90
|
|
|
140
91
|
|
|
141
|
-
def
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
"""
|
|
145
|
-
# Call to list entity types
|
|
92
|
+
def test_create_entity_type(responses, mock_elements_worker):
|
|
93
|
+
assert mock_elements_worker.entity_types == {}
|
|
94
|
+
|
|
146
95
|
responses.add(
|
|
147
|
-
responses.
|
|
148
|
-
|
|
96
|
+
responses.POST,
|
|
97
|
+
"http://testserver/api/v1/entity/types/",
|
|
149
98
|
status=200,
|
|
99
|
+
match=[
|
|
100
|
+
matchers.json_params_matcher({"name": "firstname", "corpus": CORPUS_ID})
|
|
101
|
+
],
|
|
150
102
|
json={
|
|
151
|
-
"
|
|
152
|
-
"
|
|
153
|
-
"
|
|
154
|
-
|
|
155
|
-
],
|
|
103
|
+
"id": "firstname-id",
|
|
104
|
+
"name": "firstname",
|
|
105
|
+
"corpus": CORPUS_ID,
|
|
106
|
+
"color": "ffd1b3",
|
|
156
107
|
},
|
|
157
108
|
)
|
|
158
109
|
|
|
159
|
-
|
|
160
|
-
AssertionError, match="Entity type `new-entity` not found in the corpus."
|
|
161
|
-
):
|
|
162
|
-
mock_elements_worker.create_entity(
|
|
163
|
-
name="Bob Bob",
|
|
164
|
-
type="new-entity",
|
|
165
|
-
)
|
|
110
|
+
mock_elements_worker.create_entity_type(name="firstname")
|
|
166
111
|
|
|
167
112
|
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
168
113
|
assert [
|
|
169
114
|
(call.request.method, call.request.url) for call in responses.calls
|
|
170
115
|
] == BASE_API_CALLS + [
|
|
171
|
-
(
|
|
172
|
-
"GET",
|
|
173
|
-
f"http://testserver/api/v1/corpus/{CORPUS_ID}/entity-types/",
|
|
174
|
-
),
|
|
116
|
+
("POST", "http://testserver/api/v1/entity/types/"),
|
|
175
117
|
]
|
|
176
118
|
|
|
119
|
+
# Make sure the entity_types attribute has been updated
|
|
120
|
+
assert mock_elements_worker.entity_types == {"firstname": "firstname-id"}
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def test_check_required_entity_types_wrong_entity_types(mock_elements_worker):
|
|
124
|
+
with pytest.raises(
|
|
125
|
+
AssertionError,
|
|
126
|
+
match="entity_types shouldn't be null and should be of type list",
|
|
127
|
+
):
|
|
128
|
+
mock_elements_worker.check_required_entity_types(entity_types=None)
|
|
129
|
+
|
|
130
|
+
with pytest.raises(
|
|
131
|
+
AssertionError,
|
|
132
|
+
match="entity_types shouldn't be null and should be of type list",
|
|
133
|
+
):
|
|
134
|
+
mock_elements_worker.check_required_entity_types(entity_types=1234)
|
|
135
|
+
|
|
136
|
+
with pytest.raises(
|
|
137
|
+
AssertionError,
|
|
138
|
+
match="Entity type at index 1 in entity_types: Should be of type str",
|
|
139
|
+
):
|
|
140
|
+
mock_elements_worker.check_required_entity_types(
|
|
141
|
+
entity_types=["firstname", 1234]
|
|
142
|
+
)
|
|
143
|
+
|
|
177
144
|
|
|
178
|
-
def
|
|
145
|
+
def test_check_required_entity_types_wrong_create_missing(mock_elements_worker):
|
|
146
|
+
with pytest.raises(
|
|
147
|
+
AssertionError,
|
|
148
|
+
match="create_missing shouldn't be null and should be of type bool",
|
|
149
|
+
):
|
|
150
|
+
mock_elements_worker.check_required_entity_types(
|
|
151
|
+
entity_types=["firstname"], create_missing=None
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
with pytest.raises(
|
|
155
|
+
AssertionError,
|
|
156
|
+
match="create_missing shouldn't be null and should be of type bool",
|
|
157
|
+
):
|
|
158
|
+
mock_elements_worker.check_required_entity_types(
|
|
159
|
+
entity_types=["firstname"], create_missing=1234
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def test_check_required_entity_types_do_not_create_missing(
|
|
164
|
+
responses, mock_elements_worker
|
|
165
|
+
):
|
|
179
166
|
# Set one entity type
|
|
180
|
-
|
|
167
|
+
mock_elements_worker.entity_types = {"lastname": "lastname-id"}
|
|
168
|
+
|
|
169
|
+
with pytest.raises(
|
|
170
|
+
MissingEntityType, match="Entity type `firstname` was not in the corpus."
|
|
171
|
+
):
|
|
172
|
+
mock_elements_worker.check_required_entity_types(
|
|
173
|
+
entity_types=["lastname", "firstname"], create_missing=False
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
assert len(responses.calls) == len(BASE_API_CALLS)
|
|
177
|
+
assert [
|
|
178
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
179
|
+
] == BASE_API_CALLS
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def test_check_required_entity_types(responses, mock_elements_worker):
|
|
183
|
+
# Set one entity type
|
|
184
|
+
mock_elements_worker.entity_types = {"lastname": "lastname-id"}
|
|
185
|
+
|
|
186
|
+
# Call to create a new entity type
|
|
181
187
|
responses.add(
|
|
182
188
|
responses.POST,
|
|
183
|
-
"http://testserver/api/v1/entity/",
|
|
189
|
+
"http://testserver/api/v1/entity/types/",
|
|
184
190
|
status=200,
|
|
185
|
-
|
|
191
|
+
match=[
|
|
192
|
+
matchers.json_params_matcher({"name": "firstname", "corpus": CORPUS_ID})
|
|
193
|
+
],
|
|
194
|
+
json={
|
|
195
|
+
"id": "firstname-id",
|
|
196
|
+
"name": "firstname",
|
|
197
|
+
"corpus": CORPUS_ID,
|
|
198
|
+
"color": "ffd1b3",
|
|
199
|
+
},
|
|
186
200
|
)
|
|
187
201
|
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
type="person",
|
|
202
|
+
mock_elements_worker.check_required_entity_types(
|
|
203
|
+
entity_types=["lastname", "firstname"], create_missing=True
|
|
191
204
|
)
|
|
192
205
|
|
|
193
206
|
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
194
207
|
assert [
|
|
195
208
|
(call.request.method, call.request.url) for call in responses.calls
|
|
196
209
|
] == BASE_API_CALLS + [
|
|
197
|
-
(
|
|
210
|
+
(
|
|
211
|
+
"POST",
|
|
212
|
+
"http://testserver/api/v1/entity/types/",
|
|
213
|
+
),
|
|
198
214
|
]
|
|
199
215
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
"
|
|
203
|
-
"
|
|
204
|
-
"validated": None,
|
|
205
|
-
"corpus": CORPUS_ID,
|
|
206
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
216
|
+
# Make sure the entity_types attribute has been updated
|
|
217
|
+
assert mock_elements_worker.entity_types == {
|
|
218
|
+
"lastname": "lastname-id",
|
|
219
|
+
"firstname": "firstname-id",
|
|
207
220
|
}
|
|
208
|
-
assert entity_id == "12345678-1234-1234-1234-123456789123"
|
|
209
|
-
|
|
210
|
-
# Check that created entity was properly stored in SQLite cache
|
|
211
|
-
assert list(CachedEntity.select()) == [
|
|
212
|
-
CachedEntity(
|
|
213
|
-
id=UUID("12345678-1234-1234-1234-123456789123"),
|
|
214
|
-
type="person",
|
|
215
|
-
name="Bob Bob",
|
|
216
|
-
validated=False,
|
|
217
|
-
metas={},
|
|
218
|
-
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
|
|
219
|
-
)
|
|
220
|
-
]
|
|
221
221
|
|
|
222
222
|
|
|
223
223
|
def test_create_transcription_entity_wrong_transcription(mock_elements_worker):
|
|
@@ -227,7 +227,7 @@ def test_create_transcription_entity_wrong_transcription(mock_elements_worker):
|
|
|
227
227
|
):
|
|
228
228
|
mock_elements_worker.create_transcription_entity(
|
|
229
229
|
transcription=None,
|
|
230
|
-
|
|
230
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
231
231
|
offset=5,
|
|
232
232
|
length=10,
|
|
233
233
|
)
|
|
@@ -238,15 +238,15 @@ def test_create_transcription_entity_wrong_transcription(mock_elements_worker):
|
|
|
238
238
|
):
|
|
239
239
|
mock_elements_worker.create_transcription_entity(
|
|
240
240
|
transcription=1234,
|
|
241
|
-
|
|
241
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
242
242
|
offset=5,
|
|
243
243
|
length=10,
|
|
244
244
|
)
|
|
245
245
|
|
|
246
246
|
|
|
247
|
-
def test_create_transcription_entity_wrong_entity(mock_elements_worker):
|
|
247
|
+
def test_create_transcription_entity_wrong_type_id(mock_elements_worker):
|
|
248
248
|
with pytest.raises(
|
|
249
|
-
AssertionError, match="
|
|
249
|
+
AssertionError, match="type_id shouldn't be null and should be of type str"
|
|
250
250
|
):
|
|
251
251
|
mock_elements_worker.create_transcription_entity(
|
|
252
252
|
transcription=Transcription(
|
|
@@ -255,13 +255,13 @@ def test_create_transcription_entity_wrong_entity(mock_elements_worker):
|
|
|
255
255
|
"element": {"id": "myelement"},
|
|
256
256
|
}
|
|
257
257
|
),
|
|
258
|
-
|
|
258
|
+
type_id=None,
|
|
259
259
|
offset=5,
|
|
260
260
|
length=10,
|
|
261
261
|
)
|
|
262
262
|
|
|
263
263
|
with pytest.raises(
|
|
264
|
-
AssertionError, match="
|
|
264
|
+
AssertionError, match="type_id shouldn't be null and should be of type str"
|
|
265
265
|
):
|
|
266
266
|
mock_elements_worker.create_transcription_entity(
|
|
267
267
|
transcription=Transcription(
|
|
@@ -270,7 +270,7 @@ def test_create_transcription_entity_wrong_entity(mock_elements_worker):
|
|
|
270
270
|
"element": {"id": "myelement"},
|
|
271
271
|
}
|
|
272
272
|
),
|
|
273
|
-
|
|
273
|
+
type_id=1234,
|
|
274
274
|
offset=5,
|
|
275
275
|
length=10,
|
|
276
276
|
)
|
|
@@ -288,7 +288,7 @@ def test_create_transcription_entity_wrong_offset(mock_elements_worker):
|
|
|
288
288
|
"element": {"id": "myelement"},
|
|
289
289
|
}
|
|
290
290
|
),
|
|
291
|
-
|
|
291
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
292
292
|
offset=None,
|
|
293
293
|
length=10,
|
|
294
294
|
)
|
|
@@ -304,7 +304,7 @@ def test_create_transcription_entity_wrong_offset(mock_elements_worker):
|
|
|
304
304
|
"element": {"id": "myelement"},
|
|
305
305
|
}
|
|
306
306
|
),
|
|
307
|
-
|
|
307
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
308
308
|
offset="not an int",
|
|
309
309
|
length=10,
|
|
310
310
|
)
|
|
@@ -320,7 +320,7 @@ def test_create_transcription_entity_wrong_offset(mock_elements_worker):
|
|
|
320
320
|
"element": {"id": "myelement"},
|
|
321
321
|
}
|
|
322
322
|
),
|
|
323
|
-
|
|
323
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
324
324
|
offset=-1,
|
|
325
325
|
length=10,
|
|
326
326
|
)
|
|
@@ -338,7 +338,7 @@ def test_create_transcription_entity_wrong_length(mock_elements_worker):
|
|
|
338
338
|
"element": {"id": "myelement"},
|
|
339
339
|
}
|
|
340
340
|
),
|
|
341
|
-
|
|
341
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
342
342
|
offset=5,
|
|
343
343
|
length=None,
|
|
344
344
|
)
|
|
@@ -354,7 +354,7 @@ def test_create_transcription_entity_wrong_length(mock_elements_worker):
|
|
|
354
354
|
"element": {"id": "myelement"},
|
|
355
355
|
}
|
|
356
356
|
),
|
|
357
|
-
|
|
357
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
358
358
|
offset=5,
|
|
359
359
|
length="not an int",
|
|
360
360
|
)
|
|
@@ -370,7 +370,7 @@ def test_create_transcription_entity_wrong_length(mock_elements_worker):
|
|
|
370
370
|
"element": {"id": "myelement"},
|
|
371
371
|
}
|
|
372
372
|
),
|
|
373
|
-
|
|
373
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
374
374
|
offset=5,
|
|
375
375
|
length=0,
|
|
376
376
|
)
|
|
@@ -379,7 +379,7 @@ def test_create_transcription_entity_wrong_length(mock_elements_worker):
|
|
|
379
379
|
def test_create_transcription_entity_api_error(responses, mock_elements_worker):
|
|
380
380
|
responses.add(
|
|
381
381
|
responses.POST,
|
|
382
|
-
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/
|
|
382
|
+
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/entities/",
|
|
383
383
|
status=418,
|
|
384
384
|
)
|
|
385
385
|
|
|
@@ -391,7 +391,7 @@ def test_create_transcription_entity_api_error(responses, mock_elements_worker):
|
|
|
391
391
|
"element": {"id": "myelement"},
|
|
392
392
|
}
|
|
393
393
|
),
|
|
394
|
-
|
|
394
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
395
395
|
offset=5,
|
|
396
396
|
length=10,
|
|
397
397
|
)
|
|
@@ -402,7 +402,7 @@ def test_create_transcription_entity_api_error(responses, mock_elements_worker):
|
|
|
402
402
|
] == BASE_API_CALLS + [
|
|
403
403
|
(
|
|
404
404
|
"POST",
|
|
405
|
-
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/
|
|
405
|
+
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/entities/",
|
|
406
406
|
)
|
|
407
407
|
]
|
|
408
408
|
|
|
@@ -410,10 +410,10 @@ def test_create_transcription_entity_api_error(responses, mock_elements_worker):
|
|
|
410
410
|
def test_create_transcription_entity_no_confidence(responses, mock_elements_worker):
|
|
411
411
|
responses.add(
|
|
412
412
|
responses.POST,
|
|
413
|
-
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/
|
|
413
|
+
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/entities/",
|
|
414
414
|
status=200,
|
|
415
415
|
json={
|
|
416
|
-
"
|
|
416
|
+
"type": {"id": "11111111-1111-1111-1111-111111111111"},
|
|
417
417
|
"offset": 5,
|
|
418
418
|
"length": 10,
|
|
419
419
|
},
|
|
@@ -426,7 +426,7 @@ def test_create_transcription_entity_no_confidence(responses, mock_elements_work
|
|
|
426
426
|
"element": {"id": "myelement"},
|
|
427
427
|
}
|
|
428
428
|
),
|
|
429
|
-
|
|
429
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
430
430
|
offset=5,
|
|
431
431
|
length=10,
|
|
432
432
|
)
|
|
@@ -437,11 +437,11 @@ def test_create_transcription_entity_no_confidence(responses, mock_elements_work
|
|
|
437
437
|
] == BASE_API_CALLS + [
|
|
438
438
|
(
|
|
439
439
|
"POST",
|
|
440
|
-
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/
|
|
440
|
+
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/entities/",
|
|
441
441
|
),
|
|
442
442
|
]
|
|
443
443
|
assert json.loads(responses.calls[-1].request.body) == {
|
|
444
|
-
"
|
|
444
|
+
"type_id": "11111111-1111-1111-1111-111111111111",
|
|
445
445
|
"offset": 5,
|
|
446
446
|
"length": 10,
|
|
447
447
|
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
@@ -451,10 +451,10 @@ def test_create_transcription_entity_no_confidence(responses, mock_elements_work
|
|
|
451
451
|
def test_create_transcription_entity_with_confidence(responses, mock_elements_worker):
|
|
452
452
|
responses.add(
|
|
453
453
|
responses.POST,
|
|
454
|
-
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/
|
|
454
|
+
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/entities/",
|
|
455
455
|
status=200,
|
|
456
456
|
json={
|
|
457
|
-
"
|
|
457
|
+
"type": {"id": "11111111-1111-1111-1111-111111111111"},
|
|
458
458
|
"offset": 5,
|
|
459
459
|
"length": 10,
|
|
460
460
|
"confidence": 0.33,
|
|
@@ -468,7 +468,7 @@ def test_create_transcription_entity_with_confidence(responses, mock_elements_wo
|
|
|
468
468
|
"element": {"id": "myelement"},
|
|
469
469
|
}
|
|
470
470
|
),
|
|
471
|
-
|
|
471
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
472
472
|
offset=5,
|
|
473
473
|
length=10,
|
|
474
474
|
confidence=0.33,
|
|
@@ -480,11 +480,11 @@ def test_create_transcription_entity_with_confidence(responses, mock_elements_wo
|
|
|
480
480
|
] == BASE_API_CALLS + [
|
|
481
481
|
(
|
|
482
482
|
"POST",
|
|
483
|
-
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/
|
|
483
|
+
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/entities/",
|
|
484
484
|
),
|
|
485
485
|
]
|
|
486
486
|
assert json.loads(responses.calls[-1].request.body) == {
|
|
487
|
-
"
|
|
487
|
+
"type_id": "11111111-1111-1111-1111-111111111111",
|
|
488
488
|
"offset": 5,
|
|
489
489
|
"length": 10,
|
|
490
490
|
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
@@ -495,10 +495,10 @@ def test_create_transcription_entity_with_confidence(responses, mock_elements_wo
|
|
|
495
495
|
def test_create_transcription_entity_confidence_none(responses, mock_elements_worker):
|
|
496
496
|
responses.add(
|
|
497
497
|
responses.POST,
|
|
498
|
-
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/
|
|
498
|
+
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/entities/",
|
|
499
499
|
status=200,
|
|
500
500
|
json={
|
|
501
|
-
"
|
|
501
|
+
"type": {"id": "11111111-1111-1111-1111-111111111111"},
|
|
502
502
|
"offset": 5,
|
|
503
503
|
"length": 10,
|
|
504
504
|
"confidence": None,
|
|
@@ -512,7 +512,7 @@ def test_create_transcription_entity_confidence_none(responses, mock_elements_wo
|
|
|
512
512
|
"element": {"id": "myelement"},
|
|
513
513
|
}
|
|
514
514
|
),
|
|
515
|
-
|
|
515
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
516
516
|
offset=5,
|
|
517
517
|
length=10,
|
|
518
518
|
confidence=None,
|
|
@@ -524,11 +524,11 @@ def test_create_transcription_entity_confidence_none(responses, mock_elements_wo
|
|
|
524
524
|
] == BASE_API_CALLS + [
|
|
525
525
|
(
|
|
526
526
|
"POST",
|
|
527
|
-
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/
|
|
527
|
+
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/entities/",
|
|
528
528
|
),
|
|
529
529
|
]
|
|
530
530
|
assert json.loads(responses.calls[-1].request.body) == {
|
|
531
|
-
"
|
|
531
|
+
"type_id": "11111111-1111-1111-1111-111111111111",
|
|
532
532
|
"offset": 5,
|
|
533
533
|
"length": 10,
|
|
534
534
|
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
@@ -550,19 +550,13 @@ def test_create_transcription_entity_with_cache(
|
|
|
550
550
|
orientation=TextOrientation.HorizontalLeftToRight,
|
|
551
551
|
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
|
|
552
552
|
)
|
|
553
|
-
CachedEntity.create(
|
|
554
|
-
id=UUID("11111111-1111-1111-1111-111111111111"),
|
|
555
|
-
type="person",
|
|
556
|
-
name="Bob Bob",
|
|
557
|
-
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
|
|
558
|
-
)
|
|
559
553
|
|
|
560
554
|
responses.add(
|
|
561
555
|
responses.POST,
|
|
562
|
-
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/
|
|
556
|
+
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/entities/",
|
|
563
557
|
status=200,
|
|
564
558
|
json={
|
|
565
|
-
"
|
|
559
|
+
"type": {"id": "11111111-1111-1111-1111-111111111111", "name": "Whatever"},
|
|
566
560
|
"offset": 5,
|
|
567
561
|
"length": 10,
|
|
568
562
|
},
|
|
@@ -575,7 +569,7 @@ def test_create_transcription_entity_with_cache(
|
|
|
575
569
|
"element": {"id": "myelement"},
|
|
576
570
|
}
|
|
577
571
|
),
|
|
578
|
-
|
|
572
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
579
573
|
offset=5,
|
|
580
574
|
length=10,
|
|
581
575
|
)
|
|
@@ -586,11 +580,11 @@ def test_create_transcription_entity_with_cache(
|
|
|
586
580
|
] == BASE_API_CALLS + [
|
|
587
581
|
(
|
|
588
582
|
"POST",
|
|
589
|
-
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/
|
|
583
|
+
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/entities/",
|
|
590
584
|
),
|
|
591
585
|
]
|
|
592
586
|
assert json.loads(responses.calls[-1].request.body) == {
|
|
593
|
-
"
|
|
587
|
+
"type_id": "11111111-1111-1111-1111-111111111111",
|
|
594
588
|
"offset": 5,
|
|
595
589
|
"length": 10,
|
|
596
590
|
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
@@ -599,7 +593,7 @@ def test_create_transcription_entity_with_cache(
|
|
|
599
593
|
assert list(CachedTranscriptionEntity.select()) == [
|
|
600
594
|
CachedTranscriptionEntity(
|
|
601
595
|
transcription=UUID("11111111-1111-1111-1111-111111111111"),
|
|
602
|
-
|
|
596
|
+
type="Whatever",
|
|
603
597
|
offset=5,
|
|
604
598
|
length=10,
|
|
605
599
|
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
|
|
@@ -622,19 +616,13 @@ def test_create_transcription_entity_with_confidence_with_cache(
|
|
|
622
616
|
orientation=TextOrientation.HorizontalLeftToRight,
|
|
623
617
|
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
|
|
624
618
|
)
|
|
625
|
-
CachedEntity.create(
|
|
626
|
-
id=UUID("11111111-1111-1111-1111-111111111111"),
|
|
627
|
-
type="person",
|
|
628
|
-
name="Bob Bob",
|
|
629
|
-
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
|
|
630
|
-
)
|
|
631
619
|
|
|
632
620
|
responses.add(
|
|
633
621
|
responses.POST,
|
|
634
|
-
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/
|
|
622
|
+
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/entities/",
|
|
635
623
|
status=200,
|
|
636
624
|
json={
|
|
637
|
-
"
|
|
625
|
+
"type": {"id": "11111111-1111-1111-1111-111111111111", "name": "Whatever"},
|
|
638
626
|
"offset": 5,
|
|
639
627
|
"length": 10,
|
|
640
628
|
"confidence": 0.77,
|
|
@@ -648,7 +636,7 @@ def test_create_transcription_entity_with_confidence_with_cache(
|
|
|
648
636
|
"element": {"id": "myelement"},
|
|
649
637
|
}
|
|
650
638
|
),
|
|
651
|
-
|
|
639
|
+
type_id="11111111-1111-1111-1111-111111111111",
|
|
652
640
|
offset=5,
|
|
653
641
|
length=10,
|
|
654
642
|
confidence=0.77,
|
|
@@ -660,11 +648,11 @@ def test_create_transcription_entity_with_confidence_with_cache(
|
|
|
660
648
|
] == BASE_API_CALLS + [
|
|
661
649
|
(
|
|
662
650
|
"POST",
|
|
663
|
-
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/
|
|
651
|
+
"http://testserver/api/v1/transcription/11111111-1111-1111-1111-111111111111/entities/",
|
|
664
652
|
),
|
|
665
653
|
]
|
|
666
654
|
assert json.loads(responses.calls[-1].request.body) == {
|
|
667
|
-
"
|
|
655
|
+
"type_id": "11111111-1111-1111-1111-111111111111",
|
|
668
656
|
"offset": 5,
|
|
669
657
|
"length": 10,
|
|
670
658
|
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
@@ -675,7 +663,7 @@ def test_create_transcription_entity_with_confidence_with_cache(
|
|
|
675
663
|
assert list(CachedTranscriptionEntity.select()) == [
|
|
676
664
|
CachedTranscriptionEntity(
|
|
677
665
|
transcription=UUID("11111111-1111-1111-1111-111111111111"),
|
|
678
|
-
|
|
666
|
+
type="Whatever",
|
|
679
667
|
offset=5,
|
|
680
668
|
length=10,
|
|
681
669
|
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
|
|
@@ -710,7 +698,6 @@ def test_create_transcription_entities_wrong_transcription(
|
|
|
710
698
|
(
|
|
711
699
|
[
|
|
712
700
|
{
|
|
713
|
-
"name": "A",
|
|
714
701
|
"type_id": "12341234-1234-1234-1234-123412341234",
|
|
715
702
|
"offset": 0,
|
|
716
703
|
"length": 1,
|
|
@@ -746,26 +733,15 @@ def test_create_transcription_entities_wrong_entities_subtype(mock_elements_work
|
|
|
746
733
|
("entity", "error"),
|
|
747
734
|
[
|
|
748
735
|
(
|
|
749
|
-
{
|
|
750
|
-
"name": None,
|
|
751
|
-
"type_id": "12341234-1234-1234-1234-123412341234",
|
|
752
|
-
"offset": 0,
|
|
753
|
-
"length": 1,
|
|
754
|
-
"confidence": 0.5,
|
|
755
|
-
},
|
|
756
|
-
"Entity at index 0 in entities: name shouldn't be null and should be of type str",
|
|
757
|
-
),
|
|
758
|
-
(
|
|
759
|
-
{"name": "A", "type_id": None, "offset": 0, "length": 1, "confidence": 0.5},
|
|
736
|
+
{"type_id": None, "offset": 0, "length": 1, "confidence": 0.5},
|
|
760
737
|
"Entity at index 0 in entities: type_id shouldn't be null and should be of type str",
|
|
761
738
|
),
|
|
762
739
|
(
|
|
763
|
-
{"
|
|
740
|
+
{"type_id": 0, "offset": 0, "length": 1, "confidence": 0.5},
|
|
764
741
|
"Entity at index 0 in entities: type_id shouldn't be null and should be of type str",
|
|
765
742
|
),
|
|
766
743
|
(
|
|
767
744
|
{
|
|
768
|
-
"name": "A",
|
|
769
745
|
"type_id": "12341234-1234-1234-1234-123412341234",
|
|
770
746
|
"offset": None,
|
|
771
747
|
"length": 1,
|
|
@@ -775,7 +751,6 @@ def test_create_transcription_entities_wrong_entities_subtype(mock_elements_work
|
|
|
775
751
|
),
|
|
776
752
|
(
|
|
777
753
|
{
|
|
778
|
-
"name": "A",
|
|
779
754
|
"type_id": "12341234-1234-1234-1234-123412341234",
|
|
780
755
|
"offset": -2,
|
|
781
756
|
"length": 1,
|
|
@@ -785,7 +760,6 @@ def test_create_transcription_entities_wrong_entities_subtype(mock_elements_work
|
|
|
785
760
|
),
|
|
786
761
|
(
|
|
787
762
|
{
|
|
788
|
-
"name": "A",
|
|
789
763
|
"type_id": "12341234-1234-1234-1234-123412341234",
|
|
790
764
|
"offset": 0,
|
|
791
765
|
"length": None,
|
|
@@ -795,7 +769,6 @@ def test_create_transcription_entities_wrong_entities_subtype(mock_elements_work
|
|
|
795
769
|
),
|
|
796
770
|
(
|
|
797
771
|
{
|
|
798
|
-
"name": "A",
|
|
799
772
|
"type_id": "12341234-1234-1234-1234-123412341234",
|
|
800
773
|
"offset": 0,
|
|
801
774
|
"length": 0,
|
|
@@ -805,7 +778,6 @@ def test_create_transcription_entities_wrong_entities_subtype(mock_elements_work
|
|
|
805
778
|
),
|
|
806
779
|
(
|
|
807
780
|
{
|
|
808
|
-
"name": "A",
|
|
809
781
|
"type_id": "12341234-1234-1234-1234-123412341234",
|
|
810
782
|
"offset": 0,
|
|
811
783
|
"length": 1,
|
|
@@ -815,7 +787,6 @@ def test_create_transcription_entities_wrong_entities_subtype(mock_elements_work
|
|
|
815
787
|
),
|
|
816
788
|
(
|
|
817
789
|
{
|
|
818
|
-
"name": "A",
|
|
819
790
|
"type_id": "12341234-1234-1234-1234-123412341234",
|
|
820
791
|
"offset": 0,
|
|
821
792
|
"length": 1,
|
|
@@ -847,16 +818,14 @@ def test_create_transcription_entities(responses, mock_elements_worker):
|
|
|
847
818
|
matchers.json_params_matcher(
|
|
848
819
|
{
|
|
849
820
|
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
850
|
-
"
|
|
821
|
+
"transcription_entities": [
|
|
851
822
|
{
|
|
852
|
-
"name": "Teklia",
|
|
853
823
|
"type_id": "22222222-2222-2222-2222-222222222222",
|
|
854
824
|
"offset": 0,
|
|
855
825
|
"length": 6,
|
|
856
826
|
"confidence": 1.0,
|
|
857
827
|
},
|
|
858
828
|
{
|
|
859
|
-
"name": "Team Rocket",
|
|
860
829
|
"type_id": "22222222-2222-2222-2222-222222222222",
|
|
861
830
|
"offset": 7,
|
|
862
831
|
"length": 11,
|
|
@@ -866,18 +835,7 @@ def test_create_transcription_entities(responses, mock_elements_worker):
|
|
|
866
835
|
}
|
|
867
836
|
)
|
|
868
837
|
],
|
|
869
|
-
json={
|
|
870
|
-
"entities": [
|
|
871
|
-
{
|
|
872
|
-
"transcription_entity_id": "transc-entity-id",
|
|
873
|
-
"entity_id": "entity-id1",
|
|
874
|
-
},
|
|
875
|
-
{
|
|
876
|
-
"transcription_entity_id": "transc-entity-id",
|
|
877
|
-
"entity_id": "entity-id2",
|
|
878
|
-
},
|
|
879
|
-
]
|
|
880
|
-
},
|
|
838
|
+
json={"transcription_entities": ["transc-entity-id", "transc-entity-id"]},
|
|
881
839
|
)
|
|
882
840
|
|
|
883
841
|
# Store entity type/slug correspondence on the worker
|
|
@@ -888,14 +846,12 @@ def test_create_transcription_entities(responses, mock_elements_worker):
|
|
|
888
846
|
transcription=transcription,
|
|
889
847
|
entities=[
|
|
890
848
|
{
|
|
891
|
-
"name": "Teklia",
|
|
892
849
|
"type_id": "22222222-2222-2222-2222-222222222222",
|
|
893
850
|
"offset": 0,
|
|
894
851
|
"length": 6,
|
|
895
852
|
"confidence": 1.0,
|
|
896
853
|
},
|
|
897
854
|
{
|
|
898
|
-
"name": "Team Rocket",
|
|
899
855
|
"type_id": "22222222-2222-2222-2222-222222222222",
|
|
900
856
|
"offset": 7,
|
|
901
857
|
"length": 11,
|
|
@@ -915,3 +871,40 @@ def test_create_transcription_entities(responses, mock_elements_worker):
|
|
|
915
871
|
"http://testserver/api/v1/transcription/transcription-id/entities/bulk/",
|
|
916
872
|
)
|
|
917
873
|
]
|
|
874
|
+
|
|
875
|
+
|
|
876
|
+
def test_list_transcription_entities_deprecation(fake_dummy_worker):
|
|
877
|
+
transcription = Transcription({"id": "fake_transcription_id"})
|
|
878
|
+
worker_version = "worker_version_id"
|
|
879
|
+
fake_dummy_worker.api_client.add_response(
|
|
880
|
+
"ListTranscriptionEntities",
|
|
881
|
+
id=transcription.id,
|
|
882
|
+
worker_version=worker_version,
|
|
883
|
+
response={"id": "entity_id"},
|
|
884
|
+
)
|
|
885
|
+
with pytest.deprecated_call(
|
|
886
|
+
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
|
|
887
|
+
):
|
|
888
|
+
assert fake_dummy_worker.list_transcription_entities(
|
|
889
|
+
transcription, worker_version=worker_version
|
|
890
|
+
) == {"id": "entity_id"}
|
|
891
|
+
|
|
892
|
+
assert len(fake_dummy_worker.api_client.history) == 1
|
|
893
|
+
assert len(fake_dummy_worker.api_client.responses) == 0
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
def test_list_transcription_entities(fake_dummy_worker):
|
|
897
|
+
transcription = Transcription({"id": "fake_transcription_id"})
|
|
898
|
+
worker_run = "worker_run_id"
|
|
899
|
+
fake_dummy_worker.api_client.add_response(
|
|
900
|
+
"ListTranscriptionEntities",
|
|
901
|
+
id=transcription.id,
|
|
902
|
+
worker_run=worker_run,
|
|
903
|
+
response={"id": "entity_id"},
|
|
904
|
+
)
|
|
905
|
+
assert fake_dummy_worker.list_transcription_entities(
|
|
906
|
+
transcription, worker_run=worker_run
|
|
907
|
+
) == {"id": "entity_id"}
|
|
908
|
+
|
|
909
|
+
assert len(fake_dummy_worker.api_client.history) == 1
|
|
910
|
+
assert len(fake_dummy_worker.api_client.responses) == 0
|