arkindex-base-worker 0.4.0__py3-none-any.whl → 0.4.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/METADATA +13 -15
- arkindex_base_worker-0.4.0a1.dist-info/RECORD +51 -0
- {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/WHEEL +1 -1
- arkindex_worker/cache.py +1 -1
- arkindex_worker/image.py +1 -120
- arkindex_worker/utils.py +0 -82
- arkindex_worker/worker/__init__.py +161 -46
- arkindex_worker/worker/base.py +11 -36
- arkindex_worker/worker/classification.py +18 -34
- arkindex_worker/worker/corpus.py +4 -21
- arkindex_worker/worker/dataset.py +1 -71
- arkindex_worker/worker/element.py +91 -352
- arkindex_worker/worker/entity.py +11 -11
- arkindex_worker/worker/metadata.py +9 -19
- arkindex_worker/worker/task.py +4 -5
- arkindex_worker/worker/training.py +18 -21
- arkindex_worker/worker/transcription.py +68 -89
- arkindex_worker/worker/version.py +1 -3
- tests/__init__.py +1 -1
- tests/conftest.py +45 -33
- tests/test_base_worker.py +3 -204
- tests/test_dataset_worker.py +4 -7
- tests/test_elements_worker/{test_classification.py → test_classifications.py} +61 -194
- tests/test_elements_worker/test_corpus.py +1 -32
- tests/test_elements_worker/test_dataset.py +1 -1
- tests/test_elements_worker/test_elements.py +2734 -0
- tests/test_elements_worker/{test_entity_create.py → test_entities.py} +160 -26
- tests/test_elements_worker/test_image.py +1 -2
- tests/test_elements_worker/test_metadata.py +99 -224
- tests/test_elements_worker/test_task.py +1 -1
- tests/test_elements_worker/test_training.py +43 -17
- tests/test_elements_worker/test_transcriptions.py +2102 -0
- tests/test_elements_worker/test_worker.py +280 -563
- tests/test_image.py +204 -429
- tests/test_merge.py +2 -1
- tests/test_utils.py +3 -66
- arkindex_base_worker-0.4.0.dist-info/RECORD +0 -61
- arkindex_worker/worker/process.py +0 -92
- tests/test_elements_worker/test_element.py +0 -427
- tests/test_elements_worker/test_element_create_multiple.py +0 -715
- tests/test_elements_worker/test_element_create_single.py +0 -528
- tests/test_elements_worker/test_element_list_children.py +0 -969
- tests/test_elements_worker/test_element_list_parents.py +0 -530
- tests/test_elements_worker/test_entity_list_and_check.py +0 -160
- tests/test_elements_worker/test_process.py +0 -89
- tests/test_elements_worker/test_transcription_create.py +0 -873
- tests/test_elements_worker/test_transcription_create_with_elements.py +0 -951
- tests/test_elements_worker/test_transcription_list.py +0 -450
- tests/test_elements_worker/test_version.py +0 -60
- {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/LICENSE +0 -0
- {arkindex_base_worker-0.4.0.dist-info → arkindex_base_worker-0.4.0a1.dist-info}/top_level.txt +0 -0
|
@@ -1,873 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import re
|
|
3
|
-
from uuid import UUID
|
|
4
|
-
|
|
5
|
-
import pytest
|
|
6
|
-
from playhouse.shortcuts import model_to_dict
|
|
7
|
-
|
|
8
|
-
from arkindex.exceptions import ErrorResponse
|
|
9
|
-
from arkindex_worker.cache import CachedElement, CachedTranscription
|
|
10
|
-
from arkindex_worker.models import Element
|
|
11
|
-
from arkindex_worker.utils import DEFAULT_BATCH_SIZE
|
|
12
|
-
from arkindex_worker.worker.transcription import TextOrientation
|
|
13
|
-
|
|
14
|
-
from . import BASE_API_CALLS
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def test_create_transcription_wrong_element(mock_elements_worker):
|
|
18
|
-
with pytest.raises(
|
|
19
|
-
AssertionError,
|
|
20
|
-
match="element shouldn't be null and should be an Element or CachedElement",
|
|
21
|
-
):
|
|
22
|
-
mock_elements_worker.create_transcription(
|
|
23
|
-
element=None,
|
|
24
|
-
text="i am a line",
|
|
25
|
-
confidence=0.42,
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
with pytest.raises(
|
|
29
|
-
AssertionError,
|
|
30
|
-
match="element shouldn't be null and should be an Element or CachedElement",
|
|
31
|
-
):
|
|
32
|
-
mock_elements_worker.create_transcription(
|
|
33
|
-
element="not element type",
|
|
34
|
-
text="i am a line",
|
|
35
|
-
confidence=0.42,
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def test_create_transcription_wrong_text(mock_elements_worker):
|
|
40
|
-
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
41
|
-
|
|
42
|
-
with pytest.raises(
|
|
43
|
-
AssertionError, match="text shouldn't be null and should be of type str"
|
|
44
|
-
):
|
|
45
|
-
mock_elements_worker.create_transcription(
|
|
46
|
-
element=elt,
|
|
47
|
-
text=None,
|
|
48
|
-
confidence=0.42,
|
|
49
|
-
)
|
|
50
|
-
|
|
51
|
-
with pytest.raises(
|
|
52
|
-
AssertionError, match="text shouldn't be null and should be of type str"
|
|
53
|
-
):
|
|
54
|
-
mock_elements_worker.create_transcription(
|
|
55
|
-
element=elt,
|
|
56
|
-
text=1234,
|
|
57
|
-
confidence=0.42,
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
def test_create_transcription_wrong_confidence(mock_elements_worker):
|
|
62
|
-
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
63
|
-
|
|
64
|
-
with pytest.raises(
|
|
65
|
-
AssertionError,
|
|
66
|
-
match=re.escape(
|
|
67
|
-
"confidence shouldn't be null and should be a float in [0..1] range"
|
|
68
|
-
),
|
|
69
|
-
):
|
|
70
|
-
mock_elements_worker.create_transcription(
|
|
71
|
-
element=elt,
|
|
72
|
-
text="i am a line",
|
|
73
|
-
confidence=None,
|
|
74
|
-
)
|
|
75
|
-
|
|
76
|
-
with pytest.raises(
|
|
77
|
-
AssertionError,
|
|
78
|
-
match=re.escape(
|
|
79
|
-
"confidence shouldn't be null and should be a float in [0..1] range"
|
|
80
|
-
),
|
|
81
|
-
):
|
|
82
|
-
mock_elements_worker.create_transcription(
|
|
83
|
-
element=elt,
|
|
84
|
-
text="i am a line",
|
|
85
|
-
confidence="wrong confidence",
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
with pytest.raises(
|
|
89
|
-
AssertionError,
|
|
90
|
-
match=re.escape(
|
|
91
|
-
"confidence shouldn't be null and should be a float in [0..1] range"
|
|
92
|
-
),
|
|
93
|
-
):
|
|
94
|
-
mock_elements_worker.create_transcription(
|
|
95
|
-
element=elt,
|
|
96
|
-
text="i am a line",
|
|
97
|
-
confidence=0,
|
|
98
|
-
)
|
|
99
|
-
|
|
100
|
-
with pytest.raises(
|
|
101
|
-
AssertionError,
|
|
102
|
-
match=re.escape(
|
|
103
|
-
"confidence shouldn't be null and should be a float in [0..1] range"
|
|
104
|
-
),
|
|
105
|
-
):
|
|
106
|
-
mock_elements_worker.create_transcription(
|
|
107
|
-
element=elt,
|
|
108
|
-
text="i am a line",
|
|
109
|
-
confidence=2.00,
|
|
110
|
-
)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def test_create_transcription_default_orientation(responses, mock_elements_worker):
|
|
114
|
-
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
115
|
-
responses.add(
|
|
116
|
-
responses.POST,
|
|
117
|
-
f"http://testserver/api/v1/element/{elt.id}/transcription/",
|
|
118
|
-
status=200,
|
|
119
|
-
json={
|
|
120
|
-
"id": "56785678-5678-5678-5678-567856785678",
|
|
121
|
-
"text": "Animula vagula blandula",
|
|
122
|
-
"confidence": 0.42,
|
|
123
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
124
|
-
},
|
|
125
|
-
)
|
|
126
|
-
mock_elements_worker.create_transcription(
|
|
127
|
-
element=elt,
|
|
128
|
-
text="Animula vagula blandula",
|
|
129
|
-
confidence=0.42,
|
|
130
|
-
)
|
|
131
|
-
assert json.loads(responses.calls[-1].request.body) == {
|
|
132
|
-
"text": "Animula vagula blandula",
|
|
133
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
134
|
-
"confidence": 0.42,
|
|
135
|
-
"orientation": "horizontal-lr",
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
def test_create_transcription_orientation(responses, mock_elements_worker):
|
|
140
|
-
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
141
|
-
responses.add(
|
|
142
|
-
responses.POST,
|
|
143
|
-
f"http://testserver/api/v1/element/{elt.id}/transcription/",
|
|
144
|
-
status=200,
|
|
145
|
-
json={
|
|
146
|
-
"id": "56785678-5678-5678-5678-567856785678",
|
|
147
|
-
"text": "Animula vagula blandula",
|
|
148
|
-
"confidence": 0.42,
|
|
149
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
150
|
-
},
|
|
151
|
-
)
|
|
152
|
-
mock_elements_worker.create_transcription(
|
|
153
|
-
element=elt,
|
|
154
|
-
text="Animula vagula blandula",
|
|
155
|
-
orientation=TextOrientation.VerticalLeftToRight,
|
|
156
|
-
confidence=0.42,
|
|
157
|
-
)
|
|
158
|
-
assert json.loads(responses.calls[-1].request.body) == {
|
|
159
|
-
"text": "Animula vagula blandula",
|
|
160
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
161
|
-
"confidence": 0.42,
|
|
162
|
-
"orientation": "vertical-lr",
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
def test_create_transcription_wrong_orientation(mock_elements_worker):
|
|
167
|
-
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
168
|
-
with pytest.raises(
|
|
169
|
-
AssertionError,
|
|
170
|
-
match="orientation shouldn't be null and should be of type TextOrientation",
|
|
171
|
-
):
|
|
172
|
-
mock_elements_worker.create_transcription(
|
|
173
|
-
element=elt,
|
|
174
|
-
text="Animula vagula blandula",
|
|
175
|
-
confidence=0.26,
|
|
176
|
-
orientation="elliptical",
|
|
177
|
-
)
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
def test_create_transcription_api_error(responses, mock_elements_worker):
|
|
181
|
-
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
182
|
-
responses.add(
|
|
183
|
-
responses.POST,
|
|
184
|
-
f"http://testserver/api/v1/element/{elt.id}/transcription/",
|
|
185
|
-
status=418,
|
|
186
|
-
)
|
|
187
|
-
|
|
188
|
-
with pytest.raises(ErrorResponse):
|
|
189
|
-
mock_elements_worker.create_transcription(
|
|
190
|
-
element=elt,
|
|
191
|
-
text="i am a line",
|
|
192
|
-
confidence=0.42,
|
|
193
|
-
)
|
|
194
|
-
|
|
195
|
-
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
196
|
-
assert [
|
|
197
|
-
(call.request.method, call.request.url) for call in responses.calls
|
|
198
|
-
] == BASE_API_CALLS + [
|
|
199
|
-
("POST", f"http://testserver/api/v1/element/{elt.id}/transcription/")
|
|
200
|
-
]
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
def test_create_transcription(responses, mock_elements_worker):
|
|
204
|
-
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
205
|
-
responses.add(
|
|
206
|
-
responses.POST,
|
|
207
|
-
f"http://testserver/api/v1/element/{elt.id}/transcription/",
|
|
208
|
-
status=200,
|
|
209
|
-
json={
|
|
210
|
-
"id": "56785678-5678-5678-5678-567856785678",
|
|
211
|
-
"text": "i am a line",
|
|
212
|
-
"confidence": 0.42,
|
|
213
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
214
|
-
},
|
|
215
|
-
)
|
|
216
|
-
|
|
217
|
-
mock_elements_worker.create_transcription(
|
|
218
|
-
element=elt,
|
|
219
|
-
text="i am a line",
|
|
220
|
-
confidence=0.42,
|
|
221
|
-
)
|
|
222
|
-
|
|
223
|
-
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
224
|
-
assert [
|
|
225
|
-
(call.request.method, call.request.url) for call in responses.calls
|
|
226
|
-
] == BASE_API_CALLS + [
|
|
227
|
-
("POST", f"http://testserver/api/v1/element/{elt.id}/transcription/"),
|
|
228
|
-
]
|
|
229
|
-
|
|
230
|
-
assert json.loads(responses.calls[-1].request.body) == {
|
|
231
|
-
"text": "i am a line",
|
|
232
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
233
|
-
"confidence": 0.42,
|
|
234
|
-
"orientation": "horizontal-lr",
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
def test_create_transcription_with_cache(responses, mock_elements_worker_with_cache):
|
|
239
|
-
elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
|
|
240
|
-
|
|
241
|
-
responses.add(
|
|
242
|
-
responses.POST,
|
|
243
|
-
f"http://testserver/api/v1/element/{elt.id}/transcription/",
|
|
244
|
-
status=200,
|
|
245
|
-
json={
|
|
246
|
-
"id": "56785678-5678-5678-5678-567856785678",
|
|
247
|
-
"text": "i am a line",
|
|
248
|
-
"confidence": 0.42,
|
|
249
|
-
"orientation": "horizontal-lr",
|
|
250
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
251
|
-
},
|
|
252
|
-
)
|
|
253
|
-
|
|
254
|
-
mock_elements_worker_with_cache.create_transcription(
|
|
255
|
-
element=elt,
|
|
256
|
-
text="i am a line",
|
|
257
|
-
confidence=0.42,
|
|
258
|
-
)
|
|
259
|
-
|
|
260
|
-
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
261
|
-
assert [
|
|
262
|
-
(call.request.method, call.request.url) for call in responses.calls
|
|
263
|
-
] == BASE_API_CALLS + [
|
|
264
|
-
("POST", f"http://testserver/api/v1/element/{elt.id}/transcription/"),
|
|
265
|
-
]
|
|
266
|
-
|
|
267
|
-
assert json.loads(responses.calls[-1].request.body) == {
|
|
268
|
-
"text": "i am a line",
|
|
269
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
270
|
-
"orientation": "horizontal-lr",
|
|
271
|
-
"confidence": 0.42,
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
# Check that created transcription was properly stored in SQLite cache
|
|
275
|
-
assert list(CachedTranscription.select()) == [
|
|
276
|
-
CachedTranscription(
|
|
277
|
-
id=UUID("56785678-5678-5678-5678-567856785678"),
|
|
278
|
-
element_id=UUID(elt.id),
|
|
279
|
-
text="i am a line",
|
|
280
|
-
confidence=0.42,
|
|
281
|
-
orientation=TextOrientation.HorizontalLeftToRight,
|
|
282
|
-
worker_version_id=None,
|
|
283
|
-
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
|
|
284
|
-
)
|
|
285
|
-
]
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
def test_create_transcription_orientation_with_cache(
|
|
289
|
-
responses, mock_elements_worker_with_cache
|
|
290
|
-
):
|
|
291
|
-
elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
|
|
292
|
-
responses.add(
|
|
293
|
-
responses.POST,
|
|
294
|
-
f"http://testserver/api/v1/element/{elt.id}/transcription/",
|
|
295
|
-
status=200,
|
|
296
|
-
json={
|
|
297
|
-
"id": "56785678-5678-5678-5678-567856785678",
|
|
298
|
-
"text": "Animula vagula blandula",
|
|
299
|
-
"confidence": 0.42,
|
|
300
|
-
"orientation": "vertical-lr",
|
|
301
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
302
|
-
},
|
|
303
|
-
)
|
|
304
|
-
mock_elements_worker_with_cache.create_transcription(
|
|
305
|
-
element=elt,
|
|
306
|
-
text="Animula vagula blandula",
|
|
307
|
-
orientation=TextOrientation.VerticalLeftToRight,
|
|
308
|
-
confidence=0.42,
|
|
309
|
-
)
|
|
310
|
-
assert json.loads(responses.calls[-1].request.body) == {
|
|
311
|
-
"text": "Animula vagula blandula",
|
|
312
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
313
|
-
"orientation": "vertical-lr",
|
|
314
|
-
"confidence": 0.42,
|
|
315
|
-
}
|
|
316
|
-
# Check that the text orientation was properly stored in SQLite cache
|
|
317
|
-
assert list(map(model_to_dict, CachedTranscription.select())) == [
|
|
318
|
-
{
|
|
319
|
-
"id": UUID("56785678-5678-5678-5678-567856785678"),
|
|
320
|
-
"element": {
|
|
321
|
-
"id": UUID("12341234-1234-1234-1234-123412341234"),
|
|
322
|
-
"parent_id": None,
|
|
323
|
-
"type": "thing",
|
|
324
|
-
"image": None,
|
|
325
|
-
"polygon": None,
|
|
326
|
-
"rotation_angle": 0,
|
|
327
|
-
"mirrored": False,
|
|
328
|
-
"initial": False,
|
|
329
|
-
"worker_version_id": None,
|
|
330
|
-
"worker_run_id": None,
|
|
331
|
-
"confidence": None,
|
|
332
|
-
},
|
|
333
|
-
"text": "Animula vagula blandula",
|
|
334
|
-
"confidence": 0.42,
|
|
335
|
-
"orientation": TextOrientation.VerticalLeftToRight.value,
|
|
336
|
-
"worker_version_id": None,
|
|
337
|
-
"worker_run_id": UUID("56785678-5678-5678-5678-567856785678"),
|
|
338
|
-
}
|
|
339
|
-
]
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
def test_create_transcriptions_wrong_transcriptions(mock_elements_worker):
|
|
343
|
-
with pytest.raises(
|
|
344
|
-
AssertionError,
|
|
345
|
-
match="transcriptions shouldn't be null and should be of type list",
|
|
346
|
-
):
|
|
347
|
-
mock_elements_worker.create_transcriptions(
|
|
348
|
-
transcriptions=None,
|
|
349
|
-
)
|
|
350
|
-
|
|
351
|
-
with pytest.raises(
|
|
352
|
-
AssertionError,
|
|
353
|
-
match="transcriptions shouldn't be null and should be of type list",
|
|
354
|
-
):
|
|
355
|
-
mock_elements_worker.create_transcriptions(
|
|
356
|
-
transcriptions=1234,
|
|
357
|
-
)
|
|
358
|
-
|
|
359
|
-
with pytest.raises(
|
|
360
|
-
AssertionError,
|
|
361
|
-
match="Transcription at index 1 in transcriptions: element_id shouldn't be null and should be of type str",
|
|
362
|
-
):
|
|
363
|
-
mock_elements_worker.create_transcriptions(
|
|
364
|
-
transcriptions=[
|
|
365
|
-
{
|
|
366
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
367
|
-
"text": "The",
|
|
368
|
-
"confidence": 0.75,
|
|
369
|
-
},
|
|
370
|
-
{
|
|
371
|
-
"text": "word",
|
|
372
|
-
"confidence": 0.5,
|
|
373
|
-
},
|
|
374
|
-
],
|
|
375
|
-
)
|
|
376
|
-
|
|
377
|
-
with pytest.raises(
|
|
378
|
-
AssertionError,
|
|
379
|
-
match="Transcription at index 1 in transcriptions: element_id shouldn't be null and should be of type str",
|
|
380
|
-
):
|
|
381
|
-
mock_elements_worker.create_transcriptions(
|
|
382
|
-
transcriptions=[
|
|
383
|
-
{
|
|
384
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
385
|
-
"text": "The",
|
|
386
|
-
"confidence": 0.75,
|
|
387
|
-
},
|
|
388
|
-
{
|
|
389
|
-
"element_id": None,
|
|
390
|
-
"text": "word",
|
|
391
|
-
"confidence": 0.5,
|
|
392
|
-
},
|
|
393
|
-
],
|
|
394
|
-
)
|
|
395
|
-
|
|
396
|
-
with pytest.raises(
|
|
397
|
-
AssertionError,
|
|
398
|
-
match="Transcription at index 1 in transcriptions: element_id shouldn't be null and should be of type str",
|
|
399
|
-
):
|
|
400
|
-
mock_elements_worker.create_transcriptions(
|
|
401
|
-
transcriptions=[
|
|
402
|
-
{
|
|
403
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
404
|
-
"text": "The",
|
|
405
|
-
"confidence": 0.75,
|
|
406
|
-
},
|
|
407
|
-
{
|
|
408
|
-
"element_id": 1234,
|
|
409
|
-
"text": "word",
|
|
410
|
-
"confidence": 0.5,
|
|
411
|
-
},
|
|
412
|
-
],
|
|
413
|
-
)
|
|
414
|
-
|
|
415
|
-
with pytest.raises(
|
|
416
|
-
AssertionError,
|
|
417
|
-
match="Transcription at index 1 in transcriptions: text shouldn't be null and should be of type str",
|
|
418
|
-
):
|
|
419
|
-
mock_elements_worker.create_transcriptions(
|
|
420
|
-
transcriptions=[
|
|
421
|
-
{
|
|
422
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
423
|
-
"text": "The",
|
|
424
|
-
"confidence": 0.75,
|
|
425
|
-
},
|
|
426
|
-
{
|
|
427
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
428
|
-
"confidence": 0.5,
|
|
429
|
-
},
|
|
430
|
-
],
|
|
431
|
-
)
|
|
432
|
-
|
|
433
|
-
with pytest.raises(
|
|
434
|
-
AssertionError,
|
|
435
|
-
match="Transcription at index 1 in transcriptions: text shouldn't be null and should be of type str",
|
|
436
|
-
):
|
|
437
|
-
mock_elements_worker.create_transcriptions(
|
|
438
|
-
transcriptions=[
|
|
439
|
-
{
|
|
440
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
441
|
-
"text": "The",
|
|
442
|
-
"confidence": 0.75,
|
|
443
|
-
},
|
|
444
|
-
{
|
|
445
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
446
|
-
"text": None,
|
|
447
|
-
"confidence": 0.5,
|
|
448
|
-
},
|
|
449
|
-
],
|
|
450
|
-
)
|
|
451
|
-
|
|
452
|
-
with pytest.raises(
|
|
453
|
-
AssertionError,
|
|
454
|
-
match="Transcription at index 1 in transcriptions: text shouldn't be null and should be of type str",
|
|
455
|
-
):
|
|
456
|
-
mock_elements_worker.create_transcriptions(
|
|
457
|
-
transcriptions=[
|
|
458
|
-
{
|
|
459
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
460
|
-
"text": "The",
|
|
461
|
-
"confidence": 0.75,
|
|
462
|
-
},
|
|
463
|
-
{
|
|
464
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
465
|
-
"text": 1234,
|
|
466
|
-
"confidence": 0.5,
|
|
467
|
-
},
|
|
468
|
-
],
|
|
469
|
-
)
|
|
470
|
-
|
|
471
|
-
with pytest.raises(
|
|
472
|
-
AssertionError,
|
|
473
|
-
match=re.escape(
|
|
474
|
-
"Transcription at index 1 in transcriptions: confidence shouldn't be null and should be a float in [0..1] range"
|
|
475
|
-
),
|
|
476
|
-
):
|
|
477
|
-
mock_elements_worker.create_transcriptions(
|
|
478
|
-
transcriptions=[
|
|
479
|
-
{
|
|
480
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
481
|
-
"text": "The",
|
|
482
|
-
"confidence": 0.75,
|
|
483
|
-
},
|
|
484
|
-
{
|
|
485
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
486
|
-
"text": "word",
|
|
487
|
-
},
|
|
488
|
-
],
|
|
489
|
-
)
|
|
490
|
-
|
|
491
|
-
with pytest.raises(
|
|
492
|
-
AssertionError,
|
|
493
|
-
match=re.escape(
|
|
494
|
-
"Transcription at index 1 in transcriptions: confidence shouldn't be null and should be a float in [0..1] range"
|
|
495
|
-
),
|
|
496
|
-
):
|
|
497
|
-
mock_elements_worker.create_transcriptions(
|
|
498
|
-
transcriptions=[
|
|
499
|
-
{
|
|
500
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
501
|
-
"text": "The",
|
|
502
|
-
"confidence": 0.75,
|
|
503
|
-
},
|
|
504
|
-
{
|
|
505
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
506
|
-
"text": "word",
|
|
507
|
-
"confidence": None,
|
|
508
|
-
},
|
|
509
|
-
],
|
|
510
|
-
)
|
|
511
|
-
|
|
512
|
-
with pytest.raises(
|
|
513
|
-
AssertionError,
|
|
514
|
-
match=re.escape(
|
|
515
|
-
"Transcription at index 1 in transcriptions: confidence shouldn't be null and should be a float in [0..1] range"
|
|
516
|
-
),
|
|
517
|
-
):
|
|
518
|
-
mock_elements_worker.create_transcriptions(
|
|
519
|
-
transcriptions=[
|
|
520
|
-
{
|
|
521
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
522
|
-
"text": "The",
|
|
523
|
-
"confidence": 0.75,
|
|
524
|
-
},
|
|
525
|
-
{
|
|
526
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
527
|
-
"text": "word",
|
|
528
|
-
"confidence": "a wrong confidence",
|
|
529
|
-
},
|
|
530
|
-
],
|
|
531
|
-
)
|
|
532
|
-
|
|
533
|
-
with pytest.raises(
|
|
534
|
-
AssertionError,
|
|
535
|
-
match=re.escape(
|
|
536
|
-
"Transcription at index 1 in transcriptions: confidence shouldn't be null and should be a float in [0..1] range"
|
|
537
|
-
),
|
|
538
|
-
):
|
|
539
|
-
mock_elements_worker.create_transcriptions(
|
|
540
|
-
transcriptions=[
|
|
541
|
-
{
|
|
542
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
543
|
-
"text": "The",
|
|
544
|
-
"confidence": 0.75,
|
|
545
|
-
},
|
|
546
|
-
{
|
|
547
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
548
|
-
"text": "word",
|
|
549
|
-
"confidence": 0,
|
|
550
|
-
},
|
|
551
|
-
],
|
|
552
|
-
)
|
|
553
|
-
|
|
554
|
-
with pytest.raises(
|
|
555
|
-
AssertionError,
|
|
556
|
-
match=re.escape(
|
|
557
|
-
"Transcription at index 1 in transcriptions: confidence shouldn't be null and should be a float in [0..1] range"
|
|
558
|
-
),
|
|
559
|
-
):
|
|
560
|
-
mock_elements_worker.create_transcriptions(
|
|
561
|
-
transcriptions=[
|
|
562
|
-
{
|
|
563
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
564
|
-
"text": "The",
|
|
565
|
-
"confidence": 0.75,
|
|
566
|
-
},
|
|
567
|
-
{
|
|
568
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
569
|
-
"text": "word",
|
|
570
|
-
"confidence": 2.00,
|
|
571
|
-
},
|
|
572
|
-
],
|
|
573
|
-
)
|
|
574
|
-
|
|
575
|
-
with pytest.raises(
|
|
576
|
-
AssertionError,
|
|
577
|
-
match="Transcription at index 1 in transcriptions: orientation shouldn't be null and should be of type TextOrientation",
|
|
578
|
-
):
|
|
579
|
-
mock_elements_worker.create_transcriptions(
|
|
580
|
-
transcriptions=[
|
|
581
|
-
{
|
|
582
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
583
|
-
"text": "The",
|
|
584
|
-
"confidence": 0.75,
|
|
585
|
-
},
|
|
586
|
-
{
|
|
587
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
588
|
-
"text": "word",
|
|
589
|
-
"confidence": 0.28,
|
|
590
|
-
"orientation": "wobble",
|
|
591
|
-
},
|
|
592
|
-
],
|
|
593
|
-
)
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
def test_create_transcriptions_api_error(responses, mock_elements_worker):
|
|
597
|
-
responses.add(
|
|
598
|
-
responses.POST,
|
|
599
|
-
"http://testserver/api/v1/transcription/bulk/",
|
|
600
|
-
status=418,
|
|
601
|
-
)
|
|
602
|
-
trans = [
|
|
603
|
-
{
|
|
604
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
605
|
-
"text": "The",
|
|
606
|
-
"confidence": 0.75,
|
|
607
|
-
},
|
|
608
|
-
{
|
|
609
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
610
|
-
"text": "word",
|
|
611
|
-
"confidence": 0.42,
|
|
612
|
-
},
|
|
613
|
-
]
|
|
614
|
-
|
|
615
|
-
with pytest.raises(ErrorResponse):
|
|
616
|
-
mock_elements_worker.create_transcriptions(transcriptions=trans)
|
|
617
|
-
|
|
618
|
-
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
619
|
-
assert [
|
|
620
|
-
(call.request.method, call.request.url) for call in responses.calls
|
|
621
|
-
] == BASE_API_CALLS + [("POST", "http://testserver/api/v1/transcription/bulk/")]
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
@pytest.mark.parametrize("batch_size", [DEFAULT_BATCH_SIZE, 1])
|
|
625
|
-
def test_create_transcriptions(batch_size, responses, mock_elements_worker_with_cache):
|
|
626
|
-
CachedElement.create(id="11111111-1111-1111-1111-111111111111", type="thing")
|
|
627
|
-
transcriptions = [
|
|
628
|
-
{
|
|
629
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
630
|
-
"text": "The",
|
|
631
|
-
"confidence": 0.75,
|
|
632
|
-
},
|
|
633
|
-
{
|
|
634
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
635
|
-
"text": "word",
|
|
636
|
-
"confidence": 0.42,
|
|
637
|
-
},
|
|
638
|
-
]
|
|
639
|
-
|
|
640
|
-
if batch_size > 1:
|
|
641
|
-
responses.add(
|
|
642
|
-
responses.POST,
|
|
643
|
-
"http://testserver/api/v1/transcription/bulk/",
|
|
644
|
-
status=200,
|
|
645
|
-
json={
|
|
646
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
647
|
-
"transcriptions": [
|
|
648
|
-
{
|
|
649
|
-
"id": "00000000-0000-0000-0000-000000000000",
|
|
650
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
651
|
-
"text": "The",
|
|
652
|
-
"orientation": "horizontal-lr",
|
|
653
|
-
"confidence": 0.75,
|
|
654
|
-
},
|
|
655
|
-
{
|
|
656
|
-
"id": "11111111-1111-1111-1111-111111111111",
|
|
657
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
658
|
-
"text": "word",
|
|
659
|
-
"orientation": "horizontal-lr",
|
|
660
|
-
"confidence": 0.42,
|
|
661
|
-
},
|
|
662
|
-
],
|
|
663
|
-
},
|
|
664
|
-
)
|
|
665
|
-
else:
|
|
666
|
-
for tr, tr_id in zip(
|
|
667
|
-
transcriptions,
|
|
668
|
-
[
|
|
669
|
-
"00000000-0000-0000-0000-000000000000",
|
|
670
|
-
"11111111-1111-1111-1111-111111111111",
|
|
671
|
-
],
|
|
672
|
-
strict=False,
|
|
673
|
-
):
|
|
674
|
-
responses.add(
|
|
675
|
-
responses.POST,
|
|
676
|
-
"http://testserver/api/v1/transcription/bulk/",
|
|
677
|
-
status=200,
|
|
678
|
-
json={
|
|
679
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
680
|
-
"transcriptions": [
|
|
681
|
-
{
|
|
682
|
-
"id": tr_id,
|
|
683
|
-
"element_id": tr["element_id"],
|
|
684
|
-
"text": tr["text"],
|
|
685
|
-
"orientation": "horizontal-lr",
|
|
686
|
-
"confidence": tr["confidence"],
|
|
687
|
-
}
|
|
688
|
-
],
|
|
689
|
-
},
|
|
690
|
-
)
|
|
691
|
-
|
|
692
|
-
mock_elements_worker_with_cache.create_transcriptions(
|
|
693
|
-
transcriptions=transcriptions,
|
|
694
|
-
batch_size=batch_size,
|
|
695
|
-
)
|
|
696
|
-
|
|
697
|
-
bulk_api_calls = [
|
|
698
|
-
(
|
|
699
|
-
"POST",
|
|
700
|
-
"http://testserver/api/v1/transcription/bulk/",
|
|
701
|
-
)
|
|
702
|
-
]
|
|
703
|
-
if batch_size != DEFAULT_BATCH_SIZE:
|
|
704
|
-
bulk_api_calls.append(
|
|
705
|
-
(
|
|
706
|
-
"POST",
|
|
707
|
-
"http://testserver/api/v1/transcription/bulk/",
|
|
708
|
-
)
|
|
709
|
-
)
|
|
710
|
-
|
|
711
|
-
assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
|
|
712
|
-
assert [
|
|
713
|
-
(call.request.method, call.request.url) for call in responses.calls
|
|
714
|
-
] == BASE_API_CALLS + bulk_api_calls
|
|
715
|
-
|
|
716
|
-
first_tr = {
|
|
717
|
-
**transcriptions[0],
|
|
718
|
-
"orientation": TextOrientation.HorizontalLeftToRight.value,
|
|
719
|
-
}
|
|
720
|
-
second_tr = {
|
|
721
|
-
**transcriptions[1],
|
|
722
|
-
"orientation": TextOrientation.HorizontalLeftToRight.value,
|
|
723
|
-
}
|
|
724
|
-
empty_payload = {
|
|
725
|
-
"transcriptions": [],
|
|
726
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
727
|
-
}
|
|
728
|
-
|
|
729
|
-
bodies = []
|
|
730
|
-
first_call_idx = None
|
|
731
|
-
if batch_size > 1:
|
|
732
|
-
first_call_idx = -1
|
|
733
|
-
bodies.append({**empty_payload, "transcriptions": [first_tr, second_tr]})
|
|
734
|
-
else:
|
|
735
|
-
first_call_idx = -2
|
|
736
|
-
bodies.append({**empty_payload, "transcriptions": [first_tr]})
|
|
737
|
-
bodies.append({**empty_payload, "transcriptions": [second_tr]})
|
|
738
|
-
|
|
739
|
-
assert [
|
|
740
|
-
json.loads(bulk_call.request.body)
|
|
741
|
-
for bulk_call in responses.calls[first_call_idx:]
|
|
742
|
-
] == bodies
|
|
743
|
-
|
|
744
|
-
# Check that created transcriptions were properly stored in SQLite cache
|
|
745
|
-
assert list(CachedTranscription.select()) == [
|
|
746
|
-
CachedTranscription(
|
|
747
|
-
id=UUID("00000000-0000-0000-0000-000000000000"),
|
|
748
|
-
element_id=UUID("11111111-1111-1111-1111-111111111111"),
|
|
749
|
-
text="The",
|
|
750
|
-
confidence=0.75,
|
|
751
|
-
orientation=TextOrientation.HorizontalLeftToRight,
|
|
752
|
-
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
|
|
753
|
-
),
|
|
754
|
-
CachedTranscription(
|
|
755
|
-
id=UUID("11111111-1111-1111-1111-111111111111"),
|
|
756
|
-
element_id=UUID("11111111-1111-1111-1111-111111111111"),
|
|
757
|
-
text="word",
|
|
758
|
-
confidence=0.42,
|
|
759
|
-
orientation=TextOrientation.HorizontalLeftToRight,
|
|
760
|
-
worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
|
|
761
|
-
),
|
|
762
|
-
]
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
def test_create_transcriptions_orientation(responses, mock_elements_worker_with_cache):
|
|
766
|
-
CachedElement.create(id="11111111-1111-1111-1111-111111111111", type="thing")
|
|
767
|
-
trans = [
|
|
768
|
-
{
|
|
769
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
770
|
-
"text": "Animula vagula blandula",
|
|
771
|
-
"confidence": 0.12,
|
|
772
|
-
"orientation": TextOrientation.HorizontalRightToLeft,
|
|
773
|
-
},
|
|
774
|
-
{
|
|
775
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
776
|
-
"text": "Hospes comesque corporis",
|
|
777
|
-
"confidence": 0.21,
|
|
778
|
-
"orientation": TextOrientation.VerticalLeftToRight,
|
|
779
|
-
},
|
|
780
|
-
]
|
|
781
|
-
|
|
782
|
-
responses.add(
|
|
783
|
-
responses.POST,
|
|
784
|
-
"http://testserver/api/v1/transcription/bulk/",
|
|
785
|
-
status=200,
|
|
786
|
-
json={
|
|
787
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
788
|
-
"transcriptions": [
|
|
789
|
-
{
|
|
790
|
-
"id": "00000000-0000-0000-0000-000000000000",
|
|
791
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
792
|
-
"text": "Animula vagula blandula",
|
|
793
|
-
"orientation": "horizontal-rl",
|
|
794
|
-
"confidence": 0.12,
|
|
795
|
-
},
|
|
796
|
-
{
|
|
797
|
-
"id": "11111111-1111-1111-1111-111111111111",
|
|
798
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
799
|
-
"text": "Hospes comesque corporis",
|
|
800
|
-
"orientation": "vertical-lr",
|
|
801
|
-
"confidence": 0.21,
|
|
802
|
-
},
|
|
803
|
-
],
|
|
804
|
-
},
|
|
805
|
-
)
|
|
806
|
-
|
|
807
|
-
mock_elements_worker_with_cache.create_transcriptions(
|
|
808
|
-
transcriptions=trans,
|
|
809
|
-
)
|
|
810
|
-
|
|
811
|
-
assert json.loads(responses.calls[-1].request.body) == {
|
|
812
|
-
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
813
|
-
"transcriptions": [
|
|
814
|
-
{
|
|
815
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
816
|
-
"text": "Animula vagula blandula",
|
|
817
|
-
"confidence": 0.12,
|
|
818
|
-
"orientation": TextOrientation.HorizontalRightToLeft.value,
|
|
819
|
-
},
|
|
820
|
-
{
|
|
821
|
-
"element_id": "11111111-1111-1111-1111-111111111111",
|
|
822
|
-
"text": "Hospes comesque corporis",
|
|
823
|
-
"confidence": 0.21,
|
|
824
|
-
"orientation": TextOrientation.VerticalLeftToRight.value,
|
|
825
|
-
},
|
|
826
|
-
],
|
|
827
|
-
}
|
|
828
|
-
|
|
829
|
-
# Check that oriented transcriptions were properly stored in SQLite cache
|
|
830
|
-
assert list(map(model_to_dict, CachedTranscription.select())) == [
|
|
831
|
-
{
|
|
832
|
-
"id": UUID("00000000-0000-0000-0000-000000000000"),
|
|
833
|
-
"element": {
|
|
834
|
-
"id": UUID("11111111-1111-1111-1111-111111111111"),
|
|
835
|
-
"parent_id": None,
|
|
836
|
-
"type": "thing",
|
|
837
|
-
"image": None,
|
|
838
|
-
"polygon": None,
|
|
839
|
-
"rotation_angle": 0,
|
|
840
|
-
"mirrored": False,
|
|
841
|
-
"initial": False,
|
|
842
|
-
"worker_version_id": None,
|
|
843
|
-
"worker_run_id": None,
|
|
844
|
-
"confidence": None,
|
|
845
|
-
},
|
|
846
|
-
"text": "Animula vagula blandula",
|
|
847
|
-
"confidence": 0.12,
|
|
848
|
-
"orientation": TextOrientation.HorizontalRightToLeft.value,
|
|
849
|
-
"worker_version_id": None,
|
|
850
|
-
"worker_run_id": UUID("56785678-5678-5678-5678-567856785678"),
|
|
851
|
-
},
|
|
852
|
-
{
|
|
853
|
-
"id": UUID("11111111-1111-1111-1111-111111111111"),
|
|
854
|
-
"element": {
|
|
855
|
-
"id": UUID("11111111-1111-1111-1111-111111111111"),
|
|
856
|
-
"parent_id": None,
|
|
857
|
-
"type": "thing",
|
|
858
|
-
"image": None,
|
|
859
|
-
"polygon": None,
|
|
860
|
-
"rotation_angle": 0,
|
|
861
|
-
"mirrored": False,
|
|
862
|
-
"initial": False,
|
|
863
|
-
"worker_version_id": None,
|
|
864
|
-
"worker_run_id": None,
|
|
865
|
-
"confidence": None,
|
|
866
|
-
},
|
|
867
|
-
"text": "Hospes comesque corporis",
|
|
868
|
-
"confidence": 0.21,
|
|
869
|
-
"orientation": TextOrientation.VerticalLeftToRight.value,
|
|
870
|
-
"worker_version_id": None,
|
|
871
|
-
"worker_run_id": UUID("56785678-5678-5678-5678-567856785678"),
|
|
872
|
-
},
|
|
873
|
-
]
|