arkindex-base-worker 0.3.7rc5__py3-none-any.whl → 0.5.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/METADATA +18 -19
  2. arkindex_base_worker-0.5.0a1.dist-info/RECORD +61 -0
  3. {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/WHEEL +1 -1
  4. {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/top_level.txt +2 -0
  5. arkindex_worker/cache.py +1 -1
  6. arkindex_worker/image.py +167 -2
  7. arkindex_worker/models.py +18 -0
  8. arkindex_worker/utils.py +98 -4
  9. arkindex_worker/worker/__init__.py +117 -218
  10. arkindex_worker/worker/base.py +39 -46
  11. arkindex_worker/worker/classification.py +34 -18
  12. arkindex_worker/worker/corpus.py +86 -0
  13. arkindex_worker/worker/dataset.py +89 -26
  14. arkindex_worker/worker/element.py +352 -91
  15. arkindex_worker/worker/entity.py +13 -11
  16. arkindex_worker/worker/image.py +21 -0
  17. arkindex_worker/worker/metadata.py +26 -16
  18. arkindex_worker/worker/process.py +92 -0
  19. arkindex_worker/worker/task.py +5 -4
  20. arkindex_worker/worker/training.py +25 -10
  21. arkindex_worker/worker/transcription.py +89 -68
  22. arkindex_worker/worker/version.py +3 -1
  23. hooks/pre_gen_project.py +3 -0
  24. tests/__init__.py +8 -0
  25. tests/conftest.py +47 -58
  26. tests/test_base_worker.py +212 -12
  27. tests/test_dataset_worker.py +294 -437
  28. tests/test_elements_worker/{test_classifications.py → test_classification.py} +216 -100
  29. tests/test_elements_worker/test_cli.py +3 -11
  30. tests/test_elements_worker/test_corpus.py +168 -0
  31. tests/test_elements_worker/test_dataset.py +106 -157
  32. tests/test_elements_worker/test_element.py +427 -0
  33. tests/test_elements_worker/test_element_create_multiple.py +715 -0
  34. tests/test_elements_worker/test_element_create_single.py +528 -0
  35. tests/test_elements_worker/test_element_list_children.py +969 -0
  36. tests/test_elements_worker/test_element_list_parents.py +530 -0
  37. tests/test_elements_worker/{test_entities.py → test_entity_create.py} +37 -195
  38. tests/test_elements_worker/test_entity_list_and_check.py +160 -0
  39. tests/test_elements_worker/test_image.py +66 -0
  40. tests/test_elements_worker/test_metadata.py +252 -161
  41. tests/test_elements_worker/test_process.py +89 -0
  42. tests/test_elements_worker/test_task.py +8 -18
  43. tests/test_elements_worker/test_training.py +17 -8
  44. tests/test_elements_worker/test_transcription_create.py +873 -0
  45. tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
  46. tests/test_elements_worker/test_transcription_list.py +450 -0
  47. tests/test_elements_worker/test_version.py +60 -0
  48. tests/test_elements_worker/test_worker.py +578 -293
  49. tests/test_image.py +542 -209
  50. tests/test_merge.py +1 -2
  51. tests/test_utils.py +89 -4
  52. worker-demo/tests/__init__.py +0 -0
  53. worker-demo/tests/conftest.py +32 -0
  54. worker-demo/tests/test_worker.py +12 -0
  55. worker-demo/worker_demo/__init__.py +6 -0
  56. worker-demo/worker_demo/worker.py +19 -0
  57. arkindex_base_worker-0.3.7rc5.dist-info/RECORD +0 -41
  58. tests/test_elements_worker/test_elements.py +0 -2713
  59. tests/test_elements_worker/test_transcriptions.py +0 -2119
  60. {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/LICENSE +0 -0
@@ -0,0 +1,873 @@
1
+ import json
2
+ import re
3
+ from uuid import UUID
4
+
5
+ import pytest
6
+ from playhouse.shortcuts import model_to_dict
7
+
8
+ from arkindex.exceptions import ErrorResponse
9
+ from arkindex_worker.cache import CachedElement, CachedTranscription
10
+ from arkindex_worker.models import Element
11
+ from arkindex_worker.utils import DEFAULT_BATCH_SIZE
12
+ from arkindex_worker.worker.transcription import TextOrientation
13
+
14
+ from . import BASE_API_CALLS
15
+
16
+
17
def test_create_transcription_wrong_element(mock_elements_worker):
    """Check that ``create_transcription`` rejects an invalid ``element``.

    Both ``None`` and a plain string are expected to trigger the same
    assertion message.
    """
    expected_error = (
        "element shouldn't be null and should be an Element or CachedElement"
    )

    for invalid_element in (None, "not element type"):
        with pytest.raises(AssertionError, match=expected_error):
            mock_elements_worker.create_transcription(
                element=invalid_element,
                text="i am a line",
                confidence=0.42,
            )
37
+
38
+
39
def test_create_transcription_wrong_text(mock_elements_worker):
    """Check that ``create_transcription`` rejects an invalid ``text``.

    A missing text (``None``) and a non-string text (``1234``) are both
    expected to trigger the same assertion message.
    """
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "text shouldn't be null and should be of type str"

    for invalid_text in (None, 1234):
        with pytest.raises(AssertionError, match=expected_error):
            mock_elements_worker.create_transcription(
                element=elt,
                text=invalid_text,
                confidence=0.42,
            )
59
+
60
+
61
def test_create_transcription_wrong_confidence(mock_elements_worker):
    """Check that ``create_transcription`` rejects an invalid ``confidence``.

    The rejected values are ``None``, a string, the integer ``0`` and the
    out-of-range float ``2.00``.
    """
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    # The message contains regex metacharacters ("[0..1]"), so it is escaped
    # before being handed to ``pytest.raises(match=...)``.
    expected_error = re.escape(
        "confidence shouldn't be null and should be a float in [0..1] range"
    )

    for invalid_confidence in (None, "wrong confidence", 0, 2.00):
        with pytest.raises(AssertionError, match=expected_error):
            mock_elements_worker.create_transcription(
                element=elt,
                text="i am a line",
                confidence=invalid_confidence,
            )
111
+
112
+
113
def test_create_transcription_default_orientation(responses, mock_elements_worker):
    """Check the request body when no ``orientation`` is passed.

    The last recorded request is expected to carry ``"horizontal-lr"`` as
    its orientation.
    """
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    endpoint = f"http://testserver/api/v1/element/{elt.id}/transcription/"
    mocked_response = {
        "id": "56785678-5678-5678-5678-567856785678",
        "text": "Animula vagula blandula",
        "confidence": 0.42,
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
    }
    responses.add(responses.POST, endpoint, status=200, json=mocked_response)

    mock_elements_worker.create_transcription(
        element=elt,
        text="Animula vagula blandula",
        confidence=0.42,
    )

    sent_payload = json.loads(responses.calls[-1].request.body)
    assert sent_payload == {
        "text": "Animula vagula blandula",
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
        "confidence": 0.42,
        "orientation": "horizontal-lr",
    }
137
+
138
+
139
def test_create_transcription_orientation(responses, mock_elements_worker):
    """Check the request body when an explicit ``orientation`` is passed.

    ``TextOrientation.VerticalLeftToRight`` is expected to be sent as
    ``"vertical-lr"`` in the last recorded request.
    """
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    endpoint = f"http://testserver/api/v1/element/{elt.id}/transcription/"
    mocked_response = {
        "id": "56785678-5678-5678-5678-567856785678",
        "text": "Animula vagula blandula",
        "confidence": 0.42,
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
    }
    responses.add(responses.POST, endpoint, status=200, json=mocked_response)

    mock_elements_worker.create_transcription(
        element=elt,
        text="Animula vagula blandula",
        orientation=TextOrientation.VerticalLeftToRight,
        confidence=0.42,
    )

    sent_payload = json.loads(responses.calls[-1].request.body)
    assert sent_payload == {
        "text": "Animula vagula blandula",
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
        "confidence": 0.42,
        "orientation": "vertical-lr",
    }
164
+
165
+
166
def test_create_transcription_wrong_orientation(mock_elements_worker):
    """Check that a plain string is rejected as ``orientation``."""
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = (
        "orientation shouldn't be null and should be of type TextOrientation"
    )
    call_arguments = {
        "element": elt,
        "text": "Animula vagula blandula",
        "confidence": 0.26,
        # Not a TextOrientation member: this is the value under test.
        "orientation": "elliptical",
    }

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.create_transcription(**call_arguments)
178
+
179
+
180
def test_create_transcription_api_error(responses, mock_elements_worker):
    """Check that an HTTP 418 answer surfaces as ``ErrorResponse``.

    Exactly one POST on top of ``BASE_API_CALLS`` is expected to be recorded.
    """
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    endpoint = f"http://testserver/api/v1/element/{elt.id}/transcription/"
    responses.add(responses.POST, endpoint, status=418)

    with pytest.raises(ErrorResponse):
        mock_elements_worker.create_transcription(
            element=elt,
            text="i am a line",
            confidence=0.42,
        )

    recorded_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert recorded_calls == BASE_API_CALLS + [("POST", endpoint)]
201
+
202
+
203
def test_create_transcription(responses, mock_elements_worker):
    """Check the nominal ``create_transcription`` call without cache.

    Verifies both the recorded API calls and the JSON body of the POST.
    """
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    endpoint = f"http://testserver/api/v1/element/{elt.id}/transcription/"
    mocked_response = {
        "id": "56785678-5678-5678-5678-567856785678",
        "text": "i am a line",
        "confidence": 0.42,
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
    }
    responses.add(responses.POST, endpoint, status=200, json=mocked_response)

    mock_elements_worker.create_transcription(
        element=elt,
        text="i am a line",
        confidence=0.42,
    )

    recorded_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert recorded_calls == BASE_API_CALLS + [("POST", endpoint)]

    sent_payload = json.loads(responses.calls[-1].request.body)
    assert sent_payload == {
        "text": "i am a line",
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
        "confidence": 0.42,
        "orientation": "horizontal-lr",
    }
236
+
237
+
238
def test_create_transcription_with_cache(responses, mock_elements_worker_with_cache):
    """Check ``create_transcription`` on a cached element.

    Besides the API call and its body, the created transcription is expected
    to be present in the SQLite cache.
    """
    elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
    endpoint = f"http://testserver/api/v1/element/{elt.id}/transcription/"
    mocked_response = {
        "id": "56785678-5678-5678-5678-567856785678",
        "text": "i am a line",
        "confidence": 0.42,
        "orientation": "horizontal-lr",
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
    }
    responses.add(responses.POST, endpoint, status=200, json=mocked_response)

    mock_elements_worker_with_cache.create_transcription(
        element=elt,
        text="i am a line",
        confidence=0.42,
    )

    recorded_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert recorded_calls == BASE_API_CALLS + [("POST", endpoint)]

    sent_payload = json.loads(responses.calls[-1].request.body)
    assert sent_payload == {
        "text": "i am a line",
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
        "orientation": "horizontal-lr",
        "confidence": 0.42,
    }

    # Check that created transcription was properly stored in SQLite cache
    expected_row = CachedTranscription(
        id=UUID("56785678-5678-5678-5678-567856785678"),
        element_id=UUID(elt.id),
        text="i am a line",
        confidence=0.42,
        orientation=TextOrientation.HorizontalLeftToRight,
        worker_version_id=None,
        worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
    )
    assert list(CachedTranscription.select()) == [expected_row]
286
+
287
+
288
def test_create_transcription_orientation_with_cache(
    responses, mock_elements_worker_with_cache
):
    """Check that an explicit orientation reaches both the API and the cache.

    The cached row is compared through ``model_to_dict``, so the nested
    ``element`` entry is the full cached element.
    """
    elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
    endpoint = f"http://testserver/api/v1/element/{elt.id}/transcription/"
    mocked_response = {
        "id": "56785678-5678-5678-5678-567856785678",
        "text": "Animula vagula blandula",
        "confidence": 0.42,
        "orientation": "vertical-lr",
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
    }
    responses.add(responses.POST, endpoint, status=200, json=mocked_response)

    mock_elements_worker_with_cache.create_transcription(
        element=elt,
        text="Animula vagula blandula",
        orientation=TextOrientation.VerticalLeftToRight,
        confidence=0.42,
    )

    sent_payload = json.loads(responses.calls[-1].request.body)
    assert sent_payload == {
        "text": "Animula vagula blandula",
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
        "orientation": "vertical-lr",
        "confidence": 0.42,
    }

    # Check that the text orientation was properly stored in SQLite cache
    expected_element = {
        "id": UUID("12341234-1234-1234-1234-123412341234"),
        "parent_id": None,
        "type": "thing",
        "image": None,
        "polygon": None,
        "rotation_angle": 0,
        "mirrored": False,
        "initial": False,
        "worker_version_id": None,
        "worker_run_id": None,
        "confidence": None,
    }
    cached_rows = [model_to_dict(row) for row in CachedTranscription.select()]
    assert cached_rows == [
        {
            "id": UUID("56785678-5678-5678-5678-567856785678"),
            "element": expected_element,
            "text": "Animula vagula blandula",
            "confidence": 0.42,
            "orientation": TextOrientation.VerticalLeftToRight.value,
            "worker_version_id": None,
            "worker_run_id": UUID("56785678-5678-5678-5678-567856785678"),
        }
    ]
340
+
341
+
342
def test_create_transcriptions_wrong_transcriptions(mock_elements_worker):
    """Check that ``create_transcriptions`` rejects malformed input.

    First the ``transcriptions`` argument itself is checked (``None`` and a
    non-list). Then a valid first item is paired with an invalid second item,
    and the error message is expected to point at index 1.
    """
    not_a_list_error = "transcriptions shouldn't be null and should be of type list"
    for not_a_list in (None, 1234):
        with pytest.raises(AssertionError, match=not_a_list_error):
            mock_elements_worker.create_transcriptions(
                transcriptions=not_a_list,
            )

    element_id_error = "Transcription at index 1 in transcriptions: element_id shouldn't be null and should be of type str"
    text_error = "Transcription at index 1 in transcriptions: text shouldn't be null and should be of type str"
    # Escaped because the message contains regex metacharacters ("[0..1]").
    confidence_error = re.escape(
        "Transcription at index 1 in transcriptions: confidence shouldn't be null and should be a float in [0..1] range"
    )
    orientation_error = "Transcription at index 1 in transcriptions: orientation shouldn't be null and should be of type TextOrientation"

    # (expected message pattern, invalid second transcription), in check order.
    invalid_cases = [
        # element_id: missing, None, wrong type
        (element_id_error, {"text": "word", "confidence": 0.5}),
        (element_id_error, {"element_id": None, "text": "word", "confidence": 0.5}),
        (element_id_error, {"element_id": 1234, "text": "word", "confidence": 0.5}),
        # text: missing, None, wrong type
        (
            text_error,
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "confidence": 0.5,
            },
        ),
        (
            text_error,
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": None,
                "confidence": 0.5,
            },
        ),
        (
            text_error,
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": 1234,
                "confidence": 0.5,
            },
        ),
        # confidence: missing, None, string, integer 0, out of range
        (
            confidence_error,
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": "word",
            },
        ),
        (
            confidence_error,
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": "word",
                "confidence": None,
            },
        ),
        (
            confidence_error,
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": "word",
                "confidence": "a wrong confidence",
            },
        ),
        (
            confidence_error,
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": "word",
                "confidence": 0,
            },
        ),
        (
            confidence_error,
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": "word",
                "confidence": 2.00,
            },
        ),
        # orientation: not a TextOrientation member
        (
            orientation_error,
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": "word",
                "confidence": 0.28,
                "orientation": "wobble",
            },
        ),
    ]

    for expected_error, invalid_item in invalid_cases:
        with pytest.raises(AssertionError, match=expected_error):
            mock_elements_worker.create_transcriptions(
                transcriptions=[
                    # A fresh valid first item is built for every call.
                    {
                        "element_id": "11111111-1111-1111-1111-111111111111",
                        "text": "The",
                        "confidence": 0.75,
                    },
                    invalid_item,
                ],
            )
594
+
595
+
596
def test_create_transcriptions_api_error(responses, mock_elements_worker):
    """Check that an HTTP 418 on the bulk endpoint surfaces as ``ErrorResponse``.

    Exactly one POST on top of ``BASE_API_CALLS`` is expected to be recorded.
    """
    bulk_endpoint = "http://testserver/api/v1/transcription/bulk/"
    responses.add(responses.POST, bulk_endpoint, status=418)

    payload = [
        {
            "element_id": "11111111-1111-1111-1111-111111111111",
            "text": text,
            "confidence": confidence,
        }
        for text, confidence in (("The", 0.75), ("word", 0.42))
    ]

    with pytest.raises(ErrorResponse):
        mock_elements_worker.create_transcriptions(transcriptions=payload)

    recorded_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert recorded_calls == BASE_API_CALLS + [("POST", bulk_endpoint)]
622
+
623
+
624
@pytest.mark.parametrize("batch_size", [DEFAULT_BATCH_SIZE, 1])
def test_create_transcriptions(batch_size, responses, mock_elements_worker_with_cache):
    """Check bulk transcription creation, with and without batching.

    The mocked responses, the expected API calls and the expected request
    bodies are all derived from one split of the input into consecutive
    chunks of at most ``batch_size`` items. The three expectations therefore
    cannot disagree with each other.

    NOTE(review): this assumes the worker sends consecutive chunks of at most
    ``batch_size`` transcriptions, one request per chunk. That is what the two
    parametrized values exercise — confirm before adding other sizes.
    """
    bulk_endpoint = "http://testserver/api/v1/transcription/bulk/"
    worker_run_id = "56785678-5678-5678-5678-567856785678"

    CachedElement.create(id="11111111-1111-1111-1111-111111111111", type="thing")
    transcriptions = [
        {
            "element_id": "11111111-1111-1111-1111-111111111111",
            "text": "The",
            "confidence": 0.75,
        },
        {
            "element_id": "11111111-1111-1111-1111-111111111111",
            "text": "word",
            "confidence": 0.42,
        },
    ]
    created_ids = [
        "00000000-0000-0000-0000-000000000000",
        "11111111-1111-1111-1111-111111111111",
    ]

    # What the API answers for each transcription; strict=True makes the test
    # fail loudly if the two fixture lists ever get out of sync.
    created = [
        {
            "id": tr_id,
            "element_id": tr["element_id"],
            "text": tr["text"],
            "orientation": "horizontal-lr",
            "confidence": tr["confidence"],
        }
        for tr, tr_id in zip(transcriptions, created_ids, strict=True)
    ]
    # What the worker is expected to send for each transcription.
    sent = [
        {**tr, "orientation": TextOrientation.HorizontalLeftToRight.value}
        for tr in transcriptions
    ]

    # One mocked response per expected batch, registered in call order.
    batch_starts = range(0, len(transcriptions), batch_size)
    for start in batch_starts:
        responses.add(
            responses.POST,
            bulk_endpoint,
            status=200,
            json={
                "worker_run_id": worker_run_id,
                "transcriptions": created[start : start + batch_size],
            },
        )

    mock_elements_worker_with_cache.create_transcriptions(
        transcriptions=transcriptions,
        batch_size=batch_size,
    )

    bulk_api_calls = [("POST", bulk_endpoint)] * len(batch_starts)
    assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + bulk_api_calls

    expected_bodies = [
        {
            "transcriptions": sent[start : start + batch_size],
            "worker_run_id": worker_run_id,
        }
        for start in batch_starts
    ]
    # The bulk calls are the last ones recorded.
    assert [
        json.loads(bulk_call.request.body)
        for bulk_call in responses.calls[-len(bulk_api_calls) :]
    ] == expected_bodies

    # Check that created transcriptions were properly stored in SQLite cache
    assert list(CachedTranscription.select()) == [
        CachedTranscription(
            id=UUID("00000000-0000-0000-0000-000000000000"),
            element_id=UUID("11111111-1111-1111-1111-111111111111"),
            text="The",
            confidence=0.75,
            orientation=TextOrientation.HorizontalLeftToRight,
            worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
        ),
        CachedTranscription(
            id=UUID("11111111-1111-1111-1111-111111111111"),
            element_id=UUID("11111111-1111-1111-1111-111111111111"),
            text="word",
            confidence=0.42,
            orientation=TextOrientation.HorizontalLeftToRight,
            worker_run_id=UUID("56785678-5678-5678-5678-567856785678"),
        ),
    ]
763
+
764
+
765
def test_create_transcriptions_orientation(responses, mock_elements_worker_with_cache):
    """Check that per-transcription orientations reach the API and the cache.

    Two transcriptions with different ``TextOrientation`` members are sent in
    one bulk call. The request body and the cached rows are expected to carry
    the matching orientation values.
    """
    bulk_endpoint = "http://testserver/api/v1/transcription/bulk/"
    CachedElement.create(id="11111111-1111-1111-1111-111111111111", type="thing")

    to_create = [
        {
            "element_id": "11111111-1111-1111-1111-111111111111",
            "text": "Animula vagula blandula",
            "confidence": 0.12,
            "orientation": TextOrientation.HorizontalRightToLeft,
        },
        {
            "element_id": "11111111-1111-1111-1111-111111111111",
            "text": "Hospes comesque corporis",
            "confidence": 0.21,
            "orientation": TextOrientation.VerticalLeftToRight,
        },
    ]

    mocked_response = {
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
        "transcriptions": [
            {
                "id": "00000000-0000-0000-0000-000000000000",
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": "Animula vagula blandula",
                "orientation": "horizontal-rl",
                "confidence": 0.12,
            },
            {
                "id": "11111111-1111-1111-1111-111111111111",
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": "Hospes comesque corporis",
                "orientation": "vertical-lr",
                "confidence": 0.21,
            },
        ],
    }
    responses.add(responses.POST, bulk_endpoint, status=200, json=mocked_response)

    mock_elements_worker_with_cache.create_transcriptions(
        transcriptions=to_create,
    )

    sent_payload = json.loads(responses.calls[-1].request.body)
    assert sent_payload == {
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
        "transcriptions": [
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": "Animula vagula blandula",
                "confidence": 0.12,
                "orientation": TextOrientation.HorizontalRightToLeft.value,
            },
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": "Hospes comesque corporis",
                "confidence": 0.21,
                "orientation": TextOrientation.VerticalLeftToRight.value,
            },
        ],
    }

    # Check that oriented transcriptions were properly stored in SQLite cache
    # Both rows hang off the same cached element, so one expected dict is reused.
    expected_element = {
        "id": UUID("11111111-1111-1111-1111-111111111111"),
        "parent_id": None,
        "type": "thing",
        "image": None,
        "polygon": None,
        "rotation_angle": 0,
        "mirrored": False,
        "initial": False,
        "worker_version_id": None,
        "worker_run_id": None,
        "confidence": None,
    }
    cached_rows = [model_to_dict(row) for row in CachedTranscription.select()]
    assert cached_rows == [
        {
            "id": UUID("00000000-0000-0000-0000-000000000000"),
            "element": expected_element,
            "text": "Animula vagula blandula",
            "confidence": 0.12,
            "orientation": TextOrientation.HorizontalRightToLeft.value,
            "worker_version_id": None,
            "worker_run_id": UUID("56785678-5678-5678-5678-567856785678"),
        },
        {
            "id": UUID("11111111-1111-1111-1111-111111111111"),
            "element": expected_element,
            "text": "Hospes comesque corporis",
            "confidence": 0.21,
            "orientation": TextOrientation.VerticalLeftToRight.value,
            "worker_version_id": None,
            "worker_run_id": UUID("56785678-5678-5678-5678-567856785678"),
        },
    ]