arkindex-base-worker 0.3.7rc4__py3-none-any.whl → 0.5.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/METADATA +18 -19
  2. arkindex_base_worker-0.5.0a1.dist-info/RECORD +61 -0
  3. {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/WHEEL +1 -1
  4. {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/top_level.txt +2 -0
  5. arkindex_worker/cache.py +1 -1
  6. arkindex_worker/image.py +167 -2
  7. arkindex_worker/models.py +18 -0
  8. arkindex_worker/utils.py +98 -4
  9. arkindex_worker/worker/__init__.py +117 -218
  10. arkindex_worker/worker/base.py +39 -46
  11. arkindex_worker/worker/classification.py +45 -29
  12. arkindex_worker/worker/corpus.py +86 -0
  13. arkindex_worker/worker/dataset.py +89 -26
  14. arkindex_worker/worker/element.py +352 -91
  15. arkindex_worker/worker/entity.py +13 -11
  16. arkindex_worker/worker/image.py +21 -0
  17. arkindex_worker/worker/metadata.py +26 -16
  18. arkindex_worker/worker/process.py +92 -0
  19. arkindex_worker/worker/task.py +5 -4
  20. arkindex_worker/worker/training.py +25 -10
  21. arkindex_worker/worker/transcription.py +89 -68
  22. arkindex_worker/worker/version.py +3 -1
  23. hooks/pre_gen_project.py +3 -0
  24. tests/__init__.py +8 -0
  25. tests/conftest.py +47 -58
  26. tests/test_base_worker.py +212 -12
  27. tests/test_dataset_worker.py +294 -437
  28. tests/test_elements_worker/{test_classifications.py → test_classification.py} +313 -200
  29. tests/test_elements_worker/test_cli.py +3 -11
  30. tests/test_elements_worker/test_corpus.py +168 -0
  31. tests/test_elements_worker/test_dataset.py +106 -157
  32. tests/test_elements_worker/test_element.py +427 -0
  33. tests/test_elements_worker/test_element_create_multiple.py +715 -0
  34. tests/test_elements_worker/test_element_create_single.py +528 -0
  35. tests/test_elements_worker/test_element_list_children.py +969 -0
  36. tests/test_elements_worker/test_element_list_parents.py +530 -0
  37. tests/test_elements_worker/{test_entities.py → test_entity_create.py} +37 -195
  38. tests/test_elements_worker/test_entity_list_and_check.py +160 -0
  39. tests/test_elements_worker/test_image.py +66 -0
  40. tests/test_elements_worker/test_metadata.py +252 -161
  41. tests/test_elements_worker/test_process.py +89 -0
  42. tests/test_elements_worker/test_task.py +8 -18
  43. tests/test_elements_worker/test_training.py +17 -8
  44. tests/test_elements_worker/test_transcription_create.py +873 -0
  45. tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
  46. tests/test_elements_worker/test_transcription_list.py +450 -0
  47. tests/test_elements_worker/test_version.py +60 -0
  48. tests/test_elements_worker/test_worker.py +578 -293
  49. tests/test_image.py +542 -209
  50. tests/test_merge.py +1 -2
  51. tests/test_utils.py +89 -4
  52. worker-demo/tests/__init__.py +0 -0
  53. worker-demo/tests/conftest.py +32 -0
  54. worker-demo/tests/test_worker.py +12 -0
  55. worker-demo/worker_demo/__init__.py +6 -0
  56. worker-demo/worker_demo/worker.py +19 -0
  57. arkindex_base_worker-0.3.7rc4.dist-info/RECORD +0 -41
  58. tests/test_elements_worker/test_elements.py +0 -2713
  59. tests/test_elements_worker/test_transcriptions.py +0 -2119
  60. {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/LICENSE +0 -0
@@ -0,0 +1,528 @@
1
+ import json
2
+ import re
3
+
4
+ import pytest
5
+
6
+ from arkindex.exceptions import ErrorResponse
7
+ from arkindex_worker.models import Element
8
+ from arkindex_worker.utils import DEFAULT_BATCH_SIZE
9
+ from tests import CORPUS_ID
10
+
11
+ from . import BASE_API_CALLS
12
+
13
+
14
def test_create_sub_element_wrong_element(mock_elements_worker):
    """`create_sub_element` must reject a parent that is None or not an Element."""
    expected = "element shouldn't be null and should be of type Element"

    # Same assertion message is expected for both invalid parents
    for invalid_element in (None, "not element type"):
        with pytest.raises(AssertionError, match=expected):
            mock_elements_worker.create_sub_element(
                element=invalid_element,
                type="something",
                name="0",
                polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
            )
34
+
35
+
36
def test_create_sub_element_wrong_type(mock_elements_worker):
    """`create_sub_element` must reject a `type` that is None or not a string."""
    parent = Element({"zone": None})
    expected = "type shouldn't be null and should be of type str"

    for invalid_type in (None, 1234):
        with pytest.raises(AssertionError, match=expected):
            mock_elements_worker.create_sub_element(
                element=parent,
                type=invalid_type,
                name="0",
                polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
            )
58
+
59
+
60
def test_create_sub_element_wrong_name(mock_elements_worker):
    """`create_sub_element` must reject a `name` that is None or not a string."""
    parent = Element({"zone": None})
    expected = "name shouldn't be null and should be of type str"

    for invalid_name in (None, 1234):
        with pytest.raises(AssertionError, match=expected):
            mock_elements_worker.create_sub_element(
                element=parent,
                type="something",
                name=invalid_name,
                polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
            )
82
+
83
+
84
def test_create_sub_element_wrong_polygon(mock_elements_worker):
    """Each malformed polygon must be refused with its dedicated assertion message."""
    parent = Element({"zone": None})

    # (invalid polygon, expected assertion message), checked in this order
    invalid_cases = [
        ("not a polygon", "polygon should be None or a list"),
        ([[1, 1], [2, 2]], "polygon should have at least three points"),
        (
            [[1, 1, 1], [2, 2, 1], [2, 1, 1], [1, 2, 1]],
            "polygon points should be lists of two items",
        ),
        ([[1], [2], [2], [1]], "polygon points should be lists of two items"),
        (
            [["not a coord", 1], [2, 2], [2, 1], [1, 2]],
            "polygon points should be lists of two numbers",
        ),
    ]

    for bad_polygon, expected in invalid_cases:
        with pytest.raises(AssertionError, match=expected):
            mock_elements_worker.create_sub_element(
                element=parent,
                type="something",
                name="O",
                polygon=bad_polygon,
            )
134
+
135
+
136
@pytest.mark.parametrize("confidence", ["lol", "0.2", -1.0, 1.42, float("inf")])
def test_create_sub_element_wrong_confidence(mock_elements_worker, confidence):
    """Confidence values that are not floats, or fall outside [0..1], are refused."""
    # The message contains regex metacharacters ("[", ".", "]"), hence the escaping
    expected = re.escape("confidence should be None or a float in [0..1] range")

    with pytest.raises(AssertionError, match=expected):
        mock_elements_worker.create_sub_element(
            element=Element({"zone": None}),
            type="something",
            name="blah",
            polygon=[[0, 0], [0, 10], [10, 10], [10, 0], [0, 0]],
            confidence=confidence,
        )
149
+
150
+
151
@pytest.mark.parametrize(
    ("image", "error_type", "error_message"),
    [
        (1, AssertionError, "image should be None or string"),
        ("not a uuid", ValueError, "image is not a valid uuid."),
    ],
)
def test_create_sub_element_wrong_image(
    mock_elements_worker, image, error_type, error_message
):
    """An `image` of the wrong type or with an invalid UUID value is refused."""
    # Messages are matched literally (one of them ends with a ".")
    expected = re.escape(error_message)

    with pytest.raises(error_type, match=expected):
        mock_elements_worker.create_sub_element(
            element=Element({"zone": None}),
            type="something",
            name="blah",
            polygon=[[0, 0], [0, 10], [10, 10], [10, 0], [0, 0]],
            image=image,
        )
169
+
170
+
171
def test_create_sub_element_wrong_image_and_polygon(mock_elements_worker):
    """A polygon cannot be used when neither the call nor the parent provides an image."""
    expected = re.escape(
        "An image or a parent with an image is required to create an element with a polygon."
    )

    with pytest.raises(AssertionError, match=expected):
        mock_elements_worker.create_sub_element(
            # Parent without any zone, so no image can be taken from it
            element=Element({"zone": None}),
            type="something",
            name="blah",
            polygon=[[0, 0], [0, 10], [10, 10], [10, 0], [0, 0]],
            image=None,
        )
185
+
186
+
187
def test_create_sub_element_api_error(responses, mock_elements_worker):
    """An HTTP error on element creation is raised to the caller as `ErrorResponse`."""
    endpoint = "http://testserver/api/v1/elements/create/"
    parent = Element(
        {
            "id": "12341234-1234-1234-1234-123412341234",
            "corpus": {"id": CORPUS_ID},
            "zone": {"image": {"id": "22222222-2222-2222-2222-222222222222"}},
        }
    )
    responses.add(responses.POST, endpoint, status=418)

    with pytest.raises(ErrorResponse):
        mock_elements_worker.create_sub_element(
            element=parent,
            type="something",
            name="0",
            polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
        )

    # Exactly one request is recorded on top of the base API calls
    observed = [(call.request.method, call.request.url) for call in responses.calls]
    assert len(observed) == len(BASE_API_CALLS) + 1
    assert observed == BASE_API_CALLS + [("POST", endpoint)]
213
+
214
+
215
@pytest.mark.parametrize("slim_output", [True, False])
def test_create_sub_element(responses, mock_elements_worker, slim_output):
    """Check the request payload and the return value, with and without `slim_output`."""
    endpoint = "http://testserver/api/v1/elements/create/"
    parent = Element(
        {
            "id": "12341234-1234-1234-1234-123412341234",
            "corpus": {"id": CORPUS_ID},
            "zone": {"image": {"id": "22222222-2222-2222-2222-222222222222"}},
        }
    )
    # Payload returned by the mocked API for the created child
    child_elt = {
        "id": "12345678-1234-1234-1234-123456789123",
        "corpus": {"id": CORPUS_ID},
        "zone": {"image": {"id": "22222222-2222-2222-2222-222222222222"}},
    }
    responses.add(responses.POST, endpoint, status=200, json=child_elt)

    element_creation_response = mock_elements_worker.create_sub_element(
        element=parent,
        type="something",
        name="0",
        polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
        slim_output=slim_output,
    )

    observed = [(call.request.method, call.request.url) for call in responses.calls]
    assert len(observed) == len(BASE_API_CALLS) + 1
    assert observed == BASE_API_CALLS + [("POST", endpoint)]

    sent_payload = json.loads(responses.calls[-1].request.body)
    assert sent_payload == {
        "type": "something",
        "name": "0",
        "image": None,
        "corpus": CORPUS_ID,
        "polygon": [[1, 1], [2, 2], [2, 1], [1, 2]],
        "parent": "12341234-1234-1234-1234-123412341234",
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
        "confidence": None,
    }

    if not slim_output:
        # Full output: the whole API payload is returned
        assert Element(element_creation_response) == Element(child_elt)
    else:
        # Slim output: only the ID of the created element is returned
        assert element_creation_response == "12345678-1234-1234-1234-123456789123"
267
+
268
+
269
def test_create_sub_element_confidence(responses, mock_elements_worker):
    """A valid confidence score is sent as-is in the element creation payload."""
    endpoint = "http://testserver/api/v1/elements/create/"
    parent = Element(
        {
            "id": "12341234-1234-1234-1234-123412341234",
            "corpus": {"id": CORPUS_ID},
            "zone": {"image": {"id": "22222222-2222-2222-2222-222222222222"}},
        }
    )
    responses.add(
        responses.POST,
        endpoint,
        status=200,
        json={"id": "12345678-1234-1234-1234-123456789123"},
    )

    sub_element_id = mock_elements_worker.create_sub_element(
        element=parent,
        type="something",
        name="0",
        polygon=[[1, 1], [2, 2], [2, 1], [1, 2]],
        confidence=0.42,
    )

    observed = [(call.request.method, call.request.url) for call in responses.calls]
    assert len(observed) == len(BASE_API_CALLS) + 1
    assert observed == BASE_API_CALLS + [("POST", endpoint)]

    sent_payload = json.loads(responses.calls[-1].request.body)
    assert sent_payload == {
        "type": "something",
        "name": "0",
        "image": None,
        "corpus": CORPUS_ID,
        "polygon": [[1, 1], [2, 2], [2, 1], [1, 2]],
        "parent": "12341234-1234-1234-1234-123412341234",
        "worker_run_id": "56785678-5678-5678-5678-567856785678",
        "confidence": 0.42,
    }
    assert sub_element_id == "12345678-1234-1234-1234-123456789123"
309
+
310
+
311
@pytest.mark.parametrize(
    ("params", "error_message"),
    [
        # Invalid parent
        (
            {"parent": None, "child": None},
            "parent shouldn't be null and should be of type Element",
        ),
        (
            {"parent": "not an element", "child": None},
            "parent shouldn't be null and should be of type Element",
        ),
        # Valid parent, invalid child
        (
            {"parent": Element(zone=None), "child": None},
            "child shouldn't be null and should be of type Element",
        ),
        (
            {"parent": Element(zone=None), "child": "not an element"},
            "child shouldn't be null and should be of type Element",
        ),
    ],
)
def test_create_element_parent_invalid_params(
    mock_elements_worker, params, error_message
):
    """`create_element_parent` must reject a parent or child that is not an Element."""
    expected = re.escape(error_message)

    with pytest.raises(AssertionError, match=expected):
        mock_elements_worker.create_element_parent(**params)
337
+
338
+
339
def test_create_element_parent_api_error(responses, mock_elements_worker):
    """An HTTP error while linking a child to a parent is raised as `ErrorResponse`."""
    # The endpoint URL holds the child ID first, then the parent ID
    endpoint = "http://testserver/api/v1/element/497f6eca-6276-4993-bfeb-53cbbbba6f08/parent/12341234-1234-1234-1234-123412341234/"
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    child = Element({"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08"})
    responses.add(responses.POST, endpoint, status=418)

    with pytest.raises(ErrorResponse):
        mock_elements_worker.create_element_parent(parent=parent, child=child)

    observed = [(call.request.method, call.request.url) for call in responses.calls]
    assert len(observed) == len(BASE_API_CALLS) + 1
    assert observed == BASE_API_CALLS + [("POST", endpoint)]
363
+
364
+
365
def test_create_element_parent(responses, mock_elements_worker):
    """Linking a child to a parent issues one POST and returns the API payload."""
    # The endpoint URL holds the child ID first, then the parent ID
    endpoint = "http://testserver/api/v1/element/497f6eca-6276-4993-bfeb-53cbbbba6f08/parent/12341234-1234-1234-1234-123412341234/"
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    child = Element({"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08"})
    responses.add(
        responses.POST,
        endpoint,
        status=200,
        json={
            "parent": "12341234-1234-1234-1234-123412341234",
            "child": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
        },
    )

    created_element_parent = mock_elements_worker.create_element_parent(
        parent=parent, child=child
    )

    observed = [(call.request.method, call.request.url) for call in responses.calls]
    assert len(observed) == len(BASE_API_CALLS) + 1
    assert observed == BASE_API_CALLS + [("POST", endpoint)]
    assert created_element_parent == {
        "parent": "12341234-1234-1234-1234-123412341234",
        "child": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
    }
396
+
397
+
398
@pytest.mark.parametrize(
    ("arg_name", "data", "error_message"),
    [
        (
            "parent",
            None,
            "parent shouldn't be null and should be of type Element",
        ),
        (
            "parent",
            "not element type",
            "parent shouldn't be null and should be of type Element",
        ),
        (
            "children",
            None,
            "children shouldn't be null and should be of type list",
        ),
        (
            "children",
            "not a list",
            "children shouldn't be null and should be of type list",
        ),
        (
            "children",
            [
                Element({"id": "11111111-1111-1111-1111-111111111111"}),
                "not element type",
            ],
            "Child at index 1 in children: Should be of type Element",
        ),
    ],
)
def test_create_element_children_wrong_params(
    arg_name, data, error_message, mock_elements_worker
):
    """`create_element_children` must reject an invalid `parent` or `children` argument.

    Each case starts from a valid set of arguments and replaces the single
    argument named by `arg_name` with the invalid `data`.
    """
    call_kwargs = {
        "parent": Element({"id": "12341234-1234-1234-1234-123412341234"}),
        "children": [
            Element({"id": "11111111-1111-1111-1111-111111111111"}),
            Element({"id": "22222222-2222-2222-2222-222222222222"}),
        ],
    }
    # Overwrite with wrong data
    call_kwargs[arg_name] = data

    # `match` is a regular expression: escape the message so that it is always
    # compared literally, as done by the other parametrized tests of this module
    with pytest.raises(AssertionError, match=re.escape(error_message)):
        mock_elements_worker.create_element_children(**call_kwargs)
446
+
447
+
448
def test_create_element_children_api_error(responses, mock_elements_worker):
    """An HTTP error while linking children to a parent is raised as `ErrorResponse`."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    endpoint = f"http://testserver/api/v1/element/parent/{parent.id}/"
    responses.add(responses.POST, endpoint, status=418)

    children = [
        Element({"id": "11111111-1111-1111-1111-111111111111"}),
        Element({"id": "22222222-2222-2222-2222-222222222222"}),
    ]
    with pytest.raises(ErrorResponse):
        mock_elements_worker.create_element_children(parent=parent, children=children)

    observed = [(call.request.method, call.request.url) for call in responses.calls]
    assert len(observed) == len(BASE_API_CALLS) + 1
    assert observed == BASE_API_CALLS + [("POST", endpoint)]
474
+
475
+
476
@pytest.mark.parametrize("batch_size", [DEFAULT_BATCH_SIZE, 1])
def test_create_element_children(batch_size, responses, mock_elements_worker):
    """Children are linked to their parent through one POST request per batch.

    With a batch size of 1, each of the two children is sent in its own
    request; with a batch size covering both children, a single request
    carries both IDs.
    """
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    children = [
        Element({"id": "11111111-1111-1111-1111-111111111111"}),
        Element({"id": "22222222-2222-2222-2222-222222222222"}),
    ]
    endpoint = f"http://testserver/api/v1/element/parent/{parent.id}/"

    responses.add(
        responses.POST,
        endpoint,
        status=200,
        json={"children": []},
    )

    mock_elements_worker.create_element_children(
        parent=parent,
        children=children,
        batch_size=batch_size,
    )

    # Derive both the expected requests and their payloads from a single
    # chunking of the children, so the two expectations cannot disagree
    # whatever the value of DEFAULT_BATCH_SIZE is.
    # NOTE(review): assumes children are sent in order, `batch_size` at a time
    expected_bodies = [
        {"children": [child.id for child in children[start : start + batch_size]]}
        for start in range(0, len(children), batch_size)
    ]
    bulk_api_calls = [("POST", endpoint)] * len(expected_bodies)

    assert len(responses.calls) == len(BASE_API_CALLS) + len(bulk_api_calls)
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + bulk_api_calls

    # Only the trailing calls are the bulk requests issued by the helper
    assert [
        json.loads(bulk_call.request.body)
        for bulk_call in responses.calls[-len(expected_bodies) :]
    ] == expected_bodies