arkindex-base-worker 0.3.7rc5__py3-none-any.whl → 0.5.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/METADATA +18 -19
  2. arkindex_base_worker-0.5.0a1.dist-info/RECORD +61 -0
  3. {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/WHEEL +1 -1
  4. {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/top_level.txt +2 -0
  5. arkindex_worker/cache.py +1 -1
  6. arkindex_worker/image.py +167 -2
  7. arkindex_worker/models.py +18 -0
  8. arkindex_worker/utils.py +98 -4
  9. arkindex_worker/worker/__init__.py +117 -218
  10. arkindex_worker/worker/base.py +39 -46
  11. arkindex_worker/worker/classification.py +34 -18
  12. arkindex_worker/worker/corpus.py +86 -0
  13. arkindex_worker/worker/dataset.py +89 -26
  14. arkindex_worker/worker/element.py +352 -91
  15. arkindex_worker/worker/entity.py +13 -11
  16. arkindex_worker/worker/image.py +21 -0
  17. arkindex_worker/worker/metadata.py +26 -16
  18. arkindex_worker/worker/process.py +92 -0
  19. arkindex_worker/worker/task.py +5 -4
  20. arkindex_worker/worker/training.py +25 -10
  21. arkindex_worker/worker/transcription.py +89 -68
  22. arkindex_worker/worker/version.py +3 -1
  23. hooks/pre_gen_project.py +3 -0
  24. tests/__init__.py +8 -0
  25. tests/conftest.py +47 -58
  26. tests/test_base_worker.py +212 -12
  27. tests/test_dataset_worker.py +294 -437
  28. tests/test_elements_worker/{test_classifications.py → test_classification.py} +216 -100
  29. tests/test_elements_worker/test_cli.py +3 -11
  30. tests/test_elements_worker/test_corpus.py +168 -0
  31. tests/test_elements_worker/test_dataset.py +106 -157
  32. tests/test_elements_worker/test_element.py +427 -0
  33. tests/test_elements_worker/test_element_create_multiple.py +715 -0
  34. tests/test_elements_worker/test_element_create_single.py +528 -0
  35. tests/test_elements_worker/test_element_list_children.py +969 -0
  36. tests/test_elements_worker/test_element_list_parents.py +530 -0
  37. tests/test_elements_worker/{test_entities.py → test_entity_create.py} +37 -195
  38. tests/test_elements_worker/test_entity_list_and_check.py +160 -0
  39. tests/test_elements_worker/test_image.py +66 -0
  40. tests/test_elements_worker/test_metadata.py +252 -161
  41. tests/test_elements_worker/test_process.py +89 -0
  42. tests/test_elements_worker/test_task.py +8 -18
  43. tests/test_elements_worker/test_training.py +17 -8
  44. tests/test_elements_worker/test_transcription_create.py +873 -0
  45. tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
  46. tests/test_elements_worker/test_transcription_list.py +450 -0
  47. tests/test_elements_worker/test_version.py +60 -0
  48. tests/test_elements_worker/test_worker.py +578 -293
  49. tests/test_image.py +542 -209
  50. tests/test_merge.py +1 -2
  51. tests/test_utils.py +89 -4
  52. worker-demo/tests/__init__.py +0 -0
  53. worker-demo/tests/conftest.py +32 -0
  54. worker-demo/tests/test_worker.py +12 -0
  55. worker-demo/worker_demo/__init__.py +6 -0
  56. worker-demo/worker_demo/worker.py +19 -0
  57. arkindex_base_worker-0.3.7rc5.dist-info/RECORD +0 -41
  58. tests/test_elements_worker/test_elements.py +0 -2713
  59. tests/test_elements_worker/test_transcriptions.py +0 -2119
  60. {arkindex_base_worker-0.3.7rc5.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/LICENSE +0 -0
@@ -0,0 +1,530 @@
1
+ from uuid import UUID
2
+
3
+ import pytest
4
+
5
+ from arkindex_worker.cache import (
6
+ CachedElement,
7
+ )
8
+ from arkindex_worker.models import Element
9
+
10
+ from . import BASE_API_CALLS
11
+
12
+
13
def test_list_element_parents_wrong_element(mock_elements_worker):
    """A missing or wrongly typed `element` must be rejected by an assertion."""
    expected_error = (
        "element shouldn't be null and should be an Element or CachedElement"
    )

    # Same failure is expected for a null element and for an unsupported type
    for invalid_element in (None, "not element type"):
        with pytest.raises(AssertionError, match=expected_error):
            mock_elements_worker.list_element_parents(element=invalid_element)
25
+
26
+
27
def test_list_element_parents_wrong_folder(mock_elements_worker):
    """A non-boolean `folder` filter must be rejected by an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "folder should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_parents(element=element, folder="not bool")
35
+
36
+
37
def test_list_element_parents_wrong_name(mock_elements_worker):
    """A non-string `name` filter must be rejected by an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "name should be of type str"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_parents(element=element, name=1234)
45
+
46
+
47
def test_list_element_parents_wrong_recursive(mock_elements_worker):
    """A non-boolean `recursive` flag must be rejected by an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "recursive should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_parents(
            element=element, recursive="not bool"
        )
55
+
56
+
57
def test_list_element_parents_wrong_type(mock_elements_worker):
    """A non-string `type` filter must be rejected by an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "type should be of type str"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_parents(element=element, type=1234)
65
+
66
+
67
def test_list_element_parents_wrong_with_classes(mock_elements_worker):
    """A non-boolean `with_classes` flag must be rejected by an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "with_classes should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_parents(
            element=element, with_classes="not bool"
        )
75
+
76
+
77
def test_list_element_parents_wrong_with_corpus(mock_elements_worker):
    """A non-boolean `with_corpus` flag must be rejected by an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "with_corpus should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_parents(
            element=element, with_corpus="not bool"
        )
85
+
86
+
87
def test_list_element_parents_wrong_with_has_children(mock_elements_worker):
    """A non-boolean `with_has_children` flag must be rejected by an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "with_has_children should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_parents(
            element=element, with_has_children="not bool"
        )
97
+
98
+
99
def test_list_element_parents_wrong_with_zone(mock_elements_worker):
    """A non-boolean `with_zone` flag must be rejected by an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "with_zone should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_parents(
            element=element, with_zone="not bool"
        )
107
+
108
+
109
def test_list_element_parents_wrong_with_metadata(mock_elements_worker):
    """A non-boolean `with_metadata` flag must be rejected by an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "with_metadata should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_parents(
            element=element, with_metadata="not bool"
        )
117
+
118
+
119
@pytest.mark.parametrize(
    ("param", "value"),
    [
        ("worker_run", 1234),
        ("transcription_worker_run", 1234),
    ],
)
def test_list_element_parents_wrong_worker_run(mock_elements_worker, param, value):
    """A worker run filter that is neither str nor bool must fail an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = f"{param} should be of type str or bool"
    # The filter name is dynamic, so every argument is passed through one mapping
    call_kwargs = {"element": element, param: value}

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_parents(**call_kwargs)
134
+
135
+
136
@pytest.mark.parametrize(
    ("param", "alternative", "value"),
    [
        ("worker_version", "worker_run", 1234),
        ("transcription_worker_version", "transcription_worker_run", 1234),
    ],
)
def test_list_element_parents_wrong_worker_version(
    mock_elements_worker, param, alternative, value
):
    """A badly typed worker version filter must warn about deprecation and fail an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    deprecation_notice = (
        f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
    )
    type_error = f"{param} should be of type str or bool"

    # WARNING: pytest.deprecated_call must be entered BEFORE pytest.raises (i.e. be the
    # outer context), otherwise its `match` argument won't be checked
    with pytest.deprecated_call(match=deprecation_notice):
        with pytest.raises(AssertionError, match=type_error):
            mock_elements_worker.list_element_parents(
                **{"element": element, param: value}
            )
159
+
160
+
161
@pytest.mark.parametrize(
    "param",
    [
        "worker_run",
        "transcription_worker_run",
    ],
)
def test_list_element_parents_wrong_bool_worker_run(mock_elements_worker, param):
    """Passing `True` as a worker run filter must fail an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = f"if of type bool, {param} can only be set to False"
    call_kwargs = {"element": element, param: True}

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_parents(**call_kwargs)
178
+
179
+
180
@pytest.mark.parametrize(
    ("param", "alternative"),
    [
        ("worker_version", "worker_run"),
        ("transcription_worker_version", "transcription_worker_run"),
    ],
)
def test_list_element_parents_wrong_bool_worker_version(
    mock_elements_worker, param, alternative
):
    """Passing `True` as a worker version filter must warn about deprecation and fail an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    deprecation_notice = (
        f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
    )
    bool_error = f"if of type bool, {param} can only be set to False"

    # WARNING: pytest.deprecated_call must be entered BEFORE pytest.raises (i.e. be the
    # outer context), otherwise its `match` argument won't be checked
    with pytest.deprecated_call(match=deprecation_notice):
        with pytest.raises(AssertionError, match=bool_error):
            mock_elements_worker.list_element_parents(
                **{"element": element, param: True}
            )
205
+
206
+
207
def test_list_element_parents_api_error(responses, mock_elements_worker):
    """An API error while listing parents must abort pagination after the retries."""
    parents_url = "http://testserver/api/v1/elements/12341234-1234-1234-1234-123412341234/parents/"
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    responses.add(responses.GET, parents_url, status=418)

    with pytest.raises(
        Exception, match="Stopping pagination as data will be incomplete"
    ):
        # The request is only sent once the first item is requested
        next(mock_elements_worker.list_element_parents(element=element))

    api_calls = [(call.request.method, call.request.url) for call in responses.calls]
    assert len(api_calls) == len(BASE_API_CALLS) + 5
    # We do 5 retries
    assert api_calls == BASE_API_CALLS + [("GET", parents_url)] * 5
246
+
247
+
248
def test_list_element_parents(responses, mock_elements_worker):
    """Parents returned by the API must be yielded unchanged, using a single request."""
    parents_url = "http://testserver/api/v1/elements/12341234-1234-1234-1234-123412341234/parents/"
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_parents = [
        {
            "id": parent_id,
            "type": "page",
            "name": parent_name,
            "corpus": {},
            "thumbnail_url": None,
            "zone": {},
            "best_classes": None,
            "has_children": None,
            "worker_version_id": None,
            "worker_run_id": None,
        }
        for parent_id, parent_name in (
            ("0000", "Test"),
            ("1111", "Test 2"),
            ("2222", "Test 3"),
        )
    ]
    responses.add(
        responses.GET,
        parents_url,
        status=200,
        json={
            "count": 3,
            "next": None,
            "results": expected_parents,
        },
    )

    # strict=True makes the test fail when fewer (or more) parents than expected are
    # yielded; a plain enumerate loop would silently pass on an empty listing.
    for parent, expected in zip(
        mock_elements_worker.list_element_parents(element=elt),
        expected_parents,
        strict=True,
    ):
        assert parent == expected

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [("GET", parents_url)]
313
+
314
+
315
def test_list_element_parents_manual_worker_version(responses, mock_elements_worker):
    """`worker_version=False` must be forwarded as a query parameter, with a deprecation warning."""
    parents_url = "http://testserver/api/v1/elements/12341234-1234-1234-1234-123412341234/parents/?worker_version=False"
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_parents = [
        {
            "id": "0000",
            "type": "page",
            "name": "Test",
            "corpus": {},
            "thumbnail_url": None,
            "zone": {},
            "best_classes": None,
            "has_children": None,
            "worker_version_id": None,
            "worker_run_id": None,
        }
    ]
    responses.add(
        responses.GET,
        parents_url,
        status=200,
        json={
            "count": 1,
            "next": None,
            "results": expected_parents,
        },
    )

    with pytest.deprecated_call(
        match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
    ):
        # strict=True makes the test fail when fewer (or more) parents than expected
        # are yielded; a plain enumerate loop would silently pass on an empty listing.
        for parent, expected in zip(
            mock_elements_worker.list_element_parents(
                element=elt, worker_version=False
            ),
            expected_parents,
            strict=True,
        ):
            assert parent == expected

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [("GET", parents_url)]
359
+
360
+
361
def test_list_element_parents_manual_worker_run(responses, mock_elements_worker):
    """`worker_run=False` must be forwarded to the API as a query parameter."""
    parents_url = "http://testserver/api/v1/elements/12341234-1234-1234-1234-123412341234/parents/?worker_run=False"
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_parents = [
        {
            "id": "0000",
            "type": "page",
            "name": "Test",
            "corpus": {},
            "thumbnail_url": None,
            "zone": {},
            "best_classes": None,
            "has_children": None,
            "worker_version_id": None,
            "worker_run_id": None,
        }
    ]
    responses.add(
        responses.GET,
        parents_url,
        status=200,
        json={
            "count": 1,
            "next": None,
            "results": expected_parents,
        },
    )

    # strict=True makes the test fail when fewer (or more) parents than expected are
    # yielded; a plain enumerate loop would silently pass on an empty listing.
    for parent, expected in zip(
        mock_elements_worker.list_element_parents(element=elt, worker_run=False),
        expected_parents,
        strict=True,
    ):
        assert parent == expected

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [("GET", parents_url)]
402
+
403
+
404
def test_list_element_parents_with_cache_unhandled_param(
    mock_elements_worker_with_cache,
):
    """With the local cache enabled, an unsupported filter must fail an assertion."""
    element = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
    # `with_corpus` is not one of the filters named in the expected message
    unsupported_filters = {"with_corpus": True}

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker_with_cache.list_element_parents(
            element=element, **unsupported_filters
        )
416
+
417
+
418
@pytest.mark.usefixtures("_mock_cached_elements")
@pytest.mark.parametrize(
    ("filters", "expected_id"),
    [
        # Filter on element
        (
            {"element": CachedElement(id="11111111-1111-1111-1111-111111111111")},
            "12341234-1234-1234-1234-123412341234",
        ),
        # Filter on element and double_page
        (
            {
                "element": CachedElement(id="22222222-2222-2222-2222-222222222222"),
                "type": "double_page",
            },
            "12341234-1234-1234-1234-123412341234",
        ),
        # Filter on element and worker run
        (
            {
                "element": CachedElement(id="22222222-2222-2222-2222-222222222222"),
                "worker_run": "56785678-5678-5678-5678-567856785678",
            },
            "12341234-1234-1234-1234-123412341234",
        ),
        # Filter on element, manual worker run
        (
            {
                "element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
                "worker_run": False,
            },
            "99999999-9999-9999-9999-999999999999",
        ),
    ],
)
def test_list_element_parents_with_cache(
    responses,
    mock_elements_worker_with_cache,
    filters,
    expected_id,
):
    """Parents must be resolved from the local cache without any extra API call."""
    # Check we have 5 elements already present in database
    cached_total = CachedElement.select().count()
    assert cached_total == 5

    # Query database through cache: exactly one parent must match the filters
    cached_parents = mock_elements_worker_with_cache.list_element_parents(**filters)
    assert cached_parents.count() == 1
    expected_uuid = UUID(expected_id)
    for cached_parent in cached_parents.order_by("id"):
        assert cached_parent.id == expected_uuid

    # Check the worker never hits the API for elements
    api_calls = [(call.request.method, call.request.url) for call in responses.calls]
    assert len(api_calls) == len(BASE_API_CALLS)
    assert api_calls == BASE_API_CALLS
475
+
476
+
477
@pytest.mark.usefixtures("_mock_cached_elements")
@pytest.mark.parametrize(
    ("filters", "expected_id"),
    [
        # Filter on element and worker version
        (
            {
                "element": CachedElement(id="33333333-3333-3333-3333-333333333333"),
                "worker_version": "56785678-5678-5678-5678-567856785678",
            },
            "12341234-1234-1234-1234-123412341234",
        ),
        # Filter on element, type double_page and worker version
        (
            {
                "element": CachedElement(id="11111111-1111-1111-1111-111111111111"),
                "type": "double_page",
                "worker_version": "56785678-5678-5678-5678-567856785678",
            },
            "12341234-1234-1234-1234-123412341234",
        ),
        # Filter on element, manual worker version
        (
            {
                "element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
                "worker_version": False,
            },
            "99999999-9999-9999-9999-999999999999",
        ),
    ],
)
def test_list_element_parents_with_cache_deprecation(
    responses,
    mock_elements_worker_with_cache,
    filters,
    expected_id,
):
    """Filtering cached parents by `worker_version` must still work, with a deprecation warning."""
    # Check we have 5 elements already present in database
    cached_total = CachedElement.select().count()
    assert cached_total == 5

    deprecation_notice = "`worker_version` usage is deprecated. Consider using `worker_run` instead."
    with pytest.deprecated_call(match=deprecation_notice):
        # Query database through cache
        cached_parents = mock_elements_worker_with_cache.list_element_parents(
            **filters
        )

    # Exactly one parent must match the filters
    assert cached_parents.count() == 1
    expected_uuid = UUID(expected_id)
    for cached_parent in cached_parents.order_by("id"):
        assert cached_parent.id == expected_uuid

    # Check the worker never hits the API for elements
    api_calls = [(call.request.method, call.request.url) for call in responses.calls]
    assert len(api_calls) == len(BASE_API_CALLS)
    assert api_calls == BASE_API_CALLS