arkindex-base-worker 0.3.7rc4__py3-none-any.whl → 0.5.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/METADATA +18 -19
  2. arkindex_base_worker-0.5.0a1.dist-info/RECORD +61 -0
  3. {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/WHEEL +1 -1
  4. {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/top_level.txt +2 -0
  5. arkindex_worker/cache.py +1 -1
  6. arkindex_worker/image.py +167 -2
  7. arkindex_worker/models.py +18 -0
  8. arkindex_worker/utils.py +98 -4
  9. arkindex_worker/worker/__init__.py +117 -218
  10. arkindex_worker/worker/base.py +39 -46
  11. arkindex_worker/worker/classification.py +45 -29
  12. arkindex_worker/worker/corpus.py +86 -0
  13. arkindex_worker/worker/dataset.py +89 -26
  14. arkindex_worker/worker/element.py +352 -91
  15. arkindex_worker/worker/entity.py +13 -11
  16. arkindex_worker/worker/image.py +21 -0
  17. arkindex_worker/worker/metadata.py +26 -16
  18. arkindex_worker/worker/process.py +92 -0
  19. arkindex_worker/worker/task.py +5 -4
  20. arkindex_worker/worker/training.py +25 -10
  21. arkindex_worker/worker/transcription.py +89 -68
  22. arkindex_worker/worker/version.py +3 -1
  23. hooks/pre_gen_project.py +3 -0
  24. tests/__init__.py +8 -0
  25. tests/conftest.py +47 -58
  26. tests/test_base_worker.py +212 -12
  27. tests/test_dataset_worker.py +294 -437
  28. tests/test_elements_worker/{test_classifications.py → test_classification.py} +313 -200
  29. tests/test_elements_worker/test_cli.py +3 -11
  30. tests/test_elements_worker/test_corpus.py +168 -0
  31. tests/test_elements_worker/test_dataset.py +106 -157
  32. tests/test_elements_worker/test_element.py +427 -0
  33. tests/test_elements_worker/test_element_create_multiple.py +715 -0
  34. tests/test_elements_worker/test_element_create_single.py +528 -0
  35. tests/test_elements_worker/test_element_list_children.py +969 -0
  36. tests/test_elements_worker/test_element_list_parents.py +530 -0
  37. tests/test_elements_worker/{test_entities.py → test_entity_create.py} +37 -195
  38. tests/test_elements_worker/test_entity_list_and_check.py +160 -0
  39. tests/test_elements_worker/test_image.py +66 -0
  40. tests/test_elements_worker/test_metadata.py +252 -161
  41. tests/test_elements_worker/test_process.py +89 -0
  42. tests/test_elements_worker/test_task.py +8 -18
  43. tests/test_elements_worker/test_training.py +17 -8
  44. tests/test_elements_worker/test_transcription_create.py +873 -0
  45. tests/test_elements_worker/test_transcription_create_with_elements.py +951 -0
  46. tests/test_elements_worker/test_transcription_list.py +450 -0
  47. tests/test_elements_worker/test_version.py +60 -0
  48. tests/test_elements_worker/test_worker.py +578 -293
  49. tests/test_image.py +542 -209
  50. tests/test_merge.py +1 -2
  51. tests/test_utils.py +89 -4
  52. worker-demo/tests/__init__.py +0 -0
  53. worker-demo/tests/conftest.py +32 -0
  54. worker-demo/tests/test_worker.py +12 -0
  55. worker-demo/worker_demo/__init__.py +6 -0
  56. worker-demo/worker_demo/worker.py +19 -0
  57. arkindex_base_worker-0.3.7rc4.dist-info/RECORD +0 -41
  58. tests/test_elements_worker/test_elements.py +0 -2713
  59. tests/test_elements_worker/test_transcriptions.py +0 -2119
  60. {arkindex_base_worker-0.3.7rc4.dist-info → arkindex_base_worker-0.5.0a1.dist-info}/LICENSE +0 -0
@@ -0,0 +1,969 @@
1
+ from uuid import UUID
2
+
3
+ import pytest
4
+
5
+ from arkindex_worker.cache import (
6
+ CachedElement,
7
+ )
8
+ from arkindex_worker.models import Element
9
+
10
+ from . import BASE_API_CALLS
11
+
12
+
13
def test_list_elements_wrong_folder(mock_elements_worker):
    """`list_elements` rejects a `folder` filter that is not a boolean."""
    expected_error = "folder should be of type bool"
    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_elements(folder="not bool")
16
+
17
+
18
def test_list_elements_wrong_name(mock_elements_worker):
    """`list_elements` rejects a `name` filter that is not a string."""
    expected_error = "name should be of type str"
    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_elements(name=1234)
21
+
22
+
23
def test_list_elements_wrong_top_level(mock_elements_worker):
    """`list_elements` rejects a `top_level` filter that is not a boolean."""
    expected_error = "top_level should be of type bool"
    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_elements(top_level="not bool")
26
+
27
+
28
def test_list_elements_wrong_type(mock_elements_worker):
    """`list_elements` rejects a `type` filter that is not a string."""
    expected_error = "type should be of type str"
    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_elements(type=1234)
31
+
32
+
33
def test_list_elements_wrong_with_classes(mock_elements_worker):
    """`list_elements` rejects a `with_classes` flag that is not a boolean."""
    expected_error = "with_classes should be of type bool"
    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_elements(with_classes="not bool")
36
+
37
+
38
def test_list_elements_wrong_with_corpus(mock_elements_worker):
    """`list_elements` rejects a `with_corpus` flag that is not a boolean."""
    expected_error = "with_corpus should be of type bool"
    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_elements(with_corpus="not bool")
41
+
42
+
43
def test_list_elements_wrong_with_has_children(mock_elements_worker):
    """`list_elements` rejects a `with_has_children` flag that is not a boolean."""
    expected_error = "with_has_children should be of type bool"
    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_elements(with_has_children="not bool")
48
+
49
+
50
def test_list_elements_wrong_with_zone(mock_elements_worker):
    """`list_elements` rejects a `with_zone` flag that is not a boolean."""
    expected_error = "with_zone should be of type bool"
    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_elements(with_zone="not bool")
53
+
54
+
55
def test_list_elements_wrong_with_metadata(mock_elements_worker):
    """`list_elements` rejects a `with_metadata` flag that is not a boolean."""
    expected_error = "with_metadata should be of type bool"
    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_elements(with_metadata="not bool")
58
+
59
+
60
@pytest.mark.parametrize(
    ("param", "value"),
    [
        ("worker_run", 1234),
        ("transcription_worker_run", 1234),
    ],
)
def test_list_elements_wrong_worker_run(mock_elements_worker, param, value):
    """Worker-run filters of `list_elements` reject values that are neither str nor bool."""
    invalid_filter = {param: value}
    expected_error = f"{param} should be of type str or bool"
    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_elements(**invalid_filter)
70
+
71
+
72
@pytest.mark.parametrize(
    ("param", "alternative", "value"),
    [
        ("worker_version", "worker_run", 1234),
        ("transcription_worker_version", "transcription_worker_run", 1234),
    ],
)
def test_list_elements_wrong_worker_version(
    mock_elements_worker, param, alternative, value
):
    """Deprecated worker-version filters warn, then reject values that are neither str nor bool."""
    deprecation_message = (
        f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
    )
    type_error = f"{param} should be of type str or bool"

    # WARNING: pytest.deprecated_call must stay the OUTER context manager
    # (i.e. be entered before pytest.raises), otherwise its `match` argument
    # won't be checked.
    with pytest.deprecated_call(match=deprecation_message):
        with pytest.raises(AssertionError, match=type_error):
            mock_elements_worker.list_elements(**{param: value})
90
+
91
+
92
@pytest.mark.parametrize(
    "param",
    [
        "worker_run",
        "transcription_worker_run",
    ],
)
def test_list_elements_wrong_bool_worker_run(mock_elements_worker, param):
    """A boolean worker-run filter of `list_elements` may only be `False`."""
    expected_error = f"if of type bool, {param} can only be set to False"
    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_elements(**{param: True})
104
+
105
+
106
@pytest.mark.parametrize(
    ("param", "alternative"),
    [
        ("worker_version", "worker_run"),
        ("transcription_worker_version", "transcription_worker_run"),
    ],
)
def test_list_elements_wrong_bool_worker_version(
    mock_elements_worker, param, alternative
):
    """Deprecated worker-version filters warn, then reject a boolean `True`."""
    deprecation_message = (
        f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
    )
    bool_error = f"if of type bool, {param} can only be set to False"

    # WARNING: pytest.deprecated_call must stay the OUTER context manager
    # (i.e. be entered before pytest.raises), otherwise its `match` argument
    # won't be checked.
    with pytest.deprecated_call(match=deprecation_message):
        with pytest.raises(AssertionError, match=bool_error):
            mock_elements_worker.list_elements(**{param: True})
126
+
127
+
128
def test_list_elements_api_error(responses, mock_elements_worker):
    """A failing corpus elements endpoint is requested 5 times before pagination aborts."""
    endpoint = (
        f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/"
    )
    responses.add(responses.GET, endpoint, status=418)

    with pytest.raises(
        Exception, match="Stopping pagination as data will be incomplete"
    ):
        next(mock_elements_worker.list_elements())

    # We do 5 retries
    attempts = 5
    assert len(responses.calls) == len(BASE_API_CALLS) + attempts
    recorded_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert recorded_calls == BASE_API_CALLS + [("GET", endpoint)] * attempts
166
+
167
+
168
def test_list_elements(responses, mock_elements_worker):
    """`list_elements` yields exactly the elements returned by the corpus elements endpoint.

    The mocked endpoint serves a single page of three elements; the test checks
    both the yielded payloads and the HTTP calls that were issued.
    """
    expected_children = [
        {
            "id": element_id,
            "type": "page",
            "name": element_name,
            "corpus": {},
            "thumbnail_url": None,
            "zone": {},
            "best_classes": None,
            "has_children": None,
            "worker_version_id": None,
            "worker_run_id": None,
        }
        for element_id, element_name in (
            ("0000", "Test"),
            ("1111", "Test 2"),
            ("2222", "Test 3"),
        )
    ]
    endpoint = (
        f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/"
    )
    responses.add(
        responses.GET,
        endpoint,
        status=200,
        json={
            "count": 3,
            "next": None,
            "results": expected_children,
        },
    )

    # Compare the whole materialised result: an index-based loop would pass
    # silently if the generator yielded nothing or fewer items than expected.
    assert list(mock_elements_worker.list_elements()) == expected_children

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [("GET", endpoint)]
230
+
231
+
232
def test_list_elements_manual_worker_version(responses, mock_elements_worker):
    """`worker_version=False` is forwarded as a query parameter and emits a deprecation warning."""
    expected_children = [
        {
            "id": "0000",
            "type": "page",
            "name": "Test",
            "corpus": {},
            "thumbnail_url": None,
            "zone": {},
            "best_classes": None,
            "has_children": None,
            "worker_version_id": None,
            "worker_run_id": None,
        }
    ]
    endpoint = f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/?worker_version=False"
    responses.add(
        responses.GET,
        endpoint,
        status=200,
        json={
            "count": 1,
            "next": None,
            "results": expected_children,
        },
    )

    with pytest.deprecated_call(
        match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
    ):
        # Compare the whole materialised result: an index-based loop would
        # pass silently if nothing was yielded.
        assert (
            list(mock_elements_worker.list_elements(worker_version=False))
            == expected_children
        )

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [("GET", endpoint)]
275
+
276
+
277
def test_list_elements_manual_worker_run(responses, mock_elements_worker):
    """`worker_run=False` is forwarded as a query parameter to the corpus elements endpoint."""
    expected_children = [
        {
            "id": "0000",
            "type": "page",
            "name": "Test",
            "corpus": {},
            "thumbnail_url": None,
            "zone": {},
            "best_classes": None,
            "has_children": None,
            "worker_version_id": None,
            "worker_run_id": None,
        }
    ]
    endpoint = f"http://testserver/api/v1/corpus/{mock_elements_worker.corpus_id}/elements/?worker_run=False"
    responses.add(
        responses.GET,
        endpoint,
        status=200,
        json={
            "count": 1,
            "next": None,
            "results": expected_children,
        },
    )

    # Compare the whole materialised result: an index-based loop would pass
    # silently if nothing was yielded.
    assert list(mock_elements_worker.list_elements(worker_run=False)) == expected_children

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [("GET", endpoint)]
315
+
316
+
317
def test_list_elements_with_cache_unhandled_param(mock_elements_worker_with_cache):
    """With the local cache enabled, `list_elements` refuses filters the cache cannot apply."""
    expected_error = "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"
    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker_with_cache.list_elements(with_corpus=True)
323
+
324
+
325
@pytest.mark.usefixtures("_mock_cached_elements")
@pytest.mark.parametrize(
    ("filters", "expected_ids"),
    [
        # No filter: all five cached elements are returned
        (
            {},
            (
                "99999999-9999-9999-9999-999999999999",
                "12341234-1234-1234-1234-123412341234",
                "11111111-1111-1111-1111-111111111111",
                "22222222-2222-2222-2222-222222222222",
                "33333333-3333-3333-3333-333333333333",
            ),
        ),
        # Filter on the "page" type: a single element matches
        (
            {"type": "page"},
            ("22222222-2222-2222-2222-222222222222",),
        ),
        # Filter on a worker run ID: two elements match
        (
            {
                "worker_run": "56785678-5678-5678-5678-567856785678",
            },
            (
                "12341234-1234-1234-1234-123412341234",
                "22222222-2222-2222-2222-222222222222",
            ),
        ),
        # Manual worker run (worker_run=False): three elements match
        (
            {"worker_run": False},
            (
                "99999999-9999-9999-9999-999999999999",
                "11111111-1111-1111-1111-111111111111",
                "33333333-3333-3333-3333-333333333333",
            ),
        ),
    ],
)
def test_list_elements_with_cache(
    responses, mock_elements_worker_with_cache, filters, expected_ids
):
    """With the cache enabled, `list_elements` answers from the local database only."""
    # Check we have 5 elements already present in database
    cached_total = CachedElement.select().count()
    assert cached_total == 5

    # Query database through cache
    elements = mock_elements_worker_with_cache.list_elements(**filters)
    assert elements.count() == len(expected_ids)

    # NOTE(review): the expected IDs above are not in ascending UUID order, so
    # `order_by("id")` presumably does not sort on the id column — confirm
    # against the `_mock_cached_elements` fixture.
    ordered_elements = elements.order_by("id")
    for cached_element, raw_id in zip(ordered_elements, expected_ids, strict=True):
        assert cached_element.id == UUID(raw_id)

    # Check the worker never hits the API for elements
    recorded_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert len(responses.calls) == len(BASE_API_CALLS)
    assert recorded_calls == BASE_API_CALLS
383
+
384
+
385
@pytest.mark.usefixtures("_mock_cached_elements")
@pytest.mark.parametrize(
    ("filters", "expected_ids"),
    [
        # Filter on a worker version ID
        (
            {
                "worker_version": "56785678-5678-5678-5678-567856785678",
            },
            (
                "12341234-1234-1234-1234-123412341234",
                "11111111-1111-1111-1111-111111111111",
                "22222222-2222-2222-2222-222222222222",
            ),
        ),
        # Filter on the "page" type and a worker version ID
        (
            {"type": "page", "worker_version": "56785678-5678-5678-5678-567856785678"},
            ("22222222-2222-2222-2222-222222222222",),
        ),
        # Manual worker version (worker_version=False)
        (
            {"worker_version": False},
            (
                "99999999-9999-9999-9999-999999999999",
                "33333333-3333-3333-3333-333333333333",
            ),
        ),
    ],
)
def test_list_elements_with_cache_deprecation(
    responses,
    mock_elements_worker_with_cache,
    filters,
    expected_ids,
):
    """Cached `list_elements` still honours the deprecated `worker_version` filter, with a warning."""
    # Check we have 5 elements already present in database
    cached_total = CachedElement.select().count()
    assert cached_total == 5

    deprecation_message = "`worker_version` usage is deprecated. Consider using `worker_run` instead."
    with pytest.deprecated_call(match=deprecation_message):
        # Query database through cache
        elements = mock_elements_worker_with_cache.list_elements(**filters)

    assert elements.count() == len(expected_ids)
    ordered_elements = elements.order_by("id")
    for cached_element, raw_id in zip(ordered_elements, expected_ids, strict=True):
        assert cached_element.id == UUID(raw_id)

    # Check the worker never hits the API for elements
    recorded_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert len(responses.calls) == len(BASE_API_CALLS)
    assert recorded_calls == BASE_API_CALLS
438
+
439
+
440
def test_list_element_children_wrong_element(mock_elements_worker):
    """`list_element_children` rejects a missing element and a value of the wrong type."""
    expected_error = (
        "element shouldn't be null and should be an Element or CachedElement"
    )
    for invalid_element in (None, "not element type"):
        with pytest.raises(AssertionError, match=expected_error):
            mock_elements_worker.list_element_children(element=invalid_element)
452
+
453
+
454
def test_list_element_children_wrong_folder(mock_elements_worker):
    """`list_element_children` rejects a `folder` filter that is not a boolean."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "folder should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_children(element=parent, folder="not bool")
462
+
463
+
464
def test_list_element_children_wrong_name(mock_elements_worker):
    """`list_element_children` rejects a `name` filter that is not a string."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "name should be of type str"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_children(element=parent, name=1234)
472
+
473
+
474
def test_list_element_children_wrong_recursive(mock_elements_worker):
    """`list_element_children` rejects a `recursive` flag that is not a boolean."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "recursive should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_children(
            element=parent, recursive="not bool"
        )
482
+
483
+
484
def test_list_element_children_wrong_type(mock_elements_worker):
    """`list_element_children` rejects a `type` filter that is not a string."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "type should be of type str"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_children(element=parent, type=1234)
492
+
493
+
494
def test_list_element_children_wrong_with_classes(mock_elements_worker):
    """`list_element_children` rejects a `with_classes` flag that is not a boolean."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "with_classes should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_children(
            element=parent, with_classes="not bool"
        )
502
+
503
+
504
def test_list_element_children_wrong_with_corpus(mock_elements_worker):
    """`list_element_children` rejects a `with_corpus` flag that is not a boolean."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "with_corpus should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_children(
            element=parent, with_corpus="not bool"
        )
512
+
513
+
514
def test_list_element_children_wrong_with_has_children(mock_elements_worker):
    """`list_element_children` rejects a `with_has_children` flag that is not a boolean."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "with_has_children should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_children(
            element=parent, with_has_children="not bool"
        )
524
+
525
+
526
def test_list_element_children_wrong_with_zone(mock_elements_worker):
    """`list_element_children` rejects a `with_zone` flag that is not a boolean."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "with_zone should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_children(
            element=parent, with_zone="not bool"
        )
534
+
535
+
536
def test_list_element_children_wrong_with_metadata(mock_elements_worker):
    """`list_element_children` rejects a `with_metadata` flag that is not a boolean."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "with_metadata should be of type bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_children(
            element=parent, with_metadata="not bool"
        )
544
+
545
+
546
@pytest.mark.parametrize(
    ("param", "value"),
    [
        ("worker_run", 1234),
        ("transcription_worker_run", 1234),
    ],
)
def test_list_element_children_wrong_worker_run(mock_elements_worker, param, value):
    """Worker-run filters of `list_element_children` reject values that are neither str nor bool."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    invalid_filter = {param: value}
    expected_error = f"{param} should be of type str or bool"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_children(element=parent, **invalid_filter)
561
+
562
+
563
@pytest.mark.parametrize(
    ("param", "alternative", "value"),
    [
        ("worker_version", "worker_run", 1234),
        ("transcription_worker_version", "transcription_worker_run", 1234),
    ],
)
def test_list_element_children_wrong_worker_version(
    mock_elements_worker, param, alternative, value
):
    """Deprecated worker-version filters warn, then reject values that are neither str nor bool."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    deprecation_message = (
        f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
    )
    type_error = f"{param} should be of type str or bool"

    # WARNING: pytest.deprecated_call must stay the OUTER context manager
    # (i.e. be entered before pytest.raises), otherwise its `match` argument
    # won't be checked.
    with pytest.deprecated_call(match=deprecation_message):
        with pytest.raises(AssertionError, match=type_error):
            mock_elements_worker.list_element_children(
                element=parent,
                **{param: value},
            )
586
+
587
+
588
@pytest.mark.parametrize(
    "param",
    [
        "worker_run",
        "transcription_worker_run",
    ],
)
def test_list_element_children_wrong_bool_worker_run(mock_elements_worker, param):
    """A boolean worker-run filter of `list_element_children` may only be `False`."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = f"if of type bool, {param} can only be set to False"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker.list_element_children(element=parent, **{param: True})
605
+
606
+
607
@pytest.mark.parametrize(
    ("param", "alternative"),
    [
        ("worker_version", "worker_run"),
        ("transcription_worker_version", "transcription_worker_run"),
    ],
)
def test_list_element_children_wrong_bool_worker_version(
    mock_elements_worker, param, alternative
):
    """Deprecated worker-version filters warn, then reject a boolean `True`."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    deprecation_message = (
        f"`{param}` usage is deprecated. Consider using `{alternative}` instead."
    )
    bool_error = f"if of type bool, {param} can only be set to False"

    # WARNING: pytest.deprecated_call must stay the OUTER context manager
    # (i.e. be entered before pytest.raises), otherwise its `match` argument
    # won't be checked.
    with pytest.deprecated_call(match=deprecation_message):
        with pytest.raises(AssertionError, match=bool_error):
            mock_elements_worker.list_element_children(
                element=parent,
                **{param: True},
            )
632
+
633
+
634
def test_list_element_children_api_error(responses, mock_elements_worker):
    """A failing children endpoint is requested 5 times before pagination aborts."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    children_url = "http://testserver/api/v1/elements/12341234-1234-1234-1234-123412341234/children/"
    responses.add(responses.GET, children_url, status=418)

    with pytest.raises(
        Exception, match="Stopping pagination as data will be incomplete"
    ):
        next(mock_elements_worker.list_element_children(element=parent))

    # We do 5 retries
    attempts = 5
    assert len(responses.calls) == len(BASE_API_CALLS) + attempts
    recorded_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert recorded_calls == BASE_API_CALLS + [("GET", children_url)] * attempts
673
+
674
+
675
def test_list_element_children(responses, mock_elements_worker):
    """`list_element_children` yields exactly the children returned by the API.

    The mocked endpoint serves a single page of three children; the test checks
    both the yielded payloads and the HTTP calls that were issued.
    """
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_children = [
        {
            "id": child_id,
            "type": "page",
            "name": child_name,
            "corpus": {},
            "thumbnail_url": None,
            "zone": {},
            "best_classes": None,
            "has_children": None,
            "worker_version_id": None,
            "worker_run_id": None,
        }
        for child_id, child_name in (
            ("0000", "Test"),
            ("1111", "Test 2"),
            ("2222", "Test 3"),
        )
    ]
    children_url = "http://testserver/api/v1/elements/12341234-1234-1234-1234-123412341234/children/"
    responses.add(
        responses.GET,
        children_url,
        status=200,
        json={
            "count": 3,
            "next": None,
            "results": expected_children,
        },
    )

    # Compare the whole materialised result: an index-based loop would pass
    # silently if the generator yielded nothing or fewer items than expected.
    assert (
        list(mock_elements_worker.list_element_children(element=elt))
        == expected_children
    )

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [("GET", children_url)]
740
+
741
+
742
def test_list_element_children_manual_worker_version(responses, mock_elements_worker):
    """`worker_version=False` is forwarded as a query parameter and emits a deprecation warning."""
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_children = [
        {
            "id": "0000",
            "type": "page",
            "name": "Test",
            "corpus": {},
            "thumbnail_url": None,
            "zone": {},
            "best_classes": None,
            "has_children": None,
            "worker_version_id": None,
            "worker_run_id": None,
        }
    ]
    children_url = "http://testserver/api/v1/elements/12341234-1234-1234-1234-123412341234/children/?worker_version=False"
    responses.add(
        responses.GET,
        children_url,
        status=200,
        json={
            "count": 1,
            "next": None,
            "results": expected_children,
        },
    )

    with pytest.deprecated_call(
        match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
    ):
        # Compare the whole materialised result: an index-based loop would
        # pass silently if nothing was yielded.
        assert (
            list(
                mock_elements_worker.list_element_children(
                    element=elt, worker_version=False
                )
            )
            == expected_children
        )

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [("GET", children_url)]
788
+
789
+
790
def test_list_element_children_manual_worker_run(responses, mock_elements_worker):
    """`worker_run=False` is forwarded as a query parameter to the children endpoint."""
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_children = [
        {
            "id": "0000",
            "type": "page",
            "name": "Test",
            "corpus": {},
            "thumbnail_url": None,
            "zone": {},
            "best_classes": None,
            "has_children": None,
            "worker_version_id": None,
            "worker_run_id": None,
        }
    ]
    children_url = "http://testserver/api/v1/elements/12341234-1234-1234-1234-123412341234/children/?worker_run=False"
    responses.add(
        responses.GET,
        children_url,
        status=200,
        json={
            "count": 1,
            "next": None,
            "results": expected_children,
        },
    )

    # Compare the whole materialised result: an index-based loop would pass
    # silently if nothing was yielded.
    assert (
        list(mock_elements_worker.list_element_children(element=elt, worker_run=False))
        == expected_children
    )

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [("GET", children_url)]
831
+
832
+
833
def test_list_element_children_with_cache_unhandled_param(
    mock_elements_worker_with_cache,
):
    """With the local cache enabled, `list_element_children` refuses filters the cache cannot apply."""
    parent = Element({"id": "12341234-1234-1234-1234-123412341234"})
    expected_error = "When using the local cache, you can only filter by 'type' and/or 'worker_version' and/or 'worker_run'"

    with pytest.raises(AssertionError, match=expected_error):
        mock_elements_worker_with_cache.list_element_children(
            element=parent, with_corpus=True
        )
845
+
846
+
847
@pytest.mark.usefixtures("_mock_cached_elements")
@pytest.mark.parametrize(
    ("filters", "expected_ids"),
    [
        # Parent element only: three children are returned
        (
            {
                "element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
            },
            (
                "11111111-1111-1111-1111-111111111111",
                "22222222-2222-2222-2222-222222222222",
                "33333333-3333-3333-3333-333333333333",
            ),
        ),
        # Parent element and the "page" type: only the second child matches
        (
            {
                "element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
                "type": "page",
            },
            ("22222222-2222-2222-2222-222222222222",),
        ),
        # Parent element and a worker run ID: only the second child matches
        (
            {
                "element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
                "worker_run": "56785678-5678-5678-5678-567856785678",
            },
            ("22222222-2222-2222-2222-222222222222",),
        ),
        # Parent element and manual worker run (worker_run=False):
        # the first and third children match
        (
            {
                "element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
                "worker_run": False,
            },
            (
                "11111111-1111-1111-1111-111111111111",
                "33333333-3333-3333-3333-333333333333",
            ),
        ),
    ],
)
def test_list_element_children_with_cache(
    responses,
    mock_elements_worker_with_cache,
    filters,
    expected_ids,
):
    """With the cache enabled, `list_element_children` answers from the local database only."""
    # Check we have 5 elements already present in database
    cached_total = CachedElement.select().count()
    assert cached_total == 5

    # Query database through cache
    elements = mock_elements_worker_with_cache.list_element_children(**filters)
    assert elements.count() == len(expected_ids)
    ordered_elements = elements.order_by("id")
    for cached_child, raw_id in zip(ordered_elements, expected_ids, strict=True):
        assert cached_child.id == UUID(raw_id)

    # Check the worker never hits the API for elements
    recorded_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert len(responses.calls) == len(BASE_API_CALLS)
    assert recorded_calls == BASE_API_CALLS
911
+
912
+
913
@pytest.mark.usefixtures("_mock_cached_elements")
@pytest.mark.parametrize(
    ("filters", "expected_ids"),
    [
        # Parent element and a worker version ID
        (
            {
                "element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
                "worker_version": "56785678-5678-5678-5678-567856785678",
            },
            (
                "11111111-1111-1111-1111-111111111111",
                "22222222-2222-2222-2222-222222222222",
            ),
        ),
        # Parent element, the "page" type and a worker version ID
        (
            {
                "element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
                "type": "page",
                "worker_version": "56785678-5678-5678-5678-567856785678",
            },
            ("22222222-2222-2222-2222-222222222222",),
        ),
        # Parent element and manual worker version (worker_version=False)
        (
            {
                "element": CachedElement(id="12341234-1234-1234-1234-123412341234"),
                "worker_version": False,
            },
            ("33333333-3333-3333-3333-333333333333",),
        ),
    ],
)
def test_list_element_children_with_cache_deprecation(
    responses,
    mock_elements_worker_with_cache,
    filters,
    expected_ids,
):
    """Cached `list_element_children` still honours the deprecated `worker_version` filter, with a warning."""
    # Check we have 5 elements already present in database
    cached_total = CachedElement.select().count()
    assert cached_total == 5

    deprecation_message = "`worker_version` usage is deprecated. Consider using `worker_run` instead."
    with pytest.deprecated_call(match=deprecation_message):
        # Query database through cache
        elements = mock_elements_worker_with_cache.list_element_children(**filters)

    assert elements.count() == len(expected_ids)
    ordered_elements = elements.order_by("id")
    for cached_child, raw_id in zip(ordered_elements, expected_ids, strict=True):
        assert cached_child.id == UUID(raw_id)

    # Check the worker never hits the API for elements
    recorded_calls = [
        (call.request.method, call.request.url) for call in responses.calls
    ]
    assert len(responses.calls) == len(BASE_API_CALLS)
    assert recorded_calls == BASE_API_CALLS