arkindex-base-worker 0.3.6rc4__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arkindex_base_worker-0.3.7.dist-info/LICENSE +21 -0
- arkindex_base_worker-0.3.7.dist-info/METADATA +77 -0
- arkindex_base_worker-0.3.7.dist-info/RECORD +47 -0
- {arkindex_base_worker-0.3.6rc4.dist-info → arkindex_base_worker-0.3.7.dist-info}/WHEEL +1 -1
- {arkindex_base_worker-0.3.6rc4.dist-info → arkindex_base_worker-0.3.7.dist-info}/top_level.txt +2 -0
- arkindex_worker/cache.py +14 -0
- arkindex_worker/image.py +29 -19
- arkindex_worker/models.py +14 -2
- arkindex_worker/utils.py +17 -3
- arkindex_worker/worker/__init__.py +122 -125
- arkindex_worker/worker/base.py +24 -24
- arkindex_worker/worker/classification.py +18 -25
- arkindex_worker/worker/dataset.py +24 -18
- arkindex_worker/worker/element.py +100 -19
- arkindex_worker/worker/entity.py +35 -4
- arkindex_worker/worker/metadata.py +21 -11
- arkindex_worker/worker/training.py +13 -0
- arkindex_worker/worker/transcription.py +45 -5
- arkindex_worker/worker/version.py +22 -0
- hooks/pre_gen_project.py +3 -0
- tests/conftest.py +16 -8
- tests/test_base_worker.py +0 -6
- tests/test_dataset_worker.py +291 -409
- tests/test_elements_worker/test_classifications.py +365 -539
- tests/test_elements_worker/test_cli.py +1 -1
- tests/test_elements_worker/test_dataset.py +97 -116
- tests/test_elements_worker/test_elements.py +354 -76
- tests/test_elements_worker/test_entities.py +22 -2
- tests/test_elements_worker/test_metadata.py +53 -27
- tests/test_elements_worker/test_training.py +35 -0
- tests/test_elements_worker/test_transcriptions.py +149 -16
- tests/test_elements_worker/test_worker.py +19 -6
- tests/test_image.py +37 -0
- tests/test_utils.py +23 -1
- worker-demo/tests/__init__.py +0 -0
- worker-demo/tests/conftest.py +32 -0
- worker-demo/tests/test_worker.py +12 -0
- worker-demo/worker_demo/__init__.py +6 -0
- worker-demo/worker_demo/worker.py +19 -0
- arkindex_base_worker-0.3.6rc4.dist-info/METADATA +0 -47
- arkindex_base_worker-0.3.6rc4.dist-info/RECORD +0 -40
|
@@ -259,7 +259,7 @@ def test_create_metadata_cached_element(responses, mock_elements_worker_with_cac
|
|
|
259
259
|
],
|
|
260
260
|
],
|
|
261
261
|
)
|
|
262
|
-
def test_create_metadatas(responses, mock_elements_worker, metadata_list):
|
|
262
|
+
def test_create_metadata_bulk(responses, mock_elements_worker, metadata_list):
|
|
263
263
|
element = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
264
264
|
responses.add(
|
|
265
265
|
responses.POST,
|
|
@@ -280,7 +280,7 @@ def test_create_metadatas(responses, mock_elements_worker, metadata_list):
|
|
|
280
280
|
},
|
|
281
281
|
)
|
|
282
282
|
|
|
283
|
-
created_metadata_list = mock_elements_worker.
|
|
283
|
+
created_metadata_list = mock_elements_worker.create_metadata_bulk(
|
|
284
284
|
element, metadata_list
|
|
285
285
|
)
|
|
286
286
|
|
|
@@ -327,7 +327,7 @@ def test_create_metadatas(responses, mock_elements_worker, metadata_list):
|
|
|
327
327
|
],
|
|
328
328
|
],
|
|
329
329
|
)
|
|
330
|
-
def test_create_metadatas_cached_element(
|
|
330
|
+
def test_create_metadata_bulk_cached_element(
|
|
331
331
|
responses, mock_elements_worker_with_cache, metadata_list
|
|
332
332
|
):
|
|
333
333
|
element = CachedElement.create(
|
|
@@ -352,7 +352,7 @@ def test_create_metadatas_cached_element(
|
|
|
352
352
|
},
|
|
353
353
|
)
|
|
354
354
|
|
|
355
|
-
created_metadata_list = mock_elements_worker_with_cache.
|
|
355
|
+
created_metadata_list = mock_elements_worker_with_cache.create_metadata_bulk(
|
|
356
356
|
element, metadata_list
|
|
357
357
|
)
|
|
358
358
|
|
|
@@ -386,7 +386,7 @@ def test_create_metadatas_cached_element(
|
|
|
386
386
|
|
|
387
387
|
|
|
388
388
|
@pytest.mark.parametrize("wrong_element", [None, "not_element_type", 1234, 12.5])
|
|
389
|
-
def test_create_metadatas_wrong_element(mock_elements_worker, wrong_element):
|
|
389
|
+
def test_create_metadata_bulk_wrong_element(mock_elements_worker, wrong_element):
|
|
390
390
|
wrong_metadata_list = [
|
|
391
391
|
{"type": MetaType.Text, "name": "fake_name", "value": "fake_value"}
|
|
392
392
|
]
|
|
@@ -394,13 +394,13 @@ def test_create_metadatas_wrong_element(mock_elements_worker, wrong_element):
|
|
|
394
394
|
AssertionError,
|
|
395
395
|
match="element shouldn't be null and should be of type Element or CachedElement",
|
|
396
396
|
):
|
|
397
|
-
mock_elements_worker.
|
|
398
|
-
element=wrong_element,
|
|
397
|
+
mock_elements_worker.create_metadata_bulk(
|
|
398
|
+
element=wrong_element, metadata_list=wrong_metadata_list
|
|
399
399
|
)
|
|
400
400
|
|
|
401
401
|
|
|
402
402
|
@pytest.mark.parametrize("wrong_type", [None, "not_metadata_type", 1234, 12.5])
|
|
403
|
-
def test_create_metadatas_wrong_type(mock_elements_worker, wrong_type):
|
|
403
|
+
def test_create_metadata_bulk_wrong_type(mock_elements_worker, wrong_type):
|
|
404
404
|
element = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
405
405
|
wrong_metadata_list = [
|
|
406
406
|
{"type": wrong_type, "name": "fake_name", "value": "fake_value"}
|
|
@@ -408,13 +408,13 @@ def test_create_metadatas_wrong_type(mock_elements_worker, wrong_type):
|
|
|
408
408
|
with pytest.raises(
|
|
409
409
|
AssertionError, match="type shouldn't be null and should be of type MetaType"
|
|
410
410
|
):
|
|
411
|
-
mock_elements_worker.
|
|
412
|
-
element=element,
|
|
411
|
+
mock_elements_worker.create_metadata_bulk(
|
|
412
|
+
element=element, metadata_list=wrong_metadata_list
|
|
413
413
|
)
|
|
414
414
|
|
|
415
415
|
|
|
416
416
|
@pytest.mark.parametrize("wrong_name", [None, 1234, 12.5, [1, 2, 3, 4]])
|
|
417
|
-
def test_create_metadatas_wrong_name(mock_elements_worker, wrong_name):
|
|
417
|
+
def test_create_metadata_bulk_wrong_name(mock_elements_worker, wrong_name):
|
|
418
418
|
element = Element({"id": "fake_element_id"})
|
|
419
419
|
wrong_metadata_list = [
|
|
420
420
|
{"type": MetaType.Text, "name": wrong_name, "value": "fake_value"}
|
|
@@ -422,13 +422,13 @@ def test_create_metadatas_wrong_name(mock_elements_worker, wrong_name):
|
|
|
422
422
|
with pytest.raises(
|
|
423
423
|
AssertionError, match="name shouldn't be null and should be of type str"
|
|
424
424
|
):
|
|
425
|
-
mock_elements_worker.
|
|
426
|
-
element=element,
|
|
425
|
+
mock_elements_worker.create_metadata_bulk(
|
|
426
|
+
element=element, metadata_list=wrong_metadata_list
|
|
427
427
|
)
|
|
428
428
|
|
|
429
429
|
|
|
430
430
|
@pytest.mark.parametrize("wrong_value", [None, [1, 2, 3, 4]])
|
|
431
|
-
def test_create_metadatas_wrong_value(mock_elements_worker, wrong_value):
|
|
431
|
+
def test_create_metadata_bulk_wrong_value(mock_elements_worker, wrong_value):
|
|
432
432
|
element = Element({"id": "fake_element_id"})
|
|
433
433
|
wrong_metadata_list = [
|
|
434
434
|
{"type": MetaType.Text, "name": "fake_name", "value": wrong_value}
|
|
@@ -439,13 +439,13 @@ def test_create_metadatas_wrong_value(mock_elements_worker, wrong_value):
|
|
|
439
439
|
"value shouldn't be null and should be of type (str or float or int)"
|
|
440
440
|
),
|
|
441
441
|
):
|
|
442
|
-
mock_elements_worker.
|
|
443
|
-
element=element,
|
|
442
|
+
mock_elements_worker.create_metadata_bulk(
|
|
443
|
+
element=element, metadata_list=wrong_metadata_list
|
|
444
444
|
)
|
|
445
445
|
|
|
446
446
|
|
|
447
447
|
@pytest.mark.parametrize("wrong_entity", [[1, 2, 3, 4], 1234, 12.5])
|
|
448
|
-
def test_create_metadatas_wrong_entity(mock_elements_worker, wrong_entity):
|
|
448
|
+
def test_create_metadata_bulk_wrong_entity(mock_elements_worker, wrong_entity):
|
|
449
449
|
element = Element({"id": "fake_element_id"})
|
|
450
450
|
wrong_metadata_list = [
|
|
451
451
|
{
|
|
@@ -456,12 +456,12 @@ def test_create_metadatas_wrong_entity(mock_elements_worker, wrong_entity):
|
|
|
456
456
|
}
|
|
457
457
|
]
|
|
458
458
|
with pytest.raises(AssertionError, match="entity_id should be None or a str"):
|
|
459
|
-
mock_elements_worker.
|
|
460
|
-
element=element,
|
|
459
|
+
mock_elements_worker.create_metadata_bulk(
|
|
460
|
+
element=element, metadata_list=wrong_metadata_list
|
|
461
461
|
)
|
|
462
462
|
|
|
463
463
|
|
|
464
|
-
def test_create_metadatas_api_error(responses, mock_elements_worker):
|
|
464
|
+
def test_create_metadata_bulk_api_error(responses, mock_elements_worker):
|
|
465
465
|
element = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
466
466
|
metadata_list = [
|
|
467
467
|
{
|
|
@@ -478,7 +478,7 @@ def test_create_metadatas_api_error(responses, mock_elements_worker):
|
|
|
478
478
|
)
|
|
479
479
|
|
|
480
480
|
with pytest.raises(ErrorResponse):
|
|
481
|
-
mock_elements_worker.
|
|
481
|
+
mock_elements_worker.create_metadata_bulk(element, metadata_list)
|
|
482
482
|
|
|
483
483
|
assert len(responses.calls) == len(BASE_API_CALLS) + 5
|
|
484
484
|
assert [
|
|
@@ -508,14 +508,23 @@ def test_create_metadatas_api_error(responses, mock_elements_worker):
|
|
|
508
508
|
]
|
|
509
509
|
|
|
510
510
|
|
|
511
|
+
def test_list_element_metadata_wrong_load_parents(fake_dummy_worker):
|
|
512
|
+
element = Element({"id": "element_id"})
|
|
513
|
+
with pytest.raises(AssertionError, match="load_parents should be of type bool"):
|
|
514
|
+
fake_dummy_worker.list_element_metadata(
|
|
515
|
+
element=element,
|
|
516
|
+
load_parents="not bool",
|
|
517
|
+
)
|
|
518
|
+
|
|
519
|
+
|
|
511
520
|
def test_list_element_metadata(fake_dummy_worker):
|
|
512
521
|
element = Element({"id": "element_id"})
|
|
513
522
|
fake_dummy_worker.api_client.add_response(
|
|
514
523
|
"ListElementMetaData",
|
|
515
524
|
id=element.id,
|
|
516
|
-
response={"id": "metadata_id"},
|
|
525
|
+
response=[{"id": "metadata_id"}],
|
|
517
526
|
)
|
|
518
|
-
assert fake_dummy_worker.list_element_metadata(element) == {"id": "metadata_id"}
|
|
527
|
+
assert fake_dummy_worker.list_element_metadata(element) == [{"id": "metadata_id"}]
|
|
519
528
|
|
|
520
529
|
assert len(fake_dummy_worker.api_client.history) == 1
|
|
521
530
|
assert len(fake_dummy_worker.api_client.responses) == 0
|
|
@@ -527,11 +536,28 @@ def test_list_element_metadata_cached_element(mock_elements_worker_with_cache):
|
|
|
527
536
|
mock_elements_worker_with_cache.api_client.add_response(
|
|
528
537
|
"ListElementMetaData",
|
|
529
538
|
id="element_id",
|
|
530
|
-
response={"id": "metadata_id"},
|
|
539
|
+
response=[{"id": "metadata_id"}],
|
|
531
540
|
)
|
|
532
|
-
assert mock_elements_worker_with_cache.list_element_metadata(element) ==
|
|
533
|
-
"id": "metadata_id"
|
|
534
|
-
|
|
541
|
+
assert mock_elements_worker_with_cache.list_element_metadata(element) == [
|
|
542
|
+
{"id": "metadata_id"}
|
|
543
|
+
]
|
|
535
544
|
|
|
536
545
|
assert len(mock_elements_worker_with_cache.api_client.history) == 1
|
|
537
546
|
assert len(mock_elements_worker_with_cache.api_client.responses) == 0
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def test_list_element_metadata_with_load_parents(fake_dummy_worker):
|
|
550
|
+
element = Element({"id": "element_id"})
|
|
551
|
+
fake_dummy_worker.api_client.add_response(
|
|
552
|
+
"ListElementMetaData",
|
|
553
|
+
id=element.id,
|
|
554
|
+
load_parents=True,
|
|
555
|
+
response=[{"id": "metadata_id"}, {"id": "parent_metadata_id"}],
|
|
556
|
+
)
|
|
557
|
+
assert fake_dummy_worker.list_element_metadata(element, load_parents=True) == [
|
|
558
|
+
{"id": "metadata_id"},
|
|
559
|
+
{"id": "parent_metadata_id"},
|
|
560
|
+
]
|
|
561
|
+
|
|
562
|
+
assert len(fake_dummy_worker.api_client.history) == 1
|
|
563
|
+
assert len(fake_dummy_worker.api_client.responses) == 0
|
|
@@ -179,6 +179,41 @@ def test_validate_model_version_not_created(mock_training_worker):
|
|
|
179
179
|
mock_training_worker.validate_model_version(hash="a", size=1, archive_hash="b")
|
|
180
180
|
|
|
181
181
|
|
|
182
|
+
@pytest.mark.parametrize("status_code", [403, 500])
|
|
183
|
+
def test_validate_model_version_catch_errors(
|
|
184
|
+
mocker, mock_training_worker, caplog, status_code
|
|
185
|
+
):
|
|
186
|
+
mocker.patch(
|
|
187
|
+
"arkindex_worker.worker.base.BaseWorker.request.retry.retry", return_value=False
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
mock_training_worker.model_version = {"id": "model_version_id"}
|
|
191
|
+
args = {
|
|
192
|
+
"hash": "hash",
|
|
193
|
+
"archive_hash": "archive_hash",
|
|
194
|
+
"size": 30,
|
|
195
|
+
}
|
|
196
|
+
mock_training_worker.api_client.add_error_response(
|
|
197
|
+
"ValidateModelVersion",
|
|
198
|
+
id="model_version_id",
|
|
199
|
+
status_code=status_code,
|
|
200
|
+
body=args,
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
mock_training_worker.validate_model_version(**args)
|
|
204
|
+
assert mock_training_worker.model_version == {"id": "model_version_id"}
|
|
205
|
+
assert [
|
|
206
|
+
(level, message)
|
|
207
|
+
for module, level, message in caplog.record_tuples
|
|
208
|
+
if module == "arkindex_worker"
|
|
209
|
+
] == [
|
|
210
|
+
(
|
|
211
|
+
logging.WARNING,
|
|
212
|
+
"An error occurred while validating model version model_version_id, please check its status.",
|
|
213
|
+
),
|
|
214
|
+
]
|
|
215
|
+
|
|
216
|
+
|
|
182
217
|
@pytest.mark.parametrize("deletion_failed", [True, False])
|
|
183
218
|
def test_validate_model_version_hash_conflict(
|
|
184
219
|
mock_training_worker, default_model_version, caplog, deletion_failed
|
|
@@ -1711,11 +1711,29 @@ def test_list_transcriptions_wrong_recursive(mock_elements_worker):
|
|
|
1711
1711
|
)
|
|
1712
1712
|
|
|
1713
1713
|
|
|
1714
|
-
def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
|
|
1714
|
+
def test_list_transcriptions_wrong_worker_run(mock_elements_worker):
|
|
1715
1715
|
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
1716
1716
|
|
|
1717
1717
|
with pytest.raises(
|
|
1718
|
-
AssertionError, match="
|
|
1718
|
+
AssertionError, match="worker_run should be of type str or bool"
|
|
1719
|
+
):
|
|
1720
|
+
mock_elements_worker.list_transcriptions(
|
|
1721
|
+
element=elt,
|
|
1722
|
+
worker_run=1234,
|
|
1723
|
+
)
|
|
1724
|
+
|
|
1725
|
+
|
|
1726
|
+
def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
|
|
1727
|
+
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
1728
|
+
|
|
1729
|
+
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
|
|
1730
|
+
with (
|
|
1731
|
+
pytest.deprecated_call(
|
|
1732
|
+
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
|
|
1733
|
+
),
|
|
1734
|
+
pytest.raises(
|
|
1735
|
+
AssertionError, match="worker_version should be of type str or bool"
|
|
1736
|
+
),
|
|
1719
1737
|
):
|
|
1720
1738
|
mock_elements_worker.list_transcriptions(
|
|
1721
1739
|
element=elt,
|
|
@@ -1723,11 +1741,30 @@ def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
|
|
|
1723
1741
|
)
|
|
1724
1742
|
|
|
1725
1743
|
|
|
1726
|
-
def
|
|
1744
|
+
def test_list_transcriptions_wrong_bool_worker_run(mock_elements_worker):
|
|
1727
1745
|
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
1728
1746
|
|
|
1729
1747
|
with pytest.raises(
|
|
1730
|
-
AssertionError, match="if of type bool,
|
|
1748
|
+
AssertionError, match="if of type bool, worker_run can only be set to False"
|
|
1749
|
+
):
|
|
1750
|
+
mock_elements_worker.list_transcriptions(
|
|
1751
|
+
element=elt,
|
|
1752
|
+
worker_run=True,
|
|
1753
|
+
)
|
|
1754
|
+
|
|
1755
|
+
|
|
1756
|
+
def test_list_transcriptions_wrong_bool_worker_version(mock_elements_worker):
|
|
1757
|
+
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
1758
|
+
|
|
1759
|
+
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
|
|
1760
|
+
with (
|
|
1761
|
+
pytest.deprecated_call(
|
|
1762
|
+
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
|
|
1763
|
+
),
|
|
1764
|
+
pytest.raises(
|
|
1765
|
+
AssertionError,
|
|
1766
|
+
match="if of type bool, worker_version can only be set to False",
|
|
1767
|
+
),
|
|
1731
1768
|
):
|
|
1732
1769
|
mock_elements_worker.list_transcriptions(
|
|
1733
1770
|
element=elt,
|
|
@@ -1784,6 +1821,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
|
|
|
1784
1821
|
"text": "hey",
|
|
1785
1822
|
"confidence": 0.42,
|
|
1786
1823
|
"worker_version_id": "56785678-5678-5678-5678-567856785678",
|
|
1824
|
+
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
1787
1825
|
"element": None,
|
|
1788
1826
|
},
|
|
1789
1827
|
{
|
|
@@ -1791,6 +1829,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
|
|
|
1791
1829
|
"text": "it's",
|
|
1792
1830
|
"confidence": 0.42,
|
|
1793
1831
|
"worker_version_id": "56785678-5678-5678-5678-567856785678",
|
|
1832
|
+
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
1794
1833
|
"element": None,
|
|
1795
1834
|
},
|
|
1796
1835
|
{
|
|
@@ -1798,6 +1837,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
|
|
|
1798
1837
|
"text": "me",
|
|
1799
1838
|
"confidence": 0.42,
|
|
1800
1839
|
"worker_version_id": "56785678-5678-5678-5678-567856785678",
|
|
1840
|
+
"worker_run_id": "56785678-5678-5678-5678-567856785678",
|
|
1801
1841
|
"element": None,
|
|
1802
1842
|
},
|
|
1803
1843
|
]
|
|
@@ -1836,6 +1876,7 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1836
1876
|
"text": "hey",
|
|
1837
1877
|
"confidence": 0.42,
|
|
1838
1878
|
"worker_version_id": None,
|
|
1879
|
+
"worker_run_id": None,
|
|
1839
1880
|
"element": None,
|
|
1840
1881
|
}
|
|
1841
1882
|
]
|
|
@@ -1850,8 +1891,50 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1850
1891
|
},
|
|
1851
1892
|
)
|
|
1852
1893
|
|
|
1894
|
+
with pytest.deprecated_call(
|
|
1895
|
+
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
|
|
1896
|
+
):
|
|
1897
|
+
for idx, transcription in enumerate(
|
|
1898
|
+
mock_elements_worker.list_transcriptions(element=elt, worker_version=False)
|
|
1899
|
+
):
|
|
1900
|
+
assert transcription == trans[idx]
|
|
1901
|
+
|
|
1902
|
+
assert len(responses.calls) == len(BASE_API_CALLS) + 1
|
|
1903
|
+
assert [
|
|
1904
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
1905
|
+
] == BASE_API_CALLS + [
|
|
1906
|
+
(
|
|
1907
|
+
"GET",
|
|
1908
|
+
"http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_version=False",
|
|
1909
|
+
),
|
|
1910
|
+
]
|
|
1911
|
+
|
|
1912
|
+
|
|
1913
|
+
def test_list_transcriptions_manual_worker_run(responses, mock_elements_worker):
|
|
1914
|
+
elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
|
|
1915
|
+
trans = [
|
|
1916
|
+
{
|
|
1917
|
+
"id": "0000",
|
|
1918
|
+
"text": "hey",
|
|
1919
|
+
"confidence": 0.42,
|
|
1920
|
+
"worker_version_id": None,
|
|
1921
|
+
"worker_run_id": None,
|
|
1922
|
+
"element": None,
|
|
1923
|
+
}
|
|
1924
|
+
]
|
|
1925
|
+
responses.add(
|
|
1926
|
+
responses.GET,
|
|
1927
|
+
"http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_run=False",
|
|
1928
|
+
status=200,
|
|
1929
|
+
json={
|
|
1930
|
+
"count": 1,
|
|
1931
|
+
"next": None,
|
|
1932
|
+
"results": trans,
|
|
1933
|
+
},
|
|
1934
|
+
)
|
|
1935
|
+
|
|
1853
1936
|
for idx, transcription in enumerate(
|
|
1854
|
-
mock_elements_worker.list_transcriptions(element=elt,
|
|
1937
|
+
mock_elements_worker.list_transcriptions(element=elt, worker_run=False)
|
|
1855
1938
|
):
|
|
1856
1939
|
assert transcription == trans[idx]
|
|
1857
1940
|
|
|
@@ -1861,7 +1944,7 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1861
1944
|
] == BASE_API_CALLS + [
|
|
1862
1945
|
(
|
|
1863
1946
|
"GET",
|
|
1864
|
-
"http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?
|
|
1947
|
+
"http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_run=False",
|
|
1865
1948
|
),
|
|
1866
1949
|
]
|
|
1867
1950
|
|
|
@@ -1895,16 +1978,26 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1895
1978
|
"66666666-6666-6666-6666-666666666666",
|
|
1896
1979
|
),
|
|
1897
1980
|
),
|
|
1898
|
-
# Filter on element and
|
|
1981
|
+
# Filter on element and worker run should give the first transcription
|
|
1899
1982
|
(
|
|
1900
1983
|
{
|
|
1901
1984
|
"element": CachedElement(
|
|
1902
1985
|
id="11111111-1111-1111-1111-111111111111", type="page"
|
|
1903
1986
|
),
|
|
1904
|
-
"
|
|
1987
|
+
"worker_run": "56785678-5678-5678-5678-567856785678",
|
|
1905
1988
|
},
|
|
1906
1989
|
("11111111-1111-1111-1111-111111111111",),
|
|
1907
1990
|
),
|
|
1991
|
+
# Filter on element, manual worker run should give the sixth transcription
|
|
1992
|
+
(
|
|
1993
|
+
{
|
|
1994
|
+
"element": CachedElement(
|
|
1995
|
+
id="11111111-1111-1111-1111-111111111111", type="page"
|
|
1996
|
+
),
|
|
1997
|
+
"worker_run": False,
|
|
1998
|
+
},
|
|
1999
|
+
("66666666-6666-6666-6666-666666666666",),
|
|
2000
|
+
),
|
|
1908
2001
|
# Filter recursively on element should give all transcriptions inserted
|
|
1909
2002
|
(
|
|
1910
2003
|
{
|
|
@@ -1922,33 +2015,70 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1922
2015
|
"66666666-6666-6666-6666-666666666666",
|
|
1923
2016
|
),
|
|
1924
2017
|
),
|
|
1925
|
-
# Filter recursively on element and
|
|
2018
|
+
# Filter recursively on element and element_type should give three transcriptions
|
|
1926
2019
|
(
|
|
1927
2020
|
{
|
|
1928
2021
|
"element": CachedElement(
|
|
1929
2022
|
id="11111111-1111-1111-1111-111111111111", type="page"
|
|
1930
2023
|
),
|
|
1931
|
-
"
|
|
2024
|
+
"element_type": "something_else",
|
|
1932
2025
|
"recursive": True,
|
|
1933
2026
|
},
|
|
1934
2027
|
(
|
|
1935
2028
|
"22222222-2222-2222-2222-222222222222",
|
|
1936
|
-
"33333333-3333-3333-3333-333333333333",
|
|
1937
2029
|
"44444444-4444-4444-4444-444444444444",
|
|
1938
2030
|
"55555555-5555-5555-5555-555555555555",
|
|
1939
2031
|
),
|
|
1940
2032
|
),
|
|
1941
|
-
|
|
2033
|
+
],
|
|
2034
|
+
)
|
|
2035
|
+
def test_list_transcriptions_with_cache(
|
|
2036
|
+
responses, mock_elements_worker_with_cache, filters, expected_ids
|
|
2037
|
+
):
|
|
2038
|
+
# Check we have 5 elements already present in database
|
|
2039
|
+
assert CachedTranscription.select().count() == 6
|
|
2040
|
+
|
|
2041
|
+
# Query database through cache
|
|
2042
|
+
transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
|
|
2043
|
+
assert transcriptions.count() == len(expected_ids)
|
|
2044
|
+
for transcription, expected_id in zip(
|
|
2045
|
+
transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True
|
|
2046
|
+
):
|
|
2047
|
+
assert transcription.id == UUID(expected_id)
|
|
2048
|
+
|
|
2049
|
+
# Check the worker never hits the API for elements
|
|
2050
|
+
assert len(responses.calls) == len(BASE_API_CALLS)
|
|
2051
|
+
assert [
|
|
2052
|
+
(call.request.method, call.request.url) for call in responses.calls
|
|
2053
|
+
] == BASE_API_CALLS
|
|
2054
|
+
|
|
2055
|
+
|
|
2056
|
+
@pytest.mark.usefixtures("_mock_cached_transcriptions")
|
|
2057
|
+
@pytest.mark.parametrize(
|
|
2058
|
+
("filters", "expected_ids"),
|
|
2059
|
+
[
|
|
2060
|
+
# Filter on element and worker_version should give first transcription
|
|
1942
2061
|
(
|
|
1943
2062
|
{
|
|
1944
2063
|
"element": CachedElement(
|
|
1945
2064
|
id="11111111-1111-1111-1111-111111111111", type="page"
|
|
1946
2065
|
),
|
|
1947
|
-
"
|
|
2066
|
+
"worker_version": "56785678-5678-5678-5678-567856785678",
|
|
2067
|
+
},
|
|
2068
|
+
("11111111-1111-1111-1111-111111111111",),
|
|
2069
|
+
),
|
|
2070
|
+
# Filter recursively on element and worker_version should give four transcriptions
|
|
2071
|
+
(
|
|
2072
|
+
{
|
|
2073
|
+
"element": CachedElement(
|
|
2074
|
+
id="11111111-1111-1111-1111-111111111111", type="page"
|
|
2075
|
+
),
|
|
2076
|
+
"worker_version": "90129012-9012-9012-9012-901290129012",
|
|
1948
2077
|
"recursive": True,
|
|
1949
2078
|
},
|
|
1950
2079
|
(
|
|
1951
2080
|
"22222222-2222-2222-2222-222222222222",
|
|
2081
|
+
"33333333-3333-3333-3333-333333333333",
|
|
1952
2082
|
"44444444-4444-4444-4444-444444444444",
|
|
1953
2083
|
"55555555-5555-5555-5555-555555555555",
|
|
1954
2084
|
),
|
|
@@ -1965,14 +2095,17 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
|
|
|
1965
2095
|
),
|
|
1966
2096
|
],
|
|
1967
2097
|
)
|
|
1968
|
-
def
|
|
2098
|
+
def test_list_transcriptions_with_cache_deprecation(
|
|
1969
2099
|
responses, mock_elements_worker_with_cache, filters, expected_ids
|
|
1970
2100
|
):
|
|
1971
2101
|
# Check we have 5 elements already present in database
|
|
1972
2102
|
assert CachedTranscription.select().count() == 6
|
|
1973
2103
|
|
|
1974
|
-
|
|
1975
|
-
|
|
2104
|
+
with pytest.deprecated_call(
|
|
2105
|
+
match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
|
|
2106
|
+
):
|
|
2107
|
+
# Query database through cache
|
|
2108
|
+
transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
|
|
1976
2109
|
assert transcriptions.count() == len(expected_ids)
|
|
1977
2110
|
for transcription, expected_id in zip(
|
|
1978
2111
|
transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True
|
|
@@ -20,7 +20,8 @@ def test_get_worker_version(fake_dummy_worker):
|
|
|
20
20
|
|
|
21
21
|
api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
|
|
24
|
+
res = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
|
|
24
25
|
|
|
25
26
|
assert res == response
|
|
26
27
|
assert fake_dummy_worker._worker_version_cache[TEST_VERSION_ID] == response
|
|
@@ -33,8 +34,11 @@ def test_get_worker_version__uses_cache(fake_dummy_worker):
|
|
|
33
34
|
|
|
34
35
|
api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
|
|
35
36
|
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
|
|
38
|
+
response_1 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
|
|
39
|
+
|
|
40
|
+
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
|
|
41
|
+
response_2 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
|
|
38
42
|
|
|
39
43
|
assert response_1 == response
|
|
40
44
|
assert response_1 == response_2
|
|
@@ -51,12 +55,17 @@ def test_get_worker_version_slug(mocker, fake_dummy_worker):
|
|
|
51
55
|
"worker": {"slug": "mock_slug"},
|
|
52
56
|
}
|
|
53
57
|
|
|
54
|
-
|
|
58
|
+
with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
|
|
59
|
+
slug = fake_dummy_worker.get_worker_version_slug(TEST_VERSION_ID)
|
|
55
60
|
assert slug == "mock_slug"
|
|
56
61
|
|
|
57
62
|
|
|
58
63
|
def test_get_worker_version_slug_none(fake_dummy_worker):
|
|
59
|
-
|
|
64
|
+
# WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
|
|
65
|
+
with (
|
|
66
|
+
pytest.deprecated_call(match="WorkerVersion usage is deprecated."),
|
|
67
|
+
pytest.raises(ValueError, match="No worker version ID"),
|
|
68
|
+
):
|
|
60
69
|
fake_dummy_worker.get_worker_version_slug(None)
|
|
61
70
|
|
|
62
71
|
|
|
@@ -301,7 +310,7 @@ def test_start_activity_error(
|
|
|
301
310
|
),
|
|
302
311
|
]
|
|
303
312
|
assert logger.error.call_args_list == [
|
|
304
|
-
mocker.call("Ran on 1
|
|
313
|
+
mocker.call("Ran on 1 element: 0 completed, 1 failed")
|
|
305
314
|
]
|
|
306
315
|
|
|
307
316
|
|
|
@@ -459,6 +468,10 @@ def test_worker_config_multiple_source(
|
|
|
459
468
|
"id": "12341234-1234-1234-1234-123412341234",
|
|
460
469
|
"name": "Model version 1337",
|
|
461
470
|
"configuration": model_config,
|
|
471
|
+
"model": {
|
|
472
|
+
"id": "hahahaha-haha-haha-haha-hahahahahaha",
|
|
473
|
+
"name": "My model",
|
|
474
|
+
},
|
|
462
475
|
},
|
|
463
476
|
"process": {
|
|
464
477
|
"name": None,
|
tests/test_image.py
CHANGED
|
@@ -7,6 +7,7 @@ from pathlib import Path
|
|
|
7
7
|
|
|
8
8
|
import pytest
|
|
9
9
|
from PIL import Image, ImageChops, ImageOps
|
|
10
|
+
from requests import HTTPError
|
|
10
11
|
|
|
11
12
|
from arkindex_worker.cache import CachedElement, create_tables, init_cache_db
|
|
12
13
|
from arkindex_worker.image import (
|
|
@@ -19,6 +20,7 @@ from arkindex_worker.image import (
|
|
|
19
20
|
polygon_bounding_box,
|
|
20
21
|
revert_orientation,
|
|
21
22
|
trim_polygon,
|
|
23
|
+
upload_image,
|
|
22
24
|
)
|
|
23
25
|
from arkindex_worker.models import Element
|
|
24
26
|
|
|
@@ -547,3 +549,38 @@ def test_download_image_retry_with_max(responses):
|
|
|
547
549
|
assert list(map(attrgetter("request.url"), responses.calls)) == [full_url] * 3 + [
|
|
548
550
|
max_url
|
|
549
551
|
]
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def test_upload_image_retries(responses):
|
|
555
|
+
dest_url = "https://blabla.com/iiif/2/image_path.jpg/full/full/0/default.jpg"
|
|
556
|
+
responses.add(
|
|
557
|
+
responses.PUT,
|
|
558
|
+
dest_url,
|
|
559
|
+
status=400,
|
|
560
|
+
)
|
|
561
|
+
|
|
562
|
+
image = Image.open(FULL_IMAGE).convert("RGB")
|
|
563
|
+
with pytest.raises(
|
|
564
|
+
HTTPError, match=f"400 Client Error: Bad Request for url: {dest_url}"
|
|
565
|
+
):
|
|
566
|
+
upload_image(image, dest_url)
|
|
567
|
+
|
|
568
|
+
# We try 3 times
|
|
569
|
+
assert len(responses.calls) == 3
|
|
570
|
+
assert list(map(attrgetter("request.url"), responses.calls)) == [dest_url] * 3
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
def test_upload_image(responses):
|
|
574
|
+
dest_url = "https://blabla.com/iiif/2/image_path.jpg/full/full/0/default.jpg"
|
|
575
|
+
responses.add(
|
|
576
|
+
responses.PUT,
|
|
577
|
+
dest_url,
|
|
578
|
+
status=200,
|
|
579
|
+
)
|
|
580
|
+
|
|
581
|
+
image = Image.open(FULL_IMAGE).convert("RGB")
|
|
582
|
+
resp = upload_image(image, dest_url)
|
|
583
|
+
assert resp
|
|
584
|
+
|
|
585
|
+
assert len(responses.calls) == 1
|
|
586
|
+
assert list(map(attrgetter("request.url"), responses.calls)) == [dest_url]
|
tests/test_utils.py
CHANGED
|
@@ -1,11 +1,33 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from arkindex_worker.utils import (
|
|
6
|
+
close_delete_file,
|
|
7
|
+
extract_tar_zst_archive,
|
|
8
|
+
parse_source_id,
|
|
9
|
+
)
|
|
4
10
|
|
|
5
11
|
FIXTURES = Path(__file__).absolute().parent / "data"
|
|
6
12
|
ARCHIVE = FIXTURES / "archive.tar.zst"
|
|
7
13
|
|
|
8
14
|
|
|
15
|
+
@pytest.mark.parametrize(
|
|
16
|
+
("source_id", "expected"),
|
|
17
|
+
[
|
|
18
|
+
(None, None),
|
|
19
|
+
("", None),
|
|
20
|
+
(
|
|
21
|
+
"cafecafe-cafe-cafe-cafe-cafecafecafe",
|
|
22
|
+
"cafecafe-cafe-cafe-cafe-cafecafecafe",
|
|
23
|
+
),
|
|
24
|
+
("manual", False),
|
|
25
|
+
],
|
|
26
|
+
)
|
|
27
|
+
def test_parse_source_id(source_id, expected):
|
|
28
|
+
assert parse_source_id(source_id) == expected
|
|
29
|
+
|
|
30
|
+
|
|
9
31
|
def test_extract_tar_zst_archive(tmp_path):
|
|
10
32
|
destination = tmp_path / "destination"
|
|
11
33
|
_, archive_path = extract_tar_zst_archive(ARCHIVE, destination)
|
|
File without changes
|