arkindex-base-worker 0.3.6rc4__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. arkindex_base_worker-0.3.7.dist-info/LICENSE +21 -0
  2. arkindex_base_worker-0.3.7.dist-info/METADATA +77 -0
  3. arkindex_base_worker-0.3.7.dist-info/RECORD +47 -0
  4. {arkindex_base_worker-0.3.6rc4.dist-info → arkindex_base_worker-0.3.7.dist-info}/WHEEL +1 -1
  5. {arkindex_base_worker-0.3.6rc4.dist-info → arkindex_base_worker-0.3.7.dist-info}/top_level.txt +2 -0
  6. arkindex_worker/cache.py +14 -0
  7. arkindex_worker/image.py +29 -19
  8. arkindex_worker/models.py +14 -2
  9. arkindex_worker/utils.py +17 -3
  10. arkindex_worker/worker/__init__.py +122 -125
  11. arkindex_worker/worker/base.py +24 -24
  12. arkindex_worker/worker/classification.py +18 -25
  13. arkindex_worker/worker/dataset.py +24 -18
  14. arkindex_worker/worker/element.py +100 -19
  15. arkindex_worker/worker/entity.py +35 -4
  16. arkindex_worker/worker/metadata.py +21 -11
  17. arkindex_worker/worker/training.py +13 -0
  18. arkindex_worker/worker/transcription.py +45 -5
  19. arkindex_worker/worker/version.py +22 -0
  20. hooks/pre_gen_project.py +3 -0
  21. tests/conftest.py +16 -8
  22. tests/test_base_worker.py +0 -6
  23. tests/test_dataset_worker.py +291 -409
  24. tests/test_elements_worker/test_classifications.py +365 -539
  25. tests/test_elements_worker/test_cli.py +1 -1
  26. tests/test_elements_worker/test_dataset.py +97 -116
  27. tests/test_elements_worker/test_elements.py +354 -76
  28. tests/test_elements_worker/test_entities.py +22 -2
  29. tests/test_elements_worker/test_metadata.py +53 -27
  30. tests/test_elements_worker/test_training.py +35 -0
  31. tests/test_elements_worker/test_transcriptions.py +149 -16
  32. tests/test_elements_worker/test_worker.py +19 -6
  33. tests/test_image.py +37 -0
  34. tests/test_utils.py +23 -1
  35. worker-demo/tests/__init__.py +0 -0
  36. worker-demo/tests/conftest.py +32 -0
  37. worker-demo/tests/test_worker.py +12 -0
  38. worker-demo/worker_demo/__init__.py +6 -0
  39. worker-demo/worker_demo/worker.py +19 -0
  40. arkindex_base_worker-0.3.6rc4.dist-info/METADATA +0 -47
  41. arkindex_base_worker-0.3.6rc4.dist-info/RECORD +0 -40
@@ -259,7 +259,7 @@ def test_create_metadata_cached_element(responses, mock_elements_worker_with_cac
259
259
  ],
260
260
  ],
261
261
  )
262
- def test_create_metadatas(responses, mock_elements_worker, metadata_list):
262
+ def test_create_metadata_bulk(responses, mock_elements_worker, metadata_list):
263
263
  element = Element({"id": "12341234-1234-1234-1234-123412341234"})
264
264
  responses.add(
265
265
  responses.POST,
@@ -280,7 +280,7 @@ def test_create_metadatas(responses, mock_elements_worker, metadata_list):
280
280
  },
281
281
  )
282
282
 
283
- created_metadata_list = mock_elements_worker.create_metadatas(
283
+ created_metadata_list = mock_elements_worker.create_metadata_bulk(
284
284
  element, metadata_list
285
285
  )
286
286
 
@@ -327,7 +327,7 @@ def test_create_metadatas(responses, mock_elements_worker, metadata_list):
327
327
  ],
328
328
  ],
329
329
  )
330
- def test_create_metadatas_cached_element(
330
+ def test_create_metadata_bulk_cached_element(
331
331
  responses, mock_elements_worker_with_cache, metadata_list
332
332
  ):
333
333
  element = CachedElement.create(
@@ -352,7 +352,7 @@ def test_create_metadatas_cached_element(
352
352
  },
353
353
  )
354
354
 
355
- created_metadata_list = mock_elements_worker_with_cache.create_metadatas(
355
+ created_metadata_list = mock_elements_worker_with_cache.create_metadata_bulk(
356
356
  element, metadata_list
357
357
  )
358
358
 
@@ -386,7 +386,7 @@ def test_create_metadatas_cached_element(
386
386
 
387
387
 
388
388
  @pytest.mark.parametrize("wrong_element", [None, "not_element_type", 1234, 12.5])
389
- def test_create_metadatas_wrong_element(mock_elements_worker, wrong_element):
389
+ def test_create_metadata_bulk_wrong_element(mock_elements_worker, wrong_element):
390
390
  wrong_metadata_list = [
391
391
  {"type": MetaType.Text, "name": "fake_name", "value": "fake_value"}
392
392
  ]
@@ -394,13 +394,13 @@ def test_create_metadatas_wrong_element(mock_elements_worker, wrong_element):
394
394
  AssertionError,
395
395
  match="element shouldn't be null and should be of type Element or CachedElement",
396
396
  ):
397
- mock_elements_worker.create_metadatas(
398
- element=wrong_element, metadatas=wrong_metadata_list
397
+ mock_elements_worker.create_metadata_bulk(
398
+ element=wrong_element, metadata_list=wrong_metadata_list
399
399
  )
400
400
 
401
401
 
402
402
  @pytest.mark.parametrize("wrong_type", [None, "not_metadata_type", 1234, 12.5])
403
- def test_create_metadatas_wrong_type(mock_elements_worker, wrong_type):
403
+ def test_create_metadata_bulk_wrong_type(mock_elements_worker, wrong_type):
404
404
  element = Element({"id": "12341234-1234-1234-1234-123412341234"})
405
405
  wrong_metadata_list = [
406
406
  {"type": wrong_type, "name": "fake_name", "value": "fake_value"}
@@ -408,13 +408,13 @@ def test_create_metadatas_wrong_type(mock_elements_worker, wrong_type):
408
408
  with pytest.raises(
409
409
  AssertionError, match="type shouldn't be null and should be of type MetaType"
410
410
  ):
411
- mock_elements_worker.create_metadatas(
412
- element=element, metadatas=wrong_metadata_list
411
+ mock_elements_worker.create_metadata_bulk(
412
+ element=element, metadata_list=wrong_metadata_list
413
413
  )
414
414
 
415
415
 
416
416
  @pytest.mark.parametrize("wrong_name", [None, 1234, 12.5, [1, 2, 3, 4]])
417
- def test_create_metadatas_wrong_name(mock_elements_worker, wrong_name):
417
+ def test_create_metadata_bulk_wrong_name(mock_elements_worker, wrong_name):
418
418
  element = Element({"id": "fake_element_id"})
419
419
  wrong_metadata_list = [
420
420
  {"type": MetaType.Text, "name": wrong_name, "value": "fake_value"}
@@ -422,13 +422,13 @@ def test_create_metadatas_wrong_name(mock_elements_worker, wrong_name):
422
422
  with pytest.raises(
423
423
  AssertionError, match="name shouldn't be null and should be of type str"
424
424
  ):
425
- mock_elements_worker.create_metadatas(
426
- element=element, metadatas=wrong_metadata_list
425
+ mock_elements_worker.create_metadata_bulk(
426
+ element=element, metadata_list=wrong_metadata_list
427
427
  )
428
428
 
429
429
 
430
430
  @pytest.mark.parametrize("wrong_value", [None, [1, 2, 3, 4]])
431
- def test_create_metadatas_wrong_value(mock_elements_worker, wrong_value):
431
+ def test_create_metadata_bulk_wrong_value(mock_elements_worker, wrong_value):
432
432
  element = Element({"id": "fake_element_id"})
433
433
  wrong_metadata_list = [
434
434
  {"type": MetaType.Text, "name": "fake_name", "value": wrong_value}
@@ -439,13 +439,13 @@ def test_create_metadatas_wrong_value(mock_elements_worker, wrong_value):
439
439
  "value shouldn't be null and should be of type (str or float or int)"
440
440
  ),
441
441
  ):
442
- mock_elements_worker.create_metadatas(
443
- element=element, metadatas=wrong_metadata_list
442
+ mock_elements_worker.create_metadata_bulk(
443
+ element=element, metadata_list=wrong_metadata_list
444
444
  )
445
445
 
446
446
 
447
447
  @pytest.mark.parametrize("wrong_entity", [[1, 2, 3, 4], 1234, 12.5])
448
- def test_create_metadatas_wrong_entity(mock_elements_worker, wrong_entity):
448
+ def test_create_metadata_bulk_wrong_entity(mock_elements_worker, wrong_entity):
449
449
  element = Element({"id": "fake_element_id"})
450
450
  wrong_metadata_list = [
451
451
  {
@@ -456,12 +456,12 @@ def test_create_metadatas_wrong_entity(mock_elements_worker, wrong_entity):
456
456
  }
457
457
  ]
458
458
  with pytest.raises(AssertionError, match="entity_id should be None or a str"):
459
- mock_elements_worker.create_metadatas(
460
- element=element, metadatas=wrong_metadata_list
459
+ mock_elements_worker.create_metadata_bulk(
460
+ element=element, metadata_list=wrong_metadata_list
461
461
  )
462
462
 
463
463
 
464
- def test_create_metadatas_api_error(responses, mock_elements_worker):
464
+ def test_create_metadata_bulk_api_error(responses, mock_elements_worker):
465
465
  element = Element({"id": "12341234-1234-1234-1234-123412341234"})
466
466
  metadata_list = [
467
467
  {
@@ -478,7 +478,7 @@ def test_create_metadatas_api_error(responses, mock_elements_worker):
478
478
  )
479
479
 
480
480
  with pytest.raises(ErrorResponse):
481
- mock_elements_worker.create_metadatas(element, metadata_list)
481
+ mock_elements_worker.create_metadata_bulk(element, metadata_list)
482
482
 
483
483
  assert len(responses.calls) == len(BASE_API_CALLS) + 5
484
484
  assert [
@@ -508,14 +508,23 @@ def test_create_metadatas_api_error(responses, mock_elements_worker):
508
508
  ]
509
509
 
510
510
 
511
+ def test_list_element_metadata_wrong_load_parents(fake_dummy_worker):
512
+ element = Element({"id": "element_id"})
513
+ with pytest.raises(AssertionError, match="load_parents should be of type bool"):
514
+ fake_dummy_worker.list_element_metadata(
515
+ element=element,
516
+ load_parents="not bool",
517
+ )
518
+
519
+
511
520
  def test_list_element_metadata(fake_dummy_worker):
512
521
  element = Element({"id": "element_id"})
513
522
  fake_dummy_worker.api_client.add_response(
514
523
  "ListElementMetaData",
515
524
  id=element.id,
516
- response={"id": "metadata_id"},
525
+ response=[{"id": "metadata_id"}],
517
526
  )
518
- assert fake_dummy_worker.list_element_metadata(element) == {"id": "metadata_id"}
527
+ assert fake_dummy_worker.list_element_metadata(element) == [{"id": "metadata_id"}]
519
528
 
520
529
  assert len(fake_dummy_worker.api_client.history) == 1
521
530
  assert len(fake_dummy_worker.api_client.responses) == 0
@@ -527,11 +536,28 @@ def test_list_element_metadata_cached_element(mock_elements_worker_with_cache):
527
536
  mock_elements_worker_with_cache.api_client.add_response(
528
537
  "ListElementMetaData",
529
538
  id="element_id",
530
- response={"id": "metadata_id"},
539
+ response=[{"id": "metadata_id"}],
531
540
  )
532
- assert mock_elements_worker_with_cache.list_element_metadata(element) == {
533
- "id": "metadata_id"
534
- }
541
+ assert mock_elements_worker_with_cache.list_element_metadata(element) == [
542
+ {"id": "metadata_id"}
543
+ ]
535
544
 
536
545
  assert len(mock_elements_worker_with_cache.api_client.history) == 1
537
546
  assert len(mock_elements_worker_with_cache.api_client.responses) == 0
547
+
548
+
549
+ def test_list_element_metadata_with_load_parents(fake_dummy_worker):
550
+ element = Element({"id": "element_id"})
551
+ fake_dummy_worker.api_client.add_response(
552
+ "ListElementMetaData",
553
+ id=element.id,
554
+ load_parents=True,
555
+ response=[{"id": "metadata_id"}, {"id": "parent_metadata_id"}],
556
+ )
557
+ assert fake_dummy_worker.list_element_metadata(element, load_parents=True) == [
558
+ {"id": "metadata_id"},
559
+ {"id": "parent_metadata_id"},
560
+ ]
561
+
562
+ assert len(fake_dummy_worker.api_client.history) == 1
563
+ assert len(fake_dummy_worker.api_client.responses) == 0
@@ -179,6 +179,41 @@ def test_validate_model_version_not_created(mock_training_worker):
179
179
  mock_training_worker.validate_model_version(hash="a", size=1, archive_hash="b")
180
180
 
181
181
 
182
+ @pytest.mark.parametrize("status_code", [403, 500])
183
+ def test_validate_model_version_catch_errors(
184
+ mocker, mock_training_worker, caplog, status_code
185
+ ):
186
+ mocker.patch(
187
+ "arkindex_worker.worker.base.BaseWorker.request.retry.retry", return_value=False
188
+ )
189
+
190
+ mock_training_worker.model_version = {"id": "model_version_id"}
191
+ args = {
192
+ "hash": "hash",
193
+ "archive_hash": "archive_hash",
194
+ "size": 30,
195
+ }
196
+ mock_training_worker.api_client.add_error_response(
197
+ "ValidateModelVersion",
198
+ id="model_version_id",
199
+ status_code=status_code,
200
+ body=args,
201
+ )
202
+
203
+ mock_training_worker.validate_model_version(**args)
204
+ assert mock_training_worker.model_version == {"id": "model_version_id"}
205
+ assert [
206
+ (level, message)
207
+ for module, level, message in caplog.record_tuples
208
+ if module == "arkindex_worker"
209
+ ] == [
210
+ (
211
+ logging.WARNING,
212
+ "An error occurred while validating model version model_version_id, please check its status.",
213
+ ),
214
+ ]
215
+
216
+
182
217
  @pytest.mark.parametrize("deletion_failed", [True, False])
183
218
  def test_validate_model_version_hash_conflict(
184
219
  mock_training_worker, default_model_version, caplog, deletion_failed
@@ -1711,11 +1711,29 @@ def test_list_transcriptions_wrong_recursive(mock_elements_worker):
1711
1711
  )
1712
1712
 
1713
1713
 
1714
- def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
1714
+ def test_list_transcriptions_wrong_worker_run(mock_elements_worker):
1715
1715
  elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
1716
1716
 
1717
1717
  with pytest.raises(
1718
- AssertionError, match="worker_version should be of type str or bool"
1718
+ AssertionError, match="worker_run should be of type str or bool"
1719
+ ):
1720
+ mock_elements_worker.list_transcriptions(
1721
+ element=elt,
1722
+ worker_run=1234,
1723
+ )
1724
+
1725
+
1726
+ def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
1727
+ elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
1728
+
1729
+ # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
1730
+ with (
1731
+ pytest.deprecated_call(
1732
+ match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
1733
+ ),
1734
+ pytest.raises(
1735
+ AssertionError, match="worker_version should be of type str or bool"
1736
+ ),
1719
1737
  ):
1720
1738
  mock_elements_worker.list_transcriptions(
1721
1739
  element=elt,
@@ -1723,11 +1741,30 @@ def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
1723
1741
  )
1724
1742
 
1725
1743
 
1726
- def test_list_transcriptions_wrong_bool_worker_version(mock_elements_worker):
1744
+ def test_list_transcriptions_wrong_bool_worker_run(mock_elements_worker):
1727
1745
  elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
1728
1746
 
1729
1747
  with pytest.raises(
1730
- AssertionError, match="if of type bool, worker_version can only be set to False"
1748
+ AssertionError, match="if of type bool, worker_run can only be set to False"
1749
+ ):
1750
+ mock_elements_worker.list_transcriptions(
1751
+ element=elt,
1752
+ worker_run=True,
1753
+ )
1754
+
1755
+
1756
+ def test_list_transcriptions_wrong_bool_worker_version(mock_elements_worker):
1757
+ elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
1758
+
1759
+ # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
1760
+ with (
1761
+ pytest.deprecated_call(
1762
+ match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
1763
+ ),
1764
+ pytest.raises(
1765
+ AssertionError,
1766
+ match="if of type bool, worker_version can only be set to False",
1767
+ ),
1731
1768
  ):
1732
1769
  mock_elements_worker.list_transcriptions(
1733
1770
  element=elt,
@@ -1784,6 +1821,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
1784
1821
  "text": "hey",
1785
1822
  "confidence": 0.42,
1786
1823
  "worker_version_id": "56785678-5678-5678-5678-567856785678",
1824
+ "worker_run_id": "56785678-5678-5678-5678-567856785678",
1787
1825
  "element": None,
1788
1826
  },
1789
1827
  {
@@ -1791,6 +1829,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
1791
1829
  "text": "it's",
1792
1830
  "confidence": 0.42,
1793
1831
  "worker_version_id": "56785678-5678-5678-5678-567856785678",
1832
+ "worker_run_id": "56785678-5678-5678-5678-567856785678",
1794
1833
  "element": None,
1795
1834
  },
1796
1835
  {
@@ -1798,6 +1837,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
1798
1837
  "text": "me",
1799
1838
  "confidence": 0.42,
1800
1839
  "worker_version_id": "56785678-5678-5678-5678-567856785678",
1840
+ "worker_run_id": "56785678-5678-5678-5678-567856785678",
1801
1841
  "element": None,
1802
1842
  },
1803
1843
  ]
@@ -1836,6 +1876,7 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
1836
1876
  "text": "hey",
1837
1877
  "confidence": 0.42,
1838
1878
  "worker_version_id": None,
1879
+ "worker_run_id": None,
1839
1880
  "element": None,
1840
1881
  }
1841
1882
  ]
@@ -1850,8 +1891,50 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
1850
1891
  },
1851
1892
  )
1852
1893
 
1894
+ with pytest.deprecated_call(
1895
+ match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
1896
+ ):
1897
+ for idx, transcription in enumerate(
1898
+ mock_elements_worker.list_transcriptions(element=elt, worker_version=False)
1899
+ ):
1900
+ assert transcription == trans[idx]
1901
+
1902
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
1903
+ assert [
1904
+ (call.request.method, call.request.url) for call in responses.calls
1905
+ ] == BASE_API_CALLS + [
1906
+ (
1907
+ "GET",
1908
+ "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_version=False",
1909
+ ),
1910
+ ]
1911
+
1912
+
1913
+ def test_list_transcriptions_manual_worker_run(responses, mock_elements_worker):
1914
+ elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
1915
+ trans = [
1916
+ {
1917
+ "id": "0000",
1918
+ "text": "hey",
1919
+ "confidence": 0.42,
1920
+ "worker_version_id": None,
1921
+ "worker_run_id": None,
1922
+ "element": None,
1923
+ }
1924
+ ]
1925
+ responses.add(
1926
+ responses.GET,
1927
+ "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_run=False",
1928
+ status=200,
1929
+ json={
1930
+ "count": 1,
1931
+ "next": None,
1932
+ "results": trans,
1933
+ },
1934
+ )
1935
+
1853
1936
  for idx, transcription in enumerate(
1854
- mock_elements_worker.list_transcriptions(element=elt, worker_version=False)
1937
+ mock_elements_worker.list_transcriptions(element=elt, worker_run=False)
1855
1938
  ):
1856
1939
  assert transcription == trans[idx]
1857
1940
 
@@ -1861,7 +1944,7 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
1861
1944
  ] == BASE_API_CALLS + [
1862
1945
  (
1863
1946
  "GET",
1864
- "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_version=False",
1947
+ "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_run=False",
1865
1948
  ),
1866
1949
  ]
1867
1950
 
@@ -1895,16 +1978,26 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
1895
1978
  "66666666-6666-6666-6666-666666666666",
1896
1979
  ),
1897
1980
  ),
1898
- # Filter on element and worker_version should give first transcription
1981
+ # Filter on element and worker run should give the first transcription
1899
1982
  (
1900
1983
  {
1901
1984
  "element": CachedElement(
1902
1985
  id="11111111-1111-1111-1111-111111111111", type="page"
1903
1986
  ),
1904
- "worker_version": "56785678-5678-5678-5678-567856785678",
1987
+ "worker_run": "56785678-5678-5678-5678-567856785678",
1905
1988
  },
1906
1989
  ("11111111-1111-1111-1111-111111111111",),
1907
1990
  ),
1991
+ # Filter on element, manual worker run should give the sixth transcription
1992
+ (
1993
+ {
1994
+ "element": CachedElement(
1995
+ id="11111111-1111-1111-1111-111111111111", type="page"
1996
+ ),
1997
+ "worker_run": False,
1998
+ },
1999
+ ("66666666-6666-6666-6666-666666666666",),
2000
+ ),
1908
2001
  # Filter recursively on element should give all transcriptions inserted
1909
2002
  (
1910
2003
  {
@@ -1922,33 +2015,70 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
1922
2015
  "66666666-6666-6666-6666-666666666666",
1923
2016
  ),
1924
2017
  ),
1925
- # Filter recursively on element and worker_version should give four transcriptions
2018
+ # Filter recursively on element and element_type should give three transcriptions
1926
2019
  (
1927
2020
  {
1928
2021
  "element": CachedElement(
1929
2022
  id="11111111-1111-1111-1111-111111111111", type="page"
1930
2023
  ),
1931
- "worker_version": "90129012-9012-9012-9012-901290129012",
2024
+ "element_type": "something_else",
1932
2025
  "recursive": True,
1933
2026
  },
1934
2027
  (
1935
2028
  "22222222-2222-2222-2222-222222222222",
1936
- "33333333-3333-3333-3333-333333333333",
1937
2029
  "44444444-4444-4444-4444-444444444444",
1938
2030
  "55555555-5555-5555-5555-555555555555",
1939
2031
  ),
1940
2032
  ),
1941
- # Filter recursively on element and element_type should give three transcriptions
2033
+ ],
2034
+ )
2035
+ def test_list_transcriptions_with_cache(
2036
+ responses, mock_elements_worker_with_cache, filters, expected_ids
2037
+ ):
2038
+ # Check we have 6 transcriptions already present in database
2039
+ assert CachedTranscription.select().count() == 6
2040
+
2041
+ # Query database through cache
2042
+ transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
2043
+ assert transcriptions.count() == len(expected_ids)
2044
+ for transcription, expected_id in zip(
2045
+ transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True
2046
+ ):
2047
+ assert transcription.id == UUID(expected_id)
2048
+
2049
+ # Check the worker never hits the API for transcriptions
2050
+ assert len(responses.calls) == len(BASE_API_CALLS)
2051
+ assert [
2052
+ (call.request.method, call.request.url) for call in responses.calls
2053
+ ] == BASE_API_CALLS
2054
+
2055
+
2056
+ @pytest.mark.usefixtures("_mock_cached_transcriptions")
2057
+ @pytest.mark.parametrize(
2058
+ ("filters", "expected_ids"),
2059
+ [
2060
+ # Filter on element and worker_version should give first transcription
1942
2061
  (
1943
2062
  {
1944
2063
  "element": CachedElement(
1945
2064
  id="11111111-1111-1111-1111-111111111111", type="page"
1946
2065
  ),
1947
- "element_type": "something_else",
2066
+ "worker_version": "56785678-5678-5678-5678-567856785678",
2067
+ },
2068
+ ("11111111-1111-1111-1111-111111111111",),
2069
+ ),
2070
+ # Filter recursively on element and worker_version should give four transcriptions
2071
+ (
2072
+ {
2073
+ "element": CachedElement(
2074
+ id="11111111-1111-1111-1111-111111111111", type="page"
2075
+ ),
2076
+ "worker_version": "90129012-9012-9012-9012-901290129012",
1948
2077
  "recursive": True,
1949
2078
  },
1950
2079
  (
1951
2080
  "22222222-2222-2222-2222-222222222222",
2081
+ "33333333-3333-3333-3333-333333333333",
1952
2082
  "44444444-4444-4444-4444-444444444444",
1953
2083
  "55555555-5555-5555-5555-555555555555",
1954
2084
  ),
@@ -1965,14 +2095,17 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
1965
2095
  ),
1966
2096
  ],
1967
2097
  )
1968
- def test_list_transcriptions_with_cache(
2098
+ def test_list_transcriptions_with_cache_deprecation(
1969
2099
  responses, mock_elements_worker_with_cache, filters, expected_ids
1970
2100
  ):
1971
2101
  # Check we have 6 transcriptions already present in database
1972
2102
  assert CachedTranscription.select().count() == 6
1973
2103
 
1974
- # Query database through cache
1975
- transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
2104
+ with pytest.deprecated_call(
2105
+ match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
2106
+ ):
2107
+ # Query database through cache
2108
+ transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
1976
2109
  assert transcriptions.count() == len(expected_ids)
1977
2110
  for transcription, expected_id in zip(
1978
2111
  transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True
@@ -20,7 +20,8 @@ def test_get_worker_version(fake_dummy_worker):
20
20
 
21
21
  api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
22
22
 
23
- res = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
23
+ with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
24
+ res = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
24
25
 
25
26
  assert res == response
26
27
  assert fake_dummy_worker._worker_version_cache[TEST_VERSION_ID] == response
@@ -33,8 +34,11 @@ def test_get_worker_version__uses_cache(fake_dummy_worker):
33
34
 
34
35
  api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
35
36
 
36
- response_1 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
37
- response_2 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
37
+ with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
38
+ response_1 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
39
+
40
+ with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
41
+ response_2 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
38
42
 
39
43
  assert response_1 == response
40
44
  assert response_1 == response_2
@@ -51,12 +55,17 @@ def test_get_worker_version_slug(mocker, fake_dummy_worker):
51
55
  "worker": {"slug": "mock_slug"},
52
56
  }
53
57
 
54
- slug = fake_dummy_worker.get_worker_version_slug(TEST_VERSION_ID)
58
+ with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
59
+ slug = fake_dummy_worker.get_worker_version_slug(TEST_VERSION_ID)
55
60
  assert slug == "mock_slug"
56
61
 
57
62
 
58
63
  def test_get_worker_version_slug_none(fake_dummy_worker):
59
- with pytest.raises(ValueError, match="No worker version ID"):
64
+ # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
65
+ with (
66
+ pytest.deprecated_call(match="WorkerVersion usage is deprecated."),
67
+ pytest.raises(ValueError, match="No worker version ID"),
68
+ ):
60
69
  fake_dummy_worker.get_worker_version_slug(None)
61
70
 
62
71
 
@@ -301,7 +310,7 @@ def test_start_activity_error(
301
310
  ),
302
311
  ]
303
312
  assert logger.error.call_args_list == [
304
- mocker.call("Ran on 1 elements: 0 completed, 1 failed")
313
+ mocker.call("Ran on 1 element: 0 completed, 1 failed")
305
314
  ]
306
315
 
307
316
 
@@ -459,6 +468,10 @@ def test_worker_config_multiple_source(
459
468
  "id": "12341234-1234-1234-1234-123412341234",
460
469
  "name": "Model version 1337",
461
470
  "configuration": model_config,
471
+ "model": {
472
+ "id": "hahahaha-haha-haha-haha-hahahahahaha",
473
+ "name": "My model",
474
+ },
462
475
  },
463
476
  "process": {
464
477
  "name": None,
tests/test_image.py CHANGED
@@ -7,6 +7,7 @@ from pathlib import Path
7
7
 
8
8
  import pytest
9
9
  from PIL import Image, ImageChops, ImageOps
10
+ from requests import HTTPError
10
11
 
11
12
  from arkindex_worker.cache import CachedElement, create_tables, init_cache_db
12
13
  from arkindex_worker.image import (
@@ -19,6 +20,7 @@ from arkindex_worker.image import (
19
20
  polygon_bounding_box,
20
21
  revert_orientation,
21
22
  trim_polygon,
23
+ upload_image,
22
24
  )
23
25
  from arkindex_worker.models import Element
24
26
 
@@ -547,3 +549,38 @@ def test_download_image_retry_with_max(responses):
547
549
  assert list(map(attrgetter("request.url"), responses.calls)) == [full_url] * 3 + [
548
550
  max_url
549
551
  ]
552
+
553
+
554
+ def test_upload_image_retries(responses):
555
+ dest_url = "https://blabla.com/iiif/2/image_path.jpg/full/full/0/default.jpg"
556
+ responses.add(
557
+ responses.PUT,
558
+ dest_url,
559
+ status=400,
560
+ )
561
+
562
+ image = Image.open(FULL_IMAGE).convert("RGB")
563
+ with pytest.raises(
564
+ HTTPError, match=f"400 Client Error: Bad Request for url: {dest_url}"
565
+ ):
566
+ upload_image(image, dest_url)
567
+
568
+ # We try 3 times
569
+ assert len(responses.calls) == 3
570
+ assert list(map(attrgetter("request.url"), responses.calls)) == [dest_url] * 3
571
+
572
+
573
+ def test_upload_image(responses):
574
+ dest_url = "https://blabla.com/iiif/2/image_path.jpg/full/full/0/default.jpg"
575
+ responses.add(
576
+ responses.PUT,
577
+ dest_url,
578
+ status=200,
579
+ )
580
+
581
+ image = Image.open(FULL_IMAGE).convert("RGB")
582
+ resp = upload_image(image, dest_url)
583
+ assert resp
584
+
585
+ assert len(responses.calls) == 1
586
+ assert list(map(attrgetter("request.url"), responses.calls)) == [dest_url]
tests/test_utils.py CHANGED
@@ -1,11 +1,33 @@
1
1
  from pathlib import Path
2
2
 
3
- from arkindex_worker.utils import close_delete_file, extract_tar_zst_archive
3
+ import pytest
4
+
5
+ from arkindex_worker.utils import (
6
+ close_delete_file,
7
+ extract_tar_zst_archive,
8
+ parse_source_id,
9
+ )
4
10
 
5
11
  FIXTURES = Path(__file__).absolute().parent / "data"
6
12
  ARCHIVE = FIXTURES / "archive.tar.zst"
7
13
 
8
14
 
15
+ @pytest.mark.parametrize(
16
+ ("source_id", "expected"),
17
+ [
18
+ (None, None),
19
+ ("", None),
20
+ (
21
+ "cafecafe-cafe-cafe-cafe-cafecafecafe",
22
+ "cafecafe-cafe-cafe-cafe-cafecafecafe",
23
+ ),
24
+ ("manual", False),
25
+ ],
26
+ )
27
+ def test_parse_source_id(source_id, expected):
28
+ assert parse_source_id(source_id) == expected
29
+
30
+
9
31
  def test_extract_tar_zst_archive(tmp_path):
10
32
  destination = tmp_path / "destination"
11
33
  _, archive_path = extract_tar_zst_archive(ARCHIVE, destination)
File without changes