arkindex-base-worker 0.3.6rc5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.dist-info}/METADATA +14 -13
  2. arkindex_base_worker-0.3.7.dist-info/RECORD +47 -0
  3. {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.dist-info}/WHEEL +1 -1
  4. {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.dist-info}/top_level.txt +2 -0
  5. arkindex_worker/cache.py +14 -0
  6. arkindex_worker/image.py +29 -19
  7. arkindex_worker/models.py +14 -2
  8. arkindex_worker/utils.py +17 -3
  9. arkindex_worker/worker/__init__.py +122 -125
  10. arkindex_worker/worker/base.py +24 -24
  11. arkindex_worker/worker/classification.py +18 -25
  12. arkindex_worker/worker/dataset.py +24 -18
  13. arkindex_worker/worker/element.py +45 -6
  14. arkindex_worker/worker/entity.py +35 -4
  15. arkindex_worker/worker/metadata.py +21 -11
  16. arkindex_worker/worker/training.py +13 -0
  17. arkindex_worker/worker/transcription.py +45 -5
  18. arkindex_worker/worker/version.py +22 -0
  19. hooks/pre_gen_project.py +3 -0
  20. tests/conftest.py +14 -6
  21. tests/test_base_worker.py +0 -6
  22. tests/test_dataset_worker.py +291 -409
  23. tests/test_elements_worker/test_classifications.py +365 -539
  24. tests/test_elements_worker/test_cli.py +1 -1
  25. tests/test_elements_worker/test_dataset.py +97 -116
  26. tests/test_elements_worker/test_elements.py +227 -61
  27. tests/test_elements_worker/test_entities.py +22 -2
  28. tests/test_elements_worker/test_metadata.py +53 -27
  29. tests/test_elements_worker/test_training.py +35 -0
  30. tests/test_elements_worker/test_transcriptions.py +149 -16
  31. tests/test_elements_worker/test_worker.py +19 -6
  32. tests/test_image.py +37 -0
  33. tests/test_utils.py +23 -1
  34. worker-demo/tests/__init__.py +0 -0
  35. worker-demo/tests/conftest.py +32 -0
  36. worker-demo/tests/test_worker.py +12 -0
  37. worker-demo/worker_demo/__init__.py +6 -0
  38. worker-demo/worker_demo/worker.py +19 -0
  39. arkindex_base_worker-0.3.6rc5.dist-info/RECORD +0 -41
  40. {arkindex_base_worker-0.3.6rc5.dist-info → arkindex_base_worker-0.3.7.dist-info}/LICENSE +0 -0
@@ -179,6 +179,41 @@ def test_validate_model_version_not_created(mock_training_worker):
179
179
  mock_training_worker.validate_model_version(hash="a", size=1, archive_hash="b")
180
180
 
181
181
 
182
+ @pytest.mark.parametrize("status_code", [403, 500])
183
+ def test_validate_model_version_catch_errors(
184
+ mocker, mock_training_worker, caplog, status_code
185
+ ):
186
+ mocker.patch(
187
+ "arkindex_worker.worker.base.BaseWorker.request.retry.retry", return_value=False
188
+ )
189
+
190
+ mock_training_worker.model_version = {"id": "model_version_id"}
191
+ args = {
192
+ "hash": "hash",
193
+ "archive_hash": "archive_hash",
194
+ "size": 30,
195
+ }
196
+ mock_training_worker.api_client.add_error_response(
197
+ "ValidateModelVersion",
198
+ id="model_version_id",
199
+ status_code=status_code,
200
+ body=args,
201
+ )
202
+
203
+ mock_training_worker.validate_model_version(**args)
204
+ assert mock_training_worker.model_version == {"id": "model_version_id"}
205
+ assert [
206
+ (level, message)
207
+ for module, level, message in caplog.record_tuples
208
+ if module == "arkindex_worker"
209
+ ] == [
210
+ (
211
+ logging.WARNING,
212
+ "An error occurred while validating model version model_version_id, please check its status.",
213
+ ),
214
+ ]
215
+
216
+
182
217
  @pytest.mark.parametrize("deletion_failed", [True, False])
183
218
  def test_validate_model_version_hash_conflict(
184
219
  mock_training_worker, default_model_version, caplog, deletion_failed
@@ -1711,11 +1711,29 @@ def test_list_transcriptions_wrong_recursive(mock_elements_worker):
1711
1711
  )
1712
1712
 
1713
1713
 
1714
- def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
1714
+ def test_list_transcriptions_wrong_worker_run(mock_elements_worker):
1715
1715
  elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
1716
1716
 
1717
1717
  with pytest.raises(
1718
- AssertionError, match="worker_version should be of type str or bool"
1718
+ AssertionError, match="worker_run should be of type str or bool"
1719
+ ):
1720
+ mock_elements_worker.list_transcriptions(
1721
+ element=elt,
1722
+ worker_run=1234,
1723
+ )
1724
+
1725
+
1726
+ def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
1727
+ elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
1728
+
1729
+ # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
1730
+ with (
1731
+ pytest.deprecated_call(
1732
+ match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
1733
+ ),
1734
+ pytest.raises(
1735
+ AssertionError, match="worker_version should be of type str or bool"
1736
+ ),
1719
1737
  ):
1720
1738
  mock_elements_worker.list_transcriptions(
1721
1739
  element=elt,
@@ -1723,11 +1741,30 @@ def test_list_transcriptions_wrong_worker_version(mock_elements_worker):
1723
1741
  )
1724
1742
 
1725
1743
 
1726
- def test_list_transcriptions_wrong_bool_worker_version(mock_elements_worker):
1744
+ def test_list_transcriptions_wrong_bool_worker_run(mock_elements_worker):
1727
1745
  elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
1728
1746
 
1729
1747
  with pytest.raises(
1730
- AssertionError, match="if of type bool, worker_version can only be set to False"
1748
+ AssertionError, match="if of type bool, worker_run can only be set to False"
1749
+ ):
1750
+ mock_elements_worker.list_transcriptions(
1751
+ element=elt,
1752
+ worker_run=True,
1753
+ )
1754
+
1755
+
1756
+ def test_list_transcriptions_wrong_bool_worker_version(mock_elements_worker):
1757
+ elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
1758
+
1759
+ # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
1760
+ with (
1761
+ pytest.deprecated_call(
1762
+ match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
1763
+ ),
1764
+ pytest.raises(
1765
+ AssertionError,
1766
+ match="if of type bool, worker_version can only be set to False",
1767
+ ),
1731
1768
  ):
1732
1769
  mock_elements_worker.list_transcriptions(
1733
1770
  element=elt,
@@ -1784,6 +1821,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
1784
1821
  "text": "hey",
1785
1822
  "confidence": 0.42,
1786
1823
  "worker_version_id": "56785678-5678-5678-5678-567856785678",
1824
+ "worker_run_id": "56785678-5678-5678-5678-567856785678",
1787
1825
  "element": None,
1788
1826
  },
1789
1827
  {
@@ -1791,6 +1829,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
1791
1829
  "text": "it's",
1792
1830
  "confidence": 0.42,
1793
1831
  "worker_version_id": "56785678-5678-5678-5678-567856785678",
1832
+ "worker_run_id": "56785678-5678-5678-5678-567856785678",
1794
1833
  "element": None,
1795
1834
  },
1796
1835
  {
@@ -1798,6 +1837,7 @@ def test_list_transcriptions(responses, mock_elements_worker):
1798
1837
  "text": "me",
1799
1838
  "confidence": 0.42,
1800
1839
  "worker_version_id": "56785678-5678-5678-5678-567856785678",
1840
+ "worker_run_id": "56785678-5678-5678-5678-567856785678",
1801
1841
  "element": None,
1802
1842
  },
1803
1843
  ]
@@ -1836,6 +1876,7 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
1836
1876
  "text": "hey",
1837
1877
  "confidence": 0.42,
1838
1878
  "worker_version_id": None,
1879
+ "worker_run_id": None,
1839
1880
  "element": None,
1840
1881
  }
1841
1882
  ]
@@ -1850,8 +1891,50 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
1850
1891
  },
1851
1892
  )
1852
1893
 
1894
+ with pytest.deprecated_call(
1895
+ match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
1896
+ ):
1897
+ for idx, transcription in enumerate(
1898
+ mock_elements_worker.list_transcriptions(element=elt, worker_version=False)
1899
+ ):
1900
+ assert transcription == trans[idx]
1901
+
1902
+ assert len(responses.calls) == len(BASE_API_CALLS) + 1
1903
+ assert [
1904
+ (call.request.method, call.request.url) for call in responses.calls
1905
+ ] == BASE_API_CALLS + [
1906
+ (
1907
+ "GET",
1908
+ "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_version=False",
1909
+ ),
1910
+ ]
1911
+
1912
+
1913
+ def test_list_transcriptions_manual_worker_run(responses, mock_elements_worker):
1914
+ elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
1915
+ trans = [
1916
+ {
1917
+ "id": "0000",
1918
+ "text": "hey",
1919
+ "confidence": 0.42,
1920
+ "worker_version_id": None,
1921
+ "worker_run_id": None,
1922
+ "element": None,
1923
+ }
1924
+ ]
1925
+ responses.add(
1926
+ responses.GET,
1927
+ "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_run=False",
1928
+ status=200,
1929
+ json={
1930
+ "count": 1,
1931
+ "next": None,
1932
+ "results": trans,
1933
+ },
1934
+ )
1935
+
1853
1936
  for idx, transcription in enumerate(
1854
- mock_elements_worker.list_transcriptions(element=elt, worker_version=False)
1937
+ mock_elements_worker.list_transcriptions(element=elt, worker_run=False)
1855
1938
  ):
1856
1939
  assert transcription == trans[idx]
1857
1940
 
@@ -1861,7 +1944,7 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
1861
1944
  ] == BASE_API_CALLS + [
1862
1945
  (
1863
1946
  "GET",
1864
- "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_version=False",
1947
+ "http://testserver/api/v1/element/12341234-1234-1234-1234-123412341234/transcriptions/?worker_run=False",
1865
1948
  ),
1866
1949
  ]
1867
1950
 
@@ -1895,16 +1978,26 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
1895
1978
  "66666666-6666-6666-6666-666666666666",
1896
1979
  ),
1897
1980
  ),
1898
- # Filter on element and worker_version should give first transcription
1981
+ # Filter on element and worker run should give the first transcription
1899
1982
  (
1900
1983
  {
1901
1984
  "element": CachedElement(
1902
1985
  id="11111111-1111-1111-1111-111111111111", type="page"
1903
1986
  ),
1904
- "worker_version": "56785678-5678-5678-5678-567856785678",
1987
+ "worker_run": "56785678-5678-5678-5678-567856785678",
1905
1988
  },
1906
1989
  ("11111111-1111-1111-1111-111111111111",),
1907
1990
  ),
1991
+ # Filter on element, manual worker run should give the sixth transcription
1992
+ (
1993
+ {
1994
+ "element": CachedElement(
1995
+ id="11111111-1111-1111-1111-111111111111", type="page"
1996
+ ),
1997
+ "worker_run": False,
1998
+ },
1999
+ ("66666666-6666-6666-6666-666666666666",),
2000
+ ),
1908
2001
  # Filter recursively on element should give all transcriptions inserted
1909
2002
  (
1910
2003
  {
@@ -1922,33 +2015,70 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
1922
2015
  "66666666-6666-6666-6666-666666666666",
1923
2016
  ),
1924
2017
  ),
1925
- # Filter recursively on element and worker_version should give four transcriptions
2018
+ # Filter recursively on element and element_type should give three transcriptions
1926
2019
  (
1927
2020
  {
1928
2021
  "element": CachedElement(
1929
2022
  id="11111111-1111-1111-1111-111111111111", type="page"
1930
2023
  ),
1931
- "worker_version": "90129012-9012-9012-9012-901290129012",
2024
+ "element_type": "something_else",
1932
2025
  "recursive": True,
1933
2026
  },
1934
2027
  (
1935
2028
  "22222222-2222-2222-2222-222222222222",
1936
- "33333333-3333-3333-3333-333333333333",
1937
2029
  "44444444-4444-4444-4444-444444444444",
1938
2030
  "55555555-5555-5555-5555-555555555555",
1939
2031
  ),
1940
2032
  ),
1941
- # Filter recursively on element and element_type should give three transcriptions
2033
+ ],
2034
+ )
2035
+ def test_list_transcriptions_with_cache(
2036
+ responses, mock_elements_worker_with_cache, filters, expected_ids
2037
+ ):
2038
+ # Check we have 6 transcriptions already present in database
2039
+ assert CachedTranscription.select().count() == 6
2040
+
2041
+ # Query database through cache
2042
+ transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
2043
+ assert transcriptions.count() == len(expected_ids)
2044
+ for transcription, expected_id in zip(
2045
+ transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True
2046
+ ):
2047
+ assert transcription.id == UUID(expected_id)
2048
+
2049
+ # Check the worker never hits the API for elements
2050
+ assert len(responses.calls) == len(BASE_API_CALLS)
2051
+ assert [
2052
+ (call.request.method, call.request.url) for call in responses.calls
2053
+ ] == BASE_API_CALLS
2054
+
2055
+
2056
+ @pytest.mark.usefixtures("_mock_cached_transcriptions")
2057
+ @pytest.mark.parametrize(
2058
+ ("filters", "expected_ids"),
2059
+ [
2060
+ # Filter on element and worker_version should give first transcription
1942
2061
  (
1943
2062
  {
1944
2063
  "element": CachedElement(
1945
2064
  id="11111111-1111-1111-1111-111111111111", type="page"
1946
2065
  ),
1947
- "element_type": "something_else",
2066
+ "worker_version": "56785678-5678-5678-5678-567856785678",
2067
+ },
2068
+ ("11111111-1111-1111-1111-111111111111",),
2069
+ ),
2070
+ # Filter recursively on element and worker_version should give four transcriptions
2071
+ (
2072
+ {
2073
+ "element": CachedElement(
2074
+ id="11111111-1111-1111-1111-111111111111", type="page"
2075
+ ),
2076
+ "worker_version": "90129012-9012-9012-9012-901290129012",
1948
2077
  "recursive": True,
1949
2078
  },
1950
2079
  (
1951
2080
  "22222222-2222-2222-2222-222222222222",
2081
+ "33333333-3333-3333-3333-333333333333",
1952
2082
  "44444444-4444-4444-4444-444444444444",
1953
2083
  "55555555-5555-5555-5555-555555555555",
1954
2084
  ),
@@ -1965,14 +2095,17 @@ def test_list_transcriptions_manual_worker_version(responses, mock_elements_work
1965
2095
  ),
1966
2096
  ],
1967
2097
  )
1968
- def test_list_transcriptions_with_cache(
2098
+ def test_list_transcriptions_with_cache_deprecation(
1969
2099
  responses, mock_elements_worker_with_cache, filters, expected_ids
1970
2100
  ):
1971
2101
  # Check we have 6 transcriptions already present in database
1972
2102
  assert CachedTranscription.select().count() == 6
1973
2103
 
1974
- # Query database through cache
1975
- transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
2104
+ with pytest.deprecated_call(
2105
+ match="`worker_version` usage is deprecated. Consider using `worker_run` instead."
2106
+ ):
2107
+ # Query database through cache
2108
+ transcriptions = mock_elements_worker_with_cache.list_transcriptions(**filters)
1976
2109
  assert transcriptions.count() == len(expected_ids)
1977
2110
  for transcription, expected_id in zip(
1978
2111
  transcriptions.order_by(CachedTranscription.id), expected_ids, strict=True
@@ -20,7 +20,8 @@ def test_get_worker_version(fake_dummy_worker):
20
20
 
21
21
  api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
22
22
 
23
- res = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
23
+ with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
24
+ res = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
24
25
 
25
26
  assert res == response
26
27
  assert fake_dummy_worker._worker_version_cache[TEST_VERSION_ID] == response
@@ -33,8 +34,11 @@ def test_get_worker_version__uses_cache(fake_dummy_worker):
33
34
 
34
35
  api_client.add_response("RetrieveWorkerVersion", response, id=TEST_VERSION_ID)
35
36
 
36
- response_1 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
37
- response_2 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
37
+ with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
38
+ response_1 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
39
+
40
+ with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
41
+ response_2 = fake_dummy_worker.get_worker_version(TEST_VERSION_ID)
38
42
 
39
43
  assert response_1 == response
40
44
  assert response_1 == response_2
@@ -51,12 +55,17 @@ def test_get_worker_version_slug(mocker, fake_dummy_worker):
51
55
  "worker": {"slug": "mock_slug"},
52
56
  }
53
57
 
54
- slug = fake_dummy_worker.get_worker_version_slug(TEST_VERSION_ID)
58
+ with pytest.deprecated_call(match="WorkerVersion usage is deprecated."):
59
+ slug = fake_dummy_worker.get_worker_version_slug(TEST_VERSION_ID)
55
60
  assert slug == "mock_slug"
56
61
 
57
62
 
58
63
  def test_get_worker_version_slug_none(fake_dummy_worker):
59
- with pytest.raises(ValueError, match="No worker version ID"):
64
+ # WARNING: pytest.deprecated_call must be placed BEFORE pytest.raises, otherwise `match` argument won't be checked
65
+ with (
66
+ pytest.deprecated_call(match="WorkerVersion usage is deprecated."),
67
+ pytest.raises(ValueError, match="No worker version ID"),
68
+ ):
60
69
  fake_dummy_worker.get_worker_version_slug(None)
61
70
 
62
71
 
@@ -301,7 +310,7 @@ def test_start_activity_error(
301
310
  ),
302
311
  ]
303
312
  assert logger.error.call_args_list == [
304
- mocker.call("Ran on 1 elements: 0 completed, 1 failed")
313
+ mocker.call("Ran on 1 element: 0 completed, 1 failed")
305
314
  ]
306
315
 
307
316
 
@@ -459,6 +468,10 @@ def test_worker_config_multiple_source(
459
468
  "id": "12341234-1234-1234-1234-123412341234",
460
469
  "name": "Model version 1337",
461
470
  "configuration": model_config,
471
+ "model": {
472
+ "id": "hahahaha-haha-haha-haha-hahahahahaha",
473
+ "name": "My model",
474
+ },
462
475
  },
463
476
  "process": {
464
477
  "name": None,
tests/test_image.py CHANGED
@@ -7,6 +7,7 @@ from pathlib import Path
7
7
 
8
8
  import pytest
9
9
  from PIL import Image, ImageChops, ImageOps
10
+ from requests import HTTPError
10
11
 
11
12
  from arkindex_worker.cache import CachedElement, create_tables, init_cache_db
12
13
  from arkindex_worker.image import (
@@ -19,6 +20,7 @@ from arkindex_worker.image import (
19
20
  polygon_bounding_box,
20
21
  revert_orientation,
21
22
  trim_polygon,
23
+ upload_image,
22
24
  )
23
25
  from arkindex_worker.models import Element
24
26
 
@@ -547,3 +549,38 @@ def test_download_image_retry_with_max(responses):
547
549
  assert list(map(attrgetter("request.url"), responses.calls)) == [full_url] * 3 + [
548
550
  max_url
549
551
  ]
552
+
553
+
554
+ def test_upload_image_retries(responses):
555
+ dest_url = "https://blabla.com/iiif/2/image_path.jpg/full/full/0/default.jpg"
556
+ responses.add(
557
+ responses.PUT,
558
+ dest_url,
559
+ status=400,
560
+ )
561
+
562
+ image = Image.open(FULL_IMAGE).convert("RGB")
563
+ with pytest.raises(
564
+ HTTPError, match=f"400 Client Error: Bad Request for url: {dest_url}"
565
+ ):
566
+ upload_image(image, dest_url)
567
+
568
+ # We try 3 times
569
+ assert len(responses.calls) == 3
570
+ assert list(map(attrgetter("request.url"), responses.calls)) == [dest_url] * 3
571
+
572
+
573
+ def test_upload_image(responses):
574
+ dest_url = "https://blabla.com/iiif/2/image_path.jpg/full/full/0/default.jpg"
575
+ responses.add(
576
+ responses.PUT,
577
+ dest_url,
578
+ status=200,
579
+ )
580
+
581
+ image = Image.open(FULL_IMAGE).convert("RGB")
582
+ resp = upload_image(image, dest_url)
583
+ assert resp
584
+
585
+ assert len(responses.calls) == 1
586
+ assert list(map(attrgetter("request.url"), responses.calls)) == [dest_url]
tests/test_utils.py CHANGED
@@ -1,11 +1,33 @@
1
1
  from pathlib import Path
2
2
 
3
- from arkindex_worker.utils import close_delete_file, extract_tar_zst_archive
3
+ import pytest
4
+
5
+ from arkindex_worker.utils import (
6
+ close_delete_file,
7
+ extract_tar_zst_archive,
8
+ parse_source_id,
9
+ )
4
10
 
5
11
  FIXTURES = Path(__file__).absolute().parent / "data"
6
12
  ARCHIVE = FIXTURES / "archive.tar.zst"
7
13
 
8
14
 
15
+ @pytest.mark.parametrize(
16
+ ("source_id", "expected"),
17
+ [
18
+ (None, None),
19
+ ("", None),
20
+ (
21
+ "cafecafe-cafe-cafe-cafe-cafecafecafe",
22
+ "cafecafe-cafe-cafe-cafe-cafecafecafe",
23
+ ),
24
+ ("manual", False),
25
+ ],
26
+ )
27
+ def test_parse_source_id(source_id, expected):
28
+ assert parse_source_id(source_id) == expected
29
+
30
+
9
31
  def test_extract_tar_zst_archive(tmp_path):
10
32
  destination = tmp_path / "destination"
11
33
  _, archive_path = extract_tar_zst_archive(ARCHIVE, destination)
File without changes
@@ -0,0 +1,32 @@
1
+ import os
2
+
3
+ import pytest
4
+
5
+ from arkindex.mock import MockApiClient
6
+ from arkindex_worker.worker.base import BaseWorker
7
+
8
+
9
+ @pytest.fixture(autouse=True)
10
+ def _setup_environment(responses, monkeypatch) -> None:
11
+ """Setup needed environment variables"""
12
+
13
+ # Allow accessing remote API schemas
14
+ # defaulting to the prod environment
15
+ schema_url = os.environ.get(
16
+ "ARKINDEX_API_SCHEMA_URL",
17
+ "https://demo.arkindex.org/api/v1/openapi/?format=openapi-json",
18
+ )
19
+ responses.add_passthru(schema_url)
20
+
21
+ # Set schema url in environment
22
+ os.environ["ARKINDEX_API_SCHEMA_URL"] = schema_url
23
+ # Setup a fake worker run ID
24
+ os.environ["ARKINDEX_WORKER_RUN_ID"] = "1234-demo"
25
+ # Setup a fake corpus ID
26
+ os.environ["ARKINDEX_CORPUS_ID"] = "1234-corpus-id"
27
+
28
+ # Setup a mock api client instead of using a real one
29
+ def mock_setup_api_client(self):
30
+ self.api_client = MockApiClient()
31
+
32
+ monkeypatch.setattr(BaseWorker, "setup_api_client", mock_setup_api_client)
@@ -0,0 +1,12 @@
1
+ import importlib
2
+
3
+
4
+ def test_dummy():
5
+ assert True
6
+
7
+
8
+ def test_import():
9
+ """Import our newly created module, through importlib to avoid parsing issues"""
10
+ worker = importlib.import_module("worker_demo.worker")
11
+ assert hasattr(worker, "Demo")
12
+ assert hasattr(worker.Demo, "process_element")
@@ -0,0 +1,6 @@
1
+ import logging
2
+
3
+ logging.basicConfig(
4
+ level=logging.INFO,
5
+ format="%(asctime)s %(levelname)s/%(name)s: %(message)s",
6
+ )
@@ -0,0 +1,19 @@
1
+ from logging import Logger, getLogger
2
+
3
+ from arkindex_worker.models import Element
4
+ from arkindex_worker.worker import ElementsWorker
5
+
6
+ logger: Logger = getLogger(__name__)
7
+
8
+
9
+ class Demo(ElementsWorker):
10
+ def process_element(self, element: Element) -> None:
11
+ logger.info(f"Demo processing element ({element.id})")
12
+
13
+
14
+ def main() -> None:
15
+ Demo(description="Demo ML worker for Arkindex").run()
16
+
17
+
18
+ if __name__ == "__main__":
19
+ main()
@@ -1,41 +0,0 @@
1
- arkindex_worker/__init__.py,sha256=OlgCtTC9MaWeejviY0a3iQpALcRQGMVArFVVYwTF6I8,162
2
- arkindex_worker/cache.py,sha256=ZbXJo-O24W8x6nbS0IJm32Tas9CKLaHBBeyQyvF-Nyo,10903
3
- arkindex_worker/image.py,sha256=uwfUE9hy0Iw-e3vU7OHmLSqouxbznWq08SykXmPD1Cs,14107
4
- arkindex_worker/models.py,sha256=DVrZPIurSiOoHvj3t_Szwd0j1t6pnwBx_dqwhNakzN0,9528
5
- arkindex_worker/utils.py,sha256=_lC1-RYvNWXEkK-AuF4_FraoggP1tYPdalNFSj4jDb4,6885
6
- arkindex_worker/worker/__init__.py,sha256=Iun6jhuakKdCGKjQtgqDWEpWO1HrxK34RoxdzE5gcRs,19322
7
- arkindex_worker/worker/base.py,sha256=4eG4v4vejvFv9UtTRhxEZkXEBVzlFd3rILHK8lt-mbc,19397
8
- arkindex_worker/worker/classification.py,sha256=CoMIj7SFFlt90W1r5FQmsB80qK9Zfltcm3k-37FSHA0,10693
9
- arkindex_worker/worker/dataset.py,sha256=LRZU_KkOuCRkxlkdqw1PHYnu1zmoQfm_OiY8Sqt6mi0,2754
10
- arkindex_worker/worker/element.py,sha256=6lbJFu4vQrTOJvF4GqlnH3ynYtR-6WB5ljLV6wz7dGg,32283
11
- arkindex_worker/worker/entity.py,sha256=l0gCoeaoUBFU7pv2iC4pHkSQVjiIur4M15P7Mg_WlaA,13601
12
- arkindex_worker/worker/metadata.py,sha256=PnzyHkPyb-mtgItzRi4s-_f0dsEOM3ak8F_bFoqp3O0,6225
13
- arkindex_worker/worker/task.py,sha256=cz3wJNPgogZv1lm_3lm7WScitQtYQtL6H6I7Xokq208,1475
14
- arkindex_worker/worker/training.py,sha256=rhg4TPFo0ignnCkgbekUYmNXX8u2iZGyeM1VCs1R5kI,10140
15
- arkindex_worker/worker/transcription.py,sha256=OCjgajaPcX7CPHG1fT4nZccfp6bG6Mqdz3POPc9_iYQ,18765
16
- arkindex_worker/worker/version.py,sha256=uL-OrwuFZB8TNU6ePmdKIL3g3e-GE2tqHEWBRpXu-FU,1428
17
- tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- tests/conftest.py,sha256=9dmc6Lq-XpTZvv1hxfb0rl6zzfcwCtcPMqSBSeEONqc,21847
19
- tests/test_base_worker.py,sha256=AF1pjvNckN80LVyLJ4ILXJ122fEnWtztK7ZncVDI_Ms,24976
20
- tests/test_cache.py,sha256=ii0gyr0DrG7ChEs7pmT8hMdSguAOAcCze4bRMiFQxuk,10640
21
- tests/test_dataset_worker.py,sha256=XxBCLRroCIq97P37_qWc9I7QiyE3zUL7fLAw1J_BI7E,27703
22
- tests/test_element.py,sha256=2G9M15TLxQRmvrWM9Kw2ucnElh4kSv_oF_5FYwwAxTY,13181
23
- tests/test_image.py,sha256=nIT0NhmuHtD9I1rcMSvqSfjQqvTE5Ko8tAQGLIkm_zo,15232
24
- tests/test_merge.py,sha256=Q4zCbtZbe0wBfqE56gvAD06c6pDuhqnjKaioFqIgAQw,8331
25
- tests/test_utils.py,sha256=pFXegcBvIuy1tJDDSgQtCbC_tRaoLjd2055R5lu3hS0,1236
26
- tests/test_elements_worker/__init__.py,sha256=Fh4nkbbyJSMv_VtjQxnWrOqTnxXaaWI8S9WU0VrzCHs,179
27
- tests/test_elements_worker/test_classifications.py,sha256=0_6vryoQc2-s3CQWANsEvajkyC3aub34cxb3r97pRsk,32027
28
- tests/test_elements_worker/test_cli.py,sha256=DdCRKobesehL61c5QwaZOZCde7bsTlmeSN2iosQ5_2s,2873
29
- tests/test_elements_worker/test_dataset.py,sha256=ElDPrYTTt8KzaZ_Xf5uslUD6_kiGZybntO10uqCquLo,12063
30
- tests/test_elements_worker/test_elements.py,sha256=YQpF7y9depXElJcV9yECUGxVI7tF1kkJJ2ruYgXsVWE,79582
31
- tests/test_elements_worker/test_entities.py,sha256=ZOFB3ckKJvNG2kIPUX_kz_378k3uQrJmvYHpR_xiVuo,33789
32
- tests/test_elements_worker/test_metadata.py,sha256=c3kXPYRXVPDnGim28Ncg5YO4I0ejh3qyi7dBvbSYxMU,17739
33
- tests/test_elements_worker/test_task.py,sha256=FCpxE9UpouKXgjGvWgNHEai_Hiy2d1YmqRG-_v2s27s,6312
34
- tests/test_elements_worker/test_training.py,sha256=WeG-cDuJ-YhPgfKH47TtXBxyargtLuk7c8tsik2WnL8,8414
35
- tests/test_elements_worker/test_transcriptions.py,sha256=6UWGriQVwEORunJYW11mGcD16voZGFY41i_NIdXuqnI,68750
36
- tests/test_elements_worker/test_worker.py,sha256=zD8sY5yZFhuUr1txVX8z7bSgW4I2jNuzH5i1TM3qkZI,16491
37
- arkindex_base_worker-0.3.6rc5.dist-info/LICENSE,sha256=NVshRi1efwVezMfW7xXYLrdDr2Li1AfwfGOd5WuH1kQ,1063
38
- arkindex_base_worker-0.3.6rc5.dist-info/METADATA,sha256=E6SjEuSYO53WaxWUvFEPes6Tsap5wYoz87mhdoSeCkE,3413
39
- arkindex_base_worker-0.3.6rc5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
40
- arkindex_base_worker-0.3.6rc5.dist-info/top_level.txt,sha256=TtagLI8LSv7GE7nG8MQqDFAJ5bNDPJn7Z5vizOgrWkA,22
41
- arkindex_base_worker-0.3.6rc5.dist-info/RECORD,,